This article collects typical usage examples of the Java method org.apache.spark.rdd.RDD.collect. If you are wondering how RDD.collect is used from Java, what it does, or what real calling code looks like, the curated examples below should help. You can also explore the wider usage of its containing class, org.apache.spark.rdd.RDD.
The following shows 11 code examples of RDD.collect, ordered by popularity by default. You can upvote the examples you like or find useful; your feedback helps the site recommend better Java examples.
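Before the examples, here is a minimal, self-contained sketch of calling RDD.collect from Java (the class name, app name and data are illustrative, not taken from the examples below). Because collect() is a Scala API declared as returning Array[T], its erased return type seen from Java is Object, so the result has to be cast to an array type; the runtime array type depends on the RDD's ClassTag, which is why Object[] is the safe cast here:

import java.util.Arrays;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.rdd.RDD;

public class RddCollectSketch {
    public static void main(String[] args) {
        // Local Spark context purely for demonstration purposes
        final JavaSparkContext jsc = new JavaSparkContext(
                new SparkConf().setAppName("rdd-collect-sketch").setMaster("local[*]"));
        // Build a Scala RDD from a Java list via the JavaRDD wrapper
        final RDD<Integer> rdd = jsc.parallelize(Arrays.asList(1, 2, 3, 4)).rdd();
        // collect() materialises the distributed elements on the driver
        final Object[] values = (Object[]) rdd.collect();
        System.out.println(Arrays.toString(values));
        jsc.stop();
    }
}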
Example 1: testGetAllElementsInRDD
import org.apache.spark.rdd.RDD; // import the package/class the method depends on
private void testGetAllElementsInRDD(final Graph graph, final GetRDDOfAllElements getRDD) throws OperationException,
        IOException, InterruptedException, AccumuloSecurityException, AccumuloException {
    final Set<Element> expectedElements = new HashSet<>(getElements());
    final RDD<Element> rdd = graph.execute(getRDD, USER);
    if (rdd == null) {
        fail("No RDD returned");
    }
    final Set<Element> results = new HashSet<>();
    final Element[] returnedElements = (Element[]) rdd.collect();
    // Check the number of elements returned is correct to ensure edges
    // aren't returned twice
    assertEquals(30, returnedElements.length);
    for (int i = 0; i < returnedElements.length; i++) {
        results.add(returnedElements[i]);
    }
    assertEquals(expectedElements, results);
}
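The cast to Element[] above works because the Gaffer handler builds the RDD with a ClassTag for Element; with a plain Object ClassTag the collected array would be an Object[]. When that is uncertain, a more defensive pattern (a sketch building on the variables of the example above, not part of the original test; it requires org.apache.spark.api.java.JavaRDD) is to wrap the RDD as a JavaRDD, whose collect() returns a java.util.List and needs no array cast:

final JavaRDD<Element> javaRdd = rdd.toJavaRDD();
// JavaRDD.collect() returns List<Element>, so no array cast is needed
final Set<Element> results = new HashSet<>(javaRdd.collect());
assertEquals(expectedElements, results);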
Example 2: testGetAllElementsInRDDWithIngestAggregationApplied
import org.apache.spark.rdd.RDD; // import the package/class the method depends on
private void testGetAllElementsInRDDWithIngestAggregationApplied(final Graph graph, final GetRDDOfAllElements getRDD)
        throws OperationException {
    final RDD<Element> rdd = graph.execute(getRDD, USER);
    if (rdd == null) {
        fail("No RDD returned");
    }
    // Should get aggregated data
    final Element[] returnedElements = (Element[]) rdd.collect();
    assertEquals(1, returnedElements.length);
    final Entity entity1 = new Entity.Builder()
            .group(TestGroups.ENTITY)
            .vertex("A")
            .property("count", 2)
            .build();
    assertEquals(entity1, returnedElements[0]);
}
Example 3: testGetAllElementsInRDDWithView
import org.apache.spark.rdd.RDD; // import the package/class the method depends on
private void testGetAllElementsInRDDWithView(final Graph graph, final GetRDDOfAllElements getRDD) throws OperationException,
        IOException, InterruptedException, AccumuloSecurityException, AccumuloException {
    final Set<Element> expectedElements = new HashSet<>();
    getElements().stream()
            .filter(e -> e.getGroup().equals(TestGroups.EDGE))
            .map(e -> (Edge) e)
            .map(e -> {
                e.putProperty("newProperty", e.getSource().toString() + "," + e.getProperty(TestPropertyNames.COUNT));
                return e;
            })
            .filter(e -> e.getProperty("newProperty").equals("0,2"))
            .forEach(expectedElements::add);
    getRDD.setView(new View.Builder()
            .edge(TestGroups.EDGE, new ViewElementDefinition.Builder()
                    .transientProperty("newProperty", String.class)
                    .transformer(new ElementTransformer.Builder()
                            .select(IdentifierType.SOURCE.name(), TestPropertyNames.COUNT)
                            .execute(new Concat())
                            .project("newProperty")
                            .build())
                    .postTransformFilter(new ElementFilter.Builder()
                            .select("newProperty")
                            .execute(new IsEqual("0,2"))
                            .build())
                    .build())
            .build());
    final RDD<Element> rdd = graph.execute(getRDD, USER);
    if (rdd == null) {
        fail("No RDD returned");
    }
    final Set<Element> results = new HashSet<>();
    final Element[] returnedElements = (Element[]) rdd.collect();
    for (int i = 0; i < returnedElements.length; i++) {
        results.add(returnedElements[i]);
    }
    assertEquals(expectedElements, results);
}
Example 4: testGetAllElementsInRDDWithValidationApplied
import org.apache.spark.rdd.RDD; // import the package/class the method depends on
private void testGetAllElementsInRDDWithValidationApplied(final Graph graph, final GetRDDOfAllElements getRDD)
        throws InterruptedException, IOException, OperationException {
    // Sleep for 1 second to give chance for Entity A to age off
    Thread.sleep(1000L);
    final RDD<Element> rdd = graph.execute(getRDD, USER);
    if (rdd == null) {
        fail("No RDD returned");
    }
    // Should get Entity B but not Entity A
    final Element[] returnedElements = (Element[]) rdd.collect();
    assertEquals(1, returnedElements.length);
    assertEquals(entityRetainedAfterValidation, returnedElements[0]);
}
Example 5: testBuildScanWithView
import org.apache.spark.rdd.RDD; // import the package/class the method depends on
private void testBuildScanWithView(final String name, final View view, final Predicate<Element> returnElement)
        throws OperationException, StoreException {
    // Given
    final SparkSession sparkSession = SparkSessionProvider.getSparkSession();
    final Schema schema = getSchema();
    final AccumuloProperties properties = AccumuloProperties
            .loadStoreProperties(AccumuloStoreRelationTest.class.getResourceAsStream("/store.properties"));
    final SingleUseMockAccumuloStore store = new SingleUseMockAccumuloStore();
    store.initialise("graphId", schema, properties);
    addElements(store);
    // When
    final AccumuloStoreRelation relation = new AccumuloStoreRelation(
            SparkContextUtil.createContext(new User(), sparkSession),
            Collections.emptyList(), view,
            store, null);
    final RDD<Row> rdd = relation.buildScan();
    final Row[] returnedElements = (Row[]) rdd.collect();
    // Then
    // - Actual results are:
    final Set<Row> results = new HashSet<>();
    for (int i = 0; i < returnedElements.length; i++) {
        results.add(returnedElements[i]);
    }
    // - Expected results are:
    final SchemaToStructTypeConverter schemaConverter = new SchemaToStructTypeConverter(schema, view,
            new ArrayList<>());
    final ConvertElementToRow elementConverter = new ConvertElementToRow(schemaConverter.getUsedProperties(),
            schemaConverter.getPropertyNeedsConversion(), schemaConverter.getConverterByProperty());
    final Set<Row> expectedRows = new HashSet<>();
    Streams.toStream(getElements())
            .filter(returnElement)
            .map(elementConverter::apply)
            .forEach(expectedRows::add);
    assertEquals(expectedRows, results);
}
Example 6: testBuildScanSpecifyColumnsWithView
import org.apache.spark.rdd.RDD; // import the package/class the method depends on
private void testBuildScanSpecifyColumnsWithView(final View view, final String[] requiredColumns,
                                                 final Predicate<Element> returnElement)
        throws OperationException, StoreException {
    // Given
    final SparkSession sparkSession = SparkSessionProvider.getSparkSession();
    final Schema schema = getSchema();
    final AccumuloProperties properties = AccumuloProperties
            .loadStoreProperties(getClass().getResourceAsStream("/store.properties"));
    final SingleUseMockAccumuloStore store = new SingleUseMockAccumuloStore();
    store.initialise("graphId", schema, properties);
    addElements(store);
    // When
    final AccumuloStoreRelation relation = new AccumuloStoreRelation(
            SparkContextUtil.createContext(new User(), sparkSession),
            Collections.emptyList(), view,
            store, null);
    final RDD<Row> rdd = relation.buildScan(requiredColumns);
    final Row[] returnedElements = (Row[]) rdd.collect();
    // Then
    // - Actual results are:
    final Set<Row> results = new HashSet<>();
    for (int i = 0; i < returnedElements.length; i++) {
        results.add(returnedElements[i]);
    }
    // - Expected results are:
    final SchemaToStructTypeConverter schemaConverter = new SchemaToStructTypeConverter(schema, view,
            new ArrayList<>());
    final ConvertElementToRow elementConverter = new ConvertElementToRow(new LinkedHashSet<>(Arrays.asList(requiredColumns)),
            schemaConverter.getPropertyNeedsConversion(), schemaConverter.getConverterByProperty());
    final Set<Row> expectedRows = new HashSet<>();
    Streams.toStream(getElements())
            .filter(returnElement)
            .map(elementConverter::apply)
            .forEach(expectedRows::add);
    assertEquals(expectedRows, results);
}
Example 7: testBuildScanSpecifyColumnsAndFiltersWithView
import org.apache.spark.rdd.RDD; // import the package/class the method depends on
private void testBuildScanSpecifyColumnsAndFiltersWithView(final View view,
                                                           final String[] requiredColumns,
                                                           final Filter[] filters,
                                                           final Predicate<Element> returnElement)
        throws OperationException, StoreException {
    // Given
    final SparkSession sparkSession = SparkSessionProvider.getSparkSession();
    final Schema schema = getSchema();
    final AccumuloProperties properties = AccumuloProperties
            .loadStoreProperties(getClass().getResourceAsStream("/store.properties"));
    final SingleUseMockAccumuloStore store = new SingleUseMockAccumuloStore();
    store.initialise("graphId", schema, properties);
    addElements(store);
    // When
    final AccumuloStoreRelation relation = new AccumuloStoreRelation(
            SparkContextUtil.createContext(new User(), sparkSession),
            Collections.emptyList(), view,
            store, null);
    final RDD<Row> rdd = relation.buildScan(requiredColumns, filters);
    final Row[] returnedElements = (Row[]) rdd.collect();
    // Then
    // - Actual results are:
    final Set<Row> results = new HashSet<>();
    for (int i = 0; i < returnedElements.length; i++) {
        results.add(returnedElements[i]);
    }
    // - Expected results are:
    final SchemaToStructTypeConverter schemaConverter = new SchemaToStructTypeConverter(schema, view,
            new ArrayList<>());
    final ConvertElementToRow elementConverter = new ConvertElementToRow(new LinkedHashSet<>(Arrays.asList(requiredColumns)),
            schemaConverter.getPropertyNeedsConversion(), schemaConverter.getConverterByProperty());
    final Set<Row> expectedRows = new HashSet<>();
    Streams.toStream(getElements())
            .filter(returnElement)
            .map(elementConverter::apply)
            .forEach(expectedRows::add);
    assertEquals(expectedRows, results);
}
Example 8: testOutputRDDStringIJVFromMatrixDML
import org.apache.spark.rdd.RDD; // import the package/class the method depends on
@Test
public void testOutputRDDStringIJVFromMatrixDML() {
    System.out.println("MLContextTest - output RDD String IJV from matrix DML");
    String s = "M = matrix('1 2 3 4', rows=2, cols=2);";
    Script script = dml(s).out("M");
    RDD<String> rddStringIJV = ml.execute(script).getMatrix("M").toRDDStringIJV();
    String[] rows = (String[]) rddStringIJV.collect();
    Arrays.sort(rows);
    Assert.assertEquals("1 1 1.0", rows[0]);
    Assert.assertEquals("1 2 2.0", rows[1]);
    Assert.assertEquals("2 1 3.0", rows[2]);
    Assert.assertEquals("2 2 4.0", rows[3]);
}
Example 9: testGetAllElementsInRDDWithVisibilityFilteringApplied
import org.apache.spark.rdd.RDD; // import the package/class the method depends on
private void testGetAllElementsInRDDWithVisibilityFilteringApplied(final Graph graph,
                                                                   final GetRDDOfAllElements getRDD)
        throws OperationException, IOException, InterruptedException, AccumuloSecurityException, AccumuloException {
    final Set<Element> expectedElements = new HashSet<>();
    // Test with user with public visibility
    getElementsWithVisibilities()
            .stream()
            .filter(e -> e.getProperty(TestPropertyNames.VISIBILITY).equals("public"))
            .forEach(expectedElements::add);
    RDD<Element> rdd = graph.execute(getRDD, USER_WITH_PUBLIC);
    if (rdd == null) {
        fail("No RDD returned");
    }
    final Set<Element> results = new HashSet<>();
    Element[] returnedElements = (Element[]) rdd.collect();
    for (int i = 0; i < returnedElements.length; i++) {
        results.add(returnedElements[i]);
    }
    assertEquals(expectedElements, results);
    // Test with user with public and private visibility
    getElementsWithVisibilities().forEach(expectedElements::add);
    rdd = graph.execute(getRDD, USER_WITH_PUBLIC_AND_PRIVATE);
    if (rdd == null) {
        fail("No RDD returned");
    }
    results.clear();
    returnedElements = (Element[]) rdd.collect();
    for (int i = 0; i < returnedElements.length; i++) {
        results.add(returnedElements[i]);
    }
    assertEquals(expectedElements, results);
    // Test with user with no visibilities
    rdd = graph.execute(getRDD, USER);
    if (rdd == null) {
        fail("No RDD returned");
    }
    results.clear();
    returnedElements = (Element[]) rdd.collect();
    assertEquals(0, returnedElements.length);
}
Example 10: checkImportRDDOfElements
import org.apache.spark.rdd.RDD; // import the package/class the method depends on
@Test
public void checkImportRDDOfElements() throws OperationException, IOException {
    final Graph graph1 = new Graph.Builder()
            .config(new GraphConfig.Builder()
                    .graphId("graphId")
                    .build())
            .addSchema(getClass().getResourceAsStream("/schema/elements.json"))
            .addSchema(getClass().getResourceAsStream("/schema/types.json"))
            .addSchema(getClass().getResourceAsStream("/schema/serialisation.json"))
            .storeProperties(getClass().getResourceAsStream("/store.properties"))
            .build();
    final ArrayBuffer<Element> elements = new ArrayBuffer<>();
    for (int i = 0; i < 10; i++) {
        final Entity entity = new Entity.Builder()
                .group(TestGroups.ENTITY)
                .vertex("" + i)
                .build();
        final Edge edge1 = new Edge.Builder()
                .group(TestGroups.EDGE)
                .source("" + i)
                .dest("B")
                .directed(false)
                .property(TestPropertyNames.COUNT, 2)
                .build();
        final Edge edge2 = new Edge.Builder()
                .group(TestGroups.EDGE)
                .source("" + i)
                .dest("C")
                .directed(false)
                .property(TestPropertyNames.COUNT, 4)
                .build();
        elements.$plus$eq(edge1);
        elements.$plus$eq(edge2);
        elements.$plus$eq(entity);
    }
    final User user = new User();
    final SparkSession sparkSession = SparkSessionProvider.getSparkSession();
    // Create Hadoop configuration and serialise to a string
    final Configuration configuration = new Configuration();
    final String configurationString = AbstractGetRDDHandler
            .convertConfigurationToString(configuration);
    final String outputPath = testFolder.getRoot().getAbsolutePath() + "/output";
    final String failurePath = testFolder.getRoot().getAbsolutePath() + "/failure";
    final RDD<Element> elementRDD = sparkSession.sparkContext().parallelize(elements, 8, ELEMENT_CLASS_TAG);
    final ImportRDDOfElements addRdd = new ImportRDDOfElements.Builder()
            .input(elementRDD)
            .option("outputPath", outputPath)
            .option("failurePath", failurePath)
            .build();
    graph1.execute(addRdd, user);
    // Check all elements were added
    final GetRDDOfAllElements rddQuery = new GetRDDOfAllElements.Builder()
            .option(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString)
            .build();
    final RDD<Element> rdd = graph1.execute(rddQuery, user);
    if (rdd == null) {
        fail("No RDD returned");
    }
    final Set<Element> results = new HashSet<>();
    final Element[] returnedElements = (Element[]) rdd.collect();
    for (int i = 0; i < returnedElements.length; i++) {
        results.add(returnedElements[i]);
    }
    assertEquals(elements.size(), results.size());
}
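The ELEMENT_CLASS_TAG constant passed to parallelize above is not shown in this excerpt; SparkContext.parallelize is a Scala API, so a ClassTag must be supplied when it is called from Java. A plausible definition (an assumption, not the test's actual code) can be built from the ClassTag companion object; the $plus$eq calls are simply the JVM-level name of Scala's ArrayBuffer += operator:

import scala.reflect.ClassTag;
import scala.reflect.ClassTag$;

// Hypothetical equivalent of the ELEMENT_CLASS_TAG constant referenced in these tests
private static final ClassTag<Element> ELEMENT_CLASS_TAG = ClassTag$.MODULE$.apply(Element.class);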
Example 11: checkImportRDDOfElements
import org.apache.spark.rdd.RDD; // import the package/class the method depends on
@Test
public void checkImportRDDOfElements() throws OperationException, IOException {
    final Graph graph1 = new Graph.Builder()
            .config(new GraphConfig.Builder()
                    .graphId("graphId")
                    .build())
            .addSchema(getClass().getResourceAsStream("/schema/elements.json"))
            .addSchema(getClass().getResourceAsStream("/schema/types.json"))
            .addSchema(getClass().getResourceAsStream("/schema/serialisation.json"))
            .storeProperties(getClass().getResourceAsStream("/store.properties"))
            .build();
    final ArrayBuffer<Element> elements = new ArrayBuffer<>();
    for (int i = 0; i < 10; i++) {
        final Entity entity = new Entity.Builder()
                .group(TestGroups.ENTITY)
                .vertex("" + i)
                .build();
        final Edge edge1 = new Edge.Builder()
                .group(TestGroups.EDGE)
                .source("" + i)
                .dest("B")
                .directed(false)
                .property(TestPropertyNames.COUNT, 2)
                .build();
        final Edge edge2 = new Edge.Builder()
                .group(TestGroups.EDGE)
                .source("" + i)
                .dest("C")
                .directed(false)
                .property(TestPropertyNames.COUNT, 4)
                .build();
        elements.$plus$eq(edge1);
        elements.$plus$eq(edge2);
        elements.$plus$eq(entity);
    }
    final User user = new User();
    final SparkSession sparkSession = SparkSessionProvider.getSparkSession();
    // Create Hadoop configuration and serialise to a string
    final Configuration configuration = new Configuration();
    final String configurationString = AbstractGetRDDHandler
            .convertConfigurationToString(configuration);
    final String outputPath = testFolder.getRoot().getAbsolutePath() + "/output";
    final String failurePath = testFolder.getRoot().getAbsolutePath() + "/failure";
    final ElementConverterFunction func = new ElementConverterFunction(sparkSession.sparkContext()
            .broadcast(new ByteEntityAccumuloElementConverter(graph1.getSchema()), ACCUMULO_ELEMENT_CONVERTER_CLASS_TAG));
    final RDD<Tuple2<Key, Value>> elementRDD = sparkSession.sparkContext()
            .parallelize(elements, 1, ELEMENT_CLASS_TAG)
            .flatMap(func, TUPLE2_CLASS_TAG);
    final ImportKeyValuePairRDDToAccumulo addRdd = new ImportKeyValuePairRDDToAccumulo.Builder()
            .input(elementRDD)
            .outputPath(outputPath)
            .failurePath(failurePath)
            .build();
    graph1.execute(addRdd, user);
    // Check all elements were added
    final GetRDDOfAllElements rddQuery = new GetRDDOfAllElements.Builder()
            .option(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString)
            .build();
    final RDD<Element> rdd = graph1.execute(rddQuery, user);
    if (rdd == null) {
        fail("No RDD returned");
    }
    final Set<Element> results = new HashSet<>();
    final Element[] returnedElements = (Element[]) rdd.collect();
    Collections.addAll(results, returnedElements);
    assertEquals(elements.size(), results.size());
}