This article collects typical usage examples of the Java method org.apache.spark.rdd.RDD.collect. If you are wondering how RDD.collect is used from Java, what it does, or what real calling code looks like, the curated examples below should help. You can also explore the wider usage of its containing class, org.apache.spark.rdd.RDD.
The following shows 11 code examples of RDD.collect, ordered by popularity by default. You can upvote the examples you like or find useful; your feedback helps the site recommend better Java examples.
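Before the examples, here is a minimal, self-contained sketch of calling RDD.collect from Java (the class name, app name and data are illustrative, not taken from the examples below). Because collect() is a Scala API declared as returning Array[T], its erased return type seen from Java is Object, so the result has to be cast to an array type; the runtime array type depends on the RDD's ClassTag, which is why Object[] is the safe cast here:

import java.util.Arrays;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.rdd.RDD;

public class RddCollectSketch {
    public static void main(String[] args) {
        // Local Spark context purely for demonstration purposes
        final JavaSparkContext jsc = new JavaSparkContext(
                new SparkConf().setAppName("rdd-collect-sketch").setMaster("local[*]"));
        // Build a Scala RDD from a Java list via the JavaRDD wrapper
        final RDD<Integer> rdd = jsc.parallelize(Arrays.asList(1, 2, 3, 4)).rdd();
        // collect() materialises the distributed elements on the driver
        final Object[] values = (Object[]) rdd.collect();
        System.out.println(Arrays.toString(values));
        jsc.stop();
    }
}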
Example 1: testGetAllElementsInRDD
import org.apache.spark.rdd.RDD; // import the package/class the method depends on
private void testGetAllElementsInRDD(final Graph graph, final GetRDDOfAllElements getRDD) throws OperationException,
        IOException, InterruptedException, AccumuloSecurityException, AccumuloException {
    final Set<Element> expectedElements = new HashSet<>(getElements());
    final RDD<Element> rdd = graph.execute(getRDD, USER);
    if (rdd == null) {
        fail("No RDD returned");
    }
    final Set<Element> results = new HashSet<>();
    final Element[] returnedElements = (Element[]) rdd.collect();
    // Check the number of elements returned is correct to ensure edges
    // aren't returned twice
    assertEquals(30, returnedElements.length);
    for (int i = 0; i < returnedElements.length; i++) {
        results.add(returnedElements[i]);
    }
    assertEquals(expectedElements, results);
}
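The cast to Element[] above works because the Gaffer handler builds the RDD with a ClassTag for Element; with a plain Object ClassTag the collected array would be an Object[]. When that is uncertain, a more defensive pattern (a sketch building on the variables of the example above, not part of the original test; it requires org.apache.spark.api.java.JavaRDD) is to wrap the RDD as a JavaRDD, whose collect() returns a java.util.List and needs no array cast:

final JavaRDD<Element> javaRdd = rdd.toJavaRDD();
// JavaRDD.collect() returns List<Element>, so no array cast is needed
final Set<Element> results = new HashSet<>(javaRdd.collect());
assertEquals(expectedElements, results);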
Example 2: testGetAllElementsInRDDWithIngestAggregationApplied
import org.apache.spark.rdd.RDD; // import the package/class the method depends on
private void testGetAllElementsInRDDWithIngestAggregationApplied(final Graph graph, final GetRDDOfAllElements getRDD)
        throws OperationException {
    final RDD<Element> rdd = graph.execute(getRDD, USER);
    if (rdd == null) {
        fail("No RDD returned");
    }
    // Should get aggregated data
    final Element[] returnedElements = (Element[]) rdd.collect();
    assertEquals(1, returnedElements.length);
    final Entity entity1 = new Entity.Builder()
            .group(TestGroups.ENTITY)
            .vertex("A")
            .property("count", 2)
            .build();
    assertEquals(entity1, returnedElements[0]);
}
Example 3: testGetAllElementsInRDDWithView
import org.apache.spark.rdd.RDD; // import the package/class the method depends on
private void testGetAllElementsInRDDWithView(final Graph graph, final GetRDDOfAllElements getRDD) throws OperationException,
        IOException, InterruptedException, AccumuloSecurityException, AccumuloException {
    final Set<Element> expectedElements = new HashSet<>();
    getElements().stream()
            .filter(e -> e.getGroup().equals(TestGroups.EDGE))
            .map(e -> (Edge) e)
            .map(e -> {
                e.putProperty("newProperty", e.getSource().toString() + "," + e.getProperty(TestPropertyNames.COUNT));
                return e;
            })
            .filter(e -> e.getProperty("newProperty").equals("0,2"))
            .forEach(expectedElements::add);
    getRDD.setView(new View.Builder()
            .edge(TestGroups.EDGE, new ViewElementDefinition.Builder()
                    .transientProperty("newProperty", String.class)
                    .transformer(new ElementTransformer.Builder()
                            .select(IdentifierType.SOURCE.name(), TestPropertyNames.COUNT)
                            .execute(new Concat())
                            .project("newProperty")
                            .build())
                    .postTransformFilter(new ElementFilter.Builder()
                            .select("newProperty")
                            .execute(new IsEqual("0,2"))
                            .build())
                    .build())
            .build());
    final RDD<Element> rdd = graph.execute(getRDD, USER);
    if (rdd == null) {
        fail("No RDD returned");
    }
    final Set<Element> results = new HashSet<>();
    final Element[] returnedElements = (Element[]) rdd.collect();
    for (int i = 0; i < returnedElements.length; i++) {
        results.add(returnedElements[i]);
    }
    assertEquals(expectedElements, results);
}
Example 4: testGetAllElementsInRDDWithValidationApplied
import org.apache.spark.rdd.RDD; // import the package/class the method depends on
private void testGetAllElementsInRDDWithValidationApplied(final Graph graph, final GetRDDOfAllElements getRDD)
        throws InterruptedException, IOException, OperationException {
    // Sleep for 1 second to give chance for Entity A to age off
    Thread.sleep(1000L);
    final RDD<Element> rdd = graph.execute(getRDD, USER);
    if (rdd == null) {
        fail("No RDD returned");
    }
    // Should get Entity B but not Entity A
    final Element[] returnedElements = (Element[]) rdd.collect();
    assertEquals(1, returnedElements.length);
    assertEquals(entityRetainedAfterValidation, returnedElements[0]);
}
Example 5: testBuildScanWithView
import org.apache.spark.rdd.RDD; // import the package/class the method depends on
private void testBuildScanWithView(final String name, final View view, final Predicate<Element> returnElement)
        throws OperationException, StoreException {
    // Given
    final SparkSession sparkSession = SparkSessionProvider.getSparkSession();
    final Schema schema = getSchema();
    final AccumuloProperties properties = AccumuloProperties
            .loadStoreProperties(AccumuloStoreRelationTest.class.getResourceAsStream("/store.properties"));
    final SingleUseMockAccumuloStore store = new SingleUseMockAccumuloStore();
    store.initialise("graphId", schema, properties);
    addElements(store);
    // When
    final AccumuloStoreRelation relation = new AccumuloStoreRelation(
            SparkContextUtil.createContext(new User(), sparkSession),
            Collections.emptyList(), view,
            store, null);
    final RDD<Row> rdd = relation.buildScan();
    final Row[] returnedElements = (Row[]) rdd.collect();
    // Then
    // - Actual results are:
    final Set<Row> results = new HashSet<>();
    for (int i = 0; i < returnedElements.length; i++) {
        results.add(returnedElements[i]);
    }
    // - Expected results are:
    final SchemaToStructTypeConverter schemaConverter = new SchemaToStructTypeConverter(schema, view,
            new ArrayList<>());
    final ConvertElementToRow elementConverter = new ConvertElementToRow(schemaConverter.getUsedProperties(),
            schemaConverter.getPropertyNeedsConversion(), schemaConverter.getConverterByProperty());
    final Set<Row> expectedRows = new HashSet<>();
    Streams.toStream(getElements())
            .filter(returnElement)
            .map(elementConverter::apply)
            .forEach(expectedRows::add);
    assertEquals(expectedRows, results);
}
Example 6: testBuildScanSpecifyColumnsWithView
import org.apache.spark.rdd.RDD; // import the package/class the method depends on
private void testBuildScanSpecifyColumnsWithView(final View view, final String[] requiredColumns,
                                                 final Predicate<Element> returnElement)
        throws OperationException, StoreException {
    // Given
    final SparkSession sparkSession = SparkSessionProvider.getSparkSession();
    final Schema schema = getSchema();
    final AccumuloProperties properties = AccumuloProperties
            .loadStoreProperties(getClass().getResourceAsStream("/store.properties"));
    final SingleUseMockAccumuloStore store = new SingleUseMockAccumuloStore();
    store.initialise("graphId", schema, properties);
    addElements(store);
    // When
    final AccumuloStoreRelation relation = new AccumuloStoreRelation(
            SparkContextUtil.createContext(new User(), sparkSession),
            Collections.emptyList(), view,
            store, null);
    final RDD<Row> rdd = relation.buildScan(requiredColumns);
    final Row[] returnedElements = (Row[]) rdd.collect();
    // Then
    // - Actual results are:
    final Set<Row> results = new HashSet<>();
    for (int i = 0; i < returnedElements.length; i++) {
        results.add(returnedElements[i]);
    }
    // - Expected results are:
    final SchemaToStructTypeConverter schemaConverter = new SchemaToStructTypeConverter(schema, view,
            new ArrayList<>());
    final ConvertElementToRow elementConverter = new ConvertElementToRow(new LinkedHashSet<>(Arrays.asList(requiredColumns)),
            schemaConverter.getPropertyNeedsConversion(), schemaConverter.getConverterByProperty());
    final Set<Row> expectedRows = new HashSet<>();
    Streams.toStream(getElements())
            .filter(returnElement)
            .map(elementConverter::apply)
            .forEach(expectedRows::add);
    assertEquals(expectedRows, results);
}
Example 7: testBuildScanSpecifyColumnsAndFiltersWithView
import org.apache.spark.rdd.RDD; // import the package/class the method depends on
private void testBuildScanSpecifyColumnsAndFiltersWithView(final View view,
                                                           final String[] requiredColumns,
                                                           final Filter[] filters,
                                                           final Predicate<Element> returnElement)
        throws OperationException, StoreException {
    // Given
    final SparkSession sparkSession = SparkSessionProvider.getSparkSession();
    final Schema schema = getSchema();
    final AccumuloProperties properties = AccumuloProperties
            .loadStoreProperties(getClass().getResourceAsStream("/store.properties"));
    final SingleUseMockAccumuloStore store = new SingleUseMockAccumuloStore();
    store.initialise("graphId", schema, properties);
    addElements(store);
    // When
    final AccumuloStoreRelation relation = new AccumuloStoreRelation(
            SparkContextUtil.createContext(new User(), sparkSession),
            Collections.emptyList(), view,
            store, null);
    final RDD<Row> rdd = relation.buildScan(requiredColumns, filters);
    final Row[] returnedElements = (Row[]) rdd.collect();
    // Then
    // - Actual results are:
    final Set<Row> results = new HashSet<>();
    for (int i = 0; i < returnedElements.length; i++) {
        results.add(returnedElements[i]);
    }
    // - Expected results are:
    final SchemaToStructTypeConverter schemaConverter = new SchemaToStructTypeConverter(schema, view,
            new ArrayList<>());
    final ConvertElementToRow elementConverter = new ConvertElementToRow(new LinkedHashSet<>(Arrays.asList(requiredColumns)),
            schemaConverter.getPropertyNeedsConversion(), schemaConverter.getConverterByProperty());
    final Set<Row> expectedRows = new HashSet<>();
    Streams.toStream(getElements())
            .filter(returnElement)
            .map(elementConverter::apply)
            .forEach(expectedRows::add);
    assertEquals(expectedRows, results);
}
Example 8: testOutputRDDStringIJVFromMatrixDML
import org.apache.spark.rdd.RDD; // import the package/class the method depends on
@Test
public void testOutputRDDStringIJVFromMatrixDML() {
    System.out.println("MLContextTest - output RDD String IJV from matrix DML");
    String s = "M = matrix('1 2 3 4', rows=2, cols=2);";
    Script script = dml(s).out("M");
    RDD<String> rddStringIJV = ml.execute(script).getMatrix("M").toRDDStringIJV();
    String[] rows = (String[]) rddStringIJV.collect();
    Arrays.sort(rows);
    Assert.assertEquals("1 1 1.0", rows[0]);
    Assert.assertEquals("1 2 2.0", rows[1]);
    Assert.assertEquals("2 1 3.0", rows[2]);
    Assert.assertEquals("2 2 4.0", rows[3]);
}
Example 9: testGetAllElementsInRDDWithVisibilityFilteringApplied
import org.apache.spark.rdd.RDD; // import the package/class the method depends on
private void testGetAllElementsInRDDWithVisibilityFilteringApplied(final Graph graph,
                                                                   final GetRDDOfAllElements getRDD)
        throws OperationException, IOException, InterruptedException, AccumuloSecurityException, AccumuloException {
    final Set<Element> expectedElements = new HashSet<>();
    // Test with user with public visibility
    getElementsWithVisibilities()
            .stream()
            .filter(e -> e.getProperty(TestPropertyNames.VISIBILITY).equals("public"))
            .forEach(expectedElements::add);
    RDD<Element> rdd = graph.execute(getRDD, USER_WITH_PUBLIC);
    if (rdd == null) {
        fail("No RDD returned");
    }
    final Set<Element> results = new HashSet<>();
    Element[] returnedElements = (Element[]) rdd.collect();
    for (int i = 0; i < returnedElements.length; i++) {
        results.add(returnedElements[i]);
    }
    assertEquals(expectedElements, results);
    // Test with user with public and private visibility
    getElementsWithVisibilities().forEach(expectedElements::add);
    rdd = graph.execute(getRDD, USER_WITH_PUBLIC_AND_PRIVATE);
    if (rdd == null) {
        fail("No RDD returned");
    }
    results.clear();
    returnedElements = (Element[]) rdd.collect();
    for (int i = 0; i < returnedElements.length; i++) {
        results.add(returnedElements[i]);
    }
    assertEquals(expectedElements, results);
    // Test with user with no visibilities
    rdd = graph.execute(getRDD, USER);
    if (rdd == null) {
        fail("No RDD returned");
    }
    results.clear();
    returnedElements = (Element[]) rdd.collect();
    assertEquals(0, returnedElements.length);
}
Example 10: checkImportRDDOfElements
import org.apache.spark.rdd.RDD; // import the package/class the method depends on
@Test
public void checkImportRDDOfElements() throws OperationException, IOException {
    final Graph graph1 = new Graph.Builder()
            .config(new GraphConfig.Builder()
                    .graphId("graphId")
                    .build())
            .addSchema(getClass().getResourceAsStream("/schema/elements.json"))
            .addSchema(getClass().getResourceAsStream("/schema/types.json"))
            .addSchema(getClass().getResourceAsStream("/schema/serialisation.json"))
            .storeProperties(getClass().getResourceAsStream("/store.properties"))
            .build();
    final ArrayBuffer<Element> elements = new ArrayBuffer<>();
    for (int i = 0; i < 10; i++) {
        final Entity entity = new Entity.Builder()
                .group(TestGroups.ENTITY)
                .vertex("" + i)
                .build();
        final Edge edge1 = new Edge.Builder()
                .group(TestGroups.EDGE)
                .source("" + i)
                .dest("B")
                .directed(false)
                .property(TestPropertyNames.COUNT, 2)
                .build();
        final Edge edge2 = new Edge.Builder()
                .group(TestGroups.EDGE)
                .source("" + i)
                .dest("C")
                .directed(false)
                .property(TestPropertyNames.COUNT, 4)
                .build();
        elements.$plus$eq(edge1);
        elements.$plus$eq(edge2);
        elements.$plus$eq(entity);
    }
    final User user = new User();
    final SparkSession sparkSession = SparkSessionProvider.getSparkSession();
    // Create Hadoop configuration and serialise to a string
    final Configuration configuration = new Configuration();
    final String configurationString = AbstractGetRDDHandler
            .convertConfigurationToString(configuration);
    final String outputPath = testFolder.getRoot().getAbsolutePath() + "/output";
    final String failurePath = testFolder.getRoot().getAbsolutePath() + "/failure";
    final RDD<Element> elementRDD = sparkSession.sparkContext().parallelize(elements, 8, ELEMENT_CLASS_TAG);
    final ImportRDDOfElements addRdd = new ImportRDDOfElements.Builder()
            .input(elementRDD)
            .option("outputPath", outputPath)
            .option("failurePath", failurePath)
            .build();
    graph1.execute(addRdd, user);
    // Check all elements were added
    final GetRDDOfAllElements rddQuery = new GetRDDOfAllElements.Builder()
            .option(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString)
            .build();
    final RDD<Element> rdd = graph1.execute(rddQuery, user);
    if (rdd == null) {
        fail("No RDD returned");
    }
    final Set<Element> results = new HashSet<>();
    final Element[] returnedElements = (Element[]) rdd.collect();
    for (int i = 0; i < returnedElements.length; i++) {
        results.add(returnedElements[i]);
    }
    assertEquals(elements.size(), results.size());
}
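The ELEMENT_CLASS_TAG constant passed to parallelize above is not shown in this excerpt; SparkContext.parallelize is a Scala API, so a ClassTag must be supplied when it is called from Java. A plausible definition (an assumption, not the test's actual code) can be built from the ClassTag companion object; the $plus$eq calls are simply the JVM-level name of Scala's ArrayBuffer += operator:

import scala.reflect.ClassTag;
import scala.reflect.ClassTag$;

// Hypothetical equivalent of the ELEMENT_CLASS_TAG constant referenced in these tests
private static final ClassTag<Element> ELEMENT_CLASS_TAG = ClassTag$.MODULE$.apply(Element.class);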
Example 11: checkImportRDDOfElements
import org.apache.spark.rdd.RDD; // import the package/class the method depends on
@Test
public void checkImportRDDOfElements() throws OperationException, IOException {
    final Graph graph1 = new Graph.Builder()
            .config(new GraphConfig.Builder()
                    .graphId("graphId")
                    .build())
            .addSchema(getClass().getResourceAsStream("/schema/elements.json"))
            .addSchema(getClass().getResourceAsStream("/schema/types.json"))
            .addSchema(getClass().getResourceAsStream("/schema/serialisation.json"))
            .storeProperties(getClass().getResourceAsStream("/store.properties"))
            .build();
    final ArrayBuffer<Element> elements = new ArrayBuffer<>();
    for (int i = 0; i < 10; i++) {
        final Entity entity = new Entity.Builder()
                .group(TestGroups.ENTITY)
                .vertex("" + i)
                .build();
        final Edge edge1 = new Edge.Builder()
                .group(TestGroups.EDGE)
                .source("" + i)
                .dest("B")
                .directed(false)
                .property(TestPropertyNames.COUNT, 2)
                .build();
        final Edge edge2 = new Edge.Builder()
                .group(TestGroups.EDGE)
                .source("" + i)
                .dest("C")
                .directed(false)
                .property(TestPropertyNames.COUNT, 4)
                .build();
        elements.$plus$eq(edge1);
        elements.$plus$eq(edge2);
        elements.$plus$eq(entity);
    }
    final User user = new User();
    final SparkSession sparkSession = SparkSessionProvider.getSparkSession();
    // Create Hadoop configuration and serialise to a string
    final Configuration configuration = new Configuration();
    final String configurationString = AbstractGetRDDHandler
            .convertConfigurationToString(configuration);
    final String outputPath = testFolder.getRoot().getAbsolutePath() + "/output";
    final String failurePath = testFolder.getRoot().getAbsolutePath() + "/failure";
    final ElementConverterFunction func = new ElementConverterFunction(sparkSession.sparkContext()
            .broadcast(new ByteEntityAccumuloElementConverter(graph1.getSchema()), ACCUMULO_ELEMENT_CONVERTER_CLASS_TAG));
    final RDD<Tuple2<Key, Value>> elementRDD = sparkSession.sparkContext()
            .parallelize(elements, 1, ELEMENT_CLASS_TAG)
            .flatMap(func, TUPLE2_CLASS_TAG);
    final ImportKeyValuePairRDDToAccumulo addRdd = new ImportKeyValuePairRDDToAccumulo.Builder()
            .input(elementRDD)
            .outputPath(outputPath)
            .failurePath(failurePath)
            .build();
    graph1.execute(addRdd, user);
    // Check all elements were added
    final GetRDDOfAllElements rddQuery = new GetRDDOfAllElements.Builder()
            .option(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString)
            .build();
    final RDD<Element> rdd = graph1.execute(rddQuery, user);
    if (rdd == null) {
        fail("No RDD returned");
    }
    final Set<Element> results = new HashSet<>();
    final Element[] returnedElements = (Element[]) rdd.collect();
    Collections.addAll(results, returnedElements);
    assertEquals(elements.size(), results.size());
}