本文整理汇总了 Java 中 org.apache.flink.api.java.operators.DeltaIteration.closeWith 方法的典型用法代码示例。如果您正苦于以下问题：Java DeltaIteration.closeWith 方法的具体用法？Java DeltaIteration.closeWith 怎么用？Java DeltaIteration.closeWith 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 org.apache.flink.api.java.operators.DeltaIteration 的用法示例。
在下文中一共展示了DeltaIteration.closeWith方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: testProgram
import org.apache.flink.api.java.operators.DeltaIteration; //导入方法依赖的package包/类
/**
 * Builds and runs a connected-components job as a delta iteration: vertices and edges are
 * read from CSV files, component ids are propagated along edges, and the final solution
 * set is written to {@code resultPath}.
 *
 * @throws Exception if reading the inputs or executing the job fails
 */
@Override
protected void testProgram() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // Inputs: one id per vertex line, and space-delimited id pairs passed through UndirectEdge.
    DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);
    DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath)
            .fieldDelimiter(" ")
            .types(Long.class, Long.class)
            .flatMap(new ConnectedComponents.UndirectEdge());

    // Every vertex starts out in a component whose id equals its own vertex id.
    DataSet<Tuple2<Long, Long>> seeded = vertices.map(new DuplicateValue<Long>());

    // The seeded vertices serve as both initial solution set and initial workset
    // (max 100 iterations, key position 0).
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            seeded.iterateDelta(seeded, 100, 0);

    // Step 1: join the workset with the edges to produce candidate ids for the neighbors.
    DataSet<Tuple2<Long, Long>> candidates = iteration.getWorkset()
            .join(edges).where(0).equalTo(0)
            .with(new ConnectedComponents.NeighborWithComponentIDJoin());

    // Step 2: keep only the minimum candidate per vertex.
    DataSet<Tuple2<Long, Long>> minCandidates = candidates
            .groupBy(0)
            .aggregate(Aggregations.MIN, 1);

    // Step 3: compare against the current solution; update if the candidate is smaller.
    DataSet<Tuple2<Long, Long>> changes = minCandidates
            .join(iteration.getSolutionSet()).where(0).equalTo(0)
            .with(new ConnectedComponents.ComponentIdFilter());

    // The solution-set delta and the next workset are the same data set.
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);
    result.writeAsCsv(resultPath, "\n", " ");

    env.execute("Connected Components Example");
}
示例2: testRangePartitionInIteration
import org.apache.flink.api.java.operators.DeltaIteration; //导入方法依赖的package包/类
/**
 * Verifies that using range partitioning inside a delta iteration is rejected with an
 * {@link InvalidProgramException}.
 *
 * @throws Exception the expected {@code InvalidProgramException}, or any other failure
 */
@Test(expected = InvalidProgramException.class)
public void testRangePartitionInIteration() throws Exception {
    // The check does not apply to collection execution; throw the expected exception
    // up front so the test outcome is the same in that mode.
    if (TestExecutionMode.COLLECTION == super.mode) {
        throw new InvalidProgramException("Does not apply for collection execution");
    }

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSource<Long> numbers = env.generateSequence(0, 10000);

    // Pairs each number with its decimal string representation.
    MapFunction<Long, Tuple2<Long, String>> toPair = new MapFunction<Long, Tuple2<Long, String>>() {
        @Override
        public Tuple2<Long, String> map(Long value) throws Exception {
            return new Tuple2<>(value, Long.toString(value));
        }
    };
    DataSet<Tuple2<Long, String>> pairs = numbers.map(toPair);

    DeltaIteration<Tuple2<Long, String>, Tuple2<Long, String>> deltaIteration =
            pairs.iterateDelta(pairs, 10, 0);

    DataSet<Tuple2<Long, String>> step = deltaIteration.getWorkset()
            // Verify that range partition is not allowed in iteration
            .partitionByRange(1)
            .join(deltaIteration.getSolutionSet())
            .where(0).equalTo(0)
            .projectFirst(0).projectSecond(1);

    DataSet<Tuple2<Long, String>> closed = deltaIteration.closeWith(step, step);

    // should fail
    closed.collect();
}
示例3: testProgram
import org.apache.flink.api.java.operators.DeltaIteration; //导入方法依赖的package包/类
/**
 * Runs connected components on generated vertex and edge data, using a coGroup with the
 * solution set as the update step, and emits the result into a local collection.
 *
 * @throws Exception if job execution fails
 */
@Override
protected void testProgram() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // Generated inputs, one record per line, parsed into ids and id pairs.
    String[] vertexLines = ConnectedComponentsData.getEnumeratingVertices(NUM_VERTICES).split("\n");
    String[] edgeLines = ConnectedComponentsData.getRandomOddEvenEdges(NUM_EDGES, NUM_VERTICES, SEED).split("\n");
    DataSet<Long> vertices = env.fromElements(vertexLines).map(new VertexParser());
    DataSet<Tuple2<Long, Long>> edges = env.fromElements(edgeLines).flatMap(new EdgeParser());

    // Initial component id of a vertex is the vertex id itself.
    DataSet<Tuple2<Long, Long>> seeded = vertices.map(new DuplicateValue<Long>());

    // Open the delta iteration (max 100 iterations, key position 0).
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            seeded.iterateDelta(seeded, 100, 0);

    // Step: join the workset with the edges, then coGroup the candidates with the
    // solution set so that MinIdAndUpdate can decide on the update.
    DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset()
            .join(edges).where(0).equalTo(0)
            .with(new NeighborWithComponentIDJoin())
            .coGroup(iteration.getSolutionSet()).where(0).equalTo(0)
            .with(new MinIdAndUpdate());

    // Delta and next workset are identical.
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

    // Sink the result into a local list; the list is not inspected in this method.
    List<Tuple2<Long, Long>> collected = new ArrayList<>();
    result.output(new LocalCollectionOutputFormat<>(collected));

    env.execute();
}
示例4: testProgram
import org.apache.flink.api.java.operators.DeltaIteration; //导入方法依赖的package包/类
/**
 * Runs connected components as a delta iteration in which the solution set is the first
 * (left) input of the update join, and writes the result to {@code resultPath}.
 *
 * @throws Exception if reading the inputs or executing the job fails
 */
@Override
protected void testProgram() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // Vertex ids, and space-delimited edges passed through UndirectEdge.
    DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);
    DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath)
            .fieldDelimiter(" ")
            .types(Long.class, Long.class)
            .flatMap(new ConnectedComponents.UndirectEdge());

    // Initial component id equals the vertex id.
    DataSet<Tuple2<Long, Long>> seeded =
            vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>());

    // Open the delta iteration (max 100 iterations, key position 0).
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            seeded.iterateDelta(seeded, 100, 0);

    // Candidate ids for the neighbors of the current workset.
    DataSet<Tuple2<Long, Long>> candidates = iteration.getWorkset()
            .join(edges).where(0).equalTo(0)
            .with(new ConnectedComponents.NeighborWithComponentIDJoin());

    // Minimum candidate per vertex.
    DataSet<Tuple2<Long, Long>> minNeighbor = candidates
            .groupBy(0)
            .aggregate(Aggregations.MIN, 1);

    // Here the solution set is on the left side of the join.
    DataSet<Tuple2<Long, Long>> updatedIds = iteration.getSolutionSet()
            .join(minNeighbor).where(0).equalTo(0)
            .with(new UpdateComponentIdMatchMirrored());

    // Delta and next workset are identical.
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(updatedIds, updatedIds);
    result.writeAsCsv(resultPath, "\n", " ");

    env.execute("Connected Components Example");
}
示例5: testAggregatorWithoutParameterForIterateDelta
import org.apache.flink.api.java.operators.DeltaIteration; //导入方法依赖的package包/类
/**
 * Tests an aggregator without a constructor parameter registered on a delta iteration.
 *
 * @throws Exception if job execution fails
 */
@Test
public void testAggregatorWithoutParameterForIterateDelta() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);

    DataSet<Tuple2<Integer, Integer>> initialSolutionSet =
            CollectionDataSets.getIntegerDataSet(env).map(new TupleMakerMap());

    // The same data set is used as initial solution set and initial workset.
    DeltaIteration<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>> iteration =
            initialSolutionSet.iterateDelta(initialSolutionSet, MAX_ITERATIONS, 0);

    // Register the aggregator under the shared name.
    LongSumAggregator sumAggregator = new LongSumAggregator();
    iteration.registerAggregator(NEGATIVE_ELEMENTS_AGGR, sumAggregator);

    // Step function: map the workset, join with the solution set, filter the updates.
    DataSet<Tuple2<Integer, Integer>> mapped = iteration.getWorkset().map(new AggregateMapDelta());
    DataSet<Tuple2<Integer, Integer>> delta = mapped
            .join(iteration.getSolutionSet())
            .where(0).equalTo(0)
            .flatMap(new UpdateFilter());

    DataSet<Tuple2<Integer, Integer>> closed = iteration.closeWith(delta, delta);
    DataSet<Integer> secondFields = closed.map(new ProjectSecondMapper());
    secondFields.writeAsText(resultPath);

    env.execute();

    // Expected output, assigned after the job has run.
    expected = "1\n"
            + "2\n" + "2\n"
            + "3\n" + "3\n" + "3\n"
            + "4\n" + "4\n" + "4\n" + "4\n"
            + "5\n" + "5\n" + "5\n" + "5\n" + "5\n";
}
示例6: testAggregatorWithParameterForIterateDelta
import org.apache.flink.api.java.operators.DeltaIteration; //导入方法依赖的package包/类
/**
 * Tests an aggregator constructed with a parameter registered on a delta iteration.
 *
 * @throws Exception if job execution fails
 */
@Test
public void testAggregatorWithParameterForIterateDelta() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);

    DataSet<Tuple2<Integer, Integer>> initialSolutionSet =
            CollectionDataSets.getIntegerDataSet(env).map(new TupleMakerMap());

    // The same data set is used as initial solution set and initial workset.
    DeltaIteration<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>> iteration =
            initialSolutionSet.iterateDelta(initialSolutionSet, MAX_ITERATIONS, 0);

    // Register the parameterized aggregator (constructed with 4) under the shared name.
    LongSumAggregator sumAggregator = new LongSumAggregatorWithParameter(4);
    iteration.registerAggregator(NEGATIVE_ELEMENTS_AGGR, sumAggregator);

    // Step function: map the workset, join with the solution set, filter the updates.
    DataSet<Tuple2<Integer, Integer>> mapped = iteration.getWorkset().map(new AggregateMapDelta());
    DataSet<Tuple2<Integer, Integer>> delta = mapped
            .join(iteration.getSolutionSet())
            .where(0).equalTo(0)
            .flatMap(new UpdateFilter());

    DataSet<Tuple2<Integer, Integer>> closed = iteration.closeWith(delta, delta);
    DataSet<Integer> secondFields = closed.map(new ProjectSecondMapper());
    secondFields.writeAsText(resultPath);

    env.execute();

    // Expected output, assigned after the job has run.
    expected = "1\n"
            + "2\n" + "2\n"
            + "3\n" + "3\n" + "3\n"
            + "4\n" + "4\n" + "4\n" + "4\n"
            + "5\n" + "5\n" + "5\n" + "5\n" + "5\n";
}
示例7: testConvergenceCriterionWithParameterForIterateDelta
import org.apache.flink.api.java.operators.DeltaIteration; //导入方法依赖的package包/类
/**
 * Tests a parameterized aggregation convergence criterion registered on a delta iteration.
 *
 * @throws Exception if job execution fails
 */
@Test
public void testConvergenceCriterionWithParameterForIterateDelta() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);

    DataSet<Tuple2<Integer, Integer>> initialSolutionSet =
            CollectionDataSets.getIntegerDataSet(env).map(new TupleMakerMap());

    // The same data set is used as initial solution set and initial workset.
    DeltaIteration<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>> iteration =
            initialSolutionSet.iterateDelta(initialSolutionSet, MAX_ITERATIONS, 0);

    // The same aggregator instance is registered twice under the same name:
    // first as a plain aggregator, then together with the convergence criterion.
    LongSumAggregator sumAggregator = new LongSumAggregator();
    iteration.registerAggregator(NEGATIVE_ELEMENTS_AGGR, sumAggregator);
    iteration.registerAggregationConvergenceCriterion(
            NEGATIVE_ELEMENTS_AGGR,
            sumAggregator,
            new NegativeElementsConvergenceCriterionWithParam(3));

    // Step function: map the workset, join with the solution set, keep the left side's fields.
    DataSet<Tuple2<Integer, Integer>> mapped =
            iteration.getWorkset().map(new AggregateAndSubtractOneDelta());
    DataSet<Tuple2<Integer, Integer>> delta = mapped
            .join(iteration.getSolutionSet())
            .where(0).equalTo(0)
            .projectFirst(0, 1);

    DataSet<Tuple2<Integer, Integer>> closed = iteration.closeWith(delta, delta);
    DataSet<Integer> secondFields = closed.map(new ProjectSecondMapper());
    secondFields.writeAsText(resultPath);

    env.execute();

    // Expected output, assigned after the job has run.
    expected = "-3\n"
            + "-2\n" + "-2\n"
            + "-1\n" + "-1\n" + "-1\n"
            + "0\n" + "0\n" + "0\n" + "0\n"
            + "1\n" + "1\n" + "1\n" + "1\n" + "1\n";
}
示例8: testProgram
import org.apache.flink.api.java.operators.DeltaIteration; //导入方法依赖的package包/类
/**
 * Runs connected components as a delta iteration with the solution set flagged as
 * unmanaged, and writes the result to {@code resultPath}.
 *
 * @throws Exception if reading the inputs or executing the job fails
 */
@Override
protected void testProgram() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // Vertex ids, and space-delimited edges passed through UndirectEdge.
    DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);
    DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath)
            .fieldDelimiter(" ")
            .types(Long.class, Long.class)
            .flatMap(new UndirectEdge());

    // Initial component id equals the vertex id.
    DataSet<Tuple2<Long, Long>> seeded =
            vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>());

    // Open the delta iteration (max 100 iterations, key position 0) and mark its
    // solution set as unmanaged.
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            seeded.iterateDelta(seeded, 100, 0);
    iteration.setSolutionSetUnManaged(true);

    // Step 1: candidate ids for the neighbors of the current workset.
    DataSet<Tuple2<Long, Long>> candidates = iteration.getWorkset()
            .join(edges).where(0).equalTo(0)
            .with(new NeighborWithComponentIDJoin());

    // Step 2: minimum candidate per vertex.
    DataSet<Tuple2<Long, Long>> minCandidates = candidates
            .groupBy(0)
            .aggregate(Aggregations.MIN, 1);

    // Step 3: update if the component of the candidate is smaller.
    DataSet<Tuple2<Long, Long>> changes = minCandidates
            .join(iteration.getSolutionSet()).where(0).equalTo(0)
            .with(new ComponentIdFilter());

    // Delta and next workset are identical.
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);
    result.writeAsCsv(resultPath, "\n", " ");

    env.execute("Connected Components Example");
}
示例9: runConnectedComponents
import org.apache.flink.api.java.operators.DeltaIteration; //导入方法依赖的package包/类
/**
 * Builds the connected-components delta iteration on the default data sets, discards
 * the result, and executes the job on the given environment.
 *
 * @param env the environment to configure and run the job on
 * @throws Exception if job execution fails
 */
private static void runConnectedComponents(ExecutionEnvironment env) throws Exception {
    env.setParallelism(PARALLELISM);
    env.getConfig().disableSysoutLogging();

    // Default vertex and edge data, each rebalanced before further processing.
    DataSet<Long> vertices = ConnectedComponentsData.getDefaultVertexDataSet(env).rebalance();
    DataSet<Tuple2<Long, Long>> edges = ConnectedComponentsData.getDefaultEdgeDataSet(env)
            .rebalance()
            .flatMap(new ConnectedComponents.UndirectEdge());

    // Initial component id equals the vertex id.
    DataSet<Tuple2<Long, Long>> seeded =
            vertices.map(new ConnectedComponents.DuplicateValue<Long>());

    // Open the delta iteration (max 100 iterations, key position 0).
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            seeded.iterateDelta(seeded, 100, 0);

    // Step 1: candidate ids for the neighbors of the current workset.
    DataSet<Tuple2<Long, Long>> candidates = iteration.getWorkset()
            .join(edges).where(0).equalTo(0)
            .with(new ConnectedComponents.NeighborWithComponentIDJoin());

    // Step 2: minimum candidate per vertex.
    DataSet<Tuple2<Long, Long>> minCandidates = candidates
            .groupBy(0)
            .aggregate(Aggregations.MIN, 1);

    // Step 3: update if the component of the candidate is smaller.
    DataSet<Tuple2<Long, Long>> changes = minCandidates
            .join(iteration.getSolutionSet()).where(0).equalTo(0)
            .with(new ConnectedComponents.ComponentIdFilter());

    // Delta and next workset are identical; the final result is discarded.
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);
    result.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

    env.execute();
}
示例10: testWorksetIterationPipelineBreakerPlacement
import org.apache.flink.api.java.operators.DeltaIteration; //导入方法依赖的package包/类
/**
 * Compiles a plan in which the initial workset of a delta iteration is also consumed by
 * a join that follows the iteration. The test passes if plan compilation succeeds.
 */
@Test
public void testWorksetIterationPipelineBreakerPlacement() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);

        // the workset (input two of the delta iteration) is the same as what is consumed by the successive join
        DataSet<Tuple2<Long, Long>> initialWorkset = env.readCsvFile("/some/file/path").types(Long.class).map(new DuplicateValue());
        DataSet<Tuple2<Long, Long>> initialSolutionSet = env.readCsvFile("/some/file/path").types(Long.class).map(new DuplicateValue());

        // trivial iteration, since we are interested in the inputs to the iteration
        DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration = initialSolutionSet.iterateDelta(initialWorkset, 100, 0);
        DataSet<Tuple2<Long, Long>> next = iteration.getWorkset().map(new IdentityMapper<Tuple2<Long,Long>>());
        DataSet<Tuple2<Long, Long>> result = iteration.closeWith(next, next);

        // the initial workset is consumed a second time, by a join with the iteration result
        initialWorkset
                .join(result, JoinHint.REPARTITION_HASH_FIRST)
                .where(0).equalTo(0)
                .output(new DiscardingOutputFormat<Tuple2<Tuple2<Long, Long>, Tuple2<Long, Long>>>());

        Plan p = env.createProgramPlan();
        compileNoStats(p);
    }
    catch (Exception e) {
        // Fail with the original exception attached as the cause, so the test report
        // carries the full stack trace instead of only the message text.
        throw new AssertionError("Plan compilation failed: " + e.getMessage(), e);
    }
}
示例11: testClosureDeltaIteration
import org.apache.flink.api.java.operators.DeltaIteration; //导入方法依赖的package包/类
/**
 * Compiles a plan in which the inputs of a delta iteration are shared with other
 * consumers. The test passes if plan compilation succeeds.
 *
 * <pre>
 *       (SRC A)         (SRC B)          (SRC C)
 *      /       \       /                /       \
 *  (SINK 1) (DELTA ITERATION)          |     (SINK 2)
 *             /    |   \               /
 *         (SINK 3) |   (CROSS => NEXT WORKSET)
 *                  |             |
 *                (JOIN => SOLUTION SET DELTA)
 * </pre>
 *
 * NOTE(review): the diagram draws SRC C feeding the cross, but the code crosses the
 * workset with {@code sourceB}; {@code sourceC} only feeds a sink. Confirm which of the
 * two is intended.
 */
@Test
public void testClosureDeltaIteration() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSet<Tuple2<Long, Long>> sourceA = env.generateSequence(0,1).map(new Duplicator<Long>());
    DataSet<Tuple2<Long, Long>> sourceB = env.generateSequence(0,1).map(new Duplicator<Long>());
    DataSet<Tuple2<Long, Long>> sourceC = env.generateSequence(0,1).map(new Duplicator<Long>());

    // sourceA and sourceC each get their own sink
    sourceA.output(new DiscardingOutputFormat<Tuple2<Long,Long>>());
    sourceC.output(new DiscardingOutputFormat<Tuple2<Long,Long>>());

    // sourceA is the initial solution set, sourceB the initial workset
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> loop = sourceA.iterateDelta(sourceB, 10, 0);

    // sourceB is used again inside the step function
    DataSet<Tuple2<Long, Long>> workset = loop.getWorkset().cross(sourceB).with(new IdentityCrosser<Tuple2<Long, Long>>()).name("Next work set");
    DataSet<Tuple2<Long, Long>> delta = workset.join(loop.getSolutionSet()).where(0).equalTo(0).with(new IdentityJoiner<Tuple2<Long, Long>>()).name("Solution set delta");

    DataSet<Tuple2<Long, Long>> result = loop.closeWith(delta, workset);
    result.output(new DiscardingOutputFormat<Tuple2<Long,Long>>());

    Plan plan = env.createProgramPlan();

    try {
        compileNoStats(plan);
    } catch (Exception e) {
        // Fail with the original exception attached as the cause, so the test report
        // carries the full stack trace instead of only the message text.
        throw new AssertionError("Plan compilation failed: " + e.getMessage(), e);
    }
}
示例12: createResult
import org.apache.flink.api.java.operators.DeltaIteration; //导入方法依赖的package包/类
/**
 * Assembles the delta iteration for this operator: the workset is coGrouped with the
 * edges, processed per partition into messages, and the messages are coGrouped with the
 * solution set to produce the updated vertices.
 *
 * @return the data set resulting from closing the delta iteration
 * @throws RuntimeException if the initial vertices have not been set
 */
@Override
public DataSet<Vertex<K, VV>> createResult() {
    if (null == this.initialVertices) {
        throw new RuntimeException("Initial vertices not set");
    }

    // Derive the message tuple type from the vertex key type (field 0) and the message type.
    TypeInformation<K> keyType = ((TupleTypeInfo<?>) initialVertices.getType()).getTypeAt(0);
    TypeInformation<Tuple2<K, Message>> messageTypeInfo = new TupleTypeInfo<>(keyType, messageType);

    // Open the iteration; the initial vertices are both solution set and workset.
    DeltaIteration<Vertex<K, VV>, Vertex<K, VV>> iteration =
            initialVertices.iterateDelta(initialVertices, maxIteration, 0);
    iteration.name("Partition-centric iteration (" + partitionProcessFunction + " | " + vertexUpdateFunction + ")");

    // Partition input: workset vertices coGrouped with the edges on field 0.
    DataSet<RichEdge<K, VV, EV>> partitionInput = iteration.getWorkset()
            .coGroup(edges)
            .where(0).equalTo(0)
            .with(new PreparePartitionInput<K, VV, EV>());

    // Process each partition, producing a data set of messages.
    PartitionUpdateUdf<K, VV, EV, Message> partitionUpdater =
            new PartitionUpdateUdf<>(partitionProcessFunction, messageTypeInfo);
    DataSet<Tuple2<K, Message>> messages = partitionInput.mapPartition(partitionUpdater);

    // Deliver the messages to the vertices in the solution set to obtain the updates.
    DataSet<Vertex<K, VV>> updatedVertices = messages
            .coGroup(iteration.getSolutionSet())
            .where(0).equalTo(0)
            .with(new VertexUpdateUdf<>(vertexUpdateFunction));

    // The updated vertices are both the solution-set delta and the next workset.
    return iteration.closeWith(updatedVertices, updatedVertices);
}
示例13: testProgram
import org.apache.flink.api.java.operators.DeltaIteration; //导入方法依赖的package包/类
/**
 * Runs connected components on generated vertex and edge data, with a coGroup against
 * the solution set as the update step, and emits the result into a local collection.
 *
 * @throws Exception if job execution fails
 */
@Override
protected void testProgram() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // Generated vertex lines parsed into ids.
    DataSet<Long> vertices = env
            .fromElements(ConnectedComponentsData.getEnumeratingVertices(NUM_VERTICES).split("\n"))
            .map(new VertexParser());

    // Generated edge lines parsed into id pairs.
    DataSet<Tuple2<Long, Long>> edges = env
            .fromElements(ConnectedComponentsData.getRandomOddEvenEdges(NUM_EDGES, NUM_VERTICES, SEED).split("\n"))
            .flatMap(new EdgeParser());

    // Initial component id equals the vertex id.
    DataSet<Tuple2<Long, Long>> seeded = vertices.map(new DuplicateValue<Long>());

    // Open the delta iteration (max 100 iterations, key position 0).
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            seeded.iterateDelta(seeded, 100, 0);

    // Step: join the workset with the edges, then coGroup the candidates with the
    // solution set so that MinIdAndUpdate can decide on the update.
    DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset()
            .join(edges).where(0).equalTo(0)
            .with(new NeighborWithComponentIDJoin())
            .coGroup(iteration.getSolutionSet()).where(0).equalTo(0)
            .with(new MinIdAndUpdate());

    // Delta and next workset are identical.
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

    // Sink the result into a local list; the list is not inspected in this method.
    List<Tuple2<Long, Long>> sink = new ArrayList<Tuple2<Long, Long>>();
    result.output(new LocalCollectionOutputFormat<Tuple2<Long, Long>>(sink));

    env.execute();
}
示例14: testProgram
import org.apache.flink.api.java.operators.DeltaIteration; //导入方法依赖的package包/类
/**
 * Runs connected components as a delta iteration with the solution set flagged as
 * unmanaged, and writes the result to {@code resultPath}.
 *
 * @throws Exception if reading the inputs or executing the job fails
 */
@Override
protected void testProgram() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // Vertex ids, and edges delimited by a single space character.
    DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);
    DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath)
            .fieldDelimiter(' ')
            .types(Long.class, Long.class)
            .flatMap(new UndirectEdge());

    // Initial component id equals the vertex id.
    DataSet<Tuple2<Long, Long>> seeded = vertices.map(new DuplicateValue<Long>());

    // Open the delta iteration (max 100 iterations, key position 0) and mark its
    // solution set as unmanaged.
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            seeded.iterateDelta(seeded, 100, 0);
    iteration.setSolutionSetUnManaged(true);

    // Step 1: candidate ids for the neighbors of the current workset.
    DataSet<Tuple2<Long, Long>> candidates = iteration.getWorkset()
            .join(edges).where(0).equalTo(0)
            .with(new NeighborWithComponentIDJoin());

    // Step 2: minimum candidate per vertex.
    DataSet<Tuple2<Long, Long>> minCandidates = candidates
            .groupBy(0)
            .aggregate(Aggregations.MIN, 1);

    // Step 3: update if the component of the candidate is smaller.
    DataSet<Tuple2<Long, Long>> changes = minCandidates
            .join(iteration.getSolutionSet()).where(0).equalTo(0)
            .with(new ComponentIdFilter());

    // Delta and next workset are identical.
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);
    result.writeAsCsv(resultPath, "\n", " ");

    env.execute("Connected Components Example");
}
示例15: main
import org.apache.flink.api.java.operators.DeltaIteration; //导入方法依赖的package包/类
/**
 * Entry point of the connected-components example: parses the arguments, builds the
 * delta iteration, and either writes the result as CSV or prints it.
 *
 * @param args command-line arguments, handed to {@code parseParameters}
 * @throws Exception if job execution fails
 */
public static void main(String... args) throws Exception {
    // Nothing to do when the parameters could not be parsed.
    if (!parseParameters(args)) {
        return;
    }

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // Vertex ids, and edges passed through UndirectEdge.
    DataSet<Long> vertices = getVertexDataSet(env);
    DataSet<Tuple2<Long, Long>> edges = getEdgeDataSet(env).flatMap(new UndirectEdge());

    // Initial component id equals the vertex id.
    DataSet<Tuple2<Long, Long>> seeded = vertices.map(new DuplicateValue<Long>());

    // Open the delta iteration (bounded by maxIterations, key position 0).
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            seeded.iterateDelta(seeded, maxIterations, 0);

    // Step 1: candidate ids for the neighbors of the current workset.
    DataSet<Tuple2<Long, Long>> candidates = iteration.getWorkset()
            .join(edges).where(0).equalTo(0)
            .with(new NeighborWithComponentIDJoin());

    // Step 2: minimum candidate per vertex.
    DataSet<Tuple2<Long, Long>> minCandidates = candidates
            .groupBy(0)
            .aggregate(Aggregations.MIN, 1);

    // Step 3: update if the component of the candidate is smaller.
    DataSet<Tuple2<Long, Long>> changes = minCandidates
            .join(iteration.getSolutionSet()).where(0).equalTo(0)
            .with(new ComponentIdFilter());

    // Delta and next workset are identical.
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

    // Emit the result to a file or to standard output, depending on the parsed parameters.
    if (fileOutput) {
        result.writeAsCsv(outputPath, "\n", " ");
    } else {
        // NOTE(review): on Flink versions where print() triggers execution eagerly, the
        // execute() call below may find no newly defined sinks - confirm against the
        // Flink version this example targets.
        result.print();
    }

    env.execute("Connected Components Example");
}