本文整理汇总了Java中org.apache.flink.api.java.DataSet.iterateDelta方法的典型用法代码示例。如果您正苦于以下问题:Java DataSet.iterateDelta方法的具体用法?Java DataSet.iterateDelta怎么用?Java DataSet.iterateDelta使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.flink.api.java.DataSet
的用法示例。
在下文中一共展示了DataSet.iterateDelta方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: testProgram
import org.apache.flink.api.java.DataSet; //导入方法依赖的package包/类
@Override
protected void testProgram() throws Exception {
try {
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
DataSet<Tuple2<Long, Long>> input = env.generateSequence(0, 9).map(new Duplicator<Long>());
DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration = input.iterateDelta(input, 5, 1);
iteration.closeWith(iteration.getWorkset(), iteration.getWorkset().map(new TestMapper()))
.output(new LocalCollectionOutputFormat<Tuple2<Long, Long>>(result));
env.execute();
}
catch (Exception e) {
e.printStackTrace();
fail(e.getMessage());
}
}
示例2: testRangePartitionInIteration
import org.apache.flink.api.java.DataSet; //导入方法依赖的package包/类
@Test(expected = InvalidProgramException.class)
public void testRangePartitionInIteration() throws Exception {
// does not apply for collection execution
if (super.mode == TestExecutionMode.COLLECTION) {
throw new InvalidProgramException("Does not apply for collection execution");
}
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSource<Long> source = env.generateSequence(0, 10000);
DataSet<Tuple2<Long, String>> tuples = source.map(new MapFunction<Long, Tuple2<Long, String>>() {
@Override
public Tuple2<Long, String> map(Long v) throws Exception {
return new Tuple2<>(v, Long.toString(v));
}
});
DeltaIteration<Tuple2<Long, String>, Tuple2<Long, String>> it = tuples.iterateDelta(tuples, 10, 0);
DataSet<Tuple2<Long, String>> body = it.getWorkset()
.partitionByRange(1) // Verify that range partition is not allowed in iteration
.join(it.getSolutionSet())
.where(0).equalTo(0).projectFirst(0).projectSecond(1);
DataSet<Tuple2<Long, String>> result = it.closeWith(body, body);
result.collect(); // should fail
}
示例3: testSolutionSetDeltaDependsOnBroadcastVariable
import org.apache.flink.api.java.DataSet; //导入方法依赖的package包/类
@Test
public void testSolutionSetDeltaDependsOnBroadcastVariable() {
try {
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<Tuple2<Long, Long>> source =
env.generateSequence(1, 1000).map(new DuplicateValueScalar<Long>());
DataSet<Tuple2<Long, Long>> invariantInput =
env.generateSequence(1, 1000).map(new DuplicateValueScalar<Long>());
// iteration from here
DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iter = source.iterateDelta(source, 1000, 1);
DataSet<Tuple2<Long, Long>> result =
invariantInput
.map(new IdentityMapper<Tuple2<Long, Long>>()).withBroadcastSet(iter.getWorkset(), "bc data")
.join(iter.getSolutionSet()).where(0).equalTo(1).projectFirst(1).projectSecond(1);
iter.closeWith(result.map(new IdentityMapper<Tuple2<Long,Long>>()), result)
.output(new DiscardingOutputFormat<Tuple2<Long,Long>>());
OptimizedPlan p = compileNoStats(env.createProgramPlan());
// check that the JSON generator accepts this plan
new PlanJSONDumpGenerator().getOptimizerPlanAsJSON(p);
// check that the JobGraphGenerator accepts the plan
new JobGraphGenerator().compileJobGraph(p);
}
catch (Exception e) {
e.printStackTrace();
fail(e.getMessage());
}
}
示例4: testProgram
import org.apache.flink.api.java.DataSet; //导入方法依赖的package包/类
@Override
protected void testProgram() throws Exception {
// set up execution environment
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
// read vertex and edge data
DataSet<Long> vertices = env.fromElements(ConnectedComponentsData.getEnumeratingVertices(NUM_VERTICES).split("\n"))
.map(new VertexParser());
DataSet<Tuple2<Long, Long>> edges = env.fromElements(ConnectedComponentsData.getRandomOddEvenEdges(NUM_EDGES, NUM_VERTICES, SEED).split("\n"))
.flatMap(new EdgeParser());
// assign the initial components (equal to the vertex id)
DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new DuplicateValue<Long>());
// open a delta iteration
DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);
// apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
DataSet<Tuple2<Long, Long>> changes = iteration
.getWorkset().join(edges).where(0).equalTo(0).with(new NeighborWithComponentIDJoin())
.coGroup(iteration.getSolutionSet()).where(0).equalTo(0)
.with(new MinIdAndUpdate());
// close the delta iteration (delta and new workset are identical)
DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);
// emit result
List<Tuple2<Long, Long>> resutTuples = new ArrayList<>();
result.output(new LocalCollectionOutputFormat<>(resutTuples));
env.execute();
}
示例5: testProgram
import org.apache.flink.api.java.DataSet; //导入方法依赖的package包/类
@Override
protected void testProgram() throws Exception {
// set up execution environment
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
// read vertex and edge data
DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);
DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
.flatMap(new ConnectedComponents.UndirectEdge());
// assign the initial components (equal to the vertex id)
DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>());
// open a delta iteration
DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);
// apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
DataSet<Tuple2<Long, Long>> minNeighbor = iteration.getWorkset()
.join(edges).where(0).equalTo(0).with(new ConnectedComponents.NeighborWithComponentIDJoin())
.groupBy(0).aggregate(Aggregations.MIN, 1);
DataSet<Tuple2<Long, Long>> updatedIds = iteration.getSolutionSet()
.join(minNeighbor).where(0).equalTo(0).with(new UpdateComponentIdMatchMirrored());
// close the delta iteration (delta and new workset are identical)
DataSet<Tuple2<Long, Long>> result = iteration.closeWith(updatedIds, updatedIds);
result.writeAsCsv(resultPath, "\n", " ");
// execute program
env.execute("Connected Components Example");
}
示例6: testWorksetIterationPipelineBreakerPlacement
import org.apache.flink.api.java.DataSet; //导入方法依赖的package包/类
@Test
public void testWorksetIterationPipelineBreakerPlacement() {
try {
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(8);
// the workset (input two of the delta iteration) is the same as what is consumed be the successive join
DataSet<Tuple2<Long, Long>> initialWorkset = env.readCsvFile("/some/file/path").types(Long.class).map(new DuplicateValue());
DataSet<Tuple2<Long, Long>> initialSolutionSet = env.readCsvFile("/some/file/path").types(Long.class).map(new DuplicateValue());
// trivial iteration, since we are interested in the inputs to the iteration
DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration = initialSolutionSet.iterateDelta(initialWorkset, 100, 0);
DataSet<Tuple2<Long, Long>> next = iteration.getWorkset().map(new IdentityMapper<Tuple2<Long,Long>>());
DataSet<Tuple2<Long, Long>> result = iteration.closeWith(next, next);
initialWorkset
.join(result, JoinHint.REPARTITION_HASH_FIRST)
.where(0).equalTo(0)
.output(new DiscardingOutputFormat<Tuple2<Tuple2<Long, Long>, Tuple2<Long, Long>>>());
Plan p = env.createProgramPlan();
compileNoStats(p);
}
catch (Exception e) {
e.printStackTrace();
fail(e.getMessage());
}
}
示例7: testAggregatorWithoutParameterForIterateDelta
import org.apache.flink.api.java.DataSet; //导入方法依赖的package包/类
@Test
public void testAggregatorWithoutParameterForIterateDelta() throws Exception {
/*
* Test aggregator without parameter for iterateDelta
*/
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(parallelism);
DataSet<Tuple2<Integer, Integer>> initialSolutionSet = CollectionDataSets.getIntegerDataSet(env).map(new TupleMakerMap());
DeltaIteration<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>> iteration = initialSolutionSet.iterateDelta(
initialSolutionSet, MAX_ITERATIONS, 0);
// register aggregator
LongSumAggregator aggr = new LongSumAggregator();
iteration.registerAggregator(NEGATIVE_ELEMENTS_AGGR, aggr);
DataSet<Tuple2<Integer, Integer>> updatedDs = iteration.getWorkset().map(new AggregateMapDelta());
DataSet<Tuple2<Integer, Integer>> newElements = updatedDs.join(iteration.getSolutionSet())
.where(0).equalTo(0).flatMap(new UpdateFilter());
DataSet<Tuple2<Integer, Integer>> iterationRes = iteration.closeWith(newElements, newElements);
DataSet<Integer> result = iterationRes.map(new ProjectSecondMapper());
result.writeAsText(resultPath);
env.execute();
expected = "1\n" + "2\n" + "2\n" + "3\n" + "3\n"
+ "3\n" + "4\n" + "4\n" + "4\n" + "4\n"
+ "5\n" + "5\n" + "5\n" + "5\n" + "5\n";
}
示例8: testAggregatorWithParameterForIterateDelta
import org.apache.flink.api.java.DataSet; //导入方法依赖的package包/类
@Test
public void testAggregatorWithParameterForIterateDelta() throws Exception {
/*
* Test aggregator with parameter for iterateDelta
*/
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(parallelism);
DataSet<Tuple2<Integer, Integer>> initialSolutionSet = CollectionDataSets.getIntegerDataSet(env).map(new TupleMakerMap());
DeltaIteration<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>> iteration = initialSolutionSet.iterateDelta(
initialSolutionSet, MAX_ITERATIONS, 0);
// register aggregator
LongSumAggregator aggr = new LongSumAggregatorWithParameter(4);
iteration.registerAggregator(NEGATIVE_ELEMENTS_AGGR, aggr);
DataSet<Tuple2<Integer, Integer>> updatedDs = iteration.getWorkset().map(new AggregateMapDelta());
DataSet<Tuple2<Integer, Integer>> newElements = updatedDs.join(iteration.getSolutionSet())
.where(0).equalTo(0).flatMap(new UpdateFilter());
DataSet<Tuple2<Integer, Integer>> iterationRes = iteration.closeWith(newElements, newElements);
DataSet<Integer> result = iterationRes.map(new ProjectSecondMapper());
result.writeAsText(resultPath);
env.execute();
expected = "1\n" + "2\n" + "2\n" + "3\n" + "3\n"
+ "3\n" + "4\n" + "4\n" + "4\n" + "4\n"
+ "5\n" + "5\n" + "5\n" + "5\n" + "5\n";
}
示例9: testConvergenceCriterionWithParameterForIterateDelta
import org.apache.flink.api.java.DataSet; //导入方法依赖的package包/类
@Test
public void testConvergenceCriterionWithParameterForIterateDelta() throws Exception {
/*
* Test convergence criterion with parameter for iterate delta
*/
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(parallelism);
DataSet<Tuple2<Integer, Integer>> initialSolutionSet = CollectionDataSets.getIntegerDataSet(env).map(new TupleMakerMap());
DeltaIteration<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>> iteration = initialSolutionSet.iterateDelta(
initialSolutionSet, MAX_ITERATIONS, 0);
// register aggregator
LongSumAggregator aggr = new LongSumAggregator();
iteration.registerAggregator(NEGATIVE_ELEMENTS_AGGR, aggr);
// register convergence criterion
iteration.registerAggregationConvergenceCriterion(NEGATIVE_ELEMENTS_AGGR, aggr,
new NegativeElementsConvergenceCriterionWithParam(3));
DataSet<Tuple2<Integer, Integer>> updatedDs = iteration.getWorkset().map(new AggregateAndSubtractOneDelta());
DataSet<Tuple2<Integer, Integer>> newElements = updatedDs.join(iteration.getSolutionSet())
.where(0).equalTo(0).projectFirst(0, 1);
DataSet<Tuple2<Integer, Integer>> iterationRes = iteration.closeWith(newElements, newElements);
DataSet<Integer> result = iterationRes.map(new ProjectSecondMapper());
result.writeAsText(resultPath);
env.execute();
expected = "-3\n" + "-2\n" + "-2\n" + "-1\n" + "-1\n"
+ "-1\n" + "0\n" + "0\n" + "0\n" + "0\n"
+ "1\n" + "1\n" + "1\n" + "1\n" + "1\n";
}
示例10: testWorksetIterationNotDependingOnSolutionSet
import org.apache.flink.api.java.DataSet; //导入方法依赖的package包/类
@Test
public void testWorksetIterationNotDependingOnSolutionSet() {
try {
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<Tuple2<Long, Long>> input = env.generateSequence(1, 100).map(new Duplicator<Long>());
DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration = input.iterateDelta(input, 100, 1);
DataSet<Tuple2<Long, Long>> iterEnd = iteration.getWorkset().map(new TestMapper<Tuple2<Long,Long>>());
iteration.closeWith(iterEnd, iterEnd)
.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
Plan p = env.createProgramPlan();
OptimizedPlan op = compileNoStats(p);
WorksetIterationPlanNode wipn = (WorksetIterationPlanNode) op.getDataSinks().iterator().next().getInput().getSource();
assertTrue(wipn.getSolutionSetPlanNode().getOutgoingChannels().isEmpty());
JobGraphGenerator jgg = new JobGraphGenerator();
jgg.compileJobGraph(op);
}
catch (Exception e) {
e.printStackTrace();
fail(e.getMessage());
}
}
示例11: testTempInIterationTest
import org.apache.flink.api.java.DataSet; //导入方法依赖的package包/类
@Test
public void testTempInIterationTest() throws Exception {
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<Tuple2<Long, Long>> input = env.readCsvFile("file:///does/not/exist").types(Long.class, Long.class);
DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
input.iterateDelta(input, 1, 0);
DataSet<Tuple2<Long, Long>> update = iteration.getWorkset()
.join(iteration.getSolutionSet()).where(0).equalTo(0)
.with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());
iteration.closeWith(update, update)
.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
Plan plan = env.createProgramPlan();
OptimizedPlan oPlan = (new Optimizer(new Configuration())).compile(plan);
JobGraphGenerator jgg = new JobGraphGenerator();
JobGraph jg = jgg.compileJobGraph(oPlan);
boolean solutionSetUpdateChecked = false;
for(JobVertex v : jg.getVertices()) {
if(v.getName().equals("SolutionSet Delta")) {
// check if input of solution set delta is temped
TaskConfig tc = new TaskConfig(v.getConfiguration());
assertTrue(tc.isInputAsynchronouslyMaterialized(0));
solutionSetUpdateChecked = true;
}
}
assertTrue(solutionSetUpdateChecked);
}
示例12: runConnectedComponents
import org.apache.flink.api.java.DataSet; //导入方法依赖的package包/类
private static void runConnectedComponents(ExecutionEnvironment env) throws Exception {
env.setParallelism(PARALLELISM);
env.getConfig().disableSysoutLogging();
// read vertex and edge data
DataSet<Long> vertices = ConnectedComponentsData.getDefaultVertexDataSet(env)
.rebalance();
DataSet<Tuple2<Long, Long>> edges = ConnectedComponentsData.getDefaultEdgeDataSet(env)
.rebalance()
.flatMap(new ConnectedComponents.UndirectEdge());
// assign the initial components (equal to the vertex id)
DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices
.map(new ConnectedComponents.DuplicateValue<Long>());
// open a delta iteration
DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);
// apply the step logic: join with the edges, select the minimum neighbor,
// update if the component of the candidate is smaller
DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges)
.where(0).equalTo(0)
.with(new ConnectedComponents.NeighborWithComponentIDJoin())
.groupBy(0).aggregate(Aggregations.MIN, 1)
.join(iteration.getSolutionSet())
.where(0).equalTo(0)
.with(new ConnectedComponents.ComponentIdFilter());
// close the delta iteration (delta and new workset are identical)
DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);
result.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
env.execute();
}
示例13: testEmptyWorksetIteration
import org.apache.flink.api.java.DataSet; //导入方法依赖的package包/类
@Test
public void testEmptyWorksetIteration() {
try {
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(43);
DataSet<Tuple2<Long, Long>> input = env.generateSequence(1, 20)
.map(new MapFunction<Long, Tuple2<Long, Long>>() {
@Override
public Tuple2<Long, Long> map(Long value){ return null; }
});
DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iter = input.iterateDelta(input, 100, 0);
iter.closeWith(iter.getWorkset(), iter.getWorkset())
.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
Plan p = env.createProgramPlan();
OptimizedPlan op = compileNoStats(p);
new JobGraphGenerator().compileJobGraph(op);
}
catch (Exception e) {
e.printStackTrace();
fail(e.getMessage());
}
}
示例14: doDeltaIteration
import org.apache.flink.api.java.DataSet; //导入方法依赖的package包/类
public static DataSet<Tuple2<Long, Long>> doDeltaIteration(DataSet<Tuple2<Long, Long>> vertices, DataSet<Tuple2<Long, Long>> edges) {
DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> depIteration = vertices.iterateDelta(vertices, 100, 0);
DataSet<Tuple1<Long>> candidates = depIteration.getWorkset().join(edges).where(0).equalTo(0)
.projectSecond(1);
DataSet<Tuple1<Long>> grouped = candidates.groupBy(0).reduceGroup(new Reduce101());
DataSet<Tuple2<Long, Long>> candidatesDependencies =
grouped.join(edges).where(0).equalTo(1).projectSecond(0, 1);
DataSet<Tuple2<Long, Long>> verticesWithNewComponents =
candidatesDependencies.join(depIteration.getSolutionSet()).where(0).equalTo(0)
.with(new Join222())
.groupBy(0).aggregate(Aggregations.MIN, 1);
DataSet<Tuple2<Long, Long>> updatedComponentId =
verticesWithNewComponents.join(depIteration.getSolutionSet()).where(0).equalTo(0)
.flatMap(new FlatMapJoin());
DataSet<Tuple2<Long, Long>> depResult = depIteration.closeWith(updatedComponentId, updatedComponentId);
return depResult;
}
示例15: testProgram
import org.apache.flink.api.java.DataSet; //导入方法依赖的package包/类
@Override
protected void testProgram() throws Exception {
// set up execution environment
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
// read vertex and edge data
DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);
DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
.flatMap(new ConnectedComponents.UndirectEdge());
// assign the initial components (equal to the vertex id)
DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>());
// open a delta iteration
DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);
// apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset()
.join(edges).where(0).equalTo(0).with(new ConnectedComponents.NeighborWithComponentIDJoin())
.groupBy(0).aggregate(Aggregations.MIN, 1)
.join(iteration.getSolutionSet()).where(0).equalTo(0)
.with(new UpdateComponentIdMatchNonPreserving());
DataSet<Tuple2<Long, Long>> delta;
if (extraMapper) {
delta = changes.map(
// ID Mapper
new MapFunction<Tuple2<Long, Long>, Tuple2<Long, Long>>() {
private static final long serialVersionUID = -3929364091829757322L;
@Override
public Tuple2<Long, Long> map(Tuple2<Long, Long> v) throws Exception {
return v;
}
});
}
else {
delta = changes;
}
// close the delta iteration (delta and new workset are identical)
DataSet<Tuple2<Long, Long>> result = iteration.closeWith(delta, changes);
result.writeAsCsv(resultPath, "\n", " ");
// execute program
env.execute("Connected Components Example");
}