This article collects and summarizes typical usage examples of the Java method org.apache.flink.api.java.ExecutionEnvironment.generateSequence. If you have been struggling with questions such as: What exactly does ExecutionEnvironment.generateSequence do? How is it called? What do real-world examples look like? Then the hand-picked code samples below may help. You can also explore further usage examples of the method's enclosing class, org.apache.flink.api.java.ExecutionEnvironment.
The following shows 15 code examples of the ExecutionEnvironment.generateSequence method, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Java code examples.
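Before diving into the examples, here is a minimal, self-contained sketch of the method itself: generateSequence(from, to) creates a parallel DataSource<Long> containing the numbers from from to to, both inclusive. The class name GenerateSequenceExample is made up for illustration; only the Flink API calls are taken from the examples on this page.

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;

public class GenerateSequenceExample {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        // Generates the numbers 1 to 100 (both bounds inclusive) as a parallel data source.
        DataSet<Long> numbers = env.generateSequence(1, 100);
        // count() triggers execution and returns the number of generated elements (here 100).
        System.out.println("Generated " + numbers.count() + " elements");
    }
}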
Example 1: testRangePartitionerOnSequenceData
import org.apache.flink.api.java.ExecutionEnvironment; // import the package/class this method depends on
@Test
public void testRangePartitionerOnSequenceData() throws Exception {
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSource<Long> dataSource = env.generateSequence(0, 10000);
KeySelector<Long, Long> keyExtractor = new ObjectSelfKeySelector();
MapPartitionFunction<Long, Tuple2<Long, Long>> minMaxSelector = new MinMaxSelector<>(new LongComparator(true));
Comparator<Tuple2<Long, Long>> tuple2Comparator = new Tuple2Comparator(new LongComparator(true));
List<Tuple2<Long, Long>> collected = dataSource.partitionByRange(keyExtractor).mapPartition(minMaxSelector).collect();
Collections.sort(collected, tuple2Comparator);
long previousMax = -1;
for (Tuple2<Long, Long> tuple2 : collected) {
if (previousMax == -1) {
previousMax = tuple2.f1;
} else {
long currentMin = tuple2.f0;
assertTrue(tuple2.f0 < tuple2.f1);
assertEquals(previousMax + 1, currentMin);
previousMax = tuple2.f1;
}
}
}
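The snippet above references two helper classes that are not shown on this page. Purely for illustration, here is a hedged sketch of what they plausibly look like given how they are used: ObjectSelfKeySelector returns the element itself as the key, and MinMaxSelector emits one (min, max) pair per partition. The TypeComparator constructor parameter is an assumption chosen to match the LongComparator passed at the call site; the actual test classes may differ.

// Hypothetical sketches of the helpers used above; not taken verbatim from the test sources.
// Assumes imports of org.apache.flink.api.java.functions.KeySelector,
// org.apache.flink.api.common.functions.MapPartitionFunction,
// org.apache.flink.api.common.typeutils.TypeComparator,
// org.apache.flink.api.java.tuple.Tuple2, and org.apache.flink.util.Collector.
private static class ObjectSelfKeySelector implements KeySelector<Long, Long> {
    @Override
    public Long getKey(Long value) throws Exception {
        return value; // each element is its own key
    }
}

private static class MinMaxSelector<T> implements MapPartitionFunction<T, Tuple2<T, T>> {
    private final TypeComparator<T> comparator;

    MinMaxSelector(TypeComparator<T> comparator) {
        this.comparator = comparator;
    }

    @Override
    public void mapPartition(Iterable<T> values, Collector<Tuple2<T, T>> out) {
        T min = null;
        T max = null;
        for (T value : values) {
            if (min == null || comparator.compare(value, min) < 0) {
                min = value;
            }
            if (max == null || comparator.compare(value, max) > 0) {
                max = value;
            }
        }
        if (min != null) {
            // emit one (min, max) pair per non-empty partition
            out.collect(new Tuple2<>(min, max));
        }
    }
}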
Example 2: testProgram
import org.apache.flink.api.java.ExecutionEnvironment; // import the package/class this method depends on
@Override
protected void testProgram() throws Exception {
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(4);
DataSet<Long> input = env.generateSequence(1, 10);
DataSet<Long> bc1 = env.generateSequence(1, 5);
DataSet<Long> bc2 = env.generateSequence(6, 10);
List<Long> result = input
.map(new Mapper())
.withBroadcastSet(bc1.union(bc2), BC_NAME)
.reduce(new Reducer())
.collect();
Assert.assertEquals(Long.valueOf(3025), result.get(0));
}
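The Mapper and Reducer used above are not shown in this snippet, and BC_NAME is a constant defined elsewhere in the test. As a hedged guess that is consistent with the asserted result (the input 1..10 sums to 55, the union of the two broadcast sets is again 1..10 with sum 55, and 55 × 55 = 3025), the helpers could look roughly like this:

// Plausible sketches only; the actual test classes may differ in detail.
// Assumes imports of org.apache.flink.api.common.functions.RichMapFunction,
// org.apache.flink.api.common.functions.ReduceFunction, and
// org.apache.flink.configuration.Configuration.
public static class Mapper extends RichMapFunction<Long, Long> {
    private long broadcastSum;

    @Override
    public void open(Configuration parameters) {
        // Sum the broadcast variable registered under BC_NAME (1..10 -> 55).
        broadcastSum = 0L;
        for (Long value : getRuntimeContext().<Long>getBroadcastVariable(BC_NAME)) {
            broadcastSum += value;
        }
    }

    @Override
    public Long map(Long value) {
        return value * broadcastSum;
    }
}

public static class Reducer implements ReduceFunction<Long> {
    @Override
    public Long reduce(Long a, Long b) {
        return a + b; // sums to 55 * 55 = 3025 for the inputs above
    }
}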
Example 3: testRangePartitionerOnSequenceData
import org.apache.flink.api.java.ExecutionEnvironment; // import the package/class this method depends on
@Test
public void testRangePartitionerOnSequenceData() throws Exception {
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSource<Long> dataSource = env.generateSequence(0, 10000);
KeySelector<Long, Long> keyExtractor = new ObjectSelfKeySelector();
MapPartitionFunction<Long, Tuple2<Long, Long>> minMaxSelector = new MinMaxSelector<>(new LongComparator(true));
Comparator<Tuple2<Long, Long>> tuple2Comparator = new Tuple2Comparator(new LongComparator(true));
List<Tuple2<Long, Long>> collected = dataSource.partitionByRange(keyExtractor).mapPartition(minMaxSelector).collect();
Collections.sort(collected, tuple2Comparator);
long previousMax = -1;
for (Tuple2<Long, Long> tuple2 : collected) {
if (previousMax == -1) {
previousMax = tuple2.f1;
} else {
long currentMin = tuple2.f0;
assertTrue(tuple2.f0 < tuple2.f1);
assertEquals(previousMax + 1, currentMin);
previousMax = tuple2.f1;
}
}
}
Example 4: testReduce
import org.apache.flink.api.java.ExecutionEnvironment; // import the package/class this method depends on
@Test
public void testReduce() {
// construct the plan
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(DEFAULT_PARALLELISM);
DataSet<Long> set1 = env.generateSequence(0, 1);
set1.reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce1")
.output(new DiscardingOutputFormat<Long>()).name("Sink");
Plan plan = env.createProgramPlan();
try {
OptimizedPlan oPlan = compileNoStats(plan);
JobGraphGenerator jobGen = new JobGraphGenerator();
jobGen.compileJobGraph(oPlan);
} catch(CompilerException ce) {
ce.printStackTrace();
fail("The pact compiler is unable to compile this plan correctly");
}
}
Example 5: testProgram
import org.apache.flink.api.java.ExecutionEnvironment; // import the package/class this method depends on
@Override
protected void testProgram() throws Exception {
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<Long> longs = env.generateSequence(0, 100000);
DataSet<Tuple1<Long>> longT1 = longs.map(new TupleWrapper());
DataSet<Tuple1<Long>> longT2 = longT1.project(0);
DataSet<Tuple1<Long>> longT3 = longs.map(new TupleWrapper());
longT2.join(longT3).where(0).equalTo(0).projectFirst(0)
.join(longT1).where(0).equalTo(0).projectFirst(0)
.writeAsText(resultPath);
env.execute();
}
Example 6: testZipWithIndex
import org.apache.flink.api.java.ExecutionEnvironment; // import the package/class this method depends on
@Test
public void testZipWithIndex() throws Exception {
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
long expectedSize = 100L;
DataSet<Long> numbers = env.generateSequence(0, expectedSize - 1);
List<Tuple2<Long, Long>> result = new ArrayList<>(DataSetUtils.zipWithIndex(numbers).collect());
Assert.assertEquals(expectedSize, result.size());
// sort result by created index
Collections.sort(result, new Comparator<Tuple2<Long, Long>>() {
@Override
public int compare(Tuple2<Long, Long> o1, Tuple2<Long, Long> o2) {
return o1.f0.compareTo(o2.f0);
}
});
// test if index is consecutive
for (int i = 0; i < expectedSize; i++) {
Assert.assertEquals(i, result.get(i).f0.longValue());
}
}
Example 7: testBranchAfterIteration
import org.apache.flink.api.java.ExecutionEnvironment; // import the package/class this method depends on
@Test
public void testBranchAfterIteration() {
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(DEFAULT_PARALLELISM);
DataSet<Long> sourceA = env.generateSequence(0, 1);
IterativeDataSet<Long> loopHead = sourceA.iterate(10);
DataSet<Long> loopTail = loopHead.map(new IdentityMapper<Long>()).name("Mapper");
DataSet<Long> loopRes = loopHead.closeWith(loopTail);
loopRes.output(new DiscardingOutputFormat<Long>());
loopRes.map(new IdentityMapper<Long>())
.output(new DiscardingOutputFormat<Long>());
Plan plan = env.createProgramPlan();
try {
compileNoStats(plan);
}
catch (Exception e) {
e.printStackTrace();
Assert.fail(e.getMessage());
}
}
Example 8: testForcedRebalancing
import org.apache.flink.api.java.ExecutionEnvironment; // import the package/class this method depends on
@Test
public void testForcedRebalancing() throws Exception {
/*
* Test forced rebalancing
*/
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
// generate some numbers in parallel
DataSet<Long> ds = env.generateSequence(1, 3000);
DataSet<Tuple2<Integer, Integer>> uniqLongs = ds
// introduce some partition skew by filtering
.filter(new Filter1())
// rebalance
.rebalance()
// count values in each partition
.map(new PartitionIndexMapper())
.groupBy(0)
.reduce(new Reducer1())
// round counts to mitigate runtime scheduling effects (lazy split assignment)
.map(new Mapper1());
List<Tuple2<Integer, Integer>> result = uniqLongs.collect();
StringBuilder expected = new StringBuilder();
int numPerPartition = 2220 / env.getParallelism() / 10;
for (int i = 0; i < env.getParallelism(); i++) {
expected.append('(').append(i).append(',')
.append(numPerPartition).append(")\n");
}
compareResultAsText(result, expected.toString());
}
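This test relies on four small helpers that are not reproduced here. The following sketch shows one way they could be written so that the expected output holds: Filter1 drops values up to 780 (leaving 2220 of the 3000 elements and creating skew), PartitionIndexMapper tags each element with the index of the subtask that processes it, Reducer1 counts elements per subtask, and Mapper1 truncates the count to tens to smooth out scheduling effects. Treat this as an assumption-laden sketch rather than the exact test code.

// Hypothetical helper implementations, consistent with the expected (i, 2220 / parallelism / 10) output.
// Assumes imports of org.apache.flink.api.common.functions.FilterFunction,
// MapFunction, ReduceFunction, RichMapFunction, and org.apache.flink.api.java.tuple.Tuple2.
public static class Filter1 implements FilterFunction<Long> {
    @Override
    public boolean filter(Long value) {
        return value > 780; // keeps 2220 of the 3000 generated numbers -> skewed partitions
    }
}

public static class PartitionIndexMapper extends RichMapFunction<Long, Tuple2<Integer, Integer>> {
    @Override
    public Tuple2<Integer, Integer> map(Long value) {
        // tag each element with the subtask (partition) that processed it
        return new Tuple2<>(getRuntimeContext().getIndexOfThisSubtask(), 1);
    }
}

public static class Reducer1 implements ReduceFunction<Tuple2<Integer, Integer>> {
    @Override
    public Tuple2<Integer, Integer> reduce(Tuple2<Integer, Integer> a, Tuple2<Integer, Integer> b) {
        return new Tuple2<>(a.f0, a.f1 + b.f1); // element count per partition index
    }
}

public static class Mapper1 implements MapFunction<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>> {
    @Override
    public Tuple2<Integer, Integer> map(Tuple2<Integer, Integer> value) {
        return new Tuple2<>(value.f0, value.f1 / 10); // truncate to tens to hide small scheduling differences
    }
}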
Example 9: testRangePartitionInIteration
import org.apache.flink.api.java.ExecutionEnvironment; // import the package/class this method depends on
@Test(expected = InvalidProgramException.class)
public void testRangePartitionInIteration() throws Exception {
// does not apply for collection execution
if (super.mode == TestExecutionMode.COLLECTION) {
throw new InvalidProgramException("Does not apply for collection execution");
}
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSource<Long> source = env.generateSequence(0, 10000);
DataSet<Tuple2<Long, String>> tuples = source.map(new MapFunction<Long, Tuple2<Long, String>>() {
@Override
public Tuple2<Long, String> map(Long v) throws Exception {
return new Tuple2<>(v, Long.toString(v));
}
});
DeltaIteration<Tuple2<Long, String>, Tuple2<Long, String>> it = tuples.iterateDelta(tuples, 10, 0);
DataSet<Tuple2<Long, String>> body = it.getWorkset()
.partitionByRange(1) // Verify that range partition is not allowed in iteration
.join(it.getSolutionSet())
.where(0).equalTo(0).projectFirst(0).projectSecond(1);
DataSet<Tuple2<Long, String>> result = it.closeWith(body, body);
result.collect(); // should fail
}
Example 10: testForcedRebalancing
import org.apache.flink.api.java.ExecutionEnvironment; // import the package/class this method depends on
@Test
public void testForcedRebalancing() throws Exception {
/*
* Test forced rebalancing
*/
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
// generate some numbers in parallel
DataSet<Long> ds = env.generateSequence(1, 3000);
DataSet<Tuple2<Integer, Integer>> uniqLongs = ds
// introduce some partition skew by filtering
.filter(new Filter1())
// rebalance
.rebalance()
// count values in each partition
.map(new PartitionIndexMapper())
.groupBy(0)
.reduce(new Reducer1())
// round counts to mitigate runtime scheduling effects (lazy split assignment)
.map(new Mapper1());
List<Tuple2<Integer, Integer>> result = uniqLongs.collect();
StringBuilder expected = new StringBuilder();
int numPerPartition = 2220 / env.getParallelism() / 10;
for (int i = 0; i < env.getParallelism(); i++) {
expected.append('(').append(i).append(',')
.append(numPerPartition).append(")\n");
}
compareResultAsText(result, expected.toString());
}
Example 11: testSortingParallelism4
import org.apache.flink.api.java.ExecutionEnvironment; // import the package/class this method depends on
@Test
public void testSortingParallelism4() throws Exception {
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<Long> ds = env.generateSequence(0, 1000);
// randomize
ds.map(new MapFunction<Long, Long>() {
Random rand = new Random(1234L);
@Override
public Long map(Long value) throws Exception {
return rand.nextLong();
}
}).writeAsText(resultPath)
.sortLocalOutput("*", Order.ASCENDING)
.setParallelism(4);
env.execute();
BufferedReader[] resReaders = getResultReader(resultPath);
for (BufferedReader br : resReaders) {
long cmp = Long.MIN_VALUE;
while (br.ready()) {
long cur = Long.parseLong(br.readLine());
assertTrue("Invalid order of sorted output", cmp <= cur);
cmp = cur;
}
br.close();
}
}
Example 12: testRegisterTypeWithKryoSerializer
import org.apache.flink.api.java.ExecutionEnvironment; // import the package/class this method depends on
/**
* Tests whether the Kryo serializer is forwarded via the ExecutionConfig.
* @throws Exception
*/
@Test
public void testRegisterTypeWithKryoSerializer() throws Exception {
int numElements = 10;
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.registerTypeWithKryoSerializer(TestClass.class, new TestClassSerializer());
DataSet<Long> input = env.generateSequence(0, numElements - 1);
DataSet<TestClass> mapped = input.map(new MapFunction<Long, TestClass>() {
private static final long serialVersionUID = -529116076312998262L;
@Override
public TestClass map(Long value) throws Exception {
return new TestClass(value);
}
});
List<TestClass> expected = new ArrayList<>(numElements);
for (int i = 0; i < numElements; i++) {
expected.add(new TestClass(42));
}
compareResultCollections(expected, mapped.collect(), new Comparator<TestClass>() {
@Override
public int compare(TestClass o1, TestClass o2) {
return (int) (o1.getValue() - o2.getValue());
}
});
}
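TestClass and TestClassSerializer are defined elsewhere in the test. A hedged sketch, assuming the custom Kryo serializer deliberately ignores the written value and always deserializes to 42 (which would explain why every expected element is new TestClass(42)), could look like this:

// Illustrative only; the real test classes may differ.
// Assumes imports of com.esotericsoftware.kryo.Kryo, com.esotericsoftware.kryo.Serializer,
// com.esotericsoftware.kryo.io.Input, com.esotericsoftware.kryo.io.Output, and java.io.Serializable.
public static class TestClass {
    private final long value;

    public TestClass(long value) {
        this.value = value;
    }

    public long getValue() {
        return value;
    }

    @Override
    public boolean equals(Object o) {
        return o instanceof TestClass && ((TestClass) o).value == value;
    }

    @Override
    public int hashCode() {
        return Long.hashCode(value);
    }
}

// registerTypeWithKryoSerializer requires the serializer to be Serializable so it can be shipped to the cluster.
public static class TestClassSerializer extends Serializer<TestClass> implements Serializable {
    @Override
    public void write(Kryo kryo, Output output, TestClass object) {
        output.writeLong(object.getValue());
    }

    @Override
    public TestClass read(Kryo kryo, Input input, Class<TestClass> type) {
        input.readLong();          // discard the written value...
        return new TestClass(42L); // ...and always produce 42, as the test's expected list suggests
    }
}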
Example 13: testCrossWithSmall
import org.apache.flink.api.java.ExecutionEnvironment; // import the package/class this method depends on
@Test
public void testCrossWithSmall() {
// construct the plan
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(DEFAULT_PARALLELISM);
DataSet<Long> set1 = env.generateSequence(0, 1);
DataSet<Long> set2 = env.generateSequence(0, 1);
set1.crossWithTiny(set2).name("Cross")
.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
try {
Plan plan = env.createProgramPlan();
OptimizedPlan oPlan = compileWithStats(plan);
OptimizerPlanNodeResolver resolver = new OptimizerPlanNodeResolver(oPlan);
DualInputPlanNode crossPlanNode = resolver.getNode("Cross");
Channel in1 = crossPlanNode.getInput1();
Channel in2 = crossPlanNode.getInput2();
assertEquals(ShipStrategyType.FORWARD, in1.getShipStrategy());
assertEquals(ShipStrategyType.BROADCAST, in2.getShipStrategy());
} catch(CompilerException ce) {
ce.printStackTrace();
fail("The Flink optimizer is unable to compile this plan correctly.");
}
}
Example 14: testBranchingWithMultipleDataSinksSmall
import org.apache.flink.api.java.ExecutionEnvironment; // import the package/class this method depends on
/**
 * <pre>
 *          (SRC A)
 *          /     \
 *   (SINK A)   (SINK B)
 * </pre>
 */
@Test
public void testBranchingWithMultipleDataSinksSmall() {
try {
String outPath1 = "/tmp/out1";
String outPath2 = "/tmp/out2";
// construct the plan
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(DEFAULT_PARALLELISM);
DataSet<Long> source1 = env.generateSequence(0, 1);
source1.writeAsText(outPath1);
source1.writeAsText(outPath2);
Plan plan = env.createProgramPlan();
OptimizedPlan oPlan = compileNoStats(plan);
// ---------- check the optimizer plan ----------
// number of sinks
Assert.assertEquals("Wrong number of data sinks.", 2, oPlan.getDataSinks().size());
// sinks contain all sink paths
Set<String> allSinks = new HashSet<String>();
allSinks.add(outPath1);
allSinks.add(outPath2);
for (SinkPlanNode n : oPlan.getDataSinks()) {
String path = ((TextOutputFormat<String>)n.getSinkNode().getOperator()
.getFormatWrapper().getUserCodeObject()).getOutputFilePath().toString();
Assert.assertTrue("Invalid data sink.", allSinks.remove(path));
}
// ---------- compile plan to job graph to verify that no error is thrown ----------
JobGraphGenerator jobGen = new JobGraphGenerator();
jobGen.compileJobGraph(oPlan);
} catch (Exception e) {
e.printStackTrace();
Assert.fail(e.getMessage());
}
}
Example 15: main
import org.apache.flink.api.java.ExecutionEnvironment; // import the package/class this method depends on
public static void main(String[] args) throws Exception {
if(!parseParameters(args)) {
return;
}
// set up execution environment
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
// get input data
DataSet<Long> pagesInput = env.generateSequence(0, numPages - 1);
DataSet<Tuple2<Long, Long>> linksInput = getLinksDataSet(env);
// assign initial rank to pages
DataSet<Tuple2<Long, Double>> pagesWithRanks = pagesInput
.map(new RankAssigner(1.0d / numPages));
// build adjacency list from link input
DataSet<Tuple2<Long, Long[]>> adjacencyListInput =
linksInput.groupBy(0).reduceGroup(new BuildOutgoingEdgeList());
// set iterative data set
IterativeDataSet<Tuple2<Long, Double>> iteration =
pagesWithRanks.iterate(maxIterations);
DataSet<Tuple2<Long, Double>> newRanks = iteration
// join pages with outgoing edges and distribute rank
.join(adjacencyListInput).where(0).equalTo(0)
.flatMap(new JoinVertexWithEdgesMatch())
// collect and sum ranks
.groupBy(0).aggregate(SUM, 1)
// apply dampening factor
.map(new Dampener(DAMPENING_FACTOR, numPages));
DataSet<Tuple2<Long, Double>> finalPageRanks = iteration
.closeWith(newRanks);
// emit result
finalPageRanks.writeAsCsv(outputPath, "\n", " ");
// execute program
env.execute("Basic Page Rank Example");
}
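This is the classic Flink batch PageRank example; generateSequence(0, numPages - 1) simply produces the page IDs. The user functions referenced above (RankAssigner, BuildOutgoingEdgeList, JoinVertexWithEdgesMatch, Dampener) and getLinksDataSet are defined elsewhere in the program. As one hedged illustration of the two simplest pieces, RankAssigner can assign the uniform initial rank and Dampener can apply the damping formula rank' = d * rank + (1 - d) / numPages:

// Sketches of two of the helpers, consistent with how they are called above; not necessarily verbatim.
// Assumes imports of org.apache.flink.api.common.functions.MapFunction
// and org.apache.flink.api.java.tuple.Tuple2.
public static final class RankAssigner implements MapFunction<Long, Tuple2<Long, Double>> {
    private final double initialRank;

    public RankAssigner(double initialRank) {
        this.initialRank = initialRank; // 1.0 / numPages in main()
    }

    @Override
    public Tuple2<Long, Double> map(Long pageId) {
        return new Tuple2<>(pageId, initialRank);
    }
}

public static final class Dampener implements MapFunction<Tuple2<Long, Double>, Tuple2<Long, Double>> {
    private final double dampening;
    private final double randomJump;

    public Dampener(double dampening, double numVertices) {
        this.dampening = dampening;
        this.randomJump = (1 - dampening) / numVertices;
    }

    @Override
    public Tuple2<Long, Double> map(Tuple2<Long, Double> pageWithRank) {
        // rank' = d * rank + (1 - d) / numPages
        pageWithRank.f1 = pageWithRank.f1 * dampening + randomJump;
        return pageWithRank;
    }
}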