This article collects typical usage examples of the Java method org.apache.flink.api.java.ExecutionEnvironment.setParallelism. If you are wondering what ExecutionEnvironment.setParallelism does, how to use it, or want to see it applied in real code, the curated samples below should help. You can also learn more about the enclosing class, org.apache.flink.api.java.ExecutionEnvironment.
The following 15 code examples of ExecutionEnvironment.setParallelism are sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code samples.
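Before diving into the examples, here is a minimal, self-contained sketch (not taken from the samples below; the class and variable names are illustrative) showing the difference between the environment-wide default set by ExecutionEnvironment.setParallelism and an operator-level override:

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;

public class SetParallelismSketch {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        // Default parallelism for every operator in this job.
        env.setParallelism(4);
        DataSet<String> words = env.fromElements("a", "b", "b", "c");
        words.map(new MapFunction<String, String>() {
                    @Override
                    public String map(String value) {
                        return value.toUpperCase();
                    }
                })
                .setParallelism(1) // operator-level override of the environment default
                .print(); // print() triggers execution in the batch API
    }
}

An operator-level setParallelism call, as in example 2 below, always takes precedence over the environment default.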
Example 1: testNonPojoToVerifyFullTupleKeys
import org.apache.flink.api.java.ExecutionEnvironment; // import the package/class the method depends on

@Test
public void testNonPojoToVerifyFullTupleKeys() throws Exception {
    /*
     * Non-POJO test to verify that full-tuple keys are working.
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple2<Tuple2<Integer, Integer>, String>> ds1 = CollectionDataSets.getSmallNestedTupleDataSet(env);
    DataSet<Tuple2<Tuple2<Integer, Integer>, String>> ds2 = CollectionDataSets.getSmallNestedTupleDataSet(env);
    DataSet<Tuple2<Tuple2<Tuple2<Integer, Integer>, String>, Tuple2<Tuple2<Integer, Integer>, String>>> joinDs =
            ds1.fullOuterJoin(ds2)
                    .where(0)
                    .equalTo("f0.f0", "f0.f1") // key is now Tuple2<Integer, Integer>
                    .with(new ProjectBothFunction<Tuple2<Tuple2<Integer, Integer>, String>, Tuple2<Tuple2<Integer, Integer>, String>>());
    env.setParallelism(1);
    List<Tuple2<Tuple2<Tuple2<Integer, Integer>, String>, Tuple2<Tuple2<Integer, Integer>, String>>> result = joinDs.collect();
    String expected = "((1,1),one),((1,1),one)\n" +
            "((2,2),two),((2,2),two)\n" +
            "((3,3),three),((3,3),three)\n";
    compareResultAsTuples(result, expected);
}
Example 2: testSortPartitionPojoByNestedFieldExpression
import org.apache.flink.api.java.ExecutionEnvironment; // import the package/class the method depends on

@Test
public void testSortPartitionPojoByNestedFieldExpression() throws Exception {
    /*
     * Test sort partition on a nested field expression.
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(3);
    DataSet<POJO> ds = CollectionDataSets.getMixedPojoDataSet(env);
    List<Tuple1<Boolean>> result = ds
            .map(new IdMapper<POJO>()).setParallelism(1) // parallelize input
            .sortPartition("nestedTupleWithCustom.f1.myString", Order.ASCENDING)
            .sortPartition("number", Order.DESCENDING)
            .mapPartition(new OrderCheckMapper<>(new PojoChecker()))
            .distinct().collect();
    String expected = "(true)\n";
    compareResultAsText(result, expected);
}
Example 3: testGroupReduceOnNeighborsInvalidEdgeTrgId
import org.apache.flink.api.java.ExecutionEnvironment; // import the package/class the method depends on

/**
 * Test groupReduceOnNeighbors() -NeighborsFunction-
 * with an edge having a trgId that does not exist in the vertex DataSet.
 */
@Test
public void testGroupReduceOnNeighborsInvalidEdgeTrgId() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(PARALLELISM);
    env.getConfig().disableSysoutLogging();
    Graph<Long, Long, Long> graph = Graph.fromDataSet(TestGraphUtils.getLongLongVertexData(env),
            TestGraphUtils.getLongLongEdgeInvalidTrgData(env), env);
    try {
        DataSet<Tuple2<Long, Long>> verticesWithSumOfAllNeighborValues =
                graph.reduceOnNeighbors(new SumNeighbors(), EdgeDirection.ALL);
        verticesWithSumOfAllNeighborValues.output(new DiscardingOutputFormat<>());
        env.execute();
        fail("reduceOnNeighbors() did not fail.");
    } catch (Exception e) {
        // We expect the job to fail with an exception
    }
}
Example 4: testReduceOnKeyedDatasetWithSelector
import org.apache.flink.api.java.ExecutionEnvironment; // import the package/class the method depends on

@Test
public void testReduceOnKeyedDatasetWithSelector() throws Exception {
    // set up the execution environment
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(4);
    // create the input data and distribute it evenly among the available downstream tasks
    DataSet<Tuple3<String, Integer, Boolean>> input = createKeyedInput(env);
    List<Tuple3<String, Integer, Boolean>> actual = input
            .groupBy(new KeySelectorX())
            .reduceGroup(new KeyedCombReducer())
            .collect();
    String expected = "k1,6,true\nk2,4,true\n";
    compareResultAsTuples(actual, expected);
}
Example 5: testGetDegreesInvalidEdgeTrgId
import org.apache.flink.api.java.ExecutionEnvironment; // import the package/class the method depends on

/**
 * Test getDegrees() with an edge having a trgId that does not exist in the vertex DataSet.
 */
@Test
public void testGetDegreesInvalidEdgeTrgId() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(PARALLELISM);
    env.getConfig().disableSysoutLogging();
    Graph<Long, Long, Long> graph = Graph.fromDataSet(TestGraphUtils.getLongLongVertexData(env),
            TestGraphUtils.getLongLongEdgeInvalidTrgData(env), env);
    try {
        graph.getDegrees().output(new DiscardingOutputFormat<>());
        env.execute();
        fail("graph.getDegrees() did not fail.");
    } catch (Exception e) {
        // We expect the job to fail with an exception
    }
}
Example 6: testCorrectnessOfGroupReduceOnTuplesWithCombine
import org.apache.flink.api.java.ExecutionEnvironment; // import the package/class the method depends on

@Test
public void testCorrectnessOfGroupReduceOnTuplesWithCombine() throws Exception {
    /*
     * Check correctness of groupReduce on tuples with combine.
     */
    org.junit.Assume.assumeTrue(mode != TestExecutionMode.COLLECTION);
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(2); // important because it determines how often the combiner is called
    DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
    DataSet<Tuple2<Integer, String>> reduceDs = ds
            .groupBy(1).reduceGroup(new Tuple3GroupReduceWithCombine());
    List<Tuple2<Integer, String>> result = reduceDs.collect();
    String expected = "1,test1\n" +
            "5,test2\n" +
            "15,test3\n" +
            "34,test4\n" +
            "65,test5\n" +
            "111,test6\n";
    compareResultAsTuples(result, expected);
}
Example 7: run
import org.apache.flink.api.java.ExecutionEnvironment; // import the package/class the method depends on

public void run() throws Exception {
    LOG.info("Random seed = {}", RANDOM_SEED);
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().disableSysoutLogging();
    for (int parallelism = MAX_PARALLELISM; parallelism > 0; parallelism--) {
        LOG.info("Parallelism = {}", parallelism);
        env.setParallelism(parallelism);
        testReduce(env);
        testGroupedReduce(env);
        testJoin(env);
        testCross(env);
    }
}
Example 8: testStringBasedDefinitionOnGroupSortForTwoGroupingKeysWithPojos
import org.apache.flink.api.java.ExecutionEnvironment; // import the package/class the method depends on

@Test
public void testStringBasedDefinitionOnGroupSortForTwoGroupingKeysWithPojos() throws Exception {
    /*
     * Test string-based definition on group sort, for two grouping keys with POJOs.
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    DataSet<PojoContainingTupleAndWritable> ds = CollectionDataSets.getGroupSortedPojoContainingTupleAndWritable(env);
    // f0.f0 is the first integer
    DataSet<String> reduceDs = ds.groupBy("hadoopFan")
            .sortGroup("theTuple.f0", Order.DESCENDING)
            .sortGroup("theTuple.f1", Order.DESCENDING)
            .reduceGroup(new GroupReducer5());
    List<String> result = reduceDs.collect();
    String expected = "1---(10,100)-\n" +
            "2---(30,600)-(30,400)-(30,200)-(20,201)-(20,200)-\n";
    compareResultAsText(result, expected);
}
Example 9: testTableSourceFieldOrder
import org.apache.flink.api.java.ExecutionEnvironment; // import the package/class the method depends on

@Test
public void testTableSourceFieldOrder() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(4);
    BatchTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env, new TableConfig());
    HBaseTableSource hbaseTable = new HBaseTableSource(getConf(), TEST_TABLE);
    // shuffle the order of column registration
    hbaseTable.addColumn(FAMILY2, F2COL1, String.class);
    hbaseTable.addColumn(FAMILY3, F3COL1, Double.class);
    hbaseTable.addColumn(FAMILY1, F1COL1, Integer.class);
    hbaseTable.addColumn(FAMILY2, F2COL2, Long.class);
    hbaseTable.addColumn(FAMILY3, F3COL2, Boolean.class);
    hbaseTable.addColumn(FAMILY3, F3COL3, String.class);
    tableEnv.registerTableSource("hTable", hbaseTable);
    Table result = tableEnv.sqlQuery("SELECT * FROM hTable AS h");
    DataSet<Row> resultSet = tableEnv.toDataSet(result, Row.class);
    List<Row> results = resultSet.collect();
    String expected =
            "Hello-1,100,1.01,false,Welt-1,10\n" +
            "Hello-2,200,2.02,true,Welt-2,20\n" +
            "Hello-3,300,3.03,false,Welt-3,30\n" +
            "null,400,4.04,true,Welt-4,40\n" +
            "Hello-5,500,5.05,false,Welt-5,50\n" +
            "Hello-6,600,6.06,true,Welt-6,60\n" +
            "Hello-7,700,7.07,false,Welt-7,70\n" +
            "null,800,8.08,true,Welt-8,80\n";
    TestBaseUtils.compareResultAsText(results, expected);
}
Example 10: checkSinglePartitionedGroupedSource8
import org.apache.flink.api.java.ExecutionEnvironment; // import the package/class the method depends on

@Test
public void checkSinglePartitionedGroupedSource8() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    DataSource<Tuple3<Long, SomePojo, String>> data = env.fromCollection(tuple3PojoData, tuple3PojoType);
    data.getSplitDataProperties()
            .splitsPartitionedBy("f1")
            .splitsGroupedBy("f1.stringField");
    data.output(new DiscardingOutputFormat<Tuple3<Long, SomePojo, String>>());
    Plan plan = env.createProgramPlan();
    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);
    // check the optimized plan
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();
    GlobalProperties gprops = sourceNode.getGlobalProperties();
    LocalProperties lprops = sourceNode.getLocalProperties();
    Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(1, 2, 3)));
    Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
    Assert.assertTrue(lprops.getGroupedFields() == null);
    Assert.assertTrue(lprops.getOrdering() == null);
}
Example 11: testJoinWithRangePartitioning
import org.apache.flink.api.java.ExecutionEnvironment; // import the package/class the method depends on

@Test
public void testJoinWithRangePartitioning() throws Exception {
    /*
     * Test join on tuples with multiple key field positions and the same customized distribution.
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple3<Integer, Long, String>> ds1 = CollectionDataSets.get3TupleDataSet(env);
    DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.get5TupleDataSet(env);
    env.setParallelism(4);
    TestDistribution testDis = new TestDistribution();
    DataSet<Tuple2<String, String>> joinDs =
            DataSetUtils.partitionByRange(ds1, testDis, 0, 1)
                    .join(DataSetUtils.partitionByRange(ds2, testDis, 0, 4))
                    .where(0, 1)
                    .equalTo(0, 4)
                    .with(new T3T5FlatJoin());
    List<Tuple2<String, String>> result = joinDs.collect();
    String expected = "Hi,Hallo\n" +
            "Hello,Hallo Welt\n" +
            "Hello world,Hallo Welt wie gehts?\n" +
            "Hello world,ABC\n" +
            "I am fine.,HIJ\n" +
            "I am fine.,IJK\n";
    compareResultAsTuples(result, expected);
}
Example 12: receiveParameters
import org.apache.flink.api.java.ExecutionEnvironment; // import the package/class the method depends on

private void receiveParameters(ExecutionEnvironment env) throws IOException {
    for (int x = 0; x < 4; x++) {
        Tuple value = (Tuple) streamer.getRecord(true);
        switch (Parameters.valueOf(((String) value.getField(0)).toUpperCase())) {
            case DOP:
                Integer dop = value.<Integer>getField(1);
                env.setParallelism(dop);
                break;
            case MODE:
                if (value.<Boolean>getField(1)) {
                    LOG.info("Local execution specified, using default for {}.", PythonOptions.DC_TMP_DIR);
                    tmpDistributedDir = new Path(PythonOptions.DC_TMP_DIR.defaultValue());
                }
                break;
            case RETRY:
                int retry = value.<Integer>getField(1);
                env.setRestartStrategy(RestartStrategies.fixedDelayRestart(retry, 10000L));
                break;
            case ID:
                currentEnvironmentID = value.<Integer>getField(1);
                break;
        }
    }
    if (env.getParallelism() < 0) {
        env.setParallelism(1);
    }
}
Example 13: testReduceOnNonKeyedDataset
import org.apache.flink.api.java.ExecutionEnvironment; // import the package/class the method depends on

@Test
public void testReduceOnNonKeyedDataset() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(4);
    // create the input data and distribute it evenly among the available downstream tasks
    DataSet<Tuple2<Integer, Boolean>> input = createNonKeyedInput(env);
    List<Tuple2<Integer, Boolean>> actual = input.reduceGroup(new NonKeyedCombReducer()).collect();
    String expected = "10,true\n";
    compareResultAsTuples(actual, expected);
}
Example 14: tcph3
import org.apache.flink.api.java.ExecutionEnvironment; // import the package/class the method depends on

public static void tcph3(String[] args) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(Integer.parseInt(args[0]));
    // order id, order status, order date, order prio, ship prio
    DataSet<Tuple5<Long, String, String, String, Integer>> orders =
            env.readCsvFile(args[1])
                    .fieldDelimiter("|").lineDelimiter("\n")
                    .includeFields("101011001").types(Long.class, String.class, String.class, String.class, Integer.class)
                    .name(ORDERS);
    // order id, extended price
    DataSet<Tuple2<Long, Double>> lineItems =
            env.readCsvFile(args[2])
                    .fieldDelimiter("|").lineDelimiter("\n")
                    .includeFields("100001").types(Long.class, Double.class)
                    .name(LINEITEM);
    DataSet<Tuple2<Long, Integer>> filterO = orders.flatMap(new FilterO()).name(MAPPER_NAME);
    DataSet<Tuple3<Long, Integer, Double>> joinLiO = filterO.join(lineItems).where(0).equalTo(0).with(new JoinLiO()).name(JOIN_NAME);
    DataSet<Tuple3<Long, Integer, Double>> aggLiO = joinLiO.groupBy(0, 1).reduceGroup(new AggLiO()).name(REDUCE_NAME);
    aggLiO.writeAsCsv(args[3], "\n", "|").name(SINK);
    env.execute();
}
Example 15: checkJoinWithReplicatedSourceInputBehindMap
import org.apache.flink.api.java.ExecutionEnvironment; // import the package/class the method depends on

/**
 * Tests a join program with a replicated data source behind a map.
 */
@Test
public void checkJoinWithReplicatedSourceInputBehindMap() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
    ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
            new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));
    DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
    DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);
    DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
            .map(new IdMap())
            .join(source2).where("*").equalTo("*")
            .writeAsText("/some/newpath");
    Plan plan = env.createProgramPlan();
    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);
    // check the optimized plan: the join should have a forward ship strategy on both sides
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();
    ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy();
    ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy();
    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1);
    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2);
}