This article collects typical usage examples of the Java method org.apache.flink.api.java.ExecutionEnvironment.createInput. If you are wondering what ExecutionEnvironment.createInput does and how to use it, the curated code examples below should help. You can also read further about the enclosing class, org.apache.flink.api.java.ExecutionEnvironment.
The following shows 14 code examples of ExecutionEnvironment.createInput, sorted by popularity by default.
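Before the examples, here is a minimal, self-contained sketch of the two createInput overloads: one infers the TypeInformation from the InputFormat's generic signature, the other takes it explicitly. The input path is a placeholder, not from any of the examples below.

import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.TextInputFormat;
import org.apache.flink.core.fs.Path;

public class CreateInputSketch {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        TextInputFormat format = new TextInputFormat(new Path("/tmp/words.txt")); // placeholder path

        // Overload 1: the element type (String) is inferred from the InputFormat.
        DataSet<String> lines = env.createInput(format);

        // Overload 2: the TypeInformation is supplied explicitly.
        DataSet<String> sameLines = env.createInput(format, BasicTypeInfo.STRING_TYPE_INFO);

        lines.print();
    }
}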
Example 1: testCassandraBatchFormats

import org.apache.flink.api.java.ExecutionEnvironment; // import on which the method depends

@Test
public void testCassandraBatchFormats() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);

    DataSet<Tuple3<String, Integer, Integer>> dataSet = env.fromCollection(collection);
    dataSet.output(new CassandraOutputFormat<Tuple3<String, Integer, Integer>>(INSERT_DATA_QUERY, builder));

    env.execute("Write data");

    DataSet<Tuple3<String, Integer, Integer>> inputDS = env.createInput(
        new CassandraInputFormat<Tuple3<String, Integer, Integer>>(SELECT_DATA_QUERY, builder),
        TypeInformation.of(new TypeHint<Tuple3<String, Integer, Integer>>(){}));

    long count = inputDS.count();
    Assert.assertEquals(20L, count);
}
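The test relies on fixtures defined elsewhere in the test class: collection, INSERT_DATA_QUERY, SELECT_DATA_QUERY, and builder. A plausible sketch of those fixtures follows; the keyspace, table, and contact point are assumptions, not taken from the original.

import com.datastax.driver.core.Cluster;
import org.apache.flink.streaming.connectors.cassandra.ClusterBuilder;

// Hypothetical queries; keyspace and table names are placeholders.
private static final String INSERT_DATA_QUERY =
    "INSERT INTO flink.batches (id, counter, batch_id) VALUES (?, ?, ?);";
private static final String SELECT_DATA_QUERY = "SELECT * FROM flink.batches;";

// 20 rows, matching the count asserted in the test.
private static final List<Tuple3<String, Integer, Integer>> collection = new ArrayList<>(20);
static {
    for (int i = 0; i < 20; i++) {
        collection.add(new Tuple3<>(UUID.randomUUID().toString(), i, 0));
    }
}

// The ClusterBuilder tells the Cassandra input/output formats how to connect.
private static final ClusterBuilder builder = new ClusterBuilder() {
    @Override
    protected Cluster buildCluster(Cluster.Builder builder) {
        return builder.addContactPoint("127.0.0.1").build();
    }
};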
Example 2: testKeySelection

import org.apache.flink.api.java.ExecutionEnvironment; // import on which the method depends

@Test
public void testKeySelection() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().enableObjectReuse();

    Path in = new Path(inFile.getAbsoluteFile().toURI());

    AvroInputFormat<User> users = new AvroInputFormat<User>(in, User.class);
    DataSet<User> usersDS = env.createInput(users);

    DataSet<Tuple2<String, Integer>> res = usersDS
        .groupBy("name")
        .reduceGroup(new GroupReduceFunction<User, Tuple2<String, Integer>>() {
            @Override
            public void reduce(Iterable<User> values, Collector<Tuple2<String, Integer>> out) throws Exception {
                for (User u : values) {
                    out.collect(new Tuple2<String, Integer>(u.getName().toString(), 1));
                }
            }
        });

    res.writeAsText(resultPath);
    env.execute("Avro Key selection");

    expected = "(Alyssa,1)\n(Charlie,1)\n";
}
Example 3: checkJoinWithReplicatedSourceInputBehindReduce

import org.apache.flink.api.java.ExecutionEnvironment; // import on which the method depends

/**
 * Tests that the compiler fails for a join program with a replicated data source behind a reduce.
 */
@Test(expected = CompilerException.class)
public void checkJoinWithReplicatedSourceInputBehindReduce() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
    ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
        new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(
            new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

    DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
    DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

    DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
        .reduce(new LastReduce())
        .join(source2).where("*").equalTo("*")
        .writeAsText("/some/newpath");

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler; compilation is expected to fail
    OptimizedPlan oPlan = compileNoStats(plan);
}
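LastReduce is a helper from the surrounding test class and is not shown here. A plausible sketch, consistent with its use as a ReduceFunction over Tuple1<String> but still an assumption, simply keeps the later of two elements:

import org.apache.flink.api.common.functions.ReduceFunction;

// Hypothetical reducer; the real implementation may differ.
private static class LastReduce implements ReduceFunction<Tuple1<String>> {
    @Override
    public Tuple1<String> reduce(Tuple1<String> value1, Tuple1<String> value2) throws Exception {
        return value2;
    }
}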
Example 4: main

import org.apache.flink.api.java.ExecutionEnvironment; // import on which the method depends

public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: WordCount <input path> <result path>");
        return;
    }
    final String inputPath = args[0];
    final String outputPath = args[1];

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // Set up the Hadoop Input Format
    Job job = Job.getInstance();
    HadoopInputFormat<LongWritable, Text> hadoopInputFormat =
        new HadoopInputFormat<LongWritable, Text>(new TextInputFormat(), LongWritable.class, Text.class, job);
    TextInputFormat.addInputPath(job, new Path(inputPath));

    // Create a Flink job with it
    DataSet<Tuple2<LongWritable, Text>> text = env.createInput(hadoopInputFormat);

    // Tokenize the lines and convert from the Writable "Text" to String for better handling
    DataSet<Tuple2<String, Integer>> words = text.flatMap(new Tokenizer());

    // Sum up the words
    DataSet<Tuple2<String, Integer>> result = words.groupBy(0).aggregate(Aggregations.SUM, 1);

    // Convert Strings back to the Writable "Text" for use with the Hadoop Output Format
    DataSet<Tuple2<Text, IntWritable>> hadoopResult = result.map(new HadoopDatatypeMapper());

    // Set up the Hadoop Output Format
    HadoopOutputFormat<Text, IntWritable> hadoopOutputFormat =
        new HadoopOutputFormat<Text, IntWritable>(new TextOutputFormat<Text, IntWritable>(), job);
    hadoopOutputFormat.getConfiguration().set("mapreduce.output.textoutputformat.separator", " ");
    // set the separator under the old key name as well, to cover both Hadoop API generations
    hadoopOutputFormat.getConfiguration().set("mapred.textoutputformat.separator", " ");
    TextOutputFormat.setOutputPath(job, new Path(outputPath));

    // Output & Execute
    hadoopResult.output(hadoopOutputFormat);
    env.execute("Word Count");
}
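The Tokenizer and HadoopDatatypeMapper helpers are not shown above. Minimal sketches consistent with how they are used follow; the exact implementations are assumptions.

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;

// Hypothetical tokenizer: splits each line into lowercase words and emits (word, 1) pairs.
public static final class Tokenizer
        implements FlatMapFunction<Tuple2<LongWritable, Text>, Tuple2<String, Integer>> {
    @Override
    public void flatMap(Tuple2<LongWritable, Text> value, Collector<Tuple2<String, Integer>> out) {
        for (String token : value.f1.toString().toLowerCase().split("\\W+")) {
            if (token.length() > 0) {
                out.collect(new Tuple2<String, Integer>(token, 1));
            }
        }
    }
}

// Hypothetical mapper: converts (String, Integer) back to Hadoop Writable types.
public static final class HadoopDatatypeMapper
        implements MapFunction<Tuple2<String, Integer>, Tuple2<Text, IntWritable>> {
    @Override
    public Tuple2<Text, IntWritable> map(Tuple2<String, Integer> value) {
        return new Tuple2<Text, IntWritable>(new Text(value.f0), new IntWritable(value.f1));
    }
}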
Example 5: checkJoinWithReplicatedSourceInputBehindMapChangingparallelism

import org.apache.flink.api.java.ExecutionEnvironment; // import on which the method depends

/**
 * Tests that the compiler fails for a join program with a replicated data source behind a map with changing parallelism.
 */
@Test(expected = CompilerException.class)
public void checkJoinWithReplicatedSourceInputBehindMapChangingparallelism() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
    ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
        new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(
            new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

    DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
    DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

    DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
        .map(new IdMap()).setParallelism(DEFAULT_PARALLELISM + 1)
        .join(source2).where("*").equalTo("*")
        .writeAsText("/some/newpath");

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler; compilation is expected to fail
    OptimizedPlan oPlan = compileNoStats(plan);
}
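IdMap, along with the analogous IdFlatMap and IdPMap used in later examples, is an identity helper defined in the test class. Plausible sketches, which are assumptions consistent with how they are used:

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.MapPartitionFunction;
import org.apache.flink.util.Collector;

// Hypothetical identity map.
private static class IdMap implements MapFunction<Tuple1<String>, Tuple1<String>> {
    @Override
    public Tuple1<String> map(Tuple1<String> value) throws Exception {
        return value;
    }
}

// Hypothetical identity flatMap.
private static class IdFlatMap implements FlatMapFunction<Tuple1<String>, Tuple1<String>> {
    @Override
    public void flatMap(Tuple1<String> value, Collector<Tuple1<String>> out) throws Exception {
        out.collect(value);
    }
}

// Hypothetical identity mapPartition.
private static class IdPMap implements MapPartitionFunction<Tuple1<String>, Tuple1<String>> {
    @Override
    public void mapPartition(Iterable<Tuple1<String>> values, Collector<Tuple1<String>> out) throws Exception {
        for (Tuple1<String> v : values) {
            out.collect(v);
        }
    }
}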
Example 6: checkJoinWithReplicatedSourceInputBehindRebalance

import org.apache.flink.api.java.ExecutionEnvironment; // import on which the method depends

/**
 * Tests that the compiler fails for a join program with a replicated data source behind a rebalance.
 */
@Test(expected = CompilerException.class)
public void checkJoinWithReplicatedSourceInputBehindRebalance() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
    ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
        new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(
            new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

    DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
    DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

    DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
        .rebalance()
        .join(source2).where("*").equalTo("*")
        .writeAsText("/some/newpath");

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler; compilation is expected to fail
    OptimizedPlan oPlan = compileNoStats(plan);
}
Example 7: checkJoinWithReplicatedSourceInputChangingparallelism

import org.apache.flink.api.java.ExecutionEnvironment; // import on which the method depends

/**
 * Tests that the compiler fails for a join program with a replicated data source and changing parallelism.
 */
@Test(expected = CompilerException.class)
public void checkJoinWithReplicatedSourceInputChangingparallelism() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
    ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
        new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(
            new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

    DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
    DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

    DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
        .join(source2).where("*").equalTo("*").setParallelism(DEFAULT_PARALLELISM + 2)
        .writeAsText("/some/newpath");

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler; compilation is expected to fail
    OptimizedPlan oPlan = compileNoStats(plan);
}
Example 8: checkJoinWithReplicatedSourceInputBehindMapPartition

import org.apache.flink.api.java.ExecutionEnvironment; // import on which the method depends

/**
 * Tests a join program with a replicated data source behind a mapPartition.
 */
@Test
public void checkJoinWithReplicatedSourceInputBehindMapPartition() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
    ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
        new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(
            new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

    DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
    DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

    DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
        .mapPartition(new IdPMap())
        .join(source2).where("*").equalTo("*")
        .writeAsText("/some/newpath");

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized plan: the join should have a forward ship strategy on both sides
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();
    ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy();
    ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy();
    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1);
    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2);
}
Example 9: main

import org.apache.flink.api.java.ExecutionEnvironment; // import on which the method depends

public static void main(String[] argv) throws Exception {
    // BasicConfigurator.configure(new ConsoleAppender(new PatternLayout()));
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();

    OSMPBFWayInputFormat iformat = new OSMPBFWayInputFormat();
    iformat.setFilePath("C:/projets/OSMImport/france-latest.osm.pbf");
    // iformat.setFilePath("C:/projets/OSMImport/rhone-alpes-latest.osm.pbf");

    FileInputSplit[] s = iformat.createInputSplits(4); // the splits are not used further here

    DataSource<WayEntity> r = env.createInput(iformat, new GenericTypeInfo<WayEntity>(WayEntity.class));
    r.flatMap(new FlatMapFunction<WayEntity, Tuple2<Long, String>>() {
        @Override
        public void flatMap(WayEntity value, Collector<Tuple2<Long, String>> out) throws Exception {
            if (value.fields != null && value.fields.containsKey("type")) {
                out.collect(new Tuple2<>(value.id, (String) value.fields.get("type")));
            }
        }
    }).writeAsCsv("test.csv");

    env.execute();
}
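WayEntity is passed to createInput with an explicit GenericTypeInfo because it is not a tuple type. Its definition is not shown; a minimal sketch consistent with the fields accessed above (an assumption, not the original class):

import java.util.Map;

// Hypothetical POJO for an OSM way; only the members used above are sketched.
public class WayEntity {
    public long id;
    public Map<String, Object> fields;
}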
Example 10: testWithAvroGenericSer

import org.apache.flink.api.java.ExecutionEnvironment; // import on which the method depends

@Test
public void testWithAvroGenericSer() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().enableForceAvro();

    Path in = new Path(inFile.getAbsoluteFile().toURI());

    AvroInputFormat<User> users = new AvroInputFormat<User>(in, User.class);
    DataSet<User> usersDS = env.createInput(users);

    DataSet<Tuple2<String, Integer>> res = usersDS
        .groupBy(new KeySelector<User, String>() {
            @Override
            public String getKey(User value) throws Exception {
                return String.valueOf(value.getName());
            }
        })
        .reduceGroup(new GroupReduceFunction<User, Tuple2<String, Integer>>() {
            @Override
            public void reduce(Iterable<User> values, Collector<Tuple2<String, Integer>> out) throws Exception {
                for (User u : values) {
                    out.collect(new Tuple2<String, Integer>(u.getName().toString(), 1));
                }
            }
        });

    res.writeAsText(resultPath);
    env.execute("Avro Key selection");

    expected = "(Charlie,1)\n(Alyssa,1)\n";
}
Example 11: testWithKryoGenericSer

import org.apache.flink.api.java.ExecutionEnvironment; // import on which the method depends

@Test
public void testWithKryoGenericSer() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().enableForceKryo();

    Path in = new Path(inFile.getAbsoluteFile().toURI());

    AvroInputFormat<User> users = new AvroInputFormat<User>(in, User.class);
    DataSet<User> usersDS = env.createInput(users);

    DataSet<Tuple2<String, Integer>> res = usersDS
        .groupBy(new KeySelector<User, String>() {
            @Override
            public String getKey(User value) throws Exception {
                return String.valueOf(value.getName());
            }
        })
        .reduceGroup(new GroupReduceFunction<User, Tuple2<String, Integer>>() {
            @Override
            public void reduce(Iterable<User> values, Collector<Tuple2<String, Integer>> out) throws Exception {
                for (User u : values) {
                    out.collect(new Tuple2<String, Integer>(u.getName().toString(), 1));
                }
            }
        });

    res.writeAsText(resultPath);
    env.execute("Avro Key selection");

    expected = "(Charlie,1)\n(Alyssa,1)\n";
}
Example 12: testField

import org.apache.flink.api.java.ExecutionEnvironment; // import on which the method depends

private void testField(final String fieldName) throws Exception {
    before();

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    Path in = new Path(inFile.getAbsoluteFile().toURI());

    AvroInputFormat<User> users = new AvroInputFormat<User>(in, User.class);
    DataSet<User> usersDS = env.createInput(users);

    DataSet<Object> res = usersDS.groupBy(fieldName).reduceGroup(new GroupReduceFunction<User, Object>() {
        @Override
        public void reduce(Iterable<User> values, Collector<Object> out) throws Exception {
            for (User u : values) {
                out.collect(u.get(fieldName));
            }
        }
    });
    res.writeAsText(resultPath);
    env.execute("Simple Avro read job");

    // check that the automatic registration of the types worked
    ExecutionConfig ec = env.getConfig();
    Assert.assertTrue(ec.getRegisteredKryoTypes().contains(org.apache.flink.api.io.avro.generated.Fixed16.class));

    if (fieldName.equals("name")) {
        expected = "Alyssa\nCharlie";
    } else if (fieldName.equals("type_enum")) {
        expected = "GREEN\nRED\n";
    } else if (fieldName.equals("type_double_test")) {
        expected = "123.45\n1.337\n";
    } else {
        Assert.fail("Unknown field");
    }

    after();
}
Example 13: checkJoinWithReplicatedSourceInputBehindMap

import org.apache.flink.api.java.ExecutionEnvironment; // import on which the method depends

/**
 * Tests a join program with a replicated data source behind a map.
 */
@Test
public void checkJoinWithReplicatedSourceInputBehindMap() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
    ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
        new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(
            new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

    DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
    DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

    DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
        .map(new IdMap())
        .join(source2).where("*").equalTo("*")
        .writeAsText("/some/newpath");

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized plan: the join should have a forward ship strategy on both sides
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();
    ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy();
    ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy();
    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1);
    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2);
}
Example 14: checkJoinWithReplicatedSourceInputBehindFlatMap

import org.apache.flink.api.java.ExecutionEnvironment; // import on which the method depends

/**
 * Tests a join program with a replicated data source behind a flatMap.
 */
@Test
public void checkJoinWithReplicatedSourceInputBehindFlatMap() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
    ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
        new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(
            new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

    DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
    DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

    DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
        .flatMap(new IdFlatMap())
        .join(source2).where("*").equalTo("*")
        .writeAsText("/some/newpath");

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized plan: the join should have a forward ship strategy on both sides
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();
    ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy();
    ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy();
    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1);
    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2);
}