This page collects typical usage examples of the Java method org.apache.flink.api.java.DataSet.flatMap. If you have been wondering what DataSet.flatMap does, how to call it, or what real code that uses it looks like, the curated method examples below should help. You can also look further into the enclosing class, org.apache.flink.api.java.DataSet.
The following 15 code examples of DataSet.flatMap are shown, sorted by popularity by default.
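Before diving into the collected examples, here is a minimal, self-contained sketch of how DataSet.flatMap is typically called (the class name and input data are made up purely for illustration). Unlike map, a FlatMapFunction may emit zero, one, or many records per input element via the Collector.

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.util.Collector;

public class FlatMapSketch {
    public static void main(String[] args) throws Exception {
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<String> lines = env.fromElements("to be", "or not to be");
        // split each line into words; flatMap may emit any number of records per input
        DataSet<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
            @Override
            public void flatMap(String line, Collector<String> out) {
                for (String word : line.split(" ")) {
                    out.collect(word);
                }
            }
        });
        words.print(); // to, be, or, not, to, be
    }
}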
Example 1: testNonPassingFlatMap
import org.apache.flink.api.java.DataSet; // import the package/class that the method depends on
@Test
public void testNonPassingFlatMap() throws Exception {
/*
* Test non-passing flatmap
*/
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<String> ds = CollectionDataSets.getStringDataSet(env);
DataSet<String> nonPassingFlatMapDs = ds.flatMap(new FlatMapper1());
List<String> result = nonPassingFlatMapDs.collect();
String expected = "\n";
compareResultAsText(result, expected);
}
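FlatMapper1 itself is not reproduced on this page. Judging from the test name and the empty expected result, it is presumably a UDF that never emits anything; a hypothetical sketch:

// Hypothetical FlatMapper1: drops every element by never calling out.collect()
private static class FlatMapper1 implements FlatMapFunction<String, String> {
    @Override
    public void flatMap(String value, Collector<String> out) throws Exception {
        // intentionally emit nothing
    }
}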
Example 2: main
import org.apache.flink.api.java.DataSet; // import the package/class that the method depends on
public static void main(String[] args) throws Exception {
if (args.length < 2) {
System.err.println("Usage: WordCount <input path> <result path>");
return;
}
final String inputPath = args[0];
final String outputPath = args[1];
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
// Set up the Hadoop Input Format
Job job = Job.getInstance();
HadoopInputFormat<LongWritable, Text> hadoopInputFormat = new HadoopInputFormat<LongWritable, Text>(new TextInputFormat(), LongWritable.class, Text.class, job);
TextInputFormat.addInputPath(job, new Path(inputPath));
// Create a Flink job with it
DataSet<Tuple2<LongWritable, Text>> text = env.createInput(hadoopInputFormat);
// Tokenize the line and convert from Writable "Text" to String for better handling
DataSet<Tuple2<String, Integer>> words = text.flatMap(new Tokenizer());
// Sum up the words
DataSet<Tuple2<String, Integer>> result = words.groupBy(0).aggregate(Aggregations.SUM, 1);
// Convert String back to Writable "Text" for use with Hadoop Output Format
DataSet<Tuple2<Text, IntWritable>> hadoopResult = result.map(new HadoopDatatypeMapper());
// Set up Hadoop Output Format
HadoopOutputFormat<Text, IntWritable> hadoopOutputFormat = new HadoopOutputFormat<Text, IntWritable>(new TextOutputFormat<Text, IntWritable>(), job);
hadoopOutputFormat.getConfiguration().set("mapreduce.output.textoutputformat.separator", " ");
hadoopOutputFormat.getConfiguration().set("mapred.textoutputformat.separator", " "); // set the value for both, since this test
TextOutputFormat.setOutputPath(job, new Path(outputPath));
// Output & Execute
hadoopResult.output(hadoopOutputFormat);
env.execute("Word Count");
}
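The Tokenizer referenced above is not shown on this page. A plausible sketch, assuming the usual word-count logic of turning the Hadoop Text value into a String and emitting (word, 1) pairs:

// Hypothetical Tokenizer: converts the Writable Text value to a String and emits (word, 1) pairs
public static final class Tokenizer implements FlatMapFunction<Tuple2<LongWritable, Text>, Tuple2<String, Integer>> {
    @Override
    public void flatMap(Tuple2<LongWritable, Text> value, Collector<Tuple2<String, Integer>> out) {
        String line = value.f1.toString().toLowerCase();
        for (String word : line.split("\\W+")) {
            if (word.length() > 0) {
                out.collect(new Tuple2<String, Integer>(word, 1));
            }
        }
    }
}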
Example 3: testConfigurableMapper
import org.apache.flink.api.java.DataSet; // import the package/class that the method depends on
@Test
public void testConfigurableMapper() throws Exception {
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
JobConf conf = new JobConf();
conf.set("my.filterPrefix", "Hello");
DataSet<Tuple2<IntWritable, Text>> ds = HadoopTestData.getKVPairDataSet(env);
DataSet<Tuple2<IntWritable, Text>> hellos = ds.flatMap(new HadoopMapFunction<IntWritable, Text, IntWritable, Text>(new ConfigurableMapper(), conf));
String resultPath = tempFolder.newFile().toURI().toString();
hellos.writeAsText(resultPath, FileSystem.WriteMode.OVERWRITE);
env.execute();
String expected = "(2,Hello)\n" +
"(3,Hello world)\n" +
"(4,Hello world, how are you?)\n";
compareResultsByLinesInMemory(expected, resultPath);
}
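ConfigurableMapper is a classic Hadoop mapred-API Mapper wrapped by Flink's HadoopMapFunction; it is not shown here. Based on the "my.filterPrefix" setting and the expected output, it presumably filters on that prefix; a hypothetical sketch:

// Hypothetical ConfigurableMapper: reads "my.filterPrefix" from the JobConf in configure()
// and only forwards records whose value starts with that prefix
public static class ConfigurableMapper implements Mapper<IntWritable, Text, IntWritable, Text> {
    private String filterPrefix;

    @Override
    public void configure(JobConf parameters) {
        filterPrefix = parameters.get("my.filterPrefix");
    }

    @Override
    public void map(IntWritable key, Text value, OutputCollector<IntWritable, Text> out, Reporter reporter) throws IOException {
        if (value.toString().startsWith(filterPrefix)) {
            out.collect(key, value);
        }
    }

    @Override
    public void close() throws IOException {
    }
}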
Example 4: testDataDuplicatingFlatMap
import org.apache.flink.api.java.DataSet; // import the package/class that the method depends on
@Test
public void testDataDuplicatingFlatMap() throws Exception {
/*
* Test data duplicating flatmap
*/
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<String> ds = CollectionDataSets.getStringDataSet(env);
DataSet<String> duplicatingFlatMapDs = ds.flatMap(new FlatMapper2());
List<String> result = duplicatingFlatMapDs.collect();
String expected = "Hi\n" + "HI\n" +
"Hello\n" + "HELLO\n" +
"Hello world\n" + "HELLO WORLD\n" +
"Hello world, how are you?\n" + "HELLO WORLD, HOW ARE YOU?\n" +
"I am fine.\n" + "I AM FINE.\n" +
"Luke Skywalker\n" + "LUKE SKYWALKER\n" +
"Random comment\n" + "RANDOM COMMENT\n" +
"LOL\n" + "LOL\n";
compareResultAsText(result, expected);
}
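FlatMapper2 is not shown; from the expected output it evidently emits each input twice, once unchanged and once upper-cased. A hypothetical sketch:

// Hypothetical FlatMapper2: duplicates every element, emitting the original and an upper-cased copy
private static class FlatMapper2 implements FlatMapFunction<String, String> {
    @Override
    public void flatMap(String value, Collector<String> out) throws Exception {
        out.collect(value);
        out.collect(value.toUpperCase());
    }
}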
Example 5: testFlatMapWithVaryingNumberOfEmittedTuples
import org.apache.flink.api.java.DataSet; // import the package/class that the method depends on
@Test
public void testFlatMapWithVaryingNumberOfEmittedTuples() throws Exception {
/*
* Test flatmap with varying number of emitted tuples
*/
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
DataSet<Tuple3<Integer, Long, String>> varyingTuplesMapDs = ds.flatMap(new FlatMapper3());
List<Tuple3<Integer, Long, String>> result = varyingTuplesMapDs.collect();
String expected = "1,1,Hi\n" +
"2,2,Hello\n" + "2,2,Hello\n" +
"4,3,Hello world, how are you?\n" +
"5,3,I am fine.\n" + "5,3,I am fine.\n" +
"7,4,Comment#1\n" +
"8,4,Comment#2\n" + "8,4,Comment#2\n" +
"10,4,Comment#4\n" +
"11,5,Comment#5\n" + "11,5,Comment#5\n" +
"13,5,Comment#7\n" +
"14,5,Comment#8\n" + "14,5,Comment#8\n" +
"16,6,Comment#10\n" +
"17,6,Comment#11\n" + "17,6,Comment#11\n" +
"19,6,Comment#13\n" +
"20,6,Comment#14\n" + "20,6,Comment#14\n";
compareResultAsTuples(result, expected);
}
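FlatMapper3 is not shown. Working backwards from the expected output, each tuple appears (f0 % 3) times, so a hypothetical sketch could be:

// Hypothetical FlatMapper3: emits each tuple (f0 % 3) times, i.e. zero, one, or two copies
private static class FlatMapper3 implements FlatMapFunction<Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>> {
    @Override
    public void flatMap(Tuple3<Integer, Long, String> value, Collector<Tuple3<Integer, Long, String>> out) throws Exception {
        final int numCopies = value.f0 % 3;
        for (int i = 0; i < numCopies; i++) {
            out.collect(value);
        }
    }
}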
Example 6: testTypeConversionFlatMapperCustomToTuple
import org.apache.flink.api.java.DataSet; // import the package/class that the method depends on
@Test
public void testTypeConversionFlatMapperCustomToTuple() throws Exception {
/*
* Test type conversion flatmapper (Custom -> Tuple)
*/
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<CustomType> ds = CollectionDataSets.getCustomTypeDataSet(env);
DataSet<Tuple3<Integer, Long, String>> typeConversionFlatMapDs = ds.flatMap(new FlatMapper4());
List<Tuple3<Integer, Long, String>> result = typeConversionFlatMapDs.collect();
String expected = "1,0,Hi\n" +
"2,1,Hello\n" +
"2,2,Hello world\n" +
"3,3,Hello world, how are you?\n" +
"3,4,I am fine.\n" +
"3,5,Luke Skywalker\n" +
"4,6,Comment#1\n" +
"4,7,Comment#2\n" +
"4,8,Comment#3\n" +
"4,9,Comment#4\n" +
"5,10,Comment#5\n" +
"5,11,Comment#6\n" +
"5,12,Comment#7\n" +
"5,13,Comment#8\n" +
"5,14,Comment#9\n" +
"6,15,Comment#10\n" +
"6,16,Comment#11\n" +
"6,17,Comment#12\n" +
"6,18,Comment#13\n" +
"6,19,Comment#14\n" +
"6,20,Comment#15\n";
compareResultAsTuples(result, expected);
}
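FlatMapper4 and CustomType are not shown. Assuming CustomType exposes myInt, myLong, and myString fields (as Flink's CollectionDataSets test helper does), the conversion presumably copies them field by field:

// Hypothetical FlatMapper4: converts the CustomType POJO into an equivalent Tuple3
private static class FlatMapper4 implements FlatMapFunction<CustomType, Tuple3<Integer, Long, String>> {
    @Override
    public void flatMap(CustomType value, Collector<Tuple3<Integer, Long, String>> out) throws Exception {
        out.collect(new Tuple3<Integer, Long, String>(value.myInt, value.myLong, value.myString));
    }
}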
Example 7: testTypeConversionFlatMapperTupleToBasic
import org.apache.flink.api.java.DataSet; // import the package/class that the method depends on
@Test
public void testTypeConversionFlatMapperTupleToBasic() throws Exception {
/*
* Test type conversion flatmapper (Tuple -> Basic)
*/
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
DataSet<String> typeConversionFlatMapDs = ds.flatMap(new FlatMapper5());
List<String> result = typeConversionFlatMapDs.collect();
String expected = "Hi\n" + "Hello\n" + "Hello world\n"
+
"Hello world, how are you?\n" +
"I am fine.\n" + "Luke Skywalker\n" +
"Comment#1\n" + "Comment#2\n" +
"Comment#3\n" + "Comment#4\n" +
"Comment#5\n" + "Comment#6\n" +
"Comment#7\n" + "Comment#8\n" +
"Comment#9\n" + "Comment#10\n" +
"Comment#11\n" + "Comment#12\n" +
"Comment#13\n" + "Comment#14\n" +
"Comment#15\n";
compareResultAsText(result, expected);
}
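FlatMapper5 is not shown; given the expected output, it presumably forwards only the String field of each tuple:

// Hypothetical FlatMapper5: emits just the third (String) field of every tuple
private static class FlatMapper5 implements FlatMapFunction<Tuple3<Integer, Long, String>, String> {
    @Override
    public void flatMap(Tuple3<Integer, Long, String> value, Collector<String> out) throws Exception {
        out.collect(value.f2);
    }
}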
Example 8: startPipeline
import org.apache.flink.api.java.DataSet; // import the package/class that the method depends on
private void startPipeline(Path input, Configuration parameters) throws Exception {
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
parameters.setBoolean("recursive.file.enumeration", true);
final DataSet<String> text = new DataSource<>(
env,
new TextInputFormat(input),
BasicTypeInfo.STRING_TYPE_INFO,
Utils.getCallLocationName()
).withParameters(parameters);
//Deserialize and convert
DataSet<JsonObject> tweets = text
.flatMap(new Deserializer());
DataSet<Tuple2<Long, JsonObject>> reducedUserObjects = tweets
.flatMap(new UserObjectExtractor())
.groupBy(0)
.reduce(new LatestUserObjectReduce())
.project(0, 1);
reducedUserObjects
.map(new Serializer())
.output(objectsOutputFormat).withParameters(parameters);
/*tweets
.flatMap(new IndexExtractor())
.groupBy(0, 1)
.sum(2)
.output(indexOutputFormat).withParameters(parameters);*/
env.execute();
}
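Deserializer, UserObjectExtractor, and the other UDFs in this pipeline are project-specific and not shown. A flatMap is presumably used for deserialization so that unparsable lines can simply be dropped; a hypothetical sketch using Gson:

// Hypothetical Deserializer: parses each line as JSON and silently skips malformed records,
// which is why it is a flatMap (zero or one outputs per input) rather than a map
public static class Deserializer implements FlatMapFunction<String, JsonObject> {
    private transient JsonParser parser;

    @Override
    public void flatMap(String line, Collector<JsonObject> out) throws Exception {
        if (parser == null) {
            parser = new JsonParser();
        }
        try {
            out.collect(parser.parse(line).getAsJsonObject());
        } catch (JsonSyntaxException | IllegalStateException e) {
            // skip lines that are not valid JSON objects
        }
    }
}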
Example 9: testNonPassingMapper
import org.apache.flink.api.java.DataSet; // import the package/class that the method depends on
@Test
public void testNonPassingMapper() throws Exception{
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<Tuple2<IntWritable, Text>> ds = HadoopTestData.getKVPairDataSet(env);
DataSet<Tuple2<IntWritable, Text>> nonPassingFlatMapDs = ds.flatMap(new HadoopMapFunction<IntWritable, Text, IntWritable, Text>(new NonPassingMapper()));
String resultPath = tempFolder.newFile().toURI().toString();
nonPassingFlatMapDs.writeAsText(resultPath, FileSystem.WriteMode.OVERWRITE);
env.execute();
compareResultsByLinesInMemory("\n", resultPath);
}
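NonPassingMapper is not shown. Like FlatMapper1 in Example 1, it presumably emits nothing, but it is written against the Hadoop mapred Mapper interface so it can be wrapped by HadoopMapFunction; a hypothetical sketch:

// Hypothetical NonPassingMapper: a mapred-API Mapper whose map() never calls the OutputCollector
public static class NonPassingMapper implements Mapper<IntWritable, Text, IntWritable, Text> {
    @Override
    public void map(IntWritable key, Text value, OutputCollector<IntWritable, Text> out, Reporter reporter) {
        // intentionally discard every record
    }

    @Override
    public void configure(JobConf parameters) {
    }

    @Override
    public void close() throws IOException {
    }
}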
Example 10: testDataDuplicatingMapper
import org.apache.flink.api.java.DataSet; // import the package/class that the method depends on
@Test
public void testDataDuplicatingMapper() throws Exception {
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<Tuple2<IntWritable, Text>> ds = HadoopTestData.getKVPairDataSet(env);
DataSet<Tuple2<IntWritable, Text>> duplicatingFlatMapDs = ds.flatMap(new HadoopMapFunction<IntWritable, Text, IntWritable, Text>(new DuplicatingMapper()));
String resultPath = tempFolder.newFile().toURI().toString();
duplicatingFlatMapDs.writeAsText(resultPath, FileSystem.WriteMode.OVERWRITE);
env.execute();
String expected = "(1,Hi)\n" + "(1,HI)\n" +
"(2,Hello)\n" + "(2,HELLO)\n" +
"(3,Hello world)\n" + "(3,HELLO WORLD)\n" +
"(4,Hello world, how are you?)\n" + "(4,HELLO WORLD, HOW ARE YOU?)\n" +
"(5,I am fine.)\n" + "(5,I AM FINE.)\n" +
"(6,Luke Skywalker)\n" + "(6,LUKE SKYWALKER)\n" +
"(7,Comment#1)\n" + "(7,COMMENT#1)\n" +
"(8,Comment#2)\n" + "(8,COMMENT#2)\n" +
"(9,Comment#3)\n" + "(9,COMMENT#3)\n" +
"(10,Comment#4)\n" + "(10,COMMENT#4)\n" +
"(11,Comment#5)\n" + "(11,COMMENT#5)\n" +
"(12,Comment#6)\n" + "(12,COMMENT#6)\n" +
"(13,Comment#7)\n" + "(13,COMMENT#7)\n" +
"(14,Comment#8)\n" + "(14,COMMENT#8)\n" +
"(15,Comment#9)\n" + "(15,COMMENT#9)\n" +
"(16,Comment#10)\n" + "(16,COMMENT#10)\n" +
"(17,Comment#11)\n" + "(17,COMMENT#11)\n" +
"(18,Comment#12)\n" + "(18,COMMENT#12)\n" +
"(19,Comment#13)\n" + "(19,COMMENT#13)\n" +
"(20,Comment#14)\n" + "(20,COMMENT#14)\n" +
"(21,Comment#15)\n" + "(21,COMMENT#15)\n";
compareResultsByLinesInMemory(expected, resultPath);
}
Example 11: main
import org.apache.flink.api.java.DataSet; // import the package/class that the method depends on
public static void main(String[] args) throws Exception {
if (args.length < 2) {
System.err.println("Usage: WordCount <input path> <result path>");
return;
}
final String inputPath = args[0];
final String outputPath = args[1];
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
// Set up the Hadoop Input Format
Job job = Job.getInstance();
HadoopInputFormat<LongWritable, Text> hadoopInputFormat = new HadoopInputFormat<LongWritable, Text>(new TextInputFormat(), LongWritable.class, Text.class, job);
TextInputFormat.addInputPath(job, new Path(inputPath));
// Create a Flink job with it
DataSet<Tuple2<LongWritable, Text>> text = env.createInput(hadoopInputFormat);
// Tokenize the line and convert from Writable "Text" to String for better handling
DataSet<Tuple2<String, Integer>> words = text.flatMap(new Tokenizer());
// Sum up the words
DataSet<Tuple2<String, Integer>> result = words.groupBy(0).aggregate(Aggregations.SUM, 1);
// Convert String back to Writable "Text" for use with Hadoop Output Format
DataSet<Tuple2<Text, IntWritable>> hadoopResult = result.map(new HadoopDatatypeMapper());
// Set up Hadoop Output Format
HadoopOutputFormat<Text, IntWritable> hadoopOutputFormat = new HadoopOutputFormat<Text, IntWritable>(new TextOutputFormat<Text, IntWritable>(), job);
hadoopOutputFormat.getConfiguration().set("mapreduce.output.textoutputformat.separator", " ");
hadoopOutputFormat.getConfiguration().set("mapred.textoutputformat.separator", " "); // set the value for both, since this test
TextOutputFormat.setOutputPath(job, new Path(outputPath));
// Output & Execute
hadoopResult.output(hadoopOutputFormat);
env.execute("Word Count");
}
Example 12: testProgram
import org.apache.flink.api.java.DataSet; // import the package/class that the method depends on
@Override
protected void testProgram() throws Exception {
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<String> text = env.fromElements(WordCountData.TEXT);
DataSet<Tuple2<String, Integer>> words = text.flatMap(new WordCount.Tokenizer());
DataSet<Tuple2<String, Integer>> result = words.groupBy(0).aggregate(Aggregations.SUM, 1);
result.output(new LocalCollectionOutputFormat<Tuple2<String, Integer>>(resultsCollected));
env.execute("Word Count Collection");
}
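WordCount.Tokenizer is the plain-String variant of the tokenizer used in Examples 2 and 11. It is not reproduced here; a hypothetical sketch of the usual implementation:

// Hypothetical WordCount.Tokenizer: splits each line into lower-case words and emits (word, 1)
public static final class Tokenizer implements FlatMapFunction<String, Tuple2<String, Integer>> {
    @Override
    public void flatMap(String value, Collector<Tuple2<String, Integer>> out) {
        for (String token : value.toLowerCase().split("\\W+")) {
            if (token.length() > 0) {
                out.collect(new Tuple2<String, Integer>(token, 1));
            }
        }
    }
}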
Example 13: testFlatMapperIfUDFReturnsInputObjectMultipleTimesWhileChangingIt
import org.apache.flink.api.java.DataSet; // import the package/class that the method depends on
@Test
public void testFlatMapperIfUDFReturnsInputObjectMultipleTimesWhileChangingIt() throws Exception {
/*
* Test flatmapper if UDF returns input object
* multiple times and changes it in between
*/
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
DataSet<Tuple3<Integer, Long, String>> inputObjFlatMapDs = ds.flatMap(new FlatMapper6());
List<Tuple3<Integer, Long, String>> result = inputObjFlatMapDs.collect();
String expected = "0,1,Hi\n" +
"0,2,Hello\n" + "1,2,Hello\n" +
"0,2,Hello world\n" + "1,2,Hello world\n" + "2,2,Hello world\n" +
"0,3,I am fine.\n" +
"0,3,Luke Skywalker\n" + "1,3,Luke Skywalker\n" +
"0,4,Comment#1\n" + "1,4,Comment#1\n" + "2,4,Comment#1\n" +
"0,4,Comment#3\n" +
"0,4,Comment#4\n" + "1,4,Comment#4\n" +
"0,5,Comment#5\n" + "1,5,Comment#5\n" + "2,5,Comment#5\n" +
"0,5,Comment#7\n" +
"0,5,Comment#8\n" + "1,5,Comment#8\n" +
"0,5,Comment#9\n" + "1,5,Comment#9\n" + "2,5,Comment#9\n" +
"0,6,Comment#11\n" +
"0,6,Comment#12\n" + "1,6,Comment#12\n" +
"0,6,Comment#13\n" + "1,6,Comment#13\n" + "2,6,Comment#13\n" +
"0,6,Comment#15\n";
compareResultAsTuples(result, expected);
}
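FlatMapper6 is not shown. Working backwards from the expected output, it appears to emit each tuple (f0 % 4) times, overwriting the first field with the loop index before every emit, which is exactly the "returns the input object multiple times while changing it" behaviour the test exercises. Whether such mutation is safe depends on the runtime's object-reuse semantics, which is what this test verifies. A hypothetical sketch:

// Hypothetical FlatMapper6: re-emits the same tuple instance several times,
// mutating its first field between emits
private static class FlatMapper6 implements FlatMapFunction<Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>> {
    @Override
    public void flatMap(Tuple3<Integer, Long, String> value, Collector<Tuple3<Integer, Long, String>> out) throws Exception {
        final int numCopies = value.f0 % 4;
        for (int i = 0; i < numCopies; i++) {
            value.setField(i, 0); // overwrite the field at position 0 with the loop index
            out.collect(value);
        }
    }
}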
Example 14: run
import org.apache.flink.api.java.DataSet; // import the package/class that the method depends on
/**
* Runs the conversion process.
* @param configPath path to the configuration file
* @throws Exception
*/
public void run(String configPath) throws Exception {
FlinkEnvManager fem = new FlinkEnvManager(configPath, "converterJob",
TableIdentifier.RAW_TWITTER_DATA.get(),
TableIdentifier.TERM_INDEX.get());
DataSet<Tuple2<Key,Value>> rawTwitterDataRows = fem.getDataFromAccumulo();
DataSet<Tuple2<Text, Mutation>> termIndexMutations = rawTwitterDataRows
.flatMap(new ConverterFlatMap(new Tokenizer(),
TableIdentifier.TERM_INDEX.get()));
termIndexMutations.output(fem.getHadoopOF());
fem.getExecutionEnvironment().execute("ConverterProcess");
}
Example 15: testDeltaIteration
import org.apache.flink.api.java.DataSet; // import the package/class that the method depends on
@Test
public void testDeltaIteration() {
try {
ExecutionEnvironment env = ExecutionEnvironment.createCollectionsEnvironment();
@SuppressWarnings("unchecked")
DataSet<Tuple2<Integer, Integer>> solInput = env.fromElements(
new Tuple2<Integer, Integer>(1, 0),
new Tuple2<Integer, Integer>(2, 0),
new Tuple2<Integer, Integer>(3, 0),
new Tuple2<Integer, Integer>(4, 0));
@SuppressWarnings("unchecked")
DataSet<Tuple1<Integer>> workInput = env.fromElements(
new Tuple1<Integer>(1),
new Tuple1<Integer>(2),
new Tuple1<Integer>(3),
new Tuple1<Integer>(4));
// Perform a delta iteration where we add those values to the workset where
// the second tuple field is smaller than the first tuple field.
// At the end both tuple fields must be the same.
DeltaIteration<Tuple2<Integer, Integer>, Tuple1<Integer>> iteration =
solInput.iterateDelta(workInput, 10, 0);
DataSet<Tuple2<Integer, Integer>> solDelta = iteration.getSolutionSet().join(
iteration.getWorkset()).where(0).equalTo(0).with(
new JoinFunction<Tuple2<Integer, Integer>, Tuple1<Integer>, Tuple2<Integer, Integer>>() {
@Override
public Tuple2<Integer, Integer> join(Tuple2<Integer, Integer> first,
Tuple1<Integer> second) throws Exception {
return new Tuple2<Integer, Integer>(first.f0, first.f1 + 1);
}
});
DataSet<Tuple1<Integer>> nextWorkset = solDelta.flatMap(
new FlatMapFunction<Tuple2<Integer, Integer>, Tuple1<Integer>>() {
@Override
public void flatMap(Tuple2<Integer, Integer> in, Collector<Tuple1<Integer>>
out) throws Exception {
if (in.f1 < in.f0) {
out.collect(new Tuple1<Integer>(in.f0));
}
}
});
List<Tuple2<Integer, Integer>> collected = new ArrayList<Tuple2<Integer, Integer>>();
iteration.closeWith(solDelta, nextWorkset)
.output(new LocalCollectionOutputFormat<Tuple2<Integer, Integer>>(collected));
env.execute();
// verify that both tuple fields are now the same
for (Tuple2<Integer, Integer> t: collected) {
assertEquals(t.f0, t.f1);
}
}
catch (Exception e) {
e.printStackTrace();
fail(e.getMessage());
}
}