Java DataSet.flatMap Method Code Examples

This article collects typical usage examples of the Java method org.apache.flink.api.java.DataSet.flatMap. If you are wondering what DataSet.flatMap does, how to call it, or what real-world invocations look like, the curated examples below should help. You can also explore further usage examples of the enclosing class, org.apache.flink.api.java.DataSet.


The following presents 14 code examples of the DataSet.flatMap method, ordered by popularity.
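
As a quick primer before the examples: DataSet.flatMap takes a FlatMapFunction<T, R> whose single flatMap method may emit zero, one, or many records per input element through a Collector. A minimal self-contained sketch (class name and input data invented for illustration):

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.util.Collector;

public class FlatMapSketch {
	public static void main(String[] args) throws Exception {
		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		DataSet<String> lines = env.fromElements("a b", "c");

		// Each input element may produce any number of output records.
		DataSet<String> tokens = lines.flatMap(new FlatMapFunction<String, String>() {
			@Override
			public void flatMap(String value, Collector<String> out) {
				for (String token : value.split(" ")) {
					out.collect(token); // emit one record per token
				}
			}
		});

		tokens.print(); // prints a, b, c (print() also triggers execution)
	}
}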

Example 1: testNonPassingFlatMap

import org.apache.flink.api.java.DataSet; // import for the class that declares flatMap
@Test
public void testNonPassingFlatMap() throws Exception {
	/*
	 * Test non-passing flatmap
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<String> ds = CollectionDataSets.getStringDataSet(env);
	DataSet<String> nonPassingFlatMapDs = ds.flatMap(new FlatMapper1());

	List<String> result = nonPassingFlatMapDs.collect();

	String expected = "\n";

	compareResultAsText(result, expected);
}
 
Developer: axbaretto, Project: flink, Lines: 19, Source: FlatMapITCase.java
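
FlatMapper1 itself is not part of the snippet. Given that the expected result is empty, a non-passing flat map is simply one that never calls the Collector; a plausible reconstruction (not the verbatim test class):

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.util.Collector;

class FlatMapper1 implements FlatMapFunction<String, String> {
	@Override
	public void flatMap(String value, Collector<String> out) {
		// Intentionally emit nothing: every input element is dropped.
	}
}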

Example 2: main

import org.apache.flink.api.java.DataSet; // import for the class that declares flatMap
public static void main(String[] args) throws Exception {
	if (args.length < 2) {
		System.err.println("Usage: WordCount <input path> <result path>");
		return;
	}
	
	final String inputPath = args[0];
	final String outputPath = args[1];
	
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	
	// Set up the Hadoop Input Format
	Job job = Job.getInstance();
	HadoopInputFormat<LongWritable, Text> hadoopInputFormat = new HadoopInputFormat<LongWritable, Text>(new TextInputFormat(), LongWritable.class, Text.class, job);
	TextInputFormat.addInputPath(job, new Path(inputPath));
	
	// Create a Flink job with it
	DataSet<Tuple2<LongWritable, Text>> text = env.createInput(hadoopInputFormat);
	
	// Tokenize the line and convert from Writable "Text" to String for better handling
	DataSet<Tuple2<String, Integer>> words = text.flatMap(new Tokenizer());
	
	// Sum up the words
	DataSet<Tuple2<String, Integer>> result = words.groupBy(0).aggregate(Aggregations.SUM, 1);
	
	// Convert String back to Writable "Text" for use with Hadoop Output Format
	DataSet<Tuple2<Text, IntWritable>> hadoopResult = result.map(new HadoopDatatypeMapper());
	
	// Set up Hadoop Output Format
	HadoopOutputFormat<Text, IntWritable> hadoopOutputFormat = new HadoopOutputFormat<Text, IntWritable>(new TextOutputFormat<Text, IntWritable>(), job);
	hadoopOutputFormat.getConfiguration().set("mapreduce.output.textoutputformat.separator", " ");
	hadoopOutputFormat.getConfiguration().set("mapred.textoutputformat.separator", " "); // set the value for both, since this test
	TextOutputFormat.setOutputPath(job, new Path(outputPath));
	
	// Output & Execute
	hadoopResult.output(hadoopOutputFormat);
	env.execute("Word Count");
}
 
Developer: axbaretto, Project: flink, Lines: 39, Source: WordCount.java
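
The Tokenizer class is not shown. Based on how it is used here, Tuple2<LongWritable, Text> records in and (word, 1) pairs out, a plausible sketch looks like this:

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;

class Tokenizer implements FlatMapFunction<Tuple2<LongWritable, Text>, Tuple2<String, Integer>> {
	@Override
	public void flatMap(Tuple2<LongWritable, Text> value, Collector<Tuple2<String, Integer>> out) {
		// Convert the Hadoop Text line to a String, then emit a (word, 1) pair per token.
		String line = value.f1.toString().toLowerCase();
		for (String token : line.split("\\W+")) {
			if (token.length() > 0) {
				out.collect(new Tuple2<String, Integer>(token, 1));
			}
		}
	}
}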

Example 3: testConfigurableMapper

import org.apache.flink.api.java.DataSet; // import for the class that declares flatMap
@Test
public void testConfigurableMapper() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	JobConf conf = new JobConf();
	conf.set("my.filterPrefix", "Hello");

	DataSet<Tuple2<IntWritable, Text>> ds = HadoopTestData.getKVPairDataSet(env);
	DataSet<Tuple2<IntWritable, Text>> hellos = ds.flatMap(
			new HadoopMapFunction<IntWritable, Text, IntWritable, Text>(new ConfigurableMapper(), conf));

	String resultPath = tempFolder.newFile().toURI().toString();

	hellos.writeAsText(resultPath, FileSystem.WriteMode.OVERWRITE);
	env.execute();

	String expected = "(2,Hello)\n" +
			"(3,Hello world)\n" +
			"(4,Hello world, how are you?)\n";

	compareResultsByLinesInMemory(expected, resultPath);
}
 
Developer: axbaretto, Project: flink, Lines: 23, Source: HadoopMapFunctionITCase.java
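
ConfigurableMapper is a Hadoop org.apache.hadoop.mapred.Mapper rather than a Flink function; HadoopMapFunction wraps it so it runs inside a Flink flatMap and forwards the JobConf to its configure method. Judging by the "my.filterPrefix" parameter and the expected output, a plausible sketch (reconstructed, not the verbatim test class):

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

class ConfigurableMapper implements Mapper<IntWritable, Text, IntWritable, Text> {
	private String filterPrefix;

	@Override
	public void configure(JobConf conf) {
		// Read the prefix from the JobConf handed to HadoopMapFunction.
		filterPrefix = conf.get("my.filterPrefix");
	}

	@Override
	public void map(IntWritable key, Text value, OutputCollector<IntWritable, Text> out, Reporter reporter)
			throws IOException {
		// Forward only records whose text starts with the configured prefix.
		if (value.toString().startsWith(filterPrefix)) {
			out.collect(key, value);
		}
	}

	@Override
	public void close() throws IOException {
	}
}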

Example 4: testDataDuplicatingFlatMap

import org.apache.flink.api.java.DataSet; // import for the class that declares flatMap
@Test
public void testDataDuplicatingFlatMap() throws Exception {
	/*
	 * Test data duplicating flatmap
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<String> ds = CollectionDataSets.getStringDataSet(env);
	DataSet<String> duplicatingFlatMapDs = ds.flatMap(new FlatMapper2());

	List<String> result = duplicatingFlatMapDs.collect();

	String expected = "Hi\n" + "HI\n" +
			"Hello\n" + "HELLO\n" +
			"Hello world\n" + "HELLO WORLD\n" +
			"Hello world, how are you?\n" + "HELLO WORLD, HOW ARE YOU?\n" +
			"I am fine.\n" + "I AM FINE.\n" +
			"Luke Skywalker\n" + "LUKE SKYWALKER\n" +
			"Random comment\n" + "RANDOM COMMENT\n" +
			"LOL\n" + "LOL\n";

	compareResultAsText(result, expected);
}
 
Developer: axbaretto, Project: flink, Lines: 26, Source: FlatMapITCase.java
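
FlatMapper2 is not shown; the expected output, each string followed by its upper-cased copy, suggests a duplicating function along these lines (a reconstruction):

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.util.Collector;

class FlatMapper2 implements FlatMapFunction<String, String> {
	@Override
	public void flatMap(String value, Collector<String> out) {
		// Emit each element twice: once unchanged, once upper-cased.
		out.collect(value);
		out.collect(value.toUpperCase());
	}
}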

Example 5: testFlatMapWithVaryingNumberOfEmittedTuples

import org.apache.flink.api.java.DataSet; // import for the class that declares flatMap
@Test
public void testFlatMapWithVaryingNumberOfEmittedTuples() throws Exception {
	/*
	 * Test flatmap with varying number of emitted tuples
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
	DataSet<Tuple3<Integer, Long, String>> varyingTuplesMapDs = ds.flatMap(new FlatMapper3());

	List<Tuple3<Integer, Long, String>> result = varyingTuplesMapDs.collect();

	String expected = "1,1,Hi\n" +
			"2,2,Hello\n" + "2,2,Hello\n" +
			"4,3,Hello world, how are you?\n" +
			"5,3,I am fine.\n" + "5,3,I am fine.\n" +
			"7,4,Comment#1\n" +
			"8,4,Comment#2\n" + "8,4,Comment#2\n" +
			"10,4,Comment#4\n" +
			"11,5,Comment#5\n" + "11,5,Comment#5\n" +
			"13,5,Comment#7\n" +
			"14,5,Comment#8\n" + "14,5,Comment#8\n" +
			"16,6,Comment#10\n" +
			"17,6,Comment#11\n" + "17,6,Comment#11\n" +
			"19,6,Comment#13\n" +
			"20,6,Comment#14\n" + "20,6,Comment#14\n";

	compareResultAsTuples(result, expected);
}
 
Developer: axbaretto, Project: flink, Lines: 32, Source: FlatMapITCase.java
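
FlatMapper3 is likewise not shown, but the expected output follows a clear pattern: the tuple with first field n appears n % 3 times, so every third element vanishes entirely. One implementation consistent with that output (an inference, not the verbatim test class):

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.util.Collector;

class FlatMapper3 implements FlatMapFunction<Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>> {
	@Override
	public void flatMap(Tuple3<Integer, Long, String> value, Collector<Tuple3<Integer, Long, String>> out) {
		// Emit each tuple (f0 % 3) times: zero, one, or two copies.
		int copies = value.f0 % 3;
		for (int i = 0; i < copies; i++) {
			out.collect(value);
		}
	}
}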

Example 6: testTypeConversionFlatMapperCustomToTuple

import org.apache.flink.api.java.DataSet; // import for the class that declares flatMap
@Test
public void testTypeConversionFlatMapperCustomToTuple() throws Exception {
	/*
	 * Test type conversion flatmapper (Custom -> Tuple)
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<CustomType> ds = CollectionDataSets.getCustomTypeDataSet(env);
	DataSet<Tuple3<Integer, Long, String>> typeConversionFlatMapDs = ds.flatMap(new FlatMapper4());

	List<Tuple3<Integer, Long, String>> result = typeConversionFlatMapDs.collect();

	String expected = "1,0,Hi\n" +
			"2,1,Hello\n" +
			"2,2,Hello world\n" +
			"3,3,Hello world, how are you?\n" +
			"3,4,I am fine.\n" +
			"3,5,Luke Skywalker\n" +
			"4,6,Comment#1\n" +
			"4,7,Comment#2\n" +
			"4,8,Comment#3\n" +
			"4,9,Comment#4\n" +
			"5,10,Comment#5\n" +
			"5,11,Comment#6\n" +
			"5,12,Comment#7\n" +
			"5,13,Comment#8\n" +
			"5,14,Comment#9\n" +
			"6,15,Comment#10\n" +
			"6,16,Comment#11\n" +
			"6,17,Comment#12\n" +
			"6,18,Comment#13\n" +
			"6,19,Comment#14\n" +
			"6,20,Comment#15\n";

	compareResultAsTuples(result, expected);
}
 
Developer: axbaretto, Project: flink, Lines: 39, Source: FlatMapITCase.java
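
FlatMapper4 maps the CustomType POJO onto a Tuple3, one output per input. Assuming CustomType is the test POJO from CollectionDataSets with fields myInt, myLong, and myString, a sketch:

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.util.Collector;
// CustomType is assumed to be CollectionDataSets.CustomType from Flink's test utilities.

class FlatMapper4 implements FlatMapFunction<CustomType, Tuple3<Integer, Long, String>> {
	@Override
	public void flatMap(CustomType value, Collector<Tuple3<Integer, Long, String>> out) {
		// One tuple per POJO: (myInt, myLong, myString).
		out.collect(new Tuple3<Integer, Long, String>(value.myInt, value.myLong, value.myString));
	}
}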

Example 7: testTypeConversionFlatMapperTupleToBasic

import org.apache.flink.api.java.DataSet; // import for the class that declares flatMap
@Test
public void testTypeConversionFlatMapperTupleToBasic() throws Exception {
	/*
	 * Test type conversion flatmapper (Tuple -> Basic)
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
	DataSet<String> typeConversionFlatMapDs = ds.flatMap(new FlatMapper5());

	List<String> result = typeConversionFlatMapDs.collect();

	String expected = "Hi\n" + "Hello\n" + "Hello world\n"
			+
			"Hello world, how are you?\n" +
			"I am fine.\n" + "Luke Skywalker\n" +
			"Comment#1\n" +	"Comment#2\n" +
			"Comment#3\n" +	"Comment#4\n" +
			"Comment#5\n" +	"Comment#6\n" +
			"Comment#7\n" + "Comment#8\n" +
			"Comment#9\n" +	"Comment#10\n" +
			"Comment#11\n" + "Comment#12\n" +
			"Comment#13\n" + "Comment#14\n" +
			"Comment#15\n";

	compareResultAsText(result, expected);
}
 
Developer: axbaretto, Project: flink, Lines: 30, Source: FlatMapITCase.java
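
FlatMapper5 converts in the other direction, from Tuple to a basic type; the expected output suggests it simply emits the String field:

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.util.Collector;

class FlatMapper5 implements FlatMapFunction<Tuple3<Integer, Long, String>, String> {
	@Override
	public void flatMap(Tuple3<Integer, Long, String> value, Collector<String> out) {
		// Keep only the third field, turning tuples into plain strings.
		out.collect(value.f2);
	}
}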

Example 8: startPipeline

import org.apache.flink.api.java.DataSet; // import for the class that declares flatMap
private void startPipeline(Path input, Configuration parameters) throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    parameters.setBoolean("recursive.file.enumeration", true);

    final DataSet<String> text = new DataSource<>(
            env,
            new TextInputFormat(input),
            BasicTypeInfo.STRING_TYPE_INFO,
            Utils.getCallLocationName()
    ).withParameters(parameters);

    // Deserialize and convert
    DataSet<JsonObject> tweets = text
            .flatMap(new Deserializer());


    DataSet<Tuple2<Long, JsonObject>> reducedUserObjects = tweets
            .flatMap(new UserObjectExtractor())
            .groupBy(0)
            .reduce(new LatestUserObjectReduce())
            .project(0, 1);

    reducedUserObjects
            .map(new Serializer())
            .output(objectsOutputFormat).withParameters(parameters);

    /*tweets
            .flatMap(new IndexExtractor())
            .groupBy(0, 1)
            .sum(2)
            .output(indexOutputFormat).withParameters(parameters);*/

    env.execute();
}
 
Developer: Remper, Project: sociallink, Lines: 35, Source: BuildUserIndex.java
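
Deserializer and UserObjectExtractor belong to the sociallink project and are not reproduced here. flatMap suits deserialization well because malformed records can be dropped instead of failing the job. A hypothetical Gson-based sketch, assuming the JsonObject in the pipeline is com.google.gson.JsonObject:

import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import com.google.gson.JsonSyntaxException;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.util.Collector;

class Deserializer implements FlatMapFunction<String, JsonObject> {
	@Override
	public void flatMap(String value, Collector<JsonObject> out) {
		try {
			// Emit only lines that parse as JSON objects.
			out.collect(new JsonParser().parse(value).getAsJsonObject());
		} catch (JsonSyntaxException | IllegalStateException e) {
			// Malformed tweet line: drop it silently rather than fail the job.
		}
	}
}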

Example 9: testNonPassingMapper

import org.apache.flink.api.java.DataSet; // import for the class that declares flatMap
@Test
public void testNonPassingMapper() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple2<IntWritable, Text>> ds = HadoopTestData.getKVPairDataSet(env);
	DataSet<Tuple2<IntWritable, Text>> nonPassingFlatMapDs = ds.flatMap(
			new HadoopMapFunction<IntWritable, Text, IntWritable, Text>(new NonPassingMapper()));

	String resultPath = tempFolder.newFile().toURI().toString();

	nonPassingFlatMapDs.writeAsText(resultPath, FileSystem.WriteMode.OVERWRITE);
	env.execute();

	compareResultsByLinesInMemory("\n", resultPath);
}
 
Developer: axbaretto, Project: flink, Lines: 16, Source: HadoopMapFunctionITCase.java

Example 10: testDataDuplicatingMapper

import org.apache.flink.api.java.DataSet; // import for the class that declares flatMap
@Test
public void testDataDuplicatingMapper() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple2<IntWritable, Text>> ds = HadoopTestData.getKVPairDataSet(env);
	DataSet<Tuple2<IntWritable, Text>> duplicatingFlatMapDs = ds.flatMap(
			new HadoopMapFunction<IntWritable, Text, IntWritable, Text>(new DuplicatingMapper()));

	String resultPath = tempFolder.newFile().toURI().toString();

	duplicatingFlatMapDs.writeAsText(resultPath, FileSystem.WriteMode.OVERWRITE);
	env.execute();

	String expected = "(1,Hi)\n" + "(1,HI)\n" +
			"(2,Hello)\n" + "(2,HELLO)\n" +
			"(3,Hello world)\n" + "(3,HELLO WORLD)\n" +
			"(4,Hello world, how are you?)\n" + "(4,HELLO WORLD, HOW ARE YOU?)\n" +
			"(5,I am fine.)\n" + "(5,I AM FINE.)\n" +
			"(6,Luke Skywalker)\n" + "(6,LUKE SKYWALKER)\n" +
			"(7,Comment#1)\n" + "(7,COMMENT#1)\n" +
			"(8,Comment#2)\n" + "(8,COMMENT#2)\n" +
			"(9,Comment#3)\n" + "(9,COMMENT#3)\n" +
			"(10,Comment#4)\n" + "(10,COMMENT#4)\n" +
			"(11,Comment#5)\n" + "(11,COMMENT#5)\n" +
			"(12,Comment#6)\n" + "(12,COMMENT#6)\n" +
			"(13,Comment#7)\n" + "(13,COMMENT#7)\n" +
			"(14,Comment#8)\n" + "(14,COMMENT#8)\n" +
			"(15,Comment#9)\n" + "(15,COMMENT#9)\n" +
			"(16,Comment#10)\n" + "(16,COMMENT#10)\n" +
			"(17,Comment#11)\n" + "(17,COMMENT#11)\n" +
			"(18,Comment#12)\n" + "(18,COMMENT#12)\n" +
			"(19,Comment#13)\n" + "(19,COMMENT#13)\n" +
			"(20,Comment#14)\n" + "(20,COMMENT#14)\n" +
			"(21,Comment#15)\n" + "(21,COMMENT#15)\n";

	compareResultsByLinesInMemory(expected, resultPath);
}
 
Developer: axbaretto, Project: flink, Lines: 38, Source: HadoopMapFunctionITCase.java

Example 11: testProgram

import org.apache.flink.api.java.DataSet; // import for the class that declares flatMap
@Override
protected void testProgram() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<String> text = env.fromElements(WordCountData.TEXT);
	DataSet<Tuple2<String, Integer>> words = text.flatMap(new WordCount.Tokenizer());
	DataSet<Tuple2<String, Integer>> result = words.groupBy(0).aggregate(Aggregations.SUM, 1);

	result.output(new LocalCollectionOutputFormat<Tuple2<String, Integer>>(resultsCollected));
	env.execute("Word Count Collection");
}
 
Developer: axbaretto, Project: flink, Lines: 12, Source: WordCountWithCollectionITCase.java
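
WordCount.Tokenizer here reads plain Strings rather than the Hadoop Tuple2 input of Example 2, but does the same job. A sketch consistent with Flink's classic WordCount example:

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;

public final class Tokenizer implements FlatMapFunction<String, Tuple2<String, Integer>> {
	@Override
	public void flatMap(String value, Collector<Tuple2<String, Integer>> out) {
		// Split the line on non-word characters and emit a (word, 1) pair per token.
		for (String token : value.toLowerCase().split("\\W+")) {
			if (token.length() > 0) {
				out.collect(new Tuple2<String, Integer>(token, 1));
			}
		}
	}
}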

Example 12: testFlatMapperIfUDFReturnsInputObjectMultipleTimesWhileChangingIt

import org.apache.flink.api.java.DataSet; // import for the class that declares flatMap
@Test
public void testFlatMapperIfUDFReturnsInputObjectMultipleTimesWhileChangingIt() throws Exception {
	/*
	 * Test flatmapper if UDF returns input object
	 * multiple times and changes it in between
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
	DataSet<Tuple3<Integer, Long, String>> inputObjFlatMapDs = ds.flatMap(new FlatMapper6());

	List<Tuple3<Integer, Long, String>> result = inputObjFlatMapDs.collect();

	String expected = "0,1,Hi\n" +
			"0,2,Hello\n" + "1,2,Hello\n" +
			"0,2,Hello world\n" + "1,2,Hello world\n" + "2,2,Hello world\n" +
			"0,3,I am fine.\n" +
			"0,3,Luke Skywalker\n" + "1,3,Luke Skywalker\n" +
			"0,4,Comment#1\n" + "1,4,Comment#1\n" + "2,4,Comment#1\n" +
			"0,4,Comment#3\n" +
			"0,4,Comment#4\n" + "1,4,Comment#4\n" +
			"0,5,Comment#5\n" + "1,5,Comment#5\n" + "2,5,Comment#5\n" +
			"0,5,Comment#7\n" +
			"0,5,Comment#8\n" + "1,5,Comment#8\n" +
			"0,5,Comment#9\n" + "1,5,Comment#9\n" + "2,5,Comment#9\n" +
			"0,6,Comment#11\n" +
			"0,6,Comment#12\n" + "1,6,Comment#12\n" +
			"0,6,Comment#13\n" + "1,6,Comment#13\n" + "2,6,Comment#13\n" +
			"0,6,Comment#15\n";

	compareResultAsTuples(result, expected);
}
 
Developer: axbaretto, Project: flink, Lines: 35, Source: FlatMapITCase.java
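
The interesting part of this test is object reuse: the UDF collects the same input tuple several times and mutates it between collect calls, and the runtime must still produce the right records. The expected output is consistent with emitting each tuple (f0 % 4) times while overwriting field 0 with the loop index; a reconstruction along those lines (not the verbatim test class):

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.util.Collector;

class FlatMapper6 implements FlatMapFunction<Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>> {
	@Override
	public void flatMap(Tuple3<Integer, Long, String> value, Collector<Tuple3<Integer, Long, String>> out) {
		// Re-emit the *same* tuple object, changing field 0 before each collect.
		int copies = value.f0 % 4;
		for (int i = 0; i < copies; i++) {
			value.setField(i, 0); // overwrite f0 with the loop index
			out.collect(value);
		}
	}
}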

Example 13: run

import org.apache.flink.api.java.DataSet; // import for the class that declares flatMap
/**
 * Runs the conversion process.
 * @param configPath path to the config file
 * @throws Exception if the Flink job fails
 */
public void run(String configPath) throws Exception {

    FlinkEnvManager fem = new FlinkEnvManager(configPath, "converterJob",
            TableIdentifier.RAW_TWITTER_DATA.get(),
            TableIdentifier.TERM_INDEX.get());

    DataSet<Tuple2<Key, Value>> rawTwitterDataRows = fem.getDataFromAccumulo();

    DataSet<Tuple2<Text, Mutation>> termIndexMutations = rawTwitterDataRows
            .flatMap(new ConverterFlatMap(new Tokenizer(),
                    TableIdentifier.TERM_INDEX.get()));

    termIndexMutations.output(fem.getHadoopOF());

    fem.getExecutionEnvironment().execute("ConverterProcess");

}
 
Developer: IIDP, Project: OSTMap, Lines: 23, Source: ConverterProcess.java
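
ConverterFlatMap is part of the OSTMap project and is not reproduced here. Conceptually it fans one raw Accumulo row out into many term-index mutations, exactly the zero-to-many shape flatMap provides. A simplified, hypothetical sketch (class name, tokenization, and table layout invented for illustration):

import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Mutation;
import org.apache.accumulo.core.data.Value;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;
import org.apache.hadoop.io.Text;

class TermIndexFlatMap implements FlatMapFunction<Tuple2<Key, Value>, Tuple2<Text, Mutation>> {
	private final Text outputTable = new Text("TermIndex"); // hypothetical target table name

	@Override
	public void flatMap(Tuple2<Key, Value> row, Collector<Tuple2<Text, Mutation>> out) {
		// One raw row fans out into one index mutation per term it contains.
		String rawTweet = row.f1.toString();
		for (String term : rawTweet.toLowerCase().split("\\s+")) {
			if (term.isEmpty()) {
				continue;
			}
			Mutation m = new Mutation(new Text(term)); // row key = the term
			m.put(new Text("count"), new Text(""), new Value(new byte[0])); // hypothetical column layout
			out.collect(new Tuple2<Text, Mutation>(outputTable, m));
		}
	}
}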

Example 14: testDeltaIteration

import org.apache.flink.api.java.DataSet; // import for the class that declares flatMap
@Test
public void testDeltaIteration() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.createCollectionsEnvironment();

		@SuppressWarnings("unchecked")
		DataSet<Tuple2<Integer, Integer>> solInput = env.fromElements(
				new Tuple2<Integer, Integer>(1, 0),
				new Tuple2<Integer, Integer>(2, 0),
				new Tuple2<Integer, Integer>(3, 0),
				new Tuple2<Integer, Integer>(4, 0));

		@SuppressWarnings("unchecked")
		DataSet<Tuple1<Integer>> workInput = env.fromElements(
				new Tuple1<Integer>(1),
				new Tuple1<Integer>(2),
				new Tuple1<Integer>(3),
				new Tuple1<Integer>(4));

		// Perform a delta iteration where we add those values to the workset where
		// the second tuple field is smaller than the first tuple field.
		// At the end both tuple fields must be the same.

		DeltaIteration<Tuple2<Integer, Integer>, Tuple1<Integer>> iteration =
			solInput.iterateDelta(workInput, 10, 0);

		DataSet<Tuple2<Integer, Integer>> solDelta = iteration.getSolutionSet().join(
				iteration.getWorkset()).where(0).equalTo(0).with(
				new JoinFunction<Tuple2<Integer, Integer>, Tuple1<Integer>, Tuple2<Integer, Integer>>() {

			@Override
			public Tuple2<Integer, Integer> join(Tuple2<Integer, Integer> first,
					Tuple1<Integer> second) throws Exception {
				return new Tuple2<Integer, Integer>(first.f0, first.f1 + 1);
			}
		});

		DataSet<Tuple1<Integer>> nextWorkset = solDelta.flatMap(
				new FlatMapFunction<Tuple2<Integer, Integer>, Tuple1<Integer>>() {
			@Override
			public void flatMap(Tuple2<Integer, Integer> in, Collector<Tuple1<Integer>>
					out) throws Exception {
				if (in.f1 < in.f0) {
					out.collect(new Tuple1<Integer>(in.f0));
				}
			}
		});

		List<Tuple2<Integer, Integer>> collected = new ArrayList<Tuple2<Integer, Integer>>();

		iteration.closeWith(solDelta, nextWorkset)
				.output(new LocalCollectionOutputFormat<Tuple2<Integer, Integer>>(collected));

		env.execute();

		// verify that both tuple fields are now the same
		for (Tuple2<Integer, Integer> t: collected) {
			assertEquals(t.f0, t.f1);
		}
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Developer: axbaretto, Project: flink, Lines: 66, Source: CollectionExecutionIterationTest.java


Note: The org.apache.flink.api.java.DataSet.flatMap examples above were compiled by 纯净天空 from open-source code hosted on platforms such as GitHub and MSDocs. The snippets come from open-source projects contributed by various developers; copyright remains with the original authors. Consult the corresponding project's license before distributing or using the code. Do not reproduce without permission.