

Java FlatMapFunction Class Code Examples

This article collects typical usage examples of the Java class org.apache.flink.api.common.functions.FlatMapFunction. If you are wondering what the FlatMapFunction class does, how to use it, or where to find working examples, the curated class examples below may help.


The FlatMapFunction class belongs to the org.apache.flink.api.common.functions package. Fifteen code examples of the class are shown below, sorted by popularity by default.
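Before turning to the collected examples, here is a minimal, self-contained sketch of the FlatMapFunction contract (the class name WordTokens and the sample input are ours, not taken from any project below): flatMap receives one input record together with a Collector and may emit zero, one, or many output records, which is what distinguishes it from MapFunction.

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;

public class WordTokens {
	public static void main(String[] args) throws Exception {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		// flatMap may emit any number of records per input, unlike map (exactly one)
		DataSet<Tuple2<String, Integer>> tokens = env
			.fromElements("to be", "or not to be")
			.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
				@Override
				public void flatMap(String line, Collector<Tuple2<String, Integer>> out) {
					for (String word : line.split(" ")) {
						out.collect(new Tuple2<>(word, 1));
					}
				}
			});

		tokens.print(); // triggers execution in the batch API
	}
}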

Example 1: main

import org.apache.flink.api.common.functions.FlatMapFunction; // import the required package/class
public static void main(String[] args) throws Exception {
	// parse arguments
	ParameterTool params = ParameterTool.fromPropertiesFile(args[0]);

	// create streaming environment
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	// enable event time processing
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	// enable fault-tolerance
	env.enableCheckpointing(1000);

	// enable restarts
	env.setRestartStrategy(RestartStrategies.fixedDelayRestart(50, 500L));

	env.setStateBackend(new FsStateBackend("file:///home/robert/flink-workdir/flink-streaming-etl/state-backend"));

	// run each operator separately
	env.disableOperatorChaining();

	// get data from Kafka
	Properties kParams = params.getProperties();
	kParams.setProperty("group.id", UUID.randomUUID().toString());
	DataStream<ObjectNode> inputStream = env.addSource(new FlinkKafkaConsumer09<>(params.getRequired("topic"), new JSONDeserializationSchema(), kParams)).name("Kafka 0.9 Source")
		.assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<ObjectNode>(Time.minutes(1L)) {
			@Override
			public long extractTimestamp(ObjectNode jsonNodes) {
				return jsonNodes.get("timestamp_ms").asLong();
			}
		}).name("Timestamp extractor");

	// filter out records without lang field
	DataStream<ObjectNode> tweetsWithLang = inputStream.filter(jsonNode -> jsonNode.has("user") && jsonNode.get("user").has("lang")).name("Filter records without 'lang' field");

	// select only lang = "en" tweets
	DataStream<ObjectNode> englishTweets = tweetsWithLang.filter(jsonNode -> jsonNode.get("user").get("lang").asText().equals("en")).name("Select 'lang'=en tweets");

	// write to file system
	RollingSink<ObjectNode> rollingSink = new RollingSink<>(params.get("sinkPath", "/home/robert/flink-workdir/flink-streaming-etl/rolling-sink"));
	rollingSink.setBucketer(new DateTimeBucketer("yyyy-MM-dd-HH-mm")); // create one bucket per minute
	englishTweets.addSink(rollingSink).name("Rolling FileSystem Sink");

	// build aggregates (count per language) using window (10 seconds tumbling):
	DataStream<Tuple3<Long, String, Long>> languageCounts = tweetsWithLang.keyBy(jsonNode -> jsonNode.get("user").get("lang").asText())
		.timeWindow(Time.seconds(10))
		.apply(new Tuple3<>(0L, "", 0L), new JsonFoldCounter(), new CountEmitter()).name("Count per Language (10 seconds tumbling)");

	// write window aggregate to ElasticSearch
	List<InetSocketAddress> transportNodes = ImmutableList.of(new InetSocketAddress(InetAddress.getByName("localhost"), 9300));
	ElasticsearchSink<Tuple3<Long, String, Long>> elasticsearchSink = new ElasticsearchSink<>(params.toMap(), transportNodes, new ESRequest());

	languageCounts.addSink(elasticsearchSink).name("ElasticSearch2 Sink");

	// word-count on the tweet stream
	DataStream<Tuple2<Date, List<Tuple2<String, Long>>>> topWordCount = tweetsWithLang
		// get text from tweets
		.map(tweet -> tweet.get("text").asText()).name("Get text from Tweets")
		// split text into (word, 1) tuples
		.flatMap(new FlatMapFunction<String, Tuple2<String, Long>>() {
			@Override
			public void flatMap(String s, Collector<Tuple2<String, Long>> collector) throws Exception {
				String[] splits = s.split(" ");
				for (String sp : splits) {
					collector.collect(new Tuple2<>(sp, 1L));
				}
			}
		}).name("Tokenize words")
		// group by word
		.keyBy(0)
		// build 1 min windows, compute every 10 seconds --> count word frequency
		.timeWindow(Time.minutes(1L), Time.seconds(10L)).apply(new WordCountingWindow()).name("Count word frequency (1 min, 10 sec sliding window)")
		// build top n every 10 seconds
		.timeWindowAll(Time.seconds(10L)).apply(new TopNWords(10)).name("TopN Window (10s)");

	// write top Ns to Kafka topic
	topWordCount.addSink(new FlinkKafkaProducer09<>(params.getRequired("wc-topic"), new ListSerSchema(), params.getProperties())).name("Write topN to Kafka");

	env.execute("Streaming ETL");

}
 
Developer: rmetzger, Project: flink-streaming-etl, Lines of code: 82, Source: StreamingETL.java
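A side note on the anonymous FlatMapFunction in the "Tokenize words" step above: it could presumably be written as a lambda instead, but Java erases the Tuple2 type parameters from lambdas, so Flink then needs an explicit type hint via returns(...). A minimal, self-contained sketch of that variant (class name and sample data are ours):

import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;

public class LambdaTokenizer {
	public static void main(String[] args) throws Exception {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		DataStream<Tuple2<String, Long>> counts = env
			.fromElements("to be", "or not to be")
			.flatMap((String line, Collector<Tuple2<String, Long>> out) -> {
				for (String word : line.split(" ")) {
					out.collect(new Tuple2<>(word, 1L));
				}
			})
			// the lambda's output type is erased at compile time, so Flink
			// needs this hint to build serializers for Tuple2<String, Long>
			.returns(TypeInformation.of(new TypeHint<Tuple2<String, Long>>() {}));

		counts.print();
		env.execute("Lambda tokenizer");
	}
}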

Example 2: getUserNodes

import org.apache.flink.api.common.functions.FlatMapFunction; // import the required package/class
private DataSet<Tuple2<String, UserNodeValues>> getUserNodes(DataSet<JSONObject> jsonData) {
    DataSet<Tuple2<String, UserNodeValues>> userNodes = jsonData.flatMap(new FlatMapFunction<JSONObject, Tuple2<String, UserNodeValues>>() {
        @Override
        public void flatMap(JSONObject jsonObject, Collector<Tuple2<String, UserNodeValues>> out) throws Exception {
            JSONObject user = jsonObject.getJSONObject("user");
            String userId = user.getString("id_str");
            String userName = user.getString("name");
            out.collect(new Tuple2<String, UserNodeValues>(userId, new UserNodeValues(userId,userName)));

            // other mentioned users
            JSONObject entities = jsonObject.getJSONObject("entities");
            JSONArray userMentions = entities.getJSONArray("user_mentions");
            for (int i = 0; i < userMentions.length(); i++) {
                JSONObject current = userMentions.getJSONObject(i);
                String oUserId = current.getString("id_str");
                String oUserName = current.getString("name");
                out.collect(new Tuple2<String, UserNodeValues>(oUserId, new UserNodeValues(oUserId,oUserName)));
            }
        }
    }).distinct(0);
    return userNodes;
}
 
Developer: IIDP, Project: OSTMap, Lines of code: 23, Source: GraphLoader.java

Example 3: getUserEdges

import org.apache.flink.api.common.functions.FlatMapFunction; // import the required package/class
private DataSet<Tuple3<String, String, UserEdgeValues>> getUserEdges(DataSet<JSONObject> jsonData) {

        DataSet<Tuple3<String, String, UserEdgeValues>> userEdges = jsonData.flatMap(new FlatMapFunction<JSONObject, Tuple3<String, String, UserEdgeValues>>() {
            @Override
            public void flatMap(JSONObject jsonObject, Collector<Tuple3<String, String, UserEdgeValues>> out) throws Exception {
                // count initialized to 1
                int count = 1;

                // from the current node
                JSONObject user = jsonObject.getJSONObject("user");
                String from = user.getString("id_str");

                // to other nodes
                JSONObject entities = jsonObject.getJSONObject("entities");
                JSONArray userMentions = entities.getJSONArray("user_mentions");
                for (int i = 0; i < userMentions.length(); i++) {
                    JSONObject current = userMentions.getJSONObject(i);
                    String to = current.getString("id_str");
                    out.collect(new Tuple3<String, String, UserEdgeValues>(from, to, new UserEdgeValues(count)));
                }
            }
        });
        return userEdges;
    }
 
Developer: IIDP, Project: OSTMap, Lines of code: 26, Source: GraphLoader.java
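Taken together, Examples 2 and 3 produce exactly the tuple shapes that Gelly's Graph.fromTupleDataSet expects. A hedged sketch of how the two helpers might be wired up (the jsonData and env variables are assumptions, not code from the original project):

// Hypothetical wiring of the two helpers into a Gelly graph; assumes
// org.apache.flink.graph.Graph is imported and that jsonData
// (DataSet<JSONObject>) and env (ExecutionEnvironment) are in scope.
Graph<String, UserNodeValues, UserEdgeValues> graph = Graph.fromTupleDataSet(
		getUserNodes(jsonData),  // vertices: Tuple2<id, UserNodeValues>
		getUserEdges(jsonData),  // edges:    Tuple3<from, to, UserEdgeValues>
		env);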

Example 4: translateToDataFlow

import org.apache.flink.api.common.functions.FlatMapFunction; // import the required package/class
@Override
protected FlatMapOperatorBase<IN, OUT, FlatMapFunction<IN, OUT>> translateToDataFlow(Operator<IN> input) {
	String name = getName() != null ? getName() : "FlatMap at " + defaultName;
	// create operator
	FlatMapOperatorBase<IN, OUT, FlatMapFunction<IN, OUT>> po = new FlatMapOperatorBase<IN, OUT, FlatMapFunction<IN, OUT>>(function,
		new UnaryOperatorInformation<IN, OUT>(getInputType(), getResultType()), name);
	// set input
	po.setInput(input);
	// set parallelism
	if (this.getParallelism() > 0) {
		// use specified parallelism
		po.setParallelism(this.getParallelism());
	} else {
		// if no parallelism has been specified, use parallelism of input operator to enable chaining
		po.setParallelism(input.getParallelism());
	}

	return po;
}
 
Developer: axbaretto, Project: flink, Lines of code: 20, Source: FlatMapOperator.java

Example 5: translateToDataFlow

import org.apache.flink.api.common.functions.FlatMapFunction; // import the required package/class
@Override
protected org.apache.flink.api.common.operators.base.FilterOperatorBase<T, FlatMapFunction<T, T>> translateToDataFlow(Operator<T> input) {

	String name = getName() != null ? getName() : "Filter at " + defaultName;

	// create operator
	PlanFilterOperator<T> po = new PlanFilterOperator<T>(function, name, getInputType());
	po.setInput(input);

	// set parallelism
	if (getParallelism() > 0) {
		// use specified parallelism
		po.setParallelism(getParallelism());
	} else {
		// if no parallelism has been specified, use parallelism of input operator to enable chaining
		po.setParallelism(input.getParallelism());
	}

	return po;
}
 
Developer: axbaretto, Project: flink, Lines of code: 21, Source: FilterOperator.java

Example 6: executeOnCollections

import org.apache.flink.api.common.functions.FlatMapFunction; // import the required package/class
@Override
protected List<T> executeOnCollections(List<T> inputData, RuntimeContext ctx, ExecutionConfig executionConfig) throws Exception {
	FlatMapFunction<T, T> function = this.userFunction.getUserCodeObject();
	
	FunctionUtils.setFunctionRuntimeContext(function, ctx);
	FunctionUtils.openFunction(function, this.parameters);
	
	ArrayList<T> result = new ArrayList<T>(inputData.size());
	ListCollector<T> collector = new ListCollector<T>(result);

	for (T element : inputData) {
		function.flatMap(element, collector);
	}
	
	FunctionUtils.closeFunction(function);
	
	return result;
}
 
Developer: axbaretto, Project: flink, Lines of code: 19, Source: FilterOperatorBase.java

Example 7: executeOnCollections

import org.apache.flink.api.common.functions.FlatMapFunction; // import the required package/class
@Override
protected List<OUT> executeOnCollections(List<IN> input, RuntimeContext ctx, ExecutionConfig executionConfig) throws Exception {
	FlatMapFunction<IN, OUT> function = userFunction.getUserCodeObject();
	
	FunctionUtils.setFunctionRuntimeContext(function, ctx);
	FunctionUtils.openFunction(function, parameters);

	ArrayList<OUT> result = new ArrayList<OUT>(input.size());

	TypeSerializer<IN> inSerializer = getOperatorInfo().getInputType().createSerializer(executionConfig);
	TypeSerializer<OUT> outSerializer = getOperatorInfo().getOutputType().createSerializer(executionConfig);

	CopyingListCollector<OUT> resultCollector = new CopyingListCollector<OUT>(result, outSerializer);

	for (IN element : input) {
		IN inCopy = inSerializer.copy(element);
		function.flatMap(inCopy, resultCollector);
	}

	FunctionUtils.closeFunction(function);

	return result;
}
 
Developer: axbaretto, Project: flink, Lines of code: 24, Source: FlatMapOperatorBase.java

Example 8: testExecuteOnCollection

import org.apache.flink.api.common.functions.FlatMapFunction; // import the required package/class
private void testExecuteOnCollection(FlatMapFunction<String, String> udf, List<String> input, boolean mutableSafe) throws Exception {
	ExecutionConfig executionConfig = new ExecutionConfig();
	if (mutableSafe) {
		executionConfig.disableObjectReuse();
	} else {
		executionConfig.enableObjectReuse();
	}
	final TaskInfo taskInfo = new TaskInfo("Test UDF", 4, 0, 4, 0);
	// run on collections
	final List<String> result = getTestFlatMapOperator(udf)
			.executeOnCollections(input,
					new RuntimeUDFContext(
						taskInfo,  null, executionConfig, new HashMap<String, Future<Path>>(),
						new HashMap<String, Accumulator<?, ?>>(), new UnregisteredMetricsGroup()),
					executionConfig);

	Assert.assertEquals(input.size(), result.size());
	Assert.assertEquals(input, result);
}
 
Developer: axbaretto, Project: flink, Lines of code: 20, Source: FlatMapOperatorCollectionTest.java
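The helper above runs a UDF over a collection in one of the two object-reuse modes and asserts that the output equals the input. A hedged sketch of an invocation with an identity UDF (the UDF and inputs are ours; assumes java.util.Arrays is imported):

// Hypothetical call: an identity FlatMapFunction must reproduce its input
// exactly whether object reuse is enabled or disabled.
FlatMapFunction<String, String> identity = new FlatMapFunction<String, String>() {
	@Override
	public void flatMap(String value, Collector<String> out) {
		out.collect(value);
	}
};
testExecuteOnCollection(identity, Arrays.asList("alice", "bob"), true);  // mutable-safe: object reuse disabled
testExecuteOnCollection(identity, Arrays.asList("alice", "bob"), false); // object reuse enabled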

Example 9: getRandomEdges

import org.apache.flink.api.common.functions.FlatMapFunction; // import the required package/class
@SuppressWarnings("serial")
public static DataSet<Edge<Long, NullValue>> getRandomEdges(
		ExecutionEnvironment env, final long numVertices) {
	return env.generateSequence(1, numVertices).flatMap(
			new FlatMapFunction<Long, Edge<Long, NullValue>>() {
				@Override
				public void flatMap(Long key, Collector<Edge<Long, NullValue>> out) throws Exception {
					int numOutEdges = (int) (Math.random() * (numVertices / 2));
					for (int i = 0; i < numOutEdges; i++) {
						long target = (long) (Math.random() * numVertices) + 1;
						out.collect(new Edge<Long, NullValue>(key, target,
								NullValue.getInstance()));
					}
				}
			});
}
 
Developer: axbaretto, Project: flink, Lines of code: 17, Source: ExampleUtils.java

Example 10: applyOperation

import org.apache.flink.api.common.functions.FlatMapFunction; // import the required package/class
/**
 * Applies a stream of high level {@link Operation}s to the {@link KVStore}.
 * 
 */
@SuppressWarnings({ "unchecked", "serial" })
public Query<KVOperation<K, V>> applyOperation(DataStream<? extends Operation<K, V>> opStream) {
	final int qid = storeBuilder.nextID();
	storeBuilder.applyOperation(
			((DataStream<Operation<K, V>>) opStream).flatMap(
					new FlatMapFunction<Operation<K, V>, KVOperation<K, V>>() {

						@Override
						public void flatMap(Operation<K, V> op, Collector<KVOperation<K, V>> out) throws Exception {
							for (KVOperation<K, V> kvOp : Operation.createTransaction(qid, op)) {
								out.collect(kvOp);
							}
						}
					}).setParallelism(opStream.getParallelism()), qid);
	Query<KVOperation<K, V>> q = new Query<KVOperation<K, V>>(qid, storeBuilder);
	queries.add(q);
	return q;
}
 
Developer: gyfora, Project: StreamKV, Lines of code: 23, Source: KVStore.java

Example 11: getEdgesDataSet

import org.apache.flink.api.common.functions.FlatMapFunction; // import the required package/class
@SuppressWarnings("serial")
private static DataStream<Edge<Long, NullValue>> getEdgesDataSet(StreamExecutionEnvironment env) {

	if (fileOutput) {
		return env.readTextFile(edgeInputPath)
				.map(new MapFunction<String, Edge<Long, NullValue>>() {
					@Override
					public Edge<Long, NullValue> map(String s) throws Exception {
						String[] fields = s.split("\\t");
						long src = Long.parseLong(fields[0]);
						long trg = Long.parseLong(fields[1]);
						return new Edge<>(src, trg, NullValue.getInstance());
					}
				});
	}

	return env.generateSequence(0, 999).flatMap(
			new FlatMapFunction<Long, Edge<Long, NullValue>>() {
				@Override
				public void flatMap(Long key, Collector<Edge<Long, NullValue>> out) throws Exception {
					out.collect(new Edge<>(key, (key + 2) % 1000, NullValue.getInstance()));
					out.collect(new Edge<>(key, (key + 4) % 1000, NullValue.getInstance()));
				}
			});
}
 
Developer: vasia, Project: gelly-streaming, Lines of code: 26, Source: BroadcastTriangleCount.java

Example 12: globalAggregate

import org.apache.flink.api.common.functions.FlatMapFunction; // import the required package/class
/**
 * Returns a global aggregate on the previously split vertex stream
 *
 * @param edgeMapper the mapper that converts the edge stream to a vertex stream
 * @param vertexMapper the mapper that aggregates vertex values
 * @param collectUpdates boolean specifying whether the aggregate should only be collected when there is an update
 * @param <VV> the return value type
 * @return a stream of the aggregated values
 */
public <VV> DataStream<VV> globalAggregate(FlatMapFunction<Edge<K, EV>, Vertex<K, VV>> edgeMapper,
		FlatMapFunction<Vertex<K, VV>, VV> vertexMapper, boolean collectUpdates) {

	DataStream<VV> result = this.edges.flatMap(edgeMapper)
			.setParallelism(1)
			.flatMap(vertexMapper)
			.setParallelism(1);

	if (collectUpdates) {
		result = result.flatMap(new GlobalAggregateMapper<VV>())
				.setParallelism(1);
	}

	return result;
}
 
Developer: vasia, Project: gelly-streaming, Lines of code: 25, Source: SimpleEdgeStream.java
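A hedged sketch of how globalAggregate might be called to keep a running global edge count, assuming a SimpleEdgeStream<Long, NullValue> named edgeStream (both mappers are ours; mutable state in the vertex mapper is only workable because the method forces both mappers to parallelism 1):

// Hypothetical running edge count; assumes Edge and Vertex
// (org.apache.flink.graph) and NullValue (org.apache.flink.types) are imported.
DataStream<Long> edgeCount = edgeStream.globalAggregate(
		new FlatMapFunction<Edge<Long, NullValue>, Vertex<Long, Long>>() {
			@Override
			public void flatMap(Edge<Long, NullValue> edge, Collector<Vertex<Long, Long>> out) {
				out.collect(new Vertex<>(edge.getSource(), 1L)); // one unit per edge
			}
		},
		new FlatMapFunction<Vertex<Long, Long>, Long>() {
			private long seen = 0L;
			@Override
			public void flatMap(Vertex<Long, Long> v, Collector<Long> out) {
				seen += v.getValue(); // safe only at parallelism 1
				out.collect(seen);
			}
		},
		false); // emit on every element rather than only on updates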

Example 13: getIdTitleMapping

import org.apache.flink.api.common.functions.FlatMapFunction; // import the required package/class
public static DataSet<IdTitleMapping> getIdTitleMapping(ExecutionEnvironment env, String idTitleMappingFilename,
                                                        String wikiDumpInputFilename) throws Exception {
    if(idTitleMappingFilename != null) {
        return env.readTextFile(idTitleMappingFilename).flatMap(new FlatMapFunction<String, IdTitleMapping>() {
            @Override
            public void flatMap(String s, Collector<IdTitleMapping> out) throws Exception {
                String[] cols = s.split(Pattern.quote("|"));
                if (cols.length != 2) {
                    throw new Exception("Invalid id title mapping: " + s);
                }

                out.collect(new IdTitleMapping(Integer.valueOf(cols[0]), cols[1]));
            }
        });
    } else if(wikiDumpInputFilename != null) {
        return extractIdTitleMapping(env, wikiDumpInputFilename);
    } else {
        throw new Exception("Could not get IdTitleMapping. Either idTitleMappingFilename or wikiDumpInputFilename needs to be set.");
    }
}
 
Developer: wikimedia, Project: citolytics, Lines of code: 21, Source: IdTitleMappingExtractor.java
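A quick aside on the Pattern.quote("|") call above: '|' is a regex metacharacter (alternation), so splitting on it unquoted matches the empty string between every character. A small self-contained demonstration (the class name is ours):

import java.util.Arrays;
import java.util.regex.Pattern;

public class SplitQuoteDemo {
	public static void main(String[] args) {
		// unquoted '|' is regex alternation of two empty patterns, so it
		// matches between every character:
		System.out.println(Arrays.toString("123|Title".split("|")));
		// -> [1, 2, 3, |, T, i, t, l, e]

		// Pattern.quote escapes the delimiter so it is matched literally:
		System.out.println(Arrays.toString("123|Title".split(Pattern.quote("|"))));
		// -> [123, Title]
	}
}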

Example 14: getArticleStatsFromFile

import org.apache.flink.api.common.functions.FlatMapFunction; // import the required package/class
public static DataSet<ArticleStatsTuple> getArticleStatsFromFile(ExecutionEnvironment env, String inputFilename) {
    return env.readTextFile(inputFilename).flatMap(new FlatMapFunction<String, ArticleStatsTuple>() {
        @Override
        public void flatMap(String s, Collector<ArticleStatsTuple> out) throws Exception {
            String[] cols = s.split(Pattern.quote(WikiSimConfiguration.csvFieldDelimiter));

            if(cols.length != ArticleStatsTuple.IN_LINKS_KEY + 1)
                throw new Exception("Invalid article stats row: " + s);

            out.collect(new ArticleStatsTuple(
                    cols[ArticleStatsTuple.ARTICLE_NAME_KEY],
                    Integer.valueOf(cols[ArticleStatsTuple.WORDS_KEY]),
                    Integer.valueOf(cols[ArticleStatsTuple.HEADLINES_KEY]),
                    Integer.valueOf(cols[ArticleStatsTuple.OUT_LINKS_KEY]),
                    Double.valueOf(cols[ArticleStatsTuple.AVG_LINK_DISTANCE_KEY]),
                    Integer.valueOf(cols[ArticleStatsTuple.IN_LINKS_KEY])
                    ));
        }
    });
}
 
Developer: wikimedia, Project: citolytics, Lines of code: 21, Source: ArticleStats.java

Example 15: translateToDataFlow

import org.apache.flink.api.common.functions.FlatMapFunction; // import the required package/class
@Override
protected org.apache.flink.api.common.operators.base.FlatMapOperatorBase<IN, OUT, FlatMapFunction<IN,OUT>> translateToDataFlow(Operator<IN> input) {
	
	String name = getName() != null ? getName() : function.getClass().getName();
	// create operator
	FlatMapOperatorBase<IN, OUT, FlatMapFunction<IN, OUT>> po = new FlatMapOperatorBase<IN, OUT, FlatMapFunction<IN, OUT>>(function, new UnaryOperatorInformation<IN, OUT>(getInputType(), getResultType()), name);
	// set input
	po.setInput(input);
	// set dop
	if(this.getParallelism() > 0) {
		// use specified dop
		po.setDegreeOfParallelism(this.getParallelism());
	} else {
		// if no dop has been specified, use dop of input operator to enable chaining
		po.setDegreeOfParallelism(input.getDegreeOfParallelism());
	}
	
	return po;
}
 
Developer: citlab, Project: vs.msc.ws14, Lines of code: 20, Source: FlatMapOperator.java


Note: the org.apache.flink.api.common.functions.FlatMapFunction class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by many developers; copyright of the source code remains with the original authors. For distribution and use, refer to each project's license. Do not reproduce without permission.