This article collects typical usage examples of the Java class org.apache.flink.api.common.functions.FlatMapFunction. If you are wondering what the FlatMapFunction class is for, how to use it, or what FlatMapFunction usage looks like in practice, the curated class code examples here may help.
The FlatMapFunction class belongs to the org.apache.flink.api.common.functions package. A total of 15 code examples of the FlatMapFunction class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code examples.
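Before working through the examples, here is a minimal, self-contained sketch of the pattern they all build on: implementing FlatMapFunction<IN, OUT> so that each input element can produce zero, one, or many output elements via the Collector. The class name FlatMapQuickStart and the sample input are made up for illustration; the API calls themselves (ExecutionEnvironment#fromElements, DataSet#flatMap, Collector#collect) are the standard Flink Java API used throughout the examples below.

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;

public class FlatMapQuickStart {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<String> lines = env.fromElements("to be or not to be");
        // FlatMapFunction<IN, OUT>: emit any number of OUT records per IN record via the Collector
        DataSet<Tuple2<String, Long>> words = lines.flatMap(
                new FlatMapFunction<String, Tuple2<String, Long>>() {
                    @Override
                    public void flatMap(String line, Collector<Tuple2<String, Long>> out) {
                        for (String word : line.split(" ")) {
                            out.collect(new Tuple2<>(word, 1L));
                        }
                    }
                });
        words.print(); // print() triggers execution of the batch job
    }
}

The same flatMap can also be written as a Java 8 lambda, but because the Collector parameter is affected by type erasure, Flink usually needs an explicit .returns(...) type hint in that case; the anonymous-class form shown here and in the examples below avoids that.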
Example 1: main
import org.apache.flink.api.common.functions.FlatMapFunction; // import the required package/class
public static void main(String[] args) throws Exception {
    // parse arguments
    ParameterTool params = ParameterTool.fromPropertiesFile(args[0]);
    // create streaming environment
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // enable event time processing
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    // enable fault-tolerance
    env.enableCheckpointing(1000);
    // enable restarts
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(50, 500L));
    env.setStateBackend(new FsStateBackend("file:///home/robert/flink-workdir/flink-streaming-etl/state-backend"));
    // run each operator separately
    env.disableOperatorChaining();
    // get data from Kafka
    Properties kParams = params.getProperties();
    kParams.setProperty("group.id", UUID.randomUUID().toString());
    DataStream<ObjectNode> inputStream = env.addSource(new FlinkKafkaConsumer09<>(params.getRequired("topic"), new JSONDeserializationSchema(), kParams)).name("Kafka 0.9 Source")
            .assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<ObjectNode>(Time.minutes(1L)) {
                @Override
                public long extractTimestamp(ObjectNode jsonNodes) {
                    return jsonNodes.get("timestamp_ms").asLong();
                }
            }).name("Timestamp extractor");
    // filter out records without lang field
    DataStream<ObjectNode> tweetsWithLang = inputStream.filter(jsonNode -> jsonNode.has("user") && jsonNode.get("user").has("lang")).name("Filter records without 'lang' field");
    // select only lang = "en" tweets
    DataStream<ObjectNode> englishTweets = tweetsWithLang.filter(jsonNode -> jsonNode.get("user").get("lang").asText().equals("en")).name("Select 'lang'=en tweets");
    // write to file system
    RollingSink<ObjectNode> rollingSink = new RollingSink<>(params.get("sinkPath", "/home/robert/flink-workdir/flink-streaming-etl/rolling-sink"));
    rollingSink.setBucketer(new DateTimeBucketer("yyyy-MM-dd-HH-mm")); // do a bucket for each minute
    englishTweets.addSink(rollingSink).name("Rolling FileSystem Sink");
    // build aggregates (count per language) using window (10 seconds tumbling):
    DataStream<Tuple3<Long, String, Long>> languageCounts = tweetsWithLang.keyBy(jsonNode -> jsonNode.get("user").get("lang").asText())
            .timeWindow(Time.seconds(10))
            .apply(new Tuple3<>(0L, "", 0L), new JsonFoldCounter(), new CountEmitter()).name("Count per Language (10 seconds tumbling)");
    // write window aggregate to ElasticSearch
    List<InetSocketAddress> transportNodes = ImmutableList.of(new InetSocketAddress(InetAddress.getByName("localhost"), 9300));
    ElasticsearchSink<Tuple3<Long, String, Long>> elasticsearchSink = new ElasticsearchSink<>(params.toMap(), transportNodes, new ESRequest());
    languageCounts.addSink(elasticsearchSink).name("ElasticSearch2 Sink");
    // word-count on the tweet stream
    DataStream<Tuple2<Date, List<Tuple2<String, Long>>>> topWordCount = tweetsWithLang
            // get text from tweets
            .map(tweet -> tweet.get("text").asText()).name("Get text from Tweets")
            // split text into (word, 1) tuples
            .flatMap(new FlatMapFunction<String, Tuple2<String, Long>>() {
                @Override
                public void flatMap(String s, Collector<Tuple2<String, Long>> collector) throws Exception {
                    String[] splits = s.split(" ");
                    for (String sp : splits) {
                        collector.collect(new Tuple2<>(sp, 1L));
                    }
                }
            }).name("Tokenize words")
            // group by word
            .keyBy(0)
            // build 1 min windows, compute every 10 seconds --> count word frequency
            .timeWindow(Time.minutes(1L), Time.seconds(10L)).apply(new WordCountingWindow()).name("Count word frequency (1 min, 10 sec sliding window)")
            // build top n every 10 seconds
            .timeWindowAll(Time.seconds(10L)).apply(new TopNWords(10)).name("TopN Window (10s)");
    // write top Ns to Kafka topic
    topWordCount.addSink(new FlinkKafkaProducer09<>(params.getRequired("wc-topic"), new ListSerSchema(), params.getProperties())).name("Write topN to Kafka");
    env.execute("Streaming ETL");
}
Example 2: getUserNodes
import org.apache.flink.api.common.functions.FlatMapFunction; // import the required package/class
private DataSet<Tuple2<String, UserNodeValues>> getUserNodes(DataSet<JSONObject> jsonData) {
    DataSet<Tuple2<String, UserNodeValues>> userNodes = jsonData.flatMap(new FlatMapFunction<JSONObject, Tuple2<String, UserNodeValues>>() {
        @Override
        public void flatMap(JSONObject jsonObject, Collector<Tuple2<String, UserNodeValues>> out) throws Exception {
            JSONObject user = jsonObject.getJSONObject("user");
            String userId = user.getString("id_str");
            String userName = user.getString("name");
            out.collect(new Tuple2<String, UserNodeValues>(userId, new UserNodeValues(userId, userName)));
            // other mentioned users
            JSONObject entities = jsonObject.getJSONObject("entities");
            JSONArray userMentions = entities.getJSONArray("user_mentions");
            for (int i = 0; i < userMentions.length(); i++) {
                JSONObject current = userMentions.getJSONObject(i);
                String oUserId = current.getString("id_str");
                String oUserName = current.getString("name");
                out.collect(new Tuple2<String, UserNodeValues>(oUserId, new UserNodeValues(oUserId, oUserName)));
            }
        }
    }).distinct(0);
    return userNodes;
}
Example 3: getUserEdges
import org.apache.flink.api.common.functions.FlatMapFunction; // import the required package/class
private DataSet<Tuple3<String, String, UserEdgeValues>> getUserEdges(DataSet<JSONObject> jsonData) {
    DataSet<Tuple3<String, String, UserEdgeValues>> userEdges = jsonData.flatMap(new FlatMapFunction<JSONObject, Tuple3<String, String, UserEdgeValues>>() {
        @Override
        public void flatMap(JSONObject jsonObject, Collector<Tuple3<String, String, UserEdgeValues>> out) throws Exception {
            // count initialized to 1
            int count = 1;
            // from the current node
            JSONObject user = jsonObject.getJSONObject("user");
            String from = user.getString("id_str");
            // to other nodes
            JSONObject entities = jsonObject.getJSONObject("entities");
            JSONArray userMentions = entities.getJSONArray("user_mentions");
            for (int i = 0; i < userMentions.length(); i++) {
                JSONObject current = userMentions.getJSONObject(i);
                String to = current.getString("id_str");
                out.collect(new Tuple3<String, String, UserEdgeValues>(from, to, new UserEdgeValues(count)));
            }
        }
    });
    return userEdges;
}
Example 4: translateToDataFlow
import org.apache.flink.api.common.functions.FlatMapFunction; // import the required package/class
@Override
protected FlatMapOperatorBase<IN, OUT, FlatMapFunction<IN, OUT>> translateToDataFlow(Operator<IN> input) {
    String name = getName() != null ? getName() : "FlatMap at " + defaultName;
    // create operator
    FlatMapOperatorBase<IN, OUT, FlatMapFunction<IN, OUT>> po = new FlatMapOperatorBase<IN, OUT, FlatMapFunction<IN, OUT>>(function,
            new UnaryOperatorInformation<IN, OUT>(getInputType(), getResultType()), name);
    // set input
    po.setInput(input);
    // set parallelism
    if (this.getParallelism() > 0) {
        // use specified parallelism
        po.setParallelism(this.getParallelism());
    } else {
        // if no parallelism has been specified, use parallelism of input operator to enable chaining
        po.setParallelism(input.getParallelism());
    }
    return po;
}
Example 5: translateToDataFlow
import org.apache.flink.api.common.functions.FlatMapFunction; // import the required package/class
@Override
protected org.apache.flink.api.common.operators.base.FilterOperatorBase<T, FlatMapFunction<T, T>> translateToDataFlow(Operator<T> input) {
    String name = getName() != null ? getName() : "Filter at " + defaultName;
    // create operator
    PlanFilterOperator<T> po = new PlanFilterOperator<T>(function, name, getInputType());
    po.setInput(input);
    // set parallelism
    if (getParallelism() > 0) {
        // use specified parallelism
        po.setParallelism(getParallelism());
    } else {
        // if no parallelism has been specified, use parallelism of input operator to enable chaining
        po.setParallelism(input.getParallelism());
    }
    return po;
}
Example 6: executeOnCollections
import org.apache.flink.api.common.functions.FlatMapFunction; // import the required package/class
@Override
protected List<T> executeOnCollections(List<T> inputData, RuntimeContext ctx, ExecutionConfig executionConfig) throws Exception {
    FlatMapFunction<T, T> function = this.userFunction.getUserCodeObject();
    FunctionUtils.setFunctionRuntimeContext(function, ctx);
    FunctionUtils.openFunction(function, this.parameters);
    ArrayList<T> result = new ArrayList<T>(inputData.size());
    ListCollector<T> collector = new ListCollector<T>(result);
    for (T element : inputData) {
        function.flatMap(element, collector);
    }
    FunctionUtils.closeFunction(function);
    return result;
}
Example 7: executeOnCollections
import org.apache.flink.api.common.functions.FlatMapFunction; // import the required package/class
@Override
protected List<OUT> executeOnCollections(List<IN> input, RuntimeContext ctx, ExecutionConfig executionConfig) throws Exception {
    FlatMapFunction<IN, OUT> function = userFunction.getUserCodeObject();
    FunctionUtils.setFunctionRuntimeContext(function, ctx);
    FunctionUtils.openFunction(function, parameters);
    ArrayList<OUT> result = new ArrayList<OUT>(input.size());
    TypeSerializer<IN> inSerializer = getOperatorInfo().getInputType().createSerializer(executionConfig);
    TypeSerializer<OUT> outSerializer = getOperatorInfo().getOutputType().createSerializer(executionConfig);
    CopyingListCollector<OUT> resultCollector = new CopyingListCollector<OUT>(result, outSerializer);
    for (IN element : input) {
        IN inCopy = inSerializer.copy(element);
        function.flatMap(inCopy, resultCollector);
    }
    FunctionUtils.closeFunction(function);
    return result;
}
Example 8: testExecuteOnCollection
import org.apache.flink.api.common.functions.FlatMapFunction; // import the required package/class
private void testExecuteOnCollection(FlatMapFunction<String, String> udf, List<String> input, boolean mutableSafe) throws Exception {
    ExecutionConfig executionConfig = new ExecutionConfig();
    if (mutableSafe) {
        executionConfig.disableObjectReuse();
    } else {
        executionConfig.enableObjectReuse();
    }
    final TaskInfo taskInfo = new TaskInfo("Test UDF", 4, 0, 4, 0);
    // run on collections
    final List<String> result = getTestFlatMapOperator(udf)
            .executeOnCollections(input,
                    new RuntimeUDFContext(
                            taskInfo, null, executionConfig, new HashMap<String, Future<Path>>(),
                            new HashMap<String, Accumulator<?, ?>>(), new UnregisteredMetricsGroup()),
                    executionConfig);
    Assert.assertEquals(input.size(), result.size());
    Assert.assertEquals(input, result);
}
Example 9: getRandomEdges
import org.apache.flink.api.common.functions.FlatMapFunction; // import the required package/class
@SuppressWarnings("serial")
public static DataSet<Edge<Long, NullValue>> getRandomEdges(
        ExecutionEnvironment env, final long numVertices) {
    return env.generateSequence(1, numVertices).flatMap(
            new FlatMapFunction<Long, Edge<Long, NullValue>>() {
                @Override
                public void flatMap(Long key, Collector<Edge<Long, NullValue>> out) throws Exception {
                    int numOutEdges = (int) (Math.random() * (numVertices / 2));
                    for (int i = 0; i < numOutEdges; i++) {
                        long target = (long) (Math.random() * numVertices) + 1;
                        out.collect(new Edge<Long, NullValue>(key, target,
                                NullValue.getInstance()));
                    }
                }
            });
}
Example 10: applyOperation
import org.apache.flink.api.common.functions.FlatMapFunction; // import the required package/class
/**
 * Applies a stream of high level {@link Operation}s to the {@link KVStore}.
 */
@SuppressWarnings({ "unchecked", "serial" })
public Query<KVOperation<K, V>> applyOperation(DataStream<? extends Operation<K, V>> opStream) {
    final int qid = storeBuilder.nextID();
    storeBuilder.applyOperation(
            ((DataStream<Operation<K, V>>) opStream).flatMap(
                    new FlatMapFunction<Operation<K, V>, KVOperation<K, V>>() {
                        @Override
                        public void flatMap(Operation<K, V> op, Collector<KVOperation<K, V>> out) throws Exception {
                            for (KVOperation<K, V> kvOp : Operation.createTransaction(qid, op)) {
                                out.collect(kvOp);
                            }
                        }
                    }).setParallelism(opStream.getParallelism()), qid);
    Query<KVOperation<K, V>> q = new Query<KVOperation<K, V>>(qid, storeBuilder);
    queries.add(q);
    return q;
}
Example 11: getEdgesDataSet
import org.apache.flink.api.common.functions.FlatMapFunction; // import the required package/class
@SuppressWarnings("serial")
private static DataStream<Edge<Long, NullValue>> getEdgesDataSet(StreamExecutionEnvironment env) {
    if (fileOutput) {
        return env.readTextFile(edgeInputPath)
                .map(new MapFunction<String, Edge<Long, NullValue>>() {
                    @Override
                    public Edge<Long, NullValue> map(String s) throws Exception {
                        String[] fields = s.split("\\t");
                        long src = Long.parseLong(fields[0]);
                        long trg = Long.parseLong(fields[1]);
                        return new Edge<>(src, trg, NullValue.getInstance());
                    }
                });
    }
    return env.generateSequence(0, 999).flatMap(
            new FlatMapFunction<Long, Edge<Long, NullValue>>() {
                @Override
                public void flatMap(Long key, Collector<Edge<Long, NullValue>> out) throws Exception {
                    out.collect(new Edge<>(key, (key + 2) % 1000, NullValue.getInstance()));
                    out.collect(new Edge<>(key, (key + 4) % 1000, NullValue.getInstance()));
                }
            });
}
Example 12: globalAggregate
import org.apache.flink.api.common.functions.FlatMapFunction; // import the required package/class
/**
 * Returns a global aggregate on the previously split vertex stream.
 *
 * @param edgeMapper the mapper that converts the edge stream to a vertex stream
 * @param vertexMapper the mapper that aggregates vertex values
 * @param collectUpdates boolean specifying whether the aggregate should only be collected when there is an update
 * @param <VV> the return value type
 * @return a stream of the aggregated values
 */
public <VV> DataStream<VV> globalAggregate(FlatMapFunction<Edge<K, EV>, Vertex<K, VV>> edgeMapper,
        FlatMapFunction<Vertex<K, VV>, VV> vertexMapper, boolean collectUpdates) {
    DataStream<VV> result = this.edges.flatMap(edgeMapper)
            .setParallelism(1)
            .flatMap(vertexMapper)
            .setParallelism(1);
    if (collectUpdates) {
        result = result.flatMap(new GlobalAggregateMapper<VV>())
                .setParallelism(1);
    }
    return result;
}
Example 13: getIdTitleMapping
import org.apache.flink.api.common.functions.FlatMapFunction; // import the required package/class
public static DataSet<IdTitleMapping> getIdTitleMapping(ExecutionEnvironment env, String idTitleMappingFilename,
        String wikiDumpInputFilename) throws Exception {
    if (idTitleMappingFilename != null) {
        return env.readTextFile(idTitleMappingFilename).flatMap(new FlatMapFunction<String, IdTitleMapping>() {
            @Override
            public void flatMap(String s, Collector<IdTitleMapping> out) throws Exception {
                String[] cols = s.split(Pattern.quote("|"));
                if (cols.length != 2) {
                    throw new Exception("Invalid id title mapping: " + s);
                }
                out.collect(new IdTitleMapping(Integer.valueOf(cols[0]), cols[1]));
            }
        });
    } else if (wikiDumpInputFilename != null) {
        return extractIdTitleMapping(env, wikiDumpInputFilename);
    } else {
        throw new Exception("Could not get IdTitleMapping. Either idTitleMappingFilename or wikiDumpInputFilename needs to be set.");
    }
}
Example 14: getArticleStatsFromFile
import org.apache.flink.api.common.functions.FlatMapFunction; // import the required package/class
public static DataSet<ArticleStatsTuple> getArticleStatsFromFile(ExecutionEnvironment env, String inputFilename) {
    return env.readTextFile(inputFilename).flatMap(new FlatMapFunction<String, ArticleStatsTuple>() {
        @Override
        public void flatMap(String s, Collector<ArticleStatsTuple> out) throws Exception {
            String[] cols = s.split(Pattern.quote(WikiSimConfiguration.csvFieldDelimiter));
            if (cols.length != ArticleStatsTuple.IN_LINKS_KEY + 1) {
                throw new Exception("Invalid article stats row: " + s);
            }
            out.collect(new ArticleStatsTuple(
                    cols[ArticleStatsTuple.ARTICLE_NAME_KEY],
                    Integer.valueOf(cols[ArticleStatsTuple.WORDS_KEY]),
                    Integer.valueOf(cols[ArticleStatsTuple.HEADLINES_KEY]),
                    Integer.valueOf(cols[ArticleStatsTuple.OUT_LINKS_KEY]),
                    Double.valueOf(cols[ArticleStatsTuple.AVG_LINK_DISTANCE_KEY]),
                    Integer.valueOf(cols[ArticleStatsTuple.IN_LINKS_KEY])
            ));
        }
    });
}
Example 15: translateToDataFlow
import org.apache.flink.api.common.functions.FlatMapFunction; // import the required package/class
@Override
protected org.apache.flink.api.common.operators.base.FlatMapOperatorBase<IN, OUT, FlatMapFunction<IN, OUT>> translateToDataFlow(Operator<IN> input) {
    String name = getName() != null ? getName() : function.getClass().getName();
    // create operator
    FlatMapOperatorBase<IN, OUT, FlatMapFunction<IN, OUT>> po = new FlatMapOperatorBase<IN, OUT, FlatMapFunction<IN, OUT>>(function, new UnaryOperatorInformation<IN, OUT>(getInputType(), getResultType()), name);
    // set input
    po.setInput(input);
    // set dop
    if (this.getParallelism() > 0) {
        // use specified dop
        po.setDegreeOfParallelism(this.getParallelism());
    } else {
        // if no dop has been specified, use dop of input operator to enable chaining
        po.setDegreeOfParallelism(input.getDegreeOfParallelism());
    }
    return po;
}