This article collects typical usage examples of the Java method org.apache.flink.api.java.utils.ParameterTool.get. If you are wondering what ParameterTool.get does, how to call it, or what real-world usage looks like, the curated code examples below should help. You can also explore further usage examples of the enclosing class, org.apache.flink.api.java.utils.ParameterTool.
Below are 15 code examples of the ParameterTool.get method, sorted by popularity by default.
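Before diving into the examples, here is a minimal, self-contained sketch (not taken from any of the projects below) illustrating the ParameterTool.get patterns that recur throughout: reading an optional value, reading a value with a default, and requiring a value.
import org.apache.flink.api.java.utils.ParameterTool;

public class ParameterToolGetSketch {
    public static void main(String[] args) {
        // e.g. run with: --input /tmp/data.txt --parallelism 4
        ParameterTool params = ParameterTool.fromArgs(args);

        // get(key) returns the value, or null when the key is absent
        String input = params.get("input");

        // get(key, default) falls back to the default when the key is absent
        String output = params.get("output", "/tmp/out");

        // getRequired(key) throws a RuntimeException when the key is absent
        // String topic = params.getRequired("topic");

        // typed variants follow the same pattern
        int parallelism = params.getInt("parallelism", 1);

        System.out.println("input=" + input + ", output=" + output + ", parallelism=" + parallelism);
    }
}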
Example 1: main
import org.apache.flink.api.java.utils.ParameterTool; // import the package/class this method depends on
public static void main(String[] args) throws Exception {
ParameterTool params = ParameterTool.fromArgs(args);
final String nycTaxiRidesPath = params.get("nycTaxiRidesPath");
final int maxEventDelay = 60; // events are out of order by max 60 seconds
final int servingSpeedFactor = 600; // events of 10 minutes are served in 1 second
// set up streaming execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
// start the data generator
DataStream<TaxiRide> rides = env.addSource(
new TaxiRideSource(nycTaxiRidesPath, maxEventDelay, servingSpeedFactor));
// ===============================================================================
// 1. clean up `rides`, so that the output stream only contains events
// with valid geo coordinates within NYC.
// 2. print out the result stream to console
// ===============================================================================
// run the cleansing pipeline
env.execute("Taxi Ride Cleansing");
}
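Example 1 intentionally leaves the cleansing step as an exercise. The sketch below shows one possible completion; it is not the original solution and assumes the GeoUtils.isInNYC(lon, lat) helper and the public TaxiRide fields (startLon, startLat, endLon, endLat) that ship with the Flink training-exercises project (adjust the package imports and the input path to your setup).
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class RideCleansingSketch {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // same generator parameters as in Example 1 (assumed TaxiRideSource from the training exercises)
        DataStream<TaxiRide> rides = env.addSource(
                new TaxiRideSource("/path/to/nycTaxiRides.gz", 60, 600));

        // 1. keep only rides that both start and end within NYC
        DataStream<TaxiRide> cleansedRides = rides.filter(ride ->
                GeoUtils.isInNYC(ride.startLon, ride.startLat)
                        && GeoUtils.isInNYC(ride.endLon, ride.endLat));

        // 2. print the result stream to the console
        cleansedRides.print();

        env.execute("Taxi Ride Cleansing");
    }
}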
Example 2: main
import org.apache.flink.api.java.utils.ParameterTool; // import the package/class this method depends on
public static void main(String[] args) throws Exception {
// parse arguments
ParameterTool params = ParameterTool.fromPropertiesFile(args[0]);
// create streaming environment
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
// enable event time processing
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
// enable fault-tolerance
env.enableCheckpointing(1000);
// enable restarts
env.setRestartStrategy(RestartStrategies.fixedDelayRestart(50, 500L));
env.setStateBackend(new FsStateBackend("file:///home/robert/flink-workdir/flink-streaming-etl/state-backend"));
// run each operator separately
env.disableOperatorChaining();
// get data from Kafka
Properties kParams = params.getProperties();
kParams.setProperty("group.id", UUID.randomUUID().toString());
DataStream<ObjectNode> inputStream = env.addSource(new FlinkKafkaConsumer09<>(params.getRequired("topic"), new JSONDeserializationSchema(), kParams)).name("Kafka 0.9 Source")
.assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<ObjectNode>(Time.minutes(1L)) {
@Override
public long extractTimestamp(ObjectNode jsonNodes) {
return jsonNodes.get("timestamp_ms").asLong();
}
}).name("Timestamp extractor");
// filter out records without lang field
DataStream<ObjectNode> tweetsWithLang = inputStream.filter(jsonNode -> jsonNode.has("user") && jsonNode.get("user").has("lang")).name("Filter records without 'lang' field");
// select only lang = "en" tweets
DataStream<ObjectNode> englishTweets = tweetsWithLang.filter(jsonNode -> jsonNode.get("user").get("lang").asText().equals("en")).name("Select 'lang'=en tweets");
// write to file system
RollingSink<ObjectNode> rollingSink = new RollingSink<>(params.get("sinkPath", "/home/robert/flink-workdir/flink-streaming-etl/rolling-sink"));
rollingSink.setBucketer(new DateTimeBucketer("yyyy-MM-dd-HH-mm")); // do a bucket for each minute
englishTweets.addSink(rollingSink).name("Rolling FileSystem Sink");
// build aggregates (count per language) using window (10 seconds tumbling):
DataStream<Tuple3<Long, String, Long>> languageCounts = tweetsWithLang.keyBy(jsonNode -> jsonNode.get("user").get("lang").asText())
.timeWindow(Time.seconds(10))
.apply(new Tuple3<>(0L, "", 0L), new JsonFoldCounter(), new CountEmitter()).name("Count per Language (10 seconds tumbling)");
// write window aggregate to ElasticSearch
List<InetSocketAddress> transportNodes = ImmutableList.of(new InetSocketAddress(InetAddress.getByName("localhost"), 9300));
ElasticsearchSink<Tuple3<Long, String, Long>> elasticsearchSink = new ElasticsearchSink<>(params.toMap(), transportNodes, new ESRequest());
languageCounts.addSink(elasticsearchSink).name("ElasticSearch2 Sink");
// word-count on the tweet stream
DataStream<Tuple2<Date, List<Tuple2<String, Long>>>> topWordCount = tweetsWithLang
// get text from tweets
.map(tweet -> tweet.get("text").asText()).name("Get text from Tweets")
// split text into (word, 1) tuples
.flatMap(new FlatMapFunction<String, Tuple2<String, Long>>() {
@Override
public void flatMap(String s, Collector<Tuple2<String, Long>> collector) throws Exception {
String[] splits = s.split(" ");
for (String sp : splits) {
collector.collect(new Tuple2<>(sp, 1L));
}
}
}).name("Tokenize words")
// group by word
.keyBy(0)
// build 1 min windows, compute every 10 seconds --> count word frequency
.timeWindow(Time.minutes(1L), Time.seconds(10L)).apply(new WordCountingWindow()).name("Count word frequency (1 min, 10 sec sliding window)")
// build top n every 10 seconds
.timeWindowAll(Time.seconds(10L)).apply(new TopNWords(10)).name("TopN Window (10s)");
// write top Ns to Kafka topic
topWordCount.addSink(new FlinkKafkaProducer09<>(params.getRequired("wc-topic"), new ListSerSchema(), params.getProperties())).name("Write topN to Kafka");
env.execute("Streaming ETL");
}
Example 3: main
import org.apache.flink.api.java.utils.ParameterTool; // import the package/class this method depends on
public static void main(String[] args) throws Exception {
ParameterTool params = ParameterTool.fromArgs(args);
// define the dataflow
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(2);
env.setRestartStrategy(RestartStrategies.fixedDelayRestart(10, 1000));
env.readFileStream("input/", 60000, FileMonitoringFunction.WatchType.ONLY_NEW_FILES)
.addSink(new DiscardingSink<String>());
// generate a job graph
final JobGraph jobGraph = env.getStreamGraph().getJobGraph();
File jobGraphFile = new File(params.get("output", "job.graph"));
try (FileOutputStream output = new FileOutputStream(jobGraphFile);
ObjectOutputStream obOutput = new ObjectOutputStream(output)){
obOutput.writeObject(jobGraph);
}
}
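For completeness, here is a small companion sketch (not part of the original example) that reads the serialized JobGraph back in. JobGraph implements java.io.Serializable, which is why Example 3 can write it with a plain ObjectOutputStream, so plain object deserialization is enough here.
import java.io.File;
import java.io.FileInputStream;
import java.io.ObjectInputStream;
import org.apache.flink.runtime.jobgraph.JobGraph;

public class JobGraphReaderSketch {
    public static void main(String[] args) throws Exception {
        // default file name matches the default used by Example 3
        File jobGraphFile = new File(args.length > 0 ? args[0] : "job.graph");
        try (FileInputStream input = new FileInputStream(jobGraphFile);
             ObjectInputStream obInput = new ObjectInputStream(input)) {
            // the file was written with ObjectOutputStream in Example 3
            JobGraph jobGraph = (JobGraph) obInput.readObject();
            System.out.println("Restored job graph " + jobGraph.getJobID()
                    + " with " + jobGraph.getNumberOfVertices() + " vertices");
        }
    }
}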
Example 4: configure
import org.apache.flink.api.java.utils.ParameterTool; // import the package/class this method depends on
@Override
public void configure(ParameterTool parameterTool) {
String ordering = parameterTool.get("simplify");
if (ordering == null) {
value = Ordering.NONE;
} else {
switch (ordering.toLowerCase()) {
case "directed":
value = Ordering.DIRECTED;
break;
case "undirected":
value = parameterTool.has("clip_and_flip") ? Ordering.UNDIRECTED_CLIP_AND_FLIP : Ordering.UNDIRECTED;
break;
default:
throw new ProgramParametrizationException(
"Expected 'directed' or 'undirected' ordering but received '" + ordering + "'");
}
}
}
Example 5: main
import org.apache.flink.api.java.utils.ParameterTool; // import the package/class this method depends on
public static void main(String[] args) throws Exception {
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
// get CLI parameters
ParameterTool parameters = ParameterTool.fromArgs(args);
String topic = parameters.getRequired("topic");
String groupId = parameters.get("group-id", "flink-kafka-consumer");
String propertiesFile = parameters.getRequired("env");
ParameterTool envProperties = ParameterTool.fromPropertiesFile(propertiesFile);
String schemaRegistryUrl = envProperties.getRequired("registry_url");
String bootstrapServers = envProperties.getRequired("brokers");
String zookeeperConnect = envProperties.getRequired("zookeeper");
// setup Kafka sink
ConfluentAvroDeserializationSchema deserSchema = new ConfluentAvroDeserializationSchema(schemaRegistryUrl);
Properties kafkaProps = new Properties();
kafkaProps.setProperty("bootstrap.servers", bootstrapServers);
kafkaProps.setProperty("zookeeper.connect", zookeeperConnect);
kafkaProps.setProperty("group.id", groupId);
FlinkKafkaConsumer08<String> flinkKafkaConsumer = new FlinkKafkaConsumer08<String>(topic, deserSchema, kafkaProps);
DataStream<String> kafkaStream = env.addSource(flinkKafkaConsumer);
DataStream<Integer> counts = kafkaStream
.map(new MapFunction<String, Integer>() {
public Integer map(String s) throws Exception {
return 1;
}
})
.timeWindowAll(Time.seconds(3))
.sum(0);
counts.print();
env.execute("Flink Kafka Java Example");
}
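Example 5 keeps the command-line parameters and the environment properties file in two separate ParameterTool instances. A common variation, shown in this sketch (not from the original project), merges them with ParameterTool.mergeWith so that a single object backs every get call; on duplicate keys the values of the instance passed to mergeWith take precedence.
import org.apache.flink.api.java.utils.ParameterTool;

public class MergedParametersSketch {
    public static void main(String[] args) throws Exception {
        // e.g. run with: --topic my-topic --env /path/to/env.properties
        ParameterTool cli = ParameterTool.fromArgs(args);
        ParameterTool fileProps = ParameterTool.fromPropertiesFile(cli.getRequired("env"));

        // command-line values win over properties-file values on duplicate keys
        ParameterTool params = fileProps.mergeWith(cli);

        String topic = params.getRequired("topic");      // supplied on the command line
        String brokers = params.getRequired("brokers");  // supplied in the properties file
        System.out.println("topic=" + topic + ", brokers=" + brokers);
    }
}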
Example 6: SummarizationJobParameters
import org.apache.flink.api.java.utils.ParameterTool; // import the package/class this method depends on
public SummarizationJobParameters(ParameterTool params) {
timelyHostname = params.getRequired("timelyHostname");
timelyTcpPort = params.getInt("timelyTcpPort", 4241);
timelyHttpsPort = params.getInt("timelyHttpsPort", 4242);
timelyWssPort = params.getInt("timelyWssPort", 4243);
doLogin = params.getBoolean("doLogin", false);
timelyUsername = params.get("timelyUsername", null);
timelyPassword = params.get("timelyPassword", null);
keyStoreFile = params.getRequired("keyStoreFile");
keyStoreType = params.get("keyStoreType", "JKS");
keyStorePass = params.getRequired("keyStorePass");
trustStoreFile = params.getRequired("trustStoreFile");
trustStoreType = params.get("trustStoreType", "JKS");
trustStorePass = params.getRequired("trustStorePass");
hostVerificationEnabled = params.getBoolean("hostVerificationEnabled", true);
bufferSize = params.getInt("bufferSize", 10485760);
String metricNames = params.getRequired("metrics");
if (null != metricNames) {
metrics = metricNames.split(",");
} else {
metrics = null;
}
startTime = params.getLong("startTime", 0L);
endTime = params.getLong("endTime", 0L);
interval = params.getRequired("interval");
intervalUnits = params.getRequired("intervalUnits");
}
Example 7: main
import org.apache.flink.api.java.utils.ParameterTool; // import the package/class this method depends on
public static void main(String[] args) throws Exception {
// startup checks and logging
EnvironmentInformation.logEnvironmentInfo(LOG, "TaskManager", args);
SignalHandler.register(LOG);
JvmShutdownSafeguard.installAsShutdownHook(LOG);
long maxOpenFileHandles = EnvironmentInformation.getOpenFileHandlesLimit();
if (maxOpenFileHandles != -1L) {
LOG.info("Maximum number of open file descriptors is {}.", maxOpenFileHandles);
} else {
LOG.info("Cannot determine the maximum number of open file descriptors");
}
ParameterTool parameterTool = ParameterTool.fromArgs(args);
final String configDir = parameterTool.get("configDir");
final Configuration configuration = GlobalConfiguration.loadConfiguration(configDir);
SecurityUtils.install(new SecurityConfiguration(configuration));
try {
SecurityUtils.getInstalledContext().runSecured(new Callable<Void>() {
@Override
public Void call() throws Exception {
runTaskManager(configuration, ResourceID.generate());
return null;
}
});
} catch (Throwable t) {
LOG.error("TaskManager initialization failed.", t);
System.exit(STARTUP_FAILURE_RETURN_CODE);
}
}
Example 8: main
import org.apache.flink.api.java.utils.ParameterTool; // import the package/class this method depends on
public static void main(String[] args) throws Exception {
// get the execution environment
final ExecutionEnvironment job = ExecutionEnvironment.getExecutionEnvironment();
String inputPath, outputPath = null;
try {
final ParameterTool params = ParameterTool.fromArgs(args);
inputPath = params.get("input");
if (params.has("output")) {
outputPath = params.get("output");
}
// make parameters available in the web interface
job.getConfig().setGlobalJobParameters(params);
} catch (Exception e) {
System.err.println("No input specified. Please run '" + org.apache.flink.connectors.cassandra.streaming.tuple.wordcount.FileWordCount.class.getSimpleName() +
"--input <file-path>', where 'input' is the path to a text file");
return;
}
DataServiceFacade dataService = new DataServiceFacade(DataEntityType.WORD_COUNT);
dataService.setUpEmbeddedCassandra();
dataService.setUpDataModel();
LOG.info("Example starts!");
// get input data by reading content from file
DataSet<String> text = job.readTextFile(inputPath);
DataSet<Tuple2<String, Long>> result =
// split up the lines in pairs (2-tuples) containing: (word,1)
text.flatMap(new FlatMapFunction<String, Tuple2<String, Long>>() {
@Override
public void flatMap(String value, Collector<Tuple2<String, Long>> out) throws Exception {
// normalize and split the line
String[] words = value.toLowerCase().split("\\W+");
// emit the pairs
for (String word : words) {
//Do not accept empty word, since word is defined as primary key in C* table
if (!word.isEmpty()) {
out.collect(new Tuple2<String, Long>(word, 1L));
}
}
}
})
// group by the tuple field "0" and sum up tuple field "1"
.groupBy(0)
.sum(1);
//Update the results to C* sink
CassandraOutputFormat sink = new CassandraOutputFormat("INSERT INTO " + WordCount.CQL_KEYSPACE_NAME + "." + WordCount.CQL_TABLE_NAME + "(word, count) " +
"values (?, ?);", new ClusterBuilder() {
@Override
protected Cluster buildCluster(Cluster.Builder builder) {
builder.addContactPoint("127.0.0.1");
return builder.build();
}
});
result.output(sink);
// emit result
if (outputPath != null) {
result.writeAsText(outputPath);
}
// execute program
job.execute("[BATCH] FileWordCount w/ C* Sink");
LOG.info("20 sec sleep ...");
Thread.sleep(20 * 1000);
LOG.info("20 sec sleep ... DONE");
}
Example 9: main
import org.apache.flink.api.java.utils.ParameterTool; // import the package/class this method depends on
public static void main(String[] args) throws Exception {
// the host and the port to connect to
final String hostname;
final int port;
try {
final ParameterTool params = ParameterTool.fromArgs(args);
hostname = params.has("hostname") ? params.get("hostname") : "localhost";
port = params.getInt("port");
} catch (Exception e) {
System.err.println("No port specified. Please run 'SocketWindowWordCount " +
"--hostname <hostname> --port <port>', where hostname (localhost by default) " +
"and port is the address of the text server");
System.err.println("To start a simple text server, run 'netcat -l <port>' and " +
"type the input text into the command line");
return;
}
DataServiceFacade dataService = new DataServiceFacade(DataEntityType.WORD_COUNT);
dataService.setUpEmbeddedCassandra();
dataService.setUpDataModel();
// get the execution environment
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
// get input data by connecting to the socket
DataStream<String> text = env.socketTextStream(hostname, port, "\n");
// parse the data, group it, window it, and aggregate the counts
DataStream<WordCount> result = text
.flatMap(new FlatMapFunction<String, WordCount>() {
@Override
public void flatMap(String value, Collector<WordCount> out) {
// normalize and split the line
String[] words = value.toLowerCase().split("\\s");
// emit the pairs
for (String word : words) {
if (!word.isEmpty()) {
//Do not accept empty word, since word is defined as primary key in C* table
out.collect(new WordCount(word, 1L));
}
}
}
})
.keyBy("word")
.timeWindow(Time.seconds(5))
.reduce(new ReduceFunction<WordCount>() {
@Override
public WordCount reduce(WordCount a, WordCount b) {
return new WordCount(a.getWord(), a.getCount() + b.getCount());
}
});
CassandraSink.addSink(result)
.setHost("127.0.0.1")
.build();
CQLPrintSinkFunction<WordCount, WordCount> func = new CQLPrintSinkFunction();
func.setDataModel(dataService, 10);
result.addSink(func).setParallelism(1);
env.execute("Socket Window WordCount (POJO) w/ C* Sink");
}
Example 10: main
import org.apache.flink.api.java.utils.ParameterTool; // import the package/class this method depends on
public static void main(String[] args) throws Exception {
// get the execution environment
final StreamExecutionEnvironment job = StreamExecutionEnvironment.getExecutionEnvironment();
String inputPath, outputPath = null;
try {
final ParameterTool params = ParameterTool.fromArgs(args);
inputPath = params.get("input");
if (params.has("output")) {
outputPath = params.get("output");
}
// make parameters available in the web interface
job.getConfig().setGlobalJobParameters(params);
} catch (Exception e) {
System.err.println("No input specified. Please run '" + FileWordCount.class.getSimpleName() +
"--input <file-path>', where 'input' is the path to a text file");
return;
}
DataServiceFacade dataService = new DataServiceFacade(DataEntityType.WORD_COUNT);
dataService.setUpEmbeddedCassandra();
dataService.setUpDataModel();
LOG.info("Example starts!");
// get input data by reading content from file
DataStream<String> text = job.readTextFile(inputPath);
DataStream<Tuple2<String, Long>> result =
// split up the lines in pairs (2-tuples) containing: (word,1)
text.flatMap(new FlatMapFunction<String, Tuple2<String, Long>>() {
@Override
public void flatMap(String value, Collector<Tuple2<String, Long>> out) throws Exception {
// normalize and split the line
String[] words = value.toLowerCase().split("\\W+");
// emit the pairs
for (String word : words) {
//Do not accept empty word, since word is defined as primary key in C* table
if (!word.isEmpty()) {
out.collect(new Tuple2<String, Long>(word, 1L));
}
}
}
})
// group by the tuple field "0" and sum up tuple field "1"
.keyBy(0)
.sum(1);
//Update the results to C* sink
CassandraSink.addSink(result)
.setQuery("INSERT INTO " + WordCount.CQL_KEYSPACE_NAME + "." + WordCount.CQL_TABLE_NAME + "(word, count) " +
"values (?, ?);")
.setHost("127.0.0.1")
.build();
// emit result
if (outputPath != null) {
result.writeAsText(outputPath);
} else {
System.out.println("Printing result to stdout. Use --output to specify output path.");
CQLPrintSinkFunction<Tuple2<String, Long>, WordCount> func = new CQLPrintSinkFunction();
func.setDataModel(dataService, 10);
result.addSink(func).setParallelism(1);
}
// execute program
job.execute("[STREAM] FileWordCount w/ C* Sink");
}
Example 11: main
import org.apache.flink.api.java.utils.ParameterTool; // import the package/class this method depends on
public static void main(String[] args) throws Exception {
// the host and the port to connect to
final String hostname;
final int port;
try {
final ParameterTool params = ParameterTool.fromArgs(args);
hostname = params.has("hostname") ? params.get("hostname") : "localhost";
port = params.getInt("port");
} catch (Exception e) {
System.err.println("No port specified. Please run 'SocketWindowWordCount " +
"--hostname <hostname> --port <port>', where hostname (localhost by default) " +
"and port is the address of the text server");
System.err.println("To start a simple text server, run 'netcat -l <port>' and " +
"type the input text into the command line");
return;
}
DataServiceFacade dataService = new DataServiceFacade(DataEntityType.WORD_COUNT);
dataService.setUpEmbeddedCassandra();
dataService.setUpDataModel();
// get the execution environment
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
// get input data by connecting to the socket
DataStream<String> text = env.socketTextStream(hostname, port, "\n");
// parse the data, group it, window it, and aggregate the counts
DataStream<Tuple2<String, Long>> result = text
.flatMap(new FlatMapFunction<String, Tuple2<String, Long>>() {
@Override
public void flatMap(String value, Collector<Tuple2<String, Long>> out) {
// normalize and split the line
String[] words = value.toLowerCase().split("\\s");
// emit the pairs
for (String word : words) {
//Do not accept empty word, since word is defined as primary key in C* table
if (!word.isEmpty()) {
out.collect(new Tuple2<String, Long>(word, 1L));
}
}
}
})
.keyBy(0)
.timeWindow(Time.seconds(5))
.sum(1)
;
CassandraSink.addSink(result)
.setQuery("INSERT INTO " + WordCount.CQL_KEYSPACE_NAME + "." + WordCount.CQL_TABLE_NAME + "(word, count) " +
"values (?, ?);")
.setHost("127.0.0.1")
.build();
CQLPrintSinkFunction<Tuple2<String, Long>, WordCount> func = new CQLPrintSinkFunction();
func.setDataModel(dataService, 10);
result.addSink(func).setParallelism(1);
env.execute("Socket Window WordCount (Tuple) w/ C* Sink");
}
Example 12: main
import org.apache.flink.api.java.utils.ParameterTool; // import the package/class this method depends on
/**
* The program main method.
* @param args the command line arguments.
*/
public static void main(String[] args) throws Exception {
// CONFIGURATION
ParameterTool parameter = ParameterTool.fromArgs(args);
final String kafkaZookeeper = parameter.get("kafka.zookeeper", "localhost:2181");
final String kafkaBootstrap = parameter.get("kafka.bootstrap", "localhost:9092");
final String kafkaTopic = parameter.get("kafka.topic", "topic-query-3");
final Path outputPath = FileSystems.getDefault().getPath(parameter.get("output", PROGRAM_NAME + ".out"));
final String elasticsearch = parameter.get("elasticsearch", null);
final long windowSize = parameter.getLong("windowSize", 10);
final TimeUnit windowUnit = TimeUnit.valueOf(parameter.get("windowUnit", "SECONDS"));
final int rankSize = parameter.getInt("rankSize", 3);
final long tsEnd = parameter.getLong("tsEnd", 100000L);
final Set<String> ignoredWords = Sets.newHashSet(parameter.get("ignoredWords", "")
.trim().split(","));
final int parallelism = parameter.getInt("parallelism", 1);
// ENVIRONMENT
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
env.setParallelism(parallelism);
final KafkaProperties kafkaProps = new KafkaProperties(kafkaBootstrap, kafkaZookeeper);
final ESProperties elasticsearchProps = ESProperties.fromPropString(elasticsearch);
// CONFIGURATION RESUME
System.out.println("############################################################################");
System.out.printf("%s\n", PROGRAM_NAME);
System.out.println("----------------------------------------------------------------------------");
System.out.printf("%s\n", PROGRAM_DESCRIPTION);
System.out.println("****************************************************************************");
System.out.println("Kafka Zookeeper: " + kafkaZookeeper);
System.out.println("Kafka Bootstrap: " + kafkaBootstrap);
System.out.println("Kafka Topic: " + kafkaTopic);
System.out.println("Output: " + outputPath);
System.out.println("Elasticsearch: " + elasticsearch);
System.out.println("Window: " + windowSize + " " + windowUnit);
System.out.println("Rank Size: " + rankSize);
System.out.println("Timestamp End: " + tsEnd);
System.out.println("Ignored Words: " + ignoredWords);
System.out.println("Parallelism: " + parallelism);
System.out.println("############################################################################");
// TOPOLOGY
DataStream<TimedWord> timedWords = env.addSource(new StoppableTimedWordKafkaSource(kafkaTopic, kafkaProps, tsEnd));
DataStream<TimedWord> filteredTimedWords = timedWords.filter(new TimedWordFilter(ignoredWords))
.assignTimestampsAndWatermarks(new EventTimestampExtractor());
DataStream<WindowWordWithCount> windowCounts = filteredTimedWords
.keyBy(new WordKeySelector())
.timeWindow(Time.of(windowSize, windowUnit))
.aggregate(new TimedWordCounterAggregator(), new TimedWordCounterWindowFunction());
DataStream<WindowWordRanking> ranking = windowCounts.timeWindowAll(Time.of(windowSize, windowUnit))
.apply(new WordRankerWindowFunction(rankSize));
ranking.writeAsText(outputPath.toAbsolutePath().toString(), FileSystem.WriteMode.OVERWRITE);
if (elasticsearch != null) {
ranking.addSink(new ESSink<>(elasticsearchProps,
new MyESSinkFunction(elasticsearchProps.getIndexName(), elasticsearchProps.getTypeName()))
);
}
// EXECUTION
env.execute(PROGRAM_NAME);
}
Example 13: main
import org.apache.flink.api.java.utils.ParameterTool; // import the package/class this method depends on
/**
* The program main method.
* @param args the command line arguments.
*/
public static void main(String[] args) throws Exception {
// CONFIGURATION
ParameterTool parameter = ParameterTool.fromArgs(args);
final String kafkaZookeeper = parameter.get("kafka.zookeeper", "localhost:2181");
final String kafkaBootstrap = parameter.get("kafka.bootstrap", "localhost:9092");
final String kafkaTopic = parameter.get("kafka.topic", "socstream");
final Path outputPath = FileSystems.getDefault().getPath(parameter.get("output", PROGRAM_NAME + ".out"));
final String elasticsearch = parameter.get("elasticsearch", null);
final Path metadataPath = FileSystems.getDefault().getPath(parameter.get("metadata", "./metadata.yml"));
final long windowSize = parameter.getLong("windowSize", 70);
final TimeUnit windowUnit = TimeUnit.valueOf(parameter.get("windowUnit", "MINUTES"));
final long matchStart = parameter.getLong("match.start", 10753295594424116L);
final long matchEnd = parameter.getLong("match.end", 14879639146403495L);
final long matchIntervalStart = parameter.getLong("match.interval.start", 12557295594424116L);
final long matchIntervalEnd = parameter.getLong("match.interval.end", 13086639146403495L);
final int parallelism = parameter.getInt("parallelism", 1);
final Match match = MatchService.fromYamlFile(metadataPath);
final Set<Long> ignoredSensors = MatchService.collectIgnoredSensors(match);
final Map<Long,Long> sid2Pid = MatchService.collectSid2Pid(match);
// ENVIRONMENT
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
final KafkaProperties kafkaProps = new KafkaProperties(kafkaBootstrap);
final ESProperties elasticsearchProps = ESProperties.fromPropString(elasticsearch);
// CONFIGURATION RESUME
System.out.println("############################################################################");
System.out.printf("%s\n", PROGRAM_NAME);
System.out.println("----------------------------------------------------------------------------");
System.out.printf("%s\n", PROGRAM_DESCRIPTION);
System.out.println("****************************************************************************");
System.out.println("Kafka Zookeeper: " + kafkaZookeeper);
System.out.println("Kafka Bootstrap: " + kafkaBootstrap);
System.out.println("Kafka Topic: " + kafkaTopic);
System.out.println("Output: " + outputPath);
System.out.println("Elasticsearch: " + elasticsearch);
System.out.println("Metadata: " + metadataPath);
System.out.println("Window: " + windowSize + " " + windowUnit);
System.out.println("Match Start: " + matchStart);
System.out.println("Match End: " + matchEnd);
System.out.println("Match Interval Start: " + matchIntervalStart);
System.out.println("Match Interval End: " + matchIntervalEnd);
System.out.println("Ignored Sensors: " + ignoredSensors);
System.out.println("Parallelism: " + parallelism);
System.out.println("############################################################################");
// TOPOLOGY
DataStream<RichSensorEvent> sensorEvents = env.addSource(
new RichSensorEventKafkaSource(kafkaTopic, kafkaProps, matchStart, matchEnd,
matchIntervalStart, matchIntervalEnd, ignoredSensors, sid2Pid
)
).assignTimestampsAndWatermarks(new RichSensorEventTimestampExtractor()).setParallelism(parallelism);
DataStream<PlayerRunningStatistics> statistics = sensorEvents.keyBy(new RichSensorEventKeyer())
.timeWindow(Time.of(windowSize, windowUnit))
.aggregate(new PlayerRunningStatisticsCalculatorAggregator(), new PlayerRunningStatisticsCalculatorWindowFunction())
.setParallelism(parallelism);
statistics.writeAsText(outputPath.toAbsolutePath().toString(), FileSystem.WriteMode.OVERWRITE).setParallelism(1);
if (elasticsearch != null) {
statistics.addSink(new ESSink<>(elasticsearchProps,
new PlayerRunningStatisticsESSinkFunction(elasticsearchProps.getIndexName(), elasticsearchProps.getTypeName()))
).setParallelism(1);
}
// EXECUTION
env.execute(PROGRAM_NAME);
}
Example 14: main
import org.apache.flink.api.java.utils.ParameterTool; // import the package/class this method depends on
/**
* The program main method.
* @param args the command line arguments.
*/
public static void main(String[] args) throws Exception {
// CONFIGURATION
ParameterTool parameter = ParameterTool.fromArgs(args);
final String kafkaZookeeper = parameter.get("kafka.zookeeper", "localhost:2181");
final String kafkaBootstrap = parameter.get("kafka.bootstrap", "localhost:9092");
final String kafkaTopic = parameter.get("kafka.topic", "socstream");
final Path outputPath = FileSystems.getDefault().getPath(parameter.get("output", PROGRAM_NAME + ".out"));
final String elasticsearch = parameter.get("elasticsearch", null);
final Path metadataPath = FileSystems.getDefault().getPath(parameter.get("metadata", "./metadata.yml"));
final long windowSize = parameter.getLong("windowSize", 70);
final TimeUnit windowUnit = TimeUnit.valueOf(parameter.get("windowUnit", "MINUTES"));
final int rankSize = parameter.getInt("rankSize", 5);
final long matchStart = parameter.getLong("match.start", 10753295594424116L);
final long matchEnd = parameter.getLong("match.end", 14879639146403495L);
final long matchIntervalStart = parameter.getLong("match.interval.start", 12557295594424116L);
final long matchIntervalEnd = parameter.getLong("match.interval.end", 13086639146403495L);
final int parallelism = parameter.getInt("parallelism", 1);
final Match match = MatchService.fromYamlFile(metadataPath);
final Set<Long> ignoredSensors = MatchService.collectIgnoredSensors(match);
final Map<Long,Long> sid2Pid = MatchService.collectSid2Pid(match);
// ENVIRONMENT
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
final KafkaProperties kafkaProps = new KafkaProperties(kafkaBootstrap);
final ESProperties elasticsearchProps = ESProperties.fromPropString(elasticsearch);
// CONFIGURATION RESUME
System.out.println("############################################################################");
System.out.printf("%s\n", PROGRAM_NAME);
System.out.println("----------------------------------------------------------------------------");
System.out.printf("%s\n", PROGRAM_DESCRIPTION);
System.out.println("****************************************************************************");
System.out.println("Kafka Zookeeper: " + kafkaZookeeper);
System.out.println("Kafka Bootstrap: " + kafkaBootstrap);
System.out.println("Kafka Topic: " + kafkaTopic);
System.out.println("Output: " + outputPath);
System.out.println("Elasticsearch: " + elasticsearch);
System.out.println("Metadata: " + metadataPath);
System.out.println("Window: " + windowSize + " " + windowUnit);
System.out.println("Rank Size: " + rankSize);
System.out.println("Match Start: " + matchStart);
System.out.println("Match End: " + matchEnd);
System.out.println("Match Interval Start: " + matchIntervalStart);
System.out.println("Match Interval End: " + matchIntervalEnd);
System.out.println("Ignored Sensors: " + ignoredSensors);
System.out.println("Parallelism: " + parallelism);
System.out.println("############################################################################");
// TOPOLOGY
DataStream<SpeedSensorEvent> sensorEvents = env.addSource(
new SpeedSensorEventKafkaSource(kafkaTopic, kafkaProps, matchStart, matchEnd,
matchIntervalStart, matchIntervalEnd, ignoredSensors, sid2Pid
)
).assignTimestampsAndWatermarks(new SpeedSensorEventTimestampExtractor()).setParallelism(parallelism);
DataStream<PlayerSpeedStatistics> statistics = sensorEvents.keyBy(new SpeedSensorEventKeyer())
.timeWindow(Time.of(windowSize, windowUnit))
.aggregate(new PlayerSpeedStatisticsCalculatorAggregator(), new PlayerSpeedStatisticsCalculatorWindowFunction())
.setParallelism(parallelism);
DataStream<PlayersSpeedRanking> ranking = statistics.timeWindowAll(Time.of(windowSize, windowUnit))
.apply(new GlobalRankerWindowFunction(rankSize));
ranking.writeAsText(outputPath.toAbsolutePath().toString(), FileSystem.WriteMode.OVERWRITE).setParallelism(1);
if (elasticsearch != null) {
ranking.addSink(new ESSink<>(elasticsearchProps,
new PlayerSpeedRankingESSinkFunction(elasticsearchProps.getIndexName(), elasticsearchProps.getTypeName()))
).setParallelism(1);
}
// EXECUTION
env.execute(PROGRAM_NAME);
}
Example 15: main
import org.apache.flink.api.java.utils.ParameterTool; // import the package/class this method depends on
/**
* The program main method.
* @param args the command line arguments.
*/
public static void main(String[] args) throws Exception {
// CONFIGURATION
ParameterTool parameter = ParameterTool.fromArgs(args);
final String kafkaZookeeper = parameter.get("kafka.zookeeper", "localhost:2181");
final String kafkaBootstrap = parameter.get("kafka.bootstrap", "localhost:9092");
final String kafkaTopic = parameter.get("kafka.topic", "socstream");
final Path outputPath = FileSystems.getDefault().getPath(parameter.get("output", PROGRAM_NAME + ".out"));
final String elasticsearch = parameter.get("elasticsearch", null);
final Path metadataPath = FileSystems.getDefault().getPath(parameter.get("metadata", "./metadata.yml"));
final long windowSize = parameter.getLong("windowSize", 70);
final TimeUnit windowUnit = TimeUnit.valueOf(parameter.get("windowUnit", "MINUTES"));
final long matchStart = parameter.getLong("match.start", 10753295594424116L);
final long matchEnd = parameter.getLong("match.end", 14879639146403495L);
final long matchIntervalStart = parameter.getLong("match.interval.start", 12557295594424116L);
final long matchIntervalEnd = parameter.getLong("match.interval.end", 13086639146403495L);
final int parallelism = parameter.getInt("parallelism", 1);
final Match match = MatchService.fromYamlFile(metadataPath);
final Set<Long> ignoredSensors = MatchService.collectIgnoredSensors(match);
final Map<Long,Long> sid2Pid = MatchService.collectSid2Pid(match);
// ENVIRONMENT
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
final KafkaProperties kafkaProps = new KafkaProperties(kafkaBootstrap);
final ESProperties elasticsearchProps = ESProperties.fromPropString(elasticsearch);
// CONFIGURATION RESUME
System.out.println("############################################################################");
System.out.printf("%s\n", PROGRAM_NAME);
System.out.println("----------------------------------------------------------------------------");
System.out.printf("%s\n", PROGRAM_DESCRIPTION);
System.out.println("****************************************************************************");
System.out.println("Kafka Zookeeper: " + kafkaZookeeper);
System.out.println("Kafka Bootstrap: " + kafkaBootstrap);
System.out.println("Kafka Topic: " + kafkaTopic);
System.out.println("Output: " + outputPath);
System.out.println("Elasticsearch: " + elasticsearch);
System.out.println("Metadata: " + metadataPath);
System.out.println("Window: " + windowSize + " " + windowUnit);
System.out.println("Match Start: " + matchStart);
System.out.println("Match End: " + matchEnd);
System.out.println("Match Interval Start: " + matchIntervalStart);
System.out.println("Match Interval End: " + matchIntervalEnd);
System.out.println("Ignored Sensors: " + ignoredSensors);
System.out.println("Parallelism: " + parallelism);
System.out.println("############################################################################");
// TOPOLOGY
DataStream<PositionSensorEvent> sensorEvents = env.addSource(
new PositionSensorEventKafkaSource(kafkaTopic, kafkaProps, matchStart, matchEnd,
matchIntervalStart, matchIntervalEnd, ignoredSensors, sid2Pid
).assignTimestampsAndWatermarks(new PositionSensorEventTimestampExtractor())).setParallelism(1);
DataStream<PlayerGridStatistics> statistics = sensorEvents.keyBy(new PositionSensorEventKeyer())
.timeWindow(Time.of(windowSize, windowUnit))
.aggregate(new PlayerOnGridStatisticsCalculatorAggregator(), new PlayerOnGridStatisticsCalculatorWindowFunction())
.setParallelism(parallelism);
statistics.writeAsText(outputPath.toAbsolutePath().toString(), FileSystem.WriteMode.OVERWRITE).setParallelism(1);
if (elasticsearch != null) {
statistics.addSink(new ESSink<>(elasticsearchProps,
new PlayerGridStatisticsESSinkFunction(elasticsearchProps.getIndexName(), elasticsearchProps.getTypeName()))
).setParallelism(1);
}
// EXECUTION
env.execute(PROGRAM_NAME);
}