This article collects typical usage examples of the Java class org.apache.flink.api.java.utils.ParameterTool. If you are unsure what ParameterTool is for or how to use it, the curated code examples below should help.
The ParameterTool class belongs to the org.apache.flink.api.java.utils package. A total of 15 code examples of the class are shown below, sorted by popularity.
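Before the individual examples, here is a minimal, self-contained sketch of the most common ParameterTool calls (argument parsing, required and defaulted lookups, and registering the parameters with the execution environment); the parameter names used here are made up for illustration:

import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class ParameterToolBasics {
    public static void main(String[] args) throws Exception {
        // parse "--key value" pairs from the command line
        ParameterTool params = ParameterTool.fromArgs(args);
        String input = params.getRequired("input");        // throws if --input is missing
        int parallelism = params.getInt("parallelism", 4); // falls back to the default 4
        boolean verbose = params.has("verbose");           // simple presence check

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(parallelism);
        // make the parameters available to rich functions and visible in the web UI
        env.getConfig().setGlobalJobParameters(params);
        // ... define sources/transformations/sinks using `input` and `verbose`, then:
        // env.execute("ParameterTool basics");
    }
}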
Example 1: main
import org.apache.flink.api.java.utils.ParameterTool; // import the required package/class
public static void main(String[] args) throws Exception {
    ParameterTool pt = ParameterTool.fromArgs(args);
    StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
    see.setParallelism(1);
    Properties kinesisConsumerConfig = new Properties();
    kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_REGION, pt.getRequired("region"));
    kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_ACCESS_KEY_ID, pt.getRequired("accesskey"));
    kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_SECRET_ACCESS_KEY, pt.getRequired("secretkey"));
    DataStream<String> kinesis = see.addSource(new FlinkKinesisConsumer<>(
            "flink-test",
            new SimpleStringSchema(),
            kinesisConsumerConfig));
    kinesis.print();
    see.execute();
}
Example 2: main
import org.apache.flink.api.java.utils.ParameterTool; // import the required package/class
public static void main(String[] args) throws Exception {
    // parse parameters
    ParameterTool params = ParameterTool.fromArgs(args);
    // path to ratings.csv file
    String ratingsCsvPath = params.getRequired("input");
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSource<String> file = env.readTextFile(ratingsCsvPath);
    file.flatMap(new ExtractRating())
            .groupBy(0)
            // .reduceGroup(new SumRatingCount())
            .sum(1)
            .print();
}
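ExtractRating itself is not part of the listing. A plausible sketch, assuming the MovieLens ratings.csv layout (userId,movieId,rating,timestamp with a header row) and that the pipeline counts ratings per score value, might look like this:

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;

// Hypothetical reconstruction of ExtractRating: emits (rating, 1) so that
// groupBy(0).sum(1) above counts how many times each rating value occurs.
public static class ExtractRating implements FlatMapFunction<String, Tuple2<String, Integer>> {
    @Override
    public void flatMap(String line, Collector<Tuple2<String, Integer>> out) {
        // skip the CSV header, assuming it starts with "userId"
        if (line.startsWith("userId")) {
            return;
        }
        String[] fields = line.split(",");
        if (fields.length >= 3) {
            out.collect(new Tuple2<>(fields[2], 1));
        }
    }
}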
Example 3: parseParams
import org.apache.flink.api.java.utils.ParameterTool; // import the required package/class
public boolean parseParams(String[] args) throws Exception {
    boolean wasHelpPrinted = false;
    ParameterTool parameter = ParameterTool.fromArgs(args);
    if (parameter.has("help")) {
        printHelpMessage();
        wasHelpPrinted = true;
    } else {
        try {
            dataFilePath = parameter.getRequired("input");
        } catch (Exception e) {
            printHelpMessage();
            throw e;
        }
    }
    return wasHelpPrinted;
}
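A hypothetical caller of parseParams, showing how the returned flag would typically be used (the class and field names here are assumptions):

public static void main(String[] args) throws Exception {
    MyJob job = new MyJob();   // hypothetical class that owns parseParams and dataFilePath
    if (job.parseParams(args)) {
        return;                // --help was requested, nothing more to do
    }
    // ... continue building the job with job.dataFilePath
}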
Example 4: main
import org.apache.flink.api.java.utils.ParameterTool; // import the required package/class
public static void main(String[] args) throws Exception {
    ParameterTool params = ParameterTool.fromArgs(args);
    final String nycTaxiRidesPath = params.get("nycTaxiRidesPath");
    final int maxEventDelay = 60;       // events are out of order by max 60 seconds
    final int servingSpeedFactor = 600; // events of 10 minutes are served in 1 second
    // set up streaming execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // start the data generator
    DataStream<TaxiRide> rides = env.addSource(
            new TaxiRideSource(nycTaxiRidesPath, maxEventDelay, servingSpeedFactor));
    // ===============================================================================
    //   1. clean up `rides`, so that the output stream only contains events
    //      with valid geo coordinates within NYC.
    //   2. print out the result stream to console
    // ===============================================================================
    // run the cleansing pipeline
    env.execute("Taxi Ride Cleansing");
}
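The banner comment marks an exercise whose body has been left out. A minimal sketch of the missing cleansing step, assuming a hypothetical GeoUtils.isInNYC(lon, lat) helper and TaxiRide fields named startLon/startLat/endLon/endLat (names borrowed from the Flink training exercises), would sit just before env.execute():

import org.apache.flink.api.common.functions.FilterFunction;

// hypothetical cleansing step: keep only rides that both start and end inside NYC
DataStream<TaxiRide> cleansedRides = rides.filter(new FilterFunction<TaxiRide>() {
    @Override
    public boolean filter(TaxiRide ride) {
        return GeoUtils.isInNYC(ride.startLon, ride.startLat)
                && GeoUtils.isInNYC(ride.endLon, ride.endLat);
    }
});
cleansedRides.print();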
Example 5: main
import org.apache.flink.api.java.utils.ParameterTool; // import the required package/class
/**
 * Runs the JobManager process in {@link JobManagerMode#CLUSTER}.
 *
 * <p><strong>Required argument</strong>: <code>port</code>. Start the process with
 * <code>--port PORT</code>.
 *
 * <p>Other arguments are parsed to a {@link Configuration} and passed to the
 * JobManager, for instance: <code>--high-availability ZOOKEEPER
 * --high-availability.zookeeper.quorum "xyz:123:456"</code>.
 */
public static void main(String[] args) {
    try {
        ParameterTool params = ParameterTool.fromArgs(args);
        Configuration config = params.getConfiguration();
        LOG.info("Configuration: {}.", config);
        // Run the JobManager
        JobManager.runJobManager(config, JobManagerMode.CLUSTER, "localhost", 0);
        // Run forever. Forever, ever? Forever, ever!
        new CountDownLatch(1).await();
    } catch (Throwable t) {
        LOG.error("Failed to start JobManager process", t);
        System.exit(1);
    }
}
Example 6: main
import org.apache.flink.api.java.utils.ParameterTool; // import the required package/class
public static void main(String[] args) throws Exception {
    // parse arguments
    ParameterTool params = ParameterTool.fromPropertiesFile(args[0]);
    // create streaming environment
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // enable event time processing
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    // enable fault-tolerance
    env.enableCheckpointing(1000);
    // enable restarts
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(50, 500L));
    env.setStateBackend(new FsStateBackend("file:///home/robert/flink-workdir/flink-streaming-etl/state-backend"));
    // run each operator separately
    env.disableOperatorChaining();
    // get data from Kafka
    Properties kParams = params.getProperties();
    kParams.setProperty("group.id", UUID.randomUUID().toString());
    DataStream<ObjectNode> inputStream = env
            .addSource(new FlinkKafkaConsumer09<>(params.getRequired("topic"), new JSONDeserializationSchema(), kParams)).name("Kafka 0.9 Source")
            .assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<ObjectNode>(Time.minutes(1L)) {
                @Override
                public long extractTimestamp(ObjectNode jsonNodes) {
                    return jsonNodes.get("timestamp_ms").asLong();
                }
            }).name("Timestamp extractor");
    // filter out records without lang field
    DataStream<ObjectNode> tweetsWithLang = inputStream.filter(jsonNode -> jsonNode.has("user") && jsonNode.get("user").has("lang")).name("Filter records without 'lang' field");
    // select only lang = "en" tweets
    DataStream<ObjectNode> englishTweets = tweetsWithLang.filter(jsonNode -> jsonNode.get("user").get("lang").asText().equals("en")).name("Select 'lang'=en tweets");
    // write to file system
    RollingSink<ObjectNode> rollingSink = new RollingSink<>(params.get("sinkPath", "/home/robert/flink-workdir/flink-streaming-etl/rolling-sink"));
    rollingSink.setBucketer(new DateTimeBucketer("yyyy-MM-dd-HH-mm")); // do a bucket for each minute
    englishTweets.addSink(rollingSink).name("Rolling FileSystem Sink");
    // build aggregates (count per language) using window (10 seconds tumbling):
    DataStream<Tuple3<Long, String, Long>> languageCounts = tweetsWithLang
            .keyBy(jsonNode -> jsonNode.get("user").get("lang").asText())
            .timeWindow(Time.seconds(10))
            .apply(new Tuple3<>(0L, "", 0L), new JsonFoldCounter(), new CountEmitter()).name("Count per Language (10 seconds tumbling)");
    // write window aggregate to ElasticSearch
    List<InetSocketAddress> transportNodes = ImmutableList.of(new InetSocketAddress(InetAddress.getByName("localhost"), 9300));
    ElasticsearchSink<Tuple3<Long, String, Long>> elasticsearchSink = new ElasticsearchSink<>(params.toMap(), transportNodes, new ESRequest());
    languageCounts.addSink(elasticsearchSink).name("ElasticSearch2 Sink");
    // word-count on the tweet stream
    DataStream<Tuple2<Date, List<Tuple2<String, Long>>>> topWordCount = tweetsWithLang
            // get text from tweets
            .map(tweet -> tweet.get("text").asText()).name("Get text from Tweets")
            // split text into (word, 1) tuples
            .flatMap(new FlatMapFunction<String, Tuple2<String, Long>>() {
                @Override
                public void flatMap(String s, Collector<Tuple2<String, Long>> collector) throws Exception {
                    String[] splits = s.split(" ");
                    for (String sp : splits) {
                        collector.collect(new Tuple2<>(sp, 1L));
                    }
                }
            }).name("Tokenize words")
            // group by word
            .keyBy(0)
            // build 1 min windows, compute every 10 seconds --> count word frequency
            .timeWindow(Time.minutes(1L), Time.seconds(10L)).apply(new WordCountingWindow()).name("Count word frequency (1 min, 10 sec sliding window)")
            // build top n every 10 seconds
            .timeWindowAll(Time.seconds(10L)).apply(new TopNWords(10)).name("TopN Window (10s)");
    // write top Ns to Kafka topic
    topWordCount.addSink(new FlinkKafkaProducer09<>(params.getRequired("wc-topic"), new ListSerSchema(), params.getProperties())).name("Write topN to Kafka");
    env.execute("Streaming ETL");
}
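Example 6 takes all of its settings from a properties file passed as the first program argument (ParameterTool.fromPropertiesFile(args[0])). A hypothetical file covering the keys referenced in the code, with placeholder values, could look like this:

# keys read explicitly in the code above
topic=tweets
wc-topic=wordcount-top-n
sinkPath=/tmp/flink-streaming-etl/rolling-sink
# remaining entries are forwarded to the Kafka connectors via params.getProperties()
bootstrap.servers=localhost:9092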
Example 7: main
import org.apache.flink.api.java.utils.ParameterTool; // import the required package/class
public static void main(String args[]) throws Exception {
    StreamExecutionEnvironment env =
            StreamExecutionEnvironment.getExecutionEnvironment();
    ParameterTool parameterTool = ParameterTool.fromArgs(args);
    DataStream<String> messageStream = env
            .addSource(new FlinkKafkaConsumer082<>(
                    parameterTool.getRequired("topic"),
                    new SimpleStringSchema(),
                    parameterTool.getProperties()));
    // print() will write the contents of the stream to the TaskManager's standard out stream.
    // The rebalance call causes a repartitioning of the data so that all machines
    // see the messages (for example, in cases when "num kafka partitions" < "num flink operators").
    messageStream.rebalance().map(new MapFunction<String, String>() {
        private static final long serialVersionUID = -6867736771747690202L;
        @Override
        public String map(String value) throws Exception {
            return "Kafka and Flink says: " + value;
        }
    }).print();
    env.execute();
}
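Note that parameterTool.getProperties() is handed directly to the Kafka consumer, so one argument list supplies both the topic read via getRequired("topic") and the connection settings; a hypothetical invocation (all addresses are placeholders) would pass something like --topic test --bootstrap.servers localhost:9092 --zookeeper.connect localhost:2181 --group.id flink-kafka-demo.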
Example 8: main
import org.apache.flink.api.java.utils.ParameterTool; // import the required package/class
public static void main(String[] args) throws Exception {
    ParameterTool parameters = ParameterTool.fromArgs(args);
    StreamExecutionEnvironment environment = new ParametersExecutionEnvironmentFactory(parameters)
            .createExecutionEnvironment();
    DataStream<String> stream = environment
            .addSource(new FlinkKafkaConsumerFactory(parameters).createConsumer())
            .name("Consume messages from Kafka.")
            .rebalance();
    DataStream<Tuple2<String, String>> events = stream
            .flatMap(new EventJsonFlapMap())
            .name("Run Eventor, serialize event and produce tuple.");
    ElasticsearchSink sink = new ElasticsearchEventSinkFactory(parameters).create();
    events.addSink(sink)
            .name("Push event to Elasticsearch.");
    environment.execute("Ingest events from Kafka and index in Elasticsearch.");
}
Example 9: main
import org.apache.flink.api.java.utils.ParameterTool; // import the required package/class
public static void main(String[] args) throws Exception {
    ParameterTool tool = ParameterTool.fromArgs(args);
    String topic = tool.getRequired("kafka.topic");
    Properties kafkaConsumerProps = new Properties();
    kafkaConsumerProps.setProperty("bootstrap.servers", tool.getRequired("kafkabroker"));
    kafkaConsumerProps.setProperty("group.id", tool.getRequired("kafka.groupId"));
    kafkaConsumerProps.setProperty("zookeeper.connect", tool.get("zookeeper.host", "localhost:2181"));
    kafkaConsumerProps.setProperty("auto.offset.reset", tool.getBoolean("from-beginning", false) ? "smallest" : "largest");
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<String> textStream = env
            .addSource(new FlinkKafkaConsumer08<>(topic, new SimpleStringSchema(), kafkaConsumerProps));
    textStream.flatMap(new LineSplitter())
            .keyBy(0)
            .sum(1)
            .print();
    env.execute("WordCount from Kafka Example");
}
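LineSplitter is not shown in the listing. A plausible sketch that is consistent with the keyBy(0).sum(1) word count above (the class name is reused, but the exact implementation is an assumption):

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;

// Hypothetical reconstruction: split each line into (word, 1) pairs for counting.
public static final class LineSplitter implements FlatMapFunction<String, Tuple2<String, Integer>> {
    @Override
    public void flatMap(String line, Collector<Tuple2<String, Integer>> out) {
        for (String word : line.toLowerCase().split("\\W+")) {
            if (!word.isEmpty()) {
                out.collect(new Tuple2<>(word, 1));
            }
        }
    }
}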
Example 10: main
import org.apache.flink.api.java.utils.ParameterTool; // import the required package/class
public static void main(String[] args) {
    try {
        // startup checks and logging
        EnvironmentInformation.logEnvironmentInfo(LOG, "ZooKeeper Quorum Peer", args);
        final ParameterTool params = ParameterTool.fromArgs(args);
        final String zkConfigFile = params.getRequired("zkConfigFile");
        final int peerId = params.getInt("peerId");
        // Run quorum peer
        runFlinkZkQuorumPeer(zkConfigFile, peerId);
    } catch (Throwable t) {
        LOG.error("Error running ZooKeeper quorum peer: " + t.getMessage(), t);
        System.exit(-1);
    }
}
Example 11: main
import org.apache.flink.api.java.utils.ParameterTool; // import the required package/class
public static void main(String[] args) throws Exception {
    ParameterTool parameterTool = ParameterTool.fromArgs(args);
    if (parameterTool.getNumberOfParameters() < 2) {
        System.out.println("Missing parameters!");
        System.out.println("Usage: Kafka --topic <topic> --bootstrap.servers <kafka brokers>");
        return;
    }
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().disableSysoutLogging();
    env.getConfig().setRestartStrategy(RestartStrategies.fixedDelayRestart(4, 10000));
    // very simple data generator
    DataStream<String> messageStream = env.addSource(new SourceFunction<String>() {
        private static final long serialVersionUID = 6369260445318862378L;
        public boolean running = true;
        @Override
        public void run(SourceContext<String> ctx) throws Exception {
            long i = 0;
            while (this.running) {
                ctx.collect("Element - " + i++);
                Thread.sleep(500);
            }
        }
        @Override
        public void cancel() {
            running = false;
        }
    });
    // write data into Kafka
    messageStream.addSink(new FlinkKafkaProducer08<>(parameterTool.getRequired("topic"), new SimpleStringSchema(), parameterTool.getProperties()));
    env.execute("Write into Kafka example");
}
Example 12: main
import org.apache.flink.api.java.utils.ParameterTool; // import the required package/class
public static void main(String[] args) throws Exception {
    ParameterTool pt = ParameterTool.fromArgs(args);
    StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
    see.setParallelism(1);
    DataStream<String> simpleStringStream = see.addSource(new EventsGenerator());
    Properties kinesisProducerConfig = new Properties();
    kinesisProducerConfig.setProperty(AWSConfigConstants.AWS_REGION, pt.getRequired("region"));
    kinesisProducerConfig.setProperty(AWSConfigConstants.AWS_ACCESS_KEY_ID, pt.getRequired("accessKey"));
    kinesisProducerConfig.setProperty(AWSConfigConstants.AWS_SECRET_ACCESS_KEY, pt.getRequired("secretKey"));
    FlinkKinesisProducer<String> kinesis = new FlinkKinesisProducer<>(
            new SimpleStringSchema(), kinesisProducerConfig);
    kinesis.setFailOnError(true);
    kinesis.setDefaultStream("flink-test");
    kinesis.setDefaultPartition("0");
    simpleStringStream.addSink(kinesis);
    see.execute();
}
Example 13: main
import org.apache.flink.api.java.utils.ParameterTool; // import the required package/class
public static void main(String[] args) throws Exception {
    ParameterTool params = ParameterTool.fromArgs(args);
    // define the dataflow
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(2);
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(10, 1000));
    env.readFileStream("input/", 60000, FileMonitoringFunction.WatchType.ONLY_NEW_FILES)
            .addSink(new DiscardingSink<String>());
    // generate a job graph
    final JobGraph jobGraph = env.getStreamGraph().getJobGraph();
    File jobGraphFile = new File(params.get("output", "job.graph"));
    try (FileOutputStream output = new FileOutputStream(jobGraphFile);
            ObjectOutputStream obOutput = new ObjectOutputStream(output)) {
        obOutput.writeObject(jobGraph);
    }
}
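For completeness, a job graph serialized this way can later be read back with plain Java deserialization; a minimal sketch, assuming the default file name job.graph used above:

import java.io.FileInputStream;
import java.io.ObjectInputStream;
import org.apache.flink.runtime.jobgraph.JobGraph;

public class ReadJobGraph {
    public static void main(String[] args) throws Exception {
        // read the previously serialized job graph back from disk
        try (FileInputStream input = new FileInputStream("job.graph");
                ObjectInputStream objectInput = new ObjectInputStream(input)) {
            JobGraph restoredGraph = (JobGraph) objectInput.readObject();
            System.out.println("Restored job graph: " + restoredGraph.getJobID());
        }
    }
}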
Example 14: configure
import org.apache.flink.api.java.utils.ParameterTool; // import the required package/class
@Override
public void configure(ParameterTool parameterTool) {
    if (hasDefaultValue && !parameterTool.has(name)) {
        // skip checks for min and max when using default value
        value = defaultValue;
    } else {
        value = parameterTool.getLong(name);
        if (hasMinimumValue) {
            Util.checkParameter(value >= minimumValue,
                    name + " must be greater than or equal to " + minimumValue);
        }
        if (hasMaximumValue) {
            Util.checkParameter(value <= maximumValue,
                    name + " must be less than or equal to " + maximumValue);
        }
    }
}
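The surrounding class (with fields such as name, hasMinimumValue, and minimumValue) is not shown; it reads a single long-valued option from the ParameterTool and validates it against optional bounds. A hypothetical invocation, purely for illustration:

// hypothetical: the option arrives on the command line and is parsed with ParameterTool
ParameterTool params = ParameterTool.fromArgs(new String[] {"--vertex_count", "1000"});
vertexCountParameter.configure(params); // reads the long value and checks it against the optional bounds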
Example 15: configure
import org.apache.flink.api.java.utils.ParameterTool; // import the required package/class
@Override
public void configure(ParameterTool parameterTool) {
    if (!parameterTool.has("iterations") && !parameterTool.has("convergence_threshold")) {
        // no configuration so use default iterations and maximum threshold
        value.iterations = defaultIterations;
        value.convergenceThreshold = Double.MAX_VALUE;
    } else {
        // use configured values and maximum default for unset values
        value.iterations = parameterTool.getInt("iterations", Integer.MAX_VALUE);
        Util.checkParameter(value.iterations > 0,
                "iterations must be greater than zero");
        value.convergenceThreshold = parameterTool.getDouble("convergence_threshold", Double.MAX_VALUE);
        Util.checkParameter(value.convergenceThreshold > 0,
                "convergence threshold must be greater than zero");
    }
}
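The branch structure above handles three shapes of command line; a hypothetical illustration of the inputs it reacts to:

// neither option set: defaults are used (defaultIterations, Double.MAX_VALUE threshold)
ParameterTool none = ParameterTool.fromArgs(new String[] {});
// only a convergence threshold: iterations fall back to Integer.MAX_VALUE
ParameterTool thresholdOnly = ParameterTool.fromArgs(new String[] {"--convergence_threshold", "0.001"});
// both options set explicitly
ParameterTool both = ParameterTool.fromArgs(new String[] {"--iterations", "20", "--convergence_threshold", "0.001"});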