This article collects typical usage examples of the Java method org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.setStateBackend. If you are wondering what StreamExecutionEnvironment.setStateBackend does, how to call it, or what real-world usages look like, the hand-picked code examples below should help. You can also read more about the enclosing class, org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.
The following 15 code examples demonstrate StreamExecutionEnvironment.setStateBackend; by default they are ordered by popularity.
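Before the examples, here is a minimal sketch of the call itself. It is not taken from any of the projects below; the checkpoint path is a placeholder and the snippet assumes the same Flink 1.2/1.3-era API that the examples on this page use.
import org.apache.flink.runtime.state.filesystem.FsStateBackend;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // checkpointing must be enabled for the backend to take periodic snapshots
    env.enableCheckpointing(1000);
    // placeholder path -- point this at a durable file system (e.g. HDFS) in a real job
    env.setStateBackend(new FsStateBackend("file:///tmp/flink-checkpoints"));
    // ... define sources, transformations and sinks, then call env.execute() ...
}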
Example 1: main
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; // import the package/class the method depends on
public static void main(String[] args) throws Exception {
    // parse arguments
    ParameterTool params = ParameterTool.fromPropertiesFile(args[0]);
    // create streaming environment
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // enable event time processing
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    // enable fault-tolerance
    env.enableCheckpointing(1000);
    // enable restarts
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(50, 500L));
    // keep state snapshots on the local file system
    env.setStateBackend(new FsStateBackend("file:///home/robert/flink-workdir/flink-streaming-etl/state-backend"));
    // run each operator separately
    env.disableOperatorChaining();
    // get data from Kafka
    Properties kParams = params.getProperties();
    kParams.setProperty("group.id", UUID.randomUUID().toString());
    DataStream<ObjectNode> inputStream = env.addSource(new FlinkKafkaConsumer09<>(params.getRequired("topic"), new JSONDeserializationSchema(), kParams)).name("Kafka 0.9 Source")
        .assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<ObjectNode>(Time.minutes(1L)) {
            @Override
            public long extractTimestamp(ObjectNode jsonNodes) {
                return jsonNodes.get("timestamp_ms").asLong();
            }
        }).name("Timestamp extractor");
    // filter out records without lang field
    DataStream<ObjectNode> tweetsWithLang = inputStream.filter(jsonNode -> jsonNode.has("user") && jsonNode.get("user").has("lang")).name("Filter records without 'lang' field");
    // select only lang = "en" tweets
    DataStream<ObjectNode> englishTweets = tweetsWithLang.filter(jsonNode -> jsonNode.get("user").get("lang").asText().equals("en")).name("Select 'lang'=en tweets");
    // write to file system
    RollingSink<ObjectNode> rollingSink = new RollingSink<>(params.get("sinkPath", "/home/robert/flink-workdir/flink-streaming-etl/rolling-sink"));
    rollingSink.setBucketer(new DateTimeBucketer("yyyy-MM-dd-HH-mm")); // create a new bucket every minute
    englishTweets.addSink(rollingSink).name("Rolling FileSystem Sink");
    // build aggregates (count per language) using a 10-second tumbling window:
    DataStream<Tuple3<Long, String, Long>> languageCounts = tweetsWithLang.keyBy(jsonNode -> jsonNode.get("user").get("lang").asText())
        .timeWindow(Time.seconds(10))
        .apply(new Tuple3<>(0L, "", 0L), new JsonFoldCounter(), new CountEmitter()).name("Count per Language (10 seconds tumbling)");
    // write window aggregate to ElasticSearch
    List<InetSocketAddress> transportNodes = ImmutableList.of(new InetSocketAddress(InetAddress.getByName("localhost"), 9300));
    ElasticsearchSink<Tuple3<Long, String, Long>> elasticsearchSink = new ElasticsearchSink<>(params.toMap(), transportNodes, new ESRequest());
    languageCounts.addSink(elasticsearchSink).name("ElasticSearch2 Sink");
    // word-count on the tweet stream
    DataStream<Tuple2<Date, List<Tuple2<String, Long>>>> topWordCount = tweetsWithLang
        // get text from tweets
        .map(tweet -> tweet.get("text").asText()).name("Get text from Tweets")
        // split text into (word, 1) tuples
        .flatMap(new FlatMapFunction<String, Tuple2<String, Long>>() {
            @Override
            public void flatMap(String s, Collector<Tuple2<String, Long>> collector) throws Exception {
                String[] splits = s.split(" ");
                for (String sp : splits) {
                    collector.collect(new Tuple2<>(sp, 1L));
                }
            }
        }).name("Tokenize words")
        // group by word
        .keyBy(0)
        // build 1-minute windows, computed every 10 seconds --> count word frequency
        .timeWindow(Time.minutes(1L), Time.seconds(10L)).apply(new WordCountingWindow()).name("Count word frequency (1 min, 10 sec sliding window)")
        // build top n every 10 seconds
        .timeWindowAll(Time.seconds(10L)).apply(new TopNWords(10)).name("TopN Window (10s)");
    // write top Ns to Kafka topic
    topWordCount.addSink(new FlinkKafkaProducer09<>(params.getRequired("wc-topic"), new ListSerSchema(), params.getProperties())).name("Write topN to Kafka");
    env.execute("Streaming ETL");
}
Example 2: main
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; // import the package/class the method depends on
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    env.enableCheckpointing(1000);
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 1000));
    env.setStateBackend(new FsStateBackend("file:///" + System.getProperty("java.io.tmpdir") + "/flink/backend"));
    CassandraSink<Tuple2<String, Integer>> sink = CassandraSink.addSink(env.addSource(new MySource()))
        .setQuery("INSERT INTO example.values (id, counter) values (?, ?);")
        .enableWriteAheadLog()
        .setClusterBuilder(new ClusterBuilder() {
            @Override
            public Cluster buildCluster(Cluster.Builder builder) {
                return builder.addContactPoint("127.0.0.1").build();
            }
        })
        .build();
    sink.name("Cassandra Sink").disableChaining().setParallelism(1).uid("hello");
    env.execute();
}
Example 3: main
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; // import the package/class the method depends on
public static void main(String[] args) throws Exception {
final ParameterTool params = ParameterTool.fromArgs(args);
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.getConfig().setGlobalJobParameters(params);
env.setParallelism(2);
env.enableCheckpointing(5000);
env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
env.setStateBackend(new FsStateBackend("file:///Users/zhouzhou/Binary/flink-1.3.2/testcheckpoints/"));
RawLogGroupListDeserializer deserializer = new RawLogGroupListDeserializer();
Properties configProps = new Properties();
configProps.put(ConfigConstants.LOG_ENDPOINT, sEndpoint);
configProps.put(ConfigConstants.LOG_ACCESSSKEYID, sAccessKeyId);
configProps.put(ConfigConstants.LOG_ACCESSKEY, sAccessKey);
configProps.put(ConfigConstants.LOG_PROJECT, sProject);
configProps.put(ConfigConstants.LOG_LOGSTORE, sLogstore);
configProps.put(ConfigConstants.LOG_MAX_NUMBER_PER_FETCH, "10");
configProps.put(ConfigConstants.LOG_CONSUMER_BEGIN_POSITION, Consts.LOG_FROM_CHECKPOINT);
configProps.put(ConfigConstants.LOG_CONSUMERGROUP, "23_ots_sla_etl_product");
DataStream<RawLogGroupList> logTestStream = env.addSource(
new FlinkLogConsumer<RawLogGroupList>(deserializer, configProps)
);
logTestStream.writeAsText("/Users/zhouzhou/Binary/flink-1.3.2/data/newb.txt." + System.nanoTime());
env.execute("flink log connector");
}
Example 4: testProgram
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; // import the package/class the method depends on
@Override
public void testProgram(StreamExecutionEnvironment env) {
assertTrue("Broken test setup", (NUM_STRINGS/2) % NUM_KEYS == 0);
env.setStateBackend(stateBackend);
DataStream<Integer> stream1 = env.addSource(new IntGeneratingSourceFunction(NUM_STRINGS / 2));
DataStream<Integer> stream2 = env.addSource(new IntGeneratingSourceFunction(NUM_STRINGS / 2));
stream1.union(stream2)
.keyBy(new IdentityKeySelector<Integer>())
.map(new OnceFailingPartitionedSum(NUM_STRINGS))
.keyBy(0)
.addSink(new CounterSink());
}
Example 5: testSavepointRestoreFromFlink11
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; // import the package/class the method depends on
@Test
public void testSavepointRestoreFromFlink11() throws Exception {
final int EXPECTED_SUCCESSFUL_CHECKS = 21;
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
// we only test the memory state backend for now
env.setStateBackend(new MemoryStateBackend());
env.enableCheckpointing(500);
env.setParallelism(4);
env.setMaxParallelism(4);
// create source
env
.addSource(new RestoringCheckingSource(NUM_SOURCE_ELEMENTS)).setMaxParallelism(1).uid("LegacyCheckpointedSource")
.flatMap(new RestoringCheckingFlatMap()).startNewChain().uid("LegacyCheckpointedFlatMap")
.keyBy(0)
.flatMap(new RestoringCheckingFlatMapWithKeyedState()).startNewChain().uid("LegacyCheckpointedFlatMapWithKeyedState")
.keyBy(0)
.flatMap(new KeyedStateCheckingFlatMap()).startNewChain().uid("KeyedStateSettingFlatMap")
.keyBy(0)
.transform(
"custom_operator",
new TypeHint<Tuple2<Long, Long>>() {}.getTypeInfo(),
new RestoringCheckingUdfOperator(new RestoringCheckingFlatMapWithKeyedState())).uid("LegacyCheckpointedOperator")
.addSink(new AccumulatorCountingSink<Tuple2<Long, Long>>(EXPECTED_ELEMENTS_ACCUMULATOR));
restoreAndExecute(
env,
getResourceFilename("stateful-udf-migration-itcase-flink1.1-savepoint"),
new Tuple2<>(SUCCESSFUL_CHECK_ACCUMULATOR, EXPECTED_SUCCESSFUL_CHECKS));
}
Example 6: testCreateSavepointOnFlink12
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; // import the package/class the method depends on
/**
* This has to be manually executed to create the savepoint on Flink 1.2.
*/
@Test
@Ignore
public void testCreateSavepointOnFlink12() throws Exception {
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
env.setStateBackend(new MemoryStateBackend());
env.enableCheckpointing(500);
env.setParallelism(4);
env.setMaxParallelism(4);
env
.addSource(new LegacyCheckpointedSource(NUM_SOURCE_ELEMENTS)).setMaxParallelism(1).uid("LegacyCheckpointedSource")
.flatMap(new LegacyCheckpointedFlatMap()).startNewChain().uid("LegacyCheckpointedFlatMap")
.keyBy(0)
.flatMap(new LegacyCheckpointedFlatMapWithKeyedState()).startNewChain().uid("LegacyCheckpointedFlatMapWithKeyedState")
.keyBy(0)
.flatMap(new KeyedStateSettingFlatMap()).startNewChain().uid("KeyedStateSettingFlatMap")
.keyBy(0)
.transform(
"custom_operator",
new TypeHint<Tuple2<Long, Long>>() {}.getTypeInfo(),
new CheckpointedUdfOperator(new LegacyCheckpointedFlatMapWithKeyedState())).uid("LegacyCheckpointedOperator")
.keyBy(0)
.transform(
"timely_stateful_operator",
new TypeHint<Tuple2<Long, Long>>() {}.getTypeInfo(),
new TimelyStatefulOperator()).uid("TimelyStatefulOperator")
.addSink(new AccumulatorCountingSink<Tuple2<Long, Long>>());
executeAndSavepoint(
env,
"src/test/resources/" + getSavepointPath(),
new Tuple2<>(AccumulatorCountingSink.NUM_ELEMENTS_ACCUMULATOR, NUM_SOURCE_ELEMENTS));
}
Example 7: testCreateSavepointOnFlink12WithRocksDB
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; // import the package/class the method depends on
/**
* This has to be manually executed to create the savepoint on Flink 1.2.
*/
@Test
@Ignore
public void testCreateSavepointOnFlink12WithRocksDB() throws Exception {
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
RocksDBStateBackend rocksBackend =
new RocksDBStateBackend(new MemoryStateBackend());
env.setStateBackend(rocksBackend);
env.enableCheckpointing(500);
env.setParallelism(4);
env.setMaxParallelism(4);
env
.addSource(new LegacyCheckpointedSource(NUM_SOURCE_ELEMENTS)).setMaxParallelism(1).uid("LegacyCheckpointedSource")
.flatMap(new LegacyCheckpointedFlatMap()).startNewChain().uid("LegacyCheckpointedFlatMap")
.keyBy(0)
.flatMap(new LegacyCheckpointedFlatMapWithKeyedState()).startNewChain().uid("LegacyCheckpointedFlatMapWithKeyedState")
.keyBy(0)
.flatMap(new KeyedStateSettingFlatMap()).startNewChain().uid("KeyedStateSettingFlatMap")
.keyBy(0)
.transform(
"custom_operator",
new TypeHint<Tuple2<Long, Long>>() {}.getTypeInfo(),
new CheckpointedUdfOperator(new LegacyCheckpointedFlatMapWithKeyedState())).uid("LegacyCheckpointedOperator")
.keyBy(0)
.transform(
"timely_stateful_operator",
new TypeHint<Tuple2<Long, Long>>() {}.getTypeInfo(),
new TimelyStatefulOperator()).uid("TimelyStatefulOperator")
.addSink(new AccumulatorCountingSink<Tuple2<Long, Long>>());
executeAndSavepoint(
env,
"src/test/resources/" + getRocksDBSavepointPath(),
new Tuple2<>(AccumulatorCountingSink.NUM_ELEMENTS_ACCUMULATOR, NUM_SOURCE_ELEMENTS));
}
Example 8: testStateBackendWithoutCheckpointing
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; // import the package/class the method depends on
/**
* Verify that the user-specified state backend is used even if checkpointing is disabled.
*/
@Test
public void testStateBackendWithoutCheckpointing() throws Exception {
    StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
    see.setParallelism(1);
    see.getConfig().setRestartStrategy(RestartStrategies.noRestart());
    see.setStateBackend(new FailingStateBackend());
    see.fromElements(new Tuple2<>("Hello", 1))
        .keyBy(0)
        .map(new RichMapFunction<Tuple2<String, Integer>, String>() {
            private static final long serialVersionUID = 1L;

            @Override
            public void open(Configuration parameters) throws Exception {
                super.open(parameters);
                getRuntimeContext().getState(new ValueStateDescriptor<>("Test", Integer.class));
            }

            @Override
            public String map(Tuple2<String, Integer> value) throws Exception {
                return value.f0;
            }
        })
        .print();
    try {
        see.execute();
        fail();
    }
    catch (JobExecutionException e) {
        Throwable t = e.getCause();
        assertTrue("wrong exception", t instanceof SuccessException);
    }
}
Example 9: testCreateSavepointOnFlink11WithRocksDB
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; // import the package/class the method depends on
/**
* This has to be manually executed to create the savepoint on Flink 1.1.
*/
@Test
@Ignore
public void testCreateSavepointOnFlink11WithRocksDB() throws Exception {
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
RocksDBStateBackend rocksBackend =
new RocksDBStateBackend(new MemoryStateBackend());
// rocksBackend.enableFullyAsyncSnapshots();
env.setStateBackend(rocksBackend);
env.enableCheckpointing(500);
env.setParallelism(4);
env.setMaxParallelism(4);
// create source
env
.addSource(new LegacyCheckpointedSource(NUM_SOURCE_ELEMENTS)).setMaxParallelism(1).uid("LegacyCheckpointedSource")
.flatMap(new LegacyCheckpointedFlatMap()).startNewChain().uid("LegacyCheckpointedFlatMap")
.keyBy(0)
.flatMap(new LegacyCheckpointedFlatMapWithKeyedState()).startNewChain().uid("LegacyCheckpointedFlatMapWithKeyedState")
.keyBy(0)
.flatMap(new KeyedStateSettingFlatMap()).startNewChain().uid("KeyedStateSettingFlatMap")
.keyBy(0)
.transform(
"custom_operator",
new TypeHint<Tuple2<Long, Long>>() {}.getTypeInfo(),
new CheckpointedUdfOperator(new LegacyCheckpointedFlatMapWithKeyedState())).uid("LegacyCheckpointedOperator")
.addSink(new AccumulatorCountingSink<Tuple2<Long, Long>>(EXPECTED_ELEMENTS_ACCUMULATOR));
executeAndSavepoint(
env,
"src/test/resources/stateful-udf-migration-itcase-flink1.1-rocksdb-savepoint",
new Tuple2<>(EXPECTED_ELEMENTS_ACCUMULATOR, NUM_SOURCE_ELEMENTS));
}
Example 10: testSavepointRestoreFromFlink11
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; // import the package/class the method depends on
@Test
public void testSavepointRestoreFromFlink11() throws Exception {
final int expectedSuccessfulChecks = 21;
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
// we only test the memory state backend for now
env.setStateBackend(new MemoryStateBackend());
env.enableCheckpointing(500);
env.setParallelism(4);
env.setMaxParallelism(4);
// create source
env
.addSource(new RestoringCheckingSource(NUM_SOURCE_ELEMENTS)).setMaxParallelism(1).uid("LegacyCheckpointedSource")
.flatMap(new RestoringCheckingFlatMap()).startNewChain().uid("LegacyCheckpointedFlatMap")
.keyBy(0)
.flatMap(new RestoringCheckingFlatMapWithKeyedState()).startNewChain().uid("LegacyCheckpointedFlatMapWithKeyedState")
.keyBy(0)
.flatMap(new KeyedStateCheckingFlatMap()).startNewChain().uid("KeyedStateSettingFlatMap")
.keyBy(0)
.transform(
"custom_operator",
new TypeHint<Tuple2<Long, Long>>() {}.getTypeInfo(),
new RestoringCheckingUdfOperator(new RestoringCheckingFlatMapWithKeyedState())).uid("LegacyCheckpointedOperator")
.addSink(new AccumulatorCountingSink<Tuple2<Long, Long>>(EXPECTED_ELEMENTS_ACCUMULATOR));
restoreAndExecute(
env,
getResourceFilename("stateful-udf-migration-itcase-flink1.1-savepoint"),
new Tuple2<>(SUCCESSFUL_CHECK_ACCUMULATOR, expectedSuccessfulChecks));
}
Example 11: testValueState
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; // import the package/class the method depends on
/**
* Tests a simple queryable value state instance. Each source emits
* (subtaskIndex, 0)..(subtaskIndex, numElements) tuples, which are then
* queried. The test succeeds once each subtask index has been queried with
* value numElements (the latest element having updated the state).
*/
@Test
public void testValueState() throws Exception {
    final Deadline deadline = TEST_TIMEOUT.fromNow();
    final long numElements = 1024L;
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStateBackend(stateBackend);
    env.setParallelism(maxParallelism);
    // Very important, because cluster is shared between tests and we
    // don't explicitly check that all slots are available before
    // submitting.
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000L));
    DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestAscendingValueSource(numElements));
    // Value state
    ValueStateDescriptor<Tuple2<Integer, Long>> valueState = new ValueStateDescriptor<>("any", source.getType());
    source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {
        private static final long serialVersionUID = 7662520075515707428L;

        @Override
        public Integer getKey(Tuple2<Integer, Long> value) {
            return value.f0;
        }
    }).asQueryableState("hakuna", valueState);
    try (AutoCancellableJob autoCancellableJob = new AutoCancellableJob(cluster, env, deadline)) {
        final JobID jobId = autoCancellableJob.getJobId();
        final JobGraph jobGraph = autoCancellableJob.getJobGraph();
        cluster.submitJobDetached(jobGraph);
        executeValueQuery(deadline, client, jobId, "hakuna", valueState, numElements);
    }
}
Example 12: testValueStateShortcut
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; // import the package/class the method depends on
/**
* Tests a simple queryable value state instance. Each source emits
* (subtaskIndex, 0)..(subtaskIndex, numElements) tuples, which are then
* queried. The test succeeds once each subtask index has been queried with
* value numElements (the latest element having updated the state).
*
* <p>This is the same as the simple value state test, but uses the API shortcut.
*/
@Test
public void testValueStateShortcut() throws Exception {
    final Deadline deadline = TEST_TIMEOUT.fromNow();
    final long numElements = 1024L;
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStateBackend(stateBackend);
    env.setParallelism(maxParallelism);
    // Very important, because cluster is shared between tests and we
    // don't explicitly check that all slots are available before
    // submitting.
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000L));
    DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestAscendingValueSource(numElements));
    // Value state shortcut
    final QueryableStateStream<Integer, Tuple2<Integer, Long>> queryableState =
        source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {
            private static final long serialVersionUID = 9168901838808830068L;

            @Override
            public Integer getKey(Tuple2<Integer, Long> value) {
                return value.f0;
            }
        }).asQueryableState("matata");
    final ValueStateDescriptor<Tuple2<Integer, Long>> stateDesc =
        (ValueStateDescriptor<Tuple2<Integer, Long>>) queryableState.getStateDescriptor();
    try (AutoCancellableJob autoCancellableJob = new AutoCancellableJob(cluster, env, deadline)) {
        final JobID jobId = autoCancellableJob.getJobId();
        final JobGraph jobGraph = autoCancellableJob.getJobGraph();
        cluster.submitJobDetached(jobGraph);
        executeValueQuery(deadline, client, jobId, "matata", stateDesc, numElements);
    }
}
Example 13: getFlinkJob
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; // import the package/class the method depends on
private JobGraph getFlinkJob(
final int sourceParallelism,
final String streamName,
final int numElements) throws IOException {
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(sourceParallelism);
env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, 0L));
// to make the test faster, we combine fast checkpoint triggering with
// pauses after completed checkpoints
env.getCheckpointConfig().setCheckpointInterval(100);
env.getCheckpointConfig().setMinPauseBetweenCheckpoints(2000);
// we currently need this to work around the case where tasks are
// started too late, a checkpoint was already triggered, and some tasks
// never see the checkpoint event
env.getCheckpointConfig().setCheckpointTimeout(5000);
// checkpoint to files (but aggregate state below 1 MB) and don't do any async checkpoints
env.setStateBackend(new FsStateBackend(tmpFolder.newFolder().toURI(), 1024 * 1024, false));
// the Pravega reader
final FlinkPravegaReader<Integer> pravegaSource = new FlinkPravegaReader<>(
SETUP_UTILS.getControllerUri(),
SETUP_UTILS.getScope(),
Collections.singleton(streamName),
0,
new IntDeserializer(),
"my_reader_name");
env
.addSource(pravegaSource)
// hook in the notifying mapper
.map(new NotifyingMapper<>())
.setParallelism(1)
// the sink validates that the exactly-once semantics hold
// it must be non-parallel so that it sees all elements and can trivially
// check for duplicates
.addSink(new IntSequenceExactlyOnceValidator(numElements))
.setParallelism(1);
return env.getStreamGraph().getJobGraph();
}
Example 14: testValueStateShortcut
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; // import the package/class the method depends on
/**
* Tests a simple queryable value state instance. Each source emits
* (subtaskIndex, 0)..(subtaskIndex, numElements) tuples, which are then
* queried. The test succeeds once each subtask index has been queried with
* value numElements (the latest element having updated the state).
*
* <p>This is the same as the simple value state test, but uses the API shortcut.
*/
@Test
public void testValueStateShortcut() throws Exception {
    // Config
    final Deadline deadline = TEST_TIMEOUT.fromNow();
    final long numElements = 1024L;
    JobID jobId = null;
    try {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStateBackend(stateBackend);
        env.setParallelism(maxParallelism);
        // Very important, because cluster is shared between tests and we
        // don't explicitly check that all slots are available before
        // submitting.
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000L));
        DataStream<Tuple2<Integer, Long>> source = env
            .addSource(new TestAscendingValueSource(numElements));
        // Value state shortcut
        QueryableStateStream<Integer, Tuple2<Integer, Long>> queryableState =
            source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {
                private static final long serialVersionUID = 9168901838808830068L;

                @Override
                public Integer getKey(Tuple2<Integer, Long> value) throws Exception {
                    return value.f0;
                }
            }).asQueryableState("matata");
        // Submit the job graph
        JobGraph jobGraph = env.getStreamGraph().getJobGraph();
        jobId = jobGraph.getJobID();
        cluster.submitJobDetached(jobGraph);
        final ValueStateDescriptor<Tuple2<Integer, Long>> stateDesc =
            (ValueStateDescriptor<Tuple2<Integer, Long>>) queryableState.getStateDescriptor();
        executeValueQuery(deadline, client, jobId, "matata", stateDesc, numElements);
    } finally {
        // Free cluster resources
        if (jobId != null) {
            CompletableFuture<CancellationSuccess> cancellation = FutureUtils.toJava(
                cluster.getLeaderGateway(deadline.timeLeft())
                    .ask(new JobManagerMessages.CancelJob(jobId), deadline.timeLeft())
                    .mapTo(ClassTag$.MODULE$.<CancellationSuccess>apply(CancellationSuccess.class)));
            cancellation.get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
        }
    }
}
Example 15: testValueState
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; // import the package/class the method depends on
/**
* Tests a simple queryable value state instance. Each source emits
* (subtaskIndex, 0)..(subtaskIndex, numElements) tuples, which are then
* queried. The test succeeds once each subtask index has been queried with
* value numElements (the latest element having updated the state).
*/
@Test
public void testValueState() throws Exception {
    // Config
    final Deadline deadline = TEST_TIMEOUT.fromNow();
    final int numElements = 1024;
    final QueryableStateClient client = new QueryableStateClient(cluster.configuration());
    JobID jobId = null;
    try {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStateBackend(stateBackend);
        env.setParallelism(maxParallelism);
        // Very important, because cluster is shared between tests and we
        // don't explicitly check that all slots are available before
        // submitting.
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000));
        DataStream<Tuple2<Integer, Long>> source = env
            .addSource(new TestAscendingValueSource(numElements));
        // Value state
        ValueStateDescriptor<Tuple2<Integer, Long>> valueState = new ValueStateDescriptor<>(
            "any",
            source.getType());
        QueryableStateStream<Integer, Tuple2<Integer, Long>> queryableState =
            source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {
                private static final long serialVersionUID = 7662520075515707428L;

                @Override
                public Integer getKey(Tuple2<Integer, Long> value) throws Exception {
                    return value.f0;
                }
            }).asQueryableState("hakuna", valueState);
        // Submit the job graph
        JobGraph jobGraph = env.getStreamGraph().getJobGraph();
        jobId = jobGraph.getJobID();
        cluster.submitJobDetached(jobGraph);
        // Now query
        long expected = numElements;
        executeQuery(deadline, client, jobId, "hakuna", valueState, expected);
    } finally {
        // Free cluster resources
        if (jobId != null) {
            Future<CancellationSuccess> cancellation = cluster
                .getLeaderGateway(deadline.timeLeft())
                .ask(new JobManagerMessages.CancelJob(jobId), deadline.timeLeft())
                .mapTo(ClassTag$.MODULE$.<CancellationSuccess>apply(CancellationSuccess.class));
            Await.ready(cancellation, deadline.timeLeft());
        }
        client.shutDown();
    }
}