This article collects typical usage examples of the Java method org.apache.flink.streaming.api.datastream.DataStream.filter. If you have been wondering how DataStream.filter is used in practice, how to call it, or where to find examples, the curated method examples here may help. You can also explore further usage examples of the containing class, org.apache.flink.streaming.api.datastream.DataStream.
Below are 6 code examples of DataStream.filter, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Java code examples.
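Before the examples, here is a minimal, self-contained sketch of how DataStream.filter is typically called (the stream contents and class name are illustrative, not taken from the examples below):
import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
public class FilterSketch {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStream<Integer> numbers = env.fromElements(1, 2, 3, 4, 5, 6);
        // filter() keeps only the elements for which the predicate returns true
        DataStream<Integer> evens = numbers.filter(new FilterFunction<Integer>() {
            @Override
            public boolean filter(Integer value) {
                return value % 2 == 0;
            }
        });
        evens.print(); // 2, 4, 6
        env.execute("DataStream.filter sketch");
    }
}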
Example 1: main
import org.apache.flink.streaming.api.datastream.DataStream; // import the package/class the method depends on
public static void main(String[] args) throws Exception {
    final String input = "C:\\dev\\github\\clojured-taxi-rides\\resources\\datasets\\nycTaxiRides.gz";
    final int maxEventDelay = 60;       // events are out of order by at most 60 seconds
    final int servingSpeedFactor = 600; // events of 10 minutes are served in 1 second
    // set up the streaming execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    // start the data generator
    DataStream<TaxiRide> rides = env.addSource(
            new TaxiRideSource(input, maxEventDelay, servingSpeedFactor));
    DataStream<TaxiRide> filteredRides = rides
            // filter out rides that do not start or stop in NYC
            .filter(new NYCFilter());
    // print the filtered stream
    //filteredRides.print();
    filteredRides.writeAsText("file:///C:/Users/ht/rides_java.txt");
    // run the cleansing pipeline
    env.execute("Taxi Ride Cleansing");
}
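The NYCFilter class is not included in the snippet above. A plausible minimal implementation, assuming the GeoUtils helper and the public lon/lat fields from the Flink training exercises (both assumptions; the actual class may differ):
public static class NYCFilter implements FilterFunction<TaxiRide> {
    @Override
    public boolean filter(TaxiRide taxiRide) throws Exception {
        // keep a ride only if both its start and end locations lie within NYC
        // (GeoUtils.isInNYC is assumed from the training utilities)
        return GeoUtils.isInNYC(taxiRide.startLon, taxiRide.startLat)
                && GeoUtils.isInNYC(taxiRide.endLon, taxiRide.endLat);
    }
}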
Example 2: main
import org.apache.flink.streaming.api.datastream.DataStream; // import the package/class the method depends on
public static void main(String[] args) throws Exception {
    final String input = "C:\\dev\\github\\clojured-taxi-rides\\resources\\datasets\\nycTaxiRides.gz";
    final int maxEventDelay = 60;       // events are out of order by at most 60 seconds
    final int servingSpeedFactor = 600; // events of 10 minutes are served in 1 second
    // set up the streaming execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    // start the data generator
    DataStream<TaxiRide> rides = env.addSource(new TaxiRideSource(input, maxEventDelay, servingSpeedFactor));
    DataStream<TaxiRide> filteredRides = rides
            // filter out rides that do not start or stop in NYC
            .filter(new NYCFilter());
    // write the filtered data to a Kafka sink
    filteredRides.addSink(new FlinkKafkaProducer09<>(
            LOCAL_KAFKA_BROKER,
            CLEANSED_RIDES_TOPIC,
            new TaxiRideSchema()));
    // run the cleansing pipeline
    env.execute("Taxi Ride Cleansing");
}
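FlinkKafkaProducer09 expects a SerializationSchema for the record values. The TaxiRideSchema class and the broker/topic constants are not shown above; here is a sketch, assuming TaxiRide supports toString()/fromString() round-tripping as in the Flink training utilities (an assumption), with TypeInformation and TypeExtractor from the Flink core API:
private static final String LOCAL_KAFKA_BROKER = "localhost:9092";  // assumed value
private static final String CLEANSED_RIDES_TOPIC = "cleansedRides"; // assumed value
// a minimal schema sketch; the real TaxiRideSchema may differ
public static class TaxiRideSchema implements SerializationSchema<TaxiRide>, DeserializationSchema<TaxiRide> {
    @Override
    public byte[] serialize(TaxiRide ride) {
        return ride.toString().getBytes();
    }
    @Override
    public TaxiRide deserialize(byte[] message) {
        return TaxiRide.fromString(new String(message));
    }
    @Override
    public boolean isEndOfStream(TaxiRide ride) {
        return false; // the stream is unbounded
    }
    @Override
    public TypeInformation<TaxiRide> getProducedType() {
        return TypeExtractor.getForClass(TaxiRide.class);
    }
}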
Example 3: main
import org.apache.flink.streaming.api.datastream.DataStream; // import the package/class the method depends on
public static void main(String[] args) throws Exception {
    ParameterTool params = ParameterTool.fromArgs(args);
    final String nycTaxiRidesPath = params.getRequired("nycTaxiRidesPath");
    final int maxEventDelay = 60;       // events are out of order by at most 60 seconds
    final int servingSpeedFactor = 600; // events of 10 minutes are served in 1 second
    // set up the streaming execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // start the data generator
    DataStream<TaxiRide> rides = env.addSource(
            new TaxiRideSource(nycTaxiRidesPath, maxEventDelay, servingSpeedFactor));
    // ===============================================================================
    // 1. clean up `rides`, so that the output stream only contains events
    //    with valid geo coordinates within NYC.
    // 2. print out the result stream to the console
    // ===============================================================================
    DataStream<TaxiRide> filteredRides = rides
            // filter out rides that do not start or stop in NYC
            .filter(new NYCFilter());
    // print the filtered stream
    filteredRides.print();
    // run the cleansing pipeline
    env.execute("Taxi Ride Cleansing");
}
Example 4: execute
import org.apache.flink.streaming.api.datastream.DataStream; // import the package/class the method depends on
public DataStream<TaxiRide> execute(DataStream<TaxiRide> rides) throws Exception {
    DataStream<TaxiRide> filteredRidesByNYC = rides
            // keep only rides that start or end within NYC
            .filter(new NewYorkTaxiFilter());
    return filteredRidesByNYC;
}
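This variant packages the filter as a reusable pipeline stage. A hypothetical caller (the class name RideCleansing and the source parameters are made up for illustration) could wire it into a job like this:
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
DataStream<TaxiRide> rides = env.addSource(
        new TaxiRideSource("nycTaxiRides.gz", 60, 600));
// apply the reusable filter stage defined above
DataStream<TaxiRide> nycRides = new RideCleansing().execute(rides);
nycRides.print();
env.execute("Taxi Ride Cleansing Stage");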
Example 5: main
import org.apache.flink.streaming.api.datastream.DataStream; // import the package/class the method depends on
public static void main(String[] args) throws Exception {
    ParameterTool params = ParameterTool.fromArgs(args);
    String nycTaxiRidesPath = params.getRequired("nycTaxiRidesPath");
    // -------------------------------------------------------------------------------
    // Clean the ride events and write them to Kafka (topic: CLEANSED_RIDES_TOPIC)
    // -------------------------------------------------------------------------------
    // set up the streaming execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    // ===============================================================================
    // 1. remember to set the auto watermark interval in the environment config
    // ===============================================================================
    // start the data generator
    DataStream<TaxiRide> rawRidesFromFile = env.addSource(
            new TaxiRideSource(nycTaxiRidesPath, MAX_EVENT_DELAY, SERVING_SPEED_FACTOR));
    DataStream<TaxiRide> filteredRides = rawRidesFromFile
            // filter out rides that do not start or stop in NYC
            .filter(new NYCFilter());
    // ===============================================================================
    // 2. write the cleansed events to the Kafka topic CLEANSED_RIDES_TOPIC;
    //    the Kafka server location is at LOCAL_KAFKA_BROKER
    // ===============================================================================
    // -------------------------------------------------------------------------------
    // Consume the cleansed ride events from Kafka and calculate popular places
    // -------------------------------------------------------------------------------
    // configure the Kafka consumer
    Properties kafkaProps = new Properties();
    kafkaProps.setProperty("bootstrap.servers", LOCAL_KAFKA_BROKER);
    kafkaProps.setProperty("group.id", KAFKA_GROUP_ID);
    // always read the Kafka topic from the start
    kafkaProps.setProperty("auto.offset.reset", "earliest");
    // ===============================================================================
    // 3. replace "env.fromElements(new TaxiRide())" with a FlinkKafkaConsumer09
    //    that reads from the topic CLEANSED_RIDES_TOPIC.
    // 4. remember to assign watermarks to the events read from Kafka by calling
    //    "assignTimestampsAndWatermarks". The events will at most be out-of-order
    //    by MAX_EVENT_DELAY, so you can simply use a default
    //    BoundedOutOfOrdernessTimestampExtractor for this.
    // ===============================================================================
    // create a TaxiRide data stream (placeholder; see TODO 3 above)
    DataStream<TaxiRide> ridesFromKafka = env.fromElements(new TaxiRide());
    // find popular places
    DataStream<Tuple5<Float, Float, Long, Boolean, Integer>> popularPlaces = ridesFromKafka
            // match ride to grid cell and event type (start or end)
            .map(new GridCellMatcher())
            // partition by cell id and event type
            .keyBy(0, 1)
            // build sliding window
            .timeWindow(Time.minutes(15), Time.minutes(5))
            // count ride events in window
            .apply(new RideCounter())
            // filter by popularity threshold
            .filter(new FilterFunction<Tuple4<Integer, Long, Boolean, Integer>>() {
                @Override
                public boolean filter(Tuple4<Integer, Long, Boolean, Integer> count) throws Exception {
                    return count.f3 >= POPULAR_THRESHOLD;
                }
            })
            // map grid cell to coordinates
            .map(new GridToCoordinates());
    popularPlaces.print();
    // run the pipeline
    env.execute("Taxi Ride with Kafka");
}
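For TODO steps 3 and 4 above, the placeholder source would be replaced by a FlinkKafkaConsumer09 plus a watermark assigner. A sketch, assuming TaxiRide.getEventTime() returns the event timestamp in epoch milliseconds (an assumption about the training classes):
DataStream<TaxiRide> ridesFromKafka = env
        .addSource(new FlinkKafkaConsumer09<>(
                CLEANSED_RIDES_TOPIC, new TaxiRideSchema(), kafkaProps))
        // events are at most MAX_EVENT_DELAY seconds out of order
        .assignTimestampsAndWatermarks(
                new BoundedOutOfOrdernessTimestampExtractor<TaxiRide>(Time.seconds(MAX_EVENT_DELAY)) {
                    @Override
                    public long extractTimestamp(TaxiRide ride) {
                        return ride.getEventTime(); // assumed accessor
                    }
                });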
Example 6: main
import org.apache.flink.streaming.api.datastream.DataStream; // import the package/class the method depends on
public static void main(String[] args) throws Exception {
    // check the input parameters
    final ParameterTool params = ParameterTool.fromArgs(args);
    // set up the execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // make parameters available in the web interface
    env.getConfig().setGlobalJobParameters(params);
    env.setParallelism(params.getInt("parallelism", 1));
    DataStream<String> streamSource = env.fromElements(TwitterExampleData.TEXTS);
    DataStream<Tuple2<Long, String>> tweets = streamSource
            .flatMap(new TwitterFilterFunction());
    DataStream<Tuple2<Long, String>> filtered = tweets.filter(
            tweet -> tweet != null);
    DataStream<Tuple2<Long, String>> tweetsFiltered = filtered
            .flatMap(new TextFilterFunction());
    tweetsFiltered = tweetsFiltered
            .flatMap(new StemmingFunction());
    DataStream<Tuple3<Long, String, Float>> positiveTweets =
            tweetsFiltered.flatMap(new PositiveScoreFunction());
    DataStream<Tuple3<Long, String, Float>> negativeTweets =
            tweetsFiltered.flatMap(new NegativeScoreFunction());
    DataStream<Tuple4<Long, String, Float, Float>> scoredTweets = positiveTweets
            .join(negativeTweets)
            .onWindow(10, TimeUnit.SECONDS)
            .where(0, 1)
            .equalTo(0, 1)
            .with(new JoinFunction<Tuple3<Long, String, Float>, Tuple3<Long, String, Float>, Tuple4<Long, String, Float, Float>>() {
                @Override
                public Tuple4<Long, String, Float, Float> join(Tuple3<Long, String, Float> positive, Tuple3<Long, String, Float> negative) throws Exception {
                    return new Tuple4<>(positive.f0, positive.f1, positive.f2, negative.f2);
                }
            });
    DataStream<Tuple5<Long, String, Float, Float, String>> result =
            scoredTweets.flatMap(new ScoreTweetsFunction());
    result.print();
    result.writeAsText("file:///home/veith/erad2016-streamprocessing/results/teste-flink");
    env.execute("Twitter Streaming Example");
}
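The lambda passed to filter above drops the null markers that TwitterFilterFunction presumably emits for tweets it cannot parse (an assumption about that class); written as an explicit FilterFunction, the same step reads:
DataStream<Tuple2<Long, String>> filtered = tweets.filter(
        new FilterFunction<Tuple2<Long, String>>() {
            @Override
            public boolean filter(Tuple2<Long, String> tweet) {
                // keep only tuples that the upstream flatMap actually produced
                return tweet != null;
            }
        });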