This article collects typical usage examples of the Java class org.apache.spark.streaming.Durations. If you are wondering what the Durations class does, how to use it, or where to find working examples of it, the curated code samples below should help.
The Durations class belongs to the org.apache.spark.streaming package. Fifteen code examples of the class are shown below, sorted by popularity by default.
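Before the examples themselves, a minimal self-contained sketch (not taken from any of the projects below) of the Durations API: three static factory methods, each returning an org.apache.spark.streaming.Duration measured internally in milliseconds.

import org.apache.spark.streaming.Duration;
import org.apache.spark.streaming.Durations;

public class DurationsBasics {
    public static void main(String[] args) {
        Duration batch = Durations.seconds(5);        // 5 000 ms
        Duration window = Durations.minutes(1);       // 60 000 ms
        Duration slide = Durations.milliseconds(500); //   500 ms
        System.out.println(batch.milliseconds());     // prints 5000
    }
}

A Duration built this way is what every JavaStreamingContext constructor, window operation, and state timeout in the examples below expects.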
Example 1: run
import org.apache.spark.streaming.Durations; //import the required package/class
private void run(CompositeConfiguration conf) {
// Spark conf
SparkConf sparkConf = new SparkConf().setAppName("TwitterSparkCrawler").setMaster(conf.getString("spark.master"))
.set("spark.serializer", conf.getString("spark.serializer"));
JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(conf.getLong("stream.duration")));
// Twitter4J
// IMPORTANT: put keys in twitter4J.properties
Configuration twitterConf = ConfigurationContext.getInstance();
Authorization twitterAuth = AuthorizationFactory.getInstance(twitterConf);
// Create twitter stream
String[] filters = { "#Car" };
TwitterUtils.createStream(jssc, twitterAuth, filters).print();
// Start the computation
jssc.start();
jssc.awaitTermination();
}
Example 2: main
import org.apache.spark.streaming.Durations; //import the required package/class
public static void main(String[] args)
{
SparkConf conf = new SparkConf();
conf.setAppName("Wordcount Background");
conf.setMaster("local");
JavaStreamingContext ssc = new JavaStreamingContext(conf, Durations.seconds(15));
JavaDStream<String> lines = ssc.textFileStream("/home/rahul/DATASET");
JavaDStream<String> words = lines.flatMap(WORDS_EXTRACTOR);
JavaPairDStream<String, Integer> pairs = words.mapToPair(WORDS_MAPPER);
JavaPairDStream<String, Integer> counter = pairs.reduceByKey(WORDS_REDUCER);
counter.print();
ssc.start();
ssc.awaitTermination();
/* Batch-mode equivalent retained from an earlier version of this class
   ('context' here would be a JavaSparkContext, which this streaming job no longer creates):
JavaRDD<String> file = context.textFile("/home/rahul/Desktop/palestine.txt");
JavaRDD<String> words = file.flatMap(WORDS_EXTRACTOR);
JavaPairRDD<String, Integer> pairs = words.mapToPair(WORDS_MAPPER);
JavaPairRDD<String, Integer> counter = pairs.reduceByKey(WORDS_REDUCER);
counter.saveAsTextFile("/home/rahul/Desktop/wc");
context.close(); */
}
Example 3: processWindowTrafficData
import org.apache.spark.streaming.Durations; //import the required package/class
/**
* Method to get windowed traffic counts of different types of vehicles for each route.
* Window duration = 30 seconds and Slide interval = 10 seconds
*
* @param filteredIotDataStream IoT data stream
*/
public void processWindowTrafficData(JavaDStream<IoTData> filteredIotDataStream) {
// reduce by key and window (30 sec window and 10 sec slide).
JavaPairDStream<AggregateKey, Long> countDStreamPair = filteredIotDataStream
.mapToPair(iot -> new Tuple2<>(new AggregateKey(iot.getRouteId(), iot.getVehicleType()), 1L))
.reduceByKeyAndWindow((a, b) -> a + b, Durations.seconds(30), Durations.seconds(10));
// Transform to a DStream of WindowTrafficData
JavaDStream<WindowTrafficData> trafficDStream = countDStreamPair.map(windowTrafficDataFunc);
// Map Java field names to Cassandra column names
Map<String, String> columnNameMappings = new HashMap<String, String>();
columnNameMappings.put("routeId", "routeid");
columnNameMappings.put("vehicleType", "vehicletype");
columnNameMappings.put("totalCount", "totalcount");
columnNameMappings.put("timeStamp", "timestamp");
columnNameMappings.put("recordDate", "recorddate");
// call CassandraStreamingJavaUtil function to save in DB
javaFunctions(trafficDStream).writerBuilder("traffickeyspace", "window_traffic",
CassandraJavaUtil.mapToRow(WindowTrafficData.class, columnNameMappings)).saveToCassandra();
}
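A note on Example 3: reduceByKeyAndWindow also has an overload that takes an inverse reduce function, letting Spark update each 30-second window incrementally by subtracting the batches that slide out rather than re-reducing the full window. A sketch reusing the names above — this variant additionally requires a checkpoint directory on the streaming context:

JavaPairDStream<AggregateKey, Long> incrementalCounts = filteredIotDataStream
    .mapToPair(iot -> new Tuple2<>(new AggregateKey(iot.getRouteId(), iot.getVehicleType()), 1L))
    .reduceByKeyAndWindow(
        (a, b) -> a + b, // counts entering the window
        (a, b) -> a - b, // counts leaving the window
        Durations.seconds(30),
        Durations.seconds(10));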
Example 4: main
import org.apache.spark.streaming.Durations; //import the required package/class
public static void main(String[] args) throws InterruptedException {
Map<String, Object> kafkaParams = new HashMap<>();
kafkaParams.put("bootstrap.servers", "localhost:9092");
kafkaParams.put("key.deserializer", StringDeserializer.class);
kafkaParams.put("value.deserializer", StringDeserializer.class);
kafkaParams.put("group.id", "use_a_separate_group_id_for_each_stream");
kafkaParams.put("auto.offset.reset", "latest");
kafkaParams.put("enable.auto.commit", false);
Collection<String> topics = Arrays.asList("data-in");
SparkConf sparkConf = new SparkConf().setAppName("JavaKafkaSpark");
JavaStreamingContext streamingContext = new JavaStreamingContext(sparkConf, Durations.seconds(5));
final JavaInputDStream<ConsumerRecord<String, String>> stream =
KafkaUtils.createDirectStream(
streamingContext,
LocationStrategies.PreferConsistent(),
ConsumerStrategies.<String, String>Subscribe(topics, kafkaParams)
);
JavaPairDStream<String, Integer> countOfMessageKeys = stream
.map((ConsumerRecord<String, String> record) -> record.key())
.mapToPair((String s) -> new Tuple2<>(s, 1))
.reduceByKey((Integer i1, Integer i2)-> i1 + i2);
countOfMessageKeys.print();
// Start the computation
streamingContext.start();
streamingContext.awaitTermination();
}
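Because enable.auto.commit is false above, this example never commits its Kafka offsets. The usual pattern with the kafka010 integration (a sketch; per-batch error handling omitted) is to commit the processed offset ranges back to Kafka inside foreachRDD:

import org.apache.spark.streaming.kafka010.CanCommitOffsets;
import org.apache.spark.streaming.kafka010.HasOffsetRanges;
import org.apache.spark.streaming.kafka010.OffsetRange;

stream.foreachRDD(rdd -> {
    OffsetRange[] offsetRanges = ((HasOffsetRanges) rdd.rdd()).offsetRanges();
    // ... process the batch ...
    ((CanCommitOffsets) stream.inputDStream()).commitAsync(offsetRanges);
});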
Example 5: processMQTT
import org.apache.spark.streaming.Durations; //import the required package/class
/**
* Starts the Spark stream that reads from the MQTT queue.
*
* @param broker - MQTT broker URL
* @param topic - MQTT topic name
* @param numSeconds - batch interval in seconds
*/
public void processMQTT(final String broker, final String topic, final int numSeconds) {
LOG.info("************ SparkStreamingMQTTOutside.processMQTT start");
// Create the spark application and set the name to MQTT
SparkConf sparkConf = new SparkConf().setAppName("MQTT");
// Create the spark streaming context with a 'numSeconds' second batch size
jssc = new JavaStreamingContext(sparkConf, Durations.seconds(numSeconds));
jssc.checkpoint(checkpointDirectory);
LOG.info("************ SparkStreamingMQTTOutside.processMQTT about to read the MQTTUtils.createStream");
//2. MQTTUtils to collect MQTT messages
JavaReceiverInputDStream<String> messages = MQTTUtils.createStream(jssc, broker, topic);
LOG.info("************ SparkStreamingMQTTOutside.processMQTT about to do foreachRDD");
//process the messages on the queue and save them to the database
messages.foreachRDD(new SaveRDD());
LOG.info("************ SparkStreamingMQTTOutside.processMQTT prior to context.strt");
// Start the context
jssc.start();
jssc.awaitTermination();
}
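awaitTermination() blocks indefinitely here. If the job must survive a SIGTERM without losing the batch it is processing, Spark's graceful-shutdown flag can be set when the conf is built (a sketch of the SparkConf line above):

SparkConf sparkConf = new SparkConf()
    .setAppName("MQTT")
    // drain in-flight batches on JVM shutdown instead of killing them mid-write
    .set("spark.streaming.stopGracefullyOnShutdown", "true");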
Example 6: start
import org.apache.spark.streaming.Durations; //import the required package/class
private void start() {
// Create a local StreamingContext with two worker threads and a batch
// interval of 5 seconds
SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("NetworkWordCount");
JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(5));
JavaDStream<String> msgDataStream = jssc.textFileStream(StreamingUtils.getInputDirectory());
msgDataStream.print();
jssc.start();
try {
jssc.awaitTermination();
} catch (InterruptedException e) {
// Restore the interrupt flag instead of swallowing the interruption
Thread.currentThread().interrupt();
e.printStackTrace();
}
}
Example 7: start
import org.apache.spark.streaming.Durations; //import the required package/class
private void start() {
// Create a local StreamingContext with two worker threads and a batch
// interval of 5 seconds
SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("Streaming Ingestion File System Text File to Dataframe");
JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(5));
JavaDStream<String> msgDataStream = jssc.textFileStream(StreamingUtils.getInputDirectory());
msgDataStream.print();
// Create JavaRDD<Row>
msgDataStream.foreachRDD(new RowProcessor());
jssc.start();
try {
jssc.awaitTermination();
} catch (InterruptedException e) {
// Restore the interrupt flag instead of swallowing the interruption
Thread.currentThread().interrupt();
e.printStackTrace();
}
}
Developer: jgperrin, Project: net.jgp.labs.spark, Lines: 21, Source: StreamingIngestionFileSystemTextFileToDataframeMultipleClassesApp.java
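One caveat both file-stream examples share: textFileStream only notices files that appear in the monitored directory after the context has started, and each file should arrive atomically. A quick manual test (a sketch reusing StreamingUtils.getInputDirectory() from the examples) is to write the data elsewhere and move it in:

import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.util.Arrays;

Path tmp = Files.createTempFile("msg", ".txt");
Files.write(tmp, Arrays.asList("hello", "world"));
Files.move(tmp, Paths.get(StreamingUtils.getInputDirectory(), tmp.getFileName().toString()),
    StandardCopyOption.ATOMIC_MOVE);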
Example 8: run
import org.apache.spark.streaming.Durations; //import the required package/class
private void run(CompositeConfiguration conf) {
// Spark conf
SparkConf sparkConf = new SparkConf().setAppName("TwitterSparkCrawler").setMaster(conf.getString("spark.master"))
.set("spark.serializer", conf.getString("spark.serializer"))
.registerKryoClasses(new Class<?>[] { Parameter.class, BatchRequestBuilder.class, BatchRequest.class });
JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(conf.getLong("stream.duration")));
// Create facebook stream
Parameter typeParam = Parameter.with("type", "event");
FacebookUtils
.createStream(jssc, conf.getString("access.token"),
new BatchRequestBuilder[] {
new BatchRequestBuilder("search").parameters(new Parameter[] { Parameter.with("q", "car"), typeParam }) })
.print();
// Start the computation
jssc.start();
jssc.awaitTermination();
}
Example 9: main
import org.apache.spark.streaming.Durations; //import the required package/class
public static void main(String[] args) throws Exception {
System.setProperty("hadoop.home.dir", "E:\\hadoop");
SparkConf sparkConf = new SparkConf().setAppName("WordCountSocketEx").setMaster("local[*]");
JavaStreamingContext streamingContext = new JavaStreamingContext(sparkConf, Durations.seconds(1));
Logger rootLogger = LogManager.getRootLogger();
rootLogger.setLevel(Level.WARN);
List<Tuple2<String, Integer>> tuples = Arrays.asList(new Tuple2<>("hello", 10), new Tuple2<>("world", 10));
JavaPairRDD<String, Integer> initialRDD = streamingContext.sparkContext().parallelizePairs(tuples);
JavaReceiverInputDStream<String> StreamingLines = streamingContext.socketTextStream( "10.0.75.1", Integer.parseInt("9000"), StorageLevels.MEMORY_AND_DISK_SER);
JavaDStream<String> words = StreamingLines.flatMap( str -> Arrays.asList(str.split(" ")).iterator() );
JavaPairDStream<String, Integer> wordCounts = words.mapToPair(str-> new Tuple2<>(str, 1)).reduceByKey((count1,count2) ->count1+count2 );
wordCounts.print();
JavaPairDStream<String, Integer> joinedDstream = wordCounts
.transformToPair(new Function<JavaPairRDD<String, Integer>, JavaPairRDD<String, Integer>>() {
@Override
public JavaPairRDD<String, Integer> call(JavaPairRDD<String, Integer> rdd) throws Exception {
JavaPairRDD<String, Integer> modRDD = rdd.join(initialRDD).mapToPair(
new PairFunction<Tuple2<String, Tuple2<Integer, Integer>>, String, Integer>() {
@Override
public Tuple2<String, Integer> call(
Tuple2<String, Tuple2<Integer, Integer>> joinedTuple) throws Exception {
return new Tuple2<>(joinedTuple._1(),(joinedTuple._2()._1() + joinedTuple._2()._2()));
}
});
return modRDD;
}
});
joinedDstream.print();
streamingContext.start();
streamingContext.awaitTermination();
}
Developer: PacktPublishing, Project: Apache-Spark-2x-for-Java-Developers, Lines: 41, Source: WordCountTransformOpEx.java
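Since transformToPair takes an org.apache.spark.api.java.function.Function, the anonymous class in Example 9 collapses to a lambda with the same semantics (a sketch):

JavaPairDStream<String, Integer> joinedDstream = wordCounts.transformToPair(
    rdd -> rdd.join(initialRDD)
              .mapToPair(t -> new Tuple2<>(t._1(), t._2()._1() + t._2()._2())));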
Example 10: main
import org.apache.spark.streaming.Durations; //import the required package/class
public static void main(String[] args) throws Exception {
System.setProperty("hadoop.home.dir", "E:\\hadoop");
SparkConf sparkConf = new SparkConf().setAppName("WordCountSocketEx").setMaster("local[*]");
JavaStreamingContext streamingContext = new JavaStreamingContext(sparkConf, Durations.seconds(1));
streamingContext.checkpoint("E:\\hadoop\\checkpoint");
// Initial state RDD input to mapWithState
@SuppressWarnings("unchecked")
List<Tuple2<String, Integer>> tuples =Arrays.asList(new Tuple2<>("hello", 1), new Tuple2<>("world", 1));
JavaPairRDD<String, Integer> initialRDD = streamingContext.sparkContext().parallelizePairs(tuples);
JavaReceiverInputDStream<String> StreamingLines = streamingContext.socketTextStream( "10.0.75.1", Integer.parseInt("9000"), StorageLevels.MEMORY_AND_DISK_SER);
JavaDStream<String> words = StreamingLines.flatMap( str -> Arrays.asList(str.split(" ")).iterator() );
JavaPairDStream<String, Integer> wordCounts = words.mapToPair(str-> new Tuple2<>(str, 1)).reduceByKey((count1,count2) ->count1+count2 );
// Function to update the cumulative count
Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>> mappingFunc =
new Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>>() {
@Override
public Tuple2<String, Integer> call(String word, Optional<Integer> one,
State<Integer> state) {
int sum = one.orElse(0) + (state.exists() ? state.get() : 0);
Tuple2<String, Integer> output = new Tuple2<>(word, sum);
state.update(sum);
return output;
}
};
// DStream of cumulative counts, updated in every batch
JavaMapWithStateDStream<String, Integer, Integer, Tuple2<String, Integer>> stateDstream = wordCounts.mapWithState(StateSpec.function(mappingFunc).initialState(initialRDD));
stateDstream.print();
streamingContext.start();
streamingContext.awaitTermination();
}
Developer: PacktPublishing, Project: Apache-Spark-2x-for-Java-Developers, Lines: 40, Source: WordCountSocketStateful.java
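StateSpec can also evict keys that stop receiving data. Adding a timeout to the spec above looks like this (a sketch; the ten-minute value is arbitrary) — after the idle period the key's state is dropped and the mapping function is called one final time with an empty Optional:

JavaMapWithStateDStream<String, Integer, Integer, Tuple2<String, Integer>> stateDstream =
    wordCounts.mapWithState(StateSpec.function(mappingFunc)
        .initialState(initialRDD)
        .timeout(Durations.minutes(10)));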
Example 11: main
import org.apache.spark.streaming.Durations; //import the required package/class
public static void main(String[] args) {
//Windows-specific property if Hadoop is not installed or HADOOP_HOME is not set
System.setProperty("hadoop.home.dir", "E:\\hadoop");
//Logger rootLogger = LogManager.getRootLogger();
//rootLogger.setLevel(Level.WARN);
SparkConf conf = new SparkConf().setAppName("KafkaExample").setMaster("local[*]");
String inputDirectory="E:\\hadoop\\streamFolder\\";
JavaSparkContext sc = new JavaSparkContext(conf);
JavaStreamingContext streamingContext = new JavaStreamingContext(sc, Durations.seconds(1));
// streamingContext.checkpoint("E:\\hadoop\\checkpoint");
Logger rootLogger = LogManager.getRootLogger();
rootLogger.setLevel(Level.WARN);
JavaDStream<String> streamfile = streamingContext.textFileStream(inputDirectory);
streamfile.print();
streamfile.foreachRDD(rdd-> rdd.foreach(x -> System.out.println(x)));
JavaPairDStream<LongWritable, Text> streamedFile = streamingContext.fileStream(inputDirectory, LongWritable.class, Text.class, TextInputFormat.class);
streamedFile.print();
streamingContext.start();
try {
streamingContext.awaitTermination();
} catch (InterruptedException e) {
// Restore the interrupt flag instead of swallowing the interruption
Thread.currentThread().interrupt();
e.printStackTrace();
}
}
Example 12: main
import org.apache.spark.streaming.Durations; //import the required package/class
public static void main(String[] args) throws InterruptedException {
System.setProperty("hadoop.home.dir", "C:\\softwares\\Winutils");
SparkSession sparkSession = SparkSession.builder().master("local[*]").appName("Stateful Streaming Example")
.config("spark.sql.warehouse.dir", "file:////C:/Users/sgulati/spark-warehouse").getOrCreate();
JavaStreamingContext jssc= new JavaStreamingContext(new JavaSparkContext(sparkSession.sparkContext()),
Durations.milliseconds(1000));
JavaReceiverInputDStream<String> inStream = jssc.socketTextStream("10.204.136.223", 9999);
jssc.checkpoint("C:\\Users\\sgulati\\spark-checkpoint");
JavaDStream<FlightDetails> flightDetailsStream = inStream.map(x -> {
ObjectMapper mapper = new ObjectMapper();
return mapper.readValue(x, FlightDetails.class);
});
JavaPairDStream<String, FlightDetails> flightDetailsPairStream = flightDetailsStream
.mapToPair(f -> new Tuple2<String, FlightDetails>(f.getFlightId(), f));
Function3<String, Optional<FlightDetails>, State<List<FlightDetails>>, Tuple2<String, Double>> mappingFunc = (
flightId, curFlightDetail, state) -> {
List<FlightDetails> details = state.exists() ? state.get() : new ArrayList<>();
boolean isLanded = false;
if (curFlightDetail.isPresent()) {
details.add(curFlightDetail.get());
if (curFlightDetail.get().isLanded()) {
isLanded = true;
}
}
Double avgSpeed = details.stream().mapToDouble(f -> f.getTemperature()).average().orElse(0.0); // note: despite the name, this averages the temperature field
if (isLanded) {
state.remove();
} else {
state.update(details);
}
return new Tuple2<String, Double>(flightId, avgSpeed);
};
JavaMapWithStateDStream<String, FlightDetails, List<FlightDetails>, Tuple2<String, Double>> streamWithState = flightDetailsPairStream
.mapWithState(StateSpec.function(mappingFunc).timeout(Durations.minutes(5)));
streamWithState.print();
jssc.start();
jssc.awaitTermination();
}
Developer: PacktPublishing, Project: Apache-Spark-2x-for-Java-Developers, Lines: 52, Source: StateFulProcessingExample.java
Example 13: main
import org.apache.spark.streaming.Durations; //import the required package/class
public static void main(String[] args) throws Exception {
System.setProperty("hadoop.home.dir", "E:\\hadoop");
SparkConf sparkConf = new SparkConf().setAppName("WordCountSocketEx").setMaster("local[*]");
JavaStreamingContext streamingContext = new JavaStreamingContext(sparkConf, Durations.seconds(1));
List<Tuple2<String, Integer>> tuples = Arrays.asList(new Tuple2<>("hello", 10), new Tuple2<>("world", 10));
JavaPairRDD<String, Integer> initialRDD = streamingContext.sparkContext().parallelizePairs(tuples);
JavaReceiverInputDStream<String> StreamingLines = streamingContext.socketTextStream( "10.0.75.1", Integer.parseInt("9000"), StorageLevels.MEMORY_AND_DISK_SER);
JavaDStream<String> words = StreamingLines.flatMap( str -> Arrays.asList(str.split(" ")).iterator() );
JavaPairDStream<String, Integer> wordCounts = words.mapToPair(str-> new Tuple2<>(str, 1)).reduceByKey((count1,count2) ->count1+count2 );
wordCounts.print();
JavaPairDStream<String, Integer> joinedDstream = wordCounts.transformToPair(
new Function<JavaPairRDD<String, Integer>, JavaPairRDD<String, Integer>>() {
@Override public JavaPairRDD<String, Integer> call(JavaPairRDD<String, Integer> rdd) throws Exception {
// join with the initial RDD and return the combined counts
JavaPairRDD<String, Integer> joined = rdd.join(initialRDD).mapToPair(new PairFunction<Tuple2<String,Tuple2<Integer,Integer>>, String, Integer>() {
@Override
public Tuple2<String, Integer> call(Tuple2<String, Tuple2<Integer, Integer>> joinedTuple)
throws Exception {
return new Tuple2<>( joinedTuple._1(), (joinedTuple._2()._1()+joinedTuple._2()._2()) );
}
});
return joined;
}
});
joinedDstream.print();
streamingContext.start();
streamingContext.awaitTermination();
}
Developer: PacktPublishing, Project: Apache-Spark-2x-for-Java-Developers, Lines: 40, Source: WordCountSocketJava8Ex.java
Example 14: createContext
import org.apache.spark.streaming.Durations; //import the required package/class
protected static JavaStreamingContext createContext(String ip, int port, String checkpointDirectory) {
SparkConf sparkConf = new SparkConf().setAppName("WordCountRecoverableEx").setMaster("local[*]");
JavaStreamingContext streamingContext = new JavaStreamingContext(sparkConf, Durations.seconds(1));
streamingContext.checkpoint(checkpointDirectory);
// Initial state RDD input to mapWithState
@SuppressWarnings("unchecked")
List<Tuple2<String, Integer>> tuples = Arrays.asList(new Tuple2<>("hello", 1), new Tuple2<>("world", 1));
JavaPairRDD<String, Integer> initialRDD = streamingContext.sparkContext().parallelizePairs(tuples);
JavaReceiverInputDStream<String> StreamingLines = streamingContext.socketTextStream(ip,port, StorageLevels.MEMORY_AND_DISK_SER);
JavaDStream<String> words = StreamingLines.flatMap(str -> Arrays.asList(str.split(" ")).iterator());
JavaPairDStream<String, Integer> wordCounts = words.mapToPair(str -> new Tuple2<>(str, 1))
.reduceByKey((count1, count2) -> count1 + count2);
// Function to update the cumulative count
Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>> mappingFunc = new Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>>() {
@Override
public Tuple2<String, Integer> call(String word, Optional<Integer> one, State<Integer> state) {
int sum = one.orElse(0) + (state.exists() ? state.get() : 0);
Tuple2<String, Integer> output = new Tuple2<>(word, sum);
state.update(sum);
return output;
}
};
// DStream of cumulative counts, updated in every batch
JavaMapWithStateDStream<String, Integer, Integer, Tuple2<String, Integer>> stateDstream = wordCounts
.mapWithState(StateSpec.function(mappingFunc).initialState(initialRDD));
stateDstream.print();
return streamingContext;
}
Developer: PacktPublishing, Project: Apache-Spark-2x-for-Java-Developers, Lines: 35, Source: WordCountRecoverableEx.java
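createContext is written as a factory so it can be handed to JavaStreamingContext.getOrCreate, which restores the context from the checkpoint after a driver restart and only invokes the factory on a cold start. A typical driver (a sketch; the host, port, and path values are hypothetical):

public static void main(String[] args) throws InterruptedException {
    String ip = "localhost";                           // hypothetical
    int port = 9000;                                   // hypothetical
    String checkpointDirectory = "/tmp/wc-checkpoint"; // hypothetical
    JavaStreamingContext jssc = JavaStreamingContext.getOrCreate(checkpointDirectory,
        () -> createContext(ip, port, checkpointDirectory));
    jssc.start();
    jssc.awaitTermination();
}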
Example 15: start
import org.apache.spark.streaming.Durations; //import the required package/class
public void start() {
SparkConf sparkConf = getSparkConf();
streamingContext = new JavaStreamingContext(sparkConf,
Durations.seconds(Long.parseLong(config.getStreamingBatchIntervalInSec())));
JavaInputDStream<MessageAndMetadata<String, byte[]>> dStream = buildInputDStream(streamingContext);
JavaPairDStream<String, byte[]> pairDStream = dStream.mapToPair(km -> new Tuple2<>(km.key(), km.message()));
pairDStream.foreachRDD(new ProcessStreamingData<>(config)); // process data
dStream.foreachRDD(new UpdateOffsetsFn<>(config.getKafkaGroupId(), config.getZkOffsetManager()));
streamingContext.start();
}
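Note that this start() method returns as soon as the context is running; the caller is expected to block on the same streamingContext, e.g. (a sketch):

streamingContext.awaitTermination(); // blocks until the job stops or fails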