This article collects typical usage examples of the Java class org.apache.spark.streaming.api.java.JavaDStream. If you are wondering what JavaDStream is for, or how to use it, the curated class code examples below may help.
The JavaDStream class belongs to the org.apache.spark.streaming.api.java package. Fifteen code examples of the JavaDStream class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code examples.
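Before the collected examples, here is a minimal, self-contained sketch of the typical JavaDStream lifecycle: create a streaming context, build a stream, transform it, and start the computation. The socket source, host, port, and batch interval are illustrative assumptions rather than details taken from the examples below.
import org.apache.spark.SparkConf;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;

public class JavaDStreamMinimalSketch {
    public static void main(String[] args) throws InterruptedException {
        // Illustrative settings: local master with two threads and a 2-second batch interval
        SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("JavaDStream-Minimal");
        JavaStreamingContext jsc = new JavaStreamingContext(conf, Durations.seconds(2));
        // Hypothetical source: lines of text arriving on a local TCP socket
        JavaDStream<String> lines = jsc.socketTextStream("localhost", 9999);
        // A simple stateless transformation, then print the first records of each batch
        JavaDStream<Integer> lengths = lines.map(String::length);
        lengths.print();
        jsc.start();
        jsc.awaitTermination();
    }
}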
Example 1: main
import org.apache.spark.streaming.api.java.JavaDStream; //import the required package/class
public static void main(String[] args) throws InterruptedException {
SparkConf sc = new SparkConf().setAppName("POC-Kafka-New");
try(JavaStreamingContext jsc = new JavaStreamingContext(sc, new Duration(2000))) {
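// Create a direct (receiverless) Kafka stream for EXAMPLE_TOPIC using the given broker list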
JavaPairInputDStream<String, String> stream = KafkaUtils.createDirectStream(
jsc, String.class, String.class, StringDecoder.class, StringDecoder.class,
Collections.singletonMap("metadata.broker.list", KAFKA_HOST_PORT),
Collections.singleton(EXAMPLE_TOPIC));
JavaDStream<ExampleXML> records = stream.map(t -> t._2()).map(new ParseXML());
records.foreachRDD(rdd -> System.out.printf("Amount of XMLs: %d\n", rdd.count()));
jsc.start();
jsc.awaitTermination();
}
}
Example 2: main
import org.apache.spark.streaming.api.java.JavaDStream; //import the required package/class
public static void main(String[] args) throws InterruptedException {
SparkConf sc = new SparkConf().setAppName("POC-Streaming");
try(JavaStreamingContext jsc = new JavaStreamingContext(sc, new Duration(2000))) {
//JavaDStream<SampleXML> records = jsc.textFileStream("input/").map(new ParseXML());
//textFileStream processes files line by line, so each XML document would need to be on a single line; the alternative below works around this
JavaRDD<String> files = jsc.sparkContext().wholeTextFiles("input/").map(tuple -> tuple._2());
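// Wrap the whole-file RDD in a queue so queueStream can serve it as a one-shot DStream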
Queue<JavaRDD<String>> rddQueue = new LinkedList<>();
rddQueue.add(files);
JavaDStream<String> records = jsc.queueStream(rddQueue);
records.foreachRDD(rdd -> System.out.printf("Amount of XMLs: %d\n", rdd.count()));
jsc.start();
jsc.awaitTermination();
}
}
Example 3: main
import org.apache.spark.streaming.api.java.JavaDStream; //import the required package/class
public static void main(String[] args) throws InterruptedException, IOException {
SparkConf sc = new SparkConf().setAppName("POC-BigQuery");
try(JavaStreamingContext jsc = new JavaStreamingContext(sc, new Duration(60000))) {
JavaPairInputDStream<String, String> stream = KafkaUtils.createDirectStream(
jsc, String.class, String.class, StringDecoder.class, StringDecoder.class,
Collections.singletonMap("metadata.broker.list", KAFKA_HOST_PORT), Collections.singleton(EXAMPLE_TOPIC));
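// Configure Hadoop output so each RDD can be written to BigQuery via the BigQuery output format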
Configuration conf = new Configuration();
BigQueryConfiguration.configureBigQueryOutput(conf, BQ_EXAMPLE_TABLE, BQ_EXAMPLE_SCHEMA);
conf.set("mapreduce.job.outputformat.class", BigQueryOutputFormat.class.getName());
JavaDStream<ExampleXML> records = stream.map(t -> t._2()).map(new ParseXML());
records.foreachRDD(rdd -> {
System.out.printf("Amount of XMLs: %d\n", rdd.count());
long time = System.currentTimeMillis();
rdd.mapToPair(new PrepToBQ()).saveAsNewAPIHadoopDataset(conf);
System.out.printf("Sent to BQ in %fs\n", (System.currentTimeMillis()-time)/1000f);
});
jsc.start();
jsc.awaitTermination();
}
}
Example 4: main
import org.apache.spark.streaming.api.java.JavaDStream; //import the required package/class
public static void main(String[] args) throws IOException {
Flags.setFromCommandLineArgs(THE_OPTIONS, args);
// Initialize the Spark configuration.
SparkConf conf = new SparkConf().setAppName("A SECTONG Application: Apache Log Analysis with Spark");
JavaSparkContext sc = new JavaSparkContext(conf);
JavaStreamingContext jssc = new JavaStreamingContext(sc, Flags.getInstance().getSlideInterval());
SQLContext sqlContext = new SQLContext(sc);
// Initialize parameters
HashSet<String> topicsSet = new HashSet<String>(Arrays.asList(Flags.getInstance().getKafka_topic().split(",")));
HashMap<String, String> kafkaParams = new HashMap<String, String>();
kafkaParams.put("metadata.broker.list", Flags.getInstance().getKafka_broker());
// Fetch data from the Kafka stream
JavaPairInputDStream<String, String> messages = KafkaUtils.createDirectStream(jssc, String.class, String.class,
StringDecoder.class, StringDecoder.class, kafkaParams, topicsSet);
JavaDStream<String> lines = messages.map(new Function<Tuple2<String, String>, String>() {
private static final long serialVersionUID = 5266880065425088203L;
public String call(Tuple2<String, String> tuple2) {
return tuple2._2();
}
});
JavaDStream<ApacheAccessLog> accessLogsDStream = lines.flatMap(line -> {
List<ApacheAccessLog> list = new ArrayList<>();
try {
// Parse each line into an ApacheAccessLog; unparseable lines are skipped
list.add(ApacheAccessLog.parseFromLogLine(line));
return list;
} catch (RuntimeException e) {
return list;
}
}).cache();
accessLogsDStream.foreachRDD(rdd -> {
// rdd to DataFrame
DataFrame df = sqlContext.createDataFrame(rdd, ApacheAccessLog.class);
// Write to Parquet files, partitioned by IP address, method, and response code
df.write().partitionBy("ipAddress", "method", "responseCode").mode(SaveMode.Append).parquet(Flags.getInstance().getParquetFile());
return null;
});
// Start the Streaming server
jssc.start(); // Start the computation
jssc.awaitTermination(); // Wait for the computation to terminate
}
Example 5: main
import org.apache.spark.streaming.api.java.JavaDStream; //import the required package/class
public static void main(String[] args)
{
SparkConf conf = new SparkConf();
conf.setAppName("Wordcount Background");
conf.setMaster("local");
JavaStreamingContext ssc = new JavaStreamingContext(conf, Durations.seconds(15));
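// Watch the directory for new text files and run a word count on each 15-second batch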
JavaDStream<String> lines = ssc.textFileStream("/home/rahul/DATASET");
JavaDStream<String> words = lines.flatMap(WORDS_EXTRACTOR);
JavaPairDStream<String, Integer> pairs = words.mapToPair(WORDS_MAPPER);
JavaPairDStream<String, Integer> counter = pairs.reduceByKey(WORDS_REDUCER);
counter.print();
ssc.start();
ssc.awaitTermination();
/*JavaRDD<String> file = context.textFile("/home/rahul/Desktop/palestine.txt");
JavaRDD<String> words = file.flatMap(WORDS_EXTRACTOR);
JavaPairRDD<String, Integer> pairs = words.mapToPair(WORDS_MAPPER);
JavaPairRDD<String, Integer> counter = pairs.reduceByKey(WORDS_REDUCER);
counter.saveAsTextFile("/home/rahul/Desktop/wc");
context.close();*/
}
Example 6: processWindowTrafficData
import org.apache.spark.streaming.api.java.JavaDStream; //import the required package/class
/**
* Method to get windowed traffic counts of different types of vehicles for each route.
* Window duration = 30 seconds and Slide interval = 10 seconds
*
* @param filteredIotDataStream IoT data stream
*/
public void processWindowTrafficData(JavaDStream<IoTData> filteredIotDataStream) {
// reduce by key and window (30 sec window and 10 sec slide).
JavaPairDStream<AggregateKey, Long> countDStreamPair = filteredIotDataStream
.mapToPair(iot -> new Tuple2<>(new AggregateKey(iot.getRouteId(), iot.getVehicleType()), 1L))
.reduceByKeyAndWindow((a, b) -> a + b, Durations.seconds(30), Durations.seconds(10));
// Transform to dstream of TrafficData
JavaDStream<WindowTrafficData> trafficDStream = countDStreamPair.map(windowTrafficDataFunc);
// Map Cassandra table column
Map<String, String> columnNameMappings = new HashMap<String, String>();
columnNameMappings.put("routeId", "routeid");
columnNameMappings.put("vehicleType", "vehicletype");
columnNameMappings.put("totalCount", "totalcount");
columnNameMappings.put("timeStamp", "timestamp");
columnNameMappings.put("recordDate", "recorddate");
// call CassandraStreamingJavaUtil function to save in DB
javaFunctions(trafficDStream).writerBuilder("traffickeyspace", "window_traffic",
CassandraJavaUtil.mapToRow(WindowTrafficData.class, columnNameMappings)).saveToCassandra();
}
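The core of the method above is reduceByKeyAndWindow. A minimal, self-contained sketch of the same pattern over plain string keys (30-second window, 10-second slide), using a hypothetical local socket source, might look like this:
import org.apache.spark.SparkConf;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaPairDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import scala.Tuple2;

public class WindowedCountSketch {
    public static void main(String[] args) throws InterruptedException {
        SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("WindowedCountSketch");
        JavaStreamingContext jsc = new JavaStreamingContext(conf, Durations.seconds(10));
        // Hypothetical source: one event key per line from a local socket
        JavaDStream<String> events = jsc.socketTextStream("localhost", 9999);
        // Count occurrences of each key over a 30-second window, recomputed every 10 seconds
        JavaPairDStream<String, Long> windowedCounts = events
                .mapToPair(key -> new Tuple2<>(key, 1L))
                .reduceByKeyAndWindow((a, b) -> a + b, Durations.seconds(30), Durations.seconds(10));
        windowedCounts.print();
        jsc.start();
        jsc.awaitTermination();
    }
}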
Example 7: run
import org.apache.spark.streaming.api.java.JavaDStream; //import the required package/class
public void run() throws IOException {
SparkConf conf = new SparkConf();
conf.setAppName(getAppName());
conf.set(SPARK_SERIALIZER, ORG_APACHE_SPARK_SERIALIZER_KRYO_SERIALIZER);
JavaSparkUtil.packProjectJars(conf);
setupSparkConf(conf);
JavaStreamingContext ssc = new JavaStreamingContext(conf, getDuration());
List<JavaDStream<T>> streamsList = getStreamsList(ssc);
// Union all the streams if there is more than 1 stream
JavaDStream<T> streams = unionStreams(ssc, streamsList);
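// Convert each record into a (rowId, RowMutation) pair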
JavaPairDStream<String, RowMutation> pairDStream = streams.mapToPair(new PairFunction<T, String, RowMutation>() {
public Tuple2<String, RowMutation> call(T t) {
RowMutation rowMutation = convert(t);
return new Tuple2<String, RowMutation>(rowMutation.getRowId(), rowMutation);
}
});
pairDStream.foreachRDD(getFunction());
ssc.start();
ssc.awaitTermination();
}
Example 8: streamSpansToStorage
import org.apache.spark.streaming.api.java.JavaDStream; //import the required package/class
static void streamSpansToStorage(
JavaDStream<byte[]> stream,
ReadSpans readSpans,
AdjustAndConsumeSpansSharingTraceId adjustAndConsumeSpansSharingTraceId
) {
JavaDStream<Span> spans = stream.flatMap(readSpans);
// TODO: plug in some filter to drop spans regardless of trace ID
// spans = spans.filter(spanFilter);
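// Group spans by their lower-hex trace ID so each trace is adjusted and stored as a unit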
JavaPairDStream<String, Iterable<Span>> tracesById = spans
.mapToPair(s -> new Tuple2<>(Util.toLowerHex(s.traceIdHigh, s.traceId), s))
.groupByKey();
tracesById.foreachRDD(rdd -> {
rdd.values().foreachPartition(adjustAndConsumeSpansSharingTraceId);
});
}
Example 9: run
import org.apache.spark.streaming.api.java.JavaDStream; //import the required package/class
@Override
public void run() {
JMetalLogger.logger.info("Run method in the streaming data source invoked") ;
JMetalLogger.logger.info("Directory: " + directoryName) ;
JavaDStream<Integer> time = streamingContext
.textFileStream(directoryName)
.map(line -> Integer.parseInt(line)) ;
time.foreachRDD(numbers -> {
List<Integer> numberList = numbers.collect() ;
for (Integer number : numberList) {
System.out.println(number) ;
observable.setChanged();
observable.notifyObservers(new SingleObservedData<Integer>(number));
}
}) ;
}
Example 10: performQuery
import org.apache.spark.streaming.api.java.JavaDStream; //import the required package/class
/**
* Method to read in data from an allowed input source/format and perform the query
*/
public void performQuery() throws IOException, PIRException
{
logger.info("Performing query: ");
JavaDStream<MapWritable> inputRDD = null;
if (dataInputFormat.equals(InputFormatConst.BASE_FORMAT))
{
inputRDD = readData();
}
else if (dataInputFormat.equals(InputFormatConst.ES))
{
inputRDD = readDataES();
}
else
{
throw new PIRException("Unknown data input format " + dataInputFormat);
}
performQuery(inputRDD);
}
Example 11: print
import org.apache.spark.streaming.api.java.JavaDStream; //import the required package/class
private static <T> TransformEvaluator<ConsoleIO.Write.Unbound<T>> print() {
return new TransformEvaluator<ConsoleIO.Write.Unbound<T>>() {
@Override
public void evaluate(ConsoleIO.Write.Unbound<T> transform, EvaluationContext context) {
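// Fetch the underlying DStream from the evaluation context, strip the windowing wrappers, and print the requested number of elements from each batch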
@SuppressWarnings("unchecked")
JavaDStream<WindowedValue<T>> dstream =
((UnboundedDataset<T>) (context).borrowDataset(transform)).getDStream();
dstream.map(WindowingHelpers.<T>unwindowFunction()).print(transform.getNum());
}
@Override
public String toNativeString() {
return ".print(...)";
}
};
}
Example 12: start
import org.apache.spark.streaming.api.java.JavaDStream; //import the required package/class
private void start() {
// Create a local StreamingContext with two worker threads and a batch
// interval of 5 seconds
SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("NetworkWordCount");
JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(5));
JavaDStream<String> msgDataStream = jssc.textFileStream(StreamingUtils.getInputDirectory());
msgDataStream.print();
jssc.start();
try {
jssc.awaitTermination();
} catch (InterruptedException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
Example 13: start
import org.apache.spark.streaming.api.java.JavaDStream; //import the required package/class
private void start() {
// Create a local StreamingContext with two worker threads and a batch
// interval of 5 seconds
SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("Streaming Ingestion File System Text File to Dataframe");
JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(5));
JavaDStream<String> msgDataStream = jssc.textFileStream(StreamingUtils.getInputDirectory());
msgDataStream.print();
// Create JavaRDD<Row>
msgDataStream.foreachRDD(new RowProcessor());
jssc.start();
try {
jssc.awaitTermination();
} catch (InterruptedException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
Developer: jgperrin, Project: net.jgp.labs.spark, Lines of code: 21, Source file: StreamingIngestionFileSystemTextFileToDataframeMultipleClassesApp.java
Example 14: createDStream
import org.apache.spark.streaming.api.java.JavaDStream; //import the required package/class
private static JavaDStream<String> createDStream(JavaStreamingContext javaStreamingContext, String hostName, int port) {
JavaReceiverInputDStream<SparkFlumeEvent> flumeEventStream = FlumeUtils.createStream(javaStreamingContext, hostName, port);
// Set different storage level
// flumeEventStream.persist(StorageLevel.MEMORY_AND_DISK_SER());
JavaDStream<String> dStream = flumeEventStream.map(new Function<SparkFlumeEvent, String>() {
@Override
public String call(SparkFlumeEvent sparkFlumeEvent) throws Exception {
byte[] bodyArray = sparkFlumeEvent.event().getBody().array();
String logTxt = new String(bodyArray, "UTF-8");
logger.info(logTxt);
return logTxt;
}
});
// dStream.print();
return dStream;
}
Example 15: publishToNats
import org.apache.spark.streaming.api.java.JavaDStream; //import the required package/class
/**
* @param stream the Spark stream to publish to NATS
* @param dataEncoder the function used to encode the Spark stream records into NATS message payloads
*/
public <V extends Object> void publishToNats(final JavaDStream<V> stream, final Function<V, byte[]> dataEncoder) {
logger.trace("publishToNats(JavaDStream<String> stream)");
stream.foreachRDD((VoidFunction<JavaRDD<V>>) rdd -> {
logger.trace("stream.foreachRDD");
rdd.foreachPartitionAsync(objects -> {
logger.trace("rdd.foreachPartition");
final SparkToNatsConnector<?> connector = getConnector();
while(objects.hasNext()) {
final V obj = objects.next();
logger.trace("Will publish {}", obj);
connector.publishToNats(dataEncoder.apply(obj));
}
returnConnector(connector); // return to the pool for future reuse
});
});
}
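A hedged usage sketch (the publisher object and the stream source below are assumptions for illustration, not taken from the example): given a JavaDStream<String> of messages, each record can be encoded into a UTF-8 byte payload with a lambda. Because the encoder is captured by the closure that runs on the executors, it should be serializable.
// Hypothetical wiring; 'publisher' is an instance of the class that declares publishToNats above
JavaDStream<String> messages = jsc.socketTextStream("localhost", 9999); // illustrative source
publisher.publishToNats(messages, msg -> msg.getBytes(java.nio.charset.StandardCharsets.UTF_8));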