

Java JavaDStream Class Code Examples

This article collects typical usage examples of the Java class org.apache.spark.streaming.api.java.JavaDStream. If you are wondering what the JavaDStream class is for, how to use it, or are looking for usage examples, the selected code samples below may help.


The JavaDStream class belongs to the org.apache.spark.streaming.api.java package. 15 code examples of the JavaDStream class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the site recommend better Java code examples.

Example 1: main

import org.apache.spark.streaming.api.java.JavaDStream; // import the required package/class
public static void main(String[] args) throws InterruptedException {
  SparkConf sc = new SparkConf().setAppName("POC-Kafka-New");
  
  try(JavaStreamingContext jsc = new JavaStreamingContext(sc, new Duration(2000))) {
    
    JavaPairInputDStream<String, String> stream = KafkaUtils.createDirectStream(
        jsc, String.class, String.class, StringDecoder.class, StringDecoder.class,
        Collections.singletonMap("metadata.broker.list", KAFKA_HOST_PORT),
        Collections.singleton(EXAMPLE_TOPIC));

    JavaDStream<ExampleXML> records = stream.map(t -> t._2()).map(new ParseXML());
    records.foreachRDD(rdd -> System.out.printf("Amount of XMLs: %d\n", rdd.count()));

    jsc.start();
    jsc.awaitTermination();
  }
}
 
Developer: ciandt-dev, Project: gcp, Lines: 18, Source: Spark4KafkaNew.java
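
The ParseXML function used in this example (and again in Example 3) is not part of the snippet. A minimal sketch of what such a Function could look like is shown below; it assumes ExampleXML is a JAXB-annotated bean, which is an assumption, not something the original code confirms.

import java.io.StringReader;
import javax.xml.bind.JAXBContext;
import javax.xml.bind.Unmarshaller;
import org.apache.spark.api.java.function.Function;

// Hypothetical sketch: parses one XML document (already read as a String) into an ExampleXML bean.
public class ParseXML implements Function<String, ExampleXML> {
  private static final long serialVersionUID = 1L;

  @Override
  public ExampleXML call(String xml) throws Exception {
    // Creating the Unmarshaller inside call() keeps the function free of non-serializable
    // state; a production version would cache the JAXBContext per JVM.
    Unmarshaller unmarshaller = JAXBContext.newInstance(ExampleXML.class).createUnmarshaller();
    return (ExampleXML) unmarshaller.unmarshal(new StringReader(xml));
  }
}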

Example 2: main

import org.apache.spark.streaming.api.java.JavaDStream; // import the required package/class
public static void main(String[] args) throws InterruptedException {
  SparkConf sc = new SparkConf().setAppName("POC-Streaming");
  try(JavaStreamingContext jsc = new JavaStreamingContext(sc, new Duration(2000))) {
    // JavaDStream<SampleXML> records = jsc.textFileStream("input/").map(new ParseXML());
    // textFileStream processes files line by line, so each XML document would have to fit
    // on a single line for this to work; the queue-based alternative below avoids that.

    JavaRDD<String> files = jsc.sparkContext().wholeTextFiles("input/").map(tuple -> tuple._2());
    Queue<JavaRDD<String>> rddQueue = new LinkedList<>();
    rddQueue.add(files);
    JavaDStream<String> records = jsc.queueStream(rddQueue);

    records.foreachRDD(rdd -> System.out.printf("Amount of XMLs: %d\n", rdd.count()));

    jsc.start();
    jsc.awaitTermination();
  }
}
 
Developer: ciandt-dev, Project: gcp, Lines: 18, Source: Spark2Streaming.java

Example 3: main

import org.apache.spark.streaming.api.java.JavaDStream; // import the required package/class
public static void main(String[] args) throws InterruptedException, IOException {
  SparkConf sc = new SparkConf().setAppName("POC-BigQuery");
  
  try(JavaStreamingContext jsc = new JavaStreamingContext(sc, new Duration(60000))) {
    JavaPairInputDStream<String, String> stream = KafkaUtils.createDirectStream(
        jsc, String.class, String.class, StringDecoder.class, StringDecoder.class,
        Collections.singletonMap("metadata.broker.list", KAFKA_HOST_PORT), Collections.singleton(EXAMPLE_TOPIC));

    Configuration conf = new Configuration();
    BigQueryConfiguration.configureBigQueryOutput(conf, BQ_EXAMPLE_TABLE, BQ_EXAMPLE_SCHEMA);
    conf.set("mapreduce.job.outputformat.class", BigQueryOutputFormat.class.getName());

    JavaDStream<ExampleXML> records = stream.map(t -> t._2()).map(new ParseXML());
    records.foreachRDD(rdd -> {
      System.out.printf("Amount of XMLs: %d\n", rdd.count());
      long time = System.currentTimeMillis();
      rdd.mapToPair(new PrepToBQ()).saveAsNewAPIHadoopDataset(conf);
      System.out.printf("Sent to BQ in %fs\n", (System.currentTimeMillis()-time)/1000f);
    });
    
    jsc.start();
    jsc.awaitTermination();
  }
}
 
Developer: ciandt-dev, Project: gcp, Lines: 25, Source: Spark6BigQuery.java
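
The PrepToBQ class invoked inside foreachRDD is also not shown. A rough sketch of such a PairFunction is below; the getProperty() getter, the "property" field name, and the assumption that the BigQuery output format ignores the pair's key are all illustrative, not confirmed by the original project.

import com.google.gson.JsonObject;
import org.apache.spark.api.java.function.PairFunction;
import scala.Tuple2;

// Hypothetical sketch: converts one ExampleXML record into the (key, JsonObject) pair
// written by the BigQuery Hadoop output format configured above. Only the JSON value
// matters here, so the key is left null.
public class PrepToBQ implements PairFunction<ExampleXML, String, JsonObject> {
  private static final long serialVersionUID = 1L;

  @Override
  public Tuple2<String, JsonObject> call(ExampleXML record) {
    JsonObject json = new JsonObject();
    // The field name must match the columns declared in BQ_EXAMPLE_SCHEMA.
    json.addProperty("property", record.getProperty());
    return new Tuple2<>(null, json);
  }
}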

Example 4: main

import org.apache.spark.streaming.api.java.JavaDStream; // import the required package/class
public static void main(String[] args) throws IOException {
	Flags.setFromCommandLineArgs(THE_OPTIONS, args);

	// Initialize the Spark configuration.
	SparkConf conf = new SparkConf().setAppName("A SECTONG Application: Apache Log Analysis with Spark");
	JavaSparkContext sc = new JavaSparkContext(conf);
	JavaStreamingContext jssc = new JavaStreamingContext(sc, Flags.getInstance().getSlideInterval());
	SQLContext sqlContext = new SQLContext(sc);

	// Initialize parameters
	HashSet<String> topicsSet = new HashSet<String>(Arrays.asList(Flags.getInstance().getKafka_topic().split(",")));
	HashMap<String, String> kafkaParams = new HashMap<String, String>();
	kafkaParams.put("metadata.broker.list", Flags.getInstance().getKafka_broker());

	// Read data from the Kafka stream
	JavaPairInputDStream<String, String> messages = KafkaUtils.createDirectStream(jssc, String.class, String.class,
			StringDecoder.class, StringDecoder.class, kafkaParams, topicsSet);

	JavaDStream<String> lines = messages.map(new Function<Tuple2<String, String>, String>() {
		private static final long serialVersionUID = 5266880065425088203L;

		public String call(Tuple2<String, String> tuple2) {
			return tuple2._2();
		}
	});

	JavaDStream<ApacheAccessLog> accessLogsDStream = lines.flatMap(line -> {
		List<ApacheAccessLog> list = new ArrayList<>();
		try {
			// Parse each line
			list.add(ApacheAccessLog.parseFromLogLine(line));
			return list;
		} catch (RuntimeException e) {
			return list;
		}
	}).cache();

	accessLogsDStream.foreachRDD(rdd -> {

		// rdd to DataFrame
		DataFrame df = sqlContext.createDataFrame(rdd, ApacheAccessLog.class);
		// Write out as Parquet files
		df.write().partitionBy("ipAddress", "method", "responseCode").mode(SaveMode.Append).parquet(Flags.getInstance().getParquetFile());

		return null;
	});

	// Start the streaming job
	jssc.start(); // start the computation
	jssc.awaitTermination(); // wait for termination
}
 
Developer: sectong, Project: SparkToParquet, Lines: 52, Source: AppMain.java
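
ApacheAccessLog.parseFromLogLine and the bean's fields are not included in the snippet. The sketch below shows one plausible implementation based on the standard Apache access-log format; only the three fields used for partitioning above are modeled, and the regex and field set are assumptions.

import java.io.Serializable;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

// Hypothetical sketch: parses one Apache access-log line into a bean whose getters
// let sqlContext.createDataFrame infer the DataFrame schema by reflection.
public class ApacheAccessLog implements Serializable {
  private static final Pattern LOG_PATTERN = Pattern.compile(
      "^(\\S+) (\\S+) (\\S+) \\[([^\\]]+)\\] \"(\\S+) (\\S+) (\\S+)\" (\\d{3}) (\\S+)");

  private String ipAddress;
  private String method;
  private int responseCode;

  public static ApacheAccessLog parseFromLogLine(String line) {
    Matcher m = LOG_PATTERN.matcher(line);
    if (!m.find()) {
      // The caller above catches this RuntimeException and simply skips the malformed line.
      throw new RuntimeException("Cannot parse log line: " + line);
    }
    ApacheAccessLog log = new ApacheAccessLog();
    log.ipAddress = m.group(1);
    log.method = m.group(5);
    log.responseCode = Integer.parseInt(m.group(8));
    return log;
  }

  public String getIpAddress() { return ipAddress; }
  public String getMethod() { return method; }
  public int getResponseCode() { return responseCode; }
}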

Example 5: main

import org.apache.spark.streaming.api.java.JavaDStream; // import the required package/class
public static void main(String[] args) 
{
 SparkConf conf = new SparkConf();
 conf.setAppName("Wordcount Background");
 conf.setMaster("local");
  
 
 JavaStreamingContext ssc = new JavaStreamingContext(conf, Durations.seconds(15));
 
 
 JavaDStream<String> lines = ssc.textFileStream("/home/rahul/DATASET");
 JavaDStream<String> words = lines.flatMap(WORDS_EXTRACTOR);
 JavaPairDStream<String, Integer> pairs = words.mapToPair(WORDS_MAPPER);
 JavaPairDStream<String, Integer> counter = pairs.reduceByKey(WORDS_REDUCER);
 
 counter.print();
 
 ssc.start();
 
 ssc.awaitTermination();
 

 /*JavaRDD<String> file = context.textFile("/home/rahul/Desktop/palestine.txt");
 JavaRDD<String> words = file.flatMap(WORDS_EXTRACTOR);
 JavaPairRDD<String, Integer> pairs = words.mapToPair(WORDS_MAPPER);
 JavaPairRDD<String, Integer> counter = pairs.reduceByKey(WORDS_REDUCER);
 counter.saveAsTextFile("/home/rahul/Desktop/wc"); 
 context.close();*/
}
 
Developer: arks-api, Project: arks-api, Lines: 30, Source: WordCount.java
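
The WORDS_EXTRACTOR, WORDS_MAPPER and WORDS_REDUCER constants referenced above are not shown. A minimal sketch follows, assuming the Spark 1.x Java function interfaces (where FlatMapFunction.call returns an Iterable; in Spark 2.x it returns an Iterator):

import java.util.Arrays;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import scala.Tuple2;

// Hypothetical sketches of the three word-count functions used by the streaming job above.
private static final FlatMapFunction<String, String> WORDS_EXTRACTOR =
    line -> Arrays.asList(line.split(" "));          // split each line into words

private static final PairFunction<String, String, Integer> WORDS_MAPPER =
    word -> new Tuple2<>(word, 1);                   // emit (word, 1) pairs

private static final Function2<Integer, Integer, Integer> WORDS_REDUCER =
    (a, b) -> a + b;                                 // sum the counts per word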

Example 6: processWindowTrafficData

import org.apache.spark.streaming.api.java.JavaDStream; // import the required package/class
/**
 * Method to get window traffic counts of different type of vehicles for each route.
 * Window duration = 30 seconds and Slide interval = 10 seconds
 * 
 * @param filteredIotDataStream IoT data stream
 */
public void processWindowTrafficData(JavaDStream<IoTData> filteredIotDataStream) {

	// reduce by key and window (30 sec window and 10 sec slide).
	JavaPairDStream<AggregateKey, Long> countDStreamPair = filteredIotDataStream
			.mapToPair(iot -> new Tuple2<>(new AggregateKey(iot.getRouteId(), iot.getVehicleType()), 1L))
			.reduceByKeyAndWindow((a, b) -> a + b, Durations.seconds(30), Durations.seconds(10));

	// Transform to dstream of TrafficData
	JavaDStream<WindowTrafficData> trafficDStream = countDStreamPair.map(windowTrafficDataFunc);

	// Map Cassandra table column
	Map<String, String> columnNameMappings = new HashMap<String, String>();
	columnNameMappings.put("routeId", "routeid");
	columnNameMappings.put("vehicleType", "vehicletype");
	columnNameMappings.put("totalCount", "totalcount");
	columnNameMappings.put("timeStamp", "timestamp");
	columnNameMappings.put("recordDate", "recorddate");

	// call CassandraStreamingJavaUtil function to save in DB
	javaFunctions(trafficDStream).writerBuilder("traffickeyspace", "window_traffic",
			CassandraJavaUtil.mapToRow(WindowTrafficData.class, columnNameMappings)).saveToCassandra();
}
 
Developer: baghelamit, Project: iot-traffic-monitor, Lines: 29, Source: IoTTrafficDataProcessor.java
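
The windowTrafficDataFunc mapping function is defined elsewhere in the class. A sketch of what it plausibly does is given below; the getters on AggregateKey and the setters on WindowTrafficData mirror the column mappings above but are assumptions.

import java.sql.Timestamp;
import java.util.Date;
import org.apache.spark.api.java.function.Function;
import scala.Tuple2;

// Hypothetical sketch: turns one (AggregateKey, windowed count) pair produced by
// reduceByKeyAndWindow into a WindowTrafficData bean that the Cassandra writer persists.
private static final Function<Tuple2<AggregateKey, Long>, WindowTrafficData> windowTrafficDataFunc = tuple -> {
  WindowTrafficData trafficData = new WindowTrafficData();
  trafficData.setRouteId(tuple._1().getRouteId());
  trafficData.setVehicleType(tuple._1().getVehicleType());
  trafficData.setTotalCount(tuple._2());
  trafficData.setTimeStamp(new Timestamp(new Date().getTime()));
  trafficData.setRecordDate(new Timestamp(new Date().getTime()).toString());
  return trafficData;
};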

Example 7: run

import org.apache.spark.streaming.api.java.JavaDStream; // import the required package/class
public void run() throws IOException {
  SparkConf conf = new SparkConf();
  conf.setAppName(getAppName());
  conf.set(SPARK_SERIALIZER, ORG_APACHE_SPARK_SERIALIZER_KRYO_SERIALIZER);
  JavaSparkUtil.packProjectJars(conf);
  setupSparkConf(conf);

  JavaStreamingContext ssc = new JavaStreamingContext(conf, getDuration());
  List<JavaDStream<T>> streamsList = getStreamsList(ssc);

  // Union all the streams if there is more than 1 stream
  JavaDStream<T> streams = unionStreams(ssc, streamsList);

  JavaPairDStream<String, RowMutation> pairDStream = streams.mapToPair(new PairFunction<T, String, RowMutation>() {
    public Tuple2<String, RowMutation> call(T t) {
      RowMutation rowMutation = convert(t);
      return new Tuple2<String, RowMutation>(rowMutation.getRowId(), rowMutation);
    }
  });

  pairDStream.foreachRDD(getFunction());

  ssc.start();
  ssc.awaitTermination();
}
 
Developer: apache, Project: incubator-blur, Lines: 26, Source: BlurLoadSparkProcessor.java
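
The unionStreams helper is not shown in the snippet. A minimal sketch is below; it assumes the enclosing class is generic in T and uses the Spark 1.x JavaStreamingContext.union(first, rest) signature.

import java.util.List;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;

// Hypothetical sketch: merges all input DStreams into a single DStream so the rest of the
// pipeline only has to handle one stream.
protected JavaDStream<T> unionStreams(JavaStreamingContext ssc, List<JavaDStream<T>> streamsList) {
  if (streamsList.size() > 1) {
    return ssc.union(streamsList.get(0), streamsList.subList(1, streamsList.size()));
  }
  // Only one stream registered; no union necessary.
  return streamsList.get(0);
}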

Example 8: streamSpansToStorage

import org.apache.spark.streaming.api.java.JavaDStream; // import the required package/class
static void streamSpansToStorage(
    JavaDStream<byte[]> stream,
    ReadSpans readSpans,
    AdjustAndConsumeSpansSharingTraceId adjustAndConsumeSpansSharingTraceId
) {
  JavaDStream<Span> spans = stream.flatMap(readSpans);

  // TODO: plug in some filter to drop spans regardless of trace ID
  // spans = spans.filter(spanFilter);

  JavaPairDStream<String, Iterable<Span>> tracesById = spans
      .mapToPair(s -> new Tuple2<>(Util.toLowerHex(s.traceIdHigh, s.traceId), s))
      .groupByKey();

  tracesById.foreachRDD(rdd -> {
    rdd.values().foreachPartition(adjustAndConsumeSpansSharingTraceId);
  });
}
 
Developer: openzipkin, Project: zipkin-sparkstreaming, Lines: 19, Source: SparkStreamingJob.java

Example 9: run

import org.apache.spark.streaming.api.java.JavaDStream; // import the required package/class
@Override
public void run() {
	JMetalLogger.logger.info("Run method in the streaming data source invoked");
	JMetalLogger.logger.info("Directory: " + directoryName);

	JavaDStream<Integer> time = streamingContext
			.textFileStream(directoryName)
			.map(line -> Integer.parseInt(line));

	time.foreachRDD(numbers -> {
		List<Integer> numberList = numbers.collect();
		for (Integer number : numberList) {
			System.out.println(number);
			observable.setChanged();
			observable.notifyObservers(new SingleObservedData<Integer>(number));
		}
	});
}
 
Developer: jMetal, Project: jMetalSP, Lines: 19, Source: SimpleSparkStreamingCounterDataSource.java

Example 10: performQuery

import org.apache.spark.streaming.api.java.JavaDStream; // import the required package/class
/**
 * Method to read in data from an allowed input source/format and perform the query
 */
public void performQuery() throws IOException, PIRException
{
  logger.info("Performing query: ");

  JavaDStream<MapWritable> inputRDD = null;
  if (dataInputFormat.equals(InputFormatConst.BASE_FORMAT))
  {
    inputRDD = readData();
  }
  else if (dataInputFormat.equals(InputFormatConst.ES))
  {
    inputRDD = readDataES();
  }
  else
  {
    throw new PIRException("Unknown data input format " + dataInputFormat);
  }

  performQuery(inputRDD);
}
 
Developer: apache, Project: incubator-pirk, Lines: 24, Source: ComputeStreamingResponse.java

Example 11: print

import org.apache.spark.streaming.api.java.JavaDStream; // import the required package/class
private static <T> TransformEvaluator<ConsoleIO.Write.Unbound<T>> print() {
  return new TransformEvaluator<ConsoleIO.Write.Unbound<T>>() {
    @Override
    public void evaluate(ConsoleIO.Write.Unbound<T> transform, EvaluationContext context) {
      @SuppressWarnings("unchecked")
      JavaDStream<WindowedValue<T>> dstream =
          ((UnboundedDataset<T>) (context).borrowDataset(transform)).getDStream();
      dstream.map(WindowingHelpers.<T>unwindowFunction()).print(transform.getNum());
    }

    @Override
    public String toNativeString() {
      return ".print(...)";
    }
  };
}
 
Developer: apache, Project: beam, Lines: 17, Source: StreamingTransformTranslator.java

Example 12: start

import org.apache.spark.streaming.api.java.JavaDStream; // import the required package/class
private void start() {
	// Create a local StreamingContext with two working threads and a batch
	// interval of 5 seconds
	SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("NetworkWordCount");
	JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(5));

	JavaDStream<String> msgDataStream = jssc.textFileStream(StreamingUtils.getInputDirectory());
	msgDataStream.print();

	jssc.start();
	try {
		jssc.awaitTermination();
	} catch (InterruptedException e) {
		// TODO Auto-generated catch block
		e.printStackTrace();
	}
}
 
Developer: jgperrin, Project: net.jgp.labs.spark, Lines: 18, Source: StreamingIngestionFileSystemTextFileApp.java

Example 13: start

import org.apache.spark.streaming.api.java.JavaDStream; // import the required package/class
private void start() {
	// Create a local StreamingContext with two working threads and a batch
	// interval of 5 seconds
	SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("Streaming Ingestion File System Text File to Dataframe");
	JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(5));

	JavaDStream<String> msgDataStream = jssc.textFileStream(StreamingUtils.getInputDirectory());

	msgDataStream.print();
	// Create JavaRDD<Row>
	msgDataStream.foreachRDD(new RowProcessor());	

	jssc.start();
	try {
		jssc.awaitTermination();
	} catch (InterruptedException e) {
		// TODO Auto-generated catch block
		e.printStackTrace();
	}
}
 
Developer: jgperrin, Project: net.jgp.labs.spark, Lines: 21, Source: StreamingIngestionFileSystemTextFileToDataframeMultipleClassesApp.java
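
The RowProcessor passed to foreachRDD is defined in a separate class of the same project and is not shown here. A sketch of one plausible implementation follows; the single "line" column and the use of SparkSession are assumptions for illustration.

import java.util.Collections;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.VoidFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructType;

// Hypothetical sketch: converts each micro-batch of text lines into a one-column
// DataFrame and prints it, which is what the "to Dataframe" class name suggests.
public class RowProcessor implements VoidFunction<JavaRDD<String>> {
  private static final long serialVersionUID = 1L;

  @Override
  public void call(JavaRDD<String> rdd) {
    JavaRDD<Row> rowRDD = rdd.map(line -> RowFactory.create(line));

    StructType schema = DataTypes.createStructType(Collections.singletonList(
        DataTypes.createStructField("line", DataTypes.StringType, true)));

    SparkSession spark = SparkSession.builder().getOrCreate();
    Dataset<Row> df = spark.createDataFrame(rowRDD, schema);
    df.show();
  }
}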

Example 14: createDStream

import org.apache.spark.streaming.api.java.JavaDStream; // import the required package/class
private static JavaDStream<String> createDStream(JavaStreamingContext javaStreamingContext, String hostName, int port) {
        
        JavaReceiverInputDStream<SparkFlumeEvent> flumeEventStream = FlumeUtils.createStream(javaStreamingContext, hostName, port);
        
        // Set different storage level 
//        flumeEventStream.persist(StorageLevel.MEMORY_AND_DISK_SER());
        
        JavaDStream<String> dStream = flumeEventStream.map(new Function<SparkFlumeEvent, String>() {

            @Override
            public String call(SparkFlumeEvent sparkFlumeEvent) throws Exception {

                byte[] bodyArray = sparkFlumeEvent.event().getBody().array();
                String logTxt = new String(bodyArray, "UTF-8");
                logger.info(logTxt);

                return logTxt;
            }
        });
        // dStream.print();
        
        return dStream;
    }
 
Developer: githoov, Project: spark_log_data, Lines: 24, Source: LogDataWebinar.java

Example 15: publishToNats

import org.apache.spark.streaming.api.java.JavaDStream; // import the required package/class
/**
 * @param stream, the Spark Stream to publish to NATS
 * @param dataEncoder, the function used to encode the Spark Stream Records into the NATS Message Payloads
 */
public <V extends Object> void publishToNats(final JavaDStream<V> stream, final Function<V, byte[]> dataEncoder) {
	logger.trace("publishToNats(JavaDStream<String> stream)");
	stream.foreachRDD((VoidFunction<JavaRDD<V>>) rdd -> {
		logger.trace("stream.foreachRDD");
		rdd.foreachPartitionAsync(objects -> {
			logger.trace("rdd.foreachPartition");
			final SparkToNatsConnector<?> connector = getConnector();
			while(objects.hasNext()) {
				final V obj = objects.next();
				logger.trace("Will publish {}", obj);
				connector.publishToNats(dataEncoder.apply(obj));
			}
			returnConnector(connector);  // return to the pool for future reuse
		});
	});
}
 
Developer: Logimethods, Project: nats-connector-spark, Lines: 21, Source: SparkToNatsConnectorPool.java


Note: The org.apache.spark.streaming.api.java.JavaDStream class examples in this article were compiled by 純淨天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets are selected from open-source projects contributed by their respective authors, and copyright of the source code remains with the original authors; please refer to the license of the corresponding project before distributing or using the code. Do not republish without permission.