Java JavaDStream.foreachRDD Method Code Examples

This article collects typical usage examples of the Java method org.apache.spark.streaming.api.java.JavaDStream.foreachRDD. If you are wondering what JavaDStream.foreachRDD does, how to use it, or where to find usage examples, the curated code samples below may help. You can also explore further usage examples of the enclosing class, org.apache.spark.streaming.api.java.JavaDStream.


The following shows 15 code examples of JavaDStream.foreachRDD, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Java code examples.
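Before the project examples, here is a minimal, self-contained sketch of the pattern they all share: the function passed to foreachRDD runs on the driver once per micro-batch RDD. The local master, 2-second batch interval, and "input/" directory are placeholders chosen for this sketch, not taken from any of the projects below.

import org.apache.spark.SparkConf;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;

public class ForeachRDDSketch {
  public static void main(String[] args) throws InterruptedException {
    SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("foreachRDD-sketch");
    try (JavaStreamingContext jsc = new JavaStreamingContext(conf, Durations.seconds(2))) {
      // Any DStream source works; a text file stream is used here for simplicity.
      JavaDStream<String> lines = jsc.textFileStream("input/");
      // The action below runs on the driver for every micro-batch RDD.
      lines.foreachRDD(rdd -> System.out.printf("Records in this batch: %d%n", rdd.count()));
      jsc.start();
      jsc.awaitTermination();
    }
  }
}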

Example 1: main

import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class the method depends on
public static void main(String[] args) throws InterruptedException {
  SparkConf sc = new SparkConf().setAppName("POC-Kafka-New");
  
  try(JavaStreamingContext jsc = new JavaStreamingContext(sc, new Duration(2000))) {
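    // Receiver-less "direct" Kafka stream: records are read straight from the brokers listed in metadata.broker.list.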
    
    JavaPairInputDStream<String, String> stream = KafkaUtils.createDirectStream(
        jsc, String.class, String.class, StringDecoder.class, StringDecoder.class,
        Collections.singletonMap("metadata.broker.list", KAFKA_HOST_PORT),
        Collections.singleton(EXAMPLE_TOPIC));

    JavaDStream<ExampleXML> records = stream.map(t -> t._2()).map(new ParseXML());
    records.foreachRDD(rdd -> System.out.printf("Amount of XMLs: %d\n", rdd.count()));

    jsc.start();
    jsc.awaitTermination();
  }
}
 
Developer ID: ciandt-dev, Project: gcp, Lines of code: 18, Source file: Spark4KafkaNew.java

Example 2: main

import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class the method depends on
public static void main(String[] args) throws InterruptedException {
  SparkConf sc = new SparkConf().setAppName("POC-Streaming");
  try(JavaStreamingContext jsc = new JavaStreamingContext(sc, new Duration(2000))) {
    //JavaDStream<SampleXML> records = jsc.textFileStream("input/").map(new ParseXML());
    // textFileStream processes files line by line, so the XML would have to fit on a single line; the queue-based alternative below avoids that.

    JavaRDD<String> files = jsc.sparkContext().wholeTextFiles("input/").map(tuple -> tuple._2());
    Queue<JavaRDD<String>> rddQueue = new LinkedList<>();
    rddQueue.add(files);
    JavaDStream<String> records = jsc.queueStream(rddQueue);

    records.foreachRDD(rdd -> System.out.printf("Amount of XMLs: %d\n", rdd.count()));

    jsc.start();
    jsc.awaitTermination();
  }
}
 
Developer ID: ciandt-dev, Project: gcp, Lines of code: 18, Source file: Spark2Streaming.java

Example 3: main

import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class the method depends on
public static void main(String[] args) throws InterruptedException, IOException {
  SparkConf sc = new SparkConf().setAppName("POC-BigQuery");
  
  try(JavaStreamingContext jsc = new JavaStreamingContext(sc, new Duration(60000))) {
    JavaPairInputDStream<String, String> stream = KafkaUtils.createDirectStream(
        jsc, String.class, String.class, StringDecoder.class, StringDecoder.class,
        Collections.singletonMap("metadata.broker.list", KAFKA_HOST_PORT), Collections.singleton(EXAMPLE_TOPIC));

    // Hadoop Configuration carrying the BigQuery output table, schema, and output format class.
    Configuration conf = new Configuration();
    BigQueryConfiguration.configureBigQueryOutput(conf, BQ_EXAMPLE_TABLE, BQ_EXAMPLE_SCHEMA);
    conf.set("mapreduce.job.outputformat.class", BigQueryOutputFormat.class.getName());

    JavaDStream<ExampleXML> records = stream.map(t -> t._2()).map(new ParseXML());
    records.foreachRDD(rdd -> {
      System.out.printf("Amount of XMLs: %d\n", rdd.count());
      long time = System.currentTimeMillis();
      rdd.mapToPair(new PrepToBQ()).saveAsNewAPIHadoopDataset(conf);
      System.out.printf("Sent to BQ in %fs\n", (System.currentTimeMillis()-time)/1000f);
    });
    
    jsc.start();
    jsc.awaitTermination();
  }
}
 
Developer ID: ciandt-dev, Project: gcp, Lines of code: 25, Source file: Spark6BigQuery.java

Example 4: main

import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class the method depends on
public static void main(String[] args) throws IOException {
	Flags.setFromCommandLineArgs(THE_OPTIONS, args);

	// Initialize the Spark configuration.
	SparkConf conf = new SparkConf().setAppName("A SECTONG Application: Apache Log Analysis with Spark");
	JavaSparkContext sc = new JavaSparkContext(conf);
	JavaStreamingContext jssc = new JavaStreamingContext(sc, Flags.getInstance().getSlideInterval());
	SQLContext sqlContext = new SQLContext(sc);

	// Initialize parameters
	HashSet<String> topicsSet = new HashSet<String>(Arrays.asList(Flags.getInstance().getKafka_topic().split(",")));
	HashMap<String, String> kafkaParams = new HashMap<String, String>();
	kafkaParams.put("metadata.broker.list", Flags.getInstance().getKafka_broker());

	// Read data from the Kafka stream
	JavaPairInputDStream<String, String> messages = KafkaUtils.createDirectStream(jssc, String.class, String.class,
			StringDecoder.class, StringDecoder.class, kafkaParams, topicsSet);

	JavaDStream<String> lines = messages.map(new Function<Tuple2<String, String>, String>() {
		private static final long serialVersionUID = 5266880065425088203L;

		public String call(Tuple2<String, String> tuple2) {
			return tuple2._2();
		}
	});

	JavaDStream<ApacheAccessLog> accessLogsDStream = lines.flatMap(line -> {
		List<ApacheAccessLog> list = new ArrayList<>();
		try {
			// Parse each line
			list.add(ApacheAccessLog.parseFromLogLine(line));
			return list;
		} catch (RuntimeException e) {
			return list;
		}
	}).cache();

	accessLogsDStream.foreachRDD(rdd -> {

		// Convert the RDD to a DataFrame
		DataFrame df = sqlContext.createDataFrame(rdd, ApacheAccessLog.class);
		// Write to Parquet files
		df.write().partitionBy("ipAddress", "method", "responseCode").mode(SaveMode.Append).parquet(Flags.getInstance().getParquetFile());

		return null;
	});

	// Start the streaming job
	jssc.start(); // start the computation
	jssc.awaitTermination(); // wait for termination
}
 
Developer ID: sectong, Project: SparkToParquet, Lines of code: 52, Source file: AppMain.java

Example 5: run

import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class the method depends on
@Override
public void run() {
	JMetalLogger.logger.info("Run method in the streaming data source invoked") ;
   JMetalLogger.logger.info("Directory: " + directoryName) ;

	JavaDStream<Integer> time = streamingContext
					.textFileStream(directoryName)
					.map(line -> Integer.parseInt(line)) ;

	time.foreachRDD(numbers -> {
		List<Integer> numberList = numbers.collect() ;
		for (Integer number : numberList) {
		  System.out.println(number) ;
       observable.setChanged();
			observable.notifyObservers(new SingleObservedData<Integer>(number));
		}
	}) ;
}
 
Developer ID: jMetal, Project: jMetalSP, Lines of code: 19, Source file: SimpleSparkStreamingCounterDataSource.java

Example 6: start

import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class the method depends on
private void start() {
	// Create a local StreamingContext with two worker threads and a batch interval of 5 seconds
	SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("Streaming Ingestion File System Text File to Dataframe");
	JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(5));

	JavaDStream<String> msgDataStream = jssc.textFileStream(StreamingUtils.getInputDirectory());

	msgDataStream.print();
	// Create JavaRDD<Row>
	msgDataStream.foreachRDD(new RowProcessor());	

	jssc.start();
	try {
		jssc.awaitTermination();
	} catch (InterruptedException e) {
		// TODO Auto-generated catch block
		e.printStackTrace();
	}
}
 
Developer ID: jgperrin, Project: net.jgp.labs.spark, Lines of code: 21, Source file: StreamingIngestionFileSystemTextFileToDataframeMultipleClassesApp.java

Example 7: publishToNats

import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class the method depends on
/**
 * @param stream, the Spark Stream to publish to NATS
 * @param dataEncoder, the function used to encode the Spark Stream Records into the NATS Message Payloads
 */
public <V extends Object> void publishToNats(final JavaDStream<V> stream, final Function<V, byte[]> dataEncoder) {
	logger.trace("publishToNats(JavaDStream<String> stream)");
	stream.foreachRDD((VoidFunction<JavaRDD<V>>) rdd -> {
		logger.trace("stream.foreachRDD");
		rdd.foreachPartitionAsync(objects -> {
			logger.trace("rdd.foreachPartition");
			final SparkToNatsConnector<?> connector = getConnector();
			while(objects.hasNext()) {
				final V obj = objects.next();
				logger.trace("Will publish {}", obj);
				connector.publishToNats(dataEncoder.apply(obj));
			}
			returnConnector(connector);  // return to the pool for future reuse
		});
	});
}
 
Developer ID: Logimethods, Project: nats-connector-spark, Lines of code: 21, Source file: SparkToNatsConnectorPool.java

Example 8: write

import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class the method depends on
/**
 * Writes the content of the stream to the Kafka topic
 * behind this producer.
 */
@edu.umd.cs.findbugs.annotations.SuppressWarnings(
    value="SE_INNER_CLASS", justification="Uses state from outer class.")
public void write (JavaDStream<T> stream) {

  stream.foreachRDD(new Function<JavaRDD<T>, Void>() {
    @Override
    public Void call(JavaRDD<T> rdd) throws Exception {

      write(rdd);

      return null;
    }
  });
}
 
Developer ID: rbrush, Project: kite-apps, Lines of code: 19, Source file: KafkaOutput.java

Example 9: main

import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class the method depends on
public static void main(String[] args) {
	// Windows-specific property if Hadoop is not installed or HADOOP_HOME is not set
	System.setProperty("hadoop.home.dir", "E:\\hadoop");
	//Logger rootLogger = LogManager.getRootLogger();
	//rootLogger.setLevel(Level.WARN);
	SparkConf conf = new SparkConf().setAppName("KafkaExample").setMaster("local[*]");
	String inputDirectory = "E:\\hadoop\\streamFolder\\";

	JavaSparkContext sc = new JavaSparkContext(conf);
	JavaStreamingContext streamingContext = new JavaStreamingContext(sc, Durations.seconds(1));
	// streamingContext.checkpoint("E:\\hadoop\\checkpoint");
	Logger rootLogger = LogManager.getRootLogger();
	rootLogger.setLevel(Level.WARN);

	JavaDStream<String> streamfile = streamingContext.textFileStream(inputDirectory);
	streamfile.print();
	streamfile.foreachRDD(rdd -> rdd.foreach(x -> System.out.println(x)));

	JavaPairDStream<LongWritable, Text> streamedFile = streamingContext.fileStream(inputDirectory, LongWritable.class, Text.class, TextInputFormat.class);
	streamedFile.print();

	streamingContext.start();

	try {
		streamingContext.awaitTermination();
	} catch (InterruptedException e) {
		// TODO Auto-generated catch block
		e.printStackTrace();
	}
}
 
Developer ID: PacktPublishing, Project: Apache-Spark-2x-for-Java-Developers, Lines of code: 33, Source file: FileStreamingEx.java

Example 10: main

import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class the method depends on
public static void main(String[] args) throws InterruptedException {
  SparkConf sc = new SparkConf().setAppName("POC-Kafka");
  
  try(JavaStreamingContext jsc = new JavaStreamingContext(sc, new Duration(2000))) {
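    // Receiver-based Kafka stream that connects through ZooKeeper (an older API than the direct stream in Example 1).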
    
    JavaPairReceiverInputDStream<String, String> stream = KafkaUtils.createStream(
        jsc, ZK_HOST_PORT, "a_group_id", Collections.singletonMap(EXAMPLE_TOPIC, 1));

    JavaDStream<ExampleXML> records = stream.map(t -> t._2()).map(new ParseXML());
    records.foreachRDD(rdd -> System.out.printf("Amount of XMLs: %d\n", rdd.count()));

    jsc.start();
    jsc.awaitTermination();
  }
}
 
Developer ID: ciandt-dev, Project: gcp, Lines of code: 16, Source file: Spark3Kafka.java

Example 11: validateTheReceptionOfMessages

import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class the method depends on
protected void validateTheReceptionOfMessages(JavaStreamingContext ssc,
		JavaReceiverInputDStream<String> stream) throws InterruptedException {
	JavaDStream<String> messages = stream.repartition(3);

	ExecutorService executor = Executors.newFixedThreadPool(6);

	final int nbOfMessages = 5;
	NatsPublisher np = getNatsPublisher(nbOfMessages);
	
	if (logger.isDebugEnabled()) {
		messages.print();
	}
	
	messages.foreachRDD(new VoidFunction<JavaRDD<String>>() {
		private static final long serialVersionUID = 1L;

		@Override
		public void call(JavaRDD<String> rdd) throws Exception {
			logger.debug("RDD received: {}", rdd.collect());
			
			final long count = rdd.count();
			if ((count != 0) && (count != nbOfMessages)) {
				rightNumber = false;
				logger.error("The number of messages received should have been {} instead of {}.", nbOfMessages, count);
			}
			
			TOTAL_COUNT.getAndAdd((int) count);
			
			atLeastSomeData = atLeastSomeData || (count > 0);
			
			for (String str : rdd.collect()) {
				if (!str.startsWith(NatsPublisher.NATS_PAYLOAD)) {
					payload = str;
				}
			}
		}			
	});
	
	closeTheValidation(ssc, executor, nbOfMessages, np);		
}
 
Developer ID: Logimethods, Project: nats-connector-spark, Lines of code: 41, Source file: AbstractNatsToSparkTest.java

Example 12: validateTheReceptionOfIntegerMessages

import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class the method depends on
protected void validateTheReceptionOfIntegerMessages(JavaStreamingContext ssc, 
		JavaReceiverInputDStream<Integer> stream) throws InterruptedException {
	JavaDStream<Integer> messages = stream.repartition(3);

	ExecutorService executor = Executors.newFixedThreadPool(6);

	final int nbOfMessages = 5;
	NatsPublisher np = getNatsPublisher(nbOfMessages);
	
	if (logger.isDebugEnabled()) {
		messages.print();
	}
	
	messages.foreachRDD(new VoidFunction<JavaRDD<Integer>>() {
		private static final long serialVersionUID = 1L;

		@Override
		public void call(JavaRDD<Integer> rdd) throws Exception {
			logger.debug("RDD received: {}", rdd.collect());
			
			final long count = rdd.count();
			if ((count != 0) && (count != nbOfMessages)) {
				rightNumber = false;
				logger.error("The number of messages received should have been {} instead of {}.", nbOfMessages, count);
			}
			
			TOTAL_COUNT.getAndAdd((int) count);
			
			atLeastSomeData = atLeastSomeData || (count > 0);
			
			for (Integer value : rdd.collect()) {
				if (value < NatsPublisher.NATS_PAYLOAD_INT) {
					payload = value.toString();
				}
			}
		}			
	});
	
	closeTheValidation(ssc, executor, nbOfMessages, np);
}
 
Developer ID: Logimethods, Project: nats-connector-spark, Lines of code: 41, Source file: AbstractNatsToSparkTest.java

Example 13: save

import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class the method depends on
/**
 * Save all RDDs in the given DStream to the given view.
 * @param dstream
 * @param view
 */
public static <T> void save(JavaDStream<T> dstream, final View<T> view) {

  final String uri = view.getUri().toString();

  dstream.foreachRDD(new Function2<JavaRDD<T>, Time, Void>() {
    @Override
    public Void call(JavaRDD<T> rdd, Time time) throws Exception {

      save(rdd, uri);

      return null;
    }
  });
}
 
Developer ID: rbrush, Project: kite-apps, Lines of code: 20, Source file: SparkDatasets.java
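As a follow-up, here is a hedged usage sketch for the save helper in Example 13 above. Datasets.load is the standard Kite SDK entry point for obtaining a View, but the dataset URI and the Event record class are hypothetical placeholders, not taken from the kite-apps project.

import org.apache.spark.streaming.api.java.JavaDStream;
import org.kitesdk.data.Datasets;
import org.kitesdk.data.View;

public class SaveStreamUsage {
  // Event and the "dataset:hive:events" URI are hypothetical placeholders.
  public static void saveEvents(JavaDStream<Event> events) {
    View<Event> view = Datasets.load("dataset:hive:events", Event.class);
    SparkDatasets.save(events, view); // streams every micro-batch RDD into the view
  }
}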

Example 14: configureDataContext

import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class the method depends on
private void configureDataContext(JavaStreamingContext context) {
    Map<String, Integer> baseTopicMap = new HashMap<>();


    configurationContext.getDataTopics().forEach( dataTopic -> baseTopicMap.put(dataTopic, 1));

    kafkaTopicService.createTopicsIfNotExist(configurationContext.getDataTopics(), configurationContext
            .getKafkaReplicationFactor(), configurationContext.getKafkaPartitions());

    HashMap<String, String> kafkaParams = new HashMap<>();
    kafkaParams.put("zookeeper.connect", configurationContext.getZookeeperHostsQuorumWithPath());
    kafkaParams.put("group.id", configurationContext.getGroupId());
     /*
     groupId must be the cluster groupId. Kafka assigns each partition of a topic to one, and only one, consumer of
     the group.
     Decision topics have only one partition (by default), so if two or more Decision instances (consumers) read the
     same topic with the same groupId, only one instance will be able to read from the topic.
     */
    JavaPairDStream<String, byte[]> messages = KafkaUtils.createStream(context, String.class, byte[].class,
            kafka.serializer.StringDecoder.class, kafka.serializer.DefaultDecoder.class, kafkaParams, baseTopicMap,
            StorageLevel.MEMORY_AND_DISK_SER());

    AvroDeserializeMessageFunction avroDeserializeMessageFunction = new AvroDeserializeMessageFunction();
    JavaDStream<StratioStreamingMessage>  insertRequests = messages.filter(
            new FilterAvroMessagesByOperationFunction(STREAM_OPERATIONS.MANIPULATION.INSERT))
            .map(avroDeserializeMessageFunction);

    InsertIntoStreamFunction insertIntoStreamFunction = new InsertIntoStreamFunction(streamOperationService,
            configurationContext.getZookeeperHostsQuorum());
    insertRequests.foreachRDD(insertIntoStreamFunction);

}
 
Developer ID: Stratio, Project: Decision, Lines of code: 33, Source file: StreamingContextConfiguration.java

Example 15: main

import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class the method depends on
public static void main(String[] args) {
	Logger.getLogger("org").setLevel(Level.WARN);
	Logger.getLogger("akka").setLevel(Level.WARN);

	SparkConf sparkConf = new SparkConf().setMaster("spark://10.204.100.206:7077").setAppName("StreamingKafka101");
	sparkConf.setJars(new String[] { "target\\TestProjects-1.0-SNAPSHOT.jar" });

	//sparkConf.setExecutorEnv("executor-memory", "8G");
	//sparkConf.setExecutorEnv("spark.executor.memory", "8G");
	sparkConf.set("spark.executor.memory", "4G");
	//sparkConf.set("executor-memory", "8G");

	int duration = 2;
	if (args.length > 0) {
		try {
			duration = Integer.parseInt(args[0]);
			System.out.println("duration changed to " + duration);
		} catch (Exception e) {
			System.out.println("Duration reset to defaults");
		}
	}

	JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(duration));

	Map<String, Integer> topicMap = new HashMap<String, Integer>();
	topicMap.put("loadtest", 4);
	JavaPairReceiverInputDStream<String, String> kafkaStream = KafkaUtils.createStream(ssc, "10.204.100.172:2182", "kafka-group1", topicMap);

	JavaDStream<String> lines = kafkaStream.map(new Function<Tuple2<String, String>, String>() {
		@Override
		public String call(Tuple2<String, String> tuple2) {
			return tuple2._2();
		}
	});

	lines.foreachRDD(new Function<JavaRDD<String>, Void>() {
		@Override
		public Void call(JavaRDD<String> rdd) throws Exception {
			System.out.println(new Date() + "  Total records read: " + rdd.count());
			return null;
		}
	});

	ssc.start();
	ssc.awaitTermination();
}
 
Developer ID: atulsm, Project: Test_Projects, Lines of code: 48, Source file: StreamingKafka101.java


Note: The org.apache.spark.streaming.api.java.JavaDStream.foreachRDD method examples in this article were compiled by 純淨天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by various developers; copyright of the source code remains with the original authors. For distribution and use, please refer to the corresponding project's license. Do not republish without permission.