

Java JavaDStream.flatMap Method Code Examples

This article compiles typical usages and code examples of the Java method org.apache.spark.streaming.api.java.JavaDStream.flatMap. If you are wondering how exactly to use JavaDStream.flatMap in Java, the curated examples below may help. You can also explore further usage examples of the enclosing class, org.apache.spark.streaming.api.java.JavaDStream.


A total of 10 code examples of the JavaDStream.flatMap method are shown below, sorted by popularity by default.
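A note before the examples: flatMap maps each element of the source DStream to zero or more output elements. The examples below target different Spark versions. In Spark 1.x the FlatMapFunction passed to flatMap returns an Iterable, while from Spark 2.0 onward it must return an Iterator, so a 1.x-style lambda such as x -> Lists.newArrayList(SPACE.split(x)) will not compile against 2.x. The following is a minimal, self-contained sketch against the Spark 2.x API; the input directory and app name are placeholder assumptions, not taken from the examples below.

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;

public class FlatMapSketch {
    public static void main(String[] args) throws InterruptedException {
        SparkConf conf = new SparkConf().setAppName("FlatMapSketch").setMaster("local[2]");
        JavaStreamingContext ssc = new JavaStreamingContext(conf, Durations.seconds(5));

        // "/tmp/stream" is a placeholder: any directory that receives new text
        // files while the job is running will do.
        JavaDStream<String> lines = ssc.textFileStream("/tmp/stream");

        // Spark 2.x signature: the function returns an Iterator, so each input
        // line fans out into zero or more words in the resulting DStream.
        JavaDStream<String> words = lines.flatMap(line -> Arrays.asList(line.split(" ")).iterator());

        words.print();
        ssc.start();
        ssc.awaitTermination();
    }
}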

Example 1: main

import org.apache.spark.streaming.api.java.JavaDStream; //import the package/class this method depends on
public static void main(String[] args)
{
    SparkConf conf = new SparkConf();
    conf.setAppName("Wordcount Background");
    conf.setMaster("local");

    JavaStreamingContext ssc = new JavaStreamingContext(conf, Durations.seconds(15));

    // WORDS_EXTRACTOR, WORDS_MAPPER and WORDS_REDUCER are function fields
    // presumably defined elsewhere in WordCount.java.
    JavaDStream<String> lines = ssc.textFileStream("/home/rahul/DATASET");
    JavaDStream<String> words = lines.flatMap(WORDS_EXTRACTOR);
    JavaPairDStream<String, Integer> pairs = words.mapToPair(WORDS_MAPPER);
    JavaPairDStream<String, Integer> counter = pairs.reduceByKey(WORDS_REDUCER);

    counter.print();

    ssc.start();
    ssc.awaitTermination();

    /*JavaRDD<String> file = context.textFile("/home/rahul/Desktop/palestine.txt");
    JavaRDD<String> words = file.flatMap(WORDS_EXTRACTOR);
    JavaPairRDD<String, Integer> pairs = words.mapToPair(WORDS_MAPPER);
    JavaPairRDD<String, Integer> counter = pairs.reduceByKey(WORDS_REDUCER);
    counter.saveAsTextFile("/home/rahul/Desktop/wc");
    context.close();*/
}
 
Developer ID: arks-api, Project: arks-api, Lines of code: 30, Source file: WordCount.java

Example 2: streamSpansToStorage

import org.apache.spark.streaming.api.java.JavaDStream; //import the package/class this method depends on
static void streamSpansToStorage(
    JavaDStream<byte[]> stream,
    ReadSpans readSpans,
    AdjustAndConsumeSpansSharingTraceId adjustAndConsumeSpansSharingTraceId
) {
  JavaDStream<Span> spans = stream.flatMap(readSpans);

  // TODO: plug in some filter to drop spans regardless of trace ID
  // spans = spans.filter(spanFilter);

  JavaPairDStream<String, Iterable<Span>> tracesById = spans
      .mapToPair(s -> new Tuple2<>(Util.toLowerHex(s.traceIdHigh, s.traceId), s))
      .groupByKey();

  tracesById.foreachRDD(rdd -> {
    rdd.values().foreachPartition(adjustAndConsumeSpansSharingTraceId);
  });
}
 
Developer ID: openzipkin, Project: zipkin-sparkstreaming, Lines of code: 19, Source file: SparkStreamingJob.java

Example 3: main

import org.apache.spark.streaming.api.java.JavaDStream; //import the package/class this method depends on
public static void main(String[] args) {
    if (args.length < 4) {
        System.err.println("Usage: JavaKafkaWordCount <zkQuorum> <group> <topics> <numThreads>");
        System.exit(1);
    }

    SparkConf sparkConf = new SparkConf().setAppName("JavaKafkaWordCount");
    // Create the context with a 2 second batch interval (Duration takes milliseconds)
    JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, new Duration(2000));
    int numThreads = Integer.parseInt(args[3]);
    Map<String, Integer> topicMap = new HashMap<String, Integer>();
    String[] topics = args[2].split(",");
    for (String topic : topics) {
        topicMap.put(topic, numThreads);
    }
    JavaPairReceiverInputDStream<String, String> messages = KafkaUtils.createStream(jssc, args[0], args[1],
            topicMap);
    JavaDStream<String> lines = messages.map(tuple2 -> tuple2._2());
    JavaDStream<String> words = lines.flatMap(x -> Lists.newArrayList(SPACE.split(x)));
    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(s -> new Tuple2<String, Integer>(s, 1)).reduceByKey(
            (i1, i2) -> i1 + i2);
    wordCounts.print();
    jssc.start();
    jssc.awaitTermination();
}
 
Developer ID: ogidogi, Project: laughing-octo-sansa, Lines of code: 26, Source file: TestSparkKafkaReceiverApproach.java

Example 4: main

import org.apache.spark.streaming.api.java.JavaDStream; //import the package/class this method depends on
public static void main(String[] args) {
    if (args.length < 2) {
        System.err.println("Usage: DirectKafkaWordCount <brokers> <topics>\n" + " <brokers> is a list of one or more Kafka brokers\n"
                + " <topics> is a list of one or more kafka topics to consume from\n\n");
        System.exit(1);
    }

    String brokers = args[0];
    String topics = args[1];
    // Create context with 2 second batch interval
    SparkConf sparkConf = new SparkConf().setAppName("JavaDirectKafkaWordCount");
    JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(2));
    HashSet<String> topicsSet = new HashSet<String>(Arrays.asList(topics.split(",")));
    HashMap<String, String> kafkaParams = new HashMap<String, String>();
    kafkaParams.put("metadata.broker.list", brokers);
    // Create direct kafka stream with brokers and topics
    JavaPairInputDStream<String, String> messages = KafkaUtils.createDirectStream(jssc, String.class, String.class,
            StringDecoder.class, StringDecoder.class, kafkaParams, topicsSet);
    // Get the lines, split them into words, count the words and print
    JavaDStream<String> lines = messages.map(tuple2 -> tuple2._2());
    JavaDStream<String> words = lines.flatMap(x -> Lists.newArrayList(SPACE.split(x)));
    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(s -> new Tuple2<String, Integer>(s, 1)).reduceByKey(
            (i1, i2) -> i1 + i2);
    wordCounts.print();
    // Start the computation
    jssc.start();
    jssc.awaitTermination();
}
 
Developer ID: ogidogi, Project: laughing-octo-sansa, Lines of code: 29, Source file: TestSparkKafkaDirectApproach.java

Example 5: main

import org.apache.spark.streaming.api.java.JavaDStream; //import the package/class this method depends on
public static void main(String[] args) {

    // Create the context with a 10 second batch size
    SparkConf sparkConf = new SparkConf().setMaster("local[*]").setAppName("Streaming102");
    //SparkConf sparkConf = new SparkConf().setMaster("spark://10.204.100.206:7077").setAppName("Streaming102");
    sparkConf.setJars(new String[] { "target\\original-TestProjects-1.0-SNAPSHOT.jar" });
    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(10));

    String folder = "./stream/";
    if (args.length == 1) {
        folder = args[0];
    }

    JavaDStream<String> lines = ssc.textFileStream(folder);
    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
      @Override
      public Iterable<String> call(String x) {
    	  System.out.println(x);
    	  return Lists.newArrayList(SPACE.split(x));
      }
    });
    
    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(
      new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String s) {
          return new Tuple2<String, Integer>(s, 1);
        }
      }).reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) {
          return i1 + i2;
        }
    });

    wordCounts.print();
    ssc.start();
    ssc.awaitTermination();
  }
 
Developer ID: atulsm, Project: Test_Projects, Lines of code: 40, Source file: Streaming101.java

Example 6: process

import org.apache.spark.streaming.api.java.JavaDStream; //import the package/class this method depends on
@Override
public JavaPairDStream<String, Integer> process(JavaDStream<String> input) {
	JavaDStream<String> words = input.flatMap(word -> Arrays.asList(word.split(" ")));
	JavaPairDStream<String, Integer> wordCounts = words.mapToPair(s -> new Tuple2<String, Integer>(s, 1))
			.reduceByKey((i1, i2) -> i1 + i2);
	return wordCounts;
}
 
Developer ID: spring-projects, Project: spring-xd-samples, Lines of code: 8, Source file: WordCount.java

Example 7: main

import org.apache.spark.streaming.api.java.JavaDStream; //import the package/class this method depends on
public static void main(String[] args) {
      // Windows-specific property if Hadoop is not installed or HADOOP_HOME is not set
      System.setProperty("hadoop.home.dir", "E:\\hadoop");
      //Logger rootLogger = LogManager.getRootLogger();
      //rootLogger.setLevel(Level.WARN);
      SparkConf conf = new SparkConf().setAppName("KafkaExample").setMaster("local[*]");
      JavaSparkContext sc = new JavaSparkContext(conf);
      JavaStreamingContext streamingContext = new JavaStreamingContext(sc, Durations.minutes(2));
      streamingContext.checkpoint("E:\\hadoop\\checkpoint");
      Logger rootLogger = LogManager.getRootLogger();
      rootLogger.setLevel(Level.WARN);
      Map<String, Object> kafkaParams = new HashMap<>();
      kafkaParams.put("bootstrap.servers", "10.0.75.1:9092");
      kafkaParams.put("key.deserializer", StringDeserializer.class);
      kafkaParams.put("value.deserializer", StringDeserializer.class);
      kafkaParams.put("group.id", "use_a_separate_group_id_for_each_strea");
      kafkaParams.put("auto.offset.reset", "latest");
     // kafkaParams.put("enable.auto.commit", false);

      Collection<String> topics = Arrays.asList("mytopic", "anothertopic");

      final JavaInputDStream<ConsumerRecord<String, String>> stream = KafkaUtils.createDirectStream(streamingContext,LocationStrategies.PreferConsistent(),
      				ConsumerStrategies.<String, String>Subscribe(topics, kafkaParams));

      JavaPairDStream<String, String> pairRDD = stream.mapToPair(record-> new Tuple2<>(record.key(), record.value()));
     
      pairRDD.foreachRDD(pRDD-> { pRDD.foreach(tuple-> System.out.println(new Date()+" :: Kafka msg key ::"+tuple._1() +" the val is ::"+tuple._2()));});
     
      JavaDStream<String> tweetRDD = pairRDD.map(x-> x._2()).map(new TweetText());
      
      tweetRDD.foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(new Date()+" :: "+x)));
      
     JavaDStream<String> hashtagRDD = tweetRDD.flatMap(twt-> Arrays.stream(twt.split(" ")).filter(str-> str.contains("#")).collect(Collectors.toList()).iterator() );
 
      hashtagRDD.foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(x)));
      
      JavaPairDStream<String, Long> cntByVal = hashtagRDD.countByValue();
      
      cntByVal.foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(new Date()+" ::The count tag is ::"+x._1() +" and the val is ::"+x._2())));
      
     /* hashtagRDD.window(Durations.seconds(60), Durations.seconds(30))
                .countByValue()
               .foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(new Date()+" ::The window count tag is ::"+x._1() +" and the val is ::"+x._2())));
      
     hashtagRDD.countByValueAndWindow(Durations.seconds(60), Durations.seconds(30))
               .foreachRDD(tRDD -> tRDD.foreach(x->System.out.println("The window&count tag is ::"+x._1() +" and the val is ::"+x._2())));
      */
     hashtagRDD.window(Durations.minutes(8)).countByValue()
     .foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(new Date()+" ::The window count tag is ::"+x._1() +" and the val is ::"+x._2())));
     hashtagRDD.window(Durations.minutes(8),Durations.minutes(2)).countByValue()
     .foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(new Date()+" ::The window count tag is ::"+x._1() +" and the val is ::"+x._2())));
     hashtagRDD.window(Durations.minutes(12),Durations.minutes(8)).countByValue()
     .foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(new Date()+" ::The window count tag is ::"+x._1() +" and the val is ::"+x._2())));
     hashtagRDD.window(Durations.minutes(2),Durations.minutes(2)).countByValue()
     .foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(new Date()+" ::The window count tag is ::"+x._1() +" and the val is ::"+x._2())));
     hashtagRDD.window(Durations.minutes(12),Durations.minutes(12)).countByValue()
     .foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(new Date()+" ::The window count tag is ::"+x._1() +" and the val is ::"+x._2())));
     
     /*hashtagRDD.window(Durations.minutes(5),Durations.minutes(2)).countByValue()
     .foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(new Date()+" ::The window count tag is ::"+x._1() +" and the val is ::"+x._2())));*/
     /* hashtagRDD.window(Durations.minutes(10),Durations.minutes(1)).countByValue()
     .foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(new Date()+" ::The window count tag is ::"+x._1() +" and the val is ::"+x._2())));*/
     
      streamingContext.start();
      try {
          streamingContext.awaitTermination();
      } catch (InterruptedException e) {
          // TODO Auto-generated catch block
          e.printStackTrace();
      }
}
 
Developer ID: PacktPublishing, Project: Apache-Spark-2x-for-Java-Developers, Lines of code: 72, Source file: KafkaExample.java

Example 8: main

import org.apache.spark.streaming.api.java.JavaDStream; //import the package/class this method depends on
public static void main(String[] args) throws Exception {
    String zkQuorum = "localhost:2181";
    String groupName = "stream";
    int numThreads = 3;
    String topicsName = "test1";
    SparkConf sparkConf = new SparkConf().setAppName("WordCountKafkaStream");

    JavaStreamingContext javaStreamingContext = new JavaStreamingContext(sparkConf, new Duration(5000));

    Map<String, Integer> topicToBeUsedBySpark = new HashMap<>();
    String[] topics = topicsName.split(",");
    for (String topic : topics) {
        topicToBeUsedBySpark.put(topic, numThreads);
    }

    JavaPairReceiverInputDStream<String, String> streamMessages =
            KafkaUtils.createStream(javaStreamingContext, zkQuorum, groupName, topicToBeUsedBySpark);

    JavaDStream<String> lines = streamMessages.map(new Function<Tuple2<String, String>, String>() {
        @Override
        public String call(Tuple2<String, String> tuple2) {
            return tuple2._2();
        }
    });

    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterator<String> call(String x) {
            return Arrays.asList(WORD_DELIMETER.split(x)).iterator();
        }
    });

    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(
            new PairFunction<String, String, Integer>() {
                @Override
                public Tuple2<String, Integer> call(String s) {
                    return new Tuple2<>(s, 1);
                }
            }).reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) {
            return i1 + i2;
        }
    });

    wordCounts.print();
    javaStreamingContext.start();
    javaStreamingContext.awaitTermination();
}
 
Developer ID: PacktPublishing, Project: Building-Data-Streaming-Applications-with-Apache-Kafka, Lines of code: 50, Source file: KafkaReceiverWordCountJava.java

Example 9: main

import org.apache.spark.streaming.api.java.JavaDStream; //import the package/class this method depends on
public static void main(String[] args) throws Exception {
    Logger.getLogger("org").setLevel(Level.WARN);
    Logger.getLogger("akka").setLevel(Level.WARN);

    final Pattern SPACE = Pattern.compile(" ");

    SparkConf conf = new SparkConf().setAppName("Big Apple").setMaster("local[2]");
    JavaStreamingContext ssc = new JavaStreamingContext(conf, Durations.seconds(1));

    JavaDStream<String> lines = ssc.textFileStream("src/main/resources/stream");
    lines.print();

    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterator<String> call(String x) {
            return Lists.newArrayList(SPACE.split(x)).iterator();
        }
    });

    words.foreachRDD(
            new VoidFunction2<JavaRDD<String>, Time>() {
                @Override
                public void call(JavaRDD<String> rdd, Time time) {

                    // Get the singleton instance of SQLContext
                    SQLContext sqlContext = SQLContext.getOrCreate(rdd.context());

                    // Convert RDD[String] to RDD[case class] to Dataset
                    JavaRDD<JavaRecord> rowRDD = rdd.map(new Function<String, JavaRecord>() {
                        public JavaRecord call(String word) {
                            JavaRecord record = new JavaRecord();
                            record.setWord(word);
                            return record;
                        }
                    });
                    Dataset<Row> wordsDataset = sqlContext.createDataFrame(rowRDD, JavaRecord.class);

                    // Register as table
                    wordsDataset.registerTempTable("words");

                    // Do word count on table using SQL and print it
                    Dataset wordCountsDataset =
                            sqlContext.sql("select word, count(*) as total from words group by word");
                    wordCountsDataset.show();
                }
            }
    );


    ssc.start();
    ssc.awaitTermination();

}
 
Developer ID: knoldus, Project: Sparkathon, Lines of code: 54, Source file: SQLonStreams.java

Example 10: main

import org.apache.spark.streaming.api.java.JavaDStream; //import the package/class this method depends on
public static void main(String[] args) throws Exception {

        final Pattern SPACE = Pattern.compile(" ");

        SparkConf conf = new SparkConf().setAppName("Big Apple").setMaster("local[2]");
        JavaStreamingContext ssc = new JavaStreamingContext(conf, Durations.seconds(1));

        JavaDStream<String> lines = ssc.textFileStream("src/main/resources/stream");
        lines.print();

        JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
            @Override
            public Iterator<String> call(String x) {
                return Lists.newArrayList(SPACE.split(x)).iterator();
            }
        });

        JavaPairDStream<String, Integer> wordsDstream = words.mapToPair(
                new PairFunction<String, String, Integer>() {
                    @Override
                    public Tuple2<String, Integer> call(String s) {
                        return new Tuple2<String, Integer>(s, 1);
                    }
                });

        wordsDstream.print();

        Function2<Integer, Integer, Integer> reduceFunc = new Function2<Integer, Integer, Integer>() {
            @Override
            public Integer call(Integer i1, Integer i2) {
                return i1 + i2;
            }
        };

        JavaPairDStream<String, Integer> windowedWordCounts = wordsDstream.reduceByKeyAndWindow(reduceFunc, Durations.seconds(30), Durations.seconds(10));

        windowedWordCounts.print();


        ssc.start();
        ssc.awaitTermination();

    }
 
Developer ID: knoldus, Project: Sparkathon, Lines of code: 44, Source file: Windowstream.java


Note: The org.apache.spark.streaming.api.java.JavaDStream.flatMap method examples in this article were compiled by 純淨天空 from GitHub/MSDocs and other open-source code and documentation platforms. The snippets were selected from open-source projects contributed by various developers, and copyright in the source code remains with the original authors. Consult each project's license before distributing or using the code; do not reproduce without permission.