This page collects typical usage examples of the Java method org.apache.spark.streaming.api.java.JavaDStream.countByValue. If you are wondering what JavaDStream.countByValue does, how to call it, or need working examples, the curated code samples below should help. You can also explore the enclosing class, org.apache.spark.streaming.api.java.JavaDStream, for related methods.
Two code examples of JavaDStream.countByValue are shown below, sorted by popularity by default.
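Before the collected examples, here is a minimal, self-contained sketch of what countByValue does: for each batch, it turns a JavaDStream<T> into a JavaPairDStream<T, Long> pairing each distinct element with its occurrence count. The socket source, host/port, and batch interval below are illustrative assumptions, not taken from the examples that follow.

import java.util.Arrays;
import org.apache.spark.SparkConf;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaPairDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;

public class CountByValueSketch {
    public static void main(String[] args) throws InterruptedException {
        SparkConf conf = new SparkConf().setAppName("CountByValueSketch").setMaster("local[*]");
        JavaStreamingContext ssc = new JavaStreamingContext(conf, Durations.seconds(10));
        // Assumed input source: lines of text from a local socket (e.g. `nc -lk 9999`)
        JavaDStream<String> words = ssc.socketTextStream("localhost", 9999)
                .flatMap(line -> Arrays.asList(line.split(" ")).iterator());
        // countByValue: each distinct word in the batch is paired with its count
        JavaPairDStream<String, Long> counts = words.countByValue();
        counts.print();
        ssc.start();
        ssc.awaitTermination();
    }
}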
Example 1: queryTweets
import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class the method depends on
// Additional Spark/Twitter imports used by this snippet; ImageInfo, ImageFeature,
// ImageMatch, SketchProcedure, HammingFiltering, MatchExtractorStreaming and
// WeightFiltering are project-specific classes not shown here.
import org.apache.spark.streaming.api.java.JavaPairDStream;
import org.apache.spark.streaming.api.java.JavaReceiverInputDStream;
import twitter4j.Status;
protected void queryTweets(JavaReceiverInputDStream<Status> tweets, int rank) {
    // Compute sketches
    JavaPairDStream<ImageInfo, ImageFeature> imFeatures = computeImageFeatures(tweets);
    JavaPairDStream<ImageInfo, ImageFeature> sketches = imFeatures.mapValues(
            new SketchProcedure(indParams.getSketchFunction(), indParams.getNumTables()));
    // Query specific features and filter candidates by Hamming distance
    JavaPairDStream<ImageFeature, ImageFeature> candidates =
            system.queryFeaturesStreaming(conn, indParams, sketches);
    JavaPairDStream<ImageFeature, ImageFeature> filteredHamming =
            candidates.filter(new HammingFiltering(indParams.getHammingDistance()));
    // Group matches by image; countByValue assigns each match its occurrence count as a weight
    JavaDStream<ImageMatch> matchedIds = filteredHamming.map(new MatchExtractorStreaming());
    JavaPairDStream<ImageMatch, Long> result = matchedIds.countByValue();
    // Filter by weight if requested
    if (rank > 0) {
        result = result.filter(new WeightFiltering(rank));
    }
    // Print results
    result.print();
}
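A detail worth noting in Example 1: countByValue groups elements by object equality (internally it maps each element to a (value, 1) pair and reduces by key), so a custom element type such as ImageMatch only aggregates correctly if it implements equals and hashCode consistently. Below is a hypothetical sketch of such a value class; the field names are assumptions for illustration, not taken from the original project.

import java.io.Serializable;
import java.util.Objects;

// Hypothetical value class: countByValue will only merge two instances
// into one count if equals/hashCode treat them as the same match.
public class ImageMatchSketch implements Serializable {
    private final long queryImageId;
    private final long matchedImageId;

    public ImageMatchSketch(long queryImageId, long matchedImageId) {
        this.queryImageId = queryImageId;
        this.matchedImageId = matchedImageId;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (!(o instanceof ImageMatchSketch)) return false;
        ImageMatchSketch other = (ImageMatchSketch) o;
        return queryImageId == other.queryImageId && matchedImageId == other.matchedImageId;
    }

    @Override
    public int hashCode() {
        return Objects.hash(queryImageId, matchedImageId);
    }
}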
Example 2: main
import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class the method depends on
// Additional imports needed to make the example compile
// (TweetText is a project-specific mapper that extracts the tweet text):
import java.util.Arrays;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.apache.log4j.Level;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaInputDStream;
import org.apache.spark.streaming.api.java.JavaPairDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.kafka010.ConsumerStrategies;
import org.apache.spark.streaming.kafka010.KafkaUtils;
import org.apache.spark.streaming.kafka010.LocationStrategies;
import scala.Tuple2;
public static void main(String[] args) {
    // Windows-specific property, required if Hadoop is not installed or HADOOP_HOME is not set
    System.setProperty("hadoop.home.dir", "E:\\hadoop");

    SparkConf conf = new SparkConf().setAppName("KafkaExample").setMaster("local[*]");
    JavaSparkContext sc = new JavaSparkContext(conf);
    // 2-minute batch interval; all window/slide durations below must be multiples of it
    JavaStreamingContext streamingContext = new JavaStreamingContext(sc, Durations.minutes(2));
    // Checkpointing is required by stateful operations such as countByValueAndWindow
    streamingContext.checkpoint("E:\\hadoop\\checkpoint");

    Logger rootLogger = LogManager.getRootLogger();
    rootLogger.setLevel(Level.WARN);

    Map<String, Object> kafkaParams = new HashMap<>();
    kafkaParams.put("bootstrap.servers", "10.0.75.1:9092");
    kafkaParams.put("key.deserializer", StringDeserializer.class);
    kafkaParams.put("value.deserializer", StringDeserializer.class);
    kafkaParams.put("group.id", "use_a_separate_group_id_for_each_stream");
    kafkaParams.put("auto.offset.reset", "latest");
    // kafkaParams.put("enable.auto.commit", false);

    Collection<String> topics = Arrays.asList("mytopic", "anothertopic");
    final JavaInputDStream<ConsumerRecord<String, String>> stream =
            KafkaUtils.createDirectStream(streamingContext,
                    LocationStrategies.PreferConsistent(),
                    ConsumerStrategies.<String, String>Subscribe(topics, kafkaParams));

    JavaPairDStream<String, String> pairRDD =
            stream.mapToPair(record -> new Tuple2<>(record.key(), record.value()));
    pairRDD.foreachRDD(pRDD -> pRDD.foreach(tuple ->
            System.out.println(new Date() + " :: Kafka msg key ::" + tuple._1() + " the val is ::" + tuple._2())));

    // Extract the tweet text from each Kafka message value
    JavaDStream<String> tweetRDD = pairRDD.map(x -> x._2()).map(new TweetText());
    tweetRDD.foreachRDD(tRDD -> tRDD.foreach(x -> System.out.println(new Date() + " :: " + x)));

    // Split each tweet into words and keep only the hashtags
    JavaDStream<String> hashtagRDD = tweetRDD.flatMap(twt ->
            Arrays.stream(twt.split(" ")).filter(str -> str.contains("#")).collect(Collectors.toList()).iterator());
    hashtagRDD.foreachRDD(tRDD -> tRDD.foreach(x -> System.out.println(x)));

    // Per-batch counts: each distinct hashtag in the 2-minute batch paired with its count
    JavaPairDStream<String, Long> cntByVal = hashtagRDD.countByValue();
    cntByVal.foreachRDD(tRDD -> tRDD.foreach(x ->
            System.out.println(new Date() + " ::The count tag is ::" + x._1() + " and the val is ::" + x._2())));

    /* Invalid with a 2-minute batch interval (window and slide must be multiples of it):
    hashtagRDD.window(Durations.seconds(60), Durations.seconds(30))
            .countByValue()
            .foreachRDD(tRDD -> tRDD.foreach(x -> System.out.println(new Date() + " ::The window count tag is ::" + x._1() + " and the val is ::" + x._2())));
    hashtagRDD.countByValueAndWindow(Durations.seconds(60), Durations.seconds(30))
            .foreachRDD(tRDD -> tRDD.foreach(x -> System.out.println("The window&count tag is ::" + x._1() + " and the val is ::" + x._2())));
    */

    // 8-minute window, sliding every batch interval (2 minutes) by default
    hashtagRDD.window(Durations.minutes(8)).countByValue()
            .foreachRDD(tRDD -> tRDD.foreach(x -> System.out.println(new Date() + " ::The window count tag is ::" + x._1() + " and the val is ::" + x._2())));
    // 8-minute window, sliding every 2 minutes
    hashtagRDD.window(Durations.minutes(8), Durations.minutes(2)).countByValue()
            .foreachRDD(tRDD -> tRDD.foreach(x -> System.out.println(new Date() + " ::The window count tag is ::" + x._1() + " and the val is ::" + x._2())));
    // 12-minute window, sliding every 8 minutes
    hashtagRDD.window(Durations.minutes(12), Durations.minutes(8)).countByValue()
            .foreachRDD(tRDD -> tRDD.foreach(x -> System.out.println(new Date() + " ::The window count tag is ::" + x._1() + " and the val is ::" + x._2())));
    // 2-minute window, sliding every 2 minutes (equivalent to the per-batch counts above)
    hashtagRDD.window(Durations.minutes(2), Durations.minutes(2)).countByValue()
            .foreachRDD(tRDD -> tRDD.foreach(x -> System.out.println(new Date() + " ::The window count tag is ::" + x._1() + " and the val is ::" + x._2())));
    // 12-minute window, sliding every 12 minutes (non-overlapping windows)
    hashtagRDD.window(Durations.minutes(12), Durations.minutes(12)).countByValue()
            .foreachRDD(tRDD -> tRDD.foreach(x -> System.out.println(new Date() + " ::The window count tag is ::" + x._1() + " and the val is ::" + x._2())));

    /* Also invalid here: 5 minutes is not a multiple of the 2-minute batch interval,
       and a 1-minute slide is shorter than it.
    hashtagRDD.window(Durations.minutes(5), Durations.minutes(2)).countByValue()
            .foreachRDD(tRDD -> tRDD.foreach(x -> System.out.println(new Date() + " ::The window count tag is ::" + x._1() + " and the val is ::" + x._2())));
    hashtagRDD.window(Durations.minutes(10), Durations.minutes(1)).countByValue()
            .foreachRDD(tRDD -> tRDD.foreach(x -> System.out.println(new Date() + " ::The window count tag is ::" + x._1() + " and the val is ::" + x._2())));
    */

    streamingContext.start();
    try {
        streamingContext.awaitTermination();
    } catch (InterruptedException e) {
        e.printStackTrace();
    }
}
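The commented-out block in Example 2 hints at countByValueAndWindow, which fuses the window and count steps into one operator. As a follow-up, here is a short hedged sketch of the equivalence; it is a fragment meant to slot into Example 2 after hashtagRDD is defined (the 10-minute/2-minute durations are illustrative, chosen as multiples of the example's 2-minute batch interval).

// Two formulations that produce the same windowed hashtag counts.

// 1) Window first, then count each windowed batch from scratch.
JavaPairDStream<String, Long> windowedCounts =
        hashtagRDD.window(Durations.minutes(10), Durations.minutes(2)).countByValue();

// 2) Fused operator; it maintains counts incrementally as old batches slide
//    out and new ones slide in, and therefore needs checkpointing, which
//    Example 2 already enables via streamingContext.checkpoint(...).
JavaPairDStream<String, Long> fusedCounts =
        hashtagRDD.countByValueAndWindow(Durations.minutes(10), Durations.minutes(2));

fusedCounts.foreachRDD(rdd -> rdd.foreach(
        pair -> System.out.println(pair._1() + " -> " + pair._2())));

For long windows with short slides, the fused form avoids recounting the entire window on every slide, which is the main reason to prefer it over window().countByValue().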