This article collects typical usage examples of the Java method org.apache.spark.streaming.api.java.JavaPairReceiverInputDStream.map. If you have been wondering what exactly JavaPairReceiverInputDStream.map does and how to use it, the curated code examples below should help. You can also explore further usage examples of its enclosing class, org.apache.spark.streaming.api.java.JavaPairReceiverInputDStream.
The following shows 4 code examples of JavaPairReceiverInputDStream.map, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java examples.
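All four examples share one pattern: KafkaUtils.createStream returns a receiver-based stream of (key, message) pairs, and map projects each pair down to the message value. For orientation, here is a minimal sketch of that pattern; it is illustrative only, and the ZooKeeper address, consumer group, and topic name are placeholder assumptions rather than values taken from the projects below.

import java.util.Collections;
import java.util.Map;

import org.apache.spark.SparkConf;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaPairReceiverInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.kafka.KafkaUtils;

public class MapSketch {
    public static void main(String[] args) throws InterruptedException {
        SparkConf conf = new SparkConf().setAppName("MapSketch");
        JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(2));

        // Placeholder connection details -- substitute your own ZooKeeper quorum,
        // consumer group, and topic map (topic name -> number of receiver threads).
        Map<String, Integer> topics = Collections.singletonMap("my-topic", 1);
        JavaPairReceiverInputDStream<String, String> pairs =
                KafkaUtils.createStream(jssc, "localhost:2181", "my-group", topics);

        // Each element of the pair stream is a Tuple2<key, value>; map keeps
        // only the message payload (the second field).
        JavaDStream<String> values = pairs.map(tuple -> tuple._2());

        values.print();
        jssc.start();
        jssc.awaitTermination();
    }
}

This receiver-based createStream API comes from the spark-streaming-kafka 0.8 integration. The lambda form of map shown here requires Java 8, as in Example 1 below; Examples 2 through 4 spell out the equivalent anonymous Function class.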
Example 1: main
import org.apache.spark.streaming.api.java.JavaPairReceiverInputDStream; // import the package/class this method depends on
public static void main(String[] args) {
    if (args.length < 4) {
        System.err.println("Usage: JavaKafkaWordCount <zkQuorum> <group> <topics> <numThreads>");
        System.exit(1);
    }

    SparkConf sparkConf = new SparkConf().setAppName("JavaKafkaWordCount");
    // Create the context with a 2 second batch interval
    JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, new Duration(2000));

    // Map each topic to the number of receiver threads that consume it
    int numThreads = Integer.parseInt(args[3]);
    Map<String, Integer> topicMap = new HashMap<String, Integer>();
    String[] topics = args[2].split(",");
    for (String topic : topics) {
        topicMap.put(topic, numThreads);
    }

    JavaPairReceiverInputDStream<String, String> messages =
            KafkaUtils.createStream(jssc, args[0], args[1], topicMap);

    // Keep only the message payload from each (key, value) pair
    JavaDStream<String> lines = messages.map(tuple2 -> tuple2._2());
    JavaDStream<String> words = lines.flatMap(x -> Lists.newArrayList(SPACE.split(x)));
    JavaPairDStream<String, Integer> wordCounts = words
            .mapToPair(s -> new Tuple2<String, Integer>(s, 1))
            .reduceByKey((i1, i2) -> i1 + i2);

    wordCounts.print();
    jssc.start();
    jssc.awaitTermination();
}
Example 2: main
import org.apache.spark.streaming.api.java.JavaPairReceiverInputDStream; // import the package/class this method depends on
public static void main(String[] args) {
    Logger.getLogger("org").setLevel(Level.WARN);
    Logger.getLogger("akka").setLevel(Level.WARN);

    // Hard-coded standalone master and application jar; adjust for your cluster
    SparkConf sparkConf = new SparkConf()
            .setMaster("spark://10.204.100.206:7077")
            .setAppName("StreamingKafka101");
    sparkConf.setJars(new String[] { "target\\TestProjects-1.0-SNAPSHOT.jar" });
    sparkConf.set("spark.executor.memory", "4G");

    // Batch interval in seconds, optionally overridden by the first argument
    int duration = 2;
    if (args.length > 0) {
        try {
            duration = Integer.parseInt(args[0]);
            System.out.println("duration changed to " + duration);
        } catch (Exception e) {
            System.out.println("Duration reset to default of " + duration + " seconds");
        }
    }

    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(duration));

    // Consume the "loadtest" topic with 4 receiver threads
    Map<String, Integer> topicMap = new HashMap<String, Integer>();
    topicMap.put("loadtest", 4);
    JavaPairReceiverInputDStream<String, String> kafkaStream =
            KafkaUtils.createStream(ssc, "10.204.100.172:2182", "kafka-group1", topicMap);

    // Extract the message payload from each (key, value) pair
    JavaDStream<String> lines = kafkaStream.map(new Function<Tuple2<String, String>, String>() {
        @Override
        public String call(Tuple2<String, String> tuple2) {
            return tuple2._2();
        }
    });

    // Report how many records arrived in each batch
    lines.foreachRDD(new Function<JavaRDD<String>, Void>() {
        @Override
        public Void call(JavaRDD<String> rdd) throws Exception {
            System.out.println(new Date() + " Total records read: " + rdd.count());
            return null;
        }
    });

    ssc.start();
    ssc.awaitTermination();
}
Example 3: main
import org.apache.spark.streaming.api.java.JavaPairReceiverInputDStream; // import the package/class this method depends on
@SuppressWarnings("serial")
public static void main(String[] args) throws InterruptedException {
// if (args.length < 4) {
// System.err.println("Usage: JavaKafkaWordCount <zkQuorum> <group> <topics> <numThreads>");
// System.exit(1);
// }
args = new String[4];
args[0]="localhost:2181";
args[1]= "1";
args[2]= "test";
args[3]= "1";
SparkConf sparkConf = new SparkConf().setAppName("JavaKafkaWordCount").setMaster("spark://Impetus-NL163U:7077");
// Create the context with a 1 second batch size
JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, new Duration(20000));
int numThreads = Integer.parseInt(args[3]);
Map<String, Integer> topicMap = new HashMap<String, Integer>();
String[] topics = args[2].split(",");
for (String topic: topics) {
topicMap.put(topic, numThreads);
}
JavaPairReceiverInputDStream<String, String> messages =
KafkaUtils.createStream(jssc, args[0], args[1], topicMap);
final JavaDStream<String> lines = messages.map(new Function<Tuple2<String,String>, String>() {
@Override
public String call(Tuple2<String, String> v1) throws Exception {
ObjectMapper objectMapper = new ObjectMapper();
objectMapper.configure(Feature.USE_ANNOTATIONS, false);
Map<String,String> mapValue = objectMapper.readValue(v1._2(), new TypeReference<Map<String,String>>() {
});
Collection<String> values = mapValue.values();
String finalString = "";
for (Iterator<String> iterator = values.iterator(); iterator.hasNext();) {
String value = iterator.next();
if(finalString.length()==0){
finalString = finalString +value;
}else {
finalString = finalString+","+ value;
}
}
return finalString;
}
});
lines.print();
new Thread(){
public void run() {
while(true){
try {
Thread.sleep(1000);
} catch (InterruptedException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
System.out.println("#############################################################################"+lines.count());
}
};
}.start();
jssc.start();
jssc.awaitTermination();
}
Developer: PacktPublishing, Project: Practical-Real-time-Processing-and-Analytics, Lines: 66, Source: JavaKafkaWordCount.java
Example 4: main
import org.apache.spark.streaming.api.java.JavaPairReceiverInputDStream; // import the package/class this method depends on
public static void main(String[] args) throws Exception {
    String zkQuorum = "localhost:2181";
    String groupName = "stream";
    int numThreads = 3;
    String topicsName = "test1";

    SparkConf sparkConf = new SparkConf().setAppName("WordCountKafkaStream");
    // Create the context with a 5 second batch interval
    JavaStreamingContext javaStreamingContext = new JavaStreamingContext(sparkConf, new Duration(5000));

    // Map each topic to the number of receiver threads that consume it
    Map<String, Integer> topicToBeUsedBySpark = new HashMap<>();
    String[] topics = topicsName.split(",");
    for (String topic : topics) {
        topicToBeUsedBySpark.put(topic, numThreads);
    }

    JavaPairReceiverInputDStream<String, String> streamMessages =
            KafkaUtils.createStream(javaStreamingContext, zkQuorum, groupName, topicToBeUsedBySpark);

    // Drop the key and keep only the message payload
    JavaDStream<String> lines = streamMessages.map(new Function<Tuple2<String, String>, String>() {
        @Override
        public String call(Tuple2<String, String> tuple2) {
            return tuple2._2();
        }
    });

    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterator<String> call(String x) {
            return Arrays.asList(WORD_DELIMETER.split(x)).iterator();
        }
    });

    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(
            new PairFunction<String, String, Integer>() {
                @Override
                public Tuple2<String, Integer> call(String s) {
                    return new Tuple2<>(s, 1);
                }
            }).reduceByKey(new Function2<Integer, Integer, Integer>() {
                @Override
                public Integer call(Integer i1, Integer i2) {
                    return i1 + i2;
                }
            });

    wordCounts.print();
    javaStreamingContext.start();
    javaStreamingContext.awaitTermination();
}
Developer: PacktPublishing, Project: Building-Data-Streaming-Applications-with-Apache-Kafka, Lines: 50, Source: KafkaReceiverWordCountJava.java