This article collects typical usage examples of the Java class org.apache.spark.streaming.kafka.KafkaUtils. If you are wondering what the KafkaUtils class is for, how to use it, or are looking for concrete examples, the curated code samples below may help.
The KafkaUtils class belongs to the org.apache.spark.streaming.kafka package. 15 code examples of the class are shown below, sorted by popularity by default.
Example 1: main
import org.apache.spark.streaming.kafka.KafkaUtils; // import the required package/class
public static void main(String[] args) {
SparkConf conf = new SparkConf()
.setAppName("kafka-sandbox")
.setMaster("local[*]");
JavaSparkContext sc = new JavaSparkContext(conf);
JavaStreamingContext ssc = new JavaStreamingContext(sc, new Duration(2000));
Set<String> topics = Collections.singleton("mytopic");
Map<String, String> kafkaParams = new HashMap<>();
kafkaParams.put("metadata.broker.list", "localhost:9092");
JavaPairInputDStream<String, String> directKafkaStream = KafkaUtils.createDirectStream(ssc,
String.class, String.class, StringDecoder.class, StringDecoder.class, kafkaParams, topics);
directKafkaStream.foreachRDD(rdd -> {
System.out.println("--- New RDD with " + rdd.partitions().size()
+ " partitions and " + rdd.count() + " records");
rdd.foreach(record -> System.out.println(record._2));
});
ssc.start();
ssc.awaitTermination();
}
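To have data flowing through the mytopic direct stream above during local testing, messages can be published with a plain Kafka producer. The sketch below assumes the kafka-clients library is available and reuses the broker address from the example; the class name and record contents are illustrative only.

import java.util.Properties;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;

public class SandboxProducer {
    public static void main(String[] args) {
        Properties props = new Properties();
        // same broker as the streaming example above
        props.put("bootstrap.servers", "localhost:9092");
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        try (KafkaProducer<String, String> producer = new KafkaProducer<>(props)) {
            for (int i = 0; i < 100; i++) {
                // each record shows up as a Tuple2<String, String> in the direct stream
                producer.send(new ProducerRecord<>("mytopic", "key-" + i, "value-" + i));
            }
        }
    }
}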
Example 2: main
import org.apache.spark.streaming.kafka.KafkaUtils; // import the required package/class
public static void main(String[] args) throws InterruptedException {
SparkConf sc = new SparkConf().setAppName("POC-Kafka-New");
try(JavaStreamingContext jsc = new JavaStreamingContext(sc, new Duration(2000))) {
JavaPairInputDStream<String, String> stream = KafkaUtils.createDirectStream(
jsc, String.class, String.class, StringDecoder.class, StringDecoder.class,
Collections.singletonMap("metadata.broker.list", KAFKA_HOST_PORT),
Collections.singleton(EXAMPLE_TOPIC));
JavaDStream<ExampleXML> records = stream.map(t -> t._2()).map(new ParseXML());
records.foreachRDD(rdd -> System.out.printf("Amount of XMLs: %d\n", rdd.count()));
jsc.start();
jsc.awaitTermination();
}
}
Example 3: main
import org.apache.spark.streaming.kafka.KafkaUtils; // import the required package/class
public static void main(String[] args) throws InterruptedException, IOException {
SparkConf sc = new SparkConf().setAppName("POC-BigQuery");
try(JavaStreamingContext jsc = new JavaStreamingContext(sc, new Duration(60000))) {
JavaPairInputDStream<String, String> stream = KafkaUtils.createDirectStream(
jsc, String.class, String.class, StringDecoder.class, StringDecoder.class,
Collections.singletonMap("metadata.broker.list", KAFKA_HOST_PORT), Collections.singleton(EXAMPLE_TOPIC));
Configuration conf = new Configuration();
BigQueryConfiguration.configureBigQueryOutput(conf, BQ_EXAMPLE_TABLE, BQ_EXAMPLE_SCHEMA);
conf.set("mapreduce.job.outputformat.class", BigQueryOutputFormat.class.getName());
JavaDStream<ExampleXML> records = stream.map(t -> t._2()).map(new ParseXML());
records.foreachRDD(rdd -> {
System.out.printf("Amount of XMLs: %d\n", rdd.count());
long time = System.currentTimeMillis();
rdd.mapToPair(new PrepToBQ()).saveAsNewAPIHadoopDataset(conf);
System.out.printf("Sent to BQ in %fs\n", (System.currentTimeMillis()-time)/1000f);
});
jsc.start();
jsc.awaitTermination();
}
}
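BQ_EXAMPLE_TABLE and BQ_EXAMPLE_SCHEMA are constants defined elsewhere in the project and are not shown here. As a purely hypothetical sketch of what such constants could look like (the project, dataset, table and field names below are placeholders, not values from the original code):

// Hypothetical values for illustration only: a BigQuery output table reference
// and a JSON description of its columns.
private static final String BQ_EXAMPLE_TABLE = "my-project:poc_dataset.xml_records";
private static final String BQ_EXAMPLE_SCHEMA =
        "[{\"name\": \"field1\", \"type\": \"STRING\"}, {\"name\": \"field2\", \"type\": \"INTEGER\"}]";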
Example 4: main
import org.apache.spark.streaming.kafka.KafkaUtils; // import the required package/class
public static void main(String[] args) throws IOException {
Flags.setFromCommandLineArgs(THE_OPTIONS, args);
// Initialize the Spark configuration.
SparkConf conf = new SparkConf().setAppName("A SECTONG Application: Apache Log Analysis with Spark");
JavaSparkContext sc = new JavaSparkContext(conf);
JavaStreamingContext jssc = new JavaStreamingContext(sc, Flags.getInstance().getSlideInterval());
SQLContext sqlContext = new SQLContext(sc);
// Initialize Kafka parameters
HashSet<String> topicsSet = new HashSet<String>(Arrays.asList(Flags.getInstance().getKafka_topic().split(",")));
HashMap<String, String> kafkaParams = new HashMap<String, String>();
kafkaParams.put("metadata.broker.list", Flags.getInstance().getKafka_broker());
// Read data from the Kafka stream
JavaPairInputDStream<String, String> messages = KafkaUtils.createDirectStream(jssc, String.class, String.class,
StringDecoder.class, StringDecoder.class, kafkaParams, topicsSet);
JavaDStream<String> lines = messages.map(new Function<Tuple2<String, String>, String>() {
private static final long serialVersionUID = 5266880065425088203L;
public String call(Tuple2<String, String> tuple2) {
return tuple2._2();
}
});
JavaDStream<ApacheAccessLog> accessLogsDStream = lines.flatMap(line -> {
List<ApacheAccessLog> list = new ArrayList<>();
try {
// Parse each log line
list.add(ApacheAccessLog.parseFromLogLine(line));
return list;
} catch (RuntimeException e) {
return list;
}
}).cache();
accessLogsDStream.foreachRDD(rdd -> {
// rdd to DataFrame
DataFrame df = sqlContext.createDataFrame(rdd, ApacheAccessLog.class);
// Write to Parquet files
df.write().partitionBy("ipAddress", "method", "responseCode").mode(SaveMode.Append).parquet(Flags.getInstance().getParquetFile());
return null;
});
// Start the streaming job
jssc.start(); // start the computation
jssc.awaitTermination(); // wait for termination
}
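After the job above has appended a few batches, the partitioned Parquet output can be inspected with the same SQLContext. A short sketch (the path is the same one the job writes to):

DataFrame logs = sqlContext.read().parquet(Flags.getInstance().getParquetFile());
logs.registerTempTable("access_logs");
// count requests per response code across all written partitions
sqlContext.sql("SELECT responseCode, COUNT(*) AS hits FROM access_logs GROUP BY responseCode").show();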
Example 5: kafka
import org.apache.spark.streaming.kafka.KafkaUtils; // import the required package/class
private static <K, V> TransformEvaluator<KafkaIO.Read.Unbound<K, V>> kafka() {
return new TransformEvaluator<KafkaIO.Read.Unbound<K, V>>() {
@Override
public void evaluate(KafkaIO.Read.Unbound<K, V> transform, EvaluationContext context) {
StreamingEvaluationContext sec = (StreamingEvaluationContext) context;
JavaStreamingContext jssc = sec.getStreamingContext();
Class<K> keyClazz = transform.getKeyClass();
Class<V> valueClazz = transform.getValueClass();
Class<? extends Decoder<K>> keyDecoderClazz = transform.getKeyDecoderClass();
Class<? extends Decoder<V>> valueDecoderClazz = transform.getValueDecoderClass();
Map<String, String> kafkaParams = transform.getKafkaParams();
Set<String> topics = transform.getTopics();
JavaPairInputDStream<K, V> inputPairStream = KafkaUtils.createDirectStream(jssc, keyClazz,
valueClazz, keyDecoderClazz, valueDecoderClazz, kafkaParams, topics);
JavaDStream<WindowedValue<KV<K, V>>> inputStream =
inputPairStream.map(new Function<Tuple2<K, V>, KV<K, V>>() {
@Override
public KV<K, V> call(Tuple2<K, V> t2) throws Exception {
return KV.of(t2._1(), t2._2());
}
}).map(WindowingHelpers.<KV<K, V>>windowFunction());
sec.setStream(transform, inputStream);
}
};
}
Example 6: main
import org.apache.spark.streaming.kafka.KafkaUtils; // import the required package/class
public static void main(String[] args) {
if (args.length < 4) {
System.err.println("Usage: JavaKafkaWordCount <zkQuorum> <group> <topics> <numThreads>");
System.exit(1);
}
SparkConf sparkConf = new SparkConf().setAppName("JavaKafkaWordCount");
// Create the context with a 2 second batch interval
JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, new Duration(2000));
int numThreads = Integer.parseInt(args[3]);
Map<String, Integer> topicMap = new HashMap<String, Integer>();
String[] topics = args[2].split(",");
for (String topic : topics) {
topicMap.put(topic, numThreads);
}
JavaPairReceiverInputDStream<String, String> messages = KafkaUtils.createStream(jssc, args[0], args[1],
topicMap);
JavaDStream<String> lines = messages.map(tuple2 -> tuple2._2());
JavaDStream<String> words = lines.flatMap(x -> Lists.newArrayList(SPACE.split(x)));
JavaPairDStream<String, Integer> wordCounts = words.mapToPair(s -> new Tuple2<String, Integer>(s, 1)).reduceByKey(
(i1, i2) -> i1 + i2);
wordCounts.print();
jssc.start();
jssc.awaitTermination();
}
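The four positional arguments are the ZooKeeper quorum, the consumer group id, a comma-separated list of topics, and the number of receiver threads per topic. A hypothetical invocation (all values are placeholders) would be:

// equivalent to running: JavaKafkaWordCount localhost:2181 word-count-group topicA,topicB 2
JavaKafkaWordCount.main(new String[] {"localhost:2181", "word-count-group", "topicA,topicB", "2"});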
Example 7: startNewStream
import org.apache.spark.streaming.kafka.KafkaUtils; // import the required package/class
private JavaPairDStream<String, String> startNewStream(JavaStreamingContext jsc) {
JavaPairInputDStream<String, String> stream = KafkaUtils.createDirectStream(
jsc, String.class, String.class, StringDecoder.class, StringDecoder.class,
ImmutableMap.of("metadata.broker.list", kafka, "auto.offset.reset", "smallest"),
Collections.singleton(topic));
return stream.transformToPair(new ToPairWithOffsets<>(tuple -> tuple._2()));
}
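The ToPairWithOffsets helper is project-specific and not shown here. With the direct API, the offsets covered by each micro-batch can be read from the underlying KafkaRDD through the HasOffsetRanges interface; the following sketch shows that standard pattern, which may or may not match what ToPairWithOffsets does internally:

import org.apache.spark.streaming.kafka.HasOffsetRanges;
import org.apache.spark.streaming.kafka.OffsetRange;

// log the offset range of every batch, then pass the batch through unchanged
JavaPairDStream<String, String> withLoggedOffsets = stream.transformToPair(rdd -> {
    OffsetRange[] ranges = ((HasOffsetRanges) rdd.rdd()).offsetRanges();
    for (OffsetRange range : ranges) {
        System.out.println(range.topic() + "/" + range.partition()
                + ": " + range.fromOffset() + " -> " + range.untilOffset());
    }
    return rdd;
});

Note that the cast to HasOffsetRanges only succeeds on the RDD produced directly by createDirectStream, before any shuffle or repartition.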
Example 8: startFromOffsets
import org.apache.spark.streaming.kafka.KafkaUtils; // import the required package/class
private static JavaPairDStream<String, String> startFromOffsets(JavaStreamingContext jsc, String offsetsInput) {
Map<TopicAndPartition, Long> map = new HashMap<>();
for (String partition : offsetsInput.split(",")) {
String[] offset = partition.split(":");
map.put(new TopicAndPartition(EXAMPLE_TOPIC, Integer.parseInt(offset[0])), Long.parseLong(offset[1]));
}
JavaDStream<String> stream = KafkaUtils.createDirectStream(jsc, String.class, String.class, StringDecoder.class,
StringDecoder.class, String.class, Collections.singletonMap("metadata.broker.list", KAFKA_HOST_PORT), map,
msg -> msg.message());
return stream.transformToPair(new ToPairWithOffset<>(str -> str));
}
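offsetsInput is a comma-separated list of partition:offset pairs for EXAMPLE_TOPIC, so resuming partition 0 at offset 42 and partition 1 at offset 7 would look like this (the numbers are illustrative):

// partition 0 from offset 42, partition 1 from offset 7
JavaPairDStream<String, String> stream = startFromOffsets(jsc, "0:42,1:7");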
Example 9: startNewStream
import org.apache.spark.streaming.kafka.KafkaUtils; // import the required package/class
private static JavaPairDStream<String, String> startNewStream(JavaStreamingContext jsc) {
JavaPairInputDStream<String, String> stream = KafkaUtils.createDirectStream(jsc, String.class, String.class,
StringDecoder.class, StringDecoder.class, Collections.singletonMap("metadata.broker.list", KAFKA_HOST_PORT),
Collections.singleton(EXAMPLE_TOPIC));
return stream.transformToPair(new ToPairWithOffset<>(tuple -> tuple._2()));
}
Example 10: main
import org.apache.spark.streaming.kafka.KafkaUtils; // import the required package/class
public static void main(String[] args) throws InterruptedException {
SparkConf sc = new SparkConf().setAppName("POC-Kafka");
try(JavaStreamingContext jsc = new JavaStreamingContext(sc, new Duration(2000))) {
JavaPairReceiverInputDStream<String, String> stream = KafkaUtils.createStream(
jsc, ZK_HOST_PORT, "a_group_id", Collections.singletonMap(EXAMPLE_TOPIC, 1));
JavaDStream<ExampleXML> records = stream.map(t -> t._2()).map(new ParseXML());
records.foreachRDD(rdd -> System.out.printf("Amount of XMLs: %d\n", rdd.count()));
jsc.start();
jsc.awaitTermination();
}
}
Example 11: create
import org.apache.spark.streaming.kafka.KafkaUtils; // import the required package/class
@Override public JavaDStream<byte[]> create(JavaStreamingContext jsc) {
return KafkaUtils.createDirectStream(
jsc,
byte[].class,
byte[].class,
DefaultDecoder.class,
DefaultDecoder.class,
kafkaParams(),
Collections.singleton(topic()))
.map(m -> m._2); // get value
}
Example 12: processStream
import org.apache.spark.streaming.kafka.KafkaUtils; // import the required package/class
private static void processStream(JavaStreamingContext ssc, JavaSparkContext sc) {
System.out.println("--> Processing stream");
Map<String, String> props = new HashMap<>();
props.put("bootstrap.servers", "localhost:9092");
props.put("schema.registry.url", "http://localhost:8081");
props.put("group.id", "spark");
props.put("specific.avro.reader", "true");
props.put("value.deserializer", "io.confluent.kafka.serializers.KafkaAvroDeserializer");
props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
Set<String> topicsSet = new HashSet<>(Collections.singletonList("test"));
JavaPairInputDStream<String, Object> stream = KafkaUtils.createDirectStream(ssc, String.class, Object.class,
StringDecoder.class, KafkaAvroDecoder.class, props, topicsSet);
stream.foreachRDD(rdd -> {
rdd.foreachPartition(iterator -> {
while (iterator.hasNext()) {
Tuple2<String, Object> next = iterator.next();
Model model = (Model) next._2();
System.out.println(next._1() + " --> " + model);
}
});
});
}
Example 13: main
import org.apache.spark.streaming.kafka.KafkaUtils; // import the required package/class
public static void main(String[] args) {
SparkConf conf = new SparkConf()
.setAppName("kafka-sandbox")
.setMaster("local[*]");
JavaSparkContext sc = new JavaSparkContext(conf);
JavaStreamingContext ssc = new JavaStreamingContext(sc, new Duration(2000));
Set<String> topics = Collections.singleton("mytopic");
Map<String, String> kafkaParams = new HashMap<>();
kafkaParams.put("metadata.broker.list", "localhost:9092");
JavaPairInputDStream<String, byte[]> directKafkaStream = KafkaUtils.createDirectStream(ssc,
String.class, byte[].class, StringDecoder.class, DefaultDecoder.class, kafkaParams, topics);
directKafkaStream
.map(message -> recordInjection.invert(message._2).get())
.foreachRDD(rdd -> {
rdd.foreach(record -> {
System.out.println("str1= " + record.get("str1")
+ ", str2= " + record.get("str2")
+ ", int1=" + record.get("int1"));
});
});
ssc.start();
ssc.awaitTermination();
}
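recordInjection is referenced but not defined in this snippet. A common way to build such an injection is with Twitter's bijection-avro library; the sketch below assumes that library and an Avro schema containing the str1, str2 and int1 fields read above (the schema string itself is illustrative):

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import com.twitter.bijection.Injection;
import com.twitter.bijection.avro.GenericAvroCodecs;

private static final String USER_SCHEMA = "{"
        + "\"type\":\"record\",\"name\":\"myrecord\",\"fields\":["
        + "{\"name\":\"str1\",\"type\":\"string\"},"
        + "{\"name\":\"str2\",\"type\":\"string\"},"
        + "{\"name\":\"int1\",\"type\":\"int\"}]}";
private static final Schema SCHEMA = new Schema.Parser().parse(USER_SCHEMA);
// decodes the byte[] Kafka payload back into an Avro GenericRecord
private static final Injection<GenericRecord, byte[]> recordInjection = GenericAvroCodecs.toBinary(SCHEMA);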
Example 14: startReadingDataStream
import org.apache.spark.streaming.kafka.KafkaUtils; // import the required package/class
public void startReadingDataStream(IDataProcessor processor) {
// first parameter is Kafka topic, and second is content (in this case,
// a line)
JavaPairInputDStream<String, String> messages = KafkaUtils
.createDirectStream(jssc, String.class, String.class,
StringDecoder.class, StringDecoder.class, kafkaParams,
topicSet);
processor.process(messages);
// start the computation
jssc.start();
jssc.awaitTermination();
}
Example 15: stringStreamFromKafkaWithTime
import org.apache.spark.streaming.kafka.KafkaUtils; // import the required package/class
@Override
public SparkWorkloadOperator<WithTime<String>> stringStreamFromKafkaWithTime(String zkConStr,
String kafkaServers,
String group,
String topics,
String offset,
String componentId,
int parallelism) {
HashSet<String> topicsSet = new HashSet<>(Arrays.asList(topics.split(",")));
HashMap<String, String> kafkaParams = new HashMap<>();
kafkaParams.put("metadata.broker.list", kafkaServers);
kafkaParams.put("auto.offset.reset", offset);
kafkaParams.put("zookeeper.connect", zkConStr);
kafkaParams.put("group.id", group);
// Create direct kafka stream with brokers and topics
JavaPairInputDStream<String, String> messages = KafkaUtils.createDirectStream(
jssc,
String.class,
String.class,
StringDecoder.class,
StringDecoder.class,
kafkaParams,
topicsSet
);
JavaDStream<WithTime<String>> lines = messages.map(mapFunctionWithTime);
return new SparkWorkloadOperator<>(lines, parallelism);
}