

Java KafkaUtils Class Code Examples

This article collects and organizes typical usage examples of the Java class org.apache.spark.streaming.kafka.KafkaUtils. If you are wondering how the KafkaUtils class is used in practice, or are looking for concrete examples of it, the curated code samples below may help.


The KafkaUtils class belongs to the org.apache.spark.streaming.kafka package. Fifteen code examples of the class are shown below, sorted by popularity by default.

Example 1: main

import org.apache.spark.streaming.kafka.KafkaUtils; // import the required package/class
public static void main(String[] args) {

        SparkConf conf = new SparkConf()
                .setAppName("kafka-sandbox")
                .setMaster("local[*]");
        JavaSparkContext sc = new JavaSparkContext(conf);
        JavaStreamingContext ssc = new JavaStreamingContext(sc, new Duration(2000));

        Set<String> topics = Collections.singleton("mytopic");
        Map<String, String> kafkaParams = new HashMap<>();
        kafkaParams.put("metadata.broker.list", "localhost:9092");

        JavaPairInputDStream<String, String> directKafkaStream = KafkaUtils.createDirectStream(ssc,
                String.class, String.class, StringDecoder.class, StringDecoder.class, kafkaParams, topics);

        directKafkaStream.foreachRDD(rdd -> {
            System.out.println("--- New RDD with " + rdd.partitions().size()
                    + " partitions and " + rdd.count() + " records");
            rdd.foreach(record -> System.out.println(record._2));
        });

        ssc.start();
        ssc.awaitTermination();
    }
 
Author: aseigneurin, Project: kafka-sandbox, Lines: 25, Source: SparkStringConsumer.java
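
To try this consumer locally, messages need to be pushed to mytopic first. A minimal, illustrative producer (not part of the example above; broker address and message content are placeholders) could look like this:

import java.util.Properties;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;

// Illustrative sketch only: publish a test message to the topic the stream reads from.
Properties props = new Properties();
props.put("bootstrap.servers", "localhost:9092");
props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
try (KafkaProducer<String, String> producer = new KafkaProducer<>(props)) {
    producer.send(new ProducerRecord<>("mytopic", "hello from the producer"));
}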

Example 2: main

import org.apache.spark.streaming.kafka.KafkaUtils; // import the required package/class
public static void main(String[] args) throws InterruptedException {
  SparkConf sc = new SparkConf().setAppName("POC-Kafka-New");
  
  try(JavaStreamingContext jsc = new JavaStreamingContext(sc, new Duration(2000))) {
    
    JavaPairInputDStream<String, String> stream = KafkaUtils.createDirectStream(
        jsc, String.class, String.class, StringDecoder.class, StringDecoder.class,
        Collections.singletonMap("metadata.broker.list", KAFKA_HOST_PORT),
        Collections.singleton(EXAMPLE_TOPIC));

    JavaDStream<ExampleXML> records = stream.map(t -> t._2()).map(new ParseXML());
    records.foreachRDD(rdd -> System.out.printf("Amount of XMLs: %d\n", rdd.count()));

    jsc.start();
    jsc.awaitTermination();
  }
}
 
Author: ciandt-dev, Project: gcp, Lines: 18, Source: Spark4KafkaNew.java
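
ParseXML and ExampleXML are referenced above but defined elsewhere in the project. As a rough, hypothetical sketch (assuming ExampleXML is a JAXB-annotated bean, which the snippet does not confirm), the mapper could be written as:

import java.io.StringReader;
import javax.xml.bind.JAXBContext;
import org.apache.spark.api.java.function.Function;

// Hypothetical sketch only: unmarshal one XML payload into an ExampleXML bean via JAXB.
public class ParseXML implements Function<String, ExampleXML> {
    @Override
    public ExampleXML call(String xml) throws Exception {
        return (ExampleXML) JAXBContext.newInstance(ExampleXML.class)
                .createUnmarshaller()
                .unmarshal(new StringReader(xml));
    }
}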

Example 3: main

import org.apache.spark.streaming.kafka.KafkaUtils; // import the required package/class
public static void main(String[] args) throws InterruptedException, IOException {
  SparkConf sc = new SparkConf().setAppName("POC-BigQuery");
  
  try(JavaStreamingContext jsc = new JavaStreamingContext(sc, new Duration(60000))) {
    JavaPairInputDStream<String, String> stream = KafkaUtils.createDirectStream(
        jsc, String.class, String.class, StringDecoder.class, StringDecoder.class,
        Collections.singletonMap("metadata.broker.list", KAFKA_HOST_PORT), Collections.singleton(EXAMPLE_TOPIC));

    Configuration conf = new Configuration();
    BigQueryConfiguration.configureBigQueryOutput(conf, BQ_EXAMPLE_TABLE, BQ_EXAMPLE_SCHEMA);
    conf.set("mapreduce.job.outputformat.class", BigQueryOutputFormat.class.getName());

    JavaDStream<ExampleXML> records = stream.map(t -> t._2()).map(new ParseXML());
    records.foreachRDD(rdd -> {
      System.out.printf("Amount of XMLs: %d\n", rdd.count());
      long time = System.currentTimeMillis();
      rdd.mapToPair(new PrepToBQ()).saveAsNewAPIHadoopDataset(conf);
      System.out.printf("Sent to BQ in %fs\n", (System.currentTimeMillis()-time)/1000f);
    });
    
    jsc.start();
    jsc.awaitTermination();
  }
}
 
Author: ciandt-dev, Project: gcp, Lines: 25, Source: Spark6BigQuery.java

Example 4: main

import org.apache.spark.streaming.kafka.KafkaUtils; // import the required package/class
public static void main(String[] args) throws IOException {
	Flags.setFromCommandLineArgs(THE_OPTIONS, args);

	// Initialize the Spark conf.
	SparkConf conf = new SparkConf().setAppName("A SECTONG Application: Apache Log Analysis with Spark");
	JavaSparkContext sc = new JavaSparkContext(conf);
	JavaStreamingContext jssc = new JavaStreamingContext(sc, Flags.getInstance().getSlideInterval());
	SQLContext sqlContext = new SQLContext(sc);

	// Initialize parameters
	HashSet<String> topicsSet = new HashSet<String>(Arrays.asList(Flags.getInstance().getKafka_topic().split(",")));
	HashMap<String, String> kafkaParams = new HashMap<String, String>();
	kafkaParams.put("metadata.broker.list", Flags.getInstance().getKafka_broker());

	// Read data from the Kafka stream
	JavaPairInputDStream<String, String> messages = KafkaUtils.createDirectStream(jssc, String.class, String.class,
			StringDecoder.class, StringDecoder.class, kafkaParams, topicsSet);

	JavaDStream<String> lines = messages.map(new Function<Tuple2<String, String>, String>() {
		private static final long serialVersionUID = 5266880065425088203L;

		public String call(Tuple2<String, String> tuple2) {
			return tuple2._2();
		}
	});

	JavaDStream<ApacheAccessLog> accessLogsDStream = lines.flatMap(line -> {
		List<ApacheAccessLog> list = new ArrayList<>();
		try {
			// Map each line to a parsed log entry; malformed lines are skipped
			list.add(ApacheAccessLog.parseFromLogLine(line));
			return list;
		} catch (RuntimeException e) {
			return list;
		}
	}).cache();

	accessLogsDStream.foreachRDD(rdd -> {

		// rdd to DataFrame
		DataFrame df = sqlContext.createDataFrame(rdd, ApacheAccessLog.class);
		// Write to Parquet files
		df.write().partitionBy("ipAddress", "method", "responseCode").mode(SaveMode.Append).parquet(Flags.getInstance().getParquetFile());

		return null;
	});

	// Start the streaming job
	jssc.start(); // start the computation
	jssc.awaitTermination(); // wait for termination
}
 
Author: sectong, Project: SparkToParquet, Lines: 52, Source: AppMain.java
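
As a follow-up sketch (not part of the original project), the partitioned Parquet output written above can be read back with the same Spark 1.x DataFrame API; the ipAddress and responseCode columns come from the partitionBy call, and the 500 threshold is purely illustrative:

// Illustrative query over the Parquet files written by the streaming job above.
DataFrame logs = sqlContext.read().parquet(Flags.getInstance().getParquetFile());
logs.filter(logs.col("responseCode").geq(500)) // server errors only
    .groupBy("ipAddress")
    .count()
    .show();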

Example 5: kafka

import org.apache.spark.streaming.kafka.KafkaUtils; // import the required package/class
private static <K, V> TransformEvaluator<KafkaIO.Read.Unbound<K, V>> kafka() {
  return new TransformEvaluator<KafkaIO.Read.Unbound<K, V>>() {
    @Override
    public void evaluate(KafkaIO.Read.Unbound<K, V> transform, EvaluationContext context) {
      StreamingEvaluationContext sec = (StreamingEvaluationContext) context;
      JavaStreamingContext jssc = sec.getStreamingContext();
      Class<K> keyClazz = transform.getKeyClass();
      Class<V> valueClazz = transform.getValueClass();
      Class<? extends Decoder<K>> keyDecoderClazz = transform.getKeyDecoderClass();
      Class<? extends Decoder<V>> valueDecoderClazz = transform.getValueDecoderClass();
      Map<String, String> kafkaParams = transform.getKafkaParams();
      Set<String> topics = transform.getTopics();
      JavaPairInputDStream<K, V> inputPairStream = KafkaUtils.createDirectStream(jssc, keyClazz,
              valueClazz, keyDecoderClazz, valueDecoderClazz, kafkaParams, topics);
      JavaDStream<WindowedValue<KV<K, V>>> inputStream =
          inputPairStream.map(new Function<Tuple2<K, V>, KV<K, V>>() {
        @Override
        public KV<K, V> call(Tuple2<K, V> t2) throws Exception {
          return KV.of(t2._1(), t2._2());
        }
      }).map(WindowingHelpers.<KV<K, V>>windowFunction());
      sec.setStream(transform, inputStream);
    }
  };
}
 
Author: shakamunyi, Project: spark-dataflow, Lines: 26, Source: StreamingTransformTranslator.java

Example 6: main

import org.apache.spark.streaming.kafka.KafkaUtils; // import the required package/class
public static void main(String[] args) {
    if (args.length < 4) {
        System.err.println("Usage: JavaKafkaWordCount <zkQuorum> <group> <topics> <numThreads>");
        System.exit(1);
    }

    SparkConf sparkConf = new SparkConf().setAppName("JavaKafkaWordCount");
    // Create the context with a 2 second batch interval
    JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, new Duration(2000));
    int numThreads = Integer.parseInt(args[3]);
    Map<String, Integer> topicMap = new HashMap<String, Integer>();
    String[] topics = args[2].split(",");
    for (String topic : topics) {
        topicMap.put(topic, numThreads);
    }
    JavaPairReceiverInputDStream<String, String> messages = KafkaUtils.createStream(jssc, args[0], args[1],
            topicMap);
    JavaDStream<String> lines = messages.map(tuple2 -> tuple2._2());
    JavaDStream<String> words = lines.flatMap(x -> Lists.newArrayList(SPACE.split(x)));
    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(s -> new Tuple2<String, Integer>(s, 1)).reduceByKey(
            (i1, i2) -> i1 + i2);
    wordCounts.print();
    jssc.start();
    jssc.awaitTermination();
}
 
Author: ogidogi, Project: laughing-octo-sansa, Lines: 26, Source: TestSparkKafkaReceiverApproach.java
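
This example uses the receiver-based createStream, which needs a ZooKeeper quorum and a consumer group. For comparison, a hypothetical direct (receiver-less) variant of the same source would take a broker list instead; the address below is a placeholder, not part of the original example:

// Hypothetical direct-stream variant of the Kafka source above.
Map<String, String> kafkaParams = new HashMap<>();
kafkaParams.put("metadata.broker.list", "localhost:9092");
Set<String> topicSet = new HashSet<>(Arrays.asList(args[2].split(",")));
JavaPairInputDStream<String, String> directMessages = KafkaUtils.createDirectStream(
        jssc, String.class, String.class, StringDecoder.class, StringDecoder.class,
        kafkaParams, topicSet);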

Example 7: startNewStream

import org.apache.spark.streaming.kafka.KafkaUtils; // import the required package/class
private JavaPairDStream<String, String> startNewStream(JavaStreamingContext jsc) {
  JavaPairInputDStream<String, String> stream = KafkaUtils.createDirectStream(
      jsc, String.class, String.class, StringDecoder.class, StringDecoder.class,
      ImmutableMap.of("metadata.broker.list", kafka, "auto.offset.reset", "smallest"),
      Collections.singleton(topic));

  return stream.transformToPair(new ToPairWithOffsets<>(tuple -> tuple._2()));
}
 
Author: ciandt-dev, Project: gcp, Lines: 9, Source: KafkaInputWithOffsets.java
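
The ToPairWithOffsets transform is defined elsewhere in this project. Independently of it, the usual way to read per-partition Kafka offsets off a direct stream (useful when offsets are tracked externally, as in the following examples) is roughly:

import org.apache.spark.streaming.kafka.HasOffsetRanges;
import org.apache.spark.streaming.kafka.OffsetRange;

// Illustrative sketch: inspect the Kafka offset ranges of each micro-batch.
stream.foreachRDD(rdd -> {
  OffsetRange[] ranges = ((HasOffsetRanges) rdd.rdd()).offsetRanges();
  for (OffsetRange range : ranges) {
    System.out.println(range.topic() + "-" + range.partition()
        + ": " + range.fromOffset() + " .. " + range.untilOffset());
  }
});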

Example 8: startFromOffsets

import org.apache.spark.streaming.kafka.KafkaUtils; // import the required package/class
private static JavaPairDStream<String, String> startFromOffsets(JavaStreamingContext jsc, String offsetsInput) {
  Map<TopicAndPartition, Long> map = new HashMap<>();
  for (String partition : offsetsInput.split(",")) {
    String[] offset = partition.split(":");
    map.put(new TopicAndPartition(EXAMPLE_TOPIC, Integer.parseInt(offset[0])), Long.parseLong(offset[1]));
  }

  JavaDStream<String> stream = KafkaUtils.createDirectStream(jsc, String.class, String.class, StringDecoder.class,
      StringDecoder.class, String.class, Collections.singletonMap("metadata.broker.list", KAFKA_HOST_PORT), map,
      msg -> msg.message());
  
  return stream.transformToPair(new ToPairWithOffset<>(str -> str));
}
 
Author: ciandt-dev, Project: gcp, Lines: 14, Source: Spark7OffsetsToZK.java
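
Given the parsing above, offsetsInput is expected to be a comma-separated list of partition:offset pairs for EXAMPLE_TOPIC. A hypothetical call could look like:

// Hypothetical usage; the partition numbers and offsets are made up.
JavaPairDStream<String, String> stream = startFromOffsets(jsc, "0:4500,1:4431,2:4512");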

Example 9: startNewStream

import org.apache.spark.streaming.kafka.KafkaUtils; // import the required package/class
private static JavaPairDStream<String, String> startNewStream(JavaStreamingContext jsc) {
  JavaPairInputDStream<String, String> stream = KafkaUtils.createDirectStream(jsc, String.class, String.class,
      StringDecoder.class, StringDecoder.class, Collections.singletonMap("metadata.broker.list", KAFKA_HOST_PORT),
      Collections.singleton(EXAMPLE_TOPIC));

  return stream.transformToPair(new ToPairWithOffset<>(tuple -> tuple._2()));
}
 
Author: ciandt-dev, Project: gcp, Lines: 8, Source: Spark7OffsetsToZK.java

Example 10: main

import org.apache.spark.streaming.kafka.KafkaUtils; // import the required package/class
public static void main(String[] args) throws InterruptedException {
  SparkConf sc = new SparkConf().setAppName("POC-Kafka");
  
  try(JavaStreamingContext jsc = new JavaStreamingContext(sc, new Duration(2000))) {
    
    JavaPairReceiverInputDStream<String, String> stream = KafkaUtils.createStream(
        jsc, ZK_HOST_PORT, "a_group_id", Collections.singletonMap(EXAMPLE_TOPIC, 1));

    JavaDStream<ExampleXML> records = stream.map(t -> t._2()).map(new ParseXML());
    records.foreachRDD(rdd -> System.out.printf("Amount of XMLs: %d\n", rdd.count()));

    jsc.start();
    jsc.awaitTermination();
  }
}
 
Author: ciandt-dev, Project: gcp, Lines: 16, Source: Spark3Kafka.java

Example 11: create

import org.apache.spark.streaming.kafka.KafkaUtils; // import the required package/class
@Override public JavaDStream<byte[]> create(JavaStreamingContext jsc) {
  return KafkaUtils.createDirectStream(
      jsc,
      byte[].class,
      byte[].class,
      DefaultDecoder.class,
      DefaultDecoder.class,
      kafkaParams(),
      Collections.singleton(topic()))
      .map(m -> m._2); // get value
}
 
Author: openzipkin, Project: zipkin-sparkstreaming, Lines: 12, Source: KafkaStreamFactory.java

Example 12: processStream

import org.apache.spark.streaming.kafka.KafkaUtils; // import the required package/class
private static void processStream(JavaStreamingContext ssc, JavaSparkContext sc) {
  System.out.println("--> Processing stream");

  Map<String, String> props = new HashMap<>();
  props.put("bootstrap.servers", "localhost:9092");
  props.put("schema.registry.url", "http://localhost:8081");
  props.put("group.id", "spark");
  props.put("specific.avro.reader", "true");

  props.put("value.deserializer", "io.confluent.kafka.serializers.KafkaAvroDeserializer");
  props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");

  Set<String> topicsSet = new HashSet<>(Collections.singletonList("test"));

  JavaPairInputDStream<String, Object> stream = KafkaUtils.createDirectStream(ssc, String.class, Object.class,
    StringDecoder.class, KafkaAvroDecoder.class, props, topicsSet);

  stream.foreachRDD(rdd -> {
    rdd.foreachPartition(iterator -> {
        while (iterator.hasNext()) {
          Tuple2<String, Object> next = iterator.next();
          Model model = (Model) next._2();
          System.out.println(next._1() + " --> " + model);
        }
      }
    );
  });
}
 
Author: opencore, Project: kafka-spark-avro-example, Lines: 29, Source: SparkStreaming.java
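
processStream only wires up the pipeline; the streaming context still has to be started by the caller. A minimal, hypothetical caller (not shown in the snippet) would be:

// Hypothetical caller: wire up the stream, then start the context and block.
processStream(ssc, sc);
ssc.start();
ssc.awaitTermination();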

Example 13: main

import org.apache.spark.streaming.kafka.KafkaUtils; // import the required package/class
public static void main(String[] args) {

        SparkConf conf = new SparkConf()
                .setAppName("kafka-sandbox")
                .setMaster("local[*]");
        JavaSparkContext sc = new JavaSparkContext(conf);
        JavaStreamingContext ssc = new JavaStreamingContext(sc, new Duration(2000));

        Set<String> topics = Collections.singleton("mytopic");
        Map<String, String> kafkaParams = new HashMap<>();
        kafkaParams.put("metadata.broker.list", "localhost:9092");

        JavaPairInputDStream<String, byte[]> directKafkaStream = KafkaUtils.createDirectStream(ssc,
                String.class, byte[].class, StringDecoder.class, DefaultDecoder.class, kafkaParams, topics);

        directKafkaStream
                .map(message -> recordInjection.invert(message._2).get())
                .foreachRDD(rdd -> {
                    rdd.foreach(record -> {
                        System.out.println("str1= " + record.get("str1")
                                + ", str2= " + record.get("str2")
                                + ", int1=" + record.get("int1"));
                    });
                });

        ssc.start();
        ssc.awaitTermination();
    }
 
Author: aseigneurin, Project: kafka-sandbox, Lines: 29, Source: SparkAvroConsumer.java
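
recordInjection is a field defined elsewhere in the project. The invert(...).get() call suggests a Twitter Bijection Avro codec; a hypothetical definition, with a schema matching the str1/str2/int1 fields printed above, might be:

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import com.twitter.bijection.Injection;
import com.twitter.bijection.avro.GenericAvroCodecs;

// Hypothetical sketch: the schema and the choice of codec are assumptions, not the project's code.
private static final Schema SCHEMA = new Schema.Parser().parse(
        "{\"type\":\"record\",\"name\":\"myrecord\",\"fields\":["
        + "{\"name\":\"str1\",\"type\":\"string\"},"
        + "{\"name\":\"str2\",\"type\":\"string\"},"
        + "{\"name\":\"int1\",\"type\":\"int\"}]}");
private static final Injection<GenericRecord, byte[]> recordInjection =
        GenericAvroCodecs.toBinary(SCHEMA);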

Example 14: startReadingDataStream

import org.apache.spark.streaming.kafka.KafkaUtils; // import the required package/class
public void startReadingDataStream(IDataProcessor processor) {
	// first parameter is Kafka topic, and second is content (in this case,
	// a line)
	JavaPairInputDStream<String, String> messages = KafkaUtils
			.createDirectStream(jssc, String.class, String.class,
					StringDecoder.class, StringDecoder.class, kafkaParams,
					topicSet);
	
	processor.process(messages);
	
	// start the computation
	jssc.start();
	jssc.awaitTermination();
	
}
 
Author: henglicad, Project: logCollector, Lines: 16, Source: DataStreamReader.java
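
IDataProcessor, jssc, kafkaParams and topicSet are fields and types defined elsewhere in the project. Based only on how it is called above, a plausible (hypothetical) shape for the callback interface is:

// Hypothetical shape; the real interface may declare more methods.
public interface IDataProcessor {
    void process(JavaPairInputDStream<String, String> messages);
}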

Example 15: stringStreamFromKafkaWithTime

import org.apache.spark.streaming.kafka.KafkaUtils; // import the required package/class
@Override
public SparkWorkloadOperator<WithTime<String>> stringStreamFromKafkaWithTime(String zkConStr,
                                                                             String kafkaServers,
                                                                             String group,
                                                                             String topics,
                                                                             String offset,
                                                                             String componentId,
                                                                             int parallelism) {
    HashSet<String> topicsSet = new HashSet<>(Arrays.asList(topics.split(",")));
    HashMap<String, String> kafkaParams = new HashMap<>();
    kafkaParams.put("metadata.broker.list", kafkaServers);
    kafkaParams.put("auto.offset.reset", offset);
    kafkaParams.put("zookeeper.connect", zkConStr);
    kafkaParams.put("group.id", group);

    // Create direct kafka stream with brokers and topics
    JavaPairInputDStream<String, String> messages = KafkaUtils.createDirectStream(
            jssc,
            String.class,
            String.class,
            StringDecoder.class,
            StringDecoder.class,
            kafkaParams,
            topicsSet
    );

    JavaDStream<WithTime<String>> lines = messages.map(mapFunctionWithTime);

    return new SparkWorkloadOperator<>(lines, parallelism);
}
 
Author: wangyangjun, Project: StreamBench, Lines: 31, Source: SparkOperatorCreater.java
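
mapFunctionWithTime and WithTime come from the surrounding project and are not shown here. Assuming WithTime<String> can be built from a value plus a receive timestamp (an assumption, not confirmed by the snippet), the mapper might be sketched as:

// Hypothetical sketch; assumes a (value, timestamp) constructor on WithTime.
private static final Function<Tuple2<String, String>, WithTime<String>> mapFunctionWithTime =
        tuple -> new WithTime<>(tuple._2(), System.currentTimeMillis());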


Note: The org.apache.spark.streaming.kafka.KafkaUtils examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their respective authors; copyright of the source code remains with those authors, and distribution or use should follow each project's license. Please do not republish without permission.