

Java KafkaIO Class Code Examples

This article collects typical usage examples of the Java class org.apache.beam.sdk.io.kafka.KafkaIO. If you are looking for concrete examples of how KafkaIO is used in practice, the curated examples below may help.


The KafkaIO class belongs to the org.apache.beam.sdk.io.kafka package. Nine code examples of the class are shown below, sorted by popularity by default.
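
Before the individual examples, here is a minimal sketch of the read pattern most of them share. It assumes a Beam 2.x KafkaIO API; the broker address and topic name are placeholders rather than values taken from any project below.

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.kafka.KafkaIO;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Values;
import org.apache.kafka.common.serialization.StringDeserializer;

public class KafkaIOReadSketch {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());

    // Configure brokers, topic and deserializers, then drop the Kafka metadata
    // to obtain a PCollection<KV<String, String>> of the raw records.
    p.apply(KafkaIO.<String, String>read()
            .withBootstrapServers("localhost:9092")   // placeholder broker address
            .withTopic("my-topic")                    // placeholder topic name
            .withKeyDeserializer(StringDeserializer.class)
            .withValueDeserializer(StringDeserializer.class)
            .withoutMetadata())
        .apply(Values.<String>create());              // keep only the record values

    p.run().waitUntilFinish();
  }
}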

Example 1: main

import org.apache.beam.sdk.io.kafka.KafkaIO; // import the package/class this example depends on
public static void main(String[] args) throws Exception {

    Options options = PipelineOptionsFactory.fromArgs(args).withValidation()
        .as(Options.class);
    options.setRunner(FlinkRunner.class);

    Pipeline p = Pipeline.create(options);

    KafkaIO.Read<byte[], String> kafkaIOReader = KafkaIO.read()
        .withBootstrapServers("192.168.99.100:32771")
        .withTopics(Arrays.asList("beam".split(",")))
        .updateConsumerProperties(ImmutableMap.of("auto.offset.reset", (Object)"earliest"))
        .withValueCoder(StringUtf8Coder.of());

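    // withoutMetadata() discards offsets/partition info and yields KV<byte[], String>
    // pairs; Values.create() then keeps only the decoded String payloads.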
    p.apply(kafkaIOReader.withoutMetadata())
        .apply(Values.<String>create())
        .apply(Window.<String>into(
          FixedWindows.of(Duration.standardMinutes(options.getWindowSize()))))
        .apply(new CountWords())
        .apply(MapElements.via(new FormatAsTextFn()))
        .apply("WriteCounts", TextIO.Write.to(options.getOutput()));

    p.run();
  }
 
Developer: 0x0ece, Project: beam-starter, Lines: 25, Source: StreamWordCount.java

Example 2: buildIOReader

import org.apache.beam.sdk.io.kafka.KafkaIO; // import the package/class this example depends on
@Override
  public PCollection<BeamRecord> buildIOReader(Pipeline pipeline) {
    KafkaIO.Read<byte[], byte[]> kafkaRead = null;
    if (topics != null) {
      kafkaRead = KafkaIO.<byte[], byte[]>read()
          .withBootstrapServers(bootstrapServers)
          .withTopics(topics)
          .updateConsumerProperties(configUpdates)
          .withKeyDeserializerAndCoder(ByteArrayDeserializer.class, ByteArrayCoder.of())
          .withValueDeserializerAndCoder(ByteArrayDeserializer.class, ByteArrayCoder.of());
    } else if (topicPartitions != null) {
      kafkaRead = KafkaIO.<byte[], byte[]>read()
          .withBootstrapServers(bootstrapServers)
          .withTopicPartitions(topicPartitions)
          .updateConsumerProperties(configUpdates)
          .withKeyDeserializerAndCoder(ByteArrayDeserializer.class, ByteArrayCoder.of())
          .withValueDeserializerAndCoder(ByteArrayDeserializer.class, ByteArrayCoder.of());
    } else {
      throw new IllegalArgumentException("One of topics and topicPartitions must be configured.");
    }

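    // Drop the Kafka metadata to get KV<byte[], byte[]> pairs, then convert them
    // into BeamRecords with the table's input-format transform.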
    return PBegin.in(pipeline).apply("read", kafkaRead.withoutMetadata())
        .apply("in_format", getPTransformForInput());
  }
 
Developer: apache, Project: beam, Lines: 25, Source: BeamKafkaTable.java

Example 3: main

import org.apache.beam.sdk.io.kafka.KafkaIO; // import the package/class this example depends on
public static void main(String[] args) throws Exception {

    Options options =
        PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
    Pipeline pipeline = Pipeline.create(options);

    pipeline
    .apply(KafkaIO.<String, String>read()
        .withBootstrapServers(options.getKafkaBootstrapServer())
        .withTopic(options.getTopic())
        .withKeyDeserializer(StringDeserializer.class)
        .withValueDeserializer(StringDeserializer.class)
        .withTimestampFn(new SetTimestampFn()))
    .apply("Values", ParDo.of(new ValuesFn()))

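    // Fixed five-minute event-time windows: emit speculative (early) panes two
    // minutes of processing time after the first element, fire again for every
    // late element within the ten-minute allowed lateness, and accumulate panes
    // so each firing reports the running score.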
    .apply("FixedWindows", Window.<String>into(FixedWindows.of(FIVE_MINUTES))
        .triggering(AfterWatermark.pastEndOfWindow()
            .withEarlyFirings(AfterProcessingTime.pastFirstElementInPane()
                .plusDelayOf(TWO_MINUTES))
            .withLateFirings(AfterPane.elementCountAtLeast(1)))
        .withAllowedLateness(TEN_MINUTES)
        .accumulatingFiredPanes())

    .apply("TeamScore", new CalculateTeamScores(options.getOutputPrefix()));

    pipeline.run();
  }
 
Developer: davorbonaci, Project: beam-portability-demo, Lines: 28, Source: LeaderBoard.java

Example 4: buildIOWriter

import org.apache.beam.sdk.io.kafka.KafkaIO; // import the package/class this example depends on
@Override
public PTransform<? super PCollection<BeamRecord>, PDone> buildIOWriter() {
  checkArgument(topics != null && topics.size() == 1,
      "Only one topic can be acceptable as output.");

  return new PTransform<PCollection<BeamRecord>, PDone>() {
    @Override
    public PDone expand(PCollection<BeamRecord> input) {
      return input.apply("out_reformat", getPTransformForOutput()).apply("persistent",
          KafkaIO.<byte[], byte[]>write()
              .withBootstrapServers(bootstrapServers)
              .withTopic(topics.get(0))
              .withKeySerializer(ByteArraySerializer.class)
              .withValueSerializer(ByteArraySerializer.class));
    }
  };
}
 
Developer: apache, Project: beam, Lines: 18, Source: BeamKafkaTable.java

Example 5: expand

import org.apache.beam.sdk.io.kafka.KafkaIO; // import the package/class this example depends on
@Override
public PCollection<IndexedRecord> expand(PBegin pBegin) {

    KafkaIO.Read<byte[], byte[]> kafkaRead = KafkaIO.readBytes()
            .withBootstrapServers(properties.getDatasetProperties().getDatastoreProperties().brokers.getValue())
            .withTopics(Arrays.asList(new String[] { properties.getDatasetProperties().topic.getValue() }))
            .updateConsumerProperties(KafkaConnection.createInputMaps(properties));

    if (properties.useMaxReadTime.getValue()) {
        kafkaRead = kafkaRead.withMaxReadTime(new Duration(properties.maxReadTime.getValue()));
    }
    if (properties.useMaxNumRecords.getValue()) {
        kafkaRead = kafkaRead.withMaxNumRecords(properties.maxNumRecords.getValue());
    }
    // only the value of each Kafka record is considered, regardless of the selected format
    PCollection<byte[]> kafkaRecords = pBegin.apply(kafkaRead) //
            .apply(ParDo.of(new ExtractRecord())) //
            .apply(Values.<byte[]> create());
    switch (properties.getDatasetProperties().valueFormat.getValue()) {
    case AVRO: {
        Schema schema = null;
        if (properties.getDatasetProperties().isHierarchy.getValue()) {
            // use the component's schema directly? that should be settled at design time, no?
            schema = new Schema.Parser().parse(properties.getDatasetProperties().avroSchema.getValue());
        } else {
            // use the component's schema directly since the data is natively Avro
            schema = properties.getDatasetProperties().main.schema.getValue();
        }
        return kafkaRecords.apply(ParDo.of(new ConvertToAvro(schema.toString())));
    }
    case CSV: {
        // FIXME(bchen) KafkaAvroRegistry has no way to record the adaptation; it infers the schema
        // from the data rather than using the defined schema
        return kafkaRecords
                .apply(ParDo.of(new ExtractCsvSplit(properties.getDatasetProperties().fieldDelimiter.getValue())))
                .apply(ConvertToIndexedRecord.<String[]>of());
    }
    default:
        throw new RuntimeException("To be implemented: " + properties.getDatasetProperties().valueFormat.getValue());
    }

}
 
Developer: Talend, Project: components, Lines: 43, Source: KafkaInputPTransformRuntime.java

Example 6: getKafkaReader

import org.apache.beam.sdk.io.kafka.KafkaIO; // import the package/class this example depends on
private static KafkaIO.Read<String, String> getKafkaReader(final String bootstrapServers) {
    return KafkaIO.<String, String>read().withBootstrapServers(bootstrapServers)
        .withKeyDeserializer(StringDeserializer.class)
        .withValueDeserializer(StringDeserializer.class);
}
 
Developer: trellis-ldp-archive, Project: trellis-rosid-file-streaming, Lines: 6, Source: FileProcessingPipeline.java

Example 7: getKafkaWriter

import org.apache.beam.sdk.io.kafka.KafkaIO; // import the package/class this example depends on
private static KafkaIO.Write<String, String> getKafkaWriter(final String bootstrapServers) {
    return KafkaIO.<String, String>write().withBootstrapServers(bootstrapServers)
        .withKeySerializer(StringSerializer.class)
        .withValueSerializer(StringSerializer.class);
}
 
Developer: trellis-ldp-archive, Project: trellis-rosid-file-streaming, Lines: 6, Source: FileProcessingPipeline.java
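
The two helpers above configure only the bootstrap servers and the (de)serializers, leaving topic selection to the caller. A minimal, hypothetical caller might wire them together like this; the topic names and broker address are placeholders, and the helpers are assumed to be visible, e.g. within the same class.

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

public class KafkaPassThrough {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());

    // Read KV<String, String> records from one topic and republish them
    // unchanged to another topic.
    p.apply(getKafkaReader("localhost:9092").withTopic("input-topic").withoutMetadata())
        .apply(getKafkaWriter("localhost:9092").withTopic("output-topic"));

    p.run().waitUntilFinish();
  }
}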

Example 8: debugStreamingPipeline

import org.apache.beam.sdk.io.kafka.KafkaIO; // import the package/class this example depends on
@Test
public void debugStreamingPipeline() {
  TestSparkPipelineOptions options =
      PipelineOptionsFactory.create().as(TestSparkPipelineOptions.class);
  options.setForceStreaming(true);
  options.setRunner(SparkRunnerDebugger.class);

  Pipeline pipeline = Pipeline.create(options);

  KafkaIO.Read<String, String> read = KafkaIO.<String, String>read()
      .withBootstrapServers("mykafka:9092")
      .withTopics(Collections.singletonList("my_input_topic"))
      .withKeyDeserializer(StringDeserializer.class)
      .withValueDeserializer(StringDeserializer.class);

  KafkaIO.Write<String, String> write = KafkaIO.<String, String>write()
      .withBootstrapServers("myotherkafka:9092")
      .withTopic("my_output_topic")
      .withKeySerializer(StringSerializer.class)
      .withValueSerializer(StringSerializer.class);

  KvCoder<String, String> stringKvCoder = KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of());

  pipeline
      .apply(read.withoutMetadata()).setCoder(stringKvCoder)
      .apply(Window.<KV<String, String>>into(FixedWindows.of(Duration.standardSeconds(5))))
      .apply(ParDo.of(new SparkRunnerDebuggerTest.FormatKVFn()))
      .apply(Distinct.<String>create())
      .apply(WithKeys.of(new SparkRunnerDebuggerTest.ArbitraryKeyFunction()))
      .apply(write);

  final String expectedPipeline = "KafkaUtils.createDirectStream(...)\n"
      + "_.map(new org.apache.beam.sdk.transforms.windowing.FixedWindows())\n"
      + "_.mapPartitions(new org.apache.beam.runners.spark."
      + "SparkRunnerDebuggerTest$FormatKVFn())\n"
      + "_.mapPartitions(new org.apache.beam.sdk.transforms.Contextful())\n"
      + "_.groupByKey()\n"
      + "_.map(new org.apache.beam.sdk.transforms.Combine$IterableCombineFn())\n"
      + "_.mapPartitions(new org.apache.beam.sdk.transforms.Distinct$3())\n"
      + "_.mapPartitions(new org.apache.beam.sdk.transforms.Contextful())\n"
      + "_.<org.apache.beam.sdk.io.kafka.AutoValue_KafkaIO_Write>";

  SparkRunnerDebugger.DebugSparkPipelineResult result =
      (SparkRunnerDebugger.DebugSparkPipelineResult) pipeline.run();

  assertThat("Debug pipeline did not equal expected",
      result.getDebugString(),
      Matchers.equalTo(expectedPipeline));
}
 
Developer: apache, Project: beam, Lines: 50, Source: SparkRunnerDebuggerTest.java

Example 9: expand

import org.apache.beam.sdk.io.kafka.KafkaIO; // import the package/class this example depends on
@Override
public PDone expand(PCollection<IndexedRecord> objectPCollection) {
    final boolean useAvro =
            properties.getDatasetProperties().valueFormat.getValue() == KafkaDatasetProperties.ValueFormat.AVRO;

    KafkaIO.Write<byte[], byte[]> kafkaWrite = KafkaIO
            .<byte[], byte[]> write()
            .withBootstrapServers(properties.getDatasetProperties().getDatastoreProperties().brokers.getValue())
            .withTopic(properties.getDatasetProperties().topic.getValue())
            .withKeySerializer(ByteArraySerializer.class)
            .withValueSerializer(ByteArraySerializer.class)
            .updateProducerProperties(KafkaConnection.createOutputMaps(properties));

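    // COLUMN partitioning derives the Kafka message key from the configured key
    // column, so records with the same key land on the same partition;
    // ROUND_ROBIN writes values only (kafkaWrite.values()), leaving partition
    // assignment to the producer.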
    switch (properties.partitionType.getValue()) {
    case COLUMN: {
        PCollection pc1 = objectPCollection.apply(WithKeys.of(new ProduceKey(properties.keyColumn.getValue())));
        if (useAvro) {
            // TODO for now use the incoming Avro schema directly without checking the configured schema; improve this later.
            return ((PCollection<KV<byte[], byte[]>>) pc1.apply(MapElements.via(new AvroToByteArrayKV())))
                    .apply(kafkaWrite);
        } else { // csv
            return ((PCollection<KV<byte[], byte[]>>) pc1.apply(
                    MapElements.via(new FormatCsvKV(properties.getDatasetProperties().fieldDelimiter.getValue()))))
                            .apply(kafkaWrite);
        }
    }
    case ROUND_ROBIN: {
        if (useAvro) {
            // TODO for now use the incoming Avro schema directly without checking the configured schema; improve this later.
            return (PDone) objectPCollection.apply(MapElements.via(new AvroToByteArray())).apply(
                    kafkaWrite.values());
        } else { // csv
            return (PDone) objectPCollection
                    .apply(MapElements
                            .via(new FormatCsv(properties.getDatasetProperties().fieldDelimiter.getValue())))
                    .apply(kafkaWrite.values());
        }
    }
    default:
        throw new RuntimeException("To be implemented: " + properties.partitionType.getValue());
    }
}
 
Developer: Talend, Project: components, Lines: 43, Source: KafkaOutputPTransformRuntime.java


Note: The org.apache.beam.sdk.io.kafka.KafkaIO class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects; copyright remains with the original authors, and distribution or use should follow each project's license. Please do not reproduce without permission.