This article collects typical usage examples of the Java class org.apache.beam.sdk.io.kafka.KafkaIO. If you are wondering what the KafkaIO class is for and how to use it, the hand-picked examples below may help.
The KafkaIO class belongs to the org.apache.beam.sdk.io.kafka package. Nine code examples are shown below, sorted by popularity by default.
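Before the individual examples, a compact sketch of the two entry points they all build on may help: KafkaIO.read() turns a Kafka topic into a PCollection of key/value pairs, and KafkaIO.write() publishes a PCollection back to Kafka. The broker address, topic names, and the KafkaIOSketch class below are placeholders for illustration only; the chained configuration calls are the same ones used in the examples that follow.

import java.util.Arrays;

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.kafka.KafkaIO;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Values;
import org.apache.beam.sdk.values.PCollection;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.apache.kafka.common.serialization.StringSerializer;

public class KafkaIOSketch {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());

    // Read: configure brokers, topics and deserializers, then drop Kafka metadata
    // to obtain plain key/value pairs; keep only the values here.
    PCollection<String> values = p
        .apply(KafkaIO.<String, String>read()
            .withBootstrapServers("localhost:9092")        // placeholder broker address
            .withTopics(Arrays.asList("input_topic"))       // placeholder topic
            .withKeyDeserializer(StringDeserializer.class)
            .withValueDeserializer(StringDeserializer.class)
            .withoutMetadata())
        .apply(Values.<String>create());

    // Write: values() lets a keyless PCollection be written; Kafka assigns partitions.
    values.apply(KafkaIO.<String, String>write()
        .withBootstrapServers("localhost:9092")
        .withTopic("output_topic")                          // placeholder topic
        .withKeySerializer(StringSerializer.class)
        .withValueSerializer(StringSerializer.class)
        .values());

    p.run();
  }
}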
Example 1: main

import org.apache.beam.sdk.io.kafka.KafkaIO; // import the required package/class

public static void main(String[] args) throws Exception {
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation()
      .as(Options.class);
  options.setRunner(FlinkRunner.class);

  Pipeline p = Pipeline.create(options);

  KafkaIO.Read<byte[], String> kafkaIOReader = KafkaIO.read()
      .withBootstrapServers("192.168.99.100:32771")
      .withTopics(Arrays.asList("beam".split(",")))
      .updateConsumerProperties(ImmutableMap.of("auto.offset.reset", (Object) "earliest"))
      .withValueCoder(StringUtf8Coder.of());

  p.apply(kafkaIOReader.withoutMetadata())
      .apply(Values.<String>create())
      .apply(Window.<String>into(
          FixedWindows.of(Duration.standardMinutes(options.getWindowSize()))))
      .apply(new CountWords())
      .apply(MapElements.via(new FormatAsTextFn()))
      .apply("WriteCounts", TextIO.Write.to(options.getOutput()));

  p.run();
}
Example 2: buildIOReader

import org.apache.beam.sdk.io.kafka.KafkaIO; // import the required package/class

@Override
public PCollection<BeamRecord> buildIOReader(Pipeline pipeline) {
  KafkaIO.Read<byte[], byte[]> kafkaRead = null;
  if (topics != null) {
    kafkaRead = KafkaIO.<byte[], byte[]>read()
        .withBootstrapServers(bootstrapServers)
        .withTopics(topics)
        .updateConsumerProperties(configUpdates)
        .withKeyDeserializerAndCoder(ByteArrayDeserializer.class, ByteArrayCoder.of())
        .withValueDeserializerAndCoder(ByteArrayDeserializer.class, ByteArrayCoder.of());
  } else if (topicPartitions != null) {
    kafkaRead = KafkaIO.<byte[], byte[]>read()
        .withBootstrapServers(bootstrapServers)
        .withTopicPartitions(topicPartitions)
        .updateConsumerProperties(configUpdates)
        .withKeyDeserializerAndCoder(ByteArrayDeserializer.class, ByteArrayCoder.of())
        .withValueDeserializerAndCoder(ByteArrayDeserializer.class, ByteArrayCoder.of());
  } else {
    throw new IllegalArgumentException("Either topics or topicPartitions must be configured.");
  }

  return PBegin.in(pipeline).apply("read", kafkaRead.withoutMetadata())
      .apply("in_format", getPTransformForInput());
}
Example 3: main

import org.apache.beam.sdk.io.kafka.KafkaIO; // import the required package/class

public static void main(String[] args) throws Exception {
  Options options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  Pipeline pipeline = Pipeline.create(options);

  pipeline
      .apply(KafkaIO.<String, String>read()
          .withBootstrapServers(options.getKafkaBootstrapServer())
          .withTopic(options.getTopic())
          .withKeyDeserializer(StringDeserializer.class)
          .withValueDeserializer(StringDeserializer.class)
          .withTimestampFn(new SetTimestampFn()))
      .apply("Values", ParDo.of(new ValuesFn()))
      .apply("FixedWindows", Window.<String>into(FixedWindows.of(FIVE_MINUTES))
          .triggering(AfterWatermark.pastEndOfWindow()
              .withEarlyFirings(AfterProcessingTime.pastFirstElementInPane()
                  .plusDelayOf(TWO_MINUTES))
              .withLateFirings(AfterPane.elementCountAtLeast(1)))
          .withAllowedLateness(TEN_MINUTES)
          .accumulatingFiredPanes())
      .apply("TeamScore", new CalculateTeamScores(options.getOutputPrefix()));

  pipeline.run();
}
Example 4: buildIOWriter

import org.apache.beam.sdk.io.kafka.KafkaIO; // import the required package/class

@Override
public PTransform<? super PCollection<BeamRecord>, PDone> buildIOWriter() {
  checkArgument(topics != null && topics.size() == 1,
      "Only one topic is accepted as output.");

  return new PTransform<PCollection<BeamRecord>, PDone>() {
    @Override
    public PDone expand(PCollection<BeamRecord> input) {
      return input.apply("out_reformat", getPTransformForOutput()).apply("persistent",
          KafkaIO.<byte[], byte[]>write()
              .withBootstrapServers(bootstrapServers)
              .withTopic(topics.get(0))
              .withKeySerializer(ByteArraySerializer.class)
              .withValueSerializer(ByteArraySerializer.class));
    }
  };
}
Example 5: expand

import org.apache.beam.sdk.io.kafka.KafkaIO; // import the required package/class

@Override
public PCollection<IndexedRecord> expand(PBegin pBegin) {
  KafkaIO.Read<byte[], byte[]> kafkaRead = KafkaIO.readBytes()
      .withBootstrapServers(properties.getDatasetProperties().getDatastoreProperties().brokers.getValue())
      .withTopics(Arrays.asList(new String[] { properties.getDatasetProperties().topic.getValue() }))
      .updateConsumerProperties(KafkaConnection.createInputMaps(properties));

  if (properties.useMaxReadTime.getValue()) {
    kafkaRead = kafkaRead.withMaxReadTime(new Duration(properties.maxReadTime.getValue()));
  }
  if (properties.useMaxNumRecords.getValue()) {
    kafkaRead = kafkaRead.withMaxNumRecords(properties.maxNumRecords.getValue());
  }

  // only consider the value of the Kafka record, no matter which format is selected
  PCollection<byte[]> kafkaRecords = pBegin.apply(kafkaRead) //
      .apply(ParDo.of(new ExtractRecord())) //
      .apply(Values.<byte[]> create());

  switch (properties.getDatasetProperties().valueFormat.getValue()) {
    case AVRO: {
      Schema schema = null;
      if (properties.getDatasetProperties().isHierarchy.getValue()) {
        // use the component's schema directly? should be done at design time, no?
        schema = new Schema.Parser().parse(properties.getDatasetProperties().avroSchema.getValue());
      } else {
        // use the component's schema directly as we are Avro-native
        schema = properties.getDatasetProperties().main.schema.getValue();
      }
      return kafkaRecords.apply(ParDo.of(new ConvertToAvro(schema.toString())));
    }
    case CSV: {
      // FIXME(bchen) KafkaAvroRegistry has no way to record the adaptation; it infers the schema from the data
      // rather than using the defined schema
      return kafkaRecords
          .apply(ParDo.of(new ExtractCsvSplit(properties.getDatasetProperties().fieldDelimiter.getValue())))
          .apply(ConvertToIndexedRecord.<String[]>of());
    }
    default:
      throw new RuntimeException("To be implemented: " + properties.getDatasetProperties().valueFormat.getValue());
  }
}
Example 6: getKafkaReader

import org.apache.beam.sdk.io.kafka.KafkaIO; // import the required package/class

private static KafkaIO.Read<String, String> getKafkaReader(final String bootstrapServers) {
  return KafkaIO.<String, String>read().withBootstrapServers(bootstrapServers)
      .withKeyDeserializer(StringDeserializer.class)
      .withValueDeserializer(StringDeserializer.class);
}
Example 7: getKafkaWriter

import org.apache.beam.sdk.io.kafka.KafkaIO; // import the required package/class

private static KafkaIO.Write<String, String> getKafkaWriter(final String bootstrapServers) {
  return KafkaIO.<String, String>write().withBootstrapServers(bootstrapServers)
      .withKeySerializer(StringSerializer.class)
      .withValueSerializer(StringSerializer.class);
}
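The two helpers above only fix the bootstrap servers and the String (de)serializers; a topic still has to be set before they can be applied. A possible way to wire them into a passthrough pipeline is sketched below, assuming the method lives in the same class as getKafkaReader and getKafkaWriter; the relayTopic method and the topic names are illustrative, not part of the original source.

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;

// Hypothetical usage of the two factory methods: relay records from one topic to another.
private static void relayTopic(final String bootstrapServers, String[] args) {
  Pipeline pipeline = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());

  // withoutMetadata() drops offsets/partitions and yields plain key/value pairs.
  PCollection<KV<String, String>> records = pipeline.apply(
      getKafkaReader(bootstrapServers).withTopic("input_topic").withoutMetadata());

  // KafkaIO.Write consumes KV pairs directly; the KV key becomes the Kafka record key.
  records.apply(getKafkaWriter(bootstrapServers).withTopic("output_topic"));

  pipeline.run().waitUntilFinish();
}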
Example 8: debugStreamingPipeline

import org.apache.beam.sdk.io.kafka.KafkaIO; // import the required package/class

@Test
public void debugStreamingPipeline() {
  TestSparkPipelineOptions options =
      PipelineOptionsFactory.create().as(TestSparkPipelineOptions.class);
  options.setForceStreaming(true);
  options.setRunner(SparkRunnerDebugger.class);

  Pipeline pipeline = Pipeline.create(options);

  KafkaIO.Read<String, String> read = KafkaIO.<String, String>read()
      .withBootstrapServers("mykafka:9092")
      .withTopics(Collections.singletonList("my_input_topic"))
      .withKeyDeserializer(StringDeserializer.class)
      .withValueDeserializer(StringDeserializer.class);

  KafkaIO.Write<String, String> write = KafkaIO.<String, String>write()
      .withBootstrapServers("myotherkafka:9092")
      .withTopic("my_output_topic")
      .withKeySerializer(StringSerializer.class)
      .withValueSerializer(StringSerializer.class);

  KvCoder<String, String> stringKvCoder = KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of());

  pipeline
      .apply(read.withoutMetadata()).setCoder(stringKvCoder)
      .apply(Window.<KV<String, String>>into(FixedWindows.of(Duration.standardSeconds(5))))
      .apply(ParDo.of(new SparkRunnerDebuggerTest.FormatKVFn()))
      .apply(Distinct.<String>create())
      .apply(WithKeys.of(new SparkRunnerDebuggerTest.ArbitraryKeyFunction()))
      .apply(write);

  final String expectedPipeline = "KafkaUtils.createDirectStream(...)\n"
      + "_.map(new org.apache.beam.sdk.transforms.windowing.FixedWindows())\n"
      + "_.mapPartitions(new org.apache.beam.runners.spark."
      + "SparkRunnerDebuggerTest$FormatKVFn())\n"
      + "_.mapPartitions(new org.apache.beam.sdk.transforms.Contextful())\n"
      + "_.groupByKey()\n"
      + "_.map(new org.apache.beam.sdk.transforms.Combine$IterableCombineFn())\n"
      + "_.mapPartitions(new org.apache.beam.sdk.transforms.Distinct$3())\n"
      + "_.mapPartitions(new org.apache.beam.sdk.transforms.Contextful())\n"
      + "_.<org.apache.beam.sdk.io.kafka.AutoValue_KafkaIO_Write>";

  SparkRunnerDebugger.DebugSparkPipelineResult result =
      (SparkRunnerDebugger.DebugSparkPipelineResult) pipeline.run();

  assertThat("Debug pipeline did not equal expected",
      result.getDebugString(),
      Matchers.equalTo(expectedPipeline));
}
Example 9: expand

import org.apache.beam.sdk.io.kafka.KafkaIO; // import the required package/class

@Override
public PDone expand(PCollection<IndexedRecord> objectPCollection) {
  final boolean useAvro =
      properties.getDatasetProperties().valueFormat.getValue() == KafkaDatasetProperties.ValueFormat.AVRO;

  KafkaIO.Write<byte[], byte[]> kafkaWrite = KafkaIO
      .<byte[], byte[]> write()
      .withBootstrapServers(properties.getDatasetProperties().getDatastoreProperties().brokers.getValue())
      .withTopic(properties.getDatasetProperties().topic.getValue())
      .withKeySerializer(ByteArraySerializer.class)
      .withValueSerializer(ByteArraySerializer.class)
      .updateProducerProperties(KafkaConnection.createOutputMaps(properties));

  switch (properties.partitionType.getValue()) {
    case COLUMN: {
      PCollection pc1 = objectPCollection.apply(WithKeys.of(new ProduceKey(properties.keyColumn.getValue())));
      if (useAvro) {
        // TODO for now, use the incoming Avro schema directly; do not check the configured schema. Improve this.
        return ((PCollection<KV<byte[], byte[]>>) pc1.apply(MapElements.via(new AvroToByteArrayKV())))
            .apply(kafkaWrite);
      } else { // csv
        return ((PCollection<KV<byte[], byte[]>>) pc1.apply(
            MapElements.via(new FormatCsvKV(properties.getDatasetProperties().fieldDelimiter.getValue()))))
            .apply(kafkaWrite);
      }
    }
    case ROUND_ROBIN: {
      if (useAvro) {
        // TODO for now, use the incoming Avro schema directly; do not check the configured schema. Improve this.
        return (PDone) objectPCollection.apply(MapElements.via(new AvroToByteArray())).apply(
            kafkaWrite.values());
      } else { // csv
        return (PDone) objectPCollection
            .apply(MapElements
                .via(new FormatCsv(properties.getDatasetProperties().fieldDelimiter.getValue())))
            .apply(kafkaWrite.values());
      }
    }
    default:
      throw new RuntimeException("To be implemented: " + properties.partitionType.getValue());
  }
}