This page collects typical usage examples of the Java class org.apache.beam.sdk.transforms.Combine. If you are wondering what the Combine class does or how to use it, the curated code examples below may help.
The Combine class belongs to the org.apache.beam.sdk.transforms package. Fifteen code examples of the class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Java code examples.
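Before the examples, here is a minimal sketch of the typical Combine usage pattern: a custom Combine.CombineFn (the AverageFn below is a hypothetical illustration, not taken from the examples that follow) applied with Combine.globally and Combine.perKey.
import java.io.Serializable;
import org.apache.beam.sdk.transforms.Combine;

// Hypothetical CombineFn that computes the average of Integer elements.
public class AverageFn extends Combine.CombineFn<Integer, AverageFn.Accum, Double> {

  // Accumulator holding the running sum and count; Serializable so a default coder can encode it.
  public static class Accum implements Serializable {
    long sum = 0;
    long count = 0;
  }

  @Override
  public Accum createAccumulator() {
    return new Accum();
  }

  @Override
  public Accum addInput(Accum accum, Integer input) {
    accum.sum += input;
    accum.count++;
    return accum;
  }

  @Override
  public Accum mergeAccumulators(Iterable<Accum> accums) {
    Accum merged = createAccumulator();
    for (Accum accum : accums) {
      merged.sum += accum.sum;
      merged.count += accum.count;
    }
    return merged;
  }

  @Override
  public Double extractOutput(Accum accum) {
    return accum.count == 0 ? 0.0 : ((double) accum.sum) / accum.count;
  }
}

// Applying it globally or per key (numbers and keyedNumbers are assumed PCollections):
// PCollection<Double> average = numbers.apply(Combine.globally(new AverageFn()));
// PCollection<KV<String, Double>> perKeyAverage = keyedNumbers.apply(Combine.perKey(new AverageFn()));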
Example 1: testMergingCustomWindows
import org.apache.beam.sdk.transforms.Combine; // import the required package/class
@Test
@Category({ValidatesRunner.class, UsesCustomWindowMerging.class})
public void testMergingCustomWindows() {
  Instant startInstant = new Instant(0L);
  List<TimestampedValue<String>> input = new ArrayList<>();
  PCollection<String> inputCollection =
      pipeline.apply(
          Create.timestamped(
              TimestampedValue.of("big", startInstant.plus(Duration.standardSeconds(10))),
              TimestampedValue.of("small1", startInstant.plus(Duration.standardSeconds(20))),
              // This element falls outside of bigWindow and is therefore not merged
              TimestampedValue.of("small2", startInstant.plus(Duration.standardSeconds(39)))));
  PCollection<String> windowedCollection =
      inputCollection.apply(Window.into(new CustomWindowFn<String>()));
  PCollection<Long> count =
      windowedCollection.apply(Combine.globally(Count.<String>combineFn()).withoutDefaults());
  // "small1" and "big" are merged into bigWindow; "small2" is not merged
  // because its timestamp is not in bigWindow
  PAssert.that("Wrong number of elements in output collection", count).containsInAnyOrder(2L, 1L);
  pipeline.run();
}
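The two custom-window tests reference a CustomWindowFn that is not shown on this page. The following is a hypothetical, simplified sketch of what such a merging WindowFn could look like: elements whose string form contains "big" are assigned a wide window, everything else gets a narrow window, and any window fully contained in another is merged into it. The class name matches the tests, but the window sizes and the merging rule are assumptions, not the original test code.
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.transforms.windowing.IntervalWindow;
import org.apache.beam.sdk.transforms.windowing.WindowFn;
import org.apache.beam.sdk.transforms.windowing.WindowMappingFn;
import org.joda.time.Duration;

public class CustomWindowFn<T> extends WindowFn<T, IntervalWindow> {

  @Override
  public Collection<IntervalWindow> assignWindows(AssignContext c) {
    // "big" elements get a 30-second window, all other elements a 10-second window (assumed sizes).
    Duration size =
        String.valueOf(c.element()).contains("big")
            ? Duration.standardSeconds(30)
            : Duration.standardSeconds(10);
    return Collections.singletonList(new IntervalWindow(c.timestamp(), size));
  }

  @Override
  public void mergeWindows(MergeContext c) throws Exception {
    // Plan the merges first, then apply them, so c.windows() is not modified while iterating.
    Map<IntervalWindow, List<IntervalWindow>> plan = new HashMap<>();
    Set<IntervalWindow> claimed = new HashSet<>();
    for (IntervalWindow big : c.windows()) {
      for (IntervalWindow candidate : c.windows()) {
        if (!candidate.equals(big) && !claimed.contains(candidate) && big.contains(candidate)) {
          plan.computeIfAbsent(big, w -> new ArrayList<>()).add(candidate);
          claimed.add(candidate);
        }
      }
    }
    for (Map.Entry<IntervalWindow, List<IntervalWindow>> entry : plan.entrySet()) {
      List<IntervalWindow> toMerge = new ArrayList<>(entry.getValue());
      toMerge.add(entry.getKey());
      c.merge(toMerge, entry.getKey());
    }
  }

  @Override
  public boolean isCompatible(WindowFn<?, ?> other) {
    return other instanceof CustomWindowFn;
  }

  @Override
  public Coder<IntervalWindow> windowCoder() {
    return IntervalWindow.getCoder();
  }

  @Override
  public WindowMappingFn<IntervalWindow> getDefaultWindowMappingFn() {
    throw new UnsupportedOperationException("This sketch does not support side input windows");
  }
}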
Example 2: testMergingCustomWindowsKeyedCollection
import org.apache.beam.sdk.transforms.Combine; // import the required package/class
@Test
@Category({ValidatesRunner.class, UsesCustomWindowMerging.class})
public void testMergingCustomWindowsKeyedCollection() {
  Instant startInstant = new Instant(0L);
  PCollection<KV<Integer, String>> inputCollection =
      pipeline.apply(
          Create.timestamped(
              TimestampedValue.of(
                  KV.of(0, "big"), startInstant.plus(Duration.standardSeconds(10))),
              TimestampedValue.of(
                  KV.of(1, "small1"), startInstant.plus(Duration.standardSeconds(20))),
              // This element is not contained within bigWindow and is therefore not merged
              TimestampedValue.of(
                  KV.of(2, "small2"), startInstant.plus(Duration.standardSeconds(39)))));
  PCollection<KV<Integer, String>> windowedCollection =
      inputCollection.apply(Window.into(new CustomWindowFn<KV<Integer, String>>()));
  PCollection<Long> count =
      windowedCollection.apply(
          Combine.globally(Count.<KV<Integer, String>>combineFn()).withoutDefaults());
  // "small1" and "big" are merged into bigWindow; "small2" is not merged
  // because it is not contained in bigWindow
  PAssert.that("Wrong number of elements in output collection", count).containsInAnyOrder(2L, 1L);
  pipeline.run();
}
Example 3: testHifIOWithElastic
import org.apache.beam.sdk.transforms.Combine; // import the required package/class
/**
 * Test to read data from an embedded Elasticsearch instance and verify whether the data is read
 * successfully.
 */
@Test
public void testHifIOWithElastic() {
  // The expected hash code was computed once at insertion time and is hardcoded here.
  String expectedHashCode = "a62a85f5f081e3840baf1028d4d6c6bc";
  Configuration conf = getConfiguration();
  PCollection<KV<Text, LinkedMapWritable>> esData =
      pipeline.apply(HadoopInputFormatIO.<Text, LinkedMapWritable>read().withConfiguration(conf));
  PCollection<Long> count = esData.apply(Count.<KV<Text, LinkedMapWritable>>globally());
  // Verify that the count of objects fetched using HIFInputFormat IO is correct.
  PAssert.thatSingleton(count).isEqualTo((long) TEST_DATA_ROW_COUNT);
  PCollection<LinkedMapWritable> values = esData.apply(Values.<LinkedMapWritable>create());
  PCollection<String> textValues = values.apply(transformFunc);
  // Verify the output values using checksum comparison.
  PCollection<String> consolidatedHashcode =
      textValues.apply(Combine.globally(new HashingFn()).withoutDefaults());
  PAssert.that(consolidatedHashcode).containsInAnyOrder(expectedHashCode);
  pipeline.run().waitUntilFinish();
}
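The Elasticsearch and Cassandra tests on this page pass a HashingFn to Combine.globally but do not show its implementation. Below is a hypothetical, simplified stand-in that computes an order-insensitive checksum over the element strings; the real HashingFn used by the Beam IO integration tests combines Murmur3 hashes differently, so treat this only as an illustration of the shape of such a CombineFn.
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import com.google.common.hash.Hashing;
import org.apache.beam.sdk.transforms.Combine;

public class HashingFn extends Combine.CombineFn<String, HashingFn.Accum, String> {

  // Accumulator holding the running checksum.
  public static class Accum implements Serializable {
    long checksum = 0L;
  }

  @Override
  public Accum createAccumulator() {
    return new Accum();
  }

  @Override
  public Accum addInput(Accum accum, String input) {
    // XOR of per-element hashes makes the result independent of element order.
    accum.checksum ^= Hashing.murmur3_128().hashString(input, StandardCharsets.UTF_8).asLong();
    return accum;
  }

  @Override
  public Accum mergeAccumulators(Iterable<Accum> accums) {
    Accum merged = createAccumulator();
    for (Accum accum : accums) {
      merged.checksum ^= accum.checksum;
    }
    return merged;
  }

  @Override
  public String extractOutput(Accum accum) {
    return Long.toHexString(accum.checksum);
  }
}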
Example 4: testHifIOWithElastic
import org.apache.beam.sdk.transforms.Combine; // import the required package/class
/**
 * This test reads data from the Elasticsearch instance and verifies whether the data is read
 * successfully.
 */
@Test
public void testHifIOWithElastic() throws SecurityException, IOException {
  // The expected hash code was computed once at insertion time and is hardcoded here.
  final long expectedRowCount = 1000L;
  String expectedHashCode = "42e254c8689050ed0a617ff5e80ea392";
  Configuration conf = getConfiguration(options);
  PCollection<KV<Text, LinkedMapWritable>> esData =
      pipeline.apply(HadoopInputFormatIO.<Text, LinkedMapWritable>read().withConfiguration(conf));
  // Verify that the count of objects fetched using HIFInputFormat IO is correct.
  PCollection<Long> count = esData.apply(Count.<KV<Text, LinkedMapWritable>>globally());
  PAssert.thatSingleton(count).isEqualTo(expectedRowCount);
  PCollection<LinkedMapWritable> values = esData.apply(Values.<LinkedMapWritable>create());
  PCollection<String> textValues = values.apply(transformFunc);
  // Verify the output values using checksum comparison.
  PCollection<String> consolidatedHashcode =
      textValues.apply(Combine.globally(new HashingFn()).withoutDefaults());
  PAssert.that(consolidatedHashcode).containsInAnyOrder(expectedHashCode);
  pipeline.run().waitUntilFinish();
}
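The transformFunc applied in the two Elasticsearch tests is not shown on this page. A hypothetical version that flattens each Elasticsearch document (LinkedMapWritable from elasticsearch-hadoop behaves like a Map of Writable keys and values) into the String that feeds the checksum could look like the following; the field names are assumptions, not the original helper.
import org.apache.beam.sdk.transforms.MapElements;
import org.apache.beam.sdk.transforms.SimpleFunction;
import org.apache.hadoop.io.Text;
import org.elasticsearch.hadoop.mr.LinkedMapWritable;

// Hypothetical value-translation transform used as values.apply(transformFunc) above.
MapElements<LinkedMapWritable, String> transformFunc =
    MapElements.via(
        new SimpleFunction<LinkedMapWritable, String>() {
          @Override
          public String apply(LinkedMapWritable document) {
            // Concatenate the fields that should contribute to the checksum (assumed field names).
            return document.get(new Text("id")) + "|" + document.get(new Text("scientist"));
          }
        });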
Example 5: testHIFReadForCassandra
import org.apache.beam.sdk.transforms.Combine; // import the required package/class
/**
 * This test reads data from the Cassandra instance and verifies whether the data is read successfully.
 */
@Test
public void testHIFReadForCassandra() {
  // The expected hash code was computed once at insertion time and is hardcoded here.
  String expectedHashCode = "1a30ad400afe4ebf5fde75f5d2d95408";
  Long expectedRecordsCount = 1000L;
  Configuration conf = getConfiguration(options);
  PCollection<KV<Long, String>> cassandraData = pipeline.apply(HadoopInputFormatIO
      .<Long, String>read().withConfiguration(conf).withValueTranslation(myValueTranslate));
  PAssert.thatSingleton(cassandraData.apply("Count", Count.<KV<Long, String>>globally()))
      .isEqualTo(expectedRecordsCount);
  PCollection<String> textValues = cassandraData.apply(Values.<String>create());
  // Verify the output values using checksum comparison.
  PCollection<String> consolidatedHashcode =
      textValues.apply(Combine.globally(new HashingFn()).withoutDefaults());
  PAssert.that(consolidatedHashcode).containsInAnyOrder(expectedHashCode);
  pipeline.run().waitUntilFinish();
}
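The Cassandra tests pass a value-translation function, myValueTranslate, to withValueTranslation, but its implementation is not included here. A hypothetical SimpleFunction that turns the Cassandra driver's Row (the value type produced by CqlInputFormat) into a String might look like this; the column names y_id and field0 are taken from the query in Example 6 but are otherwise assumptions.
import com.datastax.driver.core.Row;
import org.apache.beam.sdk.transforms.SimpleFunction;

// Hypothetical translation from a Cassandra Row to the String fed into the checksum.
SimpleFunction<Row, String> myValueTranslate =
    new SimpleFunction<Row, String>() {
      @Override
      public String apply(Row row) {
        // Concatenate the columns of interest (assumed column names).
        return row.getLong("y_id") + "|" + row.getString("field0");
      }
    };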
Example 6: testHIFReadForCassandraQuery
import org.apache.beam.sdk.transforms.Combine; // import the required package/class
/**
 * This test reads data from the Cassandra instance based on a query and verifies whether the data
 * is read successfully.
 */
@Test
public void testHIFReadForCassandraQuery() {
  String expectedHashCode = "7bead6d6385c5f4dd0524720cd320b49";
  Long expectedNumRows = 1L;
  Configuration conf = getConfiguration(options);
  conf.set("cassandra.input.cql", "select * from " + CASSANDRA_KEYSPACE + "." + CASSANDRA_TABLE
      + " where token(y_id) > ? and token(y_id) <= ? "
      + "and field0 = 'user48:field0:431531'");
  PCollection<KV<Long, String>> cassandraData =
      pipeline.apply(HadoopInputFormatIO.<Long, String>read().withConfiguration(conf)
          .withValueTranslation(myValueTranslate));
  PAssert.thatSingleton(cassandraData.apply("Count", Count.<KV<Long, String>>globally()))
      .isEqualTo(expectedNumRows);
  PCollection<String> textValues = cassandraData.apply(Values.<String>create());
  // Verify the output values using checksum comparison.
  PCollection<String> consolidatedHashcode =
      textValues.apply(Combine.globally(new HashingFn()).withoutDefaults());
  PAssert.that(consolidatedHashcode).containsInAnyOrder(expectedHashCode);
  pipeline.run().waitUntilFinish();
}
Example 7: testHIFReadForCassandra
import org.apache.beam.sdk.transforms.Combine; // import the required package/class
/**
 * Test to read data from an embedded Cassandra instance and verify whether the data is read
 * successfully.
 * @throws Exception
 */
@Test
public void testHIFReadForCassandra() throws Exception {
  // The expected hash code was computed once at insertion time and is hardcoded here.
  String expectedHashCode = "1b9780833cce000138b9afa25ba63486";
  Configuration conf = getConfiguration();
  PCollection<KV<Long, String>> cassandraData =
      p.apply(HadoopInputFormatIO.<Long, String>read().withConfiguration(conf)
          .withValueTranslation(myValueTranslate));
  // Verify that the count of data retrieved from Cassandra matches the expected count.
  PAssert.thatSingleton(cassandraData.apply("Count", Count.<KV<Long, String>>globally()))
      .isEqualTo(TEST_DATA_ROW_COUNT);
  PCollection<String> textValues = cassandraData.apply(Values.<String>create());
  // Verify the output values using checksum comparison.
  PCollection<String> consolidatedHashcode =
      textValues.apply(Combine.globally(new HashingFn()).withoutDefaults());
  PAssert.that(consolidatedHashcode).containsInAnyOrder(expectedHashCode);
  p.run().waitUntilFinish();
}
Example 8: testHIFReadForCassandraQuery
import org.apache.beam.sdk.transforms.Combine; // import the required package/class
/**
 * Test to read data from an embedded Cassandra instance based on a query and verify whether the
 * data is read successfully.
 */
@Test
public void testHIFReadForCassandraQuery() throws Exception {
  Long expectedCount = 1L;
  String expectedChecksum = "f11caabc7a9fc170e22b41218749166c";
  Configuration conf = getConfiguration();
  conf.set("cassandra.input.cql", "select * from " + CASSANDRA_KEYSPACE + "." + CASSANDRA_TABLE
      + " where token(id) > ? and token(id) <= ? and scientist='Faraday1' allow filtering");
  PCollection<KV<Long, String>> cassandraData =
      p.apply(HadoopInputFormatIO.<Long, String>read().withConfiguration(conf)
          .withValueTranslation(myValueTranslate));
  // Verify that the count of data retrieved from Cassandra matches the expected count.
  PAssert.thatSingleton(cassandraData.apply("Count", Count.<KV<Long, String>>globally()))
      .isEqualTo(expectedCount);
  PCollection<String> textValues = cassandraData.apply(Values.<String>create());
  // Verify the output values using checksum comparison.
  PCollection<String> consolidatedHashcode =
      textValues.apply(Combine.globally(new HashingFn()).withoutDefaults());
  PAssert.that(consolidatedHashcode).containsInAnyOrder(expectedChecksum);
  p.run().waitUntilFinish();
}
Example 9: RawCombine
import org.apache.beam.sdk.transforms.Combine; // import the required package/class
private RawCombine(RunnerApi.PTransform protoTransform,
    RehydratedComponents rehydratedComponents) throws IOException {
  this.protoTransform = protoTransform;
  this.rehydratedComponents = rehydratedComponents;
  this.spec = protoTransform.getSpec();
  this.payload = CombinePayload.parseFrom(spec.getPayload());
  // Eagerly extract the coder so that a helpful exception is thrown here.
  try {
    this.accumulatorCoder =
        (Coder<AccumT>) rehydratedComponents.getCoder(payload.getAccumulatorCoderId());
  } catch (IOException exc) {
    throw new IllegalArgumentException(
        String.format(
            "Failure extracting accumulator coder with id '%s' for %s",
            payload.getAccumulatorCoderId(), Combine.class.getSimpleName()),
        exc);
  }
}
Example 10: getAdditionalInputs
import org.apache.beam.sdk.transforms.Combine; // import the required package/class
@Override
public Map<TupleTag<?>, PValue> getAdditionalInputs() {
  Map<TupleTag<?>, PValue> additionalInputs = new HashMap<>();
  for (Map.Entry<String, SideInput> sideInputEntry : payload.getSideInputsMap().entrySet()) {
    try {
      additionalInputs.put(
          new TupleTag<>(sideInputEntry.getKey()),
          rehydratedComponents.getPCollection(
              protoTransform.getInputsOrThrow(sideInputEntry.getKey())));
    } catch (IOException exc) {
      throw new IllegalStateException(
          String.format(
              "Could not find input with name %s for %s transform",
              sideInputEntry.getKey(), Combine.class.getSimpleName()));
    }
  }
  return additionalInputs;
}
Example 11: toProto
import org.apache.beam.sdk.transforms.Combine; // import the required package/class
@VisibleForTesting
static CombinePayload toProto(
    AppliedPTransform<?, ?, Combine.PerKey<?, ?, ?>> combine, SdkComponents sdkComponents)
    throws IOException {
  GlobalCombineFn<?, ?, ?> combineFn = combine.getTransform().getFn();
  try {
    Coder<?> accumulatorCoder = extractAccumulatorCoder(combineFn, (AppliedPTransform) combine);
    Map<String, SideInput> sideInputs = new HashMap<>();
    return RunnerApi.CombinePayload.newBuilder()
        .setAccumulatorCoderId(sdkComponents.registerCoder(accumulatorCoder))
        .putAllSideInputs(sideInputs)
        .setCombineFn(toProto(combineFn))
        .build();
  } catch (CannotProvideCoderException e) {
    throw new IllegalStateException(e);
  }
}
Example 12: extractAccumulatorCoder
import org.apache.beam.sdk.transforms.Combine; // import the required package/class
private static <K, InputT, AccumT> Coder<AccumT> extractAccumulatorCoder(
    GlobalCombineFn<InputT, AccumT, ?> combineFn,
    AppliedPTransform<PCollection<KV<K, InputT>>, ?, Combine.PerKey<K, InputT, ?>> transform)
    throws CannotProvideCoderException {
  @SuppressWarnings("unchecked")
  PCollection<KV<K, InputT>> mainInput =
      (PCollection<KV<K, InputT>>)
          Iterables.getOnlyElement(TransformInputs.nonAdditionalInputs(transform));
  KvCoder<K, InputT> inputCoder = (KvCoder<K, InputT>) mainInput.getCoder();
  return AppliedCombineFn.withInputCoder(
          combineFn,
          transform.getPipeline().getCoderRegistry(),
          inputCoder,
          transform.getTransform().getSideInputs(),
          ((PCollection<?>) Iterables.getOnlyElement(transform.getOutputs().values()))
              .getWindowingStrategy())
      .getAccumulatorCoder();
}
Example 13: canTranslate
import org.apache.beam.sdk.transforms.Combine; // import the required package/class
@Override
boolean canTranslate(
    Combine.PerKey<K, InputT, OutputT> transform,
    FlinkStreamingTranslationContext context) {
  // If we have a merging window strategy and side inputs, we cannot translate this
  // as a proper combine. We have to group and then run the combine over the final
  // grouped values.
  PCollection<KV<K, InputT>> input = context.getInput(transform);
  @SuppressWarnings("unchecked")
  WindowingStrategy<?, BoundedWindow> windowingStrategy =
      (WindowingStrategy<?, BoundedWindow>) input.getWindowingStrategy();
  return windowingStrategy.getWindowFn().isNonMerging() || transform.getSideInputs().isEmpty();
}
Example 14: translateHelper
import org.apache.beam.sdk.transforms.Combine; // import the required package/class
private <K, InputT, OutputT> void translateHelper(
    final CombineGroupedValues<K, InputT, OutputT> primitiveTransform,
    TranslationContext context) {
  Combine.GroupedValues<K, InputT, OutputT> originalTransform =
      primitiveTransform.getOriginalCombine();
  StepTranslationContext stepContext =
      context.addStep(primitiveTransform, "CombineValues");
  translateInputs(
      stepContext,
      context.getInput(primitiveTransform),
      originalTransform.getSideInputs(),
      context);
  AppliedCombineFn<? super K, ? super InputT, ?, OutputT> fn =
      originalTransform.getAppliedFn(
          context.getInput(primitiveTransform).getPipeline().getCoderRegistry(),
          context.getInput(primitiveTransform).getCoder(),
          context.getInput(primitiveTransform).getWindowingStrategy());
  stepContext.addEncodingInput(fn.getAccumulatorCoder());
  stepContext.addInput(
      PropertyNames.SERIALIZED_FN, byteArrayToJsonString(serializeToByteArray(fn)));
  stepContext.addOutput(context.getOutput(primitiveTransform));
}
Example 15: getReplacementTransform
import org.apache.beam.sdk.transforms.Combine; // import the required package/class
@Override
public PTransformReplacement<PCollection<InputT>, PValue> getReplacementTransform(
    AppliedPTransform<
            PCollection<InputT>,
            PValue,
            PTransform<PCollection<InputT>, PValue>> transform) {
  Combine.GloballyAsSingletonView<?, ?> combineTransform =
      (Combine.GloballyAsSingletonView) transform.getTransform();
  return PTransformReplacement.of(
      PTransformReplacements.getSingletonMainInput(transform),
      new BatchViewOverrides.BatchViewAsSingleton(
          runner,
          findCreatePCollectionView(transform),
          (CombineFn) combineTransform.getCombineFn(),
          combineTransform.getFanout()));
}