This article collects typical usage examples of the Java method org.apache.beam.sdk.values.PCollection.setCoder. If you are wondering what PCollection.setCoder does, how to call it, or what it looks like in real code, the curated examples below should help. You can also explore further usage examples of its enclosing class, org.apache.beam.sdk.values.PCollection.
The following section presents 15 code examples of the PCollection.setCoder method, sorted by popularity by default.
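Before the examples, here is a minimal, self-contained sketch of the common pattern they all share: build a PCollection, apply a transform, then call setCoder on the result to pin down its element coder explicitly instead of relying on coder inference. The class name (SetCoderSketch), pipeline, and transform below are illustrative assumptions rather than code from any of the examples that follow, and running it assumes a Beam runner such as the direct runner is on the classpath.

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.coders.KvCoder;
import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.coders.VarIntCoder;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.MapElements;
import org.apache.beam.sdk.transforms.SimpleFunction;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;

public class SetCoderSketch {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());

    PCollection<String> words = p.apply(Create.of("a", "bb", "ccc"));

    // Map each word to a (word, length) pair.
    PCollection<KV<String, Integer>> lengths = words.apply(
        MapElements.via(new SimpleFunction<String, KV<String, Integer>>() {
          @Override
          public KV<String, Integer> apply(String word) {
            return KV.of(word, word.length());
          }
        }));

    // Set the output coder explicitly rather than relying on inference,
    // mirroring the setCoder calls in the examples below.
    lengths.setCoder(KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of()));

    p.run().waitUntilFinish();
  }
}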
Example 1: buildBeamPipeline
import org.apache.beam.sdk.values.PCollection; // import the package/class the method depends on
@Override
public PCollection<BeamRecord> buildBeamPipeline(PCollectionTuple inputPCollections
    , BeamSqlEnv sqlEnv) throws Exception {
  RelNode input = getInput();
  String stageName = BeamSqlRelUtils.getStageName(this);

  PCollection<BeamRecord> upstream =
      BeamSqlRelUtils.getBeamRelInput(input).buildBeamPipeline(inputPCollections, sqlEnv);

  BeamSqlExpressionExecutor executor = new BeamSqlFnExecutor(this);

  PCollection<BeamRecord> projectStream = upstream.apply(stageName, ParDo
      .of(new BeamSqlProjectFn(getRelTypeName(), executor,
          CalciteUtils.toBeamRowType(rowType))));
  projectStream.setCoder(CalciteUtils.toBeamRowType(getRowType()).getRecordCoder());

  return projectStream;
}
Example 2: buildBeamPipeline
import org.apache.beam.sdk.values.PCollection; // import the package/class the method depends on
@Override
public PCollection<BeamRecord> buildBeamPipeline(PCollectionTuple inputPCollections
    , BeamSqlEnv sqlEnv) throws Exception {
  RelNode input = getInput();
  String stageName = BeamSqlRelUtils.getStageName(this);

  PCollection<BeamRecord> upstream =
      BeamSqlRelUtils.getBeamRelInput(input).buildBeamPipeline(inputPCollections, sqlEnv);

  BeamSqlExpressionExecutor executor = new BeamSqlFnExecutor(this);

  PCollection<BeamRecord> filterStream = upstream.apply(stageName,
      ParDo.of(new BeamSqlFilterFn(getRelTypeName(), executor)));
  filterStream.setCoder(CalciteUtils.toBeamRowType(getRowType()).getRecordCoder());

  return filterStream;
}
Example 3: expand
import org.apache.beam.sdk.values.PCollection; // import the package/class the method depends on
@Override
public PCollection<KV<K, V>> expand(PCollection<V> in) {
  PCollection<KV<K, V>> result =
      in.apply("AddKeys", MapElements.via(new SimpleFunction<V, KV<K, V>>() {
        @Override
        public KV<K, V> apply(V element) {
          return KV.of(fn.apply(element), element);
        }
      }));

  try {
    Coder<K> keyCoder;
    CoderRegistry coderRegistry = in.getPipeline().getCoderRegistry();
    if (keyClass == null) {
      keyCoder = coderRegistry.getOutputCoder(fn, in.getCoder());
    } else {
      keyCoder = coderRegistry.getCoder(TypeDescriptor.of(keyClass));
    }
    // TODO: Remove when we can set the coder inference context.
    result.setCoder(KvCoder.of(keyCoder, in.getCoder()));
  } catch (CannotProvideCoderException exc) {
    // let lazy coder inference have a try
  }

  return result;
}
Example 4: expand
import org.apache.beam.sdk.values.PCollection; // import the package/class the method depends on
@Override
public PCollection<OutputT> expand(PCollection<? extends InputT> input) {
  CoderRegistry registry = input.getPipeline().getCoderRegistry();
  finishSpecifyingStateSpecs(fn, registry, input.getCoder());
  TupleTag<OutputT> mainOutput = new TupleTag<>(MAIN_OUTPUT_TAG);
  PCollection<OutputT> res =
      input.apply(withOutputTags(mainOutput, TupleTagList.empty())).get(mainOutput);

  try {
    res.setCoder(
        registry.getCoder(
            getFn().getOutputTypeDescriptor(),
            getFn().getInputTypeDescriptor(),
            ((PCollection<InputT>) input).getCoder()));
  } catch (CannotProvideCoderException e) {
    // Ignore and leave coder unset.
  }

  return res;
}
Example 5: expand
import org.apache.beam.sdk.values.PCollection; // import the package/class the method depends on
@Override
public PCollection<KV<Integer, Iterable<KV<KV<K, W>, WindowedValue<V>>>>>
    expand(PCollection<KV<K, V>> input) {
  @SuppressWarnings("unchecked")
  Coder<W> windowCoder = (Coder<W>)
      input.getWindowingStrategy().getWindowFn().windowCoder();
  @SuppressWarnings("unchecked")
  KvCoder<K, V> inputCoder = (KvCoder<K, V>) input.getCoder();

  PCollection<KV<Integer, KV<KV<K, W>, WindowedValue<V>>>> keyedByHash;
  keyedByHash = input.apply(
      ParDo.of(new GroupByKeyHashAndSortByKeyAndWindowDoFn<K, V, W>(coder)));
  keyedByHash.setCoder(
      KvCoder.of(
          VarIntCoder.of(),
          KvCoder.of(KvCoder.of(inputCoder.getKeyCoder(), windowCoder),
              FullWindowedValueCoder.of(inputCoder.getValueCoder(), windowCoder))));

  return keyedByHash.apply(
      new GroupByKeyAndSortValuesOnly<Integer, KV<K, W>, WindowedValue<V>>());
}
Example 6: applyForSingleton
import org.apache.beam.sdk.values.PCollection; // import the package/class the method depends on
static <T, FinalT, ViewT, W extends BoundedWindow> PCollection<?>
    applyForSingleton(
        DataflowRunner runner,
        PCollection<T> input,
        DoFn<KV<Integer, Iterable<KV<W, WindowedValue<T>>>>,
            IsmRecord<WindowedValue<FinalT>>> doFn,
        Coder<FinalT> defaultValueCoder,
        PCollectionView<ViewT> view) {

  @SuppressWarnings("unchecked")
  Coder<W> windowCoder = (Coder<W>)
      input.getWindowingStrategy().getWindowFn().windowCoder();

  IsmRecordCoder<WindowedValue<FinalT>> ismCoder =
      coderForSingleton(windowCoder, defaultValueCoder);

  PCollection<IsmRecord<WindowedValue<FinalT>>> reifiedPerWindowAndSorted = input
      .apply(new GroupByWindowHashAsKeyAndWindowAsSortKey<T, W>(ismCoder))
      .apply(ParDo.of(doFn));
  reifiedPerWindowAndSorted.setCoder(ismCoder);

  runner.addPCollectionRequiringIndexedFormat(reifiedPerWindowAndSorted);
  reifiedPerWindowAndSorted.apply(
      CreateDataflowView.<IsmRecord<WindowedValue<FinalT>>, ViewT>forBatch(view));
  return reifiedPerWindowAndSorted;
}
Example 7: buildBeamPipeline
import org.apache.beam.sdk.values.PCollection; // import the package/class the method depends on
@Override public PCollection<BeamRecord> buildBeamPipeline(PCollectionTuple inputPCollections
    , BeamSqlEnv sqlEnv) throws Exception {
  RelNode input = getInput();
  PCollection<BeamRecord> upstream = BeamSqlRelUtils.getBeamRelInput(input)
      .buildBeamPipeline(inputPCollections, sqlEnv);
  Type windowType = upstream.getWindowingStrategy().getWindowFn()
      .getWindowTypeDescriptor().getType();
  if (!windowType.equals(GlobalWindow.class)) {
    throw new UnsupportedOperationException(
        "`ORDER BY` is only supported for GlobalWindow, actual window: " + windowType);
  }

  BeamSqlRowComparator comparator = new BeamSqlRowComparator(fieldIndices, orientation,
      nullsFirst);
  // first find the top (offset + count)
  PCollection<List<BeamRecord>> rawStream =
      upstream.apply("extractTopOffsetAndFetch",
          Top.of(startIndex + count, comparator).withoutDefaults())
      .setCoder(ListCoder.<BeamRecord>of(upstream.getCoder()));

  // strip the `leading offset`
  if (startIndex > 0) {
    rawStream = rawStream.apply("stripLeadingOffset", ParDo.of(
        new SubListFn<BeamRecord>(startIndex, startIndex + count)))
        .setCoder(ListCoder.<BeamRecord>of(upstream.getCoder()));
  }

  PCollection<BeamRecord> orderedStream = rawStream.apply(
      "flatten", Flatten.<BeamRecord>iterables());
  orderedStream.setCoder(CalciteUtils.toBeamRowType(getRowType()).getRecordCoder());

  return orderedStream;
}
Example 8: expand
import org.apache.beam.sdk.values.PCollection; // import the package/class the method depends on
@Override
public PCollection<T> expand(PBegin input) {
  try {
    Coder<T> coder;
    if (elementCoder.isPresent()) {
      coder = elementCoder.get();
    } else if (typeDescriptor.isPresent()) {
      coder = input.getPipeline().getCoderRegistry().getCoder(typeDescriptor.get());
    } else {
      Iterable<T> rawElements =
          Iterables.transform(
              timestampedElements,
              new Function<TimestampedValue<T>, T>() {
                @Override
                public T apply(TimestampedValue<T> timestampedValue) {
                  return timestampedValue.getValue();
                }
              });
      coder = getDefaultCreateCoder(input.getPipeline().getCoderRegistry(), rawElements);
    }

    PCollection<TimestampedValue<T>> intermediate = Pipeline.applyTransform(input,
        Create.of(timestampedElements).withCoder(TimestampedValueCoder.of(coder)));

    PCollection<T> output = intermediate.apply(ParDo.of(new ConvertTimestamps<T>()));
    output.setCoder(coder);
    return output;
  } catch (CannotProvideCoderException e) {
    throw new IllegalArgumentException("Unable to infer a coder and no Coder was specified. "
        + "Please set a coder by invoking CreateTimestamped.withCoder() explicitly.", e);
  }
}
Example 9: expand
import org.apache.beam.sdk.values.PCollection; // import the package/class the method depends on
@Override
public OutputT expand(InputT input) {
  OutputT res = delegate().expand(input);
  if (res instanceof PCollection) {
    PCollection pc = (PCollection) res;
    try {
      pc.setCoder(delegate().getDefaultOutputCoder(input, pc));
    } catch (CannotProvideCoderException e) {
      // Let coder inference happen later.
    }
  }
  return res;
}
Example 10: expand
import org.apache.beam.sdk.values.PCollection; // import the package/class the method depends on
@Override
public final PCollection<T> expand(PBegin input) {
  try {
    PCollection<T> pc = Pipeline
        .applyTransform(input, new Impulse())
        .apply(ParDo.of(DecodeAndEmitDoFn
            .fromIterable(transform.getElements(), originalOutput.getCoder())));
    pc.setCoder(originalOutput.getCoder());
    return pc;
  } catch (IOException e) {
    throw new IllegalStateException("Unable to encode elements.", e);
  }
}
Example 11: testFlattenInMemoryEvaluatorWithEmptyPCollectionList
import org.apache.beam.sdk.values.PCollection; // import the package/class the method depends on
@Test
public void testFlattenInMemoryEvaluatorWithEmptyPCollectionList() throws Exception {
  PCollectionList<Integer> list = PCollectionList.empty(p);

  PCollection<Integer> flattened = list.apply(Flatten.<Integer>pCollections());
  flattened.setCoder(VarIntCoder.of());

  EvaluationContext evaluationContext = mock(EvaluationContext.class);
  when(evaluationContext.createBundle(flattened))
      .thenReturn(bundleFactory.createBundle(flattened));

  FlattenEvaluatorFactory factory = new FlattenEvaluatorFactory(evaluationContext);
  AppliedPTransform<?, ?, ?> flattendProducer = DirectGraphs.getProducer(flattened);
  TransformEvaluator<Integer> emptyEvaluator =
      factory.forApplication(
          flattendProducer,
          bundleFactory.createRootBundle().commit(BoundedWindow.TIMESTAMP_MAX_VALUE));

  TransformResult<Integer> leftSideResult = emptyEvaluator.finishBundle();

  CommittedBundle<?> outputBundle =
      Iterables.getOnlyElement(leftSideResult.getOutputBundles()).commit(Instant.now());
  assertThat(outputBundle.getElements(), emptyIterable());
  assertThat(
      leftSideResult.getTransform(),
      Matchers.<AppliedPTransform<?, ?, ?>>equalTo(flattendProducer));
}
Example 12: buildBeamPipeline
import org.apache.beam.sdk.values.PCollection; // import the package/class the method depends on
@Override
public PCollection<BeamRecord> buildBeamPipeline(PCollectionTuple inputPCollections
    , BeamSqlEnv sqlEnv) throws Exception {
  RelNode input = getInput();
  String stageName = BeamSqlRelUtils.getStageName(this) + "_";

  PCollection<BeamRecord> upstream =
      BeamSqlRelUtils.getBeamRelInput(input).buildBeamPipeline(inputPCollections, sqlEnv);
  if (windowFieldIdx != -1) {
    upstream = upstream.apply(stageName + "assignEventTimestamp", WithTimestamps
        .of(new BeamAggregationTransforms.WindowTimestampFn(windowFieldIdx))
        .withAllowedTimestampSkew(new Duration(Long.MAX_VALUE)))
        .setCoder(upstream.getCoder());
  }

  PCollection<BeamRecord> windowStream = upstream.apply(stageName + "window",
      Window.into(windowFn)
          .triggering(trigger)
          .withAllowedLateness(allowedLatence)
          .accumulatingFiredPanes());

  BeamRecordCoder keyCoder = exKeyFieldsSchema(input.getRowType()).getRecordCoder();
  PCollection<KV<BeamRecord, BeamRecord>> exCombineByStream = windowStream.apply(
      stageName + "exCombineBy",
      WithKeys
          .of(new BeamAggregationTransforms.AggregationGroupByKeyFn(
              windowFieldIdx, groupSet)))
      .setCoder(KvCoder.of(keyCoder, upstream.getCoder()));

  BeamRecordCoder aggCoder = exAggFieldsSchema().getRecordCoder();

  PCollection<KV<BeamRecord, BeamRecord>> aggregatedStream = exCombineByStream.apply(
      stageName + "combineBy",
      Combine.<BeamRecord, BeamRecord, BeamRecord>perKey(
          new BeamAggregationTransforms.AggregationAdaptor(getAggCallList(),
              CalciteUtils.toBeamRowType(input.getRowType()))))
      .setCoder(KvCoder.of(keyCoder, aggCoder));

  PCollection<BeamRecord> mergedStream = aggregatedStream.apply(stageName + "mergeRecord",
      ParDo.of(new BeamAggregationTransforms.MergeAggregationRecord(
          CalciteUtils.toBeamRowType(getRowType()), getAggCallList(), windowFieldIdx)));
  mergedStream.setCoder(CalciteUtils.toBeamRowType(getRowType()).getRecordCoder());

  return mergedStream;
}
Example 13: testCountPerElementBasic
import org.apache.beam.sdk.values.PCollection; // import the package/class the method depends on
/**
 * This step is equivalent to the query below.
 * <pre>
 * SELECT `f_int`
 * , COUNT(*) AS `size`
 * , SUM(`f_long`) AS `sum1`, AVG(`f_long`) AS `avg1`
 * , MAX(`f_long`) AS `max1`, MIN(`f_long`) AS `min1`
 * , SUM(`f_short`) AS `sum2`, AVG(`f_short`) AS `avg2`
 * , MAX(`f_short`) AS `max2`, MIN(`f_short`) AS `min2`
 * , SUM(`f_byte`) AS `sum3`, AVG(`f_byte`) AS `avg3`
 * , MAX(`f_byte`) AS `max3`, MIN(`f_byte`) AS `min3`
 * , SUM(`f_float`) AS `sum4`, AVG(`f_float`) AS `avg4`
 * , MAX(`f_float`) AS `max4`, MIN(`f_float`) AS `min4`
 * , SUM(`f_double`) AS `sum5`, AVG(`f_double`) AS `avg5`
 * , MAX(`f_double`) AS `max5`, MIN(`f_double`) AS `min5`
 * , MAX(`f_timestamp`) AS `max7`, MIN(`f_timestamp`) AS `min7`
 * , SUM(`f_int2`) AS `sum8`, AVG(`f_int2`) AS `avg8`
 * , MAX(`f_int2`) AS `max8`, MIN(`f_int2`) AS `min8`
 * FROM TABLE_NAME
 * GROUP BY `f_int`
 * </pre>
 * @throws ParseException
 */
@Test
public void testCountPerElementBasic() throws ParseException {
  setupEnvironment();

  PCollection<BeamRecord> input = p.apply(Create.of(inputRows));

  // 1. extract the fields in the group-by key part
  PCollection<KV<BeamRecord, BeamRecord>> exGroupByStream = input.apply("exGroupBy",
      WithKeys
          .of(new BeamAggregationTransforms.AggregationGroupByKeyFn(-1, ImmutableBitSet.of(0))))
      .setCoder(KvCoder.<BeamRecord, BeamRecord>of(keyCoder, inRecordCoder));

  // 2. apply a GroupByKey
  PCollection<KV<BeamRecord, Iterable<BeamRecord>>> groupedStream = exGroupByStream
      .apply("groupBy", GroupByKey.<BeamRecord, BeamRecord>create())
      .setCoder(KvCoder.<BeamRecord, Iterable<BeamRecord>>of(keyCoder,
          IterableCoder.<BeamRecord>of(inRecordCoder)));

  // 3. run the aggregation functions
  PCollection<KV<BeamRecord, BeamRecord>> aggregatedStream = groupedStream.apply("aggregation",
      Combine.<BeamRecord, BeamRecord, BeamRecord>groupedValues(
          new BeamAggregationTransforms.AggregationAdaptor(aggCalls, inputRowType)))
      .setCoder(KvCoder.<BeamRecord, BeamRecord>of(keyCoder, aggCoder));

  // 4. flatten each KV into a single record
  PCollection<BeamRecord> mergedStream = aggregatedStream.apply("mergeRecord",
      ParDo.of(new BeamAggregationTransforms.MergeAggregationRecord(outputType, aggCalls, -1)));
  mergedStream.setCoder(outRecordCoder);

  // assert BeamAggregationTransform.AggregationGroupByKeyFn
  PAssert.that(exGroupByStream).containsInAnyOrder(prepareResultOfAggregationGroupByKeyFn());

  // assert BeamAggregationTransform.AggregationCombineFn
  PAssert.that(aggregatedStream).containsInAnyOrder(prepareResultOfAggregationCombineFn());

  // assert BeamAggregationTransform.MergeAggregationRecord
  PAssert.that(mergedStream).containsInAnyOrder(prepareResultOfMergeAggregationRecord());

  p.run();
}
Example 14: expand
import org.apache.beam.sdk.values.PCollection; // import the package/class the method depends on
@Override
public PCollection<KV<Void, T>> expand(PCollection<T> input) {
  PCollection output = input.apply(ParDo.of(new VoidKeyToMultimapMaterializationDoFn<>()));
  output.setCoder(KvCoder.of(VoidCoder.of(), input.getCoder()));
  return output;
}
Example 15: expand
import org.apache.beam.sdk.values.PCollection; // import the package/class the method depends on
@Override
public WriteResult expand(PCollection<KV<TableDestination, TableRow>> input) {
  // A naive implementation would be to simply stream data directly to BigQuery.
  // However, this could occasionally lead to duplicated data, e.g., when
  // a VM that runs this code is restarted and the code is re-run.
  //
  // The above risk is mitigated in this implementation by relying on
  // BigQuery built-in best effort de-dup mechanism.
  //
  // To use this mechanism, each input TableRow is tagged with a generated
  // unique id, which is then passed to BigQuery and used to ignore duplicates.
  //
  // We create 50 keys per BigQuery table to generate output on. This is few enough that we
  // get good batching into BigQuery's insert calls, and enough that we can max out the
  // streaming insert quota.
  PCollection<KV<ShardedKey<String>, TableRowInfo>> tagged =
      input
          .apply("ShardTableWrites", ParDo.of(new GenerateShardedTable(50)))
          .setCoder(KvCoder.of(ShardedKeyCoder.of(StringUtf8Coder.of()), TableRowJsonCoder.of()))
          .apply("TagWithUniqueIds", ParDo.of(new TagWithUniqueIds()))
          .setCoder(KvCoder.of(ShardedKeyCoder.of(StringUtf8Coder.of()), TableRowInfoCoder.of()));

  // To prevent having the same TableRow processed more than once with regenerated
  // different unique ids, this implementation relies on "checkpointing", which is
  // achieved as a side effect of having StreamingWriteFn immediately follow a GBK,
  // performed by Reshuffle.
  TupleTag<Void> mainOutputTag = new TupleTag<>("mainOutput");
  TupleTag<TableRow> failedInsertsTag = new TupleTag<>("failedInserts");
  PCollectionTuple tuple = tagged
      .apply(Reshuffle.<ShardedKey<String>, TableRowInfo>of())
      // Put in the global window to ensure that DynamicDestinations side inputs are accessed
      // correctly.
      .apply("GlobalWindow",
          Window.<KV<ShardedKey<String>, TableRowInfo>>into(new GlobalWindows())
              .triggering(DefaultTrigger.of()).discardingFiredPanes())
      .apply("StreamingWrite",
          ParDo.of(
              new StreamingWriteFn(bigQueryServices, retryPolicy, failedInsertsTag))
              .withOutputTags(mainOutputTag, TupleTagList.of(failedInsertsTag)));

  PCollection<TableRow> failedInserts = tuple.get(failedInsertsTag);
  failedInserts.setCoder(TableRowJsonCoder.of());
  return WriteResult.in(input.getPipeline(), failedInsertsTag, failedInserts);
}