This article collects typical usage examples of the Java method org.apache.beam.sdk.values.PCollection.getWindowingStrategy. If you are unsure what PCollection.getWindowingStrategy does, how to call it, or want to see it used in real code, the curated examples below should help; you can also explore the enclosing class, org.apache.beam.sdk.values.PCollection, in more depth.
Below, 15 code examples of PCollection.getWindowingStrategy are shown, sorted by popularity by default.
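Before the excerpts, here is a minimal self-contained sketch of the method itself. It assumes a Beam 2.x SDK (where WindowingStrategy lives in org.apache.beam.sdk.values); the class name is hypothetical. getWindowingStrategy returns whatever strategy the most recent windowing transform installed, and can be inspected at construction time without running the pipeline.
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.windowing.FixedWindows;
import org.apache.beam.sdk.transforms.windowing.Window;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.WindowingStrategy;
import org.joda.time.Duration;

public class GetWindowingStrategyDemo {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create(PipelineOptionsFactory.create());

    // Window.into replaces the default global windowing strategy.
    PCollection<Integer> windowed =
        p.apply(Create.of(1, 2, 3))
            .apply(Window.<Integer>into(FixedWindows.of(Duration.standardMinutes(1))));

    // Inspect the strategy at construction time; the pipeline need not run.
    WindowingStrategy<?, ?> strategy = windowed.getWindowingStrategy();
    System.out.println(strategy.getWindowFn()); // a FixedWindows of one minute
  }
}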
Example 1: applicableTo
import org.apache.beam.sdk.values.PCollection; // import the package/class this method depends on
public static void applicableTo(PCollection<?> input) {
WindowingStrategy<?, ?> windowingStrategy = input.getWindowingStrategy();
// Verify that the input PCollection is bounded, or that there is windowing/triggering being
// used. Without this, the watermark (at end of global window) will never be reached.
if (windowingStrategy.getWindowFn() instanceof GlobalWindows
&& windowingStrategy.getTrigger() instanceof DefaultTrigger
&& input.isBounded() != IsBounded.BOUNDED) {
throw new IllegalStateException("GroupByKey cannot be applied to non-bounded PCollection in "
+ "the GlobalWindow without a trigger. Use a Window.into or Window.triggering transform "
+ "prior to GroupByKey.");
}
// Validate the window merge function.
if (windowingStrategy.getWindowFn() instanceof InvalidWindows) {
String cause = ((InvalidWindows<?>) windowingStrategy.getWindowFn()).getCause();
throw new IllegalStateException(
"GroupByKey must have a valid Window merge function. "
+ "Invalid because: " + cause);
}
}
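As a quick illustration of the failure mode this guard catches, here is a hedged sketch; the method and variable names are hypothetical, and it assumes the usual imports from org.apache.beam.sdk.io, org.apache.beam.sdk.transforms, and org.apache.beam.sdk.transforms.windowing.
static void sketchApplicableTo(Pipeline p) {
  // GenerateSequence without .to(n) yields an unbounded PCollection.
  PCollection<KV<String, Long>> unbounded =
      p.apply(GenerateSequence.from(0))
          .apply(WithKeys.<String, Long>of("key"));

  // unbounded.apply(GroupByKey.<String, Long>create());
  //   -> IllegalStateException: global window + default trigger + unbounded input.

  // Windowing the input first satisfies applicableTo.
  unbounded
      .apply(Window.<KV<String, Long>>into(FixedWindows.of(Duration.standardSeconds(10))))
      .apply(GroupByKey.<String, Long>create());
}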
Example 2: expand
import org.apache.beam.sdk.values.PCollection; // import the package/class this method depends on
@Override
public PCollection<KV<K, Iterable<V>>> expand(PCollection<KV<K, V>> input) {
WindowingStrategy<?, ?> windowingStrategy = input.getWindowingStrategy();
return input
// Group by just the key.
// Combiner lifting will not happen regardless of the disallowCombinerLifting value.
// There will be no combiners right after the GroupByKeyOnly because of the two ParDos
// introduced here.
.apply(new GroupByKeyOnly<K, V>())
// Sort each key's values by timestamp. GroupAlsoByWindow requires
// its input to be sorted by timestamp.
.apply(new SortValuesByTimestamp<K, V>())
// Group each key's values by window, merging windows as needed.
.apply(new GroupAlsoByWindow<K, V>(windowingStrategy))
// And update the windowing strategy as appropriate.
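// (gbkTransform is a field of the enclosing transform: the original GroupByKey
// whose updateWindowingStrategy is reused here.)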
.setWindowingStrategyInternal(
gbkTransform.updateWindowingStrategy(windowingStrategy));
}
Example 3: canTranslate
import org.apache.beam.sdk.values.PCollection; // import the package/class this method depends on
@Override
boolean canTranslate(
Combine.PerKey<K, InputT, OutputT> transform,
FlinkStreamingTranslationContext context) {
// if we have a merging window strategy and side inputs we cannot
// translate as a proper combine. We have to group and then run the combine
// over the final grouped values.
PCollection<KV<K, InputT>> input = context.getInput(transform);
@SuppressWarnings("unchecked")
WindowingStrategy<?, BoundedWindow> windowingStrategy =
(WindowingStrategy<?, BoundedWindow>) input.getWindowingStrategy();
return windowingStrategy.getWindowFn().isNonMerging() || transform.getSideInputs().isEmpty();
}
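For intuition, a hedged sketch of the two conditions this check probes; the variables are illustrative, not from the source. Sessions is a merging WindowFn, so a Combine.PerKey over session windows that also takes side inputs fails the check and falls back to group-then-combine.
static void sketchMergingCheck(PCollection<KV<String, Integer>> pairs) {
  PCollection<KV<String, Integer>> sessioned =
      pairs.apply(Window.<KV<String, Integer>>into(
          Sessions.withGapDuration(Duration.standardMinutes(5))));

  WindowingStrategy<?, ?> strategy = sessioned.getWindowingStrategy();
  boolean merging = !strategy.getWindowFn().isNonMerging(); // true: Sessions merge
  // merging && !sideInputs.isEmpty()  ==>  canTranslate above returns false.
}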
Example 4: groupByKeyHelper
import org.apache.beam.sdk.values.PCollection; // import the package/class this method depends on
private <K, V> void groupByKeyHelper(
GroupByKey<K, V> transform, TranslationContext context) {
StepTranslationContext stepContext = context.addStep(transform, "GroupByKey");
PCollection<KV<K, V>> input = context.getInput(transform);
stepContext.addInput(PropertyNames.PARALLEL_INPUT, input);
stepContext.addOutput(context.getOutput(transform));
WindowingStrategy<?, ?> windowingStrategy = input.getWindowingStrategy();
boolean isStreaming =
context.getPipelineOptions().as(StreamingOptions.class).isStreaming();
boolean disallowCombinerLifting =
!windowingStrategy.getWindowFn().isNonMerging()
|| !windowingStrategy.getWindowFn().assignsToOneWindow()
|| (isStreaming && !transform.fewKeys())
// TODO: Allow combiner lifting on the non-default trigger, as appropriate.
|| !(windowingStrategy.getTrigger() instanceof DefaultTrigger);
stepContext.addInput(PropertyNames.DISALLOW_COMBINER_LIFTING, disallowCombinerLifting);
stepContext.addInput(
PropertyNames.SERIALIZED_FN,
byteArrayToJsonString(serializeWindowingStrategy(windowingStrategy)));
stepContext.addInput(
PropertyNames.IS_MERGING_WINDOW_FN,
!windowingStrategy.getWindowFn().isNonMerging());
}
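To make the lifting conditions concrete, a hedged sketch with two common WindowFns, assuming the same SDK version as the example (which exposes assignsToOneWindow()): FixedWindows is non-merging and assigns each element to exactly one window, so lifting stays allowed; SlidingWindows assigns an element to several windows, which flips disallowCombinerLifting to true.
static void sketchLiftingConditions() {
  WindowFn<Object, ?> fixed = FixedWindows.of(Duration.standardMinutes(1));
  WindowFn<Object, ?> sliding =
      SlidingWindows.of(Duration.standardMinutes(10)).every(Duration.standardMinutes(1));

  fixed.isNonMerging();         // true  -> first condition passes
  fixed.assignsToOneWindow();   // true  -> second condition passes
  sliding.assignsToOneWindow(); // false -> disallowCombinerLifting becomes true
}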
Example 5: expand
import org.apache.beam.sdk.values.PCollection; // import the package/class this method depends on
@Override
public PCollection<KV<K, Iterable<V>>> expand(PCollection<KV<K, V>> input) {
// This operation groups by the combination of key and window,
// merging windows as needed, using the windows assigned to the
// key/value input elements and the window merge operation of the
// window function associated with the input PCollection.
WindowingStrategy<?, ?> inputWindowingStrategy = input.getWindowingStrategy();
// By default, implement GroupByKey via a series of lower-level operations.
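// (outputWindowingStrategy below is a field of the enclosing transform,
// derived from the input strategy when the transform was constructed.)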
return input
.apply(new DirectGroupByKeyOnly<K, V>())
// Group each key's values by window, merging windows as needed.
.apply(
"GroupAlsoByWindow",
new DirectGroupAlsoByWindow<K, V>(inputWindowingStrategy, outputWindowingStrategy));
}
Example 6: expand
import org.apache.beam.sdk.values.PCollection; // import the package/class this method depends on
@Override
public PCollection<KV<K, V>> expand(PCollection<KV<K, V>> input) {
WindowingStrategy<?, ?> originalStrategy = input.getWindowingStrategy();
// If the input has already had its windows merged, then the GBK that performed the merge
// will have set originalStrategy.getWindowFn() to InvalidWindows, causing the GBK contained
// here to fail. Instead, we install a valid WindowFn that leaves all windows unchanged.
// The TimestampCombiner is set to ensure the GroupByKey does not shift elements forwards in
// time.
// Because this outputs as fast as possible, this should not hold the watermark.
Window<KV<K, V>> rewindow =
Window.<KV<K, V>>into(new IdentityWindowFn<>(originalStrategy.getWindowFn().windowCoder()))
.triggering(new ReshuffleTrigger<>())
.discardingFiredPanes()
.withTimestampCombiner(TimestampCombiner.EARLIEST)
.withAllowedLateness(Duration.millis(BoundedWindow.TIMESTAMP_MAX_VALUE.getMillis()));
return input
.apply(rewindow)
.apply("ReifyOriginalTimestamps", Reify.<K, V>timestampsInValue())
.apply(GroupByKey.<K, TimestampedValue<V>>create())
// Set the windowing strategy directly, so that it doesn't get counted as the user having
// set allowed lateness.
.setWindowingStrategyInternal(originalStrategy)
.apply(
"ExpandIterable",
ParDo.of(
new DoFn<KV<K, Iterable<TimestampedValue<V>>>, KV<K, TimestampedValue<V>>>() {
@ProcessElement
public void processElement(ProcessContext c) {
K key = c.element().getKey();
for (TimestampedValue<V> value : c.element().getValue()) {
c.output(KV.of(key, value));
}
}
}))
.apply(
"RestoreOriginalTimestamps",
ReifyTimestamps.<K, V>extractFromValues());
}
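A brief usage note, hedged: this expand() matches the body of Beam's Reshuffle transform, so user code normally reaches it through Reshuffle.of(); the names below are illustrative.
static void sketchReshuffle(PCollection<KV<String, Integer>> keyed) {
  PCollection<KV<String, Integer>> redistributed =
      keyed.apply(Reshuffle.<String, Integer>of());

  // setWindowingStrategyInternal above restores the caller's strategy,
  // so it round-trips unchanged through the shuffle.
  WindowingStrategy<?, ?> unchanged = redistributed.getWindowingStrategy();
}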
Example 7: expand
import org.apache.beam.sdk.values.PCollection; // import the package/class this method depends on
@Override
public PCollection<T> expand(PCollectionList<T> inputs) {
WindowingStrategy<?, ?> windowingStrategy;
IsBounded isBounded = IsBounded.BOUNDED;
if (!inputs.getAll().isEmpty()) {
windowingStrategy = inputs.get(0).getWindowingStrategy();
for (PCollection<?> input : inputs.getAll()) {
WindowingStrategy<?, ?> other = input.getWindowingStrategy();
if (!windowingStrategy.getWindowFn().isCompatible(other.getWindowFn())) {
throw new IllegalStateException(
"Inputs to Flatten had incompatible window windowFns: "
+ windowingStrategy.getWindowFn() + ", " + other.getWindowFn());
}
if (!windowingStrategy.getTrigger().isCompatible(other.getTrigger())) {
throw new IllegalStateException(
"Inputs to Flatten had incompatible triggers: "
+ windowingStrategy.getTrigger() + ", " + other.getTrigger());
}
isBounded = isBounded.and(input.isBounded());
}
} else {
windowingStrategy = WindowingStrategy.globalDefault();
}
return PCollection.createPrimitiveOutputInternal(
inputs.getPipeline(),
windowingStrategy,
isBounded,
// Take coder from first collection. If there are none, will be left unspecified.
inputs.getAll().isEmpty() ? null : inputs.get(0).getCoder());
}
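A hedged sketch of the compatibility check, with illustrative names: two inputs windowed by different FixedWindows sizes have incompatible WindowFns, so flattening them throws the IllegalStateException above.
static void sketchFlattenCheck(PCollection<String> lines) {
  PCollection<String> byMinute =
      lines.apply("Win1m", Window.<String>into(FixedWindows.of(Duration.standardMinutes(1))));
  PCollection<String> byFiveMinutes =
      lines.apply("Win5m", Window.<String>into(FixedWindows.of(Duration.standardMinutes(5))));

  // PCollectionList.of(byMinute).and(byFiveMinutes).apply(Flatten.<String>pCollections());
  //   -> IllegalStateException: incompatible WindowFns (different fixed-window sizes).
}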
Example 8: ApexGroupByKeyOperator
import org.apache.beam.sdk.values.PCollection; // import the package/class this method depends on
@SuppressWarnings("unchecked")
public ApexGroupByKeyOperator(ApexPipelineOptions pipelineOptions, PCollection<KV<K, V>> input,
ApexStateBackend stateBackend) {
checkNotNull(pipelineOptions);
this.serializedOptions = new SerializablePipelineOptions(pipelineOptions);
this.windowingStrategy = (WindowingStrategy<V, BoundedWindow>) input.getWindowingStrategy();
this.keyCoder = ((KvCoder<K, V>) input.getCoder()).getKeyCoder();
this.valueCoder = ((KvCoder<K, V>) input.getCoder()).getValueCoder();
this.stateInternalsFactory = stateBackend.newStateInternalsFactory(keyCoder);
TimerInternals.TimerDataCoder timerCoder =
TimerInternals.TimerDataCoder.of(windowingStrategy.getWindowFn().windowCoder());
this.timerInternals = new ApexTimerInternals<>(timerCoder);
}
Example 9: testPipelines
import org.apache.beam.sdk.values.PCollection; // import the package/class this method depends on
@Parameters(name = "{index}")
public static Iterable<Pipeline> testPipelines() {
Pipeline trivialPipeline = Pipeline.create();
trivialPipeline.apply(Create.of(1, 2, 3));
Pipeline sideInputPipeline = Pipeline.create();
final PCollectionView<String> singletonView =
sideInputPipeline.apply(Create.of("foo")).apply(View.<String>asSingleton());
sideInputPipeline
.apply(Create.of("main input"))
.apply(
ParDo.of(
new DoFn<String, String>() {
@ProcessElement
public void process(ProcessContext c) {
// never actually executed; has no effect on translation
c.sideInput(singletonView);
}
})
.withSideInputs(singletonView));
Pipeline complexPipeline = Pipeline.create();
BigEndianLongCoder customCoder = BigEndianLongCoder.of();
PCollection<Long> elems = complexPipeline.apply(GenerateSequence.from(0L).to(207L));
PCollection<Long> counted = elems.apply(Count.<Long>globally()).setCoder(customCoder);
PCollection<Long> windowed =
counted.apply(
Window.<Long>into(FixedWindows.of(Duration.standardMinutes(7)))
.triggering(
AfterWatermark.pastEndOfWindow()
.withEarlyFirings(AfterPane.elementCountAtLeast(19)))
.accumulatingFiredPanes()
.withAllowedLateness(Duration.standardMinutes(3L)));
final WindowingStrategy<?, ?> windowedStrategy = windowed.getWindowingStrategy();
PCollection<KV<String, Long>> keyed = windowed.apply(WithKeys.<String, Long>of("foo"));
PCollection<KV<String, Iterable<Long>>> grouped =
keyed.apply(GroupByKey.<String, Long>create());
return ImmutableList.of(trivialPipeline, sideInputPipeline, complexPipeline);
}
Example 10: expand
import org.apache.beam.sdk.values.PCollection; // import the package/class this method depends on
@Override
public PCollection<KV<K, Iterable<KV<Instant, WindowedValue<KV<K, V>>>>>> expand(
PCollection<KV<K, V>> input) {
WindowingStrategy<?, ?> inputWindowingStrategy = input.getWindowingStrategy();
// A KvCoder is required since this goes through GBK. Further, WindowedValueCoder
// is not registered by default, so we explicitly set the relevant coders.
checkState(
input.getCoder() instanceof KvCoder,
"Input to a %s using state requires a %s, but the coder was %s",
ParDo.class.getSimpleName(),
KvCoder.class.getSimpleName(),
input.getCoder());
KvCoder<K, V> kvCoder = (KvCoder<K, V>) input.getCoder();
Coder<K> keyCoder = kvCoder.getKeyCoder();
Coder<? extends BoundedWindow> windowCoder =
inputWindowingStrategy.getWindowFn().windowCoder();
return input
// Stash the original timestamps, etc., for when it is fed to the user's DoFn
.apply("ReifyWindows", ParDo.of(new ReifyWindowedValueFn<K, V>()))
.setCoder(
KvCoder.of(
keyCoder,
KvCoder.of(InstantCoder.of(), WindowedValue.getFullCoder(kvCoder, windowCoder))))
// Group by key and sort by timestamp, dropping windows as they are reified
.apply(
"PartitionKeys",
new GroupByKeyAndSortValuesOnly<K, Instant, WindowedValue<KV<K, V>>>())
// The GBKO sets the windowing strategy to the global default
.setWindowingStrategyInternal(inputWindowingStrategy);
}
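For context, a hedged sketch of the kind of user code this transform prepares for: a stateful DoFn, which requires KV input (hence the KvCoder checkState above). The names are illustrative, and the snippet assumes Beam's org.apache.beam.sdk.state API.
static void sketchStatefulParDo(PCollection<KV<String, Integer>> kvs) {
  kvs.apply(ParDo.of(new DoFn<KV<String, Integer>, String>() {
    @StateId("count")
    private final StateSpec<ValueState<Integer>> countSpec = StateSpecs.value(VarIntCoder.of());

    @ProcessElement
    public void process(ProcessContext c, @StateId("count") ValueState<Integer> count) {
      Integer current = count.read();
      int n = (current == null ? 0 : current) + 1; // per-key, per-window counter
      count.write(n);
      c.output(c.element().getKey() + ":" + n);
    }
  }));
}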
Example 11: expand
import org.apache.beam.sdk.values.PCollection; // import the package/class this method depends on
@Override
public PCollection<KV<K, V>> expand(PCollection<KV<K, V>> input) {
WindowingStrategy<?, ?> originalStrategy = input.getWindowingStrategy();
// If the input has already had its windows merged, then the GBK that performed the merge
// will have set originalStrategy.getWindowFn() to InvalidWindows, causing the GBK contained
// here to fail. Instead, we install a valid WindowFn that leaves all windows unchanged.
Window<KV<K, V>> rewindow =
Window.<KV<K, V>>into(
new IdentityWindowFn<>(
originalStrategy.getWindowFn().windowCoder()))
.triggering(new ReshuffleTrigger<>())
.discardingFiredPanes()
.withAllowedLateness(Duration.millis(BoundedWindow.TIMESTAMP_MAX_VALUE.getMillis()));
return input.apply(rewindow)
.apply(GroupByKey.<K, V>create())
// Set the windowing strategy directly, so that it doesn't get counted as the user having
// set allowed lateness.
.setWindowingStrategyInternal(originalStrategy)
.apply("ExpandIterable", ParDo.of(
new DoFn<KV<K, Iterable<V>>, KV<K, V>>() {
@ProcessElement
public void processElement(ProcessContext c) {
K key = c.element().getKey();
for (V value : c.element().getValue()) {
c.output(KV.of(key, value));
}
}
}));
}
Example 12: translate
import org.apache.beam.sdk.values.PCollection; // import the package/class this method depends on
@Override
public void translate(Window.Assign<T> transform, TranslationContext context) {
PCollection<T> input = (PCollection<T>) context.getInput();
PCollection<T> output = (PCollection<T>) context.getOutput();
JavaStream<WindowedValue<T>> inputStream = context.getInputStream(input);
WindowingStrategy<?, ?> outputStrategy = output.getWindowingStrategy();
WindowFn<T, BoundedWindow> windowFn = (WindowFn<T, BoundedWindow>) outputStrategy.getWindowFn();
JavaStream<WindowedValue<T>> outputStream =
inputStream
.flatMap(new AssignWindows(windowFn), "assign_windows");
context.setOutputStream(output, outputStream);
}
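One nuance worth a hedged illustration (names hypothetical): Window.Assign installs the new WindowFn on its output, which is why this translator reads output.getWindowingStrategy() rather than the input's.
static void sketchAssignStrategy(PCollection<String> lines) {
  PCollection<String> windowed =
      lines.apply(Window.<String>into(FixedWindows.of(Duration.standardMinutes(1))));

  // The input keeps its previous strategy; the output carries the new one.
  WindowingStrategy<?, ?> before = lines.getWindowingStrategy();
  WindowingStrategy<?, ?> after = windowed.getWindowingStrategy();
}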
Example 13: translate
import org.apache.beam.sdk.values.PCollection; // import the package/class this method depends on
@Override
public void translate(ParDo.MultiOutput<InputT, OutputT> transform, TranslationContext context) {
DoFn<InputT, OutputT> doFn = transform.getFn();
DoFnSignature signature = DoFnSignatures.getSignature(doFn.getClass());
if (signature.processElement().isSplittable()) {
throw new UnsupportedOperationException(
String.format(
"%s does not support splittable DoFn: %s", ApexRunner.class.getSimpleName(), doFn));
}
if (signature.timerDeclarations().size() > 0) {
throw new UnsupportedOperationException(
String.format(
"Found %s annotations on %s, but %s cannot yet be used with timers in the %s.",
DoFn.TimerId.class.getSimpleName(),
doFn.getClass().getName(),
DoFn.class.getSimpleName(),
ApexRunner.class.getSimpleName()));
}
Map<TupleTag<?>, PValue> outputs = context.getOutputs();
PCollection<InputT> input = context.getInput();
List<PCollectionView<?>> sideInputs = transform.getSideInputs();
ApexParDoOperator<InputT, OutputT> operator = new ApexParDoOperator<>(
context.getPipelineOptions(),
doFn,
transform.getMainOutputTag(),
transform.getAdditionalOutputTags().getAll(),
input.getWindowingStrategy(),
sideInputs,
input.getCoder(),
context.getStateBackend());
Map<PCollection<?>, OutputPort<?>> ports = Maps.newHashMapWithExpectedSize(outputs.size());
for (Entry<TupleTag<?>, PValue> output : outputs.entrySet()) {
checkArgument(
output.getValue() instanceof PCollection,
"%s %s outputs non-PCollection %s of type %s",
ParDo.MultiOutput.class.getSimpleName(),
context.getFullName(),
output.getValue(),
output.getValue().getClass().getSimpleName());
PCollection<?> pc = (PCollection<?>) output.getValue();
if (output.getKey().equals(transform.getMainOutputTag())) {
ports.put(pc, operator.output);
} else {
int portIndex = 0;
for (TupleTag<?> tag : transform.getAdditionalOutputTags().getAll()) {
if (tag.equals(output.getKey())) {
ports.put(pc, operator.additionalOutputPorts[portIndex]);
break;
}
portIndex++;
}
}
}
context.addOperator(operator, ports);
context.addStream(context.getInput(), operator.input);
if (!sideInputs.isEmpty()) {
addSideInputs(operator.sideInput1, sideInputs, context);
}
}
Example 14: combineGlobally
import org.apache.beam.sdk.values.PCollection; // import the package/class this method depends on
private static <InputT, AccumT, OutputT> TransformEvaluator<Combine.Globally<InputT, OutputT>>
combineGlobally() {
return new TransformEvaluator<Combine.Globally<InputT, OutputT>>() {
@Override
public void evaluate(
Combine.Globally<InputT, OutputT> transform,
EvaluationContext context) {
final PCollection<InputT> input = context.getInput(transform);
final Coder<InputT> iCoder = context.getInput(transform).getCoder();
final Coder<OutputT> oCoder = context.getOutput(transform).getCoder();
final WindowingStrategy<?, ?> windowingStrategy = input.getWindowingStrategy();
@SuppressWarnings("unchecked")
final CombineWithContext.CombineFnWithContext<InputT, AccumT, OutputT> combineFn =
(CombineWithContext.CombineFnWithContext<InputT, AccumT, OutputT>)
CombineFnUtil.toFnWithContext(transform.getFn());
final WindowedValue.FullWindowedValueCoder<OutputT> wvoCoder =
WindowedValue.FullWindowedValueCoder.of(oCoder,
windowingStrategy.getWindowFn().windowCoder());
final boolean hasDefault = transform.isInsertDefault();
final SparkGlobalCombineFn<InputT, AccumT, OutputT> sparkCombineFn =
new SparkGlobalCombineFn<>(
combineFn,
context.getSerializableOptions(),
TranslationUtils.getSideInputs(transform.getSideInputs(), context),
windowingStrategy);
final Coder<AccumT> aCoder;
try {
aCoder = combineFn.getAccumulatorCoder(
context.getPipeline().getCoderRegistry(), iCoder);
} catch (CannotProvideCoderException e) {
throw new IllegalStateException("Could not determine coder for accumulator", e);
}
@SuppressWarnings("unchecked")
JavaRDD<WindowedValue<InputT>> inRdd =
((BoundedDataset<InputT>) context.borrowDataset(transform)).getRDD();
JavaRDD<WindowedValue<OutputT>> outRdd;
Optional<Iterable<WindowedValue<AccumT>>> maybeAccumulated =
GroupCombineFunctions.combineGlobally(inRdd, sparkCombineFn, iCoder, aCoder,
windowingStrategy);
if (maybeAccumulated.isPresent()) {
Iterable<WindowedValue<OutputT>> output =
sparkCombineFn.extractOutput(maybeAccumulated.get());
outRdd = context.getSparkContext()
.parallelize(CoderHelpers.toByteArrays(output, wvoCoder))
.map(CoderHelpers.fromByteFunction(wvoCoder));
} else {
// handle empty input RDD, which will naturally skip the entire execution
// as Spark will not run on empty RDDs.
JavaSparkContext jsc = new JavaSparkContext(inRdd.context());
if (hasDefault) {
OutputT defaultValue = combineFn.defaultValue();
outRdd = jsc
.parallelize(Lists.newArrayList(CoderHelpers.toByteArray(defaultValue, oCoder)))
.map(CoderHelpers.fromByteFunction(oCoder))
.map(WindowingHelpers.<OutputT>windowFunction());
} else {
outRdd = jsc.emptyRDD();
}
}
context.putDataset(transform, new BoundedDataset<>(outRdd));
}
@Override
public String toNativeString() {
return "aggregate(..., new <fn>(), ...)";
}
};
}
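For orientation, a hedged sketch of the user-level pipeline shape this evaluator handles; the variables are illustrative. withoutDefaults() is required once the input leaves the global window (so hasDefault above would be false for this pipeline).
static PCollection<Long> sketchGlobalCombine(PCollection<Long> nums) {
  return nums
      .apply(Window.<Long>into(FixedWindows.of(Duration.standardMinutes(1))))
      .apply(Combine.globally(Sum.ofLongs()).withoutDefaults());
  // The evaluator reads the input's getWindowingStrategy() to build the
  // WindowedValue output coder and to parameterize SparkGlobalCombineFn.
}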
Example 15: combinePerKey
import org.apache.beam.sdk.values.PCollection; // import the package/class this method depends on
private static <K, InputT, AccumT, OutputT>
TransformEvaluator<Combine.PerKey<K, InputT, OutputT>> combinePerKey() {
return new TransformEvaluator<Combine.PerKey<K, InputT, OutputT>>() {
@Override
public void evaluate(
Combine.PerKey<K, InputT, OutputT> transform, EvaluationContext context) {
final PCollection<KV<K, InputT>> input = context.getInput(transform);
// serializable arguments to pass.
@SuppressWarnings("unchecked")
final KvCoder<K, InputT> inputCoder =
(KvCoder<K, InputT>) context.getInput(transform).getCoder();
@SuppressWarnings("unchecked")
final CombineWithContext.CombineFnWithContext<InputT, AccumT, OutputT> combineFn =
(CombineWithContext.CombineFnWithContext<InputT, AccumT, OutputT>)
CombineFnUtil.toFnWithContext(transform.getFn());
final WindowingStrategy<?, ?> windowingStrategy = input.getWindowingStrategy();
final Map<TupleTag<?>, KV<WindowingStrategy<?, ?>, SideInputBroadcast<?>>> sideInputs =
TranslationUtils.getSideInputs(transform.getSideInputs(), context);
final SparkKeyedCombineFn<K, InputT, AccumT, OutputT> sparkCombineFn =
new SparkKeyedCombineFn<>(
combineFn, context.getSerializableOptions(), sideInputs, windowingStrategy);
final Coder<AccumT> vaCoder;
try {
vaCoder =
combineFn.getAccumulatorCoder(
context.getPipeline().getCoderRegistry(), inputCoder.getValueCoder());
} catch (CannotProvideCoderException e) {
throw new IllegalStateException("Could not determine coder for accumulator", e);
}
@SuppressWarnings("unchecked")
JavaRDD<WindowedValue<KV<K, InputT>>> inRdd =
((BoundedDataset<KV<K, InputT>>) context.borrowDataset(transform)).getRDD();
JavaPairRDD<K, Iterable<WindowedValue<KV<K, AccumT>>>> accumulatePerKey =
GroupCombineFunctions.combinePerKey(
inRdd,
sparkCombineFn,
inputCoder.getKeyCoder(),
inputCoder.getValueCoder(),
vaCoder,
windowingStrategy);
JavaRDD<WindowedValue<KV<K, OutputT>>> outRdd =
accumulatePerKey
.flatMapValues(
new Function<
Iterable<WindowedValue<KV<K, AccumT>>>,
Iterable<WindowedValue<OutputT>>>() {
@Override
public Iterable<WindowedValue<OutputT>> call(
Iterable<WindowedValue<KV<K, AccumT>>> iter) throws Exception {
return sparkCombineFn.extractOutput(iter);
}
})
.map(TranslationUtils.<K, WindowedValue<OutputT>>fromPairFunction())
.map(TranslationUtils.<K, OutputT>toKVByWindowInValue());
context.putDataset(transform, new BoundedDataset<>(outRdd));
}
@Override
public String toNativeString() {
return "combineByKey(..., new <fn>(), ...)";
}
};
}
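And the per-key analogue, again hedged with illustrative names: a keyed combine whose accumulators are grouped per key and window, parameterized by the input's getWindowingStrategy().
static PCollection<KV<String, Long>> sketchPerKeyCombine(PCollection<KV<String, Long>> kvs) {
  return kvs
      .apply(Window.<KV<String, Long>>into(FixedWindows.of(Duration.standardMinutes(1))))
      .apply(Combine.<String, Long, Long>perKey(Sum.ofLongs()));
}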