本文整理汇总了Java中org.apache.beam.sdk.transforms.ParDo.MultiOutput类的典型用法代码示例。如果您正苦于以下问题：Java ParDo.MultiOutput类的具体用法？Java ParDo.MultiOutput怎么用？Java ParDo.MultiOutput使用的例子？那么恭喜您，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在的外部类org.apache.beam.sdk.transforms.ParDo的用法示例。
在下文中一共展示了ParDo.MultiOutput类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: splittableParDoMulti
import org.apache.beam.sdk.transforms.ParDo; //导入方法依赖的package包/类
/**
 * Creates a {@link PTransformMatcher} that accepts only a {@link ParDo.MultiOutput} whose
 * {@link DoFn} is splittable, as reported by {@link ProcessElementMethod#isSplittable()}.
 *
 * @return a matcher that returns {@code false} for every transform that is not a
 *     {@link ParDo.MultiOutput}
 */
public static PTransformMatcher splittableParDoMulti() {
  return new PTransformMatcher() {
    @Override
    public boolean matches(AppliedPTransform<?, ?, ?> application) {
      PTransform<?, ?> candidate = application.getTransform();
      // Anything that is not a multi-output ParDo can never match.
      if (!(candidate instanceof ParDo.MultiOutput)) {
        return false;
      }
      DoFn<?, ?> doFn = ((ParDo.MultiOutput<?, ?>) candidate).getFn();
      return DoFnSignatures.signatureForDoFn(doFn).processElement().isSplittable();
    }

    @Override
    public String toString() {
      return MoreObjects.toStringHelper("SplittableParDoMultiMatcher").toString();
    }
  };
}
示例2: parDoWithFnType
import org.apache.beam.sdk.transforms.ParDo; //导入方法依赖的package包/类
/**
 * A {@link PTransformMatcher} which matches a {@link ParDo.SingleOutput} or {@link
 * ParDo.MultiOutput} where the {@link DoFn} is of the provided type.
 *
 * <p>The comparison is {@code fnType.equals(fn.getClass())}, so only a {@link DoFn} whose
 * runtime class is exactly {@code fnType} matches; subclasses of {@code fnType} do not.
 *
 * @param fnType the exact {@link DoFn} class to look for
 * @return a matcher that returns {@code false} for any transform that is neither a
 *     {@link ParDo.SingleOutput} nor a {@link ParDo.MultiOutput}
 */
public static PTransformMatcher parDoWithFnType(final Class<? extends DoFn> fnType) {
  return new PTransformMatcher() {
    @Override
    public boolean matches(AppliedPTransform<?, ?, ?> application) {
      DoFn<?, ?> fn;
      // Cast to the wildcard-parameterized types rather than raw types so that no raw-type
      // usage leaks into the body; only the DoFn's runtime class is inspected below.
      if (application.getTransform() instanceof ParDo.SingleOutput) {
        fn = ((ParDo.SingleOutput<?, ?>) application.getTransform()).getFn();
      } else if (application.getTransform() instanceof ParDo.MultiOutput) {
        fn = ((ParDo.MultiOutput<?, ?>) application.getTransform()).getFn();
      } else {
        return false;
      }
      return fnType.equals(fn.getClass());
    }

    @Override
    public String toString() {
      return MoreObjects.toStringHelper("ParDoWithFnTypeMatcher")
          .add("fnType", fnType)
          .toString();
    }
  };
}
示例3: getAdditionalOutputTags
import org.apache.beam.sdk.transforms.ParDo; //导入方法依赖的package包/类
/**
 * Returns the additional (non-main) output tags of the transform behind {@code application}.
 *
 * <p>A {@link ParDo.MultiOutput} is asked directly. Any other transform is converted to its
 * proto form; every output key of that proto other than the id of the main output tag is then
 * wrapped in a new {@link TupleTag}.
 *
 * @param application the applied transform to inspect
 * @return the additional output tags
 * @throws IOException declared by the signature; NOTE(review): presumably raised by the proto
 *     translation or payload parsing -- confirm
 */
public static TupleTagList getAdditionalOutputTags(AppliedPTransform<?, ?, ?> application)
    throws IOException {
  PTransform<?, ?> transform = application.getTransform();
  if (transform instanceof ParDo.MultiOutput) {
    return ((ParDo.MultiOutput<?, ?>) transform).getAdditionalOutputTags();
  }

  RunnerApi.PTransform proto =
      PTransformTranslation.toProto(application, SdkComponents.create());
  ParDoPayload parDoPayload = ParDoPayload.parseFrom(proto.getSpec().getPayload());
  TupleTag<?> mainTag = getMainOutputTag(parDoPayload);

  // Every output id except the main one is an additional output.
  Set<String> additionalIds =
      Sets.difference(
          proto.getOutputsMap().keySet(), Collections.singleton(mainTag.getId()));
  ArrayList<TupleTag<?>> additionalTags = new ArrayList<>();
  for (String tagId : additionalIds) {
    additionalTags.add(new TupleTag<>(tagId));
  }
  return TupleTagList.of(additionalTags);
}
示例4: multiMultiParDo
import org.apache.beam.sdk.transforms.ParDo; //导入方法依赖的package包/类
/**
 * Builds an {@link AppliedPTransform} named {@code "MultiParDoInAndOut"} for a
 * {@link ParDo.MultiOutput} that has one side input, a main output tag and one additional
 * output tag.
 *
 * <p>The recorded inputs are the ParDo's additional inputs plus the expansion of the main
 * input, added in that order.
 *
 * @param pipeline the pipeline the inputs and the ParDo are applied to
 * @return the applied transform describing the ParDo
 */
private static AppliedPTransform<?, ?, ?> multiMultiParDo(Pipeline pipeline) {
  PCollectionView<String> sideInput =
      pipeline.apply(Create.of("foo")).apply(View.<String>asSingleton());
  PCollection<Long> mainInput = pipeline.apply(GenerateSequence.from(0));

  TupleTag<KV<Long, String>> mainTag = new TupleTag<KV<Long, String>>() {};
  TupleTagList additionalTags = TupleTagList.of(new TupleTag<KV<String, Long>>() {});
  ParDo.MultiOutput<Long, KV<Long, String>> multiOutput =
      ParDo.of(new TestDoFn()).withSideInputs(sideInput).withOutputTags(mainTag, additionalTags);

  PCollectionTuple outputs = mainInput.apply(multiOutput);

  // Additional inputs go in first, then the main input's expansion.
  Map<TupleTag<?>, PValue> allInputs = new HashMap<>();
  allInputs.putAll(multiOutput.getAdditionalInputs());
  allInputs.putAll(mainInput.expand());

  return AppliedPTransform
      .<PCollection<Long>, PCollectionTuple, ParDo.MultiOutput<Long, KV<Long, String>>>of(
          "MultiParDoInAndOut", allInputs, outputs.expand(), multiOutput, pipeline);
}
示例5: data
import org.apache.beam.sdk.transforms.ParDo; //导入方法依赖的package包/类
/**
 * Supplies the {@link ParDo.MultiOutput} instances used as test parameters.
 *
 * <p>The five cases, in order: no side inputs and no additional outputs; side inputs only;
 * side inputs plus two additional outputs; two additional outputs only; and a
 * {@code SplittableDropElementsFn} with no additional outputs.
 *
 * @return the parameter values, one {@link ParDo.MultiOutput} per case
 */
@Parameters(name = "{index}: {0}")
public static Iterable<ParDo.MultiOutput<?, ?>> data() {
  ParDo.MultiOutput<?, ?> plain =
      ParDo.of(new DropElementsFn()).withOutputTags(new TupleTag<Void>(), TupleTagList.empty());

  ParDo.MultiOutput<?, ?> withSideInputs =
      ParDo.of(new DropElementsFn())
          .withOutputTags(new TupleTag<Void>(), TupleTagList.empty())
          .withSideInputs(singletonSideInput, multimapSideInput);

  ParDo.MultiOutput<?, ?> withSideInputsAndExtraOutputs =
      ParDo.of(new DropElementsFn())
          .withOutputTags(
              new TupleTag<Void>(),
              TupleTagList.of(new TupleTag<byte[]>() {}).and(new TupleTag<Integer>() {}))
          .withSideInputs(singletonSideInput, multimapSideInput);

  ParDo.MultiOutput<?, ?> withExtraOutputs =
      ParDo.of(new DropElementsFn())
          .withOutputTags(
              new TupleTag<Void>(),
              TupleTagList.of(new TupleTag<byte[]>() {}).and(new TupleTag<Integer>() {}));

  ParDo.MultiOutput<?, ?> splittable =
      ParDo.of(new SplittableDropElementsFn())
          .withOutputTags(new TupleTag<Void>(), TupleTagList.empty());

  return ImmutableList.<ParDo.MultiOutput<?, ?>>of(
      plain, withSideInputs, withSideInputsAndExtraOutputs, withExtraOutputs, splittable);
}
示例6: translateMultiHelper
import org.apache.beam.sdk.transforms.ParDo; //导入方法依赖的package包/类
/**
 * Translates a {@link ParDo.MultiOutput} into a step named {@code "ParallelDo"}.
 *
 * <p>The step is added to {@code context}, then its inputs (main input and side inputs) and
 * outputs are translated, and finally the {@link DoFn} itself is translated together with the
 * main input's windowing strategy and coder and the index assigned to the main output.
 *
 * @param transform the multi-output ParDo being translated
 * @param context the translation context the step is added to
 */
private <InputT, OutputT> void translateMultiHelper(
    ParDo.MultiOutput<InputT, OutputT> transform, TranslationContext context) {
  StepTranslationContext step = context.addStep(transform, "ParallelDo");
  translateInputs(step, context.getInput(transform), transform.getSideInputs(), context);

  BiMap<Long, TupleTag<?>> indexToTag =
      translateOutputs(context.getOutputs(transform), step);
  String transformId =
      context.getSdkComponents().getPTransformIdOrThrow(context.getCurrentTransform());

  translateFn(
      step,
      transformId,
      transform.getFn(),
      context.getInput(transform).getWindowingStrategy(),
      transform.getSideInputs(),
      context.getInput(transform).getCoder(),
      context,
      // Reverse lookup: the index that was assigned to the main output tag.
      indexToTag.inverse().get(transform.getMainOutputTag()),
      indexToTag);
}
示例7: isKeyPreserving
import org.apache.beam.sdk.transforms.ParDo; //导入方法依赖的package包/类
/**
 * Reports whether {@code transform} is treated as key-preserving: it must be a
 * {@link ParDo.MultiOutput} whose {@link DoFn} is a
 * {@code ParDoMultiOverrideFactory.ToKeyedWorkItem}.
 *
 * @param transform the transform to inspect
 * @return {@code true} only for the case described above
 */
private static boolean isKeyPreserving(PTransform<?, ?> transform) {
  // Deliberately hacky check for what the direct runner considers key-preserving. A
  // package-private marker interface would be the obvious alternative, but keeping this
  // visibly hacky makes it less likely to spread. The intent is to eventually allow
  // key-preserving DoFns to be expressed explicitly in the model.
  return transform instanceof ParDo.MultiOutput
      && ((ParDo.MultiOutput<?, ?>) transform).getFn()
          instanceof ParDoMultiOverrideFactory.ToKeyedWorkItem;
}
示例8: getDoFn
import org.apache.beam.sdk.transforms.ParDo; //导入方法依赖的package包/类
/**
 * Returns the {@link DoFn} of the transform behind {@code application}.
 *
 * <p>A {@link ParDo.MultiOutput} is asked directly; for any other transform the {@link DoFn}
 * is obtained from the payload returned by {@code getParDoPayload(application)}.
 *
 * @param application the applied transform to inspect
 * @return the transform's {@link DoFn}
 * @throws IOException declared by the signature; NOTE(review): presumably raised while
 *     obtaining or decoding the payload -- confirm
 */
public static DoFn<?, ?> getDoFn(AppliedPTransform<?, ?, ?> application) throws IOException {
  PTransform<?, ?> transform = application.getTransform();
  if (!(transform instanceof ParDo.MultiOutput)) {
    // Not a Java ParDo.MultiOutput: fall back to the payload.
    return getDoFn(getParDoPayload(application));
  }
  ParDo.MultiOutput<?, ?> parDo = (ParDo.MultiOutput<?, ?>) transform;
  return parDo.getFn();
}
示例9: getMainOutputTag
import org.apache.beam.sdk.transforms.ParDo; //导入方法依赖的package包/类
/**
 * Returns the main output tag of the transform behind {@code application}.
 *
 * <p>A {@link ParDo.MultiOutput} is asked directly; for any other transform the tag is
 * obtained from the payload returned by {@code getParDoPayload(application)}.
 *
 * @param application the applied transform to inspect
 * @return the main output tag
 * @throws IOException declared by the signature; NOTE(review): presumably raised while
 *     obtaining or decoding the payload -- confirm
 */
public static TupleTag<?> getMainOutputTag(AppliedPTransform<?, ?, ?> application)
    throws IOException {
  PTransform<?, ?> transform = application.getTransform();
  return transform instanceof ParDo.MultiOutput
      ? ((ParDo.MultiOutput<?, ?>) transform).getMainOutputTag()
      : getMainOutputTag(getParDoPayload(application));
}
示例10: getSideInputs
import org.apache.beam.sdk.transforms.ParDo; //导入方法依赖的package包/类
/**
 * Returns the side-input views of the transform behind {@code application}.
 *
 * <p>A {@link ParDo.MultiOutput} is asked directly. Any other transform is converted to its
 * proto form, and one view is rebuilt per entry of the payload's side-input map, using the
 * application input registered under the side input's tag as the originating collection.
 *
 * @param application the applied transform to inspect
 * @return the side-input views
 * @throws IOException declared by the signature; NOTE(review): presumably raised by the proto
 *     translation or payload parsing -- confirm
 * @throws NullPointerException via {@code checkNotNull} when the application has no input for
 *     a side-input tag
 */
public static List<PCollectionView<?>> getSideInputs(AppliedPTransform<?, ?, ?> application)
    throws IOException {
  PTransform<?, ?> transform = application.getTransform();
  if (transform instanceof ParDo.MultiOutput) {
    return ((ParDo.MultiOutput<?, ?>) transform).getSideInputs();
  }

  SdkComponents sdkComponents = SdkComponents.create();
  RunnerApi.PTransform proto = PTransformTranslation.toProto(application, sdkComponents);
  ParDoPayload parDoPayload = ParDoPayload.parseFrom(proto.getSpec().getPayload());
  // Built from the same SdkComponents instance that was handed to toProto above.
  RehydratedComponents rehydrated =
      RehydratedComponents.forComponents(sdkComponents.toComponents());

  List<PCollectionView<?>> result = new ArrayList<>();
  for (Map.Entry<String, SideInput> entry : parDoPayload.getSideInputsMap().entrySet()) {
    String tagId = entry.getKey();
    RunnerApi.SideInput spec = entry.getValue();
    PCollection<?> source =
        checkNotNull(
            (PCollection<?>) application.getInputs().get(new TupleTag<>(tagId)),
            "no input with tag %s",
            tagId);
    result.add(
        PCollectionViewTranslation.viewFromProto(spec, tagId, source, proto, rehydrated));
  }
  return result;
}
示例11: applySplittableParDo
import org.apache.beam.sdk.transforms.ParDo; //导入方法依赖的package包/类
/**
 * Wraps {@code fn} in a {@link ParDo.MultiOutput} with {@code MAIN_OUTPUT_TAG} as its only
 * output, expands it by hand to build an {@link AppliedPTransform}, and applies the
 * {@code SplittableParDo} derived from that applied transform to {@code input}.
 *
 * @param name the name under which the resulting transform is applied
 * @param input the collection the transform is applied to
 * @param fn the {@link DoFn} to wrap
 * @return the collection stored under {@code MAIN_OUTPUT_TAG} in the result
 */
private PCollection<String> applySplittableParDo(
    String name, PCollection<Integer> input, DoFn<Integer, String> fn) {
  ParDo.MultiOutput<Integer, String> parDo =
      ParDo.of(fn).withOutputTags(MAIN_OUTPUT_TAG, TupleTagList.empty());

  // Expand directly (rather than input.apply) to obtain outputs for the AppliedPTransform.
  PCollectionTuple expanded = parDo.expand(input);
  expanded.get(MAIN_OUTPUT_TAG).setName("main");

  AppliedPTransform<PCollection<Integer>, PCollectionTuple, ?> applied =
      AppliedPTransform.of("ParDo", input.expand(), expanded.expand(), parDo, pipeline);

  PCollectionTuple result = input.apply(name, SplittableParDo.forAppliedParDo(applied));
  return result.get(MAIN_OUTPUT_TAG);
}
示例12: multiOutputOverrideFactory
import org.apache.beam.sdk.transforms.ParDo; //导入方法依赖的package包/类
/**
 * Creates the {@link PTransformOverrideFactory} that swaps a multi-output {@link ParDo} for a
 * composite transform specialized for the {@link DataflowRunner}.
 *
 * @param options pipeline options; only the result of {@code isFnApi(options)} is passed on
 *     to the factory
 * @return a new {@code MultiOutputOverrideFactory}
 */
public static <K, InputT, OutputT>
    PTransformOverrideFactory<
            PCollection<KV<K, InputT>>, PCollectionTuple,
            ParDo.MultiOutput<KV<K, InputT>, OutputT>>
        multiOutputOverrideFactory(DataflowPipelineOptions options) {
  boolean fnApi = isFnApi(options);
  return new MultiOutputOverrideFactory<>(fnApi);
}
示例13: getReplacementTransform
import org.apache.beam.sdk.transforms.ParDo; //导入方法依赖的package包/类
/**
 * Replaces the applied {@link ParDo.MultiOutput} with the {@code SplittableParDo} built from
 * it, keeping the transform's singleton main input as the input of the replacement.
 *
 * @param appliedTransform the applied multi-output ParDo to replace
 * @return the replacement, pairing the main input with the new transform
 */
@Override
public PTransformReplacement<PCollection<InputT>, PCollectionTuple> getReplacementTransform(
    AppliedPTransform<PCollection<InputT>, PCollectionTuple, ParDo.MultiOutput<InputT, OutputT>>
        appliedTransform) {
  PCollection<InputT> mainInput =
      PTransformReplacements.getSingletonMainInput(appliedTransform);
  return PTransformReplacement.of(
      mainInput, SplittableParDo.forAppliedParDo(appliedTransform));
}
示例14: translate
import org.apache.beam.sdk.transforms.ParDo; //导入方法依赖的package包/类
/**
 * Translates a {@link ParDo.MultiOutput} into {@code JavaStream} operations.
 *
 * <p>The input stream is combined with the side-input streams, run through a
 * {@code DoFnFunction} via {@code flatMap}, and the resulting union stream is then filtered
 * once per output tag; each filtered stream is registered as the stream of that output.
 *
 * @param transform the multi-output ParDo being translated
 * @param context the translation context supplying the input, outputs and streams
 */
@Override
public void translate(ParDo.MultiOutput<InputT, OutputT> transform, TranslationContext context) {
  PCollection<InputT> input = (PCollection<InputT>) context.getInput();
  JavaStream<WindowedValue<InputT>> windowedInput = context.getInputStream(input);
  Collection<PCollectionView<?>> sideInputViews = transform.getSideInputs();
  Map<String, PCollectionView<?>> sideInputsByTag =
      TranslatorUtils.getTagsToSideInputs(sideInputViews);
  Map<TupleTag<?>, PValue> outputsByTag = context.getOutputs();
  final TupleTag<OutputT> mainTag = transform.getMainOutputTag();

  // Collect every non-null output tag whose id differs from the main output's id.
  List<TupleTag<?>> additionalTags = new ArrayList<>(outputsByTag.size() - 1);
  for (TupleTag<?> candidate : outputsByTag.keySet()) {
    if (candidate == null || candidate.getId().equals(mainTag.getId())) {
      continue;
    }
    additionalTags.add(candidate);
  }

  JavaStream<TranslatorUtils.RawUnionValue> withSideInputs =
      TranslatorUtils.withSideInputStream(context, windowedInput, sideInputsByTag);
  JavaStream<TranslatorUtils.RawUnionValue> unionOutput =
      TranslatorUtils.toList(withSideInputs)
          .flatMap(
              new DoFnFunction<>(
                  context.getPipelineOptions(),
                  transform.getFn(),
                  input.getWindowingStrategy(),
                  sideInputViews,
                  sideInputsByTag,
                  mainTag,
                  additionalTags),
              transform.getName());

  // Split the union stream back into one stream per output tag.
  for (Map.Entry<TupleTag<?>, PValue> entry : outputsByTag.entrySet()) {
    String tagId = entry.getKey().getId();
    JavaStream<WindowedValue<OutputT>> perTagStream =
        unionOutput
            .filter(new FilterByOutputTag(tagId), "filter_by_output_tag")
            .map(new TranslatorUtils.FromRawUnionValue<OutputT>(), "from_RawUnionValue");
    context.setOutputStream(entry.getValue(), perTagStream);
  }
}
示例15: translate
import org.apache.beam.sdk.transforms.ParDo; //导入方法依赖的package包/类
/**
 * Translates a {@link ParDo.MultiOutput} into an {@code ApexParDoOperator}.
 *
 * <p>Splittable {@link DoFn}s and {@link DoFn}s that declare timers are rejected. Otherwise
 * the operator is created, each output collection is mapped to the operator's main output
 * port or to the additional output port at the position of its tag in the transform's
 * additional output tags, and the operator is registered with {@code context} together with
 * its input stream and, when present, its side inputs.
 *
 * @param transform the multi-output ParDo being translated
 * @param context the translation context the operator and streams are added to
 * @throws UnsupportedOperationException if the {@link DoFn} is splittable or declares timers
 * @throws IllegalArgumentException via {@code checkArgument} if an output is not a
 *     {@link PCollection}
 */
@Override
public void translate(ParDo.MultiOutput<InputT, OutputT> transform, TranslationContext context) {
  DoFn<InputT, OutputT> doFn = transform.getFn();
  DoFnSignature signature = DoFnSignatures.getSignature(doFn.getClass());

  if (signature.processElement().isSplittable()) {
    throw new UnsupportedOperationException(
        String.format(
            "%s does not support splittable DoFn: %s", ApexRunner.class.getSimpleName(), doFn));
  }
  if (!signature.timerDeclarations().isEmpty()) {
    throw new UnsupportedOperationException(
        String.format(
            "Found %s annotations on %s, but %s cannot yet be used with timers in the %s.",
            DoFn.TimerId.class.getSimpleName(),
            doFn.getClass().getName(),
            DoFn.class.getSimpleName(),
            ApexRunner.class.getSimpleName()));
  }

  Map<TupleTag<?>, PValue> outputs = context.getOutputs();
  PCollection<InputT> input = context.getInput();
  List<PCollectionView<?>> sideInputs = transform.getSideInputs();
  List<TupleTag<?>> additionalTags = transform.getAdditionalOutputTags().getAll();

  ApexParDoOperator<InputT, OutputT> operator =
      new ApexParDoOperator<>(
          context.getPipelineOptions(),
          doFn,
          transform.getMainOutputTag(),
          additionalTags,
          input.getWindowingStrategy(),
          sideInputs,
          input.getCoder(),
          context.getStateBackend());

  Map<PCollection<?>, OutputPort<?>> ports = Maps.newHashMapWithExpectedSize(outputs.size());
  for (Entry<TupleTag<?>, PValue> entry : outputs.entrySet()) {
    PValue value = entry.getValue();
    checkArgument(
        value instanceof PCollection,
        "%s %s outputs non-PCollection %s of type %s",
        ParDo.MultiOutput.class.getSimpleName(),
        context.getFullName(),
        value,
        value.getClass().getSimpleName());
    PCollection<?> collection = (PCollection<?>) value;
    TupleTag<?> tag = entry.getKey();

    if (tag.equals(transform.getMainOutputTag())) {
      ports.put(collection, operator.output);
      continue;
    }
    // An additional output uses the port at the same index as its tag; a tag that is not
    // among the additional output tags gets no port.
    for (int i = 0; i < additionalTags.size(); i++) {
      if (additionalTags.get(i).equals(tag)) {
        ports.put(collection, operator.additionalOutputPorts[i]);
        break;
      }
    }
  }

  context.addOperator(operator, ports);
  context.addStream(context.getInput(), operator.input);
  if (!sideInputs.isEmpty()) {
    addSideInputs(operator.sideInput1, sideInputs, context);
  }
}