本文整理汇总了Java中org.apache.beam.sdk.values.PBegin类的典型用法代码示例。如果您正苦于以下问题：Java PBegin类的具体用法？Java PBegin怎么用？Java PBegin使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
PBegin类属于org.apache.beam.sdk.values包，在下文中一共展示了PBegin类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: buildIOReader
import org.apache.beam.sdk.values.PBegin; //导入依赖的package包/类
/**
 * Builds the reading side of this Kafka-backed table.
 *
 * <p>Keys and values are read as raw {@code byte[]} from either the configured {@code topics}
 * or, when no topics are set, the configured {@code topicPartitions}. The read is applied
 * without Kafka metadata and then passed through the transform returned by
 * {@code getPTransformForInput()}.
 *
 * @param pipeline the pipeline the read is rooted in
 * @return the output of the {@code "in_format"} step
 * @throws IllegalArgumentException if neither {@code topics} nor {@code topicPartitions} is set
 */
@Override
public PCollection<BeamRecord> buildIOReader(Pipeline pipeline) {
  if (topics == null && topicPartitions == null) {
    throw new IllegalArgumentException("One of topics and topicPartitions must be configurated.");
  }

  KafkaIO.Read<byte[], byte[]> fromBrokers =
      KafkaIO.<byte[], byte[]>read().withBootstrapServers(bootstrapServers);

  // When both are configured, topics take precedence over explicit partitions.
  KafkaIO.Read<byte[], byte[]> subscribed;
  if (topics != null) {
    subscribed = fromBrokers.withTopics(topics);
  } else {
    subscribed = fromBrokers.withTopicPartitions(topicPartitions);
  }

  KafkaIO.Read<byte[], byte[]> rawRead =
      subscribed
          .updateConsumerProperties(configUpdates)
          .withKeyDeserializerAndCoder(ByteArrayDeserializer.class, ByteArrayCoder.of())
          .withValueDeserializerAndCoder(ByteArrayDeserializer.class, ByteArrayCoder.of());

  return PBegin.in(pipeline)
      .apply("read", rawRead.withoutMetadata())
      .apply("in_format", getPTransformForInput());
}
示例2: getInitialInputs
import org.apache.beam.sdk.values.PBegin; //导入依赖的package包/类
/**
 * Splits the bounded source behind {@code transform} and wraps every resulting sub-source in
 * its own committed root bundle.
 *
 * <p>The desired bundle size handed to {@code split} is the source's estimated size divided by
 * {@code targetParallelism} (integer division).
 *
 * @param transform the applied read transform whose bounded source is split
 * @param targetParallelism divisor used to derive the desired bytes per bundle
 * @return one committed bundle per split, each holding a single {@code BoundedSourceShard}
 * @throws Exception if estimating the size or splitting the source fails
 */
@Override
public Collection<CommittedBundle<BoundedSourceShard<T>>> getInitialInputs(
    AppliedPTransform<PBegin, PCollection<T>, PTransform<PBegin, PCollection<T>>> transform,
    int targetParallelism)
    throws Exception {
  BoundedSource<T> rootSource = ReadTranslation.boundedSourceFromTransform(transform);
  PipelineOptions pipelineOptions = evaluationContext.getPipelineOptions();
  long desiredBundleBytes =
      rootSource.getEstimatedSizeBytes(pipelineOptions) / targetParallelism;

  ImmutableList.Builder<CommittedBundle<BoundedSourceShard<T>>> rootBundles =
      ImmutableList.builder();
  for (BoundedSource<T> split : rootSource.split(desiredBundleBytes, pipelineOptions)) {
    // Each split becomes a single-element bundle in the global window.
    rootBundles.add(
        evaluationContext
            .<BoundedSourceShard<T>>createRootBundle()
            .add(WindowedValue.valueInGlobalWindow(BoundedSourceShard.of(split)))
            .commit(BoundedWindow.TIMESTAMP_MAX_VALUE));
  }
  return rootBundles.build();
}
示例3: expand
import org.apache.beam.sdk.values.PBegin; //导入依赖的package包/类
/**
 * Produces the view over the actual values.
 *
 * <p>The steps, in order: "FilterActuals", "GatherPanes", "ExtractPane", flattening of the
 * extracted iterables, "RewindowActuals", a per-element clone through the collection's coder,
 * and finally {@code actualView}.
 *
 * @param input the pipeline root; the chain itself starts from {@code actual}
 * @return the view created by {@code actualView}
 */
@Override
public PCollectionView<ActualT> expand(PBegin input) {
  final Coder<T> coder = actual.getCoder();

  // Round-trips every element through the coder so the view receives cloned values.
  DoFn<T, T> cloneThroughCoder =
      new DoFn<T, T>() {
        @ProcessElement
        public void processElement(ProcessContext context) throws CoderException {
          context.output(CoderUtils.clone(coder, context.element()));
        }
      };

  PCollection<T> flattened =
      actual
          .apply("FilterActuals", rewindowActuals.<T>prepareActuals())
          .apply("GatherPanes", GatherAllPanes.<T>globally())
          .apply("ExtractPane", MapElements.via(extractPane))
          .setCoder(IterableCoder.of(actual.getCoder()))
          .apply(Flatten.<T>iterables());

  return flattened
      .apply("RewindowActuals", rewindowActuals.<T>windowActuals())
      .apply(ParDo.of(cloneThroughCoder))
      .apply(actualView);
}
示例4: expand
import org.apache.beam.sdk.values.PBegin; //导入依赖的package包/类
/**
 * Expands this read into a {@code "Read"} step over {@code getSource()}.
 *
 * <p>When validation is enabled, the file pattern must be accessible at construction time and
 * must match at least one file.
 *
 * @param input the pipeline root
 * @return the records read from the source, as raw byte arrays
 * @throws IllegalStateException if no file pattern is set, or if validation is enabled and the
 *     pattern is not accessible, matches nothing, or cannot be matched because of an I/O error
 */
@Override
public PCollection<byte[]> expand(PBegin input) {
  checkState(
      getFilepattern() != null, "Need to set the filepattern of a TFRecordIO.Read transform");

  if (getValidate()) {
    checkState(getFilepattern().isAccessible(), "Cannot validate with a RVP.");
    String pattern = getFilepattern().get();
    try {
      MatchResult matched = FileSystems.match(pattern);
      // metadata() itself may raise IOException, so this check stays inside the try.
      checkState(!matched.metadata().isEmpty(), "Unable to find any files matching %s", pattern);
    } catch (IOException e) {
      throw new IllegalStateException(String.format("Failed to validate %s", pattern), e);
    }
  }

  return input.apply("Read", org.apache.beam.sdk.io.Read.from(getSource()));
}
示例5: expand
import org.apache.beam.sdk.values.PBegin; //导入依赖的package包/类
/**
 * Expands this read into either a direct source read or a {@code ReadAll} over the file
 * pattern.
 *
 * <p>The {@code ReadAll} path is taken when a watch interval is configured or when the
 * "matches many files" hint is set; otherwise a single {@code "Read"} step is applied.
 *
 * @param input the pipeline root
 * @return the records read
 * @throws NullPointerException if the file pattern or the schema is not set
 */
@Override
public PCollection<T> expand(PBegin input) {
  checkNotNull(getFilepattern(), "filepattern");
  checkNotNull(getSchema(), "schema");

  boolean viaReadAll =
      getMatchConfiguration().getWatchInterval() != null || getHintMatchesManyFiles();

  if (viaReadAll) {
    ReadAll<T> readAllFiles;
    if (getRecordClass() == GenericRecord.class) {
      readAllFiles = (ReadAll<T>) readAllGenericRecords(getSchema());
    } else {
      readAllFiles = readAll(getRecordClass());
    }
    readAllFiles = readAllFiles.withMatchConfiguration(getMatchConfiguration());

    return input
        .apply("Create filepattern", Create.ofProvider(getFilepattern(), StringUtf8Coder.of()))
        .apply("Via ReadAll", readAllFiles);
  }

  // Simple case: one source built directly from the file pattern.
  return input.apply(
      "Read",
      org.apache.beam.sdk.io.Read.from(
          createSource(
              getFilepattern(),
              getMatchConfiguration().getEmptyMatchTreatment(),
              getRecordClass(),
              getSchema())));
}
示例6: expand
import org.apache.beam.sdk.values.PBegin; //导入依赖的package包/类
/**
 * Expands this text read into either a direct source read or a {@code readAll()} over the file
 * pattern.
 *
 * <p>The {@code readAll()} path is taken when a watch interval is configured or when the
 * "matches many files" hint is set; it carries over this transform's compression, match
 * configuration and delimiter.
 *
 * @param input the pipeline root
 * @return the strings read
 * @throws NullPointerException if the file pattern is not set
 */
@Override
public PCollection<String> expand(PBegin input) {
  checkNotNull(getFilepattern(), "need to set the filepattern of a TextIO.Read transform");

  boolean viaReadAll =
      getMatchConfiguration().getWatchInterval() != null || getHintMatchesManyFiles();

  if (viaReadAll) {
    return input
        .apply("Create filepattern", Create.ofProvider(getFilepattern(), StringUtf8Coder.of()))
        .apply(
            "Via ReadAll",
            readAll()
                .withCompression(getCompression())
                .withMatchConfiguration(getMatchConfiguration())
                .withDelimiter(getDelimiter()));
  }

  // Simple case: read straight from the source.
  return input.apply("Read", org.apache.beam.sdk.io.Read.from(getSource()));
}
示例7: testViewUnbounded
import org.apache.beam.sdk.values.PBegin; //导入依赖的package包/类
/**
 * Applies {@code view} to an unbounded root collection and expects the attempt to fail.
 *
 * <p>The expected failure is an {@link IllegalStateException} whose message mentions the
 * side-input view and whose cause mentions a non-bounded PCollection.
 *
 * @param pipeline the pipeline to build on
 * @param view the view-creating transform under test
 */
private void testViewUnbounded(
    Pipeline pipeline,
    PTransform<PCollection<KV<String, Integer>>, ? extends PCollectionView<?>> view) {
  thrown.expect(IllegalStateException.class);
  thrown.expectMessage("Unable to create a side-input view from input");
  thrown.expectCause(
      ThrowableMessageMatcher.hasMessage(Matchers.containsString("non-bounded PCollection")));

  // Root transform whose only job is to emit a primitive output marked UNBOUNDED.
  PTransform<PBegin, PCollection<KV<String, Integer>>> unboundedRoot =
      new PTransform<PBegin, PCollection<KV<String, Integer>>>() {
        @Override
        public PCollection<KV<String, Integer>> expand(PBegin begin) {
          return PCollection.createPrimitiveOutputInternal(
              begin.getPipeline(),
              WindowingStrategy.globalDefault(),
              PCollection.IsBounded.UNBOUNDED,
              KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of()));
        }
      };

  pipeline.apply(unboundedRoot).apply(view);
}
示例8: expand
import org.apache.beam.sdk.values.PBegin; //导入依赖的package包/类
/**
 * Returns a two-element list: the output of a nested {@code Create} and a primitive output
 * created directly inside this composite.
 *
 * <p>Creating the primitive output here is the deliberate problem this transform models; see
 * the comment in the body.
 *
 * @param b the pipeline root
 * @return a list holding the created collection followed by the primitive output
 */
@Override
public PCollectionList<String> expand(PBegin b) {
  // As a composite, this transform delegates to another transform: a Create.
  PCollection<String> created = b.apply(Create.of("hello", "world"));

  // The issue: PCollection.createPrimitiveOutput should not be called from within a
  // composite transform.
  PCollection<String> primitive =
      PCollection.createPrimitiveOutputInternal(
          b.getPipeline(),
          WindowingStrategy.globalDefault(),
          created.isBounded(),
          StringUtf8Coder.of());

  return PCollectionList.of(Arrays.asList(created, primitive));
}
示例9: testSourcePrimitiveDisplayData
import org.apache.beam.sdk.values.PBegin; //导入依赖的package包/类
/**
 * Checks that the primitive display data of a Datastore read includes the project id and the
 * number of query splits.
 */
@Test
public void testSourcePrimitiveDisplayData() {
  final int numSplits = 98;
  PTransform<PBegin, PCollection<Entity>> read =
      DatastoreIO.v1()
          .read()
          .withProjectId(PROJECT_ID)
          .withQuery(Query.newBuilder().build())
          .withNumQuerySplits(numSplits);

  DisplayDataEvaluator evaluator = DisplayDataEvaluator.create();
  Set<DisplayData> primitiveDisplayData =
      evaluator.displayDataForPrimitiveSourceTransforms(read);

  String assertMessage = "DatastoreIO read should include the '%s' in its primitive display data";
  assertThat(
      String.format(assertMessage, "project id"),
      primitiveDisplayData,
      hasItem(hasDisplayItem("projectId", PROJECT_ID)));
  assertThat(
      String.format(assertMessage, "number of query splits"),
      primitiveDisplayData,
      hasItem(hasDisplayItem("numQuerySplits", numSplits)));
}
示例10: expand
import org.apache.beam.sdk.values.PBegin; //导入依赖的package包/类
/**
 * Expands this read into an unbounded Kinesis read, capped when a record or time limit is set.
 *
 * <p>The cap is applied when {@code getMaxNumRecords()} is below {@code Long.MAX_VALUE} or a
 * maximum read time is set.
 *
 * @param input the pipeline root
 * @return the Kinesis records read
 * @throws IllegalArgumentException if the stream-existence check fails
 */
@Override
public PCollection<KinesisRecord> expand(PBegin input) {
  checkArgument(
      streamExists(getAWSClientsProvider().getKinesisClient(), getStreamName()),
      "Stream %s does not exist",
      getStreamName());

  Unbounded<KinesisRecord> unboundedRead =
      org.apache.beam.sdk.io.Read.from(
          new KinesisSource(
              getAWSClientsProvider(),
              getStreamName(),
              getInitialPosition(),
              getUpToDateThreshold()));

  boolean limited = getMaxNumRecords() < Long.MAX_VALUE || getMaxReadTime() != null;
  if (!limited) {
    return input.apply(unboundedRead);
  }

  // A limit is configured: wrap the unbounded read with both caps.
  return input.apply(
      unboundedRead.withMaxReadTime(getMaxReadTime()).withMaxNumRecords(getMaxNumRecords()));
}
示例11: expand
import org.apache.beam.sdk.values.PBegin; //导入依赖的package包/类
/**
 * Expands this read into an unbounded JMS read, capped when a record or time limit is set.
 *
 * <p>Exactly one of queue and topic must be configured, together with a connection factory.
 * The cap is applied when {@code getMaxNumRecords()} is below {@code Long.MAX_VALUE} or a
 * maximum read time is set.
 *
 * @param input the pipeline root; the read is applied to its pipeline
 * @return the JMS records read
 * @throws IllegalArgumentException if the connection factory is missing, or if neither or both
 *     of queue and topic are set
 */
@Override
public PCollection<JmsRecord> expand(PBegin input) {
  checkArgument(getConnectionFactory() != null, "withConnectionFactory() is required");
  checkArgument(
      getQueue() != null || getTopic() != null, "Either withQueue() or withTopic() is required");
  checkArgument(
      getQueue() == null || getTopic() == null, "withQueue() and withTopic() are exclusive");

  Unbounded<JmsRecord> unboundedRead = org.apache.beam.sdk.io.Read.from(createSource());

  boolean limited = getMaxNumRecords() < Long.MAX_VALUE || getMaxReadTime() != null;
  if (!limited) {
    return input.getPipeline().apply(unboundedRead);
  }

  // A limit is configured: wrap the unbounded read with both caps.
  return input
      .getPipeline()
      .apply(
          unboundedRead.withMaxReadTime(getMaxReadTime()).withMaxNumRecords(getMaxNumRecords()));
}
示例12: getInitialInputs
import org.apache.beam.sdk.values.PBegin; //导入依赖的package包/类
/**
 * Routes the request for initial inputs to the bounded or the unbounded input provider,
 * depending on what {@code ReadTranslation.sourceIsBounded} reports for the transform.
 *
 * @param appliedTransform the applied read transform
 * @param targetParallelism passed through unchanged to the chosen provider
 * @return the initial bundles produced by the chosen provider
 * @throws IllegalArgumentException if the boundedness is neither BOUNDED nor UNBOUNDED
 * @throws Exception if the chosen provider fails
 */
@Override
public Collection<CommittedBundle<SourceShard<T>>> getInitialInputs(
    AppliedPTransform<PBegin, PCollection<T>, PTransform<PBegin, PCollection<T>>>
        appliedTransform,
    int targetParallelism)
    throws Exception {
  // The raw type stands in for a cast that could be avoided, but only at the cost of too
  // much bounded polymorphism.
  final Collection rawShards;
  switch (ReadTranslation.sourceIsBounded(appliedTransform)) {
    case BOUNDED:
      rawShards = boundedInputProvider.getInitialInputs(appliedTransform, targetParallelism);
      break;
    case UNBOUNDED:
      rawShards = unboundedInputProvider.getInitialInputs(appliedTransform, targetParallelism);
      break;
    default:
      throw new IllegalArgumentException("PCollection is neither bounded nor unbounded?!?");
  }
  return rawShards;
}
示例13: getTestStream
import org.apache.beam.sdk.values.PBegin; //导入依赖的package包/类
/**
 * Recovers a {@link TestStream} from an {@link AppliedPTransform}, whether that application
 * holds an original {@link TestStream} or a rehydrated transform.
 *
 * <p>The application is always translated to its proto form and rebuilt from the payload.
 *
 * @param application the applied transform to convert
 * @return the {@link TestStream} rebuilt from the proto payload
 * @throws IllegalArgumentException if the translated transform does not carry the test-stream
 *     URN
 * @throws IOException if translating to proto or parsing the payload fails
 */
public static <T> TestStream<T> getTestStream(
    AppliedPTransform<PBegin, PCollection<T>, PTransform<PBegin, PCollection<T>>> application)
    throws IOException {
  // For robustness, the transform is never returned directly even when
  // application.getTransform() is already a TestStream.
  SdkComponents sdkComponents = SdkComponents.create();
  RunnerApi.PTransform transformProto = PTransformTranslation.toProto(application, sdkComponents);

  String urn = transformProto.getSpec().getUrn();
  checkArgument(
      TEST_STREAM_TRANSFORM_URN.equals(urn),
      "Attempt to get %s from a transform with wrong URN %s",
      TestStream.class.getSimpleName(),
      urn);

  RunnerApi.TestStreamPayload payload =
      RunnerApi.TestStreamPayload.parseFrom(transformProto.getSpec().getPayload());
  RehydratedComponents rehydrated =
      RehydratedComponents.forComponents(sdkComponents.toComponents());

  return (TestStream<T>) testStreamFromProtoPayload(payload, rehydrated);
}
示例14: registerTransformWithUnregisteredChildren
import org.apache.beam.sdk.values.PBegin; //导入依赖的package包/类
/**
 * Registering a transform whose child has not been registered must fail with an
 * {@link IllegalArgumentException} that names the child.
 */
@Test
public void registerTransformWithUnregisteredChildren() throws IOException {
  Create.Values<Long> parentCreate = Create.of(1L, 2L, 3L);
  GenerateSequence childSequence = GenerateSequence.from(0);
  PCollection<Long> output = pipeline.apply(parentCreate);

  AppliedPTransform<?, ?, ?> parent =
      AppliedPTransform.<PBegin, PCollection<Long>, Create.Values<Long>>of(
          "my_transform", pipeline.begin().expand(), output.expand(), parentCreate, pipeline);
  AppliedPTransform<?, ?, ?> unregisteredChild =
      AppliedPTransform.<PBegin, PCollection<Long>, GenerateSequence>of(
          "my_transform/my_nesting",
          pipeline.begin().expand(),
          output.expand(),
          childSequence,
          pipeline);

  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage(unregisteredChild.toString());

  // The child is passed as a sub-transform but was never registered itself.
  components.registerPTransform(
      parent, Collections.<AppliedPTransform<?, ?, ?>>singletonList(unregisteredChild));
}
示例15: testRegistrarEncodedProto
import org.apache.beam.sdk.values.PBegin; //导入依赖的package包/类
/**
 * Translates an applied {@code TestStream} to proto, checks that the spec carries the
 * test-stream URN, and verifies the encoded payload against the original stream.
 */
@Test
public void testRegistrarEncodedProto() throws Exception {
  PCollection<String> streamOutput = p.apply(testStream);
  AppliedPTransform<PBegin, PCollection<String>, TestStream<String>> application =
      AppliedPTransform.<PBegin, PCollection<String>, TestStream<String>>of(
          "fakeName", PBegin.in(p).expand(), streamOutput.expand(), testStream, p);

  SdkComponents sdkComponents = SdkComponents.create();
  RunnerApi.FunctionSpec functionSpec =
      PTransformTranslation.toProto(application, sdkComponents).getSpec();
  assertThat(functionSpec.getUrn(), equalTo(TEST_STREAM_TRANSFORM_URN));

  RunnerApi.TestStreamPayload decoded = TestStreamPayload.parseFrom(functionSpec.getPayload());
  RehydratedComponents rehydrated =
      RehydratedComponents.forComponents(sdkComponents.toComponents());
  verifyTestStreamEncoding(testStream, decoded, rehydrated);
}