This article collects typical usage examples of the Java method org.apache.beam.sdk.values.PBegin.apply. If you have been wondering how exactly PBegin.apply is used in Java, how to call it, or where to find real examples of it, the curated code samples below may help. You can also explore further usage examples of the class this method belongs to, org.apache.beam.sdk.values.PBegin.
Below are 15 code examples of the PBegin.apply method, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Java code samples.
Example 1: expand
import org.apache.beam.sdk.values.PBegin; // import the package/class the method depends on
@Override
public PCollection<byte[]> expand(PBegin input) {
if (getFilepattern() == null) {
throw new IllegalStateException(
"Need to set the filepattern of a TFRecordIO.Read transform");
}
if (getValidate()) {
checkState(getFilepattern().isAccessible(), "Cannot validate with a RVP.");
try {
MatchResult matches = FileSystems.match(getFilepattern().get());
checkState(
!matches.metadata().isEmpty(),
"Unable to find any files matching %s",
getFilepattern().get());
} catch (IOException e) {
throw new IllegalStateException(
String.format("Failed to validate %s", getFilepattern().get()), e);
}
}
return input.apply("Read", org.apache.beam.sdk.io.Read.from(getSource()));
}
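For context, here is a minimal, self-contained sketch of how a pipeline author would trigger this expand method through the public TFRecordIO API. The filepattern is a hypothetical placeholder:

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.TFRecordIO;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.values.PCollection;

public class TFRecordReadSketch {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());
    // Applying TFRecordIO.read() to the pipeline invokes expand(PBegin) as shown
    // above; the filepattern below is a hypothetical placeholder.
    PCollection<byte[]> records =
        p.apply(TFRecordIO.read().from("/tmp/input/*.tfrecord"));
    p.run().waitUntilFinish();
  }
}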
Example 2: expand
import org.apache.beam.sdk.values.PBegin; // import the package/class the method depends on
@Override
public PCollection<T> expand(PBegin input) {
checkNotNull(getFilepattern(), "filepattern");
checkNotNull(getSchema(), "schema");
if (getMatchConfiguration().getWatchInterval() == null && !getHintMatchesManyFiles()) {
return input.apply(
"Read",
org.apache.beam.sdk.io.Read.from(
createSource(
getFilepattern(),
getMatchConfiguration().getEmptyMatchTreatment(),
getRecordClass(),
getSchema())));
}
// All other cases go through ReadAll.
ReadAll<T> readAll =
(getRecordClass() == GenericRecord.class)
? (ReadAll<T>) readAllGenericRecords(getSchema())
: readAll(getRecordClass());
readAll = readAll.withMatchConfiguration(getMatchConfiguration());
return input
.apply("Create filepattern", Create.ofProvider(getFilepattern(), StringUtf8Coder.of()))
.apply("Via ReadAll", readAll);
}
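A minimal usage sketch for the AvroIO path above. The inline schema is a hypothetical example; with no watch interval and no many-files hint, expand(PBegin) takes the direct "Read" branch rather than ReadAll:

import org.apache.avro.generic.GenericRecord;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.AvroIO;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.values.PCollection;

public class AvroReadSketch {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());
    // A hypothetical record schema; readGenericRecords matches the
    // GenericRecord case of the ReadAll dispatch shown above.
    String schema =
        "{\"type\":\"record\",\"name\":\"Rec\","
            + "\"fields\":[{\"name\":\"id\",\"type\":\"long\"}]}";
    PCollection<GenericRecord> records =
        p.apply(AvroIO.readGenericRecords(schema).from("/tmp/avro/*.avro"));
    p.run().waitUntilFinish();
  }
}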
Example 3: expand
import org.apache.beam.sdk.values.PBegin; // import the package/class the method depends on
@Override
public PCollection<String> expand(PBegin input) {
checkNotNull(getFilepattern(), "need to set the filepattern of a TextIO.Read transform");
if (getMatchConfiguration().getWatchInterval() == null && !getHintMatchesManyFiles()) {
return input.apply("Read", org.apache.beam.sdk.io.Read.from(getSource()));
}
// All other cases go through ReadAll.
return input
.apply("Create filepattern", Create.ofProvider(getFilepattern(), StringUtf8Coder.of()))
.apply(
"Via ReadAll",
readAll()
.withCompression(getCompression())
.withMatchConfiguration(getMatchConfiguration())
.withDelimiter(getDelimiter()));
}
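A minimal sketch of driving this expand method through the public TextIO API; the filepattern is a placeholder. With no watch interval and no many-files hint, the simple "Read" branch above is taken:

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.TextIO;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.values.PCollection;

public class TextReadSketch {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());
    // Hypothetical filepattern; expand(PBegin) returns the "Read" branch above.
    PCollection<String> lines = p.apply(TextIO.read().from("/tmp/input/*.txt"));
    p.run().waitUntilFinish();
  }
}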
Example 4: expand
import org.apache.beam.sdk.values.PBegin; // import the package/class the method depends on
@Override
public PCollectionList<String> expand(PBegin b) {
// Composite transform: apply delegates to other transformations,
// here a Create transform.
PCollection<String> result = b.apply(Create.of("hello", "world"));
// Issue below: PCollection.createPrimitiveOutputInternal should not be used
// from within a composite transform.
return PCollectionList.of(
Arrays.asList(
result,
PCollection.createPrimitiveOutputInternal(
b.getPipeline(),
WindowingStrategy.globalDefault(),
result.isBounded(),
StringUtf8Coder.of())));
}
Example 5: expand
import org.apache.beam.sdk.values.PBegin; // import the package/class the method depends on
@Override
public PCollection<KinesisRecord> expand(PBegin input) {
checkArgument(
streamExists(getAWSClientsProvider().getKinesisClient(), getStreamName()),
"Stream %s does not exist",
getStreamName());
Unbounded<KinesisRecord> unbounded =
org.apache.beam.sdk.io.Read.from(
new KinesisSource(
getAWSClientsProvider(),
getStreamName(),
getInitialPosition(),
getUpToDateThreshold()));
PTransform<PBegin, PCollection<KinesisRecord>> transform = unbounded;
if (getMaxNumRecords() < Long.MAX_VALUE || getMaxReadTime() != null) {
transform =
unbounded.withMaxReadTime(getMaxReadTime()).withMaxNumRecords(getMaxNumRecords());
}
return input.apply(transform);
}
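A hedged sketch of how this transform is typically applied; the stream name, credentials, and region are all hypothetical placeholders. Capping the record count exercises the bounded-wrapper branch above:

import com.amazonaws.regions.Regions;
import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.kinesis.KinesisIO;
import org.apache.beam.sdk.io.kinesis.KinesisRecord;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.values.PCollection;

public class KinesisReadSketch {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());
    // Placeholder stream and credentials. Setting a max record count makes
    // expand(PBegin) wrap the unbounded source, as shown above.
    PCollection<KinesisRecord> records =
        p.apply(KinesisIO.read()
            .withStreamName("my-stream")
            .withInitialPositionInStream(InitialPositionInStream.LATEST)
            .withAWSClientsProvider("accessKey", "secretKey", Regions.US_EAST_1)
            .withMaxNumRecords(1000));
    p.run().waitUntilFinish();
  }
}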
Example 6: expand
import org.apache.beam.sdk.values.PBegin; // import the package/class the method depends on
@Override
public PDone expand(PBegin input) {
final PCollectionView<ActualT> actual = input.apply("CreateActual", createActual);
input
.apply(Create.of(0).withCoder(VarIntCoder.of()))
.apply("WindowToken", windowToken)
.apply(
"RunChecks",
ParDo.of(new SideInputCheckerDoFn<>(checkerFn, actual, site)).withSideInputs(actual))
.apply("VerifyAssertions", new DefaultConcludeTransform());
return PDone.in(input.getPipeline());
}
Example 7: expand
import org.apache.beam.sdk.values.PBegin; // import the package/class the method depends on
@Override
public PCollection<Long> expand(PBegin input) {
boolean isRangeUnbounded = getTo() < 0;
boolean usesUnboundedFeatures =
getTimestampFn() != null || getElementsPerPeriod() > 0 || getMaxReadTime() != null;
if (!isRangeUnbounded && !usesUnboundedFeatures) {
// This is the only case when we can use the bounded CountingSource.
return input.apply(Read.from(CountingSource.createSourceForSubrange(getFrom(), getTo())));
}
CountingSource.UnboundedCountingSource source = CountingSource.createUnboundedFrom(getFrom());
if (getTimestampFn() != null) {
source = source.withTimestampFn(getTimestampFn());
}
if (getElementsPerPeriod() > 0) {
source = source.withRate(getElementsPerPeriod(), getPeriod());
}
Read.Unbounded<Long> readUnbounded = Read.from(source);
if (getMaxReadTime() == null) {
if (isRangeUnbounded) {
return input.apply(readUnbounded);
} else {
return input.apply(readUnbounded.withMaxNumRecords(getTo() - getFrom()));
}
} else {
BoundedReadFromUnboundedSource<Long> withMaxReadTime =
readUnbounded.withMaxReadTime(getMaxReadTime());
if (isRangeUnbounded) {
return input.apply(withMaxReadTime);
} else {
return input.apply(withMaxReadTime.withMaxNumRecords(getTo() - getFrom()));
}
}
}
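This expand logic appears to match Beam's GenerateSequence transform. A short sketch exercising both the bounded branch and the rate-limited unbounded branch, with a max read time so the unbounded read terminates:

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.GenerateSequence;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.values.PCollection;
import org.joda.time.Duration;

public class GenerateSequenceSketch {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());
    // Bounded range: takes the bounded CountingSource branch above.
    PCollection<Long> bounded = p.apply("Bounded", GenerateSequence.from(0).to(1000));
    // Rate-limited range: takes the unbounded branch; the max read time
    // corresponds to the BoundedReadFromUnboundedSource case above.
    PCollection<Long> rateLimited =
        p.apply("RateLimited",
            GenerateSequence.from(0)
                .withRate(5, Duration.standardSeconds(1))
                .withMaxReadTime(Duration.standardSeconds(10)));
    p.run().waitUntilFinish();
  }
}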
Example 8: expand
import org.apache.beam.sdk.values.PBegin; // import the package/class the method depends on
@Override
public PCollection<Struct> expand(PBegin input) {
getSpannerConfig().validate();
checkArgument(
getTimestampBound() != null,
"SpannerIO.read() runs in a read only transaction and requires timestamp to be set "
+ "with withTimestampBound or withTimestamp method");
if (getReadOperation().getQuery() != null) {
// TODO: validate query?
} else if (getReadOperation().getTable() != null) {
// Assume read
checkNotNull(
getReadOperation().getColumns(),
"For a read operation SpannerIO.read() requires a list of "
+ "columns to set with withColumns method");
checkArgument(
!getReadOperation().getColumns().isEmpty(),
"For a read operation SpannerIO.read() requires a"
+ " list of columns to set with withColumns method");
} else {
throw new IllegalArgumentException(
"SpannerIO.read() requires configuring query or read operation.");
}
PCollectionView<Transaction> transaction = getTransaction();
if (transaction == null && getTimestampBound() != null) {
transaction =
input.apply(
createTransaction()
.withTimestampBound(getTimestampBound())
.withSpannerConfig(getSpannerConfig()));
}
ReadAll readAll =
readAll().withSpannerConfig(getSpannerConfig()).withTransaction(transaction);
return input.apply(Create.of(getReadOperation())).apply("Execute query", readAll);
}
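A minimal sketch of satisfying the preconditions checked above, with placeholder project, instance, database, and query values; the timestamp bound satisfies the checkArgument in expand(PBegin):

import com.google.cloud.spanner.Struct;
import com.google.cloud.spanner.TimestampBound;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.gcp.spanner.SpannerIO;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.values.PCollection;

public class SpannerReadSketch {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());
    // Placeholder IDs and query; withTimestampBound fulfils the requirement
    // enforced by the checkArgument above.
    PCollection<Struct> rows =
        p.apply(SpannerIO.read()
            .withProjectId("my-project")
            .withInstanceId("my-instance")
            .withDatabaseId("my-database")
            .withQuery("SELECT id, name FROM users")
            .withTimestampBound(TimestampBound.strong()));
    p.run().waitUntilFinish();
  }
}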
Example 9: expand
import org.apache.beam.sdk.values.PBegin; // import the package/class the method depends on
@Override
public PCollection<HCatRecord> expand(PBegin input) {
checkArgument(getTable() != null, "withTable() is required");
checkArgument(getConfigProperties() != null, "withConfigProperties() is required");
return input.apply(org.apache.beam.sdk.io.Read.from(new BoundedHCatalogSource(this)));
}
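A short usage sketch, assuming a placeholder metastore URI, database, and table; both withConfigProperties() and withTable() are required by the checks above:

import java.util.Collections;
import java.util.Map;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.hcatalog.HCatalogIO;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.values.PCollection;
import org.apache.hive.hcatalog.data.HCatRecord;

public class HCatalogReadSketch {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());
    // Placeholder metastore URI and table name, satisfying the two
    // checkArgument calls in expand(PBegin) above.
    Map<String, String> config =
        Collections.singletonMap("hive.metastore.uris", "thrift://localhost:9083");
    PCollection<HCatRecord> records =
        p.apply(HCatalogIO.read()
            .withConfigProperties(config)
            .withDatabase("default")
            .withTable("my_table"));
    p.run().waitUntilFinish();
  }
}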
Example 10: expand
import org.apache.beam.sdk.values.PBegin; // import the package/class the method depends on
@Override
public PCollection<T> expand(PBegin input) {
checkArgument(
(hosts() != null && port() != null) || cassandraService() != null,
"Either withHosts() and withPort(), or withCassandraService() is required");
checkArgument(keyspace() != null, "withKeyspace() is required");
checkArgument(table() != null, "withTable() is required");
checkArgument(entity() != null, "withEntity() is required");
checkArgument(coder() != null, "withCoder() is required");
return input.apply(org.apache.beam.sdk.io.Read.from(
new CassandraSource<T>(this, null)));
}
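A self-contained sketch with a hypothetical Person entity; the DataStax mapper annotations and all connection values are placeholders, and every option the checks above require is supplied:

import com.datastax.driver.mapping.annotations.Column;
import com.datastax.driver.mapping.annotations.Table;
import java.io.Serializable;
import java.util.Arrays;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.coders.SerializableCoder;
import org.apache.beam.sdk.io.cassandra.CassandraIO;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.values.PCollection;

public class CassandraReadSketch {

  // Hypothetical entity mapped to the "person" table via the DataStax mapper.
  @Table(keyspace = "my_keyspace", name = "person")
  public static class Person implements Serializable {
    @Column(name = "id")
    public long id;
    @Column(name = "name")
    public String name;
  }

  public static void main(String[] args) {
    Pipeline p = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());
    // Placeholder hosts/keyspace/table; these satisfy the checkArgument
    // calls in expand(PBegin) above.
    PCollection<Person> people =
        p.apply(CassandraIO.<Person>read()
            .withHosts(Arrays.asList("localhost"))
            .withPort(9042)
            .withKeyspace("my_keyspace")
            .withTable("person")
            .withEntity(Person.class)
            .withCoder(SerializableCoder.of(Person.class)));
    p.run().waitUntilFinish();
  }
}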
Example 11: expand
import org.apache.beam.sdk.values.PBegin; // import the package/class the method depends on
@Override
public PCollection<String> expand(PBegin input) {
ConnectionConfiguration connectionConfiguration = getConnectionConfiguration();
checkState(
connectionConfiguration != null,
"withConnectionConfiguration() is required");
return input.apply(org.apache.beam.sdk.io.Read
.from(new BoundedElasticsearchSource(this, null, null, null)));
}
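A minimal sketch, assuming a local Elasticsearch cluster and placeholder index/type names; the connection configuration satisfies the checkState above:

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.values.PCollection;

public class ElasticsearchReadSketch {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());
    // Placeholder cluster address, index, and type.
    PCollection<String> documents =
        p.apply(ElasticsearchIO.read()
            .withConnectionConfiguration(
                ElasticsearchIO.ConnectionConfiguration.create(
                    new String[] {"http://localhost:9200"}, "my-index", "my-type")));
    p.run().waitUntilFinish();
  }
}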
Example 12: expand
import org.apache.beam.sdk.values.PBegin; // import the package/class the method depends on
@Override
public PCollection<SolrDocument> expand(PBegin input) {
checkArgument(
getConnectionConfiguration() != null, "withConnectionConfiguration() is required");
checkArgument(getCollection() != null, "from() is required");
return input.apply(org.apache.beam.sdk.io.Read.from(new BoundedSolrSource(this, null)));
}
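A minimal sketch with a placeholder ZooKeeper address and collection name, matching the two checkArgument calls above:

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.solr.SolrIO;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.values.PCollection;
import org.apache.solr.common.SolrDocument;

public class SolrReadSketch {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());
    // Placeholder ZooKeeper host and collection name.
    PCollection<SolrDocument> documents =
        p.apply(SolrIO.read()
            .withConnectionConfiguration(
                SolrIO.ConnectionConfiguration.create("zk-host:2181"))
            .from("my-collection"));
    p.run().waitUntilFinish();
  }
}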
Example 13: expand
import org.apache.beam.sdk.values.PBegin; // import the package/class the method depends on
@Override
public PCollection<Document> expand(PBegin input) {
checkArgument(uri() != null, "withUri() is required");
checkArgument(database() != null, "withDatabase() is required");
checkArgument(collection() != null, "withCollection() is required");
return input.apply(org.apache.beam.sdk.io.Read.from(new BoundedMongoDbSource(this)));
}
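A minimal sketch with placeholder connection values for a local MongoDB instance, matching the three checkArgument calls above:

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.mongodb.MongoDbIO;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.values.PCollection;
import org.bson.Document;

public class MongoDbReadSketch {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());
    // Placeholder URI, database, and collection names.
    PCollection<Document> documents =
        p.apply(MongoDbIO.read()
            .withUri("mongodb://localhost:27017")
            .withDatabase("my_db")
            .withCollection("my_collection"));
    p.run().waitUntilFinish();
  }
}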
Example 14: expand
import org.apache.beam.sdk.values.PBegin; // import the package/class the method depends on
@Override
public PCollection<TableRow> expand(PBegin input) {
// This composite simply delegates to the wrapped 'inner' transform's expansion.
return input.apply(inner);
}
Example 15: expand
import org.apache.beam.sdk.values.PBegin; // import the package/class the method depends on
@Override
public PCollection<Entity> expand(PBegin input) {
checkArgument(getProjectId() != null, "projectId provider cannot be null");
if (getProjectId().isAccessible()) {
checkArgument(getProjectId().get() != null, "projectId cannot be null");
}
checkArgument(
getQuery() != null || getLiteralGqlQuery() != null,
"Either withQuery() or withLiteralGqlQuery() is required");
checkArgument(
getQuery() == null || getLiteralGqlQuery() == null,
"withQuery() and withLiteralGqlQuery() are exclusive");
V1Options v1Options = V1Options.from(getProjectId(), getNamespace(), getLocalhost());
/*
* This composite transform involves the following steps:
* 1. Create a singleton of the user provided {@code query} or if {@code gqlQuery} is
* provided apply a {@link ParDo} that translates the {@code gqlQuery} into a {@code query}.
*
* 2. A {@link ParDo} splits the resulting query into {@code numQuerySplits} and
* assign each split query a unique {@code Integer} as the key. The resulting output is
* of the type {@code PCollection<KV<Integer, Query>>}.
*
* If the value of {@code numQuerySplits} is less than or equal to 0, then the number of
* splits will be computed dynamically based on the size of the data for the {@code query}.
*
* 3. The resulting {@code PCollection} is sharded using a {@link GroupByKey} operation. The
* queries are extracted from the {@code KV<Integer, Iterable<Query>>} and flattened to
* output a {@code PCollection<Query>}.
*
* 4. Finally, a {@code ParDo} reads entities for each query and outputs
* a {@code PCollection<Entity>}.
*/
PCollection<Query> inputQuery;
if (getQuery() != null) {
inputQuery = input.apply(Create.of(getQuery()));
} else {
inputQuery =
input
.apply(Create.ofProvider(getLiteralGqlQuery(), StringUtf8Coder.of()))
.apply(ParDo.of(new GqlQueryTranslateFn(v1Options)));
}
return inputQuery
.apply("Split", ParDo.of(new SplitQueryFn(v1Options, getNumQuerySplits())))
.apply("Reshuffle", Reshuffle.<Query>viaRandomKey())
.apply("Read", ParDo.of(new ReadFn(v1Options)));
}
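A minimal sketch of the GQL branch, with a placeholder project ID and query; per the checks above, exactly one of withQuery() or withLiteralGqlQuery() may be set:

import com.google.datastore.v1.Entity;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.gcp.datastore.DatastoreIO;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.values.PCollection;

public class DatastoreReadSketch {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());
    // Placeholder project ID and GQL query string; this drives the
    // GqlQueryTranslateFn path in expand(PBegin) above.
    PCollection<Entity> entities =
        p.apply(DatastoreIO.v1().read()
            .withProjectId("my-project")
            .withLiteralGqlQuery("SELECT * FROM Task"));
    p.run().waitUntilFinish();
  }
}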