This article collects typical usage examples of the Java method org.apache.beam.sdk.PipelineResult.waitUntilFinish. If you are wondering what PipelineResult.waitUntilFinish does, how to call it, or where to find examples of it in use, the curated code samples below should help. You can also explore further usage examples of the enclosing class, org.apache.beam.sdk.PipelineResult.
The following shows 14 code examples of PipelineResult.waitUntilFinish, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Java code samples.
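Before diving into the examples, here is a minimal, hedged sketch of the pattern they all share: build a pipeline, call run() to get a PipelineResult, and call waitUntilFinish() to block until the job reaches a terminal state. The class name and the Create.of placeholder input are illustrative only and do not come from the examples below; the runner defaults to whatever is on the classpath (typically the DirectRunner).

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.PipelineResult;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Create;

public class WaitUntilFinishSketch {
  public static void main(String[] args) {
    PipelineOptions options = PipelineOptionsFactory.fromArgs(args).create();
    Pipeline p = Pipeline.create(options);
    p.apply("CreateInput", Create.of("a", "b", "c")); // placeholder transform
    PipelineResult result = p.run();
    // waitUntilFinish() blocks until the pipeline reaches a terminal state (DONE, FAILED, CANCELLED, ...).
    PipelineResult.State state = result.waitUntilFinish();
    System.out.println("Pipeline finished with state: " + state);
  }
}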
Example 1: main
import org.apache.beam.sdk.PipelineResult; // import the package/class this method depends on

public static void main(String[] args) {
  PipelineOptions options = PipelineOptionsFactory.create();
  options.setRunner(DirectRunner.class); // forced for this demo
  Pipeline p = Pipeline.create(options);

  // register Avro coders for serializing our messages
  Coders.registerAvroCoders(p, ExtendedRecord.class, UntypedOccurrence.class);

  PCollection<UntypedOccurrence> verbatimRecords = p.apply(
      "Read Avro", AvroIO.read(UntypedOccurrence.class).from("demo/output/data*"));

  verbatimRecords.apply("Write file per Genus",
      AvroIO.write(UntypedOccurrence.class)
          .to("demo/output-split/data*") // prefix is required but overwritten
          .to(new GenusDynamicAvroDestinations(
              FileSystems.matchNewResource("demo/output-split/data*", true))));

  LOG.info("Starting the pipeline");
  PipelineResult result = p.run();
  result.waitUntilFinish();
  LOG.info("Pipeline finished with state: {} ", result.getState());
}
Example 2: main
import org.apache.beam.sdk.PipelineResult; // import the package/class this method depends on

public static void main(String[] args) {
  Configuration conf = new Configuration(); // assume defaults on CP
  conf.setClass("mapreduce.job.inputformat.class", DwCAInputFormat.class, InputFormat.class);
  conf.setStrings("mapreduce.input.fileinputformat.inputdir", "hdfs://ha-nn/tmp/dwca-lep5.zip");
  conf.setClass("key.class", Text.class, Object.class);
  conf.setClass("value.class", ExtendedRecord.class, Object.class);

  Pipeline p = newPipeline(args, conf);
  Coders.registerAvroCoders(p, UntypedOccurrence.class, TypedOccurrence.class, ExtendedRecord.class);

  PCollection<KV<Text, ExtendedRecord>> rawRecords =
      p.apply("Read DwC-A", HadoopInputFormatIO.<Text, ExtendedRecord>read().withConfiguration(conf));

  PCollection<UntypedOccurrence> verbatimRecords = rawRecords.apply(
      "Convert to Avro", ParDo.of(fromExtendedRecordKVP()));

  verbatimRecords.apply(
      "Write Avro files", AvroIO.write(UntypedOccurrence.class).to("hdfs://ha-nn/tmp/dwca-lep5.avro"));

  LOG.info("Starting the pipeline");
  PipelineResult result = p.run();
  result.waitUntilFinish();
  LOG.info("Pipeline finished with state: {} ", result.getState());
}
Example 3: matchesSafely
import org.apache.beam.sdk.PipelineResult; // import the package/class this method depends on

@Override
protected boolean matchesSafely(PipelineResult pipelineResult) {
  pipelineResult.waitUntilFinish();

  Session session = cluster.connect();
  ResultSet result = session.execute("select id,name from " + CassandraTestDataSet.KEYSPACE
      + "." + tableName);
  List<Row> rows = result.all();
  if (rows.size() != 1000) {
    return false;
  }
  for (Row row : rows) {
    if (!row.getString("name").matches("Name.*")) {
      return false;
    }
  }
  return true;
}
Example 4: testWaitUntilFinishTimeout
import org.apache.beam.sdk.PipelineResult; // import the package/class this method depends on

@Test
public void testWaitUntilFinishTimeout() throws Exception {
  DirectOptions options = PipelineOptionsFactory.as(DirectOptions.class);
  options.setBlockOnRun(false);
  options.setRunner(DirectRunner.class);
  Pipeline p = Pipeline.create(options);
  p.apply(Create.of(1L))
      .apply(ParDo.of(
          new DoFn<Long, Long>() {
            @ProcessElement
            public void hang(ProcessContext context) throws InterruptedException {
              // Hangs "forever"
              Thread.sleep(Long.MAX_VALUE);
            }
          }));
  PipelineResult result = p.run();
  // The pipeline should never complete.
  assertThat(result.getState(), is(State.RUNNING));
  // Must time out, otherwise this test will never complete.
  result.waitUntilFinish(Duration.millis(1L));
  assertThat(result.getState(), is(State.RUNNING));
}
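A usage note on the timeout variant exercised by the test above: waitUntilFinish(Duration) returns the pipeline's terminal state, or null when the timeout elapses while the job is still running, so callers can react to a timeout instead of polling getState(). The helper below is a sketch of that contract, not part of the test; the method name and the cancel-on-timeout policy are assumptions.

import java.io.IOException;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.PipelineResult;
import org.joda.time.Duration;

public class WaitWithTimeoutSketch {
  /** Runs the pipeline, waits up to the given timeout, and cancels the job if it is still running. */
  static PipelineResult.State runWithTimeout(Pipeline p, Duration timeout) throws IOException {
    PipelineResult result = p.run();
    // Returns the terminal state, or null if the timeout elapsed before the pipeline terminated.
    PipelineResult.State state = result.waitUntilFinish(timeout);
    if (state == null) {
      result.cancel();           // ask the runner to cancel the still-running job
      state = result.getState(); // best-effort view of the state after requesting cancellation
    }
    return state;
  }
}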
Example 5: getSample
import org.apache.beam.sdk.PipelineResult; // import the package/class this method depends on

@Override
public void getSample(int limit, Consumer<IndexedRecord> consumer) {
  // Create a pipeline using the input component to get records.
  DirectOptions options = BeamLocalRunnerOption.getOptions();
  final Pipeline p = Pipeline.create(options);

  // Create an input runtime based on the properties.
  BigQueryInputRuntime inputRuntime = new BigQueryInputRuntime();
  BigQueryInputProperties inputProperties = new BigQueryInputProperties(null);
  inputProperties.init();
  inputProperties.setDatasetProperties(properties);
  inputRuntime.initialize(new BeamJobRuntimeContainer(options), inputProperties);

  try (DirectConsumerCollector<IndexedRecord> collector = DirectConsumerCollector.of(consumer)) {
    // Collect a sample of the input records.
    p.apply(inputRuntime) //
        .apply(Sample.<IndexedRecord> any(limit)).apply(collector);
    PipelineResult pr = p.run();
    pr.waitUntilFinish();
  }
}
Example 6: main
import org.apache.beam.sdk.PipelineResult; // import the package/class this method depends on

public static void main(String[] args) {
  PipelineOptions options = PipelineOptionsFactory.create();
  options.setRunner(DirectRunner.class); // forced for this demo
  Pipeline p = Pipeline.create(options);

  // register Avro coders for serializing our messages
  Coders.registerAvroCoders(p, ExtendedRecord.class, UntypedOccurrence.class);

  // Read the DwC-A using our custom reader
  PCollection<ExtendedRecord> rawRecords = p.apply(
      "Read from Darwin Core Archive", DwCAIO.Read.withPaths("demo/dwca.zip", "demo/target/tmp"));

  // Convert the ExtendedRecord into an UntypedOccurrence record
  DoFn<ExtendedRecord, UntypedOccurrence> fn = BeamFunctions.beamify(FunctionFactory.untypedOccurrenceBuilder());

  // TODO: Explore the generics as to why the coder registry does not find it and we need to set the coder explicitly
  PCollection<UntypedOccurrence> verbatimRecords = rawRecords.apply(
      "Convert the objects into untyped DwC style records", ParDo.of(fn))
      .setCoder(AvroCoder.of(UntypedOccurrence.class));

  // Write the result as an Avro file
  verbatimRecords.apply(
      "Save the records as Avro", AvroIO.write(UntypedOccurrence.class).to("demo/output/data"));

  LOG.info("Starting the pipeline");
  PipelineResult result = p.run();
  result.waitUntilFinish();
  LOG.info("Pipeline finished with state: {} ", result.getState());
}
Example 7: main
import org.apache.beam.sdk.PipelineResult; // import the package/class this method depends on

public static void main(String[] args) {
  Configuration conf = new Configuration(); // assume defaults on CP
  Pipeline p = newPipeline(args, conf);
  Coders.registerAvroCoders(p, UntypedOccurrenceLowerCase.class, TypedOccurrence.class, ExtendedRecord.class);

  // Read Avro files
  PCollection<UntypedOccurrenceLowerCase> verbatimRecords = p.apply(
      "Read Avro files", AvroIO.read(UntypedOccurrenceLowerCase.class).from(SOURCE_PATH));

  // Convert the objects (interpretation)
  PCollection<TypedOccurrence> interpreted = verbatimRecords.apply(
      "Interpret occurrence records", ParDo.of(BeamFunctions.beamify(FunctionFactory.interpretOccurrenceLowerCase())))
      .setCoder(AvroCoder.of(TypedOccurrence.class));

  // Do the nub lookup
  PCollection<TypedOccurrence> matched = interpreted.apply(
      "Align to backbone using species/match", ParDo.of(
          BeamFunctions.beamify(FunctionFactory.gbifSpeciesMatch())))
      .setCoder(AvroCoder.of(TypedOccurrence.class));

  // Write the file to SOLR
  final SolrIO.ConnectionConfiguration conn = SolrIO.ConnectionConfiguration
      .create(SOLR_HOST);
  PCollection<SolrInputDocument> inputDocs = matched.apply(
      "Convert to SOLR", ParDo.of(new SolrDocBuilder()));
  inputDocs.apply(SolrIO.write().to("beam-demo1").withConnectionConfiguration(conn));
  // instruct the writer to use a provided document ID

  LOG.info("Starting the pipeline");
  PipelineResult result = p.run();
  result.waitUntilFinish();
  LOG.info("Pipeline finished with state: {} ", result.getState());
}
Example 8: main
import org.apache.beam.sdk.PipelineResult; // import the package/class this method depends on

public static void main(String[] args) {
  Configuration conf = new Configuration(); // assume defaults on CP
  Pipeline p = newPipeline(args, conf);
  Coders.registerAvroCoders(p, UntypedOccurrenceLowerCase.class, TypedOccurrence.class, ExtendedRecord.class);

  // Read Avro files
  PCollection<UntypedOccurrenceLowerCase> verbatimRecords = p.apply(
      "Read Avro files", AvroIO.read(UntypedOccurrenceLowerCase.class).from(SOURCE_PATH));

  // Convert the objects (interpretation)
  PCollection<TypedOccurrence> interpreted = verbatimRecords.apply(
      "Interpret occurrence records", ParDo.of(BeamFunctions.beamify(FunctionFactory.interpretOccurrenceLowerCase())))
      .setCoder(AvroCoder.of(TypedOccurrence.class));

  // Do the nub lookup
  PCollection<TypedOccurrence> matched = interpreted.apply(
      "Align to backbone using species/match", ParDo.of(
          BeamFunctions.beamify(FunctionFactory.gbifSpeciesMatch())))
      .setCoder(AvroCoder.of(TypedOccurrence.class));

  // Convert to JSON
  PCollection<String> json = matched.apply(
      "Convert to JSON", ParDo.of(BeamFunctions.asJson(TypedOccurrence.class)));

  // Write the file to ES
  ElasticsearchIO.ConnectionConfiguration conn = ElasticsearchIO.ConnectionConfiguration
      .create(ES_HOSTS, ES_INDEX, ES_TYPE);

  // Index in ES
  json.apply(ElasticsearchIO.write().withConnectionConfiguration(conn).withMaxBatchSize(BATCH_SIZE));
  // instruct the writer to use a provided document ID

  LOG.info("Starting the pipeline");
  PipelineResult result = p.run();
  result.waitUntilFinish();
  LOG.info("Pipeline finished with state: {} ", result.getState());
}
Example 9: main
import org.apache.beam.sdk.PipelineResult; // import the package/class this method depends on

public static void main(String[] args) {
  PipelineOptions options = PipelineOptionsFactory.create();
  options.setRunner(DirectRunner.class); // forced for this demo
  Pipeline p = Pipeline.create(options);

  // register Avro coders for serializing our messages
  Coders.registerAvroCoders(p, ExtendedRecord.class, UntypedOccurrence.class);

  // Read the DwC-A using our custom reader
  PCollection<ExtendedRecord> rawRecords = p.apply(
      "Read from Darwin Core Archive", DwCAIO.Read.withPaths("/tmp/dwca-s-bryophytes-v4.1.zip", "demo/target/tmp"));

  // Convert the ExtendedRecord into an UntypedOccurrence record
  PCollection<UntypedOccurrence> verbatimRecords = rawRecords.apply(
      "Convert the objects into untyped DwC style records",
      ParDo.of(BeamFunctions.beamify(FunctionFactory.untypedOccurrenceBuilder())))
      .setCoder(AvroCoder.of(UntypedOccurrence.class));

  // Write the file to SOLR
  final SolrIO.ConnectionConfiguration conn = SolrIO.ConnectionConfiguration
      .create(SOLR_HOSTS);
  PCollection<SolrInputDocument> inputDocs = verbatimRecords.apply(
      "Convert to SOLR", ParDo.of(new SolrDocBuilder()));
  inputDocs.apply(SolrIO.write().to("beam-demo1").withConnectionConfiguration(conn));

  LOG.info("Starting the pipeline");
  PipelineResult result = p.run();
  result.waitUntilFinish();
  LOG.info("Pipeline finished with state: {} ", result.getState());
}
Example 10: queryMatchesModel
import org.apache.beam.sdk.PipelineResult; // import the package/class this method depends on

/** Test {@code query} matches {@code model}. */
private void queryMatchesModel(
    String name, NexmarkQuery query, NexmarkQueryModel model, boolean streamingMode) {
  NexmarkUtils.setupPipeline(NexmarkUtils.CoderStrategy.HAND, p);
  PCollection<TimestampedValue<KnownSize>> results;
  if (streamingMode) {
    results =
        p.apply(name + ".ReadUnBounded", NexmarkUtils.streamEventsSource(CONFIG)).apply(query);
  } else {
    results = p.apply(name + ".ReadBounded", NexmarkUtils.batchEventsSource(CONFIG)).apply(query);
  }
  PAssert.that(results).satisfies(model.assertionFor());
  PipelineResult result = p.run();
  result.waitUntilFinish();
}
Example 11: main
import org.apache.beam.sdk.PipelineResult; // import the package/class this method depends on

public static void main(String[] args) throws IOException {
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  final String output = options.getOutput();
  final Instant minTimestamp = new Instant(options.getMinTimestampMillis());
  final Instant maxTimestamp = new Instant(options.getMaxTimestampMillis());
  Pipeline pipeline = Pipeline.create(options);

  /**
   * Concept #1: the Beam SDK lets us run the same pipeline with either a bounded or
   * unbounded input source.
   */
  PCollection<String> input = pipeline
      /** Read from the GCS file. */
      .apply(TextIO.read().from(options.getInputFile()))
      // Concept #2: Add an element timestamp, using an artificial time just to show windowing.
      // See AddTimestampFn for more detail on this.
      .apply(ParDo.of(new AddTimestampFn(minTimestamp, maxTimestamp)));

  /**
   * Concept #3: Window into fixed windows. The fixed window size for this example defaults to 1
   * minute (you can change this with a command-line option). See the documentation for more
   * information on how fixed windows work, and for information on the other types of windowing
   * available (e.g., sliding windows).
   */
  PCollection<String> windowedWords =
      input.apply(
          Window.<String>into(
              FixedWindows.of(Duration.standardMinutes(options.getWindowSize()))));

  /**
   * Concept #4: Re-use our existing CountWords transform that does not have knowledge of
   * windows over a PCollection containing windowed values.
   */
  PCollection<KV<String, Long>> wordCounts = windowedWords.apply(new WordCount.CountWords());

  /**
   * Concept #5: Format the results and write to a sharded file partitioned by window, using a
   * simple ParDo operation. Because there may be failures followed by retries, the
   * writes must be idempotent, but the details of writing to files are elided here.
   */
  wordCounts
      .apply(MapElements.via(new WordCount.FormatAsTextFn()))
      .apply(new WriteOneFilePerWindow(output, options.getNumShards()));

  PipelineResult result = pipeline.run();
  try {
    result.waitUntilFinish();
  } catch (Exception exc) {
    result.cancel();
  }
}
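Continuing from the pipeline built in the example above: waitUntilFinish() also returns the terminal PipelineResult.State, so a caller could fail fast when the run does not end in DONE instead of only cancelling on exceptions. A minimal sketch of that variant (the exception type and message are illustrative, not from the example):

PipelineResult result = pipeline.run();
PipelineResult.State state = result.waitUntilFinish();
// DONE, FAILED and CANCELLED are all terminal states; only DONE indicates success.
if (state != PipelineResult.State.DONE) {
  throw new RuntimeException("Pipeline terminated in state " + state);
}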
Example 12: main
import org.apache.beam.sdk.PipelineResult; // import the package/class this method depends on

public static void main(String[] args) throws GeneralSecurityException, IOException, URISyntaxException {
  // Register the options so that they show up via --help
  PipelineOptionsFactory.register(Options.class);
  pipelineOptions = PipelineOptionsFactory.fromArgs(args)
      .withValidation().as(Options.class);
  // Option validation is not yet automatic, so we make an explicit call here.
  Options.Methods.validateOptions(pipelineOptions);

  auth = GenomicsOptions.Methods.getGenomicsAuth(pipelineOptions);
  p = Pipeline.create(pipelineOptions);

  // ensure data is accessible
  String BAMFilePath = pipelineOptions.getBAMFilePath();
  if (!Strings.isNullOrEmpty(BAMFilePath)) {
    if (GCSURLExists(BAMFilePath)) {
      System.out.println(BAMFilePath + " is present, good.");
    } else {
      System.out.println("Error: " + BAMFilePath + " not found.");
      return;
    }
    if (pipelineOptions.isShardBAMReading()) {
      // the BAM code expects an index at BAMFilePath+".bai"
      // and sharded reading will fail if the index isn't there.
      String BAMIndexPath = BAMFilePath + ".bai";
      if (GCSURLExists(BAMIndexPath)) {
        System.out.println(BAMIndexPath + " is present, good.");
      } else {
        System.out.println("Error: " + BAMIndexPath + " not found.");
        return;
      }
    }
  }
  System.out.println("Output will be written to " + pipelineOptions.getOutput());

  PCollection<Read> reads = getReads();
  PCollection<Long> readCount = reads.apply(Count.<Read>globally());
  PCollection<String> readCountText = readCount.apply("toString", ParDo.of(new DoFn<Long, String>() {
    @ProcessElement
    public void processElement(DoFn<Long, String>.ProcessContext c) throws Exception {
      c.output(String.valueOf(c.element()));
    }
  }));
  readCountText.apply("WriteOutput", TextIO.write().to(pipelineOptions.getOutput()).withoutSharding());

  PipelineResult result = p.run();
  if (pipelineOptions.getWait()) {
    result.waitUntilFinish();
  }
}
Example 13: main
import org.apache.beam.sdk.PipelineResult; // import the package/class this method depends on

public static void main(String[] args) throws IOException, GeneralSecurityException {
  // Register the options so that they show up via --help
  PipelineOptionsFactory.register(Options.class);
  Options options = PipelineOptionsFactory.fromArgs(args)
      .withValidation().as(Options.class);
  // Option validation is not yet automatic, so we make an explicit call here.
  Options.Methods.validateOptions(options);

  // Set up the prototype request and auth.
  StreamVariantsRequest prototype = CallSetNamesOptions.Methods.getRequestPrototype(options);
  OfflineAuth auth = GenomicsOptions.Methods.getGenomicsAuth(options);

  // Make a bimap of the callsets so that the indices the pipeline is passing around are small.
  List<String> callSetNames = (0 < prototype.getCallSetIdsCount())
      ? Lists.newArrayList(CallSetNamesOptions.Methods.getCallSetNames(options))
      : GenomicsUtils.getCallSetsNames(options.getVariantSetId(), auth);
  Collections.sort(callSetNames); // Ensure a stable sort order for reproducible results.
  BiMap<String, Integer> dataIndices = HashBiMap.create();
  for (String callSetName : callSetNames) {
    dataIndices.put(callSetName, dataIndices.size());
  }

  Pipeline p = Pipeline.create(options);
  p.begin();

  PCollection<StreamVariantsRequest> requests;
  if (null != options.getSitesFilepath()) {
    // Compute PCA on a list of sites.
    requests = p.apply("ReadSites", TextIO.read().from(options.getSitesFilepath()))
        .apply(new SitesToShards.SitesToStreamVariantsShardsTransform(prototype));
  } else {
    // Compute PCA over genomic regions.
    List<StreamVariantsRequest> shardRequests = options.isAllReferences()
        ? ShardUtils.getVariantRequests(prototype, ShardUtils.SexChromosomeFilter.EXCLUDE_XY,
            options.getBasesPerShard(), auth)
        : ShardUtils.getVariantRequests(prototype, options.getBasesPerShard(), options.getReferences());
    requests = p.apply(Create.of(shardRequests));
  }

  requests.apply(new VariantStreamer(auth, ShardBoundary.Requirement.STRICT, VARIANT_FIELDS))
      .apply(ParDo.of(new ExtractSimilarCallsets()))
      .apply(new OutputPCoAFile(dataIndices, options.getOutput()));

  PipelineResult result = p.run();
  if (options.getWait()) {
    result.waitUntilFinish();
  }
}
Example 14: main
import org.apache.beam.sdk.PipelineResult; // import the package/class this method depends on

public static void main(String[] args) throws IOException, GeneralSecurityException {
  // Register the options so that they show up via --help.
  PipelineOptionsFactory.register(TransformNonVariantSegmentData.Options.class);
  TransformNonVariantSegmentData.Options options =
      PipelineOptionsFactory.fromArgs(args).withValidation()
          .as(TransformNonVariantSegmentData.Options.class);

  Preconditions.checkState(options.getHasNonVariantSegments(),
      "This job is only valid for data containing non-variant segments. "
          + "Set the --hasNonVariantSegments command line option accordingly.");

  Map<String, Set<String>> cohortMap = new HashMap<String, Set<String>>();
  // Always include the default cohort.
  cohortMap.put(ALL_SAMPLES_COHORT, ImmutableSet.<String>builder().build());
  if (!Strings.isNullOrEmpty(options.getCohorts())) {
    List<String> cohortFilenames = Splitter.on(",").splitToList(options.getCohorts());
    for (String cohort : cohortFilenames) {
      cohortMap.put(cohort, ImmutableSet
          .<String>builder()
          .addAll(
              Splitter.on(CharMatcher.breakingWhitespace()).omitEmptyStrings().trimResults()
                  .split(Files.toString(new File(cohort), Charset.defaultCharset()))).build());
    }
  }

  // Set up the prototype request and auth.
  StreamVariantsRequest prototype = CallSetNamesOptions.Methods.getRequestPrototype(options);
  final OfflineAuth auth = GenomicsOptions.Methods.getGenomicsAuth(options);

  List<StreamVariantsRequest> requests = options.isAllReferences()
      ? ShardUtils.getVariantRequests(prototype, ShardUtils.SexChromosomeFilter.INCLUDE_XY,
          options.getBasesPerShard(), auth)
      : ShardUtils.getVariantRequests(prototype, options.getBasesPerShard(), options.getReferences());

  Pipeline p = Pipeline.create(options);

  // Create a collection of data with non-variant segments omitted but calls from overlapping
  // non-variant segments added to SNPs and write them to BigQuery.
  p.begin()
      .apply(Create.of(requests))
      .apply(new VariantStreamer(auth, ShardBoundary.Requirement.STRICT, VARIANT_API_FIELDS))
      .apply(ParDo.of(new FilterCallsFn(options.getOmitLowQualityCalls())))
      .apply(new JoinNonVariantSegmentsWithVariants.BinShuffleAndCombineTransform())
      .apply(ParDo.of(new FlagVariantsWithAmbiguousCallsFn()))
      .apply(ParDo.of(new FormatVariantsFn(options.getSummarizeRefMatchCallSets(),
          !options.getVariantMergeStrategy().equals(MergeNonVariantSegmentsWithSnps.class),
          cohortMap)))
      .apply(
          BigQueryIO.writeTableRows().to(options.getOutputTable())
              .withSchema(getTableSchema(options.getSummarizeRefMatchCallSets(), cohortMap.keySet()))
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
              .withWriteDisposition(options.getAppendToTable()
                  ? BigQueryIO.Write.WriteDisposition.WRITE_APPEND
                  : BigQueryIO.Write.WriteDisposition.WRITE_EMPTY));

  PipelineResult result = p.run();
  if (options.getWait()) {
    result.waitUntilFinish();
  }
}