This article collects typical usage examples of the Java method org.apache.beam.sdk.Pipeline.run. If you have been wondering what Pipeline.run does, how to use it, or where to find concrete examples, the curated code samples below should help. You can also explore the containing class, org.apache.beam.sdk.Pipeline, for more context.
The 15 code examples of Pipeline.run below are sorted by popularity by default.
Example 1: main
import org.apache.beam.sdk.Pipeline; // import the package/class this method depends on
/** Run a batch pipeline to calculate hourly team scores. */
public static void main(String[] args) throws Exception {
  Options options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  Pipeline pipeline = Pipeline.create(options);
  pipeline
      .apply("ReadLogs", TextIO.read().from(options.getInput()))
      .apply("SetTimestamps", WithTimestamps.of(new SetTimestampFn()))
      .apply("FixedWindows", Window.<String>into(FixedWindows.of(ONE_HOUR)))
      .apply("TeamScores", new CalculateTeamScores(options.getOutputPrefix()));
  pipeline.run();
}
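This example depends on an Options interface, a SetTimestampFn, and a ONE_HOUR constant that the snippet does not show. A minimal sketch of what they might look like (the names, fields, and log format here are assumptions, not the original source):

// Hypothetical supporting pieces for Example 1; names and log format are assumed.
import org.apache.beam.sdk.options.Description;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.Validation;
import org.apache.beam.sdk.transforms.SerializableFunction;
import org.joda.time.Duration;
import org.joda.time.Instant;

class HourlyTeamScoreSupport {

  /** The fixed-window size used by the pipeline. */
  static final Duration ONE_HOUR = Duration.standardHours(1);

  /** Command-line options consumed via PipelineOptionsFactory...as(Options.class). */
  public interface Options extends PipelineOptions {
    @Description("Path of the log file(s) to read")
    @Validation.Required
    String getInput();
    void setInput(String value);

    @Description("Prefix for the output files")
    @Validation.Required
    String getOutputPrefix();
    void setOutputPrefix(String value);
  }

  /**
   * WithTimestamps.of(...) takes a SerializableFunction&lt;T, Instant&gt;.
   * Here we assume each log line starts with an epoch-millis timestamp field.
   */
  static class SetTimestampFn implements SerializableFunction<String, Instant> {
    @Override
    public Instant apply(String line) {
      return new Instant(Long.parseLong(line.split(",")[0]));
    }
  }
}

Any function with the SerializableFunction<T, Instant> shape works for WithTimestamps; the parsing above is just one plausible choice.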
Example 2: testGcsUploadBufferSizeIsSetForStreamingWhenDefault
import org.apache.beam.sdk.Pipeline; // import the package/class this method depends on
@Test
public void testGcsUploadBufferSizeIsSetForStreamingWhenDefault() throws IOException {
  DataflowPipelineOptions streamingOptions = buildPipelineOptions();
  streamingOptions.setStreaming(true);
  streamingOptions.setRunner(DataflowRunner.class);
  Pipeline p = Pipeline.create(streamingOptions);

  // Instantiation of a runner prior to run() currently has a side effect of mutating the options.
  // This could be tested by DataflowRunner.fromOptions(streamingOptions) but would not ensure
  // that the pipeline itself had the expected options set.
  p.run();

  assertEquals(
      DataflowRunner.GCS_UPLOAD_BUFFER_SIZE_BYTES_DEFAULT,
      streamingOptions.getGcsUploadBufferSizeBytes().intValue());
}
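buildPipelineOptions() is a private helper from the Dataflow runner's test suite and is not shown. A plausible minimal sketch (assumed; the real helper presumably also stubs out GCS access so that run() does not touch the network):

// A minimal sketch of buildPipelineOptions() (assumed, not the original helper).
private static DataflowPipelineOptions buildPipelineOptions() {
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setProject("some-project");         // the Dataflow runner requires a project
  options.setTempLocation("gs://bucket/tmp"); // and a GCS temp/staging location
  return options;
}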
Example 3: main
import org.apache.beam.sdk.Pipeline; // import the package/class this method depends on
/**
 * Runs the DatastoreToGcs dataflow pipeline.
 */
public static void main(String[] args) throws IOException, ScriptException {
  Options options = PipelineOptionsFactory.fromArgs(args)
      .withValidation()
      .as(Options.class);
  options.setRunner(DataflowRunner.class);

  Pipeline pipeline = Pipeline.create(options);

  pipeline
      .apply("IngestEntities",
          DatastoreIO.v1().read()
              .withProjectId(options.getDatastoreProjectId())
              .withLiteralGqlQuery(options.getGqlQuery())
              .withNamespace(options.getNamespace()))
      .apply("EntityToJson", ParDo.of(EntityToJson.newBuilder()
          .setJsTransformPath(options.getJsTransformPath())
          .setJsTransformFunctionName(options.getJsTransformFunctionName())
          .build()))
      .apply("JsonToGcs", TextIO.write().to(options.getSavePath())
          .withSuffix(".json"));

  pipeline.run();
}
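The Options interface is not shown; Examples 3, 4, and 14 appear to share a similar one (Example 4 would additionally need a getJsonPathPrefix() getter, and Example 14 BigQuery-specific ones). A sketch under the assumption of plain String parameters; the real pipelines may well use ValueProvider getters for Dataflow template support:

// Assumed shape of the Options interface used by the Datastore pipelines above.
import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions;
import org.apache.beam.sdk.options.Description;
import org.apache.beam.sdk.options.Validation;

public interface Options extends DataflowPipelineOptions {
  @Description("GCP project that owns the Datastore entities")
  @Validation.Required
  String getDatastoreProjectId();
  void setDatastoreProjectId(String value);

  @Description("Literal GQL query selecting the entities to export")
  @Validation.Required
  String getGqlQuery();
  void setGqlQuery(String value);

  @Description("Datastore namespace to read from")
  String getNamespace();
  void setNamespace(String value);

  @Description("GCS prefix for the JSON output files")
  String getSavePath();
  void setSavePath(String value);

  @Description("GCS path of an optional JavaScript transform file")
  String getJsTransformPath();
  void setJsTransformPath(String value);

  @Description("Name of the JavaScript transform function to invoke")
  String getJsTransformFunctionName();
  void setJsTransformFunctionName(String value);
}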
Example 4: main
import org.apache.beam.sdk.Pipeline; // import the package/class this method depends on
/**
 * Runs the GcsToDatastore dataflow pipeline.
 */
public static void main(String[] args) throws IOException, ScriptException {
  Options options = PipelineOptionsFactory.fromArgs(args)
      .withValidation()
      .as(Options.class);
  options.setRunner(DataflowRunner.class);

  Pipeline pipeline = Pipeline.create(options);

  pipeline
      .apply("IngestJson", TextIO.read()
          .from(options.getJsonPathPrefix()))
      .apply("GcsToEntity", ParDo.of(JsonToEntity.newBuilder()
          .setJsTransformPath(options.getJsTransformPath())
          .setJsTransformFunctionName(options.getJsTransformFunctionName())
          .build()))
      .apply(DatastoreIO.v1().write()
          .withProjectId(options.getDatastoreProjectId()));

  pipeline.run();
}
Example 5: main
import org.apache.beam.sdk.Pipeline; // import the package/class this method depends on
public static void main(String[] args) throws Exception {
  Options options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  Pipeline pipeline = Pipeline.create(options);
  pipeline
      .apply(KafkaIO.<String, String>read()
          .withBootstrapServers(options.getKafkaBootstrapServer())
          .withTopic(options.getTopic())
          .withKeyDeserializer(StringDeserializer.class)
          .withValueDeserializer(StringDeserializer.class)
          .withTimestampFn(new SetTimestampFn()))
      .apply("Values", ParDo.of(new ValuesFn()))
      .apply("FixedWindows", Window.<String>into(FixedWindows.of(FIVE_MINUTES))
          .triggering(AfterWatermark.pastEndOfWindow()
              .withEarlyFirings(AfterProcessingTime.pastFirstElementInPane()
                  .plusDelayOf(TWO_MINUTES))
              .withLateFirings(AfterPane.elementCountAtLeast(1)))
          .withAllowedLateness(TEN_MINUTES)
          .accumulatingFiredPanes())
      .apply("TeamScore", new CalculateTeamScores(options.getOutputPrefix()));
  pipeline.run();
}
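The window and trigger constants, plus ValuesFn, are not shown. Note that SetTimestampFn here would be a SerializableFunction<KV<String, String>, Instant>, since that is what KafkaIO's withTimestampFn expects, so it differs from Example 1's. A sketch of the remaining pieces, assuming ValuesFn simply extracts each record's value (KafkaIO.read() without .withoutMetadata() yields KafkaRecord<K, V>):

// Assumed supporting pieces for Example 5; the original source does not show them.
import org.apache.beam.sdk.io.kafka.KafkaRecord;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.DoFn.ProcessElement;
import org.joda.time.Duration;

class StreamingSupport {
  // Window size, early-firing delay, and allowed lateness used above.
  static final Duration FIVE_MINUTES = Duration.standardMinutes(5);
  static final Duration TWO_MINUTES = Duration.standardMinutes(2);
  static final Duration TEN_MINUTES = Duration.standardMinutes(10);

  // Unwraps the Kafka record down to its String value for downstream transforms.
  static class ValuesFn extends DoFn<KafkaRecord<String, String>, String> {
    @ProcessElement
    public void processElement(ProcessContext c) {
      c.output(c.element().getKV().getValue());
    }
  }
}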
Example 6: main
import org.apache.beam.sdk.Pipeline; // import the package/class this method depends on
public static void main(String[] args) throws Exception {
  PipelineOptionsFactory.register(TemplateOptions.class);
  TemplateOptions options = PipelineOptionsFactory
      .fromArgs(args)
      .withValidation()
      .as(TemplateOptions.class);
  options.setAutoscalingAlgorithm(THROUGHPUT_BASED);
  Pipeline pipeline = Pipeline.create(options);
  pipeline.apply(BigQueryIO.read().from(options.getBigQueryTableName()))
      .apply(ParDo.of(new DoFn<TableRow, String>() {
        @ProcessElement
        public void processElement(ProcessContext c) throws Exception {
          String commaSep = c.element().values()
              .stream()
              .map(cell -> cell.toString().trim())
              .collect(Collectors.joining("\",\""));
          c.output(commaSep);
        }
      }))
      .apply(TextIO.write().to(options.getOutputFile())
          .withoutSharding()
          .withWritableByteChannelFactory(GZIP));
  pipeline.run();
}
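THROUGHPUT_BASED and GZIP are presumably static imports of the Dataflow autoscaling mode and the gzip WritableByteChannelFactory, and TemplateOptions is not shown. A sketch of the assumed pieces:

// Assumed statics and options for Example 6 (not shown in the original source).
import static org.apache.beam.runners.dataflow.options.DataflowPipelineWorkerPoolOptions.AutoscalingAlgorithmType.THROUGHPUT_BASED;
import static org.apache.beam.sdk.io.FileBasedSink.CompressionType.GZIP;

import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions;
import org.apache.beam.sdk.options.Description;
import org.apache.beam.sdk.options.ValueProvider;

public interface TemplateOptions extends DataflowPipelineOptions {
  @Description("BigQuery table to export, as project:dataset.table")
  ValueProvider<String> getBigQueryTableName();
  void setBigQueryTableName(ValueProvider<String> value);

  @Description("Destination path of the compressed output file")
  ValueProvider<String> getOutputFile();
  void setOutputFile(ValueProvider<String> value);
}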
Example 7: main
import org.apache.beam.sdk.Pipeline; // import the package/class this method depends on
public static void main(String[] args) {
  PipelineOptions options = PipelineOptionsFactory.create();
  options.setRunner(DirectRunner.class); // forced for this demo

  Pipeline p = Pipeline.create(options);

  // register Avro coders for serializing our messages
  Coders.registerAvroCoders(p, ExtendedRecord.class, UntypedOccurrence.class);

  PCollection<UntypedOccurrence> verbatimRecords = p.apply(
      "Read Avro", AvroIO.read(UntypedOccurrence.class).from("demo/output/data*"));

  verbatimRecords.apply("Write file per Genus",
      AvroIO.write(UntypedOccurrence.class)
          .to("demo/output-split/data*") // a prefix is required here, but it is overridden below
          .to(new GenusDynamicAvroDestinations(
              FileSystems.matchNewResource("demo/output-split/data*", true))));

  LOG.info("Starting the pipeline");
  PipelineResult result = p.run();
  result.waitUntilFinish();
  LOG.info("Pipeline finished with state: {}", result.getState());
}
Example 8: testMutatingOutputThenOutputDoFnError
import org.apache.beam.sdk.Pipeline; // import the package/class this method depends on
/**
 * Tests that a {@link DoFn} that mutates an output with a good equals() fails in the
 * {@link DirectRunner}.
 */
@Test
public void testMutatingOutputThenOutputDoFnError() throws Exception {
  Pipeline pipeline = getPipeline();
  pipeline
      .apply(Create.of(42))
      .apply(ParDo.of(new DoFn<Integer, List<Integer>>() {
        @ProcessElement
        public void processElement(ProcessContext c) {
          List<Integer> outputList = Arrays.asList(1, 2, 3, 4);
          c.output(outputList);
          outputList.set(0, 37);
          c.output(outputList);
        }
      }));

  thrown.expect(IllegalMutationException.class);
  thrown.expectMessage("output");
  thrown.expectMessage("must not be mutated");
  pipeline.run();
}
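The mutation above is exactly what the DirectRunner's immutability enforcement catches: the element handed to c.output() is changed afterwards. A minimal sketch of the safe pattern is to emit an independent copy instead of mutating something already output:

// Safe variant (sketch): never mutate an element after handing it to c.output().
@ProcessElement
public void processElement(ProcessContext c) {
  List<Integer> outputList = Arrays.asList(1, 2, 3, 4);
  c.output(new ArrayList<>(outputList)); // copy #1, independent of outputList

  List<Integer> second = new ArrayList<>(outputList);
  second.set(0, 37);                     // mutate only the private copy
  c.output(second);                      // copy #2
}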
Example 9: testMutatingOutputWithEnforcementDisabledSucceeds
import org.apache.beam.sdk.Pipeline; // import the package/class this method depends on
/**
 * Tests that a {@link DoFn} that mutates an output succeeds in the {@link DirectRunner}
 * when immutability enforcement is disabled.
 */
@Test
public void testMutatingOutputWithEnforcementDisabledSucceeds() throws Exception {
  PipelineOptions options = PipelineOptionsFactory.create();
  options.setRunner(DirectRunner.class);
  options.as(DirectOptions.class).setEnforceImmutability(false);

  Pipeline pipeline = Pipeline.create(options);
  pipeline
      .apply(Create.of(42))
      .apply(ParDo.of(new DoFn<Integer, List<Integer>>() {
        @ProcessElement
        public void processElement(ProcessContext c) {
          List<Integer> outputList = Arrays.asList(1, 2, 3, 4);
          c.output(outputList);
          outputList.set(0, 37);
          c.output(outputList);
        }
      }));

  pipeline.run();
}
Example 10: runProgram
import org.apache.beam.sdk.Pipeline; // import the package/class this method depends on
private static void runProgram(String resultPath) throws Exception {
  Pipeline p = FlinkTestPipeline.createForBatch();

  PCollection<String> result = p
      .apply(GenerateSequence.from(0).to(10))
      .apply(ParDo.of(new DoFn<Long, String>() {
        @ProcessElement
        public void processElement(ProcessContext c) throws Exception {
          c.output(c.element().toString());
        }
      }));

  result.apply(TextIO.write().to(new URI(resultPath).getPath() + "/part"));

  p.run();
}
Example 11: testE2EV1ReadWithGQLQuery
import org.apache.beam.sdk.Pipeline; // import the package/class this method depends on
/**
 * An end-to-end test for {@link DatastoreV1.Read#withLiteralGqlQuery(String)}.
 *
 * <p>Writes some test entities to Datastore and then runs a pipeline that
 * reads and counts the total number of entities. Verifies that the count matches
 * the number of entities written.
 */
private void testE2EV1ReadWithGQLQuery(long limit) throws Exception {
  String gqlQuery = String.format(
      "SELECT * from %s WHERE __key__ HAS ANCESTOR KEY(%s, '%s')",
      options.getKind(), options.getKind(), ancestor);

  long expectedNumEntities = numEntities;
  if (limit > 0) {
    gqlQuery = String.format("%s LIMIT %d", gqlQuery, limit);
    expectedNumEntities = limit;
  }

  DatastoreV1.Read read = DatastoreIO.v1().read()
      .withProjectId(project)
      .withLiteralGqlQuery(gqlQuery)
      .withNamespace(options.getNamespace());

  // Count the total number of entities
  Pipeline p = Pipeline.create(options);
  PCollection<Long> count = p
      .apply(read)
      .apply(Count.<Entity>globally());

  PAssert.thatSingleton(count).isEqualTo(expectedNumEntities);
  p.run();
}
Example 12: deploy
import org.apache.beam.sdk.Pipeline; // import the package/class this method depends on
/** Deploys the invoicing pipeline as a template on GCS, for a given projectID and GCS bucket. */
public void deploy() {
  // We can't store options as a member variable due to serialization concerns.
  InvoicingPipelineOptions options = PipelineOptionsFactory.as(InvoicingPipelineOptions.class);
  options.setProject(projectId);
  options.setRunner(DataflowRunner.class);
  options.setStagingLocation(beamBucket + "/staging");
  options.setTemplateLocation(beamBucket + "/templates/invoicing");

  Pipeline p = Pipeline.create(options);

  PCollection<BillingEvent> billingEvents =
      p.apply(
          "Read BillingEvents from Bigquery",
          BigQueryIO.read(BillingEvent::parseFromRecord)
              .fromQuery(InvoicingUtils.makeQueryProvider(options.getYearMonth(), projectId))
              .withCoder(SerializableCoder.of(BillingEvent.class))
              .usingStandardSql()
              .withoutValidation()
              .withTemplateCompatibility());

  applyTerminalTransforms(billingEvents, options.getYearMonth());
  p.run();
}
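BigQueryIO.read(...) here takes a SerializableFunction<SchemaAndRecord, T>, so BillingEvent.parseFromRecord must have that shape. A purely illustrative sketch; the field names are hypothetical and the real class lives elsewhere in the same codebase:

// Hypothetical sketch of BillingEvent.parseFromRecord (assumed, not the original code).
import org.apache.avro.generic.GenericRecord;
import org.apache.beam.sdk.io.gcp.bigquery.SchemaAndRecord;

class BillingEvent {
  final String registrar; // hypothetical fields, for illustration only
  final double amount;

  private BillingEvent(String registrar, double amount) {
    this.registrar = registrar;
    this.amount = amount;
  }

  // BigQueryIO hands each query result row to this function as an Avro GenericRecord.
  static BillingEvent parseFromRecord(SchemaAndRecord schemaAndRecord) {
    GenericRecord r = schemaAndRecord.getRecord();
    return new BillingEvent(String.valueOf(r.get("registrar")), (Double) r.get("amount"));
  }
}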
Example 13: main
import org.apache.beam.sdk.Pipeline; // import the package/class this method depends on
public static void main(String[] args) {
  PipelineOptions options = PipelineOptionsFactory.create();
  options.setRunner(DirectRunner.class); // forced for this demo

  Pipeline p = Pipeline.create(options);

  // register Avro coders for serializing our messages
  Coders.registerAvroCoders(p, ExtendedRecord.class, UntypedOccurrence.class);

  // Read the DwC-A using our custom reader
  PCollection<ExtendedRecord> rawRecords = p.apply(
      "Read from Darwin Core Archive",
      DwCAIO.Read.withPaths("/tmp/dwca-s-bryophytes-v4.1.zip", "demo/target/tmp"));

  // Convert the ExtendedRecord into an UntypedOccurrence record
  PCollection<UntypedOccurrence> verbatimRecords = rawRecords.apply(
      "Convert the objects into untyped DwC style records",
      ParDo.of(BeamFunctions.beamify(FunctionFactory.untypedOccurrenceBuilder())))
      .setCoder(AvroCoder.of(UntypedOccurrence.class));

  // Write the records to SOLR
  final SolrIO.ConnectionConfiguration conn = SolrIO.ConnectionConfiguration
      .create(SOLR_HOSTS);

  PCollection<SolrInputDocument> inputDocs = verbatimRecords.apply(
      "Convert to SOLR", ParDo.of(new SolrDocBuilder()));

  inputDocs.apply(SolrIO.write().to("beam-demo1").withConnectionConfiguration(conn));

  LOG.info("Starting the pipeline");
  PipelineResult result = p.run();
  result.waitUntilFinish();
  LOG.info("Pipeline finished with state: {}", result.getState());
}
Example 14: main
import org.apache.beam.sdk.Pipeline; // import the package/class this method depends on
/**
 * Runs the DatastoreToBigQuery dataflow pipeline.
 */
public static void main(String[] args) throws IOException, ScriptException {
  Options options = PipelineOptionsFactory.fromArgs(args)
      .withValidation()
      .as(Options.class);
  NestedValueProvider<String, String> bqJsonSchema = NestedValueProvider
      .of(options.getBqJsonSchema(), new ValueProviderHelpers.GcsLoad());
  options.setRunner(DataflowRunner.class);

  Pipeline pipeline = Pipeline.create(options);

  pipeline
      .apply("IngestEntities",
          DatastoreIO.v1().read()
              .withProjectId(options.getDatastoreProjectId())
              .withLiteralGqlQuery(options.getGqlQuery())
              .withNamespace(options.getNamespace()))
      .apply("EntityToTableRow", ParDo.of(EntityToTableRow.newBuilder()
          .setJsTransformPath(options.getJsTransformPath())
          .setJsTransformFunctionName(options.getJsTransformFunctionName())
          .setStrictCast(options.getStrictCast())
          .setTableSchemaJson(bqJsonSchema)
          .build()))
      .apply("TableRowToBigQuery", BigQueryIO.writeTableRows()
          .to(options.getBqTableSpec())
          .withJsonSchema(bqJsonSchema)
          .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
          .withWriteDisposition(WriteDisposition.WRITE_APPEND));

  pipeline.run();
}
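NestedValueProvider.of(provider, fn) applies fn to the provider's value when it is first read at run time, which is what lets a templated pipeline load the BigQuery schema from GCS at launch rather than at compile time. ValueProviderHelpers.GcsLoad is presumably a SerializableFunction<String, String> that returns the file's contents; a rough sketch under that assumption:

// Hypothetical sketch of ValueProviderHelpers.GcsLoad (assumed, not the original code):
// reads a whole file via Beam's FileSystems API so the schema path supplied at
// template launch time is resolved when the pipeline actually runs.
import java.io.IOException;
import java.nio.channels.Channels;
import java.nio.channels.ReadableByteChannel;
import java.nio.charset.StandardCharsets;
import org.apache.beam.sdk.io.FileSystems;
import org.apache.beam.sdk.transforms.SerializableFunction;
import com.google.common.io.ByteStreams;

class GcsLoad implements SerializableFunction<String, String> {
  @Override
  public String apply(String path) {
    try (ReadableByteChannel channel =
        FileSystems.open(FileSystems.matchSingleFileSpec(path).resourceId())) {
      return new String(
          ByteStreams.toByteArray(Channels.newInputStream(channel)), StandardCharsets.UTF_8);
    } catch (IOException e) {
      throw new RuntimeException("Failed to load " + path, e);
    }
  }
}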
Example 15: main
import org.apache.beam.sdk.Pipeline; // import the package/class this method depends on
public static void main(String[] args) {
  PipelineOptionsFactory.register(TemplateOptions.class);
  TemplateOptions options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(TemplateOptions.class);
  Pipeline pipeline = Pipeline.create(options);
  pipeline.apply("READ", TextIO.read().from(options.getInputFile()).withCompressionType(GZIP))
      .apply("TRANSFORM", ParDo.of(new WikiParDo()))
      .apply("WRITE", BigQueryIO.writeTableRows()
          .to(String.format("%s:devfest_melbourne_2017.wiki_demo", options.getProject()))
          .withCreateDisposition(CREATE_IF_NEEDED)
          .withWriteDisposition(WRITE_APPEND)
          .withSchema(getTableSchema()));
  pipeline.run();
}
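GZIP, CREATE_IF_NEEDED, and WRITE_APPEND are presumably static imports of TextIO's compression type and BigQueryIO's write dispositions, while getProject() is inherited from the Dataflow pipeline options. A sketch of the assumed supporting pieces:

// Assumed statics and options for Example 15 (not shown in the original source).
import static org.apache.beam.sdk.io.TextIO.CompressionType.GZIP;
import static org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED;
import static org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.WriteDisposition.WRITE_APPEND;

import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions;
import org.apache.beam.sdk.options.Description;
import org.apache.beam.sdk.options.ValueProvider;

// getProject() comes from DataflowPipelineOptions; only the input needs declaring here.
public interface TemplateOptions extends DataflowPipelineOptions {
  @Description("Compressed wiki dump to read")
  ValueProvider<String> getInputFile();
  void setInputFile(ValueProvider<String> value);
}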