当前位置: 首页>>代码示例>>Java>>正文


Java Pipeline.run方法代码示例

本文整理汇总了Java中org.apache.beam.sdk.Pipeline.run方法的典型用法代码示例。如果您正苦于以下问题:Java Pipeline.run方法的具体用法?Java Pipeline.run怎么用?Java Pipeline.run使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在org.apache.beam.sdk.Pipeline的用法示例。


在下文中一共展示了Pipeline.run方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: main

import org.apache.beam.sdk.Pipeline; //导入方法依赖的package包/类
/**
 * Builds and runs the batch pipeline that calculates hourly team scores.
 *
 * @param args command-line arguments, parsed and validated into {@code Options}
 * @throws Exception if option parsing, pipeline construction, or the run fails
 */
public static void main(String[] args) throws Exception {
  Options opts = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  Pipeline p = Pipeline.create(opts);

  // Elements are grouped into fixed windows of ONE_HOUR before scoring.
  Window<String> hourlyWindows = Window.<String>into(FixedWindows.of(ONE_HOUR));

  p.apply("ReadLogs", TextIO.read().from(opts.getInput()))
      .apply("SetTimestamps", WithTimestamps.of(new SetTimestampFn()))
      .apply("FixedWindows", hourlyWindows)
      .apply("TeamScores", new CalculateTeamScores(opts.getOutputPrefix()));

  p.run();
}
 
开发者ID:davorbonaci,项目名称:beam-portability-demo,代码行数:18,代码来源:HourlyTeamScore.java

示例2: testGcsUploadBufferSizeIsSetForStreamingWhenDefault

import org.apache.beam.sdk.Pipeline; //导入方法依赖的package包/类
/**
 * Runs a streaming pipeline configured for {@code DataflowRunner} and checks that the options
 * afterwards carry {@code DataflowRunner.GCS_UPLOAD_BUFFER_SIZE_BYTES_DEFAULT} as the GCS upload
 * buffer size.
 */
@Test
public void testGcsUploadBufferSizeIsSetForStreamingWhenDefault() throws IOException {
  DataflowPipelineOptions opts = buildPipelineOptions();
  opts.setStreaming(true);
  opts.setRunner(DataflowRunner.class);
  Pipeline pipeline = Pipeline.create(opts);

  // Creating the runner ahead of run() currently mutates the options as a side effect.
  // DataflowRunner.fromOptions(opts) could exercise that directly, but it would not prove
  // that the pipeline itself ended up with the expected options, so run the pipeline instead.
  pipeline.run();

  int actualBufferSize = opts.getGcsUploadBufferSizeBytes().intValue();
  assertEquals(DataflowRunner.GCS_UPLOAD_BUFFER_SIZE_BYTES_DEFAULT, actualBufferSize);
}
 
开发者ID:apache,项目名称:beam,代码行数:17,代码来源:DataflowRunnerTest.java

示例3: main

import org.apache.beam.sdk.Pipeline; //导入方法依赖的package包/类
/**
 * Entry point of the DatastoreToGcs Dataflow pipeline.
 *
 * <p>Reads Datastore entities selected by a literal GQL query, passes them through the
 * {@code EntityToJson} transform, and writes the result as text files with a {@code .json}
 * suffix.
 *
 * @param args command-line arguments, parsed and validated into {@code Options}
 */
public static void main(String[] args) throws IOException, ScriptException {
  Options opts = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);

  // The runner is set explicitly after the arguments have been parsed.
  opts.setRunner(DataflowRunner.class);

  Pipeline p = Pipeline.create(opts);

  p.apply(
          "IngestEntities",
          DatastoreIO.v1()
              .read()
              .withProjectId(opts.getDatastoreProjectId())
              .withLiteralGqlQuery(opts.getGqlQuery())
              .withNamespace(opts.getNamespace()))
      .apply(
          "EntityToJson",
          ParDo.of(
              EntityToJson.newBuilder()
                  .setJsTransformPath(opts.getJsTransformPath())
                  .setJsTransformFunctionName(opts.getJsTransformFunctionName())
                  .build()))
      .apply("JsonToGcs", TextIO.write().to(opts.getSavePath()).withSuffix(".json"));

  p.run();
}
 
开发者ID:cobookman,项目名称:teleport,代码行数:28,代码来源:DatastoreToGcs.java

示例4: main

import org.apache.beam.sdk.Pipeline; //导入方法依赖的package包/类
/**
 * Entry point of the GcsToDatastore Dataflow pipeline.
 *
 * <p>Reads text from the configured JSON path prefix, passes each element through the
 * {@code JsonToEntity} transform, and writes the result to Datastore.
 *
 * @param args command-line arguments, parsed and validated into {@code Options}
 */
public static void main(String[] args) throws IOException, ScriptException {
  Options opts = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);

  // The runner is set explicitly after the arguments have been parsed.
  opts.setRunner(DataflowRunner.class);

  Pipeline p = Pipeline.create(opts);

  p.apply("IngestJson", TextIO.read().from(opts.getJsonPathPrefix()))
      .apply(
          "GcsToEntity",
          ParDo.of(
              JsonToEntity.newBuilder()
                  .setJsTransformPath(opts.getJsTransformPath())
                  .setJsTransformFunctionName(opts.getJsTransformFunctionName())
                  .build()))
      .apply(DatastoreIO.v1().write().withProjectId(opts.getDatastoreProjectId()));

  p.run();
}
 
开发者ID:cobookman,项目名称:teleport,代码行数:25,代码来源:GcsToDatastore.java

示例5: main

import org.apache.beam.sdk.Pipeline; //导入方法依赖的package包/类
/**
 * Builds and runs the pipeline that reads string records from Kafka, windows their values, and
 * computes team scores.
 *
 * @param args command-line arguments, parsed and validated into {@code Options}
 * @throws Exception if option parsing, pipeline construction, or the run fails
 */
public static void main(String[] args) throws Exception {
  Options opts = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  Pipeline p = Pipeline.create(opts);

  // Fixed FIVE_MINUTES windows. Panes fire early TWO_MINUTES after the first element of a pane,
  // again once the watermark passes the end of the window, and then for every late element
  // within the TEN_MINUTES allowed lateness. Fired panes accumulate.
  Window<String> leaderboardWindows =
      Window.<String>into(FixedWindows.of(FIVE_MINUTES))
          .triggering(
              AfterWatermark.pastEndOfWindow()
                  .withEarlyFirings(
                      AfterProcessingTime.pastFirstElementInPane().plusDelayOf(TWO_MINUTES))
                  .withLateFirings(AfterPane.elementCountAtLeast(1)))
          .withAllowedLateness(TEN_MINUTES)
          .accumulatingFiredPanes();

  p.apply(
          KafkaIO.<String, String>read()
              .withBootstrapServers(opts.getKafkaBootstrapServer())
              .withTopic(opts.getTopic())
              .withKeyDeserializer(StringDeserializer.class)
              .withValueDeserializer(StringDeserializer.class)
              .withTimestampFn(new SetTimestampFn()))
      .apply("Values", ParDo.of(new ValuesFn()))
      .apply("FixedWindows", leaderboardWindows)
      .apply("TeamScore", new CalculateTeamScores(opts.getOutputPrefix()));

  p.run();
}
 
开发者ID:davorbonaci,项目名称:beam-portability-demo,代码行数:28,代码来源:LeaderBoard.java

示例6: main

import org.apache.beam.sdk.Pipeline; //导入方法依赖的package包/类
/**
 * Exports a BigQuery table to a single GZIP-compressed text file.
 *
 * <p>Each {@code TableRow} becomes one output line in which every cell value is trimmed,
 * wrapped in double quotes, and separated from its neighbour by a comma. The file is written
 * without sharding, so the output is exactly one file.
 *
 * @param args command-line arguments, parsed and validated into {@code TemplateOptions}
 * @throws Exception if option parsing, pipeline construction, or the run fails
 */
public static void main(String[] args) throws Exception {
    PipelineOptionsFactory.register(TemplateOptions.class);
    TemplateOptions options = PipelineOptionsFactory
            .fromArgs(args)
            .withValidation()
            .as(TemplateOptions.class);
    options.setAutoscalingAlgorithm(THROUGHPUT_BASED);
    Pipeline pipeline = Pipeline.create(options);
    pipeline.apply(BigQueryIO.read().from(options.getBigQueryTableName()))
            .apply(ParDo.of(new DoFn<TableRow, String>() {
                @ProcessElement
                public void processElement(ProcessContext c) throws Exception {
                    // The separator is `","`, so the line also needs an opening and a closing
                    // quote; without them the first and last cell of every row were left with
                    // an unbalanced quote (a","b","c instead of "a","b","c").
                    // NOTE(review): a null cell value would throw on toString() here, and
                    // embedded double quotes are not escaped — confirm whether either can
                    // occur in the exported tables.
                    String commaSep = c.element().values()
                            .stream()
                            .map(cell -> cell.toString().trim())
                            .collect(Collectors.joining("\",\"", "\"", "\""));
                    c.output(commaSep);
                }
            }))
            .apply(TextIO.write().to(options.getOutputFile())
                    .withoutSharding()
                    .withWritableByteChannelFactory(GZIP)
            );
    pipeline.run();
}
 
开发者ID:shinesolutions,项目名称:bigquery-table-to-one-file,代码行数:26,代码来源:BigQueryTableToOneFile.java

示例7: main

import org.apache.beam.sdk.Pipeline; //导入方法依赖的package包/类
/**
 * Demo that reads {@code UntypedOccurrence} Avro records and writes them back out through
 * {@code GenusDynamicAvroDestinations}, then waits for the pipeline to finish.
 *
 * @param args unused
 */
public static void main(String[] args) {
  PipelineOptions opts = PipelineOptionsFactory.create();
  opts.setRunner(DirectRunner.class); // the demo always uses the direct runner
  Pipeline pipeline = Pipeline.create(opts);

  // Avro coders are needed to serialize the message types used below.
  Coders.registerAvroCoders(pipeline, ExtendedRecord.class, UntypedOccurrence.class);

  PCollection<UntypedOccurrence> occurrences =
      pipeline.apply(
          "Read Avro", AvroIO.read(UntypedOccurrence.class).from("demo/output/data*"));

  occurrences.apply(
      "Write file per Genus",
      AvroIO.write(UntypedOccurrence.class)
          // A prefix has to be supplied first, but the dynamic destinations below replace it.
          .to("demo/output-split/data*")
          .to(
              new GenusDynamicAvroDestinations(
                  FileSystems.matchNewResource("demo/output-split/data*", true))));

  LOG.info("Starting the pipeline");
  PipelineResult outcome = pipeline.run();
  outcome.waitUntilFinish();
  LOG.info("Pipeline finished with state: {} ", outcome.getState());
}
 
开发者ID:gbif,项目名称:pipelines,代码行数:24,代码来源:MultiAvroOutDemo.java

示例8: testMutatingOutputThenOutputDoFnError

import org.apache.beam.sdk.Pipeline; //导入方法依赖的package包/类
/**
 * Checks that the pipeline run fails with {@link IllegalMutationException} when a
 * {@link DoFn} emits a list, mutates it, and emits it again.
 */
@Test
public void testMutatingOutputThenOutputDoFnError() throws Exception {
  // Emits the same list twice, changing its first element in between.
  DoFn<Integer, List<Integer>> mutateAfterOutputFn =
      new DoFn<Integer, List<Integer>>() {
        @ProcessElement
        public void processElement(ProcessContext c) {
          List<Integer> emitted = Arrays.asList(1, 2, 3, 4);
          c.output(emitted);
          emitted.set(0, 37);
          c.output(emitted);
        }
      };

  Pipeline p = getPipeline();
  p.apply(Create.of(42)).apply(ParDo.of(mutateAfterOutputFn));

  thrown.expect(IllegalMutationException.class);
  thrown.expectMessage("output");
  thrown.expectMessage("must not be mutated");
  p.run();
}
 
开发者ID:apache,项目名称:beam,代码行数:26,代码来源:DirectRunnerTest.java

示例9: testMutatingOutputWithEnforcementDisabledSucceeds

import org.apache.beam.sdk.Pipeline; //导入方法依赖的package包/类
/**
 * Checks that a {@link DoFn} which mutates a list after emitting it runs without an expected
 * failure on the {@link DirectRunner} once immutability enforcement is switched off.
 */
@Test
public void testMutatingOutputWithEnforcementDisabledSucceeds() throws Exception {
  PipelineOptions opts = PipelineOptionsFactory.create();
  opts.setRunner(DirectRunner.class);
  opts.as(DirectOptions.class).setEnforceImmutability(false);
  Pipeline p = Pipeline.create(opts);

  // Emits the same list twice, changing its first element in between.
  DoFn<Integer, List<Integer>> mutateAfterOutputFn =
      new DoFn<Integer, List<Integer>>() {
        @ProcessElement
        public void processElement(ProcessContext c) {
          List<Integer> emitted = Arrays.asList(1, 2, 3, 4);
          c.output(emitted);
          emitted.set(0, 37);
          c.output(emitted);
        }
      };

  p.apply(Create.of(42)).apply(ParDo.of(mutateAfterOutputFn));

  p.run();
}
 
开发者ID:apache,项目名称:beam,代码行数:26,代码来源:DirectRunnerTest.java

示例10: runProgram

import org.apache.beam.sdk.Pipeline; //导入方法依赖的package包/类
/**
 * Runs a batch pipeline that turns the sequence 0 (inclusive) to 10 (exclusive) into strings
 * and writes them as text under the path component of {@code resultPath}, using the file
 * prefix {@code /part}.
 *
 * @param resultPath URI string whose path component is the output directory
 * @throws Exception if {@code resultPath} is not a valid URI or the pipeline fails
 */
private static void runProgram(String resultPath) throws Exception {
  Pipeline pipeline = FlinkTestPipeline.createForBatch();

  PCollection<String> numbersAsText =
      pipeline
          .apply(GenerateSequence.from(0).to(10))
          .apply(
              ParDo.of(
                  new DoFn<Long, String>() {
                    @ProcessElement
                    public void processElement(ProcessContext c) throws Exception {
                      c.output(c.element().toString());
                    }
                  }));

  String outputPrefix = new URI(resultPath).getPath() + "/part";
  numbersAsText.apply(TextIO.write().to(outputPrefix));

  pipeline.run();
}
 
开发者ID:apache,项目名称:beam,代码行数:18,代码来源:ReadSourceITCase.java

示例11: testE2EV1ReadWithGQLQuery

import org.apache.beam.sdk.Pipeline; //导入方法依赖的package包/类
/**
 * End-to-end check of {@link DatastoreV1.Read#withLiteralGqlQuery(String)}.
 *
 * <p>Runs a pipeline that reads the entities under the test ancestor with a GQL query and
 * counts them, asserting that the count equals the expected number of entities.
 *
 * @param limit when positive, appended to the query as a {@code LIMIT} clause and used as the
 *     expected count; otherwise the query is unlimited and {@code numEntities} is expected
 */
private void testE2EV1ReadWithGQLQuery(long limit) throws Exception {
  String ancestorQuery = String.format(
      "SELECT * from %s WHERE __key__ HAS ANCESTOR KEY(%s, '%s')",
      options.getKind(), options.getKind(), ancestor);

  boolean limited = limit > 0;
  String gqlQuery = limited ? String.format("%s LIMIT %d", ancestorQuery, limit) : ancestorQuery;
  long expectedNumEntities = limited ? limit : numEntities;

  DatastoreV1.Read gqlRead = DatastoreIO.v1().read()
      .withProjectId(project)
      .withLiteralGqlQuery(gqlQuery)
      .withNamespace(options.getNamespace());

  // Read the entities and reduce them to a single global count.
  Pipeline pipeline = Pipeline.create(options);
  PCollection<Long> entityCount = pipeline.apply(gqlRead).apply(Count.<Entity>globally());

  PAssert.thatSingleton(entityCount).isEqualTo(expectedNumEntities);
  pipeline.run();
}
 
开发者ID:apache,项目名称:beam,代码行数:33,代码来源:V1ReadIT.java

示例12: deploy

import org.apache.beam.sdk.Pipeline; //导入方法依赖的package包/类
/**
 * Deploys the invoicing pipeline as a template on GCS, using this instance's project ID and
 * GCS bucket for the staging and template locations.
 */
public void deploy() {
  // Options are created locally rather than held in a field because of serialization concerns.
  InvoicingPipelineOptions opts = PipelineOptionsFactory.as(InvoicingPipelineOptions.class);
  opts.setProject(projectId);
  opts.setRunner(DataflowRunner.class);
  opts.setStagingLocation(beamBucket + "/staging");
  opts.setTemplateLocation(beamBucket + "/templates/invoicing");
  Pipeline pipeline = Pipeline.create(opts);

  PCollection<BillingEvent> events =
      pipeline.apply(
          "Read BillingEvents from Bigquery",
          BigQueryIO.read(BillingEvent::parseFromRecord)
              .fromQuery(InvoicingUtils.makeQueryProvider(opts.getYearMonth(), projectId))
              .withCoder(SerializableCoder.of(BillingEvent.class))
              .usingStandardSql()
              .withoutValidation()
              .withTemplateCompatibility());

  applyTerminalTransforms(events, opts.getYearMonth());
  pipeline.run();
}
 
开发者ID:google,项目名称:nomulus,代码行数:23,代码来源:InvoicingPipeline.java

示例13: main

import org.apache.beam.sdk.Pipeline; //导入方法依赖的package包/类
/**
 * Demo that reads a Darwin Core Archive, converts its records to {@code UntypedOccurrence},
 * turns those into SOLR documents, writes them to the {@code beam-demo1} collection, and waits
 * for the pipeline to finish.
 *
 * @param args unused
 */
public static void main(String[] args) {
  PipelineOptions opts = PipelineOptionsFactory.create();
  opts.setRunner(DirectRunner.class); // the demo always uses the direct runner
  Pipeline pipeline = Pipeline.create(opts);

  // Avro coders are needed to serialize the message types used below.
  Coders.registerAvroCoders(pipeline, ExtendedRecord.class, UntypedOccurrence.class);

  // Step 1: read the DwC-A with the custom reader.
  PCollection<ExtendedRecord> archiveRecords =
      pipeline.apply(
          "Read from Darwin Core Archive",
          DwCAIO.Read.withPaths("/tmp/dwca-s-bryophytes-v4.1.zip", "demo/target/tmp"));

  // Step 2: map each ExtendedRecord onto an UntypedOccurrence.
  PCollection<UntypedOccurrence> occurrences =
      archiveRecords
          .apply(
              "Convert the objects into untyped DwC style records",
              ParDo.of(BeamFunctions.beamify(FunctionFactory.untypedOccurrenceBuilder())))
          .setCoder(AvroCoder.of(UntypedOccurrence.class));

  // Step 3: build SOLR documents and write them out.
  final SolrIO.ConnectionConfiguration solrConnection =
      SolrIO.ConnectionConfiguration.create(SOLR_HOSTS);

  PCollection<SolrInputDocument> solrDocs =
      occurrences.apply("Convert to SOLR", ParDo.of(new SolrDocBuilder()));

  solrDocs.apply(SolrIO.write().to("beam-demo1").withConnectionConfiguration(solrConnection));

  LOG.info("Starting the pipeline");
  PipelineResult outcome = pipeline.run();
  outcome.waitUntilFinish();
  LOG.info("Pipeline finished with state: {} ", outcome.getState());
}
 
开发者ID:gbif,项目名称:pipelines,代码行数:33,代码来源:DwCA2SolrPipeline.java

示例14: main

import org.apache.beam.sdk.Pipeline; //导入方法依赖的package包/类
/**
 * Entry point of the DatastoreToBigQuery Dataflow pipeline.
 *
 * <p>Reads Datastore entities selected by a literal GQL query, passes them through the
 * {@code EntityToTableRow} transform, and appends the rows to the configured BigQuery table,
 * creating the table if needed.
 *
 * @param args command-line arguments, parsed and validated into {@code Options}
 */
public static void main(String[] args) throws IOException, ScriptException {
  Options opts = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);

  // The same schema provider feeds both the row conversion and the BigQuery sink.
  NestedValueProvider<String, String> schemaJson =
      NestedValueProvider.of(opts.getBqJsonSchema(), new ValueProviderHelpers.GcsLoad());

  // The runner is set explicitly after the arguments have been parsed.
  opts.setRunner(DataflowRunner.class);
  Pipeline p = Pipeline.create(opts);

  p.apply(
          "IngestEntities",
          DatastoreIO.v1()
              .read()
              .withProjectId(opts.getDatastoreProjectId())
              .withLiteralGqlQuery(opts.getGqlQuery())
              .withNamespace(opts.getNamespace()))
      .apply(
          "EntityToTableRow",
          ParDo.of(
              EntityToTableRow.newBuilder()
                  .setJsTransformPath(opts.getJsTransformPath())
                  .setJsTransformFunctionName(opts.getJsTransformFunctionName())
                  .setStrictCast(opts.getStrictCast())
                  .setTableSchemaJson(schemaJson)
                  .build()))
      .apply(
          "TableRowToBigQuery",
          BigQueryIO.writeTableRows()
              .to(opts.getBqTableSpec())
              .withJsonSchema(schemaJson)
              .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
              .withWriteDisposition(WriteDisposition.WRITE_APPEND));

  p.run();
}
 
开发者ID:cobookman,项目名称:teleport,代码行数:33,代码来源:DatastoreToBq.java

示例15: main

import org.apache.beam.sdk.Pipeline; //导入方法依赖的package包/类
/**
 * Builds and runs the template pipeline: reads a GZIP-compressed text input, transforms each
 * line with {@code WikiParDo}, and appends the rows to the {@code wiki_demo} BigQuery table of
 * the configured project.
 *
 * @param args command-line arguments, parsed and validated into {@code TemplateOptions}
 */
public static void main(String[] args) {
    PipelineOptionsFactory.register(TemplateOptions.class);
    TemplateOptions opts =
            PipelineOptionsFactory.fromArgs(args).withValidation().as(TemplateOptions.class);

    // Destination table in "<project>:<dataset>.<table>" form.
    String tableSpec =
            String.format("%s:devfest_melbourne_2017.wiki_demo", opts.getProject());

    Pipeline p = Pipeline.create(opts);
    p.apply("READ", TextIO.read().from(opts.getInputFile()).withCompressionType(GZIP))
            .apply("TRANSFORM", ParDo.of(new WikiParDo()))
            .apply("WRITE", BigQueryIO.writeTableRows()
                    .to(tableSpec)
                    .withCreateDisposition(CREATE_IF_NEEDED)
                    .withWriteDisposition(WRITE_APPEND)
                    .withSchema(getTableSchema()));
    p.run();
}
 
开发者ID:shinesolutions,项目名称:devfest-melbourne-2017-demo,代码行数:14,代码来源:TemplatePipeline.java


注:本文中的org.apache.beam.sdk.Pipeline.run方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。