当前位置: 首页>>代码示例>>Java>>正文


Java Pipeline.create方法代码示例

本文整理汇总了Java中org.apache.beam.sdk.Pipeline.create方法的典型用法代码示例。如果您正苦于以下问题:Java Pipeline.create方法的具体用法?Java Pipeline.create怎么用?Java Pipeline.create使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在org.apache.beam.sdk.Pipeline的用法示例。


在下文中一共展示了Pipeline.create方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: main

import org.apache.beam.sdk.Pipeline; //导入方法依赖的package包/类
/**
 * Runs a batch pipeline to calculate hourly team scores.
 *
 * <p>Log lines are read with {@code TextIO} from the configured input, timestamped through
 * {@code SetTimestampFn}, assigned to fixed windows of {@code ONE_HOUR}, and handed to
 * {@code CalculateTeamScores} together with the configured output prefix.
 *
 * @param args command-line arguments, parsed and validated into {@code Options}
 * @throws Exception declared by the signature; propagated from pipeline construction or run
 */
public static void main(String[] args) throws Exception {
  final Options options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  final Pipeline p = Pipeline.create(options);

  // Build the option-dependent transforms first so the chain below reads top to bottom.
  final TextIO.Read readLogs = TextIO.read().from(options.getInput());
  final Window<String> hourlyWindows = Window.<String>into(FixedWindows.of(ONE_HOUR));
  final CalculateTeamScores teamScores = new CalculateTeamScores(options.getOutputPrefix());

  p.apply("ReadLogs", readLogs)
      .apply("SetTimestamps", WithTimestamps.of(new SetTimestampFn()))
      .apply("FixedWindows", hourlyWindows)
      .apply("TeamScores", teamScores);

  p.run();
}
 
开发者ID:davorbonaci,项目名称:beam-portability-demo,代码行数:18,代码来源:HourlyTeamScore.java

示例2: main

import org.apache.beam.sdk.Pipeline; //导入方法依赖的package包/类
/**
 * Runs the DatastoreToGcs dataflow pipeline.
 *
 * <p>Entities are read from Datastore using the configured project, GQL query and namespace,
 * passed through the {@code EntityToJson} DoFn, and written with {@code TextIO} to the
 * configured save path using a {@code .json} suffix.
 *
 * @param args command-line arguments, parsed and validated into {@code Options}
 */
public static void main(String[] args) throws IOException, ScriptException {
  final Options options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);

  // The runner is always set to Dataflow here, whatever the arguments requested.
  options.setRunner(DataflowRunner.class);

  final Pipeline p = Pipeline.create(options);

  // Sink for the serialized entities.
  final TextIO.Write jsonSink = TextIO.write().to(options.getSavePath()).withSuffix(".json");

  p.apply(
          "IngestEntities",
          DatastoreIO.v1()
              .read()
              .withProjectId(options.getDatastoreProjectId())
              .withLiteralGqlQuery(options.getGqlQuery())
              .withNamespace(options.getNamespace()))
      .apply(
          "EntityToJson",
          ParDo.of(
              EntityToJson.newBuilder()
                  .setJsTransformPath(options.getJsTransformPath())
                  .setJsTransformFunctionName(options.getJsTransformFunctionName())
                  .build()))
      .apply("JsonToGcs", jsonSink);

  p.run();
}
 
开发者ID:cobookman,项目名称:teleport,代码行数:28,代码来源:DatastoreToGcs.java

示例3: main

import org.apache.beam.sdk.Pipeline; //导入方法依赖的package包/类
/**
 * Runs the GcsToDatastore dataflow pipeline.
 *
 * <p>Text lines are read with {@code TextIO} from the configured JSON path prefix, passed
 * through the {@code JsonToEntity} DoFn, and written to Datastore in the configured project.
 *
 * @param args command-line arguments, parsed and validated into {@code Options}
 */
public static void main(String[] args) throws IOException, ScriptException {
  final Options options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);

  // The runner is always set to Dataflow here, whatever the arguments requested.
  options.setRunner(DataflowRunner.class);

  final Pipeline p = Pipeline.create(options);

  // Source of the JSON lines.
  final TextIO.Read jsonSource = TextIO.read().from(options.getJsonPathPrefix());

  p.apply("IngestJson", jsonSource)
      .apply(
          "GcsToEntity",
          ParDo.of(
              JsonToEntity.newBuilder()
                  .setJsTransformPath(options.getJsTransformPath())
                  .setJsTransformFunctionName(options.getJsTransformFunctionName())
                  .build()))
      .apply(DatastoreIO.v1().write().withProjectId(options.getDatastoreProjectId()));

  p.run();
}
 
开发者ID:cobookman,项目名称:teleport,代码行数:25,代码来源:GcsToDatastore.java

示例4: main

import org.apache.beam.sdk.Pipeline; //导入方法依赖的package包/类
/**
 * Builds and runs the leaderboard pipeline.
 *
 * <p>String key/value records are read from the configured Kafka topic (timestamps come from
 * {@code SetTimestampFn}), passed through {@code ValuesFn}, assigned to {@code FIVE_MINUTES}
 * fixed windows with early and late firings, and handed to {@code CalculateTeamScores}.
 *
 * @param args command-line arguments, parsed and validated into {@code Options}
 * @throws Exception declared by the signature; propagated from pipeline construction or run
 */
public static void main(String[] args) throws Exception {
    final Options options =
        PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
    final Pipeline p = Pipeline.create(options);

    // Kafka source: both keys and values are deserialized as strings.
    final KafkaIO.Read<String, String> kafkaSource =
        KafkaIO.<String, String>read()
            .withBootstrapServers(options.getKafkaBootstrapServer())
            .withTopic(options.getTopic())
            .withKeyDeserializer(StringDeserializer.class)
            .withValueDeserializer(StringDeserializer.class)
            .withTimestampFn(new SetTimestampFn());

    // Windowing: fire at the watermark, early TWO_MINUTES after the first element of a pane,
    // and again for every late element within TEN_MINUTES; panes accumulate.
    final Window<String> leaderboardWindows =
        Window.<String>into(FixedWindows.of(FIVE_MINUTES))
            .triggering(
                AfterWatermark.pastEndOfWindow()
                    .withEarlyFirings(
                        AfterProcessingTime.pastFirstElementInPane().plusDelayOf(TWO_MINUTES))
                    .withLateFirings(AfterPane.elementCountAtLeast(1)))
            .withAllowedLateness(TEN_MINUTES)
            .accumulatingFiredPanes();

    p.apply(kafkaSource)
        .apply("Values", ParDo.of(new ValuesFn()))
        .apply("FixedWindows", leaderboardWindows)
        .apply("TeamScore", new CalculateTeamScores(options.getOutputPrefix()));

    p.run();
}
 
开发者ID:davorbonaci,项目名称:beam-portability-demo,代码行数:28,代码来源:LeaderBoard.java

示例5: main

import org.apache.beam.sdk.Pipeline; //导入方法依赖的package包/类
/**
 * Reads a BigQuery table and writes it to a single gzip-compressed text file, one line per row.
 *
 * <p>Each row is rendered as a comma-separated line in which every cell is trimmed and wrapped
 * in double quotes. Embedded double quotes are doubled and {@code null} cells become empty
 * fields, so the line has balanced quotes and can be parsed as CSV.
 *
 * @param args command-line arguments, parsed and validated into {@code TemplateOptions}
 * @throws Exception declared by the signature; propagated from pipeline construction or run
 */
public static void main(String[] args) throws Exception {
    PipelineOptionsFactory.register(TemplateOptions.class);
    TemplateOptions options = PipelineOptionsFactory
            .fromArgs(args)
            .withValidation()
            .as(TemplateOptions.class);
    options.setAutoscalingAlgorithm(THROUGHPUT_BASED);
    Pipeline pipeline = Pipeline.create(options);
    pipeline.apply(BigQueryIO.read().from(options.getBigQueryTableName()))
            .apply(ParDo.of(new DoFn<TableRow, String>() {
                @ProcessElement
                public void processElement(ProcessContext c) throws Exception {
                    // The prefix/suffix supply the opening and closing quote that a bare
                    // "\",\"" delimiter leaves out (it would yield a","b","c).
                    String csvLine = c.element().values()
                            .stream()
                            .map(cell -> cell == null
                                    ? ""
                                    : cell.toString().trim().replace("\"", "\"\""))
                            .collect(Collectors.joining("\",\"", "\"", "\""));
                    c.output(csvLine);
                }
            }))
            // withoutSharding() forces a single output file; GZIP compresses it.
            .apply(TextIO.write().to(options.getOutputFile())
                    .withoutSharding()
                    .withWritableByteChannelFactory(GZIP)
            );
    pipeline.run();
}
 
开发者ID:shinesolutions,项目名称:bigquery-table-to-one-file,代码行数:26,代码来源:BigQueryTableToOneFile.java

示例6: main

import org.apache.beam.sdk.Pipeline; //导入方法依赖的package包/类
/**
 * Demo that reads {@code UntypedOccurrence} Avro records and writes them back out through
 * {@code GenusDynamicAvroDestinations}, then blocks until the pipeline finishes and logs its
 * final state.
 *
 * @param args command-line arguments; not read by this method
 */
public static void main(String[] args) {
  final PipelineOptions options = PipelineOptionsFactory.create();
  options.setRunner(DirectRunner.class); // forced for this demo
  final Pipeline pipeline = Pipeline.create(options);

  // Avro coders are needed to serialize the message classes.
  Coders.registerAvroCoders(pipeline, ExtendedRecord.class, UntypedOccurrence.class);

  pipeline
      .apply("Read Avro", AvroIO.read(UntypedOccurrence.class).from("demo/output/data*"))
      .apply(
          "Write file per Genus",
          AvroIO.write(UntypedOccurrence.class)
              // A prefix is required, but the dynamic destination below overrides it.
              .to("demo/output-split/data*")
              .to(
                  new GenusDynamicAvroDestinations(
                      FileSystems.matchNewResource("demo/output-split/data*", true))));

  LOG.info("Starting the pipeline");
  final PipelineResult outcome = pipeline.run();
  outcome.waitUntilFinish();
  LOG.info("Pipeline finished with state: {} ", outcome.getState());
}
 
开发者ID:gbif,项目名称:pipelines,代码行数:24,代码来源:MultiAvroOutDemo.java

示例7: testWithInvalidContext

import org.apache.beam.sdk.Pipeline; //导入方法依赖的package包/类
/**
 * Runs a small word-count pipeline against the given Spark context and verifies that the run
 * fails with a {@link RuntimeException} whose message contains
 * {@code PROVIDED_CONTEXT_EXCEPTION}.
 *
 * @param jsc the Spark context handed to {@code getSparkContextOptions}; callers are expected
 *     to pass one that is null or stopped (per the failure message below)
 */
private void testWithInvalidContext(JavaSparkContext jsc) {
    SparkContextOptions options = getSparkContextOptions(jsc);

    Pipeline p = Pipeline.create(options);
    PCollection<String> inputWords = p.apply(Create.of(WORDS).withCoder(StringUtf8Coder
            .of()));
    inputWords.apply(new WordCount.CountWords())
            .apply(MapElements.via(new WordCount.FormatAsTextFn()));

    try {
        p.run().waitUntilFinish();
        fail("Should throw an exception when The provided Spark context is null or stopped");
    } catch (RuntimeException e){
        // Checked explicitly instead of with the `assert` keyword, which is skipped when the
        // JVM runs without -ea and would let the test pass without verifying the message.
        String message = e.getMessage();
        if (message == null || !message.contains(PROVIDED_CONTEXT_EXCEPTION)) {
            throw new AssertionError(
                "Expected exception message to contain \"" + PROVIDED_CONTEXT_EXCEPTION
                    + "\" but was: " + message, e);
        }
    }
}
 
开发者ID:apache,项目名称:beam,代码行数:17,代码来源:ProvidedSparkContextTest.java

示例8: testWaitUntilFinishTimeout

import org.apache.beam.sdk.Pipeline; //导入方法依赖的package包/类
/**
 * Verifies that {@code waitUntilFinish} with a one-millisecond timeout returns while a
 * non-blocking pipeline whose only DoFn sleeps indefinitely is still {@code RUNNING}.
 */
@Test
public void testWaitUntilFinishTimeout() throws Exception {
  final DirectOptions directOptions = PipelineOptionsFactory.as(DirectOptions.class);
  directOptions.setRunner(DirectRunner.class);
  // run() must return immediately so the state can be inspected while the DoFn sleeps.
  directOptions.setBlockOnRun(false);

  final Pipeline pipeline = Pipeline.create(directOptions);
  pipeline
      .apply(Create.of(1L))
      .apply(
          ParDo.of(
              new DoFn<Long, Long>() {
                @ProcessElement
                public void hang(ProcessContext context) throws InterruptedException {
                  // Sleeps for the longest representable duration, i.e. effectively forever.
                  Thread.sleep(Long.MAX_VALUE);
                }
              }));

  final PipelineResult running = pipeline.run();

  // The pipeline can never complete, so it is RUNNING before the wait ...
  assertThat(running.getState(), is(State.RUNNING));
  // ... the wait has to time out, otherwise this test would never return ...
  running.waitUntilFinish(Duration.millis(1L));
  // ... and it is still RUNNING afterwards.
  assertThat(running.getState(), is(State.RUNNING));
}
 
开发者ID:apache,项目名称:beam,代码行数:24,代码来源:DirectRunnerTest.java

示例9: testE2EV1Write

import org.apache.beam.sdk.Pipeline; //导入方法依赖的package包/类
/**
 * An end-to-end test for {@link DatastoreV1.Write}.
 *
 * <p>Write some test entities to Cloud Datastore.
 * Read and count all the entities. Verify that the count matches the
 * number of entities written.
 */
@Test
public void testE2EV1Write() throws Exception {
  Pipeline p = Pipeline.create(options);

  // Write to datastore
  p.apply(GenerateSequence.from(0).to(numEntities))
      .apply(ParDo.of(new CreateEntityFn(options.getKind(), options.getNamespace(), ancestor, 0)))
      .apply(DatastoreIO.v1().write().withProjectId(project));

  p.run();

  // Count number of entities written to datastore.
  long numEntitiesWritten = countEntities(options, project, ancestor);

  // JUnit's contract is assertEquals(expected, actual); keeping that order makes a failure
  // message report the expected and actual counts the right way round.
  assertEquals(numEntities, numEntitiesWritten);
}
 
开发者ID:apache,项目名称:beam,代码行数:24,代码来源:V1WriteIT.java

示例10: execute

import org.apache.beam.sdk.Pipeline; //导入方法依赖的package包/类
/**
 * Executes the given sql.
 *
 * <p>A statement that parses to {@code SqlCreateTable} is passed to {@code handleCreateTable}
 * with the meta store. Any other statement is compiled into a new pipeline, created with job
 * name {@code "BeamPlanCreator"}, which is then run.
 *
 * @param sqlString the SQL statement to parse and execute
 * @throws Exception declared by the signature; propagated from parsing, compilation or run
 */
public void execute(String sqlString) throws Exception {
  final SqlNode parsed = new BeamSqlParser(sqlString).impl().parseSqlStmtEof();

  // CREATE TABLE only touches the meta store; no pipeline is needed.
  if (parsed instanceof SqlCreateTable) {
    handleCreateTable((SqlCreateTable) parsed, metaStore);
    return;
  }

  final PipelineOptions options =
      PipelineOptionsFactory.fromArgs(new String[0])
          .withValidation()
          .as(PipelineOptions.class);
  options.setJobName("BeamPlanCreator");

  final Pipeline plan = Pipeline.create(options);
  compilePipeline(sqlString, plan, env);
  plan.run();
}
 
开发者ID:apache,项目名称:beam,代码行数:19,代码来源:BeamSqlCli.java

示例11: testTemplateRunnerLoggedErrorForFile

import org.apache.beam.sdk.Pipeline; //导入方法依赖的package包/类
/**
 * Tests that the {@link DataflowRunner} with {@code --templateLocation} throws the appropriate
 * exception when an output file is not writable.
 *
 * <p>The template location is set to {@code //bad/path}; running the pipeline is expected to
 * raise a {@link RuntimeException} whose message contains "Cannot create output file at".
 */
@Test
public void testTemplateRunnerLoggedErrorForFile() throws Exception {
  final DataflowPipelineOptions dataflowOptions =
      PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  dataflowOptions.setRunner(DataflowRunner.class);
  dataflowOptions.setJobName("TestJobName");
  dataflowOptions.setProject("test-project");
  dataflowOptions.setGcpCredential(new TestCredential());
  dataflowOptions.setPathValidatorClass(NoopPathValidator.class);
  dataflowOptions.setTempLocation(tmpFolder.getRoot().getPath());
  // The template location under test.
  dataflowOptions.setTemplateLocation("//bad/path");

  final Pipeline pipeline = Pipeline.create(dataflowOptions);

  thrown.expect(RuntimeException.class);
  thrown.expectMessage("Cannot create output file at");
  pipeline.run();
}
 
开发者ID:apache,项目名称:beam,代码行数:21,代码来源:DataflowRunnerTest.java

示例12: createIndexerPipeline

import org.apache.beam.sdk.Pipeline; //导入方法依赖的package包/类
/**
 * Creates the DAG of transforms for the file indexer. It can be called from main() as well
 * as from the ControlPipeline.
 *
 * <p>The returned pipeline is only constructed here; this method does not run it.
 *
 * @param options indexer options; checked with
 *     {@code IndexerPipelineUtils.validateIndexerPipelineOptions} before the pipeline is built
 * @return the constructed pipeline
 * @throws Exception declared by the signature; propagated from the helpers this method calls
 */
public static Pipeline createIndexerPipeline(FileIndexerPipelineOptions options) throws Exception {

    IndexerPipelineUtils.validateIndexerPipelineOptions(options);
    Pipeline indexerPipeline = Pipeline.create(options);

    // PHASE: Read raw content from sources.
    // The input file is read through a RecordFileSource and each record is parsed into
    // InputContent objects by ParseCSVFile.
    PCollection<InputContent> readContent =
        indexerPipeline
            .apply(
                "Read entire CSV file",
                org.apache.beam.sdk.io.Read.from(
                    new RecordFileSource<String>(
                        ValueProvider.StaticValueProvider.of(options.getInputFile()),
                        StringUtf8Coder.of(),
                        RecordFileSource.DEFAULT_RECORD_SEPARATOR)))
            .apply("Parse CSV file into InputContent objects", ParDo.of(new ParseCSVFile()));

    // PHASE: Index documents. All parsed content is indexed; no filtering happens in between.
    PCollection<ContentIndexSummary> indexes = indexDocuments(options, readContent);

    // Optional enrichment, enabled only for a positive ratio.
    if (options.getRatioEnrichWithCNLP() > 0) {
        indexes = enrichWithCNLP(indexes, options.getRatioEnrichWithCNLP());
    }

    // PHASE: Write results.
    // Each index summary is turned into a line by CreateCSVLineFromIndexSummaryFn and written
    // with TextIO to the configured output file.
    indexes
        .apply(ParDo.of(new CreateCSVLineFromIndexSummaryFn()))
        .apply(TextIO.write().to(options.getOutputFile()));

    return indexerPipeline;
}
 
开发者ID:GoogleCloudPlatform,项目名称:dataflow-opinion-analysis,代码行数:43,代码来源:FileIndexerPipeline.java

示例13: main

import org.apache.beam.sdk.Pipeline; //导入方法依赖的package包/类
/**
 * Runs the DatastoreToBigQuery dataflow pipeline.
 *
 * <p>Entities are read from Datastore using the configured project, GQL query and namespace,
 * passed through the {@code EntityToTableRow} DoFn, and appended to the configured BigQuery
 * table, which is created if needed. The same JSON schema value is given to both the DoFn
 * and the BigQuery write.
 *
 * @param args command-line arguments, parsed and validated into {@code Options}
 */
public static void main(String[] args) throws IOException, ScriptException {
  final Options options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);

  // The runner is always set to Dataflow here, whatever the arguments requested.
  options.setRunner(DataflowRunner.class);

  // Schema value derived from the bqJsonSchema option through ValueProviderHelpers.GcsLoad.
  final NestedValueProvider<String, String> tableSchemaJson =
      NestedValueProvider.of(options.getBqJsonSchema(), new ValueProviderHelpers.GcsLoad());

  final Pipeline p = Pipeline.create(options);
  p.apply(
          "IngestEntities",
          DatastoreIO.v1()
              .read()
              .withProjectId(options.getDatastoreProjectId())
              .withLiteralGqlQuery(options.getGqlQuery())
              .withNamespace(options.getNamespace()))
      .apply(
          "EntityToTableRow",
          ParDo.of(
              EntityToTableRow.newBuilder()
                  .setJsTransformPath(options.getJsTransformPath())
                  .setJsTransformFunctionName(options.getJsTransformFunctionName())
                  .setStrictCast(options.getStrictCast())
                  .setTableSchemaJson(tableSchemaJson)
                  .build()))
      .apply(
          "TableRowToBigQuery",
          BigQueryIO.writeTableRows()
              .to(options.getBqTableSpec())
              .withJsonSchema(tableSchemaJson)
              .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
              .withWriteDisposition(WriteDisposition.WRITE_APPEND));
  p.run();
}
 
开发者ID:cobookman,项目名称:teleport,代码行数:33,代码来源:DatastoreToBq.java

示例14: main

import org.apache.beam.sdk.Pipeline; //导入方法依赖的package包/类
/**
 * Builds and runs the template pipeline: reads a GZIP-compressed text input, passes each line
 * through {@code WikiParDo}, and appends the resulting table rows to the
 * {@code devfest_melbourne_2017.wiki_demo} table of the configured project, creating the table
 * if needed.
 *
 * @param args command-line arguments, parsed and validated into {@code TemplateOptions}
 */
public static void main(String[] args) {
    PipelineOptionsFactory.register(TemplateOptions.class);
    final TemplateOptions options =
            PipelineOptionsFactory.fromArgs(args).withValidation().as(TemplateOptions.class);
    final Pipeline p = Pipeline.create(options);

    final TextIO.Read gzipSource =
            TextIO.read().from(options.getInputFile()).withCompressionType(GZIP);
    // Destination table spec in "<project>:<dataset>.<table>" form.
    final String tableSpec =
            String.format("%s:devfest_melbourne_2017.wiki_demo", options.getProject());

    p.apply("READ", gzipSource)
            .apply("TRANSFORM", ParDo.of(new WikiParDo()))
            .apply(
                    "WRITE",
                    BigQueryIO.writeTableRows()
                            .to(tableSpec)
                            .withCreateDisposition(CREATE_IF_NEEDED)
                            .withWriteDisposition(WRITE_APPEND)
                            .withSchema(getTableSchema()));
    p.run();
}
 
开发者ID:shinesolutions,项目名称:devfest-melbourne-2017-demo,代码行数:14,代码来源:TemplatePipeline.java

示例15: run

import org.apache.beam.sdk.Pipeline; //导入方法依赖的package包/类
/**
 * Builds and runs the log aggregation pipeline: reads strings from a Pub/Sub subscription,
 * applies {@code LogTransformer}, and appends the result to BigQuery tables whose names are
 * produced by {@code TableNameByWindowFn}.
 *
 * @param options supplies the project, the input subscription and the output dataset/table;
 *     the subscription and table option values also serve as the step names
 */
protected static void run(Options options) {
  final String projectId = options.getProject();
  final String inputSubscription = options.getInputPubsubSubscription();
  final String datasetId = options.getOutputBigQueryDataset();
  final String tablePrefix = options.getOutputBigQueryTable();

  // Input: fully qualified subscription path, decoded as UTF-8 strings.
  final String subscriptionPath =
      "projects/" + projectId + "/subscriptions/" + inputSubscription;
  final PubsubIO.Read<String> source =
      PubsubIO.<String>read().subscription(subscriptionPath).withCoder(StringUtf8Coder.of());

  // Output: the destination table is chosen by TableNameByWindowFn; tables are created when
  // missing and rows are appended.
  final TableSchema outputSchema = PubsubMessage2TableRowFn.getOutputTableSchema();
  final TableNameByWindowFn tableNameFn =
      new TableNameByWindowFn(projectId, datasetId, tablePrefix);
  final BigQueryIO.Write.Bound sink =
      BigQueryIO.Write
          .withSchema(outputSchema)
          .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
          .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_APPEND)
          .to(tableNameFn);

  // Build and run pipeline
  final Pipeline p = Pipeline.create(options);
  p.apply(options.getInputPubsubSubscription(), source)
      .apply(new LogTransformer())
      .apply(options.getOutputBigQueryTable(), sink);
  p.run();
}
 
开发者ID:yu-iskw,项目名称:google-log-aggregation-example,代码行数:30,代码来源:LogAggregator.java


注:本文中的org.apache.beam.sdk.Pipeline.create方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。