

Java PipelineOptionsFactory Class Code Examples

This article collects typical usage examples of the Java class com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory. If you are wondering what exactly PipelineOptionsFactory does, how to use it, or where to find working examples, the curated class code examples below may help.


The PipelineOptionsFactory class belongs to the com.google.cloud.dataflow.sdk.options package. Fifteen code examples of the class are shown below, sorted by popularity by default.
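Before diving into the examples, here is a minimal sketch of the three PipelineOptionsFactory entry points they rely on: fromArgs(...).withValidation().as(...) parses and validates command-line flags into a typed options interface, create() builds default options (common in unit tests), and as(...) builds a typed options object programmatically. The wrapper class name below is hypothetical; the factory calls themselves are the ones used in the examples that follow.

import com.google.cloud.dataflow.sdk.Pipeline;
import com.google.cloud.dataflow.sdk.options.PipelineOptions;
import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;

public class PipelineOptionsFactoryOverview { // hypothetical class name
  public static void main(String[] args) {
    // 1. Parse command-line flags, validate them, and view the result as a
    //    typed options interface (the pattern most examples below use).
    PipelineOptions parsed =
        PipelineOptionsFactory.fromArgs(args).withValidation().as(PipelineOptions.class);

    // 2. Create default options with no flags, as the unit tests below do.
    PipelineOptions defaults = PipelineOptionsFactory.create();

    // 3. Build a typed options object programmatically; fields would then be
    //    set by hand (see Example 12).
    PipelineOptions manual = PipelineOptionsFactory.as(PipelineOptions.class);

    // Any of the three can then be used to create a pipeline.
    Pipeline p = Pipeline.create(parsed);
  }
}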

Example 1: main

import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; // import the featured package/class
public static void main(String[] args) {
	BigQueryToDatastoreOptions options = PipelineOptionsFactory.fromArgs(args).withValidation()
			.as(BigQueryToDatastoreOptions.class);

	String inputTable = options.getInputTable().get();
	String projectID = options.getOutputProjectID().get();
	String kind = options.getOutputKind().get();

	LOG.info("Input_Table : " + inputTable);
	LOG.info("ProjectID : " + projectID);
	LOG.info("Kind : " + kind);

	Pipeline p = Pipeline.create(options);

	PCollection<KV<Integer, Iterable<TableRow>>> keywordGroups = p
			.apply(BigQueryIO.Read.named("ReadUtterance").from(inputTable)).apply(new GroupKeywords());
	
	CreateEntities createEntities = new CreateEntities();
	createEntities.setKind(kind);
	
	PCollection<Entity> entities = keywordGroups.apply(createEntities);
	entities.apply(DatastoreIO.v1().write().withProjectId(projectID));

	p.run();
}
 
Author: sinmetal | Project: iron-hippo | Lines: 26 | Source: BigQueryToDatastore.java

Example 2: main

import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; // import the featured package/class
/**
 * Run a batch pipeline.
 */
public static void main(String[] args) throws Exception {
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  Pipeline pipeline = Pipeline.create(options);

  TableReference tableRef = new TableReference();
  tableRef.setDatasetId(options.as(Options.class).getOutputDataset());
  tableRef.setProjectId(options.as(GcpOptions.class).getProject());
  tableRef.setTableId(options.getOutputTableName());

  // Read events from a CSV file and parse them.
  pipeline
      .apply(TextIO.Read.from(options.getInput()))
      .apply(ParDo.named("ParseGameEvent").of(new ParseEventFn()))
      // Extract and sum username/score pairs from the event data.
      .apply("ExtractUserScore", new ExtractAndSumScore("user"))
      // Write the results to BigQuery.
      .apply(ParDo.named("FormatUserScoreSums").of(new FormatUserScoreSumsFn()))
      .apply(
          BigQueryIO.Write.to(tableRef)
              .withSchema(FormatUserScoreSumsFn.getSchema())
              .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
              .withWriteDisposition(WriteDisposition.WRITE_APPEND));

  pipeline.run();
}
 
Author: mdvorsky | Project: DataflowSME | Lines: 29 | Source: Exercise1.java

Example 3: main

import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; // import the featured package/class
/** Run a batch or streaming pipeline. */
public static void main(String[] args) throws Exception {
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);

  Pipeline pipeline = Pipeline.create(options);

  TableReference tableRef = new TableReference();
  tableRef.setDatasetId(options.as(Options.class).getOutputDataset());
  tableRef.setProjectId(options.as(GcpOptions.class).getProject());
  tableRef.setTableId(options.getOutputTableName());

  // Read events from either a CSV file or PubSub stream.
  pipeline
      .apply(new ReadGameEvents(options))
      .apply("WindowedTeamScore", new Exercise2.WindowedTeamScore(Duration.standardMinutes(60)))
      // Write the results to BigQuery.
      .apply(ParDo.named("FormatTeamScoreSums").of(new Exercise2.FormatTeamScoreSumsFn()))
      .apply(
          BigQueryIO.Write.to(tableRef)
              .withSchema(Exercise2.FormatTeamScoreSumsFn.getSchema())
              .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
              .withWriteDisposition(WriteDisposition.WRITE_APPEND));

  pipeline.run();
}
 
Author: mdvorsky | Project: DataflowSME | Lines: 26 | Source: Exercise3.java

Example 4: main

import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; // import the featured package/class
/**
 * Run a batch pipeline.
 */
public static void main(String[] args) throws Exception {
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  Pipeline pipeline = Pipeline.create(options);

  TableReference tableRef = new TableReference();
  tableRef.setDatasetId(options.as(Options.class).getOutputDataset());
  tableRef.setProjectId(options.as(GcpOptions.class).getProject());
  tableRef.setTableId(options.getOutputTableName());

  // Read events from a CSV file, parse them and write (import) them to BigQuery.
  pipeline
      .apply(TextIO.Read.from(options.getInput()))
      .apply(ParDo.named("ParseGameEvent").of(new ParseEventFn()))
      .apply(ParDo.named("FormatGameEvent").of(new FormatGameEventFn()))
      .apply(
          BigQueryIO.Write.to(tableRef)
              .withSchema(FormatGameEventFn.getSchema())
              .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
              .withWriteDisposition(WriteDisposition.WRITE_APPEND));

  pipeline.run();
}
 
Author: mdvorsky | Project: DataflowSME | Lines: 26 | Source: Exercise0.java

Example 5: main

import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; // import the featured package/class
public static void main(String[] args) {
  CustomPipelineOptions options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(CustomPipelineOptions.class);
  Pipeline p = Pipeline.create(options);

  p.apply(PubsubIO.Read.named("read from PubSub")
      .topic(String.format("projects/%s/topics/%s", options.getSourceProject(), options.getSourceTopic()))
      .timestampLabel("ts")
      .withCoder(TableRowJsonCoder.of()))

   .apply("window 1s", Window.into(FixedWindows.of(Duration.standardSeconds(1))))
   .apply("mark rides", MapElements.via(new MarkRides()))
   .apply("count similar", Count.perKey())
   .apply("format rides", MapElements.via(new TransformRides()))

   .apply(PubsubIO.Write.named("WriteToPubsub")
      .topic(String.format("projects/%s/topics/%s", options.getSinkProject(), options.getSinkTopic()))
      .withCoder(TableRowJsonCoder.of()));

  p.run();
}
 
Author: googlecodelabs | Project: cloud-dataflow-nyc-taxi-tycoon | Lines: 22 | Source: CountRides.java

Example 6: main

import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; // import the featured package/class
public static void main(String[] args) {
  CustomPipelineOptions options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(CustomPipelineOptions.class);
  Pipeline p = Pipeline.create(options);

  p.apply(PubsubIO.Read.named("read from PubSub")
      .topic(String.format("projects/%s/topics/%s", options.getSourceProject(), options.getSourceTopic()))
      .timestampLabel("ts")
      .withCoder(TableRowJsonCoder.of()))

   .apply("filter lower Manhattan", ParDo.of(new FilterLowerManhattan()))

   .apply(PubsubIO.Write.named("WriteToPubsub")
      .topic(String.format("projects/%s/topics/%s", options.getSinkProject(), options.getSinkTopic()))
      .withCoder(TableRowJsonCoder.of()));
  p.run();
}
 
Author: googlecodelabs | Project: cloud-dataflow-nyc-taxi-tycoon | Lines: 18 | Source: FilterRides.java

Example 7: testBundleSplitIsJustSource

import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; // import the featured package/class
@Test
public void testBundleSplitIsJustSource() throws Exception {
  PipelineOptions options = PipelineOptionsFactory.create();
  List<GCSFilesSource> bundles = source.splitIntoBundles(0, null);
  assertEquals(bundles.size(), 1);
  assertEquals(bundles.get(0), source);

  bundles = source.splitIntoBundles(0, options);
  assertEquals(bundles.size(), 1);
  assertEquals(bundles.get(0), source);

  bundles = source.splitIntoBundles(1, options);
  assertEquals(bundles.size(), 1);
  assertEquals(bundles.get(0), source);

  bundles = source.splitIntoBundles(100000, options);
  assertEquals(bundles.size(), 1);
  assertEquals(bundles.get(0), source);

  bundles = source.splitIntoBundles(10, null);
  assertEquals(bundles.size(), 1);
  assertEquals(bundles.get(0), source);
}
 
Author: GoogleCloudPlatform | Project: policyscanner | Lines: 24 | Source: GCSFilesSourceTest.java

Example 8: testReaderGetCurrent

import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; // import the featured package/class
@Test
public void testReaderGetCurrent() {
  String projectName = "sampleProject";
  String objectName = REPOSITORY + this.source.getDirDelimiter() + projectName;
  String fileContent = "sample file content";
  ByteArrayOutputStream[] out = new ByteArrayOutputStream[1];
  PipelineOptions options = PipelineOptionsFactory.create();

  setUpGetFilesPage(objectName);
  setUpGetFileContent(fileContent, out);

  try {
    BoundedReader<KV<List<String>, String>> reader = this.source.createReader(options);
    reader.start();
    KV<List<String>, String> value = reader.getCurrent();
    assertEquals(value.getKey().size(), 2);
    assertEquals(value.getKey().get(0), REPOSITORY);
    assertEquals(value.getKey().get(1), projectName);
    assertEquals(value.getValue(), fileContent);
  } catch (IOException e) {
    fail();
  }
}
 
Author: GoogleCloudPlatform | Project: policyscanner | Lines: 24 | Source: GCSFilesSourceTest.java

Example 9: testReaderAdvance

import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; // import the featured package/class
@Test
public void testReaderAdvance() {
  String objectName = REPOSITORY + this.source.getDirDelimiter() + "sampleProject";
  PipelineOptions options = PipelineOptionsFactory.create();
  BoundedReader<KV<List<String>, String>> reader;

  try {
    setUpGetFilesPage(objectName, 0);
    reader = this.source.createReader(options);
    assertFalse(reader.start());

    setUpGetFilesPage(objectName, 1);
    reader = this.source.createReader(options);
    assertTrue(reader.start());
    assertFalse(reader.advance());

    setUpGetFilesPage(objectName, 2);
    reader = this.source.createReader(options);
    assertTrue(reader.start());
    assertTrue(reader.advance());
    assertFalse(reader.advance());
  } catch (IOException e) {
    fail();
  }
}
 
Author: GoogleCloudPlatform | Project: policyscanner | Lines: 26 | Source: GCSFilesSourceTest.java

Example 10: testBundleSplitIsJustSource

import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; // import the featured package/class
@Test
public void testBundleSplitIsJustSource() throws Exception {
  PipelineOptions options = PipelineOptionsFactory.create();
  List<LiveProjectSource> bundles = source.splitIntoBundles(0, null);
  assertEquals(bundles.size(), 1);
  assertEquals(bundles.get(0), source);

  bundles = source.splitIntoBundles(0, options);
  assertEquals(bundles.size(), 1);
  assertEquals(bundles.get(0), source);

  bundles = source.splitIntoBundles(1, options);
  assertEquals(bundles.size(), 1);
  assertEquals(bundles.get(0), source);

  bundles = source.splitIntoBundles(100000, options);
  assertEquals(bundles.size(), 1);
  assertEquals(bundles.get(0), source);

  bundles = source.splitIntoBundles(10, null);
  assertEquals(bundles.size(), 1);
  assertEquals(bundles.get(0), source);
}
 
Author: GoogleCloudPlatform | Project: policyscanner | Lines: 24 | Source: LiveProjectSourceTest.java

Example 11: testAdvanceWithoutStart

import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; // import the featured package/class
@Test
public void testAdvanceWithoutStart() {
  PipelineOptions options = PipelineOptionsFactory.create();
  LiveProjectReader reader;

  this.listProjectsResponse.setProjects(new ArrayList<Project>(0));
  this.listProjectsResponse.setNextPageToken(null);
  try {
    reader = (LiveProjectReader) this.source.createReader(options);
    assertFalse(reader.advance());
    assertNull(reader.getNextPageToken());
    assertTrue(reader.getProjects().isEmpty());
    reader.getCurrent();
  } catch (IOException e) {
    fail("IOException in reader.start");
  } catch (NoSuchElementException ignored) {
    // test passed.
  }
}
 
Author: GoogleCloudPlatform | Project: policyscanner | Lines: 20 | Source: LiveProjectSourceTest.java

Example 12: pipelineOptions

import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; // import the featured package/class
/**
 * Create Dataflow pipeline options from the standard command-line options:
 * "--project=", "--runner=" and "--stagingLocation=".
 *
 * @param args command-line arguments to parse
 * @return the configured Dataflow pipeline options
 * @throws IOException if the arguments cannot be parsed
 */
public static DataflowPipelineOptions pipelineOptions(String[] args) throws IOException {
  LOG.info("Set up Dataflow options");
  DataflowPipelineOptions o = PipelineOptionsFactory.as(DataflowPipelineOptions.class);

  Map<String, String> m = StringUtils.parseArgs(args);
  o.setProject(m.get(PROJECT));
  if (m.containsKey(STAGING)) {
    o.setStagingLocation(m.get(STAGING));
  } else if (m.containsKey(STAGING_LOCATION)) {
    o.setStagingLocation(m.get(STAGING_LOCATION));
  } else if (m.containsKey(WORKSPACE)) {
    o.setStagingLocation(m.get(WORKSPACE) + "/staging");
  }
  o.setRunner(runner(m.get(RUNNER)));
  o.setMaxNumWorkers(m.get(MAX_WORKERS) == null ? 1 : Integer.parseInt(m.get(MAX_WORKERS)));
  if (m.containsKey(MACHINE_TYPE)) {
    o.setWorkerMachineType(m.get(MACHINE_TYPE));
  } else {
    o.setWorkerMachineType(DEFAULT_MACHINE_TYPE);
  }
  return o;
}
 
Author: googlegenomics | Project: dockerflow | Lines: 31 | Source: DataflowFactory.java
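A hedged usage sketch for the helper above: the enclosing class is DataflowFactory per the source attribution, and the flag names follow its Javadoc, but the exact call site and flag values below are assumptions.

import java.io.IOException;
import com.google.cloud.dataflow.sdk.Pipeline;
import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;

public static void main(String[] args) throws IOException {
  // Hypothetical invocation of the pipelineOptions(...) helper above.
  String[] flags = {
      "--project=my-gcp-project",                 // hypothetical project id
      "--runner=DirectPipelineRunner",            // hypothetical runner name
      "--stagingLocation=gs://my-bucket/staging"  // hypothetical staging path
  };
  DataflowPipelineOptions options = DataflowFactory.pipelineOptions(flags);
  Pipeline pipeline = Pipeline.create(options);
  pipeline.run();
}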

Example 13: setupPipeline

import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; // import the featured package/class
private Pipeline setupPipeline(final String inputPath, final String outputPath, boolean enableGcs, boolean enableCloudExec) {
    final GATKGCSOptions options = PipelineOptionsFactory.as(GATKGCSOptions.class);
    if (enableCloudExec) {
        options.setStagingLocation(getGCPTestStaging());
        options.setProject(getGCPTestProject());
        options.setRunner(BlockingDataflowPipelineRunner.class);
    } else if (BucketUtils.isHadoopUrl(inputPath) || BucketUtils.isHadoopUrl(outputPath)) {
        options.setRunner(SparkPipelineRunner.class);
    } else {
        options.setRunner(DirectPipelineRunner.class);
    }
    if (enableGcs) {
        options.setApiKey(getGCPTestApiKey());
    }
    final Pipeline p = Pipeline.create(options);
    DataflowUtils.registerGATKCoders(p);
    return p;
}
 
Author: broadinstitute | Project: gatk-dataflow | Lines: 19 | Source: SmallBamWriterTest.java

Example 14: main

import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; // import the featured package/class
public static void main(String[] args) {
	
	String[] arguments = {
			String.format("--output=%s/output.txt", System.getProperty("java.io.tmpdir"))
	};
	
	Options options = PipelineOptionsFactory.fromArgs(arguments)
			.withValidation().as(Options.class);
	options.setRunner(FlinkLocalPipelineRunner.class);
	
	Pipeline p = Pipeline.create(options);

	p.apply(TextIO.Read.named("ReadLines").from(options.getInput()))
			.apply(new CountWords())
			.apply(TextIO.Write.named("WriteCounts")
					.to(options.getOutput())
					.withNumShards(options.getNumShards()));

	p.run();
}
 
Author: StephanEwen | Project: flink-dataflow | Lines: 21 | Source: GoogleWordCountExampleRunner.java

Example 15: testGeneric

import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; // import the featured package/class
@Test
public void testGeneric() throws Exception {
  Schema schema = new Schema.Parser().parse(Resources.getResource("person.avsc").openStream());
  GenericRecord savedRecord = new GenericData.Record(schema);
  savedRecord.put("name", "John Doe");
  savedRecord.put("age", 42);
  savedRecord.put("siblingnames", Lists.newArrayList("Jimmy", "Jane"));
  populateGenericFile(Lists.newArrayList(savedRecord), schema);

  Pipeline p = Pipeline.create(PipelineOptionsFactory.create());
  PCollection<GenericRecord> input = p.apply(
      AvroIO.Read.from(inputFile.getAbsolutePath()).withSchema(schema));
  input.apply(AvroIO.Write.to(outputDir.getAbsolutePath()).withSchema(schema));
  EvaluationResult res = SparkPipelineRunner.create().run(p);
  res.close();

  List<GenericRecord> records = readGenericFile();
  assertEquals(Lists.newArrayList(savedRecord), records);
}
 
Author: shakamunyi | Project: spark-dataflow | Lines: 20 | Source: AvroPipelineTest.java


Note: The com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their respective developers, and copyright of the source code remains with the original authors; consult the corresponding project's License before using or redistributing the code. Do not reproduce this article without permission.