当前位置: 首页>>代码示例>>Java>>正文


Java DataflowPipelineOptions类代码示例

本文整理汇总了Java中com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions的典型用法代码示例。如果您正苦于以下问题:Java DataflowPipelineOptions类的具体用法?Java DataflowPipelineOptions怎么用?Java DataflowPipelineOptions使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。


DataflowPipelineOptions类属于com.google.cloud.dataflow.sdk.options包,在下文中一共展示了DataflowPipelineOptions类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: runInjectorPipeline

import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; //导入依赖的package包/类
/**
 * Runs the batch injector for the streaming pipeline.
 *
 * <p>The injector pipeline will read from the given text file, and inject data
 * into the Google Cloud Pub/Sub topic.
 */
public void runInjectorPipeline(String inputFile, String topic) {
  // Clone the options so the streaming job's own settings stay untouched.
  DataflowPipelineOptions injectorOptions = options.cloneAs(DataflowPipelineOptions.class);
  // The injector is a batch job and must use the batch worker harness image.
  injectorOptions.setStreaming(false);
  injectorOptions.setWorkerHarnessContainerImage(
      DataflowPipelineRunner.BATCH_WORKER_HARNESS_CONTAINER_IMAGE);
  injectorOptions.setNumWorkers(
      options.as(ExamplePubsubTopicOptions.class).getInjectorNumWorkers());
  injectorOptions.setJobName(options.getJobName() + "-injector");

  Pipeline injectorPipeline = Pipeline.create(injectorOptions);
  injectorPipeline
      .apply(TextIO.Read.from(inputFile))
      .apply(IntraBundleParallelization
          .of(PubsubFileInjector.publish(topic))
          .withMaxParallelism(20));

  // Remember the job so it can be cancelled when the example shuts down.
  DataflowPipelineJob injectorJob = (DataflowPipelineJob) injectorPipeline.run();
  jobsToCancel.add(injectorJob);
}
 
开发者ID:sinmetal,项目名称:iron-hippo,代码行数:23,代码来源:DataflowExampleUtils.java

示例2: pipelineOptions

import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; //导入依赖的package包/类
/**
 * Create Dataflow Pipeline options from the standard command-line options, "--project=",
 * "--runner=" and "--stagingLocation="
 *
 * @param args command-line arguments of the form {@code --key=value}
 * @return Dataflow pipeline options populated from the parsed arguments
 * @throws IOException if the options cannot be constructed
 */
public static DataflowPipelineOptions pipelineOptions(String[] args) throws IOException {
  LOG.info("Set up Dataflow options");
  DataflowPipelineOptions o = PipelineOptionsFactory.as(DataflowPipelineOptions.class);

  Map<String, String> m = StringUtils.parseArgs(args);
  o.setProject(m.get(PROJECT));

  // Staging location: explicit flags take priority; otherwise derive it from the workspace.
  if (m.containsKey(STAGING)) {
    o.setStagingLocation(m.get(STAGING));
  } else if (m.containsKey(STAGING_LOCATION)) {
    o.setStagingLocation(m.get(STAGING_LOCATION));
  } else if (m.containsKey(WORKSPACE)) {
    o.setStagingLocation(m.get(WORKSPACE) + "/staging");
  }

  o.setRunner(runner(m.get(RUNNER)));

  // Look the value up once rather than twice; default to a single worker when unset.
  String maxWorkers = m.get(MAX_WORKERS);
  o.setMaxNumWorkers(maxWorkers == null ? 1 : Integer.parseInt(maxWorkers));

  o.setWorkerMachineType(
      m.containsKey(MACHINE_TYPE) ? m.get(MACHINE_TYPE) : DEFAULT_MACHINE_TYPE);
  return o;
}
 
开发者ID:googlegenomics,项目名称:dockerflow,代码行数:31,代码来源:DataflowFactory.java

示例3: run

import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; //导入依赖的package包/类
/** Run a Docker workflow on Dataflow. */
public static void run(Workflow w, Map<String, WorkflowArgs> a, DataflowPipelineOptions o)
    throws IOException {
  LOG.info("Running workflow graph");
  // A project id is mandatory for any Dataflow execution.
  if (w.getArgs().getProjectId() == null) {
    throw new IllegalArgumentException("Project id is required");
  }

  Pipeline pipeline = DataflowFactory.dataflow(w, a, o);
  LOG.info("Created Dataflow pipeline");
  LOG.debug(w.toString());

  // Blocks or returns depending on the configured runner; report the final state.
  PipelineResult result = pipeline.run();
  LOG.info("Dataflow pipeline completed");
  LOG.info("Result state: " + result.getState());
}
 
开发者ID:googlegenomics,项目名称:dockerflow,代码行数:19,代码来源:TaskRunner.java

示例4: create

import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; //导入依赖的package包/类
@Override
public String create(PipelineOptions options) {
  DataflowPipelineOptions dataflowOptions = options.as(DataflowPipelineOptions.class);
  // Without a staging location there is nothing to derive a default output path from.
  if (dataflowOptions.getStagingLocation() == null) {
    throw new IllegalArgumentException("Must specify --output or --stagingLocation");
  }
  // Default output file lives next to the staged artifacts.
  return GcsPath.fromUri(dataflowOptions.getStagingLocation())
      .resolve("counts.txt")
      .toString();
}
 
开发者ID:sinmetal,项目名称:iron-hippo,代码行数:11,代码来源:WordCount.java

示例5: create

import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; //导入依赖的package包/类
@Override
public String create(PipelineOptions options) {
  DataflowPipelineOptions dataflowPipelineOptions =
      options.as(DataflowPipelineOptions.class);
  // Default topic path: projects/<project>/topics/<job-name>.
  StringBuilder topic = new StringBuilder();
  topic.append("projects/").append(dataflowPipelineOptions.getProject());
  topic.append("/topics/").append(dataflowPipelineOptions.getJobName());
  return topic.toString();
}
 
开发者ID:sinmetal,项目名称:iron-hippo,代码行数:8,代码来源:ExamplePubsubTopicOptions.java

示例6: getCloudExecutionOptions

import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; //导入依赖的package包/类
private PipelineOptions getCloudExecutionOptions(String stagingLocation) {
  // Run on the managed Dataflow service and block until the job completes.
  DataflowPipelineOptions cloudOptions =
      PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  cloudOptions.setProject(Constants.PROJECT_ID);
  cloudOptions.setStagingLocation(stagingLocation);
  cloudOptions.setRunner(BlockingDataflowPipelineRunner.class);
  return cloudOptions;
}
 
开发者ID:GoogleCloudPlatform,项目名称:policyscanner,代码行数:8,代码来源:LiveStateCheckerApp.java

示例7: getCloudExecutionOptions

import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; //导入依赖的package包/类
private PipelineOptions getCloudExecutionOptions(String stagingLocation) {
  // Blocking cloud execution; the project id comes from the App Engine runtime.
  DataflowPipelineOptions cloudOptions =
      PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  cloudOptions.setProject(SystemProperty.applicationId.get());
  cloudOptions.setStagingLocation(stagingLocation);
  cloudOptions.setRunner(BlockingDataflowPipelineRunner.class);
  return cloudOptions;
}
 
开发者ID:GoogleCloudPlatform,项目名称:policyscanner,代码行数:8,代码来源:UserManagedKeysApp.java

示例8: getCloudExecutionOptions

import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; //导入依赖的package包/类
private static PipelineOptions getCloudExecutionOptions(String stagingLocation) {
  // Blocking cloud execution; the project id comes from the App Engine runtime.
  DataflowPipelineOptions cloudOptions =
      PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  cloudOptions.setProject(SystemProperty.applicationId.get());
  cloudOptions.setStagingLocation(stagingLocation);
  cloudOptions.setRunner(BlockingDataflowPipelineRunner.class);
  return cloudOptions;
}
 
开发者ID:GoogleCloudPlatform,项目名称:policyscanner,代码行数:8,代码来源:LiveStateCheckerRunner.java

示例9: createDataflow

import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; //导入依赖的package包/类
/**
 * The workflow defn implementation is responsible for defining the workflow steps and default
 * args, and creating a Dataflow pipeline.
 *
 * @param argsTable workflow arguments keyed by String (presumably workflow/task name — confirm)
 * @param pipelineOptions the Dataflow pipeline options to build the pipeline with
 * @param args raw command-line arguments, forwarded to {@code createWorkflow}
 * @return the constructed Dataflow pipeline
 * @throws IOException if the workflow or pipeline cannot be created
 */
default Pipeline createDataflow(
    Map<String, WorkflowArgs> argsTable, DataflowPipelineOptions pipelineOptions, String[] args)
    throws IOException {
  return DataflowBuilder.of(createWorkflow(args))
      .createFrom(argsTable)
      .pipelineOptions(pipelineOptions)
      .build();
}
 
开发者ID:googlegenomics,项目名称:dockerflow,代码行数:15,代码来源:WorkflowDefn.java

示例10: manualDataflow

import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; //导入依赖的package包/类
/**
 * For simple linear graphs, it's not too hard to generate the Dataflow pipeline yourself. Here's
 * the equivalent Dataflow code for this simple example.
 */
public static void manualDataflow(String[] args) throws IOException {
  LOG.info("Parsing Dataflow options");
  DataflowPipelineOptions options = DataflowFactory.pipelineOptions(args);
  options.setAppName(MultiLinearGraph.class.getSimpleName());

  // Two Docker tasks applied back-to-back over the argument table.
  Pipeline pipeline = Pipeline.create(options);
  pipeline
      .apply(Create.of(ArgsTableBuilder.fromArgs(args).build()))
      .apply(DockerDo.of(taskOne()))
      .apply(DockerDo.of(taskTwo()));
  pipeline.run();
}
 
开发者ID:googlegenomics,项目名称:dockerflow,代码行数:16,代码来源:MultiLinearGraph.java

示例11: run

import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; //导入依赖的package包/类
public static void run() {
  DataflowPipelineOptions options = PipelineOptionsFactory.create()
      .as(DataflowPipelineOptions.class);
  options.setRunner(BlockingDataflowPipelineRunner.class);
  options.setProject("chrome-oven-144308");
  options.setFilesToStage(
      detectClassPathResourcesToStage(
          DataflowPipelineRunner.class.getClassLoader()
      )
  );
  options.setStagingLocation("gs://dataflow-chrome-oven-144308/stagingForScheduledPipeline");

  Pipeline p = Pipeline.create(options);

  System.out.println("get here 0");
  p.apply(TextIO.Read.from("gs://dataflow-samples/shakespeare/*"))
      .apply(ParDo.named("ExtractWords").of(new DoFn<String, String>() {
        @Override
        public void processElement(ProcessContext c) {
          System.out.println("get here 1");
          for (String word : c.element().split("[^a-zA-Z']+")) {
            if (!word.isEmpty()) {
              c.output(word);
            }
          }
        }
      }))
      .apply(Count.<String>perElement())
      .apply("FormatResults", MapElements.via(new SimpleFunction<KV<String, Long>, String>() {
        @Override
        public String apply(KV<String, Long> input) {
          System.out.println("get here 3");
          return input.getKey() + ": " + input.getValue();
        }
      }))

      .apply(TextIO.Write.to("gs://dataflow-chrome-oven-144308/scheduled"));

  p.run();
}
 
开发者ID:viktort,项目名称:appengine-cron-example,代码行数:41,代码来源:ScheduledMinimalWordCount.java

示例12: main

import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; //导入依赖的package包/类
/**
 * Entry point: reads a "--params=" argument, builds service-account credentials,
 * configures a Dataflow job from values loaded by {@code init(params)}, and wires
 * a Pub/Sub topic into a BigQuery table.
 */
public static void main(String[] args) throws GeneralSecurityException, IOException, ParseException, ParserConfigurationException, SAXException {
	// Extract the value of the single supported flag, "--params=".
	String params = null;
	for (int i = 0; i < args.length; i++) {
		if (args[i].startsWith("--params="))
			params = args[i].replaceFirst("--params=", "");
	}

	System.out.println(params);
	// init() populates the static config fields read below (projectId, keyFile, ...).
	init(params);

	// Service-account credentials from a P12 key file, scoped to all of Google Cloud.
	GoogleCredential credential = new GoogleCredential.Builder()
			.setTransport(new NetHttpTransport())
			.setJsonFactory(new JacksonFactory())
			.setServiceAccountId(accountEmail)
			.setServiceAccountScopes(Arrays.asList(new String[] {"https://www.googleapis.com/auth/cloud-platform"}))
			.setServiceAccountPrivateKeyFromP12File(new File(keyFile))
			.build();

	// Configure the (non-blocking) Dataflow runner from the loaded configuration.
	DataflowPipelineOptions options = PipelineOptionsFactory.create().as(DataflowPipelineOptions.class);
	options.setRunner(DataflowPipelineRunner.class);
	// Your project ID is required in order to run your pipeline on the Google Cloud.
	options.setProject(projectId);
	// Your Google Cloud Storage path is required for staging local files.
	options.setStagingLocation(workingBucket);
	options.setGcpCredential(credential);
	options.setServiceAccountName(accountEmail);
	options.setServiceAccountKeyfile(keyFile);
	options.setMaxNumWorkers(maxNumWorkers);
	options.setDiskSizeGb(diskSizeGb);
	options.setWorkerMachineType(machineType);
	options.setAutoscalingAlgorithm(AutoscalingAlgorithmType.THROUGHPUT_BASED);
	options.setZone(zone);
	options.setStreaming(isStreaming);
	options.setJobName(pipelineName);


	// Parse the BigQuery schema (JSON string loaded by init) and build the flow:
	// Pub/Sub message -> TableRow -> BigQuery append.
	Gson gson = new Gson();
	TableSchema schema = gson.fromJson(schemaStr, TableSchema.class);
	Pipeline pipeline = Pipeline.create(options);
	PCollection<String> streamData =
			pipeline.apply(PubsubIO.Read.named("ReadFromPubsub")
					.topic(String.format("projects/%1$s/topics/%2$s",projectId,pubSubTopic)));
	PCollection<TableRow> tableRow = streamData.apply("ToTableRow", ParDo.of(new PrepData.ToTableRow()));


	tableRow.apply(BigQueryIO.Write
			.named("WriteBQTable")
			.to(String.format("%1$s:%2$s.%3$s",projectId, bqDataSet, bqTable))
			.withSchema(schema)
			.withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_APPEND));

	System.out.println("Starting pipeline " + pipelineName);
	pipeline.run();
}
 
开发者ID:bomboradata,项目名称:pubsub-to-bigquery,代码行数:55,代码来源:PubSubToBQPipeline.java

示例13: DataflowExampleUtils

import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; //导入依赖的package包/类
/**
 * Creates the example utility wrapper around the given pipeline options.
 *
 * @param options the Dataflow pipeline options shared by the example
 */
public DataflowExampleUtils(DataflowPipelineOptions options) {
  this.options = options;
}
 
开发者ID:sinmetal,项目名称:iron-hippo,代码行数:4,代码来源:DataflowExampleUtils.java

示例14: create

import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; //导入依赖的package包/类
@Override
public String create(PipelineOptions options) {
  // Derive the default name from the job name; '-' is swapped for '_'
  // (presumably because the target naming rules disallow dashes — confirm).
  String jobName = options.as(DataflowPipelineOptions.class).getJobName();
  return jobName.replace('-', '_');
}
 
开发者ID:sinmetal,项目名称:iron-hippo,代码行数:6,代码来源:ExampleBigQueryTableOptions.java

示例15: startBundle

import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; //导入依赖的package包/类
@Override
public void startBundle(Context context) {
  // Build a Pub/Sub client from the pipeline's Dataflow options, once per bundle.
  DataflowPipelineOptions pipelineOptions =
      context.getPipelineOptions().as(DataflowPipelineOptions.class);
  this.pubsub = Transport.newPubsubClient(pipelineOptions).build();
}
 
开发者ID:sinmetal,项目名称:iron-hippo,代码行数:7,代码来源:PubsubFileInjector.java


注:本文中的com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。