

Java DataflowPipelineOptions.setWorkerMachineType Method Code Examples

This article collects typical usage examples of the Java method com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions.setWorkerMachineType. If you are wondering what DataflowPipelineOptions.setWorkerMachineType does, how to call it, or what real-world usage looks like, the curated code examples below should help. You can also explore further usage examples of the containing class, com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions.


Three code examples of the DataflowPipelineOptions.setWorkerMachineType method are shown below, sorted by popularity by default.
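
Before the full examples, here is a minimal sketch of how setWorkerMachineType is typically used when configuring Dataflow pipeline options. The project ID, staging bucket, and machine type are placeholder values, not taken from the examples below:

import com.google.cloud.dataflow.sdk.Pipeline;
import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
import com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner;

public class WorkerMachineTypeExample {
  public static void main(String[] args) {
    // Placeholder project, bucket, and machine type; replace with your own values.
    DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
    options.setProject("my-project");
    options.setStagingLocation("gs://my-bucket/staging");
    options.setRunner(DataflowPipelineRunner.class);
    // Choose the Compute Engine machine type used by Dataflow worker VMs
    options.setWorkerMachineType("n1-standard-4");

    // The options are then passed to Pipeline.create() as usual
    Pipeline p = Pipeline.create(options);
  }
}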

Example 1: pipelineOptions

import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; // import the class this method depends on
/**
 * Create Dataflow pipeline options from the standard command-line options, such as
 * "--project=", "--runner=" and "--stagingLocation=".
 *
 * @param args command-line arguments to parse
 * @return the configured DataflowPipelineOptions
 * @throws IOException
 */
public static DataflowPipelineOptions pipelineOptions(String[] args) throws IOException {
  LOG.info("Set up Dataflow options");
  DataflowPipelineOptions o = PipelineOptionsFactory.as(DataflowPipelineOptions.class);

  Map<String, String> m = StringUtils.parseArgs(args);
  o.setProject(m.get(PROJECT));
  // The staging location can be given explicitly or derived from the workspace path
  if (m.containsKey(STAGING)) {
    o.setStagingLocation(m.get(STAGING));
  } else if (m.containsKey(STAGING_LOCATION)) {
    o.setStagingLocation(m.get(STAGING_LOCATION));
  } else if (m.containsKey(WORKSPACE)) {
    o.setStagingLocation(m.get(WORKSPACE) + "/staging");
  }
  o.setRunner(runner(m.get(RUNNER)));
  // Default to a single worker unless a maximum is given
  o.setMaxNumWorkers(m.get(MAX_WORKERS) == null ? 1 : Integer.parseInt(m.get(MAX_WORKERS)));
  // Use the requested worker machine type, or fall back to the default
  if (m.containsKey(MACHINE_TYPE)) {
    o.setWorkerMachineType(m.get(MACHINE_TYPE));
  } else {
    o.setWorkerMachineType(DEFAULT_MACHINE_TYPE);
  }
  return o;
}
 
Author: googlegenomics | Project: dockerflow | Lines: 31 | Source: DataflowFactory.java
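
A hypothetical call site for this factory method, for illustration only. The flag names below (--project=, --runner=, --workspace=, --machine-type=) are assumptions about what StringUtils.parseArgs accepts and may not match dockerflow's actual constants:

// Hypothetical invocation; the exact flag names accepted by StringUtils.parseArgs
// are assumptions, not verified against the dockerflow source.
String[] args = {
    "--project=my-gcp-project",
    "--runner=DataflowPipelineRunner",
    "--workspace=gs://my-bucket/work",   // staging becomes gs://my-bucket/work/staging
    "--machine-type=n1-standard-1"
};
DataflowPipelineOptions options = DataflowFactory.pipelineOptions(args);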

Example 2: main

import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; // import the class this method depends on
public static void main(String[] args) throws GeneralSecurityException, IOException, ParseException, ParserConfigurationException, SAXException {
	// Pull the value of the --params= flag from the command line
	String params = null;
	for (int i = 0; i < args.length; i++) {
		if (args[i].startsWith("--params="))
			params = args[i].replaceFirst("--params=", "");
	}

	System.out.println(params);
	// Initialize the pipeline configuration fields used below
	init(params);

	// Build a service-account credential from the P12 key file
	GoogleCredential credential = new GoogleCredential.Builder()
			.setTransport(new NetHttpTransport())
			.setJsonFactory(new JacksonFactory())
			.setServiceAccountId(accountEmail)
			.setServiceAccountScopes(Arrays.asList("https://www.googleapis.com/auth/cloud-platform"))
			.setServiceAccountPrivateKeyFromP12File(new File(keyFile))
			.build();

	DataflowPipelineOptions options = PipelineOptionsFactory.create().as(DataflowPipelineOptions.class);
	options.setRunner(DataflowPipelineRunner.class);
	// Your project ID is required in order to run your pipeline on the Google Cloud.
	options.setProject(projectId);
	// Your Google Cloud Storage path is required for staging local files.
	options.setStagingLocation(workingBucket);
	options.setGcpCredential(credential);
	options.setServiceAccountName(accountEmail);
	options.setServiceAccountKeyfile(keyFile);
	options.setMaxNumWorkers(maxNumWorkers);
	options.setDiskSizeGb(diskSizeGb);
	// Worker machine type, autoscaling, and placement for the Dataflow job
	options.setWorkerMachineType(machineType);
	options.setAutoscalingAlgorithm(AutoscalingAlgorithmType.THROUGHPUT_BASED);
	options.setZone(zone);
	options.setStreaming(isStreaming);
	options.setJobName(pipelineName);

	// Parse the BigQuery table schema from its JSON definition
	Gson gson = new Gson();
	TableSchema schema = gson.fromJson(schemaStr, TableSchema.class);
	Pipeline pipeline = Pipeline.create(options);
	// Read messages from the Pub/Sub topic and convert each one to a BigQuery TableRow
	PCollection<String> streamData =
			pipeline.apply(PubsubIO.Read.named("ReadFromPubsub")
					.topic(String.format("projects/%1$s/topics/%2$s", projectId, pubSubTopic)));
	PCollection<TableRow> tableRow = streamData.apply("ToTableRow", ParDo.of(new PrepData.ToTableRow()));

	// Append the rows to the target BigQuery table
	tableRow.apply(BigQueryIO.Write
			.named("WriteBQTable")
			.to(String.format("%1$s:%2$s.%3$s", projectId, bqDataSet, bqTable))
			.withSchema(schema)
			.withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_APPEND));

	System.out.println("Starting pipeline " + pipelineName);
	pipeline.run();
}
 
Author: bomboradata | Project: pubsub-to-bigquery | Lines: 55 | Source: PubSubToBQPipeline.java

Example 3: dataflow

import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; // import the class this method depends on
/**
 * Dynamically construct a Dataflow pipeline from the workflow definition. The root PCollection
 * has one element: the root task's name.
 *
 * @param workflow the workflow definition
 * @param workflowArgs per-instance workflow arguments, keyed by workflow name
 * @param o the Dataflow pipeline options to use
 * @return the constructed Pipeline
 * @throws IOException
 */
public static Pipeline dataflow(
    Workflow workflow, Map<String, WorkflowArgs> workflowArgs, DataflowPipelineOptions o)
    throws IOException {

  assert (workflow != null);
  assert (o != null);
  assert (workflow.getDefn() != null);

  // Set defaults
  if (o.getAppName() == null) {
    o.setAppName(workflow.getDefn().getName());
  }
  if (o.getProject() == null && workflow.getArgs() != null) {
    o.setProject(workflow.getArgs().getProjectId());
  }
  if (o.getMaxNumWorkers() == 0) {
    o.setMaxNumWorkers(1);
  }
  if (o.getWorkerMachineType() == null) {
    o.setWorkerMachineType(DEFAULT_MACHINE_TYPE);
  }

  LOG.info("Initializing dataflow pipeline");
  Pipeline p = Pipeline.create(o);

  LOG.info("Creating input collection of workflow args");
  if (workflowArgs == null) {
    workflowArgs = new HashMap<String, WorkflowArgs>();
  }
  if (workflowArgs.isEmpty()) {
    LOG.info("No workflow args were provided. Using default values.");
    workflowArgs.put(workflow.getDefn().getName(), new WorkflowArgs());
  } else if (workflow.getArgs() != null) {
    LOG.info("Merging default workflow args with instance-specific args");

    for (String key : workflowArgs.keySet()) {
      WorkflowArgs instanceArgs = workflowArgs.get(key);
      instanceArgs.mergeDefaultArgs(workflow.getArgs());
      LOG.debug("Merged args: " + StringUtils.toJson(instanceArgs));
    }
  }

  LOG.info("Creating dataflow pipeline for workflow " + workflow.getDefn().getName());
  PCollection<KV<String, WorkflowArgs>> input = p.apply(Create.of(workflowArgs));
  input = dataflow(Workflow.Steps.graph(workflow), input);

  if (workflowArgs.values().iterator().next().getDeleteFiles()) {
    LOG.info("Intermediate files will be deleted");
    input =
        input.apply(
            ParDo.named("DeleteIntermediateFiles").of(new DeleteIntermediateFiles(workflow)));
  }

  return p;
}
 
Author: googlegenomics | Project: dockerflow | Lines: 65 | Source: DataflowFactory.java
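
As a usage sketch: only DataflowFactory.pipelineOptions and DataflowFactory.dataflow appear in this article, so the workflow loader and file name below are assumptions added for illustration:

// Hypothetical call site; WorkflowFactory.load("workflow.yaml") is an assumed
// way of obtaining a Workflow and is not part of the code shown above.
DataflowPipelineOptions o = DataflowFactory.pipelineOptions(args);            // see Example 1
Workflow workflow = WorkflowFactory.load("workflow.yaml");                    // assumed loader
Map<String, WorkflowArgs> instanceArgs = new HashMap<String, WorkflowArgs>(); // empty: use defaults

Pipeline p = DataflowFactory.dataflow(workflow, instanceArgs, o);
p.run(); // submit the dynamically constructed pipeline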


Note: The com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions.setWorkerMachineType examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by many developers, and copyright in the source code remains with the original authors. For redistribution and use, please follow the corresponding project's license; do not republish without permission.