This page collects typical usage examples of the Java class com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions. If you are wondering what DataflowPipelineOptions is for and how to use it in practice, the curated examples below should help.
DataflowPipelineOptions belongs to the com.google.cloud.dataflow.sdk.options package. 15 code examples of the class are shown below, sorted by popularity by default.
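Before the individual examples, here is a minimal, self-contained sketch of the pattern most of them share: create the options through PipelineOptionsFactory, view them as DataflowPipelineOptions, and hand them to a Pipeline. The project ID and staging bucket are placeholders, not values taken from any example.

import com.google.cloud.dataflow.sdk.Pipeline;
import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;

public class MinimalOptionsSketch {
  public static void main(String[] args) {
    // Parse flags such as --project=, --stagingLocation= and --runner= from the
    // command line, then view the result as DataflowPipelineOptions.
    DataflowPipelineOptions options =
        PipelineOptionsFactory.fromArgs(args).as(DataflowPipelineOptions.class);
    options.setProject("my-project-id");                  // placeholder
    options.setStagingLocation("gs://my-bucket/staging"); // placeholder
    Pipeline p = Pipeline.create(options);
    p.run();
  }
}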
Example 1: runInjectorPipeline
import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; // import the required package/class
/**
* Runs the batch injector for the streaming pipeline.
*
* <p>The injector pipeline will read from the given text file, and inject data
* into the Google Cloud Pub/Sub topic.
*/
public void runInjectorPipeline(String inputFile, String topic) {
  DataflowPipelineOptions copiedOptions = options.cloneAs(DataflowPipelineOptions.class);
  copiedOptions.setStreaming(false);
  copiedOptions.setWorkerHarnessContainerImage(
      DataflowPipelineRunner.BATCH_WORKER_HARNESS_CONTAINER_IMAGE);
  copiedOptions.setNumWorkers(
      options.as(ExamplePubsubTopicOptions.class).getInjectorNumWorkers());
  copiedOptions.setJobName(options.getJobName() + "-injector");
  Pipeline injectorPipeline = Pipeline.create(copiedOptions);
  injectorPipeline.apply(TextIO.Read.from(inputFile))
      .apply(IntraBundleParallelization
          .of(PubsubFileInjector.publish(topic))
          .withMaxParallelism(20));
  DataflowPipelineJob injectorJob = (DataflowPipelineJob) injectorPipeline.run();
  jobsToCancel.add(injectorJob);
}
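runInjectorPipeline reads two fields of its enclosing class that the snippet does not show: options and jobsToCancel. A hypothetical reconstruction of that surrounding state (names taken from the snippet; the real class may declare them differently):

// Hypothetical context for runInjectorPipeline; not part of the original snippet.
private final DataflowPipelineOptions options;  // source options, cloned via cloneAs(...) above
private final List<DataflowPipelineJob> jobsToCancel = new ArrayList<>();  // injector jobs to cancel on teardown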
Example 2: pipelineOptions
import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; // import the required package/class
/**
 * Create Dataflow pipeline options from the standard command-line options, "--project=",
 * "--runner=" and "--stagingLocation=".
 *
 * @param args the command-line arguments to parse
 * @return the configured Dataflow pipeline options
 * @throws IOException if the options cannot be created
 */
public static DataflowPipelineOptions pipelineOptions(String[] args) throws IOException {
  LOG.info("Set up Dataflow options");
  DataflowPipelineOptions o = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  Map<String, String> m = StringUtils.parseArgs(args);
  o.setProject(m.get(PROJECT));
  if (m.containsKey(STAGING)) {
    o.setStagingLocation(m.get(STAGING));
  } else if (m.containsKey(STAGING_LOCATION)) {
    o.setStagingLocation(m.get(STAGING_LOCATION));
  } else if (m.containsKey(WORKSPACE)) {
    o.setStagingLocation(m.get(WORKSPACE) + "/staging");
  }
  o.setRunner(runner(m.get(RUNNER)));
  o.setMaxNumWorkers(m.get(MAX_WORKERS) == null ? 1 : Integer.parseInt(m.get(MAX_WORKERS)));
  if (m.containsKey(MACHINE_TYPE)) {
    o.setWorkerMachineType(m.get(MACHINE_TYPE));
  } else {
    o.setWorkerMachineType(DEFAULT_MACHINE_TYPE);
  }
  return o;
}
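The javadoc above names the supported flags, so a quick usage sketch looks like the following; the flag values are placeholders, and Example 10 shows the same helper in context:

// Sketch: build options from explicit flags (values are placeholders).
DataflowPipelineOptions o = DataflowFactory.pipelineOptions(new String[] {
    "--project=my-project-id",
    "--stagingLocation=gs://my-bucket/staging",
    "--runner=BlockingDataflowPipelineRunner"
});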
Example 3: run
import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; // import the required package/class
/** Run a Docker workflow on Dataflow. */
public static void run(Workflow w, Map<String, WorkflowArgs> a, DataflowPipelineOptions o)
    throws IOException {
  LOG.info("Running workflow graph");
  if (w.getArgs().getProjectId() == null) {
    throw new IllegalArgumentException("Project id is required");
  }
  Pipeline p = DataflowFactory.dataflow(w, a, o);
  LOG.info("Created Dataflow pipeline");
  LOG.debug(w.toString());
  PipelineResult r = p.run();
  LOG.info("Dataflow pipeline completed");
  LOG.info("Result state: " + r.getState());
}
Example 4: create
import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; // import the required package/class
@Override
public String create(PipelineOptions options) {
  DataflowPipelineOptions dataflowOptions = options.as(DataflowPipelineOptions.class);
  if (dataflowOptions.getStagingLocation() != null) {
    return GcsPath.fromUri(dataflowOptions.getStagingLocation())
        .resolve("counts.txt").toString();
  } else {
    throw new IllegalArgumentException("Must specify --output or --stagingLocation");
  }
}
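This create(PipelineOptions) method implements the SDK's DefaultValueFactory hook for computing a default option value on demand. A plausible wiring into an options interface is sketched below; the interface name, getter, and the OutputFactory class name are assumptions for illustration:

// Hypothetical options interface using the factory above as a default value.
public interface WordCountOptions extends DataflowPipelineOptions {
  @Description("Output path; defaults to counts.txt under the staging location")
  @Default.InstanceFactory(OutputFactory.class)  // assumed name of the factory class above
  String getOutput();
  void setOutput(String value);
}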
Example 5: create
import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; // import the required package/class
@Override
public String create(PipelineOptions options) {
  DataflowPipelineOptions dataflowPipelineOptions =
      options.as(DataflowPipelineOptions.class);
  return "projects/" + dataflowPipelineOptions.getProject()
      + "/topics/" + dataflowPipelineOptions.getJobName();
}
Example 6: getCloudExecutionOptions
import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; // import the required package/class
private PipelineOptions getCloudExecutionOptions(String stagingLocation) {
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setProject(Constants.PROJECT_ID);
  options.setStagingLocation(stagingLocation);
  options.setRunner(BlockingDataflowPipelineRunner.class);
  return options;
}
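Examples 6 through 8 differ only in how the project ID is obtained. A local counterpart is a common companion to this kind of helper; here is a sketch, assuming the SDK's in-process DirectPipelineRunner:

// Sketch: local counterpart to getCloudExecutionOptions, using the in-process runner.
private PipelineOptions getLocalExecutionOptions() {
  PipelineOptions options = PipelineOptionsFactory.create();
  options.setRunner(DirectPipelineRunner.class);  // com.google.cloud.dataflow.sdk.runners
  return options;
}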
Example 7: getCloudExecutionOptions
import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; // import the required package/class
private PipelineOptions getCloudExecutionOptions(String stagingLocation) {
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setProject(SystemProperty.applicationId.get());
  options.setStagingLocation(stagingLocation);
  options.setRunner(BlockingDataflowPipelineRunner.class);
  return options;
}
Example 8: getCloudExecutionOptions
import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; // import the required package/class
private static PipelineOptions getCloudExecutionOptions(String stagingLocation) {
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setProject(SystemProperty.applicationId.get());
  options.setStagingLocation(stagingLocation);
  options.setRunner(BlockingDataflowPipelineRunner.class);
  return options;
}
Example 9: createDataflow
import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; // import the required package/class
/**
 * The workflow definition implementation is responsible for defining the workflow steps and
 * default args, and creating a Dataflow pipeline.
 *
 * @throws IOException if the Dataflow pipeline cannot be created
 */
default Pipeline createDataflow(
    Map<String, WorkflowArgs> argsTable, DataflowPipelineOptions pipelineOptions, String[] args)
    throws IOException {
  return DataflowBuilder.of(createWorkflow(args))
      .createFrom(argsTable)
      .pipelineOptions(pipelineOptions)
      .build();
}
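A hypothetical driver for this default method, reusing the helpers seen in Examples 2 and 10; MyWorkflowDefn is an assumed implementing class, not part of the snippets:

// Hypothetical: run a workflow definition through createDataflow(...).
public static void main(String[] args) throws IOException {
  DataflowPipelineOptions o = DataflowFactory.pipelineOptions(args);              // Example 2
  Map<String, WorkflowArgs> argsTable = ArgsTableBuilder.fromArgs(args).build();  // as in Example 10
  new MyWorkflowDefn().createDataflow(argsTable, o, args).run();
}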
Example 10: manualDataflow
import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; // import the required package/class
/**
* For simple linear graphs, it's not too hard to generate the Dataflow pipeline yourself. Here's
* the equivalent Dataflow code for this simple example.
*/
public static void manualDataflow(String[] args) throws IOException {
  LOG.info("Parsing Dataflow options");
  DataflowPipelineOptions o = DataflowFactory.pipelineOptions(args);
  o.setAppName(MultiLinearGraph.class.getSimpleName());
  Pipeline p = Pipeline.create(o);
  p.apply(Create.of(ArgsTableBuilder.fromArgs(args).build()))
      .apply(DockerDo.of(taskOne()))
      .apply(DockerDo.of(taskTwo()));
  p.run();
}
Example 11: run
import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; // import the required package/class
public static void run() {
  DataflowPipelineOptions options = PipelineOptionsFactory.create()
      .as(DataflowPipelineOptions.class);
  options.setRunner(BlockingDataflowPipelineRunner.class);
  options.setProject("chrome-oven-144308");
  options.setFilesToStage(
      detectClassPathResourcesToStage(
          DataflowPipelineRunner.class.getClassLoader()));
  options.setStagingLocation("gs://dataflow-chrome-oven-144308/stagingForScheduledPipeline");
  Pipeline p = Pipeline.create(options);
  System.out.println("get here 0");
  p.apply(TextIO.Read.from("gs://dataflow-samples/shakespeare/*"))
      .apply(ParDo.named("ExtractWords").of(new DoFn<String, String>() {
        @Override
        public void processElement(ProcessContext c) {
          System.out.println("get here 1");
          for (String word : c.element().split("[^a-zA-Z']+")) {
            if (!word.isEmpty()) {
              c.output(word);
            }
          }
        }
      }))
      .apply(Count.<String>perElement())
      .apply("FormatResults", MapElements.via(new SimpleFunction<KV<String, Long>, String>() {
        @Override
        public String apply(KV<String, Long> input) {
          System.out.println("get here 3");
          return input.getKey() + ": " + input.getValue();
        }
      }))
      .apply(TextIO.Write.to("gs://dataflow-chrome-oven-144308/scheduled"));
  p.run();
}
Example 12: main
import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; // import the required package/class
public static void main(String[] args) throws GeneralSecurityException, IOException,
    ParseException, ParserConfigurationException, SAXException {
  String params = null;
  for (int i = 0; i < args.length; i++) {
    if (args[i].startsWith("--params=")) {
      params = args[i].replaceFirst("--params=", "");
    }
  }
  System.out.println(params);
  init(params);
  GoogleCredential credential = new GoogleCredential.Builder()
      .setTransport(new NetHttpTransport())
      .setJsonFactory(new JacksonFactory())
      .setServiceAccountId(accountEmail)
      .setServiceAccountScopes(
          Arrays.asList("https://www.googleapis.com/auth/cloud-platform"))
      .setServiceAccountPrivateKeyFromP12File(new File(keyFile))
      .build();
  DataflowPipelineOptions options =
      PipelineOptionsFactory.create().as(DataflowPipelineOptions.class);
  options.setRunner(DataflowPipelineRunner.class);
  // Your project ID is required in order to run your pipeline on the Google Cloud.
  options.setProject(projectId);
  // Your Google Cloud Storage path is required for staging local files.
  options.setStagingLocation(workingBucket);
  options.setGcpCredential(credential);
  options.setServiceAccountName(accountEmail);
  options.setServiceAccountKeyfile(keyFile);
  options.setMaxNumWorkers(maxNumWorkers);
  options.setDiskSizeGb(diskSizeGb);
  options.setWorkerMachineType(machineType);
  options.setAutoscalingAlgorithm(AutoscalingAlgorithmType.THROUGHPUT_BASED);
  options.setZone(zone);
  options.setStreaming(isStreaming);
  options.setJobName(pipelineName);
  Gson gson = new Gson();
  TableSchema schema = gson.fromJson(schemaStr, TableSchema.class);
  Pipeline pipeline = Pipeline.create(options);
  PCollection<String> streamData =
      pipeline.apply(PubsubIO.Read.named("ReadFromPubsub")
          .topic(String.format("projects/%1$s/topics/%2$s", projectId, pubSubTopic)));
  PCollection<TableRow> tableRow =
      streamData.apply("ToTableRow", ParDo.of(new PrepData.ToTableRow()));
  tableRow.apply(BigQueryIO.Write
      .named("WriteBQTable")
      .to(String.format("%1$s:%2$s.%3$s", projectId, bqDataSet, bqTable))
      .withSchema(schema)
      .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_APPEND));
  System.out.println("Starting pipeline " + pipelineName);
  pipeline.run();
}
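The snippet turns schemaStr into a BigQuery TableSchema with Gson, so the string must carry the JSON shape of a TableSchema. A hypothetical value for illustration; the field names and types are placeholders, and it assumes Gson can populate TableSchema's "fields" list this way:

// Hypothetical schema JSON matching TableSchema's field layout (placeholders).
String schemaStr = "{\"fields\":["
    + "{\"name\":\"timestamp\",\"type\":\"TIMESTAMP\"},"
    + "{\"name\":\"message\",\"type\":\"STRING\"}]}";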
Example 13: DataflowExampleUtils
import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; // import the required package/class
public DataflowExampleUtils(DataflowPipelineOptions options) {
  this.options = options;
}
Example 14: create
import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; // import the required package/class
@Override
public String create(PipelineOptions options) {
  return options.as(DataflowPipelineOptions.class).getJobName()
      .replace('-', '_');
}
Example 15: startBundle
import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; // import the required package/class
@Override
public void startBundle(Context context) {
  this.pubsub =
      Transport.newPubsubClient(context.getPipelineOptions().as(DataflowPipelineOptions.class))
          .build();
}