This article collects typical usage examples of the Java class com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory. If you are wondering what PipelineOptionsFactory is for, or how to use it in practice, the curated examples below should help.
The PipelineOptionsFactory class belongs to the com.google.cloud.dataflow.sdk.options package. 15 code examples of the class are shown below, sorted by popularity by default.
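Before diving in, here is a minimal sketch of the two PipelineOptionsFactory entry points the examples rely on. MyOptions is a placeholder interface for illustration, not part of the SDK:

import com.google.cloud.dataflow.sdk.options.PipelineOptions;
import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;

public class OptionsFactoryBasics {
  // Placeholder for any user-defined options interface.
  public interface MyOptions extends PipelineOptions {}

  public static void main(String[] args) {
    // Default options, as used by the unit tests below (Examples 7-11).
    PipelineOptions defaults = PipelineOptionsFactory.create();

    // Parse command-line flags, validate them, and view the result as a
    // custom interface (the pattern in Examples 1-6 and 14).
    MyOptions parsed = PipelineOptionsFactory.fromArgs(args)
        .withValidation()
        .as(MyOptions.class);
  }
}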
Example 1: main
import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; // import the required package/class
public static void main(String[] args) {
  BigQueryToDatastoreOptions options = PipelineOptionsFactory.fromArgs(args).withValidation()
      .as(BigQueryToDatastoreOptions.class);

  String inputTable = options.getInputTable().get();
  String projectID = options.getOutputProjectID().get();
  String kind = options.getOutputKind().get();

  LOG.info("Input_Table : " + inputTable);
  LOG.info("ProjectID : " + projectID);
  LOG.info("Kind : " + kind);

  Pipeline p = Pipeline.create(options);

  // Read rows from BigQuery and group them by keyword.
  PCollection<KV<Integer, Iterable<TableRow>>> keywordGroups = p
      .apply(BigQueryIO.Read.named("ReadUtterance").from(inputTable))
      .apply(new GroupKeywords());

  // Convert each keyword group into a Datastore entity of the requested kind.
  CreateEntities createEntities = new CreateEntities();
  createEntities.setKind(kind);
  PCollection<Entity> entities = keywordGroups.apply(createEntities);

  // Write the entities to Datastore and run the pipeline.
  entities.apply(DatastoreIO.v1().write().withProjectId(projectID));
  p.run();
}
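The BigQueryToDatastoreOptions interface referenced above is not shown. Given the .get() calls on each getter, a plausible sketch uses ValueProvider-style getters; the annotations and descriptions below are assumptions:

public interface BigQueryToDatastoreOptions extends PipelineOptions {
  @Description("BigQuery input table, e.g. project:dataset.table")
  ValueProvider<String> getInputTable();
  void setInputTable(ValueProvider<String> value);

  @Description("Project ID of the target Cloud Datastore")
  ValueProvider<String> getOutputProjectID();
  void setOutputProjectID(ValueProvider<String> value);

  @Description("Datastore kind for the output entities")
  ValueProvider<String> getOutputKind();
  void setOutputKind(ValueProvider<String> value);
}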
Example 2: main
import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; // import the required package/class
/**
 * Run a batch pipeline.
 */
public static void main(String[] args) throws Exception {
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  Pipeline pipeline = Pipeline.create(options);

  TableReference tableRef = new TableReference();
  tableRef.setDatasetId(options.as(Options.class).getOutputDataset());
  tableRef.setProjectId(options.as(GcpOptions.class).getProject());
  tableRef.setTableId(options.getOutputTableName());

  // Read events from a CSV file and parse them.
  pipeline
      .apply(TextIO.Read.from(options.getInput()))
      .apply(ParDo.named("ParseGameEvent").of(new ParseEventFn()))
      // Extract and sum username/score pairs from the event data.
      .apply("ExtractUserScore", new ExtractAndSumScore("user"))
      // Write the results to BigQuery.
      .apply(ParDo.named("FormatUserScoreSums").of(new FormatUserScoreSumsFn()))
      .apply(
          BigQueryIO.Write.to(tableRef)
              .withSchema(FormatUserScoreSumsFn.getSchema())
              .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
              .withWriteDisposition(WriteDisposition.WRITE_APPEND));

  pipeline.run();
}
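The Options interface shared by Examples 2-4 is likewise not included. Since withValidation() is called, the required fields are presumably annotated; a hedged sketch:

public interface Options extends PipelineOptions {
  @Description("Path of the CSV file to read from")
  @Validation.Required
  String getInput();
  void setInput(String value);

  @Description("BigQuery dataset to write to")
  @Validation.Required
  String getOutputDataset();
  void setOutputDataset(String value);

  @Description("BigQuery table to write to")
  @Validation.Required
  String getOutputTableName();
  void setOutputTableName(String value);
}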
Example 3: main
import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; // import the required package/class
/** Run a batch or streaming pipeline. */
public static void main(String[] args) throws Exception {
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  Pipeline pipeline = Pipeline.create(options);

  TableReference tableRef = new TableReference();
  tableRef.setDatasetId(options.as(Options.class).getOutputDataset());
  tableRef.setProjectId(options.as(GcpOptions.class).getProject());
  tableRef.setTableId(options.getOutputTableName());

  // Read events from either a CSV file or PubSub stream.
  pipeline
      .apply(new ReadGameEvents(options))
      .apply("WindowedTeamScore", new Exercise2.WindowedTeamScore(Duration.standardMinutes(60)))
      // Write the results to BigQuery.
      .apply(ParDo.named("FormatTeamScoreSums").of(new Exercise2.FormatTeamScoreSumsFn()))
      .apply(
          BigQueryIO.Write.to(tableRef)
              .withSchema(Exercise2.FormatTeamScoreSumsFn.getSchema())
              .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
              .withWriteDisposition(WriteDisposition.WRITE_APPEND));

  pipeline.run();
}
Example 4: main
import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; // import the required package/class
/**
 * Run a batch pipeline.
 */
public static void main(String[] args) throws Exception {
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  Pipeline pipeline = Pipeline.create(options);

  TableReference tableRef = new TableReference();
  tableRef.setDatasetId(options.as(Options.class).getOutputDataset());
  tableRef.setProjectId(options.as(GcpOptions.class).getProject());
  tableRef.setTableId(options.getOutputTableName());

  // Read events from a CSV file, parse them, and write (import) them to BigQuery.
  pipeline
      .apply(TextIO.Read.from(options.getInput()))
      .apply(ParDo.named("ParseGameEvent").of(new ParseEventFn()))
      .apply(ParDo.named("FormatGameEvent").of(new FormatGameEventFn()))
      .apply(
          BigQueryIO.Write.to(tableRef)
              .withSchema(FormatGameEventFn.getSchema())
              .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
              .withWriteDisposition(WriteDisposition.WRITE_APPEND));

  pipeline.run();
}
Example 5: main
import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; // import the required package/class
public static void main(String[] args) {
  CustomPipelineOptions options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(CustomPipelineOptions.class);
  Pipeline p = Pipeline.create(options);

  // Read taxi ride events from Pub/Sub, window them into one-second fixed
  // windows, count rides per key, and publish the results back to Pub/Sub.
  p.apply(PubsubIO.Read.named("read from PubSub")
          .topic(String.format("projects/%s/topics/%s", options.getSourceProject(), options.getSourceTopic()))
          .timestampLabel("ts")
          .withCoder(TableRowJsonCoder.of()))
      .apply("window 1s", Window.into(FixedWindows.of(Duration.standardSeconds(1))))
      .apply("mark rides", MapElements.via(new MarkRides()))
      .apply("count similar", Count.perKey())
      .apply("format rides", MapElements.via(new TransformRides()))
      .apply(PubsubIO.Write.named("WriteToPubsub")
          .topic(String.format("projects/%s/topics/%s", options.getSinkProject(), options.getSinkTopic()))
          .withCoder(TableRowJsonCoder.of()));

  p.run();
}
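Examples 5 and 6 share a CustomPipelineOptions interface whose definition is not included; judging from the getters called above, it might be sketched as follows (annotations and descriptions assumed):

public interface CustomPipelineOptions extends PipelineOptions {
  @Description("Project of the source Pub/Sub topic")
  String getSourceProject();
  void setSourceProject(String value);

  @Description("Name of the source Pub/Sub topic")
  String getSourceTopic();
  void setSourceTopic(String value);

  @Description("Project of the sink Pub/Sub topic")
  String getSinkProject();
  void setSinkProject(String value);

  @Description("Name of the sink Pub/Sub topic")
  String getSinkTopic();
  void setSinkTopic(String value);
}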
Example 6: main
import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; // import the required package/class
public static void main(String[] args) {
  CustomPipelineOptions options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(CustomPipelineOptions.class);
  Pipeline p = Pipeline.create(options);

  // Read ride events from Pub/Sub, keep only those in lower Manhattan,
  // and publish the filtered stream to the sink topic.
  p.apply(PubsubIO.Read.named("read from PubSub")
          .topic(String.format("projects/%s/topics/%s", options.getSourceProject(), options.getSourceTopic()))
          .timestampLabel("ts")
          .withCoder(TableRowJsonCoder.of()))
      .apply("filter lower Manhattan", ParDo.of(new FilterLowerManhattan()))
      .apply(PubsubIO.Write.named("WriteToPubsub")
          .topic(String.format("projects/%s/topics/%s", options.getSinkProject(), options.getSinkTopic()))
          .withCoder(TableRowJsonCoder.of()));

  p.run();
}
Example 7: testBundleSplitIsJustSource
import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; // import the required package/class
@Test
public void testBundleSplitIsJustSource() throws Exception {
  PipelineOptions options = PipelineOptionsFactory.create();

  // Regardless of the desired bundle size (and even with null options),
  // splitting should yield a single bundle: the source itself.
  List<GCSFilesSource> bundles = source.splitIntoBundles(0, null);
  assertEquals(bundles.size(), 1);
  assertEquals(bundles.get(0), source);

  bundles = source.splitIntoBundles(0, options);
  assertEquals(bundles.size(), 1);
  assertEquals(bundles.get(0), source);

  bundles = source.splitIntoBundles(1, options);
  assertEquals(bundles.size(), 1);
  assertEquals(bundles.get(0), source);

  bundles = source.splitIntoBundles(100000, options);
  assertEquals(bundles.size(), 1);
  assertEquals(bundles.get(0), source);

  bundles = source.splitIntoBundles(10, null);
  assertEquals(bundles.size(), 1);
  assertEquals(bundles.get(0), source);
}
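The behavior this test pins down is that GCSFilesSource never splits. A minimal sketch of what such an override might look like inside the source class, assuming the Dataflow SDK 1.x BoundedSource API (this implementation is hypothetical, not taken from the project under test):

@Override
public List<? extends BoundedSource<KV<List<String>, String>>> splitIntoBundles(
    long desiredBundleSizeBytes, PipelineOptions options) throws Exception {
  // Ignore the size hint and always return this source as the only bundle.
  return Collections.singletonList(this);
}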
Example 8: testReaderGetCurrent
import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; // import the required package/class
@Test
public void testReaderGetCurrent() {
  String projectName = "sampleProject";
  String objectName = REPOSITORY + this.source.getDirDelimiter() + projectName;
  String fileContent = "sample file content";
  ByteArrayOutputStream[] out = new ByteArrayOutputStream[1];
  PipelineOptions options = PipelineOptionsFactory.create();

  setUpGetFilesPage(objectName);
  setUpGetFileContent(fileContent, out);

  try {
    BoundedReader<KV<List<String>, String>> reader = this.source.createReader(options);
    reader.start();
    KV<List<String>, String> value = reader.getCurrent();

    // The key holds the path components; the value holds the file content.
    assertEquals(value.getKey().size(), 2);
    assertEquals(value.getKey().get(0), REPOSITORY);
    assertEquals(value.getKey().get(1), projectName);
    assertEquals(value.getValue(), fileContent);
  } catch (IOException e) {
    fail();
  }
}
Example 9: testReaderAdvance
import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; // import the required package/class
@Test
public void testReaderAdvance() {
  String objectName = REPOSITORY + this.source.getDirDelimiter() + "sampleProject";
  PipelineOptions options = PipelineOptionsFactory.create();
  BoundedReader<KV<List<String>, String>> reader;

  try {
    // Zero files: start() finds nothing.
    setUpGetFilesPage(objectName, 0);
    reader = this.source.createReader(options);
    assertFalse(reader.start());

    // One file: start() succeeds, advance() finds nothing more.
    setUpGetFilesPage(objectName, 1);
    reader = this.source.createReader(options);
    assertTrue(reader.start());
    assertFalse(reader.advance());

    // Two files: start() and one advance() succeed, then the reader is done.
    setUpGetFilesPage(objectName, 2);
    reader = this.source.createReader(options);
    assertTrue(reader.start());
    assertTrue(reader.advance());
    assertFalse(reader.advance());
  } catch (IOException e) {
    fail();
  }
}
Example 10: testBundleSplitIsJustSource
import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; // import the required package/class
@Test
public void testBundleSplitIsJustSource() throws Exception {
  PipelineOptions options = PipelineOptionsFactory.create();

  // As in Example 7, splitting a LiveProjectSource should always yield a
  // single bundle: the source itself.
  List<LiveProjectSource> bundles = source.splitIntoBundles(0, null);
  assertEquals(bundles.size(), 1);
  assertEquals(bundles.get(0), source);

  bundles = source.splitIntoBundles(0, options);
  assertEquals(bundles.size(), 1);
  assertEquals(bundles.get(0), source);

  bundles = source.splitIntoBundles(1, options);
  assertEquals(bundles.size(), 1);
  assertEquals(bundles.get(0), source);

  bundles = source.splitIntoBundles(100000, options);
  assertEquals(bundles.size(), 1);
  assertEquals(bundles.get(0), source);

  bundles = source.splitIntoBundles(10, null);
  assertEquals(bundles.size(), 1);
  assertEquals(bundles.get(0), source);
}
Example 11: testAdvanceWithoutStart
import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; // import the required package/class
@Test
public void testAdvanceWithoutStart() {
  PipelineOptions options = PipelineOptionsFactory.create();
  LiveProjectReader reader;
  this.listProjectsResponse.setProjects(new ArrayList<Project>(0));
  this.listProjectsResponse.setNextPageToken(null);

  try {
    reader = (LiveProjectReader) this.source.createReader(options);
    assertFalse(reader.advance());
    assertNull(reader.getNextPageToken());
    assertTrue(reader.getProjects().isEmpty());

    // getCurrent() before a successful start()/advance() should throw.
    reader.getCurrent();
    fail("Expected NoSuchElementException from reader.getCurrent");
  } catch (IOException e) {
    fail("IOException in reader.start");
  } catch (NoSuchElementException ignored) {
    // test passed.
  }
}
Example 12: pipelineOptions
import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; // import the required package/class
/**
 * Create Dataflow pipeline options from the standard command-line options,
 * "--project=", "--runner=" and "--stagingLocation=".
 *
 * @param args command-line arguments in "--key=value" form
 * @return the configured Dataflow pipeline options
 * @throws IOException if the arguments cannot be parsed
 */
public static DataflowPipelineOptions pipelineOptions(String[] args) throws IOException {
  LOG.info("Set up Dataflow options");
  DataflowPipelineOptions o = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  Map<String, String> m = StringUtils.parseArgs(args);

  o.setProject(m.get(PROJECT));

  // Resolve the staging location from whichever flag was provided.
  if (m.containsKey(STAGING)) {
    o.setStagingLocation(m.get(STAGING));
  } else if (m.containsKey(STAGING_LOCATION)) {
    o.setStagingLocation(m.get(STAGING_LOCATION));
  } else if (m.containsKey(WORKSPACE)) {
    o.setStagingLocation(m.get(WORKSPACE) + "/staging");
  }

  o.setRunner(runner(m.get(RUNNER)));
  o.setMaxNumWorkers(m.get(MAX_WORKERS) == null ? 1 : Integer.parseInt(m.get(MAX_WORKERS)));

  if (m.containsKey(MACHINE_TYPE)) {
    o.setWorkerMachineType(m.get(MACHINE_TYPE));
  } else {
    o.setWorkerMachineType(DEFAULT_MACHINE_TYPE);
  }
  return o;
}
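Hypothetical usage of the helper above; the exact flag spellings depend on the PROJECT, RUNNER and STAGING constants and the StringUtils.parseArgs implementation, neither of which is shown:

DataflowPipelineOptions opts = pipelineOptions(new String[] {
    "--project=my-project",
    "--runner=BlockingDataflowPipelineRunner",
    "--stagingLocation=gs://my-bucket/staging"
});
Pipeline p = Pipeline.create(opts);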
Example 13: setupPipeline
import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; // import the required package/class
private Pipeline setupPipeline(final String inputPath, final String outputPath, boolean enableGcs, boolean enableCloudExec) {
  final GATKGCSOptions options = PipelineOptionsFactory.as(GATKGCSOptions.class);

  // Pick a runner: Dataflow on GCP, Spark for Hadoop paths, otherwise local.
  if (enableCloudExec) {
    options.setStagingLocation(getGCPTestStaging());
    options.setProject(getGCPTestProject());
    options.setRunner(BlockingDataflowPipelineRunner.class);
  } else if (BucketUtils.isHadoopUrl(inputPath) || BucketUtils.isHadoopUrl(outputPath)) {
    options.setRunner(SparkPipelineRunner.class);
  } else {
    options.setRunner(DirectPipelineRunner.class);
  }
  if (enableGcs) {
    options.setApiKey(getGCPTestApiKey());
  }

  final Pipeline p = Pipeline.create(options);
  DataflowUtils.registerGATKCoders(p);
  return p;
}
Example 14: main
import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; // import the required package/class
public static void main(String[] args) {
  // Only --output is set here; getInput() presumably falls back to a default.
  String[] arguments = {
      String.format("--output=%s/output.txt", System.getProperty("java.io.tmpdir"))
  };
  Options options = PipelineOptionsFactory.fromArgs(arguments)
      .withValidation().as(Options.class);
  options.setRunner(FlinkLocalPipelineRunner.class);
  Pipeline p = Pipeline.create(options);

  p.apply(TextIO.Read.named("ReadLines").from(options.getInput()))
      .apply(new CountWords())
      .apply(TextIO.Write.named("WriteCounts")
          .to(options.getOutput())
          .withNumShards(options.getNumShards()));

  p.run();
}
Example 15: testGeneric
import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; // import the required package/class
@Test
public void testGeneric() throws Exception {
  // Build a single Avro record matching the person.avsc schema.
  Schema schema = new Schema.Parser().parse(Resources.getResource("person.avsc").openStream());
  GenericRecord savedRecord = new GenericData.Record(schema);
  savedRecord.put("name", "John Doe");
  savedRecord.put("age", 42);
  savedRecord.put("siblingnames", Lists.newArrayList("Jimmy", "Jane"));
  populateGenericFile(Lists.newArrayList(savedRecord), schema);

  // Read the record with AvroIO, write it back out, and run on Spark.
  Pipeline p = Pipeline.create(PipelineOptionsFactory.create());
  PCollection<GenericRecord> input = p.apply(
      AvroIO.Read.from(inputFile.getAbsolutePath()).withSchema(schema));
  input.apply(AvroIO.Write.to(outputDir.getAbsolutePath()).withSchema(schema));
  EvaluationResult res = SparkPipelineRunner.create().run(p);
  res.close();

  // The round-tripped record should equal the one we saved.
  List<GenericRecord> records = readGenericFile();
  assertEquals(Lists.newArrayList(savedRecord), records);
}
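The person.avsc resource file is not included; a schema consistent with the three fields the test populates could look like this (the record name and exact types are assumptions):

{
  "type": "record",
  "name": "Person",
  "fields": [
    {"name": "name", "type": "string"},
    {"name": "age", "type": "int"},
    {"name": "siblingnames", "type": {"type": "array", "items": "string"}}
  ]
}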