This article collects typical usage examples of the Java class org.apache.beam.runners.dataflow.options.DataflowPipelineOptions. If you are wondering what DataflowPipelineOptions does and how to use it, the curated examples below may help.
DataflowPipelineOptions belongs to the org.apache.beam.runners.dataflow.options package. Fifteen code examples of the class follow, ordered by popularity.
Example 1: testGcsUploadBufferSizeIsSetForStreamingWhenDefault
import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions; // import the required package/class
@Test
public void testGcsUploadBufferSizeIsSetForStreamingWhenDefault() throws IOException {
  DataflowPipelineOptions streamingOptions = buildPipelineOptions();
  streamingOptions.setStreaming(true);
  streamingOptions.setRunner(DataflowRunner.class);
  Pipeline p = Pipeline.create(streamingOptions);

  // Instantiating a runner before run() currently has the side effect of mutating the options.
  // DataflowRunner.fromOptions(streamingOptions) could test this, but it would not ensure
  // that the pipeline itself had the expected options set.
  p.run();

  assertEquals(
      DataflowRunner.GCS_UPLOAD_BUFFER_SIZE_BYTES_DEFAULT,
      streamingOptions.getGcsUploadBufferSizeBytes().intValue());
}
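As a contrast to the test above, here is a minimal sketch (not one of the collected examples) of setting the upload buffer size explicitly instead of relying on the streaming default. setGcsUploadBufferSizeBytes is inherited from GcsOptions, and the 8 MB value is an arbitrary assumption:

import org.apache.beam.runners.dataflow.DataflowRunner;
import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

public class BufferSizeSketch {
  public static void main(String[] args) {
    DataflowPipelineOptions options =
        PipelineOptionsFactory.fromArgs(args).as(DataflowPipelineOptions.class);
    options.setRunner(DataflowRunner.class);
    options.setStreaming(true);
    // An explicitly set buffer size takes precedence over the runner's streaming default.
    options.setGcsUploadBufferSizeBytes(8 * 1024 * 1024);
  }
}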
Example 2: testTransformTranslatorMissing
import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions; // import the required package/class
@Test
public void testTransformTranslatorMissing() throws IOException {
  DataflowPipelineOptions options = buildPipelineOptions();
  Pipeline p = Pipeline.create(options);
  p.apply(Create.of(Arrays.asList(1, 2, 3)))
      .apply(new TestTransform());

  thrown.expect(IllegalStateException.class);
  thrown.expectMessage(Matchers.containsString("no translator registered"));
  DataflowPipelineTranslator.fromOptions(options)
      .translate(
          p, DataflowRunner.fromOptions(options), Collections.<DataflowPackage>emptyList());

  ArgumentCaptor<Job> jobCaptor = ArgumentCaptor.forClass(Job.class);
  Mockito.verify(mockJobs).create(eq(PROJECT_ID), eq(REGION_ID), jobCaptor.capture());
  assertValidJob(jobCaptor.getValue());
}
Example 3: getEnvironmentVersion
import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions; // import the required package/class
/** Helper to configure the Dataflow job environment based on the user's job options. */
private static Map<String, Object> getEnvironmentVersion(DataflowPipelineOptions options) {
  DataflowRunnerInfo runnerInfo = DataflowRunnerInfo.getDataflowRunnerInfo();
  String majorVersion;
  String jobType;
  if (hasExperiment(options, "beam_fn_api")) {
    majorVersion = runnerInfo.getFnApiEnvironmentMajorVersion();
    jobType = options.isStreaming() ? "FNAPI_STREAMING" : "FNAPI_BATCH";
  } else {
    majorVersion = runnerInfo.getLegacyEnvironmentMajorVersion();
    jobType = options.isStreaming() ? "STREAMING" : "JAVA_BATCH_AUTOSCALING";
  }
  return ImmutableMap.<String, Object>of(
      PropertyNames.ENVIRONMENT_VERSION_MAJOR_KEY, majorVersion,
      PropertyNames.ENVIRONMENT_VERSION_JOB_TYPE_KEY, jobType);
}
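A hedged usage sketch for the helper above: building options that select the FnAPI branch. hasExperiment simply checks options.getExperiments() for the given flag:

DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
options.setExperiments(ImmutableList.of("beam_fn_api"));
options.setStreaming(true);
// Expected result under the logic above: the FnAPI environment major version
// plus the job type "FNAPI_STREAMING".
Map<String, Object> environmentVersion = getEnvironmentVersion(options);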
Example 4: testWorkerHarnessContainerImage
import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions; // import the required package/class
@Test
public void testWorkerHarnessContainerImage() {
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);

  // An explicitly set image without the IMAGE placeholder is returned unchanged.
  options.setWorkerHarnessContainerImage("some-container");
  assertThat(getContainerImageForJob(options), equalTo("some-container"));

  // batch, legacy: the IMAGE placeholder is replaced according to job type
  options.setWorkerHarnessContainerImage("gcr.io/IMAGE/foo");
  options.setExperiments(null);
  options.setStreaming(false);
  assertThat(getContainerImageForJob(options), equalTo("gcr.io/beam-java-batch/foo"));

  // streaming, legacy
  options.setStreaming(true);
  assertThat(getContainerImageForJob(options), equalTo("gcr.io/beam-java-streaming/foo"));

  // streaming, fnapi
  options.setExperiments(ImmutableList.of("experiment1", "beam_fn_api"));
  assertThat(getContainerImageForJob(options), equalTo("gcr.io/java/foo"));
}
Example 5: testTemplateRunnerLoggedErrorForFile
import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions; // import the required package/class
/**
 * Tests that the {@link DataflowRunner} with {@code --templateLocation} throws the appropriate
 * exception when an output file is not writable.
 */
@Test
public void testTemplateRunnerLoggedErrorForFile() throws Exception {
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setJobName("TestJobName");
  options.setRunner(DataflowRunner.class);
  options.setTemplateLocation("//bad/path");
  options.setProject("test-project");
  options.setTempLocation(tmpFolder.getRoot().getPath());
  options.setGcpCredential(new TestCredential());
  options.setPathValidatorClass(NoopPathValidator.class);
  Pipeline p = Pipeline.create(options);

  thrown.expectMessage("Cannot create output file at");
  thrown.expect(RuntimeException.class);
  p.run();
}
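For contrast, a hedged sketch of a template run that should succeed; the project, bucket, and template names are hypothetical and must point at writable GCS locations:

DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
options.setRunner(DataflowRunner.class);
options.setProject("my-project");                                    // hypothetical
options.setTempLocation("gs://my-bucket/temp");                      // hypothetical
options.setTemplateLocation("gs://my-bucket/templates/my-template"); // hypothetical
// With templateLocation set, run() stages the template instead of executing a job.
Pipeline.create(options).run();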
Example 6: newDataflowClient
import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions; // import the required package/class
/** Returns a Google Cloud Dataflow client builder. */
public static Dataflow.Builder newDataflowClient(DataflowPipelineOptions options) {
  String servicePath = options.getDataflowEndpoint();
  ApiComponents components;
  if (servicePath.contains("://")) {
    components = apiComponentsFromUrl(servicePath);
  } else {
    components = new ApiComponents(options.getApiRootUrl(), servicePath);
  }
  return new Dataflow.Builder(
          getTransport(),
          getJsonFactory(),
          chainHttpRequestInitializer(
              options.getGcpCredential(),
              // Do not log 404s; they clutter the output and may even be expected by the caller.
              new RetryHttpRequestInitializer(ImmutableList.of(404))))
      .setApplicationName(options.getAppName())
      .setRootUrl(components.rootUrl)
      .setServicePath(components.servicePath)
      .setGoogleClientRequestInitializer(options.getGoogleApiTrace());
}
Example 7: testEmptyMetricUpdates
import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions; // import the required package/class
@Test
public void testEmptyMetricUpdates() throws IOException {
  Job modelJob = new Job();
  modelJob.setCurrentState(State.RUNNING.toString());

  DataflowPipelineJob job = mock(DataflowPipelineJob.class);
  DataflowPipelineOptions options = mock(DataflowPipelineOptions.class);
  when(options.isStreaming()).thenReturn(false);
  when(job.getDataflowOptions()).thenReturn(options);
  when(job.getState()).thenReturn(State.RUNNING);
  job.jobId = JOB_ID;

  JobMetrics jobMetrics = new JobMetrics();
  jobMetrics.setMetrics(null /* this is how the APIs represent empty metrics */);
  DataflowClient dataflowClient = mock(DataflowClient.class);
  when(dataflowClient.getJobMetrics(JOB_ID)).thenReturn(jobMetrics);

  DataflowMetrics dataflowMetrics = new DataflowMetrics(job, dataflowClient);
  MetricQueryResults result = dataflowMetrics.queryMetrics();
  assertThat(ImmutableList.copyOf(result.counters()), is(empty()));
  assertThat(ImmutableList.copyOf(result.distributions()), is(empty()));
}
Example 8: testCachingMetricUpdates
import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions; // import the required package/class
@Test
public void testCachingMetricUpdates() throws IOException {
  Job modelJob = new Job();
  modelJob.setCurrentState(State.RUNNING.toString());

  DataflowPipelineJob job = mock(DataflowPipelineJob.class);
  DataflowPipelineOptions options = mock(DataflowPipelineOptions.class);
  when(options.isStreaming()).thenReturn(false);
  when(job.getDataflowOptions()).thenReturn(options);
  when(job.getState()).thenReturn(State.DONE);
  job.jobId = JOB_ID;

  JobMetrics jobMetrics = new JobMetrics();
  jobMetrics.setMetrics(ImmutableList.<MetricUpdate>of());
  DataflowClient dataflowClient = mock(DataflowClient.class);
  when(dataflowClient.getJobMetrics(JOB_ID)).thenReturn(jobMetrics);

  DataflowMetrics dataflowMetrics = new DataflowMetrics(job, dataflowClient);
  // Once the job is DONE, the first query hits the service and the result is cached:
  // the second query does not trigger another RPC.
  verify(dataflowClient, times(0)).getJobMetrics(JOB_ID);
  dataflowMetrics.queryMetrics(null);
  verify(dataflowClient, times(1)).getJobMetrics(JOB_ID);
  dataflowMetrics.queryMetrics(null);
  verify(dataflowClient, times(1)).getJobMetrics(JOB_ID);
}
Example 9: buildPipelineOptions
import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions; // import the required package/class
private static DataflowPipelineOptions buildPipelineOptions() throws IOException {
  // Mock GcsUtil expands every GcsPath to itself and treats every bucket as accessible.
  GcsUtil mockGcsUtil = mock(GcsUtil.class);
  when(mockGcsUtil.expand(any(GcsPath.class))).then(new Answer<List<GcsPath>>() {
    @Override
    public List<GcsPath> answer(InvocationOnMock invocation) throws Throwable {
      return ImmutableList.of((GcsPath) invocation.getArguments()[0]);
    }
  });
  when(mockGcsUtil.bucketAccessible(any(GcsPath.class))).thenReturn(true);

  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setRunner(DataflowRunner.class);
  options.setGcpCredential(new TestCredential());
  options.setJobName("some-job-name");
  options.setProject("some-project");
  options.setRegion("some-region");
  options.setTempLocation(GcsPath.fromComponents("somebucket", "some/path").toString());
  options.setFilesToStage(new LinkedList<String>());
  options.setDataflowClient(buildMockDataflow(new IsValidCreateRequest()));
  options.setGcsUtil(mockGcsUtil);
  return options;
}
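For comparison, a hedged sketch of the non-test equivalent: configuring real options from command-line arguments. The project, region, and bucket names are hypothetical:

DataflowPipelineOptions options =
    PipelineOptionsFactory.fromArgs(args).withValidation().as(DataflowPipelineOptions.class);
options.setRunner(DataflowRunner.class);
options.setProject("my-project");               // hypothetical
options.setRegion("us-central1");               // hypothetical
options.setTempLocation("gs://my-bucket/temp"); // hypothetical
Pipeline p = Pipeline.create(options);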
Example 10: testNetworkConfig
import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions; // import the required package/class
@Test
public void testNetworkConfig() throws IOException {
  final String testNetwork = "test-network";

  DataflowPipelineOptions options = buildPipelineOptions();
  options.setNetwork(testNetwork);

  Pipeline p = buildPipeline(options);
  p.traverseTopologically(new RecordingPipelineVisitor());
  Job job =
      DataflowPipelineTranslator.fromOptions(options)
          .translate(
              p, DataflowRunner.fromOptions(options), Collections.<DataflowPackage>emptyList())
          .getJob();

  assertEquals(1, job.getEnvironment().getWorkerPools().size());
  assertEquals(testNetwork, job.getEnvironment().getWorkerPools().get(0).getNetwork());
}
Example 11: testSubnetworkConfig
import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions; // import the required package/class
@Test
public void testSubnetworkConfig() throws IOException {
  final String testSubnetwork = "regions/REGION/subnetworks/SUBNETWORK";

  DataflowPipelineOptions options = buildPipelineOptions();
  options.setSubnetwork(testSubnetwork);

  Pipeline p = buildPipeline(options);
  p.traverseTopologically(new RecordingPipelineVisitor());
  Job job =
      DataflowPipelineTranslator.fromOptions(options)
          .translate(
              p, DataflowRunner.fromOptions(options), Collections.<DataflowPackage>emptyList())
          .getJob();

  assertEquals(1, job.getEnvironment().getWorkerPools().size());
  assertEquals(testSubnetwork, job.getEnvironment().getWorkerPools().get(0).getSubnetwork());
}
Example 12: testZoneConfig
import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions; // import the required package/class
@Test
public void testZoneConfig() throws IOException {
  final String testZone = "test-zone-1";

  DataflowPipelineOptions options = buildPipelineOptions();
  options.setZone(testZone);

  Pipeline p = buildPipeline(options);
  p.traverseTopologically(new RecordingPipelineVisitor());
  Job job =
      DataflowPipelineTranslator.fromOptions(options)
          .translate(
              p, DataflowRunner.fromOptions(options), Collections.<DataflowPackage>emptyList())
          .getJob();

  assertEquals(1, job.getEnvironment().getWorkerPools().size());
  assertEquals(testZone, job.getEnvironment().getWorkerPools().get(0).getZone());
}
Example 13: testDiskSizeGbConfig
import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions; // import the required package/class
@Test
public void testDiskSizeGbConfig() throws IOException {
  final Integer diskSizeGb = 1234;

  DataflowPipelineOptions options = buildPipelineOptions();
  options.setDiskSizeGb(diskSizeGb);

  Pipeline p = buildPipeline(options);
  p.traverseTopologically(new RecordingPipelineVisitor());
  Job job =
      DataflowPipelineTranslator.fromOptions(options)
          .translate(
              p, DataflowRunner.fromOptions(options), Collections.<DataflowPackage>emptyList())
          .getJob();

  assertEquals(1, job.getEnvironment().getWorkerPools().size());
  assertEquals(diskSizeGb, job.getEnvironment().getWorkerPools().get(0).getDiskSizeGb());
}
Example 14: createPredefinedStep
import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions; // import the required package/class
/** Returns a Step for a {@link DoFn} by creating and translating a pipeline. */
private static Step createPredefinedStep() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
  Pipeline pipeline = Pipeline.create(options);
  String stepName = "DoFn1";
  pipeline
      .apply("ReadMyFile", TextIO.read().from("gs://bucket/in"))
      .apply(stepName, ParDo.of(new NoOpFn()))
      .apply("WriteMyFile", TextIO.write().to("gs://bucket/out"));
  DataflowRunner runner = DataflowRunner.fromOptions(options);
  runner.replaceTransforms(pipeline);

  Job job =
      translator
          .translate(pipeline, runner, Collections.<DataflowPackage>emptyList())
          .getJob();
  assertEquals(8, job.getSteps().size());

  Step step = job.getSteps().get(1);
  assertEquals(stepName, getString(step.getProperties(), PropertyNames.USER_NAME));
  assertAllStepOutputsHaveUniqueIds(job);
  return step;
}
Example 15: testMultiGraphPipelineSerialization
import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions; // import the required package/class
@Test
public void testMultiGraphPipelineSerialization() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  Pipeline p = Pipeline.create(options);

  PCollection<Integer> input = p.begin().apply(Create.of(1, 2, 3));
  input.apply(new UnrelatedOutputCreator());
  input.apply(new UnboundOutputCreator());

  DataflowPipelineTranslator t =
      DataflowPipelineTranslator.fromOptions(
          PipelineOptionsFactory.as(DataflowPipelineOptions.class));

  // Check that translation does not fail.
  JobSpecification jobSpecification =
      t.translate(p, DataflowRunner.fromOptions(options), Collections.<DataflowPackage>emptyList());
  assertAllStepOutputsHaveUniqueIds(jobSpecification.getJob());
}