本文整理汇总了Java中org.apache.beam.sdk.options.PipelineOptionsFactory.create方法的典型用法代码示例。如果您正苦于以下问题：Java PipelineOptionsFactory.create方法的具体用法？Java PipelineOptionsFactory.create怎么用？Java PipelineOptionsFactory.create使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.beam.sdk.options.PipelineOptionsFactory的用法示例。
在下文中一共展示了PipelineOptionsFactory.create方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: main
import org.apache.beam.sdk.options.PipelineOptionsFactory; //导入方法依赖的package包/类
/**
 * Demo entry point: reads the Avro files matching {@code demo/output/data*}, writes the records
 * back out through {@code GenusDynamicAvroDestinations}, and waits for the pipeline to finish.
 *
 * @param args command-line arguments; not read by this method
 */
public static void main(String[] args) {
  final String inputPattern = "demo/output/data*";
  final String splitOutputPattern = "demo/output-split/data*";

  PipelineOptions pipelineOptions = PipelineOptionsFactory.create();
  // The demo always runs on the DirectRunner.
  pipelineOptions.setRunner(DirectRunner.class);
  Pipeline pipeline = Pipeline.create(pipelineOptions);

  // Avro coders must be registered so that our message types can be serialized.
  Coders.registerAvroCoders(pipeline, ExtendedRecord.class, UntypedOccurrence.class);

  PCollection<UntypedOccurrence> occurrences =
      pipeline.apply("Read Avro", AvroIO.read(UntypedOccurrence.class).from(inputPattern));

  occurrences.apply(
      "Write file per Genus",
      AvroIO.write(UntypedOccurrence.class)
          // A prefix is required here, although the second to(...) call overwrites it.
          .to(splitOutputPattern)
          .to(new GenusDynamicAvroDestinations(
              FileSystems.matchNewResource(splitOutputPattern, true))));

  LOG.info("Starting the pipeline");
  PipelineResult outcome = pipeline.run();
  outcome.waitUntilFinish();
  LOG.info("Pipeline finished with state: {} ", outcome.getState());
}
示例2: testSizes
import org.apache.beam.sdk.options.PipelineOptionsFactory; //导入方法依赖的package包/类
/**
 * Checks that the size estimate reported by a {@code BoundedSolrSource} over the test collection
 * lies strictly between the per-document minimum and maximum, each multiplied by the number of
 * inserted documents.
 */
@Test
public void testSizes() throws Exception {
  SolrIOTestUtils.insertTestDocuments(SOLR_COLLECTION, NUM_DOCS, solrClient);

  PipelineOptions pipelineOptions = PipelineOptionsFactory.create();
  SolrIO.Read readSpec =
      SolrIO.read().withConnectionConfiguration(connectionConfiguration).from(SOLR_COLLECTION);
  SolrIO.BoundedSolrSource source = new SolrIO.BoundedSolrSource(readSpec, null);

  long estimatedSize = source.getEstimatedSizeBytes(pipelineOptions);
  LOG.info("Estimated size: {}", estimatedSize);

  // An exact-equality assertion is not possible: Solr collections never have the same size
  // (due to the internal Lucene implementation), so only a lower and an upper bound are checked.
  long lowerBound = SolrIOTestUtils.MIN_DOC_SIZE * NUM_DOCS;
  assertThat("Wrong estimated size bellow minimum", estimatedSize, greaterThan(lowerBound));

  long upperBound = SolrIOTestUtils.MAX_DOC_SIZE * NUM_DOCS;
  assertThat("Wrong estimated size beyond maximum", estimatedSize, lessThan(upperBound));
}
示例3: testSplit
import org.apache.beam.sdk.options.PipelineOptionsFactory; //导入方法依赖的package包/类
/**
 * Verifies that splitting a {@code BoundedSolrSource} produces {@code NUM_SHARDS} sub-sources,
 * that those sub-sources are equivalent to the unsplit reference source, and that every one of
 * them yields at least one document.
 */
@Test
public void testSplit() throws Exception {
  SolrIOTestUtils.insertTestDocuments(SOLR_COLLECTION, NUM_DOCS, solrClient);
  PipelineOptions options = PipelineOptionsFactory.create();
  SolrIO.Read read =
      SolrIO.read().withConnectionConfiguration(connectionConfiguration).from(SOLR_COLLECTION);
  SolrIO.BoundedSolrSource initialSource = new SolrIO.BoundedSolrSource(read, null);

  // desiredBundleSize is ignored for now
  int desiredBundleSizeBytes = 0;
  List<? extends BoundedSource<SolrDocument>> splits =
      initialSource.split(desiredBundleSizeBytes, options);
  SourceTestUtils.assertSourcesEqualReferenceSource(initialSource, splits, options);

  int expectedNumSplits = NUM_SHARDS;
  assertEquals(expectedNumSplits, splits.size());

  int nonEmptySplits = 0;
  for (BoundedSource<SolrDocument> subSource : splits) {
    if (readFromSource(subSource, options).size() > 0) {
      nonEmptySplits++;
    }
  }
  // Docs are hashed by id to shards; in this test NUM_DOCS >> NUM_SHARDS,
  // therefore an empty shard cannot exist.
  // The message names what is actually counted: the non-empty splits.
  assertEquals("Wrong number of non-empty splits", expectedNumSplits, nonEmptySplits);
}
示例4: testStart
import org.apache.beam.sdk.options.PipelineOptionsFactory; //导入方法依赖的package包/类
/**
 * Starts a {@code BoundedSourceRunner} built from a serialized {@code CountingSource.upTo(3)} and
 * checks that the registered consumer received 0, 1 and 2, each in the global window.
 */
@Test
public void testStart() throws Exception {
  List<WindowedValue<Long>> received = new ArrayList<>();
  Collection<FnDataReceiver<WindowedValue<Long>>> consumers = ImmutableList.of(received::add);

  // The source is handed to the runner as the payload of a function spec.
  byte[] serializedSource = SerializableUtils.serializeToByteArray(CountingSource.upTo(3));
  ByteString payload = ByteString.copyFrom(serializedSource);
  RunnerApi.FunctionSpec sourceSpec =
      RunnerApi.FunctionSpec.newBuilder()
          .setUrn(ProcessBundleHandler.JAVA_SOURCE_URN)
          .setPayload(payload)
          .build();

  BoundedSourceRunner<BoundedSource<Long>, Long> sourceRunner =
      new BoundedSourceRunner<>(PipelineOptionsFactory.create(), sourceSpec, consumers);
  sourceRunner.start();

  assertThat(
      received,
      contains(valueInGlobalWindow(0L), valueInGlobalWindow(1L), valueInGlobalWindow(2L)));
}
示例5: testCreateTableSucceedsAlreadyExists
import org.apache.beam.sdk.options.PipelineOptionsFactory; //导入方法依赖的package包/类
/**
 * Tests that table creation succeeds when the table already exists.
 *
 * <p>The mocked response reports HTTP 409; {@code tryCreateTable} is then expected to return
 * {@code null} after reading the status code, the content and the content type once each.
 */
@Test
public void testCreateTableSucceedsAlreadyExists() throws IOException {
  TableReference tableRef = new TableReference();
  tableRef.setProjectId("project");
  tableRef.setDatasetId("dataset");
  tableRef.setTableId("table");

  ImmutableList<TableFieldSchema> columns =
      ImmutableList.of(
          new TableFieldSchema().setName("column1").setType("String"),
          new TableFieldSchema().setName("column2").setType("Integer"));
  TableSchema tableSchema = new TableSchema().setFields(columns);

  Table tableToCreate = new Table().setTableReference(tableRef).setSchema(tableSchema);

  // 409 means the table already exists.
  when(response.getStatusCode()).thenReturn(409);

  BigQueryServicesImpl.DatasetServiceImpl datasetService =
      new BigQueryServicesImpl.DatasetServiceImpl(bigquery, PipelineOptionsFactory.create());
  Table created =
      datasetService.tryCreateTable(
          tableToCreate, new RetryBoundedBackOff(0, BackOff.ZERO_BACKOFF), Sleeper.DEFAULT);

  assertNull(created);
  verify(response, times(1)).getStatusCode();
  verify(response, times(1)).getContent();
  verify(response, times(1)).getContentType();
}
示例6: testReadRangeAtMiddle
import org.apache.beam.sdk.options.PipelineOptionsFactory; //导入方法依赖的package包/类
/**
 * Reads one data file through three {@code TestFileBasedSource} instances covering the offsets
 * 0-52, 52-72 and 72-{@code Long.MAX_VALUE}, and checks that the combined output contains
 * exactly the records that were written, in any order.
 */
@Test
public void testReadRangeAtMiddle() throws IOException {
  PipelineOptions pipelineOptions = PipelineOptionsFactory.create();
  List<String> expected = createStringDataset(3, 50);
  File dataFile = createFileWithData("file", expected);
  Metadata metadata = FileSystems.matchSingleFileSpec(dataFile.getPath());

  // All three sources are constructed before any of them is read.
  TestFileBasedSource[] ranges = {
    new TestFileBasedSource(metadata, 64, 0, 52, null),
    new TestFileBasedSource(metadata, 64, 52, 72, null),
    new TestFileBasedSource(metadata, 64, 72, Long.MAX_VALUE, null)
  };

  List<String> actual = new ArrayList<>();
  for (TestFileBasedSource range : ranges) {
    actual.addAll(readFromSource(range, pipelineOptions));
  }

  assertThat(expected, containsInAnyOrder(actual.toArray()));
}
示例7: testGetTableThrows
import org.apache.beam.sdk.options.PipelineOptionsFactory; //导入方法依赖的package包/类
/**
 * Expects {@code getTable} to fail with an {@link IOException} whose message names the table id
 * when the mocked response reports HTTP 401.
 */
@Test
public void testGetTableThrows() throws Exception {
  when(response.getContentType()).thenReturn(Json.MEDIA_TYPE);
  when(response.getStatusCode()).thenReturn(401);

  TableReference tableRef = new TableReference();
  tableRef.setProjectId("projectId");
  tableRef.setDatasetId("datasetId");
  tableRef.setTableId("tableId");

  // The expectations must be registered before the call that is supposed to throw.
  String expectedMessage = String.format("Unable to get table: %s", tableRef.getTableId());
  thrown.expect(IOException.class);
  thrown.expectMessage(expectedMessage);

  BigQueryServicesImpl.DatasetServiceImpl service =
      new BigQueryServicesImpl.DatasetServiceImpl(bigquery, PipelineOptionsFactory.create());
  service.getTable(tableRef, BackOff.STOP_BACKOFF, Sleeper.DEFAULT);
}
示例8: testGetTableNotFound
import org.apache.beam.sdk.options.PipelineOptionsFactory; //导入方法依赖的package包/类
/**
 * Expects {@code getTable} to return {@code null} when the mocked response reports HTTP 404, and
 * checks that the status code, content and content type were each read exactly once.
 */
@Test
public void testGetTableNotFound() throws IOException, InterruptedException {
  when(response.getContentType()).thenReturn(Json.MEDIA_TYPE);
  when(response.getStatusCode()).thenReturn(404);

  BigQueryServicesImpl.DatasetServiceImpl service =
      new BigQueryServicesImpl.DatasetServiceImpl(bigquery, PipelineOptionsFactory.create());

  TableReference missingTableRef = new TableReference();
  missingTableRef.setProjectId("projectId");
  missingTableRef.setDatasetId("datasetId");
  missingTableRef.setTableId("tableId");

  Table found = service.getTable(missingTableRef, BackOff.ZERO_BACKOFF, Sleeper.DEFAULT);

  assertNull(found);
  verify(response, times(1)).getStatusCode();
  verify(response, times(1)).getContent();
  verify(response, times(1)).getContentType();
}
示例9: testRespectsCheckpointContract
import org.apache.beam.sdk.options.PipelineOptionsFactory; //导入方法依赖的package包/类
/**
 * Reads two values from a {@code TestCountingSource} of size 3, takes and finalizes a checkpoint,
 * then creates a second reader from that checkpoint and checks that it yields only the value 2.
 */
@Test
public void testRespectsCheckpointContract() throws IOException {
  TestCountingSource source = new TestCountingSource(3);
  PipelineOptions pipelineOptions = PipelineOptionsFactory.create();

  // First reader: created without any checkpoint.
  TestCountingSource.CountingSourceReader firstReader =
      source.createReader(pipelineOptions, null /* no checkpoint */);
  assertTrue(firstReader.start());
  assertEquals(0L, (long) firstReader.getCurrent().getValue());
  assertTrue(firstReader.advance());
  assertEquals(1L, (long) firstReader.getCurrent().getValue());

  TestCountingSource.CounterMark mark = firstReader.getCheckpointMark();
  mark.finalizeCheckpoint();

  // Second reader: created from the finalized checkpoint.
  TestCountingSource.CountingSourceReader resumedReader =
      source.createReader(pipelineOptions, mark);
  assertTrue(resumedReader.start());
  assertEquals(2L, (long) resumedReader.getCurrent().getValue());
  assertFalse(resumedReader.advance());
}
示例10: main
import org.apache.beam.sdk.options.PipelineOptionsFactory; //导入方法依赖的package包/类
/**
 * Demo entry point: reads {@code demo/dwca.zip} with the custom DwC-A reader, converts every
 * {@code ExtendedRecord} into an {@code UntypedOccurrence}, writes the result as Avro under
 * {@code demo/output/data}, and waits for the pipeline to finish.
 *
 * @param args command-line arguments; not read by this method
 */
public static void main(String[] args) {
  final String archivePath = "demo/dwca.zip";
  final String workingPath = "demo/target/tmp";
  final String avroOutputPrefix = "demo/output/data";

  PipelineOptions pipelineOptions = PipelineOptionsFactory.create();
  // The demo always runs on the DirectRunner.
  pipelineOptions.setRunner(DirectRunner.class);
  Pipeline pipeline = Pipeline.create(pipelineOptions);

  // Avro coders must be registered so that our message types can be serialized.
  Coders.registerAvroCoders(pipeline, ExtendedRecord.class, UntypedOccurrence.class);

  // Step 1: read the DwC-A using our custom reader.
  PCollection<ExtendedRecord> extendedRecords =
      pipeline.apply(
          "Read from Darwin Core Archive", DwCAIO.Read.withPaths(archivePath, workingPath));

  // Step 2: convert each ExtendedRecord into an UntypedOccurrence record.
  DoFn<ExtendedRecord, UntypedOccurrence> toUntypedOccurrence =
      BeamFunctions.beamify(FunctionFactory.untypedOccurrenceBuilder());

  // TODO: Explore the generics as to why the coder registry does not find it and we need to set
  // the coder explicitly
  PCollection<UntypedOccurrence> occurrences =
      extendedRecords
          .apply(
              "Convert the objects into untyped DwC style records",
              ParDo.of(toUntypedOccurrence))
          .setCoder(AvroCoder.of(UntypedOccurrence.class));

  // Step 3: write the result as an Avro file.
  occurrences.apply(
      "Save the records as Avro", AvroIO.write(UntypedOccurrence.class).to(avroOutputPrefix));

  LOG.info("Starting the pipeline");
  PipelineResult outcome = pipeline.run();
  outcome.waitUntilFinish();
  LOG.info("Pipeline finished with state: {} ", outcome.getState());
}
示例11: main
import org.apache.beam.sdk.options.PipelineOptionsFactory; //导入方法依赖的package包/类
/**
 * Demo entry point: reads {@code /tmp/dwca-s-bryophytes-v4.1.zip} with the custom DwC-A reader,
 * converts every {@code ExtendedRecord} into an {@code UntypedOccurrence}, turns those into SOLR
 * input documents, writes them to {@code beam-demo1}, and waits for the pipeline to finish.
 *
 * @param args command-line arguments; not read by this method
 */
public static void main(String[] args) {
  final String archivePath = "/tmp/dwca-s-bryophytes-v4.1.zip";
  final String workingPath = "demo/target/tmp";

  PipelineOptions pipelineOptions = PipelineOptionsFactory.create();
  // The demo always runs on the DirectRunner.
  pipelineOptions.setRunner(DirectRunner.class);
  Pipeline pipeline = Pipeline.create(pipelineOptions);

  // Avro coders must be registered so that our message types can be serialized.
  Coders.registerAvroCoders(pipeline, ExtendedRecord.class, UntypedOccurrence.class);

  // Step 1: read the DwC-A using our custom reader.
  PCollection<ExtendedRecord> extendedRecords =
      pipeline.apply(
          "Read from Darwin Core Archive", DwCAIO.Read.withPaths(archivePath, workingPath));

  // Step 2: convert each ExtendedRecord into an UntypedOccurrence record.
  PCollection<UntypedOccurrence> occurrences =
      extendedRecords
          .apply(
              "Convert the objects into untyped DwC style records",
              ParDo.of(BeamFunctions.beamify(FunctionFactory.untypedOccurrenceBuilder())))
          .setCoder(AvroCoder.of(UntypedOccurrence.class));

  // Step 3: build SOLR documents and write them out.
  final SolrIO.ConnectionConfiguration solrConnection =
      SolrIO.ConnectionConfiguration.create(SOLR_HOSTS);
  PCollection<SolrInputDocument> solrDocuments =
      occurrences.apply("Convert to SOLR", ParDo.of(new SolrDocBuilder()));
  solrDocuments.apply(
      SolrIO.write().to("beam-demo1").withConnectionConfiguration(solrConnection));

  LOG.info("Starting the pipeline");
  PipelineResult outcome = pipeline.run();
  outcome.waitUntilFinish();
  LOG.info("Pipeline finished with state: {} ", outcome.getState());
}
示例12: setUp
import org.apache.beam.sdk.options.PipelineOptionsFactory; //导入方法依赖的package包/类
/**
 * Prepares each test: initialises the Mockito mocks, creates fresh pipeline options carrying the
 * mocked GCS utility plus a fixed project and zone, stubs the CRM client's project lookup, and
 * builds a fake project with project number 1.
 */
@Before
public void setUp() throws Exception {
  MockitoAnnotations.initMocks(this);

  options = PipelineOptionsFactory.create();
  options.as(GcsOptions.class).setGcsUtil(mockGcsUtil);
  GcpOptions gcpOptions = options.as(GcpOptions.class);
  gcpOptions.setProject("foo");
  gcpOptions.setZone("us-north1-a");

  // projects().get(<any String>) is stubbed to return the mocked get request.
  when(mockCrmClient.projects()).thenReturn(mockProjects);
  when(mockProjects.get(any(String.class))).thenReturn(mockGet);

  fakeProject = new Project().setProjectNumber(1L);
}
示例13: testEstimatedSizeBytes
import org.apache.beam.sdk.options.PipelineOptionsFactory; //导入方法依赖的package包/类
/**
 * Checks that a {@code CassandraSource} backed by a loaded {@code FakeCassandraService} reports
 * an estimated size of 113890 bytes.
 */
@Test
public void testEstimatedSizeBytes() throws Exception {
  final FakeCassandraService fakeService = new FakeCassandraService();
  fakeService.load();

  PipelineOptions options = PipelineOptionsFactory.create();
  CassandraIO.Read readSpec = CassandraIO.<Scientist>read().withCassandraService(fakeService);
  CassandraIO.CassandraSource cassandraSource = new CassandraIO.CassandraSource(readSpec, null);

  long actualSizeBytes = cassandraSource.getEstimatedSizeBytes(options);

  // The expected size is the sum of the byte sizes of the String representation of each
  // scientist in the map.
  assertEquals(113890, actualSizeBytes);
}
示例14: testWithDefaultBuffer
import org.apache.beam.sdk.options.PipelineOptionsFactory; //导入方法依赖的package包/类
/**
 * Exercises a {@code BeamFnDataBufferingOutboundObserver} built without an explicit buffer size:
 * nothing is emitted until the default buffer limit is reached, the same holds after the buffer
 * has been flushed once, and closing with an empty buffer adds a single empty message.
 */
@Test
public void testWithDefaultBuffer() throws Exception {
  final int justUnderLimit = DEFAULT_BUFFER_LIMIT - 50;
  final int remainder = 50;

  Collection<BeamFnApi.Elements> emitted = new ArrayList<>();
  AtomicBoolean completed = new AtomicBoolean();
  CloseableFnDataReceiver<WindowedValue<byte[]>> receiver =
      new BeamFnDataBufferingOutboundObserver<>(
          PipelineOptionsFactory.create(),
          OUTPUT_LOCATION,
          CODER,
          TestStreams.withOnNext(emitted::add)
              .withOnCompleted(() -> completed.set(true))
              .build());

  // Round 1: staying under the default buffer size must not emit anything.
  receiver.accept(valueInGlobalWindow(new byte[justUnderLimit]));
  assertThat(emitted, empty());

  // Crossing the buffer limit emits both payloads in one message.
  receiver.accept(valueInGlobalWindow(new byte[remainder]));
  assertEquals(
      messageWithData(new byte[justUnderLimit], new byte[remainder]),
      Iterables.get(emitted, 0));

  // Round 2: after the reset, staying under the limit again emits nothing new.
  receiver.accept(valueInGlobalWindow(new byte[justUnderLimit]));
  assertEquals(1, emitted.size());

  // Crossing the limit a second time emits the second message.
  receiver.accept(valueInGlobalWindow(new byte[remainder]));
  assertEquals(
      messageWithData(new byte[justUnderLimit], new byte[remainder]),
      Iterables.get(emitted, 1));

  // Closing with an empty buffer yields only a single end-of-stream message.
  receiver.close();
  assertEquals(messageWithData(), Iterables.get(emitted, 2));
}
示例15: testReadRangeFromFileWithSplitsFromMiddleOfHeader
import org.apache.beam.sdk.options.PipelineOptionsFactory; //导入方法依赖的package包/类
/**
 * Writes a file made of ten groups, each a {@code "<h>"} header line followed by a generated
 * dataset, and reads it with sources starting at offsets 1, 2 and 3. Each read is expected to
 * return the records from index 10 onwards with every header removed, in any order.
 */
@Test
public void testReadRangeFromFileWithSplitsFromMiddleOfHeader() throws IOException {
  PipelineOptions pipelineOptions = PipelineOptionsFactory.create();
  String header = "<h>";

  List<String> fileLines = new ArrayList<>();
  for (int group = 0; group < 10; group++) {
    fileLines.add(header);
    fileLines.addAll(createStringDataset(3, 9));
  }
  File dataFile = createFileWithData("file", fileLines);

  List<String> expectedResults = new ArrayList<>(fileLines.subList(10, fileLines.size()));
  // Remove all occurrences of header from expected results.
  expectedResults.removeAll(Collections.singletonList(header));

  Metadata metadata = FileSystems.matchSingleFileSpec(dataFile.getPath());

  // Start offsets 1, 2 and 3 place the split after "<", "<h" and "<h>" of the header.
  for (int startOffset = 1; startOffset <= 3; startOffset++) {
    TestFileBasedSource source =
        new TestFileBasedSource(metadata, 64, startOffset, Long.MAX_VALUE, header);
    assertThat(
        expectedResults,
        containsInAnyOrder(readFromSource(source, pipelineOptions).toArray()));
  }
}