

Java PipelineOptionsFactory.create Method Code Examples

This article collects typical usage examples of the Java method org.apache.beam.sdk.options.PipelineOptionsFactory.create. If you are wondering what PipelineOptionsFactory.create does, how to call it, or what real-world usage looks like, the curated examples below should help. You can also explore further usage examples of its containing class, org.apache.beam.sdk.options.PipelineOptionsFactory.


A total of 15 code examples of the PipelineOptionsFactory.create method are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Java code examples.
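Before diving into the examples, here is a minimal sketch of the two common ways to obtain a PipelineOptions instance: create() for default values, and fromArgs() for parsing command-line arguments. The MyOptions interface below is a hypothetical illustration and does not appear in the examples that follow.

import org.apache.beam.sdk.options.Default;
import org.apache.beam.sdk.options.Description;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

// Hypothetical options interface, shown only for illustration.
public interface MyOptions extends PipelineOptions {
  @Description("An example option with a default value")
  @Default.String("hello")
  String getGreeting();
  void setGreeting(String value);
}

public static void main(String[] args) {
  // Simplest form: every option takes its default value.
  PipelineOptions options = PipelineOptionsFactory.create();

  // Alternative: parse command-line arguments (e.g. --greeting=hi),
  // validating required options before the pipeline starts.
  MyOptions parsed = PipelineOptionsFactory.fromArgs(args)
      .withValidation()
      .as(MyOptions.class);

  // Any PipelineOptions instance can be viewed as a more specific
  // options interface via as(...).
  MyOptions viewed = options.as(MyOptions.class);
  System.out.println(viewed.getGreeting()); // prints "hello"
}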

Example 1: main

import org.apache.beam.sdk.options.PipelineOptionsFactory; // import the package/class the method depends on
public static void main(String[] args) {
  PipelineOptions options = PipelineOptionsFactory.create();
  options.setRunner(DirectRunner.class); // forced for this demo
  Pipeline p = Pipeline.create(options);

  // register Avro coders for serializing our messages
  Coders.registerAvroCoders(p, ExtendedRecord.class, UntypedOccurrence.class);

  PCollection<UntypedOccurrence> verbatimRecords = p.apply(
    "Read Avro", AvroIO.read(UntypedOccurrence.class).from("demo/output/data*"));

  verbatimRecords.apply("Write file per Genus",
                        AvroIO.write(UntypedOccurrence.class)
                              .to("demo/output-split/data*") // prefix, is required but overwritten
                              .to(new GenusDynamicAvroDestinations(
                                FileSystems.matchNewResource("demo/output-split/data*", true))));


  LOG.info("Starting the pipeline");
  PipelineResult result = p.run();
  result.waitUntilFinish();
  LOG.info("Pipeline finished with state: {} ", result.getState());
}
 
Developer: gbif, Project: pipelines, Lines: 24, Source: MultiAvroOutDemo.java

Example 2: testSizes

import org.apache.beam.sdk.options.PipelineOptionsFactory; // import the package/class the method depends on
@Test
public void testSizes() throws Exception {
  SolrIOTestUtils.insertTestDocuments(SOLR_COLLECTION, NUM_DOCS, solrClient);

  PipelineOptions options = PipelineOptionsFactory.create();
  SolrIO.Read read =
      SolrIO.read().withConnectionConfiguration(connectionConfiguration).from(SOLR_COLLECTION);
  SolrIO.BoundedSolrSource initialSource = new SolrIO.BoundedSolrSource(read, null);
  // can't assert equality, as Solr collections never have exactly the same size
  // (due to internal Lucene implementation details)
  long estimatedSize = initialSource.getEstimatedSizeBytes(options);
  LOG.info("Estimated size: {}", estimatedSize);
  assertThat(
      "Wrong estimated size bellow minimum",
      estimatedSize,
      greaterThan(SolrIOTestUtils.MIN_DOC_SIZE * NUM_DOCS));
  assertThat(
      "Wrong estimated size beyond maximum",
      estimatedSize,
      lessThan(SolrIOTestUtils.MAX_DOC_SIZE * NUM_DOCS));
}
 
Developer: apache, Project: beam, Lines: 22, Source: SolrIOTest.java

Example 3: testSplit

import org.apache.beam.sdk.options.PipelineOptionsFactory; // import the package/class the method depends on
@Test
public void testSplit() throws Exception {
  SolrIOTestUtils.insertTestDocuments(SOLR_COLLECTION, NUM_DOCS, solrClient);

  PipelineOptions options = PipelineOptionsFactory.create();
  SolrIO.Read read =
      SolrIO.read().withConnectionConfiguration(connectionConfiguration).from(SOLR_COLLECTION);
  SolrIO.BoundedSolrSource initialSource = new SolrIO.BoundedSolrSource(read, null);
  // desiredBundleSize is ignored for now
  int desiredBundleSizeBytes = 0;
  List<? extends BoundedSource<SolrDocument>> splits =
      initialSource.split(desiredBundleSizeBytes, options);
  SourceTestUtils.assertSourcesEqualReferenceSource(initialSource, splits, options);

  int expectedNumSplits = NUM_SHARDS;
  assertEquals(expectedNumSplits, splits.size());
  int nonEmptySplits = 0;
  for (BoundedSource<SolrDocument> subSource : splits) {
    if (readFromSource(subSource, options).size() > 0) {
      nonEmptySplits += 1;
    }
  }
  // docs are hashed by id across shards; in this test NUM_DOCS >> NUM_SHARDS,
  // so no shard can be empty.
  assertEquals("Wrong number of empty splits", expectedNumSplits, nonEmptySplits);
}
 
Developer: apache, Project: beam, Lines: 27, Source: SolrIOTest.java

Example 4: testStart

import org.apache.beam.sdk.options.PipelineOptionsFactory; // import the package/class the method depends on
@Test
public void testStart() throws Exception {
  List<WindowedValue<Long>> outValues = new ArrayList<>();
  Collection<FnDataReceiver<WindowedValue<Long>>> consumers =
      ImmutableList.of(outValues::add);

  ByteString encodedSource =
      ByteString.copyFrom(SerializableUtils.serializeToByteArray(CountingSource.upTo(3)));

  BoundedSourceRunner<BoundedSource<Long>, Long> runner = new BoundedSourceRunner<>(
      PipelineOptionsFactory.create(),
      RunnerApi.FunctionSpec.newBuilder()
          .setUrn(ProcessBundleHandler.JAVA_SOURCE_URN).setPayload(encodedSource).build(),
      consumers);

  runner.start();

  assertThat(outValues,
      contains(valueInGlobalWindow(0L), valueInGlobalWindow(1L), valueInGlobalWindow(2L)));
}
 
Developer: apache, Project: beam, Lines: 21, Source: BoundedSourceRunnerTest.java

Example 5: testCreateTableSucceedsAlreadyExists

import org.apache.beam.sdk.options.PipelineOptionsFactory; // import the package/class the method depends on
/**
 * Tests that table creation succeeds when the table already exists.
 */
@Test
public void testCreateTableSucceedsAlreadyExists() throws IOException {
  TableReference ref =
      new TableReference().setProjectId("project").setDatasetId("dataset").setTableId("table");
  TableSchema schema = new TableSchema().setFields(ImmutableList.of(
      new TableFieldSchema().setName("column1").setType("String"),
      new TableFieldSchema().setName("column2").setType("Integer")));
  Table testTable = new Table().setTableReference(ref).setSchema(schema);

  when(response.getStatusCode()).thenReturn(409); // 409 means already exists

  BigQueryServicesImpl.DatasetServiceImpl services =
      new BigQueryServicesImpl.DatasetServiceImpl(bigquery, PipelineOptionsFactory.create());
  Table ret =
      services.tryCreateTable(
          testTable,
          new RetryBoundedBackOff(0, BackOff.ZERO_BACKOFF),
          Sleeper.DEFAULT);

  assertNull(ret);
  verify(response, times(1)).getStatusCode();
  verify(response, times(1)).getContent();
  verify(response, times(1)).getContentType();
}
 
Developer: apache, Project: beam, Lines: 28, Source: BigQueryServicesImplTest.java

Example 6: testReadRangeAtMiddle

import org.apache.beam.sdk.options.PipelineOptionsFactory; // import the package/class the method depends on
@Test
public void testReadRangeAtMiddle() throws IOException {
  PipelineOptions options = PipelineOptionsFactory.create();
  List<String> data = createStringDataset(3, 50);
  String fileName = "file";
  File file = createFileWithData(fileName, data);

  Metadata metadata = FileSystems.matchSingleFileSpec(file.getPath());
  TestFileBasedSource source1 = new TestFileBasedSource(metadata, 64, 0, 52, null);
  TestFileBasedSource source2 = new TestFileBasedSource(metadata, 64, 52, 72, null);
  TestFileBasedSource source3 =
      new TestFileBasedSource(metadata, 64, 72, Long.MAX_VALUE, null);

  List<String> results = new ArrayList<>();
  results.addAll(readFromSource(source1, options));
  results.addAll(readFromSource(source2, options));
  results.addAll(readFromSource(source3, options));

  assertThat(data, containsInAnyOrder(results.toArray()));
}
 
Developer: apache, Project: beam, Lines: 21, Source: FileBasedSourceTest.java

Example 7: testGetTableThrows

import org.apache.beam.sdk.options.PipelineOptionsFactory; // import the package/class the method depends on
@Test
public void testGetTableThrows() throws Exception {
  when(response.getContentType()).thenReturn(Json.MEDIA_TYPE);
  when(response.getStatusCode()).thenReturn(401);

  TableReference tableRef = new TableReference()
      .setProjectId("projectId")
      .setDatasetId("datasetId")
      .setTableId("tableId");

  thrown.expect(IOException.class);
  thrown.expectMessage(String.format("Unable to get table: %s", tableRef.getTableId()));

  BigQueryServicesImpl.DatasetServiceImpl datasetService =
      new BigQueryServicesImpl.DatasetServiceImpl(bigquery, PipelineOptionsFactory.create());
  datasetService.getTable(tableRef, BackOff.STOP_BACKOFF, Sleeper.DEFAULT);
}
 
Developer: apache, Project: beam, Lines: 18, Source: BigQueryServicesImplTest.java

Example 8: testGetTableNotFound

import org.apache.beam.sdk.options.PipelineOptionsFactory; // import the package/class the method depends on
@Test
public void testGetTableNotFound() throws IOException, InterruptedException {
  when(response.getContentType()).thenReturn(Json.MEDIA_TYPE);
  when(response.getStatusCode()).thenReturn(404);

  BigQueryServicesImpl.DatasetServiceImpl datasetService =
      new BigQueryServicesImpl.DatasetServiceImpl(bigquery, PipelineOptionsFactory.create());

  TableReference tableRef = new TableReference()
      .setProjectId("projectId")
      .setDatasetId("datasetId")
      .setTableId("tableId");
  Table table = datasetService.getTable(tableRef, BackOff.ZERO_BACKOFF, Sleeper.DEFAULT);

  assertNull(table);
  verify(response, times(1)).getStatusCode();
  verify(response, times(1)).getContent();
  verify(response, times(1)).getContentType();
}
 
Developer: apache, Project: beam, Lines: 20, Source: BigQueryServicesImplTest.java

Example 9: testRespectsCheckpointContract

import org.apache.beam.sdk.options.PipelineOptionsFactory; // import the package/class the method depends on
@Test
public void testRespectsCheckpointContract() throws IOException {
  TestCountingSource source = new TestCountingSource(3);
  PipelineOptions options = PipelineOptionsFactory.create();
  TestCountingSource.CountingSourceReader reader =
      source.createReader(options, null /* no checkpoint */);
  assertTrue(reader.start());
  assertEquals(0L, (long) reader.getCurrent().getValue());
  assertTrue(reader.advance());
  assertEquals(1L, (long) reader.getCurrent().getValue());
  TestCountingSource.CounterMark checkpoint = reader.getCheckpointMark();
  checkpoint.finalizeCheckpoint();
  reader = source.createReader(options, checkpoint);
  assertTrue(reader.start());
  assertEquals(2L, (long) reader.getCurrent().getValue());
  assertFalse(reader.advance());
}
 
Developer: apache, Project: beam, Lines: 18, Source: TestCountingSourceTest.java

Example 10: main

import org.apache.beam.sdk.options.PipelineOptionsFactory; // import the package/class the method depends on
public static void main(String[] args) {
  PipelineOptions options = PipelineOptionsFactory.create();
  options.setRunner(DirectRunner.class); // forced for this demo
  Pipeline p = Pipeline.create(options);

  // register Avro coders for serializing our messages
  Coders.registerAvroCoders(p, ExtendedRecord.class, UntypedOccurrence.class);

  // Read the DwC-A using our custom reader
  PCollection<ExtendedRecord> rawRecords = p.apply(
    "Read from Darwin Core Archive", DwCAIO.Read.withPaths("demo/dwca.zip", "demo/target/tmp"));

  // Convert the ExtendedRecord into an UntypedOccurrence record
  DoFn<ExtendedRecord, UntypedOccurrence> fn = BeamFunctions.beamify(FunctionFactory.untypedOccurrenceBuilder());

  // TODO: Explore the generics as to why the coder registry does not find it and we need to set the coder explicitly
  PCollection<UntypedOccurrence> verbatimRecords = rawRecords.apply(
      "Convert the objects into untyped DwC style records", ParDo.of(fn))
      .setCoder(AvroCoder.of(UntypedOccurrence.class));

  // Write the result as an Avro file
  verbatimRecords.apply(
    "Save the records as Avro", AvroIO.write(UntypedOccurrence.class).to("demo/output/data"));

  LOG.info("Starting the pipeline");
  PipelineResult result = p.run();
  result.waitUntilFinish();
  LOG.info("Pipeline finished with state: {} ", result.getState());
}
 
Developer: gbif, Project: pipelines, Lines: 30, Source: DwCA2AvroPipeline.java

Example 11: main

import org.apache.beam.sdk.options.PipelineOptionsFactory; // import the package/class the method depends on
public static void main(String[] args) {
  PipelineOptions options = PipelineOptionsFactory.create();
  options.setRunner(DirectRunner.class); // forced for this demo
  Pipeline p = Pipeline.create(options);

  // register Avro coders for serializing our messages
  Coders.registerAvroCoders(p, ExtendedRecord.class, UntypedOccurrence.class);

  // Read the DwC-A using our custom reader
  PCollection<ExtendedRecord> rawRecords = p.apply(
    "Read from Darwin Core Archive", DwCAIO.Read.withPaths("/tmp/dwca-s-bryophytes-v4.1.zip", "demo/target/tmp"));

  // Convert the ExtendedRecord into an UntypedOccurrence record
  PCollection<UntypedOccurrence> verbatimRecords = rawRecords.apply(
      "Convert the objects into untyped DwC style records",
      ParDo.of(BeamFunctions.beamify(FunctionFactory.untypedOccurrenceBuilder())))
      .setCoder(AvroCoder.of(UntypedOccurrence.class));

  // Write the records to Solr
  final SolrIO.ConnectionConfiguration conn = SolrIO.ConnectionConfiguration
    .create(SOLR_HOSTS);

  PCollection<SolrInputDocument> inputDocs = verbatimRecords.apply(
    "Convert to SOLR", ParDo.of(new SolrDocBuilder()));

  inputDocs.apply(SolrIO.write().to("beam-demo1").withConnectionConfiguration(conn));

  LOG.info("Starting the pipeline");
  PipelineResult result = p.run();
  result.waitUntilFinish();
  LOG.info("Pipeline finished with state: {} ", result.getState());
}
 
Developer: gbif, Project: pipelines, Lines: 33, Source: DwCA2SolrPipeline.java

Example 12: setUp

import org.apache.beam.sdk.options.PipelineOptionsFactory; // import the package/class the method depends on
@Before
public void setUp() throws Exception {
  MockitoAnnotations.initMocks(this);
  options = PipelineOptionsFactory.create();
  options.as(GcsOptions.class).setGcsUtil(mockGcsUtil);
  options.as(GcpOptions.class).setProject("foo");
  options.as(GcpOptions.class).setZone("us-north1-a");
  when(mockCrmClient.projects()).thenReturn(mockProjects);
  when(mockProjects.get(any(String.class))).thenReturn(mockGet);
  fakeProject = new Project().setProjectNumber(1L);
}
 
Developer: apache, Project: beam, Lines: 12, Source: GcpOptionsTest.java

Example 13: testEstimatedSizeBytes

import org.apache.beam.sdk.options.PipelineOptionsFactory; // import the package/class the method depends on
@Test
public void testEstimatedSizeBytes() throws Exception {
  final FakeCassandraService service = new FakeCassandraService();
  service.load();

  PipelineOptions pipelineOptions = PipelineOptionsFactory.create();
  CassandraIO.Read spec = CassandraIO.<Scientist>read().withCassandraService(service);
  CassandraIO.CassandraSource source = new CassandraIO.CassandraSource(
      spec,
      null);
  long estimatedSizeBytes = source.getEstimatedSizeBytes(pipelineOptions);
  // the estimated size is the sum of the byte sizes of each scientist's String representation in the map
  assertEquals(113890, estimatedSizeBytes);
}
 
Developer: apache, Project: beam, Lines: 15, Source: CassandraIOTest.java

Example 14: testWithDefaultBuffer

import org.apache.beam.sdk.options.PipelineOptionsFactory; // import the package/class the method depends on
@Test
public void testWithDefaultBuffer() throws Exception {
  Collection<BeamFnApi.Elements> values = new ArrayList<>();
  AtomicBoolean onCompletedWasCalled = new AtomicBoolean();
  CloseableFnDataReceiver<WindowedValue<byte[]>> consumer =
      new BeamFnDataBufferingOutboundObserver<>(
      PipelineOptionsFactory.create(),
      OUTPUT_LOCATION,
      CODER,
      TestStreams.withOnNext(values::add)
          .withOnCompleted(() -> onCompletedWasCalled.set(true))
          .build());

  // Test that nothing is emitted until the default buffer size is surpassed.
  consumer.accept(valueInGlobalWindow(new byte[DEFAULT_BUFFER_LIMIT - 50]));
  assertThat(values, empty());

  // Test that when we cross the buffer, we emit.
  consumer.accept(valueInGlobalWindow(new byte[50]));
  assertEquals(
      messageWithData(new byte[DEFAULT_BUFFER_LIMIT - 50], new byte[50]),
      Iterables.get(values, 0));

  // Test that nothing is emitted until the default buffer size is surpassed after a reset.
  consumer.accept(valueInGlobalWindow(new byte[DEFAULT_BUFFER_LIMIT - 50]));
  assertEquals(1, values.size());

  // Test that when we cross the buffer, we emit.
  consumer.accept(valueInGlobalWindow(new byte[50]));
  assertEquals(
      messageWithData(new byte[DEFAULT_BUFFER_LIMIT - 50], new byte[50]),
      Iterables.get(values, 1));

  // Test that when we close with an empty buffer we only have one end of stream
  consumer.close();
  assertEquals(messageWithData(),
      Iterables.get(values, 2));
}
 
Developer: apache, Project: beam, Lines: 39, Source: BeamFnDataBufferingOutboundObserverTest.java

Example 15: testReadRangeFromFileWithSplitsFromMiddleOfHeader

import org.apache.beam.sdk.options.PipelineOptionsFactory; // import the package/class the method depends on
@Test
public void testReadRangeFromFileWithSplitsFromMiddleOfHeader() throws IOException {
  PipelineOptions options = PipelineOptionsFactory.create();
  String header = "<h>";
  List<String> data = new ArrayList<>();
  for (int i = 0; i < 10; i++) {
    data.add(header);
    data.addAll(createStringDataset(3, 9));
  }
  String fileName = "file";
  File file = createFileWithData(fileName, data);

  List<String> expectedResults = new ArrayList<String>();
  expectedResults.addAll(data.subList(10, data.size()));
  // Remove all occurrences of header from expected results.
  expectedResults.removeAll(Collections.singletonList(header));

  Metadata metadata = FileSystems.matchSingleFileSpec(file.getPath());
  // Split starts after "<" of the header
  TestFileBasedSource source =
      new TestFileBasedSource(metadata, 64, 1, Long.MAX_VALUE, header);
  assertThat(expectedResults, containsInAnyOrder(readFromSource(source, options).toArray()));

  // Split starts after "<h" of the header
  source = new TestFileBasedSource(metadata, 64, 2, Long.MAX_VALUE, header);
  assertThat(expectedResults, containsInAnyOrder(readFromSource(source, options).toArray()));

  // Split starts after "<h>" of the header
  source = new TestFileBasedSource(metadata, 64, 3, Long.MAX_VALUE, header);
  assertThat(expectedResults, containsInAnyOrder(readFromSource(source, options).toArray()));
}
 
Developer: apache, Project: beam, Lines: 32, Source: FileBasedSourceTest.java


Note: The org.apache.beam.sdk.options.PipelineOptionsFactory.create method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by various developers; the source code copyright belongs to the original authors. Please consult each project's License before distributing or using the code. Do not reproduce without permission.