

Java DirectPipelineRunner Class Code Examples

This article collects typical usage examples of the Java class com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner. If you are wondering what DirectPipelineRunner is for, how to use it, or where to find working examples, the curated class examples below should help.


DirectPipelineRunner belongs to the com.google.cloud.dataflow.sdk.runners package. Six code examples of the class are shown below, ordered by popularity.
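Before the examples, here is a minimal, self-contained sketch of the most common pattern: configure PipelineOptions to use DirectPipelineRunner (the local, in-process runner of the Dataflow SDK 1.x) and run a trivial pipeline. The class name and the Create.of input are illustrative assumptions, not taken from any of the projects below.

import com.google.cloud.dataflow.sdk.Pipeline;
import com.google.cloud.dataflow.sdk.options.PipelineOptions;
import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner;
import com.google.cloud.dataflow.sdk.transforms.Create;

public class DirectRunnerQuickStart {
  public static void main(String[] args) {
    // Build default options and select the local, in-process runner.
    PipelineOptions options = PipelineOptionsFactory.fromArgs(args).create();
    options.setRunner(DirectPipelineRunner.class);

    // Create a pipeline, add a trivial in-memory source, and execute it locally.
    Pipeline p = Pipeline.create(options);
    p.apply("input", Create.of("a", "b", "c"));
    p.run();
  }
}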

Example 1: setupPipeline

import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; // import the required package/class
private Pipeline setupPipeline(final String inputPath, final String outputPath, boolean enableGcs, boolean enableCloudExec) {
    final GATKGCSOptions options = PipelineOptionsFactory.as(GATKGCSOptions.class);
    if (enableCloudExec) {
        options.setStagingLocation(getGCPTestStaging());
        options.setProject(getGCPTestProject());
        options.setRunner(BlockingDataflowPipelineRunner.class);
    } else if (BucketUtils.isHadoopUrl(inputPath) || BucketUtils.isHadoopUrl(outputPath)) {
        options.setRunner(SparkPipelineRunner.class);
    } else {
        options.setRunner(DirectPipelineRunner.class);
    }
    if (enableGcs) {
        options.setApiKey(getGCPTestApiKey());
    }
    final Pipeline p = Pipeline.create(options);
    DataflowUtils.registerGATKCoders(p);
    return p;
}
 
Developer ID: broadinstitute, Project: gatk-dataflow, Lines of code: 19, Source: SmallBamWriterTest.java

Example 2: setupRunner

import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; // import the required package/class
/**
 * Do some runner setup: check that the DirectPipelineRunner is not used in conjunction with
 * streaming, and if streaming is specified, force the DataflowPipelineRunner to be used.
 */
public void setupRunner() {
  if (options.isStreaming()) {
    if (options.getRunner() == DirectPipelineRunner.class) {
      throw new IllegalArgumentException(
        "Processing of unbounded input sources is not supported with the DirectPipelineRunner.");
    }
    // In order to cancel the pipelines automatically,
    // {@literal DataflowPipelineRunner} is forced to be used.
    options.setRunner(DataflowPipelineRunner.class);
  }
}
 
Developer ID: sinmetal, Project: iron-hippo, Lines of code: 17, Source: DataflowExampleUtils.java

Example 3: joinArtistCreditsWithRecordings

import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; // import the required package/class
@org.junit.Test
public void joinArtistCreditsWithRecordings() {

  DirectPipeline p = DirectPipeline.createForTest();

  PCollection<String> artistCreditText = p.apply("artistCredits", Create.of(artistCreditLinesOfJson)).setCoder(StringUtf8Coder.of());
  PCollection<KV<Long, MusicBrainzDataObject>> artistCredits = MusicBrainzTransforms.loadTableFromText(artistCreditText, "artist_credit_name", "artist_credit");

  PCollection<String> recordingText = p.apply("recordings", Create.of(recordingLinesOfJson)).setCoder(StringUtf8Coder.of());
  PCollection<KV<Long, MusicBrainzDataObject>> recordings = MusicBrainzTransforms.loadTableFromText(recordingText, "recording", "artist_credit");

  PCollection<MusicBrainzDataObject> joinedRecordings = MusicBrainzTransforms.innerJoin("artist credits with recordings", artistCredits, recordings);

  PCollection<Long> recordingIds = joinedRecordings.apply(
      MapElements.via((MusicBrainzDataObject mbo) -> (Long) mbo.getColumnValue("recording_id"))
          .withOutputType(new TypeDescriptor<Long>() {}));

  Long bieberRecording = 17069165L;
  Long bieberRecording2 = 15508507L;


  DataflowAssert.that(recordingIds).satisfies((longs) -> {
    List<Long> theList = new ArrayList<Long>();
    longs.forEach(theList::add);
    assert (theList.contains(bieberRecording));
    assert (theList.contains(bieberRecording2));
    return null;
  });

  PCollection<Long> numberJoined = joinedRecordings.apply("count joined recordings", Count.globally());
  PCollection<Long> numberOfArtistCredits = artistCredits.apply("count artist credits", Count.globally());

  DirectPipelineRunner.EvaluationResults results = p.run();

  long joinedRecordingsCount = results.getPCollection(numberJoined).get(0);
  assert (448 == joinedRecordingsCount);
}
 
Developer ID: GoogleCloudPlatform, Project: bigquery-etl-dataflow-sample, Lines of code: 38, Source: MusicBrainzTransformsTest.java

Example 4: testNest

import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; // import the required package/class
@org.junit.Test
public void testNest() {
  DirectPipeline p = DirectPipeline.createForTest();
  PCollection<String> artistText = p.apply("artist", Create.of(artistLinesOfJson)).setCoder(StringUtf8Coder.of());
  PCollection<String> artistCreditNameText = p.apply("artist_credit_name", Create.of(artistCreditLinesOfJson));
  PCollection<String> recordingText = p.apply("recording", Create.of(recordingLinesOfJson)).setCoder(StringUtf8Coder.of());

  PCollection<KV<Long, MusicBrainzDataObject>> artistsById = MusicBrainzTransforms.loadTableFromText(artistText, "artist", "id");

  PCollection<KV<Long, MusicBrainzDataObject>> recordingsByArtistCredit =
      MusicBrainzTransforms.loadTableFromText(recordingText, "recording", "artist_credit");
  PCollection<KV<Long, MusicBrainzDataObject>> artistCreditByArtistCredit =
      MusicBrainzTransforms.loadTableFromText(artistCreditNameText, "artist_credit_name", "artist_credit");

  PCollection<MusicBrainzDataObject> recordingsWithCredits = MusicBrainzTransforms.innerJoin("credited recordings", artistCreditByArtistCredit, recordingsByArtistCredit);
  PCollection<KV<Long, MusicBrainzDataObject>> recordingsJoinedWithCredits =
      MusicBrainzTransforms.by("artist_credit_name_artist", recordingsWithCredits);
  PCollection<MusicBrainzDataObject> artistsWithNestedRecordings = MusicBrainzTransforms.nest(artistsById, recordingsJoinedWithCredits, "recordings");


  DirectPipelineRunner.EvaluationResults results = p.run();

  List<MusicBrainzDataObject> resultObjects = results.getPCollection(artistsWithNestedRecordings);
  assert (resultObjects.size() == 1);
  assert (((List<MusicBrainzDataObject>) resultObjects.get(0).getColumnValue("artist_recordings")).size() == 448);


}
 
Developer ID: GoogleCloudPlatform, Project: bigquery-etl-dataflow-sample, Lines of code: 29, Source: MusicBrainzTransformsTest.java

Example 5: runner

import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; // import the required package/class
private static Class<? extends PipelineRunner<?>> runner(String name) {
  Class<? extends PipelineRunner<?>> c = DirectPipelineRunner.class; // default

  if (DEFAULT_RUNNER.equals(name) || name == null) {
    c = DataflowPipelineRunner.class;
  } else if (BLOCKING_RUNNER.equals(name)) {
    c = BlockingDataflowPipelineRunner.class;
  } else if (DIRECT_RUNNER.equals(name)) {
    c = DirectPipelineRunner.class;
  }
  return c;
}
 
Developer ID: googlegenomics, Project: dockerflow, Lines of code: 13, Source: DataflowFactory.java

Example 6: init

import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; // import the required package/class
@Before public void init() throws IOException {
  sparkRunner = SparkPipelineRunner.create();
  directRunner = DirectPipelineRunner.createForTest();
  testDataDirName = Joiner.on(File.separator).join("target", "test-data", name.getMethodName())
      + File.separator;
  FileUtils.deleteDirectory(new File(testDataDirName));
  new File(testDataDirName).mkdirs();
}
 
Developer ID: shakamunyi, Project: spark-dataflow, Lines of code: 9, Source: TransformTranslatorTest.java


Note: The com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by various developers; copyright in the source code remains with the original authors, and redistribution or use should follow the corresponding project's license. Do not reproduce without permission.