

Java GenomicsOptions Class Code Examples

This article collects typical usage examples of the Java class com.google.cloud.genomics.dataflow.utils.GenomicsOptions. If you are wondering how to use the GenomicsOptions class, what it is for, or where to find usage examples, the curated code examples below may help.


The GenomicsOptions class belongs to the com.google.cloud.genomics.dataflow.utils package. Six code examples of the class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code examples.
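Before the individual examples, here is a minimal sketch of the setup pattern that recurs in most of them: register an options interface, parse and validate the command-line arguments, then obtain credentials via GenomicsOptions.Methods.getGenomicsAuth. The Options interface here is hypothetical, and the import paths for OfflineAuth and the Beam classes are assumptions based on how the examples below use them; the real pipelines define richer options interfaces of their own.

import java.io.IOException;
import java.security.GeneralSecurityException;

import com.google.cloud.genomics.dataflow.utils.GenomicsOptions;
import com.google.cloud.genomics.utils.OfflineAuth; // assumed package for OfflineAuth
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

public class GenomicsOptionsUsageSketch {

  // Hypothetical options interface; assumed to extend GenomicsOptions, as the
  // calls to GenomicsOptions.Methods.getGenomicsAuth(options) in the examples suggest.
  public interface Options extends GenomicsOptions {
  }

  public static void main(String[] args) throws IOException, GeneralSecurityException {
    // Register the options so that they show up via --help.
    PipelineOptionsFactory.register(Options.class);
    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);

    // Credentials for the Google Genomics API, derived from the parsed options.
    // In the real examples, auth is passed into VariantStreamer or custom DoFns.
    OfflineAuth auth = GenomicsOptions.Methods.getGenomicsAuth(options);

    Pipeline p = Pipeline.create(options);
    // ... apply the transforms of the actual pipeline here ...
    p.run();
  }
}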

Example 1: ReadsDataflowSource

import com.google.cloud.genomics.dataflow.utils.GenomicsOptions; // import the required package/class
/**
 * @param bam a file path or a Google Cloud Storage identifier of the BAM file to read
 * @param p the pipeline object for the job. This is needed to read a BAM from a bucket.
 *          The options inside the pipeline MUST be GCSOptions (to get the secret file).
 */
public ReadsDataflowSource(String bam, Pipeline p){
    this.bam = Utils.nonNull(bam);
    this.pipeline = p;

    cloudStorageUrl = BucketUtils.isCloudStorageUrl(bam);
    hadoopUrl = BucketUtils.isHadoopUrl(bam);
    if(cloudStorageUrl) {
        // The options used to create the pipeline must be GCSOptions to get the secret file.
        try {
            options = p.getOptions().as(GCSOptions.class);
        } catch (ClassCastException e) {
            throw new GATKException("The pipeline options were not GCSOptions.", e);
        }
        GenomicsOptions.Methods.validateOptions(options);
        auth = getAuth(options);
    }
}
 
Developer: broadinstitute, Project: gatk-dataflow, Lines of code: 23, Source file: ReadsDataflowSource.java
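A hypothetical caller for this constructor, assuming the pipeline was created from GCSOptions as the Javadoc requires. The bucket path is a placeholder, the class is assumed to sit in the same package as ReadsDataflowSource (so no import for it is needed), and the Beam import paths are an assumption; the original gatk-dataflow code may target the older com.google.cloud.dataflow.sdk packages, in which case only the imports change.

import com.google.cloud.genomics.dataflow.utils.GCSOptions; // assumed package for GCSOptions
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

public class ReadsDataflowSourceUsageSketch {
  public static void main(String[] args) {
    // The pipeline MUST be created from GCSOptions so the source can read the secret file.
    GCSOptions options = PipelineOptionsFactory.fromArgs(args).withValidation().as(GCSOptions.class);
    Pipeline p = Pipeline.create(options);

    // "gs://my-bucket/sample.bam" is a placeholder Cloud Storage path.
    ReadsDataflowSource source = new ReadsDataflowSource("gs://my-bucket/sample.bam", p);
    // ... hand the source to the rest of the read-processing pipeline ...
  }
}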

Example 2: main

import com.google.cloud.genomics.dataflow.utils.GenomicsOptions; // import the required package/class
public static void main(String[] args) throws GeneralSecurityException, IOException, URISyntaxException {
  // Register the options so that they show up via --help
  PipelineOptionsFactory.register(Options.class);
  pipelineOptions = PipelineOptionsFactory.fromArgs(args)
      .withValidation().as(Options.class);
  // Option validation is not yet automatic, we make an explicit call here.
  Options.Methods.validateOptions(pipelineOptions);

  auth = GenomicsOptions.Methods.getGenomicsAuth(pipelineOptions);
  p = Pipeline.create(pipelineOptions);

  // ensure data is accessible
  String BAMFilePath = pipelineOptions.getBAMFilePath();
  if (!Strings.isNullOrEmpty(BAMFilePath)) {
    if (GCSURLExists(BAMFilePath)) {
      System.out.println(BAMFilePath + " is present, good.");
    } else {
      System.out.println("Error: " + BAMFilePath + " not found.");
      return;
    }
    if (pipelineOptions.isShardBAMReading()) {
      // the BAM code expects an index at BAMFilePath+".bai"
      // and sharded reading will fail if the index isn't there.
      String BAMIndexPath = BAMFilePath + ".bai";
      if (GCSURLExists(BAMIndexPath)) {
        System.out.println(BAMIndexPath + " is present, good.");
      } else {
        System.out.println("Error: " + BAMIndexPath + " not found.");
        return;
      }
    }
  }
  System.out.println("Output will be written to "+pipelineOptions.getOutput());

  PCollection<Read> reads = getReads();
  PCollection<Long> readCount = reads.apply(Count.<Read>globally());
  PCollection<String> readCountText = readCount.apply("toString", ParDo.of(new DoFn<Long, String>() {
    @ProcessElement
    public void processElement(DoFn<Long, String>.ProcessContext c) throws Exception {
      c.output(String.valueOf(c.element()));
    }
  }));
  readCountText.apply("WriteOutput", TextIO.write().to(pipelineOptions.getOutput()).withoutSharding());

  PipelineResult result = p.run();
  if(pipelineOptions.getWait()) {
    result.waitUntilFinish();
  }
}
 
Developer: googlegenomics, Project: dataflow-java, Lines of code: 50, Source file: CountReads.java
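The getters used above (getBAMFilePath, isShardBAMReading, getOutput, getWait) imply an Options interface roughly like the following. This is a hedged reconstruction, not the actual CountReads.Options from dataflow-java; the parent interface and the annotation defaults are assumptions.

import org.apache.beam.sdk.options.Default;
import org.apache.beam.sdk.options.Description;

import com.google.cloud.genomics.dataflow.utils.GenomicsOptions;

// Hypothetical reconstruction of the nested Options interface used by the pipeline above.
public interface Options extends GenomicsOptions {
  @Description("Cloud Storage path of the BAM file whose reads should be counted.")
  String getBAMFilePath();
  void setBAMFilePath(String value);

  @Description("Read the BAM file in shards; requires an index at <BAMFilePath>.bai.")
  @Default.Boolean(false)
  boolean isShardBAMReading();
  void setShardBAMReading(boolean value);

  @Description("Path the read count is written to.")
  String getOutput();
  void setOutput(String value);

  @Description("Block until the pipeline finishes.")
  @Default.Boolean(false)
  boolean getWait();
  void setWait(boolean value);
}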

Example 3: main

import com.google.cloud.genomics.dataflow.utils.GenomicsOptions; // import the required package/class
public static void main(String[] args) throws IOException, GeneralSecurityException {
  // Register the options so that they show up via --help
  PipelineOptionsFactory.register(Options.class);
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  // Option validation is not yet automatic, we make an explicit call here.
  Options.Methods.validateOptions(options);

  // Set up the prototype request and auth.
  StreamVariantsRequest prototype = StreamVariantsRequest.newBuilder(
      CallSetNamesOptions.Methods.getRequestPrototype(options))
      // In this case, we do not want responses containing a subset of calls, we want all of them.
      .clearCallSetIds()
      .build();
  OfflineAuth auth = GenomicsOptions.Methods.getGenomicsAuth(options);

  ImmutableSet<String> callSetIds = ImmutableSet.<String>builder()
      .addAll(CallSetNamesOptions.Methods.getCallSetIds(options))
      .build();
  LOG.info("The pipeline will identify and write to Cloud Storage variants "
      + "private to " + callSetIds.size() + " genomes with callSetIds: " + callSetIds);
  if (options.getIdentifyVariantsWithoutCalls()) {
    LOG.info("* The pipeline will also identify variants with no callsets. *");
  }

  List<StreamVariantsRequest> shardRequests =
      options.isAllReferences() ? ShardUtils.getVariantRequests(prototype,
          ShardUtils.SexChromosomeFilter.INCLUDE_XY, options.getBasesPerShard(), auth)
          : ShardUtils.getVariantRequests(prototype, options.getBasesPerShard(),
              options.getReferences());

  Pipeline p = Pipeline.create(options);
  PCollection<Variant> variants = p.begin()
      .apply(Create.of(shardRequests))
      .apply(new VariantStreamer(auth, ShardBoundary.Requirement.STRICT, VARIANT_FIELDS))
      .apply(ParDo.of(new PrivateVariantsFilterFn(callSetIds,
          options.getIdentifyVariantsWithoutCalls())));

  variants.apply("FormatResults", ParDo.of(new DoFn<Variant, String>() {
    @ProcessElement
    public void processElement(ProcessContext c) {
      Variant v = c.element();
      c.output(Joiner.on("\t").join(v.getId(),
          v.getReferenceName(),
          v.getStart(),
          v.getEnd(),
          v.getReferenceBases(),
          Joiner.on(",").join(v.getAlternateBasesList())
          ));
    }
  }))
  .apply(TextIO.write().to(options.getOutput()));

  p.run();
}
 
Developer: googlegenomics, Project: dataflow-java, Lines of code: 55, Source file: IdentifyPrivateVariants.java
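PrivateVariantsFilterFn itself is not part of this snippet. Judging from the log messages above, it keeps only variants whose calls all belong to the given callSetIds and, when getIdentifyVariantsWithoutCalls() is set, variants that have no calls at all. The sketch below is a simplified, hypothetical stand-in for that filter, not the actual class from dataflow-java:

import com.google.common.collect.ImmutableSet;
import com.google.genomics.v1.Variant;
import com.google.genomics.v1.VariantCall;
import org.apache.beam.sdk.transforms.DoFn;

// Hypothetical simplified filter: emit a variant only if every one of its calls belongs
// to the target call sets, or if it has no calls and that behaviour was requested.
class PrivateVariantsFilterSketchFn extends DoFn<Variant, Variant> {
  private final ImmutableSet<String> callSetIds;
  private final boolean keepVariantsWithoutCalls;

  PrivateVariantsFilterSketchFn(ImmutableSet<String> callSetIds, boolean keepVariantsWithoutCalls) {
    this.callSetIds = callSetIds;
    this.keepVariantsWithoutCalls = keepVariantsWithoutCalls;
  }

  @ProcessElement
  public void processElement(ProcessContext c) {
    Variant v = c.element();
    if (v.getCallsCount() == 0) {
      if (keepVariantsWithoutCalls) {
        c.output(v);
      }
      return;
    }
    for (VariantCall call : v.getCallsList()) {
      if (!callSetIds.contains(call.getCallSetId())) {
        return; // A call from outside the cohort, so the variant is not private to it.
      }
    }
    c.output(v);
  }
}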

Example 4: main

import com.google.cloud.genomics.dataflow.utils.GenomicsOptions; // import the required package/class
public static void main(String[] args) throws Exception {
  // Register the options so that they show up via --help
  PipelineOptionsFactory.register(Options.class);
  Options options = PipelineOptionsFactory.fromArgs(args)
      .withValidation().as(Options.class);
  // Option validation is not yet automatic, we make an explicit call here.
  Options.Methods.validateOptions(options);

  // Set up the prototype request and auth.
  StreamVariantsRequest prototype = CallSetNamesOptions.Methods.getRequestPrototype(options);
  OfflineAuth auth = GenomicsOptions.Methods.getGenomicsAuth(options);
  Genomics genomics = GenomicsFactory.builder().build().fromOfflineAuth(auth);

  List<String> callSetIds = CallSetNamesOptions.Methods.getCallSetIds(options);
  List<String> transcriptSetIds =
      validateAnnotationSetsFlag(genomics, options.getTranscriptSetIds(), "TRANSCRIPT");
  List<String> variantAnnotationSetIds =
      validateAnnotationSetsFlag(genomics, options.getVariantAnnotationSetIds(), "VARIANT");
  validateRefsetForAnnotationSets(genomics, transcriptSetIds);

  List<StreamVariantsRequest> requests = options.isAllReferences() ?
      ShardUtils.getVariantRequests(prototype, ShardUtils.SexChromosomeFilter.EXCLUDE_XY,
          options.getBasesPerShard(), auth) :
            ShardUtils.getVariantRequests(prototype, options.getBasesPerShard(), options.getReferences());

  Pipeline p = Pipeline.create(options);
  p.getCoderRegistry().registerCoderForClass(Annotation.class,
    (Coder<Annotation>) GenericJsonCoder.of(Annotation.class));
  p.getCoderRegistry().registerCoderForClass(AnnotationSet.class,
    (Coder<AnnotationSet>) GenericJsonCoder.of(AnnotationSet.class));
  p.getCoderRegistry().registerCoderForClass(ListBasesResponse.class,
    (Coder<ListBasesResponse>) GenericJsonCoder.of(ListBasesResponse.class));
  p.getCoderRegistry().registerCoderForClass(SearchAnnotationsRequest.class,
    (Coder<SearchAnnotationsRequest>) GenericJsonCoder.of(SearchAnnotationsRequest.class));
  p.getCoderRegistry().registerCoderForClass(VariantAnnotation.class,
    (Coder<VariantAnnotation>) GenericJsonCoder.of(VariantAnnotation.class));
  p.begin()
    .apply(Create.of(requests))
    .apply(ParDo.of(new AnnotateVariants(auth, callSetIds, transcriptSetIds, variantAnnotationSetIds)))
    .apply(GroupByKey.<String, VariantAnnotation>create())
    .apply(ParDo.of(new DoFn<KV<String, Iterable<VariantAnnotation>>, String>() {
      @ProcessElement
      public void processElement(ProcessContext c) {
        c.output(c.element().getKey() + ": " + c.element().getValue());
      }
    }))
    .apply(TextIO.write().to(options.getOutput()));
  p.run();
}
 
Developer: googlegenomics, Project: dataflow-java, Lines of code: 50, Source file: AnnotateVariants.java

Example 5: main

import com.google.cloud.genomics.dataflow.utils.GenomicsOptions; // import the required package/class
public static void main(String[] args) throws IOException, GeneralSecurityException {
  // Register the options so that they show up via --help
  PipelineOptionsFactory.register(Options.class);
  Options options = PipelineOptionsFactory.fromArgs(args)
      .withValidation().as(Options.class);
  // Option validation is not yet automatic, we make an explicit call here.
  Options.Methods.validateOptions(options);

  // Set up the prototype request and auth.
  StreamVariantsRequest prototype = CallSetNamesOptions.Methods.getRequestPrototype(options);
  OfflineAuth auth = GenomicsOptions.Methods.getGenomicsAuth(options);

  // Make a bimap of the callsets so that the indices the pipeline is passing around are small.
  List<String> callSetNames = (0 < prototype.getCallSetIdsCount())
      ? Lists.newArrayList(CallSetNamesOptions.Methods.getCallSetNames(options))
          : GenomicsUtils.getCallSetsNames(options.getVariantSetId(), auth);
  Collections.sort(callSetNames); // Ensure a stable sort order for reproducible results.
  BiMap<String, Integer> dataIndices = HashBiMap.create();
  for(String callSetName : callSetNames) {
    dataIndices.put(callSetName, dataIndices.size());
  }

  Pipeline p = Pipeline.create(options);
  p.begin();

  PCollection<StreamVariantsRequest> requests;
  if(null != options.getSitesFilepath()) {
    // Compute PCA on a list of sites.
    requests = p.apply("ReadSites", TextIO.read().from(options.getSitesFilepath()))
        .apply(new SitesToShards.SitesToStreamVariantsShardsTransform(prototype));
  } else {
    // Compute PCA over genomic regions.
    List<StreamVariantsRequest> shardRequests = options.isAllReferences() ?
        ShardUtils.getVariantRequests(prototype, ShardUtils.SexChromosomeFilter.EXCLUDE_XY,
            options.getBasesPerShard(), auth) :
          ShardUtils.getVariantRequests(prototype, options.getBasesPerShard(), options.getReferences());

    requests = p.apply(Create.of(shardRequests));
  }

  requests.apply(new VariantStreamer(auth, ShardBoundary.Requirement.STRICT, VARIANT_FIELDS))
      .apply(ParDo.of(new ExtractSimilarCallsets()))
      .apply(new OutputPCoAFile(dataIndices, options.getOutput()));

  PipelineResult result = p.run();
  if(options.getWait()) {
    result.waitUntilFinish();
  }
}
 
Developer: googlegenomics, Project: dataflow-java, Lines of code: 50, Source file: VariantSimilarity.java
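The BiMap built above gives every call set name a small dense integer index, so the pipeline can shuffle compact integers instead of long name strings and still map an index back to its name via the inverse view when writing results. A tiny standalone illustration of that round trip (the sample names are made up):

import com.google.common.collect.BiMap;
import com.google.common.collect.HashBiMap;

import java.util.Arrays;
import java.util.Collections;
import java.util.List;

public class DataIndicesSketch {
  public static void main(String[] args) {
    List<String> callSetNames = Arrays.asList("sampleC", "sampleA", "sampleB");
    Collections.sort(callSetNames); // Stable order, so the indices are reproducible.

    BiMap<String, Integer> dataIndices = HashBiMap.create();
    for (String callSetName : callSetNames) {
      dataIndices.put(callSetName, dataIndices.size());
    }

    int index = dataIndices.get("sampleB");         // forward lookup: name -> compact index
    String name = dataIndices.inverse().get(index); // inverse lookup: index -> name
    System.out.println(index + " -> " + name);      // prints "1 -> sampleB"
  }
}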

Example 6: main

import com.google.cloud.genomics.dataflow.utils.GenomicsOptions; // import the required package/class
public static void main(String[] args) throws IOException, GeneralSecurityException {
  // Register the options so that they show up via --help.
  PipelineOptionsFactory.register(TransformNonVariantSegmentData.Options.class);
  TransformNonVariantSegmentData.Options options =
      PipelineOptionsFactory.fromArgs(args).withValidation()
          .as(TransformNonVariantSegmentData.Options.class);

  Preconditions.checkState(options.getHasNonVariantSegments(),
      "This job is only valid for data containing non-variant segments. "
          + "Set the --hasNonVariantSegments command line option accordingly.");

  Map<String, Set<String>> cohortMap = new HashMap<String, Set<String>>();
  // Always include the default cohort.
  cohortMap.put(ALL_SAMPLES_COHORT, ImmutableSet.<String>builder().build());
  if (!Strings.isNullOrEmpty(options.getCohorts())) {
    List<String> cohortFilenames = Splitter.on(",").splitToList(options.getCohorts());
    for (String cohort : cohortFilenames) {
      cohortMap.put(cohort, ImmutableSet
          .<String>builder()
          .addAll(
              Splitter.on(CharMatcher.breakingWhitespace()).omitEmptyStrings().trimResults()
              .split(Files.toString(new File(cohort), Charset.defaultCharset()))).build());
    }
  }

  // Set up the prototype request and auth.
  StreamVariantsRequest prototype = CallSetNamesOptions.Methods.getRequestPrototype(options);
  final OfflineAuth auth = GenomicsOptions.Methods.getGenomicsAuth(options);

  List<StreamVariantsRequest> requests = options.isAllReferences() ?
      ShardUtils.getVariantRequests(prototype, ShardUtils.SexChromosomeFilter.INCLUDE_XY,
          options.getBasesPerShard(), auth) :
        ShardUtils.getVariantRequests(prototype, options.getBasesPerShard(), options.getReferences());

  Pipeline p = Pipeline.create(options);

  // Create a collection of data with non-variant segments omitted but calls from overlapping
  // non-variant segments added to SNPs and write them to BigQuery.
  p.begin()
      .apply(Create.of(requests))
      .apply(new VariantStreamer(auth, ShardBoundary.Requirement.STRICT, VARIANT_API_FIELDS))
      .apply(ParDo.of(new FilterCallsFn(options.getOmitLowQualityCalls())))
      .apply(new JoinNonVariantSegmentsWithVariants.BinShuffleAndCombineTransform())
      .apply(ParDo.of(new FlagVariantsWithAmbiguousCallsFn()))
      .apply(ParDo.of(new FormatVariantsFn(options.getSummarizeRefMatchCallSets(),
          !options.getVariantMergeStrategy().equals(MergeNonVariantSegmentsWithSnps.class),
          cohortMap)))
      .apply(
          BigQueryIO.writeTableRows().to(options.getOutputTable())
              .withSchema(getTableSchema(options.getSummarizeRefMatchCallSets(), cohortMap.keySet()))
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
              .withWriteDisposition(options.getAppendToTable()
                  ? BigQueryIO.Write.WriteDisposition.WRITE_APPEND : BigQueryIO.Write.WriteDisposition.WRITE_EMPTY));

  PipelineResult result = p.run();
  if (options.getWait()) {
    result.waitUntilFinish();
  }
}
 
Developer: googlegenomics, Project: codelabs, Lines of code: 60, Source file: TransformNonVariantSegmentData.java
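The cohort handling above reads each file listed in options.getCohorts() and splits its contents on any whitespace to build a set of call set names. A small standalone illustration of that Splitter chain, with made-up file contents standing in for Files.toString(...):

import com.google.common.base.CharMatcher;
import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableSet;

public class CohortSplitSketch {
  public static void main(String[] args) {
    // Stand-in for Files.toString(new File(cohort), Charset.defaultCharset()).
    String cohortFileContents = "sampleA sampleB\n  sampleC\t\nsampleD\n";

    ImmutableSet<String> cohort = ImmutableSet.<String>builder()
        .addAll(Splitter.on(CharMatcher.breakingWhitespace())
            .omitEmptyStrings()
            .trimResults()
            .split(cohortFileContents))
        .build();

    System.out.println(cohort); // prints [sampleA, sampleB, sampleC, sampleD]
  }
}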


Note: The com.google.cloud.genomics.dataflow.utils.GenomicsOptions class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by many developers; the copyright of the source code belongs to its original authors. Please follow each project's license when distributing or using the code, and do not republish without permission.