This article collects typical usage examples of the Java class com.google.cloud.genomics.dataflow.utils.GenomicsOptions. If you are unsure what GenomicsOptions does, how it is used, or want to see it in real code, the curated class examples below may help.
GenomicsOptions belongs to the com.google.cloud.genomics.dataflow.utils package. Six code examples of the class are shown below, ordered by popularity by default.
Example 1: ReadsDataflowSource
import com.google.cloud.genomics.dataflow.utils.GenomicsOptions; // import the required package/class

/**
 * @param bam A file path or a google bucket identifier to a bam file to read
 * @param p the pipeline object for the job. This is needed to read a bam from a bucket.
 *          The options inside of the pipeline MUST BE GCSOptions (to get the secret file).
 */
public ReadsDataflowSource(String bam, Pipeline p) {
  this.bam = Utils.nonNull(bam);
  this.pipeline = p;

  cloudStorageUrl = BucketUtils.isCloudStorageUrl(bam);
  hadoopUrl = BucketUtils.isHadoopUrl(bam);
  if (cloudStorageUrl) {
    // The options used to create the pipeline must be GCSOptions to get the secret file.
    try {
      options = p.getOptions().as(GCSOptions.class);
    } catch (ClassCastException e) {
      throw new GATKException("The pipeline options was not GCSOptions.", e);
    }
    GenomicsOptions.Methods.validateOptions(options);
    auth = getAuth(options);
  }
}
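A minimal usage sketch for this constructor (the bucket path and surrounding wiring below are assumptions, not part of the original class): the pipeline must be created from GCSOptions so that the cast inside the constructor succeeds.

// Hypothetical usage sketch; "gs://my-bucket/sample.bam" is a made-up path.
GCSOptions options = PipelineOptionsFactory.fromArgs(args).as(GCSOptions.class);
Pipeline p = Pipeline.create(options);
ReadsDataflowSource readsSource = new ReadsDataflowSource("gs://my-bucket/sample.bam", p);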
Example 2: main
import com.google.cloud.genomics.dataflow.utils.GenomicsOptions; // import the required package/class

public static void main(String[] args) throws GeneralSecurityException, IOException, URISyntaxException {
  // Register the options so that they show up via --help
  PipelineOptionsFactory.register(Options.class);
  pipelineOptions = PipelineOptionsFactory.fromArgs(args)
      .withValidation().as(Options.class);
  // Option validation is not yet automatic, we make an explicit call here.
  Options.Methods.validateOptions(pipelineOptions);

  auth = GenomicsOptions.Methods.getGenomicsAuth(pipelineOptions);
  p = Pipeline.create(pipelineOptions);

  // ensure data is accessible
  String BAMFilePath = pipelineOptions.getBAMFilePath();
  if (!Strings.isNullOrEmpty(BAMFilePath)) {
    if (GCSURLExists(BAMFilePath)) {
      System.out.println(BAMFilePath + " is present, good.");
    } else {
      System.out.println("Error: " + BAMFilePath + " not found.");
      return;
    }
    if (pipelineOptions.isShardBAMReading()) {
      // the BAM code expects an index at BAMFilePath+".bai"
      // and sharded reading will fail if the index isn't there.
      String BAMIndexPath = BAMFilePath + ".bai";
      if (GCSURLExists(BAMIndexPath)) {
        System.out.println(BAMIndexPath + " is present, good.");
      } else {
        System.out.println("Error: " + BAMIndexPath + " not found.");
        return;
      }
    }
  }
  System.out.println("Output will be written to " + pipelineOptions.getOutput());

  PCollection<Read> reads = getReads();
  PCollection<Long> readCount = reads.apply(Count.<Read>globally());
  PCollection<String> readCountText = readCount.apply("toString", ParDo.of(new DoFn<Long, String>() {
    @ProcessElement
    public void processElement(DoFn<Long, String>.ProcessContext c) throws Exception {
      c.output(String.valueOf(c.element()));
    }
  }));
  readCountText.apply("WriteOutput", TextIO.write().to(pipelineOptions.getOutput()).withoutSharding());

  PipelineResult result = p.run();
  if (pipelineOptions.getWait()) {
    result.waitUntilFinish();
  }
}
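The anonymous DoFn above only converts the count to a string. On a recent Apache Beam SDK the same step can be written with the built-in ToString transform; a sketch, assuming org.apache.beam.sdk.transforms.ToString is available in the SDK version this project uses:

// Equivalent to the "toString" ParDo above, assuming Beam's ToString transform is available.
PCollection<String> readCountText =
    readCount.apply("toString", ToString.elements());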
Example 3: main
import com.google.cloud.genomics.dataflow.utils.GenomicsOptions; // import the required package/class

public static void main(String[] args) throws IOException, GeneralSecurityException {
  // Register the options so that they show up via --help
  PipelineOptionsFactory.register(Options.class);
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  // Option validation is not yet automatic, we make an explicit call here.
  Options.Methods.validateOptions(options);

  // Set up the prototype request and auth.
  StreamVariantsRequest prototype = StreamVariantsRequest.newBuilder(
      CallSetNamesOptions.Methods.getRequestPrototype(options))
      // In this case, we do not want responses containing a subset of calls, we want all of them.
      .clearCallSetIds()
      .build();
  OfflineAuth auth = GenomicsOptions.Methods.getGenomicsAuth(options);

  ImmutableSet<String> callSetIds = ImmutableSet.<String>builder()
      .addAll(CallSetNamesOptions.Methods.getCallSetIds(options))
      .build();
  LOG.info("The pipeline will identify and write to Cloud Storage variants "
      + "private to " + callSetIds.size() + " genomes with callSetIds: " + callSetIds);
  if (options.getIdentifyVariantsWithoutCalls()) {
    LOG.info("* The pipeline will also identify variants with no callsets. *");
  }

  List<StreamVariantsRequest> shardRequests =
      options.isAllReferences() ? ShardUtils.getVariantRequests(prototype,
          ShardUtils.SexChromosomeFilter.INCLUDE_XY, options.getBasesPerShard(), auth)
          : ShardUtils.getVariantRequests(prototype, options.getBasesPerShard(),
              options.getReferences());

  Pipeline p = Pipeline.create(options);
  PCollection<Variant> variants = p.begin()
      .apply(Create.of(shardRequests))
      .apply(new VariantStreamer(auth, ShardBoundary.Requirement.STRICT, VARIANT_FIELDS))
      .apply(ParDo.of(new PrivateVariantsFilterFn(callSetIds,
          options.getIdentifyVariantsWithoutCalls())));

  variants.apply("FormatResults", ParDo.of(new DoFn<Variant, String>() {
        @ProcessElement
        public void processElement(ProcessContext c) {
          Variant v = c.element();
          c.output(Joiner.on("\t").join(v.getId(),
              v.getReferenceName(),
              v.getStart(),
              v.getEnd(),
              v.getReferenceBases(),
              Joiner.on(",").join(v.getAlternateBasesList())
              ));
        }
      }))
      .apply(TextIO.write().to(options.getOutput()));

  p.run();
}
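The PrivateVariantsFilterFn used above is not shown on this page. Purely as an illustration of what such a filter might look like, the hypothetical sketch below keeps only variants whose calls all belong to the cohort of interest; it is an assumption about the intent, not the pipeline's actual implementation, and it assumes the com.google.genomics.v1.Variant proto and the Beam DoFn used elsewhere in this example.

// Hypothetical sketch only; the real PrivateVariantsFilterFn may differ.
import com.google.common.collect.ImmutableSet;
import com.google.genomics.v1.Variant;
import com.google.genomics.v1.VariantCall;
import org.apache.beam.sdk.transforms.DoFn;

class PrivateVariantsFilterFnSketch extends DoFn<Variant, Variant> {
  private final ImmutableSet<String> cohortCallSetIds;
  private final boolean keepVariantsWithoutCalls;

  PrivateVariantsFilterFnSketch(ImmutableSet<String> cohortCallSetIds,
      boolean keepVariantsWithoutCalls) {
    this.cohortCallSetIds = cohortCallSetIds;
    this.keepVariantsWithoutCalls = keepVariantsWithoutCalls;
  }

  @ProcessElement
  public void processElement(ProcessContext c) {
    Variant variant = c.element();
    if (variant.getCallsCount() == 0) {
      // Variants with no calls at all are optionally retained.
      if (keepVariantsWithoutCalls) {
        c.output(variant);
      }
      return;
    }
    // Emit the variant only if every call belongs to the cohort of interest.
    for (VariantCall call : variant.getCallsList()) {
      if (!cohortCallSetIds.contains(call.getCallSetId())) {
        return;
      }
    }
    c.output(variant);
  }
}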
Example 4: main
import com.google.cloud.genomics.dataflow.utils.GenomicsOptions; // import the required package/class

public static void main(String[] args) throws Exception {
  // Register the options so that they show up via --help
  PipelineOptionsFactory.register(Options.class);
  Options options = PipelineOptionsFactory.fromArgs(args)
      .withValidation().as(Options.class);
  // Option validation is not yet automatic, we make an explicit call here.
  Options.Methods.validateOptions(options);

  // Set up the prototype request and auth.
  StreamVariantsRequest prototype = CallSetNamesOptions.Methods.getRequestPrototype(options);
  OfflineAuth auth = GenomicsOptions.Methods.getGenomicsAuth(options);
  Genomics genomics = GenomicsFactory.builder().build().fromOfflineAuth(auth);

  List<String> callSetIds = CallSetNamesOptions.Methods.getCallSetIds(options);
  List<String> transcriptSetIds =
      validateAnnotationSetsFlag(genomics, options.getTranscriptSetIds(), "TRANSCRIPT");
  List<String> variantAnnotationSetIds =
      validateAnnotationSetsFlag(genomics, options.getVariantAnnotationSetIds(), "VARIANT");
  validateRefsetForAnnotationSets(genomics, transcriptSetIds);

  List<StreamVariantsRequest> requests = options.isAllReferences() ?
      ShardUtils.getVariantRequests(prototype, ShardUtils.SexChromosomeFilter.EXCLUDE_XY,
          options.getBasesPerShard(), auth) :
      ShardUtils.getVariantRequests(prototype, options.getBasesPerShard(), options.getReferences());

  Pipeline p = Pipeline.create(options);
  p.getCoderRegistry().registerCoderForClass(Annotation.class,
      (Coder<Annotation>) GenericJsonCoder.of(Annotation.class));
  p.getCoderRegistry().registerCoderForClass(AnnotationSet.class,
      (Coder<AnnotationSet>) GenericJsonCoder.of(AnnotationSet.class));
  p.getCoderRegistry().registerCoderForClass(ListBasesResponse.class,
      (Coder<ListBasesResponse>) GenericJsonCoder.of(ListBasesResponse.class));
  p.getCoderRegistry().registerCoderForClass(SearchAnnotationsRequest.class,
      (Coder<SearchAnnotationsRequest>) GenericJsonCoder.of(SearchAnnotationsRequest.class));
  p.getCoderRegistry().registerCoderForClass(VariantAnnotation.class,
      (Coder<VariantAnnotation>) GenericJsonCoder.of(VariantAnnotation.class));

  p.begin()
      .apply(Create.of(requests))
      .apply(ParDo.of(new AnnotateVariants(auth, callSetIds, transcriptSetIds, variantAnnotationSetIds)))
      .apply(GroupByKey.<String, VariantAnnotation>create())
      .apply(ParDo.of(new DoFn<KV<String, Iterable<VariantAnnotation>>, String>() {
        @ProcessElement
        public void processElement(ProcessContext c) {
          c.output(c.element().getKey() + ": " + c.element().getValue());
        }
      }))
      .apply(TextIO.write().to(options.getOutput()));
  p.run();
}
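The coder registrations above all follow one pattern: every Genomics API JSON model class that flows through the pipeline gets a GenericJsonCoder. If another model class were added to the pipeline, the same call would apply; a sketch, where ReferenceSet is only an illustrative model class and not necessarily one this pipeline uses:

// Sketch: registering a coder for one more (hypothetical for this pipeline) model class.
p.getCoderRegistry().registerCoderForClass(ReferenceSet.class,
    (Coder<ReferenceSet>) GenericJsonCoder.of(ReferenceSet.class));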
Example 5: main
import com.google.cloud.genomics.dataflow.utils.GenomicsOptions; // import the required package/class

public static void main(String[] args) throws IOException, GeneralSecurityException {
  // Register the options so that they show up via --help
  PipelineOptionsFactory.register(Options.class);
  Options options = PipelineOptionsFactory.fromArgs(args)
      .withValidation().as(Options.class);
  // Option validation is not yet automatic, we make an explicit call here.
  Options.Methods.validateOptions(options);

  // Set up the prototype request and auth.
  StreamVariantsRequest prototype = CallSetNamesOptions.Methods.getRequestPrototype(options);
  OfflineAuth auth = GenomicsOptions.Methods.getGenomicsAuth(options);

  // Make a bimap of the callsets so that the indices the pipeline is passing around are small.
  List<String> callSetNames = (0 < prototype.getCallSetIdsCount())
      ? Lists.newArrayList(CallSetNamesOptions.Methods.getCallSetNames(options))
      : GenomicsUtils.getCallSetsNames(options.getVariantSetId(), auth);
  Collections.sort(callSetNames); // Ensure a stable sort order for reproducible results.
  BiMap<String, Integer> dataIndices = HashBiMap.create();
  for (String callSetName : callSetNames) {
    dataIndices.put(callSetName, dataIndices.size());
  }

  Pipeline p = Pipeline.create(options);
  p.begin();

  PCollection<StreamVariantsRequest> requests;
  if (null != options.getSitesFilepath()) {
    // Compute PCA on a list of sites.
    requests = p.apply("ReadSites", TextIO.read().from(options.getSitesFilepath()))
        .apply(new SitesToShards.SitesToStreamVariantsShardsTransform(prototype));
  } else {
    // Compute PCA over genomic regions.
    List<StreamVariantsRequest> shardRequests = options.isAllReferences() ?
        ShardUtils.getVariantRequests(prototype, ShardUtils.SexChromosomeFilter.EXCLUDE_XY,
            options.getBasesPerShard(), auth) :
        ShardUtils.getVariantRequests(prototype, options.getBasesPerShard(), options.getReferences());
    requests = p.apply(Create.of(shardRequests));
  }

  requests.apply(new VariantStreamer(auth, ShardBoundary.Requirement.STRICT, VARIANT_FIELDS))
      .apply(ParDo.of(new ExtractSimilarCallsets()))
      .apply(new OutputPCoAFile(dataIndices, options.getOutput()));

  PipelineResult result = p.run();
  if (options.getWait()) {
    result.waitUntilFinish();
  }
}
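As the comment in the example notes, the BiMap lets the pipeline pass around small integer indices instead of call set names, while still being able to recover the name from an index later. A small illustration using Guava's HashBiMap (the sample names are hypothetical):

// com.google.common.collect.BiMap / HashBiMap: name -> index, with an inverse view for index -> name.
BiMap<String, Integer> dataIndices = HashBiMap.create();
dataIndices.put("NA12877", 0);
dataIndices.put("NA12878", 1);
Integer index = dataIndices.get("NA12878");   // 1
String name = dataIndices.inverse().get(0);   // "NA12877"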
Example 6: main
import com.google.cloud.genomics.dataflow.utils.GenomicsOptions; // import the required package/class

public static void main(String[] args) throws IOException, GeneralSecurityException {
  // Register the options so that they show up via --help.
  PipelineOptionsFactory.register(TransformNonVariantSegmentData.Options.class);
  TransformNonVariantSegmentData.Options options =
      PipelineOptionsFactory.fromArgs(args).withValidation()
          .as(TransformNonVariantSegmentData.Options.class);

  Preconditions.checkState(options.getHasNonVariantSegments(),
      "This job is only valid for data containing non-variant segments. "
          + "Set the --hasNonVariantSegments command line option accordingly.");

  Map<String, Set<String>> cohortMap = new HashMap<String, Set<String>>();
  // Always include the default cohort.
  cohortMap.put(ALL_SAMPLES_COHORT, ImmutableSet.<String>builder().build());
  if (!Strings.isNullOrEmpty(options.getCohorts())) {
    List<String> cohortFilenames = Splitter.on(",").splitToList(options.getCohorts());
    for (String cohort : cohortFilenames) {
      cohortMap.put(cohort, ImmutableSet
          .<String>builder()
          .addAll(
              Splitter.on(CharMatcher.breakingWhitespace()).omitEmptyStrings().trimResults()
                  .split(Files.toString(new File(cohort), Charset.defaultCharset()))).build());
    }
  }

  // Set up the prototype request and auth.
  StreamVariantsRequest prototype = CallSetNamesOptions.Methods.getRequestPrototype(options);
  final OfflineAuth auth = GenomicsOptions.Methods.getGenomicsAuth(options);

  List<StreamVariantsRequest> requests = options.isAllReferences() ?
      ShardUtils.getVariantRequests(prototype, ShardUtils.SexChromosomeFilter.INCLUDE_XY,
          options.getBasesPerShard(), auth) :
      ShardUtils.getVariantRequests(prototype, options.getBasesPerShard(), options.getReferences());

  Pipeline p = Pipeline.create(options);

  // Create a collection of data with non-variant segments omitted but calls from overlapping
  // non-variant segments added to SNPs and write them to BigQuery.
  p.begin()
      .apply(Create.of(requests))
      .apply(new VariantStreamer(auth, ShardBoundary.Requirement.STRICT, VARIANT_API_FIELDS))
      .apply(ParDo.of(new FilterCallsFn(options.getOmitLowQualityCalls())))
      .apply(new JoinNonVariantSegmentsWithVariants.BinShuffleAndCombineTransform())
      .apply(ParDo.of(new FlagVariantsWithAmbiguousCallsFn()))
      .apply(ParDo.of(new FormatVariantsFn(options.getSummarizeRefMatchCallSets(),
          !options.getVariantMergeStrategy().equals(MergeNonVariantSegmentsWithSnps.class),
          cohortMap)))
      .apply(
          BigQueryIO.writeTableRows().to(options.getOutputTable())
              .withSchema(getTableSchema(options.getSummarizeRefMatchCallSets(), cohortMap.keySet()))
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
              .withWriteDisposition(options.getAppendToTable()
                  ? BigQueryIO.Write.WriteDisposition.WRITE_APPEND
                  : BigQueryIO.Write.WriteDisposition.WRITE_EMPTY));

  PipelineResult result = p.run();
  if (options.getWait()) {
    result.waitUntilFinish();
  }
}
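Each entry in --cohorts above names a file whose whitespace-separated contents become that cohort's sample (call set) names. A small illustration of what the Splitter chain in the example produces, using made-up file contents:

// Hypothetical cohort file contents; the sample names are made up.
// Splitter and CharMatcher are from com.google.common.base, ImmutableSet from com.google.common.collect.
String fileContents = "NA12877\nNA12878  NA12879\n";
Set<String> cohort = ImmutableSet.copyOf(
    Splitter.on(CharMatcher.breakingWhitespace())
        .omitEmptyStrings()
        .trimResults()
        .split(fileContents));
// cohort now contains NA12877, NA12878, and NA12879.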