This page collects typical usage examples of the Java method org.broadinstitute.hellbender.utils.gcs.BucketUtils.isCloudStorageUrl. If you are wondering what BucketUtils.isCloudStorageUrl does, how to call it, or what real code that uses it looks like, the hand-picked examples below should help. You can also explore further usage examples of the enclosing class, org.broadinstitute.hellbender.utils.gcs.BucketUtils.
Nine code examples of the BucketUtils.isCloudStorageUrl method are shown below, ordered by popularity.
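Before the examples, here is a minimal, self-contained sketch of the typical dispatch on the kind of input path. It is not taken from the examples below; the class name and the sample path are hypothetical placeholders, and only methods that also appear in the examples (isCloudStorageUrl, isHadoopUrl) are used.

import org.broadinstitute.hellbender.utils.gcs.BucketUtils;

public final class PathKindDemo {
    public static void main(String[] args) {
        // Hypothetical default path; pass a real path as the first argument to try other cases.
        final String path = args.length > 0 ? args[0] : "gs://my-bucket/sample.bam";
        if (BucketUtils.isCloudStorageUrl(path)) {
            System.out.println("Google Cloud Storage path");
        } else if (BucketUtils.isHadoopUrl(path)) {
            System.out.println("HDFS path");
        } else {
            System.out.println("local file path");
        }
    }
}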
Example 1: ReadsDataflowSource
import org.broadinstitute.hellbender.utils.gcs.BucketUtils; // import the package/class the method depends on
/**
 * @param bam A file path or a google bucket identifier to a bam file to read
 * @param p the pipeline object for the job. This is needed to read a bam from a bucket.
 *          The options inside of the pipeline MUST BE GCSOptions (to get the secret file).
 */
public ReadsDataflowSource(String bam, Pipeline p){
    this.bam = Utils.nonNull(bam);
    this.pipeline = p;
    cloudStorageUrl = BucketUtils.isCloudStorageUrl(bam);
    hadoopUrl = BucketUtils.isHadoopUrl(bam);
    if(cloudStorageUrl) {
        // The options used to create the pipeline must be GCSOptions to get the secret file.
        try {
            options = p.getOptions().as(GCSOptions.class);
        } catch (ClassCastException e) {
            throw new GATKException("The pipeline options was not GCSOptions.", e);
        }
        GenomicsOptions.Methods.validateOptions(options);
        auth = getAuth(options);
    }
}
Example 2: getTribbleFeatureReader
import org.broadinstitute.hellbender.utils.gcs.BucketUtils; // import the package/class the method depends on
private static <T extends Feature> AbstractFeatureReader<T, ?> getTribbleFeatureReader(final FeatureInput<T> featureInput, final FeatureCodec<T, ?> codec, Function<SeekableByteChannel, SeekableByteChannel> cloudWrapper, Function<SeekableByteChannel, SeekableByteChannel> cloudIndexWrapper) {
    Utils.nonNull(codec);
    try {
        final String absolutePath = IOUtils.getPath(featureInput.getFeaturePath()).toAbsolutePath().toUri().toString();
        // Instruct the reader factory to not require an index. We will require one ourselves as soon as
        // a query by interval is attempted.
        final boolean requireIndex = false;
        // Only apply the wrappers if the feature input is on Google Cloud Storage
        if ( BucketUtils.isCloudStorageUrl(absolutePath) ) {
            return AbstractFeatureReader.getFeatureReader(absolutePath, null, codec, requireIndex, cloudWrapper, cloudIndexWrapper);
        } else {
            return AbstractFeatureReader.getFeatureReader(absolutePath, null, codec, requireIndex, Function.identity(), Function.identity());
        }
    }
    catch ( final TribbleException e ) {
        throw new GATKException("Error initializing feature reader for path " + featureInput.getFeaturePath(), e);
    }
}
Example 3: SubdivideAndFillReadsIterator
import org.broadinstitute.hellbender.utils.gcs.BucketUtils; // import the package/class the method depends on
public SubdivideAndFillReadsIterator(String bam, int outputShardSize, int margin, final ReadFilter optFilter, ContextShard shard) throws IOException, GeneralSecurityException, ClassNotFoundException {
    this.bam = bam;
    this.shard = shard;
    this.optFilter = optFilter;
    // it's OK if this goes beyond the contig boundaries.
    lastValidPos = shard.interval.getEnd() + margin;
    firstValidPos = Math.max(shard.interval.getStart() - margin, 1);
    ArrayList<SimpleInterval> ints = new ArrayList<>();
    ints.add(shard.interval);
    subshards = IntervalUtils.cutToShards(ints, outputShardSize);
    currentSubShardIndex = 0;
    currentSubShard = subshards.get(currentSubShardIndex);
    if (BucketUtils.isCloudStorageUrl(bam)) {
        reader = SamReaderFactory.make()
                .validationStringency(ValidationStringency.SILENT)
                .open(IOUtils.getPath(bam));
    } else if (BucketUtils.isHadoopUrl(bam)) {
        throw new RuntimeException("Sorry, Hadoop paths aren't yet supported");
    } else {
        // read from local file (this only makes sense if every worker sees the same thing, e.g. if we're running locally)
        reader = SamReaderFactory.make().validationStringency(ValidationStringency.SILENT).open(new File(bam));
    }
    query = reader.queryOverlapping(shard.interval.getContig(), shard.interval.getStart(), shard.interval.getEnd());
}
Example 4: VariantsDataflowSource
import org.broadinstitute.hellbender.utils.gcs.BucketUtils; // import the package/class the method depends on
/**
 * VariantsDataflowSource sets up source using local files (or eventually GCS buckets).
 * @param variantFiles, list of files (or eventually buckets) to read from
 * @param pipeline, options to get credentials to access GCS buckets.
 */
public VariantsDataflowSource(final List<String> variantFiles, final Pipeline pipeline) {
    for (final String variantSource : variantFiles) {
        if (BucketUtils.isCloudStorageUrl(variantSource)) {
            // This problem is tracked with issue 632.
            throw new UnsupportedOperationException("Cloud storage URIs not supported");
        }
    }
    this.variantSources = variantFiles;
    this.pipeline = pipeline;
}
Example 5: serializeSingleObject
import org.broadinstitute.hellbender.utils.gcs.BucketUtils; // import the package/class the method depends on
/**
 * Serializes the collection's single object to the specified file.
 *
 * Of course if you run on the cloud and specify a local path, the file will be saved
 * on a cloud worker, which may not be very useful.
 *
 * @param collection A collection with a single serializable object to save.
 * @param fname the name of the destination, starting with "gs://" to save to GCS, or "hdfs://" to save to HDFS.
 * @return SaveDestination.CLOUD if saved to GCS, SaveDestination.HDFS if saved to HDFS,
 *         SaveDestination.LOCAL_DISK otherwise.
 */
public static <T> SaveDestination serializeSingleObject(PCollection<T> collection, String fname) {
    if ( BucketUtils.isCloudStorageUrl(fname)) {
        saveSingleResultToRemoteStorage(collection, fname);
        return SaveDestination.CLOUD;
    } else if (BucketUtils.isHadoopUrl(fname)) {
        saveSingleResultToRemoteStorage(collection, fname);
        return SaveDestination.HDFS;
    } else {
        saveSingleResultToLocalDisk(collection, fname);
        return SaveDestination.LOCAL_DISK;
    }
}
Example 6: ingestReadsAndGrabHeader
import org.broadinstitute.hellbender.utils.gcs.BucketUtils; // import the package/class the method depends on
/** reads local disks or GCS -> header, and PCollection */
private PCollection<GATKRead> ingestReadsAndGrabHeader(final Pipeline pipeline, String filename) throws IOException {
    // input reads
    if (BucketUtils.isCloudStorageUrl(filename)) {
        // set up ingestion on the cloud
        // but read the header locally
        GcsPath path = GcsPath.fromUri(filename);
        InputStream inputstream = Channels.newInputStream(new GcsUtil.GcsUtilFactory().create(pipeline.getOptions())
                .open(path));
        SamReader reader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT).open(SamInputResource.of(inputstream));
        header = reader.getFileHeader();
        final SAMSequenceDictionary sequenceDictionary = header.getSequenceDictionary();
        final List<SimpleInterval> intervals = IntervalUtils.getAllIntervalsForReference(sequenceDictionary);
        return new ReadsDataflowSource(filename, pipeline).getReadPCollection(intervals, ValidationStringency.SILENT, false);
    } else {
        // ingestion from local file
        try( ReadsDataSource readsSource = new ReadsDataSource(new File(filename)) ) {
            header = readsSource.getHeader();
            List<GATKRead> reads = new ArrayList<>();
            for ( GATKRead read : readsSource ) {
                reads.add(read);
            }
            return pipeline.apply("input ingest", Create.of(reads));
        }
    }
}
Example 7: hackilyCopyFromGCSIfNecessary
import org.broadinstitute.hellbender.utils.gcs.BucketUtils; // import the package/class the method depends on
private ArrayList<String> hackilyCopyFromGCSIfNecessary(List<String> localVariants) {
    int i = 0;
    Stopwatch hacking = Stopwatch.createStarted();
    boolean copied = false;
    ArrayList<String> ret = new ArrayList<>();
    for (String v : localVariants) {
        if (BucketUtils.isCloudStorageUrl(v)) {
            if (!copied) {
                logger.info("(HACK): copying the GCS variant file to local just so we can read it back.");
                copied = true;
            }
            // this only works with the API_KEY, but then again it's a hack so there's no point in polishing it. Please don't make me.
            String d = IOUtils.createTempFile("knownVariants-" + i, ".vcf").getAbsolutePath();
            try {
                BucketUtils.copyFile(v, d);
            } catch (IOException x) {
                throw new UserException.CouldNotReadInputFile(v, x);
            }
            ret.add(d);
        } else {
            ret.add(v);
        }
    }
    hacking.stop();
    if (copied) {
        logger.info("Copying the vcf took " + hacking.elapsed(TimeUnit.MILLISECONDS) + " ms.");
    }
    return ret;
}
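The core of the hack above is the copy-to-local step. Here is a minimal sketch of just that step, using the same BucketUtils.copyFile and IOUtils.createTempFile calls as the example; the GCS URI is a hypothetical placeholder, and IOUtils is assumed to be GATK's org.broadinstitute.hellbender.utils.io.IOUtils.

import java.io.IOException;
import org.broadinstitute.hellbender.utils.gcs.BucketUtils;
import org.broadinstitute.hellbender.utils.io.IOUtils;

public final class CopyVcfLocally {
    public static void main(String[] args) throws IOException {
        // Hypothetical remote VCF; in practice this would come from the command line.
        final String remoteVcf = args.length > 0 ? args[0] : "gs://my-bucket/known-variants.vcf";
        if (BucketUtils.isCloudStorageUrl(remoteVcf)) {
            // Copy the GCS file to a local temp file so downstream code can read it from disk.
            final String localCopy = IOUtils.createTempFile("knownVariants", ".vcf").getAbsolutePath();
            BucketUtils.copyFile(remoteVcf, localCopy);
            System.out.println("Copied to " + localCopy);
        }
    }
}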
Example 8: setupPipeline
import org.broadinstitute.hellbender.utils.gcs.BucketUtils; // import the package/class the method depends on
@Override
protected void setupPipeline(Pipeline pipeline) {
    try {
        bunny.start("BaseRecalibratorDataflowOptimized");
        if (knownVariants == null || knownVariants.isEmpty()) {
            throw new UserException.CommandLineException(NO_DBSNP_EXCEPTION);
        }
        String referenceURL = referenceArguments.getReferenceFileName();
        if (readArguments.getReadFilesNames().size() != 1) {
            throw new UserException("Sorry, we only support a single reads input for now.");
        }
        String bam = readArguments.getReadFilesNames().get(0);
        if (!BucketUtils.isCloudStorageUrl(bam) && isRemote()) {
            throw new UserException("Sorry, for remote execution the BAM must be stored remotely.");
        }

        // Load the input bam
        final ReadsDataflowSource readsDataflowSource = new ReadsDataflowSource(bam, pipeline);
        readsHeader = readsDataflowSource.getHeader();
        final SAMSequenceDictionary readsDictionary = readsHeader.getSequenceDictionary();
        final List<SimpleInterval> intervals = intervalArgumentCollection.intervalsSpecified() ? intervalArgumentCollection.getIntervals(readsHeader.getSequenceDictionary())
                : IntervalUtils.getAllIntervalsForReference(readsHeader.getSequenceDictionary());
        final PCollectionView<SAMFileHeader> headerSingleton = ReadsDataflowSource.getHeaderView(pipeline, readsHeader);
        final CountingReadFilter readFilterToApply = BaseRecalibrator.getStandardBQSRReadFilter(readsHeader);
        bunny.stepEnd("set up bam input");

        // Load the Variants and the Reference
        final ReferenceMultiSource referenceDataflowSource = new ReferenceMultiSource(pipeline.getOptions(), referenceURL, BQSR_REFERENCE_WINDOW_FUNCTION);
        bunny.stepEnd("create referenceDataflowSource");
        final SAMSequenceDictionary refDictionary = referenceDataflowSource.getReferenceSequenceDictionary(readsDictionary);
        bunny.stepEnd("load ref sequence dictionary");
        checkSequenceDictionaries(refDictionary, readsDictionary);
        bunny.stepEnd("checkSequenceDictionaries");
        PCollectionView<SAMSequenceDictionary> refDictionaryView = pipeline.apply(Create.of(refDictionary)).setName("refDictionary").apply(View.asSingleton());
        bunny.stepEnd("create ref dictionary view");

        List<SimpleInterval> shardedIntervals = IntervalUtils.cutToShards(intervals, bigShardSize);
        // since we currently can only read variants at the client, that's the right place to populate the shards.
        List<Variant> variants = VariantsDataflowSource.getVariantsList(knownVariants);
        bunny.stepEnd("load variants");
        ArrayList<AddContextDataToReadOptimized.ContextShard> shards = AddContextDataToReadOptimized.fillVariants(shardedIntervals, variants, margin);
        bunny.stepEnd("sharding variants");
        logger.info("Shipping " + shards.size() + " big shards.");

        PCollection<AddContextDataToReadOptimized.ContextShard> shardsPCol = pipeline.apply(Create.of(shards));
        PCollection<AddContextDataToReadOptimized.ContextShard> shardsWithContext = shardsPCol
                // big shards of variants -> smaller shards with variants, reads. We take the opportunity to filter the reads as close to the source as possible.
                .apply(ParDo.named("subdivideAndFillReads").of(AddContextDataToReadOptimized.subdivideAndFillReads(bam, smallShardSize, margin, readFilterToApply)))
                // add ref bases to the shards.
                .apply(ParDo.named("fillContext").of(AddContextDataToReadOptimized.fillContext(referenceDataflowSource)));
        final PCollection<RecalibrationTables> recalibrationTable = shardsWithContext.apply(new BaseRecalibratorOptimizedTransform(headerSingleton, refDictionaryView, recalArgs));

        if (null == serializedOutputTablesPath) {
            // we need those, so let's pick a temporary location for them.
            serializedOutputTablesPath = pickTemporaryRecaltablesPath(isRemote(), stagingLocation);
        }
        DataflowUtils.SaveDestination dest = DataflowUtils.serializeSingleObject(recalibrationTable, serializedOutputTablesPath);
        if (isRemote() && dest == DataflowUtils.SaveDestination.LOCAL_DISK) {
            throw new UserException("If running on the cloud, either leave serializedOutputTablesPath unset or point it to a GCS location.");
        }
        bunny.stepEnd("setup");
    } catch (UserException | GATKException rx) {
        throw rx;
    } catch (Exception x) {
        throw new GATKException("Unexpected: " + x.getMessage(), x);
    }
}
Example 9: ReadsDataSource
import org.broadinstitute.hellbender.utils.gcs.BucketUtils; // import the package/class the method depends on
/**
 * Initialize this data source with multiple SAM/BAM/CRAM files, explicit indices for those files,
 * and a custom SamReaderFactory.
 *
 * @param samPaths paths to SAM/BAM/CRAM files, not null
 * @param samIndices indices for all of the SAM/BAM/CRAM files, in the same order as samPaths. May be null,
 *                   in which case index paths are inferred automatically.
 * @param customSamReaderFactory SamReaderFactory to use, if null a default factory with no reference and validation
 *                               stringency SILENT is used.
 * @param cloudWrapper caching/prefetching wrapper for the data, if on Google Cloud.
 * @param cloudIndexWrapper caching/prefetching wrapper for the index, if on Google Cloud.
 */
public ReadsDataSource( final List<Path> samPaths, final List<Path> samIndices,
                        SamReaderFactory customSamReaderFactory,
                        Function<SeekableByteChannel, SeekableByteChannel> cloudWrapper,
                        Function<SeekableByteChannel, SeekableByteChannel> cloudIndexWrapper) {
    Utils.nonNull(samPaths);
    Utils.nonEmpty(samPaths, "ReadsDataSource cannot be created from empty file list");

    if ( samIndices != null && samPaths.size() != samIndices.size() ) {
        throw new UserException(String.format("Must have the same number of BAM/CRAM/SAM paths and indices. Saw %d BAM/CRAM/SAMs but %d indices",
                                              samPaths.size(), samIndices.size()));
    }

    readers = new LinkedHashMap<>(samPaths.size() * 2);
    backingPaths = new LinkedHashMap<>(samPaths.size() * 2);
    indicesAvailable = true;

    final SamReaderFactory samReaderFactory =
            customSamReaderFactory == null ?
                SamReaderFactory.makeDefault().validationStringency(ReadConstants.DEFAULT_READ_VALIDATION_STRINGENCY) :
                customSamReaderFactory;

    int samCount = 0;
    for ( final Path samPath : samPaths ) {
        // Ensure each file can be read
        try {
            IOUtil.assertFileIsReadable(samPath);
        }
        catch ( SAMException|IllegalArgumentException e ) {
            throw new UserException.CouldNotReadInputFile(samPath.toString(), e);
        }

        Function<SeekableByteChannel, SeekableByteChannel> wrapper =
            (BucketUtils.isCloudStorageUrl(samPath)
                ? cloudWrapper
                : Function.identity());

        // if samIndices==null then we'll guess the index name from the file name.
        // If the file's on the cloud, then the search will only consider locations that are also
        // in the cloud.
        Function<SeekableByteChannel, SeekableByteChannel> indexWrapper =
            ((samIndices != null && BucketUtils.isCloudStorageUrl(samIndices.get(samCount))
                || (samIndices == null && BucketUtils.isCloudStorageUrl(samPath)))
                ? cloudIndexWrapper
                : Function.identity());

        SamReader reader;
        if ( samIndices == null ) {
            reader = samReaderFactory.open(samPath, wrapper, indexWrapper);
        }
        else {
            final SamInputResource samResource = SamInputResource.of(samPath, wrapper);
            Path indexPath = samIndices.get(samCount);
            samResource.index(indexPath, indexWrapper);
            reader = samReaderFactory.open(samResource);
        }

        // Ensure that each file has an index
        if ( ! reader.hasIndex() ) {
            indicesAvailable = false;
        }

        readers.put(reader, null);
        backingPaths.put(reader, samPath);
        ++samCount;
    }

    // Prepare a header merger only if we have multiple readers
    headerMerger = samPaths.size() > 1 ? createHeaderMerger() : null;
}
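The wrapper-selection logic in the middle of this constructor is the part that exercises the Path overload of BucketUtils.isCloudStorageUrl. Below is a small sketch of that pattern in isolation, assuming the Path overload behaves as it is used above; the class and method names are hypothetical.

import java.nio.channels.SeekableByteChannel;
import java.nio.file.Path;
import java.util.function.Function;
import org.broadinstitute.hellbender.utils.gcs.BucketUtils;

public final class CloudWrapperSelector {
    // Apply the caching/prefetching wrapper only for paths on Google Cloud Storage;
    // local and HDFS paths get a no-op identity wrapper instead.
    public static Function<SeekableByteChannel, SeekableByteChannel> wrapperFor(
            final Path path,
            final Function<SeekableByteChannel, SeekableByteChannel> cloudWrapper) {
        return BucketUtils.isCloudStorageUrl(path) ? cloudWrapper : Function.identity();
    }
}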