本文整理汇总了Java中org.broadinstitute.hellbender.utils.gcs.BucketUtils类的典型用法代码示例。如果您正苦于以下问题：Java BucketUtils类的具体用法？Java BucketUtils怎么用？Java BucketUtils使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
BucketUtils类属于org.broadinstitute.hellbender.utils.gcs包，在下文中一共展示了BucketUtils类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: ReadsDataflowSource
import org.broadinstitute.hellbender.utils.gcs.BucketUtils; //导入依赖的package包/类
/**
 * Creates a reads source for the given BAM location and records whether that location is a
 * cloud storage URL or a Hadoop URL (as reported by BucketUtils).
 *
 * @param bam A file path or a google bucket identifier to a bam file to read; checked with Utils.nonNull.
 * @param p the pipeline object for the job. This is needed to read a bam from a bucket.
 *          When {@code bam} is a cloud storage URL, the options inside of the pipeline
 *          MUST BE GCSOptions (to get the secret file).
 */
public ReadsDataflowSource(String bam, Pipeline p){
    this.bam = Utils.nonNull(bam);
    this.pipeline = p;
    cloudStorageUrl = BucketUtils.isCloudStorageUrl(bam);
    hadoopUrl = BucketUtils.isHadoopUrl(bam);

    // Options and authentication are only set up for inputs that live in a bucket.
    if (!cloudStorageUrl) {
        return;
    }

    // The pipeline must have been created with GCSOptions, since they carry the secret file.
    try {
        options = p.getOptions().as(GCSOptions.class);
    } catch (ClassCastException e) {
        throw new GATKException("The pipeline options was not GCSOptions.", e);
    }
    GenomicsOptions.Methods.validateOptions(options);
    auth = getAuth(options);
}
示例2: writeToFile
import org.broadinstitute.hellbender.utils.gcs.BucketUtils; //导入依赖的package包/类
/**
 * Adds a step to the pipeline that writes the given reads to a single output file.
 * Per the original contract, the reads need not be sorted on input (the BAM file will be),
 * and all of them must fit into a single worker's memory.
 *
 * Hadoop destinations, and any pipeline whose runner is the SparkPipelineRunner, are delegated
 * to writeToHadoop; everything else is written by a SaveToBAMFile step that receives the reads
 * as a side input.
 *
 * @param pipeline the pipeline to add this operation to.
 * @param reads the reads to write (they don't need to be sorted).
 * @param header the header that corresponds to the reads.
 * @param destPath the GCS or local path to write to (must start with "gs://" if writing to GCS).
 * @param parquet whether to write out BAM or Parquet data (BDG AlignmentRecords); only applies when writing to Hadoop
 */
public static void writeToFile(
        Pipeline pipeline, PCollection<GATKRead> reads, final SAMFileHeader header, final String destPath,
        final boolean parquet) {
    final boolean useHadoopWriter = BucketUtils.isHadoopUrl(destPath)
            || pipeline.getRunner().getClass().equals(SparkPipelineRunner.class);
    if (useHadoopWriter) {
        writeToHadoop(pipeline, reads, header, destPath, parquet);
        return;
    }

    // Expose all the reads as a side input so that a single element (the file name) drives the write.
    final PCollectionView<Iterable<GATKRead>> readsView = reads.apply(View.<GATKRead>asIterable());
    final PCollection<String> destination = pipeline.apply("output file name", Create.<String>of(destPath));
    destination.apply(
            ParDo.named("save to BAM file")
                    .withSideInputs(readsView)
                    .of(new SaveToBAMFile(header, readsView)));
}
示例3: of
import org.broadinstitute.hellbender.utils.gcs.BucketUtils; //导入依赖的package包/类
/**
 * Recalibration report on GCS/HDFS -> PCollection of a single BaseRecalOutput.
 * The report is copied to a temporary file from inside the DoFn, so the loading happens
 * wherever that DoFn runs (at the worker).
 *
 * @param pipeline the pipeline, with authentication information.
 * @param GCSFileName the path to the recalibration report. Must start with "gs://"
 * @return a PCollection containing the BaseRecalOutput built from the downloaded report.
 */
static public PCollection<BaseRecalOutput> of(final Pipeline pipeline, String GCSFileName) {
    final DoFn<String, BaseRecalOutput> fetchReport = new DoFn<String, BaseRecalOutput>() {
        private static final long serialVersionUID = 1L;

        @Override
        public void processElement(ProcessContext context) {
            final String reportPath = context.element();
            // Copy the report into a local temporary file before parsing it.
            final File localCopy = IOUtils.createTempFile("temp-BaseRecal-", ".tmp");
            try {
                BucketUtils.copyFile(reportPath, context.getPipelineOptions(), localCopy.getPath());
            } catch (IOException x) {
                throw new GATKException("Unable to download recalibration table from '" + reportPath + "'.", x);
            }
            context.output(new BaseRecalOutput(localCopy));
        }
    };

    return pipeline
            .apply("calibration report name", Create.of(GCSFileName))
            .apply(ParDo.of(fetchReport).named("ingest calibration report"));
}
示例4: afterPipeline
import org.broadinstitute.hellbender.utils.gcs.BucketUtils; //导入依赖的package包/类
/**
 * Runs once the pipeline has finished: reads the serialized RecalibrationTables back from
 * serializedOutputTablesPath and saves them as a textual report at outputTablesPath.
 *
 * @param p the pipeline that was run; its options are used to open the serialized tables.
 * @throws GATKException if the tables cannot be read back or the report cannot be saved.
 */
@Override
protected void afterPipeline(Pipeline p) {
    bunny.stepEnd("dataflow");
    logger.info("Saving recalibration report to " + outputTablesPath);

    // Get the table back and output it in text form to outputTablesPath.
    // TODO: if running on the cloud and the output destination is on the cloud, then it's faster to have a worker do it directly, without the file roundtrip.
    try (ObjectInputStream tablesIn =
                 new ObjectInputStream(BucketUtils.openFile(serializedOutputTablesPath, p.getOptions()))) {
        final RecalibrationTables tables = (RecalibrationTables) tablesIn.readObject();
        final File reportFile = new File(outputTablesPath);
        BaseRecalibratorFn.saveTextualReport(reportFile, readsHeader, tables, recalArgs);
        bunny.stepEnd("repatriate_report");
    } catch (Exception e) {
        throw new GATKException("Unexpected: unable to read results file. (bug?)", e);
    }

    bunny.end();
}
示例5: setupPipeline
import org.broadinstitute.hellbender.utils.gcs.BucketUtils; //导入依赖的package包/类
/**
 * Builds the pipeline: reads a single input file, applies ApplyBQSRTransform using the
 * recalibration info loaded from BQSR_RECAL_FILE_NAME, and writes the result with SmallBamWriter.
 * When needsIntermediateCopy() is true, the output is staged at a random remote path under
 * stagingLocation instead of being written straight to OUTPUT.
 *
 * @param pipeline the pipeline to add the steps to.
 * @throws UserException if more than one input file was specified.
 */
@Override
protected void setupPipeline(Pipeline pipeline) {
    if (readArguments.getReadFilesNames().size() > 1) {
        throw new UserException("Sorry, we only support a single input file for now.");
    }
    final String inputBam = readArguments.getReadFilesNames().get(0);
    final ReadsDataflowSource source = new ReadsDataflowSource(inputBam, pipeline);
    final SAMFileHeader bamHeader = source.getHeader();
    final PCollectionView<SAMFileHeader> bamHeaderView = pipeline.apply(Create.of(bamHeader)).apply(View.asSingleton());
    final SAMSequenceDictionary dictionary = bamHeader.getSequenceDictionary();

    // Use the requested intervals if any were specified, otherwise cover the whole reference.
    final List<SimpleInterval> targetIntervals;
    if (intervalArgumentCollection.intervalsSpecified()) {
        targetIntervals = intervalArgumentCollection.getIntervals(dictionary);
    } else {
        targetIntervals = IntervalUtils.getAllIntervalsForReference(dictionary);
    }

    final PCollectionView<BaseRecalOutput> recalInfoView =
            BaseRecalOutputSource.loadFileOrRemote(pipeline, BQSR_RECAL_FILE_NAME).apply(View.asSingleton());
    final PCollection<GATKRead> recalibratedReads =
            source.getReadPCollection(targetIntervals, ValidationStringency.SILENT, false)
                    .apply(new ApplyBQSRTransform(bamHeaderView, recalInfoView, bqsrOpts));

    intermediateRemoteBam = OUTPUT;
    if (needsIntermediateCopy()) {
        // The user specified remote execution and provided a local file name. So we're going to have to save to remote storage as a go-between.
        // Note that this may require more permissions
        intermediateRemoteBam = BucketUtils.randomRemotePath(stagingLocation, "temp-applyBqsr-output-", ".bam");
        logger.info("Staging results at " + intermediateRemoteBam);
    }
    SmallBamWriter.writeToFile(pipeline, recalibratedReads, bamHeader, intermediateRemoteBam);
}
示例6: afterPipeline
import org.broadinstitute.hellbender.utils.gcs.BucketUtils; //导入依赖的package包/类
/**
 * Post-pipeline step: deserializes the RecalibrationTables stored at serializedOutputTablesPath
 * and writes them out in text form to outputTablesPath.
 *
 * @param p the finished pipeline; its options are passed to BucketUtils.openFile.
 * @throws GATKException wrapping any exception raised while reading the tables or saving the report.
 */
@Override
protected void afterPipeline(Pipeline p) {
    bunny.stepEnd("dataflow");
    logger.info("Saving recalibration report to " + outputTablesPath);

    // Get the table back and output it in text form to outputTablesPath
    // TODO: if running on the cloud and the output destination is on the cloud, then it's faster to have a worker do it directly, without the file roundtrip.
    try (ObjectInputStream serializedTables =
                 new ObjectInputStream(BucketUtils.openFile(serializedOutputTablesPath, p.getOptions()))) {
        final Object deserialized = serializedTables.readObject();
        BaseRecalibratorFn.saveTextualReport(
                new File(outputTablesPath), readsHeader, (RecalibrationTables) deserialized, recalArgs);
        bunny.stepEnd("repatriate_report");
    } catch (Exception e) {
        throw new GATKException("Unexpected: unable to read results file. (bug?)", e);
    }

    bunny.end();
}
示例7: setupPipeline
import org.broadinstitute.hellbender.utils.gcs.BucketUtils; //导入依赖的package包/类
/**
 * Creates a pipeline configured for the requested execution mode, with the GATK coders registered.
 *
 * Runner selection: BlockingDataflowPipelineRunner when cloud execution is enabled; otherwise
 * SparkPipelineRunner when either path is a Hadoop URL; otherwise DirectPipelineRunner.
 *
 * @param inputPath the input path, only inspected to detect a Hadoop URL.
 * @param outputPath the output path, only inspected to detect a Hadoop URL.
 * @param enableGcs whether to set the test API key on the options.
 * @param enableCloudExec whether to set the staging location, project and blocking Dataflow runner.
 * @return the newly created pipeline.
 */
private Pipeline setupPipeline(final String inputPath, final String outputPath, boolean enableGcs, boolean enableCloudExec) {
    final GATKGCSOptions pipelineOptions = PipelineOptionsFactory.as(GATKGCSOptions.class);

    if (enableCloudExec) {
        pipelineOptions.setStagingLocation(getGCPTestStaging());
        pipelineOptions.setProject(getGCPTestProject());
        pipelineOptions.setRunner(BlockingDataflowPipelineRunner.class);
    } else {
        // The paths are only inspected when not executing on the cloud.
        final boolean touchesHadoop = BucketUtils.isHadoopUrl(inputPath) || BucketUtils.isHadoopUrl(outputPath);
        if (touchesHadoop) {
            pipelineOptions.setRunner(SparkPipelineRunner.class);
        } else {
            pipelineOptions.setRunner(DirectPipelineRunner.class);
        }
    }

    if (enableGcs) {
        pipelineOptions.setApiKey(getGCPTestApiKey());
    }

    final Pipeline created = Pipeline.create(pipelineOptions);
    DataflowUtils.registerGATKCoders(created);
    return created;
}
示例8: getTribbleFeatureReader
import org.broadinstitute.hellbender.utils.gcs.BucketUtils; //导入依赖的package包/类
/**
 * Opens a Tribble feature reader for the given feature input, without requiring an index.
 *
 * @param featureInput the input whose feature path is resolved to an absolute URI string.
 * @param codec the codec used to decode the features; checked with Utils.nonNull.
 * @param cloudWrapper wrapper applied to the data channel, used only for cloud storage paths.
 * @param cloudIndexWrapper wrapper applied to the index channel, used only for cloud storage paths.
 * @return a feature reader for the resolved path.
 * @throws GATKException if the reader factory raises a TribbleException.
 */
private static <T extends Feature> AbstractFeatureReader<T, ?> getTribbleFeatureReader(final FeatureInput<T> featureInput, final FeatureCodec<T, ?> codec, Function<SeekableByteChannel, SeekableByteChannel> cloudWrapper, Function<SeekableByteChannel, SeekableByteChannel> cloudIndexWrapper) {
    Utils.nonNull(codec);
    try {
        final String absolutePath = IOUtils.getPath(featureInput.getFeaturePath()).toAbsolutePath().toUri().toString();

        // Instruct the reader factory to not require an index. We will require one ourselves as soon as
        // a query by interval is attempted.
        final boolean requireIndex = false;

        // Only apply the wrappers if the feature input is on Google Cloud Storage
        final Function<SeekableByteChannel, SeekableByteChannel> dataWrapper;
        final Function<SeekableByteChannel, SeekableByteChannel> indexWrapper;
        if ( BucketUtils.isCloudStorageUrl(absolutePath) ) {
            dataWrapper = cloudWrapper;
            indexWrapper = cloudIndexWrapper;
        } else {
            dataWrapper = Function.identity();
            indexWrapper = Function.identity();
        }
        return AbstractFeatureReader.getFeatureReader(absolutePath, null, codec, requireIndex, dataWrapper, indexWrapper);
    }
    catch ( final TribbleException e ) {
        throw new GATKException("Error initializing feature reader for path " + featureInput.getFeaturePath(), e);
    }
}
示例9: ReferenceMultiSource
import org.broadinstitute.hellbender.utils.gcs.BucketUtils; //导入依赖的package包/类
/**
 * Creates a reference source, choosing the backing implementation from the reference URL:
 * a ReferenceTwoBitSource for 2bit references, a ReferenceHadoopSource or ReferenceFileSource
 * for fasta references (depending on whether the URL is a Hadoop URL), and a ReferenceAPISource
 * (Google Genomics API) for anything else.
 *
 * @param referenceURL the name of the reference (if using the Google Genomics API), or a path to the reference file
 * @param referenceWindowFunction the custom reference window function used to map reads to desired reference bases;
 *                                checked with Utils.nonNull
 * @throws UserException if the 2bit reference source cannot be created; the underlying IOException is kept as the cause
 */
public ReferenceMultiSource(final String referenceURL,
                            final SerializableFunction<GATKRead, SimpleInterval> referenceWindowFunction) {
    Utils.nonNull(referenceWindowFunction);
    if (ReferenceTwoBitSource.isTwoBit(referenceURL)) {
        try {
            referenceSource = new ReferenceTwoBitSource(referenceURL);
        } catch (IOException e) {
            // Keep the original exception as the cause so its stack trace is not lost,
            // and separate the detail message from the fixed text.
            throw new UserException("Failed to create a ReferenceTwoBitSource object: " + e.getMessage(), e);
        }
    } else if (isFasta(referenceURL)) {
        if (BucketUtils.isHadoopUrl(referenceURL)) {
            referenceSource = new ReferenceHadoopSource(referenceURL);
        } else {
            referenceSource = new ReferenceFileSource(referenceURL);
        }
    } else { // use the Google Genomics API
        referenceSource = new ReferenceAPISource(referenceURL);
    }
    this.referenceWindowFunction = referenceWindowFunction;
}
示例10: SubdivideAndFillReadsIterator
import org.broadinstitute.hellbender.utils.gcs.BucketUtils; //导入依赖的package包/类
/**
 * Sets up iteration over one shard: cuts the shard's interval into sub-shards of the requested
 * size, opens the BAM (from cloud storage or from a local file) with SILENT validation
 * stringency, and starts an overlapping query on the shard's interval.
 *
 * @param bam the path of the BAM to read; Hadoop URLs are rejected.
 * @param outputShardSize the size passed to IntervalUtils.cutToShards for the sub-shards.
 * @param margin the number of positions added on each side of the shard interval to compute
 *               the first and last valid positions.
 * @param optFilter the read filter stored for later use.
 * @param shard the shard to subdivide.
 * @throws RuntimeException if {@code bam} is a Hadoop URL.
 */
public SubdivideAndFillReadsIterator(String bam, int outputShardSize, int margin, final ReadFilter optFilter, ContextShard shard) throws IOException, GeneralSecurityException, ClassNotFoundException {
    this.bam = bam;
    this.shard = shard;
    this.optFilter = optFilter;

    final SimpleInterval shardInterval = shard.interval;
    // it's OK if this goes beyond the contig boundaries.
    lastValidPos = shardInterval.getEnd() + margin;
    // ... but never start before position 1.
    firstValidPos = Math.max(shardInterval.getStart() - margin, 1);

    ArrayList<SimpleInterval> toCut = new ArrayList<>();
    toCut.add(shardInterval);
    subshards = IntervalUtils.cutToShards(toCut, outputShardSize);
    currentSubShardIndex = 0;
    currentSubShard = subshards.get(currentSubShardIndex);

    final boolean onCloud = BucketUtils.isCloudStorageUrl(bam);
    if (!onCloud && BucketUtils.isHadoopUrl(bam)) {
        throw new RuntimeException("Sorry, Hadoop paths aren't yet supported");
    }
    final SamReaderFactory readerFactory = SamReaderFactory.make().validationStringency(ValidationStringency.SILENT);
    if (onCloud) {
        reader = readerFactory.open(IOUtils.getPath(bam));
    } else {
        // read from local file (this only makes sense if every worker sees the same thing, e.g. if we're running locally)
        reader = readerFactory.open(new File(bam));
    }

    query = reader.queryOverlapping(shardInterval.getContig(), shardInterval.getStart(), shardInterval.getEnd());
}
示例11: getSAMFileWriter
import org.broadinstitute.hellbender.utils.gcs.BucketUtils; //导入依赖的package包/类
/**
 * Creates a writer for the given output name, chosen from the text after the last '.'.
 * Supported format: BAM and SAM. CRAM is unsupported. Other requested type will default to SAM.
 * Index creation is enabled only when the output is pre-ordered and the header's sort order
 * is coordinate.
 *
 * @param outputName the output path; must contain a '.'.
 * @param header the header handed to the writer.
 * @param preOrdered whether the records will be supplied already in order.
 * @return a BAM writer for BAM extensions, otherwise a SAM writer.
 * @throws IllegalArgumentException if {@code outputName} contains no '.'.
 * @throws UnsupportedOperationException if a CRAM extension is requested.
 */
private static SAMFileWriter getSAMFileWriter(final String outputName, final SAMFileHeader header, final boolean preOrdered) {
    final int lastDot = outputName.lastIndexOf(".");
    if (lastDot < 0) {
        throw new IllegalArgumentException("Provided path doesn't have a proper extension: " + outputName);
    }

    final boolean coordinateSorted = header.getSortOrder() == SAMFileHeader.SortOrder.coordinate;
    final SAMFileWriterFactory writerFactory = new SAMFileWriterFactory()
            .setCreateIndex(preOrdered && coordinateSorted);

    final String extension = outputName.substring(lastDot + 1);
    if (extension.endsWith(SamReader.Type.BAM_TYPE.fileExtension())) {
        return writerFactory.makeBAMWriter(header, preOrdered, BucketUtils.createFile(outputName));
    }

    // A SAM match takes precedence over the CRAM check, mirroring the order of the checks.
    final boolean isSam = extension.endsWith(SamReader.Type.SAM_TYPE.fileExtension());
    if (!isSam && extension.endsWith(SamReader.Type.CRAM_TYPE.fileExtension())) {
        throw new UnsupportedOperationException("We currently don't support CRAM output");
    }

    // Explicit SAM, or any other extension: write SAM.
    return writerFactory.makeSAMWriter(header, preOrdered, BucketUtils.createFile(outputName));
}
示例12: readCrossContigsToIgnoreFile
import org.broadinstitute.hellbender.utils.gcs.BucketUtils; //导入依赖的package包/类
/**
 * Read a file of contig names that will be ignored when checking for inter-contig pairs.
 * Each line of the file is looked up in the dictionary as a contig name.
 *
 * @param crossContigsToIgnoreFile the path of the file to read, one contig name per line.
 * @param dictionary the sequence dictionary used to translate names into sequence indices.
 * @return the set of sequence indices of the listed contigs.
 * @throws UserException if a line is not a contig name known to the dictionary, or the file can't be read.
 */
private static Set<Integer> readCrossContigsToIgnoreFile( final String crossContigsToIgnoreFile,
                                                          final SAMSequenceDictionary dictionary ) {
    final Set<Integer> contigIds = new HashSet<>();
    try ( final BufferedReader reader =
                  new BufferedReader(
                          new InputStreamReader(BucketUtils.openFile(crossContigsToIgnoreFile))) ) {
        for ( String contigName = reader.readLine(); contigName != null; contigName = reader.readLine() ) {
            final int contigId = dictionary.getSequenceIndex(contigName);
            // The dictionary reports -1 for names it does not contain.
            if ( contigId == -1 ) {
                throw new UserException("crossContigToIgnoreFile contains an unrecognized contig name: "+contigName);
            }
            contigIds.add(contigId);
        }
    }
    catch ( final IOException ioe ) {
        throw new UserException("Can't read crossContigToIgnore file "+crossContigsToIgnoreFile, ioe);
    }
    return contigIds;
}
示例13: processFasta
import org.broadinstitute.hellbender.utils.gcs.BucketUtils; //导入依赖的package包/类
/**
 * Reads a fasta file and collects the kmers produced by SVDUSTFilteredKmerizer.canonicalStream
 * for each sequence in it. Lines starting with '>' are treated as record headers: they end the
 * sequence accumulated so far. Blank lines are skipped.
 *
 * @param kSize the kmer size passed to the kmerizer.
 * @param maxDUSTScore the maximum DUST score passed to the kmerizer.
 * @param fastaFilename the path of the fasta file to read.
 * @return the kmers of all the sequences, in file order.
 * @throws GATKException if the file can't be read.
 */
@VisibleForTesting static List<SVKmer> processFasta( final int kSize,
                                                     final int maxDUSTScore,
                                                     final String fastaFilename) {
    try ( BufferedReader rdr = new BufferedReader(new InputStreamReader(BucketUtils.openFile(fastaFilename))) ) {
        // The file size is only a sizing hint for the list. Clamp it into the valid range:
        // a plain (int) cast of a size above Integer.MAX_VALUE can go negative, and ArrayList
        // rejects a negative initial capacity with an IllegalArgumentException.
        final long fileSize = BucketUtils.fileSize(fastaFilename);
        final int initialCapacity = (int) Math.max(0L, Math.min(fileSize, (long) Integer.MAX_VALUE - 8));
        final List<SVKmer> kmers = new ArrayList<>(initialCapacity);
        String line;
        final StringBuilder sb = new StringBuilder();
        final SVKmer kmerSeed = new SVKmerLong();
        while ( (line = rdr.readLine()) != null ) {
            // A blank line carries no bases; skipping it also avoids charAt(0) failing on an empty string.
            if ( line.isEmpty() ) {
                continue;
            }
            if ( line.charAt(0) != '>' ) {
                sb.append(line);
            } else if ( sb.length() > 0 ) {
                // A header line ends the previous record: kmerize it and start over.
                SVDUSTFilteredKmerizer.canonicalStream(sb,kSize,maxDUSTScore,kmerSeed).forEach(kmers::add);
                sb.setLength(0);
            }
        }
        // Kmerize the last record, which has no header line following it.
        if ( sb.length() > 0 ) {
            SVDUSTFilteredKmerizer.canonicalStream(sb,kSize,maxDUSTScore,kmerSeed).forEach(kmers::add);
        }
        return kmers;
    }
    catch ( IOException ioe ) {
        throw new GATKException("Can't read high copy kmers fasta file "+fastaFilename, ioe);
    }
}
示例14: loadBam
import org.broadinstitute.hellbender.utils.gcs.BucketUtils; //导入依赖的package包/类
/**
 * Reads bam from path and returns tuple of the header and reads RDD.
 * The input must be unaligned; reference sequences are added to the returned header by
 * PSBwaUtils.addReferenceSequencesToHeader.
 *
 * @param path the path of the BAM to load; may be null.
 * @param readsSource the source used to read the header and the reads.
 * @return the header and reads, or null if {@code path} is null or the file does not exist
 *         (a warning is logged in the latter case).
 * @throws UserException.BadInput if the BAM header already has a non-empty sequence dictionary.
 */
private Tuple2<SAMFileHeader, JavaRDD<GATKRead>> loadBam(final String path,
                                                         final ReadsSparkSource readsSource) {
    if (path == null) {
        return null;
    }
    if (!BucketUtils.fileExists(path)) {
        logger.warn("Could not find file " + path + ". Skipping...");
        return null;
    }

    final SAMFileHeader bamHeader = readsSource.getHeader(path, null);
    final boolean hasSequences =
            bamHeader.getSequenceDictionary() != null && !bamHeader.getSequenceDictionary().isEmpty();
    if (hasSequences) {
        throw new UserException.BadInput("Input BAM should be unaligned, but found one or more sequences in the header.");
    }

    PSBwaUtils.addReferenceSequencesToHeader(bamHeader, bwaArgs.referencePath, getReferenceWindowFunction());
    final JavaRDD<GATKRead> bamReads = readsSource.getParallelReads(path, null, null, bamPartitionSplitSize);
    return new Tuple2<>(bamHeader, bamReads);
}
示例15: writeMissingReferenceAccessions
import org.broadinstitute.hellbender.utils.gcs.BucketUtils; //导入依赖的package包/类
/**
 * Writes accessions contained in a SAM header that do not exist in the taxonomy database,
 * one accession per line. Nothing is written (and no file is created) when the header, its
 * sequence dictionary, or the dictionary's sequence list is null.
 *
 * @param path the path of the file to create.
 * @param header the SAM header whose sequence names are checked; may be null.
 * @param taxDB the taxonomy database; a name is unknown when it is not a key of accessionToTaxId.
 * @param logger receives a warning if the output stream reports an error.
 */
public static void writeMissingReferenceAccessions(final String path, final SAMFileHeader header, final PSTaxonomyDatabase taxDB,
                                                   final Logger logger) {
    if (header != null && header.getSequenceDictionary() != null && header.getSequenceDictionary().getSequences() != null) {
        final Set<String> unknownSequences = header.getSequenceDictionary().getSequences().stream()
                .map(SAMSequenceRecord::getSequenceName)
                .filter(name -> !taxDB.accessionToTaxId.containsKey(name))
                .collect(Collectors.toSet());
        try (final PrintStream file = new PrintStream(BucketUtils.createFile(path))) {
            // println rather than print: otherwise all the names run together with no delimiter.
            unknownSequences.forEach(file::println);
            // PrintStream does not throw on write failures, so poll its error flag instead.
            if (file.checkError()) {
                logger.warn("Error writing to header warnings file");
            }
        }
    }
}