This article collects typical usage examples of the Java class org.broadinstitute.hellbender.utils.IntervalUtils. If you are wondering what IntervalUtils is for and how to use it, the curated examples below should help.
The IntervalUtils class belongs to the org.broadinstitute.hellbender.utils package. Fifteen code examples are shown below, ordered by popularity by default.
Example 1: add
import org.broadinstitute.hellbender.utils.IntervalUtils; // import the required package/class
/**
 * Takes the variants, groups them into shards of size "bigShardSize" at the client, then
 * runs subdivideAndFillReads then fillContext. See their documentation for more information.
 */
public static PCollection<ContextShard> add(
        Pipeline pipeline,
        List<SimpleInterval> intervalsOfInterest, int bigShardSize, List<Variant> variants,
        String bam, int outputShardSize, int margin, final ReadFilter optFilter,
        final ReferenceMultiSource refSource
) throws IOException {
    List<SimpleInterval> shardedIntervals = IntervalUtils.cutToShards(intervalsOfInterest, bigShardSize);
    ArrayList<AddContextDataToReadOptimized.ContextShard> shards = AddContextDataToReadOptimized.fillVariants(shardedIntervals, variants, margin);
    PCollection<AddContextDataToReadOptimized.ContextShard> shardsPCol = pipeline.apply(Create.of(shards));
    return shardsPCol
            // big shards of variants -> smaller shards with variants and reads. We take the
            // opportunity to filter the reads as close to the source as possible.
            .apply(ParDo.named("subdivideAndFillReads").of(AddContextDataToReadOptimized.subdivideAndFillReads(bam, outputShardSize, margin, optFilter)))
            // add reference bases to the shards
            .apply(ParDo.named("fillContext").of(AddContextDataToReadOptimized.fillContext(refSource)));
}
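A hedged wiring sketch for this method (Dataflow-SDK style; the bucket path, shard sizes, and the pre-existing intervals/variants/aReadFilter/refSource variables are all illustrative assumptions, not part of the original tool):

import com.google.cloud.dataflow.sdk.Pipeline;
import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
import com.google.cloud.dataflow.sdk.values.PCollection;

final Pipeline p = Pipeline.create(PipelineOptionsFactory.create());
// 1 Mb "big" shards at the client, 5 kb output shards, 1 kb margin around each shard
final PCollection<ContextShard> shards = AddContextDataToReadOptimized.add(
        p, intervals, 1_000_000, variants,
        "gs://my-bucket/reads.bam", 5_000, 1_000, aReadFilter, refSource);
p.run();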
Example 2: setupPipeline
import org.broadinstitute.hellbender.utils.IntervalUtils; // import the required package/class
@Override
protected void setupPipeline(final Pipeline pipeline) {
    final ReadsDataflowSource readsSource = new ReadsDataflowSource(bam, pipeline);
    final SAMFileHeader header = readsSource.getHeader();
    final SAMSequenceDictionary sequenceDictionary = header.getSequenceDictionary();
    final List<SimpleInterval> intervals = intervalArgumentCollection.intervalsSpecified()
            ? intervalArgumentCollection.getIntervals(sequenceDictionary)
            : IntervalUtils.getAllIntervalsForReference(sequenceDictionary);
    final PCollectionView<SAMFileHeader> headerPcolView = pipeline.apply(Create.of(header)).apply(View.<SAMFileHeader>asSingleton());
    final PCollection<GATKRead> preads = readsSource.getReadPCollection(intervals);
    final OpticalDuplicateFinder finder = opticalDuplicatesArgumentCollection.READ_NAME_REGEX != null ?
            new OpticalDuplicateFinder(opticalDuplicatesArgumentCollection.READ_NAME_REGEX, opticalDuplicatesArgumentCollection.OPTICAL_DUPLICATE_PIXEL_DISTANCE, null) : null;
    final PCollectionView<OpticalDuplicateFinder> finderPcolView = pipeline.apply(Create.of(finder)).apply(View.<OpticalDuplicateFinder>asSingleton());
    final PCollection<GATKRead> results = preads.apply(new MarkDuplicates(headerPcolView, finderPcolView));
    // TODO: support writing large output files (need a sharded BAM writer)
    SmallBamWriter.writeToFile(pipeline, results, header, outputFile);
    if (metricsFile != null) {
        final PCollection<KV<String, DuplicationMetrics>> metrics = results.apply(new MarkDuplicatesDataflowUtils.GenerateMetricsTransform(headerPcolView));
        MarkDuplicatesDataflowUtils.writeMetricsToFile(pipeline, metrics, header, metricsFile);
    }
}
Example 3: setupPipeline
import org.broadinstitute.hellbender.utils.IntervalUtils; // import the required package/class
@Override
protected void setupPipeline(Pipeline pipeline) {
    if (readArguments.getReadFilesNames().size() > 1) {
        throw new UserException("Sorry, we only support a single input file for now.");
    }
    final String filename = readArguments.getReadFilesNames().get(0);
    final ReadsDataflowSource readsSource = new ReadsDataflowSource(filename, pipeline);
    final SAMFileHeader header = readsSource.getHeader();
    final PCollectionView<SAMFileHeader> headerView = pipeline.apply(Create.of(header)).apply(View.asSingleton());
    final SAMSequenceDictionary sequenceDictionary = header.getSequenceDictionary();
    final List<SimpleInterval> intervals = intervalArgumentCollection.intervalsSpecified()
            ? intervalArgumentCollection.getIntervals(sequenceDictionary)
            : IntervalUtils.getAllIntervalsForReference(sequenceDictionary);
    final PCollectionView<BaseRecalOutput> recalInfoSingletonView = BaseRecalOutputSource.loadFileOrRemote(pipeline, BQSR_RECAL_FILE_NAME).apply(View.asSingleton());
    final PCollection<GATKRead> output = readsSource.getReadPCollection(intervals, ValidationStringency.SILENT, false)
            .apply(new ApplyBQSRTransform(headerView, recalInfoSingletonView, bqsrOpts));
    intermediateRemoteBam = OUTPUT;
    if (needsIntermediateCopy()) {
        // The user requested remote execution but provided a local output file name, so we stage
        // the results in remote storage as a go-between. Note that this may require additional
        // permissions.
        intermediateRemoteBam = BucketUtils.randomRemotePath(stagingLocation, "temp-applyBqsr-output-", ".bam");
        logger.info("Staging results at " + intermediateRemoteBam);
    }
    SmallBamWriter.writeToFile(pipeline, output, header, intermediateRemoteBam);
}
Example 4: onTraversalStart
import org.broadinstitute.hellbender.utils.IntervalUtils; // import the required package/class
@Override
public void onTraversalStart() {
    ParamUtils.isPositive(scatterCount, "scatter count must be > 0.");
    if (!outputDir.exists() && !outputDir.mkdir()) {
        throw new RuntimeIOException("Unable to create directory: " + outputDir.getAbsolutePath());
    }
    // In general the sequence dictionary comes from the reference, but taking it from a bam
    // (-I reads.bam) or a vcf (-F variants.vcf) is also supported.
    final SAMSequenceDictionary sequenceDictionary = getBestAvailableSequenceDictionary();
    final List<SimpleInterval> intervals = hasIntervals()
            ? intervalArgumentCollection.getIntervals(sequenceDictionary)
            : IntervalUtils.getAllIntervalsForReference(sequenceDictionary);
    final IntervalList intervalList = new IntervalList(sequenceDictionary);
    intervals.stream().map(si -> new Interval(si.getContig(), si.getStart(), si.getEnd())).forEach(intervalList::add);
    final IntervalListScatterer scatterer = new IntervalListScatterer(subdivisionMode);
    final List<IntervalList> scattered = scatterer.scatter(intervalList, scatterCount, false);
    // Write each scatter chunk as e.g. 0000-scattered.intervals, 0001-scattered.intervals, ...
    final DecimalFormat formatter = new DecimalFormat("0000");
    IntStream.range(0, scattered.size()).forEach(n -> scattered.get(n).write(new File(outputDir, formatter.format(n) + "-scattered.intervals")));
}
Example 5: editIntervals
import org.broadinstitute.hellbender.utils.IntervalUtils; // import the required package/class
/**
 * Determine the intervals to consider for coverage collection. Honors the keepNonAutosomes parameter.
 *
 * <p>Developer's note: This CLI will always set the attribute, intervals, to a non-null value.</p>
 *
 * @param rawIntervals Specified by the user. If null, converts to SimpleIntervals specifying the entire
 *                     reference genome. If keepNonAutosomes is NOT specified, the contigs listed in the
 *                     docs for {@link SparkGenomeReadCounts#keepNonAutosomes} are pruned from these
 *                     intervals (or from the ones specified by the user).
 * @return Never {@code null}. The final list of intervals, treated as if the user had specified them
 *         on the CLI.
 */
@Override
protected List<SimpleInterval> editIntervals(final List<SimpleInterval> rawIntervals) {
    List<SimpleInterval> modifiedIntervals = rawIntervals;
    if (rawIntervals == null) {
        modifiedIntervals = IntervalUtils.getAllIntervalsForReference(getReferenceSequenceDictionary());
    }
    if (keepNonAutosomes) {
        return modifiedIntervals;
    }
    // Drop the configured non-autosomal contigs, plus unplaced (GL*) and non-chromosomal (NC_*) contigs.
    logger.info("Dropping non-autosomes, as requested...");
    return modifiedIntervals.stream()
            .filter(s -> !NONAUTOSOMALCONTIGS.contains(s.getContig()))
            .filter(s -> !(s.getContig().startsWith("GL")) && !(s.getContig().startsWith("NC_")))
            .collect(Collectors.toList());
}
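A minimal, self-contained sketch of the same contig filtering, runnable outside the tool (the contig names and the contents of the non-autosome set below are illustrative assumptions, not the tool's actual configuration):

import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;

public class ContigFilterSketch {
    public static void main(String[] args) {
        // stand-in for NONAUTOSOMALCONTIGS; the real set lives in the tool
        final Set<String> nonAutosomes = new HashSet<>(Arrays.asList("chrX", "chrY", "chrM"));
        final List<String> contigs = Arrays.asList("chr1", "chrX", "GL000207.1", "NC_007605");
        final List<String> kept = contigs.stream()
                .filter(c -> !nonAutosomes.contains(c))
                .filter(c -> !c.startsWith("GL") && !c.startsWith("NC_"))
                .collect(Collectors.toList());
        System.out.println(kept); // prints [chr1]
    }
}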
Example 6: divideIntervalIntoShards
import org.broadinstitute.hellbender.utils.IntervalUtils; // import the required package/class
/**
 * Divide an interval into ShardBoundaries. Each shard will cover up to shardSize bases, include shardPadding
 * bases of extra padding on either side, and begin shardStep bases after the previous shard.
 *
 * @param interval interval to shard; must be on the contig according to the provided dictionary
 * @param shardSize desired shard size; intervals larger than this will be divided into shards of up to this size
 * @param shardStep each shard will begin this many bases away from the previous shard
 * @param shardPadding desired shard padding; each shard's interval will be padded on both sides by this number of bases (may be 0)
 * @param dictionary sequence dictionary for reads
 * @return List of {@link ShardBoundary} objects spanning the interval
 */
static List<ShardBoundary> divideIntervalIntoShards(final SimpleInterval interval, final int shardSize, final int shardStep, final int shardPadding, final SAMSequenceDictionary dictionary) {
    Utils.nonNull(interval);
    Utils.nonNull(dictionary);
    Utils.validateArg(shardSize >= 1, "shardSize must be >= 1");
    Utils.validateArg(shardStep >= 1, "shardStep must be >= 1");
    Utils.validateArg(shardPadding >= 0, "shardPadding must be >= 0");
    Utils.validateArg(IntervalUtils.intervalIsOnDictionaryContig(interval, dictionary), () ->
            "Interval " + interval + " not within the bounds of a contig in the provided dictionary");
    final List<ShardBoundary> shards = new ArrayList<>();
    int start = interval.getStart();
    while (start <= interval.getEnd()) {
        final int end = Math.min(start + shardSize - 1, interval.getEnd());
        final SimpleInterval nextShardInterval = new SimpleInterval(interval.getContig(), start, end);
        final SimpleInterval nextShardIntervalPadded = nextShardInterval.expandWithinContig(shardPadding, dictionary);
        shards.add(new ShardBoundary(nextShardInterval, nextShardIntervalPadded));
        start += shardStep;
    }
    return shards;
}
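A quick usage sketch, calling the method as from its defining class (the contig name, contig length, and shard parameters are illustrative; htsjdk's SAMSequenceRecord/SAMSequenceDictionary build a one-contig dictionary):

import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SAMSequenceRecord;
import java.util.Collections;
import java.util.List;

// A hypothetical dictionary with a single 1 Mb contig named "chr1"
final SAMSequenceDictionary dict = new SAMSequenceDictionary(
        Collections.singletonList(new SAMSequenceRecord("chr1", 1_000_000)));
final SimpleInterval interval = new SimpleInterval("chr1", 1, 250_000);
// Non-overlapping 100 kb shards (shardStep == shardSize) with 1 kb of padding:
final List<ShardBoundary> shards =
        divideIntervalIntoShards(interval, 100_000, 100_000, 1_000, dict);
// Expect three shards: chr1:1-100000, chr1:100001-200000, chr1:200001-250000,
// each padded by up to 1000 bases on both sides and clipped at the contig boundaries.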
Example 7: MultiIntervalLocalReadShard
import org.broadinstitute.hellbender.utils.IntervalUtils; // import the required package/class
/**
 * Create a new MultiIntervalLocalReadShard spanning the given intervals, with each interval expanded
 * on both sides by the specified number of padding bases.
 *
 * @param intervals The intervals that this shard spans
 * @param intervalPadding Number of bases to pad each of the shard's intervals (on both sides)
 * @param readsSource Source of reads
 */
public MultiIntervalLocalReadShard(final List<SimpleInterval> intervals, final int intervalPadding, final ReadsDataSource readsSource) {
    Utils.nonNull(intervals);
    Utils.nonNull(readsSource);
    Utils.validateArg(intervalPadding >= 0, "intervalPadding must be >= 0");
    // Feed intervals through IntervalUtils.getIntervalsWithFlanks() to ensure they get sorted using
    // the same comparator as the paddedIntervals below.
    this.intervals = Collections.unmodifiableList(IntervalUtils.getIntervalsWithFlanks(intervals, 0, readsSource.getSequenceDictionary()));
    // This will both pad each interval and merge any intervals that are overlapping or adjacent
    // after padding, in addition to sorting the intervals.
    this.paddedIntervals = Collections.unmodifiableList(IntervalUtils.getIntervalsWithFlanks(intervals, intervalPadding, readsSource.getSequenceDictionary()));
    this.readsSource = readsSource;
}
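To make the padding-and-merging behavior described in the comments above concrete, a hedged sketch (the dictionary variable and the coordinates are assumptions; the merge follows because the flanks overlap):

import java.util.Arrays;
import java.util.List;

final List<SimpleInterval> padded = IntervalUtils.getIntervalsWithFlanks(
        Arrays.asList(new SimpleInterval("chr1", 1_000, 2_000),
                      new SimpleInterval("chr1", 2_300, 3_000)),
        200, dictionary);
// With 200 bases of flank the intervals become chr1:800-2200 and chr1:2100-3200,
// which overlap, so the result is a single merged interval: chr1:800-3200.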
Example 8: onStartup
import org.broadinstitute.hellbender.utils.IntervalUtils; // import the required package/class
/**
 * Initialize data sources for traversal.
 *
 * Marked final so that tool authors don't override it. Tool authors should override onTraversalStart() instead.
 */
@Override
protected final void onStartup() {
    super.onStartup();
    if (minAssemblyRegionSize <= 0 || maxAssemblyRegionSize <= 0) {
        throw new CommandLineException.BadArgumentValue("min/max assembly region size must be > 0");
    }
    if (minAssemblyRegionSize > maxAssemblyRegionSize) {
        throw new CommandLineException.BadArgumentValue("minAssemblyRegionSize must be <= maxAssemblyRegionSize");
    }
    if (assemblyRegionPadding < 0) {
        throw new CommandLineException.BadArgumentValue("assemblyRegionPadding must be >= 0");
    }
    if (maxReadsPerAlignmentStart < 0) {
        throw new CommandLineException.BadArgumentValue("maxReadsPerAlignmentStart must be >= 0");
    }
    final List<SimpleInterval> intervals = hasIntervals() ? intervalsForTraversal : IntervalUtils.getAllIntervalsForReference(getHeaderForReads().getSequenceDictionary());
    readShards = makeReadShards(intervals);
    initializeAssemblyRegionOutputStreams();
}
Example 9: add
import org.broadinstitute.hellbender.utils.IntervalUtils; // import the required package/class
/**
 * Create shards with reads, variants, and reference bases, using default values for shard sizes and margin.
 * See the other methods here for an explanation of the various arguments.
 */
public static JavaRDD<ContextShard> add(JavaSparkContext ctx,
                                        final List<SimpleInterval> intervals,
                                        String bam, final List<GATKVariant> variants,
                                        final ReadFilter optFilter, final ReferenceMultiSource rds) {
    // prepare shards for the intervals of interest
    List<SimpleInterval> shardedIntervals = IntervalUtils.cutToShards(intervals, bigShardSize);
    // add variants
    ArrayList<ContextShard> localShards = AddContextDataToReadSparkOptimized.fillVariants(shardedIntervals, variants, margin);
    // ship to cluster
    JavaRDD<ContextShard> shards = ctx.parallelize(localShards);
    // subdivide, and add reads
    JavaRDD<ContextShard> reads;
    try {
        reads = shards.flatMap(AddContextDataToReadSparkOptimized.subdivideAndFillReads(bam,
                outputShardSize, margin, optFilter));
    } catch (IOException x) {
        throw new UserException.CouldNotReadInputFile("Couldn't read " + bam + ": " + x.getMessage(), x);
    }
    // add reference bases
    reads = reads.map(s -> AddContextDataToReadSparkOptimized.fillContext(rds, s));
    return reads;
}
Example 10: SubdivideAndFillReadsIterator
import org.broadinstitute.hellbender.utils.IntervalUtils; // import the required package/class
public SubdivideAndFillReadsIterator(String bam, int outputShardSize, int margin, final ReadFilter optFilter, ContextShard shard) throws IOException, GeneralSecurityException, ClassNotFoundException {
    this.bam = bam;
    this.shard = shard;
    this.optFilter = optFilter;
    // it's OK if this goes beyond the contig boundaries.
    lastValidPos = shard.interval.getEnd() + margin;
    firstValidPos = Math.max(shard.interval.getStart() - margin, 1);
    ArrayList<SimpleInterval> ints = new ArrayList<>();
    ints.add(shard.interval);
    subshards = IntervalUtils.cutToShards(ints, outputShardSize);
    currentSubShardIndex = 0;
    currentSubShard = subshards.get(currentSubShardIndex);
    if (BucketUtils.isCloudStorageUrl(bam)) {
        reader = SamReaderFactory.make()
                .validationStringency(ValidationStringency.SILENT)
                .open(IOUtils.getPath(bam));
    } else if (BucketUtils.isHadoopUrl(bam)) {
        throw new RuntimeException("Sorry, Hadoop paths aren't yet supported");
    } else {
        // read from local file (this only makes sense if every worker sees the same thing, e.g. if we're running locally)
        reader = SamReaderFactory.make().validationStringency(ValidationStringency.SILENT).open(new File(bam));
    }
    query = reader.queryOverlapping(shard.interval.getContig(), shard.interval.getStart(), shard.interval.getEnd());
}
Example 11: getVariants
import org.broadinstitute.hellbender.utils.IntervalUtils; // import the required package/class
/**
 * Loads variants and the corresponding reads, reference and features into a {@link JavaRDD} for the intervals specified.
 * For the current implementation the reads context will always be empty.
 *
 * If no intervals were specified, returns all the variants.
 *
 * @return all variants as a {@link JavaRDD}, bounded by intervals if specified.
 */
public JavaRDD<VariantWalkerContext> getVariants(JavaSparkContext ctx) {
    SAMSequenceDictionary sequenceDictionary = getBestAvailableSequenceDictionary();
    List<SimpleInterval> intervals = hasIntervals() ? getIntervals() : IntervalUtils.getAllIntervalsForReference(sequenceDictionary);
    // use unpadded shards (padding is only needed for reference bases)
    final List<ShardBoundary> intervalShards = intervals.stream()
            .flatMap(interval -> Shard.divideIntervalIntoShards(interval, variantShardSize, 0, sequenceDictionary).stream())
            .collect(Collectors.toList());
    JavaRDD<VariantContext> variants = variantsSource.getParallelVariantContexts(drivingVariantFile, getIntervals());
    VariantFilter variantFilter = makeVariantFilter();
    variants = variants.filter(variantFilter::test);
    JavaRDD<Shard<VariantContext>> shardedVariants = SparkSharder.shard(ctx, variants, VariantContext.class, sequenceDictionary, intervalShards, variantShardSize, shuffle);
    Broadcast<ReferenceMultiSource> bReferenceSource = hasReference() ? ctx.broadcast(getReference()) : null;
    Broadcast<FeatureManager> bFeatureManager = features == null ? null : ctx.broadcast(features);
    return shardedVariants.flatMap(getVariantsFunction(bReferenceSource, bFeatureManager, sequenceDictionary, variantShardPadding));
}
Example 12: prepareQueryIntervals
import org.broadinstitute.hellbender.utils.IntervalUtils; // import the required package/class
/**
 * Converts a List of SimpleIntervals into the format required by the SamReader query API
 *
 * @param rawIntervals SimpleIntervals to be converted
 * @return A sorted, merged list of QueryIntervals suitable for passing to the SamReader query API
 */
private QueryInterval[] prepareQueryIntervals(final List<SimpleInterval> rawIntervals) {
    if (rawIntervals == null || rawIntervals.isEmpty()) {
        return null;
    }
    // This might take a while with large interval lists, so log a status message
    logger.debug("Preparing intervals for traversal");
    // Convert each SimpleInterval to a QueryInterval
    final QueryInterval[] convertedIntervals =
            rawIntervals.stream()
                    .map(rawInterval -> IntervalUtils.convertSimpleIntervalToQueryInterval(rawInterval, reader.getFileHeader().getSequenceDictionary()))
                    .toArray(QueryInterval[]::new);
    // Intervals must be optimized (sorted and merged) in order to use the htsjdk query API
    return QueryInterval.optimizeIntervals(convertedIntervals);
}
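A hedged usage sketch of the htsjdk query API with the prepared intervals (reader and myIntervals are assumed to exist in the surrounding class; passing contained == false returns all records that overlap the intervals):

final QueryInterval[] queryIntervals = prepareQueryIntervals(myIntervals);
try (final SAMRecordIterator it = (queryIntervals == null)
        ? reader.iterator()                      // no intervals: iterate over everything
        : reader.query(queryIntervals, false)) { // false => overlapping, not fully contained
    while (it.hasNext()) {
        final SAMRecord record = it.next();
        // ... process each record overlapping the intervals ...
    }
}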
Example 13: advance
import org.broadinstitute.hellbender.utils.IntervalUtils; // import the required package/class
/**
 * Advance to the next location, setting next to null if there are no more records.
 */
private void advance() {
    // get the next record to check
    next = (iterator.hasNext()) ? iterator.next() : null;
    // iterate until either the records or the intervals are exhausted
    while (next != null && currentInterval != null) {
        if (currentInterval.overlaps(next)) {
            // keep this record because it overlaps the current interval
            return;
        } else {
            final int comparison = IntervalUtils.compareLocatables(currentInterval, next, dictionary);
            if (comparison < 0) {
                // the current interval is before the record: advance to the next interval
                currentInterval = (intervals.hasNext()) ? intervals.next() : null;
            } else if (comparison > 0) {
                // the record is before the current interval: advance to the next record
                next = (iterator.hasNext()) ? iterator.next() : null;
            }
        }
    }
    // if the value of next overlaps some interval, the method returns before this point
    next = null;
}
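To make the comparison semantics concrete, a small hedged example (dictionary is an assumed SAMSequenceDictionary defining the contig order; the assumption here is that compareLocatables orders by dictionary contig order, then by position):

final int cmp = IntervalUtils.compareLocatables(
        new SimpleInterval("chr1", 100, 200),
        new SimpleInterval("chr1", 500, 600),
        dictionary);
// cmp < 0: the first locatable sorts earlier, so in advance() above the interval
// iterator would move on to its next interval rather than discarding the record.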
Example 14: createTempGenomicsDB
import org.broadinstitute.hellbender.utils.IntervalUtils; // import the required package/class
/**
 * Create a temporary GenomicsDB containing a single interval of data from a set of gvcfs.
 * The database will be deleted automatically on jvm shutdown.
 *
 * @param gvcfs a List of GVCFs to load from
 * @param interval the interval to load
 * @return the created workspace folder containing the new GenomicsDB
 */
public static File createTempGenomicsDB(final List<File> gvcfs, final Locatable interval) {
    final File workspaceDir = BaseTest.createTempDir("genomicsDBWorkspace");
    final CommandLineProgramTester importer = GenomicsDBImport.class::getSimpleName;
    final ArgumentsBuilder args = new ArgumentsBuilder();
    gvcfs.forEach(args::addVCF);
    final String workspace = new File(workspaceDir, "workspace").getAbsolutePath();
    args.addArgument(GenomicsDBImport.WORKSPACE_ARG_LONG_NAME, workspace);
    args.addArgument("L", IntervalUtils.locatableToString(interval));
    importer.runCommandLine(args);
    return new File(workspace);
}
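A hedged usage sketch from a test context (the file names and interval are illustrative; the "gendb://" URI scheme for pointing downstream tools at a GenomicsDB workspace is assumed from GATK convention):

import java.io.File;
import java.util.Arrays;
import java.util.List;

final List<File> gvcfs = Arrays.asList(new File("sample1.g.vcf"), new File("sample2.g.vcf"));
final File workspace = createTempGenomicsDB(gvcfs, new SimpleInterval("chr20", 1, 10_000));
// Downstream tools can then read from the workspace via a gendb-style URI:
final String gendbUri = "gendb://" + workspace.getAbsolutePath();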
Example 15: onTraversalStart
import org.broadinstitute.hellbender.utils.IntervalUtils; // import the required package/class
@Override
public void onTraversalStart() {
    ParamUtils.isPositive(scatterCount, "scatter-count must be > 0.");
    if (!outputDir.exists() && !outputDir.mkdir()) {
        throw new RuntimeIOException("Unable to create directory: " + outputDir.getAbsolutePath());
    }
    // In general the sequence dictionary comes from the reference, but taking it from a bam
    // (-I reads.bam) or a vcf (-F variants.vcf) is also supported.
    final SAMSequenceDictionary sequenceDictionary = getBestAvailableSequenceDictionary();
    final List<SimpleInterval> intervals = hasIntervals()
            ? intervalArgumentCollection.getIntervals(sequenceDictionary)
            : IntervalUtils.getAllIntervalsForReference(sequenceDictionary);
    final IntervalList intervalList = new IntervalList(sequenceDictionary);
    intervals.stream().map(si -> new Interval(si.getContig(), si.getStart(), si.getEnd())).forEach(intervalList::add);
    final IntervalListScatterer scatterer = new IntervalListScatterer(subdivisionMode);
    final List<IntervalList> scattered = scatterer.scatter(intervalList, scatterCount, false);
    // Write each scatter chunk as e.g. 0000-scattered.intervals, 0001-scattered.intervals, ...
    final DecimalFormat formatter = new DecimalFormat("0000");
    IntStream.range(0, scattered.size()).forEach(n -> scattered.get(n).write(new File(outputDir, formatter.format(n) + "-scattered.intervals")));
}