

Java IntervalUtils.getAllIntervalsForReference Method Code Examples

This article collects typical usage examples of the Java method org.broadinstitute.hellbender.utils.IntervalUtils.getAllIntervalsForReference. If you are wondering what IntervalUtils.getAllIntervalsForReference does, how to call it, or where to find usage examples, the curated code samples below should help. You can also explore further usage examples of the containing class, org.broadinstitute.hellbender.utils.IntervalUtils.


Fifteen code examples of IntervalUtils.getAllIntervalsForReference are shown below, ordered by popularity by default.
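Before turning to the collected examples, here is a minimal, self-contained sketch of the method's contract: given an htsjdk SAMSequenceDictionary, it returns a List<SimpleInterval> with one interval spanning each contig from end to end. The two-contig dictionary below is hand-built purely for illustration, and the expected output is inferred from the examples that follow.

import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SAMSequenceRecord;
import org.broadinstitute.hellbender.utils.IntervalUtils;
import org.broadinstitute.hellbender.utils.SimpleInterval;

import java.util.Arrays;
import java.util.List;

public class GetAllIntervalsSketch {
    public static void main(String[] args) {
        // Hand-built dictionary with two contigs (hypothetical names and lengths, for illustration only).
        final SAMSequenceDictionary dict = new SAMSequenceDictionary(Arrays.asList(
                new SAMSequenceRecord("chr1", 1_000_000),
                new SAMSequenceRecord("chr2", 500_000)));

        // One 1-based interval per contig, covering the whole contig.
        final List<SimpleInterval> intervals = IntervalUtils.getAllIntervalsForReference(dict);
        for (final SimpleInterval interval : intervals) {
            System.out.println(interval.getContig() + ":" + interval.getStart() + "-" + interval.getEnd());
        }
        // Expected (assuming the whole-contig behavior the examples below rely on):
        // chr1:1-1000000
        // chr2:1-500000
    }
}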

Example 1: setupPipeline

import org.broadinstitute.hellbender.utils.IntervalUtils; // import the package/class the method depends on
@Override
protected void setupPipeline(final Pipeline pipeline) {
    final ReadsDataflowSource readsSource = new ReadsDataflowSource(bam, pipeline);
    final SAMFileHeader header = readsSource.getHeader();
    final SAMSequenceDictionary sequenceDictionary = header.getSequenceDictionary();
    final List<SimpleInterval> intervals = intervalArgumentCollection.intervalsSpecified() ? intervalArgumentCollection.getIntervals(sequenceDictionary) :
            IntervalUtils.getAllIntervalsForReference(sequenceDictionary);

    final PCollectionView<SAMFileHeader> headerPcolView = pipeline.apply(Create.of(header)).apply(View.<SAMFileHeader>asSingleton());

    final PCollection<GATKRead> preads = readsSource.getReadPCollection(intervals);

    final OpticalDuplicateFinder finder = opticalDuplicatesArgumentCollection.READ_NAME_REGEX != null ?
        new OpticalDuplicateFinder(opticalDuplicatesArgumentCollection.READ_NAME_REGEX, opticalDuplicatesArgumentCollection.OPTICAL_DUPLICATE_PIXEL_DISTANCE, null) : null;
    final PCollectionView<OpticalDuplicateFinder> finderPcolView = pipeline.apply(Create.of(finder)).apply(View.<OpticalDuplicateFinder>asSingleton());

    final PCollection<GATKRead> results = preads.apply(new MarkDuplicates(headerPcolView, finderPcolView));

    // TODO: support writing large output files (need a sharded BAM writer)
    SmallBamWriter.writeToFile(pipeline, results, header, outputFile);

    if (metricsFile != null) {
        final PCollection<KV<String,DuplicationMetrics>> metrics = results.apply(new MarkDuplicatesDataflowUtils.GenerateMetricsTransform(headerPcolView));
        MarkDuplicatesDataflowUtils.writeMetricsToFile(pipeline, metrics, header, metricsFile);
    }
}
 
Developer: broadinstitute, Project: gatk-dataflow, Lines: 27, Source: MarkDuplicatesDataflow.java

Example 2: setupPipeline

import org.broadinstitute.hellbender.utils.IntervalUtils; // import the package/class the method depends on
@Override
protected void setupPipeline(Pipeline pipeline) {
    if (readArguments.getReadFilesNames().size()>1) {
        throw new UserException("Sorry, we only support a single input file for now.");
    }
    final String filename = readArguments.getReadFilesNames().get(0);
    final ReadsDataflowSource readsSource = new ReadsDataflowSource(filename, pipeline);
    final SAMFileHeader header = readsSource.getHeader();
    final PCollectionView<SAMFileHeader> headerView = pipeline.apply(Create.of(header)).apply(View.asSingleton());
    final SAMSequenceDictionary sequenceDictionary = header.getSequenceDictionary();
    final List<SimpleInterval> intervals = intervalArgumentCollection.intervalsSpecified() ? intervalArgumentCollection.getIntervals(sequenceDictionary)
            : IntervalUtils.getAllIntervalsForReference(sequenceDictionary);
    final PCollectionView<BaseRecalOutput> recalInfoSingletonView = BaseRecalOutputSource.loadFileOrRemote(pipeline, BQSR_RECAL_FILE_NAME).apply(View.asSingleton());
    final PCollection<GATKRead> output = readsSource.getReadPCollection(intervals, ValidationStringency.SILENT, false)
            .apply(new ApplyBQSRTransform(headerView, recalInfoSingletonView, bqsrOpts));
    intermediateRemoteBam = OUTPUT;
    if (needsIntermediateCopy()) {
        // The user specified remote execution and provided a local file name. So we're going to have to save to remote storage as a go-between.
        // Note that this may require more permissions
        intermediateRemoteBam = BucketUtils.randomRemotePath(stagingLocation, "temp-applyBqsr-output-", ".bam");
        logger.info("Staging results at " + intermediateRemoteBam);
    }
    SmallBamWriter.writeToFile(pipeline, output, header, intermediateRemoteBam);
}
 
Developer: broadinstitute, Project: gatk-dataflow, Lines: 25, Source: ApplyBQSRDataflow.java

Example 3: onTraversalStart

import org.broadinstitute.hellbender.utils.IntervalUtils; // import the package/class the method depends on
@Override
public void onTraversalStart() {
    ParamUtils.isPositive(scatterCount, "scatter count must be > 0.");

    if (!outputDir.exists() && !outputDir.mkdir()) {
        throw new RuntimeIOException("Unable to create directory: " + outputDir.getAbsolutePath());
    }

    // in general dictionary will be from the reference, but using -I reads.bam or -F variants.vcf
    // to use the sequence dict from a bam or vcf is also supported
    final SAMSequenceDictionary sequenceDictionary = getBestAvailableSequenceDictionary();

    final List<SimpleInterval> intervals = hasIntervals() ? intervalArgumentCollection.getIntervals(sequenceDictionary)
            : IntervalUtils.getAllIntervalsForReference(sequenceDictionary);

    final IntervalList intervalList = new IntervalList(sequenceDictionary);
    intervals.stream().map(si -> new Interval(si.getContig(), si.getStart(), si.getEnd())).forEach(intervalList::add);
    final IntervalListScatterer scatterer = new IntervalListScatterer(subdivisionMode);
    final List<IntervalList> scattered = scatterer.scatter(intervalList, scatterCount, false);

    final DecimalFormat formatter = new DecimalFormat("0000");
    IntStream.range(0, scattered.size()).forEach(n -> scattered.get(n).write(new File(outputDir, formatter.format(n) + "-scattered.intervals")));
}
 
Developer: broadinstitute, Project: gatk-protected, Lines: 24, Source: SplitIntervals.java

Example 4: editIntervals

import org.broadinstitute.hellbender.utils.IntervalUtils; // import the package/class the method depends on
/**
 * Determine the intervals to consider for coverage collection.  Honors the keepNonAutosomes parameter.
 *
 * <p>Developer's note:  This CLI will always set the attribute, intervals, to a non-null value.</p>
 *
 * @param rawIntervals Specified by the user.  If null, converts to SimpleIntervals specifying the entire
 *                     reference genome.  If keepNonAutosomes is NOT specified, it will prune these intervals (or the
 *                     ones specified by the user) to remove the contigs that are listed in the docs for
 *                     {@link SparkGenomeReadCounts#keepNonAutosomes}
 * @return Never {@code null}.  The specified list of intervals; these will be treated as if the user had
 *  specified them on the CLI.
 */
@Override
protected List<SimpleInterval> editIntervals(final List<SimpleInterval> rawIntervals) {
    List<SimpleInterval> modifiedIntervals = rawIntervals;
    if (rawIntervals == null) {
        modifiedIntervals = IntervalUtils.getAllIntervalsForReference(getReferenceSequenceDictionary());
    }

    if (keepNonAutosomes) {
        return modifiedIntervals;
    }

    // Enforce the elimination of certain contigs when proper option is set.
    logger.info("Dropping non-autosomes, as requested...");
    return modifiedIntervals.stream()
            .filter(s -> !NONAUTOSOMALCONTIGS.contains(s.getContig()))
            .filter(s -> !(s.getContig().startsWith("GL")) && !(s.getContig().startsWith("NC_")))
            .collect(Collectors.toList());
}
 
Developer: broadinstitute, Project: gatk-protected, Lines: 31, Source: SparkGenomeReadCounts.java

Example 5: onStartup

import org.broadinstitute.hellbender.utils.IntervalUtils; // import the package/class the method depends on
/**
 * Initialize data sources for traversal.
 *
 * Marked final so that tool authors don't override it. Tool authors should override onTraversalStart() instead.
 */
@Override
protected final void onStartup() {
    super.onStartup();

    if ( minAssemblyRegionSize <= 0 || maxAssemblyRegionSize <= 0 ) {
        throw new CommandLineException.BadArgumentValue("min/max assembly region size must be > 0");
    }

    if ( minAssemblyRegionSize > maxAssemblyRegionSize ) {
        throw new CommandLineException.BadArgumentValue("minAssemblyRegionSize must be <= maxAssemblyRegionSize");
    }

    if ( assemblyRegionPadding < 0 ) {
        throw new CommandLineException.BadArgumentValue("assemblyRegionPadding must be >= 0");
    }

    if ( maxReadsPerAlignmentStart < 0 ) {
        throw new CommandLineException.BadArgumentValue("maxReadsPerAlignmentStart must be >= 0");
    }

    final List<SimpleInterval> intervals = hasIntervals() ? intervalsForTraversal : IntervalUtils.getAllIntervalsForReference(getHeaderForReads().getSequenceDictionary());
    readShards = makeReadShards(intervals);

    initializeAssemblyRegionOutputStreams();
}
 
Developer: broadinstitute, Project: gatk, Lines: 31, Source: AssemblyRegionWalker.java

Example 6: getVariants

import org.broadinstitute.hellbender.utils.IntervalUtils; // import the package/class the method depends on
/**
 * Loads variants and the corresponding reads, reference and features into a {@link JavaRDD} for the intervals specified.
 * For the current implementation, the reads context will always be empty.
 *
 * If no intervals were specified, returns all the variants.
 *
 * @return all variants as a {@link JavaRDD}, bounded by intervals if specified.
 */
public JavaRDD<VariantWalkerContext> getVariants(JavaSparkContext ctx) {
    SAMSequenceDictionary sequenceDictionary = getBestAvailableSequenceDictionary();
    List<SimpleInterval> intervals = hasIntervals() ? getIntervals() : IntervalUtils.getAllIntervalsForReference(sequenceDictionary);
    // use unpadded shards (padding is only needed for reference bases)
    final List<ShardBoundary> intervalShards = intervals.stream()
            .flatMap(interval -> Shard.divideIntervalIntoShards(interval, variantShardSize, 0, sequenceDictionary).stream())
            .collect(Collectors.toList());
    JavaRDD<VariantContext> variants = variantsSource.getParallelVariantContexts(drivingVariantFile, getIntervals());
    VariantFilter variantFilter = makeVariantFilter();
    variants = variants.filter(variantFilter::test);
    JavaRDD<Shard<VariantContext>> shardedVariants = SparkSharder.shard(ctx, variants, VariantContext.class, sequenceDictionary, intervalShards, variantShardSize, shuffle);
    Broadcast<ReferenceMultiSource> bReferenceSource = hasReference() ? ctx.broadcast(getReference()) : null;
    Broadcast<FeatureManager> bFeatureManager = features == null ? null : ctx.broadcast(features);
    return shardedVariants.flatMap(getVariantsFunction(bReferenceSource, bFeatureManager, sequenceDictionary, variantShardPadding));
}
 
Developer: broadinstitute, Project: gatk, Lines: 24, Source: VariantWalkerSpark.java

Example 7: onTraversalStart

import org.broadinstitute.hellbender.utils.IntervalUtils; // import the package/class the method depends on
@Override
public void onTraversalStart() {
    ParamUtils.isPositive(scatterCount, "scatter-count must be > 0.");

    if (!outputDir.exists() && !outputDir.mkdir()) {
        throw new RuntimeIOException("Unable to create directory: " + outputDir.getAbsolutePath());
    }

    // in general dictionary will be from the reference, but using -I reads.bam or -F variants.vcf
    // to use the sequence dict from a bam or vcf is also supported
    final SAMSequenceDictionary sequenceDictionary = getBestAvailableSequenceDictionary();

    final List<SimpleInterval> intervals = hasIntervals() ? intervalArgumentCollection.getIntervals(sequenceDictionary)
            : IntervalUtils.getAllIntervalsForReference(sequenceDictionary);

    final IntervalList intervalList = new IntervalList(sequenceDictionary);
    intervals.stream().map(si -> new Interval(si.getContig(), si.getStart(), si.getEnd())).forEach(intervalList::add);
    final IntervalListScatterer scatterer = new IntervalListScatterer(subdivisionMode);
    final List<IntervalList> scattered = scatterer.scatter(intervalList, scatterCount, false);

    final DecimalFormat formatter = new DecimalFormat("0000");
    IntStream.range(0, scattered.size()).forEach(n -> scattered.get(n).write(new File(outputDir, formatter.format(n) + "-scattered.intervals")));
}
 
Developer: broadinstitute, Project: gatk, Lines: 24, Source: SplitIntervals.java

Example 8: runTool

import org.broadinstitute.hellbender.utils.IntervalUtils; // import the package/class the method depends on
@Override
protected void runTool(final JavaSparkContext ctx) {
    //TODO remove me when https://github.com/broadinstitute/gatk/issues/4274 and https://github.com/broadinstitute/gatk/issues/4303 are fixed
    if(hcArgs.emitReferenceConfidence == ReferenceConfidenceMode.GVCF
            && (AbstractFeatureReader.hasBlockCompressedExtension(output) || output.endsWith(IOUtil.BCF_FILE_EXTENSION))) {
        throw new UserException.UnimplementedFeature("It is currently not possible to write a compressed g.vcf or g.bcf from HaplotypeCallerSpark.  " +
                                        "See https://github.com/broadinstitute/gatk/issues/4274 and https://github.com/broadinstitute/gatk/issues/4303 for more details.");
    }

    logger.info("********************************************************************************");
    logger.info("The output of this tool DOES NOT match the output of HaplotypeCaller. ");
    logger.info("It is under development and should not be used for production work. ");
    logger.info("For evaluation only.");
    logger.info("Use the non-spark HaplotypeCaller if you care about the results. ");
    logger.info("********************************************************************************");
    final List<SimpleInterval> intervals = hasIntervals() ? getIntervals() : IntervalUtils.getAllIntervalsForReference(getHeaderForReads().getSequenceDictionary());
    callVariantsWithHaplotypeCallerAndWriteOutput(ctx, getReads(), getHeaderForReads(), getReference(), intervals, hcArgs, shardingArgs, numReducers, output);
}
 
Developer: broadinstitute, Project: gatk, Lines: 19, Source: HaplotypeCallerSpark.java

Example 9: testLocalFile

import org.broadinstitute.hellbender.utils.IntervalUtils; // import the package/class the method depends on
@Test(enabled = false)
public void testLocalFile() {
    final String bam2 = "src/test/resources/org/broadinstitute/hellbender/tools/BQSR/HiSeq.1mb.1RG.2k_lines.alternate.bam";
    Pipeline pipeline = GATKTestPipeline.create();
    DataflowUtils.registerGATKCoders(pipeline);
    ReadsDataflowSource readsSource = new ReadsDataflowSource(bam2, pipeline);
    SAMFileHeader header = readsSource.getHeader();
    final SAMSequenceDictionary sequenceDictionary = header.getSequenceDictionary();
    final List<SimpleInterval> intervals = IntervalUtils.getAllIntervalsForReference(sequenceDictionary);
    PCollection<GATKRead> reads = readsSource.getReadPCollection(intervals, ValidationStringency.SILENT, true);
    PCollection<Long> count = reads.apply(Count.globally());
    // for now we only get 1649, because it removes unmapped reads.
    DataflowAssert.thatSingleton(count).isEqualTo(1674L);
    pipeline.run();
}
 
Developer: broadinstitute, Project: gatk-dataflow, Lines: 16, Source: ReadsDataflowSourceTest.java

Example 10: ingestReadsAndGrabHeader

import org.broadinstitute.hellbender.utils.IntervalUtils; // import the package/class the method depends on
/** Reads from local disk or GCS -> the header, plus a PCollection of the reads. */
private PCollection<GATKRead> ingestReadsAndGrabHeader(final Pipeline pipeline, String filename) throws IOException {

    // input reads
    if (BucketUtils.isCloudStorageUrl(filename)) {
        // set up ingestion on the cloud
        // but read the header locally
        GcsPath path = GcsPath.fromUri(filename);
        InputStream inputstream = Channels.newInputStream(new GcsUtil.GcsUtilFactory().create(pipeline.getOptions())
                .open(path));
        SamReader reader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT).open(SamInputResource.of(inputstream));
        header = reader.getFileHeader();

        final SAMSequenceDictionary sequenceDictionary = header.getSequenceDictionary();
        final List<SimpleInterval> intervals = IntervalUtils.getAllIntervalsForReference(sequenceDictionary);
        return new ReadsDataflowSource(filename, pipeline).getReadPCollection(intervals, ValidationStringency.SILENT, false);
    } else {
        // ingestion from local file
        try( ReadsDataSource readsSource = new ReadsDataSource(new File(filename)) ) {
            header = readsSource.getHeader();
            List<GATKRead> reads = new ArrayList<>();
            for ( GATKRead read : readsSource ) {
                reads.add(read);
            }
            return pipeline.apply("input ingest", Create.of(reads));
        }
    }
}
 
Developer: broadinstitute, Project: gatk-dataflow, Lines: 29, Source: SmallBamWriterTest.java

Example 11: getAlignments

import org.broadinstitute.hellbender.utils.IntervalUtils; // import the package/class the method depends on
/**
 * Loads alignments and the corresponding reference and features into a {@link JavaRDD} for the intervals specified.
 *
 * If no intervals were specified, returns all the alignments.
 *
 * @return all alignments as a {@link JavaRDD}, bounded by intervals if specified.
 */
public JavaRDD<LocusWalkerContext> getAlignments(JavaSparkContext ctx) {
    SAMSequenceDictionary sequenceDictionary = getBestAvailableSequenceDictionary();
    List<SimpleInterval> intervals = hasIntervals() ? getIntervals() : IntervalUtils.getAllIntervalsForReference(sequenceDictionary);
    final List<ShardBoundary> intervalShards = intervals.stream()
            .flatMap(interval -> Shard.divideIntervalIntoShards(interval, readShardSize, readShardPadding, sequenceDictionary).stream())
            .collect(Collectors.toList());
    int maxLocatableSize = Math.min(readShardSize, readShardPadding);
    JavaRDD<Shard<GATKRead>> shardedReads = SparkSharder.shard(ctx, getReads(), GATKRead.class, sequenceDictionary, intervalShards, maxLocatableSize, shuffle);
    Broadcast<ReferenceMultiSource> bReferenceSource = hasReference() ? ctx.broadcast(getReference()) : null;
    Broadcast<FeatureManager> bFeatureManager = features == null ? null : ctx.broadcast(features);
    return shardedReads.flatMap(getAlignmentsFunction(bReferenceSource, bFeatureManager, sequenceDictionary, getHeaderForReads(), getDownsamplingInfo(), emitEmptyLoci()));
}
 
Developer: broadinstitute, Project: gatk, Lines: 20, Source: LocusWalkerSpark.java

Example 12: getReads

import org.broadinstitute.hellbender.utils.IntervalUtils; // import the package/class the method depends on
/**
 * Loads reads and the corresponding reference and features into a {@link JavaRDD} for the intervals specified.
 *
 * If no intervals were specified, returns all the reads.
 *
 * @return all reads as a {@link JavaRDD}, bounded by intervals if specified.
 */
public JavaRDD<ReadWalkerContext> getReads(JavaSparkContext ctx) {
    SAMSequenceDictionary sequenceDictionary = getBestAvailableSequenceDictionary();
    List<SimpleInterval> intervals = hasIntervals() ? getIntervals() : IntervalUtils.getAllIntervalsForReference(sequenceDictionary);
    // use unpadded shards (padding is only needed for reference bases)
    final List<ShardBoundary> intervalShards = intervals.stream()
            .flatMap(interval -> Shard.divideIntervalIntoShards(interval, readShardSize, 0, sequenceDictionary).stream())
            .collect(Collectors.toList());
    JavaRDD<Shard<GATKRead>> shardedReads = SparkSharder.shard(ctx, getReads(), GATKRead.class, sequenceDictionary, intervalShards, readShardSize, shuffle);
    Broadcast<ReferenceMultiSource> bReferenceSource = hasReference() ? ctx.broadcast(getReference()) : null;
    Broadcast<FeatureManager> bFeatureManager = features == null ? null : ctx.broadcast(features);
    return shardedReads.flatMap(getReadsFunction(bReferenceSource, bFeatureManager, sequenceDictionary, readShardPadding));
}
 
Developer: broadinstitute, Project: gatk, Lines: 20, Source: ReadWalkerSpark.java

Example 13: editIntervals

import org.broadinstitute.hellbender.utils.IntervalUtils; // import the package/class the method depends on
/**
 * Note that this sets {@code intervalShards} as a side effect, in order to add padding to the intervals.
 */
@Override
protected List<SimpleInterval> editIntervals(List<SimpleInterval> rawIntervals) {
    SAMSequenceDictionary sequenceDictionary = getBestAvailableSequenceDictionary();
    List<SimpleInterval> intervals = rawIntervals == null ? IntervalUtils.getAllIntervalsForReference(sequenceDictionary) : rawIntervals;
    intervalShards = intervals.stream()
            .flatMap(interval -> Shard.divideIntervalIntoShards(interval, readShardSize, readShardPadding, sequenceDictionary).stream())
            .collect(Collectors.toList());
    List<SimpleInterval> paddedIntervalsForReads =
            intervals.stream().map(interval -> interval.expandWithinContig(readShardPadding, sequenceDictionary)).collect(Collectors.toList());
    return paddedIntervalsForReads;
}
 
Developer: broadinstitute, Project: gatk, Lines: 15, Source: AssemblyRegionWalkerSpark.java

Example 14: createAlignmentContextIterator

import org.broadinstitute.hellbender.utils.IntervalUtils; // import the package/class the method depends on
/**
 *  Create the appropriate instance of an alignment context iterator based on the input parameters.
 *
 *  Please note that this wrapper is still tied to {@link LocusIteratorByState} and some parameters are being passed directly to that class.
 *
 * @param intervalsForTraversal the intervals to generate alignment contexts over.
 * @param header SAM file header to use
 * @param readIterator iterator of sorted GATK reads
 * @param dictionary the SAMSequenceDictionary being used for this traversal.  This can be the same as the reference.  {@code null} is supported, but will often lead to invalid parameter combinations.
 * @param downsamplingInfo how to downsample (for {@link LocusIteratorByState})
 * @param isReference {@code true} if the dictionary specified above is from a reference, {@code false} if no reference is being used or the dictionary is not from a reference.
 * @param emitEmptyLoci whether loci with no coverage should be emitted.  In this case, the AlignmentContext will be empty (not null).
 * @param isKeepUniqueReadListInLibs if true, we will keep the unique reads from the samIterator and make them
 *                                       available via the transferReadsFromAllPreviousPileups interface (this parameter is specific to {@link LocusIteratorByState})
 * @param isIncludeDeletions include reads with deletion on the loci in question
 * @param isIncludeNs include reads with N on the loci in question
 * @return iterator that produces AlignmentContexts ready for consumption (e.g. by a {@link org.broadinstitute.hellbender.engine.LocusWalker})
 */
private static Iterator<AlignmentContext> createAlignmentContextIterator(final List<SimpleInterval> intervalsForTraversal,
                                                                         final SAMFileHeader header,
                                                                         final Iterator<GATKRead> readIterator,
                                                                         final SAMSequenceDictionary dictionary,
                                                                         final LIBSDownsamplingInfo downsamplingInfo,
                                                                         final boolean isReference,
                                                                         boolean emitEmptyLoci,
                                                                         boolean isKeepUniqueReadListInLibs,
                                                                         boolean isIncludeDeletions,
                                                                         boolean isIncludeNs) {

    // get the samples from the read groups
    final Set<String> samples = header.getReadGroups().stream()
            .map(SAMReadGroupRecord::getSample)
            .collect(Collectors.toSet());

    // get the LIBS
    final LocusIteratorByState libs = new LocusIteratorByState(readIterator, downsamplingInfo, isKeepUniqueReadListInLibs, samples, header, isIncludeDeletions, isIncludeNs);

    List<SimpleInterval> finalIntervals = intervalsForTraversal;
    validateEmitEmptyLociParameters(emitEmptyLoci, dictionary, intervalsForTraversal, isReference);
    if (emitEmptyLoci) {

        // If no intervals were specified, then use the entire reference (or best available sequence dictionary).
        if (!areIntervalsSpecified(finalIntervals)) {
            finalIntervals = IntervalUtils.getAllIntervalsForReference(dictionary);
        }
        final IntervalLocusIterator intervalLocusIterator = new IntervalLocusIterator(finalIntervals.iterator());
        return new IntervalAlignmentContextIterator(libs, intervalLocusIterator, header.getSequenceDictionary());
    } else if (areIntervalsSpecified(finalIntervals)) {
        return new IntervalOverlappingIterator<>(libs, finalIntervals, header.getSequenceDictionary());
    } else {
        // prepare the iterator
        return libs;
    }
}
 
Developer: broadinstitute, Project: gatk, Lines: 55, Source: AlignmentContextIteratorBuilder.java

Example 15: setupPipeline

import org.broadinstitute.hellbender.utils.IntervalUtils; // import the package/class the method depends on
@Override
protected void setupPipeline( Pipeline pipeline ) {
    // Load the reads.
    final ReadsDataflowSource readsDataflowSource = new ReadsDataflowSource(bam, pipeline);
    final SAMFileHeader readsHeader = readsDataflowSource.getHeader();
    final List<SimpleInterval> intervals = intervalArgumentCollection.intervalsSpecified() ? intervalArgumentCollection.getIntervals(readsHeader.getSequenceDictionary())
            : IntervalUtils.getAllIntervalsForReference(readsHeader.getSequenceDictionary());

    final PCollectionView<SAMFileHeader> headerSingleton = ReadsDataflowSource.getHeaderView(pipeline, readsHeader);
    final PCollection<GATKRead> initialReads = readsDataflowSource.getReadPCollection(intervals);

    final OpticalDuplicateFinder finder = opticalDuplicatesArgumentCollection.READ_NAME_REGEX != null ?
            new OpticalDuplicateFinder(opticalDuplicatesArgumentCollection.READ_NAME_REGEX, opticalDuplicatesArgumentCollection.OPTICAL_DUPLICATE_PIXEL_DISTANCE, null) : null;
    final PCollectionView<OpticalDuplicateFinder> finderPcolView = pipeline.apply(Create.of(finder)).apply(View.<OpticalDuplicateFinder>asSingleton());

    // Apply MarkDuplicates to produce updated GATKReads.
    final PCollection<GATKRead> markedReads = initialReads.apply(new MarkDuplicates(headerSingleton, finderPcolView));

    // Load the Variants and the Reference and join them to reads.
    final VariantsDataflowSource variantsDataflowSource = new VariantsDataflowSource(baseRecalibrationKnownVariants, pipeline);

    // Use the BQSR_REFERENCE_WINDOW_FUNCTION so that the reference bases required by BQSR for each read are fetched
    final ReferenceMultiSource referenceDataflowSource = new ReferenceMultiSource(pipeline.getOptions(), referenceURL, BaseRecalibrationEngine.BQSR_REFERENCE_WINDOW_FUNCTION);

    final PCollection<KV<GATKRead, ReadContextData>> readsWithContext = AddContextDataToRead.add(markedReads, referenceDataflowSource, variantsDataflowSource);

    // BQSR.
    // default arguments are best practice.
    RecalibrationArgumentCollection recalArgs = new RecalibrationArgumentCollection();
    final SAMSequenceDictionary readsDictionary = readsHeader.getSequenceDictionary();
    final SAMSequenceDictionary refDictionary = referenceDataflowSource.getReferenceSequenceDictionary(readsDictionary);
    checkSequenceDictionaries(refDictionary, readsDictionary);
    PCollectionView<SAMSequenceDictionary> refDictionaryView = pipeline.apply(Create.of(refDictionary)).setName("refDictionary").apply(View.asSingleton());
    BaseRecalibratorTransform baseRecalibrator = new BaseRecalibratorTransform(headerSingleton, refDictionaryView, recalArgs);
    final PCollection<BaseRecalOutput> recalibrationReports = readsWithContext.apply(baseRecalibrator).apply(baseRecalibrator.toBaseRecalOutput());
    final PCollectionView<BaseRecalOutput> mergedRecalibrationReport = recalibrationReports.apply(View.<BaseRecalOutput>asSingleton());

    final ApplyBQSRArgumentCollection applyArgs = new ApplyBQSRArgumentCollection();
    final PCollection<GATKRead> finalReads = markedReads.apply(new ApplyBQSRTransform(headerSingleton, mergedRecalibrationReport, applyArgs));
    SmallBamWriter.writeToFile(pipeline, finalReads, readsHeader, output);
}
 
Developer: broadinstitute, Project: gatk-dataflow, Lines: 42, Source: ReadsPreprocessingPipeline.java
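
One idiom recurs throughout the fifteen examples: use the intervals the user specified, and fall back to IntervalUtils.getAllIntervalsForReference when none were given. The hypothetical helper below condenses that pattern; the class name, method name, and the null/empty convention are illustrative and not part of GATK.

import htsjdk.samtools.SAMSequenceDictionary;
import org.broadinstitute.hellbender.utils.IntervalUtils;
import org.broadinstitute.hellbender.utils.SimpleInterval;

import java.util.List;

public final class IntervalFallback {
    private IntervalFallback() {}

    /** Prefer the user-supplied intervals; otherwise cover every contig in the dictionary. */
    public static List<SimpleInterval> intervalsOrWholeReference(final List<SimpleInterval> userIntervals,
                                                                 final SAMSequenceDictionary sequenceDictionary) {
        // null or empty stands in for "no intervals specified", mirroring the
        // hasIntervals()/intervalsSpecified() checks in the examples above.
        return (userIntervals == null || userIntervals.isEmpty())
                ? IntervalUtils.getAllIntervalsForReference(sequenceDictionary)
                : userIntervals;
    }
}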


Note: The org.broadinstitute.hellbender.utils.IntervalUtils.getAllIntervalsForReference method examples in this article were compiled by 纯净天空 from GitHub/MSDocs and other open-source code and documentation platforms. The code snippets were selected from open-source projects contributed by various developers; copyright in the source code belongs to the original authors. For distribution and use, please refer to the License of the corresponding project; do not reproduce without permission.