This page collects typical usage examples of the Java method org.broadinstitute.hellbender.utils.IntervalUtils.getAllIntervalsForReference. If you are wondering what exactly IntervalUtils.getAllIntervalsForReference does, how to call it, or what it looks like in real code, the curated examples below should help. You can also explore further usage examples of the enclosing class, org.broadinstitute.hellbender.utils.IntervalUtils.
Fifteen code examples of IntervalUtils.getAllIntervalsForReference are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Java code examples.
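Before the examples: the method takes an htsjdk SAMSequenceDictionary and returns one SimpleInterval per contig, each spanning the whole contig. The minimal sketch below is not taken from any of the examples that follow; it assumes a hand-built dictionary with made-up contig names and lengths, purely to illustrate the return value.

import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SAMSequenceRecord;
import org.broadinstitute.hellbender.utils.IntervalUtils;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import java.util.List;

public class GetAllIntervalsForReferenceDemo {
    public static void main(String[] args) {
        // Hypothetical dictionary: two small contigs, for illustration only.
        final SAMSequenceDictionary dictionary = new SAMSequenceDictionary();
        dictionary.addSequence(new SAMSequenceRecord("chr1", 1000));
        dictionary.addSequence(new SAMSequenceRecord("chr2", 500));
        // One whole-contig interval per dictionary entry, e.g. chr1:1-1000 and chr2:1-500.
        final List<SimpleInterval> intervals = IntervalUtils.getAllIntervalsForReference(dictionary);
        intervals.forEach(System.out::println);
    }
}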
Example 1: setupPipeline
import org.broadinstitute.hellbender.utils.IntervalUtils; // import the package/class the method depends on
@Override
protected void setupPipeline(final Pipeline pipeline) {
    final ReadsDataflowSource readsSource = new ReadsDataflowSource(bam, pipeline);
    final SAMFileHeader header = readsSource.getHeader();
    final SAMSequenceDictionary sequenceDictionary = header.getSequenceDictionary();
    final List<SimpleInterval> intervals = intervalArgumentCollection.intervalsSpecified() ? intervalArgumentCollection.getIntervals(sequenceDictionary) :
            IntervalUtils.getAllIntervalsForReference(sequenceDictionary);
    final PCollectionView<SAMFileHeader> headerPcolView = pipeline.apply(Create.of(header)).apply(View.<SAMFileHeader>asSingleton());
    final PCollection<GATKRead> preads = readsSource.getReadPCollection(intervals);
    final OpticalDuplicateFinder finder = opticalDuplicatesArgumentCollection.READ_NAME_REGEX != null ?
            new OpticalDuplicateFinder(opticalDuplicatesArgumentCollection.READ_NAME_REGEX, opticalDuplicatesArgumentCollection.OPTICAL_DUPLICATE_PIXEL_DISTANCE, null) : null;
    final PCollectionView<OpticalDuplicateFinder> finderPcolView = pipeline.apply(Create.of(finder)).apply(View.<OpticalDuplicateFinder>asSingleton());
    final PCollection<GATKRead> results = preads.apply(new MarkDuplicates(headerPcolView, finderPcolView));
    // TODO: support writing large output files (need a sharded BAM writer)
    SmallBamWriter.writeToFile(pipeline, results, header, outputFile);
    if (metricsFile != null) {
        final PCollection<KV<String,DuplicationMetrics>> metrics = results.apply(new MarkDuplicatesDataflowUtils.GenerateMetricsTransform(headerPcolView));
        MarkDuplicatesDataflowUtils.writeMetricsToFile(pipeline, metrics, header, metricsFile);
    }
}
Example 2: setupPipeline
import org.broadinstitute.hellbender.utils.IntervalUtils; // import the package/class the method depends on
@Override
protected void setupPipeline(Pipeline pipeline) {
    if (readArguments.getReadFilesNames().size() > 1) {
        throw new UserException("Sorry, we only support a single input file for now.");
    }
    final String filename = readArguments.getReadFilesNames().get(0);
    final ReadsDataflowSource readsSource = new ReadsDataflowSource(filename, pipeline);
    final SAMFileHeader header = readsSource.getHeader();
    final PCollectionView<SAMFileHeader> headerView = pipeline.apply(Create.of(header)).apply(View.asSingleton());
    final SAMSequenceDictionary sequenceDictionary = header.getSequenceDictionary();
    final List<SimpleInterval> intervals = intervalArgumentCollection.intervalsSpecified() ? intervalArgumentCollection.getIntervals(sequenceDictionary)
            : IntervalUtils.getAllIntervalsForReference(sequenceDictionary);
    final PCollectionView<BaseRecalOutput> recalInfoSingletonView = BaseRecalOutputSource.loadFileOrRemote(pipeline, BQSR_RECAL_FILE_NAME).apply(View.asSingleton());
    final PCollection<GATKRead> output = readsSource.getReadPCollection(intervals, ValidationStringency.SILENT, false)
            .apply(new ApplyBQSRTransform(headerView, recalInfoSingletonView, bqsrOpts));
    intermediateRemoteBam = OUTPUT;
    if (needsIntermediateCopy()) {
        // The user specified remote execution and provided a local file name. So we're going to have to save to remote storage as a go-between.
        // Note that this may require more permissions
        intermediateRemoteBam = BucketUtils.randomRemotePath(stagingLocation, "temp-applyBqsr-output-", ".bam");
        logger.info("Staging results at " + intermediateRemoteBam);
    }
    SmallBamWriter.writeToFile(pipeline, output, header, intermediateRemoteBam);
}
Example 3: onTraversalStart
import org.broadinstitute.hellbender.utils.IntervalUtils; // import the package/class the method depends on
@Override
public void onTraversalStart() {
    ParamUtils.isPositive(scatterCount, "scatter count must be > 0.");
    if (!outputDir.exists() && !outputDir.mkdir()) {
        throw new RuntimeIOException("Unable to create directory: " + outputDir.getAbsolutePath());
    }
    // in general the dictionary will be from the reference, but using -I reads.bam or -F variants.vcf
    // to take the sequence dictionary from a bam or vcf is also supported
    final SAMSequenceDictionary sequenceDictionary = getBestAvailableSequenceDictionary();
    final List<SimpleInterval> intervals = hasIntervals() ? intervalArgumentCollection.getIntervals(sequenceDictionary)
            : IntervalUtils.getAllIntervalsForReference(sequenceDictionary);
    final IntervalList intervalList = new IntervalList(sequenceDictionary);
    intervals.stream().map(si -> new Interval(si.getContig(), si.getStart(), si.getEnd())).forEach(intervalList::add);
    final IntervalListScatterer scatterer = new IntervalListScatterer(subdivisionMode);
    final List<IntervalList> scattered = scatterer.scatter(intervalList, scatterCount, false);
    final DecimalFormat formatter = new DecimalFormat("0000");
    IntStream.range(0, scattered.size()).forEach(n -> scattered.get(n).write(new File(outputDir, formatter.format(n) + "-scattered.intervals")));
}
Example 4: editIntervals
import org.broadinstitute.hellbender.utils.IntervalUtils; // import the package/class the method depends on
/**
 * Determine the intervals to consider for coverage collection. Honors the {@code keepNonAutosomes} parameter.
 *
 * <p>Developer's note: This CLI will always set the attribute, intervals, to a non-null value.</p>
 *
 * @param rawIntervals Specified by the user. If null, converts to SimpleIntervals specifying the entire
 *                     reference genome. If keepNonAutosomes is NOT specified, it will prune these intervals (or the
 *                     ones specified by the user) to remove the contigs that are listed in the docs for
 *                     {@link SparkGenomeReadCounts#keepNonAutosomes}.
 * @return Never {@code null}. The specified list of intervals, treated as if the user had specified them on the CLI.
 */
@Override
protected List<SimpleInterval> editIntervals(final List<SimpleInterval> rawIntervals) {
    List<SimpleInterval> modifiedIntervals = rawIntervals;
    if (rawIntervals == null) {
        modifiedIntervals = IntervalUtils.getAllIntervalsForReference(getReferenceSequenceDictionary());
    }
    if (keepNonAutosomes) {
        return modifiedIntervals;
    }
    // Enforce the elimination of certain contigs when the proper option is set.
    logger.info("Dropping non-autosomes, as requested...");
    return modifiedIntervals.stream()
            .filter(s -> !NONAUTOSOMALCONTIGS.contains(s.getContig()))
            .filter(s -> !(s.getContig().startsWith("GL")) && !(s.getContig().startsWith("NC_")))
            .collect(Collectors.toList());
}
Example 5: onStartup
import org.broadinstitute.hellbender.utils.IntervalUtils; // import the package/class the method depends on
/**
 * Initialize data sources for traversal.
 *
 * Marked final so that tool authors don't override it. Tool authors should override onTraversalStart() instead.
 */
@Override
protected final void onStartup() {
    super.onStartup();
    if ( minAssemblyRegionSize <= 0 || maxAssemblyRegionSize <= 0 ) {
        throw new CommandLineException.BadArgumentValue("min/max assembly region size must be > 0");
    }
    if ( minAssemblyRegionSize > maxAssemblyRegionSize ) {
        throw new CommandLineException.BadArgumentValue("minAssemblyRegionSize must be <= maxAssemblyRegionSize");
    }
    if ( assemblyRegionPadding < 0 ) {
        throw new CommandLineException.BadArgumentValue("assemblyRegionPadding must be >= 0");
    }
    if ( maxReadsPerAlignmentStart < 0 ) {
        throw new CommandLineException.BadArgumentValue("maxReadsPerAlignmentStart must be >= 0");
    }
    final List<SimpleInterval> intervals = hasIntervals() ? intervalsForTraversal : IntervalUtils.getAllIntervalsForReference(getHeaderForReads().getSequenceDictionary());
    readShards = makeReadShards(intervals);
    initializeAssemblyRegionOutputStreams();
}
Example 6: getVariants
import org.broadinstitute.hellbender.utils.IntervalUtils; // import the package/class the method depends on
/**
 * Loads variants and the corresponding reads, reference and features into a {@link JavaRDD} for the intervals specified.
 * For the current implementation the reads context will always be empty.
 *
 * If no intervals were specified, returns all the variants.
 *
 * @return all variants as a {@link JavaRDD}, bounded by intervals if specified.
 */
public JavaRDD<VariantWalkerContext> getVariants(JavaSparkContext ctx) {
    SAMSequenceDictionary sequenceDictionary = getBestAvailableSequenceDictionary();
    List<SimpleInterval> intervals = hasIntervals() ? getIntervals() : IntervalUtils.getAllIntervalsForReference(sequenceDictionary);
    // use unpadded shards (padding is only needed for reference bases)
    final List<ShardBoundary> intervalShards = intervals.stream()
            .flatMap(interval -> Shard.divideIntervalIntoShards(interval, variantShardSize, 0, sequenceDictionary).stream())
            .collect(Collectors.toList());
    JavaRDD<VariantContext> variants = variantsSource.getParallelVariantContexts(drivingVariantFile, getIntervals());
    VariantFilter variantFilter = makeVariantFilter();
    variants = variants.filter(variantFilter::test);
    JavaRDD<Shard<VariantContext>> shardedVariants = SparkSharder.shard(ctx, variants, VariantContext.class, sequenceDictionary, intervalShards, variantShardSize, shuffle);
    Broadcast<ReferenceMultiSource> bReferenceSource = hasReference() ? ctx.broadcast(getReference()) : null;
    Broadcast<FeatureManager> bFeatureManager = features == null ? null : ctx.broadcast(features);
    return shardedVariants.flatMap(getVariantsFunction(bReferenceSource, bFeatureManager, sequenceDictionary, variantShardPadding));
}
Example 7: onTraversalStart
import org.broadinstitute.hellbender.utils.IntervalUtils; // import the package/class the method depends on
@Override
public void onTraversalStart() {
    ParamUtils.isPositive(scatterCount, "scatter-count must be > 0.");
    if (!outputDir.exists() && !outputDir.mkdir()) {
        throw new RuntimeIOException("Unable to create directory: " + outputDir.getAbsolutePath());
    }
    // in general the dictionary will be from the reference, but using -I reads.bam or -F variants.vcf
    // to take the sequence dictionary from a bam or vcf is also supported
    final SAMSequenceDictionary sequenceDictionary = getBestAvailableSequenceDictionary();
    final List<SimpleInterval> intervals = hasIntervals() ? intervalArgumentCollection.getIntervals(sequenceDictionary)
            : IntervalUtils.getAllIntervalsForReference(sequenceDictionary);
    final IntervalList intervalList = new IntervalList(sequenceDictionary);
    intervals.stream().map(si -> new Interval(si.getContig(), si.getStart(), si.getEnd())).forEach(intervalList::add);
    final IntervalListScatterer scatterer = new IntervalListScatterer(subdivisionMode);
    final List<IntervalList> scattered = scatterer.scatter(intervalList, scatterCount, false);
    final DecimalFormat formatter = new DecimalFormat("0000");
    IntStream.range(0, scattered.size()).forEach(n -> scattered.get(n).write(new File(outputDir, formatter.format(n) + "-scattered.intervals")));
}
Example 8: runTool
import org.broadinstitute.hellbender.utils.IntervalUtils; // import the package/class the method depends on
@Override
protected void runTool(final JavaSparkContext ctx) {
    //TODO remove me when https://github.com/broadinstitute/gatk/issues/4274 and https://github.com/broadinstitute/gatk/issues/4303 are fixed
    if (hcArgs.emitReferenceConfidence == ReferenceConfidenceMode.GVCF
            && (AbstractFeatureReader.hasBlockCompressedExtension(output) || output.endsWith(IOUtil.BCF_FILE_EXTENSION))) {
        throw new UserException.UnimplementedFeature("It is currently not possible to write a compressed g.vcf or g.bcf from HaplotypeCallerSpark. " +
                "See https://github.com/broadinstitute/gatk/issues/4274 and https://github.com/broadinstitute/gatk/issues/4303 for more details.");
    }
    logger.info("********************************************************************************");
    logger.info("The output of this tool DOES NOT match the output of HaplotypeCaller. ");
    logger.info("It is under development and should not be used for production work. ");
    logger.info("For evaluation only.");
    logger.info("Use the non-spark HaplotypeCaller if you care about the results. ");
    logger.info("********************************************************************************");
    final List<SimpleInterval> intervals = hasIntervals() ? getIntervals() : IntervalUtils.getAllIntervalsForReference(getHeaderForReads().getSequenceDictionary());
    callVariantsWithHaplotypeCallerAndWriteOutput(ctx, getReads(), getHeaderForReads(), getReference(), intervals, hcArgs, shardingArgs, numReducers, output);
}
Example 9: testLocalFile
import org.broadinstitute.hellbender.utils.IntervalUtils; // import the package/class the method depends on
@Test(enabled = false)
public void testLocalFile() {
    final String bam2 = "src/test/resources/org/broadinstitute/hellbender/tools/BQSR/HiSeq.1mb.1RG.2k_lines.alternate.bam";
    Pipeline pipeline = GATKTestPipeline.create();
    DataflowUtils.registerGATKCoders(pipeline);
    ReadsDataflowSource readsSource = new ReadsDataflowSource(bam2, pipeline);
    SAMFileHeader header = readsSource.getHeader();
    final SAMSequenceDictionary sequenceDictionary = header.getSequenceDictionary();
    final List<SimpleInterval> intervals = IntervalUtils.getAllIntervalsForReference(sequenceDictionary);
    PCollection<GATKRead> reads = readsSource.getReadPCollection(intervals, ValidationStringency.SILENT, true);
    PCollection<Long> count = reads.apply(Count.globally());
    // for now we only get 1649, because it removes unmapped reads.
    DataflowAssert.thatSingleton(count).isEqualTo(1674L);
    pipeline.run();
}
Example 10: ingestReadsAndGrabHeader
import org.broadinstitute.hellbender.utils.IntervalUtils; // import the package/class the method depends on
/** Reads from local disk or GCS -> header, and a PCollection of reads. */
private PCollection<GATKRead> ingestReadsAndGrabHeader(final Pipeline pipeline, String filename) throws IOException {
    // input reads
    if (BucketUtils.isCloudStorageUrl(filename)) {
        // set up ingestion on the cloud
        // but read the header locally
        GcsPath path = GcsPath.fromUri(filename);
        InputStream inputstream = Channels.newInputStream(new GcsUtil.GcsUtilFactory().create(pipeline.getOptions())
                .open(path));
        SamReader reader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT).open(SamInputResource.of(inputstream));
        header = reader.getFileHeader();
        final SAMSequenceDictionary sequenceDictionary = header.getSequenceDictionary();
        final List<SimpleInterval> intervals = IntervalUtils.getAllIntervalsForReference(sequenceDictionary);
        return new ReadsDataflowSource(filename, pipeline).getReadPCollection(intervals, ValidationStringency.SILENT, false);
    } else {
        // ingestion from a local file
        try ( ReadsDataSource readsSource = new ReadsDataSource(new File(filename)) ) {
            header = readsSource.getHeader();
            List<GATKRead> reads = new ArrayList<>();
            for ( GATKRead read : readsSource ) {
                reads.add(read);
            }
            return pipeline.apply("input ingest", Create.of(reads));
        }
    }
}
Example 11: getAlignments
import org.broadinstitute.hellbender.utils.IntervalUtils; // import the package/class the method depends on
/**
 * Loads alignments and the corresponding reference and features into a {@link JavaRDD} for the intervals specified.
 *
 * If no intervals were specified, returns all the alignments.
 *
 * @return all alignments as a {@link JavaRDD}, bounded by intervals if specified.
 */
public JavaRDD<LocusWalkerContext> getAlignments(JavaSparkContext ctx) {
    SAMSequenceDictionary sequenceDictionary = getBestAvailableSequenceDictionary();
    List<SimpleInterval> intervals = hasIntervals() ? getIntervals() : IntervalUtils.getAllIntervalsForReference(sequenceDictionary);
    final List<ShardBoundary> intervalShards = intervals.stream()
            .flatMap(interval -> Shard.divideIntervalIntoShards(interval, readShardSize, readShardPadding, sequenceDictionary).stream())
            .collect(Collectors.toList());
    int maxLocatableSize = Math.min(readShardSize, readShardPadding);
    JavaRDD<Shard<GATKRead>> shardedReads = SparkSharder.shard(ctx, getReads(), GATKRead.class, sequenceDictionary, intervalShards, maxLocatableSize, shuffle);
    Broadcast<ReferenceMultiSource> bReferenceSource = hasReference() ? ctx.broadcast(getReference()) : null;
    Broadcast<FeatureManager> bFeatureManager = features == null ? null : ctx.broadcast(features);
    return shardedReads.flatMap(getAlignmentsFunction(bReferenceSource, bFeatureManager, sequenceDictionary, getHeaderForReads(), getDownsamplingInfo(), emitEmptyLoci()));
}
Example 12: getReads
import org.broadinstitute.hellbender.utils.IntervalUtils; // import the package/class the method depends on
/**
 * Loads reads and the corresponding reference and features into a {@link JavaRDD} for the intervals specified.
 *
 * If no intervals were specified, returns all the reads.
 *
 * @return all reads as a {@link JavaRDD}, bounded by intervals if specified.
 */
public JavaRDD<ReadWalkerContext> getReads(JavaSparkContext ctx) {
    SAMSequenceDictionary sequenceDictionary = getBestAvailableSequenceDictionary();
    List<SimpleInterval> intervals = hasIntervals() ? getIntervals() : IntervalUtils.getAllIntervalsForReference(sequenceDictionary);
    // use unpadded shards (padding is only needed for reference bases)
    final List<ShardBoundary> intervalShards = intervals.stream()
            .flatMap(interval -> Shard.divideIntervalIntoShards(interval, readShardSize, 0, sequenceDictionary).stream())
            .collect(Collectors.toList());
    JavaRDD<Shard<GATKRead>> shardedReads = SparkSharder.shard(ctx, getReads(), GATKRead.class, sequenceDictionary, intervalShards, readShardSize, shuffle);
    Broadcast<ReferenceMultiSource> bReferenceSource = hasReference() ? ctx.broadcast(getReference()) : null;
    Broadcast<FeatureManager> bFeatureManager = features == null ? null : ctx.broadcast(features);
    return shardedReads.flatMap(getReadsFunction(bReferenceSource, bFeatureManager, sequenceDictionary, readShardPadding));
}
Example 13: editIntervals
import org.broadinstitute.hellbender.utils.IntervalUtils; // import the package/class the method depends on
/**
 * Note that this sets {@code intervalShards} as a side effect, in order to add padding to the intervals.
 */
@Override
protected List<SimpleInterval> editIntervals(List<SimpleInterval> rawIntervals) {
    SAMSequenceDictionary sequenceDictionary = getBestAvailableSequenceDictionary();
    List<SimpleInterval> intervals = rawIntervals == null ? IntervalUtils.getAllIntervalsForReference(sequenceDictionary) : rawIntervals;
    intervalShards = intervals.stream()
            .flatMap(interval -> Shard.divideIntervalIntoShards(interval, readShardSize, readShardPadding, sequenceDictionary).stream())
            .collect(Collectors.toList());
    List<SimpleInterval> paddedIntervalsForReads =
            intervals.stream().map(interval -> interval.expandWithinContig(readShardPadding, sequenceDictionary)).collect(Collectors.toList());
    return paddedIntervalsForReads;
}
Example 14: createAlignmentContextIterator
import org.broadinstitute.hellbender.utils.IntervalUtils; // import the package/class the method depends on
/**
 * Create the appropriate instance of an alignment context spliterator based on the input parameters.
 *
 * Please note that this wrapper is still tied to {@link LocusIteratorByState} and some parameters are being passed directly to that class.
 *
 * @param intervalsForTraversal the intervals to generate alignment contexts over.
 * @param header SAM file header to use
 * @param readIterator iterator of sorted GATK reads
 * @param dictionary the SAMSequenceDictionary being used for this traversal. This can be the same as the reference. {@code null} is supported, but will often lead to invalid parameter combinations.
 * @param downsamplingInfo how to downsample (for {@link LocusIteratorByState})
 * @param isReference {@code true} if the dictionary specified above comes from a reference, {@code false} if no reference is being used or it is not a reference.
 * @param emitEmptyLoci whether loci with no coverage should be emitted. In this case, the AlignmentContext will be empty (not null).
 * @param isKeepUniqueReadListInLibs if true, we will keep the unique reads from the samIterator and make them
 *                                   available via the transferReadsFromAllPreviousPileups interface (this parameter is specific to {@link LocusIteratorByState})
 * @param isIncludeDeletions include reads with deletion on the loci in question
 * @param isIncludeNs include reads with N on the loci in question
 * @return iterator that produces AlignmentContexts ready for consumption (e.g. by a {@link org.broadinstitute.hellbender.engine.LocusWalker})
 */
private static Iterator<AlignmentContext> createAlignmentContextIterator(final List<SimpleInterval> intervalsForTraversal,
                                                                         final SAMFileHeader header,
                                                                         final Iterator<GATKRead> readIterator,
                                                                         final SAMSequenceDictionary dictionary,
                                                                         final LIBSDownsamplingInfo downsamplingInfo,
                                                                         final boolean isReference,
                                                                         boolean emitEmptyLoci,
                                                                         boolean isKeepUniqueReadListInLibs,
                                                                         boolean isIncludeDeletions,
                                                                         boolean isIncludeNs) {
    // get the samples from the read groups
    final Set<String> samples = header.getReadGroups().stream()
            .map(SAMReadGroupRecord::getSample)
            .collect(Collectors.toSet());
    // get the LIBS
    final LocusIteratorByState libs = new LocusIteratorByState(readIterator, downsamplingInfo, isKeepUniqueReadListInLibs, samples, header, isIncludeDeletions, isIncludeNs);
    List<SimpleInterval> finalIntervals = intervalsForTraversal;
    validateEmitEmptyLociParameters(emitEmptyLoci, dictionary, intervalsForTraversal, isReference);
    if (emitEmptyLoci) {
        // If no intervals were specified, then use the entire reference (or best available sequence dictionary).
        if (!areIntervalsSpecified(finalIntervals)) {
            finalIntervals = IntervalUtils.getAllIntervalsForReference(dictionary);
        }
        final IntervalLocusIterator intervalLocusIterator = new IntervalLocusIterator(finalIntervals.iterator());
        return new IntervalAlignmentContextIterator(libs, intervalLocusIterator, header.getSequenceDictionary());
    } else if (areIntervalsSpecified(finalIntervals)) {
        return new IntervalOverlappingIterator<>(libs, finalIntervals, header.getSequenceDictionary());
    } else {
        // prepare the iterator
        return libs;
    }
}
Example 15: setupPipeline
import org.broadinstitute.hellbender.utils.IntervalUtils; // import the package/class the method depends on
@Override
protected void setupPipeline( Pipeline pipeline ) {
    // Load the reads.
    final ReadsDataflowSource readsDataflowSource = new ReadsDataflowSource(bam, pipeline);
    final SAMFileHeader readsHeader = readsDataflowSource.getHeader();
    final List<SimpleInterval> intervals = intervalArgumentCollection.intervalsSpecified() ? intervalArgumentCollection.getIntervals(readsHeader.getSequenceDictionary())
            : IntervalUtils.getAllIntervalsForReference(readsHeader.getSequenceDictionary());
    final PCollectionView<SAMFileHeader> headerSingleton = ReadsDataflowSource.getHeaderView(pipeline, readsHeader);
    final PCollection<GATKRead> initialReads = readsDataflowSource.getReadPCollection(intervals);
    final OpticalDuplicateFinder finder = opticalDuplicatesArgumentCollection.READ_NAME_REGEX != null ?
            new OpticalDuplicateFinder(opticalDuplicatesArgumentCollection.READ_NAME_REGEX, opticalDuplicatesArgumentCollection.OPTICAL_DUPLICATE_PIXEL_DISTANCE, null) : null;
    final PCollectionView<OpticalDuplicateFinder> finderPcolView = pipeline.apply(Create.of(finder)).apply(View.<OpticalDuplicateFinder>asSingleton());

    // Apply MarkDuplicates to produce updated GATKReads.
    final PCollection<GATKRead> markedReads = initialReads.apply(new MarkDuplicates(headerSingleton, finderPcolView));

    // Load the Variants and the Reference and join them to reads.
    final VariantsDataflowSource variantsDataflowSource = new VariantsDataflowSource(baseRecalibrationKnownVariants, pipeline);
    // Use the BQSR_REFERENCE_WINDOW_FUNCTION so that the reference bases required by BQSR for each read are fetched
    final ReferenceMultiSource referenceDataflowSource = new ReferenceMultiSource(pipeline.getOptions(), referenceURL, BaseRecalibrationEngine.BQSR_REFERENCE_WINDOW_FUNCTION);
    final PCollection<KV<GATKRead, ReadContextData>> readsWithContext = AddContextDataToRead.add(markedReads, referenceDataflowSource, variantsDataflowSource);

    // BQSR.
    // default arguments are best practice.
    RecalibrationArgumentCollection recalArgs = new RecalibrationArgumentCollection();
    final SAMSequenceDictionary readsDictionary = readsHeader.getSequenceDictionary();
    final SAMSequenceDictionary refDictionary = referenceDataflowSource.getReferenceSequenceDictionary(readsDictionary);
    checkSequenceDictionaries(refDictionary, readsDictionary);
    PCollectionView<SAMSequenceDictionary> refDictionaryView = pipeline.apply(Create.of(refDictionary)).setName("refDictionary").apply(View.asSingleton());
    BaseRecalibratorTransform baseRecalibrator = new BaseRecalibratorTransform(headerSingleton, refDictionaryView, recalArgs);
    final PCollection<BaseRecalOutput> recalibrationReports = readsWithContext.apply(baseRecalibrator).apply(baseRecalibrator.toBaseRecalOutput());
    final PCollectionView<BaseRecalOutput> mergedRecalibrationReport = recalibrationReports.apply(View.<BaseRecalOutput>asSingleton());
    final ApplyBQSRArgumentCollection applyArgs = new ApplyBQSRArgumentCollection();
    final PCollection<GATKRead> finalReads = markedReads.apply(new ApplyBQSRTransform(headerSingleton, mergedRecalibrationReport, applyArgs));
    SmallBamWriter.writeToFile(pipeline, finalReads, readsHeader, output);
}