本文整理汇总了Java中org.broadinstitute.hellbender.engine.ReferenceDataSource类的典型用法代码示例。如果您正苦于以下问题：Java ReferenceDataSource类的具体用法？Java ReferenceDataSource怎么用？Java ReferenceDataSource使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
ReferenceDataSource类属于org.broadinstitute.hellbender.engine包，在下文中一共展示了ReferenceDataSource类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: processElement
import org.broadinstitute.hellbender.engine.ReferenceDataSource; //导入依赖的package包/类
/**
 * Feeds every read of the incoming shard, together with its overlapping variants and
 * reference bases, to the recalibration engine.
 *
 * <p>Per-bundle setup ({@code init} followed by {@code onTraversalStart}) happens lazily on the
 * first element, because the side input is not yet available in {@code startBundle}.
 *
 * @param c context whose element is the shard of reads and per-read context data
 * @throws Exception propagated from setup or from processing a read
 */
@Override
public void processElement(ProcessContext c) throws Exception {
    // Lazy one-time setup for this bundle (see Javadoc for why it is not done in startBundle).
    if (firstInBundle) {
        init(c);
        onTraversalStart();
        firstInBundle = false;
    }

    final AddContextDataToReadOptimized.ContextShard contextShard = c.element();
    int index = 0;
    // reads and readContext are parallel lists: entry i of one belongs to entry i of the other.
    while (index < contextShard.reads.size()) {
        final GATKRead currentRead = contextShard.reads.get(index);
        // Reads travel without their header, so re-attach it before use.
        ReadUtils.restoreHeaderIfNecessary(currentRead, header);

        final ReadContextData contextData = contextShard.readContext.get(index);
        final Iterable<Variant> overlappingVariants = contextData.getOverlappingVariants();
        final ReferenceDataSource overlappingReference =
                new ReferenceMemorySource(contextData.getOverlappingReferenceBases(), referenceSequenceDictionary);

        recalibrationEngine.processRead(currentRead, overlappingReference, overlappingVariants);
        nReadsProcessed++;
        index++;
    }
}
示例2: processElement
import org.broadinstitute.hellbender.engine.ReferenceDataSource; //导入依赖的package包/类
/**
 * Feeds a single read, together with its overlapping variants and reference bases, to the
 * recalibration engine.
 *
 * <p>Per-bundle setup ({@code init} followed by {@code onTraversalStart}) happens lazily on the
 * first element, because the side input is not yet available in {@code startBundle}.
 *
 * @param c context whose element pairs a read (key) with its context data (value)
 * @throws Exception propagated from setup or from processing the read
 */
@Override
public void processElement(ProcessContext c) throws Exception {
    // Lazy one-time setup for this bundle (see Javadoc for why it is not done in startBundle).
    if (firstInBundle) {
        init(c);
        onTraversalStart();
        firstInBundle = false;
    }

    final GATKRead currentRead = c.element().getKey();
    // Reads travel without their header, so re-attach it before use.
    ReadUtils.restoreHeaderIfNecessary(currentRead, header);

    final ReadContextData contextData = c.element().getValue();
    final Iterable<Variant> overlappingVariants = contextData.getOverlappingVariants();
    final ReferenceDataSource overlappingReference =
            new ReferenceMemorySource(contextData.getOverlappingReferenceBases(), referenceSequenceDictionary);

    recalibrationEngine.processRead(currentRead, overlappingReference, overlappingVariants);
    nReadsProcessed++;
}
示例3: onTraversalStart
import org.broadinstitute.hellbender.engine.ReferenceDataSource; //导入依赖的package包/类
/**
 * Builds the bin list and writes it to {@code outputFile}.
 *
 * <p>Steps: resolve the input intervals (the user-supplied ones, or every interval of the
 * reference when none were given), pad them, split them into bins, drop bins whose reference
 * bases are all N, and write the surviving bins.
 */
@Override
public void onTraversalStart() {
    final SAMSequenceDictionary sequenceDictionary = getBestAvailableSequenceDictionary();

    final List<SimpleInterval> inputIntervals;
    if (hasIntervals()) {
        CopyNumberArgumentValidationUtils.validateIntervalArgumentCollection(intervalArgumentCollection);
        inputIntervals = intervalArgumentCollection.getIntervals(sequenceDictionary);
    } else {
        // if the user didn't add any intervals, we assume that they wanted to do whole genome sequencing
        inputIntervals = IntervalUtils.getAllIntervalsForReference(sequenceDictionary);
    }

    logger.info("Padding intervals...");
    final IntervalList paddedIntervalList = padIntervals(inputIntervals, padding, sequenceDictionary);

    logger.info("Generating bins...");
    final IntervalList unfilteredBins = generateBins(paddedIntervalList, binLength, sequenceDictionary);

    logger.info("Filtering bins containing only Ns...");
    final IntervalList bins;
    // The reference is opened here solely for the N-filtering step; try-with-resources makes sure
    // it is closed again, even if filtering throws, instead of leaking the underlying handle.
    try (final ReferenceDataSource reference = ReferenceDataSource.of(referenceArguments.getReferencePath())) {
        bins = filterBinsContainingOnlyNs(unfilteredBins, reference);
    }

    logger.info(String.format("Writing bins to %s...", outputFile));
    bins.write(outputFile);
}
示例4: GencodeFuncotationFactory
import org.broadinstitute.hellbender.engine.ReferenceDataSource; //导入依赖的package包/类
public GencodeFuncotationFactory(final Path gencodeTranscriptFastaFile,
final String version,
final FuncotatorArgumentDefinitions.TranscriptSelectionMode transcriptSelectionMode,
final Set<String> userRequestedTranscripts,
final LinkedHashMap<String, String> annotationOverrides) {
this.gencodeTranscriptFastaFile = gencodeTranscriptFastaFile;
transcriptFastaReferenceDataSource = ReferenceDataSource.of(gencodeTranscriptFastaFile);
transcriptIdMap = createTranscriptIdMap(transcriptFastaReferenceDataSource);
this.transcriptSelectionMode = transcriptSelectionMode;
this.version = version;
// Go through each requested transcript and remove the version numbers from them if they exist:
this.userRequestedTranscripts = new HashSet<>();
for ( final String transcript : userRequestedTranscripts ) {
this.userRequestedTranscripts.add( getTranscriptIdWithoutVersionNumber(transcript) );
}
// Initialize overrides / defaults:
initializeAnnotationOverrides( annotationOverrides );
}
示例5: createTranscriptIdMap
import org.broadinstitute.hellbender.engine.ReferenceDataSource; //导入依赖的package包/类
/**
 * Builds a lookup table from transcript ID to the FASTA-level information for that transcript.
 *
 * <p>Each sequence name in the FASTA dictionary is a {@code |}-separated list of names; every
 * element of that list is registered as a key pointing at the same
 * {@link MappedTranscriptIdInfo}, so that any of them resolves to the full sequence entry.
 *
 * @param fastaReference The {@link ReferenceDataSource} for the GENCODE transcript FASTA file.
 * @return A {@link Map} from each name found in the sequence names to the {@link MappedTranscriptIdInfo} of its sequence.
 */
@VisibleForTesting
static Map<String, MappedTranscriptIdInfo> createTranscriptIdMap(final ReferenceDataSource fastaReference) {

    final Map<String, MappedTranscriptIdInfo> infoByTranscriptId = new HashMap<>();

    for ( final SAMSequenceRecord record : fastaReference.getSequenceDictionary().getSequences() ) {
        final MappedTranscriptIdInfo info = createMappedTranscriptIdInfo( record );

        // One sequence name holds several names joined by '|'; register each of them.
        final String pipeDelimitedNames = record.getSequenceName();
        for ( final String id : Utils.split(pipeDelimitedNames, "|") ) {
            infoByTranscriptId.put(id, info);
        }
    }

    return infoByTranscriptId;
}
示例6: getCodingSequenceFromTranscriptFasta
import org.broadinstitute.hellbender.engine.ReferenceDataSource; //导入依赖的package包/类
/**
 * Reads the coding sequence of {@code transcriptId} from the GENCODE transcript FASTA file.
 *
 * <p>The queried interval runs from the transcript's {@code codingSequenceStart} to its
 * {@code codingSequenceEnd} on the sequence named by its {@code mapKey}.
 *
 * @param transcriptId The ID of the transcript to look up.
 * @param transcriptIdMap Map from transcript ID to the {@link MappedTranscriptIdInfo} describing where that transcript lives in the FASTA.
 * @param transcriptFastaReferenceDataSource A {@link ReferenceDataSource} for the GENCODE transcript FASTA file.
 * @return The bases of the coding-sequence interval, as a string.
 * @throws UserException.BadInput if {@code transcriptId} has no entry in {@code transcriptIdMap}.
 */
private static String getCodingSequenceFromTranscriptFasta( final String transcriptId,
                                                            final Map<String, MappedTranscriptIdInfo> transcriptIdMap,
                                                            final ReferenceDataSource transcriptFastaReferenceDataSource) {

    final MappedTranscriptIdInfo idInfo = transcriptIdMap.get(transcriptId);
    if ( idInfo == null ) {
        throw new UserException.BadInput( "Unable to find the given Transcript ID in our transcript list (not in given transcript FASTA file): " + transcriptId );
    }

    final SimpleInterval codingInterval =
            new SimpleInterval( idInfo.mapKey, idInfo.codingSequenceStart, idInfo.codingSequenceEnd );

    return transcriptFastaReferenceDataSource.queryAndPrefetch( codingInterval ).getBaseString();
}
示例7: testBAQOverwritesExistingTagWithNull
import org.broadinstitute.hellbender.engine.ReferenceDataSource; //导入依赖的package包/类
/**
 * Checks that a pre-existing "BQ" attribute is removed when BAQ is asked to RECALCULATE and
 * ADD_TAG on a read that cannot be BAQed.
 */
@Test
public void testBAQOverwritesExistingTagWithNull() {
    final Path reference = IOUtils.getPath(hg19_chr1_1M_Reference);

    // try-with-resources: the file-backed reference is closed even when an assertion fails,
    // rather than being left open for the rest of the test run.
    try (final ReferenceDataSource rds = new ReferenceFileSource(reference)) {
        // create a read with a single base off the end of the contig, which cannot be BAQed
        final GATKRead read = ArtificialReadUtils.createArtificialRead(createHeader(), "foo", 0, rds.getSequenceDictionary().getSequence("1").getSequenceLength() + 1, 1);
        read.setBases(new byte[]{(byte) 'A'});
        read.setBaseQualities(new byte[]{(byte) 20});
        read.setCigar("1M");
        read.setAttribute("BQ", "A");

        // try to BAQ and tell it to RECALCULATE AND ADD_TAG
        final BAQ baq = new BAQ(1.0e-3, 0.1, 7, (byte) 4);
        baq.baqRead(read, rds, BAQ.CalculationMode.RECALCULATE, BAQ.QualityMode.ADD_TAG);

        // did we remove the existing tag?
        Assert.assertFalse(read.hasAttribute("BQ"));
    }
}
示例8: testBAQ
import org.broadinstitute.hellbender.engine.ReferenceDataSource; //导入依赖的package包/类
/**
 * Runs the BAQ HMM for one test case and compares the resulting qualities to the expected ones.
 *
 * @param test the test case supplying the read, reference bases, input qualities and expected BAQ values
 * @param lookupWithFasta whether to fetch reference bases through {@code rds}; only honoured when
 *                        the test case has a cigar and {@code rds} is non-null
 * @param rds reference data source for the lookup; may be null
 */
private void testBAQ(BAQTest test, boolean lookupWithFasta, ReferenceDataSource rds) {
    // band width of 7: matches current samtools parameters
    final BAQ baqHMM = new BAQ(1.0e-3, 0.1, 7, (byte) 4);
    final GATKRead read = test.createRead();

    // Use the reference data source when possible; otherwise use the bases stored in the test case.
    final boolean queryReference = lookupWithFasta && test.cigar != null && rds != null;
    final BAQ.BAQCalculationResult result = queryReference
            ? baqHMM.calcBAQFromHMM(read, rds)
            : baqHMM.calcBAQFromHMM(read, test.refBases.getBytes(), test.refOffset);
    Assert.assertNotNull(result);

    System.out.println(Strings.repeat("-", 40));
    System.out.println("reads : " + new String(test.readBases));
    printQuals(System.out, "in-quals:", test.quals, false);
    printQuals(System.out, "bq-quals:", result.bq, false);

    int pos = 0;
    while (pos < test.quals.length) {
        // A BAQ value may only fall below the minimum when the expected value does too.
        Assert.assertTrue(result.bq[pos] >= baqHMM.getMinBaseQual() || test.expected[pos] < baqHMM.getMinBaseQual(), "BQ < min base quality");
        Assert.assertEquals(result.bq[pos], test.expected[pos], "Did not see the expected BAQ value at " + pos);
        pos++;
    }
}
示例9: addReferenceDataToUnitTestData
import org.broadinstitute.hellbender.engine.ReferenceDataSource; //导入依赖的package包/类
/**
 * Returns a copy of {@code unitTestData} in which every row has four extra trailing elements:
 * the reference file name, the feature reader, the reference data source and the transcript
 * FASTA file, in that order.
 *
 * @param unitTestData rows to extend; the input rows themselves are not modified
 * @param referenceFileName value appended at position {@code length}
 * @param featureReader value appended at position {@code length + 1}
 * @param referenceDataSource value appended at position {@code length + 2}
 * @param transcriptFastaFile value appended at position {@code length + 3}
 * @return a new list holding one extended row per input row, in input order
 */
private static List<Object[]> addReferenceDataToUnitTestData(final List<Object[]> unitTestData,
                                                             final String referenceFileName,
                                                             final FeatureReader<GencodeGtfFeature> featureReader,
                                                             final ReferenceDataSource referenceDataSource,
                                                             final String transcriptFastaFile) {

    final List<Object[]> outList = new ArrayList<>(unitTestData.size());

    for ( final Object[] rawData : unitTestData ) {
        final int rawLength = rawData.length;
        final Object[] dataWithReference = new Object[rawLength + 4];

        // Bulk-copy the original columns instead of copying them one element at a time.
        System.arraycopy(rawData, 0, dataWithReference, 0, rawLength);

        // The four appended columns sit directly after the original ones.
        dataWithReference[rawLength]     = referenceFileName;
        dataWithReference[rawLength + 1] = featureReader;
        dataWithReference[rawLength + 2] = referenceDataSource;
        dataWithReference[rawLength + 3] = transcriptFastaFile;

        outList.add(dataWithReference);
    }

    return outList;
}
示例10: makeAnnotatedCall
import org.broadinstitute.hellbender.engine.ReferenceDataSource; //导入依赖的package包/类
/**
 * Annotates {@code call} against an in-memory reference built from {@code ref}.
 *
 * @param ref reference bases covering {@code refLoc}
 * @param refLoc interval that {@code ref} spans
 * @param tracker feature context passed to the annotation engine
 * @param header header whose sequence dictionary backs the in-memory reference
 * @param mergedVC variant context that supplies the locus and the allele count to compare against
 * @param readAlleleLikelihoods likelihoods passed to the annotation engine
 * @param call the call to annotate
 * @return the annotated call as-is when {@code call} and {@code mergedVC} have the same number of
 *         alleles; otherwise the annotated call passed through
 *         {@code GATKVariantContextUtils.reverseTrimAlleles}
 */
protected VariantContext makeAnnotatedCall(byte[] ref, SimpleInterval refLoc, FeatureContext tracker, SAMFileHeader header, VariantContext mergedVC, ReadLikelihoods<Allele> readAlleleLikelihoods, VariantContext call) {
    final SimpleInterval callLocus = new SimpleInterval(mergedVC.getContig(), mergedVC.getStart(), mergedVC.getEnd());
    final SimpleInterval referenceWindow = new SimpleInterval(refLoc);

    // Wrap the supplied bases so the annotation engine can query them like any other reference.
    final ReferenceBases windowBases = new ReferenceBases(ref, referenceWindow);
    final ReferenceDataSource inMemoryReference = new ReferenceMemorySource(windowBases, header.getSequenceDictionary());
    final ReferenceContext referenceContext = new ReferenceContext(inMemoryReference, callLocus, referenceWindow);

    final VariantContext annotated = annotationEngine.annotateContext(call, tracker, referenceContext, readAlleleLikelihoods, a -> true);

    if (call.getAlleles().size() == mergedVC.getAlleles().size()) {
        return annotated;
    }
    return GATKVariantContextUtils.reverseTrimAlleles(annotated);
}
示例11: calcBAQFromHMM
import org.broadinstitute.hellbender.engine.ReferenceDataSource; //导入依赖的package包/类
/**
 * Computes BAQ for {@code read}, fetching the needed reference bases from {@code refDS}.
 *
 * @param read the read to compute BAQ for
 * @param refDS source of the reference bases
 * @return the BAQ result, or {@code null} when the reference window for the read ends past the
 *         end of the read's contig
 */
public BAQCalculationResult calcBAQFromHMM(GATKRead read, ReferenceDataSource refDS) {
    final SimpleInterval window = getReferenceWindowForRead(read, getBandWidth());

    // A window that runs off the end of the contig cannot be fetched, so there is no result.
    final int windowEnd = window.getEnd();
    final int contigLength = refDS.getSequenceDictionary().getSequence(read.getContig()).getSequenceLength();
    if ( windowEnd > contigLength ) {
        return null;
    }

    // Fetch the reference bases covering the window and hand them to the array-based overload.
    final ReferenceSequence windowSequence = refDS.queryAndPrefetch(window.getContig(), window.getStart(), windowEnd);
    final int offsetFromReadStart = window.getStart() - read.getStart();
    return calcBAQFromHMM(read, windowSequence.getBases(), offsetFromReadStart);
}
示例12: baqRead
import org.broadinstitute.hellbender.engine.ReferenceDataSource; //导入依赖的package包/类
/**
 * Applies BAQ to {@code read} according to {@code calculationType} and {@code qmode}.
 *
 * <ul>
 *   <li>Nothing is done when {@code calculationType} is OFF or the read is excluded from BAQ.</li>
 *   <li>When {@code calculationType} is RECALCULATE, or the read has no BAQ tag, the HMM is run
 *       using {@code refDS}. A non-null result is applied as dictated by {@code qmode}; on a null
 *       result any existing BAQ tag is cleared.</li>
 *   <li>Otherwise the existing tag is used, but only when {@code qmode} is OVERWRITE_QUALS.</li>
 * </ul>
 *
 * @param read the read to process; may be modified in place
 * @param refDS source of reference bases for the HMM
 * @param calculationType whether and when to run the HMM
 * @param qmode what to do with the computed BAQ values
 * @return the read's base qualities as fetched on entry, or the HMM's BAQ values when
 *         {@code qmode} is DONT_MODIFY and the HMM produced a result
 * @throws GATKException if {@code qmode} is not one of the handled values
 */
public byte[] baqRead(GATKRead read, ReferenceDataSource refDS, CalculationMode calculationType, QualityMode qmode ) {
    if ( DEBUG ) {
        System.out.printf("BAQ %s read %s%n", calculationType, read.getName());
    }

    byte[] quals = read.getBaseQualities();

    // Nothing to do when BAQ is switched off or this read is excluded.
    if ( calculationType == CalculationMode.OFF || excludeReadFromBAQ(read) ) {
        return quals;
    }

    final boolean readHasBAQTag = hasBAQTag(read);
    final boolean runHmm = calculationType == CalculationMode.RECALCULATE || !readHasBAQTag;

    if ( !runHmm ) {
        // The tag is present and trusted; applying it only makes sense when overwriting quals.
        if ( qmode == QualityMode.OVERWRITE_QUALS ) {
            if ( DEBUG ) {
                System.out.printf("  Taking BAQ from tag%n");
            }
            // this overwrites the original qualities
            calcBAQFromTag(read, true, false);
        }
        return quals;
    }

    if ( DEBUG ) {
        System.out.printf("  Calculating BAQ on the fly%n");
    }
    final BAQCalculationResult hmmResult = calcBAQFromHMM(read, refDS);

    if ( hmmResult == null ) {
        // The read could not be BAQed, so an existing tag cannot be trusted either.
        if ( readHasBAQTag ) {
            read.clearAttribute(BAQ_TAG);
        }
        return quals;
    }

    switch ( qmode ) {
        case ADD_TAG:
            addBAQTag(read, hmmResult.bq);
            break;
        case OVERWRITE_QUALS:
            // NOTE(review): this only affects the read if getBaseQualities() returns the live
            // array rather than a defensive copy -- confirm against the GATKRead contract.
            System.arraycopy(hmmResult.bq, 0, read.getBaseQualities(), 0, hmmResult.bq.length);
            break;
        case DONT_MODIFY:
            quals = hmmResult.bq;
            break;
        default:
            throw new GATKException("BUG: unexpected qmode " + qmode);
    }
    return quals;
}
示例13: processRead
import org.broadinstitute.hellbender.engine.ReferenceDataSource; //导入依赖的package包/类
/**
 * Folds one read into the recalibration tables.
 *
 * <p>The read is first transformed; a read that is empty after transformation is skipped without
 * being counted. Otherwise per-base SNP/insertion/deletion indicators are computed against the
 * reference, a BAQ array is obtained, and -- unless that array is {@code null} -- covariates,
 * the skip mask and fractional error arrays are assembled and pushed into the tables.
 *
 * @param originalRead the read to process, before transformation
 * @param refDS source of reference bases for the read
 * @param knownSites known sites, used when computing which bases to skip
 */
public void processRead( final GATKRead originalRead, final ReferenceDataSource refDS, final Iterable<? extends Locatable> knownSites ) {
    final ReadTransformer transform = makeReadTransform();
    final GATKRead read = transform.apply(originalRead);
    if( read.isEmpty() ) {
        // the whole read was inside the adaptor so skip it
        return;
    }

    RecalUtils.parsePlatformForRead(read, readsHeader, recalArgs);

    final int readLength = read.getLength();
    final int[] isSNP = new int[readLength];
    final int[] isInsertion = new int[readLength];
    final int[] isDeletion = new int[readLength];

    // This call fills the three indicator arrays, so it must run whether or not BAQ is used.
    final int nErrors = calculateIsSNPOrIndel(read, refDS, isSNP, isInsertion, isDeletion);

    // For efficiency the real BAQ array is computed only when there is an error to marginalize
    // over; for ILMN data ~85% of reads have no error.
    final byte[] baqArray;
    if ( nErrors == 0 || !recalArgs.enableBAQ ) {
        baqArray = flatBAQArray(read);
    } else {
        baqArray = calculateBAQArray(read, refDS);
    }

    // some reads just can't be BAQ'ed
    if( baqArray != null ) {
        final ReadCovariates readCovariates = RecalUtils.computeCovariates(read, readsHeader, this.covariates, true, keyCache);
        // skip known sites of variation as well as low quality and non-regular bases
        final boolean[] skipMask = calculateSkipArray(read, knownSites);
        final double[] snpErrors = calculateFractionalErrorArray(isSNP, baqArray);
        final double[] insertionErrors = calculateFractionalErrorArray(isInsertion, baqArray);
        final double[] deletionErrors = calculateFractionalErrorArray(isDeletion, baqArray);

        // Bundle everything for this read and update the tables with it.
        updateRecalTablesForRead(
                new ReadRecalibrationInfo(read, readCovariates, skipMask, snpErrors, insertionErrors, deletionErrors));
    }

    numReadsProcessed++;
}
示例14: filterBinsContainingOnlyNs
import org.broadinstitute.hellbender.engine.ReferenceDataSource; //导入依赖的package包/类
/**
 * Returns the bins whose reference bases are not all N.
 *
 * @param unfilteredBins candidate bins
 * @param reference source of the reference bases; its sequence dictionary backs the returned list
 * @return a new {@link IntervalList} holding, in input order, every bin with at least one non-N base
 */
private static IntervalList filterBinsContainingOnlyNs(final IntervalList unfilteredBins, final ReferenceDataSource reference) {
    final IntervalList retainedBins = new IntervalList(reference.getSequenceDictionary());

    for (final Interval candidate : unfilteredBins) {
        // Keep the bin as soon as a single base other than N is seen.
        final boolean hasNonNBase = Utils.stream(reference.query(new SimpleInterval(candidate)))
                .anyMatch(base -> base != Nucleotide.N.toBase());
        if (hasNonNBase) {
            retainedBins.add(candidate);
        }
    }

    return retainedBins;
}
示例15: apply
import org.broadinstitute.hellbender.engine.ReferenceDataSource; //导入依赖的package包/类
/**
 * Builds a recalibration report from reads paired with their context data.
 *
 * <p>Each partition runs its own {@link BaseRecalibrationEngine} over its reads and emits one
 * set of tables; the per-partition tables are then combined with {@code treeAggregate},
 * finalized, and turned into a report.
 *
 * @param readsWithContext reads paired with their overlapping variants and reference bases
 * @param header header given to the recalibration engine and the covariate lists
 * @param referenceDictionary sequence dictionary for the per-read in-memory reference
 * @param recalArgs recalibration arguments
 * @return the recalibration report built from the combined tables
 */
public static RecalibrationReport apply( final JavaPairRDD<GATKRead, ReadContextData> readsWithContext, final SAMFileHeader header, final SAMSequenceDictionary referenceDictionary, final RecalibrationArgumentCollection recalArgs ) {
    final JavaRDD<RecalibrationTables> tablesPerPartition = readsWithContext.mapPartitions(partition -> {
        final BaseRecalibrationEngine engine = new BaseRecalibrationEngine(recalArgs, header);
        engine.logCovariatesUsed();

        while ( partition.hasNext() ) {
            final Tuple2<GATKRead, ReadContextData> pair = partition.next();
            final Iterable<GATKVariant> overlappingVariants = pair._2().getOverlappingVariants();
            final ReferenceBases overlappingBases = pair._2().getOverlappingReferenceBases();
            final ReferenceDataSource overlappingReference = new ReferenceMemorySource(overlappingBases, referenceDictionary);
            engine.processRead(pair._1(), overlappingReference, overlappingVariants);
        }

        // One set of tables per partition.
        return Arrays.asList(engine.getRecalibrationTables()).iterator();
    });

    final RecalibrationTables zeroTables = new RecalibrationTables(new StandardCovariateList(recalArgs, header));

    // Aggregation depth: log2 of the partition count, but never less than 1.
    final int partitionCount = tablesPerPartition.partitions().size();
    final int aggregationDepth = Math.max(1, (int)(Math.log(partitionCount) / Math.log(2)));

    final RecalibrationTables combinedTables = tablesPerPartition.treeAggregate(
            zeroTables,
            RecalibrationTables::inPlaceCombine,
            RecalibrationTables::inPlaceCombine,
            aggregationDepth);

    BaseRecalibrationEngine.finalizeRecalibrationTables(combinedTables);

    final QuantizationInfo quantizationInfo = new QuantizationInfo(combinedTables, recalArgs.QUANTIZING_LEVELS);
    final StandardCovariateList covariates = new StandardCovariateList(recalArgs, header);

    return RecalUtils.createRecalibrationReport(
            recalArgs.generateReportTable(covariates.covariateNames()),
            quantizationInfo.generateReportTable(),
            RecalUtils.generateReportTables(combinedTables, covariates));
}