本文整理汇总了C++中BamReader::GetHeaderText方法的典型用法代码示例。如果您正苦于以下问题:C++ BamReader::GetHeaderText方法的具体用法?C++ BamReader::GetHeaderText怎么用?C++ BamReader::GetHeaderText使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类BamReader
的用法示例。
在下文中一共展示了BamReader::GetHeaderText方法的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: IntersectBamPE
void BedIntersectPE::IntersectBamPE(string bamFile) {
// load the "B" bed file into a map so
// that we can easily compare "A" to it for overlaps
_bedB->loadBedFileIntoMap();
// open the BAM file
BamReader reader;
BamWriter writer;
reader.Open(bamFile);
// get header & reference information
string bamHeader = reader.GetHeaderText();
RefVector refs = reader.GetReferenceData();
// open a BAM output to stdout if we are writing BAM
if (_bamOutput == true) {
// set compression mode
BamWriter::CompressionMode compressionMode = BamWriter::Compressed;
if ( _isUncompressedBam ) compressionMode = BamWriter::Uncompressed;
writer.SetCompressionMode(compressionMode);
// open our BAM writer
writer.Open("stdout", bamHeader, refs);
}
// track the previous and current sequence
// names so that we can identify blocks of
// alignments for a given read ID.
string prevName, currName;
prevName = currName = "";
vector<BamAlignment> alignments; // vector of BAM alignments for a given ID in a BAM file.
alignments.reserve(100);
_bedA->bedType = 10; // it's a full BEDPE given it's BAM
// rip through the BAM file and convert each mapped entry to BEDPE
BamAlignment bam1, bam2;
while (reader.GetNextAlignment(bam1)) {
// the alignment must be paired
if (bam1.IsPaired() == true) {
// grab the second alignment for the pair.
reader.GetNextAlignment(bam2);
// require that the alignments are from the same query
if (bam1.Name == bam2.Name) {
ProcessBamBlock(bam1, bam2, refs, writer);
}
else {
cerr << "*****ERROR: -bedpe requires BAM to be sorted or grouped by query name. " << endl;
exit(1);
}
}
}
// close up
reader.Close();
if (_bamOutput == true) {
writer.Close();
}
}
示例2: while
bool RevertTool::RevertToolPrivate::Run(void) {
// opens the BAM file without checking for indexes
BamReader reader;
if ( !reader.Open(m_settings->InputFilename) ) {
cerr << "Could not open input BAM file... quitting." << endl;
return false;
}
// get BAM file metadata
const string& headerText = reader.GetHeaderText();
const RefVector& references = reader.GetReferenceData();
// open writer
BamWriter writer;
bool writeUncompressed = ( m_settings->OutputFilename == Options::StandardOut() && !m_settings->IsForceCompression );
if ( !writer.Open(m_settings->OutputFilename, headerText, references, writeUncompressed) ) {
cerr << "Could not open " << m_settings->OutputFilename << " for writing." << endl;
return false;
}
// plow through file, reverting alignments
BamAlignment al;
while ( reader.GetNextAlignment(al) ) {
RevertAlignment(al);
writer.SaveAlignment(al);
}
// clean and exit
reader.Close();
writer.Close();
return true;
}
示例3: CollectCoverageBam
void BedCoverage::CollectCoverageBam(string bamFile) {
// load the "B" bed file into a map so
// that we can easily compare "A" to it for overlaps
_bedB->loadBedCovFileIntoMap();
// open the BAM file
BamReader reader;
reader.Open(bamFile);
// get header & reference information
string header = reader.GetHeaderText();
RefVector refs = reader.GetReferenceData();
// convert each aligned BAM entry to BED
// and compute coverage on B
BamAlignment bam;
while (reader.GetNextAlignment(bam)) {
if (bam.IsMapped()) {
// treat the BAM alignment as a single "block"
if (_obeySplits == false) {
// construct a new BED entry from the current BAM alignment.
BED a;
a.chrom = refs.at(bam.RefID).RefName;
a.start = bam.Position;
a.end = bam.GetEndPosition(false, false);
a.strand = "+";
if (bam.IsReverseStrand()) a.strand = "-";
_bedB->countHits(a, _sameStrand, _diffStrand, _countsOnly);
}
// split the BAM alignment into discrete blocks and
// look for overlaps only within each block.
else {
// vec to store the discrete BED "blocks" from a
bedVector bedBlocks;
// since we are counting coverage, we do want to split blocks when a
// deletion (D) CIGAR op is encountered (hence the true for the last parm)
GetBamBlocks(bam, refs.at(bam.RefID).RefName, bedBlocks, false, true);
// use countSplitHits to avoid over-counting each split chunk
// as distinct read coverage.
_bedB->countSplitHits(bedBlocks, _sameStrand, _diffStrand, _countsOnly);
}
}
}
// report the coverage (summary or histogram) for BED B.
if (_countsOnly == true)
ReportCounts();
else
ReportCoverage();
// close the BAM file
reader.Close();
}
示例4: GetHeaderText
// makes a virtual, unified header for all the bam files in the multireader
const string BamMultiReader::GetHeaderText(void) const {
string mergedHeader = "";
map<string, bool> readGroups;
// foreach extraction entry (each BAM file)
for (vector<pair<BamReader*, BamAlignment*> >::const_iterator rs = readers.begin(); rs != readers.end(); ++rs) {
BamReader* reader = rs->first;
string headerText = reader->GetHeaderText();
if ( headerText.empty() ) continue;
map<string, bool> currentFileReadGroups;
stringstream header(headerText);
vector<string> lines;
string item;
while (getline(header, item))
lines.push_back(item);
for (vector<string>::const_iterator it = lines.begin(); it != lines.end(); ++it) {
// get next line from header, skip if empty
string headerLine = *it;
if ( headerLine.empty() ) { continue; }
// if first file, save HD & SQ entries
if ( rs == readers.begin() ) {
if ( headerLine.find("@HD") == 0 || headerLine.find("@SQ") == 0) {
mergedHeader.append(headerLine.c_str());
mergedHeader.append(1, '\n');
}
}
// (for all files) append RG entries if they are unique
if ( headerLine.find("@RG") == 0 ) {
stringstream headerLineSs(headerLine);
string part, readGroupPart, readGroup;
while(std::getline(headerLineSs, part, '\t')) {
stringstream partSs(part);
string subtag;
std::getline(partSs, subtag, ':');
if (subtag == "ID") {
std::getline(partSs, readGroup, ':');
break;
}
}
if (readGroups.find(readGroup) == readGroups.end()) { // prevents duplicate @RG entries
mergedHeader.append(headerLine.c_str() );
mergedHeader.append(1, '\n');
readGroups[readGroup] = true;
currentFileReadGroups[readGroup] = true;
} else {
// warn iff we are reading one file and discover duplicated @RG tags in the header
// otherwise, we emit no warning, as we might be merging multiple BAM files with identical @RG tags
if (currentFileReadGroups.find(readGroup) != currentFileReadGroups.end()) {
cerr << "WARNING: duplicate @RG tag " << readGroup
<< " entry in header of " << reader->GetFilename() << endl;
}
}
}
}
}
// return merged header text
return mergedHeader;
}
示例5: main
//.........这里部分代码省略.........
}
else if (has_regionFile == true)
{
ifstream RG(RegionFile.c_str(), ios_base::in);
string line;
while(getline(RG,line))
{
BamRegion region;
ParseRegionString(line, reader, region);
regionlist.push_back(region);
}
RG.close();
}
else if ( (has_regionFile == false) && (has_region == false) )
{
for (int i= 0; i < (int)referencedata.size(); i++)
{
string regionstr = referencedata.at(i).RefName;
BamRegion region;
ParseRegionString(regionstr, reader, region);
if (!reader.SetRegion(region)) // Bam region will get [0,101) = 0 to 100 => [closed, half-opened)
{
cerr << "ERROR: set region " << regionstr << " failed. Check that REGION describes a valid range... Aborting" << endl;
reader.Close();
exit(1);
}
else
regionlist.push_back(region);
}
}
////
BamWriter writer;
if (!writer.Open("stdout", reader.GetHeaderText(), reader.GetReferenceData()))
{
cerr << "could not open stdout for writing" << endl;
exit(1);
}
//// Smallest start position and Largest end position for Req Seq
vector<RefData>::iterator refdataIter = referencedata.begin();
vector<BamRegion>::iterator regionListIter = regionlist.begin();
// CLASS
RealignFunctionsClass RealignFunction;
map<int, string> RefIDRedName;
vector<SalRealignInfo> AlGroups;
multimap<int, BamAlignment> SortRealignedAlignmentsMultimap;
int refid = 0;
BamAlignment alignment;
bool IsNextAlignment = reader.GetNextAlignment(alignment);
//cerr << " " << alignment.Name << " Chr " << alignment.RefID << " Startpos: " << alignment.Position << " Endpos: " << alignment.GetEndPosition() << " Length: " << alignment.Length << endl;
int windowrealigned = 0;
int TotalWindowDetected = 0;
int TotalReadsAligned = 0;
int TotalWindow = 0;
int TotalReads = 0;
while (refdataIter != referencedata.end() )
{
string refname = refdataIter->RefName;
RefIDRedName[refid] = refname;
示例6: IntersectBamPE
void BedIntersectPE::IntersectBamPE(string bamFile) {
// load the "B" bed file into a map so
// that we can easily compare "A" to it for overlaps
_bedB->loadBedFileIntoMap();
// open the BAM file
BamReader reader;
BamWriter writer;
reader.Open(bamFile);
// get header & reference information
string bamHeader = reader.GetHeaderText();
RefVector refs = reader.GetReferenceData();
// open a BAM output to stdout if we are writing BAM
if (_bamOutput == true) {
// set compression mode
BamWriter::CompressionMode compressionMode = BamWriter::Compressed;
if ( _isUncompressedBam ) compressionMode = BamWriter::Uncompressed;
writer.SetCompressionMode(compressionMode);
// open our BAM writer
writer.Open("stdout", bamHeader, refs);
}
// track the previous and current sequence
// names so that we can identify blocks of
// alignments for a given read ID.
string prevName, currName;
prevName = currName = "";
vector<BamAlignment> alignments; // vector of BAM alignments for a given ID in a BAM file.
alignments.reserve(100);
_bedA->bedType = 10; // it's a full BEDPE given it's BAM
// rip through the BAM file and convert each mapped entry to BEDPE
BamAlignment bam1, bam2;
while (reader.GetNextAlignment(bam1)) {
reader.GetNextAlignment(bam2);
if (bam1.Name != bam2.Name) {
while (bam1.Name != bam2.Name)
{
if (bam1.IsPaired())
{
cerr << "*****WARNING: Query " << bam1.Name
<< " is marked as paired, but it's mate does not occur"
<< " next to it in your BAM file. Skipping. " << endl;
}
bam1 = bam2;
reader.GetNextAlignment(bam2);
}
}
else if (bam1.IsPaired() && bam1.IsPaired()) {
ProcessBamBlock(bam1, bam2, refs, writer);
}
}
// close up
reader.Close();
if (_bamOutput == true) {
writer.Close();
}
}
示例7: IntersectBam
void BedIntersect::IntersectBam(string bamFile) {
// load the "B" bed file into a map so
// that we can easily compare "A" to it for overlaps
_bedB = new BedFile(_bedBFile);
_bedB->loadBedFileIntoMap();
// create a dummy BED A file for printing purposes if not
// using BAM output.
if (_bamOutput == false) {
_bedA = new BedFile(_bedAFile);
_bedA->bedType = 12;
}
// open the BAM file
BamReader reader;
BamWriter writer;
reader.Open(bamFile);
// get header & reference information
string bamHeader = reader.GetHeaderText();
RefVector refs = reader.GetReferenceData();
// open a BAM output to stdout if we are writing BAM
if (_bamOutput == true) {
// set compression mode
BamWriter::CompressionMode compressionMode = BamWriter::Compressed;
if ( _isUncompressedBam ) compressionMode = BamWriter::Uncompressed;
writer.SetCompressionMode(compressionMode);
// open our BAM writer
writer.Open("stdout", bamHeader, refs);
}
vector<BED> hits;
// reserve some space
hits.reserve(100);
BamAlignment bam;
// get each set of alignments for each pair.
while (reader.GetNextAlignment(bam)) {
// save an unaligned read if -v
if (!bam.IsMapped()) {
if (_noHit == true)
writer.SaveAlignment(bam);
continue;
}
// break alignment into discrete blocks,
bedVector bed_blocks;
string chrom = refs.at(bam.RefID).RefName;
GetBamBlocks(bam, chrom, bed_blocks, false, true);
// create a basic BED entry from the BAM alignment
BED bed;
MakeBedFromBam(bam, chrom, bed_blocks, bed);
bool overlapsFound = false;
if ((_bamOutput == true) && (_obeySplits == false))
{
overlapsFound = _bedB->anyHits(bed.chrom, bed.start, bed.end,
bed.strand, _sameStrand, _diffStrand,
_overlapFraction, _reciprocal);
}
else if ( ((_bamOutput == true) && (_obeySplits == true)) ||
((_bamOutput == false) && (_obeySplits == true)) )
{
// find the hits that overlap with the full span of the blocked BED
_bedB->allHits(bed.chrom, bed.start, bed.end, bed.strand,
hits, _sameStrand, _diffStrand,
_overlapFraction, _reciprocal);
// find the overlaps between the block in A and B
overlapsFound = FindBlockedOverlaps(bed, bed_blocks, hits, _bamOutput);
}
else if ((_bamOutput == false) && (_obeySplits == false))
{
FindOverlaps(bed, hits);
}
// save the BAM alignment if overlap reqs. were met
if (_bamOutput == true) {
if ((overlapsFound == true) && (_noHit == false))
writer.SaveAlignment(bam);
else if ((overlapsFound == false) && (_noHit == true))
writer.SaveAlignment(bam);
}
hits.clear();
}
// close the relevant BAM files.
reader.Close();
if (_bamOutput == true) {
writer.Close();
}
}
示例8: Tag
void TagBam::Tag() {
// open the annotations files for processing;
OpenAnnoFiles();
// open the BAM file
BamReader reader;
BamWriter writer;
if (!reader.Open(_bamFile)) {
cerr << "Failed to open BAM file " << _bamFile << endl;
exit(1);
}
// get header & reference information
string bamHeader = reader.GetHeaderText();
RefVector refs = reader.GetReferenceData();
// set compression mode
BamWriter::CompressionMode compressionMode = BamWriter::Compressed;
// if ( _isUncompressedBam ) compressionMode = BamWriter::Uncompressed;
writer.SetCompressionMode(compressionMode);
// open our BAM writer
writer.Open("stdout", bamHeader, refs);
// rip through the BAM file and test for overlaps with each annotation file.
BamAlignment al;
vector<BED> hits;
while (reader.GetNextAlignment(al)) {
if (al.IsMapped() == true) {
BED a;
a.chrom = refs.at(al.RefID).RefName;
a.start = al.Position;
a.end = al.GetEndPosition(false, false);
a.strand = "+";
if (al.IsReverseStrand()) a.strand = "-";
ostringstream annotations;
// annotate the BAM file based on overlaps with the annotation files.
for (size_t i = 0; i < _annoFiles.size(); ++i)
{
// grab the current annotation file.
BedFile *anno = _annoFiles[i];
if (!_useNames && !_useScores && !_useIntervals) {
// add the label for this annotation file to tag if there is overlap
if (anno->anyHits(a.chrom, a.start, a.end, a.strand,
_sameStrand, _diffStrand, _overlapFraction, false))
{
annotations << _annoLabels[i] << ";";
}
}
// use the score field
else if (!_useNames && _useScores && !_useIntervals) {
anno->allHits(a.chrom, a.start, a.end, a.strand,
hits, _sameStrand, _diffStrand, 0.0, false);
for (size_t i = 0; i < hits.size(); ++i) {
annotations << hits[i].score;
if (i < hits.size() - 1) annotations << ",";
}
if (hits.size() > 0) annotations << ";";
hits.clear();
}
// use the name field from the annotation files to populate tag
else if (_useNames && !_useScores && !_useIntervals) {
anno->allHits(a.chrom, a.start, a.end, a.strand,
hits, _sameStrand, _diffStrand, 0.0, false);
for (size_t j = 0; j < hits.size(); ++j) {
annotations << hits[j].name;
if (j < hits.size() - 1) annotations << ",";
}
if (hits.size() > 0) annotations << ";";
hits.clear();
}
// use the full interval information annotation files to populate tag
else if (!_useNames && !_useScores && _useIntervals) {
anno->allHits(a.chrom, a.start, a.end, a.strand,
hits, _sameStrand, _diffStrand, 0.0, false);
for (size_t j = 0; j < hits.size(); ++j) {
annotations << _annoLabels[i] << ":" <<
hits[j].chrom << ":" <<
hits[j].start << "-" <<
hits[j].end << "," <<
hits[j].name << "," <<
hits[j].score << "," <<
hits[j].strand;
if (j < hits.size() - 1) annotations << ",";
}
if (hits.size() > 0) annotations << ";";
hits.clear();
}
}
// were there any overlaps with which to make a tag?
if (annotations.str().size() > 0) {
al.AddTag(_tag, "Z", annotations.str().substr(0, annotations.str().size() - 1)); // get rid of the last ";"
}
}
writer.SaveAlignment(al);
}
reader.Close();
//.........这里部分代码省略.........
示例9: WindowIntersectBam
void BedWindow::WindowIntersectBam(string bamFile) {
// load the "B" bed file into a map so
// that we can easily compare "A" to it for overlaps
_bedB->loadBedFileIntoMap();
// open the BAM file
BamReader reader;
BamWriter writer;
reader.Open(bamFile);
// get header & reference information
string bamHeader = reader.GetHeaderText();
RefVector refs = reader.GetReferenceData();
// open a BAM output to stdout if we are writing BAM
if (_bamOutput == true) {
// set compression mode
BamWriter::CompressionMode compressionMode = BamWriter::Compressed;
if ( _isUncompressedBam ) compressionMode = BamWriter::Uncompressed;
writer.SetCompressionMode(compressionMode);
// open our BAM writer
writer.Open("stdout", bamHeader, refs);
}
vector<BED> hits; // vector of potential hits
// reserve some space
hits.reserve(100);
_bedA->bedType = 6;
BamAlignment bam;
bool overlapsFound;
// get each set of alignments for each pair.
while (reader.GetNextAlignment(bam)) {
if (bam.IsMapped()) {
BED a;
a.chrom = refs.at(bam.RefID).RefName;
a.start = bam.Position;
a.end = bam.GetEndPosition(false, false);
// build the name field from the BAM alignment.
a.name = bam.Name;
if (bam.IsFirstMate()) a.name += "/1";
if (bam.IsSecondMate()) a.name += "/2";
a.score = ToString(bam.MapQuality);
a.strand = "+"; if (bam.IsReverseStrand()) a.strand = "-";
if (_bamOutput == true) {
overlapsFound = FindOneOrMoreWindowOverlaps(a);
if (overlapsFound == true) {
if (_noHit == false)
writer.SaveAlignment(bam);
}
else {
if (_noHit == true)
writer.SaveAlignment(bam);
}
}
else {
FindWindowOverlaps(a, hits);
hits.clear();
}
}
// BAM IsMapped() is false
else if (_noHit == true) {
writer.SaveAlignment(bam);
}
}
// close the relevant BAM files.
reader.Close();
if (_bamOutput == true) {
writer.Close();
}
}
示例10: CoverageBam
void BedGenomeCoverage::CoverageBam(string bamFile) {
ResetChromCoverage();
// open the BAM file
BamReader reader;
if (!reader.Open(bamFile)) {
cerr << "Failed to open BAM file " << bamFile << endl;
exit(1);
}
// get header & reference information
string header = reader.GetHeaderText();
RefVector refs = reader.GetReferenceData();
// load the BAM header references into a BEDTools "genome file"
_genome = new GenomeFile(refs);
// convert each aligned BAM entry to BED
// and compute coverage on B
BamAlignment bam;
while (reader.GetNextAlignment(bam)) {
// skip if the read is unaligned
if (bam.IsMapped() == false)
continue;
bool _isReverseStrand = bam.IsReverseStrand();
//changing second mate's strand to opposite
if( _dUTP && bam.IsPaired() && bam.IsMateMapped() && bam.IsSecondMate())
_isReverseStrand = !bam.IsReverseStrand();
// skip if we care about strands and the strand isn't what
// the user wanted
if ( (_filterByStrand == true) &&
((_requestedStrand == "-") != _isReverseStrand) )
continue;
// extract the chrom, start and end from the BAM alignment
string chrom(refs.at(bam.RefID).RefName);
CHRPOS start = bam.Position;
CHRPOS end = bam.GetEndPosition(false, false) - 1;
// are we on a new chromosome?
if ( chrom != _currChromName )
StartNewChrom(chrom);
if(_pair_chip_) {
// Skip if not a proper pair
if (bam.IsPaired() && (!bam.IsProperPair() or !bam.IsMateMapped()) )
continue;
// Skip if wrong coordinates
if( ( (bam.Position<bam.MatePosition) && bam.IsReverseStrand() ) ||
( (bam.MatePosition < bam.Position) && bam.IsMateReverseStrand() ) ) {
//chemically designed: left on positive strand, right on reverse one
continue;
}
/*if(_haveSize) {
if (bam.IsFirstMate() && bam.IsReverseStrand()) { //put fragmentSize in to the middle of pair end_fragment
int mid = bam.MatePosition+abs(bam.InsertSize)/2;
if(mid<_fragmentSize/2)
AddCoverage(0, mid+_fragmentSize/2);
else
AddCoverage(mid-_fragmentSize/2, mid+_fragmentSize/2);
}
else if (bam.IsFirstMate() && bam.IsMateReverseStrand()) { //put fragmentSize in to the middle of pair end_fragment
int mid = start+abs(bam.InsertSize)/2;
if(mid<_fragmentSize/2)
AddCoverage(0, mid+_fragmentSize/2);
else
AddCoverage(mid-_fragmentSize/2, mid+_fragmentSize/2);
}
} else */
if (bam.IsFirstMate() && bam.IsReverseStrand()) { //prolong to the mate to the left
AddCoverage(bam.MatePosition, end);
}
else if (bam.IsFirstMate() && bam.IsMateReverseStrand()) { //prolong to the mate to the right
AddCoverage(start, start + abs(bam.InsertSize) - 1);
}
} else if (_haveSize) {
if(bam.IsReverseStrand()) {
if(end<_fragmentSize) { //sometimes fragmentSize is bigger :(
AddCoverage(0, end);
} else {
AddCoverage(end + 1 - _fragmentSize, end );
}
} else {
AddCoverage(start,start+_fragmentSize - 1);
}
} else
// add coverage accordingly.
if (!_only_5p_end && !_only_3p_end) {
bedVector bedBlocks;
// we always want to split blocks when a D CIGAR op is found.
// if the user invokes -split, we want to also split on N ops.
if (_obeySplits) { // "D" true, "N" true
GetBamBlocks(bam, refs.at(bam.RefID).RefName, bedBlocks, true, true);
}
else { // "D" true, "N" false
GetBamBlocks(bam, refs.at(bam.RefID).RefName, bedBlocks, true, false);
//.........这里部分代码省略.........
示例11: IntersectBam
void BedIntersect::IntersectBam(string bamFile) {
// load the "B" bed file into a map so
// that we can easily compare "A" to it for overlaps
_bedB->loadBedFileIntoMap();
// open the BAM file
BamReader reader;
BamWriter writer;
reader.Open(bamFile);
// get header & reference information
string header = reader.GetHeaderText();
RefVector refs = reader.GetReferenceData();
// open a BAM output to stdout if we are writing BAM
if (_bamOutput == true) {
// open our BAM writer
writer.Open("stdout", header, refs, _isUncompressedBam);
}
vector<BED> hits;
// reserve some space
hits.reserve(100);
_bedA->bedType = 6;
BamAlignment bam;
// get each set of alignments for each pair.
while (reader.GetNextAlignment(bam)) {
if (bam.IsMapped()) {
BED a;
a.chrom = refs.at(bam.RefID).RefName;
a.start = bam.Position;
a.end = bam.GetEndPosition(false);
// build the name field from the BAM alignment.
a.name = bam.Name;
if (bam.IsFirstMate()) a.name += "/1";
if (bam.IsSecondMate()) a.name += "/2";
a.score = ToString(bam.MapQuality);
a.strand = "+";
if (bam.IsReverseStrand()) a.strand = "-";
if (_bamOutput == true) {
bool overlapsFound = false;
// treat the BAM alignment as a single "block"
if (_obeySplits == false) {
overlapsFound = FindOneOrMoreOverlap(a);
}
// split the BAM alignment into discrete blocks and
// look for overlaps only within each block.
else {
bool overlapFoundForBlock;
bedVector bedBlocks; // vec to store the discrete BED "blocks" from a
// we don't want to split on "D" ops, hence the "false"
getBamBlocks(bam, refs, bedBlocks, false);
vector<BED>::const_iterator bedItr = bedBlocks.begin();
vector<BED>::const_iterator bedEnd = bedBlocks.end();
for (; bedItr != bedEnd; ++bedItr) {
overlapFoundForBlock = FindOneOrMoreOverlap(a);
if (overlapFoundForBlock == true)
overlapsFound = true;
}
}
if (overlapsFound == true) {
if (_noHit == false)
writer.SaveAlignment(bam);
}
else {
if (_noHit == true) {
writer.SaveAlignment(bam);
}
}
}
else {
// treat the BAM alignment as a single BED "block"
if (_obeySplits == false) {
FindOverlaps(a, hits);
hits.clear();
}
// split the BAM alignment into discrete BED blocks and
// look for overlaps only within each block.
else {
bedVector bedBlocks; // vec to store the discrete BED "blocks" from a
getBamBlocks(bam, refs, bedBlocks, false);
vector<BED>::const_iterator bedItr = bedBlocks.begin();
vector<BED>::const_iterator bedEnd = bedBlocks.end();
for (; bedItr != bedEnd; ++bedItr) {
FindOverlaps(*bedItr, hits);
hits.clear();
}
}
}
}
}
//.........这里部分代码省略.........
示例12: realign_bam
void realign_bam(Parameters& params) {
FastaReference reference;
reference.open(params.fasta_reference);
bool suppress_output = false;
int dag_window_size = params.dag_window_size;
// open BAM file
BamReader reader;
if (!reader.Open("stdin")) {
cerr << "could not open stdin for reading" << endl;
exit(1);
}
BamWriter writer;
if (!params.dry_run && !writer.Open("stdout", reader.GetHeaderText(), reader.GetReferenceData())) {
cerr << "could not open stdout for writing" << endl;
exit(1);
}
// store the names of all the reference sequences in the BAM file
map<int, string> referenceIDToName;
vector<RefData> referenceSequences = reader.GetReferenceData();
int i = 0;
for (RefVector::iterator r = referenceSequences.begin(); r != referenceSequences.end(); ++r) {
referenceIDToName[i] = r->RefName;
++i;
}
vcf::VariantCallFile vcffile;
if (!params.vcf_file.empty()) {
if (!vcffile.open(params.vcf_file)) {
cerr << "could not open VCF file " << params.vcf_file << endl;
exit(1);
}
} else {
cerr << "realignment requires VCF file" << endl;
exit(1);
}
vcf::Variant var(vcffile);
BamAlignment alignment;
map<long int, vector<BamAlignment> > alignmentSortQueue;
// get alignment
// assemble DAG in region around alignment
// loop for each alignment in BAM:
// update DAG when current alignment gets close to edge of assembled DAG
// attempt to realign if read has a certain number of mismatches + gaps or softclips, weighted by basequal
// if alignment to DAG has fewer mismatches and gaps than original alignment, use it
// flatten read into reference space (for now just output alleles from VCF un-spanned insertions)
// write read to queue for streaming re-sorting (some positional change will occur)
long int dag_start_position = 0;
string currentSeqname;
string ref;
//vector<Cigar> cigars; // contains the Cigar strings of nodes in the graph
//vector<long int> refpositions; // contains the reference start coords of nodes in the graph
ReferenceMappings ref_map;
gssw_graph* graph = gssw_graph_create(0);
int8_t* nt_table = gssw_create_nt_table();
int8_t* mat = gssw_create_score_matrix(params.match, params.mism);
int total_reads = 0;
int total_realigned = 0;
int total_improved = 0;
bool emptyDAG = false; // if the dag is constructed over empty sequence
// such as when realigning reads mapped to all-N sequence
if (params.debug) {
cerr << "about to start processing alignments" << endl;
}
while (reader.GetNextAlignment(alignment)) {
string& seqname = referenceIDToName[alignment.RefID];
if (params.debug) {
cerr << "--------------------------------------------" << endl
<< "processing alignment " << alignment.Name << " at "
<< seqname << ":" << alignment.Position << endl;
}
/*
if (!alignment.IsMapped() && graph->size == 0) {
if (params.debug) {
cerr << "unable to build DAG using unmapped read "
<< alignment.Name << " @ "
<< seqname << ":" << alignment.Position
<< " no previous mapped read found and DAG currently empty" << endl;
}
alignmentSortQueue[dag_start_position+dag_window_size].push_back(alignment);
continue;
}
*/
++total_reads;
BamAlignment originalAlignment = alignment;
//.........这里部分代码省略.........