本文整理汇总了C++中BamAlignment::IsMapped方法的典型用法代码示例。如果您正苦于以下问题:C++ BamAlignment::IsMapped方法的具体用法?C++ BamAlignment::IsMapped怎么用?C++ BamAlignment::IsMapped使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类BamAlignment
的用法示例。
在下文中一共展示了BamAlignment::IsMapped方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: main
//.........这里部分代码省略.........
eatline(line, regions,noChr);
it = regions.begin();
if (it->chr == old_chr){
gene_processing(*it,locus_b);
regions.clear();
continue;
}
}
continue;
}
int chr_len = refs.at(chr_id).RefLength;
if ( !reader.SetRegion(chr_id, 1, chr_id, chr_len) ) // here set region
{
cerr << "bamtools count ERROR: Jump region failed " << it->chr << endl;
reader.Close();
exit(1);
}
//pile-up pos stats
set <string> fragment;
map <string, unsigned int> pileup;
bool isposPileup = false;
unsigned int old_start = 0;
unsigned int total_tags = 0;
unsigned int total_pos = 0;
unsigned int pileup_pos = 0;
BamAlignment bam;
while (reader.GetNextAlignment(bam)) {
if ( bam.IsMapped() == false ) continue; // skip unaligned reads
unsigned int unique;
bam.GetTag("NH", unique);
if (param->unique == 1) {
if (unique != 1) { // skipe uniquelly mapped reads
continue;
}
}
if (read_length == 0){
read_length = bam.Length;
}
//cout << bam.Name << endl;
string chrom = refs.at(bam.RefID).RefName;
string strand = "+";
if (bam.IsReverseStrand()) strand = "-";
unsigned int alignmentStart = bam.Position+1;
unsigned int mateStart;
if (type == "p") mateStart = bam.MatePosition+1;
unsigned int alignmentEnd = bam.GetEndPosition();
unsigned int cigarEnd;
vector <int> blockLengths;
vector <int> blockStarts;
blockStarts.push_back(0);
ParseCigar(bam.CigarData, blockStarts, blockLengths, cigarEnd);
// position check for unique mapped reads (because is paired-end reads, shoule base on fragment level for paired end reads)
if (posc == true && unique == 1) {
示例2: main
//.........这里部分代码省略.........
vector<RefData> testRefData=reader.GetReferenceData();
const SamHeader header = reader.GetHeader();
const RefVector references = reader.GetReferenceData();
BamWriter writerDeam;
if ( !writerDeam.Open(deambam, header, references) ) {
cerr << "Could not open output BAM file" << endl;
return 1;
}
BamWriter writerNoDeam;
if ( !writerNoDeam.Open(nondeambam, header, references) ) {
cerr << "Could not open output BAM file" << endl;
return 1;
}
unsigned int totalReads =0;
unsigned int deaminatedReads =0;
unsigned int ndeaminatedReads =0;
unsigned int skipped =0;
//iterating over the alignments for these regions
BamAlignment al;
int i;
while ( reader.GetNextAlignment(al) ) {
// cerr<<al.Name<<endl;
//skip unmapped
if(!al.IsMapped()){
skipped++;
continue;
}
//skip paired end !
if(al.IsPaired() ){
continue;
// cerr<<"Paired end not yet coded"<<endl;
// return 1;
}
string reconstructedReference = reconstructRef(&al);
char refeBase;
char readBase;
bool isDeaminated;
if(al.Qualities.size() != reconstructedReference.size()){
cerr<<"Quality line is not the same size as the reconstructed reference"<<endl;
return 1;
}
isDeaminated=false;
if(al.IsReverseStrand()){
//first base next to 3'
i = 0 ;
refeBase=toupper(reconstructedReference[i]);
readBase=toupper( al.QueryBases[i]);
示例3: findTranslocationsOnTheFly
void StructuralVariations::findTranslocationsOnTheFly(string bamFileName, bool outtie, float meanCoverage, string outputFileHeader, map<string,int> SV_options) {
size_t start = time(NULL);
//open the bam file
BamReader bamFile;
bamFile.Open(bamFileName);
//Information from the header is needed to initialize the data structure
SamHeader head = bamFile.GetHeader();
// now create Translocation on the fly
Window *window;
window = new Window(bamFileName,outtie,meanCoverage,outputFileHeader,SV_options);
window->initTrans(head);
//expands a vector so that it is large enough to hold reads from each contig in separate elements
window->eventReads.resize(SV_options["contigsNumber"]);
window->eventSplitReads.resize(SV_options["contigsNumber"]);
window-> binnedCoverage.resize(SV_options["contigsNumber"]);
window-> linksFromWin.resize(SV_options["contigsNumber"]);
window -> numberOfEvents = 0;
string line;
string coverageFile=outputFileHeader+".tab";
ifstream inputFile( coverageFile.c_str() );
int line_number=0;
while (std::getline( inputFile, line )){
if(line_number > 0){
vector<string> splitline;
std::stringstream ss(line);
std::string item;
while (std::getline(ss, item, '\t')) {
splitline.push_back(item);
}
window -> binnedCoverage[window -> contig2position[splitline[0]]].push_back(atof(splitline[3].c_str()));
}
line_number += 1;
}
inputFile.close();
//Initialize bam entity
BamAlignment currentRead;
//now start to iterate over the bam file
int counter = 0;
while ( bamFile.GetNextAlignmentCore(currentRead) ) {
if(currentRead.IsMapped()) {
window->insertRead(currentRead);
}
}
for(int i=0;i< window-> eventReads.size();i++){
if(window -> eventReads[i].size() >= window -> minimumPairs){
window->computeVariations(i);
}
window->eventReads[i]=queue<BamAlignment>();
window->eventSplitReads[i] = vector<BamAlignment>();
}
window->interChrVariationsVCF.close();
window->intraChrVariationsVCF.close();
printf ("variant calling time consumption= %lds\n", time(NULL) - start);
}
示例4: IonstatsTestFragments
int IonstatsTestFragments(int argc, const char *argv[])
{
OptArgs opts;
opts.ParseCmdLine(argc, argv);
string input_bam_filename = opts.GetFirstString('i', "input", "");
string fasta_filename = opts.GetFirstString('r', "ref", "");
string output_json_filename = opts.GetFirstString('o', "output", "ionstats_tf.json");
int histogram_length = opts.GetFirstInt ('h', "histogram-length", 400);
if(argc < 2 or input_bam_filename.empty() or fasta_filename.empty()) {
IonstatsTestFragmentsHelp();
return 1;
}
//
// Prepare for metric calculation
//
map<string,string> tf_sequences;
PopulateReferenceSequences(tf_sequences, fasta_filename);
BamReader input_bam;
if (!input_bam.Open(input_bam_filename)) {
fprintf(stderr, "[ionstats] ERROR: cannot open %s\n", input_bam_filename.c_str());
return 1;
}
int num_tfs = input_bam.GetReferenceCount();
SamHeader sam_header = input_bam.GetHeader();
if(!sam_header.HasReadGroups()) {
fprintf(stderr, "[ionstats] ERROR: no read groups in %s\n", input_bam_filename.c_str());
return 1;
}
string flow_order;
string key;
for (SamReadGroupIterator rg = sam_header.ReadGroups.Begin(); rg != sam_header.ReadGroups.End(); ++rg) {
if(rg->HasFlowOrder())
flow_order = rg->FlowOrder;
if(rg->HasKeySequence())
key = rg->KeySequence;
}
// Need these metrics stratified by TF.
vector<ReadLengthHistogram> called_histogram(num_tfs);
vector<ReadLengthHistogram> aligned_histogram(num_tfs);
vector<ReadLengthHistogram> AQ10_histogram(num_tfs);
vector<ReadLengthHistogram> AQ17_histogram(num_tfs);
vector<SimpleHistogram> error_by_position(num_tfs);
vector<MetricGeneratorSNR> system_snr(num_tfs);
vector<MetricGeneratorHPAccuracy> hp_accuracy(num_tfs);
for (int tf = 0; tf < num_tfs; ++tf) {
called_histogram[tf].Initialize(histogram_length);
aligned_histogram[tf].Initialize(histogram_length);
AQ10_histogram[tf].Initialize(histogram_length);
AQ17_histogram[tf].Initialize(histogram_length);
error_by_position[tf].Initialize(histogram_length);
}
vector<uint16_t> flow_signal_fz(flow_order.length());
vector<int16_t> flow_signal_zm(flow_order.length());
const RefVector& refs = input_bam.GetReferenceData();
// Missing:
// - hp accuracy - tough, copy verbatim from TFMapper?
BamAlignment alignment;
vector<char> MD_op;
vector<int> MD_len;
MD_op.reserve(1024);
MD_len.reserve(1024);
string MD_tag;
//
// Main loop over mapped reads in the input BAM
//
while(input_bam.GetNextAlignment(alignment)) {
if (!alignment.IsMapped() or !alignment.GetTag("MD",MD_tag))
continue;
// The check below eliminates unexpected alignments
if (alignment.IsReverseStrand() or alignment.Position > 5)
continue;
int current_tf = alignment.RefID;
//
// Step 1. Parse MD tag
//
//.........这里部分代码省略.........
示例5: while
// generates mutiple sorted temp BAM files from single unsorted BAM file
bool SortTool::SortToolPrivate::GenerateSortedRuns(void) {
// open input BAM file
BamReader reader;
if ( !reader.Open(m_settings->InputBamFilename) ) {
cerr << "bamtools sort ERROR: could not open " << m_settings->InputBamFilename
<< " for reading... Aborting." << endl;
return false;
}
// get basic data that will be shared by all temp/output files
SamHeader header = reader.GetHeader();
header.SortOrder = ( m_settings->IsSortingByName
? Constants::SAM_HD_SORTORDER_QUERYNAME
: Constants::SAM_HD_SORTORDER_COORDINATE );
m_headerText = header.ToString();
m_references = reader.GetReferenceData();
// set up alignments buffer
BamAlignment al;
vector<BamAlignment> buffer;
buffer.reserve( (size_t)(m_settings->MaxBufferCount*1.1) );
bool bufferFull = false;
// if sorting by name, we need to generate full char data
// so can't use GetNextAlignmentCore()
if ( m_settings->IsSortingByName ) {
// iterate through file
while ( reader.GetNextAlignment(al)) {
// check buffer's usage
bufferFull = ( buffer.size() >= m_settings->MaxBufferCount );
// store alignments until buffer is "full"
if ( !bufferFull )
buffer.push_back(al);
// if buffer is "full"
else {
// push any unmapped reads into buffer,
// don't want to split these into a separate temp file
if ( !al.IsMapped() )
buffer.push_back(al);
// "al" is mapped, so create a sorted temp file with current buffer contents
// then push "al" into fresh buffer
else {
CreateSortedTempFile(buffer);
buffer.push_back(al);
}
}
}
}
// sorting by position, can take advantage of GNACore() speedup
else {
// iterate through file
while ( reader.GetNextAlignmentCore(al) ) {
// check buffer's usage
bufferFull = ( buffer.size() >= m_settings->MaxBufferCount );
// store alignments until buffer is "full"
if ( !bufferFull )
buffer.push_back(al);
// if buffer is "full"
else {
// push any unmapped reads into buffer,
// don't want to split these into a separate temp file
if ( !al.IsMapped() )
buffer.push_back(al);
// "al" is mapped, so create a sorted temp file with current buffer contents
// then push "al" into fresh buffer
else {
CreateSortedTempFile(buffer);
buffer.push_back(al);
}
}
}
}
// handle any leftover buffer contents
if ( !buffer.empty() )
CreateSortedTempFile(buffer);
// close reader & return success
reader.Close();
return true;
}
示例6: main
//.........这里部分代码省略.........
{
//cerr << " (*sraIter).first= " << (*sraIter).first << " minmaxRefSeqPos.at(0)= " << minmaxRefSeqPos.at(0) << " winstartpos - ((windowlen - 1)*0.9)= " << winstartpos - ((windowlen - 1)*0.9) << endl;
if (((float) (*sraIter).first < floor((float) (winstartpos - ((windowlen - 1)*0.9)))) && ((minmaxRefSeqPos.at(0) > 0) && ((*sraIter).first < minmaxRefSeqPos.at(0)))) {
//writer.SaveAlignment((*sraIter).second); // Why sometimes, it doesn't work ?????
if (!writer.SaveAlignment((*sraIter).second))
cerr << writer.GetErrorString() << endl;
SortRealignedAlignmentsMultimap.erase(sraIter++);
} else {
++sraIter;
}
}
//cerr << "#Done: Write alignments and delete alignments with start position exceed the right end of the window/Region " << endl;
}
//cerr << " winstart: " << winstartpos << " ; winend: " << winendpos;
//cerr << " " << alignment.Name << " Chr " << alignment.RefID << " Startpos: " << alignment.Position << " Endpos: " << alignment.GetEndPosition() << " Length: " << alignment.Length << endl;
//cerr << ": " << alignment.RefID << " :" << RefIDRedName[alignment.RefID] << " : " << RefIDRedName[alignment.RefID] << endl;
//cerr << "Start: Gather aligmenets that lie (fully or partially) within the window frame and group INDELs if there are ... " << endl;
// Gather Reads within a window frame
while ((IsNextAlignment) && (refid == alignment.RefID)) // Neeed more conditions
{
if (SetLowComplexityRegion == true)
{
string sequenceInWindow = reference->getSubSequence(RefIDRedName[alignment.RefID], winstartpos, (winendpos-winstartpos+1) );
if (IsWindowInRepeatRegion(sequenceInWindow) == true)
{
if ((IsWithinWindow(alignment, winstartpos, winendpos, AllowableBasesInWindow)) == 0)
{
TotalReads++;
if (alignment.IsMapped())
{
string referenceSequence = reference->getSubSequence(RefIDRedName[alignment.RefID], alignment.Position, 2*alignment.Length);
vector<SalRealignInfo>::iterator tIter;
SalRealignInfo alr;
alr.al = alignment;
alr.currentReadPosition = 0;
alr.currentGenomeSeqPosition = 0;
alr.currentAlPosition = alignment.Position;
alr.cigarindex = 0;
alr.HasRealign = false;
alr.CigarSoftclippingLength = 0;
string str = "ZZZZZZZZZZZZZZZZZ";
if (alignment.Name.find(str) != string::npos) {
stringstream cigar;
for (vector<CigarOp>::const_iterator cigarIter = alignment.CigarData.begin(); cigarIter != alignment.CigarData.end(); ++cigarIter)
cigar << cigarIter->Length << cigarIter->Type;
string cigarstr = cigar.str();
cerr << " TRACKING: " << alignment.RefID << " " << alignment.Name << " pos: " << alignment.Position << " cigar: " << cigarstr << endl;
}
RealignFunction.ParseAlignmentsAndExtractVariantsByBaseQualv(AlGroups, alr, tIter, alignment, referenceSequence, minmaxRefSeqPos, winstartpos, winendpos, (float) minbaseQ, true);
NewReadMappedcount++;
}
else
{
SortRealignedAlignmentsMultimap.insert(pair<int, BamAlignment > (alignment.Position, alignment));
cerr << "UNmapped : " << alignment.Name << endl;
}
}
示例7: main
//.........这里部分代码省略.........
// cerr<<"ERROR: the out directory does not exist"<<endl;
// return 1;
// }
BamReader reader;
if ( !reader.Open(bamfiletopen) ) {
cerr << "Could not open input BAM files." << endl;
return 1;
}
const SamHeader header = reader.GetHeader();
const RefVector references = reader.GetReferenceData();
vector<RefData> refData=reader.GetReferenceData();
SamReadGroupDictionary srgd=header.ReadGroups;
for(SamReadGroupConstIterator srgci=srgd.ConstBegin();
srgci<srgd.ConstEnd();
srgci++){
//cout<<*srgci<<endl;
const SamReadGroup rg = (*srgci);
//cout<<rg.ID<<endl;
if( rg2Fraction.find(rg.ID) != rg2Fraction.end() ){
rg2BamWriter[rg.ID] = new BamWriter();
rg2BamWriter[rg.ID]->Open(bamDirOutPrefix+"."+rg.ID+".bam",header,references);
}
//cout<<bamDirOutPrefix+"."+rg.ID+".bam"<<endl;
}
// return 1;
// BamWriter unmapped;
// cout<<header.ToString()<<endl;
// return 1;
// if ( !unmapped.Open(bamDirOutPrefix+".unmapped.bam",header,references) ) {
// cerr << "Could not open output BAM file "<< bamDirOutPrefix+".unmapped.bam" << endl;
// return 1;
// }
// cout<<"reading"<<endl;
BamAlignment al;
unsigned int total=0;
while ( reader.GetNextAlignment(al) ) {
if(al.HasTag("RG") &&
al.IsMapped() ){
string rgTag;
al.GetTag("RG",rgTag);
//cout<<rgTag<<endl;
if(rg2BamWriter.find(rgTag) == rg2BamWriter.end()){ //new: ignore completely
}else{
if( randomProb() <= rg2Fraction[ rgTag ] ){
rg2BamWriter[rgTag]->SaveAlignment(al);
//cout<<"wrote "<<rgTag<<endl;
} else{
//cout<<"skipped "<<rgTag<<endl;
}
}
}// else{
// string rgTag="unknown";
// //cout<<rgTag<<endl;
// if(rg2BamWriter.find(rgTag) == rg2BamWriter.end()){ //new
// cerr<<"Found new RG "<<rgTag<<endl;
// rg2BamWriter[rgTag] = new BamWriter();
// if ( !rg2BamWriter[rgTag]->Open(bamDirOutPrefix+"."+rgTag+".bam",header,references) ) {
// cerr << "Could not open output BAM file "<< bamDirOutPrefix<<"."<<rgTag<<".bam" << endl;
// return 1;
// }
// rg2BamWriter[rgTag]->SaveAlignment(al);
// }else{
// rg2BamWriter[rgTag]->SaveAlignment(al);
// }
// // cerr << "Cannot get RG tag for " << al.Name<<endl;
// // return 1;
// }
total++;
} //while al
reader.Close();
// writer.Close();
// unmapped.Close();
map<string,BamWriter *>::iterator rg2BamWriterIt;
for (rg2BamWriterIt =rg2BamWriter.begin();
rg2BamWriterIt!=rg2BamWriter.end();
rg2BamWriterIt++){
rg2BamWriterIt->second->Close();
}
cerr<<"Wrote succesfully "<<total<<" reads"<<endl;
return 0;
}
示例8: IntersectBam
void BedIntersect::IntersectBam(string bamFile) {
// load the "B" bed file into a map so
// that we can easily compare "A" to it for overlaps
_bedB = new BedFile(_bedBFile);
_bedB->loadBedFileIntoMap();
// create a dummy BED A file for printing purposes if not
// using BAM output.
if (_bamOutput == false) {
_bedA = new BedFile(_bedAFile);
_bedA->bedType = 12;
}
// open the BAM file
BamReader reader;
BamWriter writer;
reader.Open(bamFile);
// get header & reference information
string bamHeader = reader.GetHeaderText();
RefVector refs = reader.GetReferenceData();
// open a BAM output to stdout if we are writing BAM
if (_bamOutput == true) {
// set compression mode
BamWriter::CompressionMode compressionMode = BamWriter::Compressed;
if ( _isUncompressedBam ) compressionMode = BamWriter::Uncompressed;
writer.SetCompressionMode(compressionMode);
// open our BAM writer
writer.Open("stdout", bamHeader, refs);
}
vector<BED> hits;
// reserve some space
hits.reserve(100);
BamAlignment bam;
// get each set of alignments for each pair.
while (reader.GetNextAlignment(bam)) {
// save an unaligned read if -v
if (!bam.IsMapped()) {
if (_noHit == true)
writer.SaveAlignment(bam);
continue;
}
// break alignment into discrete blocks,
bedVector bed_blocks;
string chrom = refs.at(bam.RefID).RefName;
GetBamBlocks(bam, chrom, bed_blocks, false, true);
// create a basic BED entry from the BAM alignment
BED bed;
MakeBedFromBam(bam, chrom, bed_blocks, bed);
bool overlapsFound = false;
if ((_bamOutput == true) && (_obeySplits == false))
{
overlapsFound = _bedB->anyHits(bed.chrom, bed.start, bed.end,
bed.strand, _sameStrand, _diffStrand,
_overlapFraction, _reciprocal);
}
else if ( ((_bamOutput == true) && (_obeySplits == true)) ||
((_bamOutput == false) && (_obeySplits == true)) )
{
// find the hits that overlap with the full span of the blocked BED
_bedB->allHits(bed.chrom, bed.start, bed.end, bed.strand,
hits, _sameStrand, _diffStrand,
_overlapFraction, _reciprocal);
// find the overlaps between the block in A and B
overlapsFound = FindBlockedOverlaps(bed, bed_blocks, hits, _bamOutput);
}
else if ((_bamOutput == false) && (_obeySplits == false))
{
FindOverlaps(bed, hits);
}
// save the BAM alignment if overlap reqs. were met
if (_bamOutput == true) {
if ((overlapsFound == true) && (_noHit == false))
writer.SaveAlignment(bam);
else if ((overlapsFound == false) && (_noHit == true))
writer.SaveAlignment(bam);
}
hits.clear();
}
// close the relevant BAM files.
reader.Close();
if (_bamOutput == true) {
writer.Close();
}
}
示例9: IntersectBam
void BedIntersect::IntersectBam(string bamFile) {
// load the "B" bed file into a map so
// that we can easily compare "A" to it for overlaps
_bedB->loadBedFileIntoMap();
// open the BAM file
BamReader reader;
BamWriter writer;
reader.Open(bamFile);
// get header & reference information
string header = reader.GetHeaderText();
RefVector refs = reader.GetReferenceData();
// open a BAM output to stdout if we are writing BAM
if (_bamOutput == true) {
// open our BAM writer
writer.Open("stdout", header, refs, _isUncompressedBam);
}
vector<BED> hits;
// reserve some space
hits.reserve(100);
_bedA->bedType = 6;
BamAlignment bam;
// get each set of alignments for each pair.
while (reader.GetNextAlignment(bam)) {
if (bam.IsMapped()) {
BED a;
a.chrom = refs.at(bam.RefID).RefName;
a.start = bam.Position;
a.end = bam.GetEndPosition(false);
// build the name field from the BAM alignment.
a.name = bam.Name;
if (bam.IsFirstMate()) a.name += "/1";
if (bam.IsSecondMate()) a.name += "/2";
a.score = ToString(bam.MapQuality);
a.strand = "+";
if (bam.IsReverseStrand()) a.strand = "-";
if (_bamOutput == true) {
bool overlapsFound = false;
// treat the BAM alignment as a single "block"
if (_obeySplits == false) {
overlapsFound = FindOneOrMoreOverlap(a);
}
// split the BAM alignment into discrete blocks and
// look for overlaps only within each block.
else {
bool overlapFoundForBlock;
bedVector bedBlocks; // vec to store the discrete BED "blocks" from a
// we don't want to split on "D" ops, hence the "false"
getBamBlocks(bam, refs, bedBlocks, false);
vector<BED>::const_iterator bedItr = bedBlocks.begin();
vector<BED>::const_iterator bedEnd = bedBlocks.end();
for (; bedItr != bedEnd; ++bedItr) {
overlapFoundForBlock = FindOneOrMoreOverlap(a);
if (overlapFoundForBlock == true)
overlapsFound = true;
}
}
if (overlapsFound == true) {
if (_noHit == false)
writer.SaveAlignment(bam);
}
else {
if (_noHit == true) {
writer.SaveAlignment(bam);
}
}
}
else {
// treat the BAM alignment as a single BED "block"
if (_obeySplits == false) {
FindOverlaps(a, hits);
hits.clear();
}
// split the BAM alignment into discrete BED blocks and
// look for overlaps only within each block.
else {
bedVector bedBlocks; // vec to store the discrete BED "blocks" from a
getBamBlocks(bam, refs, bedBlocks, false);
vector<BED>::const_iterator bedItr = bedBlocks.begin();
vector<BED>::const_iterator bedEnd = bedBlocks.end();
for (; bedItr != bedEnd; ++bedItr) {
FindOverlaps(*bedItr, hits);
hits.clear();
}
}
}
}
}
//.........这里部分代码省略.........
示例10: CoverageBam
void BedGenomeCoverage::CoverageBam(string bamFile) {
ResetChromCoverage();
// open the BAM file
BamReader reader;
if (!reader.Open(bamFile)) {
cerr << "Failed to open BAM file " << bamFile << endl;
exit(1);
}
// get header & reference information
string header = reader.GetHeaderText();
RefVector refs = reader.GetReferenceData();
// load the BAM header references into a BEDTools "genome file"
_genome = new GenomeFile(refs);
// convert each aligned BAM entry to BED
// and compute coverage on B
BamAlignment bam;
while (reader.GetNextAlignment(bam)) {
// skip if the read is unaligned
if (bam.IsMapped() == false)
continue;
bool _isReverseStrand = bam.IsReverseStrand();
//changing second mate's strand to opposite
if( _dUTP && bam.IsPaired() && bam.IsMateMapped() && bam.IsSecondMate())
_isReverseStrand = !bam.IsReverseStrand();
// skip if we care about strands and the strand isn't what
// the user wanted
if ( (_filterByStrand == true) &&
((_requestedStrand == "-") != _isReverseStrand) )
continue;
// extract the chrom, start and end from the BAM alignment
string chrom(refs.at(bam.RefID).RefName);
CHRPOS start = bam.Position;
CHRPOS end = bam.GetEndPosition(false, false) - 1;
// are we on a new chromosome?
if ( chrom != _currChromName )
StartNewChrom(chrom);
if(_pair_chip_) {
// Skip if not a proper pair
if (bam.IsPaired() && (!bam.IsProperPair() or !bam.IsMateMapped()) )
continue;
// Skip if wrong coordinates
if( ( (bam.Position<bam.MatePosition) && bam.IsReverseStrand() ) ||
( (bam.MatePosition < bam.Position) && bam.IsMateReverseStrand() ) ) {
//chemically designed: left on positive strand, right on reverse one
continue;
}
/*if(_haveSize) {
if (bam.IsFirstMate() && bam.IsReverseStrand()) { //put fragmentSize in to the middle of pair end_fragment
int mid = bam.MatePosition+abs(bam.InsertSize)/2;
if(mid<_fragmentSize/2)
AddCoverage(0, mid+_fragmentSize/2);
else
AddCoverage(mid-_fragmentSize/2, mid+_fragmentSize/2);
}
else if (bam.IsFirstMate() && bam.IsMateReverseStrand()) { //put fragmentSize in to the middle of pair end_fragment
int mid = start+abs(bam.InsertSize)/2;
if(mid<_fragmentSize/2)
AddCoverage(0, mid+_fragmentSize/2);
else
AddCoverage(mid-_fragmentSize/2, mid+_fragmentSize/2);
}
} else */
if (bam.IsFirstMate() && bam.IsReverseStrand()) { //prolong to the mate to the left
AddCoverage(bam.MatePosition, end);
}
else if (bam.IsFirstMate() && bam.IsMateReverseStrand()) { //prolong to the mate to the right
AddCoverage(start, start + abs(bam.InsertSize) - 1);
}
} else if (_haveSize) {
if(bam.IsReverseStrand()) {
if(end<_fragmentSize) { //sometimes fragmentSize is bigger :(
AddCoverage(0, end);
} else {
AddCoverage(end + 1 - _fragmentSize, end );
}
} else {
AddCoverage(start,start+_fragmentSize - 1);
}
} else
// add coverage accordingly.
if (!_only_5p_end && !_only_3p_end) {
bedVector bedBlocks;
// we always want to split blocks when a D CIGAR op is found.
// if the user invokes -split, we want to also split on N ops.
if (_obeySplits) { // "D" true, "N" true
GetBamBlocks(bam, refs.at(bam.RefID).RefName, bedBlocks, true, true);
}
else { // "D" true, "N" false
GetBamBlocks(bam, refs.at(bam.RefID).RefName, bedBlocks, true, false);
//.........这里部分代码省略.........
示例11: Distinguish
/*
Input file format: BAM file
*/
void Distinguish( string mappingFile, map< string, bool > & hapSNP, map< string, string > & refSeq, string outfilePrefix ) {
// [WARNING] If refSeq is not empty than it's BS data, otherwirse is resequencing data.
string outfile1 = outfilePrefix + ".1.bam";
string outfile2 = outfilePrefix + ".2.bam";
string outHomoF = outfilePrefix + ".homo.bam";
string outUdeci = outfilePrefix + ".ambiguity.bam"; // The ambiguity reads or the reads which has't any SNP.
BamReader h_I; // bam input file handle
if ( !h_I.Open( mappingFile ) ) cerr << "[ERROR]: " << h_I.GetErrorString() << endl;
// "header" and "references" from BAM files, these are required by BamWriter
const SamHeader header = h_I.GetHeader();
const RefVector references = h_I.GetReferenceData();
BamWriter h_O1, h_O2, h_U, h_H;
if ( !h_O1.Open( outfile1, header, references ) ) { cerr << "Cannot open output BAM file: " << outfile1 << endl; exit(1); }
if ( !h_O2.Open( outfile2, header, references ) ) { cerr << "Cannot open output BAM file: " << outfile2 << endl; exit(1); }
if ( !h_U.Open ( outUdeci, header, references ) ) { cerr << "Cannot open output BAM file: " << outUdeci << endl; exit(1); }
if ( !h_H.Open ( outHomoF, header, references ) ) { cerr << "Cannot open output BAM file: " << outHomoF << endl; exit(1); }
int readsNumberRecord(0);
SamLine samline; // Samline class
SamExt sam;
BamAlignment al;
map< string, pair<BamAlignment, SamExt> > firstMateAl; // record the first mate reads alignment, HIstory problem to be like this struct!!
string refstr; // Just For BS data.
bool isC2T ( false ); // Just For BS data.
while ( h_I.GetNextAlignment( al ) ) {
++readsNumberRecord;
if ( readsNumberRecord % 1000000 == 0 )
cerr << "Have been dealed " << readsNumberRecord << " lines. " << local_time ();
if ( !al.IsMapped() ) continue;
//if ( al.InsertSize == 0 || al.RefID != al.MateRefID ) continue;
samline._RID = al.Name; samline._Flag= al.AlignmentFlag;
samline._ref_id = h_I.GetReferenceData()[al.RefID].RefName;
samline._position = al.Position + 1; // Position (0-base starts in BamTools), but I need 1-base starts
samline._mapQ = al.MapQuality;
// MateRefID == -1 means mate read is unmapping
samline._XorD = ( al.MateRefID > -1 ) ? h_I.GetReferenceData()[al.MateRefID].RefName : "*";
samline._coor = al.MatePosition + 1; // Position (0-base starts in BamTools), but I need 1-base starts
samline._seq = al.QueryBases;
samline._insert_size = abs (al.InsertSize);
if ( samline._ref_id.compare( "BIG_ID_CAT" ) == 0 ) continue; // Ignore "BIG_ID_CAT"
// get cigar;
samline._cigar = itoa(al.CigarData[0].Length); samline._cigar.append( 1, al.CigarData[0].Type );
for ( size_t i(1); i < al.CigarData.size(); ++i ) {
samline._cigar += itoa(al.CigarData[i].Length);
samline._cigar.append( 1, al.CigarData[i].Type );
}
sam.assign( &samline );
/*********************************** For BS Data *********************************************/
if ( !refSeq.empty() ) { // If the data is BS data, we should modify the QueryBases.
if ( !refSeq.count( samline._ref_id ) ) {
cerr << "[ERROR]There's no such reference in the reference file. " << samline._ref_id << endl;
exit(1);
}
if ( al.IsFirstMate() && !al.IsReverseStrand() ) {
isC2T = true;
}
else if ( al.IsFirstMate() && al.IsReverseStrand() ) {
isC2T = false;
}
else if ( al.IsSecondMate() && !al.IsReverseStrand() ) {
isC2T = false;
}
else if ( al.IsSecondMate() && al.IsReverseStrand() ) {
isC2T = true;
} else {
cerr << "[ERROR MATCH] " << endl; exit(1);
}
refstr.assign( refSeq[samline._ref_id], sam.ref_start() - 1, sam.ref_end() - sam.ref_start() + 1 );
modifyBSreadBases( samline._ref_id, sam.ref_start (), sam.read_start(), sam.cigar_seq(), refstr, sam._seq, hapSNP, isC2T );
}
/********************************** End For BS Data *******************************************/
// Consider the mate pair reads
if ( !firstMateAl.count(al.Name) && (al.MateRefID > -1) ) {
firstMateAl[al.Name] = std::make_pair( al, sam );
} else { // Consider the mate pair reads
if ( !firstMateAl.count(al.Name) ) {
switch ( Decide( sam, hapSNP ) ) {
case 1 : h_O1.SaveAlignment( al ); break; // Hap1
case 2 : h_O2.SaveAlignment( al ); break; // Hap2
case 0 : h_U.SaveAlignment ( al ); break; // Ambiguity
default: // This alignment didn't contain any hete SNP.
h_H.SaveAlignment ( al ); // Homozygous reads
}
//.........这里部分代码省略.........
示例12: setMateInfo
void setMateInfo( BamAlignment & rec1, BamAlignment & rec2, SamHeader & header) {
const int NO_ALIGNMENT_REFERENCE_INDEX = -1;
const int NO_ALIGNMENT_START = -1;
// If neither read is unmapped just set their mate info
if (rec1.IsMapped() && rec2.IsMapped()) {
rec1.MateRefID = rec2.MateRefID;
rec1.MatePosition = rec2.Position;
rec1.SetIsReverseStrand(rec2.IsReverseStrand());
rec1.SetIsMapped(true);
rec1.AddTag("MQ", "i", rec2.MapQuality);
rec2.MateRefID = rec1.RefID;
rec2.MatePosition = rec1.Position;
rec2.SetIsReverseStrand( rec1.IsReverseStrand() );
rec2.SetIsMapped(true);
rec2.AddTag("MQ", "i", rec1.MapQuality);
}
// Else if they're both unmapped set that straight
else if (!rec1.IsMapped() && !rec2.IsMapped()) {
rec1.RefID = NO_ALIGNMENT_REFERENCE_INDEX;
rec1.Position = NO_ALIGNMENT_START;
rec1.MateRefID = NO_ALIGNMENT_REFERENCE_INDEX;
rec1.MatePosition = NO_ALIGNMENT_START;
rec1.SetIsReverseStrand(rec2.IsReverseStrand());
rec1.SetIsMapped(false);
rec2.RemoveTag("MQ");
rec1.Length = 0;
rec2.RefID = NO_ALIGNMENT_REFERENCE_INDEX;
rec2.Position = NO_ALIGNMENT_START;
rec2.MateRefID = NO_ALIGNMENT_REFERENCE_INDEX;
rec2.MatePosition = NO_ALIGNMENT_START;
rec2.SetIsReverseStrand(rec1.IsReverseStrand());
rec2.SetIsMapped(false);
rec2.RemoveTag("MQ");
rec2.Length = 0;
}
// And if only one is mapped copy it's coordinate information to the mate
else {
BamAlignment & mapped = rec1.IsMapped() ? rec1 : rec2;
BamAlignment & unmapped = rec1.IsMapped() ? rec2 : rec1;
unmapped.RefID = mapped.RefID;
unmapped.Position = mapped.Position;
mapped.MateRefID = unmapped.RefID;
mapped.MatePosition = unmapped.Position;
mapped.SetIsMateReverseStrand(unmapped.IsReverseStrand());
mapped.SetIsMateMapped(false);
mapped.Length = 0;
unmapped.MateRefID = mapped.RefID;
unmapped.MatePosition = mapped.Position;
unmapped.SetIsMateReverseStrand(mapped.IsReverseStrand());
unmapped.SetIsMateMapped(true);
unmapped.Length = 0;
}
const int insertSize = computeInsertSize(rec1, rec2);
rec1.Length = insertSize;
rec2.Length = -insertSize;
}
示例13: main
//.........这里部分代码省略.........
return 1;
}
vector<RefData> testRefData=reader.GetReferenceData();
SamHeader header = reader.GetHeader();
string pID = "decrQualDeaminated";
string pName = "decrQualDeaminated";
string pCommandLine = "";
for(int i=0;i<(argc);i++){
pCommandLine += (string(argv[i])+" ");
}
putProgramInHeader(&header,pID,pName,pCommandLine);
const RefVector references = reader.GetReferenceData();
BamWriter writer;
if ( !writer.Open(outbamFile, header, references) ) {
cerr << "Could not open output BAM file" << endl;
return 1;
}
BamAlignment al;
// BamAlignment al2;
// bool al2Null=true;
while ( reader.GetNextAlignment(al) ) {
if(al.IsPaired() ){
if(al.IsFirstMate() ){ //5' end, need to check first base only
if(al.IsReverseStrand()){ //
if(!al.IsMapped()){
cerr << "Cannot have reverse complemented unmapped reads :" <<al.Name<< endl;
//return 1;
}
int indexToCheck;
//5' of first mate reversed
indexToCheck=al.QueryBases.length()-1;
for(int i=0;i<bpToDecrease5;i++){
if(toupper(al.QueryBases[indexToCheck]) == 'A'){
al.Qualities[indexToCheck]=char(offset+baseQualForDeam);
}
indexToCheck=max(indexToCheck-1,0);
}
}else{
int indexToCheck;
//5' of first mate
indexToCheck=0;
for(int i=0;i<bpToDecrease5;i++){ //first base
if(toupper(al.QueryBases[indexToCheck]) == 'T'){
al.Qualities[indexToCheck]=char(offset+baseQualForDeam);
}
indexToCheck=min(indexToCheck+1,int(al.Qualities.size()));
}
}
}else{ //3' end, need to check last two bases only
if( al.IsSecondMate() ){
if(al.IsReverseStrand()){ //
示例14: check
bool check(const PropertyFilter& filter, const BamAlignment& al) {
bool keepAlignment = true;
const PropertyMap& properties = filter.Properties;
PropertyMap::const_iterator propertyIter = properties.begin();
PropertyMap::const_iterator propertyEnd = properties.end();
for ( ; propertyIter != propertyEnd; ++propertyIter ) {
// check alignment data field depending on propertyName
const string& propertyName = (*propertyIter).first;
const PropertyFilterValue& valueFilter = (*propertyIter).second;
if ( propertyName == ALIGNMENTFLAG_PROPERTY ) keepAlignment &= valueFilter.check(al.AlignmentFlag);
else if ( propertyName == CIGAR_PROPERTY ) {
stringstream cigarSs;
const vector<CigarOp>& cigarData = al.CigarData;
if ( !cigarData.empty() ) {
vector<CigarOp>::const_iterator cigarBegin = cigarData.begin();
vector<CigarOp>::const_iterator cigarIter = cigarBegin;
vector<CigarOp>::const_iterator cigarEnd = cigarData.end();
for ( ; cigarIter != cigarEnd; ++cigarIter ) {
const CigarOp& op = (*cigarIter);
cigarSs << op.Length << op.Type;
}
keepAlignment &= valueFilter.check(cigarSs.str());
}
}
else if ( propertyName == INSERTSIZE_PROPERTY ) keepAlignment &= valueFilter.check(al.InsertSize);
else if ( propertyName == ISDUPLICATE_PROPERTY ) keepAlignment &= valueFilter.check(al.IsDuplicate());
else if ( propertyName == ISFAILEDQC_PROPERTY ) keepAlignment &= valueFilter.check(al.IsFailedQC());
else if ( propertyName == ISFIRSTMATE_PROPERTY ) keepAlignment &= valueFilter.check(al.IsFirstMate());
else if ( propertyName == ISMAPPED_PROPERTY ) keepAlignment &= valueFilter.check(al.IsMapped());
else if ( propertyName == ISMATEMAPPED_PROPERTY ) keepAlignment &= valueFilter.check(al.IsMateMapped());
else if ( propertyName == ISMATEREVERSESTRAND_PROPERTY ) keepAlignment &= valueFilter.check(al.IsMateReverseStrand());
else if ( propertyName == ISPAIRED_PROPERTY ) keepAlignment &= valueFilter.check(al.IsPaired());
else if ( propertyName == ISPRIMARYALIGNMENT_PROPERTY ) keepAlignment &= valueFilter.check(al.IsPrimaryAlignment());
else if ( propertyName == ISPROPERPAIR_PROPERTY ) keepAlignment &= valueFilter.check(al.IsProperPair());
else if ( propertyName == ISREVERSESTRAND_PROPERTY ) keepAlignment &= valueFilter.check(al.IsReverseStrand());
else if ( propertyName == ISSECONDMATE_PROPERTY ) keepAlignment &= valueFilter.check(al.IsSecondMate());
else if ( propertyName == ISSINGLETON_PROPERTY ) {
const bool isSingleton = al.IsPaired() && al.IsMapped() && !al.IsMateMapped();
keepAlignment &= valueFilter.check(isSingleton);
}
else if ( propertyName == MAPQUALITY_PROPERTY ) keepAlignment &= valueFilter.check(al.MapQuality);
else if ( propertyName == MATEPOSITION_PROPERTY ) keepAlignment &= ( al.IsPaired() && al.IsMateMapped() && valueFilter.check(al.MateRefID) );
else if ( propertyName == MATEREFERENCE_PROPERTY ) {
if ( !al.IsPaired() || !al.IsMateMapped() ) return false;
BAMTOOLS_ASSERT_MESSAGE( (al.MateRefID>=0 && (al.MateRefID<(int)filterToolReferences.size())), "Invalid MateRefID");
const string& refName = filterToolReferences.at(al.MateRefID).RefName;
keepAlignment &= valueFilter.check(refName);
}
else if ( propertyName == NAME_PROPERTY ) keepAlignment &= valueFilter.check(al.Name);
else if ( propertyName == POSITION_PROPERTY ) keepAlignment &= valueFilter.check(al.Position);
else if ( propertyName == QUERYBASES_PROPERTY ) keepAlignment &= valueFilter.check(al.QueryBases);
else if ( propertyName == REFERENCE_PROPERTY ) {
BAMTOOLS_ASSERT_MESSAGE( (al.RefID>=0 && (al.RefID<(int)filterToolReferences.size())), "Invalid RefID");
const string& refName = filterToolReferences.at(al.RefID).RefName;
keepAlignment &= valueFilter.check(refName);
}
else if ( propertyName == TAG_PROPERTY ) keepAlignment &= checkAlignmentTag(valueFilter, al);
else BAMTOOLS_ASSERT_UNREACHABLE;
// if alignment fails at ANY point, just quit and return false
if ( !keepAlignment ) return false;
}
BAMTOOLS_ASSERT_MESSAGE( keepAlignment, "Error in BamAlignmentChecker... keepAlignment should be true here");
return keepAlignment;
}
示例15: WindowIntersectBam
void BedWindow::WindowIntersectBam(string bamFile) {
// load the "B" bed file into a map so
// that we can easily compare "A" to it for overlaps
_bedB->loadBedFileIntoMap();
// open the BAM file
BamReader reader;
BamWriter writer;
reader.Open(bamFile);
// get header & reference information
string bamHeader = reader.GetHeaderText();
RefVector refs = reader.GetReferenceData();
// open a BAM output to stdout if we are writing BAM
if (_bamOutput == true) {
// set compression mode
BamWriter::CompressionMode compressionMode = BamWriter::Compressed;
if ( _isUncompressedBam ) compressionMode = BamWriter::Uncompressed;
writer.SetCompressionMode(compressionMode);
// open our BAM writer
writer.Open("stdout", bamHeader, refs);
}
vector<BED> hits; // vector of potential hits
// reserve some space
hits.reserve(100);
_bedA->bedType = 6;
BamAlignment bam;
bool overlapsFound;
// get each set of alignments for each pair.
while (reader.GetNextAlignment(bam)) {
if (bam.IsMapped()) {
BED a;
a.chrom = refs.at(bam.RefID).RefName;
a.start = bam.Position;
a.end = bam.GetEndPosition(false, false);
// build the name field from the BAM alignment.
a.name = bam.Name;
if (bam.IsFirstMate()) a.name += "/1";
if (bam.IsSecondMate()) a.name += "/2";
a.score = ToString(bam.MapQuality);
a.strand = "+"; if (bam.IsReverseStrand()) a.strand = "-";
if (_bamOutput == true) {
overlapsFound = FindOneOrMoreWindowOverlaps(a);
if (overlapsFound == true) {
if (_noHit == false)
writer.SaveAlignment(bam);
}
else {
if (_noHit == true)
writer.SaveAlignment(bam);
}
}
else {
FindWindowOverlaps(a, hits);
hits.clear();
}
}
// BAM IsMapped() is false
else if (_noHit == true) {
writer.SaveAlignment(bam);
}
}
// close the relevant BAM files.
reader.Close();
if (_bamOutput == true) {
writer.Close();
}
}