本文整理汇总了C++中BamAlignment::GetEndPosition方法的典型用法代码示例。如果您正苦于以下问题:C++ BamAlignment::GetEndPosition方法的具体用法?C++ BamAlignment::GetEndPosition怎么用?C++ BamAlignment::GetEndPosition使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类BamAlignment
的用法示例。
在下文中一共展示了BamAlignment::GetEndPosition方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: if
//{{{ SV_Pair:: SV_Pair(const BamAlignment &bam_a,
// if both reads are on the same chrome, then read_l must map before read_r
// if the reads are on different strands then read_l must be on the lexo
// lesser chrom (using the string.compare() method)
SV_Pair::
SV_Pair(const BamAlignment &bam_a,
const BamAlignment &bam_b,
const RefVector &refs,
int _weight,
int _ev_id,
SV_PairReader *_reader)
{
reader = _reader;
if ( bam_a.MapQuality < bam_b.MapQuality )
min_mapping_quality = bam_a.MapQuality;
else
min_mapping_quality = bam_b.MapQuality;
struct interval tmp_a, tmp_b;
tmp_a.start = bam_a.Position;
tmp_a.end = bam_a.GetEndPosition(false, false) - 1;
tmp_a.chr = refs.at(bam_a.RefID).RefName;
if ( bam_a.IsReverseStrand() == true )
tmp_a.strand = '-';
else
tmp_a.strand = '+';
tmp_b.start = bam_b.Position;
tmp_b.end = bam_b.GetEndPosition(false, false) - 1;
tmp_b.chr = refs.at(bam_b.RefID).RefName;
if ( bam_b.IsReverseStrand() == true )
tmp_b.strand = '-';
else
tmp_b.strand = '+';
//if ( tmp_a.chr.compare(tmp_b.chr) > 0 ) {
if ( bam_a.RefID < bam_b.RefID ) {
read_l = tmp_a;
read_r = tmp_b;
//} else if ( tmp_a.chr.compare(tmp_b.chr) < 0 ) {
} else if ( bam_a.RefID > bam_b.RefID) {
read_l = tmp_b;
read_r = tmp_a;
} else { // ==
if (tmp_a.start > tmp_b.start) {
read_l = tmp_b;
read_r = tmp_a;
} else {
read_l = tmp_a;
read_r = tmp_b;
}
}
weight = _weight;
ev_id = _ev_id;
}
示例2: IsWithinWindow
int IsWithinWindow(BamAlignment& alignment, int winstartpos, int winendpos, int AllowableBasesInWindow) {
//int allowableFracLen = ceil((float) PercentReadLengthInWindow * alignment.Length);
if ((alignment.Position >= winstartpos) && ((alignment.GetEndPosition() - 1) <= winendpos)) // reads that are exactly in window
return 0;
else if (((alignment.Position >= winstartpos) && (alignment.Position <= winendpos))
|| (((alignment.GetEndPosition() - 1) <= winendpos) && ((alignment.GetEndPosition() - AllowableBasesInWindow - 1) >= winstartpos))) // certain reads that are partially in window
return 0;
else if ((alignment.Position < winstartpos) && ((alignment.GetEndPosition() - 1) >= winendpos)) // window is in read length
return 0;
else if ((alignment.Position < winstartpos) && ((alignment.GetEndPosition() - 1)< winstartpos) )
return 1;
else if (alignment.Position > winendpos)
return 2;
}
示例3: processMatchOrMismatch
pos_t VariantProcessor::processMatchOrMismatch(const BamAlignment& alignment,
vector<VariantPtr>& read_variants,
const uint32_t& op_length, const string& refseq,
const pos_t& refpos, const pos_t& readpos) {
// Process a matching or mismatching sequence in the CIGAR string,
// adding any SNP variants present.
int endpos = alignment.GetEndPosition();
for (int i = 0; i < op_length; i++) {
assert(alignment.Position + i < endpos);
char query_base = alignment.QueryBases[readpos + i];
assert(refpos + i < refseq.size());
char ref_base = refseq[refpos + i];
if (ref_base != query_base) {
// SNP
string ref(1, ref_base), alt(1, query_base);
char qual_base = alignment.Qualities[refpos + i]; // TODO check
VariantPtr snp(new Variant(VariantType::SNP, alignment.RefID,
alignment.Position+i, 1, 0, ref, alt));
block_variants.insert(snp);
read_variants.push_back(snp);
//cout << "mismatch at " << alignment.Position + i <<" refbase: " << ref_base << " querybase: " << query_base << endl;
}
}
}
示例4: processAlignment
pos_t VariantProcessor::processAlignment(const BamAlignment& alignment) {
/*
For each alignment, extract the MD and NM tags, validate against
CIGAR string, and create Variants and ReadHaplotypes. All reads
for a block are stored in a deque, and processed again to create
candidate haplotypes.
Returns the start position of this alignment (TODO correct?)
*/
if (!alignment.HasTag("NM") || !alignment.HasTag("MD")) {
std::cerr << "error: BamAlignment '" << alignment.Name <<
"' does not have either NM or MD tags" << std::endl;
}
int nm_tag;
string md_tag;
unsigned int aln_len = alignment.GetEndPosition() - alignment.Position;
alignment.GetTag("MD", md_tag);
alignment.GetTag("NM", nm_tag);
// Reconstruct reference sequence using MD tags
string refseq = createReferenceSequence(alignment);
// With reconstructed reference sequence and query sequence, look
// for variants. It's a bit roundabout to reconstruct reference from
// MD, then use it to find variants (already in MD) but keeping
// state between CIGAR and MD is tricky. This also is a good
// validation; variants found must much the number of variants in
// CIGAR/MD.
vector<VariantPtr> variants;
vector<VariantPtr> read_variants;
const vector<CigarOp>& cigar = alignment.CigarData;
int refpos = 0, readpos = 0;
for (vector<CigarOp>::const_iterator op = cigar.begin(); op != cigar.end(); ++op) {
if (op->Type == 'S') {
readpos += op->Length;
} else if (op->Type == 'M') {
// match or SNP
processMatchOrMismatch(alignment, read_variants, op->Length, refseq, refpos, readpos);
readpos += op->Length;
refpos += op->Length;
} else if (op->Type == 'I') {
processInsertion(alignment, read_variants, op->Length, refseq, refpos, readpos);
readpos += op->Length;
} else if (op->Type == 'D') {
processDeletion(alignment, read_variants, op->Length, refseq, refpos, readpos);
refpos += op->Length; // deletion w.r.t reference; skip ref length
} else {
cerr << "error: unidentified CIGAR type: " << op->Type << endl;
exit(1);
}
}
// Add to alignments list
block_alignments.push_back(alignment);
return 0; // TODO(vsbuffalo)
}
示例5: IsOverlap
// returns region state - whether alignment ends before, overlaps, or starts after currently specified region
// this *internal* method should ONLY called when (at least) IsLeftBoundSpecified == true
BamReaderPrivate::RegionState BamReaderPrivate::IsOverlap(BamAlignment& bAlignment) {
// if alignment is on any reference sequence before left bound
if ( bAlignment.RefID < Region.LeftRefID ) return BEFORE_REGION;
// if alignment starts on left bound reference
else if ( bAlignment.RefID == Region.LeftRefID ) {
// if alignment starts at or after left boundary
if ( bAlignment.Position >= Region.LeftPosition) {
// if right boundary is specified AND
// left/right boundaries are on same reference AND
// alignment starts past right boundary
if ( Region.isRightBoundSpecified() &&
Region.LeftRefID == Region.RightRefID &&
bAlignment.Position > Region.RightPosition )
return AFTER_REGION;
// otherwise, alignment is within region
return WITHIN_REGION;
}
// alignment starts before left boundary
else {
// check if alignment overlaps left boundary
if ( bAlignment.GetEndPosition() >= Region.LeftPosition ) return WITHIN_REGION;
else return BEFORE_REGION;
}
}
// alignment starts on a reference after the left bound
else {
// if region has a right boundary
if ( Region.isRightBoundSpecified() ) {
// alignment is on reference between boundaries
if ( bAlignment.RefID < Region.RightRefID ) return WITHIN_REGION;
// alignment is on reference after right boundary
else if ( bAlignment.RefID > Region.RightRefID ) return AFTER_REGION;
// alignment is on right bound reference
else {
// check if alignment starts before or at right boundary
if ( bAlignment.Position <= Region.RightPosition ) return WITHIN_REGION;
else return AFTER_REGION;
}
}
// otherwise, alignment is after left bound reference, but there is no right boundary
else return WITHIN_REGION;
}
}
示例6:
void ConvertTool::ConvertToolPrivate::PrintBed(const BamAlignment& a)
{
// tab-delimited, 0-based half-open
// (e.g. a 50-base read aligned to pos 10 could have BED coordinates (10, 60) instead of BAM coordinates (10, 59) )
// <chromName> <chromStart> <chromEnd> <readName> <score> <strand>
m_out << m_references.at(a.RefID).RefName << '\t' << a.Position << '\t' << a.GetEndPosition()
<< '\t' << a.Name << '\t' << a.MapQuality << '\t' << (a.IsReverseStrand() ? '-' : '+')
<< std::endl;
}
示例7: CollectCoverageBam
void BedCoverage::CollectCoverageBam(string bamFile) {
// load the "B" bed file into a map so
// that we can easily compare "A" to it for overlaps
_bedB->loadBedCovFileIntoMap();
// open the BAM file
BamReader reader;
reader.Open(bamFile);
// get header & reference information
string header = reader.GetHeaderText();
RefVector refs = reader.GetReferenceData();
// convert each aligned BAM entry to BED
// and compute coverage on B
BamAlignment bam;
while (reader.GetNextAlignment(bam)) {
if (bam.IsMapped()) {
// treat the BAM alignment as a single "block"
if (_obeySplits == false) {
// construct a new BED entry from the current BAM alignment.
BED a;
a.chrom = refs.at(bam.RefID).RefName;
a.start = bam.Position;
a.end = bam.GetEndPosition(false, false);
a.strand = "+";
if (bam.IsReverseStrand()) a.strand = "-";
_bedB->countHits(a, _sameStrand, _diffStrand, _countsOnly);
}
// split the BAM alignment into discrete blocks and
// look for overlaps only within each block.
else {
// vec to store the discrete BED "blocks" from a
bedVector bedBlocks;
// since we are counting coverage, we do want to split blocks when a
// deletion (D) CIGAR op is encountered (hence the true for the last parm)
GetBamBlocks(bam, refs.at(bam.RefID).RefName, bedBlocks, false, true);
// use countSplitHits to avoid over-counting each split chunk
// as distinct read coverage.
_bedB->countSplitHits(bedBlocks, _sameStrand, _diffStrand, _countsOnly);
}
}
}
// report the coverage (summary or histogram) for BED B.
if (_countsOnly == true)
ReportCounts();
else
ReportCoverage();
// close the BAM file
reader.Close();
}
示例8: delete
//{{{ void process_pair(const BamAlignment &curr,
void
SV_Pair::
process_pair(const BamAlignment &curr,
const RefVector refs,
map<string, BamAlignment> &mapped_pairs,
UCSCBins<SV_BreakPoint*> &r_bin,
int weight,
int ev_id,
SV_PairReader *reader)
{
if (mapped_pairs.find(curr.Name) == mapped_pairs.end())
mapped_pairs[curr.Name] = curr;
else {
SV_Pair *new_pair = new SV_Pair(mapped_pairs[curr.Name],
curr,
refs,
weight,
ev_id,
reader);
//cerr << count_clipped(curr.CigarData) << "\t" <<
//count_clipped(mapped_pairs[curr.Name].CigarData) << endl;
if ( new_pair->is_sane() &&
new_pair->is_aberrant() &&
(count_clipped(curr.CigarData) > 0) &&
(count_clipped(mapped_pairs[curr.Name].CigarData) > 0) ) {
SV_BreakPoint *new_bp = new_pair->get_bp();
#ifdef TRACE
cerr << "READ\t" <<
refs.at(mapped_pairs[curr.Name].RefID).RefName << "," <<
mapped_pairs[curr.Name].Position << "," <<
(mapped_pairs[curr.Name].GetEndPosition(false, false) - 1)
<< "\t" <<
refs.at(curr.RefID).RefName << "," <<
curr.Position << "," <<
(curr.GetEndPosition(false, false) - 1)
<<
endl;
cerr << "\tPE\t" << *new_bp << endl;
#endif
new_bp->cluster(r_bin);
} else {
delete(new_pair);
}
mapped_pairs.erase(curr.Name);
}
}
示例9: CountDepth
void CountDepth(Histogram& hist, BamMultiReader& reader, BamAlignment& al, int32_t refID, int64_t refLen)
{
bool moreReads = (al.RefID == refID);
int32_t maxReadLen = 1000;
vector<int64_t> readEnds(maxReadLen);
int64_t depth = 0;
for(int64_t pos=0; pos<refLen; ++pos){
while(moreReads and al.Position == pos){
++depth;
assert(al.GetEndPosition() - pos < maxReadLen);
++readEnds[al.GetEndPosition() % maxReadLen];
moreReads = GetNextAlignment(al, reader, refID);
}
depth -= readEnds[pos % maxReadLen];
assert(depth >= 0);
readEnds[pos % maxReadLen] = 0;
if(depth >= hist.size())
hist.resize(2 * depth);
++hist[depth];
}
}
示例10: if
//{{{ SV_SplitRead:: SV_SplitRead(vector< BamAlignment > &block,
SV_SplitRead::
SV_SplitRead(const BamAlignment &bam_a,
const BamAlignment &bam_b,
const RefVector &refs,
int _weight,
int _id,
int _sample_id,
SV_SplitReadReader *_reader)
{
reader = _reader;
sample_id = _sample_id;
if ( bam_a.MapQuality < bam_b.MapQuality )
min_mapping_quality = bam_a.MapQuality;
else
min_mapping_quality = bam_b.MapQuality;
struct cigar_query query_a =
calc_query_pos_from_cigar(bam_a.CigarData,
bam_a.IsReverseStrand() );
struct cigar_query query_b =
calc_query_pos_from_cigar(bam_b.CigarData,
bam_b.IsReverseStrand() );
struct interval tmp_a, tmp_b;
tmp_a.strand = '+';
if (bam_a.IsReverseStrand())
tmp_a.strand = '-';
tmp_a.chr = refs.at(bam_a.RefID).RefName;
tmp_a.start = bam_a.Position;
tmp_a.end = bam_a.GetEndPosition();
tmp_b.strand = '+';
if (bam_b.IsReverseStrand())
tmp_b.strand = '-';
tmp_b.chr = refs.at(bam_b.RefID).RefName;
tmp_b.start = bam_b.Position;
tmp_b.end = bam_b.GetEndPosition();
//if ( ( tmp_a.chr.compare(tmp_b.chr) > 0 ) ||
//( ( tmp_a.chr.compare(tmp_b.chr) == 0 ) &&
//( tmp_a.start > tmp_b.start ) ) ) {
if ( (bam_a.RefID > bam_b.RefID) ||
( (bam_a.RefID == bam_b.RefID) &&
(tmp_a.start > tmp_b.start ) ) ) {
side_r = tmp_a;
side_l = tmp_b;
query_r = query_a;
query_l = query_b;
} else {
side_l = tmp_a;
side_r = tmp_b;
query_l = query_a;
query_r = query_b;
}
if (side_l.strand != side_r.strand)
type = SV_BreakPoint::INVERSION;
else if ( ( ( side_l.strand == '+' ) &&
( side_r.strand == '+' ) &&
( query_l.qs_pos < query_r.qs_pos ) ) ||
( ( side_l.strand == '-' ) &&
( side_r.strand == '-' ) &&
( query_l.qs_pos > query_r.qs_pos) ) )
type = SV_BreakPoint::DELETION;
else if ( ( ( side_l.strand == '+' ) &&
( side_r.strand == '+' ) &&
( query_l.qs_pos > query_r.qs_pos ) ) ||
( ( side_l.strand == '-' ) &&
( side_r.strand == '-' ) &&
( query_l.qs_pos < query_r.qs_pos) ) )
type = SV_BreakPoint::DUPLICATION;
else {
cerr << "ERROR IN BAM FILE. " <<
"TYPE not detected (DELETION,DUPLICATION,INVERSION)" <<
endl;
cerr << "\t" << query_l.qs_pos << "," << side_l.strand << "\t" <<
query_r.qs_pos << "," << side_r.strand << "\t" <<
tmp_a.chr << "," << tmp_a.start << "," << tmp_a.end << "\t" <<
tmp_b.chr << "," << tmp_b.start << "," << tmp_b.end << "\t" <<
endl;
throw(1);
}
weight = _weight;
id = _id;
}
示例11: realign_bam
//.........这里部分代码省略.........
if (params.debug) {
cerr << "unable to build DAG using unmapped read "
<< alignment.Name << " @ "
<< seqname << ":" << alignment.Position
<< " no previous mapped read found and DAG currently empty" << endl;
}
alignmentSortQueue[dag_start_position+dag_window_size].push_back(alignment);
continue;
}
*/
++total_reads;
BamAlignment originalAlignment = alignment;
long unsigned int initialAlignmentPosition = alignment.Position;
//if (dag_start_position == 1) {
// dag_start_position = max(1, (int)initialAlignmentPosition - dag_window_size/2);
//}
// should we construct a new DAG? do so when 3/4 of the way through the current one
// center on current position + 1/2 dag window
// TODO check this scheme using some scribbles on paper
// alignment.IsMapped()
if ((seqname != currentSeqname
|| ((alignment.Position + (alignment.QueryBases.size()/2)
> (3*dag_window_size/4) + dag_start_position)))
&& alignment.Position < reference.sequenceLength(seqname)) {
if (seqname != currentSeqname) {
if (params.debug) {
cerr << "switched ref seqs" << endl;
}
dag_start_position = max((long int) 0,
(long int) (alignment.GetEndPosition() - dag_window_size/2));
// recenter DAG
} else if (!ref_map.empty()) {
dag_start_position = dag_start_position + dag_window_size/2;
dag_start_position = max(dag_start_position,
(long int) (alignment.GetEndPosition() - dag_window_size/2));
} else {
dag_start_position = alignment.Position - dag_window_size/2;
}
dag_start_position = max((long int)0, dag_start_position);
// TODO get sequence length and use to bound noted window size (edge case)
//cerr << "getting ref " << seqname << " " << max((long int) 0, dag_start_position) << " " << dag_window_size << endl;
// get variants for new DAG
vector<vcf::Variant> variants;
if (!vcffile.setRegion(seqname,
dag_start_position + 1,
dag_start_position + dag_window_size)) {
// this is not necessarily an error; there should be a better way to check for VCF file validity
/*
cerr << "could not set region on VCF file to " << currentSeqname << ":"
<< dag_start_position << "-" << dag_start_position + ref.size()
<< endl;
*/
//exit(1);
} else {
// check first variant
if (vcffile.getNextVariant(var)) {
while (var.position <= dag_start_position + 1) {
//cerr << "var position == dag_start_position " << endl;
dag_start_position -= 1;
示例12: Tag
void TagBam::Tag() {
// open the annotations files for processing;
OpenAnnoFiles();
// open the BAM file
BamReader reader;
BamWriter writer;
if (!reader.Open(_bamFile)) {
cerr << "Failed to open BAM file " << _bamFile << endl;
exit(1);
}
// get header & reference information
string bamHeader = reader.GetHeaderText();
RefVector refs = reader.GetReferenceData();
// set compression mode
BamWriter::CompressionMode compressionMode = BamWriter::Compressed;
// if ( _isUncompressedBam ) compressionMode = BamWriter::Uncompressed;
writer.SetCompressionMode(compressionMode);
// open our BAM writer
writer.Open("stdout", bamHeader, refs);
// rip through the BAM file and test for overlaps with each annotation file.
BamAlignment al;
vector<BED> hits;
while (reader.GetNextAlignment(al)) {
if (al.IsMapped() == true) {
BED a;
a.chrom = refs.at(al.RefID).RefName;
a.start = al.Position;
a.end = al.GetEndPosition(false, false);
a.strand = "+";
if (al.IsReverseStrand()) a.strand = "-";
ostringstream annotations;
// annotate the BAM file based on overlaps with the annotation files.
for (size_t i = 0; i < _annoFiles.size(); ++i)
{
// grab the current annotation file.
BedFile *anno = _annoFiles[i];
if (!_useNames && !_useScores && !_useIntervals) {
// add the label for this annotation file to tag if there is overlap
if (anno->anyHits(a.chrom, a.start, a.end, a.strand,
_sameStrand, _diffStrand, _overlapFraction, false))
{
annotations << _annoLabels[i] << ";";
}
}
// use the score field
else if (!_useNames && _useScores && !_useIntervals) {
anno->allHits(a.chrom, a.start, a.end, a.strand,
hits, _sameStrand, _diffStrand, 0.0, false);
for (size_t i = 0; i < hits.size(); ++i) {
annotations << hits[i].score;
if (i < hits.size() - 1) annotations << ",";
}
if (hits.size() > 0) annotations << ";";
hits.clear();
}
// use the name field from the annotation files to populate tag
else if (_useNames && !_useScores && !_useIntervals) {
anno->allHits(a.chrom, a.start, a.end, a.strand,
hits, _sameStrand, _diffStrand, 0.0, false);
for (size_t j = 0; j < hits.size(); ++j) {
annotations << hits[j].name;
if (j < hits.size() - 1) annotations << ",";
}
if (hits.size() > 0) annotations << ";";
hits.clear();
}
// use the full interval information annotation files to populate tag
else if (!_useNames && !_useScores && _useIntervals) {
anno->allHits(a.chrom, a.start, a.end, a.strand,
hits, _sameStrand, _diffStrand, 0.0, false);
for (size_t j = 0; j < hits.size(); ++j) {
annotations << _annoLabels[i] << ":" <<
hits[j].chrom << ":" <<
hits[j].start << "-" <<
hits[j].end << "," <<
hits[j].name << "," <<
hits[j].score << "," <<
hits[j].strand;
if (j < hits.size() - 1) annotations << ",";
}
if (hits.size() > 0) annotations << ";";
hits.clear();
}
}
// were there any overlaps with which to make a tag?
if (annotations.str().size() > 0) {
al.AddTag(_tag, "Z", annotations.str().substr(0, annotations.str().size() - 1)); // get rid of the last ";"
}
}
writer.SaveAlignment(al);
}
reader.Close();
//.........这里部分代码省略.........
示例13: main
//.........这里部分代码省略.........
map <string, unsigned int> pileup;
bool isposPileup = false;
unsigned int old_start = 0;
unsigned int total_tags = 0;
unsigned int total_pos = 0;
unsigned int pileup_pos = 0;
BamAlignment bam;
while (reader.GetNextAlignment(bam)) {
if ( bam.IsMapped() == false ) continue; // skip unaligned reads
unsigned int unique;
bam.GetTag("NH", unique);
if (param->unique == 1) {
if (unique != 1) { // skipe uniquelly mapped reads
continue;
}
}
if (read_length == 0){
read_length = bam.Length;
}
//cout << bam.Name << endl;
string chrom = refs.at(bam.RefID).RefName;
string strand = "+";
if (bam.IsReverseStrand()) strand = "-";
unsigned int alignmentStart = bam.Position+1;
unsigned int mateStart;
if (type == "p") mateStart = bam.MatePosition+1;
unsigned int alignmentEnd = bam.GetEndPosition();
unsigned int cigarEnd;
vector <int> blockLengths;
vector <int> blockStarts;
blockStarts.push_back(0);
ParseCigar(bam.CigarData, blockStarts, blockLengths, cigarEnd);
// position check for unique mapped reads (because is paired-end reads, shoule base on fragment level for paired end reads)
if (posc == true && unique == 1) {
if (type == "p" && fragment.count(bam.Name) > 0)
fragment.erase(bam.Name);
else {
total_tags++;
if (type == "p"){
fragment.insert(bam.Name);
}
string alignSum;
if (type == "p") {
alignSum = int2str(alignmentStart) + "\t" + int2str(mateStart) + "\t.\t" + strand;
} else {
alignSum = int2str(alignmentStart) + "\t" + int2str(alignmentEnd) + "\t.\t" + strand;
}
if ( alignmentStart != old_start ) {
isposPileup = false;
map <string, unsigned int>::iterator pit = pileup.begin();
for (; pit != pileup.end(); pit++) {
posc_f << chrom << "\truping\tpileup\t" << pit->first << "\t.\t" << "Pileup=" << pit->second << endl; //print pileup
}
示例14: main_aseregion
int main_aseregion(const vector<string> &all_args)
{
Init(all_args);
cerr << "* Reading bam file " << endl;
OpenBam(bam_reader, bam_file);
bam_reader.OpenIndex(bam_file + ".bai");
vector<string> readGroupVector; //Obtain all the readgroups.
SamHeader header = bam_reader.GetHeader();
SamReadGroupDictionary headerRG = header.ReadGroups;
for (SamReadGroupIterator it = headerRG.Begin(); it != headerRG.End(); it ++)
{
readGroupVector.push_back(it -> ID);
}
cout << "#CHROM" << "\t" << "StartPos" << "\t" << "EndPos";
for (vector<string>::iterator it = readGroupVector.begin(); it != readGroupVector.end(); it ++)
{
cout << "\t" << *it;
}
cout << endl;
vector<RefData> chroms = bam_reader.GetReferenceData();
StlFor(chrom_idx, chroms)
{
string &chrom = chroms[chrom_idx].RefName;
cerr << "* On chrom " << chrom << endl;
map<string, vector<GenomicRegion> >::iterator searchIt = chrom_genomicRegions.find(chrom);
BamAlignment startPointer; // This pointer will point to the region immediately before the start of current regions under inspection.
bam_reader.Jump(chrom_idx);
if (!bam_reader.GetNextAlignment(startPointer))
break;
int count = 0;
// For each region, walk through all the reads correspoinding to this region and count the reads.
for (vector<GenomicRegion>::iterator it = searchIt -> second.begin(); it != searchIt -> second.end(); ++it)
{
bam_reader.Jump(chrom_idx, startPointer.Position); // Fix the reading pointer.
if (!bam_reader.GetNextAlignment(startPointer))
break;
int flag = 0;
while (true)
{
int startEnd = startPointer.GetEndPosition();
if (startEnd < it -> start)
{
if (!bam_reader.GetNextAlignment(startPointer))
{
flag = 1;
break;
}
}
else
{
break;
}
}
if (flag == 1)
{
break;
}
// Now startPointer assumes its rightful position.
BamAlignment nextPointer = startPointer; //This pointer traverse through all reads that align to the current genomic region in bed file and the iteration ends when this pointer pass through the end of the region.
while (true)
{
int nextStart = nextPointer.Position;
if (nextStart > it -> end)
{
break; // This iteration is done.
}
if (nextPointer.MapQuality < min_map_qual)
{
if (!bam_reader.GetNextAlignment(nextPointer))
{
break;
}
continue;
}
string currentRG;
Assert(nextPointer.GetReadGroup(currentRG));
map<string, int> &RG_counts = nextPointer.IsReverseStrand() ? it -> revCounts : it -> fwdCounts;
map<string, int>::iterator searchItForRG = RG_counts.find(currentRG);
if (searchItForRG == RG_counts.end())
{
RG_counts[currentRG] = 1;
}
else
{
++ RG_counts[currentRG];
}
if (!bam_reader.GetNextAlignment(nextPointer))
//.........这里部分代码省略.........
示例15: CropBam
int CropBamTool::CropBam()
{
// open bam files
BamMultiReader bamReader;
bamReader.Open(bamFiles);
// the dictionary of chromosomes
RefVector genome = bamReader.GetReferenceData();
// get the scanning window
vector<tuple<int,int,int>> windows;
int numWindows = GenericRegionTools::toScanWindow(genome, regionStrings, windows);
unordered_set<string> readpool;
// temporary struct for sequence object
typedef struct {
string name;
int head_soft_clip;
int tail_soft_clip;
string seq;
string qual;
}cropbam_seq_t;
// temporary struct for unique seqs
map<string,list<cropbam_seq_t>> uniqueSeqPool;
// lambda expression for output
auto Output = [this](cropbam_seq_t &a){
if (this->outFormat=="fasta"){
cout << ">" << a.name << "\t"
<< "head_soft_clip=" << a.head_soft_clip << "\t"
<< "tail_soft_clip=" << a.tail_soft_clip << "\t"
<< endl
<< a.seq << endl;
}
if (this->outFormat=="fastq"){
cout << "@" << a.name << "\t"
<< "head_soft_clip=" << a.head_soft_clip << "\t"
<< "tail_soft_clip=" << a.tail_soft_clip << "\t"
<< endl
<< a.seq << endl;
cout << "+" << endl
<< a.qual << endl;
}
};
// loop over windows
omp_set_dynamic(0);
omp_set_num_threads(numThreads);
#pragma omp parallel for shared(genome)
for (int i=0; i<numWindows; i++)
{
clock_t tStart = clock();
bamReader.Open(bamFiles);
int wId = get<0>(windows[i]);
int wLp = get<1>(windows[i]);
int wRp = get<2>(windows[i]);
if (verbose>=1) Verbose("process the window " + genome[wId].RefName + ":" + to_string(wLp+1) + "-" + to_string(wRp));
// rewind the bam reader
bamReader.Rewind();
// set the region
bamReader.SetRegion(wId, wLp, wId, wRp);
int numReads = 0;
// retrieve the alignment
BamAlignment aln;
while (bamReader.GetNextAlignment(aln))
{
// skip the alignment if it doesn't overlap the window
if (aln.Position>=wRp || aln.GetEndPosition()<=wLp)
continue;
// skip the invalid alignment
if (!isValidAlignment(aln, readLenThres, mapQualThres, alnFlagMarker))
continue;
// skip the alignment harboring too many mismatches
if (!GenericBamAlignmentTools::validReadIdentity(aln, 1-alnIdenThres))
continue;
stringstream keyss;
keyss << GenericBamAlignmentTools::getBamAlignmentName(aln) << "-"
<< wId << "-" << wLp << "-" << wRp;
string key = keyss.str();
auto ptr = readpool.find(key);
if (ptr!=readpool.end())
continue;
readpool.emplace(key);
// get the partial read
string readSegment, readQualSegment, genomeSegment;
GenericBamAlignmentTools::getLocalAlignment(aln, wLp, wRp-wLp, readSegment, readQualSegment, genomeSegment);
// add soft clip
int hsc=0;
//.........这里部分代码省略.........