本文整理汇总了C++中BamAlignment::GetTag方法的典型用法代码示例。如果您正苦于以下问题:C++ BamAlignment::GetTag方法的具体用法?C++ BamAlignment::GetTag怎么用?C++ BamAlignment::GetTag使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类BamAlignment
的用法示例。
在下文中一共展示了BamAlignment::GetTag方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: processAlignment
pos_t VariantProcessor::processAlignment(const BamAlignment& alignment) {
/*
For each alignment, extract the MD and NM tags, validate against
CIGAR string, and create Variants and ReadHaplotypes. All reads
for a block are stored in a deque, and processed again to create
candidate haplotypes.
Returns the start position of this alignment (TODO correct?)
*/
if (!alignment.HasTag("NM") || !alignment.HasTag("MD")) {
std::cerr << "error: BamAlignment '" << alignment.Name <<
"' does not have either NM or MD tags" << std::endl;
}
int nm_tag;
string md_tag;
unsigned int aln_len = alignment.GetEndPosition() - alignment.Position;
alignment.GetTag("MD", md_tag);
alignment.GetTag("NM", nm_tag);
// Reconstruct reference sequence using MD tags
string refseq = createReferenceSequence(alignment);
// With reconstructed reference sequence and query sequence, look
// for variants. It's a bit roundabout to reconstruct reference from
// MD, then use it to find variants (already in MD) but keeping
// state between CIGAR and MD is tricky. This also is a good
// validation; variants found must much the number of variants in
// CIGAR/MD.
vector<VariantPtr> variants;
vector<VariantPtr> read_variants;
const vector<CigarOp>& cigar = alignment.CigarData;
int refpos = 0, readpos = 0;
for (vector<CigarOp>::const_iterator op = cigar.begin(); op != cigar.end(); ++op) {
if (op->Type == 'S') {
readpos += op->Length;
} else if (op->Type == 'M') {
// match or SNP
processMatchOrMismatch(alignment, read_variants, op->Length, refseq, refpos, readpos);
readpos += op->Length;
refpos += op->Length;
} else if (op->Type == 'I') {
processInsertion(alignment, read_variants, op->Length, refseq, refpos, readpos);
readpos += op->Length;
} else if (op->Type == 'D') {
processDeletion(alignment, read_variants, op->Length, refseq, refpos, readpos);
refpos += op->Length; // deletion w.r.t reference; skip ref length
} else {
cerr << "error: unidentified CIGAR type: " << op->Type << endl;
exit(1);
}
}
// Add to alignments list
block_alignments.push_back(alignment);
return 0; // TODO(vsbuffalo)
}
示例2: createReferenceSequence
string createReferenceSequence(const BamAlignment& alignment) {
// Recreate a reference sequence for a particular alignment. This is
// the reference sequence that is identical to the reference at this
// spot. This means skipping insertions or soft clipped regions in
// reads, adding deletions back in, and keeping read matches.
const vector<CigarOp> cigar = alignment.CigarData;
const string querybases = alignment.QueryBases;
string md_tag;
alignment.GetTag("MD", md_tag);
vector<MDToken> tokens;
string refseq, alignedseq; // final ref bases; aligned portion of ref bases
int md_len = TokenizeMD(md_tag, tokens);
// Create reference-aligned sequence of read; doesn't contain soft
// clips or insertions. Then, deletions and reference alleles are
// added onto this.
int pos=0;
for (vector<CigarOp>::const_iterator op = cigar.begin(); op != cigar.end(); ++op) {
if (!(op->Type == 'S' || op->Type == 'I')) {
alignedseq.append(querybases.substr(pos, op->Length));
pos += op->Length;
} else {
pos += op->Length; // increment read position past skipped bases
}
}
// the size of the aligned sequence MUST equal what is returned from
// TokenizeMD: the number of aligned bases. Not the real reference
// sequence is this length + deletions, which we add in below.
assert(alignedseq.size() == md_len);
pos = 0;
for (vector<MDToken>::const_iterator it = tokens.begin(); it != tokens.end(); ++it) {
if (it->type == MDType::isMatch) {
refseq.append(alignedseq.substr(pos, it->length));
pos += it->length;
} else if (it->type == MDType::isSNP) {
assert(it->length == it->seq.size());
refseq.append(it->seq);
pos += it->length;
} else if (it->type == MDType::isDel) {
// does not increment position in alignedseq
assert(it->length == it->seq.size());
refseq.append(it->seq);
} else {
assert(false);
}
}
return refseq;
}
示例3:
// reverts a BAM alignment
// default behavior (for now) is : replace Qualities with OQ, clear IsDuplicate flag
// can override default behavior using command line options
void RevertTool::RevertToolPrivate::RevertAlignment(BamAlignment& al) {
// replace Qualities with OQ, if requested
if ( !m_settings->IsKeepQualities ) {
string originalQualities;
if ( al.GetTag(m_OQ, originalQualities) ) {
al.Qualities = originalQualities;
al.RemoveTag(m_OQ);
}
}
// clear duplicate flag, if requested
if ( !m_settings->IsKeepDuplicateFlag )
al.SetIsDuplicate(false);
}
示例4: main
int main (int argc, char *argv[]) {
if( (argc== 1) ||
(argc== 2 && string(argv[1]) == "-h") ||
(argc== 2 && string(argv[1]) == "-help") ||
(argc== 2 && string(argv[1]) == "--help") ){
cout<<"Usage:editDist [in bam]"<<endl<<"this program returns the NM field of all aligned reads"<<endl;
return 1;
}
string bamfiletopen = string(argv[1]);
// cout<<bamfiletopen<<endl;
BamReader reader;
// cout<<"ok"<<endl;
if ( !reader.Open(bamfiletopen) ) {
cerr << "Could not open input BAM files." << endl;
return 1;
}
BamAlignment al;
// cout<<"ok"<<endl;
while ( reader.GetNextAlignment(al) ) {
// cout<<al.Name<<endl;
if(!al.IsMapped())
continue;
if(al.HasTag("NM") ){
int editDist;
if(al.GetTag("NM",editDist) ){
cout<<editDist<<endl;
}else{
cerr<<"Cannot retrieve NM field for "<<al.Name<<endl;
return 1;
}
}else{
cerr<<"Warning: read "<<al.Name<<" is aligned but has no NM field"<<endl;
}
} //while al
reader.Close();
return 0;
}
示例5: getLibraryName
/**
* Gets the library name from the header for the record. If the RG tag is not present on
* the record, or the library isn't denoted on the read group, a constant string is
* returned.
*/
string MarkDuplicates::getLibraryName(SamHeader & header, const BamAlignment & rec) {
string read_group;
static const string RG("RG");
static const string unknown_library("Unknown Library");
rec.GetTag(RG, read_group);
if (read_group.size() > 0 && header.ReadGroups.Contains(read_group)) {
SamReadGroupDictionary & d = header.ReadGroups;
const SamReadGroup & rg = d[read_group];
if(rg.HasLibrary()) {
return rg.Library;
}
}
return unknown_library;
}
示例6: IdentifySample
//bool SampleManager::IdentifySample(Alignment& ra) const
bool SampleManager::IdentifySample(const BamAlignment& alignment, int& sample_index, bool& primary_sample) const
{
string read_group;
if (!alignment.GetTag("RG", read_group)) {
cerr << "ERROR: Couldn't find read group id (@RG tag) for BAM Alignment " << alignment.Name << endl;
exit(1);
}
map<string,int>::const_iterator I = read_group_to_sample_idx_.find(read_group);
if (I == read_group_to_sample_idx_.end())
return false;
sample_index =I->second;
primary_sample = (sample_index == primary_sample_);
return true;
}
示例7: IonstatsTestFragments
int IonstatsTestFragments(int argc, const char *argv[])
{
OptArgs opts;
opts.ParseCmdLine(argc, argv);
string input_bam_filename = opts.GetFirstString('i', "input", "");
string fasta_filename = opts.GetFirstString('r', "ref", "");
string output_json_filename = opts.GetFirstString('o', "output", "ionstats_tf.json");
int histogram_length = opts.GetFirstInt ('h', "histogram-length", 400);
if(argc < 2 or input_bam_filename.empty() or fasta_filename.empty()) {
IonstatsTestFragmentsHelp();
return 1;
}
//
// Prepare for metric calculation
//
map<string,string> tf_sequences;
PopulateReferenceSequences(tf_sequences, fasta_filename);
BamReader input_bam;
if (!input_bam.Open(input_bam_filename)) {
fprintf(stderr, "[ionstats] ERROR: cannot open %s\n", input_bam_filename.c_str());
return 1;
}
int num_tfs = input_bam.GetReferenceCount();
SamHeader sam_header = input_bam.GetHeader();
if(!sam_header.HasReadGroups()) {
fprintf(stderr, "[ionstats] ERROR: no read groups in %s\n", input_bam_filename.c_str());
return 1;
}
string flow_order;
string key;
for (SamReadGroupIterator rg = sam_header.ReadGroups.Begin(); rg != sam_header.ReadGroups.End(); ++rg) {
if(rg->HasFlowOrder())
flow_order = rg->FlowOrder;
if(rg->HasKeySequence())
key = rg->KeySequence;
}
// Need these metrics stratified by TF.
vector<ReadLengthHistogram> called_histogram(num_tfs);
vector<ReadLengthHistogram> aligned_histogram(num_tfs);
vector<ReadLengthHistogram> AQ10_histogram(num_tfs);
vector<ReadLengthHistogram> AQ17_histogram(num_tfs);
vector<SimpleHistogram> error_by_position(num_tfs);
vector<MetricGeneratorSNR> system_snr(num_tfs);
vector<MetricGeneratorHPAccuracy> hp_accuracy(num_tfs);
for (int tf = 0; tf < num_tfs; ++tf) {
called_histogram[tf].Initialize(histogram_length);
aligned_histogram[tf].Initialize(histogram_length);
AQ10_histogram[tf].Initialize(histogram_length);
AQ17_histogram[tf].Initialize(histogram_length);
error_by_position[tf].Initialize(histogram_length);
}
vector<uint16_t> flow_signal_fz(flow_order.length());
vector<int16_t> flow_signal_zm(flow_order.length());
const RefVector& refs = input_bam.GetReferenceData();
// Missing:
// - hp accuracy - tough, copy verbatim from TFMapper?
BamAlignment alignment;
vector<char> MD_op;
vector<int> MD_len;
MD_op.reserve(1024);
MD_len.reserve(1024);
string MD_tag;
//
// Main loop over mapped reads in the input BAM
//
while(input_bam.GetNextAlignment(alignment)) {
if (!alignment.IsMapped() or !alignment.GetTag("MD",MD_tag))
continue;
// The check below eliminates unexpected alignments
if (alignment.IsReverseStrand() or alignment.Position > 5)
continue;
int current_tf = alignment.RefID;
//
// Step 1. Parse MD tag
//
//.........这里部分代码省略.........
示例8: if
//{{{ void process_intra_chrom_split(const BamAlignment &curr,
void
SV_SplitRead::
process_intra_chrom_split(const BamAlignment &curr,
const RefVector refs,
BamWriter &inter_chrom_reads,
map<string, BamAlignment> &mapped_splits,
UCSCBins<SV_BreakPoint*> &r_bin,
int weight,
int id,
int sample_id,
SV_SplitReadReader *_reader)
{
if (mapped_splits.find(curr.Name) == mapped_splits.end()) {
uint32_t clipped = count_clipped(curr.CigarData);
if ( curr.HasTag("YP") == true) {
uint32_t t;
curr.GetTag("YP", t);
if (t == 2)
mapped_splits[curr.Name] = curr;
}
else if (clipped >= _reader->min_clip)
mapped_splits[curr.Name] = curr;
} else {
if ( mapped_splits[curr.Name].RefID == curr.RefID ) {
try {
SV_SplitRead *new_split_read =
new SV_SplitRead(mapped_splits[curr.Name],
curr,
refs,
weight,
id,
sample_id,
_reader);
SV_BreakPoint *new_bp = NULL;
if (new_split_read->is_sane()) {
new_bp = new_split_read->get_bp();
if (new_bp != NULL) {
new_bp->cluster(r_bin);
} else {
cerr << "Alignment name:" << curr.Name << endl;
free(new_split_read);
}
} else
free(new_split_read);
} catch (int) {
cerr << "Error creating split read: " << endl;
}
} else {
BamAlignment al1 = curr;
BamAlignment al2 = mapped_splits[curr.Name];
al1.MateRefID = al2.RefID;
al2.MateRefID = al1.RefID;
al1.MatePosition = al2.Position;
al2.MatePosition = al1.Position;
string x = _reader->get_source_file_name();
al1.AddTag("LS","Z",x);
al2.AddTag("LS","Z",x);
inter_chrom_reads.SaveAlignment(al1);
inter_chrom_reads.SaveAlignment(al2);
}
mapped_splits.erase(curr.Name);
}
}
示例9: main
int main (int argc, char *argv[]) {
if( (argc!= 3) ||
(argc== 2 && string(argv[1]) == "-h") ||
(argc== 2 && string(argv[1]) == "-help") ||
(argc== 2 && string(argv[1]) == "--help") ){
cerr<<"Usage:splitByRG [in bam] [out prefix]"<<endl<<"this program creates one bam file per RG in the with the outprefix\nFor example splitByRG in.bam out will create\nout.rg1.bam\nout.rg2.bam\n"<<endl;
return 1;
}
string bamfiletopen = string(argv[1]);
// if(!strEndsWith(bamfiletopen,".bam")){
// }
string bamDirOutPrefix = string(argv[2]);
map<string,BamWriter *> rg2BamWriter;
// if(!isDirectory(bamDirOut)){
// cerr<<"ERROR: the out directory does not exist"<<endl;
// return 1;
// }
BamReader reader;
if ( !reader.Open(bamfiletopen) ) {
cerr << "Could not open input BAM files." << endl;
return 1;
}
SamHeader header = reader.GetHeader();
const RefVector references = reader.GetReferenceData();
vector<RefData> refData=reader.GetReferenceData();
string pID = "splitByRG";
string pName = "splitByRG";
string pCommandLine = "";
for(int i=0;i<(argc);i++){
pCommandLine += (string(argv[i])+" ");
}
putProgramInHeader(&header,pID,pName,pCommandLine,returnGitHubVersion(string(argv[0]),".."));
SamReadGroupDictionary srgd=header.ReadGroups;
for(SamReadGroupConstIterator srgci=srgd.ConstBegin();
srgci<srgd.ConstEnd();
srgci++){
//cout<<*srgci<<endl;
const SamReadGroup rg = (*srgci);
//cout<<rg.ID<<endl;
rg2BamWriter[rg.ID] = new BamWriter();
rg2BamWriter[rg.ID]->Open(bamDirOutPrefix+"."+rg.ID+".bam",header,references);
}
BamAlignment al;
unsigned int total=0;
while ( reader.GetNextAlignment(al) ) {
// al.SetIsFailedQC(false);
// writer.SaveAlignment(al);
// if(al.IsMapped () ){
// if(rg2BamWriter.find(refData[al.RefID].RefName) == rg2BamWriter.end()){ //new
// rg2BamWriter[refData[al.RefID].RefName] = new BamWriter();
// if ( !rg2BamWriter[refData[al.RefID].RefName]->Open(bamDirOutPrefix+"."+refData[al.RefID].RefName+".bam",header,references) ) {
// cerr << "Could not open output BAM file "<< bamDirOutPrefix<<"."<<refData[al.RefID].RefName<<".bam" << endl;
// return 1;
// }
// }else{
// rg2BamWriter[refData[al.RefID].RefName]->SaveAlignment(al);
// }
// }else{
// unmapped.SaveAlignment(al);
// }
if(al.HasTag("RG")){
string rgTag;
al.GetTag("RG",rgTag);
//cout<<rgTag<<endl;
if(rg2BamWriter.find(rgTag) == rg2BamWriter.end()){ //new
cerr<<"Found new RG "<<rgTag<<endl;
rg2BamWriter[rgTag] = new BamWriter();
if ( !rg2BamWriter[rgTag]->Open(bamDirOutPrefix+"."+rgTag+".bam",header,references) ) {
cerr << "Could not open output BAM file "<< bamDirOutPrefix<<"."<<rgTag<<".bam" << endl;
return 1;
}
rg2BamWriter[rgTag]->SaveAlignment(al);
}else{
rg2BamWriter[rgTag]->SaveAlignment(al);
}
}else{
string rgTag="unknown";
//cout<<rgTag<<endl;
if(rg2BamWriter.find(rgTag) == rg2BamWriter.end()){ //new
cerr<<"Found new RG "<<rgTag<<endl;
rg2BamWriter[rgTag] = new BamWriter();
if ( !rg2BamWriter[rgTag]->Open(bamDirOutPrefix+"."+rgTag+".bam",header,references) ) {
cerr << "Could not open output BAM file "<< bamDirOutPrefix<<"."<<rgTag<<".bam" << endl;
return 1;
}
//.........这里部分代码省略.........
示例10: main
//.........这里部分代码省略.........
time_t start_time = time(NULL);
Realigner aligner;
aligner.verbose_ = verbose;
aligner.debug_ = debug;
if (!aligner.SetScores(score_vals))
cout << "bamrealignment: Four scores need to be provided: match, mismatch, gap open, gap extend score!" << endl;
aligner.SetAlignmentBandwidth(bandwidth);
BamAlignment alignment;
while(reader.GetNextAlignment(alignment)){
readcounter ++;
position_shift = false;
if ( (readcounter % 100000) == 0 )
cout << "Processed " << readcounter << " reads. Elapsed time: " << (time(NULL) - start_time) << endl;
if (alignment.IsMapped()) {
orig_position = alignment.Position;
mapped_readcounter++;
aligner.SetClipping(clipping, !alignment.IsReverseStrand());
if (aligner.verbose_) {
cout << endl;
if (alignment.IsReverseStrand())
cout << "The read is from the reverse strand." << endl;
else
cout << "The read is from the forward strand." << endl;
}
if (!alignment.GetTag("MD", md_tag)) {
if (aligner.verbose_)
cout << "Warning: Skipping read " << alignment.Name << ". It is mapped but missing MD tag." << endl;
if (logf.is_open ())
logf << alignment.Name << '\t' << alignment.IsReverseStrand() << '\t' << alignment.RefID << '\t' << setfill ('0') << setw (8) << orig_position << '\t' << "MISSMD" << '\n';
bad_md_tag_readcount++;
} else if (aligner.CreateRefFromQueryBases(alignment.QueryBases, alignment.CigarData, md_tag, anchors)) {
bool clipfail = false;
if (Realigner::CR_ERR_CLIP_ANCHOR == aligner.GetCreateRefError ())
{
clipfail = true;
failed_clip_realigned_readcount ++;
}
if (!aligner.computeSWalignment(new_cigar_data, new_md_data, start_position_shift)) {
if (aligner.verbose_)
cout << "Error in the alignment! Not updating read information." << endl;
if (logf.is_open ())
logf << alignment.Name << '\t' << alignment.IsReverseStrand() << '\t' << alignment.RefID << '\t' << setfill ('0') << setw (8) << orig_position << '\t' << "SWERR" << '\n';
error_sw_readcount++;
writer.SaveAlignment(alignment); // Write alignment unchanged
continue;
}
if (!aligner.addClippedBasesToTags(new_cigar_data, new_md_data, alignment.QueryBases.size())) {
if (aligner.verbose_)
cout << "Error when adding clipped anchors back to tags! Not updating read information." << endl;
if (logf.is_open ())
logf << alignment.Name << '\t' << alignment.IsReverseStrand() << '\t' << alignment.RefID << '\t' << setfill ('0') << setw (8) << orig_position << '\t' << "UNCLIPERR" << '\n';
writer.SaveAlignment(alignment); // Write alignment unchanged
error_unclip_readcount ++;
continue;
}
示例11: checkAlignmentTag
bool checkAlignmentTag(const PropertyFilterValue& valueFilter, const BamAlignment& al) {
// ensure filter contains string data
Variant entireTagFilter = valueFilter.Value;
if ( !entireTagFilter.is_type<string>() ) return false;
// localize string from variant
const string& entireTagFilterString = entireTagFilter.get<string>();
// ensure we have at least "XX:x"
if ( entireTagFilterString.length() < 4 ) return false;
// get tagName & lookup in alignment
// if found, set tagType to tag type character
// if not found, return false
const string& tagName = entireTagFilterString.substr(0,2);
char tagType = '\0';
if ( !al.GetTagType(tagName, tagType) ) return false;
// remove tagName & ":" from beginning tagFilter
string tagFilterString = entireTagFilterString.substr(3);
// switch on tag type to set tag query value & parse filter token
int32_t intFilterValue, intQueryValue;
uint32_t uintFilterValue, uintQueryValue;
float realFilterValue, realQueryValue;
string stringFilterValue, stringQueryValue;
PropertyFilterValue tagFilter;
PropertyFilterValue::ValueCompareType compareType;
bool keepAlignment = false;
switch (tagType) {
// signed int tag type
case 'c' :
case 's' :
case 'i' :
if ( al.GetTag(tagName, intQueryValue) ) {
if ( FilterEngine<BamAlignmentChecker>::parseToken(tagFilterString, intFilterValue, compareType) ) {
tagFilter.Value = intFilterValue;
tagFilter.Type = compareType;
keepAlignment = tagFilter.check(intQueryValue);
}
}
break;
// unsigned int tag type
case 'C' :
case 'S' :
case 'I' :
if ( al.GetTag(tagName, uintQueryValue) ) {
if ( FilterEngine<BamAlignmentChecker>::parseToken(tagFilterString, uintFilterValue, compareType) ) {
tagFilter.Value = uintFilterValue;
tagFilter.Type = compareType;
keepAlignment = tagFilter.check(uintQueryValue);
}
}
break;
// 'real' tag type
case 'f' :
if ( al.GetTag(tagName, realQueryValue) ) {
if ( FilterEngine<BamAlignmentChecker>::parseToken(tagFilterString, realFilterValue, compareType) ) {
tagFilter.Value = realFilterValue;
tagFilter.Type = compareType;
keepAlignment = tagFilter.check(realQueryValue);
}
}
break;
// string tag type
case 'A':
case 'Z':
case 'H':
if ( al.GetTag(tagName, stringQueryValue) ) {
if ( FilterEngine<BamAlignmentChecker>::parseToken(tagFilterString, stringFilterValue, compareType) ) {
tagFilter.Value = stringFilterValue;
tagFilter.Type = compareType;
keepAlignment = tagFilter.check(stringQueryValue);
}
}
break;
// unknown tag type
default :
keepAlignment = false;
}
return keepAlignment;
}
示例12: main
int main (int argc, char *argv[]) {
if( (argc== 1) ||
(argc== 2 && string(argv[1]) == "-h") ||
(argc== 2 && string(argv[1]) == "-help") ||
(argc== 2 && string(argv[1]) == "--help") ){
cout<<"Usage:"<<endl;
cout<<""<<endl;
cout<<"plotQualScore input.bam"<<endl;
return 1;
}
string bamfiletopen = string(argv[1]);
BamReader reader;
if ( !reader.Open(bamfiletopen) ) {
cerr << "Could not open input BAM files." << endl;
return 1;
}
// if ( !reader.LocateIndex() ){
// cerr << "warning: cannot locate index for file " << bamfiletopen<<endl;
// //return 1;
// }
BamAlignment al;
BamAlignment al2;
bool unsurePEorSE=true;
bool pe=true;
int strLength=-1;
int vecLengthToUse=-1;
map<short,unsigned long> ** counterA = 0;
map<short,unsigned long> ** counterC = 0;
map<short,unsigned long> ** counterG = 0;
map<short,unsigned long> ** counterT = 0;
int lengthIndex1=0;
int lengthIndex2=0;
string seqInd1;
string seqInd2;
string qualInd1;
string qualInd2;
int offsetInd2;
while ( reader.GetNextAlignment(al) ) {
if(unsurePEorSE){
strLength=al.QueryBases.length();
if(al.IsPaired()){
pe=true;
vecLengthToUse=2*strLength;
}else{
pe=false;
vecLengthToUse=strLength;
}
string index1;
string index2;
if(al.HasTag("XI")){
al.GetTag("XI",index1);
vecLengthToUse+=index1.length();
lengthIndex1=index1.length();
}
if(al.HasTag("XJ")){
al.GetTag("XJ",index2);
vecLengthToUse+=index2.length();
lengthIndex2=index2.length();
}
counterA = new map<short,unsigned long> * [vecLengthToUse];
counterC = new map<short,unsigned long> * [vecLengthToUse];
counterG = new map<short,unsigned long> * [vecLengthToUse];
counterT = new map<short,unsigned long> * [vecLengthToUse];
for(int i=0;i<vecLengthToUse;i++){
counterA[i]=new map<short,unsigned long> ();
counterC[i]=new map<short,unsigned long> ();
counterG[i]=new map<short,unsigned long> ();
counterT[i]=new map<short,unsigned long> ();
for(short k=minQualScore;k<=maxQualScore;k++){
(*counterA[i])[k]=0;
(*counterC[i])[k]=0;
(*counterG[i])[k]=0;
(*counterT[i])[k]=0;
}
}
unsurePEorSE=false;
}else{
if(pe &&
!al.IsPaired()){
cerr << "Cannot have unpaired reads in PE mode" << endl;
return 1;
}
if(!pe &&
al.IsPaired()){
cerr << "Cannot have unpaired reads in SE mode" << endl;
return 1;
}
}
//.........这里部分代码省略.........
示例13: IonstatsAlignment
int IonstatsAlignment(int argc, const char *argv[])
{
OptArgs opts;
opts.ParseCmdLine(argc, argv);
string input_bam_filename = opts.GetFirstString('i', "input", "");
string output_json_filename = opts.GetFirstString('o', "output", "ionstats_alignment.json");
int histogram_length = opts.GetFirstInt ('h', "histogram-length", 400);
if(argc < 2 or input_bam_filename.empty()) {
IonstatsAlignmentHelp();
return 1;
}
//
// Prepare for metric calculation
//
BamReader input_bam;
if (!input_bam.Open(input_bam_filename)) {
fprintf(stderr, "[ionstats] ERROR: cannot open %s\n", input_bam_filename.c_str());
return 1;
}
ReadLengthHistogram called_histogram;
ReadLengthHistogram aligned_histogram;
ReadLengthHistogram AQ7_histogram;
ReadLengthHistogram AQ10_histogram;
ReadLengthHistogram AQ17_histogram;
ReadLengthHistogram AQ20_histogram;
ReadLengthHistogram AQ47_histogram;
SimpleHistogram error_by_position;
called_histogram.Initialize(histogram_length);
aligned_histogram.Initialize(histogram_length);
AQ7_histogram.Initialize(histogram_length);
AQ10_histogram.Initialize(histogram_length);
AQ17_histogram.Initialize(histogram_length);
AQ20_histogram.Initialize(histogram_length);
AQ47_histogram.Initialize(histogram_length);
error_by_position.Initialize(histogram_length);
BamAlignment alignment;
vector<char> MD_op;
vector<int> MD_len;
MD_op.reserve(1024);
MD_len.reserve(1024);
string MD_tag;
//
// Main loop over mapped reads in the input BAM
//
while(input_bam.GetNextAlignment(alignment)) {
// Record read length
called_histogram.Add(alignment.Length);
if (!alignment.IsMapped() or !alignment.GetTag("MD",MD_tag))
continue;
//
// Step 1. Parse MD tag
//
MD_op.clear();
MD_len.clear();
for (const char *MD_ptr = MD_tag.c_str(); *MD_ptr;) {
int item_length = 0;
if (*MD_ptr >= '0' and *MD_ptr <= '9') { // Its a match
MD_op.push_back('M');
for (; *MD_ptr and *MD_ptr >= '0' and *MD_ptr <= '9'; ++MD_ptr)
item_length = 10*item_length + *MD_ptr - '0';
} else {
if (*MD_ptr == '^') { // Its a deletion
MD_ptr++;
MD_op.push_back('D');
} else // Its a substitution
MD_op.push_back('X');
for (; *MD_ptr and *MD_ptr >= 'A' and *MD_ptr <= 'Z'; ++MD_ptr)
item_length++;
}
MD_len.push_back(item_length);
}
//
// Step 2. Synchronously scan through Cigar and MD, doing error accounting
//
int MD_idx = alignment.IsReverseStrand() ? MD_op.size()-1 : 0;
int cigar_idx = alignment.IsReverseStrand() ? alignment.CigarData.size()-1 : 0;
int increment = alignment.IsReverseStrand() ? -1 : 1;
int AQ7_bases = 0;
int AQ10_bases = 0;
int AQ17_bases = 0;
int AQ20_bases = 0;
int AQ47_bases = 0;
int num_bases = 0;
//.........这里部分代码省略.........
示例14: IonstatsBasecaller
int IonstatsBasecaller(int argc, const char *argv[])
{
OptArgs opts;
opts.ParseCmdLine(argc, argv);
string input_bam_filename = opts.GetFirstString('i', "input", "");
string output_json_filename = opts.GetFirstString('o', "output", "ionstats_basecaller.json");
int histogram_length = opts.GetFirstInt ('h', "histogram-length", 400);
if(argc < 2 or input_bam_filename.empty()) {
IonstatsBasecallerHelp();
return 1;
}
BamReader input_bam;
if (!input_bam.Open(input_bam_filename)) {
fprintf(stderr, "[ionstats] ERROR: cannot open %s\n", input_bam_filename.c_str());
return 1;
}
SamHeader sam_header = input_bam.GetHeader();
if(!sam_header.HasReadGroups()) {
fprintf(stderr, "[ionstats] ERROR: no read groups in %s\n", input_bam_filename.c_str());
return 1;
}
ReadLengthHistogram total_full_histo;
ReadLengthHistogram total_insert_histo;
ReadLengthHistogram total_Q17_histo;
ReadLengthHistogram total_Q20_histo;
total_full_histo.Initialize(histogram_length);
total_insert_histo.Initialize(histogram_length);
total_Q17_histo.Initialize(histogram_length);
total_Q20_histo.Initialize(histogram_length);
MetricGeneratorSNR system_snr;
BaseQVHistogram qv_histogram;
string flow_order;
string key;
for (SamReadGroupIterator rg = sam_header.ReadGroups.Begin(); rg != sam_header.ReadGroups.End(); ++rg) {
if(rg->HasFlowOrder())
flow_order = rg->FlowOrder;
if(rg->HasKeySequence())
key = rg->KeySequence;
}
double qv_to_error_rate[256];
for (int qv = 0; qv < 256; qv++)
qv_to_error_rate[qv] = pow(10.0,-0.1*(double)qv);
BamAlignment alignment;
string read_group;
vector<uint16_t> flow_signal_fz(flow_order.length());
vector<int16_t> flow_signal_zm(flow_order.length());
while(input_bam.GetNextAlignment(alignment)) {
// Record read length
unsigned int full_length = alignment.Length;
total_full_histo.Add(full_length);
// Record insert length
int insert_length = 0;
if (alignment.GetTag("ZA",insert_length))
total_insert_histo.Add(insert_length);
// Compute and record Q17 and Q20
int Q17_length = 0;
int Q20_length = 0;
double num_accumulated_errors = 0.0;
for(int pos = 0; pos < alignment.Length; ++pos) {
num_accumulated_errors += qv_to_error_rate[(int)alignment.Qualities[pos] - 33];
if (num_accumulated_errors / (pos + 1) <= 0.02)
Q17_length = pos + 1;
if (num_accumulated_errors / (pos + 1) <= 0.01)
Q20_length = pos + 1;
}
total_Q17_histo.Add(Q17_length);
total_Q20_histo.Add(Q20_length);
// Record data for system snr
if(alignment.GetTag("ZM", flow_signal_zm))
system_snr.Add(flow_signal_zm, key.c_str(), flow_order);
else if(alignment.GetTag("FZ", flow_signal_fz))
system_snr.Add(flow_signal_fz, key.c_str(), flow_order);
// Record qv histogram
qv_histogram.Add(alignment.Qualities);
}
input_bam.Close();
Json::Value output_json(Json::objectValue);
output_json["meta"]["creation_date"] = get_time_iso_string(time(NULL));
//.........这里部分代码省略.........
示例15: main
//.........这里部分代码省略.........
gene_processing(*it,locus_b);
regions.clear();
continue;
}
}
continue;
}
int chr_len = refs.at(chr_id).RefLength;
if ( !reader.SetRegion(chr_id, 1, chr_id, chr_len) ) // here set region
{
cerr << "bamtools count ERROR: Jump region failed " << it->chr << endl;
reader.Close();
exit(1);
}
//pile-up pos stats
set <string> fragment;
map <string, unsigned int> pileup;
bool isposPileup = false;
unsigned int old_start = 0;
unsigned int total_tags = 0;
unsigned int total_pos = 0;
unsigned int pileup_pos = 0;
BamAlignment bam;
while (reader.GetNextAlignment(bam)) {
if ( bam.IsMapped() == false ) continue; // skip unaligned reads
unsigned int unique;
bam.GetTag("NH", unique);
if (param->unique == 1) {
if (unique != 1) { // skipe uniquelly mapped reads
continue;
}
}
if (read_length == 0){
read_length = bam.Length;
}
//cout << bam.Name << endl;
string chrom = refs.at(bam.RefID).RefName;
string strand = "+";
if (bam.IsReverseStrand()) strand = "-";
unsigned int alignmentStart = bam.Position+1;
unsigned int mateStart;
if (type == "p") mateStart = bam.MatePosition+1;
unsigned int alignmentEnd = bam.GetEndPosition();
unsigned int cigarEnd;
vector <int> blockLengths;
vector <int> blockStarts;
blockStarts.push_back(0);
ParseCigar(bam.CigarData, blockStarts, blockLengths, cigarEnd);
// position check for unique mapped reads (because is paired-end reads, shoule base on fragment level for paired end reads)
if (posc == true && unique == 1) {
if (type == "p" && fragment.count(bam.Name) > 0)
fragment.erase(bam.Name);