当前位置: 首页>>代码示例>>C++>>正文


C++ BamAlignment::GetTag方法代码示例

本文整理汇总了C++中BamAlignment::GetTag方法的典型用法代码示例。如果您正苦于以下问题:C++ BamAlignment::GetTag方法的具体用法?C++ BamAlignment::GetTag怎么用?C++ BamAlignment::GetTag使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在BamAlignment的用法示例。


在下文中一共展示了BamAlignment::GetTag方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: processAlignment

pos_t VariantProcessor::processAlignment(const BamAlignment& alignment) {
  /* 
     For each alignment, extract the MD and NM tags, validate against
     CIGAR string, and create Variants and ReadHaplotypes. All reads
     for a block are stored in a deque, and processed again to create
     candidate haplotypes.

     Returns the start position of this alignment (TODO correct?)
  */  
  
  if (!alignment.HasTag("NM") || !alignment.HasTag("MD")) {
    std::cerr << "error: BamAlignment '" << alignment.Name << 
      "' does not have either NM or MD tags" << std::endl;
  }
  
  int nm_tag; 
  string md_tag;
  unsigned int aln_len = alignment.GetEndPosition() - alignment.Position;

  alignment.GetTag("MD", md_tag);
  alignment.GetTag("NM", nm_tag);
  
  // Reconstruct reference sequence using MD tags
  string refseq = createReferenceSequence(alignment);

  // With reconstructed reference sequence and query sequence, look
  // for variants. It's a bit roundabout to reconstruct reference from
  // MD, then use it to find variants (already in MD) but keeping
  // state between CIGAR and MD is tricky. This also is a good
  // validation; variants found must much the number of variants in
  // CIGAR/MD.
  vector<VariantPtr> variants;
  vector<VariantPtr> read_variants;
  const vector<CigarOp>& cigar = alignment.CigarData;
  int refpos = 0, readpos = 0;
  
  for (vector<CigarOp>::const_iterator op = cigar.begin(); op != cigar.end(); ++op) {
    if (op->Type == 'S') {
      readpos += op->Length;
    } else if (op->Type == 'M') {
      // match or SNP
      processMatchOrMismatch(alignment, read_variants, op->Length, refseq, refpos, readpos);
      readpos += op->Length;
      refpos += op->Length;
    } else if (op->Type == 'I') {
      processInsertion(alignment, read_variants, op->Length, refseq, refpos, readpos);
      readpos += op->Length;
    } else if (op->Type == 'D') {
      processDeletion(alignment, read_variants, op->Length, refseq, refpos, readpos);
      refpos += op->Length; // deletion w.r.t reference; skip ref length
    } else {
      cerr << "error: unidentified CIGAR type: " << op->Type << endl;
      exit(1);
    }
  }

  // Add to alignments list
  block_alignments.push_back(alignment);
  return 0; // TODO(vsbuffalo)
}
开发者ID:vsbuffalo,项目名称:hapcount,代码行数:60,代码来源:VariantProcessor.cpp

示例2: createReferenceSequence

string createReferenceSequence(const BamAlignment& alignment) {
  // Recreate a reference sequence for a particular alignment. This is
  // the reference sequence that is identical to the reference at this
  // spot. This means skipping insertions or soft clipped regions in
  // reads, adding deletions back in, and keeping read matches.
  const vector<CigarOp> cigar = alignment.CigarData;
  const string querybases = alignment.QueryBases;
  string md_tag;
  alignment.GetTag("MD", md_tag);
  
  vector<MDToken> tokens;
  string refseq, alignedseq; // final ref bases; aligned portion of ref bases
  int md_len = TokenizeMD(md_tag, tokens);

  // Create reference-aligned sequence of read; doesn't contain soft
  // clips or insertions. Then, deletions and reference alleles are
  // added onto this.
  int pos=0;
  for (vector<CigarOp>::const_iterator op = cigar.begin(); op != cigar.end(); ++op) {
    if (!(op->Type == 'S' || op->Type == 'I')) {
      alignedseq.append(querybases.substr(pos, op->Length));
      pos += op->Length;
    } else {
      pos += op->Length; // increment read position past skipped bases
    }
  }

  // the size of the aligned sequence MUST equal what is returned from
  // TokenizeMD: the number of aligned bases. Not the real reference
  // sequence is this length + deletions, which we add in below.
  assert(alignedseq.size() == md_len);

  pos = 0;
  for (vector<MDToken>::const_iterator it = tokens.begin(); it != tokens.end(); ++it) {
    if (it->type == MDType::isMatch) {
      refseq.append(alignedseq.substr(pos, it->length));
      pos += it->length;
    } else if (it->type == MDType::isSNP) {
      assert(it->length == it->seq.size());
      refseq.append(it->seq);
      pos += it->length;
    } else if (it->type == MDType::isDel) {
      // does not increment position in alignedseq
      assert(it->length == it->seq.size());
      refseq.append(it->seq);
    } else {
      assert(false);
    }
  }
  return refseq;
}
开发者ID:vsbuffalo,项目名称:hapcount,代码行数:51,代码来源:VariantProcessor.cpp

示例3:

// reverts a BAM alignment
// default behavior (for now) is : replace Qualities with OQ, clear IsDuplicate flag
// can override default behavior using command line options
void RevertTool::RevertToolPrivate::RevertAlignment(BamAlignment& al) {

    // replace Qualities with OQ, if requested
    if ( !m_settings->IsKeepQualities ) {
        string originalQualities;
        if ( al.GetTag(m_OQ, originalQualities) ) {
            al.Qualities = originalQualities;
            al.RemoveTag(m_OQ);
        }
    }

    // clear duplicate flag, if requested
    if ( !m_settings->IsKeepDuplicateFlag )
        al.SetIsDuplicate(false);
}
开发者ID:arq5x,项目名称:bamtools,代码行数:18,代码来源:bamtools_revert.cpp

示例4: main

int main (int argc, char *argv[]) {

     if( (argc== 1) ||
    	(argc== 2 && string(argv[1]) == "-h") ||
    	(argc== 2 && string(argv[1]) == "-help") ||
    	(argc== 2 && string(argv[1]) == "--help") ){
	 cout<<"Usage:editDist [in bam]"<<endl<<"this program returns the NM field of all aligned reads"<<endl;
	 return 1;
     }

     string bamfiletopen = string(argv[1]);
     // cout<<bamfiletopen<<endl;
     BamReader reader;
     // cout<<"ok"<<endl;
     if ( !reader.Open(bamfiletopen) ) {
	 cerr << "Could not open input BAM files." << endl;
	 return 1;
     }

     BamAlignment al;
     // cout<<"ok"<<endl;
     while ( reader.GetNextAlignment(al) ) {
	 // cout<<al.Name<<endl;
	 if(!al.IsMapped())
	     continue;

	 if(al.HasTag("NM") ){
	     int editDist;
	     if(al.GetTag("NM",editDist) ){
		 cout<<editDist<<endl;
	     }else{
		 cerr<<"Cannot retrieve NM field for "<<al.Name<<endl;
		 return 1;
	     }
	 }else{
	     cerr<<"Warning: read "<<al.Name<<" is aligned but has no NM field"<<endl;
	 }

		    

     } //while al

     reader.Close();

     return 0;
}
开发者ID:grenaud,项目名称:libbam,代码行数:46,代码来源:editDist.cpp

示例5: getLibraryName

/**
 * Gets the library name from the header for the record. If the RG tag is not present on
 * the record, or the library isn't denoted on the read group, a constant string is
 * returned.
 */
string MarkDuplicates::getLibraryName(SamHeader & header, const BamAlignment & rec) {     
    
    string read_group;
    static const string RG("RG");
    static const string unknown_library("Unknown Library");
    rec.GetTag(RG, read_group);
    
    if (read_group.size() > 0 && header.ReadGroups.Contains(read_group)) {
        SamReadGroupDictionary & d = header.ReadGroups;
        const SamReadGroup & rg = d[read_group];
        
        if(rg.HasLibrary()) {
            return rg.Library;
        }
    }
    
    return unknown_library;
}
开发者ID:teju85,项目名称:openge,代码行数:23,代码来源:mark_duplicates.cpp

示例6: IdentifySample

//bool SampleManager::IdentifySample(Alignment& ra) const
bool SampleManager::IdentifySample(const BamAlignment& alignment, int& sample_index, bool& primary_sample) const
{

  string read_group;
  if (!alignment.GetTag("RG", read_group)) {
    cerr << "ERROR: Couldn't find read group id (@RG tag) for BAM Alignment " << alignment.Name << endl;
    exit(1);
  }

  map<string,int>::const_iterator I = read_group_to_sample_idx_.find(read_group);
  if (I == read_group_to_sample_idx_.end())
    return false;

  sample_index =I->second;
  primary_sample = (sample_index == primary_sample_);

  return true;
}
开发者ID:Lingrui,项目名称:TS,代码行数:19,代码来源:SampleManager.cpp

示例7: IonstatsTestFragments

int IonstatsTestFragments(int argc, const char *argv[])
{
  OptArgs opts;
  opts.ParseCmdLine(argc, argv);
  string input_bam_filename   = opts.GetFirstString('i', "input", "");
  string fasta_filename       = opts.GetFirstString('r', "ref", "");
  string output_json_filename = opts.GetFirstString('o', "output", "ionstats_tf.json");
  int histogram_length        = opts.GetFirstInt   ('h', "histogram-length", 400);

  if(argc < 2 or input_bam_filename.empty() or fasta_filename.empty()) {
    IonstatsTestFragmentsHelp();
    return 1;
  }

  //
  // Prepare for metric calculation
  //

  map<string,string> tf_sequences;
  PopulateReferenceSequences(tf_sequences, fasta_filename);


  BamReader input_bam;
  if (!input_bam.Open(input_bam_filename)) {
    fprintf(stderr, "[ionstats] ERROR: cannot open %s\n", input_bam_filename.c_str());
    return 1;
  }

  int num_tfs = input_bam.GetReferenceCount();


  SamHeader sam_header = input_bam.GetHeader();
  if(!sam_header.HasReadGroups()) {
    fprintf(stderr, "[ionstats] ERROR: no read groups in %s\n", input_bam_filename.c_str());
    return 1;
  }

  string flow_order;
  string key;
  for (SamReadGroupIterator rg = sam_header.ReadGroups.Begin(); rg != sam_header.ReadGroups.End(); ++rg) {
    if(rg->HasFlowOrder())
      flow_order = rg->FlowOrder;
    if(rg->HasKeySequence())
      key = rg->KeySequence;
  }


  // Need these metrics stratified by TF.

  vector<ReadLengthHistogram> called_histogram(num_tfs);
  vector<ReadLengthHistogram> aligned_histogram(num_tfs);
  vector<ReadLengthHistogram> AQ10_histogram(num_tfs);
  vector<ReadLengthHistogram> AQ17_histogram(num_tfs);
  vector<SimpleHistogram> error_by_position(num_tfs);
  vector<MetricGeneratorSNR> system_snr(num_tfs);
  vector<MetricGeneratorHPAccuracy> hp_accuracy(num_tfs);

  for (int tf = 0; tf < num_tfs; ++tf) {
    called_histogram[tf].Initialize(histogram_length);
    aligned_histogram[tf].Initialize(histogram_length);
    AQ10_histogram[tf].Initialize(histogram_length);
    AQ17_histogram[tf].Initialize(histogram_length);
    error_by_position[tf].Initialize(histogram_length);
  }

  vector<uint16_t> flow_signal_fz(flow_order.length());
  vector<int16_t> flow_signal_zm(flow_order.length());

  const RefVector& refs = input_bam.GetReferenceData();

  // Missing:
  //  - hp accuracy - tough, copy verbatim from TFMapper?


  BamAlignment alignment;
  vector<char>  MD_op;
  vector<int>   MD_len;
  MD_op.reserve(1024);
  MD_len.reserve(1024);
  string MD_tag;

  //
  // Main loop over mapped reads in the input BAM
  //

  while(input_bam.GetNextAlignment(alignment)) {


    if (!alignment.IsMapped() or !alignment.GetTag("MD",MD_tag))
      continue;

    // The check below eliminates unexpected alignments
    if (alignment.IsReverseStrand() or alignment.Position > 5)
      continue;

    int current_tf = alignment.RefID;

    //
    // Step 1. Parse MD tag
    //
//.........这里部分代码省略.........
开发者ID:Brainiarc7,项目名称:TS,代码行数:101,代码来源:ionstats_tf.cpp

示例8: if

//{{{ void process_intra_chrom_split(const BamAlignment &curr,
void
SV_SplitRead::
process_intra_chrom_split(const BamAlignment &curr,
                          const RefVector refs,
                          BamWriter &inter_chrom_reads,
                          map<string, BamAlignment> &mapped_splits,
                          UCSCBins<SV_BreakPoint*> &r_bin,
                          int weight,
                          int id,
                          int sample_id,
                          SV_SplitReadReader *_reader)
{

    if (mapped_splits.find(curr.Name) == mapped_splits.end()) {
        uint32_t clipped = count_clipped(curr.CigarData);
        if ( curr.HasTag("YP") == true) {
            uint32_t t;
            curr.GetTag("YP", t);
            if (t == 2)
                mapped_splits[curr.Name] = curr;
        }
        else if (clipped >= _reader->min_clip)
            mapped_splits[curr.Name] = curr;
    } else {
        if ( mapped_splits[curr.Name].RefID == curr.RefID ) {
            try {
                SV_SplitRead *new_split_read =
                    new SV_SplitRead(mapped_splits[curr.Name],
                                     curr,
                                     refs,
                                     weight,
                                     id,
                                     sample_id,
                                     _reader);

                SV_BreakPoint *new_bp = NULL;
                if (new_split_read->is_sane()) {
                    new_bp = new_split_read->get_bp();

                    if (new_bp != NULL) {
                        new_bp->cluster(r_bin);
                    } else {
                        cerr << "Alignment name:" << curr.Name << endl;
                        free(new_split_read);
                    }
                } else
                    free(new_split_read);

            } catch (int) {
                cerr << "Error creating split read: " << endl;
            }

        } else {
            BamAlignment al1 = curr;
            BamAlignment al2 = mapped_splits[curr.Name];

            al1.MateRefID = al2.RefID;
            al2.MateRefID = al1.RefID;

            al1.MatePosition = al2.Position;
            al2.MatePosition = al1.Position;

            string x = _reader->get_source_file_name();

            al1.AddTag("LS","Z",x);
            al2.AddTag("LS","Z",x);

            inter_chrom_reads.SaveAlignment(al1);
            inter_chrom_reads.SaveAlignment(al2);
        }
        mapped_splits.erase(curr.Name);
    }
}
开发者ID:gkno,项目名称:lumpy-sv,代码行数:74,代码来源:SV_SplitRead.cpp

示例9: main

int main (int argc, char *argv[]) {

     if( (argc!= 3) ||
    	(argc== 2 && string(argv[1]) == "-h") ||
    	(argc== 2 && string(argv[1]) == "-help") ||
    	(argc== 2 && string(argv[1]) == "--help") ){
	 cerr<<"Usage:splitByRG [in bam] [out prefix]"<<endl<<"this program creates one bam file per RG in the with the outprefix\nFor example splitByRG in.bam out will create\nout.rg1.bam\nout.rg2.bam\n"<<endl;
    	return 1;
    }


     string bamfiletopen = string(argv[1]);
     // if(!strEndsWith(bamfiletopen,".bam")){

     // }
     string bamDirOutPrefix    = string(argv[2]);
     map<string,BamWriter *> rg2BamWriter;
     
     // if(!isDirectory(bamDirOut)){
     // 	 cerr<<"ERROR: the out directory does not exist"<<endl;
     // 	return 1;
     // }

     BamReader reader;

     if ( !reader.Open(bamfiletopen) ) {
    	cerr << "Could not open input BAM files." << endl;
    	return 1;
     }

    SamHeader header = reader.GetHeader();
    const RefVector references = reader.GetReferenceData();
    vector<RefData>  refData=reader.GetReferenceData();
    string pID          = "splitByRG";   
    string pName        = "splitByRG";   
    string pCommandLine = "";
    for(int i=0;i<(argc);i++){
        pCommandLine += (string(argv[i])+" ");
    }
    putProgramInHeader(&header,pID,pName,pCommandLine,returnGitHubVersion(string(argv[0]),".."));


    SamReadGroupDictionary 	srgd=header.ReadGroups;
    for(SamReadGroupConstIterator srgci=srgd.ConstBegin();
	srgci<srgd.ConstEnd();
	srgci++){
	//cout<<*srgci<<endl;
	const SamReadGroup rg = (*srgci);
	//cout<<rg.ID<<endl;
	rg2BamWriter[rg.ID] = new  BamWriter();
	rg2BamWriter[rg.ID]->Open(bamDirOutPrefix+"."+rg.ID+".bam",header,references); 
    }



    BamAlignment al;
    unsigned int total=0;
    while ( reader.GetNextAlignment(al) ) {

	// al.SetIsFailedQC(false);
	// writer.SaveAlignment(al);
	// if(al.IsMapped () ){
	//     if(rg2BamWriter.find(refData[al.RefID].RefName) == rg2BamWriter.end()){ //new
	// 	rg2BamWriter[refData[al.RefID].RefName] = new  BamWriter();
	// 	if ( !rg2BamWriter[refData[al.RefID].RefName]->Open(bamDirOutPrefix+"."+refData[al.RefID].RefName+".bam",header,references) ) {
	// 	    cerr     << "Could not open output BAM file "<< bamDirOutPrefix<<"."<<refData[al.RefID].RefName<<".bam" << endl;
	// 	    return 1;
	// 	}
	
	//     }else{
	// 	rg2BamWriter[refData[al.RefID].RefName]->SaveAlignment(al);
	//     }
	// }else{
	//     unmapped.SaveAlignment(al);
	// }
	if(al.HasTag("RG")){
	    string rgTag;
	    al.GetTag("RG",rgTag);
	    //cout<<rgTag<<endl;
	    if(rg2BamWriter.find(rgTag) == rg2BamWriter.end()){ //new
		cerr<<"Found new RG "<<rgTag<<endl;
		rg2BamWriter[rgTag] = new  BamWriter();
	 	if ( !rg2BamWriter[rgTag]->Open(bamDirOutPrefix+"."+rgTag+".bam",header,references) ) {
	 	    cerr     << "Could not open output BAM file "<< bamDirOutPrefix<<"."<<rgTag<<".bam" << endl;
	 	    return 1;
	 	}
		rg2BamWriter[rgTag]->SaveAlignment(al);	    	   
	    }else{
		rg2BamWriter[rgTag]->SaveAlignment(al);	    	   
	    }
	}else{
	    string rgTag="unknown";	    
	    //cout<<rgTag<<endl;
	    if(rg2BamWriter.find(rgTag) == rg2BamWriter.end()){ //new
		cerr<<"Found new RG "<<rgTag<<endl;
		rg2BamWriter[rgTag] = new  BamWriter();
	 	if ( !rg2BamWriter[rgTag]->Open(bamDirOutPrefix+"."+rgTag+".bam",header,references) ) {
	 	    cerr     << "Could not open output BAM file "<< bamDirOutPrefix<<"."<<rgTag<<".bam" << endl;
	 	    return 1;
	 	}
//.........这里部分代码省略.........
开发者ID:grenaud,项目名称:aLib,代码行数:101,代码来源:splitByRG.cpp

示例10: main


//.........这里部分代码省略.........
  time_t start_time = time(NULL);

  Realigner aligner;
  aligner.verbose_ = verbose;
  aligner.debug_   = debug;
  if (!aligner.SetScores(score_vals))
    cout << "bamrealignment: Four scores need to be provided: match, mismatch, gap open, gap extend score!" << endl;

  aligner.SetAlignmentBandwidth(bandwidth);

  BamAlignment alignment;
  while(reader.GetNextAlignment(alignment)){
    readcounter ++;
    position_shift = false;
    
    if ( (readcounter % 100000) == 0 )
       cout << "Processed " << readcounter << " reads. Elapsed time: " << (time(NULL) - start_time) << endl;

    if (alignment.IsMapped()) {
      
      
      
      orig_position = alignment.Position;
      mapped_readcounter++;
      aligner.SetClipping(clipping, !alignment.IsReverseStrand());
      if (aligner.verbose_) {
    	cout << endl;
        if (alignment.IsReverseStrand())
          cout << "The read is from the reverse strand." << endl;
        else
          cout << "The read is from the forward strand." << endl;
      }

      if (!alignment.GetTag("MD", md_tag)) {
    	if (aligner.verbose_)
          cout << "Warning: Skipping read " << alignment.Name << ". It is mapped but missing MD tag." << endl;
	if (logf.is_open ())
	  logf << alignment.Name << '\t' << alignment.IsReverseStrand() << '\t' << alignment.RefID << '\t' << setfill ('0') << setw (8) << orig_position << '\t' << "MISSMD" << '\n';
	bad_md_tag_readcount++;
      } else if (aligner.CreateRefFromQueryBases(alignment.QueryBases, alignment.CigarData, md_tag, anchors)) {
	bool clipfail = false;
	if (Realigner::CR_ERR_CLIP_ANCHOR == aligner.GetCreateRefError ())
	{
	  clipfail = true;
	  failed_clip_realigned_readcount ++;
	}

        if (!aligner.computeSWalignment(new_cigar_data, new_md_data, start_position_shift)) {
          if (aligner.verbose_)
            cout << "Error in the alignment! Not updating read information." << endl;
	  if (logf.is_open ())
	    logf << alignment.Name << '\t' << alignment.IsReverseStrand() << '\t' << alignment.RefID << '\t' << setfill ('0') << setw (8) << orig_position << '\t' << "SWERR" << '\n';
	  error_sw_readcount++;
          writer.SaveAlignment(alignment);  // Write alignment unchanged
          continue;
        }

        if (!aligner.addClippedBasesToTags(new_cigar_data, new_md_data, alignment.QueryBases.size())) {
          if (aligner.verbose_)
            cout << "Error when adding clipped anchors back to tags! Not updating read information." << endl;
	  if (logf.is_open ())
	    logf << alignment.Name << '\t' << alignment.IsReverseStrand() << '\t' << alignment.RefID << '\t' << setfill ('0') << setw (8) << orig_position << '\t' << "UNCLIPERR" << '\n';
          writer.SaveAlignment(alignment);  // Write alignment unchanged
	  error_unclip_readcount ++;
          continue;
        }
开发者ID:Lingrui,项目名称:TS,代码行数:67,代码来源:bamrealignment.cpp

示例11: checkAlignmentTag

    bool checkAlignmentTag(const PropertyFilterValue& valueFilter, const BamAlignment& al) {
     
        // ensure filter contains string data
        Variant entireTagFilter = valueFilter.Value;
        if ( !entireTagFilter.is_type<string>() ) return false;

        // localize string from variant
        const string& entireTagFilterString = entireTagFilter.get<string>();

        // ensure we have at least "XX:x"
        if ( entireTagFilterString.length() < 4 ) return false;

        // get tagName & lookup in alignment
        // if found, set tagType to tag type character
        // if not found, return false
        const string& tagName = entireTagFilterString.substr(0,2);
        char tagType = '\0';
        if ( !al.GetTagType(tagName, tagType) ) return false;

        // remove tagName & ":" from beginning tagFilter
        string tagFilterString = entireTagFilterString.substr(3);

        // switch on tag type to set tag query value & parse filter token
        int32_t  intFilterValue,    intQueryValue;
        uint32_t uintFilterValue,   uintQueryValue;
        float    realFilterValue,   realQueryValue;
        string   stringFilterValue, stringQueryValue;

        PropertyFilterValue tagFilter;
        PropertyFilterValue::ValueCompareType compareType;
        bool keepAlignment = false;
        switch (tagType) {

            // signed int tag type
            case 'c' :
            case 's' :
            case 'i' :
                if ( al.GetTag(tagName, intQueryValue) ) {
                    if ( FilterEngine<BamAlignmentChecker>::parseToken(tagFilterString, intFilterValue, compareType) ) {
                        tagFilter.Value = intFilterValue;
                        tagFilter.Type  = compareType;
                        keepAlignment   = tagFilter.check(intQueryValue);
                    }
                }
                break;

            // unsigned int tag type
            case 'C' :
            case 'S' :
            case 'I' :
                if ( al.GetTag(tagName, uintQueryValue) ) {
                    if ( FilterEngine<BamAlignmentChecker>::parseToken(tagFilterString, uintFilterValue, compareType) ) {
                        tagFilter.Value = uintFilterValue;
                        tagFilter.Type  = compareType;
                        keepAlignment   = tagFilter.check(uintQueryValue);
                    }
                }
                break;

            // 'real' tag type
            case 'f' :
                if ( al.GetTag(tagName, realQueryValue) ) {
                    if ( FilterEngine<BamAlignmentChecker>::parseToken(tagFilterString, realFilterValue, compareType) ) {
                        tagFilter.Value = realFilterValue;
                        tagFilter.Type  = compareType;
                        keepAlignment   = tagFilter.check(realQueryValue);
                    }
                }
                break;

            // string tag type
            case 'A':
            case 'Z':
            case 'H':
                if ( al.GetTag(tagName, stringQueryValue) ) {
                    if ( FilterEngine<BamAlignmentChecker>::parseToken(tagFilterString, stringFilterValue, compareType) ) {
                        tagFilter.Value = stringFilterValue;
                        tagFilter.Type  = compareType;
                        keepAlignment   = tagFilter.check(stringQueryValue);
                    }
                }
                break;

            // unknown tag type
            default :
                keepAlignment = false;
        }

        return keepAlignment;
    }
开发者ID:BIGLabHYU,项目名称:lumpy-sv,代码行数:90,代码来源:bamtools_filter.cpp

示例12: main

int main (int argc, char *argv[]) {
    if( (argc== 1) ||
	(argc== 2 && string(argv[1]) == "-h") ||
	(argc== 2 && string(argv[1]) == "-help") ||
	(argc== 2 && string(argv[1]) == "--help") ){
	cout<<"Usage:"<<endl;
	cout<<""<<endl;
	cout<<"plotQualScore input.bam"<<endl;
	return 1;
    }

    string bamfiletopen = string(argv[1]);
    BamReader reader;

    if ( !reader.Open(bamfiletopen) ) {
    	cerr << "Could not open input BAM files." << endl;
    	return 1;
    }

    // if ( !reader.LocateIndex() ){
    // 	cerr << "warning: cannot locate index for file " << bamfiletopen<<endl;
    // 	//return 1;
    // }

    BamAlignment al;
    BamAlignment al2;
    
    bool unsurePEorSE=true;
    bool pe=true;
    int strLength=-1;
    int vecLengthToUse=-1;

    map<short,unsigned long>  ** counterA = 0;
    map<short,unsigned long>  ** counterC = 0;
    map<short,unsigned long>  ** counterG = 0;
    map<short,unsigned long>  ** counterT = 0;
    
    int lengthIndex1=0;
    int lengthIndex2=0;
    string seqInd1;
    string seqInd2;
    string qualInd1;
    string qualInd2;
    int offsetInd2;

    while ( reader.GetNextAlignment(al) ) {
	if(unsurePEorSE){
	    strLength=al.QueryBases.length();
	    if(al.IsPaired()){
		pe=true;
		vecLengthToUse=2*strLength;		
	    }else{
		pe=false;
		vecLengthToUse=strLength;
	    }
	    string index1;
	    string index2;
	
	    if(al.HasTag("XI")){
		al.GetTag("XI",index1);
		vecLengthToUse+=index1.length();
		lengthIndex1=index1.length();
	    }

	    if(al.HasTag("XJ")){
		al.GetTag("XJ",index2);
		vecLengthToUse+=index2.length();
		lengthIndex2=index2.length();
	    }

	    counterA     = new map<short,unsigned long>  * [vecLengthToUse];
	    counterC     = new map<short,unsigned long>  * [vecLengthToUse];
	    counterG     = new map<short,unsigned long>  * [vecLengthToUse];
	    counterT     = new map<short,unsigned long>  * [vecLengthToUse];
	    for(int i=0;i<vecLengthToUse;i++){
		counterA[i]=new map<short,unsigned long>  ();
		counterC[i]=new map<short,unsigned long>  ();
		counterG[i]=new map<short,unsigned long>  ();
		counterT[i]=new map<short,unsigned long>  ();
		for(short k=minQualScore;k<=maxQualScore;k++){
		    (*counterA[i])[k]=0;
		    (*counterC[i])[k]=0;
		    (*counterG[i])[k]=0;
		    (*counterT[i])[k]=0;
		}
	    }
	    unsurePEorSE=false;
	}else{
	    if(pe  &&
	       !al.IsPaired()){
		cerr << "Cannot have unpaired reads in PE mode" << endl;
		return 1;
	    }

	    if(!pe  &&
	       al.IsPaired()){
		cerr << "Cannot have unpaired reads in SE mode" << endl;
		return 1;
	    }
	}
//.........这里部分代码省略.........
开发者ID:grenaud,项目名称:aLib,代码行数:101,代码来源:plotQualScores.cpp

示例13: IonstatsAlignment

int IonstatsAlignment(int argc, const char *argv[])
{
  OptArgs opts;
  opts.ParseCmdLine(argc, argv);
  string input_bam_filename   = opts.GetFirstString('i', "input", "");
  string output_json_filename = opts.GetFirstString('o', "output", "ionstats_alignment.json");
  int histogram_length        = opts.GetFirstInt   ('h', "histogram-length", 400);

  if(argc < 2 or input_bam_filename.empty()) {
    IonstatsAlignmentHelp();
    return 1;
  }

  //
  // Prepare for metric calculation
  //

  BamReader input_bam;
  if (!input_bam.Open(input_bam_filename)) {
    fprintf(stderr, "[ionstats] ERROR: cannot open %s\n", input_bam_filename.c_str());
    return 1;
  }

  ReadLengthHistogram called_histogram;
  ReadLengthHistogram aligned_histogram;
  ReadLengthHistogram AQ7_histogram;
  ReadLengthHistogram AQ10_histogram;
  ReadLengthHistogram AQ17_histogram;
  ReadLengthHistogram AQ20_histogram;
  ReadLengthHistogram AQ47_histogram;
  SimpleHistogram error_by_position;

  called_histogram.Initialize(histogram_length);
  aligned_histogram.Initialize(histogram_length);
  AQ7_histogram.Initialize(histogram_length);
  AQ10_histogram.Initialize(histogram_length);
  AQ17_histogram.Initialize(histogram_length);
  AQ20_histogram.Initialize(histogram_length);
  AQ47_histogram.Initialize(histogram_length);
  error_by_position.Initialize(histogram_length);

  BamAlignment alignment;
  vector<char>  MD_op;
  vector<int>   MD_len;
  MD_op.reserve(1024);
  MD_len.reserve(1024);
  string MD_tag;

  //
  // Main loop over mapped reads in the input BAM
  //

  while(input_bam.GetNextAlignment(alignment)) {

    // Record read length
    called_histogram.Add(alignment.Length);

    if (!alignment.IsMapped() or !alignment.GetTag("MD",MD_tag))
      continue;

    //
    // Step 1. Parse MD tag
    //

    MD_op.clear();
    MD_len.clear();

    for (const char *MD_ptr = MD_tag.c_str(); *MD_ptr;) {

      int item_length = 0;
      if (*MD_ptr >= '0' and *MD_ptr <= '9') {    // Its a match
        MD_op.push_back('M');
        for (; *MD_ptr and *MD_ptr >= '0' and *MD_ptr <= '9'; ++MD_ptr)
          item_length = 10*item_length + *MD_ptr - '0';
      } else {
        if (*MD_ptr == '^') {                     // Its a deletion
          MD_ptr++;
          MD_op.push_back('D');
        } else                                    // Its a substitution
          MD_op.push_back('X');
        for (; *MD_ptr and *MD_ptr >= 'A' and *MD_ptr <= 'Z'; ++MD_ptr)
          item_length++;
      }
      MD_len.push_back(item_length);
    }

    //
    // Step 2. Synchronously scan through Cigar and MD, doing error accounting
    //

    int MD_idx = alignment.IsReverseStrand() ? MD_op.size()-1 : 0;
    int cigar_idx = alignment.IsReverseStrand() ? alignment.CigarData.size()-1 : 0;
    int increment = alignment.IsReverseStrand() ? -1 : 1;

    int AQ7_bases = 0;
    int AQ10_bases = 0;
    int AQ17_bases = 0;
    int AQ20_bases = 0;
    int AQ47_bases = 0;
    int num_bases = 0;
//.........这里部分代码省略.........
开发者ID:bdiegel,项目名称:TS,代码行数:101,代码来源:ionstats_alignment.cpp

示例14: IonstatsBasecaller

int IonstatsBasecaller(int argc, const char *argv[])
{
  OptArgs opts;
  opts.ParseCmdLine(argc, argv);
  string input_bam_filename   = opts.GetFirstString('i', "input", "");
  string output_json_filename = opts.GetFirstString('o', "output", "ionstats_basecaller.json");
  int histogram_length        = opts.GetFirstInt   ('h', "histogram-length", 400);

  if(argc < 2 or input_bam_filename.empty()) {
    IonstatsBasecallerHelp();
    return 1;
  }


  BamReader input_bam;
  if (!input_bam.Open(input_bam_filename)) {
    fprintf(stderr, "[ionstats] ERROR: cannot open %s\n", input_bam_filename.c_str());
    return 1;
  }

  SamHeader sam_header = input_bam.GetHeader();
  if(!sam_header.HasReadGroups()) {
    fprintf(stderr, "[ionstats] ERROR: no read groups in %s\n", input_bam_filename.c_str());
    return 1;
  }


  ReadLengthHistogram total_full_histo;
  ReadLengthHistogram total_insert_histo;
  ReadLengthHistogram total_Q17_histo;
  ReadLengthHistogram total_Q20_histo;

  total_full_histo.Initialize(histogram_length);
  total_insert_histo.Initialize(histogram_length);
  total_Q17_histo.Initialize(histogram_length);
  total_Q20_histo.Initialize(histogram_length);

  MetricGeneratorSNR system_snr;
  BaseQVHistogram qv_histogram;

  string flow_order;
  string key;
  for (SamReadGroupIterator rg = sam_header.ReadGroups.Begin(); rg != sam_header.ReadGroups.End(); ++rg) {
    if(rg->HasFlowOrder())
      flow_order = rg->FlowOrder;
    if(rg->HasKeySequence())
      key = rg->KeySequence;
  }

  double qv_to_error_rate[256];
  for (int qv = 0; qv < 256; qv++)
    qv_to_error_rate[qv] =  pow(10.0,-0.1*(double)qv);


  BamAlignment alignment;
  string read_group;
  vector<uint16_t> flow_signal_fz(flow_order.length());
  vector<int16_t> flow_signal_zm(flow_order.length());

  while(input_bam.GetNextAlignment(alignment)) {

    // Record read length
    unsigned int full_length = alignment.Length;
    total_full_histo.Add(full_length);

    // Record insert length
    int insert_length = 0;
    if (alignment.GetTag("ZA",insert_length))
      total_insert_histo.Add(insert_length);

    // Compute and record Q17 and Q20
    int Q17_length = 0;
    int Q20_length = 0;
    double num_accumulated_errors = 0.0;
    for(int pos = 0; pos < alignment.Length; ++pos) {
      num_accumulated_errors += qv_to_error_rate[(int)alignment.Qualities[pos] - 33];
      if (num_accumulated_errors / (pos + 1) <= 0.02)
        Q17_length = pos + 1;
      if (num_accumulated_errors / (pos + 1) <= 0.01)
        Q20_length = pos + 1;
    }
    total_Q17_histo.Add(Q17_length);
    total_Q20_histo.Add(Q20_length);

    // Record data for system snr
    if(alignment.GetTag("ZM", flow_signal_zm))
      system_snr.Add(flow_signal_zm, key.c_str(), flow_order);
    else if(alignment.GetTag("FZ", flow_signal_fz))
      system_snr.Add(flow_signal_fz, key.c_str(), flow_order);

    // Record qv histogram
    qv_histogram.Add(alignment.Qualities);
  }

  input_bam.Close();



  Json::Value output_json(Json::objectValue);
  output_json["meta"]["creation_date"] = get_time_iso_string(time(NULL));
//.........这里部分代码省略.........
开发者ID:isanwong,项目名称:TS,代码行数:101,代码来源:ionstats_basecaller.cpp

示例15: main


//.........这里部分代码省略.........
          gene_processing(*it,locus_b);      
          regions.clear();
          continue;
        }
      }
      continue;
    }

    int chr_len = refs.at(chr_id).RefLength;

    if ( !reader.SetRegion(chr_id, 1, chr_id, chr_len) ) // here set region
      {
        cerr << "bamtools count ERROR: Jump region failed " << it->chr << endl;
        reader.Close();
        exit(1);
      }

    //pile-up pos stats
    set <string> fragment;
    map <string, unsigned int> pileup;
    bool isposPileup = false;
    unsigned int old_start   = 0;
    unsigned int total_tags  = 0;
    unsigned int total_pos   = 0;
    unsigned int pileup_pos  = 0;


    BamAlignment bam;
    while (reader.GetNextAlignment(bam)) {

      if ( bam.IsMapped() == false ) continue;   // skip unaligned reads

      unsigned int unique;
      bam.GetTag("NH", unique);
      if (param->unique == 1) {
        if (unique != 1) {                       // skipe uniquelly mapped reads
          continue;
        }
      }

      if (read_length == 0){
        read_length = bam.Length;
      }

      //cout << bam.Name << endl;
      string chrom = refs.at(bam.RefID).RefName;
      string strand = "+";
      if (bam.IsReverseStrand()) strand = "-";

      unsigned int alignmentStart =  bam.Position+1;
      unsigned int mateStart;
      if (type == "p") mateStart = bam.MatePosition+1;
      unsigned int alignmentEnd = bam.GetEndPosition();
      unsigned int cigarEnd;
      vector <int> blockLengths;
      vector <int> blockStarts;
      blockStarts.push_back(0);
      ParseCigar(bam.CigarData, blockStarts, blockLengths, cigarEnd);


      // position check for unique mapped reads (because is paired-end reads, shoule base on fragment level for paired end reads)
      if (posc == true && unique == 1) {

        if (type == "p" && fragment.count(bam.Name) > 0) 
          fragment.erase(bam.Name);
开发者ID:Cemily,项目名称:TRUP,代码行数:66,代码来源:Rseq_bam_reads2expr.cpp


注:本文中的BamAlignment::GetTag方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。