当前位置: 首页>>代码示例>>C++>>正文


C++ SamRecord::get0BasedPosition方法代码示例

本文整理汇总了C++中SamRecord::get0BasedPosition方法的典型用法代码示例。如果您正苦于以下问题:C++ SamRecord::get0BasedPosition方法的具体用法?C++ SamRecord::get0BasedPosition怎么用?C++ SamRecord::get0BasedPosition使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在SamRecord的用法示例。


在下文中一共展示了SamRecord::get0BasedPosition方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: checkRecordInSection

bool SamFile::checkRecordInSection(SamRecord& record)
{
    bool recordFound = true;
    if(myRefID == BamIndex::REF_ID_ALL)
    {
        return(true);
    }
    // Check to see if it is in the correct reference/position.
    if(record.getReferenceID() != myRefID)
    {
        // Incorrect reference ID, return no more records.
        myStatus = SamStatus::NO_MORE_RECS;
        return(false);
    }
   
    // Found a record.
    recordFound = true;

    // If start/end position are set, verify that the alignment falls
    // within those.
    // If the alignment start is greater than the end of the region,
    // return NO_MORE_RECS.
    // Since myEndPos is Exclusive 0-based, anything >= myEndPos is outside
    // of the region.
    if((myEndPos != -1) && (record.get0BasedPosition() >= myEndPos))
    {
        myStatus = SamStatus::NO_MORE_RECS;
        return(false);
    }
        
    // We know the start is less than the end position, so the alignment
    // overlaps the region if the alignment end position is greater than the
    // start of the region.
    if((myStartPos != -1) && (record.get0BasedAlignmentEnd() < myStartPos))
    {
        // If it does not overlap the region, so go to the next
        // record...set recordFound back to false.
        recordFound = false;
    }

    if(!myOverlapSection)
    {
        // Needs to be fully contained.  Not fully contained if
        // 1) the record start position is < the region start position.
        // or
        // 2) the end position is specified and the record end position
        //    is greater than or equal to the region end position.
        //    (equal to since the region is exclusive.
        if((record.get0BasedPosition() < myStartPos) ||
           ((myEndPos != -1) && 
            (record.get0BasedAlignmentEnd() >= myEndPos)))
        {
            // This record is not fully contained, so move on to the next
            // record.
            recordFound = false;
        }
    }

    return(recordFound);
}
开发者ID:rtchen,项目名称:gotcloud,代码行数:60,代码来源:SamFile.cpp

示例2: hasPositionChanged

// determine whether the record's position is different from the previous record
bool Dedup_LowMem::hasPositionChanged(SamRecord& record)
{
    if (lastReference != record.getReferenceID() ||
            lastCoordinate < record.get0BasedPosition())
    {
        if (lastReference != record.getReferenceID())
        {
            lastReference = record.getReferenceID();
            Logger::gLogger->writeLog("Reading ReferenceID %d\n", lastReference);
        }
        lastCoordinate = record.get0BasedPosition();
        return true;
    }
    return false;
}
开发者ID:statgen,项目名称:bamUtil,代码行数:16,代码来源:Dedup_LowMem.cpp

示例3: softClip

// Soft clip the record from the front and/or the back.
SamFilter::FilterStatus SamFilter::softClip(SamRecord& record, 
                                            int32_t numFrontClips,
                                            int32_t numBackClips)
{
    //////////////////////////////////////////////////////////
    Cigar* cigar = record.getCigarInfo();
    FilterStatus status = NONE;
    int32_t startPos = record.get0BasedPosition();
    CigarRoller updatedCigar;

    status = softClip(*cigar, numFrontClips, numBackClips, 
                      startPos, updatedCigar);

    if(status == FILTERED)
    {
        /////////////////////////////
        // The entire read is clipped, so rather than clipping it,
        // filter it out.
        filterRead(record);
        return(FILTERED);
    }
    else if(status == CLIPPED)
    {
        // Part of the read was clipped, and now that we have
        // an updated cigar, update the read.
        record.setCigar(updatedCigar);

        // Update the starting position.
        record.set0BasedPosition(startPos);
    }
    return(status);
}
开发者ID:Griffan,项目名称:FASTQuick,代码行数:33,代码来源:SamFilter.cpp

示例4: verify

bool Demux::verify(){ //at least one decoy should be present
	BamReader bamReader(bamFilePath);
	SamRecord samRecord;
	int verifiedDecoys = 0;
	while (	bamReader.getNextRecord(samRecord)) {
		string recordName(samRecord.getReadName());
		recordName = cHandler.decrypt(recordName);
		//clean record name
		int len=recordName.find("$");
		recordName=recordName.substr(0,len);
		//clean ended
		if (isDecoy(recordName)){
			char decoyChromosome[2][20];
			int decoyLocation[2];
			int decoyJobID;

			sscanf(	recordName.c_str(),"DECOY.%[^.].%d_%[^.].%d.%d",
					decoyChromosome[0], decoyLocation, 
					decoyChromosome[1], decoyLocation + 1,  &decoyJobID);

			int pair = 	!SamFlag::isFirstFragment(samRecord.getFlag());
			if (strcmp(decoyChromosome[pair],samRecord.getReferenceName())){
				cout << "Chromosome mismatch\n";
				cout << "Expected: " << decoyChromosome[pair] << endl;
				cout << "Got: " << samRecord.getReferenceName() << endl;
				return false;
			}
			if ( decoyLocation[pair] != samRecord.get0BasedPosition()){
				cout << "Position mismatch\n";
				cout << "Expected: " << decoyLocation[pair] << endl;
				cout << "Got: " << samRecord.get0BasedPosition() << endl;
				return false;
			}
			if (decoyJobID != jobID){
				cout << "Job ID mismatch\n";
				cout << "Expected: " << decoyJobID << endl;
				cout << "Got: " << jobID << endl;
				return false;
			}
			verifiedDecoys++;
		}
    }
    if (verifiedDecoys)
		return true;
	else
		return false;
}
开发者ID:coinami,项目名称:coinami-pro,代码行数:47,代码来源:Demux.cpp

示例5: getGenomicCoordinate

// make a 64-bit genomic coordinate [24bit-chr][32bit-pos][8bit-orientation]
uint64_t getGenomicCoordinate(SamRecord& r) {
  // 64bit string consisting of
  // 24bit refID, 32bit pos, 8bit orientation
  if ( ( r.getReferenceID() < 0 ) || ( r.get0BasedPosition() < 0 ) ) {
    return UNMAPPED_GENOMIC_COORDINATE;
  }
  else {
    return ( ( static_cast<uint64_t>(r.getReferenceID()) << 40 ) | ( static_cast<uint64_t>(r.get0BasedPosition()) << 8 ) | static_cast<uint64_t>( r.getFlag() & 0x0010 ) );
  }
}
开发者ID:ascendo,项目名称:bamUtil,代码行数:11,代码来源:MergeBam.cpp

示例6: CalcClusters

void GenomeRegionSeqStats::CalcClusters(String &bamFile, int minMapQuality)
{
  SamFile sam;
  SamRecord samRecord;
  SamFileHeader samHeader;

  if(!sam.OpenForRead(bamFile.c_str()))
    error("Open BAM file %s failed!\n", bamFile.c_str());

  if(!sam.ReadHeader(samHeader)) {
      error("Read BAM file header %s failed!\n", bamFile.c_str());
  }
  
  if(depth.size()==0) depth.resize(referencegenome.sequenceLength());
  
  String contigLabel;
  uint32_t start;
  uint32_t gstart;
  Reset();
  while(sam.ReadRecord(samHeader, samRecord))
    {
      nReads++;
      if(samRecord.getFlag() & SamFlag::UNMAPPED) nUnMapped++;

      if(samRecord.getMapQuality() < minMapQuality) continue;

      CigarRoller cigar(samRecord.getCigar());

      int nonClipSequence = 0;

      if(cigar.size()!=0 && cigar[0].operation==Cigar::softClip)
          nonClipSequence = cigar[0].count;

      contigLabel = samRecord.getReferenceName();
      start = nonClipSequence + samRecord.get0BasedPosition();  // start is 0-based

      gstart = referencegenome.getGenomePosition(contigLabel.c_str(), start);

      if(IsInRegions(contigLabel, start, start+samRecord.getReadLength())) continue;

      for(uint32_t i=gstart; i<gstart+samRecord.getReadLength(); i++)
       if(depth[i]<MAXDP)
        depth[i]++;
      nMappedOutTargets++;
    }
}
开发者ID:aminzia,项目名称:statgen,代码行数:46,代码来源:GenomeRegionSeqStats.cpp

示例7: flushOutputBuffer

bool ClipOverlap::flushOutputBuffer(MateMapByCoord& mateMap,
                                    SamCoordOutput& outputBuffer,
                                    int32_t prevChrom,
                                    int32_t prevPos)
{
    // We will flush the output buffer up to the first record left in the
    // mateMap.  If there are no records left in the mate map, then we
    // flush everything up to the previous chrom/pos that was processed since
    // any new records will have a higher coordinate.
    SamRecord* firstRec = mateMap.first();
    if(firstRec != NULL)
    {
        return(outputBuffer.flush(firstRec->getReferenceID(), 
                                  firstRec->get0BasedPosition()));
    }
    // Otherwise, flush based on the previous 
    return(outputBuffer.flush(prevChrom, prevPos));
}
开发者ID:statgen,项目名称:bamUtil,代码行数:18,代码来源:ClipOverlap.cpp

示例8: CalcRegionStats

void GenomeRegionSeqStats::CalcRegionStats(String &bamFile)
{
  SamFile sam;
  SamRecord samRecord;
  SamFileHeader samHeader;

  if(!sam.OpenForRead(bamFile.c_str()))
    error("Open BAM file %s failed!\n", bamFile.c_str());

  if(!sam.ReadHeader(samHeader)) {
      error("Read BAM file header %s failed!\n", bamFile.c_str());
  }
  
  String contigLabel;
  int start, end;
  Reset();
  while(sam.ReadRecord(samHeader, samRecord))
    {
      nReads++;
      if(samRecord.getFlag() & SamFlag::UNMAPPED) nUnMapped++;

      if(contigFinishedCnt>=contigs.size()) continue;

      CigarRoller cigar(samRecord.getCigar());

      int nonClipSequence = 0;

      if(cigar.size()!=0 && cigar[0].operation==Cigar::softClip)
          nonClipSequence = cigar[0].count;

      contigLabel = samRecord.getReferenceName();
      start = nonClipSequence + samRecord.get0BasedPosition();  // start is 0-based
      end = start + samRecord.getReadLength() - 1;
      if(UpdateRegionStats(contigLabel, start, end)) nMapped2Targets++;
    }
    CalcRegionReadCountInGCBins();
    CalcGroupReadCountInGCBins();
    std::cout << "Total reads : " << nReads << std::endl;
}
开发者ID:aminzia,项目名称:statgen,代码行数:39,代码来源:GenomeRegionSeqStats.cpp

示例9: validateSortOrder

// Validate that the record is sorted compared to the previously read record
// if there is one, according to the specified sort order.
// If the sort order is UNSORTED, true is returned.
bool SamFile::validateSortOrder(SamRecord& record, SamFileHeader& header)
{
    if(myRefPtr != NULL)
    {
        record.setReference(myRefPtr);
    }
    record.setSequenceTranslation(myReadTranslation);

    bool status = false;
    if(mySortedType == UNSORTED)
    {
        // Unsorted, so nothing to validate, just return true.
        status = true;
    }
    else 
    {
        // Check to see if mySortedType is based on the header.
        if(mySortedType == FLAG)
        {
            // Determine the sorted type from what was read out of the header.
            mySortedType = getSortOrderFromHeader(header);
        }

        if(mySortedType == QUERY_NAME)
        {
            // Validate that it is sorted by query name.
            // Get the query name from the record.
            const char* readName = record.getReadName();

            // Check if it is sorted either in samtools way or picard's way.
            if((myPrevReadName.Compare(readName) > 0) &&
               (strcmp(myPrevReadName.c_str(), readName) > 0))
            {
                // return false.
                String errorMessage = "ERROR: File is not sorted by read name at record ";
                errorMessage += myRecordCount;
                errorMessage += "\n\tPrevious record was ";
                errorMessage += myPrevReadName;
                errorMessage += ", but this record is ";
                errorMessage += readName;
                myStatus.setStatus(SamStatus::INVALID_SORT, 
                                   errorMessage.c_str());
                status = false;
            }
            else
            {
                myPrevReadName = readName;
                status = true;
            }
        }
        else 
        {
            // Validate that it is sorted by COORDINATES.
            // Get the leftmost coordinate and the reference index.
            int32_t refID = record.getReferenceID();
            int32_t coord = record.get0BasedPosition();
            // The unmapped reference id is at the end of a sorted file.
            if(refID == BamIndex::REF_ID_UNMAPPED)
            {
                // A new reference ID that is for the unmapped reads
                // is always valid.
                status = true;
                myPrevRefID = refID;
                myPrevCoord = coord;
            }
            else if(myPrevRefID == BamIndex::REF_ID_UNMAPPED)
            {
                // Previous reference ID was for unmapped reads, but the
                // current one is not, so this is not sorted.
                String errorMessage = "ERROR: File is not coordinate sorted at record ";
                errorMessage += myRecordCount;
                errorMessage += "\n\tPrevious record was unmapped, but this record is ";
                errorMessage += header.getReferenceLabel(refID) + ":" + coord;
                myStatus.setStatus(SamStatus::INVALID_SORT, 
                                   errorMessage.c_str());
                status = false;
            }
            else if(refID < myPrevRefID)
            {
                // Current reference id is less than the previous one, 
                //meaning that it is not sorted.
                String errorMessage = "ERROR: File is not coordinate sorted at record ";
                errorMessage += myRecordCount;
                errorMessage += "\n\tPrevious record was ";
                errorMessage += header.getReferenceLabel(myPrevRefID) + ":" + myPrevCoord;
                errorMessage += ", but this record is ";
                errorMessage += header.getReferenceLabel(refID) + ":" + coord;
                myStatus.setStatus(SamStatus::INVALID_SORT, 
                                   errorMessage.c_str());
                status = false;
            }
            else
            {
                // The reference IDs are in the correct order.
                if(refID > myPrevRefID)
                {
                    // New reference id, so set the previous coordinate to -1
//.........这里部分代码省略.........
开发者ID:rtchen,项目名称:gotcloud,代码行数:101,代码来源:SamFile.cpp

示例10: processRecord

bool BamProcessor::processRecord ()
{
    trclog << "\nProcessing record " << read_cnt_ << " - " << rec_.getReadName () << ", " << rec_.get0BasedUnclippedEnd () << "->" << rec_.getReadLength () << ", ref " << rec_.getReferenceName () << std::endl;
    const char* seq = rec_.getSequence ();
    unsigned position = rec_.get0BasedPosition ();
    unsigned new_position = position;
    bool reverse_match = (rec_.getFlag () & 0x10);

    Cigar* cigar_p = rec_.getCigarInfo ();
    if (!cigar_p->size ())  // can not recreate reference is cigar is missing. Keep record unaligned.
    {                       // TODO: allow to specify and load external reference
        ++ unaligned_cnt_;
        return true;
    }

    myassert (cigar_p);

    const String *mdval = rec_.getStringTag ("MD");
    if (!mdval) // can not recreate reference is MD tag is missing. Keep record as is.
    {
        warn << "No MD Tag for record " << proc_cnt_ << ". Skipping record." << std::endl;
        ++nomd_cnt_;
        return true; // record will be kept as-is.
    }
    std::string md_tag = mdval->c_str ();

    // find the non-clipped region
    uint32_t clean_len;
    EndClips clips;
    const char* clean_read = clip_seq (seq, *cigar_p, clean_len, clips);

    // find length needed for the reference
    // this reserves space enough for entire refference, including softclipped ends.
    unsigned ref_len = cigar_p->getExpectedReferenceBaseCount ();
    if (ref_buffer_sz_ < ref_len)
    {
        ref_buffer_sz_ = (1 + ref_len / REF_BUF_INCR) * REF_BUF_INCR;
        ref_buffer_.reset (ref_buffer_sz_);
    }
    if (clean_len > MAX_SEQ_LEN || ref_len > MAX_SEQ_LEN)
    {
        ++ toolongs_;
        return true;
    }

    // recreate reference by Query, Cigar, and MD tag. Do not include softclipped ends in the recreated sequence (use default last parameter)
    recreate_ref (seq, rec_.getReadLength (), cigar_p, md_tag.c_str (), ref_buffer_, ref_buffer_sz_);

    unsigned qry_ins; // extra bases in query     == width_left
    unsigned ref_ins; // extra bases in reference == width_right
    band_width (*cigar_p, qry_ins, ref_ins);

    if (log_matr_ || log_base_)
    {
        logfile_ << "Record " << read_cnt_ << ": " << rec_.getReadName () << "\n"
                 << "   sequence (" << rec_.getReadLength () << " bases)\n";
    }

    CigarRoller roller;
    int ref_shift = 0;  // shift of the new alignment position on refereance relative the original
    unsigned qry_off, ref_off; // offsets on the query and reference of the first non-clipped aligned bases
    double new_score = 0;

    switch (p_->algo ())
    {
        case ContalignParams::TEMPL:
        {
            // call aligner
            new_score = taligner_.eval (clean_read, clean_len, ref_buffer_, ref_len, 0, band_width_);
            // read traceback
            // TODO: convert directly to cigar
            genstr::Alignment* al = taligner_.trace ();
            // convert alignment to cigar
            ref_shift = roll_cigar (roller, *al, clean_len, clips, qry_off, ref_off);
        }
        break;
        case ContalignParams::PLAIN:
        {
            new_score = aligner_.align_band (
                clean_read,                     // xseq
                clean_len,                      // xlen
                ref_buffer_,                    // yseq
                ref_len,                        // ylen
                0,                              // xpos
                0,                              // ypos
                std::max (clean_len, ref_len),  // segment length
                qry_ins + band_width_,          // width_left
                false,                          // unpack
                ref_ins + band_width_,          // width_right - forces to width_left
                true,                           // to_beg
                true                            // to_end
                );
            unsigned bno = aligner_.backtrace (
                    batches_,      // BATCH buffer
                    max_batch_no_, // size of BATCH buffer
                    false,         // fill the BATCH array in reverse direction
                    ref_ins + band_width_ // width
                                    );
            // convert alignment to cigar
            ref_shift = roll_cigar (roller, batches_, bno, clean_len, clips, qry_off, ref_off);
//.........这里部分代码省略.........
开发者ID:vswilliamson,项目名称:TS,代码行数:101,代码来源:context-align-main.cpp

示例11: softClipBeginByRefPos

// Soft Clip from the beginning of the read to the specified reference position.
int32_t CigarHelper::softClipBeginByRefPos(SamRecord& record, 
                                           int32_t refPosition0Based,
                                           CigarRoller& newCigar,
                                           int32_t &new0BasedPosition)
{
    newCigar.clear();
    Cigar* cigar = record.getCigarInfo();
    if(cigar == NULL)
    {
        // Failed to get the cigar.
        ErrorHandler::handleError("Soft clipping, but failed to read the cigar");
        return(NO_CLIP);
    }

    // No cigar or position in the record, so return no clip.
    if((cigar->size() == 0) || (record.get0BasedPosition() == -1))
    {
        return(NO_CLIP);
    }

    // Check to see if the reference position occurs before the record starts,
    // if it does, do no clipping.
    if(refPosition0Based < record.get0BasedPosition())
    {
        // Not within this read, so nothing to clip.
        newCigar.Set(record.getCigar());
        return(NO_CLIP);
    }

    // The position falls after the read starts, so loop through until the
    // position or the end of the read is found.
    int32_t readClipPosition = 0;
    bool clipWritten = false;
    new0BasedPosition = record.get0BasedPosition();
    for(int i = 0; i < cigar->size(); i++)
    {
        const Cigar::CigarOperator* op = &(cigar->getOperator(i));

        if(clipWritten)
        {
            // Clip point has been found, so just add everything.
            newCigar += *op;
            // Go to the next operation.
            continue;
        }

        // The clip point has not yet been found, so check to see if we found 
        // it now.

        // Not a clip, check to see if the operation is found in the
        // reference.
        if(Cigar::foundInReference(*op))
        {
            // match, mismatch, deletion, skip

            // increment the current reference position to just past this
            // operation.
            new0BasedPosition += op->count;

            // Check to see if this is also in the query, because otherwise
            // the operation is still being consumed.
            if(Cigar::foundInQuery(*op))
            {
                // Also in the query, determine if the entire thing should
                // be clipped or just part of it.

                uint32_t numKeep = 0;
                // Check to see if we have hit our clip position.
                if(refPosition0Based < new0BasedPosition)
                {
                    // The specified clip position is in this cigar operation.
                    numKeep = new0BasedPosition - refPosition0Based - 1;
                    
                    if(numKeep > op->count)
                    {
                        // Keep the entire read.  This happens because
                        // we keep reading until the first match/mismatch
                        // after the clip.
                        numKeep = op->count;
                    }
                }

                // Add the part of this operation that is being clipped
                // to the clip count.
                readClipPosition += (op->count - numKeep);

                // Only write the clip if we found a match/mismatch
                // to write.  Otherwise we will keep accumulating clips
                // for the case of insertions.
                if(numKeep > 0)
                {
                    new0BasedPosition -= numKeep;

                    newCigar.Add(Cigar::softClip, readClipPosition);
                    
                    // Add the clipped part of this cigar to the clip
                    // position.
                    newCigar.Add(op->operation, numKeep);
                    
//.........这里部分代码省略.........
开发者ID:Griffan,项目名称:FASTQuick,代码行数:101,代码来源:CigarHelper.cpp

示例12: softClipEndByRefPos

// Soft Clip from the end of the read at the specified reference position.
int32_t CigarHelper::softClipEndByRefPos(SamRecord& record, 
                                         int32_t refPosition0Based,
                                         CigarRoller& newCigar)
{
    newCigar.clear();
    Cigar* cigar = record.getCigarInfo();
    if(cigar == NULL)
    {
        // Failed to get the cigar.
        ErrorHandler::handleError("Soft clipping, but failed to read the cigar");
        return(NO_CLIP);
    }

    // No cigar or position in the record, so return no clip.
    if((cigar->size() == 0) || (record.get0BasedPosition() == -1))
    {
        return(NO_CLIP);
    }

    // Check to see if the reference position occurs after the record ends,
    // if so, do no clipping.
    if(refPosition0Based > record.get0BasedAlignmentEnd())
    {
        // Not within this read, so nothing to clip.
        newCigar.Set(record.getCigar());
        return(NO_CLIP);
    }

    // The position falls before the read ends, so loop through until the
    // position is found.
    int32_t currentRefPosition = record.get0BasedPosition();
    int32_t readClipPosition = 0;
    for(int i = 0; i < cigar->size(); i++)
    {
        const Cigar::CigarOperator* op = &(cigar->getOperator(i));

        // If the operation is found in the reference, increase the
        // reference position.
        if(Cigar::foundInReference(*op))
        {
            // match, mismatch, deletion, skip
            // increment the current reference position to just past
            // this operation.
            currentRefPosition += op->count;
        }
         
        // Check to see if we have hit our clip position.
        if(refPosition0Based < currentRefPosition)
        {
            // If this read is also in the query (match/mismatch), 
            // write the partial op to the new cigar.
            int32_t numKeep = 0;
            if(Cigar::foundInQuery(*op))
            {
                numKeep = op->count - (currentRefPosition - refPosition0Based);
                if(numKeep > 0)
                {
                    newCigar.Add(op->operation, numKeep);
                    readClipPosition += numKeep;
                }
            }
            else if(Cigar::isClip(*op))
            {
                // This is a hard clip, so write it.
                newCigar.Add(op->operation, op->count);
            }
            else
            {

                // Not found in the query (skip/deletion),
                // so don't write any of the operation.
            }
            // Found the clip point, so break.
            break;
        }
        else if(refPosition0Based == currentRefPosition)
        {
            newCigar += *op;
            if(Cigar::foundInQuery(*op))
            {
                readClipPosition += op->count;
            }
        }
        else
        {
            // Not yet to the clip position, so add this operation/size to
            // the new cigar.
            newCigar += *op;
            if(Cigar::foundInQuery(*op))
            {
                // Found in the query, so update the read clip position.
                readClipPosition += op->count;
            }
        }
    } // End loop through cigar.

    // Before adding the softclip, read from the end of the cigar checking to
    // see if the operations are in the query, removing operations that are
    // not (pad/delete/skip) until a hardclip or an operation in the query is
//.........这里部分代码省略.........
开发者ID:Griffan,项目名称:FASTQuick,代码行数:101,代码来源:CigarHelper.cpp

示例13: processFile

int GapInfo::processFile(const char* inputFileName, const char* outputFileName,
                         const char* refFile, bool detailed,
                         bool checkFirst, bool checkStrand)
{
    // Open the file for reading.
    SamFile samIn;
    samIn.OpenForRead(inputFileName);

    // Read the sam header.
    SamFileHeader samHeader;
    samIn.ReadHeader(samHeader);

    SamRecord samRecord;

    GenomeSequence* refPtr = NULL;
    if(strcmp(refFile, "") != 0)
    {
        refPtr = new GenomeSequence(refFile);
    }

    IFILE outFile = ifopen(outputFileName, "w");

    // Map for summary.
    std::map<int, int> gapInfoMap;


    // Keep reading records until ReadRecord returns false.
    while(samIn.ReadRecord(samHeader, samRecord))
    {
        uint16_t samFlags = samRecord.getFlag();

        if((!SamFlag::isMapped(samFlags)) || 
           (!SamFlag::isMateMapped(samFlags)) ||
           (!SamFlag::isPaired(samFlags)) ||
           (samFlags & SamFlag::SECONDARY_ALIGNMENT) || 
           (SamFlag::isDuplicate(samFlags)) ||
           (SamFlag::isQCFailure(samFlags)))
        {
            // unmapped, mate unmapped, not paired,
            // not the primary alignment,
            // duplicate, fails vendor quality check 
            continue;
        }

        // No gap info if the chromosome names are different or
        // are unknown.
        int32_t refID = samRecord.getReferenceID();
        if((refID != samRecord.getMateReferenceID()) || (refID == -1))
        {
            continue;
        }

        int32_t readStart = samRecord.get0BasedPosition();
        int32_t mateStart = samRecord.get0BasedMatePosition();

        // If the mate starts first, then the pair was processed by
        // the mate.
        if(mateStart < readStart)
        {
            continue;
        }
        if((mateStart == readStart) && (SamFlag::isReverse(samFlags)))
        {
            // read and mate start at the same position, so 
            // only process the forward strand.
            continue;
        }

        // Process this read pair.
        int32_t readEnd = samRecord.get0BasedAlignmentEnd();
        
        int32_t gapSize = mateStart - readEnd - 1;

        if(detailed)
        {
            // Output the gap info.
            ifprintf(outFile, "%s\t%d\t%d", 
                     samRecord.getReferenceName(), readEnd+1, gapSize);
            
            // Check if it is not the first or if it is not the forward strand.
            if(checkFirst && !SamFlag::isFirstFragment(samFlags))
            {
                ifprintf(outFile, "\tNotFirst");
            }
            if(checkStrand && SamFlag::isReverse(samFlags))
            {
                ifprintf(outFile, "\tReverse");
            }
            ifprintf(outFile, "\n");
        }
        else
        {
            // Summary.
            // Skip reads that are not the forward strand.
            if(SamFlag::isReverse(samFlags))
            {
                // continue
                continue;
            }

//.........这里部分代码省略.........
开发者ID:BioScripts,项目名称:bamUtil,代码行数:101,代码来源:GapInfo.cpp

示例14: checkDups

// When a record is read, check if it is a duplicate or
// store for future checking.
void Dedup::checkDups(SamRecord& record, uint32_t recordCount)
{
    // Only inside this method if the record is mapped.

    // Get the key for this record.
    static DupKey key;
    static DupKey mateKey;
    key.updateKey(record, getLibraryID(record));

    int flag = record.getFlag(); 
    bool recordPaired = SamFlag::isPaired(flag) && SamFlag::isMateMapped(flag);
    int sumBaseQual = getBaseQuality(record);

    int32_t chromID = record.getReferenceID();
    int32_t mateChromID = record.getMateReferenceID();

    // If we are one-chrom and the mate is not on the same chromosome, 
    // mark it as not paired.
    if(myOneChrom && (chromID != mateChromID))
    {
        recordPaired = false;
    }
    
    // Look in the map to see if an entry for this key exists.
    FragmentMapInsertReturn ireturn = 
        myFragmentMap.insert(std::make_pair(key, ReadData()));

    ReadData* readData = &(ireturn.first->second);

    // Mark this record's data in the fragment record if this is the first
    // entry or if it is a duplicate and the old record is not paired and 
    // the new record is paired or the has a higher quality.
    if((ireturn.second == true) ||
       ((readData->paired == false) && 
        (recordPaired || (sumBaseQual > readData->sumBaseQual))))
    {
        // If there was a previous record, mark it duplicate and release
        // the old record
        if(ireturn.second == false)
        {
            // Mark the old record as a DUPLICATE!
            handleDuplicate(readData->recordIndex, readData->recordPtr);
        }
        // Store this record for later duplicate checking.
        readData->sumBaseQual = sumBaseQual;
        readData->recordIndex = recordCount;
        readData->paired = recordPaired;
        if(recordPaired)
        {
            readData->recordPtr = NULL;
        }
        else
        {
            readData->recordPtr = &record;
        }
    }
    else
    {
        // The old record is not a duplicate so the new record is
        // a duplicate if it is not paired.
        if(recordPaired == false)
        {
            // This record is a duplicate, so mark it and release it.
            handleDuplicate(recordCount, &record);
        }
    }

    // Only paired processing is left, so return if not paired.
    if(recordPaired == false)
    {
        // Not paired, no more operations required, so return.
        return;
    }
    
    // This is a paired record, so check for its mate.
    uint64_t readPos = 
        SamHelper::combineChromPos(chromID,
                                   record.get0BasedPosition());
    uint64_t matePos =
        SamHelper::combineChromPos(mateChromID, 
                                   record.get0BasedMatePosition());
    SamRecord* mateRecord = NULL;
    int mateIndex = 0;
    
    // Check to see if the mate is prior to this record.
    if(matePos <= readPos)
    {
        // The mate map is stored by the mate position, so look for this 
        // record's position.
        // The mate should be in the mate map, so find it.
        std::pair<MateMap::iterator,MateMap::iterator> matches =
            myMateMap.equal_range(readPos);
        // Loop through the elements that matched the pos looking for the mate.
        for(MateMap::iterator iter = matches.first; 
            iter != matches.second; iter++)
        {
            if(strcmp((*iter).second.recordPtr->getReadName(), 
                      record.getReadName()) == 0)
//.........这里部分代码省略.........
开发者ID:KHP-Informatics,项目名称:bamUtil,代码行数:101,代码来源:Dedup.cpp

示例15: handleSortedByCoord

SamStatus::Status ClipOverlap::handleSortedByCoord(SamFile& samIn, 
                                                   SamCoordOutput* outputBufferPtr)
{
    MateMapByCoord mateMap;

    // Get record & track success/fail
    SamStatus::Status returnStatus = SamStatus::SUCCESS;
    
    OverlapHandler::OverlapInfo overlapInfo = OverlapHandler::UNKNOWN_OVERLAP;
    SamRecord* matePtr = NULL;

    // Get the first read.
    SamRecord* recordPtr = myPool.getRecord();
    if(recordPtr == NULL)
    {
        // No records in the pool, can't process.
        std::cerr <<
            "ERROR: No records in the pool, can't process by coordinate.\n";
        return(SamStatus::FAIL_MEM);
    }

    // Track the original chrom/position of the record being processed
    // for flushing.
    int32_t chrom = -1;
    int32_t position = -1;

    bool overlap = false;
    while(returnStatus == SamStatus::SUCCESS)
    {
        // Get the next Record.
        returnStatus = readCoordRecord(samIn, &recordPtr, 
                                       mateMap, outputBufferPtr);
        if(returnStatus != SamStatus::SUCCESS)
        {
            break;
        }

        chrom = recordPtr->getReferenceID();
        position = recordPtr->get0BasedPosition();

        // Cleanup the mate map based on the newly read record.
        cleanupMateMap(mateMap, outputBufferPtr, chrom, position);

        // Check the read for overlaps.
        overlapInfo = myOverlapHandler->getOverlapInfo(*recordPtr, myIntExcludeFlags);
        // Handle the types of overlaps.
        switch(overlapInfo)
        {
            case OverlapHandler::OVERLAP:
                overlap = true;
                // 1st read, so store it in the mate map.
                mateMap.add(*recordPtr);
                // Clear the pointer so a new one is used next time.
                recordPtr = NULL;
                break;
            case OverlapHandler::NO_OVERLAP_WRONG_ORIENT:
                overlap = true;
                myOverlapHandler->handleNoOverlapWrongOrientation(*recordPtr);
                break;
            case OverlapHandler::SAME_START:
                overlap = true;
                // First check the mate map for the mate.
                matePtr = mateMap.getMate(*recordPtr);
                if(matePtr != NULL)
                {
                    // Mate was found, so handle the overlap.
                    myOverlapHandler->handleOverlapPair(*matePtr, *recordPtr);
                }
                else
                {
                    // Mate not found, so store this one.
                    mateMap.add(*recordPtr);
                    // Clear the pointer so a new one is used next time.
                    recordPtr = NULL;
                }
                break;
            case OverlapHandler::UNKNOWN_OVERLAP:
                matePtr = mateMap.getMate(*recordPtr);
                if(matePtr != NULL)
                {
                    // Mate was found, there is an overlap.
                    overlap = true;
                    myOverlapHandler->handleOverlapPair(*matePtr, *recordPtr);
                }
                else
                {
                    // No overlap if mate not found.
                    overlap = false;
                }
                break;
            case OverlapHandler::UNKNOWN_OVERLAP_WRONG_ORIENT:
                overlap = true;
                matePtr = mateMap.getMate(*recordPtr);
                if(matePtr != NULL)
                {
                    // Mate was found, there is an overlap..
                    myOverlapHandler->handleOverlapPair(*matePtr, *recordPtr);
                }
                else
                {
//.........这里部分代码省略.........
开发者ID:statgen,项目名称:bamUtil,代码行数:101,代码来源:ClipOverlap.cpp


注:本文中的SamRecord::get0BasedPosition方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。