本文整理汇总了C++中SamFile::ReadRecord方法的典型用法代码示例。如果您正苦于以下问题:C++ SamFile::ReadRecord方法的具体用法?C++ SamFile::ReadRecord怎么用?C++ SamFile::ReadRecord使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类SamFile
的用法示例。
在下文中一共展示了SamFile::ReadRecord方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: testRead
void testRead(SamFile &inSam)
{
// Read the SAM Header.
SamFileHeader samHeader;
assert(inSam.ReadHeader(samHeader));
validateHeader(samHeader);
testCopyHeader(samHeader);
testModHeader(samHeader);
SamRecord samRecord;
assert(inSam.ReadRecord(samHeader, samRecord) == true);
validateRead1(samRecord);
// Set a new quality and get the buffer.
samRecord.setQuality("ABCDE");
validateRead1ModQuality(samRecord);
// void* buffer = samRecord.getRecordBuffer();
assert(inSam.ReadRecord(samHeader, samRecord) == true);
validateRead2(samRecord);
assert(inSam.ReadRecord(samHeader, samRecord) == true);
validateRead3(samRecord);
assert(inSam.ReadRecord(samHeader, samRecord) == true);
validateRead4(samRecord);
assert(inSam.ReadRecord(samHeader, samRecord) == true);
validateRead5(samRecord);
assert(inSam.ReadRecord(samHeader, samRecord) == true);
validateRead6(samRecord);
assert(inSam.ReadRecord(samHeader, samRecord) == true);
validateRead7(samRecord);
assert(inSam.ReadRecord(samHeader, samRecord) == true);
validateRead8(samRecord);
assert(inSam.ReadRecord(samHeader, samRecord) == true);
validateRead9(samRecord);
assert(inSam.ReadRecord(samHeader, samRecord) == true);
validateRead10(samRecord);
}
示例2: CalcClusters
void GenomeRegionSeqStats::CalcClusters(String &bamFile, int minMapQuality)
{
SamFile sam;
SamRecord samRecord;
SamFileHeader samHeader;
if(!sam.OpenForRead(bamFile.c_str()))
error("Open BAM file %s failed!\n", bamFile.c_str());
if(!sam.ReadHeader(samHeader)) {
error("Read BAM file header %s failed!\n", bamFile.c_str());
}
if(depth.size()==0) depth.resize(referencegenome.sequenceLength());
String contigLabel;
uint32_t start;
uint32_t gstart;
Reset();
while(sam.ReadRecord(samHeader, samRecord))
{
nReads++;
if(samRecord.getFlag() & SamFlag::UNMAPPED) nUnMapped++;
if(samRecord.getMapQuality() < minMapQuality) continue;
CigarRoller cigar(samRecord.getCigar());
int nonClipSequence = 0;
if(cigar.size()!=0 && cigar[0].operation==Cigar::softClip)
nonClipSequence = cigar[0].count;
contigLabel = samRecord.getReferenceName();
start = nonClipSequence + samRecord.get0BasedPosition(); // start is 0-based
gstart = referencegenome.getGenomePosition(contigLabel.c_str(), start);
if(IsInRegions(contigLabel, start, start+samRecord.getReadLength())) continue;
for(uint32_t i=gstart; i<gstart+samRecord.getReadLength(); i++)
if(depth[i]<MAXDP)
depth[i]++;
nMappedOutTargets++;
}
}
示例3: CalcRegionStats
void GenomeRegionSeqStats::CalcRegionStats(String &bamFile)
{
SamFile sam;
SamRecord samRecord;
SamFileHeader samHeader;
if(!sam.OpenForRead(bamFile.c_str()))
error("Open BAM file %s failed!\n", bamFile.c_str());
if(!sam.ReadHeader(samHeader)) {
error("Read BAM file header %s failed!\n", bamFile.c_str());
}
String contigLabel;
int start, end;
Reset();
while(sam.ReadRecord(samHeader, samRecord))
{
nReads++;
if(samRecord.getFlag() & SamFlag::UNMAPPED) nUnMapped++;
if(contigFinishedCnt>=contigs.size()) continue;
CigarRoller cigar(samRecord.getCigar());
int nonClipSequence = 0;
if(cigar.size()!=0 && cigar[0].operation==Cigar::softClip)
nonClipSequence = cigar[0].count;
contigLabel = samRecord.getReferenceName();
start = nonClipSequence + samRecord.get0BasedPosition(); // start is 0-based
end = start + samRecord.getReadLength() - 1;
if(UpdateRegionStats(contigLabel, start, end)) nMapped2Targets++;
}
CalcRegionReadCountInGCBins();
CalcGroupReadCountInGCBins();
std::cout << "Total reads : " << nReads << std::endl;
}
示例4: readCoordRecord
SamStatus::Status ClipOverlap::readCoordRecord(SamFile& samIn,
SamRecord** recordPtr,
MateMapByCoord& mateMap,
SamCoordOutput* outputBufferPtr)
{
// Null pointer, so get a new pointer.
if(*recordPtr == NULL)
{
*recordPtr = myPool.getRecord();
if(*recordPtr == NULL)
{
// Failed to allocate a new record.
// Try to free up records from the mate map
if(!forceRecordFlush(mateMap, outputBufferPtr))
{
std::cerr << "Failed to flush the output buffer.\n";
return(SamStatus::FAIL_IO);
}
// Try to get a new record, one should have been cleared.
*recordPtr = myPool.getRecord();
if(*recordPtr == NULL)
{
std::cerr << "Failed to allocate any records.\n";
return(SamStatus::FAIL_MEM);
}
}
}
// RecordPtr is set.
if(!samIn.ReadRecord(mySamHeader, **recordPtr))
{
// Nothing to process, so return.
return(samIn.GetStatus());
}
return(SamStatus::SUCCESS);
}
示例5: if
//.........这里部分代码省略.........
if(myFirstFile == NULL)
{
std::cerr << "Failed to open " << firstOut
<< " so can't convert bam2FastQ.\n";
return(-1);
}
if(mySecondFile == NULL)
{
std::cerr << "Failed to open " << secondOut
<< " so can't convert bam2FastQ.\n";
return(-1);
}
}
if((readName) || (strcmp(mySamHeader.getSortOrder(), "queryname") == 0))
{
readName = true;
}
else
{
// defaulting to coordinate sorted.
samIn.setSortedValidation(SamFile::COORDINATE);
}
// Setup the '=' translation if the reference was specified.
if(!refFile.IsEmpty())
{
GenomeSequence* refPtr = new GenomeSequence(refFile);
samIn.SetReadSequenceTranslation(SamRecord::BASES);
samIn.SetReference(refPtr);
}
SamRecord* recordPtr;
int16_t samFlag;
SamStatus::Status returnStatus = SamStatus::SUCCESS;
while(returnStatus == SamStatus::SUCCESS)
{
recordPtr = myPool.getRecord();
if(recordPtr == NULL)
{
// Failed to allocate a new record.
throw(std::runtime_error("Failed to allocate a new SAM/BAM record"));
}
if(!samIn.ReadRecord(mySamHeader, *recordPtr))
{
// Failed to read a record.
returnStatus = samIn.GetStatus();
continue;
}
// Have a record. Check to see if it is a pair or unpaired read.
samFlag = recordPtr->getFlag();
if(SamFlag::isPaired(samFlag))
{
if(readName)
{
handlePairedRN(*recordPtr);
}
else
{
handlePairedCoord(*recordPtr);
}
}
else
{
++myNumUnpaired;
writeFastQ(*recordPtr, myUnpairedFile,
myUnpairedFileNameExt);
}
}
// Flush All
cleanUpMateMap(0, true);
if(returnStatus == SamStatus::NO_MORE_RECS)
{
returnStatus = SamStatus::SUCCESS;
}
samIn.Close();
closeFiles();
// Output the results
std::cerr << "\nFound " << myNumPairs << " read pairs.\n";
std::cerr << "Found " << myNumUnpaired << " unpaired reads.\n";
if(myNumMateFailures != 0)
{
std::cerr << "Failed to find mates for " << myNumMateFailures
<< " reads, so they were written as unpaired\n"
<< " (not included in either of the above counts).\n";
}
if(myNumQualTagErrors != 0)
{
std::cerr << myNumQualTagErrors << " records did not have tag "
<< myQField.c_str() << " or it was invalid, so the quality field was used for those records.\n";
}
return(returnStatus);
}
示例6: main
//.........这里部分代码省略.........
else if ( static_cast<int>(fastaFile.vnSequenceLengths[i]) != atoi(pSamHeaderRecord->getTagValue("LN")) ) {
gpLogger->error("SequenceLength is not identical between fasta and input BAM file");
}
else {
if ( !sAS.empty() )
samHeader.setSQTag("AS",sAS.c_str(),fastaFile.vsSequenceNames[i].c_str());
samHeader.setSQTag("M5",fastaFile.vsMD5sums[i].c_str(),fastaFile.vsSequenceNames[i].c_str());
if ( !sUR.empty() )
samHeader.setSQTag("UR",sUR.c_str(),fastaFile.vsSequenceNames[i].c_str());
if ( !sSP.empty() )
samHeader.setSQTag("SP",sSP.c_str(),fastaFile.vsSequenceNames[i].c_str());
}
}
gpLogger->write_log("Finished checking the consistency of SQ tags");
}
else {
gpLogger->write_log("Skipped checking the consistency of SQ tags");
}
// go over the headers again,
// assuming order of HD, SQ, RG, PG, and put proper tags at the end of the original tags
gpLogger->write_log("Creating the header of new output file");
//SamFileHeader outHeader;
samHeader.resetHeaderRecordIter();
for(unsigned int i=0; i < vsHDHeaders.size(); ++i) {
samHeader.addHeaderLine(vsHDHeaders[i].c_str());
}
/*
for(int i=0; i < fastaFile.vsSequenceNames.size(); ++i) {
std::string s("@SQ\tSN:");
char buf[1024];
s += fastaFile.vsSequenceNames[i];
sprintf(buf,"\tLN:%d",fastaFile.vnSequenceLengths[i]);
s += buf;
if ( !sAS.empty() ) {
sprintf(buf,"\tAS:%s",sAS.c_str());
s += buf;
}
if ( !sUR.empty() ) {
sprintf(buf,"\tUR:%s",sUR.c_str());
s += buf;
}
sprintf(buf,"\tM5:%s",fastaFile.vsMD5sums[i].c_str());
s += buf;
if ( !sSP.empty() ) {
sprintf(buf,"\tSP:%s",sSP.c_str());
s += buf;
}
outHeader.addHeaderLine(s.c_str());
}*/
for(unsigned int i=0; i < vsRGHeaders.size(); ++i) {
samHeader.addHeaderLine(vsRGHeaders[i].c_str());
}
for(unsigned int i=0; i < vsPGHeaders.size(); ++i) {
samHeader.addHeaderLine(vsPGHeaders[i].c_str());
}
samOut.WriteHeader(samHeader);
gpLogger->write_log("Adding %d HD, %d RG, and %d PG headers",vsHDHeaders.size(), vsRGHeaders.size(), vsPGHeaders.size());
gpLogger->write_log("Finished writing output headers");
// parse RG tag and get RG ID to append
std::string sRGID;
if ( ! vsRGHeaders.empty() ) {
std::vector<std::string> tokens;
FastaFile::tokenizeString( vsRGHeaders[0].c_str(), tokens );
for(unsigned int i=0; i < tokens.size(); ++i) {
if ( tokens[i].find("ID:") == 0 ) {
sRGID = tokens[i].substr(3);
}
}
}
gpLogger->write_log("Writing output BAM file");
SamRecord samRecord;
while (samIn.ReadRecord(samHeader, samRecord) == true) {
if ( !sRGID.empty() ) {
if ( samRecord.addTag("RG",'Z',sRGID.c_str()) == false ) {
gpLogger->error("Failed to add a RG tag %s",sRGID.c_str());
}
// temporary code added
if ( strncmp(samRecord.getReadName(),"seqcore_",8) == 0 ) {
char buf[1024];
sprintf(buf,"UM%s",samRecord.getReadName()+8);
samRecord.setReadName(buf);
}
}
samOut.WriteRecord(samHeader, samRecord);
//if ( samIn.GetCurrentRecordCount() == 1000 ) break;
}
samOut.Close();
gpLogger->write_log("Successfully written %d records",samIn.GetCurrentRecordCount());
delete gpLogger;
return 0;
}
示例7: handleSortedByReadName
SamStatus::Status ClipOverlap::handleSortedByReadName(SamFile& samIn,
SamFile* samOutPtr)
{
// Set returnStatus to success. It will be changed
// to the failure reason if any of the writes fail.
SamStatus::Status returnStatus = SamStatus::SUCCESS;
// Read the sam records.
SamRecord* prevSamRecord = NULL;
SamRecord* samRecord = new SamRecord;
SamRecord* tmpRecord = new SamRecord;
if((samRecord == NULL) || (tmpRecord == NULL))
{
std::cerr << "Failed to allocate a SamRecord, so exit.\n";
return(SamStatus::FAIL_MEM);
}
// Keep reading records until ReadRecord returns false.
while(samIn.ReadRecord(mySamHeader, *samRecord))
{
int16_t flag = samRecord->getFlag();
if((flag & myIntExcludeFlags) != 0)
{
// This read should not be checked for overlaps.
// Check if there is a previous SamRecord.
if(prevSamRecord != NULL)
{
// There is a previous record.
// If it has a different read name, write it.
if(strcmp(samRecord->getReadName(),
prevSamRecord->getReadName()) != 0)
{
// Different read name, so write the previous record.
if((samOutPtr != NULL) && !myOverlapsOnly)
{
if(!samOutPtr->WriteRecord(mySamHeader, *prevSamRecord))
{
// Failed to write a record.
fprintf(stderr, "%s\n", samOutPtr->GetStatusMessage());
returnStatus = samOutPtr->GetStatus();
}
}
// Clear the previous record info.
tmpRecord = prevSamRecord;
prevSamRecord = NULL;
}
// If it has the same read name, leave it in case there is another read with that name
}
// This record is not being checked for overlaps, so just write it and continue
if((samOutPtr != NULL) && !myOverlapsOnly)
{
if(!samOutPtr->WriteRecord(mySamHeader, *samRecord))
{
// Failed to write a record.
fprintf(stderr, "%s\n", samOutPtr->GetStatusMessage());
returnStatus = samOutPtr->GetStatus();
}
}
continue;
}
if(prevSamRecord == NULL)
{
// Nothing to compare this record to, so set this
// record to the previous, and the next record.
prevSamRecord = samRecord;
samRecord = tmpRecord;
tmpRecord = NULL;
continue;
}
// Check if the read name matches the previous read name.
if(strcmp(samRecord->getReadName(),
prevSamRecord->getReadName()) == 0)
{
bool overlap = false;
// Same Read Name, so check clipping.
OverlapHandler::OverlapInfo prevClipInfo =
myOverlapHandler->getOverlapInfo(*prevSamRecord);
OverlapHandler::OverlapInfo curClipInfo =
myOverlapHandler->getOverlapInfo(*samRecord);
// If either indicate a complete clipping, clip both.
if((prevClipInfo == OverlapHandler::NO_OVERLAP_WRONG_ORIENT) ||
(curClipInfo == OverlapHandler::NO_OVERLAP_WRONG_ORIENT))
{
overlap = true;
myOverlapHandler->handleNoOverlapWrongOrientation(*prevSamRecord);
// Don't update stats since this is the 2nd in the pair
myOverlapHandler->handleNoOverlapWrongOrientation(*samRecord,
false);
}
else if((prevClipInfo == OverlapHandler::OVERLAP) ||
(prevClipInfo == OverlapHandler::SAME_START))
{
// The previous read starts at or before the current one.
overlap = true;
myOverlapHandler->handleOverlapPair(*prevSamRecord,
*samRecord);
//.........这里部分代码省略.........
示例8: testIndex
void testIndex(BamIndex& bamIndex)
{
assert(bamIndex.getNumMappedReads(1) == 2);
assert(bamIndex.getNumUnMappedReads(1) == 0);
assert(bamIndex.getNumMappedReads(0) == 4);
assert(bamIndex.getNumUnMappedReads(0) == 1);
assert(bamIndex.getNumMappedReads(23) == -1);
assert(bamIndex.getNumUnMappedReads(23) == -1);
assert(bamIndex.getNumMappedReads(-1) == 0);
assert(bamIndex.getNumUnMappedReads(-1) == 2);
assert(bamIndex.getNumMappedReads(-2) == -1);
assert(bamIndex.getNumUnMappedReads(-2) == -1);
assert(bamIndex.getNumMappedReads(22) == 0);
assert(bamIndex.getNumUnMappedReads(22) == 0);
// Get the chunks for reference id 1.
Chunk testChunk;
SortedChunkList chunkList;
assert(bamIndex.getChunksForRegion(1, -1, -1, chunkList) == true);
assert(!chunkList.empty());
testChunk = chunkList.pop();
assert(chunkList.empty());
assert(testChunk.chunk_beg == 0x4e7);
assert(testChunk.chunk_end == 0x599);
// Get the chunks for reference id 0.
assert(bamIndex.getChunksForRegion(0, -1, -1, chunkList) == true);
assert(!chunkList.empty());
testChunk = chunkList.pop();
assert(chunkList.empty());
assert(testChunk.chunk_beg == 0x360);
assert(testChunk.chunk_end == 0x4e7);
// Get the chunks for reference id 2.
assert(bamIndex.getChunksForRegion(2, -1, -1, chunkList) == true);
assert(!chunkList.empty());
testChunk = chunkList.pop();
assert(chunkList.empty());
assert(testChunk.chunk_beg == 0x599);
assert(testChunk.chunk_end == 0x5ea);
// Get the chunks for reference id 3.
// There isn't one for this ref id, but still successfully read the file,
// so it should return true, but the list should be empty.
assert(bamIndex.getChunksForRegion(3, -1, -1, chunkList) == true);
assert(chunkList.empty());
// Test reading an indexed bam file.
SamFile inFile;
assert(inFile.OpenForRead("testFiles/sortedBam.bam"));
inFile.setSortedValidation(SamFile::COORDINATE);
assert(inFile.ReadBamIndex("testFiles/sortedBam.bam.bai"));
SamFileHeader samHeader;
assert(inFile.ReadHeader(samHeader));
SamRecord samRecord;
// Test getting num mapped/unmapped reads.
assert(inFile.getNumMappedReadsFromIndex(1) == 2);
assert(inFile.getNumUnMappedReadsFromIndex(1) == 0);
assert(inFile.getNumMappedReadsFromIndex(0) == 4);
assert(inFile.getNumUnMappedReadsFromIndex(0) == 1);
assert(inFile.getNumMappedReadsFromIndex(23) == -1);
assert(inFile.getNumUnMappedReadsFromIndex(23) == -1);
assert(inFile.getNumMappedReadsFromIndex(-1) == 0);
assert(inFile.getNumUnMappedReadsFromIndex(-1) == 2);
assert(inFile.getNumMappedReadsFromIndex(-2) == -1);
assert(inFile.getNumUnMappedReadsFromIndex(-2) == -1);
assert(inFile.getNumMappedReadsFromIndex(22) == 0);
assert(inFile.getNumUnMappedReadsFromIndex(22) == 0);
assert(inFile.getNumMappedReadsFromIndex("2", samHeader) == 2);
assert(inFile.getNumUnMappedReadsFromIndex("2", samHeader) == 0);
assert(inFile.getNumMappedReadsFromIndex("1", samHeader) == 4);
assert(inFile.getNumUnMappedReadsFromIndex("1", samHeader) == 1);
assert(inFile.getNumMappedReadsFromIndex("22", samHeader) == 0);
assert(inFile.getNumUnMappedReadsFromIndex("22", samHeader) == 0);
assert(inFile.getNumMappedReadsFromIndex("", samHeader) == 0);
assert(inFile.getNumUnMappedReadsFromIndex("*", samHeader) == 2);
assert(inFile.getNumMappedReadsFromIndex("unknown", samHeader) == -1);
assert(inFile.getNumUnMappedReadsFromIndex("unknown", samHeader) == -1);
assert(inFile.getNumMappedReadsFromIndex("X", samHeader) == 0);
assert(inFile.getNumUnMappedReadsFromIndex("X", samHeader) == 0);
// Section -1 = Ref *: 2 records (8 & 10 from testSam.sam that is reflected
// in the validation.
assert(inFile.SetReadSection(-1));
assert(inFile.ReadRecord(samHeader, samRecord));
validateRead8(samRecord);
assert(inFile.ReadRecord(samHeader, samRecord));
validateRead10(samRecord);
assert(inFile.ReadRecord(samHeader, samRecord) == false);
// Section 2 = Ref 3: 1 records (9 from testSam.sam that is reflected
// in the validation.
assert(inFile.SetReadSection(2));
assert(inFile.ReadRecord(samHeader, samRecord));
validateRead9(samRecord);
assert(inFile.ReadRecord(samHeader, samRecord) == false);
//.........这里部分代码省略.........
示例9: of
/*
if a discordant read is mapped to MEI (overlap with MEI coord)
add centor of ( anchor_end + 3*avr_ins_var )
skip unmap & clip
check 3 types at the same time
*/
void Sites::AddDiscoverSiteFromSingleBam( SingleSampleInfo & si )
{
/*for(int i=0;i<NMEI; i++) {
std::cout << "m=" << i << ": ";
for(map<string, map<int, bool> >::iterator t=meiCoord[m].begin(); t!=meiCoord[m].end(); t++)
std::cout << t->first << "->" << t->second.size() << " ";
std::cout << std::endl;
}*/
avr_read_length = si.avr_read_length;
avr_ins_size = si.avr_ins_size;
min_read_length = avr_read_length / 3;
current_depth = si.depth;
// total_depth += current_depth;
resetDepthAddFlag();
SamFile bam;
SamFileHeader bam_header;
OpenBamAndBai( bam,bam_header, si.bam_name );
for( int i=0; i<pchr_list->size(); i++ ) {
string chr = (*pchr_list)[i];
// if ( !single_chr.empty() && chr.compare(single_chr)!=0 )
// continue;
if ( siteList.find(chr) == siteList.end() )
siteList[chr].clear();
// map<string, map<int, SingleSite> >::iterator pchr = siteList[m].find(chr);
// map<string, map<int, bool> >::iterator coord_chr_ptr = meiCoord[m].find(chr);
// if (coord_chr_ptr == meiCoord[m].end())
// continue;
bool section_status;
if (range_start<0) { // no range
section_status = bam.SetReadSection( chr.c_str() );
if (!section_status) {
string str = "Cannot set read section at chr " + chr;
morphWarning( str );
}
}
else { // set range
section_status = bam.SetReadSection( chr.c_str(), range_start, range_end );
if (!section_status) {
string str = "Cannot set read section at chr " + chr + " " + std::to_string(range_start) + "-" + std::to_string(range_end);
morphWarning( str );
}
}
// DO ADDING
// if (siteList[chr].empty())
// p_reach_last = 1;
// else {
// p_reach_last = 0;
pnearest = siteList[chr].begin();
// }
SingleSite new_site; // temporary cluster. will be added to map later.
new_site.depth = current_depth;
bool start_new = 1; // check if need to add new_site to map and start new new_site
SamRecord rec;
int between = 0; // count #reads after new_site.end. If end changed, add it to rcount and reset to zero
while( bam.ReadRecord( bam_header, rec ) ) {
if (!start_new) {
if (rec.get1BasedPosition() >= new_site.end)
between++;
else
new_site.rcount++;
}
if (rec.getFlag() & 0x2)
continue;
if ( OneEndUnmap( rec.getFlag() ) )
continue;
if ( IsSupplementary(rec.getFlag()) )
continue;
if ( rec.getReadLength() < min_read_length )
continue;
if ( rec.getMapQuality() < MIN_QUALITY )
continue;
if (chr.compare(rec.getMateReferenceName())==0 && rec.getInsertSize() < abs(avr_ins_size*2))
continue;
bool is_mei = 0;
vector<bool> is_in_coord;
is_in_coord.resize(3, 0);
for(int m=0; m<NMEI; m++) {
map<string, map<int, bool> >::iterator coord_chr_ptr = meiCoord[m].find(rec.getMateReferenceName());
if (coord_chr_ptr == meiCoord[m].end())
is_in_coord[m] = 0;
else
is_in_coord[m] = isWithinCoord( rec.get1BasedMatePosition(), coord_chr_ptr->second ); // within MEI coord
if (is_in_coord[m])
is_mei = 1;
}
if (!is_mei)
continue;
if (start_new) {
setNewCluster( is_in_coord, new_site,rec);
start_new = 0;
//.........这里部分代码省略.........
示例10: setReadCountInSection
void setReadCountInSection( vector<int> & raw_counts, string & chr, int center, SamFile & samIn, SamFileHeader & samHeader, vector<RefSeq*> & REF_SEQ )
{
int st = center - WIN/2;
int ed = center + WIN/2;
bool section_status = samIn.SetReadSection( chr.c_str(), st, ed );
if (!section_status) {
std::cerr << "Warning: Unable to set read section: " << chr << ": " << st << "-" << ed << ". Set section depth = 0!" << std::endl;
return;
}
// proper reads
map<string, vector< DiscPair > > disc_recs; // record where the disc come from
ProperDeck pDeck( REF_SEQ );
SamRecord sam_rec;
while( samIn.ReadRecord(samHeader, sam_rec) ) {
bool pass_qc = PassQC( sam_rec );
if ( !pass_qc )
continue;
if ( sam_rec.getFlag() & 0x2 ) {
if ( sam_rec.getInsertSize() > 0 )
pDeck.Add( sam_rec );
else {
RetrievedIndex rv = pDeck.RetrieveIndex( sam_rec );
int index = getRetrievedIndexToRawCounts( rv );
if (index >= 0) { // for read partially in window, only add clip
if ( sam_rec.get1BasedPosition() < st || sam_rec.get1BasedAlignmentEnd() > ed ) {
if ( index >= 2 )
raw_counts[ index ]++;
}
else
raw_counts[ index ]++;
}
}
}
else { // disc: rec info and wait to reset section later
if ( sam_rec.getReadLength() < 30 )
continue;
string mate_chr = sam_rec.getMateReferenceName();
// check if this one is valid as anchor
DiscPair new_pair( 1, 0, sam_rec, REF_SEQ );
disc_recs[mate_chr].push_back( new_pair );
}
}
// disc reads
for( map<string, vector< DiscPair > >::iterator chr_it = disc_recs.begin(); chr_it != disc_recs.end(); chr_it++ ) {
for( vector< DiscPair >::iterator dp_it = chr_it->second.begin(); dp_it != chr_it->second.end(); dp_it++ ) {
bool section_status = samIn.SetReadSection( chr_it->first.c_str(), dp_it->GetSecondAlignPosition(), dp_it->GetSecondAlignPosition() + WIN );
if (!section_status) {
std::cerr << "ERROR: Unable to set read section: " << chr << ": " << st << "-" << ed << std::endl;
exit(1);
}
SamRecord sam_rec;
while( samIn.ReadRecord(samHeader, sam_rec) ) {
bool pass_qc = PassQC( sam_rec );
if ( !pass_qc )
continue;
if ( !DiscSamPass(sam_rec) )
continue;
int position = sam_rec.get1BasedPosition();
if ( position > dp_it->GetFirstAlignPosition() )
break;
if ( sam_rec.getFlag() & 0x2 )
continue;
bool same_pair = dp_it->IsSamePair( sam_rec );
if ( !same_pair )
continue;
// now add ro raw stats: always use first as anchor
dp_it->AddSecondToPair( sam_rec );
Loci loci = dp_it->GetFirstLoci();
if ( dp_it->FirstValid() ) {
int index = getLociToRawCounts( loci );
raw_counts[ index ]++;
// clear & break
break;
}
}
}
}
}
示例11: testFlagRead
void testFlagRead(const char* fileName)
{
SamFile inSam;
SamFileHeader samHeader;
SamRecord samRecord;
////////////////////////////////////////////////////////////
// Required flag 0x48 (only flag 73 matches)
// Exclude nothing
assert(inSam.OpenForRead(fileName));
assert(inSam.ReadHeader(samHeader));
validateHeader(samHeader);
inSam.SetReadFlags(0x48, 0x0);
assert(inSam.ReadRecord(samHeader, samRecord) == true);
validateRead1(samRecord);
assert(inSam.ReadRecord(samHeader, samRecord) == false);
inSam.Close();
////////////////////////////////////////////////////////////
// No required flags.
// Exclude 0x48. This leaves just the one read with flag 133.
assert(inSam.OpenForRead(fileName));
assert(inSam.ReadHeader(samHeader));
validateHeader(samHeader);
inSam.SetReadFlags(0x0, 0x48);
assert(inSam.ReadRecord(samHeader, samRecord) == true);
validateRead2(samRecord);
assert(inSam.ReadRecord(samHeader, samRecord) == false);
inSam.Close();
////////////////////////////////////////////////////////////
// Required flag 0x40
// Exclude 0x48.
// This will not find any records since the exclude and required conflict.
assert(inSam.OpenForRead(fileName));
assert(inSam.ReadHeader(samHeader));
validateHeader(samHeader);
inSam.SetReadFlags(0x40, 0x48);
assert(inSam.ReadRecord(samHeader, samRecord) == false);
inSam.Close();
////////////////////////////////////////////////////////////
// Required flag 0x4
// Exclude 0x8.
// Only finds flag 133.
assert(inSam.OpenForRead(fileName));
assert(inSam.ReadHeader(samHeader));
validateHeader(samHeader);
inSam.SetReadFlags(0x4, 0x8);
assert(inSam.ReadRecord(samHeader, samRecord) == true);
validateRead2(samRecord);
assert(inSam.ReadRecord(samHeader, samRecord) == false);
inSam.Close();
////////////////////////////////////////////////////////////
// Required flag 0x4
// Exclude nothing
// Finds flags 133 & 141.
assert(inSam.OpenForRead(fileName));
assert(inSam.ReadHeader(samHeader));
validateHeader(samHeader);
inSam.SetReadFlags(0x4, 0x0);
assert(inSam.ReadRecord(samHeader, samRecord) == true);
validateRead2(samRecord);
assert(inSam.ReadRecord(samHeader, samRecord) == true);
validateRead8(samRecord);
assert(inSam.ReadRecord(samHeader, samRecord) == true);
validateRead10(samRecord);
assert(inSam.ReadRecord(samHeader, samRecord) == false);
inSam.Close();
}
示例12: execute
//.........这里部分代码省略.........
numTrimBaseL);
}
else
{
// ignore strand, treating forward & reverse strands the same
fprintf(stderr,"\t#Bases to trim from the left of reverse strands : %d\n",
numTrimBaseL);
fprintf(stderr,"\t#Bases to trim from the right of reverse strands : %d\n",
numTrimBaseR);
}
}
// Read the sam header.
SamFileHeader samHeader;
if(!samIn.ReadHeader(samHeader))
{
fprintf(stderr, "%s\n", samIn.GetStatusMessage());
return(samIn.GetStatus());
}
// Write the sam header.
if(!samOut.WriteHeader(samHeader))
{
fprintf(stderr, "%s\n", samOut.GetStatusMessage());
return(samOut.GetStatus());
}
SamRecord samRecord;
char seq[65536];
char qual[65536];
int i, len;
// Keep reading records until ReadRecord returns false.
while(samIn.ReadRecord(samHeader, samRecord)) {
// Successfully read a record from the file, so write it.
strcpy(seq,samRecord.getSequence());
strcpy(qual,samRecord.getQuality());
// Number of bases to trim from the left/right,
// set based on ignoreStrand flag and strand info.
int trimLeft = numTrimBaseL;
int trimRight = numTrimBaseR;
if(!ignoreStrand)
{
if(SamFlag::isReverse(samRecord.getFlag()))
{
// We are reversing the reverse reads,
// so swap the left & right trim counts.
trimRight = numTrimBaseL;
trimLeft = numTrimBaseR;
}
}
len = strlen(seq);
// Do not trim if sequence is '*'
if ( strcmp(seq, "*") != 0 ) {
bool qualValue = true;
if(strcmp(qual, "*") == 0)
{
qualValue = false;
}
int qualLen = strlen(qual);
if ( (qualLen != len) && qualValue ) {
fprintf(stderr,"ERROR: Sequence and Quality have different length\n");
return(-1);
}
示例13: processFile
int GapInfo::processFile(const char* inputFileName, const char* outputFileName,
const char* refFile, bool detailed,
bool checkFirst, bool checkStrand)
{
// Open the file for reading.
SamFile samIn;
samIn.OpenForRead(inputFileName);
// Read the sam header.
SamFileHeader samHeader;
samIn.ReadHeader(samHeader);
SamRecord samRecord;
GenomeSequence* refPtr = NULL;
if(strcmp(refFile, "") != 0)
{
refPtr = new GenomeSequence(refFile);
}
IFILE outFile = ifopen(outputFileName, "w");
// Map for summary.
std::map<int, int> gapInfoMap;
// Keep reading records until ReadRecord returns false.
while(samIn.ReadRecord(samHeader, samRecord))
{
uint16_t samFlags = samRecord.getFlag();
if((!SamFlag::isMapped(samFlags)) ||
(!SamFlag::isMateMapped(samFlags)) ||
(!SamFlag::isPaired(samFlags)) ||
(samFlags & SamFlag::SECONDARY_ALIGNMENT) ||
(SamFlag::isDuplicate(samFlags)) ||
(SamFlag::isQCFailure(samFlags)))
{
// unmapped, mate unmapped, not paired,
// not the primary alignment,
// duplicate, fails vendor quality check
continue;
}
// No gap info if the chromosome names are different or
// are unknown.
int32_t refID = samRecord.getReferenceID();
if((refID != samRecord.getMateReferenceID()) || (refID == -1))
{
continue;
}
int32_t readStart = samRecord.get0BasedPosition();
int32_t mateStart = samRecord.get0BasedMatePosition();
// If the mate starts first, then the pair was processed by
// the mate.
if(mateStart < readStart)
{
continue;
}
if((mateStart == readStart) && (SamFlag::isReverse(samFlags)))
{
// read and mate start at the same position, so
// only process the forward strand.
continue;
}
// Process this read pair.
int32_t readEnd = samRecord.get0BasedAlignmentEnd();
int32_t gapSize = mateStart - readEnd - 1;
if(detailed)
{
// Output the gap info.
ifprintf(outFile, "%s\t%d\t%d",
samRecord.getReferenceName(), readEnd+1, gapSize);
// Check if it is not the first or if it is not the forward strand.
if(checkFirst && !SamFlag::isFirstFragment(samFlags))
{
ifprintf(outFile, "\tNotFirst");
}
if(checkStrand && SamFlag::isReverse(samFlags))
{
ifprintf(outFile, "\tReverse");
}
ifprintf(outFile, "\n");
}
else
{
// Summary.
// Skip reads that are not the forward strand.
if(SamFlag::isReverse(samFlags))
{
// continue
continue;
}
//.........这里部分代码省略.........
示例14: execute
//.........这里部分代码省略.........
refPtr = new GenomeSequence(refFile);
}
SamRecord::SequenceTranslation translation;
if((useBases) && (refPtr != NULL))
{
translation = SamRecord::BASES;
}
else if((useEquals) && (refPtr != NULL))
{
translation = SamRecord::EQUAL;
}
else
{
useOrigSeq = true;
translation = SamRecord::NONE;
}
if(params)
{
inputParameters.Status();
}
// Open the input file for reading.
SamFile samIn;
if(recover) samIn.setAttemptRecovery(true);
samIn.OpenForRead(inFile);
// Open the output file for writing.
SamFile samOut;
samOut.OpenForWrite(outFile);
samOut.SetWriteSequenceTranslation(translation);
samOut.SetReference(refPtr);
// Read the sam header.
SamFileHeader samHeader;
samIn.ReadHeader(samHeader);
// Write the sam header.
samOut.WriteHeader(samHeader);
SamRecord samRecord;
// Set returnStatus to success. It will be changed
// to the failure reason if any of the writes fail.
SamStatus::Status returnStatus = SamStatus::SUCCESS;
while(1) {
try {
// Keep reading records until ReadRecord returns false.
while(samIn.ReadRecord(samHeader, samRecord))
{
// left shift if necessary.
if(lshift)
{
samRecord.shiftIndelsLeft();
}
// Successfully read a record from the file, so write it.
if(!samOut.WriteRecord(samHeader, samRecord))
{
// Failed to write a record.
fprintf(stderr, "%s\n", samOut.GetStatusMessage());
returnStatus = samOut.GetStatus();
}
}
break;
} catch (std::runtime_error e) {
std::cerr << "Caught runtime error: " << e.what() << "\n";
if(!recover) {
std::cerr << "Corrupted BAM file detected - consider using --recover option.\n";
break;
}
std::cerr << "Attempting to resync at next good BGZF block and BAM record.\n";
// XXX need to resync SamFile stream here
bool rc = samIn.attemptRecoverySync(checkSignature, SIGNATURE_LENGTH);
if(rc) {
std::cerr << "Successful resync - some data lost.\n";
continue; // succeeded
}
std::cerr << "Failed to re-sync on data stream.\n";
break; // failed to resync
}
}
std::cerr << std::endl << "Number of records read = " <<
samIn.GetCurrentRecordCount() << std::endl;
std::cerr << "Number of records written = " <<
samOut.GetCurrentRecordCount() << std::endl;
if(refPtr != NULL)
{
delete(refPtr);
}
// Since the reads were successful, return the status based
// on the status of the writes. If any failed, return
// their failure status.
return(returnStatus);
}
示例15: BEGIN_LONG_PARAMETERS
//.........这里部分代码省略.........
//////////////////////
// Setup in case doing a quality count.
// Quality histogram.
const int MAX_QUAL = 126;
const int START_QUAL = 33;
uint64_t qualCount[MAX_QUAL+1];
for(int i = 0; i <= MAX_QUAL; i++)
{
qualCount[i] = 0;
}
const int START_PHRED = 0;
const int PHRED_DIFF = START_QUAL - START_PHRED;
const int MAX_PHRED = MAX_QUAL - PHRED_DIFF;
uint64_t phredCount[MAX_PHRED+1];
for(int i = 0; i <= MAX_PHRED; i++)
{
phredCount[i] = 0;
}
int refPos = 0;
Cigar* cigarPtr = NULL;
char cigarChar = '?';
// Exclude clips from the qual/phred counts if unmapped reads are excluded.
bool qualExcludeClips = excludeFlags & SamFlag::UNMAPPED;
//////////////////////////////////
// When not reading by sections, getNextSection returns true
// the first time, then false the next time.
while(getNextSection(samIn))
{
// Keep reading records from the file until SamFile::ReadRecord
// indicates to stop (returns false).
while(((maxNumReads < 0) || (numReads < maxNumReads)) && samIn.ReadRecord(samHeader, samRecord))
{
// Another record was read, so increment the number of reads.
++numReads;
// See if the quality histogram should be genereated.
if(qual || phred)
{
// Get the quality.
const char* qual = samRecord.getQuality();
// Check for no quality ('*').
if((qual[0] == '*') && (qual[1] == 0))
{
// This record does not have a quality string, so no
// quality processing is necessary.
}
else
{
int index = 0;
cigarPtr = samRecord.getCigarInfo();
cigarChar = '?';
refPos = samRecord.get0BasedPosition();
if(!qualExcludeClips && (cigarPtr != NULL))
{
// Offset the reference position by any soft clips
// by subtracting the queryIndex of this start position.
// refPos is now the start position of the clips.
refPos -= cigarPtr->getQueryIndex(0);
}
while(qual[index] != 0)
{
// Skip this quality if it is clipped and we are skipping clips.
if(cigarPtr != NULL)