本文整理汇总了C++中BamAlignment::IsPaired方法的典型用法代码示例。如果您正苦于以下问题:C++ BamAlignment::IsPaired方法的具体用法?C++ BamAlignment::IsPaired怎么用?C++ BamAlignment::IsPaired使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类BamAlignment
的用法示例。
在下文中一共展示了BamAlignment::IsPaired方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1:
// print BamAlignment in FASTQ format
// N.B. - uses QueryBases NOT AlignedBases
void ConvertTool::ConvertToolPrivate::PrintFastq(const BamAlignment& a) {
// @BamAlignment.Name
// BamAlignment.QueryBases
// +
// BamAlignment.Qualities
//
// N.B. - QueryBases are reverse-complemented (& Qualities reversed) if aligned to reverse strand .
// Name is appended "/1" or "/2" if paired-end, to reflect which mate this entry is.
// handle paired-end alignments
string name = a.Name;
if ( a.IsPaired() )
name.append( (a.IsFirstMate() ? "/1" : "/2") );
// handle reverse strand alignment - bases & qualities
string qualities = a.Qualities;
string sequence = a.QueryBases;
if ( a.IsReverseStrand() ) {
Utilities::Reverse(qualities);
Utilities::ReverseComplement(sequence);
}
// write to output stream
m_out << "@" << name << endl
<< sequence << endl
<< "+" << endl
<< qualities << endl;
}
示例2: Execute
int DataStatisticsTool::Execute()
{
// iterate over reads in BAM file(s)
BamAlignment alignObj;
while(bamReader.GetNextAlignment(alignObj))
{
if (alignObj.IsDuplicate()) continue;
if (alignObj.IsFailedQC()) continue;
if (!alignObj.IsMapped()) continue;
if (!alignObj.IsPrimaryAlignment()) continue;
if (alignObj.IsPaired() && !alignObj.IsProperPair()) continue;
if (alignObj.IsPaired() && !alignObj.IsMateMapped()) continue;
if (!alignObj.HasTag("MD")) continue;
// // debug
// GenericBamAlignmentTools::printBamAlignmentCigar(alignObj);
// GenericBamAlignmentTools::printBamAlignmentMD(alignObj);
// shift InDel
GenericBamAlignmentTools::leftShiftInDel(alignObj);
// // debug
// GenericBamAlignmentTools::printBamAlignmentCigar(alignObj);
// GenericBamAlignmentTools::printBamAlignmentMD(alignObj);
// get the alignment sequences
string alignRead;
string alignGenome;
GenericBamAlignmentTools::getAlignmentSequences(alignObj, alignRead, alignGenome);
// update the statistics
statistics.update(alignRead, alignGenome);
}
// print to screen
cout << statistics << endl;
// statistics.printMatchMismatch();
// close BAM reader
bamReader.Close();
// close Fasta
genomeFasta.Close();
return 1;
}
示例3: abs
// use current input alignment to update BAM file alignment stats
void StatsTool::StatsToolPrivate::ProcessAlignment(const BamAlignment& al) {
// increment total alignment counter
++numReads;
// check the paired-independent flags
if ( al.IsDuplicate() ) ++numDuplicates;
if ( al.IsFailedQC() ) ++numFailedQC;
if ( al.IsMapped() ) ++numMapped;
// check forward/reverse strand
if ( al.IsReverseStrand() )
++numReverseStrand;
else
++numForwardStrand;
// if alignment is paired-end
if ( al.IsPaired() ) {
// increment PE counter
++numPaired;
// increment first mate/second mate counters
if ( al.IsFirstMate() ) ++numFirstMate;
if ( al.IsSecondMate() ) ++numSecondMate;
// if alignment is mapped, check mate status
if ( al.IsMapped() ) {
// if mate mapped
if ( al.IsMateMapped() )
++numBothMatesMapped;
// else singleton
else
++numSingletons;
}
// check for explicit proper pair flag
if ( al.IsProperPair() ) ++numProperPair;
// store insert size for first mate
if ( settings->IsShowingInsertSizeSummary && al.IsFirstMate() && (al.InsertSize != 0) ) {
int insertSize = abs(al.InsertSize);
insertSizes.push_back( insertSize );
}
}
}
示例4: while
// print BamAlignment in SAM format
void ConvertTool::ConvertToolPrivate::PrintSam(const BamAlignment& a) {
// tab-delimited
// <QNAME> <FLAG> <RNAME> <POS> <MAPQ> <CIGAR> <MRNM> <MPOS> <ISIZE> <SEQ> <QUAL> [ <TAG>:<VTYPE>:<VALUE> [...] ]
// write name & alignment flag
m_out << a.Name << "\t" << a.AlignmentFlag << "\t";
// write reference name
if ( (a.RefID >= 0) && (a.RefID < (int)m_references.size()) )
m_out << m_references[a.RefID].RefName << "\t";
else
m_out << "*\t";
// write position & map quality
m_out << a.Position+1 << "\t" << a.MapQuality << "\t";
// write CIGAR
const vector<CigarOp>& cigarData = a.CigarData;
if ( cigarData.empty() ) m_out << "*\t";
else {
vector<CigarOp>::const_iterator cigarIter = cigarData.begin();
vector<CigarOp>::const_iterator cigarEnd = cigarData.end();
for ( ; cigarIter != cigarEnd; ++cigarIter ) {
const CigarOp& op = (*cigarIter);
m_out << op.Length << op.Type;
}
m_out << "\t";
}
// write mate reference name, mate position, & insert size
if ( a.IsPaired() && (a.MateRefID >= 0) && (a.MateRefID < (int)m_references.size()) ) {
if ( a.MateRefID == a.RefID )
m_out << "=\t";
else
m_out << m_references[a.MateRefID].RefName << "\t";
m_out << a.MatePosition+1 << "\t" << a.InsertSize << "\t";
}
else
m_out << "*\t0\t0\t";
// write sequence
if ( a.QueryBases.empty() )
m_out << "*\t";
else
m_out << a.QueryBases << "\t";
// write qualities
if ( a.Qualities.empty() || (a.Qualities.at(0) == (char)0xFF) )
m_out << "*";
else
m_out << a.Qualities;
// write tag data
const char* tagData = a.TagData.c_str();
const size_t tagDataLength = a.TagData.length();
size_t index = 0;
while ( index < tagDataLength ) {
// write tag name
string tagName = a.TagData.substr(index, 2);
m_out << "\t" << tagName << ":";
index += 2;
// get data type
char type = a.TagData.at(index);
++index;
switch ( type ) {
case (Constants::BAM_TAG_TYPE_ASCII) :
m_out << "A:" << tagData[index];
++index;
break;
case (Constants::BAM_TAG_TYPE_INT8) :
case (Constants::BAM_TAG_TYPE_UINT8) :
m_out << "i:" << (int)tagData[index];
++index;
break;
case (Constants::BAM_TAG_TYPE_INT16) :
m_out << "i:" << BamTools::UnpackSignedShort(&tagData[index]);
index += sizeof(int16_t);
break;
case (Constants::BAM_TAG_TYPE_UINT16) :
m_out << "i:" << BamTools::UnpackUnsignedShort(&tagData[index]);
index += sizeof(uint16_t);
break;
case (Constants::BAM_TAG_TYPE_INT32) :
m_out << "i:" << BamTools::UnpackSignedInt(&tagData[index]);
index += sizeof(int32_t);
break;
case (Constants::BAM_TAG_TYPE_UINT32) :
m_out << "i:" << BamTools::UnpackUnsignedInt(&tagData[index]);
index += sizeof(uint32_t);
break;
//.........这里部分代码省略.........
示例5: main
//.........这里部分代码省略.........
cerr << "Could not open output BAM file" << endl;
return 1;
}
BamWriter writerNoDeam;
if ( !writerNoDeam.Open(nondeambam, header, references) ) {
cerr << "Could not open output BAM file" << endl;
return 1;
}
unsigned int totalReads =0;
unsigned int deaminatedReads =0;
unsigned int ndeaminatedReads =0;
unsigned int skipped =0;
//iterating over the alignments for these regions
BamAlignment al;
int i;
while ( reader.GetNextAlignment(al) ) {
// cerr<<al.Name<<endl;
//skip unmapped
if(!al.IsMapped()){
skipped++;
continue;
}
//skip paired end !
if(al.IsPaired() ){
continue;
// cerr<<"Paired end not yet coded"<<endl;
// return 1;
}
string reconstructedReference = reconstructRef(&al);
char refeBase;
char readBase;
bool isDeaminated;
if(al.Qualities.size() != reconstructedReference.size()){
cerr<<"Quality line is not the same size as the reconstructed reference"<<endl;
return 1;
}
isDeaminated=false;
if(al.IsReverseStrand()){
//first base next to 3'
i = 0 ;
refeBase=toupper(reconstructedReference[i]);
readBase=toupper( al.QueryBases[i]);
if( readBase == 'A' && int(al.Qualities[i]-offset) >= minBaseQuality){ //isDeaminated=true; }
if( skipAlign(reconstructedReference,&al,&skipped) ){ continue; }
transformRef(&refeBase,&readBase);
示例6: check
bool check(const PropertyFilter& filter, const BamAlignment& al) {
bool keepAlignment = true;
const PropertyMap& properties = filter.Properties;
PropertyMap::const_iterator propertyIter = properties.begin();
PropertyMap::const_iterator propertyEnd = properties.end();
for ( ; propertyIter != propertyEnd; ++propertyIter ) {
// check alignment data field depending on propertyName
const string& propertyName = (*propertyIter).first;
const PropertyFilterValue& valueFilter = (*propertyIter).second;
if ( propertyName == ALIGNMENTFLAG_PROPERTY ) keepAlignment &= valueFilter.check(al.AlignmentFlag);
else if ( propertyName == CIGAR_PROPERTY ) {
stringstream cigarSs;
const vector<CigarOp>& cigarData = al.CigarData;
if ( !cigarData.empty() ) {
vector<CigarOp>::const_iterator cigarBegin = cigarData.begin();
vector<CigarOp>::const_iterator cigarIter = cigarBegin;
vector<CigarOp>::const_iterator cigarEnd = cigarData.end();
for ( ; cigarIter != cigarEnd; ++cigarIter ) {
const CigarOp& op = (*cigarIter);
cigarSs << op.Length << op.Type;
}
keepAlignment &= valueFilter.check(cigarSs.str());
}
}
else if ( propertyName == INSERTSIZE_PROPERTY ) keepAlignment &= valueFilter.check(al.InsertSize);
else if ( propertyName == ISDUPLICATE_PROPERTY ) keepAlignment &= valueFilter.check(al.IsDuplicate());
else if ( propertyName == ISFAILEDQC_PROPERTY ) keepAlignment &= valueFilter.check(al.IsFailedQC());
else if ( propertyName == ISFIRSTMATE_PROPERTY ) keepAlignment &= valueFilter.check(al.IsFirstMate());
else if ( propertyName == ISMAPPED_PROPERTY ) keepAlignment &= valueFilter.check(al.IsMapped());
else if ( propertyName == ISMATEMAPPED_PROPERTY ) keepAlignment &= valueFilter.check(al.IsMateMapped());
else if ( propertyName == ISMATEREVERSESTRAND_PROPERTY ) keepAlignment &= valueFilter.check(al.IsMateReverseStrand());
else if ( propertyName == ISPAIRED_PROPERTY ) keepAlignment &= valueFilter.check(al.IsPaired());
else if ( propertyName == ISPRIMARYALIGNMENT_PROPERTY ) keepAlignment &= valueFilter.check(al.IsPrimaryAlignment());
else if ( propertyName == ISPROPERPAIR_PROPERTY ) keepAlignment &= valueFilter.check(al.IsProperPair());
else if ( propertyName == ISREVERSESTRAND_PROPERTY ) keepAlignment &= valueFilter.check(al.IsReverseStrand());
else if ( propertyName == ISSECONDMATE_PROPERTY ) keepAlignment &= valueFilter.check(al.IsSecondMate());
else if ( propertyName == ISSINGLETON_PROPERTY ) {
const bool isSingleton = al.IsPaired() && al.IsMapped() && !al.IsMateMapped();
keepAlignment &= valueFilter.check(isSingleton);
}
else if ( propertyName == MAPQUALITY_PROPERTY ) keepAlignment &= valueFilter.check(al.MapQuality);
else if ( propertyName == MATEPOSITION_PROPERTY ) keepAlignment &= ( al.IsPaired() && al.IsMateMapped() && valueFilter.check(al.MateRefID) );
else if ( propertyName == MATEREFERENCE_PROPERTY ) {
if ( !al.IsPaired() || !al.IsMateMapped() ) return false;
BAMTOOLS_ASSERT_MESSAGE( (al.MateRefID>=0 && (al.MateRefID<(int)filterToolReferences.size())), "Invalid MateRefID");
const string& refName = filterToolReferences.at(al.MateRefID).RefName;
keepAlignment &= valueFilter.check(refName);
}
else if ( propertyName == NAME_PROPERTY ) keepAlignment &= valueFilter.check(al.Name);
else if ( propertyName == POSITION_PROPERTY ) keepAlignment &= valueFilter.check(al.Position);
else if ( propertyName == QUERYBASES_PROPERTY ) keepAlignment &= valueFilter.check(al.QueryBases);
else if ( propertyName == REFERENCE_PROPERTY ) {
BAMTOOLS_ASSERT_MESSAGE( (al.RefID>=0 && (al.RefID<(int)filterToolReferences.size())), "Invalid RefID");
const string& refName = filterToolReferences.at(al.RefID).RefName;
keepAlignment &= valueFilter.check(refName);
}
else if ( propertyName == TAG_PROPERTY ) keepAlignment &= checkAlignmentTag(valueFilter, al);
else BAMTOOLS_ASSERT_UNREACHABLE;
// if alignment fails at ANY point, just quit and return false
if ( !keepAlignment ) return false;
}
BAMTOOLS_ASSERT_MESSAGE( keepAlignment, "Error in BamAlignmentChecker... keepAlignment should be true here");
return keepAlignment;
}
示例7: main
int main (int argc, char *argv[]) {
if( (argc== 1) ||
(argc== 2 && string(argv[1]) == "-h") ||
(argc== 2 && string(argv[1]) == "-help") ||
(argc== 2 && string(argv[1]) == "--help") ){
cout<<"Usage:"<<endl;
cout<<""<<endl;
cout<<"plotQualScore input.bam"<<endl;
return 1;
}
string bamfiletopen = string(argv[1]);
BamReader reader;
if ( !reader.Open(bamfiletopen) ) {
cerr << "Could not open input BAM files." << endl;
return 1;
}
// if ( !reader.LocateIndex() ){
// cerr << "warning: cannot locate index for file " << bamfiletopen<<endl;
// //return 1;
// }
BamAlignment al;
BamAlignment al2;
bool unsurePEorSE=true;
bool pe=true;
int strLength=-1;
int vecLengthToUse=-1;
map<short,unsigned long> ** counterA = 0;
map<short,unsigned long> ** counterC = 0;
map<short,unsigned long> ** counterG = 0;
map<short,unsigned long> ** counterT = 0;
int lengthIndex1=0;
int lengthIndex2=0;
string seqInd1;
string seqInd2;
string qualInd1;
string qualInd2;
int offsetInd2;
while ( reader.GetNextAlignment(al) ) {
if(unsurePEorSE){
strLength=al.QueryBases.length();
if(al.IsPaired()){
pe=true;
vecLengthToUse=2*strLength;
}else{
pe=false;
vecLengthToUse=strLength;
}
string index1;
string index2;
if(al.HasTag("XI")){
al.GetTag("XI",index1);
vecLengthToUse+=index1.length();
lengthIndex1=index1.length();
}
if(al.HasTag("XJ")){
al.GetTag("XJ",index2);
vecLengthToUse+=index2.length();
lengthIndex2=index2.length();
}
counterA = new map<short,unsigned long> * [vecLengthToUse];
counterC = new map<short,unsigned long> * [vecLengthToUse];
counterG = new map<short,unsigned long> * [vecLengthToUse];
counterT = new map<short,unsigned long> * [vecLengthToUse];
for(int i=0;i<vecLengthToUse;i++){
counterA[i]=new map<short,unsigned long> ();
counterC[i]=new map<short,unsigned long> ();
counterG[i]=new map<short,unsigned long> ();
counterT[i]=new map<short,unsigned long> ();
for(short k=minQualScore;k<=maxQualScore;k++){
(*counterA[i])[k]=0;
(*counterC[i])[k]=0;
(*counterG[i])[k]=0;
(*counterT[i])[k]=0;
}
}
unsurePEorSE=false;
}else{
if(pe &&
!al.IsPaired()){
cerr << "Cannot have unpaired reads in PE mode" << endl;
return 1;
}
if(!pe &&
al.IsPaired()){
cerr << "Cannot have unpaired reads in SE mode" << endl;
return 1;
}
}
//.........这里部分代码省略.........
示例8: main
//.........这里部分代码省略.........
cerr << "Could not open output BAM file" << endl;
return 1;
}
BamWriter writerNoDeam;
if ( !writerNoDeam.Open(nondeambam, header, references) ) {
cerr << "Could not open output BAM file" << endl;
return 1;
}
unsigned int totalReads =0;
unsigned int deaminatedReads =0;
unsigned int ndeaminatedReads =0;
unsigned int skipped =0;
//iterating over the alignments for these regions
BamAlignment al;
int i;
while ( reader.GetNextAlignment(al) ) {
// cerr<<al.Name<<endl;
//skip unmapped
if(!al.IsMapped()){
skipped++;
continue;
}
//skip paired end !
if(al.IsPaired() ){
continue;
// cerr<<"Paired end not yet coded"<<endl;
// return 1;
}
string reconstructedReference = reconstructRef(&al);
char refeBase;
char readBase;
bool isDeaminated;
if(al.Qualities.size() != reconstructedReference.size()){
cerr<<"Quality line is not the same size as the reconstructed reference"<<endl;
return 1;
}
isDeaminated=false;
if(al.IsReverseStrand()){
//first base next to 3'
i = 0 ;
refeBase=toupper(reconstructedReference[i]);
readBase=toupper( al.QueryBases[i]);
if( readBase == 'A' && int(al.Qualities[i]-offset) >= minBaseQuality){ //isDeaminated=true; }
if( skipAlign(reconstructedReference,&al,&skipped) ){ continue; }
if( hasGnoA[al.Position+1] &&
!thousandGenomesHasA[al.Position+1] )
示例9: main
int main (int argc, char *argv[]) {
bool mapped =false;
bool unmapped=false;
const string usage=string(string(argv[0])+" [options] input.bam out.bam"+"\n\n"+
"This program takes a BAM file as input and produces\n"+
"another where the putative deaminated bases have\n"+
"have been cut\n"+
"\n"+
"Options:\n");
// "\t"+"-u , --unmapped" +"\n\t\t"+"For an unmapped bam file"+"\n"+
// "\t"+"-m , --mapped" +"\n\t\t"+"For an mapped bam file"+"\n");
if( (argc== 1) ||
(argc== 2 && string(argv[1]) == "-h") ||
(argc== 2 && string(argv[1]) == "-help") ||
(argc== 2 && string(argv[1]) == "--help") ){
cout<<"Usage:"<<endl;
cout<<usage<<endl;
cout<<""<<endl;
return 1;
}
// for(int i=1;i<(argc-1);i++){ //all but the last arg
// if(strcmp(argv[i],"-m") == 0 || strcmp(argv[i],"--mapped") == 0 ){
// mapped=true;
// continue;
// }
// if(strcmp(argv[i],"-u") == 0 || strcmp(argv[i],"--unmapped") == 0 ){
// unmapped=true;
// continue;
// }
// cerr<<"Unknown option "<<argv[i] <<" exiting"<<endl;
// return 1;
// }
if(argc != 3){
cerr<<"Error: Must specify the input and output BAM files";
return 1;
}
string inbamFile =argv[argc-2];
string outbamFile=argv[argc-1];
// if(!mapped && !unmapped){
// cerr << "Please specify whether you reads are mapped or unmapped" << endl;
// return 1;
// }
// if(mapped && unmapped){
// cerr << "Please specify either mapped or unmapped but not both" << endl;
// return 1;
// }
BamReader reader;
if ( !reader.Open(inbamFile) ) {
cerr << "Could not open input BAM files." << endl;
return 1;
}
vector<RefData> testRefData=reader.GetReferenceData();
const SamHeader header = reader.GetHeader();
const RefVector references = reader.GetReferenceData();
BamWriter writer;
if ( !writer.Open(outbamFile, header, references) ) {
cerr << "Could not open output BAM file" << endl;
return 1;
}
BamAlignment al;
// BamAlignment al2;
// bool al2Null=true;
while ( reader.GetNextAlignment(al) ) {
if(al.IsPaired() ){
if(al.IsFirstMate() ){ //5' end, need to check first base only
if(al.IsReverseStrand()){ //
if(!al.IsMapped()){
cerr << "Cannot have reverse complemented unmapped reads: " <<al.Name<< endl;
//return 1;
}
int indexToCheck;
//last
indexToCheck=al.QueryBases.length()-1;
if(toupper(al.QueryBases[indexToCheck]) == 'A'){
//al.Qualities[indexToCheck]=char(offset+baseQualForDeam);
al.QueryBases = al.QueryBases.substr(0,indexToCheck);
al.Qualities = al.Qualities.substr(0, indexToCheck);
}
}else{
//.........这里部分代码省略.........
示例10: main
//.........这里部分代码省略.........
if ( !writer.Open(bamFileOUT,header,references) ) {
cerr << "Could not open output BAM file "<<bamFileOUT << endl;
return 1;
}
SamHeader sh=reader.GetHeader();
//Up to the user to be sure that a sequence is followed by his mate
// if(!sh.HasSortOrder() ||
// sh.SortOrder != "queryname"){
// cerr << "Bamfile must be sorted by queryname" << endl;
// return 1;
// }
BamAlignment al;
BamAlignment al2;
bool al2Null=true;
while ( reader.GetNextAlignment(al) ) {
if(al.IsMapped() || al.HasTag("NM") || al.HasTag("MD") ){
if(!allowAligned){
cerr << "Reads should not be aligned" << endl;
return 1;
}else{
//should we remove tags ?
}
}
if(al.IsPaired() &&
al2Null ){
al2=al;
al2Null=false;
continue;
}else{
if(al.IsPaired() &&
!al2Null){
bool result = mtr.processPair(al,al2);
if( result ){//was merged
BamAlignment orig;
BamAlignment orig2;
if(keepOrig){
orig2 = al2;
orig = al;
}
writer.SaveAlignment(al);
if(keepOrig){
orig.SetIsDuplicate(true);
orig2.SetIsDuplicate(true);
writer.SaveAlignment(orig2);
writer.SaveAlignment(orig);
}
//the second record is empty
}else{
//keep the sequences as pairs
示例11: main
int main (int argc, char *argv[]) {
string usage=string(""+string(argv[0])+" [in BAM file]"+
"\nThis program reads a BAM file and computes the error rate for each cycle\n"+
// "\nreads and the puts the rest into another bam file.\n"+
// "\nTip: if you do not need one of them, use /dev/null as your output\n"+
// "arguments:\n"+
// "\t"+"--bq [base qual] : Minimum base quality to flag a deaminated site (Default: "+stringify(minBaseQuality)+")\n"+
"\n");
if(argc == 1 ||
(argc == 2 && (string(argv[0]) == "-h" || string(argv[0]) == "--help") )
){
cerr << "Usage "<<usage<<endl;
return 1;
}
// for(int i=1;i<(argc-2);i++){
// if(string(argv[i]) == "--bq"){
// minBaseQuality=destringify<int>(argv[i+1]);
// i++;
// continue;
// }
// }
string bamfiletopen = string( argv[ argc-1 ] );
// string deambam = string( argv[ argc-2 ] );
// string nondeambam = string( argv[ argc-1 ] );
BamReader reader;
if ( !reader.Open(bamfiletopen) ) {
cerr << "Could not open input BAM file"<< bamfiletopen << endl;
return 1;
}
//iterating over the alignments for these regions
BamAlignment al;
bool pairedEnd=false;
bool firstRead=true;
while ( reader.GetNextAlignment(al) ) {
if(firstRead){ //reads are either all paired end or single end, I don't allow a mix
numberOfCycles=al.QueryBases.size();
// cout<<"numberOfCycles "<<numberOfCycles<<endl;
if(al.IsPaired() ){
pairedEnd=true;
matches = vector<unsigned int> (2*numberOfCycles,0);
mismatches = vector<unsigned int> (2*numberOfCycles,0);
typesOfMismatches = vector< vector<unsigned int> >();
for(int i=0;i<12;i++)
typesOfMismatches.push_back( vector<unsigned int> (2*numberOfCycles,0) );
}else{
matches = vector<unsigned int> ( numberOfCycles,0);
mismatches = vector<unsigned int> ( numberOfCycles,0);
typesOfMismatches = vector< vector<unsigned int> >();
for(int i=0;i<12;i++)
typesOfMismatches.push_back( vector<unsigned int> ( numberOfCycles,0) );
}
firstRead=false;
}
if( ( pairedEnd && !al.IsPaired()) ||
( !pairedEnd && al.IsPaired()) ){
cerr<<"Read "<<al.Name<<" is wrong, cannot have a mixture of paired and unpaired read for this program"<<endl;
return 1;
}
//skip unmapped
if(!al.IsMapped())
continue;
if(numberOfCycles!=int(al.QueryBases.size())){
cerr<<"The length of read "<<al.Name<<" is wrong, should be "<<numberOfCycles<<"bp"<<endl;
return 1;
}
string reconstructedReference = reconstructRef(&al);
if(al.Qualities.size() != reconstructedReference.size()){
cerr<<"Quality line is not the same size as the reconstructed reference"<<endl;
return 1;
}
if( pairedEnd ){
//.........这里部分代码省略.........
示例12: main
virtual void main()
{
//init
QTextStream out(stdout);
BamReader reader;
NGSHelper::openBAM(reader, getInfile("in"));
FastqOutfileStream out1(getOutfile("out1"), false);
FastqOutfileStream out2(getOutfile("out2"), false);
long long c_unpaired = 0;
long long c_paired = 0;
int max_cached = 0;
//iterate through reads
BamAlignment al;
QHash<QByteArray, BamAlignment> al_cache;
while (reader.GetNextAlignment(al))
{
//skip secondary alinments
if(!al.IsPrimaryAlignment()) continue;
//skip unpaired
if(!al.IsPaired())
{
++c_unpaired;
continue;
}
QByteArray name(al.Name.data()); //TODO use QByteArray::fromStdString (when upgraded to Qt5.4)
//store cached read when we encounter the mate
if (al_cache.contains(name))
{
BamAlignment mate = al_cache.take(name);
//out << name << " [AL] First: " << al.IsFirstMate() << " Reverse: " << al.IsReverseStrand() << " Seq: " << al.QueryBases.data() << endl;
//out << name << " [MA] First: " << mate.IsFirstMate() << " Reverse: " << mate.IsReverseStrand() << " Seq: " << mate.QueryBases.data() << endl;
if (al.IsFirstMate())
{
write(out1, al, al.IsReverseStrand());
write(out2, mate, mate.IsReverseStrand());
}
else
{
write(out1, mate, mate.IsReverseStrand());
write(out2, al, al.IsReverseStrand());
}
++c_paired;
}
//cache read for later retrieval
else
{
al_cache.insert(name, al);
}
max_cached = std::max(max_cached, al_cache.size());
}
reader.Close();
out1.close();
out2.close();
//write debug output
out << "Pair reads (written) : " << c_paired << endl;
out << "Unpaired reads (skipped) : " << c_unpaired << endl;
out << "Unmatched paired reads (skipped): " << al_cache.size() << endl;
out << endl;
out << "Maximum cached reads : " << max_cached << endl;
}
示例13: CoverageBam
void BedGenomeCoverage::CoverageBam(string bamFile) {
ResetChromCoverage();
// open the BAM file
BamReader reader;
if (!reader.Open(bamFile)) {
cerr << "Failed to open BAM file " << bamFile << endl;
exit(1);
}
// get header & reference information
string header = reader.GetHeaderText();
RefVector refs = reader.GetReferenceData();
// load the BAM header references into a BEDTools "genome file"
_genome = new GenomeFile(refs);
// convert each aligned BAM entry to BED
// and compute coverage on B
BamAlignment bam;
while (reader.GetNextAlignment(bam)) {
// skip if the read is unaligned
if (bam.IsMapped() == false)
continue;
bool _isReverseStrand = bam.IsReverseStrand();
//changing second mate's strand to opposite
if( _dUTP && bam.IsPaired() && bam.IsMateMapped() && bam.IsSecondMate())
_isReverseStrand = !bam.IsReverseStrand();
// skip if we care about strands and the strand isn't what
// the user wanted
if ( (_filterByStrand == true) &&
((_requestedStrand == "-") != _isReverseStrand) )
continue;
// extract the chrom, start and end from the BAM alignment
string chrom(refs.at(bam.RefID).RefName);
CHRPOS start = bam.Position;
CHRPOS end = bam.GetEndPosition(false, false) - 1;
// are we on a new chromosome?
if ( chrom != _currChromName )
StartNewChrom(chrom);
if(_pair_chip_) {
// Skip if not a proper pair
if (bam.IsPaired() && (!bam.IsProperPair() or !bam.IsMateMapped()) )
continue;
// Skip if wrong coordinates
if( ( (bam.Position<bam.MatePosition) && bam.IsReverseStrand() ) ||
( (bam.MatePosition < bam.Position) && bam.IsMateReverseStrand() ) ) {
//chemically designed: left on positive strand, right on reverse one
continue;
}
/*if(_haveSize) {
if (bam.IsFirstMate() && bam.IsReverseStrand()) { //put fragmentSize in to the middle of pair end_fragment
int mid = bam.MatePosition+abs(bam.InsertSize)/2;
if(mid<_fragmentSize/2)
AddCoverage(0, mid+_fragmentSize/2);
else
AddCoverage(mid-_fragmentSize/2, mid+_fragmentSize/2);
}
else if (bam.IsFirstMate() && bam.IsMateReverseStrand()) { //put fragmentSize in to the middle of pair end_fragment
int mid = start+abs(bam.InsertSize)/2;
if(mid<_fragmentSize/2)
AddCoverage(0, mid+_fragmentSize/2);
else
AddCoverage(mid-_fragmentSize/2, mid+_fragmentSize/2);
}
} else */
if (bam.IsFirstMate() && bam.IsReverseStrand()) { //prolong to the mate to the left
AddCoverage(bam.MatePosition, end);
}
else if (bam.IsFirstMate() && bam.IsMateReverseStrand()) { //prolong to the mate to the right
AddCoverage(start, start + abs(bam.InsertSize) - 1);
}
} else if (_haveSize) {
if(bam.IsReverseStrand()) {
if(end<_fragmentSize) { //sometimes fragmentSize is bigger :(
AddCoverage(0, end);
} else {
AddCoverage(end + 1 - _fragmentSize, end );
}
} else {
AddCoverage(start,start+_fragmentSize - 1);
}
} else
// add coverage accordingly.
if (!_only_5p_end && !_only_3p_end) {
bedVector bedBlocks;
// we always want to split blocks when a D CIGAR op is found.
// if the user invokes -split, we want to also split on N ops.
if (_obeySplits) { // "D" true, "N" true
GetBamBlocks(bam, refs.at(bam.RefID).RefName, bedBlocks, true, true);
}
else { // "D" true, "N" false
GetBamBlocks(bam, refs.at(bam.RefID).RefName, bedBlocks, true, false);
//.........这里部分代码省略.........
示例14: main
//.........这里部分代码省略.........
BamReader reader;
if ( !reader.Open(inbamFile) ) {
cerr << "Could not open input BAM files." << endl;
return 1;
}
vector<RefData> testRefData=reader.GetReferenceData();
SamHeader header = reader.GetHeader();
string pID = "decrQualDeaminated";
string pName = "decrQualDeaminated";
string pCommandLine = "";
for(int i=0;i<(argc);i++){
pCommandLine += (string(argv[i])+" ");
}
putProgramInHeader(&header,pID,pName,pCommandLine);
const RefVector references = reader.GetReferenceData();
BamWriter writer;
if ( !writer.Open(outbamFile, header, references) ) {
cerr << "Could not open output BAM file" << endl;
return 1;
}
BamAlignment al;
// BamAlignment al2;
// bool al2Null=true;
while ( reader.GetNextAlignment(al) ) {
if(al.IsPaired() ){
if(al.IsFirstMate() ){ //5' end, need to check first base only
if(al.IsReverseStrand()){ //
if(!al.IsMapped()){
cerr << "Cannot have reverse complemented unmapped reads :" <<al.Name<< endl;
//return 1;
}
int indexToCheck;
//5' of first mate reversed
indexToCheck=al.QueryBases.length()-1;
for(int i=0;i<bpToDecrease5;i++){
if(toupper(al.QueryBases[indexToCheck]) == 'A'){
al.Qualities[indexToCheck]=char(offset+baseQualForDeam);
}
indexToCheck=max(indexToCheck-1,0);
}
}else{
int indexToCheck;
//5' of first mate
indexToCheck=0;
for(int i=0;i<bpToDecrease5;i++){ //first base
if(toupper(al.QueryBases[indexToCheck]) == 'T'){
al.Qualities[indexToCheck]=char(offset+baseQualForDeam);
}
indexToCheck=min(indexToCheck+1,int(al.Qualities.size()));
}
}
示例15: main
int main (int argc, char *argv[]) {
if(argc != 4){
cerr<<"This program strips the mapping information and cuts sequences"<<endl;
cerr<<"Usage "<<argv[0]<<" [bam file in] [bam file out] [distribution]"<<endl;
cerr<<"The distribution is one per line"<<endl;
return 1;
}
string bamfiletopen = string(argv[1]);
string bamfiletwrite = string(argv[2]);
string fileDist = string(argv[3]);
igzstream myFile;
string line;
vector<int> distToUse;
myFile.open(fileDist.c_str(), ios::in);
if (myFile.good()){
while ( getline (myFile,line)){
distToUse.push_back( destringify<int>(line) );
}
myFile.close();
}else{
cerr << "Unable to open file "<<fileDist<<endl;
return 1;
}
cerr<<"Read "<<distToUse.size()<<" data points "<<endl;
cerr<<"Reading "<<bamfiletopen<<" writing to "<<bamfiletwrite<<endl;
BamReader reader;
BamWriter writer;
if ( !reader.Open(bamfiletopen) ) {
cerr << "Could not open input BAM files." << endl;
return 1;
}
SamHeader myHeader=reader.GetHeader();
SamProgram sp;
string pID = "removeTagsMapping";
string pName = "removeTagsMapping";
string pCommandLine = "";
for(int i=0;i<(argc);i++){
pCommandLine += (string(argv[i])+" ");
}
putProgramInHeader(&myHeader,pID,pName,pCommandLine,returnGitHubVersion(string(argv[0]),"."));
//no @SQ
myHeader.Sequences.Clear();
vector< RefData > emptyRefVector;
if( !writer.Open(bamfiletwrite,myHeader,emptyRefVector ) ) {
cerr << "Could not open output BAM file "<<bamfiletwrite << endl;
return 1;
}
unsigned int readsTotal=0;
BamAlignment al;
while ( reader.GetNextAlignment(al) ) {
//deleting tag data
al.TagData="";
//reset the flag
// if(al.IsPaired()){
// if(al.IsFirstMate()){
// al.AlignmentFlag = flagFirstPair;
// }else{
// al.AlignmentFlag = flagSecondPair;
// }
// }else{
// }
if(al.IsPaired()){
if(al.IsFirstMate()){
al.Name = al.Name+"/1";
}else{
al.Name = al.Name+"/2";
}
}
al.AlignmentFlag = flagSingleReads;
//no ref or positon
al.RefID=-1;
al.MateRefID=-1;
al.Position=-1;
al.MatePosition=-1;
//no insert size
al.InsertSize=0;
//no cigar
al.CigarData.clear();
//no mapping quality
al.MapQuality=0;
int length = distToUse[ randomInt(0,distToUse.size()-1) ];
//.........这里部分代码省略.........