本文整理汇总了C++中BamReader::GetHeader方法的典型用法代码示例。如果您正苦于以下问题:C++ BamReader::GetHeader方法的具体用法?C++ BamReader::GetHeader怎么用?C++ BamReader::GetHeader使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类BamReader
的用法示例。
在下文中一共展示了BamReader::GetHeader方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: main
int main (int argc, char *argv[]) {
if( (argc== 1) ||
(argc== 2 && string(argv[1]) == "-h") ||
(argc== 2 && string(argv[1]) == "-help") ||
(argc== 2 && string(argv[1]) == "--help") ){
cout<<"Usage:setAsUnpaired [in bam] [outbam]"<<endl<<"this program takes flags all paired sequences as singles"<<endl;
return 1;
}
string bamfiletopen = string(argv[1]);
string bamFileOUT = string(argv[2]);
BamReader reader;
BamWriter writer;
if ( !reader.Open(bamfiletopen) ) {
cerr << "Could not open input BAM files." << endl;
return 1;
}
const SamHeader header = reader.GetHeader();
const RefVector references = reader.GetReferenceData();
if ( !writer.Open(bamFileOUT,header,references) ) {
cerr << "Could not open output BAM file "<<bamFileOUT << endl;
return 1;
}
BamAlignment al;
while ( reader.GetNextAlignment(al) ) {
if(al.IsMapped()){
cerr << "Cannot yet handle mapped reads " << endl;
return 1;
}
al.SetIsPaired (false);
writer.SaveAlignment(al);
} //while al
reader.Close();
writer.Close();
return 0;
}
示例2: sort_inter_chrom_bam
//{{{bool sort_inter_chrom_bam(string in_file_name,
bool sort_inter_chrom_bam(string in_file_name,
string out_file_name)
{
// open input BAM file
BamReader reader;
if ( !reader.Open(in_file_name) ) {
cerr << "sort ERROR: could not open " <<
in_file_name << " for reading... Aborting." << endl;
return false;
}
SamHeader header = reader.GetHeader();
if ( !header.HasVersion() )
header.Version = Constants::SAM_CURRENT_VERSION;
string header_text = header.ToString();
RefVector ref = reader.GetReferenceData();
// set up alignments buffer
BamAlignment al;
vector<BamAlignment> buffer;
buffer.reserve( (size_t)(SORT_DEFAULT_MAX_BUFFER_COUNT*1.1) );
bool bufferFull = false;
int buff_count = 0;
// iterate through file
while ( reader.GetNextAlignment(al)) {
// check buffer's usage
bufferFull = ( buffer.size() >= SORT_DEFAULT_MAX_BUFFER_COUNT );
// store alignments until buffer is "full"
if ( !bufferFull )
buffer.push_back(al);
// if buffer is "full"
else {
// so create a sorted temp file with current buffer contents
// then push "al" into fresh buffer
create_sorted_temp_file(buffer,
out_file_name,
buff_count,
header_text,
ref);
++buff_count;
buffer.push_back(al);
}
}
// handle any leftover buffer contents
if ( !buffer.empty() ) {
create_sorted_temp_file(buffer,
out_file_name,
buff_count,
header_text,
ref);
++buff_count;
}
reader.Close();
return merge_sorted_files(out_file_name, buff_count, header_text, ref);
/*
for (int i = 0; i < buff_count; ++i) {
stringstream temp_name;
temp_name << out_file_name << i;
}
*/
}
示例3: while
// generates mutiple sorted temp BAM files from single unsorted BAM file
bool SortTool::SortToolPrivate::GenerateSortedRuns(void) {
// open input BAM file
BamReader reader;
if ( !reader.Open(m_settings->InputBamFilename) ) {
cerr << "bamtools sort ERROR: could not open " << m_settings->InputBamFilename
<< " for reading... Aborting." << endl;
return false;
}
// get basic data that will be shared by all temp/output files
SamHeader header = reader.GetHeader();
header.SortOrder = ( m_settings->IsSortingByName
? Constants::SAM_HD_SORTORDER_QUERYNAME
: Constants::SAM_HD_SORTORDER_COORDINATE );
m_headerText = header.ToString();
m_references = reader.GetReferenceData();
// set up alignments buffer
BamAlignment al;
vector<BamAlignment> buffer;
buffer.reserve( (size_t)(m_settings->MaxBufferCount*1.1) );
bool bufferFull = false;
// if sorting by name, we need to generate full char data
// so can't use GetNextAlignmentCore()
if ( m_settings->IsSortingByName ) {
// iterate through file
while ( reader.GetNextAlignment(al)) {
// check buffer's usage
bufferFull = ( buffer.size() >= m_settings->MaxBufferCount );
// store alignments until buffer is "full"
if ( !bufferFull )
buffer.push_back(al);
// if buffer is "full"
else {
// push any unmapped reads into buffer,
// don't want to split these into a separate temp file
if ( !al.IsMapped() )
buffer.push_back(al);
// "al" is mapped, so create a sorted temp file with current buffer contents
// then push "al" into fresh buffer
else {
CreateSortedTempFile(buffer);
buffer.push_back(al);
}
}
}
}
// sorting by position, can take advantage of GNACore() speedup
else {
// iterate through file
while ( reader.GetNextAlignmentCore(al) ) {
// check buffer's usage
bufferFull = ( buffer.size() >= m_settings->MaxBufferCount );
// store alignments until buffer is "full"
if ( !bufferFull )
buffer.push_back(al);
// if buffer is "full"
else {
// push any unmapped reads into buffer,
// don't want to split these into a separate temp file
if ( !al.IsMapped() )
buffer.push_back(al);
// "al" is mapped, so create a sorted temp file with current buffer contents
// then push "al" into fresh buffer
else {
CreateSortedTempFile(buffer);
buffer.push_back(al);
}
}
}
}
// handle any leftover buffer contents
if ( !buffer.empty() )
CreateSortedTempFile(buffer);
// close reader & return success
reader.Close();
return true;
}
示例4: main
int main (int argc, const char *argv[])
{
printf ("------------- bamrealignment --------------\n");
OptArgs opts;
opts.ParseCmdLine(argc, argv);
vector<int> score_vals(4);
string input_bam = opts.GetFirstString ('i', "input", "");
string output_bam = opts.GetFirstString ('o', "output", "");
opts.GetOption(score_vals, "4,-6,-5,-2", 's', "scores");
int clipping = opts.GetFirstInt ('c', "clipping", 2);
bool anchors = opts.GetFirstBoolean ('a', "anchors", true);
int bandwidth = opts.GetFirstInt ('b', "bandwidth", 10);
bool verbose = opts.GetFirstBoolean ('v', "verbose", false);
bool debug = opts.GetFirstBoolean ('d', "debug", false);
int format = opts.GetFirstInt ('f', "format", 1);
int num_threads = opts.GetFirstInt ('t', "threads", 8);
string log_fname = opts.GetFirstString ('l', "log", "");
if (input_bam.empty() or output_bam.empty())
return PrintHelp();
opts.CheckNoLeftovers();
std::ofstream logf;
if (log_fname.size ())
{
logf.open (log_fname.c_str ());
if (!logf.is_open ())
{
fprintf (stderr, "bamrealignment: Failed to open log file %s\n", log_fname.c_str());
return 1;
}
}
BamReader reader;
if (!reader.Open(input_bam)) {
fprintf(stderr, "bamrealignment: Failed to open input file %s\n", input_bam.c_str());
return 1;
}
SamHeader header = reader.GetHeader();
RefVector refs = reader.GetReferenceData();
BamWriter writer;
writer.SetNumThreads(num_threads);
if (format == 1)
writer.SetCompressionMode(BamWriter::Uncompressed);
else
writer.SetCompressionMode(BamWriter::Compressed);
if (!writer.Open(output_bam, header, refs)) {
fprintf(stderr, "bamrealignment: Failed to open output file %s\n", output_bam.c_str());
return 1;
}
// The meat starts here ------------------------------------
if (verbose)
cout << "Verbose option is activated, each alignment will print to screen." << endl
<< " After a read hit RETURN to continue to the next one," << endl
<< " or press q RETURN to quit the program," << endl
<< " or press s Return to silence verbose," << endl
<< " or press c RETURN to continue printing without further prompt." << endl << endl;
unsigned int readcounter = 0;
unsigned int mapped_readcounter = 0;
unsigned int realigned_readcounter = 0;
unsigned int modified_alignment_readcounter = 0;
unsigned int pos_update_readcounter = 0;
unsigned int failed_clip_realigned_readcount = 0;
unsigned int already_perfect_readcount = 0;
unsigned int bad_md_tag_readcount = 0;
unsigned int error_recreate_ref_readcount = 0;
unsigned int error_clip_anchor_readcount = 0;
unsigned int error_sw_readcount = 0;
unsigned int error_unclip_readcount = 0;
unsigned int start_position_shift;
int orig_position;
int new_position;
string md_tag, new_md_tag, input = "x";
vector<CigarOp> new_cigar_data;
vector<MDelement> new_md_data;
bool position_shift = false;
time_t start_time = time(NULL);
Realigner aligner;
aligner.verbose_ = verbose;
aligner.debug_ = debug;
if (!aligner.SetScores(score_vals))
cout << "bamrealignment: Four scores need to be provided: match, mismatch, gap open, gap extend score!" << endl;
aligner.SetAlignmentBandwidth(bandwidth);
//.........这里部分代码省略.........
示例5: main
int main(const int argc, char* const argv[]) {
int c, min_mapQ=0, seed=chrono::system_clock::now().time_since_epoch().count();
unsigned int flag_on=0, flag_off=0;
string fn_tgt, fn_in, fn_out="", out_format="b";
while ((c = getopt(argc, argv, "SbBcCt:h1Ho:q:f:F:ul:r:?T:R:L:s:@:m:x:U:")) >= 0) {
switch (c) {
case 's': seed = atoi(optarg); break;
case 'm': break;
case 'c': break;
case 'S': break;
case 'b': break;
case 'C': break;
case 'h': break;
case 'H': break;
case 'o': fn_out = optarg; break;
case 'U': break;
case 'f': flag_on |= strtol(optarg, 0, 0); break;
case 'F': flag_off |= strtol(optarg, 0, 0); break;
case 'q': min_mapQ = atoi(optarg); break;
case 'u': out_format = "u"; break;
case '1': break;
case 'l': break;
case 'r': break;
case 't': fn_tgt = optarg; break;
case 'R': break;
case '?': return usage();
case 'T': break;
case 'B': break;
case '@': break;
case 'x': break;
default: return usage();
}
}
if (fn_tgt.compare("") == 0) return usage();
if (argc == optind) return usage();
fn_in = argv[optind];
BamReader reader;
if (!reader.Open(fn_in)) {
cerr << "ERROR: cannot open [" << fn_in << "] for reading\n";
return 1;
}
if (!reader.LocateIndex()) {
cerr << "ERROR: cannot find BAM index for [" << fn_in << "]\n";
return 1;
}
const SamHeader header = reader.GetHeader();
if (header.SortOrder.compare("coordinate") != 0) {
cerr << "ERROR: [" << fn_in << "] not sorted by coordinate\n";
return 1;
}
const RefVector refseq = reader.GetReferenceData();
vector<BamRegion> regions;
vector<unsigned int> src_depths, tgt_depths;
if (read_region_depth(fn_tgt.c_str(), reader, regions, src_depths, tgt_depths) != 0) return 1;
BamWriter writer;
if (!writer.Open(fn_out, header, refseq)) {
cerr << "ERROR: cannot open [" << fn_out << "] for writing\n";
return 1;
}
BamAlignment aln;
vector<BamAlignment> reads;
vector<string> paired, unpaired;
unordered_map<int, int> kept;
unordered_map<string, unsigned int> seen, sampled;
unordered_map<string, vector<int> > pool;
for (size_t i=0; i<regions.size(); ++i) {
reads.clear();
paired.clear();
unpaired.clear();
kept.clear();
pool.clear();
char region_string[256];
sprintf(region_string, "%s:%d-%d", refseq[regions[i].LeftRefID].RefName.c_str(), regions[i].LeftPosition, regions[i].RightPosition);
if (!reader.SetRegion(regions[i])) {
cerr << "WARNING: failed to locate [" << region_string << "]\n";
//cerr << "WARNING: failed to locate [" << refseq[regions[i].LeftRefID].RefName << ':' << regions[i].LeftPosition << '-' << regions[i].RightPosition << "]\n";
continue;
}
while (reader.GetNextAlignment(aln)) {
if ((aln.AlignmentFlag & flag_on) == flag_on && !(aln.AlignmentFlag & flag_off) && aln.MapQuality >= min_mapQ)
reads.push_back(aln);
}
if (reads.size() == 0) continue;
unsigned int depth = 0;
for (size_t k=0; k<reads.size(); ++k) {
aln = reads[k];
string rn = aln.Name;
if (seen.find(rn) != seen.end()) { // if seen in previous regions
if (sampled.find(rn) != sampled.end()) { // if self or mate sampled before, sample it
if (sampled[rn] != aln.AlignmentFlag) kept[k] = 1; // if mate sampled before, keep it
depth += get_overlap(aln, regions[i]);
}
//.........这里部分代码省略.........
示例6: ValidateReaders
// ValidateReaders checks that all the readers point to BAM files representing
// alignments against the same set of reference sequences, and that the
// sequences are identically ordered. If these checks fail the operation of
// the multireader is undefined, so we force program exit.
bool BamMultiReaderPrivate::ValidateReaders() const
{
m_errorString.clear();
// skip if 0 or 1 readers opened
if (m_readers.empty() || (m_readers.size() == 1)) return true;
// retrieve first reader
const MergeItem& firstItem = m_readers.front();
const BamReader* firstReader = firstItem.Reader;
if (firstReader == 0) return false;
// retrieve first reader's header data
const SamHeader& firstReaderHeader = firstReader->GetHeader();
const std::string& firstReaderSortOrder = firstReaderHeader.SortOrder;
// retrieve first reader's reference data
const RefVector& firstReaderRefData = firstReader->GetReferenceData();
const int firstReaderRefCount = firstReader->GetReferenceCount();
const int firstReaderRefSize = firstReaderRefData.size();
// iterate over all readers
std::vector<MergeItem>::const_iterator readerIter = m_readers.begin();
std::vector<MergeItem>::const_iterator readerEnd = m_readers.end();
for (; readerIter != readerEnd; ++readerIter) {
const MergeItem& item = (*readerIter);
BamReader* reader = item.Reader;
if (reader == 0) continue;
// get current reader's header data
const SamHeader& currentReaderHeader = reader->GetHeader();
const std::string& currentReaderSortOrder = currentReaderHeader.SortOrder;
// check compatible sort order
if (currentReaderSortOrder != firstReaderSortOrder) {
const std::string message =
std::string("mismatched sort order in ") + reader->GetFilename() + ", expected " +
firstReaderSortOrder + ", but found " + currentReaderSortOrder;
SetErrorString("BamMultiReader::ValidateReaders", message);
return false;
}
// get current reader's reference data
const RefVector currentReaderRefData = reader->GetReferenceData();
const int currentReaderRefCount = reader->GetReferenceCount();
const int currentReaderRefSize = currentReaderRefData.size();
// init reference data iterators
RefVector::const_iterator firstRefIter = firstReaderRefData.begin();
RefVector::const_iterator firstRefEnd = firstReaderRefData.end();
RefVector::const_iterator currentRefIter = currentReaderRefData.begin();
// compare reference counts from BamReader ( & container size, in case of BR error)
if ((currentReaderRefCount != firstReaderRefCount) ||
(firstReaderRefSize != currentReaderRefSize)) {
std::stringstream s;
s << "mismatched reference count in " << reader->GetFilename() << ", expected "
<< firstReaderRefCount << ", but found " << currentReaderRefCount;
SetErrorString("BamMultiReader::ValidateReaders", s.str());
return false;
}
// this will be ok; we just checked above that we have identically-sized sets of references
// here we simply check if they are all, in fact, equal in content
while (firstRefIter != firstRefEnd) {
const RefData& firstRef = (*firstRefIter);
const RefData& currentRef = (*currentRefIter);
// compare reference name & length
if ((firstRef.RefName != currentRef.RefName) ||
(firstRef.RefLength != currentRef.RefLength)) {
std::stringstream s;
s << "mismatched references found in" << reader->GetFilename()
<< "expected: " << std::endl;
// print first reader's reference data
RefVector::const_iterator refIter = firstReaderRefData.begin();
RefVector::const_iterator refEnd = firstReaderRefData.end();
for (; refIter != refEnd; ++refIter) {
const RefData& entry = (*refIter);
std::stringstream s;
s << entry.RefName << ' ' << std::endl;
}
s << "but found: " << std::endl;
// print current reader's reference data
refIter = currentReaderRefData.begin();
refEnd = currentReaderRefData.end();
for (; refIter != refEnd; ++refIter) {
const RefData& entry = (*refIter);
s << entry.RefName << ' ' << entry.RefLength << std::endl;
}
SetErrorString("BamMultiReader::ValidateReaders", s.str());
//.........这里部分代码省略.........
示例7: main
int main (int argc, char *argv[]) {
if( (argc== 1) ||
(argc== 2 && string(argv[1]) == "-h") ||
(argc== 2 && string(argv[1]) == "-help") ||
(argc== 2 && string(argv[1]) == "--help") ){
cout<<"Usage:"<<argv[0]<<"<options> [in bam] [ref original length] [extension length]"<<endl;
cout<<"This program returns the same BAM file except with the XA flag for circular references"<<endl;
cout<<"Options:"<<endl;
//cout<<"\t-m\t\t\t\tUse mapped reads only"<<endl;
return 1;
}
for(int i=1;i<(argc-3);i++){ //all but the last 3 args
// if(string(argv[i]) == "-m" ){
// onlyMapped=true;
// continue;
// }
cerr<<"Unknown option "<<argv[i] <<" exiting"<<endl;
return 1;
}
string bamfiletopen = string( argv[argc-3]);
int origLength = destringify<int>(argv[argc-2]);
int extLength = destringify<int>(argv[argc-1]);
string outputFilename = "/dev/stdout";
BamReader reader;
if ( !reader.Open(bamfiletopen) ) {
cerr << "Could not open input BAM files." << endl;
return 1;
}
const SamHeader header = reader.GetHeader();
const RefVector references = reader.GetReferenceData();
BamWriter writer;
if ( !writer.Open(outputFilename, header, references) ) {
cerr << "Could not open output BAM file" << endl;
return 1;
}
BamAlignment al;
string nameTAG="XA";
while ( reader.GetNextAlignment(al) ) {
if(al.HasTag(nameTAG)) {
cerr << "ERROR: Read "<<al.Name<<" already has XA tags" << endl;
return 1;
}
writer.SaveAlignment(al);
} //while al
reader.Close();
writer.Close();
return 0;
}
示例8: main
int main (int argc, char *argv[]) {
if( (argc!= 3) ||
(argc== 2 && string(argv[1]) == "-h") ||
(argc== 2 && string(argv[1]) == "-help") ||
(argc== 2 && string(argv[1]) == "--help") ){
cerr<<"Usage:splitByRG [in bam] [out prefix]"<<endl<<"this program creates one bam file per RG in the with the outprefix\nFor example splitByRG in.bam out will create\nout.rg1.bam\nout.rg2.bam\n"<<endl;
return 1;
}
string bamfiletopen = string(argv[1]);
// if(!strEndsWith(bamfiletopen,".bam")){
// }
string bamDirOutPrefix = string(argv[2]);
map<string,BamWriter *> rg2BamWriter;
// if(!isDirectory(bamDirOut)){
// cerr<<"ERROR: the out directory does not exist"<<endl;
// return 1;
// }
BamReader reader;
if ( !reader.Open(bamfiletopen) ) {
cerr << "Could not open input BAM files." << endl;
return 1;
}
SamHeader header = reader.GetHeader();
const RefVector references = reader.GetReferenceData();
vector<RefData> refData=reader.GetReferenceData();
string pID = "splitByRG";
string pName = "splitByRG";
string pCommandLine = "";
for(int i=0;i<(argc);i++){
pCommandLine += (string(argv[i])+" ");
}
putProgramInHeader(&header,pID,pName,pCommandLine,returnGitHubVersion(string(argv[0]),".."));
SamReadGroupDictionary srgd=header.ReadGroups;
for(SamReadGroupConstIterator srgci=srgd.ConstBegin();
srgci<srgd.ConstEnd();
srgci++){
//cout<<*srgci<<endl;
const SamReadGroup rg = (*srgci);
//cout<<rg.ID<<endl;
rg2BamWriter[rg.ID] = new BamWriter();
rg2BamWriter[rg.ID]->Open(bamDirOutPrefix+"."+rg.ID+".bam",header,references);
}
BamAlignment al;
unsigned int total=0;
while ( reader.GetNextAlignment(al) ) {
// al.SetIsFailedQC(false);
// writer.SaveAlignment(al);
// if(al.IsMapped () ){
// if(rg2BamWriter.find(refData[al.RefID].RefName) == rg2BamWriter.end()){ //new
// rg2BamWriter[refData[al.RefID].RefName] = new BamWriter();
// if ( !rg2BamWriter[refData[al.RefID].RefName]->Open(bamDirOutPrefix+"."+refData[al.RefID].RefName+".bam",header,references) ) {
// cerr << "Could not open output BAM file "<< bamDirOutPrefix<<"."<<refData[al.RefID].RefName<<".bam" << endl;
// return 1;
// }
// }else{
// rg2BamWriter[refData[al.RefID].RefName]->SaveAlignment(al);
// }
// }else{
// unmapped.SaveAlignment(al);
// }
if(al.HasTag("RG")){
string rgTag;
al.GetTag("RG",rgTag);
//cout<<rgTag<<endl;
if(rg2BamWriter.find(rgTag) == rg2BamWriter.end()){ //new
cerr<<"Found new RG "<<rgTag<<endl;
rg2BamWriter[rgTag] = new BamWriter();
if ( !rg2BamWriter[rgTag]->Open(bamDirOutPrefix+"."+rgTag+".bam",header,references) ) {
cerr << "Could not open output BAM file "<< bamDirOutPrefix<<"."<<rgTag<<".bam" << endl;
return 1;
}
rg2BamWriter[rgTag]->SaveAlignment(al);
}else{
rg2BamWriter[rgTag]->SaveAlignment(al);
}
}else{
string rgTag="unknown";
//cout<<rgTag<<endl;
if(rg2BamWriter.find(rgTag) == rg2BamWriter.end()){ //new
cerr<<"Found new RG "<<rgTag<<endl;
rg2BamWriter[rgTag] = new BamWriter();
if ( !rg2BamWriter[rgTag]->Open(bamDirOutPrefix+"."+rgTag+".bam",header,references) ) {
cerr << "Could not open output BAM file "<< bamDirOutPrefix<<"."<<rgTag<<".bam" << endl;
return 1;
}
//.........这里部分代码省略.........
示例9: main
//.........这里部分代码省略.........
//else BAM
// initMerge();
// set_adapter_sequences(adapter_F,
// adapter_S,
// adapter_chimera);
// set_options(trimCutoff,allowMissing,mergeoverlap);
if(key != ""){
size_t found=key.find(",");
if (found == string::npos){ //single end reads
key1=key;
key2="";
} else{ //paired-end
key1=key.substr(0,found);
key2=key.substr(found+1,key.length()-found+1);
}
}
if( bamFileOUT == "" ){
cerr<<"The output must be a be specified, exiting"<<endl;
return 1;
}
if ( !reader.Open(bamFile) ) {
cerr << "Could not open input BAM file "<<bamFile << endl;
return 1;
}
SamHeader header = reader.GetHeader();
string pID = "mergeTrimReadsBAM";
string pName = "mergeTrimReadsBAM";
string pCommandLine = "";
for(int i=0;i<(argc);i++){
pCommandLine += (string(argv[i])+" ");
}
putProgramInHeader(&header,pID,pName,pCommandLine,returnGitHubVersion(string(argv[0]),".."));
const RefVector references = reader.GetReferenceData();
//we will not call bgzip with full compression, good for piping into another program to
//lessen the load on the CPU
if(produceUnCompressedBAM)
writer.SetCompressionMode(BamWriter::Uncompressed);
if ( !writer.Open(bamFileOUT,header,references) ) {
cerr << "Could not open output BAM file "<<bamFileOUT << endl;
return 1;
}
SamHeader sh=reader.GetHeader();
//Up to the user to be sure that a sequence is followed by his mate
// if(!sh.HasSortOrder() ||
// sh.SortOrder != "queryname"){
// cerr << "Bamfile must be sorted by queryname" << endl;
// return 1;
// }
示例10: if
int
main(int argc, char* argv[]) {
// validate argument count
if( argc != 2 ) {
cerr << "USAGE: " << argv[0] << " <input BAM file> " << endl;
return EXIT_FAILURE;
}
string filename = argv[1];
//cerr << "Printing alignments from file: " << filename << endl;
BamReader reader;
if (!reader.Open(filename)) {
cerr << "could not open filename " << filename << endl;
return EXIT_FAILURE;
}
cerr << filename << ": Done opening" << endl;
// Header can't be used to accurately determine sort order because samtools never
// changes it; instead, check after loading each read as is done with "samtools index"
// We don't need to load an index (right?)
// if (!reader.LocateIndex()) {
// const string index_filename = filename + ".bai";
// if (!reader.OpenIndex(index_filename)) {
// cerr << "could not open index" << endl;
// }
// }
const SamHeader header = reader.GetHeader();
cerr << filename << ": Done getting header" << endl;
const RefVector refs = reader.GetReferenceData();
cerr << filename << ": Done getting reference data" << endl;
BamWriter writer;
if (! output_bam_filename.empty()) {
if (! writer.Open(output_bam_filename, header, refs)) {
cerr << "Could not open BAM output file " << output_bam_filename << endl;
return EXIT_FAILURE;
}
cerr << filename << ": Done opening BAM output file " << output_bam_filename << endl;
}
alignmentMap read1Map; // a single map, for all reads awaiting their mate
typedef map<string,int32_t> stringMap;
typedef stringMap::iterator stringMapI;
stringMap ref_mates;
// alignmentMap read1Map, read2Map;
BamAlignment full_al;
int32_t count = 0;
uint32_t max_reads_in_map = 0;
int32_t n_reads_skipped_unmapped = 0;
int32_t n_reads_skipped_mate_unmapped = 0;
int32_t n_reads_skipped_wont_see_mate = 0;
int32_t n_reads_skipped_mate_tail_est = 0;
int32_t n_reads_skipped_ref_mate = 0;
int32_t n_reads = 0;
int32_t n_singleton_reads = 0;
int32_t last_RefID = -1;
int32_t last_Position = -1;
cerr << filename << ": Looking for up to " << pairs_to_process << " link pairs,"
<< " total tail = " << link_pair_total_tail
<< " critical tail = " << link_pair_crit_tail
<< ", must be on diff chromosome = " << link_pair_diff_chrom << endl;
while (reader.GetNextAlignment(full_al)
&& (! pairs_to_process || count < pairs_to_process)) {
BamAlignment al = full_al;
//printAlignmentInfo(al, refs);
//++count;
++n_reads;
if (last_RefID < 0) last_RefID = al.RefID;
if (last_Position < 0) last_Position = al.Position;
if (al.RefID > last_RefID) {
// We've moved to the next reference sequence
// Clean up reads with mates expected here that haven't been seen
if (debug_ref_mate) {
cerr << "MISSED " << ref_mates.size() << " ref_mates on this reference "
<< last_RefID << " " << refs[last_RefID].RefName << endl;
}
for (stringMapI rmI = ref_mates.begin(); rmI != ref_mates.end(); ++rmI) {
++n_reads_skipped_ref_mate;
read1Map.erase(read1Map.find(rmI->first));
ref_mates.erase(ref_mates.find(rmI->first));
}
last_RefID = al.RefID;
last_Position = al.Position;
} else if (! isCoordinateSorted(al.RefID, al.Position, last_RefID, last_Position)) {
cerr << filename << " is not sorted, " << al.Name << " out of position" << endl;
return EXIT_FAILURE;
}
if (! al.IsMapped()) { ++n_reads_skipped_unmapped; continue; }
//.........这里部分代码省略.........
示例11: main
int main(int argc, char** argv) {
FILE *fsp = 0, *fep = 0;
int flag; // 0
char chrom[255]; // 1
int pos; // 2
int quality; // 3
char *cigar; // 4
size_t len = 0;
char chromcopy[255] = { 0 };
char fn[255];
int i;
int forward, reverse;
bool direction; // 0
bool leftflag;
const char *prefix = "";
int cnt = 0;
BamReader reader;
//if (argc < 2 || argc > 4) usage();
for (i=1; i<argc; i++) {
if ((strcmp(argv[i], "-p") == 0) && (i != argc-1)) {
prefix = argv[++i];
} else {
if (!reader.IsOpen()) {
try {
reader.Open(argv[i]);
} catch (exception& e) {
cout << e.what();
throw;
}
} else {
usage();
}
}
}
if (!reader.IsOpen()) usage();
const SamHeader header = reader.GetHeader();
const RefVector references = reader.GetReferenceData();
BamAlignment al;
while(reader.GetNextAlignmentCore(al)) {
flag = al.AlignmentFlag;
if ((flag & 256) || (flag & 4) || (flag & 512) || (flag & 1024)) continue;
direction = ( (flag & 16) == 16);
strcpy(chrom, references[al.RefID].RefName.c_str());
pos = al.Position+1;
quality = al.MapQuality;
if (quality == 0) continue;
if (strcmp(chromcopy, chrom) != 0) {
if (fsp) fclose(fsp);
if (fep) fclose(fep);
fn[0] = 0;
if (prefix != "") {
strcat(fn, prefix);
}
strcat(fn, chrom);
strcat(fn, "_forward.txt");
fsp = fopen(fn, "w");
fn[0] = 0;
if (prefix != "") {
strcat(fn, prefix);
}
strcat(fn, chrom);
strcat(fn, "_reverse.txt");
fep = fopen(fn, "w");
strcpy(chromcopy, chrom);
}
vector<CigarOp> cigars = al.CigarData;
forward = pos;
reverse = pos;
leftflag = true;
for (i = 0; i < cigars.size(); i++) {
if (cigars[i].Type == 'S' || cigars[i].Type == 'H') {
if (leftflag == false) break;
continue;
} else {
if (cigars[i].Type != 'I') reverse += cigars[i].Length;
leftflag = false;
}
}
if (direction == 0)
fprintf(fsp, "%d\n", forward);
else
fprintf(fep, "%d\n", reverse-1);
cnt++;
if (cnt % 500000 == 0)
//.........这里部分代码省略.........
示例12: main
int main (int argc, char *argv[]) {
// bool mapped =false;
// bool unmapped=false;
int bpToDecrease5=1;
int bpToDecrease3=2;
const string usage=string(string(argv[0])+" [options] input.bam out.bam"+"\n\n"+
"\tThis program takes a BAM file as input and produces\n"+
"\tanother where the putative deaminated bases have\n"+
"\ta base quality score of "+intStringify(baseQualForDeam)+"\n"+
"\tgiven an "+intStringify(offset)+" offset \n"+
"\n"+
"\tOptions:\n"+
"\t\t"+"-n5" +"\t\t\t"+"Decrease the nth bases surrounding the 5' ends (Default:"+stringify(bpToDecrease5)+") "+"\n"+
"\t\t"+"-n3" +"\t\t\t"+"Decrease the nth bases surrounding the 3' ends (Default:"+stringify(bpToDecrease3)+") "+"\n"
);
// "\t"+"-u , --unmapped" +"\n\t\t"+"For an unmapped bam file"+"\n"+
// "\t"+"-m , --mapped" +"\n\t\t"+"For an mapped bam file"+"\n");
if( (argc== 1) ||
(argc== 2 && string(argv[1]) == "-h") ||
(argc== 2 && string(argv[1]) == "-help") ||
(argc== 2 && string(argv[1]) == "--help") ){
cout<<"Usage:"<<endl;
cout<<usage<<endl;
cout<<""<<endl;
return 1;
}
for(int i=1;i<(argc-2);i++){ //all but the last arg
if( string(argv[i]) == "-n5" ){
bpToDecrease5 = destringify<int>(argv[i+1]);
i++;
continue;
}
if( string(argv[i]) == "-n3" ){
bpToDecrease3 = destringify<int>(argv[i+1]);
i++;
continue;
}
cerr<<"Unknown option "<<argv[i] <<" exiting"<<endl;
return 1;
}
if(argc < 3){
cerr<<"Error: Must specify the input and output BAM files";
return 1;
}
string inbamFile =argv[argc-2];
string outbamFile=argv[argc-1];
if(inbamFile == outbamFile){
cerr<<"Input and output files are the same"<<endl;
return 1;
}
// if(!mapped && !unmapped){
// cerr << "Please specify whether you reads are mapped or unmapped" << endl;
// return 1;
// }
// if(mapped && unmapped){
// cerr << "Please specify either mapped or unmapped but not both" << endl;
// return 1;
// }
BamReader reader;
if ( !reader.Open(inbamFile) ) {
cerr << "Could not open input BAM files." << endl;
return 1;
}
vector<RefData> testRefData=reader.GetReferenceData();
SamHeader header = reader.GetHeader();
string pID = "decrQualDeaminated";
string pName = "decrQualDeaminated";
string pCommandLine = "";
for(int i=0;i<(argc);i++){
pCommandLine += (string(argv[i])+" ");
}
putProgramInHeader(&header,pID,pName,pCommandLine);
const RefVector references = reader.GetReferenceData();
BamWriter writer;
if ( !writer.Open(outbamFile, header, references) ) {
cerr << "Could not open output BAM file" << endl;
return 1;
}
BamAlignment al;
// BamAlignment al2;
//.........这里部分代码省略.........
示例13: main
int main (int argc, char *argv[]) {
int length=0;
const string usage=string(string(argv[0])+" [options] input.bam out.bam"+"\n\n"+
"This program takes a BAM file as input and produces\n"+
"another where the first l bases have been cut\n"+
"\n"+
"Options:\n"+
"\t"+"-l" +"\n\t\t"+"Cut this length (Default "+stringify(length)+")"+"\n"
);
// "\t"+"-m , --mapped" +"\n\t\t"+"For an mapped bam file"+"\n");
if( (argc== 1) ||
(argc== 2 && string(argv[1]) == "-h") ||
(argc== 2 && string(argv[1]) == "-help") ||
(argc== 2 && string(argv[1]) == "--help") ) {
cout<<"Usage:"<<endl;
cout<<usage<<endl;
cout<<""<<endl;
return 1;
}
for(int i=1; i<(argc-2); i++) { //all but the last arg
// if(strcmp(argv[i],"-m") == 0 || strcmp(argv[i],"--mapped") == 0 ){
// mapped=true;
// continue;
// }
if(string(argv[i]) == "-l" ) {
length=destringify<int>(string(argv[i+1]));
i++;
continue;
}
cerr<<"Unknown option "<<argv[i] <<" exiting"<<endl;
return 1;
}
if(argc < 3) {
cerr<<"Error: Must specify the input and output BAM files";
return 1;
}
string inbamFile =argv[argc-2];
string outbamFile=argv[argc-1];
// if(!mapped && !unmapped){
// cerr << "Please specify whether you reads are mapped or unmapped" << endl;
// return 1;
// }
// if(mapped && unmapped){
// cerr << "Please specify either mapped or unmapped but not both" << endl;
// return 1;
// }
BamReader reader;
if ( !reader.Open(inbamFile) ) {
cerr << "Could not open input BAM files." << endl;
return 1;
}
vector<RefData> testRefData=reader.GetReferenceData();
SamHeader myHeader = reader.GetHeader();
const RefVector references = reader.GetReferenceData();
string pID = "cutStart";
string pName = "cutStart";
string pCommandLine = "";
for(int i=0; i<(argc); i++) {
pCommandLine += (string(argv[i])+" ");
}
putProgramInHeader(&myHeader,pID,pName,pCommandLine,returnGitHubVersion(string(argv[0]),"."));
BamWriter writer;
if ( !writer.Open(outbamFile, myHeader, references) ) {
cerr << "Could not open output BAM file" << endl;
return 1;
}
BamAlignment al;
// BamAlignment al2;
// bool al2Null=true;
while ( reader.GetNextAlignment(al) ) {
int lengthAl=MIN(al.QueryBases.size(),length);
al.QueryBases = al.QueryBases.substr(lengthAl);
al.Qualities = al.Qualities.substr(lengthAl);
writer.SaveAlignment(al);
}// while ( reader.GetNextAlignment(al) ) {
reader.Close();
//.........这里部分代码省略.........
示例14: main
int main (int argc, char *argv[]) {
int minBaseQuality = 0;
string usage=string(""+string(argv[0])+" [in BAM file] [in VCF file] [chr name] [deam out BAM] [not deam out BAM]"+
"\nThis program divides aligned single end reads into potentially deaminated\n"+
"\nreads and the puts the rest into another bam file if the deaminated positions are not called as the alternative base in the VCF.\n"+
"\nTip: if you do not need one of them, use /dev/null as your output\n"+
"arguments:\n"+
"\t"+"--bq [base qual] : Minimum base quality to flag a deaminated site (Default: "+stringify(minBaseQuality)+")\n"+
"\n");
if(argc == 1 ||
argc < 4 ||
(argc == 2 && (string(argv[0]) == "-h" || string(argv[0]) == "--help") )
){
cerr << "Usage "<<usage<<endl;
return 1;
}
for(int i=1;i<(argc-2);i++){
if(string(argv[i]) == "--bq"){
minBaseQuality=destringify<int>(argv[i+1]);
i++;
continue;
}
}
string bamfiletopen = string( argv[ argc-5 ] );
string vcffiletopen = string( argv[ argc-4 ] );
string chrname = string( argv[ argc-3 ] );
string deambam = string( argv[ argc-2 ] );
string nondeambam = string( argv[ argc-1 ] );
//dummy reader, will need to reposition anyway
VCFreader vcfr (vcffiletopen,
vcffiletopen+".tbi",
chrname,
1,
1,
0);
BamReader reader;
if ( !reader.Open(bamfiletopen) ) {
cerr << "Could not open input BAM file"<< bamfiletopen << endl;
return 1;
}
// if ( !reader.LocateIndex() ) {
// cerr << "The index for the BAM file cannot be located" << endl;
// return 1;
// }
// if ( !reader.HasIndex() ) {
// cerr << "The BAM file has not been indexed." << endl;
// return 1;
// }
//positioning the bam file
int refid=reader.GetReferenceID(chrname);
if(refid < 0){
cerr << "Cannot retrieve the reference ID for "<< chrname << endl;
return 1;
}
//cout<<"redif "<<refid<<endl;
//setting the BAM reader at that position
reader.SetRegion(refid,
0,
refid,
-1);
vector<RefData> testRefData=reader.GetReferenceData();
const SamHeader header = reader.GetHeader();
const RefVector references = reader.GetReferenceData();
BamWriter writerDeam;
if ( !writerDeam.Open(deambam, header, references) ) {
cerr << "Could not open output BAM file" << endl;
return 1;
}
BamWriter writerNoDeam;
if ( !writerNoDeam.Open(nondeambam, header, references) ) {
cerr << "Could not open output BAM file" << endl;
return 1;
}
unsigned int totalReads =0;
unsigned int deaminatedReads =0;
unsigned int ndeaminatedReads =0;
//.........这里部分代码省略.........
示例15: main
int main (int argc, char *argv[]) {
bool mapped =false;
bool unmapped=false;
const string usage=string(string(argv[0])+" [options] input.bam out.bam"+"\n\n"+
"This program takes a BAM file as input and produces\n"+
"another where the putative deaminated bases have\n"+
"have been cut\n"+
"\n"+
"Options:\n");
// "\t"+"-u , --unmapped" +"\n\t\t"+"For an unmapped bam file"+"\n"+
// "\t"+"-m , --mapped" +"\n\t\t"+"For an mapped bam file"+"\n");
if( (argc== 1) ||
(argc== 2 && string(argv[1]) == "-h") ||
(argc== 2 && string(argv[1]) == "-help") ||
(argc== 2 && string(argv[1]) == "--help") ){
cout<<"Usage:"<<endl;
cout<<usage<<endl;
cout<<""<<endl;
return 1;
}
// for(int i=1;i<(argc-1);i++){ //all but the last arg
// if(strcmp(argv[i],"-m") == 0 || strcmp(argv[i],"--mapped") == 0 ){
// mapped=true;
// continue;
// }
// if(strcmp(argv[i],"-u") == 0 || strcmp(argv[i],"--unmapped") == 0 ){
// unmapped=true;
// continue;
// }
// cerr<<"Unknown option "<<argv[i] <<" exiting"<<endl;
// return 1;
// }
if(argc != 3){
cerr<<"Error: Must specify the input and output BAM files";
return 1;
}
string inbamFile =argv[argc-2];
string outbamFile=argv[argc-1];
// if(!mapped && !unmapped){
// cerr << "Please specify whether you reads are mapped or unmapped" << endl;
// return 1;
// }
// if(mapped && unmapped){
// cerr << "Please specify either mapped or unmapped but not both" << endl;
// return 1;
// }
BamReader reader;
if ( !reader.Open(inbamFile) ) {
cerr << "Could not open input BAM files." << endl;
return 1;
}
vector<RefData> testRefData=reader.GetReferenceData();
const SamHeader header = reader.GetHeader();
const RefVector references = reader.GetReferenceData();
BamWriter writer;
if ( !writer.Open(outbamFile, header, references) ) {
cerr << "Could not open output BAM file" << endl;
return 1;
}
BamAlignment al;
// BamAlignment al2;
// bool al2Null=true;
while ( reader.GetNextAlignment(al) ) {
if(al.IsPaired() ){
if(al.IsFirstMate() ){ //5' end, need to check first base only
if(al.IsReverseStrand()){ //
if(!al.IsMapped()){
cerr << "Cannot have reverse complemented unmapped reads: " <<al.Name<< endl;
//return 1;
}
int indexToCheck;
//last
indexToCheck=al.QueryBases.length()-1;
if(toupper(al.QueryBases[indexToCheck]) == 'A'){
//al.Qualities[indexToCheck]=char(offset+baseQualForDeam);
al.QueryBases = al.QueryBases.substr(0,indexToCheck);
al.Qualities = al.Qualities.substr(0, indexToCheck);
}
}else{
//.........这里部分代码省略.........