本文整理汇总了C++中BamReader::GetReferenceData方法的典型用法代码示例。如果您正苦于以下问题:C++ BamReader::GetReferenceData方法的具体用法?C++ BamReader::GetReferenceData怎么用?C++ BamReader::GetReferenceData使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类BamReader
的用法示例。
在下文中一共展示了BamReader::GetReferenceData方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: IntersectBamPE
void BedIntersectPE::IntersectBamPE(string bamFile) {
// load the "B" bed file into a map so
// that we can easily compare "A" to it for overlaps
_bedB->loadBedFileIntoMap();
// open the BAM file
BamReader reader;
BamWriter writer;
reader.Open(bamFile);
// get header & reference information
string bamHeader = reader.GetHeaderText();
RefVector refs = reader.GetReferenceData();
// open a BAM output to stdout if we are writing BAM
if (_bamOutput == true) {
// set compression mode
BamWriter::CompressionMode compressionMode = BamWriter::Compressed;
if ( _isUncompressedBam ) compressionMode = BamWriter::Uncompressed;
writer.SetCompressionMode(compressionMode);
// open our BAM writer
writer.Open("stdout", bamHeader, refs);
}
// track the previous and current sequence
// names so that we can identify blocks of
// alignments for a given read ID.
string prevName, currName;
prevName = currName = "";
vector<BamAlignment> alignments; // vector of BAM alignments for a given ID in a BAM file.
alignments.reserve(100);
_bedA->bedType = 10; // it's a full BEDPE given it's BAM
// rip through the BAM file and convert each mapped entry to BEDPE
BamAlignment bam1, bam2;
while (reader.GetNextAlignment(bam1)) {
// the alignment must be paired
if (bam1.IsPaired() == true) {
// grab the second alignment for the pair.
reader.GetNextAlignment(bam2);
// require that the alignments are from the same query
if (bam1.Name == bam2.Name) {
ProcessBamBlock(bam1, bam2, refs, writer);
}
else {
cerr << "*****ERROR: -bedpe requires BAM to be sorted or grouped by query name. " << endl;
exit(1);
}
}
}
// close up
reader.Close();
if (_bamOutput == true) {
writer.Close();
}
}
示例2: while
bool RevertTool::RevertToolPrivate::Run(void) {
// opens the BAM file without checking for indexes
BamReader reader;
if ( !reader.Open(m_settings->InputFilename) ) {
cerr << "Could not open input BAM file... quitting." << endl;
return false;
}
// get BAM file metadata
const string& headerText = reader.GetHeaderText();
const RefVector& references = reader.GetReferenceData();
// open writer
BamWriter writer;
bool writeUncompressed = ( m_settings->OutputFilename == Options::StandardOut() && !m_settings->IsForceCompression );
if ( !writer.Open(m_settings->OutputFilename, headerText, references, writeUncompressed) ) {
cerr << "Could not open " << m_settings->OutputFilename << " for writing." << endl;
return false;
}
// plow through file, reverting alignments
BamAlignment al;
while ( reader.GetNextAlignment(al) ) {
RevertAlignment(al);
writer.SaveAlignment(al);
}
// clean and exit
reader.Close();
writer.Close();
return true;
}
示例3: ValidateReaders
// ValidateReaders checks that all the readers point to BAM files representing
// alignments against the same set of reference sequences, and that the
// sequences are identically ordered. If these checks fail the operation of
// the multireader is undefined, so we force program exit.
void BamMultiReader::ValidateReaders(void) const {
int firstRefCount = readers.front().first->GetReferenceCount();
BamTools::RefVector firstRefData = readers.front().first->GetReferenceData();
for (vector<pair<BamReader*, BamAlignment*> >::const_iterator it = readers.begin(); it != readers.end(); ++it) {
BamReader* reader = it->first;
BamTools::RefVector currentRefData = reader->GetReferenceData();
BamTools::RefVector::const_iterator f = firstRefData.begin();
BamTools::RefVector::const_iterator c = currentRefData.begin();
if (reader->GetReferenceCount() != firstRefCount || firstRefData.size() != currentRefData.size()) {
cerr << "ERROR: mismatched number of references in " << reader->GetFilename()
<< " expected " << firstRefCount
<< " reference sequences but only found " << reader->GetReferenceCount() << endl;
exit(1);
}
// this will be ok; we just checked above that we have identically-sized sets of references
// here we simply check if they are all, in fact, equal in content
while (f != firstRefData.end()) {
if (f->RefName != c->RefName || f->RefLength != c->RefLength) {
cerr << "ERROR: mismatched references found in " << reader->GetFilename()
<< " expected: " << endl;
for (BamTools::RefVector::const_iterator a = firstRefData.begin(); a != firstRefData.end(); ++a)
cerr << a->RefName << " " << a->RefLength << endl;
cerr << "but found: " << endl;
for (BamTools::RefVector::const_iterator a = currentRefData.begin(); a != currentRefData.end(); ++a)
cerr << a->RefName << " " << a->RefLength << endl;
exit(1);
}
++f; ++c;
}
}
}
示例4: CollectCoverageBam
void BedCoverage::CollectCoverageBam(string bamFile) {
// load the "B" bed file into a map so
// that we can easily compare "A" to it for overlaps
_bedB->loadBedCovFileIntoMap();
// open the BAM file
BamReader reader;
reader.Open(bamFile);
// get header & reference information
string header = reader.GetHeaderText();
RefVector refs = reader.GetReferenceData();
// convert each aligned BAM entry to BED
// and compute coverage on B
BamAlignment bam;
while (reader.GetNextAlignment(bam)) {
if (bam.IsMapped()) {
// treat the BAM alignment as a single "block"
if (_obeySplits == false) {
// construct a new BED entry from the current BAM alignment.
BED a;
a.chrom = refs.at(bam.RefID).RefName;
a.start = bam.Position;
a.end = bam.GetEndPosition(false, false);
a.strand = "+";
if (bam.IsReverseStrand()) a.strand = "-";
_bedB->countHits(a, _sameStrand, _diffStrand, _countsOnly);
}
// split the BAM alignment into discrete blocks and
// look for overlaps only within each block.
else {
// vec to store the discrete BED "blocks" from a
bedVector bedBlocks;
// since we are counting coverage, we do want to split blocks when a
// deletion (D) CIGAR op is encountered (hence the true for the last parm)
GetBamBlocks(bam, refs.at(bam.RefID).RefName, bedBlocks, false, true);
// use countSplitHits to avoid over-counting each split chunk
// as distinct read coverage.
_bedB->countSplitHits(bedBlocks, _sameStrand, _diffStrand, _countsOnly);
}
}
}
// report the coverage (summary or histogram) for BED B.
if (_countsOnly == true)
ReportCounts();
else
ReportCoverage();
// close the BAM file
reader.Close();
}
示例5: fetchLines
void parser::fetchLines(vector<BamAlignment>& result, uint32_t n,
const std::string& file) {
BamReader bam;
BamAlignment read;
Guarded<FileNotGood> g(!(bam.Open(file)), file.c_str());
const RefVector refvec = bam.GetReferenceData();
while (bam.GetNextAlignment(read) && n) {
result.push_back(read);
// cout << "read " << n << "\t" << read << "\n";
n--;
}
}
示例6: CoverageVisitor
bool CoverageTool::CoverageToolPrivate::Run(void) {
// if output filename given
ofstream outFile;
if ( m_settings->HasOutputFile ) {
// open output file stream
outFile.open(m_settings->OutputFilename.c_str());
if ( !outFile ) {
cerr << "bamtools coverage ERROR: could not open " << m_settings->OutputFilename
<< " for output" << endl;
return false;
}
// set m_out to file's streambuf
m_out.rdbuf(outFile.rdbuf());
}
//open our BAM reader
BamReader reader;
if ( !reader.Open(m_settings->InputBamFilename) ) {
cerr << "bamtools coverage ERROR: could not open input BAM file: " << m_settings->InputBamFilename << endl;
return false;
}
// retrieve references
m_references = reader.GetReferenceData();
// set up our output 'visitor'
CoverageVisitor* cv = new CoverageVisitor(m_references, &m_out);
// set up pileup engine with 'visitor'
PileupEngine pileup;
pileup.AddVisitor(cv);
// process input data
BamAlignment al;
while ( reader.GetNextAlignment(al) )
pileup.AddAlignment(al);
// clean up
reader.Close();
if ( m_settings->HasOutputFile )
outFile.close();
delete cv;
cv = 0;
// return success
return true;
}
示例7: main
int main (int argc, char *argv[]) {
if( (argc== 1) ||
(argc== 2 && string(argv[1]) == "-h") ||
(argc== 2 && string(argv[1]) == "-help") ||
(argc== 2 && string(argv[1]) == "--help") ){
cout<<"Usage:setAsUnpaired [in bam] [outbam]"<<endl<<"this program takes flags all paired sequences as singles"<<endl;
return 1;
}
string bamfiletopen = string(argv[1]);
string bamFileOUT = string(argv[2]);
BamReader reader;
BamWriter writer;
if ( !reader.Open(bamfiletopen) ) {
cerr << "Could not open input BAM files." << endl;
return 1;
}
const SamHeader header = reader.GetHeader();
const RefVector references = reader.GetReferenceData();
if ( !writer.Open(bamFileOUT,header,references) ) {
cerr << "Could not open output BAM file "<<bamFileOUT << endl;
return 1;
}
BamAlignment al;
while ( reader.GetNextAlignment(al) ) {
if(al.IsMapped()){
cerr << "Cannot yet handle mapped reads " << endl;
return 1;
}
al.SetIsPaired (false);
writer.SaveAlignment(al);
} //while al
reader.Close();
writer.Close();
return 0;
}
示例8: main
int main(const int argc, char* const argv[]) {
int c, min_mapQ=0, seed=chrono::system_clock::now().time_since_epoch().count();
unsigned int flag_on=0, flag_off=0;
string fn_tgt, fn_in, fn_out="", out_format="b";
while ((c = getopt(argc, argv, "SbBcCt:h1Ho:q:f:F:ul:r:?T:R:L:s:@:m:x:U:")) >= 0) {
switch (c) {
case 's': seed = atoi(optarg); break;
case 'm': break;
case 'c': break;
case 'S': break;
case 'b': break;
case 'C': break;
case 'h': break;
case 'H': break;
case 'o': fn_out = optarg; break;
case 'U': break;
case 'f': flag_on |= strtol(optarg, 0, 0); break;
case 'F': flag_off |= strtol(optarg, 0, 0); break;
case 'q': min_mapQ = atoi(optarg); break;
case 'u': out_format = "u"; break;
case '1': break;
case 'l': break;
case 'r': break;
case 't': fn_tgt = optarg; break;
case 'R': break;
case '?': return usage();
case 'T': break;
case 'B': break;
case '@': break;
case 'x': break;
default: return usage();
}
}
if (fn_tgt.compare("") == 0) return usage();
if (argc == optind) return usage();
fn_in = argv[optind];
BamReader reader;
if (!reader.Open(fn_in)) {
cerr << "ERROR: cannot open [" << fn_in << "] for reading\n";
return 1;
}
if (!reader.LocateIndex()) {
cerr << "ERROR: cannot find BAM index for [" << fn_in << "]\n";
return 1;
}
const SamHeader header = reader.GetHeader();
if (header.SortOrder.compare("coordinate") != 0) {
cerr << "ERROR: [" << fn_in << "] not sorted by coordinate\n";
return 1;
}
const RefVector refseq = reader.GetReferenceData();
vector<BamRegion> regions;
vector<unsigned int> src_depths, tgt_depths;
if (read_region_depth(fn_tgt.c_str(), reader, regions, src_depths, tgt_depths) != 0) return 1;
BamWriter writer;
if (!writer.Open(fn_out, header, refseq)) {
cerr << "ERROR: cannot open [" << fn_out << "] for writing\n";
return 1;
}
BamAlignment aln;
vector<BamAlignment> reads;
vector<string> paired, unpaired;
unordered_map<int, int> kept;
unordered_map<string, unsigned int> seen, sampled;
unordered_map<string, vector<int> > pool;
for (size_t i=0; i<regions.size(); ++i) {
reads.clear();
paired.clear();
unpaired.clear();
kept.clear();
pool.clear();
char region_string[256];
sprintf(region_string, "%s:%d-%d", refseq[regions[i].LeftRefID].RefName.c_str(), regions[i].LeftPosition, regions[i].RightPosition);
if (!reader.SetRegion(regions[i])) {
cerr << "WARNING: failed to locate [" << region_string << "]\n";
//cerr << "WARNING: failed to locate [" << refseq[regions[i].LeftRefID].RefName << ':' << regions[i].LeftPosition << '-' << regions[i].RightPosition << "]\n";
continue;
}
while (reader.GetNextAlignment(aln)) {
if ((aln.AlignmentFlag & flag_on) == flag_on && !(aln.AlignmentFlag & flag_off) && aln.MapQuality >= min_mapQ)
reads.push_back(aln);
}
if (reads.size() == 0) continue;
unsigned int depth = 0;
for (size_t k=0; k<reads.size(); ++k) {
aln = reads[k];
string rn = aln.Name;
if (seen.find(rn) != seen.end()) { // if seen in previous regions
if (sampled.find(rn) != sampled.end()) { // if self or mate sampled before, sample it
if (sampled[rn] != aln.AlignmentFlag) kept[k] = 1; // if mate sampled before, keep it
depth += get_overlap(aln, regions[i]);
}
//.........这里部分代码省略.........
示例9: IntersectBam
void BedIntersect::IntersectBam(string bamFile) {
// load the "B" bed file into a map so
// that we can easily compare "A" to it for overlaps
_bedB->loadBedFileIntoMap();
// open the BAM file
BamReader reader;
BamWriter writer;
reader.Open(bamFile);
// get header & reference information
string header = reader.GetHeaderText();
RefVector refs = reader.GetReferenceData();
// open a BAM output to stdout if we are writing BAM
if (_bamOutput == true) {
// open our BAM writer
writer.Open("stdout", header, refs, _isUncompressedBam);
}
vector<BED> hits;
// reserve some space
hits.reserve(100);
_bedA->bedType = 6;
BamAlignment bam;
// get each set of alignments for each pair.
while (reader.GetNextAlignment(bam)) {
if (bam.IsMapped()) {
BED a;
a.chrom = refs.at(bam.RefID).RefName;
a.start = bam.Position;
a.end = bam.GetEndPosition(false);
// build the name field from the BAM alignment.
a.name = bam.Name;
if (bam.IsFirstMate()) a.name += "/1";
if (bam.IsSecondMate()) a.name += "/2";
a.score = ToString(bam.MapQuality);
a.strand = "+";
if (bam.IsReverseStrand()) a.strand = "-";
if (_bamOutput == true) {
bool overlapsFound = false;
// treat the BAM alignment as a single "block"
if (_obeySplits == false) {
overlapsFound = FindOneOrMoreOverlap(a);
}
// split the BAM alignment into discrete blocks and
// look for overlaps only within each block.
else {
bool overlapFoundForBlock;
bedVector bedBlocks; // vec to store the discrete BED "blocks" from a
// we don't want to split on "D" ops, hence the "false"
getBamBlocks(bam, refs, bedBlocks, false);
vector<BED>::const_iterator bedItr = bedBlocks.begin();
vector<BED>::const_iterator bedEnd = bedBlocks.end();
for (; bedItr != bedEnd; ++bedItr) {
overlapFoundForBlock = FindOneOrMoreOverlap(a);
if (overlapFoundForBlock == true)
overlapsFound = true;
}
}
if (overlapsFound == true) {
if (_noHit == false)
writer.SaveAlignment(bam);
}
else {
if (_noHit == true) {
writer.SaveAlignment(bam);
}
}
}
else {
// treat the BAM alignment as a single BED "block"
if (_obeySplits == false) {
FindOverlaps(a, hits);
hits.clear();
}
// split the BAM alignment into discrete BED blocks and
// look for overlaps only within each block.
else {
bedVector bedBlocks; // vec to store the discrete BED "blocks" from a
getBamBlocks(bam, refs, bedBlocks, false);
vector<BED>::const_iterator bedItr = bedBlocks.begin();
vector<BED>::const_iterator bedEnd = bedBlocks.end();
for (; bedItr != bedEnd; ++bedItr) {
FindOverlaps(*bedItr, hits);
hits.clear();
}
}
}
}
}
//.........这里部分代码省略.........
示例10: IntersectBamPE
void BedIntersectPE::IntersectBamPE(string bamFile) {
// load the "B" bed file into a map so
// that we can easily compare "A" to it for overlaps
_bedB->loadBedFileIntoMap();
// open the BAM file
BamReader reader;
BamWriter writer;
reader.Open(bamFile);
// get header & reference information
string bamHeader = reader.GetHeaderText();
RefVector refs = reader.GetReferenceData();
// open a BAM output to stdout if we are writing BAM
if (_bamOutput == true) {
// set compression mode
BamWriter::CompressionMode compressionMode = BamWriter::Compressed;
if ( _isUncompressedBam ) compressionMode = BamWriter::Uncompressed;
writer.SetCompressionMode(compressionMode);
// open our BAM writer
writer.Open("stdout", bamHeader, refs);
}
// track the previous and current sequence
// names so that we can identify blocks of
// alignments for a given read ID.
string prevName, currName;
prevName = currName = "";
vector<BamAlignment> alignments; // vector of BAM alignments for a given ID in a BAM file.
alignments.reserve(100);
_bedA->bedType = 10; // it's a full BEDPE given it's BAM
// rip through the BAM file and convert each mapped entry to BEDPE
BamAlignment bam1, bam2;
while (reader.GetNextAlignment(bam1)) {
reader.GetNextAlignment(bam2);
if (bam1.Name != bam2.Name) {
while (bam1.Name != bam2.Name)
{
if (bam1.IsPaired())
{
cerr << "*****WARNING: Query " << bam1.Name
<< " is marked as paired, but it's mate does not occur"
<< " next to it in your BAM file. Skipping. " << endl;
}
bam1 = bam2;
reader.GetNextAlignment(bam2);
}
}
else if (bam1.IsPaired() && bam1.IsPaired()) {
ProcessBamBlock(bam1, bam2, refs, writer);
}
}
// close up
reader.Close();
if (_bamOutput == true) {
writer.Close();
}
}
示例11: main
int main (int argc, const char *argv[])
{
printf ("------------- bamrealignment --------------\n");
OptArgs opts;
opts.ParseCmdLine(argc, argv);
vector<int> score_vals(4);
string input_bam = opts.GetFirstString ('i', "input", "");
string output_bam = opts.GetFirstString ('o', "output", "");
opts.GetOption(score_vals, "4,-6,-5,-2", 's', "scores");
int clipping = opts.GetFirstInt ('c', "clipping", 2);
bool anchors = opts.GetFirstBoolean ('a', "anchors", true);
int bandwidth = opts.GetFirstInt ('b', "bandwidth", 10);
bool verbose = opts.GetFirstBoolean ('v', "verbose", false);
bool debug = opts.GetFirstBoolean ('d', "debug", false);
int format = opts.GetFirstInt ('f', "format", 1);
int num_threads = opts.GetFirstInt ('t', "threads", 8);
string log_fname = opts.GetFirstString ('l', "log", "");
if (input_bam.empty() or output_bam.empty())
return PrintHelp();
opts.CheckNoLeftovers();
std::ofstream logf;
if (log_fname.size ())
{
logf.open (log_fname.c_str ());
if (!logf.is_open ())
{
fprintf (stderr, "bamrealignment: Failed to open log file %s\n", log_fname.c_str());
return 1;
}
}
BamReader reader;
if (!reader.Open(input_bam)) {
fprintf(stderr, "bamrealignment: Failed to open input file %s\n", input_bam.c_str());
return 1;
}
SamHeader header = reader.GetHeader();
RefVector refs = reader.GetReferenceData();
BamWriter writer;
writer.SetNumThreads(num_threads);
if (format == 1)
writer.SetCompressionMode(BamWriter::Uncompressed);
else
writer.SetCompressionMode(BamWriter::Compressed);
if (!writer.Open(output_bam, header, refs)) {
fprintf(stderr, "bamrealignment: Failed to open output file %s\n", output_bam.c_str());
return 1;
}
// The meat starts here ------------------------------------
if (verbose)
cout << "Verbose option is activated, each alignment will print to screen." << endl
<< " After a read hit RETURN to continue to the next one," << endl
<< " or press q RETURN to quit the program," << endl
<< " or press s Return to silence verbose," << endl
<< " or press c RETURN to continue printing without further prompt." << endl << endl;
unsigned int readcounter = 0;
unsigned int mapped_readcounter = 0;
unsigned int realigned_readcounter = 0;
unsigned int modified_alignment_readcounter = 0;
unsigned int pos_update_readcounter = 0;
unsigned int failed_clip_realigned_readcount = 0;
unsigned int already_perfect_readcount = 0;
unsigned int bad_md_tag_readcount = 0;
unsigned int error_recreate_ref_readcount = 0;
unsigned int error_clip_anchor_readcount = 0;
unsigned int error_sw_readcount = 0;
unsigned int error_unclip_readcount = 0;
unsigned int start_position_shift;
int orig_position;
int new_position;
string md_tag, new_md_tag, input = "x";
vector<CigarOp> new_cigar_data;
vector<MDelement> new_md_data;
bool position_shift = false;
time_t start_time = time(NULL);
Realigner aligner;
aligner.verbose_ = verbose;
aligner.debug_ = debug;
if (!aligner.SetScores(score_vals))
cout << "bamrealignment: Four scores need to be provided: match, mismatch, gap open, gap extend score!" << endl;
aligner.SetAlignmentBandwidth(bandwidth);
//.........这里部分代码省略.........
示例12: WindowIntersectBam
void BedWindow::WindowIntersectBam(string bamFile) {
// load the "B" bed file into a map so
// that we can easily compare "A" to it for overlaps
_bedB->loadBedFileIntoMap();
// open the BAM file
BamReader reader;
BamWriter writer;
reader.Open(bamFile);
// get header & reference information
string bamHeader = reader.GetHeaderText();
RefVector refs = reader.GetReferenceData();
// open a BAM output to stdout if we are writing BAM
if (_bamOutput == true) {
// set compression mode
BamWriter::CompressionMode compressionMode = BamWriter::Compressed;
if ( _isUncompressedBam ) compressionMode = BamWriter::Uncompressed;
writer.SetCompressionMode(compressionMode);
// open our BAM writer
writer.Open("stdout", bamHeader, refs);
}
vector<BED> hits; // vector of potential hits
// reserve some space
hits.reserve(100);
_bedA->bedType = 6;
BamAlignment bam;
bool overlapsFound;
// get each set of alignments for each pair.
while (reader.GetNextAlignment(bam)) {
if (bam.IsMapped()) {
BED a;
a.chrom = refs.at(bam.RefID).RefName;
a.start = bam.Position;
a.end = bam.GetEndPosition(false, false);
// build the name field from the BAM alignment.
a.name = bam.Name;
if (bam.IsFirstMate()) a.name += "/1";
if (bam.IsSecondMate()) a.name += "/2";
a.score = ToString(bam.MapQuality);
a.strand = "+"; if (bam.IsReverseStrand()) a.strand = "-";
if (_bamOutput == true) {
overlapsFound = FindOneOrMoreWindowOverlaps(a);
if (overlapsFound == true) {
if (_noHit == false)
writer.SaveAlignment(bam);
}
else {
if (_noHit == true)
writer.SaveAlignment(bam);
}
}
else {
FindWindowOverlaps(a, hits);
hits.clear();
}
}
// BAM IsMapped() is false
else if (_noHit == true) {
writer.SaveAlignment(bam);
}
}
// close the relevant BAM files.
reader.Close();
if (_bamOutput == true) {
writer.Close();
}
}
示例13: main
int main(int argc, char** argv) {
FILE *fsp = 0, *fep = 0;
int flag; // 0
char chrom[255]; // 1
int pos; // 2
int quality; // 3
char *cigar; // 4
size_t len = 0;
char chromcopy[255] = { 0 };
char fn[255];
int i;
int forward, reverse;
bool direction; // 0
bool leftflag;
const char *prefix = "";
int cnt = 0;
BamReader reader;
//if (argc < 2 || argc > 4) usage();
for (i=1; i<argc; i++) {
if ((strcmp(argv[i], "-p") == 0) && (i != argc-1)) {
prefix = argv[++i];
} else {
if (!reader.IsOpen()) {
try {
reader.Open(argv[i]);
} catch (exception& e) {
cout << e.what();
throw;
}
} else {
usage();
}
}
}
if (!reader.IsOpen()) usage();
const SamHeader header = reader.GetHeader();
const RefVector references = reader.GetReferenceData();
BamAlignment al;
while(reader.GetNextAlignmentCore(al)) {
flag = al.AlignmentFlag;
if ((flag & 256) || (flag & 4) || (flag & 512) || (flag & 1024)) continue;
direction = ( (flag & 16) == 16);
strcpy(chrom, references[al.RefID].RefName.c_str());
pos = al.Position+1;
quality = al.MapQuality;
if (quality == 0) continue;
if (strcmp(chromcopy, chrom) != 0) {
if (fsp) fclose(fsp);
if (fep) fclose(fep);
fn[0] = 0;
if (prefix != "") {
strcat(fn, prefix);
}
strcat(fn, chrom);
strcat(fn, "_forward.txt");
fsp = fopen(fn, "w");
fn[0] = 0;
if (prefix != "") {
strcat(fn, prefix);
}
strcat(fn, chrom);
strcat(fn, "_reverse.txt");
fep = fopen(fn, "w");
strcpy(chromcopy, chrom);
}
vector<CigarOp> cigars = al.CigarData;
forward = pos;
reverse = pos;
leftflag = true;
for (i = 0; i < cigars.size(); i++) {
if (cigars[i].Type == 'S' || cigars[i].Type == 'H') {
if (leftflag == false) break;
continue;
} else {
if (cigars[i].Type != 'I') reverse += cigars[i].Length;
leftflag = false;
}
}
if (direction == 0)
fprintf(fsp, "%d\n", forward);
else
fprintf(fep, "%d\n", reverse-1);
cnt++;
if (cnt % 500000 == 0)
//.........这里部分代码省略.........
示例14: main
int main (int argc, char *argv[]) {
if( (argc!= 4 && argc !=5 && argc !=6) ||
(argc== 2 && string(argv[1]) == "-h") ||
(argc== 2 && string(argv[1]) == "-help") ||
(argc== 2 && string(argv[1]) == "--help") ){
cerr<<"Usage:splitByRG [in bam] [rg Tally] [out prefix] (optional target)"<<endl<<"this program will subsample a BAM file per read group for a certain target\nFor example splitByRG in.bam tally.txt out will create\nout.rg1.bam\nout.rg2.bam\n"<<endl;
return 1;
}
string bamfiletopen = string(argv[1]);
string rgTally = string(argv[2]);
string bamDirOutPrefix = string(argv[3]);
int target = 200000;
int maxTarget = 1000000;
if(argc==5){
target = destringify<int> ( string(argv[4]) );
}
if(argc==6){
target = destringify<int> ( string(argv[4]) );
maxTarget = destringify<int> ( string(argv[5]) );
}
cerr<<"minimum fragments:\t"<<target<<endl;
cerr<<"target fragments:\t"<<maxTarget<<endl;
string line;
ifstream myFileTally;
map<string,double> rg2Fraction;
myFileTally.open(rgTally.c_str(), ios::in);
cerr<<"Retained groups:\n"<<endl;
cerr<<"RG\t#mapped\tfraction retained"<<endl;
cerr<<"-----------------------------------"<<endl;
if (myFileTally.is_open()){
while ( getline (myFileTally,line)){
vector<string> tokens = allTokens(line,'\t');
if(tokens.size() > 6)
if( tokens[1] == "pass" &&
(tokens[0] != "\"\"" &&
tokens[0] != "control" &&
tokens[0] != "TOTAL") ){
//cout<<tokens[0]<<"\t"<<tokens[5]<<endl;
int count = destringify<int>(tokens[5]);
if(count>target){
if(count>=maxTarget){
rg2Fraction[ tokens[0] ] = double(maxTarget)/double(count);
cout<<tokens[0]<<"\t"<<count<<"\t"<<double(maxTarget)/double(count)<<endl;
}else{
cout<<tokens[0]<<"\t"<<count<<"\t"<<1.0<<endl;
rg2Fraction[ tokens[0] ] = 1.0;
}
}
}
}
myFileTally.close();
}else{
cerr << "Unable to open file "<<rgTally<<endl;
return 1;
}
map<string,BamWriter *> rg2BamWriter;
// if(!isDirectory(bamDirOut)){
// cerr<<"ERROR: the out directory does not exist"<<endl;
// return 1;
// }
BamReader reader;
if ( !reader.Open(bamfiletopen) ) {
cerr << "Could not open input BAM files." << endl;
return 1;
}
const SamHeader header = reader.GetHeader();
const RefVector references = reader.GetReferenceData();
vector<RefData> refData=reader.GetReferenceData();
SamReadGroupDictionary srgd=header.ReadGroups;
for(SamReadGroupConstIterator srgci=srgd.ConstBegin();
srgci<srgd.ConstEnd();
srgci++){
//cout<<*srgci<<endl;
const SamReadGroup rg = (*srgci);
//cout<<rg.ID<<endl;
if( rg2Fraction.find(rg.ID) != rg2Fraction.end() ){
rg2BamWriter[rg.ID] = new BamWriter();
rg2BamWriter[rg.ID]->Open(bamDirOutPrefix+"."+rg.ID+".bam",header,references);
}
//cout<<bamDirOutPrefix+"."+rg.ID+".bam"<<endl;
//.........这里部分代码省略.........
示例15: call
int GenericIndividualSnpCall::call(Fasta &fastaObj, BamReader &bamObj, BamRegion &roi, GenericProbabilisticAlignment &probAligner, VariantCallSetting& snpCallSettings, vector<GenericVariant> &variantSet)
{
RefVector chromosomes = bamObj.GetReferenceData();
// set up genome blocks
vector<int> BlockChrID, BlockLeftPos, BlockRightPos;
int BlockNumber=setupGenomeBlock(chromosomes, roi, BlockChrID, BlockLeftPos, BlockRightPos);
int numSNP = 0;
// iterate throught blocks
for (int i=0; i<BlockNumber; ++i)
{
if (m_verbosity>=1)
{
cout << "processing " << chromosomes[BlockChrID[i]].RefName << ":" << BlockLeftPos[i]+1 << "-" << BlockRightPos[i] << endl;
}
clock_t startTime = clock();
// genome
string BlockGenome;
fastaObj.GetSequence(BlockChrID[i], BlockLeftPos[i], BlockRightPos[i], BlockGenome);
map<int,list<tuple<char,int,int,double>>> BlockBamData;
AlleleSet BlockSnpAlleleCandidates;
// profile SNP sites by the simple method
simpleSnpCall(BlockGenome, bamObj, BlockChrID[i], BlockLeftPos[i], BlockRightPos[i], BlockSnpAlleleCandidates, BlockBamData);
// merge SNP sites to SNP blocks
vector<tuple<int,int,list<Allele>>> BlockSnpLoci;
mergeSnpSitesToBlocks(BlockSnpAlleleCandidates, BlockSnpLoci);
// iterate through Snp locus
for (int j=0; j<BlockSnpLoci.size(); j++)
{
int BlockSnpLeftPos = get<0>(BlockSnpLoci[j]);
int BlockSnpRightPos = get<1>(BlockSnpLoci[j]);
// it is a SNP site
if (BlockSnpRightPos==BlockSnpLeftPos+1)
{
simpleBayesianSnpCall(fastaObj, bamObj, BlockChrID[i], BlockSnpLeftPos, BlockSnpRightPos, get<2>(BlockSnpLoci[j]), BlockBamData[BlockSnpLeftPos], snpCallSettings, variantSet);
}else if (BlockSnpRightPos==BlockSnpLeftPos+2)
{
for (int pos=BlockSnpLeftPos; pos<BlockSnpRightPos; pos++)
{
list<Allele> fAlleles = get<2>(BlockSnpLoci[j]);
list<Allele> tAlleles;
for (list<Allele>::iterator faIter=fAlleles.begin(); faIter!=fAlleles.end(); faIter++)
{
if (faIter->m_chrPosition==pos)
tAlleles.emplace_back(*faIter);
}
if (!tAlleles.empty())
simpleBayesianSnpCall(fastaObj, bamObj, BlockChrID[i], pos, pos+1, tAlleles, BlockBamData[pos], snpCallSettings, variantSet);
}
}
else // it is a MNP site
{
PyroHMMsnp(fastaObj, bamObj, BlockChrID[i], BlockSnpLeftPos, BlockSnpRightPos, probAligner, get<2>(BlockSnpLoci[j]), snpCallSettings, variantSet);
}
}
clock_t endTime = clock();
if (m_verbosity>=1)
{
cout << "time elapsed " << ((endTime-startTime)/(double)CLOCKS_PER_SEC/60.) << " minutes";
cout << ", ";
cout << "call " << variantSet.size()-numSNP << " SNPs" << endl;
}
numSNP = variantSet.size();
}
return variantSet.size();
}