本文整理汇总了C++中FASTAReader::Initialize方法的典型用法代码示例。如果您正苦于以下问题:C++ FASTAReader::Initialize方法的具体用法?C++ FASTAReader::Initialize怎么用?C++ FASTAReader::Initialize使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类FASTAReader
的用法示例。
在下文中一共展示了FASTAReader::Initialize方法的13个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: main
int main(int argc, char* argv[]) {
string refFileName, queryFileName;
int maxHammingDistance;
if (argc < 4) {
cout << "usage: hammer ref query maxHam " << endl;
exit(1);
}
refFileName = argv[1];
queryFileName = argv[2];
maxHammingDistance = atoi(argv[3]);
FASTAReader reader;
reader.Initialize(refFileName);
FASTASequence ref, refRC;
reader.GetNext(ref);
ref.MakeRC(refRC);
FASTAReader queryReader;
queryReader.Initialize(queryFileName);
FASTASequence query;
queryReader.GetNext(query);
DNALength p;
for(p=0; p < ref.length-query.length-1; p++ ){
DNASequence subseq;
subseq.seq = &ref.seq[p];
subseq.length = query.length;
// cout << "t "; subseq.PrintSeq(cout);
// cout << "q "; ((DNASequence*)&query)->PrintSeq(cout);
if (HammingDistance(&subseq.seq[0], &query.seq[0], query.length) < maxHammingDistance) {
cout << ">" << p << endl;
subseq.PrintSeq(cout);
}
int i;
for (i =0; i < query.length; i++) {
subseq.seq[i] = toupper(subseq.seq[i]);
}
}
for(p=0; p < ref.length-query.length-1; p++ ){
DNASequence subseq;
subseq.seq = &refRC.seq[p];
subseq.length = query.length;
if (HammingDistance(&subseq.seq[0], &query.seq[0], query.length) < maxHammingDistance) {
cout << ">" << p << "rc" << endl;
subseq.PrintSeq(cout);
}
int i;
for (i =0; i < query.length; i++) {
subseq.seq[i] = toupper(subseq.seq[i]);
}
}
}
示例2: main
int main(int argc, char* argv[]) {
string inFileName, outFileName;
int length;
inFileName = argv[1];
outFileName = argv[2];
length = atoi(argv[3]);
int argi = 4;
int stride = 0;
float coverage = 0;
while (argi < argc) {
if (strcmp(argv[argi], "-stride")) {
stride = atoi(argv[++argi]);
}
else if (strcmp(argv[argi], "-coverage")) {
coverage = atof(argv[++argi]);
}
++argi;
}
FASTAReader reader;
reader.Initialize(inFileName);
FASTASequence genome;
reader.GetNext(genome);
if (stride == 0 and coverage == 0) {
cout << "ERROR, must provide stride or coverage. " << endl;
exit(1);
}
if (stride == 0) {
stride = genome.length * coverage / length;
}
示例3: main
int main(int argc, char* argv[]) {
if (argc < 3) {
cout << "usage: testRandomSequence genome.fa ntries " << endl;
exit(0);
}
string inFile = argv[1];
int nSamples = atoi(argv[2]);
if (nSamples == 0) {
return 0;
}
FASTAReader reader;
reader.Initialize(inFile);
vector<FASTASequence> genome;
reader.ReadAllSequences(genome);
int i;
cout << "title pos" << endl;
for (i = 0; i < nSamples; i++) {
DNALength chrIndex, chrPos;
FindRandomPos(genome, chrIndex, chrPos);
cout << genome[chrIndex].title << " " << chrPos << endl;
}
return 0;
}
示例4: main
int main(int argc, char* argv[]) {
if (argc < 4) {
PrintUsage();
exit(1);
}
int argi = 1;
string saInFile = argv[argi++];
string genomeFileName = argv[argi++];
string saOutFile = argv[argi++];
vector<string> inFiles;
int doBLT = 0;
int doBLCP = 0;
int bltPrefixLength = 0;
int lcpLength = 0;
int parsingOptions = 0;
while (argi < argc) {
if (strcmp(argv[argi], "-blt") == 0) {
doBLT = 1;
bltPrefixLength = atoi(argv[++argi]);
}
else if (strcmp(argv[argi], "-blcp") == 0) {
doBLCP = 1;
lcpLength = atoi(argv[++argi]);
}
else {
PrintUsage();
cout << "Bad option: " << argv[argi] << endl;
exit(1);
}
++argi;
}
//
// Read the suffix array to modify.
//
DNASuffixArray sa;
sa.Read(saInFile);
FASTAReader reader;
reader.Initialize(genomeFileName);
FASTASequence seq;
reader.ReadAllSequencesIntoOne(seq);
if (doBLT) {
sa.BuildLookupTable(seq.seq, seq.length, bltPrefixLength);
}
if (doBLCP) {
cout << "LCP Table not yet implemented." << endl;
}
sa.Write(saOutFile);
}
示例5: main
int main(int argc, char* argv[])
{
std::string seqInName, seqOutName, dotOutName;
if (argc < 4) {
std::cout << "usage: exciseRepeats inName repMaskOutFile outName" << std::endl;
std::exit(EXIT_FAILURE);
}
seqInName = argv[1];
dotOutName = argv[2];
seqOutName = argv[3];
FASTAReader reader;
reader.Initialize(seqInName);
FASTASequence origSeq;
reader.GetNext(origSeq);
std::ifstream dotOutFile;
CrucialOpen(dotOutName, dotOutFile);
std::ofstream seqOutFile;
std::ofstream seqOut;
CrucialOpen(seqOutName, seqOut, std::ios::out);
std::string dotOutLine;
getline(dotOutFile, dotOutLine);
getline(dotOutFile, dotOutLine);
getline(dotOutFile, dotOutLine);
while (getline(dotOutFile, dotOutLine)) {
std::stringstream lineStrm(dotOutLine);
int swScore;
float pctDiv, pctDel, pctIns;
std::string query;
DNALength qPosBegin, qPosEnd;
std::string left;
char strand;
std::string matchingRepeat;
std::string repClass;
std::string repPos, repEnd, repLeft;
int id;
lineStrm >> swScore >> pctDiv >> pctDel >> pctIns >> query >> qPosBegin >> qPosEnd >>
left >> strand >> matchingRepeat >> repClass >> repPos >> repEnd >> repLeft >> id;
for (DNALength seqPos = qPosBegin; seqPos < qPosEnd; seqPos++) {
origSeq.seq[seqPos] = 'X';
}
}
DNALength seqPos, unexPos;
unexPos = 0;
for (seqPos = 0; seqPos < origSeq.length; seqPos++) {
if (origSeq.seq[seqPos] != 'X') {
origSeq.seq[unexPos] = origSeq.seq[seqPos];
unexPos++;
}
}
origSeq.length = unexPos;
origSeq.PrintSeq(seqOut);
return 0;
}
示例6: main
int main(int argc, char* argv[1]) {
if (argc < 3) {
cout << "Usage: findUnique genome.fasta query.fasta effective_k [options]" << endl;
cout << " genome.fasta.sa must exist." << endl;
cout << " Finds sequences at least effective_k in length that are unique." << endl;
cout << " -max m Allow up to m matches" << endl;
cout << " -minLength l Ensure the length of the match is at least this." << endl;
cout << " -prefix p n Allow up to n matches across a prefix of length p" << endl;
cout << " -suffix s n Allow up to n matches across a suffix of length s" << endl;
cout << " Prefix and suffix options override max." << endl;
cout << " -out file Print queries to this output file (query.fasta.queries)" << endl;
exit(0);
}
DNASuffixArray sarray;
string genomeFileName = argv[1];
string suffixArrayFileName = genomeFileName + ".sa";
FASTAReader reader;
FASTASequence genome;
int maxN = 0;
int prefix = 0;
int suffix = 0;
int prefixN = 0;
int suffixN = 0;
int argi = 4;
string outputFileName = "";
int minLength = 0;
while (argi < argc) {
if (strcmp(argv[argi], "-max") == 0) {
++argi;
maxN = atoi(argv[argi]);
}
else if (strcmp(argv[argi], "-prefix") == 0) {
++argi;
prefix = atoi(argv[argi]);
++argi;
prefixN = atoi(argv[argi]);
}
else if (strcmp(argv[argi], "-suffix") == 0) {
++argi;
suffix = atoi(argv[argi]);
++argi;
suffixN = atoi(argv[argi]);
}
else if (strcmp(argv[argi], "-out") == 0) {
++argi;
outputFileName = argv[argi];
}
else if (strcmp(argv[argi], "-minLength") == 0) {
++argi;
minLength = atoi(argv[argi]);
}
++argi;
}
reader.Initialize(genomeFileName);
reader.ReadAllSequencesIntoOne(genome);
sarray.Read(suffixArrayFileName);
FASTAReader queryReader;
FASTASequence querySequence;
string queryFileName = argv[2];
int maxLength = atoi(argv[3]);
string summaryTableFileName = queryFileName + ".summary";
if (outputFileName == "") {
outputFileName = queryFileName + ".queries";
}
ofstream summaryTable(summaryTableFileName.c_str());
ofstream outputFile(outputFileName.c_str());
queryReader.Initialize(queryFileName);
while (queryReader.GetNext(querySequence)) {
int i;
cerr << "searching " << querySequence.title << endl;
if (querySequence.length < maxLength) {
continue;
}
int nMatches = 0;
querySequence.ToUpper();
int localMax;
for (i = 0; i < querySequence.length - maxLength + 1; i++) {
if ((i + 1) % 100000 == 0) {
cerr << "processed: " << i + 1 << endl;
}
int lcpLength;
vector<SAIndex> lcpLeftBounds, lcpRightBounds;
vector<SAIndex> rclcpLeftBounds, rclcpRightBounds;
localMax = maxN;
if (i < prefix) {
localMax = prefixN;
}
//.........这里部分代码省略.........
示例7: main
int main(int argc, char* argv[]) {
CommandLineParser clp;
string readsFileName;
string alignmentsFileName;
string outputFileName;
float minMergeIdentity = 0.70;
clp.RegisterStringOption("reads", &readsFileName, "Reads used for alignments.");
clp.RegisterStringOption("alignments", &alignmentsFileName, "SAM formatted alignments.");
clp.RegisterIntOption("k", &vertexSize, "Minimum match length", CommandLineParser::PositiveInteger);
clp.RegisterStringOption("outfile", &outputFileName, "Alignment output.");
clp.RegisterPreviousFlagsAsHidden();
clp.RegisterFlagOption("v", &verbose, "");
clp.RegisterFloatOption("minMergeIdentity",
&minMergeIdentity,
"Minimum identity to merge paths.", CommandLineParser::PositiveFloat);
clp.ParseCommandLine(argc, argv);
if (minMergeIdentity < 0 or minMergeIdentity > 1) {
cout << "ERROR. minMergeIdentity must be between 0 and 1" << endl;
exit(1);
}
vector<FASTASequence> reads;
FASTAReader fastaReader;
fastaReader.Initialize(readsFileName);
fastaReader.ReadAllSequences(reads);
//
// It is necessary to go from read title to index in the list of reads.
//
map<string, int> readNameToIndex;
BuildReadNameToIndexMap(reads, readNameToIndex);
ReadWordMatchVector readWordMatches;
InitializeFromReads(reads, readWordMatches);
//
// Get ready to read in the alignments.
//
SAMReader<SAMFullReferenceSequence, SAMReadGroup, SAMPosAlignment> samReader;
samReader.Initialize(alignmentsFileName);
AlignmentSet<SAMFullReferenceSequence, SAMReadGroup, SAMPosAlignment> alignmentSet;
samReader.ReadHeader(alignmentSet);
SAMAlignment samAlignment;
AlignmentCandidate<> alignment;
int numAlignedBases = 0;
int alignmentIndex = 0;
while ( samReader.GetNextAlignment( samAlignment ) ) {
vector<AlignmentCandidate<> > alignments;
SAMAlignmentsToCandidates(samAlignment,
reads,
readNameToIndex,
alignments, false, true);
int i;
++alignmentIndex;
int a;
for (a = 0; a < alignments.size();a++) {
if (alignments[a].qName != alignments[a].tName) {
MarkMatches(alignments[a], readNameToIndex, vertexSize, readWordMatches);
}
}
if (alignmentIndex % 1000 == 0) {
cout << alignmentIndex << endl;
}
}
int numMatches = 0;
int parentIndex = 1;
int r;
for (r = 0; r < readWordMatches.size(); r++) {
readWordMatches[r].CreateParents();
numMatches += readWordMatches[r].pos.size();
}
vector<int> parentIndices;
parentIndices.resize(2*numMatches + 1);
fill(parentIndices.begin(), parentIndices.end(), 0);
//
// Start indexing off at 1 so that 0 does not need to be treated in
// a special case.
//
int curParentIndex = 1;
cout << "There are " << numMatches << " matches." << endl;
samReader.Close();
samReader.Initialize(alignmentsFileName);
AlignmentSet<SAMFullReferenceSequence, SAMReadGroup, SAMPosAlignment> alignmentSet2;
samReader.ReadHeader(alignmentSet2);
numAlignedBases = 0;
alignmentIndex = 0;
while ( samReader.GetNextAlignment( samAlignment ) ) {
vector<AlignmentCandidate<> > alignments;
SAMAlignmentsToCandidates(samAlignment,
//.........这里部分代码省略.........
示例8: main
int main(int argc, char* argv[]) {
string refFileName, notNormalFileName, normalFileName;
if (argc < 4) {
cout << "usage: normalizeGCContent ref source dest " << endl
<< " flips the C/Gs in source randomly until they are the same gc content as ref." << endl;
exit(1);
}
refFileName = argv[1];
notNormalFileName = argv[2];
normalFileName = argv[3];
FASTAReader reader;
FASTAReader queryReader;
FASTASequence ref;
vector<FASTASequence> querySequences;
int queryTotalLength;
reader.Initialize(refFileName);
reader.ReadAllSequencesIntoOne(ref);
queryReader.Initialize(notNormalFileName);
int refCounts[5], queryCounts[5];
int s;
refCounts[0] = refCounts[1] =refCounts[2] = refCounts[3] = refCounts[4] = 0;
queryCounts[0] = queryCounts[1] =queryCounts[2] = queryCounts[3] = queryCounts[4] = 0;
queryReader.ReadAllSequences(querySequences);
ofstream normOut;
CrucialOpen(normalFileName, normOut);
CountNucs(ref, refCounts);
float refGC = (1.0*refCounts[TwoBit['c']] + refCounts[TwoBit['g']]) / (refCounts[TwoBit['a']] + refCounts[TwoBit['c']] + refCounts[TwoBit['g']] + refCounts[TwoBit['t']]);
int q;
for (q = 0; q < querySequences.size(); q++) {
CountNucs(querySequences[q], queryCounts);
}
float queryGC = (1.0*queryCounts[TwoBit['c']] + queryCounts[TwoBit['g']]) / (queryCounts[TwoBit['a']] + queryCounts[TwoBit['c']] + queryCounts[TwoBit['g']] + queryCounts[TwoBit['t']]);
float gcToat = 0.0;
float atTogc = 0.0;
if (refGC > queryGC) {
atTogc = (refGC - queryGC);
}
else {
gcToat = (queryGC - refGC);
}
DNALength queryGenomeLength = queryCounts[0] + queryCounts[1] + queryCounts[2] + queryCounts[3] + queryCounts[4];
DNALength unmaskedQueryLength = queryCounts[0] + queryCounts[1] + queryCounts[2] + queryCounts[3];
DNALength ngc2at = unmaskedQueryLength * gcToat;
DNALength nat2gc = unmaskedQueryLength * atTogc;
cout << refGC << " " << queryGC << " " << gcToat << " " << atTogc << " " << ngc2at << " " << nat2gc << endl;
vector<FASTASequence> normalized;
normalized.resize(querySequences.size());
vector<DNALength> cumLengths;
cumLengths.resize(normalized.size()+1);
cumLengths[0] = 0;
for (q = 0; q < querySequences.size(); q++) {
normalized[q] = querySequences[q];
cumLengths[q+1] = cumLengths[q] + querySequences[q].length;
}
DNALength i;
for (i = 0; i < ngc2at; i+=2) {
DNALength pos, chr;
FindRandomNuc(normalized, queryGenomeLength, cumLengths, 'G', chr, pos);
normalized[chr].seq[pos] = 'A';
FindRandomNuc(normalized, queryGenomeLength, cumLengths, 'C', chr, pos);
normalized[chr].seq[pos] = 'T';
}
for (i = 0; i < nat2gc; i+=2) {
DNALength pos, chr;
FindRandomNuc(normalized, queryGenomeLength, cumLengths, 'A', chr, pos);
normalized[chr].seq[pos] = 'g';
FindRandomNuc(normalized, queryGenomeLength, cumLengths, 'T', chr, pos);
normalized[chr].seq[pos] = 'c';
}
for (q = 0; q < normalized.size(); q++ ){
normalized[q].PrintSeq(normOut);
}
}
示例9: main
int main(int argc, char* argv[]) {
string program = "samtoh5";
string versionString = VERSION;
AppendPerforceChangelist(PERFORCE_VERSION_STRING, versionString);
string samFileName, cmpFileName, refFileName;
bool parseSmrtTitle = false;
bool useShortRefName = false;
CommandLineParser clp;
string readType = "standard";
int verbosity = 0;
clp.SetProgramName(program);
clp.SetProgramSummary("Converts in.sam file to out.cmp.h5 file.");
clp.SetVersion(versionString);
clp.RegisterStringOption("in.sam", &samFileName,
"Input SAM file.", true);
clp.RegisterStringOption("reference.fasta", &refFileName,
"Reference used to generate reads.", true);
clp.RegisterStringOption("out.cmp.h5", &cmpFileName,
"Output cmp.h5 file.", true);
clp.RegisterPreviousFlagsAsHidden();
clp.RegisterFlagOption("smrtTitle", &parseSmrtTitle,
"Use this option when converting alignments "
"generated from reads produced by the "
"pls2fasta from bas.h5 files by parsing read "
"coordinates from the SMRT read title. The title "
"is in the format /name/hole/coordinates, where "
"coordinates are in the format \\d+_\\d+, and "
"represent the interval of the read that was "
"aligned.");
clp.RegisterStringOption("readType", &readType,
"Set the read type: 'standard', 'strobe', 'CCS', "
"or 'cDNA'");
clp.RegisterIntOption("verbosity", &verbosity,
"Set desired verbosity.",
CommandLineParser::PositiveInteger);
clp.RegisterFlagOption("useShortRefName", &useShortRefName,
"Use abbreviated reference names obtained "
"from file.sam instead of using full names "
"from reference.fasta.");
string description = ("Because SAM has optional tags that have different "
"meanings in different programs, careful usage is required in order to "
"have proper output. The \"xs\" tag in bwa-sw is used to show the "
"suboptimal score, but in PacBio SAM (blasr) it is defined as the start "
"in the query sequence of the alignment.\nWhen \"-smrtTitle\" is "
"specified, the xs tag is ignored, but when it is not specified, the "
"coordinates given by the xs and xe tags are used to define the interval "
"of a read that is aligned. The CIGAR string is relative to this interval.");
clp.SetExamples(description);
clp.ParseCommandLine(argc, argv);
if (readType != "standard" and readType != "strobe" and
readType != "cDNA" and readType != "CCS") {
cout << "ERROR. Read type '" << readType
<< "' must be one of either 'standard', 'strobe', 'cDNA' or 'CCS'."
<< endl;
exit(1);
}
cerr << "[INFO] " << GetTimestamp() << " [" << program << "] started." << endl;
SAMReader<SAMFullReferenceSequence, SAMReadGroup, SAMPosAlignment> samReader;
FASTAReader fastaReader;
HDFCmpFile<AlignmentCandidate<FASTASequence, FASTASequence> > cmpFile;
//
// Initialize input/output files.
//
samReader.Initialize(samFileName);
fastaReader.Initialize(refFileName);
cmpFile.Create(cmpFileName);
//
// Configure the file log.
//
string command;
CommandLineParser::CommandLineToString(argc, argv, command);
string log = "Convert sam to cmp.h5";
cmpFile.fileLogGroup.AddEntry(command, log, program, GetTimestamp(), versionString);
//
// Set the readType
//
cmpFile.SetReadType(readType);
//
// Read necessary input.
//
vector<FASTASequence> references;
fastaReader.ReadAllSequences(references);
//
// This should probably be handled by the alignmentSetAdapter, but
// time constraints...
//
AlignmentSet<SAMFullReferenceSequence, SAMReadGroup, SAMPosAlignment> alignmentSet;
samReader.ReadHeader(alignmentSet);
//.........这里部分代码省略.........
示例10: main
int main(int argc, char* argv[]) {
string gencodeGffFileName, genomeFileName, genesOutFileName;
string geneType = "protein_coding";
bool randomSplicing = false;
int numRandomSplicing = 1;
float pSkip = 0.5;
if (argc < 4) {
cout << "Usage: extractGenes gencodeGTFFile genomeFile genesOutFileName [-geneType type (protein_coding)] [-randomSplicing] [-numRandomSplicing n] [-pSkip prob (0-1, default:0.5)]" << endl;
exit(1);
}
gencodeGffFileName = argv[1];
genomeFileName = argv[2];
genesOutFileName = argv[3];
int argi = 4;
string coordinatesFileName;
while (argi < argc) {
if (strcmp(argv[argi], "-geneType") == 0) {
geneType = argv[++argi];
}
else if (strcmp(argv[argi], "-randomSplicing") == 0) {
randomSplicing = true;
}
else if (strcmp(argv[argi], "-numRandomSplicing") == 0) {
numRandomSplicing = atoi(argv[++argi]);
}
else if (strcmp(argv[argi], "-pSkip") == 0) {
pSkip = atof(argv[++argi]);
}
else {
cout << "ERROR, bad option " << argv[argi] << endl;
exit(1);
}
++argi;
}
coordinatesFileName = genesOutFileName;
coordinatesFileName.append(".pos");
FASTAReader reader;
reader.Initialize(genomeFileName);
ofstream outFile, coordsFile;
CrucialOpen(genesOutFileName, outFile, std::ios::out);
string coordsFileName = genesOutFileName + ".coords";
CrucialOpen(coordsFileName, coordsFile, std::ios::out);
vector<FASTASequence> referenceSequences;
reader.ReadAllSequences(referenceSequences);
int i;
map<string, int> titleToIndex;
for (i = 0; i < referenceSequences.size(); i++) {
titleToIndex[referenceSequences[i].title] = i;
}
GencodeGFFFile gencodeFile;
gencodeFile.ReadAll(gencodeGffFileName);
vector<GencodeGFFGene> genes;
IndexGencodeGenes(gencodeFile, genes, geneType);
for (i = 0; i < genes.size(); i++) {
genes[i].OrderExonsByStart();
}
int e;
for (i = 0; i < genes.size(); i++) {
FASTASequence geneSequence;
geneSequence.CopyTitle(genes[i].geneName);
if (titleToIndex.find(genes[i].chromosome) == titleToIndex.end()) {
continue;
}
int chrIndex = titleToIndex[genes[i].chromosome];
string sequence = "";
//
// Do nothing with 0 length exons.
//
if (genes[i].exons.size() == 0) {
continue;
}
vector<FASTASequence> geneSequences;
vector<GeneCoordinates> geneCoordinates;
genes[i].GenerateGeneSequences(referenceSequences[chrIndex], geneSequences, geneCoordinates, randomSplicing);
int gi;
for (gi = 0; gi < geneSequences.size(); gi++) {
if (genes[i].GetStrand() == '+') {
geneSequences[gi].PrintSeq(outFile);
}
else {
FASTASequence rc;
geneSequences[gi].MakeRC(rc);
rc.PrintSeq(outFile);
rc.Free();
}
coordsFile << geneSequences[gi].title << " " << geneCoordinates[gi].chromosome << " " << geneCoordinates[gi].exonCoordinates.size() << " " << geneCoordinates[gi].strand;
int i;
for (i = 0; i < geneCoordinates[gi].exonCoordinates.size(); i++) {
coordsFile << " "
//.........这里部分代码省略.........
示例11: main
int main(int argc, char* argv[]) {
string program = "samtom4";
string versionString = VERSION;
AppendPerforceChangelist(PERFORCE_VERSION_STRING, versionString);
string samFileName, refFileName, outFileName;
bool printHeader = false;
bool parseSmrtTitle = false;
bool useShortRefName = false;
CommandLineParser clp;
clp.SetProgramName(program);
clp.SetVersion(versionString);
clp.SetProgramSummary("Converts a SAM file generated by blasr to M4 format.");
clp.RegisterStringOption("in.sam", &samFileName,
"Input SAM file, which is produced by blasr.");
clp.RegisterStringOption("reference.fasta", &refFileName,
"Reference used to generate file.sam.");
clp.RegisterStringOption("out.m4", &outFileName,
"Output in blasr M4 format.");
clp.RegisterPreviousFlagsAsHidden();
clp.RegisterFlagOption("header", &printHeader,
"Print M4 header.");
clp.RegisterFlagOption("useShortRefName", &useShortRefName,
"Use abbreviated reference names obtained "
"from file.sam instead of using full names "
"from reference.fasta.");
//clp.SetExamples(program + " file.sam reference.fasta out.m4");
clp.ParseCommandLine(argc, argv);
ostream * outFilePtr = &cout;
ofstream outFileStrm;
if (outFileName != "") {
CrucialOpen(outFileName, outFileStrm, std::ios::out);
outFilePtr = &outFileStrm;
}
SAMReader<SAMFullReferenceSequence, SAMReadGroup, SAMAlignment> samReader;
FASTAReader fastaReader;
//
// Initialize samReader and fastaReader.
//
samReader.Initialize(samFileName);
fastaReader.Initialize(refFileName);
//
// Configure the file log.
//
string command;
CommandLineParser::CommandLineToString(argc, argv, command);
//
// Read necessary input.
//
vector<FASTASequence> references;
fastaReader.ReadAllSequences(references);
AlignmentSet<SAMFullReferenceSequence, SAMReadGroup, SAMAlignment> alignmentSet;
samReader.ReadHeader(alignmentSet);
//
// The order of references in vector<FASTASequence> references and
// AlignmentSet<, , >alignmentSet.references can be different.
// Rearrange alignmentSet.references such that it is ordered in
// exactly the same way as vector<FASTASequence> references.
//
alignmentSet.RearrangeReferences(references);
//
// Map short names for references obtained from file.sam to
// full names obtained from reference.fasta
//
map<string, string> shortRefNameToFull;
map<string, string>::iterator it;
assert(references.size() == alignmentSet.references.size());
if (!useShortRefName) {
for (size_t i = 0; i < references.size(); i++) {
string shortRefName = alignmentSet.references[i].GetSequenceName();
string fullRefName(references[i].title);
if (shortRefNameToFull.find(shortRefName) != shortRefNameToFull.end()) {
cout << "ERROR, Found more than one reference " << shortRefName << "in sam header" << endl;
exit(1);
}
shortRefNameToFull[shortRefName] = fullRefName;
alignmentSet.references[i].sequenceName = fullRefName;
}
}
// Map reference name obtained from SAM file to indices
map<string, int> refNameToIndex;
for (size_t i = 0; i < references.size(); i++) {
string refName = alignmentSet.references[i].GetSequenceName();
refNameToIndex[refName] = i;
}
//
// Store the alignments.
//
//.........这里部分代码省略.........
示例12: main
int main(int argc, char* argv[]) {
string cmpFileName;
string refFileName;
string readsFileName;
string mapqvTrackName;
if (argc < 2) {
cout << " printMapqvTrack: print a gff file of the average mapping quality value" << endl;
exit(1);
}
vector<int> refPositions;
cmpFileName = argv[1];
refFileName = argv[2];
mapqvFileName = argv[3];
CmpFile cmpFile;
FASTASequence ref;
FASTAReader reader;
reader.Initialize(refFileName);
reader.GetNext(ref);
HDFBasReader basReader;
SMRTSequence seq, *seqPtr;
vector<int> refCoverage;
refCoverage.resize(ref.length);
std::fill(refCoverage.begin(), refCoverage.end(), 0);
/*
* These guys pull information from the same pls file.
*/
HDFCmpReader<CmpAlignment> cmpReader;
if (cmpReader.Initialize(cmpFileName) == 0) {
cout << "ERROR, could not open the cmp file." << endl;
exit(1);
}
cmpReader.Read(cmpFile);
UInt alignmentIndex;
// movieIndexSets.resize(nMovies);
for (alignmentIndex = 0; alignmentIndex < cmpFile.alnInfo.alignments.size(); alignmentIndex++) {
int refSeqId = cmpFile.alnInfo.alignments[alignmentIndex].GetRefSeqId();
int readGroupId = cmpFile.alnInfo.alignments[alignmentIndex].GetReadGroupId();
int refSeqIdIndex;
if (cmpFile.refSeqTable.GetIndexOfId(refSeqId, refSeqIdIndex) == false) {
//
// Sanity check -- we're only looking at alignments to references in the cmp file.
//
cout << "ERROR, ref seq id: " << refSeqId << " should exist in the cmp file but it does not." << endl;
assert(0);
}
int readGroupIdIndex;
cmpFile.readGroupTable.GetIndexOfId(readGroupId, readGroupIdIndex);
string readGroupPath = cmpFile.readGroupTable.names[readGroupIdIndex];
string readGroup = cmpReader.readGroupPathToReadGroup[readGroupPath];
int readGroupArrayIndex = cmpReader.refAlignGroups[refSeqIdIndex]->experimentNameToIndex[readGroup];
vector<char> alignedSequence, alignedTarget;
//
// This read overlaps one of the ref positions.
UInt offsetEnd, offsetBegin;
offsetEnd = cmpFile.alnInfo.alignments[alignmentIndex].GetOffsetEnd();
offsetBegin = cmpFile.alnInfo.alignments[alignmentIndex].GetOffsetBegin();
vector<unsigned char> byteAlignment;
int alignedSequenceLength = offsetEnd - offsetBegin;
if (alignedSequenceLength >= 0) {
alignedSequence.resize(alignedSequenceLength);
alignedTarget.resize(alignedSequenceLength);
byteAlignment.resize(alignedSequenceLength);
}
cmpReader.refAlignGroups[refSeqIdIndex]->readGroups[readGroupArrayIndex]->alignmentArray.Read(offsetBegin, offsetEnd, &byteAlignment[0]);
UInt refStart = cmpFile.alnInfo.alignments[alignmentIndex].GetRefStart();
UInt refEnd = cmpFile.alnInfo.alignments[alignmentIndex].GetRefEnd();
UInt readStart= cmpFile.alnInfo.alignments[alignmentIndex].GetQueryStart();
UInt readEnd = cmpFile.alnInfo.alignments[alignmentIndex].GetQueryEnd();
//
// Read the alignment string.
//
if (refSeqIdIndex > 0) continue;
//
// Convert to something we can compare easily.
//
alignedSequence[alignedSequence.size()-1]= '\0';
ByteAlignmentToQueryString(&byteAlignment[0], byteAlignment.size(), &alignedSequence[0]);
ByteAlignmentToRefString(&byteAlignment[0], byteAlignment.size(), &alignedTarget[0]);
//.........这里部分代码省略.........
示例13: main
//.........这里部分代码省略.........
string errMsg;
if (not filterCriteria.MakeSane(errMsg)) {
cout << errMsg << endl;
exit(1);
}
// Parse hole number ranges.
if (holeNumberStr.size() != 0) {
if (not holeNumberRanges.setRanges(holeNumberStr)) {
cout << "Could not parse hole number ranges: "
<< holeNumberStr << "." << endl;
exit(1);
}
}
// Open output file.
ostream * outFilePtr = &cout;
ofstream outFileStrm;
if (outFileName != "") {
CrucialOpen(outFileName, outFileStrm, std::ios::out);
outFilePtr = &outFileStrm;
}
GFFFile adapterGffFile;
if (adapterGffFileName != "")
adapterGffFile.ReadAll(adapterGffFileName);
SAMReader<SAMFullReferenceSequence, SAMReadGroup, SAMAlignment> samReader;
FASTAReader fastaReader;
//
// Initialize samReader and fastaReader.
//
samReader.Initialize(samFileName);
fastaReader.Initialize(refFileName);
//
// Configure the file log.
//
string command;
CommandLineParser::CommandLineToString(argc, argv, command);
string log = "Filter sam hits.";
string program = "samFilter";
string versionString = VERSION;
AppendPerforceChangelist(PERFORCE_VERSION_STRING, versionString);
//
// Read necessary input.
//
vector<FASTASequence> references;
fastaReader.ReadAllSequences(references);
// If the SAM file is generated by blasr with -titleTable,
// then references in the SAM are represented by
// their corresponding indices in the title table.
// In that case, we need to convert reference titles in fasta file
// to their corresponding indices in the title table, such that
// references in both SAM and fasta files are represented
// by title table indices and therefore can match.
if (titleTableName != "") {
ConvertTitlesToTitleTableIndices(references, titleTableName);
}
AlignmentSet<SAMFullReferenceSequence, SAMReadGroup, SAMAlignment> alignmentSet;
vector<string> allHeaders = samReader.ReadHeader(alignmentSet);