本文整理汇总了C++中FASTAReader::ReadAllSequencesIntoOne方法的典型用法代码示例。如果您正苦于以下问题：C++ FASTAReader::ReadAllSequencesIntoOne方法的具体用法？C++ FASTAReader::ReadAllSequencesIntoOne怎么用？C++ FASTAReader::ReadAllSequencesIntoOne使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类FASTAReader的用法示例。
在下文中一共展示了FASTAReader::ReadAllSequencesIntoOne方法的5个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: main
// Loads an existing suffix array, optionally builds a lookup table over the
// genome it indexes, and writes the result to a new file.
//
// Command line: saInFile genomeFileName saOutFile [-blt prefixLength] [-blcp lcpLength]
//   -blt  n   build a lookup table with prefix length n
//   -blcp n   request an LCP table (only prints a "not yet implemented" notice)
//
// Exits with status 1 after printing usage when fewer than three positional
// arguments are given, when an unknown option is seen, or when an option is
// missing its value.
int main(int argc, char* argv[]) {
  if (argc < 4) {
    PrintUsage();
    exit(1);
  }

  int argi = 1;
  string saInFile = argv[argi++];
  string genomeFileName = argv[argi++];
  string saOutFile = argv[argi++];

  int doBLT = 0;
  int doBLCP = 0;
  int bltPrefixLength = 0;
  int lcpLength = 0;

  while (argi < argc) {
    const bool isBLT = strcmp(argv[argi], "-blt") == 0;
    const bool isBLCP = strcmp(argv[argi], "-blcp") == 0;
    if (!isBLT && !isBLCP) {
      PrintUsage();
      cout << "Bad option: " << argv[argi] << endl;
      exit(1);
    }
    // Both options take exactly one value. Without this check a trailing
    // "-blt" or "-blcp" would hand argv[argc] (a null pointer) to atoi.
    if (argi + 1 >= argc) {
      PrintUsage();
      cout << "Missing value for option: " << argv[argi] << endl;
      exit(1);
    }
    if (isBLT) {
      doBLT = 1;
      bltPrefixLength = atoi(argv[++argi]);
    }
    else {
      doBLCP = 1;
      lcpLength = atoi(argv[++argi]);
    }
    ++argi;
  }

  //
  // Read the suffix array to modify.
  //
  DNASuffixArray sa;
  sa.Read(saInFile);

  // The genome is read as one concatenated sequence.
  FASTAReader reader;
  reader.Initialize(genomeFileName);
  FASTASequence seq;
  reader.ReadAllSequencesIntoOne(seq);

  if (doBLT) {
    sa.BuildLookupTable(seq.seq, seq.length, bltPrefixLength);
  }
  if (doBLCP) {
    // lcpLength is parsed but has no consumer yet.
    cout << "LCP Table not yet implemented." << endl;
  }
  sa.Write(saOutFile);
  return 0;
}
示例2: main
// findUnique: for each query sequence, scans windows of effective_k bases
// against a genome and its suffix array (read from "<genome.fasta>.sa").
//
// Command line: findUnique genome.fasta query.fasta effective_k [options]
// (the options are listed in the usage text below).
//
// Only the beginning of this function is visible in this excerpt; the body of
// the per-position search loop is cut off.
//
// NOTE(review): `char* argv[1]` is adjusted by the compiler to `char**`, so it
// behaves like the conventional `char* argv[]`; the `1` has no effect.
int main(int argc, char* argv[1]) {
// NOTE(review): this guard lets argc == 3 through, but argv[3] is read
// unconditionally further down (atoi(argv[3])). With argc == 3, argv[3] is the
// terminating null pointer. The usage text lists three positional arguments,
// so the guard presumably should be `argc < 4` -- confirm.
if (argc < 3) {
cout << "Usage: findUnique genome.fasta query.fasta effective_k [options]" << endl;
cout << " genome.fasta.sa must exist." << endl;
cout << " Finds sequences at least effective_k in length that are unique." << endl;
cout << " -max m Allow up to m matches" << endl;
cout << " -minLength l Ensure the length of the match is at least this." << endl;
cout << " -prefix p n Allow up to n matches across a prefix of length p" << endl;
cout << " -suffix s n Allow up to n matches across a suffix of length s" << endl;
cout << " Prefix and suffix options override max." << endl;
cout << " -out file Print queries to this output file (query.fasta.queries)" << endl;
// Usage is printed and the program exits with status 0 (not an error status).
exit(0);
}
DNASuffixArray sarray;
string genomeFileName = argv[1];
// The suffix array file name is derived from the genome file name.
string suffixArrayFileName = genomeFileName + ".sa";
FASTAReader reader;
FASTASequence genome;
int maxN = 0;
int prefix = 0;
int suffix = 0;
int prefixN = 0;
int suffixN = 0;
// Options start after the three positional arguments.
int argi = 4;
string outputFileName = "";
int minLength = 0;
// Option parsing.
// NOTE(review): every option advances argi and reads argv[argi] without
// checking argi < argc, so an option given without its value(s) passes a null
// pointer to atoi / string assignment. Arguments matching none of the options
// are skipped silently (there is no final else branch).
while (argi < argc) {
if (strcmp(argv[argi], "-max") == 0) {
++argi;
maxN = atoi(argv[argi]);
}
else if (strcmp(argv[argi], "-prefix") == 0) {
// Takes two values: prefix length, then the match limit for that prefix.
++argi;
prefix = atoi(argv[argi]);
++argi;
prefixN = atoi(argv[argi]);
}
else if (strcmp(argv[argi], "-suffix") == 0) {
// Takes two values: suffix length, then the match limit for that suffix.
++argi;
suffix = atoi(argv[argi]);
++argi;
suffixN = atoi(argv[argi]);
}
else if (strcmp(argv[argi], "-out") == 0) {
++argi;
outputFileName = argv[argi];
}
else if (strcmp(argv[argi], "-minLength") == 0) {
++argi;
minLength = atoi(argv[argi]);
}
++argi;
}
// Load the genome as one concatenated sequence, then its suffix array.
reader.Initialize(genomeFileName);
reader.ReadAllSequencesIntoOne(genome);
sarray.Read(suffixArrayFileName);
FASTAReader queryReader;
FASTASequence querySequence;
string queryFileName = argv[2];
// Third positional argument (effective_k in the usage text): the window length.
int maxLength = atoi(argv[3]);
string summaryTableFileName = queryFileName + ".summary";
// Default output name when -out was not given.
if (outputFileName == "") {
outputFileName = queryFileName + ".queries";
}
ofstream summaryTable(summaryTableFileName.c_str());
ofstream outputFile(outputFileName.c_str());
queryReader.Initialize(queryFileName);
// Process the query file one sequence at a time.
while (queryReader.GetNext(querySequence)) {
int i;
// The title is logged before the length check, so skipped queries are logged too.
cerr << "searching " << querySequence.title << endl;
// Queries shorter than one window are skipped.
if (querySequence.length < maxLength) {
continue;
}
int nMatches = 0;
querySequence.ToUpper();
int localMax;
// Visit every start position that leaves room for a full window of maxLength.
for (i = 0; i < querySequence.length - maxLength + 1; i++) {
// Progress report every 100000 positions.
if ((i + 1) % 100000 == 0) {
cerr << "processed: " << i + 1 << endl;
}
int lcpLength;
vector<SAIndex> lcpLeftBounds, lcpRightBounds;
vector<SAIndex> rclcpLeftBounds, rclcpRightBounds;
// Match limit for this position: maxN by default, prefixN inside the prefix region.
localMax = maxN;
if (i < prefix) {
localMax = prefixN;
}
//.........这里部分代码省略.........
示例3: main
int main(int argc, char* argv[]) {
string refFileName, notNormalFileName, normalFileName;
if (argc < 4) {
cout << "usage: normalizeGCContent ref source dest " << endl
<< " flips the C/Gs in source randomly until they are the same gc content as ref." << endl;
exit(1);
}
refFileName = argv[1];
notNormalFileName = argv[2];
normalFileName = argv[3];
FASTAReader reader;
FASTAReader queryReader;
FASTASequence ref;
vector<FASTASequence> querySequences;
int queryTotalLength;
reader.Initialize(refFileName);
reader.ReadAllSequencesIntoOne(ref);
queryReader.Initialize(notNormalFileName);
int refCounts[5], queryCounts[5];
int s;
refCounts[0] = refCounts[1] =refCounts[2] = refCounts[3] = refCounts[4] = 0;
queryCounts[0] = queryCounts[1] =queryCounts[2] = queryCounts[3] = queryCounts[4] = 0;
queryReader.ReadAllSequences(querySequences);
ofstream normOut;
CrucialOpen(normalFileName, normOut);
CountNucs(ref, refCounts);
float refGC = (1.0*refCounts[TwoBit['c']] + refCounts[TwoBit['g']]) / (refCounts[TwoBit['a']] + refCounts[TwoBit['c']] + refCounts[TwoBit['g']] + refCounts[TwoBit['t']]);
int q;
for (q = 0; q < querySequences.size(); q++) {
CountNucs(querySequences[q], queryCounts);
}
float queryGC = (1.0*queryCounts[TwoBit['c']] + queryCounts[TwoBit['g']]) / (queryCounts[TwoBit['a']] + queryCounts[TwoBit['c']] + queryCounts[TwoBit['g']] + queryCounts[TwoBit['t']]);
float gcToat = 0.0;
float atTogc = 0.0;
if (refGC > queryGC) {
atTogc = (refGC - queryGC);
}
else {
gcToat = (queryGC - refGC);
}
DNALength queryGenomeLength = queryCounts[0] + queryCounts[1] + queryCounts[2] + queryCounts[3] + queryCounts[4];
DNALength unmaskedQueryLength = queryCounts[0] + queryCounts[1] + queryCounts[2] + queryCounts[3];
DNALength ngc2at = unmaskedQueryLength * gcToat;
DNALength nat2gc = unmaskedQueryLength * atTogc;
cout << refGC << " " << queryGC << " " << gcToat << " " << atTogc << " " << ngc2at << " " << nat2gc << endl;
vector<FASTASequence> normalized;
normalized.resize(querySequences.size());
vector<DNALength> cumLengths;
cumLengths.resize(normalized.size()+1);
cumLengths[0] = 0;
for (q = 0; q < querySequences.size(); q++) {
normalized[q] = querySequences[q];
cumLengths[q+1] = cumLengths[q] + querySequences[q].length;
}
DNALength i;
for (i = 0; i < ngc2at; i+=2) {
DNALength pos, chr;
FindRandomNuc(normalized, queryGenomeLength, cumLengths, 'G', chr, pos);
normalized[chr].seq[pos] = 'A';
FindRandomNuc(normalized, queryGenomeLength, cumLengths, 'C', chr, pos);
normalized[chr].seq[pos] = 'T';
}
for (i = 0; i < nat2gc; i+=2) {
DNALength pos, chr;
FindRandomNuc(normalized, queryGenomeLength, cumLengths, 'A', chr, pos);
normalized[chr].seq[pos] = 'g';
FindRandomNuc(normalized, queryGenomeLength, cumLengths, 'T', chr, pos);
normalized[chr].seq[pos] = 'c';
}
for (q = 0; q < normalized.size(); q++ ){
normalized[q].PrintSeq(normOut);
}
}
示例4: main
//.........这里部分代码省略.........
// Fragment of a suffix-array building main(): the start of the function
// (including the declarations of inFiles, saFile, read4BitCompressed and
// saBuildType) and its end are not visible in this excerpt.
//
// The lines below close the option-parsing loop: an unrecognized option
// prints an error and exits with status 1.
cout << "ERROR, bad option: " << argv[argi] << endl;
exit(1);
}
}
++argi;
}
if (inFiles.size() == 0) {
//
// Special use case: the input file is a fasta file. Write to that file + .sa
//
inFiles.push_back(saFile);
saFile = saFile + ".sa";
}
VectorIndex inFileIndex;
FASTASequence seq;
CompressedSequence<FASTASequence> compSeq;
// Uncompressed input: read every FASTA file and accumulate all of them in seq.
if (read4BitCompressed == 0) {
for (inFileIndex = 0; inFileIndex < inFiles.size(); ++inFileIndex) {
FASTAReader reader;
reader.Init(inFiles[inFileIndex]);
// NOTE(review): the padding is set for every reader, before and regardless
// of the kark check below, whose body holds only a comment. Presumably this
// call provides the "extra space" that comment describes -- confirm.
reader.SetSpacePadding(111);
if (saBuildType == kark) {
//
// The Karkkainen sa building method requires a little extra
// space at the end of the dna sequence so that counting may
// be done mod 3 without adding extra logic for boundaries.
//
}
if (inFileIndex == 0) {
// The first file is read in full into seq, then the reader is closed.
reader.ReadAllSequencesIntoOne(seq);
reader.Close();
}
else {
// Later files are appended to seq one call at a time.
// NOTE(review): reader.Close() is not called on this path; whether the
// FASTAReader destructor releases the file is not visible here.
while(reader.ConcatenateNext(seq)) {
// Prints the title stored in the accumulated seq after each append.
cout << "added " << seq.title << endl;
}
}
}
seq.ToThreeBit();
//seq.ToUpper();
}
else {
// Compressed input: exactly one input file is supported.
assert(inFiles.size() == 1);
cout << "reading compressed sequence." << endl;
compSeq.Read(inFiles[0]);
// seq takes over compSeq's buffer pointer and length; no copy is made here.
seq.seq = compSeq.seq;
seq.length = compSeq.length;
compSeq.RemoveCompressionCounts();
cout << "done." << endl;
}
//
// For now, do not allow creation of suffix arrays on sequences > 4G.
//
// NOTE(review): if seq.length is a 32-bit unsigned value, this comparison can
// only be true when length equals UINT_MAX exactly -- confirm the type.
if (seq.length >= UINT_MAX) {
cout << "ERROR, references greater than " << UINT_MAX << " bases are not supported." << endl;
cout << "Consider breaking the reference into multiple files, running alignment. " << endl;
cout << "against each file, and merging the result." << endl;
exit(1);
}
vector<int> alphabet;
示例5: main
int main(int argc, char* argv[]) {
string genomeFileName;
string suffixArrayFileName;
if (argc < 4) {
cout << "Usage: printWordCount genome suffixArray k [k2 k3 k4...]" << endl;
exit(1);
}
genomeFileName = argv[1];
suffixArrayFileName = argv[2];
int argi = 3;
vector<DNALength> k;
while (argi < argc) {
k.push_back(atoi(argv[argi]));
argi++;
}
// Get the ref sequence.
FASTAReader reader;
reader.Init(genomeFileName);
FASTASequence seq;
// reader.GetNext(seq);
reader.ReadAllSequencesIntoOne(seq);
seq.ToUpper();
// Get the suffix array.
DNASuffixArray sarray;
sarray.Read(suffixArrayFileName);
int ki;
char *word;
cout << "wordlen word nword" << endl;
for (ki = 0; ki < k.size(); ki++) {
word = new char[k[ki]+1];
word[k[ki]] = '\0';
DNALength i;
DNALength numUnique = 0;
for (i = 0; i < seq.length - k[ki] - 1; ) {
DNALength j = i + 1;
bool seqAtN = false;
int si;
for(si = 0; si < k[ki]; si++) {
if (seq.seq[sarray.index[i] + si] == 'N') {
seqAtN = true;
break;
}
}
if (seqAtN) {
i++;
continue;
}
while (j < seq.length - k[ki] and
seq.length - sarray.index[i] >= k[ki] and
seq.length - sarray.index[j] >= k[ki] and
strncmp((const char*) &seq.seq[sarray.index[i]], (const char*) &seq.seq[sarray.index[j]], k[ki]) == 0) {
j++;
}
if (seq.length - sarray.index[i] >= k[ki]) {
for(si = 0; si < k[ki]; si++) {
word[si] = seq.seq[sarray.index[i]+si];
}
cout << k[ki] << " " << word << " " << j - i + 1 << endl;
if (j == i + 1) {
++numUnique;
}
}
i = j;
}
}
}