本文整理汇总了C++中FASTAReader类的典型用法代码示例。如果您正苦于以下问题:C++ FASTAReader类的具体用法?C++ FASTAReader怎么用?C++ FASTAReader使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了FASTAReader类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: main
int main(int argc, char* argv[]) {
string seqFileName;
TupleMetrics tm;
string outFileName;
if (argc < 3) {
cout << "usage: storeTuplePosList seqFile tupleSize outFile" << endl;
return 0;
}
seqFileName = argv[1];
tm.tupleSize = atoi(argv[2]);
outFileName = argv[3];
ofstream outFile;
// CrucialOpen(outFileName, outFile, std::ios::out| std::ios::binary);
FASTAReader reader;
reader.Init(seqFileName);
FASTASequence seq;
reader.GetNext(seq);
// vector<PositionDNATuple>
TupleList<PositionDNATuple>tuplePosList;
tuplePosList.SetTupleMetrics(tm);
// StoreTuplePosList(seq, tm, tuplePosList);
SequenceToTupleList(seq, tm, tuplePosList);
tuplePosList.Sort();
tuplePosList.WriteToFile(outFileName); //WriteTuplePosList(tuplePosList, tm.tupleSize, outFile);
outFile.close();
return 0;
}
示例2: main
int main(int argc, char* argv[]) {
if (argc < 3) {
cout << "usage: testRandomSequence genome.fa ntries " << endl;
exit(0);
}
string inFile = argv[1];
int nSamples = atoi(argv[2]);
if (nSamples == 0) {
return 0;
}
FASTAReader reader;
reader.Initialize(inFile);
vector<FASTASequence> genome;
reader.ReadAllSequences(genome);
int i;
cout << "title pos" << endl;
for (i = 0; i < nSamples; i++) {
DNALength chrIndex, chrPos;
FindRandomPos(genome, chrIndex, chrPos);
cout << genome[chrIndex].title << " " << chrPos << endl;
}
return 0;
}
示例3: main
int main(int argc, char* argv[]) {
string inFileName, outFileName;
int length;
inFileName = argv[1];
outFileName = argv[2];
length = atoi(argv[3]);
int argi = 4;
int stride = 0;
float coverage = 0;
while (argi < argc) {
if (strcmp(argv[argi], "-stride")) {
stride = atoi(argv[++argi]);
}
else if (strcmp(argv[argi], "-coverage")) {
coverage = atof(argv[++argi]);
}
++argi;
}
FASTAReader reader;
reader.Initialize(inFileName);
FASTASequence genome;
reader.GetNext(genome);
if (stride == 0 and coverage == 0) {
cout << "ERROR, must provide stride or coverage. " << endl;
exit(1);
}
if (stride == 0) {
stride = genome.length * coverage / length;
}
示例4: main
int main(int argc, char* argv[]) {
if (argc < 3) {
cout << "usage: testBuildOccBins genomeFileName suffixArray" << endl;
exit(0);
}
string genomeFileName = argv[1];
string suffixArrayFileName = argv[2];
FASTAReader reader;
reader.Init(genomeFileName);
FASTASequence seq;
reader.GetNext(seq);
DNASuffixArray suffixArray;
suffixArray.Read(suffixArrayFileName);
Bwt<PackedDNASequence, FASTASequence> bwt;
//bwt.InitializeFromSuffixArray(seq, suffixArray.index);
bwt.InitializeBWTStringFromSuffixArray(seq, suffixArray.index);
bwt.occ.Initialize(bwt.bwtSequence, 4096, 64);
bwt.occ.PrintBins(cout);
}
示例5: main
int main(int argc, char* argv[]) {
if (argc < 4) {
PrintUsage();
exit(1);
}
int argi = 1;
string saInFile = argv[argi++];
string genomeFileName = argv[argi++];
string saOutFile = argv[argi++];
vector<string> inFiles;
int doBLT = 0;
int doBLCP = 0;
int bltPrefixLength = 0;
int lcpLength = 0;
int parsingOptions = 0;
while (argi < argc) {
if (strcmp(argv[argi], "-blt") == 0) {
doBLT = 1;
bltPrefixLength = atoi(argv[++argi]);
}
else if (strcmp(argv[argi], "-blcp") == 0) {
doBLCP = 1;
lcpLength = atoi(argv[++argi]);
}
else {
PrintUsage();
cout << "Bad option: " << argv[argi] << endl;
exit(1);
}
++argi;
}
//
// Read the suffix array to modify.
//
DNASuffixArray sa;
sa.Read(saInFile);
FASTAReader reader;
reader.Initialize(genomeFileName);
FASTASequence seq;
reader.ReadAllSequencesIntoOne(seq);
if (doBLT) {
sa.BuildLookupTable(seq.seq, seq.length, bltPrefixLength);
}
if (doBLCP) {
cout << "LCP Table not yet implemented." << endl;
}
sa.Write(saOutFile);
}
示例6: main
int main(int argc, char* argv[]) {
if (argc < 3) {
cout << "usage: bwtLocateList bwtName querySeqFile" << endl;
exit(1);
}
string bwtFileName = argv[1];
string querySeqFileName = argv[2];
bool doPrintResults = false;
int maxCount = 0;
int argi = 3;
bool countOnly = false;
while(argi < argc) {
if (strcmp(argv[argi], "-print") == 0) {
doPrintResults = true;
}
else if (strcmp(argv[argi], "-max") == 0) {
maxCount = atoi(argv[++argi]);
}
else if (strcmp(argv[argi], "-count") == 0) {
countOnly = true;
}
else {
cout << "bad option: " << argv[argi] << endl;
}
++argi;
}
Bwt<PackedDNASequence, FASTASequence> bwt;
bwt.Read(bwtFileName);
FASTAReader queryReader;
queryReader.Init(querySeqFileName);
FASTASequence seq;
int seqIndex = 0;
vector<DNALength> positions;
while(queryReader.GetNext(seq)) {
positions.clear();
if (countOnly == false) {
bwt.Locate(seq, positions, maxCount);
}
else {
DNALength sp,ep;
bwt.Count(seq, sp, ep);
}
// cout << "matched " << positions.size() << " positions." << endl;
if (doPrintResults) {
int i;
for (i = 0; i < positions.size(); i++ ){
cout << positions[i] << " ";
}
cout << endl;
}
++seqIndex;
}
// float wordCountsPerLookup = (bwt.bwtSequence.nCountInWord *1.0) / bwt.bwtSequence.nCountNuc;
// cout << "word counts per lookup: " << wordCountsPerLookup << endl;
return 0;
}
示例7: main
int main(int argc, char* argv[])
{
std::string seqInName, seqOutName, dotOutName;
if (argc < 4) {
std::cout << "usage: exciseRepeats inName repMaskOutFile outName" << std::endl;
std::exit(EXIT_FAILURE);
}
seqInName = argv[1];
dotOutName = argv[2];
seqOutName = argv[3];
FASTAReader reader;
reader.Initialize(seqInName);
FASTASequence origSeq;
reader.GetNext(origSeq);
std::ifstream dotOutFile;
CrucialOpen(dotOutName, dotOutFile);
std::ofstream seqOutFile;
std::ofstream seqOut;
CrucialOpen(seqOutName, seqOut, std::ios::out);
std::string dotOutLine;
getline(dotOutFile, dotOutLine);
getline(dotOutFile, dotOutLine);
getline(dotOutFile, dotOutLine);
while (getline(dotOutFile, dotOutLine)) {
std::stringstream lineStrm(dotOutLine);
int swScore;
float pctDiv, pctDel, pctIns;
std::string query;
DNALength qPosBegin, qPosEnd;
std::string left;
char strand;
std::string matchingRepeat;
std::string repClass;
std::string repPos, repEnd, repLeft;
int id;
lineStrm >> swScore >> pctDiv >> pctDel >> pctIns >> query >> qPosBegin >> qPosEnd >>
left >> strand >> matchingRepeat >> repClass >> repPos >> repEnd >> repLeft >> id;
for (DNALength seqPos = qPosBegin; seqPos < qPosEnd; seqPos++) {
origSeq.seq[seqPos] = 'X';
}
}
DNALength seqPos, unexPos;
unexPos = 0;
for (seqPos = 0; seqPos < origSeq.length; seqPos++) {
if (origSeq.seq[seqPos] != 'X') {
origSeq.seq[unexPos] = origSeq.seq[seqPos];
unexPos++;
}
}
origSeq.length = unexPos;
origSeq.PrintSeq(seqOut);
return 0;
}
示例8: read_seqs
/*
* Wrapper for FASTAReader
*/
bool read_seqs(FASTAReader &reader1, FASTAReader &reader2,
Buffer<int16_t> *seqs1, Buffer<int16_t> *seqs2,
int *seqs1_len, int *seqs2_len,
std::vector<std::string> *seqs1_ids,
std::vector<std::string> *seqs2_ids)
{
bool ans;
#pragma omp critical
ans = reader1.next(seqs1, seqs1_len, seqs1_ids) &&
reader2.next(seqs2, seqs2_len, seqs2_ids);
return ans;
}
示例9: main
int main(int argc, char* argv[]) {
string genomeFileName, subseqFileName;
if (argc != 3) {
cout << "usage: extractRepeats genome repeat" << endl;
exit(0);
}
genomeFileName = argv[1];
subseqFileName = argv[2];
FASTASequence genome, sub;
FASTAReader reader;
reader.Init(genomeFileName);
reader.GetNext(genome);
reader.Init(subseqFileName);
reader.GetNext(sub);
genome.ToUpper();
sub.ToUpper();
DNALength genomePos;
FASTASequence genomeSub;
int kband = (int) (0.15) * sub.length;
vector<int> scoreMat;
vector<Arrow> pathMat;
int readIndex = 0;
cout << "starting extraction" << endl;
for (genomePos = 0; genomePos < genome.length - sub.length + 1; genomePos++) {
genomeSub.seq = &genome.seq[genomePos];
genomeSub.length = sub.length;
int alignScore;
Alignment alignment;
alignScore = SWAlign(genomeSub, sub,
EditDistanceMatrix, 1, //1,kband,
scoreMat, pathMat,
alignment, QueryFit);
if (alignScore < 0.25 * sub.length) {
stringstream titlestrm;
titlestrm << readIndex << "|"
<< genomePos << "|"
<< genomePos + sub.length << " " << alignScore/ (1.0*sub.length);
FASTASequence subcopy;
subcopy.CopyTitle(titlestrm.str());
subcopy.seq = &genome.seq[genomePos];
subcopy.length = sub.length;
subcopy.PrintSeq(std::cout);
genomePos += sub.length;
}
}
}
示例10: main
int main(int argc, char *argv[]) {
string sequencesInName, sequencesOutName;
if (argc <3){
cout << "usage: scramble in out" << endl;
exit(1);
}
sequencesInName = argv[1];
sequencesOutName= argv[2];
vector<FASTASequence*> sequences;
vector<int> sequenceIndices;
FASTAReader reader;
reader.Init(sequencesInName);
ofstream out;
CrucialOpen(sequencesOutName, out, std::ios::out);
FASTASequence read;
FASTASequence*readPtr;
while(reader.GetNext(read)) {
readPtr = new FASTASequence;
*readPtr = read;
sequences.push_back(readPtr);
}
int i;
for (i = 0; i < sequences.size(); i++) {
sequenceIndices.push_back(i);
}
for (i = 0; i < 10*sequences.size(); i++ ){
//
// shuffle indices.
//
int idx1;
int idx2;
idx1 = RandomInt(sequences.size());
idx2 = RandomInt(sequences.size());
int tmp;
tmp = sequenceIndices[idx1];
sequenceIndices[idx1] = sequenceIndices[idx2];
sequenceIndices[idx2] = tmp;
}
for (i = 0; i < sequenceIndices.size(); i++ ){
sequences[sequenceIndices[i]]->PrintSeq(out);
}
return 0;
}
示例11: main
int main(int argc, char* argv[]) {
CommandLineParser clp;
string fastaFileName, indexFileName;
vector<string> fastaFileNames;
vector<string> opts;
clp.SetProgramName("bsdb");
clp.SetProgramSummary("Build an index database on a file of sequences.\n"
" The index is used to map to reads given alignment positions.\n");
clp.RegisterStringOption("fasta", &fastaFileName, "A file with sequences to build an index.");
clp.RegisterStringOption("index", &indexFileName, "The index file.");
clp.RegisterPreviousFlagsAsHidden();
clp.ParseCommandLine(argc, argv, opts);
ifstream fastaIn;
ofstream indexOut;
if (FileOfFileNames::IsFOFN(fastaFileName)) {
FileOfFileNames::FOFNToList(fastaFileName, fastaFileNames);
}
else {
fastaFileNames.push_back(fastaFileName);
}
CrucialOpen(indexFileName, indexOut, std::ios::out | std::ios::binary);
SequenceIndexDatabase<FASTASequence> seqDB;
int fileNameIndex;
for (fileNameIndex = 0; fileNameIndex < fastaFileNames.size(); fileNameIndex++){
FASTAReader reader;
FASTASequence seq;
reader.Init(fastaFileNames[fileNameIndex]);
int i = 0;
while (reader.GetNext(seq)) {
seqDB.AddSequence(seq);
i++;
}
}
seqDB.Finalize();
seqDB.WriteDatabase(indexOut);
return 0;
}
示例12: main
int main(int argc, char* argv[]) {
if (argc < 4) {
cout << "usage: splitContigs in.fa contiglength out" << endl;
exit(1);
}
string inFileName, outFileName;
inFileName = argv[1];
int contigLength = atoi(argv[2]);
outFileName = argv[3];
ofstream seqOut;
CrucialOpen(outFileName, seqOut, std::ios::out);
FASTAReader reader;
reader.Init(inFileName);
FASTASequence seq;
DNALength curOffset;
while(reader.GetNext(seq)) {
FASTASequence subseq;
int i;
curOffset = 0;
for (i =0 ; i < seq.length / contigLength + 1; i++ ) {
subseq.seq = &seq.seq[curOffset];
subseq.title = seq.title;
if (curOffset + contigLength > seq.length) {
subseq.length = seq.length - curOffset;
}
else {
subseq.length = contigLength;
}
subseq.PrintSeq(seqOut);
curOffset += contigLength;
}
}
return 0;
}
示例13: main
int main(int argc, char* argv[]) {
string refFileName, queryFileName;
int maxHammingDistance;
if (argc < 4) {
cout << "usage: hammer ref query maxHam " << endl;
exit(1);
}
refFileName = argv[1];
queryFileName = argv[2];
maxHammingDistance = atoi(argv[3]);
FASTAReader reader;
reader.Initialize(refFileName);
FASTASequence ref, refRC;
reader.GetNext(ref);
ref.MakeRC(refRC);
FASTAReader queryReader;
queryReader.Initialize(queryFileName);
FASTASequence query;
queryReader.GetNext(query);
DNALength p;
for(p=0; p < ref.length-query.length-1; p++ ){
DNASequence subseq;
subseq.seq = &ref.seq[p];
subseq.length = query.length;
// cout << "t "; subseq.PrintSeq(cout);
// cout << "q "; ((DNASequence*)&query)->PrintSeq(cout);
if (HammingDistance(&subseq.seq[0], &query.seq[0], query.length) < maxHammingDistance) {
cout << ">" << p << endl;
subseq.PrintSeq(cout);
}
int i;
for (i =0; i < query.length; i++) {
subseq.seq[i] = toupper(subseq.seq[i]);
}
}
for(p=0; p < ref.length-query.length-1; p++ ){
DNASequence subseq;
subseq.seq = &refRC.seq[p];
subseq.length = query.length;
if (HammingDistance(&subseq.seq[0], &query.seq[0], query.length) < maxHammingDistance) {
cout << ">" << p << "rc" << endl;
subseq.PrintSeq(cout);
}
int i;
for (i =0; i < query.length; i++) {
subseq.seq[i] = toupper(subseq.seq[i]);
}
}
}
示例14: main
int main(int argc, char* argv[]) {
string refFileName, notNormalFileName, normalFileName;
if (argc < 4) {
cout << "usage: normalizeGCContent ref source dest " << endl
<< " flips the C/Gs in source randomly until they are the same gc content as ref." << endl;
exit(1);
}
refFileName = argv[1];
notNormalFileName = argv[2];
normalFileName = argv[3];
FASTAReader reader;
FASTAReader queryReader;
FASTASequence ref;
vector<FASTASequence> querySequences;
int queryTotalLength;
reader.Initialize(refFileName);
reader.ReadAllSequencesIntoOne(ref);
queryReader.Initialize(notNormalFileName);
int refCounts[5], queryCounts[5];
int s;
refCounts[0] = refCounts[1] =refCounts[2] = refCounts[3] = refCounts[4] = 0;
queryCounts[0] = queryCounts[1] =queryCounts[2] = queryCounts[3] = queryCounts[4] = 0;
queryReader.ReadAllSequences(querySequences);
ofstream normOut;
CrucialOpen(normalFileName, normOut);
CountNucs(ref, refCounts);
float refGC = (1.0*refCounts[TwoBit['c']] + refCounts[TwoBit['g']]) / (refCounts[TwoBit['a']] + refCounts[TwoBit['c']] + refCounts[TwoBit['g']] + refCounts[TwoBit['t']]);
int q;
for (q = 0; q < querySequences.size(); q++) {
CountNucs(querySequences[q], queryCounts);
}
float queryGC = (1.0*queryCounts[TwoBit['c']] + queryCounts[TwoBit['g']]) / (queryCounts[TwoBit['a']] + queryCounts[TwoBit['c']] + queryCounts[TwoBit['g']] + queryCounts[TwoBit['t']]);
float gcToat = 0.0;
float atTogc = 0.0;
if (refGC > queryGC) {
atTogc = (refGC - queryGC);
}
else {
gcToat = (queryGC - refGC);
}
DNALength queryGenomeLength = queryCounts[0] + queryCounts[1] + queryCounts[2] + queryCounts[3] + queryCounts[4];
DNALength unmaskedQueryLength = queryCounts[0] + queryCounts[1] + queryCounts[2] + queryCounts[3];
DNALength ngc2at = unmaskedQueryLength * gcToat;
DNALength nat2gc = unmaskedQueryLength * atTogc;
cout << refGC << " " << queryGC << " " << gcToat << " " << atTogc << " " << ngc2at << " " << nat2gc << endl;
vector<FASTASequence> normalized;
normalized.resize(querySequences.size());
vector<DNALength> cumLengths;
cumLengths.resize(normalized.size()+1);
cumLengths[0] = 0;
for (q = 0; q < querySequences.size(); q++) {
normalized[q] = querySequences[q];
cumLengths[q+1] = cumLengths[q] + querySequences[q].length;
}
DNALength i;
for (i = 0; i < ngc2at; i+=2) {
DNALength pos, chr;
FindRandomNuc(normalized, queryGenomeLength, cumLengths, 'G', chr, pos);
normalized[chr].seq[pos] = 'A';
FindRandomNuc(normalized, queryGenomeLength, cumLengths, 'C', chr, pos);
normalized[chr].seq[pos] = 'T';
}
for (i = 0; i < nat2gc; i+=2) {
DNALength pos, chr;
FindRandomNuc(normalized, queryGenomeLength, cumLengths, 'A', chr, pos);
normalized[chr].seq[pos] = 'g';
FindRandomNuc(normalized, queryGenomeLength, cumLengths, 'T', chr, pos);
normalized[chr].seq[pos] = 'c';
}
for (q = 0; q < normalized.size(); q++ ){
normalized[q].PrintSeq(normOut);
}
}
示例15: main
int main(int argc, char* argv[]) {
CommandLineParser clp;
string refGenomeName;
string mutGenomeName;
string gffFileName;
float insRate = 0;
float delRate = 0;
float mutRate = 0;
bool lower = false;
gffFileName = "";
clp.RegisterStringOption("refGenome", &refGenomeName, "Reference genome.", true);
clp.RegisterStringOption("mutGenome", &mutGenomeName, "Mutated genome.", true);
clp.RegisterPreviousFlagsAsHidden();
clp.RegisterStringOption("gff", &gffFileName, "GFF file describing the modifications made to the genome.");
clp.RegisterFloatOption("i", &insRate, "Insertion rate: (0-1].",
CommandLineParser::NonNegativeFloat, false);
clp.RegisterFloatOption("d", &delRate, "Deletion rate: (0-1]",
CommandLineParser::NonNegativeFloat, false);
clp.RegisterFloatOption("m", &mutRate, "Mutation rate, even across all nucleotides: (0-1]",
CommandLineParser::NonNegativeFloat, false);
clp.RegisterFlagOption("lower", &lower, "Make mutations in lower case", false);
vector<string> leftovers;
clp.ParseCommandLine(argc, argv, leftovers);
FASTAReader reader;
FASTASequence refGenome;
reader.Init(refGenomeName);
ofstream mutGenomeOut;
CrucialOpen(mutGenomeName, mutGenomeOut, std::ios::out);
ofstream gffOut;
if (gffFileName != "") {
CrucialOpen(gffFileName, gffOut, std::ios::out);
}
vector<int> insIndices, delIndices, subIndices;
int readIndex = 0;
InitializeRandomGeneratorWithTime();
while (reader.GetNext(refGenome)) {
insIndices.resize(refGenome.length);
delIndices.resize(refGenome.length);
subIndices.resize(refGenome.length);
std::fill(insIndices.begin(), insIndices.end(), false);
std::fill(delIndices.begin(), delIndices.end(), false);
std::fill(subIndices.begin(), subIndices.end(), 0);
enum ChangeType { Ins, Del, Mut, None};
float changeProb[4];
changeProb[Ins] = insRate;
changeProb[Del] = changeProb[Ins] + delRate;
changeProb[Mut] = changeProb[Del] + mutRate;
changeProb[None] = 1;
if (changeProb[Mut] > 1) {
cout << "ERROR! The sum of the error probabilities must be less than 1" << endl;
exit(1);
}
DNALength pos;
float randomNumber;
int numIns = 0;
int numDel = 0;
int numMut = 0;
for (pos =0 ; pos < refGenome.length; pos++) {
randomNumber = Random();
if (randomNumber < changeProb[Ins]) {
insIndices[pos] = true;
numIns++;
}
else if (randomNumber < changeProb[Del]) {
delIndices[pos] = true;
numDel++;
}
else if (randomNumber < changeProb[Mut]){
Nucleotide newNuc = TwoBitToAscii[RandomInt(4)];
int maxIts = 100000;
int it = 0;
while (newNuc == refGenome.seq[pos]) {
newNuc = TwoBitToAscii[RandomInt(4)];
if (it == maxIts) {
cout << "ERROR, something is wrong with the random number generation, it took too many tries to generate a new nucleotide" << endl;
exit(1);
}
}
subIndices[pos] = refGenome[pos];
refGenome.seq[pos] = ToLower(newNuc,lower);
++numMut;
}
}
// cout << readIndex << " m " << numMut << " i " << numIns << " d " << numDel << endl;
if (readIndex % 100000 == 0 && readIndex > 0) {
cout << readIndex << endl;
}
//
// Now add the insertions and deletions.
//
FASTASequence newSequence;
DNALength newPos;
if (numIns - numDel + refGenome.length < 0) {
cout << "ERROR, the genome has been deleted to nothing." << endl;
//.........这里部分代码省略.........