本文整理汇总了C++中FASTAReader::Init方法的典型用法代码示例。如果您正苦于以下问题:C++ FASTAReader::Init方法的具体用法?C++ FASTAReader::Init怎么用?C++ FASTAReader::Init使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类FASTAReader
的用法示例。
在下文中一共展示了FASTAReader::Init方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: main
int main(int argc, char* argv[]) {
string genomeFileName, subseqFileName;
if (argc != 3) {
cout << "usage: extractRepeats genome repeat" << endl;
exit(0);
}
genomeFileName = argv[1];
subseqFileName = argv[2];
FASTASequence genome, sub;
FASTAReader reader;
reader.Init(genomeFileName);
reader.GetNext(genome);
reader.Init(subseqFileName);
reader.GetNext(sub);
genome.ToUpper();
sub.ToUpper();
DNALength genomePos;
FASTASequence genomeSub;
int kband = (int) (0.15) * sub.length;
vector<int> scoreMat;
vector<Arrow> pathMat;
int readIndex = 0;
cout << "starting extraction" << endl;
for (genomePos = 0; genomePos < genome.length - sub.length + 1; genomePos++) {
genomeSub.seq = &genome.seq[genomePos];
genomeSub.length = sub.length;
int alignScore;
Alignment alignment;
alignScore = SWAlign(genomeSub, sub,
EditDistanceMatrix, 1, //1,kband,
scoreMat, pathMat,
alignment, QueryFit);
if (alignScore < 0.25 * sub.length) {
stringstream titlestrm;
titlestrm << readIndex << "|"
<< genomePos << "|"
<< genomePos + sub.length << " " << alignScore/ (1.0*sub.length);
FASTASequence subcopy;
subcopy.CopyTitle(titlestrm.str());
subcopy.seq = &genome.seq[genomePos];
subcopy.length = sub.length;
subcopy.PrintSeq(std::cout);
genomePos += sub.length;
}
}
}
示例2: main
int main(int argc, char* argv[]) {
if (argc < 3) {
cout << "usage: testBuildOccBins genomeFileName suffixArray" << endl;
exit(0);
}
string genomeFileName = argv[1];
string suffixArrayFileName = argv[2];
FASTAReader reader;
reader.Init(genomeFileName);
FASTASequence seq;
reader.GetNext(seq);
DNASuffixArray suffixArray;
suffixArray.Read(suffixArrayFileName);
Bwt<PackedDNASequence, FASTASequence> bwt;
//bwt.InitializeFromSuffixArray(seq, suffixArray.index);
bwt.InitializeBWTStringFromSuffixArray(seq, suffixArray.index);
bwt.occ.Initialize(bwt.bwtSequence, 4096, 64);
bwt.occ.PrintBins(cout);
}
示例3: main
int main(int argc, char* argv[]) {
string seqFileName;
TupleMetrics tm;
string outFileName;
if (argc < 3) {
cout << "usage: storeTuplePosList seqFile tupleSize outFile" << endl;
return 0;
}
seqFileName = argv[1];
tm.tupleSize = atoi(argv[2]);
outFileName = argv[3];
ofstream outFile;
// CrucialOpen(outFileName, outFile, std::ios::out| std::ios::binary);
FASTAReader reader;
reader.Init(seqFileName);
FASTASequence seq;
reader.GetNext(seq);
// vector<PositionDNATuple>
TupleList<PositionDNATuple>tuplePosList;
tuplePosList.SetTupleMetrics(tm);
// StoreTuplePosList(seq, tm, tuplePosList);
SequenceToTupleList(seq, tm, tuplePosList);
tuplePosList.Sort();
tuplePosList.WriteToFile(outFileName); //WriteTuplePosList(tuplePosList, tm.tupleSize, outFile);
outFile.close();
return 0;
}
示例4: main
int main(int argc, char* argv[]) {
if (argc < 3) {
cout << "usage: bwtLocateList bwtName querySeqFile" << endl;
exit(1);
}
string bwtFileName = argv[1];
string querySeqFileName = argv[2];
bool doPrintResults = false;
int maxCount = 0;
int argi = 3;
bool countOnly = false;
while(argi < argc) {
if (strcmp(argv[argi], "-print") == 0) {
doPrintResults = true;
}
else if (strcmp(argv[argi], "-max") == 0) {
maxCount = atoi(argv[++argi]);
}
else if (strcmp(argv[argi], "-count") == 0) {
countOnly = true;
}
else {
cout << "bad option: " << argv[argi] << endl;
}
++argi;
}
Bwt<PackedDNASequence, FASTASequence> bwt;
bwt.Read(bwtFileName);
FASTAReader queryReader;
queryReader.Init(querySeqFileName);
FASTASequence seq;
int seqIndex = 0;
vector<DNALength> positions;
while(queryReader.GetNext(seq)) {
positions.clear();
if (countOnly == false) {
bwt.Locate(seq, positions, maxCount);
}
else {
DNALength sp,ep;
bwt.Count(seq, sp, ep);
}
// cout << "matched " << positions.size() << " positions." << endl;
if (doPrintResults) {
int i;
for (i = 0; i < positions.size(); i++ ){
cout << positions[i] << " ";
}
cout << endl;
}
++seqIndex;
}
// float wordCountsPerLookup = (bwt.bwtSequence.nCountInWord *1.0) / bwt.bwtSequence.nCountNuc;
// cout << "word counts per lookup: " << wordCountsPerLookup << endl;
return 0;
}
示例5: main
int main(int argc, char *argv[]) {
string sequencesInName, sequencesOutName;
if (argc <3){
cout << "usage: scramble in out" << endl;
exit(1);
}
sequencesInName = argv[1];
sequencesOutName= argv[2];
vector<FASTASequence*> sequences;
vector<int> sequenceIndices;
FASTAReader reader;
reader.Init(sequencesInName);
ofstream out;
CrucialOpen(sequencesOutName, out, std::ios::out);
FASTASequence read;
FASTASequence*readPtr;
while(reader.GetNext(read)) {
readPtr = new FASTASequence;
*readPtr = read;
sequences.push_back(readPtr);
}
int i;
for (i = 0; i < sequences.size(); i++) {
sequenceIndices.push_back(i);
}
for (i = 0; i < 10*sequences.size(); i++ ){
//
// shuffle indices.
//
int idx1;
int idx2;
idx1 = RandomInt(sequences.size());
idx2 = RandomInt(sequences.size());
int tmp;
tmp = sequenceIndices[idx1];
sequenceIndices[idx1] = sequenceIndices[idx2];
sequenceIndices[idx2] = tmp;
}
for (i = 0; i < sequenceIndices.size(); i++ ){
sequences[sequenceIndices[i]]->PrintSeq(out);
}
return 0;
}
示例6: main
int main(int argc, char* argv[]) {
CommandLineParser clp;
string fastaFileName, indexFileName;
vector<string> fastaFileNames;
vector<string> opts;
clp.SetProgramName("bsdb");
clp.SetProgramSummary("Build an index database on a file of sequences.\n"
" The index is used to map to reads given alignment positions.\n");
clp.RegisterStringOption("fasta", &fastaFileName, "A file with sequences to build an index.");
clp.RegisterStringOption("index", &indexFileName, "The index file.");
clp.RegisterPreviousFlagsAsHidden();
clp.ParseCommandLine(argc, argv, opts);
ifstream fastaIn;
ofstream indexOut;
if (FileOfFileNames::IsFOFN(fastaFileName)) {
FileOfFileNames::FOFNToList(fastaFileName, fastaFileNames);
}
else {
fastaFileNames.push_back(fastaFileName);
}
CrucialOpen(indexFileName, indexOut, std::ios::out | std::ios::binary);
SequenceIndexDatabase<FASTASequence> seqDB;
int fileNameIndex;
for (fileNameIndex = 0; fileNameIndex < fastaFileNames.size(); fileNameIndex++){
FASTAReader reader;
FASTASequence seq;
reader.Init(fastaFileNames[fileNameIndex]);
int i = 0;
while (reader.GetNext(seq)) {
seqDB.AddSequence(seq);
i++;
}
}
seqDB.Finalize();
seqDB.WriteDatabase(indexOut);
return 0;
}
示例7: main
int main(int argc, char* argv[]) {
if (argc < 4) {
cout << "usage: splitContigs in.fa contiglength out" << endl;
exit(1);
}
string inFileName, outFileName;
inFileName = argv[1];
int contigLength = atoi(argv[2]);
outFileName = argv[3];
ofstream seqOut;
CrucialOpen(outFileName, seqOut, std::ios::out);
FASTAReader reader;
reader.Init(inFileName);
FASTASequence seq;
DNALength curOffset;
while(reader.GetNext(seq)) {
FASTASequence subseq;
int i;
curOffset = 0;
for (i =0 ; i < seq.length / contigLength + 1; i++ ) {
subseq.seq = &seq.seq[curOffset];
subseq.title = seq.title;
if (curOffset + contigLength > seq.length) {
subseq.length = seq.length - curOffset;
}
else {
subseq.length = contigLength;
}
subseq.PrintSeq(seqOut);
curOffset += contigLength;
}
}
return 0;
}
示例8: main
int main(int argc, char* argv[]) {
string ad1File, ad2File, readsFile, readsOutFile;
FASTAReader ad1Reader;
FASTAReader ad2Reader;
FASTAReader reader;
CommandLineParser cl;
float minPctSimilarity = 0.60;
int indel = 3;
int minLength = 10;
cl.RegisterStringOption("ad1", &ad1File, "FASTA file with the first adapter");
cl.RegisterStringOption("ad2", &ad2File, "FASTA file with the second adapter");
cl.RegisterStringOption("reads", &readsFile, "FASTA file with SMRTBell reads");
cl.RegisterStringOption("readsout", &readsOutFile, "output file for split reads");
cl.RegisterPreviousFlagsAsHidden();
cl.RegisterFloatOption("pctSim", &minPctSimilarity, "Minimum percent similarity to trigger a match to an adapter.",
CommandLineParser::PositiveFloat);
cl.RegisterIntOption("indel", &indel, "Penalty for indel (positive)", CommandLineParser::NonNegativeInteger);
cl.RegisterIntOption("minLength", &minLength, "Minimum length pass to retain.", CommandLineParser::PositiveInteger);
vector<string> opts;
cl.ParseCommandLine(argc, argv, opts);
/*
* Open all the required files, quitting if they are unavailable.
*/
ad1Reader.Init(ad1File);
ad2Reader.Init(ad2File);
reader.Init(readsFile);
ofstream splitOut;
CrucialOpen(readsOutFile, splitOut);
FASTASequence ad1, ad2;
ad1Reader.GetNext(ad1);
ad2Reader.GetNext(ad2);
FASTASequence read;
vector<int> scoreMat;
vector<Arrow> pathMat;
int readIndex = 0;
while(reader.GetNext(read)) {
read.ToUpper();
//
// Do a fitting sequence alignment to match one of the two
// adapters into the read.
//
vector<int> passStarts, passLengths, la;
read.PrintSeq(cout);
SplitRead(read, 0, read.length, ad1, ad2,
indel,
passStarts, passLengths,la, 0,
scoreMat, pathMat, minPctSimilarity, minLength);
int i;
for (i = 0; i < passStarts.size(); i++) {
cout << "read: " << readIndex << " pass: " << i << " " << passStarts[i] << " " << passLengths[i] << " " << la[i] << endl;
}
++readIndex;
}
}
示例9: main
int main(int argc, char* argv[]) {
CommandLineParser clp;
string refGenomeName;
string mutGenomeName;
string gffFileName;
float insRate = 0;
float delRate = 0;
float mutRate = 0;
bool lower = false;
gffFileName = "";
clp.RegisterStringOption("refGenome", &refGenomeName, "Reference genome.", true);
clp.RegisterStringOption("mutGenome", &mutGenomeName, "Mutated genome.", true);
clp.RegisterPreviousFlagsAsHidden();
clp.RegisterStringOption("gff", &gffFileName, "GFF file describing the modifications made to the genome.");
clp.RegisterFloatOption("i", &insRate, "Insertion rate: (0-1].",
CommandLineParser::NonNegativeFloat, false);
clp.RegisterFloatOption("d", &delRate, "Deletion rate: (0-1]",
CommandLineParser::NonNegativeFloat, false);
clp.RegisterFloatOption("m", &mutRate, "Mutation rate, even across all nucleotides: (0-1]",
CommandLineParser::NonNegativeFloat, false);
clp.RegisterFlagOption("lower", &lower, "Make mutations in lower case", false);
vector<string> leftovers;
clp.ParseCommandLine(argc, argv, leftovers);
FASTAReader reader;
FASTASequence refGenome;
reader.Init(refGenomeName);
ofstream mutGenomeOut;
CrucialOpen(mutGenomeName, mutGenomeOut, std::ios::out);
ofstream gffOut;
if (gffFileName != "") {
CrucialOpen(gffFileName, gffOut, std::ios::out);
}
vector<int> insIndices, delIndices, subIndices;
int readIndex = 0;
InitializeRandomGeneratorWithTime();
while (reader.GetNext(refGenome)) {
insIndices.resize(refGenome.length);
delIndices.resize(refGenome.length);
subIndices.resize(refGenome.length);
std::fill(insIndices.begin(), insIndices.end(), false);
std::fill(delIndices.begin(), delIndices.end(), false);
std::fill(subIndices.begin(), subIndices.end(), 0);
enum ChangeType { Ins, Del, Mut, None};
float changeProb[4];
changeProb[Ins] = insRate;
changeProb[Del] = changeProb[Ins] + delRate;
changeProb[Mut] = changeProb[Del] + mutRate;
changeProb[None] = 1;
if (changeProb[Mut] > 1) {
cout << "ERROR! The sum of the error probabilities must be less than 1" << endl;
exit(1);
}
DNALength pos;
float randomNumber;
int numIns = 0;
int numDel = 0;
int numMut = 0;
for (pos =0 ; pos < refGenome.length; pos++) {
randomNumber = Random();
if (randomNumber < changeProb[Ins]) {
insIndices[pos] = true;
numIns++;
}
else if (randomNumber < changeProb[Del]) {
delIndices[pos] = true;
numDel++;
}
else if (randomNumber < changeProb[Mut]){
Nucleotide newNuc = TwoBitToAscii[RandomInt(4)];
int maxIts = 100000;
int it = 0;
while (newNuc == refGenome.seq[pos]) {
newNuc = TwoBitToAscii[RandomInt(4)];
if (it == maxIts) {
cout << "ERROR, something is wrong with the random number generation, it took too many tries to generate a new nucleotide" << endl;
exit(1);
}
}
subIndices[pos] = refGenome[pos];
refGenome.seq[pos] = ToLower(newNuc,lower);
++numMut;
}
}
// cout << readIndex << " m " << numMut << " i " << numIns << " d " << numDel << endl;
if (readIndex % 100000 == 0 && readIndex > 0) {
cout << readIndex << endl;
}
//
// Now add the insertions and deletions.
//
FASTASequence newSequence;
DNALength newPos;
if (numIns - numDel + refGenome.length < 0) {
cout << "ERROR, the genome has been deleted to nothing." << endl;
//.........这里部分代码省略.........
示例10: main
int main(int argc, char* argv[]) {
FASTAReader reader;
if (argc < 5) {
cout << "usage: wordCounter seqFile tupleSize tupleOutputFile posOutputFile" << endl;
exit(1);
}
string fileName = argv[1];
int tupleSize = atoi(argv[2]);
string tupleListName = argv[3];
string posOutName = argv[4];
TupleMetrics tm;
tm.Initialize(tupleSize);
reader.Init(fileName);
FASTASequence seq;
reader.GetNext(seq);
vector<CountedDNATuple> tupleList;
CountedDNATuple tuple;
DNALength i;
for (i = 0; i < seq.length - tm.tupleSize + 1; i++ ) {
if (tuple.FromStringRL((Nucleotide*) (seq.seq + i), tm)) {
tuple.count = i;
tupleList.push_back(tuple);
}
}
std::sort(tupleList.begin(), tupleList.end());
int t;
int t2;
int numTuples = tupleList.size();
t = t2 = 0;
int numUnique = 0;
while (t < numTuples) {
t2 = t;
t2++;
while (t2 < numTuples and tupleList[t] == tupleList[t2]) {
t2++;
}
++numUnique;
t = t2;
}
ofstream countedTupleListOut;
countedTupleListOut.open(tupleListName.c_str(), ios_base::binary);
ofstream posOut;
posOut.open(posOutName.c_str(), ios_base::binary);
countedTupleListOut.write((const char*) &numUnique, sizeof(int));
countedTupleListOut.write((const char*) &tm.tupleSize, sizeof(int));
posOut.write((const char*) &numUnique, sizeof(int));
//
// Write out the tuple+counts to a file.
//
t = t2 = 0;
CountedDNATuple countedTuple;
int numMultOne = 0;
while (t < numTuples) {
t2 = t;
t2++;
while (t2 < numTuples and tupleList[t] == tupleList[t2]) {
t2++;
}
countedTuple.tuple = tupleList[t].tuple;
countedTuple.count = t2 - t;
if (countedTuple.count == 1) ++numMultOne;
countedTupleListOut.write((const char*) &countedTuple,sizeof(CountedDNATuple));
posOut.write((char*)&countedTuple.count, sizeof(int));
int tc;
for (tc = t; tc < t2; tc++) {
posOut.write((char*) &tupleList[tc].count, sizeof(int));
}
t = t2;
}
//
// Write out the positions of the tuples to a file.
//
posOut.close();
countedTupleListOut.close();
// cout << "found " << numUnique << " distinct " << DNATuple::TupleSize << "-mers." << endl;
cout << numMultOne << endl;
return 0;
}
示例11: main
//.........这里部分代码省略.........
diffCoverSize == 2281) ) {
cout << "The difference cover size must be one of 7,32,64,111, or 2281." << endl;
cout << "Larger numbers use less space but are more slow." << endl;
exit(1);
}
}
else if (strcmp(argv[argi], "-4bit") == 0) {
read4BitCompressed = 1;
}
else {
PrintUsage();
cout << "ERROR, bad option: " << argv[argi] << endl;
exit(1);
}
}
++argi;
}
if (inFiles.size() == 0) {
//
// Special use case: the input file is a fasta file. Write to that file + .sa
//
inFiles.push_back(saFile);
saFile = saFile + ".sa";
}
VectorIndex inFileIndex;
FASTASequence seq;
CompressedSequence<FASTASequence> compSeq;
if (read4BitCompressed == 0) {
for (inFileIndex = 0; inFileIndex < inFiles.size(); ++inFileIndex) {
FASTAReader reader;
reader.Init(inFiles[inFileIndex]);
reader.SetSpacePadding(111);
if (saBuildType == kark) {
//
// The Karkkainen sa building method requires a little extra
// space at the end of the dna sequence so that counting may
// be done mod 3 without adding extra logic for boundaries.
//
}
if (inFileIndex == 0) {
reader.ReadAllSequencesIntoOne(seq);
reader.Close();
}
else {
while(reader.ConcatenateNext(seq)) {
cout << "added " << seq.title << endl;
}
}
}
seq.ToThreeBit();
//seq.ToUpper();
}
else {
assert(inFiles.size() == 1);
cout << "reading compressed sequence." << endl;
compSeq.Read(inFiles[0]);
seq.seq = compSeq.seq;
seq.length = compSeq.length;
compSeq.RemoveCompressionCounts();
cout << "done." << endl;
}
示例12: main
int main(int argc, char* argv[]) {
string inFileName, readsFileName;
DNALength readLength;
float coverage = 0;
bool noRandInit = false;
int numReads = -1;
CommandLineParser clp;
int qualityValue = 20;
bool printFastq = false;
int stratify = 0;
string titleType = "pacbio";
string fastqType = "illumina"; // or "sanger"
clp.RegisterStringOption("inFile", &inFileName, "Reference sequence", 0);
clp.RegisterPreviousFlagsAsHidden();
clp.RegisterIntOption("readLength", (int*) &readLength, "The length of reads to simulate. The length is fixed.",
CommandLineParser::PositiveInteger, "Length of every read.", 0);
clp.RegisterFloatOption("coverage", &coverage, "Total coverage (from which the number of reads is calculated",
CommandLineParser::PositiveFloat, 0);
clp.RegisterFlagOption("nonRandInit", &noRandInit, "Skip initializing the random number generator with time.");
clp.RegisterIntOption("nReads", &numReads, "Total number of reads (from which coverage is calculated)", CommandLineParser::PositiveInteger, 0);
clp.RegisterStringOption("readsFile", &readsFileName, "Reads output file", 0);
clp.RegisterFlagOption("fastq", &printFastq, "Fake fastq output with constant quality value (20)");
clp.RegisterIntOption("quality", &qualityValue, "Value to use for fastq quality", CommandLineParser::PositiveInteger);
clp.RegisterIntOption("stratify", &stratify, "Sample a read every 'stratify' bases, rather than randomly.", CommandLineParser::PositiveInteger);
clp.RegisterStringOption("titleType", &titleType, "Set the name of the title: 'pacbio'|'illumina'");
clp.RegisterStringOption("fastqType", &fastqType, "Set the type of fastq: 'illumina'|'sanger'");
vector<string> leftovers;
clp.ParseCommandLine(argc, argv, leftovers);
if (!noRandInit) {
InitializeRandomGeneratorWithTime();
}
FASTAReader inReader;
inReader.Init(inFileName);
vector<FASTASequence> reference;
inReader.ReadAllSequences(reference);
ofstream readsFile;
if (readsFileName == "") {
cout << "ERROR. You must specify a reads file." << endl;
exit(0);
}
CrucialOpen(readsFileName, readsFile, std::ios::out);
ofstream sangerFastqFile;
if (fastqType == "sanger") {
string sangerFastqFileName = readsFileName + ".fastq";
CrucialOpen(sangerFastqFileName, sangerFastqFile, std::ios::out);
}
DNALength refLength = 0;
int i;
for (i = 0; i < reference.size(); i++) {
refLength += reference[i].length;
}
if (numReads == -1 and coverage == 0 and stratify == 0) {
cout << "Error, you must specify either coverage, nReads, or stratify." << endl;
exit(1);
}
else if (numReads == -1) {
numReads = (refLength / readLength) * coverage;
}
if (stratify) {
if (!readLength) {
cout << "ERROR. If you are using stratification, a read length must be specified." << endl;
exit(1);
}
}
DNASequence sampleSeq;
sampleSeq.length = readLength;
int maxRetry = 10000000;
int retryNumber = 0;
DNALength seqIndex, seqPos;
if (stratify) {
seqIndex = 0;
seqPos = 0;
}
DNALength origReadLength = readLength;
for (i = 0; stratify or i < numReads; i++) {
if (stratify == 0) {
FindRandomPos(reference, seqIndex, seqPos, readLength );
}
else {
//
// find the next start pos, or bail if done
//
if (seqPos >= reference[seqIndex].length) {
if (seqIndex == reference.size() - 1) {
break;
}
else {
seqIndex = seqIndex + 1;
seqPos = 0;
continue;
}
}
readLength = min(reference[seqIndex].length - seqPos, origReadLength);
//.........这里部分代码省略.........
示例13: main
int main(int argc, char* argv[]) {
if (argc < 4) {
PrintUsage();
exit(0);
}
string rgFileName, vertexSeqFileName, scaffoldDirName;
rgFileName = argv[1];
vertexSeqFileName = argv[2];
scaffoldDirName = argv[3];
string repeatFileName = "";
bool printRepeatsSeparately = false;
int argi = 4;
bool printSeparate=false;
while (argi < argc) {
if (strcmp(argv[argi], "-separate") == 0) {
printSeparate=true;
}
else if (strcmp(argv[argi], "-repeats") == 0) {
printRepeatsSeparately = true;
repeatFileName = argv[++argi];
}
else {
cout << "bad option: " << argv[argi] << endl;
PrintUsage();
exit(1);
}
++argi;
}
FASTAReader vertexSequenceReader;
vertexSequenceReader.Init(vertexSeqFileName);
//
// Input necessary data
//
vector<FASTASequence> vertexSequences;
vertexSequenceReader.ReadAllSequences(vertexSequences);
RepeatGraph<string> rg;
rg.ReadGraph(rgFileName);
vector<FASTASequence> vertexRCSequences;
VectorIndex vertexIndex;
vertexRCSequences.resize(vertexSequences.size());
for (vertexIndex = 0; vertexIndex < vertexSequences.size(); vertexIndex++ ){
vertexSequences[vertexIndex].MakeRC(vertexRCSequences[vertexIndex]);
}
VectorIndex outEdgeIndex;
int scaffoldIndex = 0;
ofstream scaffoldOut;
if (printSeparate==false) {
// scaffold dir name is really a file name here.
CrucialOpen(scaffoldDirName, scaffoldOut, std::ios::out);
}
for (vertexIndex = 0; vertexIndex < rg.vertices.size(); vertexIndex++ ){
rg.vertices[vertexIndex].traversed = false;
}
//
// Set up flow for calling multiplicity.
//
/*
Test all this out later.
AssignMinimumFlowToEdges(rg, 2);
AssignVertexFlowBalance(rg);
BalanceKirchhoffFlow(rg);
UInt edgeIndex;
for (edgeIndex = 0; edgeIndex < rg.edges.size(); edgeIndex++) {
if (rg.edges[edgeIndex].flow > 1) {
cout << edgeIndex << " " << rg.edges[edgeIndex].flow << endl;
}
}
*/
int numPrintedVertices = 0;
for (vertexIndex = 0; vertexIndex < rg.vertices.size(); vertexIndex++ ){
//
// Look to see if this vertex is a branching vertex.
//
if ((rg.vertices[vertexIndex].inEdges.size() != 1 or
rg.vertices[vertexIndex].outEdges.size() != 1) and
rg.vertices[vertexIndex].traversed == false) {
//
// This is a branching vertex. Print all paths from this vertex, but not the vertex
// itself if it appears repetitive.
//
VectorIndex outEdgeIndex;
bool printedThisVertex = false;
for (outEdgeIndex = 0; outEdgeIndex < rg.vertices[vertexIndex].outEdges.size(); outEdgeIndex++ ){
//
// This is a branching vertex.
//
//.........这里部分代码省略.........
示例14: main
int main(int argc, char* argv[]) {
string rgInName, rgOutName;
int minPathLength;
string vertexSequenceFileName;
if (argc < 5) {
cout << "usage: trimShortEnds in.rg vertexSequences minPathLength out.rg" << endl;
exit(1);
}
rgInName = argv[1];
vertexSequenceFileName = argv[2];
minPathLength = atoi(argv[3]);
rgOutName = argv[4];
ofstream rgOut;
CrucialOpen(rgOutName, rgOut, std::ios::out);
FASTAReader vertexSequenceReader;
vertexSequenceReader.Init(vertexSequenceFileName);
RepeatGraph<string> rg;
vector<FASTASequence> vertexSequences;
rg.ReadGraph(rgInName);
vertexSequenceReader.ReadAllSequences(vertexSequences);
VectorIndex vertexIndex;
VectorIndex outEdgeIndex;
VectorIndex edgeIndex;
if (rg.edges.size() == 0) {
cout << "LIKELY INVALID GRAPH. There are no edges." << endl;
return 0;
}
//
// At first, any edge that exists is connected to a vertex. This
// will change as low coverage edges are deleted and replaced by
// high coverage edges from the end of the array.
//
for (edgeIndex = 0; edgeIndex < rg.edges.size(); edgeIndex++) {
rg.edges[edgeIndex].connected = true;
}
set<std::pair<VectorIndex, VectorIndex> > srcDestToRemove;
for (vertexIndex = 0; vertexIndex < rg.vertices.size(); vertexIndex++) {
if (rg.vertices[vertexIndex].inEdges.size() == 0 and
rg.vertices[vertexIndex].outEdges.size() == 1) {
//
// This is a source. Traverse this until a branching vertex or the end is found.
//
vector<VectorIndex> path;
path.push_back(vertexIndex);
int pathLength = 0;
VectorIndex pathVertex;
VectorIndex pathEdge;
pathEdge = rg.vertices[vertexIndex].outEdges[0];
pathVertex = rg.edges[pathEdge].dest;
while (rg.vertices[pathVertex].inEdges.size() == 1 and
rg.vertices[pathVertex].outEdges.size() == 1) {
path.push_back(pathVertex);
pathEdge = rg.vertices[pathVertex].outEdges[0];
pathVertex = rg.edges[pathEdge].dest;
pathLength += vertexSequences[pathVertex/2].length;
}
pathLength += vertexSequences[pathVertex/2].length;
path.push_back(pathVertex);
if (pathLength < minPathLength and path.size() < 3) {
//
// Remove this path, it is too short.
// Also remove the complement.
//
cout << "trimming path of " << path.size() << " is of sequence length " << pathLength << endl;
VectorIndex pathIndex;
for (pathIndex = 0; pathIndex < path.size() - 1; pathIndex++) {
srcDestToRemove.insert(pair<VectorIndex, VectorIndex>(path[pathIndex], path[pathIndex+1]));
srcDestToRemove.insert(pair<VectorIndex, VectorIndex>(2*(path[pathIndex+1]/2) + !(path[pathIndex+1]%2),
2*(path[pathIndex]/2) + !(path[pathIndex]%2)));
}
}
}
}
MarkEdgePairsForRemoval(srcDestToRemove, rg.vertices, rg.edges);
RemoveUnconnectedEdges(rg.vertices, rg.edges);
rg.WriteGraph(rgOut);
return 0;
}
示例15: main
int main(int argc, char* argv[]) {
string genomeFileName;
string suffixArrayFileName;
if (argc < 4) {
cout << "Usage: printWordCount genome suffixArray k [k2 k3 k4...]" << endl;
exit(1);
}
genomeFileName = argv[1];
suffixArrayFileName = argv[2];
int argi = 3;
vector<DNALength> k;
while (argi < argc) {
k.push_back(atoi(argv[argi]));
argi++;
}
// Get the ref sequence.
FASTAReader reader;
reader.Init(genomeFileName);
FASTASequence seq;
// reader.GetNext(seq);
reader.ReadAllSequencesIntoOne(seq);
seq.ToUpper();
// Get the suffix array.
DNASuffixArray sarray;
sarray.Read(suffixArrayFileName);
int ki;
char *word;
cout << "wordlen word nword" << endl;
for (ki = 0; ki < k.size(); ki++) {
word = new char[k[ki]+1];
word[k[ki]] = '\0';
DNALength i;
DNALength numUnique = 0;
for (i = 0; i < seq.length - k[ki] - 1; ) {
DNALength j = i + 1;
bool seqAtN = false;
int si;
for(si = 0; si < k[ki]; si++) {
if (seq.seq[sarray.index[i] + si] == 'N') {
seqAtN = true;
break;
}
}
if (seqAtN) {
i++;
continue;
}
while (j < seq.length - k[ki] and
seq.length - sarray.index[i] >= k[ki] and
seq.length - sarray.index[j] >= k[ki] and
strncmp((const char*) &seq.seq[sarray.index[i]], (const char*) &seq.seq[sarray.index[j]], k[ki]) == 0) {
j++;
}
if (seq.length - sarray.index[i] >= k[ki]) {
for(si = 0; si < k[ki]; si++) {
word[si] = seq.seq[sarray.index[i]+si];
}
cout << k[ki] << " " << word << " " << j - i + 1 << endl;
if (j == i + 1) {
++numUnique;
}
}
i = j;
}
}
}