当前位置: 首页>>代码示例>>C++>>正文


C++ FASTAReader::Init方法代码示例

本文整理汇总了C++中FASTAReader::Init方法的典型用法代码示例。如果您正苦于以下问题:C++ FASTAReader::Init方法的具体用法?C++ FASTAReader::Init怎么用?C++ FASTAReader::Init使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在FASTAReader的用法示例。


在下文中一共展示了FASTAReader::Init方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: main

int main(int argc, char* argv[]) {
	string genomeFileName, subseqFileName;
	if (argc != 3) {
		cout << "usage: extractRepeats genome repeat" << endl;
		exit(0);
	}

	genomeFileName = argv[1];
	subseqFileName = argv[2];

	FASTASequence genome, sub;
	FASTAReader reader;
	reader.Init(genomeFileName);
	reader.GetNext(genome);
	reader.Init(subseqFileName);
	reader.GetNext(sub);

	genome.ToUpper();
	sub.ToUpper();	
	DNALength genomePos;
	FASTASequence genomeSub;
	int kband = (int) (0.15) * sub.length;
	vector<int> scoreMat;
	vector<Arrow> pathMat;
	int readIndex = 0;
	cout << "starting extraction" << endl;
	for (genomePos = 0; genomePos < genome.length - sub.length + 1; genomePos++) {
		genomeSub.seq = &genome.seq[genomePos];
		genomeSub.length = sub.length;
		int alignScore;
		Alignment alignment;
		alignScore = SWAlign(genomeSub, sub,
												 EditDistanceMatrix, 1, //1,kband,
												 scoreMat, pathMat,
												 alignment, QueryFit);
												 
		if (alignScore < 0.25 * sub.length) {
			stringstream titlestrm;
			titlestrm << readIndex << "|" 
								 << genomePos << "|"
								<< genomePos + sub.length << " " << alignScore/ (1.0*sub.length);
			FASTASequence subcopy;
			subcopy.CopyTitle(titlestrm.str());
			subcopy.seq = &genome.seq[genomePos];
			subcopy.length = sub.length;
			subcopy.PrintSeq(std::cout);
			genomePos += sub.length;
		}
	}
}
开发者ID:jcombs1,项目名称:blasr,代码行数:50,代码来源:ExtractRepeat.cpp

示例2: main

int main(int argc, char* argv[]) {
	if (argc < 3) {
		cout << "usage: testBuildOccBins genomeFileName suffixArray" << endl;
		exit(0);
	}
	string genomeFileName      = argv[1];
	string suffixArrayFileName = argv[2];
	
	FASTAReader reader;
	reader.Init(genomeFileName);
	FASTASequence seq;
	reader.GetNext(seq);
	

	DNASuffixArray suffixArray;
	
	suffixArray.Read(suffixArrayFileName);

	Bwt<PackedDNASequence, FASTASequence> bwt;
	//bwt.InitializeFromSuffixArray(seq, suffixArray.index);

	bwt.InitializeBWTStringFromSuffixArray(seq, suffixArray.index);
	bwt.occ.Initialize(bwt.bwtSequence, 4096, 64);
	bwt.occ.PrintBins(cout);
}
开发者ID:EichlerLab,项目名称:blasr,代码行数:25,代码来源:TestBuildOccBins.cpp

示例3: main

int main(int argc, char* argv[]) {

	string seqFileName;
	TupleMetrics tm;
	string outFileName;
	if (argc < 3) {
		cout << "usage: storeTuplePosList seqFile tupleSize outFile" << endl;
		return 0;
	}
	seqFileName = argv[1];
	tm.tupleSize = atoi(argv[2]);
	outFileName = argv[3];
	
	ofstream outFile;
	//	CrucialOpen(outFileName, outFile, std::ios::out| std::ios::binary);

	FASTAReader reader;
	reader.Init(seqFileName);
	FASTASequence seq;
	reader.GetNext(seq);
	//	vector<PositionDNATuple> 
	TupleList<PositionDNATuple>tuplePosList;
	tuplePosList.SetTupleMetrics(tm);
	//	StoreTuplePosList(seq, tm, tuplePosList);
	SequenceToTupleList(seq, tm, tuplePosList);
	tuplePosList.Sort();
	tuplePosList.WriteToFile(outFileName); //WriteTuplePosList(tuplePosList, tm.tupleSize, outFile);
	outFile.close();
	return 0;
}
开发者ID:EichlerLab,项目名称:blasr,代码行数:30,代码来源:StoreTuplePosList.cpp

示例4: main

int main(int argc, char* argv[]) {
	if (argc < 3) {
		cout << "usage: bwtLocateList bwtName querySeqFile" << endl;
		exit(1);
	}
	string bwtFileName      = argv[1];
	string querySeqFileName = argv[2];
	bool doPrintResults = false;
	int maxCount = 0;
	int argi = 3;
	bool countOnly = false;
	while(argi < argc) {
		if (strcmp(argv[argi], "-print") == 0) {
			doPrintResults = true;
		}
		else if (strcmp(argv[argi], "-max") == 0) {
			maxCount = atoi(argv[++argi]);
		}
		else if (strcmp(argv[argi], "-count") == 0) {
			countOnly = true;
		}
		else {
			cout << "bad option: " << argv[argi] << endl;
		}
		++argi;
	}

 	Bwt<PackedDNASequence, FASTASequence> bwt;
	bwt.Read(bwtFileName);

	FASTAReader queryReader;
	queryReader.Init(querySeqFileName);
	FASTASequence seq;
	int seqIndex = 0;
	vector<DNALength> positions;
	while(queryReader.GetNext(seq)) {
		positions.clear();
		if (countOnly == false) {
			bwt.Locate(seq, positions, maxCount);
		}
		else {
			DNALength sp,ep;
			bwt.Count(seq, sp, ep);
		}
		//		cout << "matched " << positions.size() << " positions." << endl;
		if (doPrintResults) {
			int i;
			for (i = 0; i < positions.size(); i++ ){
				cout << positions[i] << " ";
			}
			cout << endl;
		}
		++seqIndex;
	}
		//	float wordCountsPerLookup = (bwt.bwtSequence.nCountInWord *1.0) / bwt.bwtSequence.nCountNuc;
		//	cout << "word counts per lookup: " << wordCountsPerLookup << endl;
	return 0;
}
开发者ID:BioinformaticsArchive,项目名称:blasr,代码行数:58,代码来源:BWTLocateList.cpp

示例5: main

int main(int argc, char *argv[]) {
	string sequencesInName, sequencesOutName;
	if (argc <3){ 
		cout << "usage: scramble in out" << endl;
		exit(1);
	}
	sequencesInName = argv[1];
	sequencesOutName= argv[2];
	vector<FASTASequence*> sequences;
	vector<int> sequenceIndices;

	FASTAReader reader;
	reader.Init(sequencesInName);
	ofstream out;
	CrucialOpen(sequencesOutName, out, std::ios::out);
	

	FASTASequence read;
	FASTASequence*readPtr;
	while(reader.GetNext(read)) {
		readPtr = new FASTASequence;
		*readPtr = read;
		sequences.push_back(readPtr);
	}

	int i;
	for (i = 0; i < sequences.size(); i++) {
		sequenceIndices.push_back(i);
	}

	for (i = 0; i < 10*sequences.size(); i++ ){
		//
		// shuffle indices.
		//
		int idx1;
		int idx2;
		idx1 = RandomInt(sequences.size());
		idx2 = RandomInt(sequences.size());
		int tmp;
		tmp  = sequenceIndices[idx1];
		sequenceIndices[idx1] = sequenceIndices[idx2];
		sequenceIndices[idx2] = tmp;
	}
	
	for (i = 0; i < sequenceIndices.size(); i++ ){
		sequences[sequenceIndices[i]]->PrintSeq(out);
	}
	return 0;
}
开发者ID:EichlerLab,项目名称:blasr,代码行数:49,代码来源:ScrambleSequenceOrder.cpp

示例6: main

int main(int argc, char* argv[]) {

	CommandLineParser clp;
	string fastaFileName, indexFileName;
	vector<string> fastaFileNames;
	vector<string> opts;
	clp.SetProgramName("bsdb");
	clp.SetProgramSummary("Build an index database on a file of sequences.\n"
												" The index is used to map to reads given alignment positions.\n");
	clp.RegisterStringOption("fasta", &fastaFileName, "A file with sequences to build an index.");
	clp.RegisterStringOption("index", &indexFileName, "The index file.");
	clp.RegisterPreviousFlagsAsHidden();

	clp.ParseCommandLine(argc, argv, opts);

	ifstream fastaIn;
	ofstream indexOut;

	if (FileOfFileNames::IsFOFN(fastaFileName)) {
		FileOfFileNames::FOFNToList(fastaFileName, fastaFileNames);
	}
	else {
		fastaFileNames.push_back(fastaFileName);
	}

	CrucialOpen(indexFileName, indexOut, std::ios::out | std::ios::binary);
	SequenceIndexDatabase<FASTASequence> seqDB;
		
	int fileNameIndex;
	for (fileNameIndex = 0; fileNameIndex < fastaFileNames.size(); fileNameIndex++){ 
		FASTAReader reader;
		FASTASequence seq;
		reader.Init(fastaFileNames[fileNameIndex]);
		int i = 0;
		while (reader.GetNext(seq)) {
			seqDB.AddSequence(seq);
			i++;
		}
	}
	seqDB.Finalize();
	seqDB.WriteDatabase(indexOut);
	return 0;
}
开发者ID:EichlerLab,项目名称:blasr,代码行数:43,代码来源:BuildSequenceDB.cpp

示例7: main

int main(int argc, char* argv[]) {
		if (argc < 4) {
			cout << "usage: splitContigs in.fa contiglength out" << endl;
			exit(1);
		}
		string inFileName, outFileName;
		inFileName = argv[1];
		int contigLength = atoi(argv[2]);		
		outFileName = argv[3];

		ofstream seqOut;
		CrucialOpen(outFileName, seqOut, std::ios::out);
		FASTAReader reader;
		reader.Init(inFileName);
		FASTASequence seq;
		DNALength curOffset;
		
		while(reader.GetNext(seq)) {
			FASTASequence subseq;
			int i;
			curOffset = 0;
			for (i =0 ; i < seq.length / contigLength + 1; i++ ) {
				subseq.seq = &seq.seq[curOffset];
				subseq.title = seq.title;
				if (curOffset + contigLength > seq.length) {
					subseq.length = seq.length - curOffset;
				}
				else {
					subseq.length = contigLength;
				}
				subseq.PrintSeq(seqOut);
				curOffset += contigLength;
			}
		}
		return 0;
}
开发者ID:asifemon,项目名称:blasr,代码行数:36,代码来源:SplitContigs.cpp

示例8: main

int main(int argc, char* argv[]) {
	string ad1File, ad2File, readsFile, readsOutFile;

	FASTAReader ad1Reader;
	FASTAReader ad2Reader;
	FASTAReader reader;


	CommandLineParser cl;
	float minPctSimilarity = 0.60;
	int indel = 3;
	int minLength = 10;
	cl.RegisterStringOption("ad1", &ad1File, "FASTA file with the first adapter");
	cl.RegisterStringOption("ad2", &ad2File, "FASTA file with the second adapter");
	cl.RegisterStringOption("reads", &readsFile, "FASTA file with SMRTBell reads");
	cl.RegisterStringOption("readsout", &readsOutFile, "output file for split reads");
	cl.RegisterPreviousFlagsAsHidden();
	cl.RegisterFloatOption("pctSim", &minPctSimilarity, "Minimum percent similarity to trigger a match to an adapter.", 
												 CommandLineParser::PositiveFloat);
	cl.RegisterIntOption("indel", &indel, "Penalty for indel (positive)", CommandLineParser::NonNegativeInteger);
	cl.RegisterIntOption("minLength", &minLength, "Minimum length pass to retain.", CommandLineParser::PositiveInteger);
	vector<string> opts;
	cl.ParseCommandLine(argc, argv, opts);

	/*
	 * Open all the required files, quitting if they are unavailable.
	 */

	ad1Reader.Init(ad1File);
	ad2Reader.Init(ad2File);
	reader.Init(readsFile);

	ofstream splitOut;
	CrucialOpen(readsOutFile, splitOut);

	FASTASequence ad1, ad2;
	ad1Reader.GetNext(ad1);
	ad2Reader.GetNext(ad2);

	FASTASequence read;
	vector<int> scoreMat;
	vector<Arrow> pathMat;
	int readIndex = 0;
	while(reader.GetNext(read)) {
		read.ToUpper();
		//
		// Do a fitting sequence alignment to match one of the two 
		// adapters into the read.
		//
		vector<int> passStarts, passLengths, la;
		read.PrintSeq(cout);
		SplitRead(read, 0, read.length, ad1, ad2,
							indel, 
							passStarts, passLengths,la, 0,
							scoreMat, pathMat, minPctSimilarity, minLength);
		int i;
		for (i = 0; i < passStarts.size(); i++) {
			cout << "read: " << readIndex << " pass: " << i << " " << passStarts[i] << " " << passLengths[i] << " " << la[i] << endl;
		}
		++readIndex;
	}
}
开发者ID:csuliweilong,项目名称:blasr,代码行数:62,代码来源:RemoveAdapters.cpp

示例9: main

int main(int argc, char* argv[]) {
	CommandLineParser clp;

	string refGenomeName;
	string mutGenomeName;
  string gffFileName;
	float insRate = 0;
	float delRate = 0;
	float mutRate = 0;
  bool  lower = false;
  gffFileName = "";
	clp.RegisterStringOption("refGenome", &refGenomeName, "Reference genome.", true);
	clp.RegisterStringOption("mutGenome", &mutGenomeName, "Mutated genome.", true);
	clp.RegisterPreviousFlagsAsHidden();
  clp.RegisterStringOption("gff", &gffFileName, "GFF file describing the modifications made to the genome.");
	clp.RegisterFloatOption("i", &insRate, "Insertion rate: (0-1].", 
													CommandLineParser::NonNegativeFloat, false);
	clp.RegisterFloatOption("d", &delRate, "Deletion rate: (0-1]", 
													CommandLineParser::NonNegativeFloat, false);
	clp.RegisterFloatOption("m", &mutRate, "Mutation rate, even across all nucleotides: (0-1]", 
													CommandLineParser::NonNegativeFloat, false);
  clp.RegisterFlagOption("lower", &lower, "Make mutations in lower case", false);
	vector<string> leftovers;
	clp.ParseCommandLine(argc, argv, leftovers);
  
	FASTAReader reader;
	FASTASequence refGenome;

	reader.Init(refGenomeName);
	ofstream mutGenomeOut;
	CrucialOpen(mutGenomeName, mutGenomeOut, std::ios::out);
  ofstream gffOut;
  if (gffFileName != "") {
    CrucialOpen(gffFileName, gffOut, std::ios::out);
  }

	vector<int> insIndices, delIndices, subIndices;
	int readIndex = 0;
	InitializeRandomGeneratorWithTime();
	while (reader.GetNext(refGenome)) {
		insIndices.resize(refGenome.length);
		delIndices.resize(refGenome.length);
    subIndices.resize(refGenome.length);
		std::fill(insIndices.begin(), insIndices.end(), false);
		std::fill(delIndices.begin(), delIndices.end(), false);
    std::fill(subIndices.begin(), subIndices.end(), 0);

		enum ChangeType { Ins, Del, Mut, None};
		float changeProb[4];
		changeProb[Ins] = insRate;
		changeProb[Del] = changeProb[Ins] + delRate;
		changeProb[Mut] = changeProb[Del] + mutRate;
		changeProb[None] = 1;

		if (changeProb[Mut] > 1) {
			cout << "ERROR! The sum of the error probabilities must be less than 1" << endl;
			exit(1);
		}
		DNALength pos;
		float randomNumber;
		int numIns = 0;
		int numDel = 0;
		int numMut = 0;
		for (pos =0 ; pos < refGenome.length; pos++) { 
			randomNumber = Random();
			if (randomNumber < changeProb[Ins]) {
				insIndices[pos] = true;
				numIns++;
			}
			else if (randomNumber < changeProb[Del]) {
				delIndices[pos] = true;
				numDel++;
			}
			else if (randomNumber < changeProb[Mut]){ 
				Nucleotide newNuc = TwoBitToAscii[RandomInt(4)];
				int maxIts = 100000;
				int it = 0;
				while (newNuc == refGenome.seq[pos]) {
					newNuc = TwoBitToAscii[RandomInt(4)];
					if (it == maxIts) {
						cout << "ERROR, something is wrong with the random number generation, it took too many tries to generate a new nucleotide" << endl;
						exit(1);
					}
				}
        subIndices[pos] = refGenome[pos];
				refGenome.seq[pos] = ToLower(newNuc,lower);
				++numMut;
			}
		}
		//		cout << readIndex << " m " << numMut << " i " << numIns << " d " << numDel << endl;
		if (readIndex % 100000 == 0 && readIndex > 0) {
			cout << readIndex << endl;
		}
		// 
		// Now add the insertions and deletions.
		//
		FASTASequence newSequence;
		DNALength   newPos;
		if (numIns - numDel + refGenome.length < 0) {
			cout << "ERROR, the genome has been deleted to nothing." << endl;
//.........这里部分代码省略.........
开发者ID:EichlerLab,项目名称:blasr,代码行数:101,代码来源:Evolve.cpp

示例10: main

int main(int argc, char* argv[]) {
  FASTAReader reader;
  if (argc < 5) {
	cout << "usage: wordCounter seqFile tupleSize tupleOutputFile posOutputFile" << endl;
	exit(1);
  }

  string fileName = argv[1];
  int    tupleSize = atoi(argv[2]);
  string tupleListName = argv[3];
	string posOutName    = argv[4];
  
	TupleMetrics tm;
  tm.Initialize(tupleSize);
  reader.Init(fileName);

  FASTASequence seq;
  reader.GetNext(seq);

  vector<CountedDNATuple> tupleList;
  CountedDNATuple tuple;
  DNALength i;
  for (i = 0; i < seq.length - tm.tupleSize + 1; i++ ) {
		if (tuple.FromStringRL((Nucleotide*) (seq.seq + i), tm)) {
			tuple.count = i;
			tupleList.push_back(tuple);
		}
  }

  std::sort(tupleList.begin(), tupleList.end());

  int t;
  int t2;
  int numTuples = tupleList.size();
  t = t2 = 0;
  int numUnique = 0;
  while (t < numTuples) {
	t2 = t;
	t2++;
	while (t2 < numTuples and tupleList[t] == tupleList[t2]) {
	  t2++;
	}
	++numUnique;
	t = t2;
  }

  ofstream countedTupleListOut;
  countedTupleListOut.open(tupleListName.c_str(), ios_base::binary);

	ofstream posOut;
	posOut.open(posOutName.c_str(), ios_base::binary);

  countedTupleListOut.write((const char*) &numUnique, sizeof(int));
  countedTupleListOut.write((const char*) &tm.tupleSize, sizeof(int));

  posOut.write((const char*) &numUnique, sizeof(int));

	//
	// Write out the tuple+counts to a file.
	//
  t = t2 = 0;
  CountedDNATuple countedTuple;
	int numMultOne = 0;
  while (t < numTuples) {
		t2 = t;
		t2++;
		while (t2 < numTuples and tupleList[t] == tupleList[t2]) {
			t2++;
		}
		countedTuple.tuple = tupleList[t].tuple;
		countedTuple.count = t2 - t;
		if (countedTuple.count == 1) ++numMultOne;
		countedTupleListOut.write((const char*) &countedTuple,sizeof(CountedDNATuple));
		
		posOut.write((char*)&countedTuple.count, sizeof(int));
		
		int tc;
		for (tc = t; tc < t2; tc++) {
			posOut.write((char*) &tupleList[tc].count, sizeof(int));
		}
		t = t2;
  }

	//
	// Write out the positions of the tuples to a file.
	//
	
	posOut.close();
	countedTupleListOut.close();

	//  cout << "found " << numUnique << " distinct " << DNATuple::TupleSize << "-mers." << endl;
	cout << numMultOne << endl;
  return 0;
}
开发者ID:csuliweilong,项目名称:blasr,代码行数:94,代码来源:WordCounter.cpp

示例11: main


//.........这里部分代码省略.........
                diffCoverSize == 2281) ) {
          cout << "The difference cover size must be one of 7,32,64,111, or 2281." << endl;
          cout << "Larger numbers use less space but are more slow." << endl;
          exit(1);
        }
			}
			else if (strcmp(argv[argi], "-4bit") == 0) {
				read4BitCompressed = 1;
			}
			else {
				PrintUsage();
				cout << "ERROR, bad option: " << argv[argi] << endl;
				exit(1);
			}
		}
		++argi;
	}
  
  if (inFiles.size() == 0) {
    //
    // Special use case: the input file is a fasta file.  Write to that file + .sa
    //
    inFiles.push_back(saFile);
    saFile = saFile + ".sa";
  }
  
	VectorIndex inFileIndex;
	FASTASequence seq;
	CompressedSequence<FASTASequence> compSeq;

	if (read4BitCompressed == 0) {
		for (inFileIndex = 0; inFileIndex < inFiles.size(); ++inFileIndex) {
			FASTAReader reader;
			reader.Init(inFiles[inFileIndex]);
			reader.SetSpacePadding(111);
			if (saBuildType == kark) {
				//
				// The Karkkainen sa building method requires a little extra
				// space at the end of the dna sequence so that counting may
				// be done mod 3 without adding extra logic for boundaries.
				//
			}
  
			if (inFileIndex == 0) {
				reader.ReadAllSequencesIntoOne(seq);
				reader.Close();
			}
			else {
				while(reader.ConcatenateNext(seq)) {
					cout << "added " << seq.title << endl;
				}
			}
		}
		seq.ToThreeBit();
		//seq.ToUpper();
	}
	else {
		assert(inFiles.size() == 1);
		cout << "reading compressed sequence." << endl;
		compSeq.Read(inFiles[0]);
		seq.seq = compSeq.seq;
		seq.length = compSeq.length;
		compSeq.RemoveCompressionCounts();
		cout << "done." << endl;
	}
开发者ID:bnbowman,项目名称:blasr,代码行数:66,代码来源:SAWriter.cpp

示例12: main

int main(int argc, char* argv[]) {
    string inFileName, readsFileName;
    DNALength readLength;
    float coverage = 0;
    bool noRandInit = false;
    int numReads = -1;
    CommandLineParser clp;
    int qualityValue = 20;
    bool printFastq = false;
    int stratify = 0;
    string titleType = "pacbio";
    string fastqType = "illumina"; // or "sanger"
    clp.RegisterStringOption("inFile", &inFileName, "Reference sequence", 0);
    clp.RegisterPreviousFlagsAsHidden();
    clp.RegisterIntOption("readLength", (int*) &readLength, "The length of reads to simulate.  The length is fixed.",
                          CommandLineParser::PositiveInteger, "Length of every read.", 0);
    clp.RegisterFloatOption("coverage", &coverage, "Total coverage (from which the number of reads is calculated",
                            CommandLineParser::PositiveFloat, 0);
    clp.RegisterFlagOption("nonRandInit", &noRandInit, "Skip initializing the random number generator with time.");
    clp.RegisterIntOption("nReads", &numReads, "Total number of reads (from which coverage is calculated)", CommandLineParser::PositiveInteger, 0);
    clp.RegisterStringOption("readsFile", &readsFileName, "Reads output file", 0);
    clp.RegisterFlagOption("fastq", &printFastq, "Fake fastq output with constant quality value (20)");
    clp.RegisterIntOption("quality", &qualityValue, "Value to use for fastq quality", CommandLineParser::PositiveInteger);
    clp.RegisterIntOption("stratify", &stratify, "Sample a read every 'stratify' bases, rather than randomly.", CommandLineParser::PositiveInteger);
    clp.RegisterStringOption("titleType", &titleType, "Set the name of the title: 'pacbio'|'illumina'");
    clp.RegisterStringOption("fastqType", &fastqType, "Set the type of fastq: 'illumina'|'sanger'");
    vector<string> leftovers;
    clp.ParseCommandLine(argc, argv, leftovers);

    if (!noRandInit) {
        InitializeRandomGeneratorWithTime();
    }

    FASTAReader inReader;
    inReader.Init(inFileName);
    vector<FASTASequence> reference;

    inReader.ReadAllSequences(reference);
    ofstream readsFile;
    if (readsFileName == "") {
        cout << "ERROR.  You must specify a reads file." << endl;
        exit(0);
    }
    CrucialOpen(readsFileName, readsFile, std::ios::out);

    ofstream sangerFastqFile;
    if (fastqType == "sanger") {
        string sangerFastqFileName = readsFileName + ".fastq";
        CrucialOpen(sangerFastqFileName, sangerFastqFile, std::ios::out);
    }

    DNALength refLength = 0;
    int i;
    for (i = 0; i < reference.size(); i++) {
        refLength += reference[i].length;
    }
    if (numReads == -1 and coverage == 0 and stratify == 0) {
        cout << "Error, you must specify either coverage, nReads, or stratify." << endl;
        exit(1);
    }
    else if (numReads == -1) {
        numReads = (refLength / readLength) * coverage;
    }

    if (stratify) {
        if (!readLength) {
            cout << "ERROR. If you are using stratification, a read length must be specified." << endl;
            exit(1);
        }
    }

    DNASequence sampleSeq;
    sampleSeq.length = readLength;
    int maxRetry = 10000000;
    int retryNumber = 0;
    DNALength seqIndex, seqPos;
    if (stratify) {
        seqIndex = 0;
        seqPos   = 0;
    }
    DNALength origReadLength = readLength;
    for (i = 0; stratify or i < numReads; i++) {
        if (stratify == 0) {
            FindRandomPos(reference, seqIndex, seqPos, readLength );
        }
        else {
            //
            // find the next start pos, or bail if done
            //
            if (seqPos >= reference[seqIndex].length) {
                if (seqIndex == reference.size() - 1) {
                    break;
                }
                else {
                    seqIndex = seqIndex + 1;
                    seqPos   = 0;
                    continue;
                }
            }
            readLength = min(reference[seqIndex].length - seqPos, origReadLength);
//.........这里部分代码省略.........
开发者ID:ylipacbio,项目名称:ForReferenceOnly,代码行数:101,代码来源:SimpleShredder.cpp

示例13: main

int main(int argc, char* argv[]) {
	
	if (argc < 4) {
		PrintUsage();
		exit(0);
	}

	string rgFileName, vertexSeqFileName, scaffoldDirName;
	
	rgFileName         = argv[1];
	vertexSeqFileName  = argv[2];
	scaffoldDirName    = argv[3];

	string repeatFileName = "";
	bool printRepeatsSeparately = false;
	int argi = 4;
	bool printSeparate=false;
	while (argi < argc) {
		if (strcmp(argv[argi], "-separate") == 0) {
			printSeparate=true;
		}
		else if (strcmp(argv[argi], "-repeats") == 0) {
			printRepeatsSeparately = true;
			repeatFileName = argv[++argi];
		}
		else {
			cout << "bad option: " << argv[argi] << endl;
			PrintUsage();
			exit(1);
		}
		++argi;
	}
	
	FASTAReader vertexSequenceReader;
	vertexSequenceReader.Init(vertexSeqFileName);

	//
	// Input necessary data
	//
	vector<FASTASequence> vertexSequences;
	vertexSequenceReader.ReadAllSequences(vertexSequences);
	RepeatGraph<string> rg;
	rg.ReadGraph(rgFileName);

	vector<FASTASequence> vertexRCSequences;
	VectorIndex vertexIndex;	
	vertexRCSequences.resize(vertexSequences.size());
	for (vertexIndex = 0; vertexIndex < vertexSequences.size(); vertexIndex++ ){
		vertexSequences[vertexIndex].MakeRC(vertexRCSequences[vertexIndex]);
	}
	
	VectorIndex outEdgeIndex;
	int scaffoldIndex = 0;
	ofstream scaffoldOut;

	if (printSeparate==false) {
		// scaffold dir name is really a file name here.
		CrucialOpen(scaffoldDirName, scaffoldOut, std::ios::out);
	}
	for (vertexIndex = 0; vertexIndex < rg.vertices.size(); vertexIndex++ ){
		rg.vertices[vertexIndex].traversed = false;
	}

	//
	// Set up flow for calling multiplicity.
	//
	/*
		Test all this out later.
	AssignMinimumFlowToEdges(rg, 2);
	AssignVertexFlowBalance(rg);
	BalanceKirchhoffFlow(rg);

	UInt edgeIndex;
	for (edgeIndex = 0; edgeIndex < rg.edges.size(); edgeIndex++) {
		if (rg.edges[edgeIndex].flow > 1) {
			cout << edgeIndex << " " << rg.edges[edgeIndex].flow << endl;
		}
	}
	*/

	int numPrintedVertices = 0;
	for (vertexIndex = 0; vertexIndex < rg.vertices.size(); vertexIndex++ ){
		//
		// Look to see if this vertex is a branching vertex.
		//
		if ((rg.vertices[vertexIndex].inEdges.size() != 1 or
				 rg.vertices[vertexIndex].outEdges.size() != 1) and
				rg.vertices[vertexIndex].traversed == false) {

			//
			// This is a branching vertex.  Print all paths from this vertex, but not the vertex
			// itself if it appears repetitive. 
			//
			VectorIndex outEdgeIndex;
			bool printedThisVertex = false;
			for (outEdgeIndex = 0; outEdgeIndex < rg.vertices[vertexIndex].outEdges.size(); outEdgeIndex++ ){
				//
				// This is a branching vertex.
				// 

//.........这里部分代码省略.........
开发者ID:jcombs1,项目名称:blasr,代码行数:101,代码来源:PrintScaffolds.cpp

示例14: main

int main(int argc, char* argv[]) {
	string rgInName, rgOutName;
	int minPathLength;
	string vertexSequenceFileName;
	if (argc < 5) {
		cout << "usage: trimShortEnds in.rg  vertexSequences minPathLength out.rg" << endl;
		exit(1);
	}

	rgInName      = argv[1];
	vertexSequenceFileName = argv[2];
	minPathLength = atoi(argv[3]);
	rgOutName     = argv[4];

	ofstream rgOut;
	CrucialOpen(rgOutName, rgOut, std::ios::out);
	FASTAReader vertexSequenceReader;
	vertexSequenceReader.Init(vertexSequenceFileName);

	RepeatGraph<string> rg;
	vector<FASTASequence> vertexSequences;
	rg.ReadGraph(rgInName);
	vertexSequenceReader.ReadAllSequences(vertexSequences);

	VectorIndex vertexIndex;
	VectorIndex outEdgeIndex;
	VectorIndex edgeIndex;
	
	if (rg.edges.size() == 0) {
		cout << "LIKELY INVALID GRAPH. There are no edges." << endl;
		return 0;
	}
	//
	// At first, any edge that exists is connected to a vertex. This
	// will change as low coverage edges are deleted and replaced by
	// high coverage edges from the end of the array.
	//
	for (edgeIndex = 0; edgeIndex < rg.edges.size(); edgeIndex++) {
		rg.edges[edgeIndex].connected = true;
	}
	set<std::pair<VectorIndex, VectorIndex> > srcDestToRemove;
	
	for (vertexIndex = 0; vertexIndex < rg.vertices.size(); vertexIndex++) {
		if (rg.vertices[vertexIndex].inEdges.size() == 0 and
				rg.vertices[vertexIndex].outEdges.size() == 1) {
			//
			// This is a source.  Traverse this until a branching vertex or the end is found.
			//
			vector<VectorIndex> path;
			path.push_back(vertexIndex);
			int pathLength = 0;
			VectorIndex pathVertex;
			VectorIndex pathEdge;
			pathEdge = rg.vertices[vertexIndex].outEdges[0];
			pathVertex = rg.edges[pathEdge].dest;
			while (rg.vertices[pathVertex].inEdges.size() == 1 and
						 rg.vertices[pathVertex].outEdges.size() == 1) {
				path.push_back(pathVertex);
				pathEdge   =  rg.vertices[pathVertex].outEdges[0];
				pathVertex =  rg.edges[pathEdge].dest;
				pathLength += vertexSequences[pathVertex/2].length;
			}
			pathLength += vertexSequences[pathVertex/2].length;
			path.push_back(pathVertex);
			if (pathLength < minPathLength and path.size() < 3) {
				//
				// Remove this path, it is too short.
				// Also remove the complement.
				//
				cout << "trimming path of " << path.size() << " is of sequence length " << pathLength << endl;

				VectorIndex pathIndex;
				for (pathIndex = 0; pathIndex < path.size() - 1; pathIndex++) {
					srcDestToRemove.insert(pair<VectorIndex, VectorIndex>(path[pathIndex], path[pathIndex+1]));
					srcDestToRemove.insert(pair<VectorIndex, VectorIndex>(2*(path[pathIndex+1]/2) + !(path[pathIndex+1]%2),
																																2*(path[pathIndex]/2) + !(path[pathIndex]%2)));
				}
			}
		}
	}

	MarkEdgePairsForRemoval(srcDestToRemove, rg.vertices, rg.edges);
	RemoveUnconnectedEdges(rg.vertices, rg.edges);

	rg.WriteGraph(rgOut);
	return 0;
}
开发者ID:BioinformaticsArchive,项目名称:blasr,代码行数:87,代码来源:TrimShortEnds.cpp

示例15: main

int main(int argc, char* argv[]) {
	string genomeFileName;
	string suffixArrayFileName;
	if (argc < 4) {
		cout << "Usage: printWordCount genome suffixArray k [k2 k3 k4...]" << endl;
		exit(1);
	}
	genomeFileName = argv[1];
	suffixArrayFileName = argv[2];
	int argi = 3;
	vector<DNALength> k;
	while (argi < argc) {
		k.push_back(atoi(argv[argi]));
		argi++;
	}

	// Get the ref sequence.
	FASTAReader reader;
	reader.Init(genomeFileName);
	FASTASequence seq;
  //	reader.GetNext(seq);
  reader.ReadAllSequencesIntoOne(seq);
	seq.ToUpper();
	// Get the suffix array.
	DNASuffixArray sarray;
	sarray.Read(suffixArrayFileName);
	
	int ki;
  char *word;
  cout << "wordlen word nword" << endl;
	for (ki = 0; ki < k.size(); ki++) {
    word = new char[k[ki]+1];
    word[k[ki]] = '\0';
		DNALength i;
		DNALength numUnique = 0;
		for (i = 0; i < seq.length - k[ki] - 1; ) {
			DNALength j = i + 1;
      bool seqAtN = false;
      int si;
      for(si = 0; si < k[ki]; si++) {
        if (seq.seq[sarray.index[i] + si] == 'N') {
          seqAtN = true;
          break;
        }
      }
      if (seqAtN) {
        i++;
        continue;
      }
			while (j < seq.length - k[ki] and 
						 seq.length - sarray.index[i] >= k[ki] and
						 seq.length - sarray.index[j] >= k[ki] and 
						 strncmp((const char*) &seq.seq[sarray.index[i]], (const char*) &seq.seq[sarray.index[j]], k[ki]) == 0) {
				j++;
			}
      if (seq.length - sarray.index[i] >= k[ki]) {
        for(si = 0; si < k[ki]; si++) {
          word[si] = seq.seq[sarray.index[i]+si];
        }
        cout << k[ki] << " " << word << " " << j - i + 1 << endl;
        if (j == i + 1) { 
          ++numUnique;
        }
      }
			i = j;
		}
	}
}
开发者ID:EichlerLab,项目名称:blasr,代码行数:68,代码来源:PrintWordCount.cpp


注:本文中的FASTAReader::Init方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。