本文整理汇总了C++中FASTAReader::ReadAllSequences方法的典型用法代码示例。如果您正苦于以下问题:C++ FASTAReader::ReadAllSequences方法的具体用法?C++ FASTAReader::ReadAllSequences怎么用?C++ FASTAReader::ReadAllSequences使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类FASTAReader
的用法示例。
在下文中一共展示了FASTAReader::ReadAllSequences方法的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: main
int main(int argc, char* argv[]) {
if (argc < 3) {
cout << "usage: testRandomSequence genome.fa ntries " << endl;
exit(0);
}
string inFile = argv[1];
int nSamples = atoi(argv[2]);
if (nSamples == 0) {
return 0;
}
FASTAReader reader;
reader.Initialize(inFile);
vector<FASTASequence> genome;
reader.ReadAllSequences(genome);
int i;
cout << "title pos" << endl;
for (i = 0; i < nSamples; i++) {
DNALength chrIndex, chrPos;
FindRandomPos(genome, chrIndex, chrPos);
cout << genome[chrIndex].title << " " << chrPos << endl;
}
return 0;
}
示例2: main
int main(int argc, char* argv[]) {
CommandLineParser clp;
string readsFileName;
string alignmentsFileName;
string outputFileName;
float minMergeIdentity = 0.70;
clp.RegisterStringOption("reads", &readsFileName, "Reads used for alignments.");
clp.RegisterStringOption("alignments", &alignmentsFileName, "SAM formatted alignments.");
clp.RegisterIntOption("k", &vertexSize, "Minimum match length", CommandLineParser::PositiveInteger);
clp.RegisterStringOption("outfile", &outputFileName, "Alignment output.");
clp.RegisterPreviousFlagsAsHidden();
clp.RegisterFlagOption("v", &verbose, "");
clp.RegisterFloatOption("minMergeIdentity",
&minMergeIdentity,
"Minimum identity to merge paths.", CommandLineParser::PositiveFloat);
clp.ParseCommandLine(argc, argv);
if (minMergeIdentity < 0 or minMergeIdentity > 1) {
cout << "ERROR. minMergeIdentity must be between 0 and 1" << endl;
exit(1);
}
vector<FASTASequence> reads;
FASTAReader fastaReader;
fastaReader.Initialize(readsFileName);
fastaReader.ReadAllSequences(reads);
//
// It is necessary to go from read title to index in the list of reads.
//
map<string, int> readNameToIndex;
BuildReadNameToIndexMap(reads, readNameToIndex);
ReadWordMatchVector readWordMatches;
InitializeFromReads(reads, readWordMatches);
//
// Get ready to read in the alignments.
//
SAMReader<SAMFullReferenceSequence, SAMReadGroup, SAMPosAlignment> samReader;
samReader.Initialize(alignmentsFileName);
AlignmentSet<SAMFullReferenceSequence, SAMReadGroup, SAMPosAlignment> alignmentSet;
samReader.ReadHeader(alignmentSet);
SAMAlignment samAlignment;
AlignmentCandidate<> alignment;
int numAlignedBases = 0;
int alignmentIndex = 0;
while ( samReader.GetNextAlignment( samAlignment ) ) {
vector<AlignmentCandidate<> > alignments;
SAMAlignmentsToCandidates(samAlignment,
reads,
readNameToIndex,
alignments, false, true);
int i;
++alignmentIndex;
int a;
for (a = 0; a < alignments.size();a++) {
if (alignments[a].qName != alignments[a].tName) {
MarkMatches(alignments[a], readNameToIndex, vertexSize, readWordMatches);
}
}
if (alignmentIndex % 1000 == 0) {
cout << alignmentIndex << endl;
}
}
int numMatches = 0;
int parentIndex = 1;
int r;
for (r = 0; r < readWordMatches.size(); r++) {
readWordMatches[r].CreateParents();
numMatches += readWordMatches[r].pos.size();
}
vector<int> parentIndices;
parentIndices.resize(2*numMatches + 1);
fill(parentIndices.begin(), parentIndices.end(), 0);
//
// Start indexing off at 1 so that 0 does not need to be treated in
// a special case.
//
int curParentIndex = 1;
cout << "There are " << numMatches << " matches." << endl;
samReader.Close();
samReader.Initialize(alignmentsFileName);
AlignmentSet<SAMFullReferenceSequence, SAMReadGroup, SAMPosAlignment> alignmentSet2;
samReader.ReadHeader(alignmentSet2);
numAlignedBases = 0;
alignmentIndex = 0;
while ( samReader.GetNextAlignment( samAlignment ) ) {
vector<AlignmentCandidate<> > alignments;
SAMAlignmentsToCandidates(samAlignment,
//.........这里部分代码省略.........
示例3: main
int main(int argc, char* argv[]) {
string refFileName, notNormalFileName, normalFileName;
if (argc < 4) {
cout << "usage: normalizeGCContent ref source dest " << endl
<< " flips the C/Gs in source randomly until they are the same gc content as ref." << endl;
exit(1);
}
refFileName = argv[1];
notNormalFileName = argv[2];
normalFileName = argv[3];
FASTAReader reader;
FASTAReader queryReader;
FASTASequence ref;
vector<FASTASequence> querySequences;
int queryTotalLength;
reader.Initialize(refFileName);
reader.ReadAllSequencesIntoOne(ref);
queryReader.Initialize(notNormalFileName);
int refCounts[5], queryCounts[5];
int s;
refCounts[0] = refCounts[1] =refCounts[2] = refCounts[3] = refCounts[4] = 0;
queryCounts[0] = queryCounts[1] =queryCounts[2] = queryCounts[3] = queryCounts[4] = 0;
queryReader.ReadAllSequences(querySequences);
ofstream normOut;
CrucialOpen(normalFileName, normOut);
CountNucs(ref, refCounts);
float refGC = (1.0*refCounts[TwoBit['c']] + refCounts[TwoBit['g']]) / (refCounts[TwoBit['a']] + refCounts[TwoBit['c']] + refCounts[TwoBit['g']] + refCounts[TwoBit['t']]);
int q;
for (q = 0; q < querySequences.size(); q++) {
CountNucs(querySequences[q], queryCounts);
}
float queryGC = (1.0*queryCounts[TwoBit['c']] + queryCounts[TwoBit['g']]) / (queryCounts[TwoBit['a']] + queryCounts[TwoBit['c']] + queryCounts[TwoBit['g']] + queryCounts[TwoBit['t']]);
float gcToat = 0.0;
float atTogc = 0.0;
if (refGC > queryGC) {
atTogc = (refGC - queryGC);
}
else {
gcToat = (queryGC - refGC);
}
DNALength queryGenomeLength = queryCounts[0] + queryCounts[1] + queryCounts[2] + queryCounts[3] + queryCounts[4];
DNALength unmaskedQueryLength = queryCounts[0] + queryCounts[1] + queryCounts[2] + queryCounts[3];
DNALength ngc2at = unmaskedQueryLength * gcToat;
DNALength nat2gc = unmaskedQueryLength * atTogc;
cout << refGC << " " << queryGC << " " << gcToat << " " << atTogc << " " << ngc2at << " " << nat2gc << endl;
vector<FASTASequence> normalized;
normalized.resize(querySequences.size());
vector<DNALength> cumLengths;
cumLengths.resize(normalized.size()+1);
cumLengths[0] = 0;
for (q = 0; q < querySequences.size(); q++) {
normalized[q] = querySequences[q];
cumLengths[q+1] = cumLengths[q] + querySequences[q].length;
}
DNALength i;
for (i = 0; i < ngc2at; i+=2) {
DNALength pos, chr;
FindRandomNuc(normalized, queryGenomeLength, cumLengths, 'G', chr, pos);
normalized[chr].seq[pos] = 'A';
FindRandomNuc(normalized, queryGenomeLength, cumLengths, 'C', chr, pos);
normalized[chr].seq[pos] = 'T';
}
for (i = 0; i < nat2gc; i+=2) {
DNALength pos, chr;
FindRandomNuc(normalized, queryGenomeLength, cumLengths, 'A', chr, pos);
normalized[chr].seq[pos] = 'g';
FindRandomNuc(normalized, queryGenomeLength, cumLengths, 'T', chr, pos);
normalized[chr].seq[pos] = 'c';
}
for (q = 0; q < normalized.size(); q++ ){
normalized[q].PrintSeq(normOut);
}
}
示例4: main
int main(int argc, char* argv[]) {
string program = "samtoh5";
string versionString = VERSION;
AppendPerforceChangelist(PERFORCE_VERSION_STRING, versionString);
string samFileName, cmpFileName, refFileName;
bool parseSmrtTitle = false;
bool useShortRefName = false;
CommandLineParser clp;
string readType = "standard";
int verbosity = 0;
clp.SetProgramName(program);
clp.SetProgramSummary("Converts in.sam file to out.cmp.h5 file.");
clp.SetVersion(versionString);
clp.RegisterStringOption("in.sam", &samFileName,
"Input SAM file.", true);
clp.RegisterStringOption("reference.fasta", &refFileName,
"Reference used to generate reads.", true);
clp.RegisterStringOption("out.cmp.h5", &cmpFileName,
"Output cmp.h5 file.", true);
clp.RegisterPreviousFlagsAsHidden();
clp.RegisterFlagOption("smrtTitle", &parseSmrtTitle,
"Use this option when converting alignments "
"generated from reads produced by the "
"pls2fasta from bas.h5 files by parsing read "
"coordinates from the SMRT read title. The title "
"is in the format /name/hole/coordinates, where "
"coordinates are in the format \\d+_\\d+, and "
"represent the interval of the read that was "
"aligned.");
clp.RegisterStringOption("readType", &readType,
"Set the read type: 'standard', 'strobe', 'CCS', "
"or 'cDNA'");
clp.RegisterIntOption("verbosity", &verbosity,
"Set desired verbosity.",
CommandLineParser::PositiveInteger);
clp.RegisterFlagOption("useShortRefName", &useShortRefName,
"Use abbreviated reference names obtained "
"from file.sam instead of using full names "
"from reference.fasta.");
string description = ("Because SAM has optional tags that have different "
"meanings in different programs, careful usage is required in order to "
"have proper output. The \"xs\" tag in bwa-sw is used to show the "
"suboptimal score, but in PacBio SAM (blasr) it is defined as the start "
"in the query sequence of the alignment.\nWhen \"-smrtTitle\" is "
"specified, the xs tag is ignored, but when it is not specified, the "
"coordinates given by the xs and xe tags are used to define the interval "
"of a read that is aligned. The CIGAR string is relative to this interval.");
clp.SetExamples(description);
clp.ParseCommandLine(argc, argv);
if (readType != "standard" and readType != "strobe" and
readType != "cDNA" and readType != "CCS") {
cout << "ERROR. Read type '" << readType
<< "' must be one of either 'standard', 'strobe', 'cDNA' or 'CCS'."
<< endl;
exit(1);
}
cerr << "[INFO] " << GetTimestamp() << " [" << program << "] started." << endl;
SAMReader<SAMFullReferenceSequence, SAMReadGroup, SAMPosAlignment> samReader;
FASTAReader fastaReader;
HDFCmpFile<AlignmentCandidate<FASTASequence, FASTASequence> > cmpFile;
//
// Initialize input/output files.
//
samReader.Initialize(samFileName);
fastaReader.Initialize(refFileName);
cmpFile.Create(cmpFileName);
//
// Configure the file log.
//
string command;
CommandLineParser::CommandLineToString(argc, argv, command);
string log = "Convert sam to cmp.h5";
cmpFile.fileLogGroup.AddEntry(command, log, program, GetTimestamp(), versionString);
//
// Set the readType
//
cmpFile.SetReadType(readType);
//
// Read necessary input.
//
vector<FASTASequence> references;
fastaReader.ReadAllSequences(references);
//
// This should probably be handled by the alignmentSetAdapter, but
// time constraints...
//
AlignmentSet<SAMFullReferenceSequence, SAMReadGroup, SAMPosAlignment> alignmentSet;
samReader.ReadHeader(alignmentSet);
//.........这里部分代码省略.........
示例5: main
int main(int argc, char* argv[]) {
string gencodeGffFileName, genomeFileName, genesOutFileName;
string geneType = "protein_coding";
bool randomSplicing = false;
int numRandomSplicing = 1;
float pSkip = 0.5;
if (argc < 4) {
cout << "Usage: extractGenes gencodeGTFFile genomeFile genesOutFileName [-geneType type (protein_coding)] [-randomSplicing] [-numRandomSplicing n] [-pSkip prob (0-1, default:0.5)]" << endl;
exit(1);
}
gencodeGffFileName = argv[1];
genomeFileName = argv[2];
genesOutFileName = argv[3];
int argi = 4;
string coordinatesFileName;
while (argi < argc) {
if (strcmp(argv[argi], "-geneType") == 0) {
geneType = argv[++argi];
}
else if (strcmp(argv[argi], "-randomSplicing") == 0) {
randomSplicing = true;
}
else if (strcmp(argv[argi], "-numRandomSplicing") == 0) {
numRandomSplicing = atoi(argv[++argi]);
}
else if (strcmp(argv[argi], "-pSkip") == 0) {
pSkip = atof(argv[++argi]);
}
else {
cout << "ERROR, bad option " << argv[argi] << endl;
exit(1);
}
++argi;
}
coordinatesFileName = genesOutFileName;
coordinatesFileName.append(".pos");
FASTAReader reader;
reader.Initialize(genomeFileName);
ofstream outFile, coordsFile;
CrucialOpen(genesOutFileName, outFile, std::ios::out);
string coordsFileName = genesOutFileName + ".coords";
CrucialOpen(coordsFileName, coordsFile, std::ios::out);
vector<FASTASequence> referenceSequences;
reader.ReadAllSequences(referenceSequences);
int i;
map<string, int> titleToIndex;
for (i = 0; i < referenceSequences.size(); i++) {
titleToIndex[referenceSequences[i].title] = i;
}
GencodeGFFFile gencodeFile;
gencodeFile.ReadAll(gencodeGffFileName);
vector<GencodeGFFGene> genes;
IndexGencodeGenes(gencodeFile, genes, geneType);
for (i = 0; i < genes.size(); i++) {
genes[i].OrderExonsByStart();
}
int e;
for (i = 0; i < genes.size(); i++) {
FASTASequence geneSequence;
geneSequence.CopyTitle(genes[i].geneName);
if (titleToIndex.find(genes[i].chromosome) == titleToIndex.end()) {
continue;
}
int chrIndex = titleToIndex[genes[i].chromosome];
string sequence = "";
//
// Do nothing with 0 length exons.
//
if (genes[i].exons.size() == 0) {
continue;
}
vector<FASTASequence> geneSequences;
vector<GeneCoordinates> geneCoordinates;
genes[i].GenerateGeneSequences(referenceSequences[chrIndex], geneSequences, geneCoordinates, randomSplicing);
int gi;
for (gi = 0; gi < geneSequences.size(); gi++) {
if (genes[i].GetStrand() == '+') {
geneSequences[gi].PrintSeq(outFile);
}
else {
FASTASequence rc;
geneSequences[gi].MakeRC(rc);
rc.PrintSeq(outFile);
rc.Free();
}
coordsFile << geneSequences[gi].title << " " << geneCoordinates[gi].chromosome << " " << geneCoordinates[gi].exonCoordinates.size() << " " << geneCoordinates[gi].strand;
int i;
for (i = 0; i < geneCoordinates[gi].exonCoordinates.size(); i++) {
coordsFile << " "
//.........这里部分代码省略.........
示例6: main
int main(int argc, char* argv[]) {
string inFileName, readsFileName;
DNALength readLength;
float coverage = 0;
bool noRandInit = false;
int numReads = -1;
CommandLineParser clp;
int qualityValue = 20;
bool printFastq = false;
int stratify = 0;
string titleType = "pacbio";
string fastqType = "illumina"; // or "sanger"
clp.RegisterStringOption("inFile", &inFileName, "Reference sequence", 0);
clp.RegisterPreviousFlagsAsHidden();
clp.RegisterIntOption("readLength", (int*) &readLength, "The length of reads to simulate. The length is fixed.",
CommandLineParser::PositiveInteger, "Length of every read.", 0);
clp.RegisterFloatOption("coverage", &coverage, "Total coverage (from which the number of reads is calculated",
CommandLineParser::PositiveFloat, 0);
clp.RegisterFlagOption("nonRandInit", &noRandInit, "Skip initializing the random number generator with time.");
clp.RegisterIntOption("nReads", &numReads, "Total number of reads (from which coverage is calculated)", CommandLineParser::PositiveInteger, 0);
clp.RegisterStringOption("readsFile", &readsFileName, "Reads output file", 0);
clp.RegisterFlagOption("fastq", &printFastq, "Fake fastq output with constant quality value (20)");
clp.RegisterIntOption("quality", &qualityValue, "Value to use for fastq quality", CommandLineParser::PositiveInteger);
clp.RegisterIntOption("stratify", &stratify, "Sample a read every 'stratify' bases, rather than randomly.", CommandLineParser::PositiveInteger);
clp.RegisterStringOption("titleType", &titleType, "Set the name of the title: 'pacbio'|'illumina'");
clp.RegisterStringOption("fastqType", &fastqType, "Set the type of fastq: 'illumina'|'sanger'");
vector<string> leftovers;
clp.ParseCommandLine(argc, argv, leftovers);
if (!noRandInit) {
InitializeRandomGeneratorWithTime();
}
FASTAReader inReader;
inReader.Init(inFileName);
vector<FASTASequence> reference;
inReader.ReadAllSequences(reference);
ofstream readsFile;
if (readsFileName == "") {
cout << "ERROR. You must specify a reads file." << endl;
exit(0);
}
CrucialOpen(readsFileName, readsFile, std::ios::out);
ofstream sangerFastqFile;
if (fastqType == "sanger") {
string sangerFastqFileName = readsFileName + ".fastq";
CrucialOpen(sangerFastqFileName, sangerFastqFile, std::ios::out);
}
DNALength refLength = 0;
int i;
for (i = 0; i < reference.size(); i++) {
refLength += reference[i].length;
}
if (numReads == -1 and coverage == 0 and stratify == 0) {
cout << "Error, you must specify either coverage, nReads, or stratify." << endl;
exit(1);
}
else if (numReads == -1) {
numReads = (refLength / readLength) * coverage;
}
if (stratify) {
if (!readLength) {
cout << "ERROR. If you are using stratification, a read length must be specified." << endl;
exit(1);
}
}
DNASequence sampleSeq;
sampleSeq.length = readLength;
int maxRetry = 10000000;
int retryNumber = 0;
DNALength seqIndex, seqPos;
if (stratify) {
seqIndex = 0;
seqPos = 0;
}
DNALength origReadLength = readLength;
for (i = 0; stratify or i < numReads; i++) {
if (stratify == 0) {
FindRandomPos(reference, seqIndex, seqPos, readLength );
}
else {
//
// find the next start pos, or bail if done
//
if (seqPos >= reference[seqIndex].length) {
if (seqIndex == reference.size() - 1) {
break;
}
else {
seqIndex = seqIndex + 1;
seqPos = 0;
continue;
}
}
readLength = min(reference[seqIndex].length - seqPos, origReadLength);
//.........这里部分代码省略.........
示例7: main
int main(int argc, char* argv[]) {
if (argc < 4) {
PrintUsage();
exit(0);
}
string rgFileName, vertexSeqFileName, scaffoldDirName;
rgFileName = argv[1];
vertexSeqFileName = argv[2];
scaffoldDirName = argv[3];
string repeatFileName = "";
bool printRepeatsSeparately = false;
int argi = 4;
bool printSeparate=false;
while (argi < argc) {
if (strcmp(argv[argi], "-separate") == 0) {
printSeparate=true;
}
else if (strcmp(argv[argi], "-repeats") == 0) {
printRepeatsSeparately = true;
repeatFileName = argv[++argi];
}
else {
cout << "bad option: " << argv[argi] << endl;
PrintUsage();
exit(1);
}
++argi;
}
FASTAReader vertexSequenceReader;
vertexSequenceReader.Init(vertexSeqFileName);
//
// Input necessary data
//
vector<FASTASequence> vertexSequences;
vertexSequenceReader.ReadAllSequences(vertexSequences);
RepeatGraph<string> rg;
rg.ReadGraph(rgFileName);
vector<FASTASequence> vertexRCSequences;
VectorIndex vertexIndex;
vertexRCSequences.resize(vertexSequences.size());
for (vertexIndex = 0; vertexIndex < vertexSequences.size(); vertexIndex++ ){
vertexSequences[vertexIndex].MakeRC(vertexRCSequences[vertexIndex]);
}
VectorIndex outEdgeIndex;
int scaffoldIndex = 0;
ofstream scaffoldOut;
if (printSeparate==false) {
// scaffold dir name is really a file name here.
CrucialOpen(scaffoldDirName, scaffoldOut, std::ios::out);
}
for (vertexIndex = 0; vertexIndex < rg.vertices.size(); vertexIndex++ ){
rg.vertices[vertexIndex].traversed = false;
}
//
// Set up flow for calling multiplicity.
//
/*
Test all this out later.
AssignMinimumFlowToEdges(rg, 2);
AssignVertexFlowBalance(rg);
BalanceKirchhoffFlow(rg);
UInt edgeIndex;
for (edgeIndex = 0; edgeIndex < rg.edges.size(); edgeIndex++) {
if (rg.edges[edgeIndex].flow > 1) {
cout << edgeIndex << " " << rg.edges[edgeIndex].flow << endl;
}
}
*/
int numPrintedVertices = 0;
for (vertexIndex = 0; vertexIndex < rg.vertices.size(); vertexIndex++ ){
//
// Look to see if this vertex is a branching vertex.
//
if ((rg.vertices[vertexIndex].inEdges.size() != 1 or
rg.vertices[vertexIndex].outEdges.size() != 1) and
rg.vertices[vertexIndex].traversed == false) {
//
// This is a branching vertex. Print all paths from this vertex, but not the vertex
// itself if it appears repetitive.
//
VectorIndex outEdgeIndex;
bool printedThisVertex = false;
for (outEdgeIndex = 0; outEdgeIndex < rg.vertices[vertexIndex].outEdges.size(); outEdgeIndex++ ){
//
// This is a branching vertex.
//
//.........这里部分代码省略.........
示例8: main
int main(int argc, char* argv[]) {
string program = "samtom4";
string versionString = VERSION;
AppendPerforceChangelist(PERFORCE_VERSION_STRING, versionString);
string samFileName, refFileName, outFileName;
bool printHeader = false;
bool parseSmrtTitle = false;
bool useShortRefName = false;
CommandLineParser clp;
clp.SetProgramName(program);
clp.SetVersion(versionString);
clp.SetProgramSummary("Converts a SAM file generated by blasr to M4 format.");
clp.RegisterStringOption("in.sam", &samFileName,
"Input SAM file, which is produced by blasr.");
clp.RegisterStringOption("reference.fasta", &refFileName,
"Reference used to generate file.sam.");
clp.RegisterStringOption("out.m4", &outFileName,
"Output in blasr M4 format.");
clp.RegisterPreviousFlagsAsHidden();
clp.RegisterFlagOption("header", &printHeader,
"Print M4 header.");
clp.RegisterFlagOption("useShortRefName", &useShortRefName,
"Use abbreviated reference names obtained "
"from file.sam instead of using full names "
"from reference.fasta.");
//clp.SetExamples(program + " file.sam reference.fasta out.m4");
clp.ParseCommandLine(argc, argv);
ostream * outFilePtr = &cout;
ofstream outFileStrm;
if (outFileName != "") {
CrucialOpen(outFileName, outFileStrm, std::ios::out);
outFilePtr = &outFileStrm;
}
SAMReader<SAMFullReferenceSequence, SAMReadGroup, SAMAlignment> samReader;
FASTAReader fastaReader;
//
// Initialize samReader and fastaReader.
//
samReader.Initialize(samFileName);
fastaReader.Initialize(refFileName);
//
// Configure the file log.
//
string command;
CommandLineParser::CommandLineToString(argc, argv, command);
//
// Read necessary input.
//
vector<FASTASequence> references;
fastaReader.ReadAllSequences(references);
AlignmentSet<SAMFullReferenceSequence, SAMReadGroup, SAMAlignment> alignmentSet;
samReader.ReadHeader(alignmentSet);
//
// The order of references in vector<FASTASequence> references and
// AlignmentSet<, , >alignmentSet.references can be different.
// Rearrange alignmentSet.references such that it is ordered in
// exactly the same way as vector<FASTASequence> references.
//
alignmentSet.RearrangeReferences(references);
//
// Map short names for references obtained from file.sam to
// full names obtained from reference.fasta
//
map<string, string> shortRefNameToFull;
map<string, string>::iterator it;
assert(references.size() == alignmentSet.references.size());
if (!useShortRefName) {
for (size_t i = 0; i < references.size(); i++) {
string shortRefName = alignmentSet.references[i].GetSequenceName();
string fullRefName(references[i].title);
if (shortRefNameToFull.find(shortRefName) != shortRefNameToFull.end()) {
cout << "ERROR, Found more than one reference " << shortRefName << "in sam header" << endl;
exit(1);
}
shortRefNameToFull[shortRefName] = fullRefName;
alignmentSet.references[i].sequenceName = fullRefName;
}
}
// Map reference name obtained from SAM file to indices
map<string, int> refNameToIndex;
for (size_t i = 0; i < references.size(); i++) {
string refName = alignmentSet.references[i].GetSequenceName();
refNameToIndex[refName] = i;
}
//
// Store the alignments.
//
//.........这里部分代码省略.........
示例9: main
int main(int argc, char* argv[]) {
string rgInName, rgOutName;
int minPathLength;
string vertexSequenceFileName;
if (argc < 5) {
cout << "usage: trimShortEnds in.rg vertexSequences minPathLength out.rg" << endl;
exit(1);
}
rgInName = argv[1];
vertexSequenceFileName = argv[2];
minPathLength = atoi(argv[3]);
rgOutName = argv[4];
ofstream rgOut;
CrucialOpen(rgOutName, rgOut, std::ios::out);
FASTAReader vertexSequenceReader;
vertexSequenceReader.Init(vertexSequenceFileName);
RepeatGraph<string> rg;
vector<FASTASequence> vertexSequences;
rg.ReadGraph(rgInName);
vertexSequenceReader.ReadAllSequences(vertexSequences);
VectorIndex vertexIndex;
VectorIndex outEdgeIndex;
VectorIndex edgeIndex;
if (rg.edges.size() == 0) {
cout << "LIKELY INVALID GRAPH. There are no edges." << endl;
return 0;
}
//
// At first, any edge that exists is connected to a vertex. This
// will change as low coverage edges are deleted and replaced by
// high coverage edges from the end of the array.
//
for (edgeIndex = 0; edgeIndex < rg.edges.size(); edgeIndex++) {
rg.edges[edgeIndex].connected = true;
}
set<std::pair<VectorIndex, VectorIndex> > srcDestToRemove;
for (vertexIndex = 0; vertexIndex < rg.vertices.size(); vertexIndex++) {
if (rg.vertices[vertexIndex].inEdges.size() == 0 and
rg.vertices[vertexIndex].outEdges.size() == 1) {
//
// This is a source. Traverse this until a branching vertex or the end is found.
//
vector<VectorIndex> path;
path.push_back(vertexIndex);
int pathLength = 0;
VectorIndex pathVertex;
VectorIndex pathEdge;
pathEdge = rg.vertices[vertexIndex].outEdges[0];
pathVertex = rg.edges[pathEdge].dest;
while (rg.vertices[pathVertex].inEdges.size() == 1 and
rg.vertices[pathVertex].outEdges.size() == 1) {
path.push_back(pathVertex);
pathEdge = rg.vertices[pathVertex].outEdges[0];
pathVertex = rg.edges[pathEdge].dest;
pathLength += vertexSequences[pathVertex/2].length;
}
pathLength += vertexSequences[pathVertex/2].length;
path.push_back(pathVertex);
if (pathLength < minPathLength and path.size() < 3) {
//
// Remove this path, it is too short.
// Also remove the complement.
//
cout << "trimming path of " << path.size() << " is of sequence length " << pathLength << endl;
VectorIndex pathIndex;
for (pathIndex = 0; pathIndex < path.size() - 1; pathIndex++) {
srcDestToRemove.insert(pair<VectorIndex, VectorIndex>(path[pathIndex], path[pathIndex+1]));
srcDestToRemove.insert(pair<VectorIndex, VectorIndex>(2*(path[pathIndex+1]/2) + !(path[pathIndex+1]%2),
2*(path[pathIndex]/2) + !(path[pathIndex]%2)));
}
}
}
}
MarkEdgePairsForRemoval(srcDestToRemove, rg.vertices, rg.edges);
RemoveUnconnectedEdges(rg.vertices, rg.edges);
rg.WriteGraph(rgOut);
return 0;
}
示例10: main
int main(int argc, char* argv[]) {
FASTAReader reader;
FASTASequence read;
int maxLength = 100;
if (argc < 3) {
cout << "usage: pairAlignAllContigs inFile maxLength equivalencies [-minIdent i]" << endl;
exit(0);
}
string readsFileName, equivalenciesFileName;
readsFileName = argv[1];
maxLength = atoi(argv[2]);
equivalenciesFileName = argv[3];
int argi = 4;
float minIdentity = 80;
while (argi < argc) {
if (strcmp(argv[argi], "-minIdent") == 0) {
minIdentity = atoi(argv[++argi]);
}
++argi;
}
vector<FASTASequence> reads, readsRC;;
reader.Init(readsFileName);
reader.ReadAllSequences(reads);
readsRC.resize(reads.size());
int r;
for (r =0; r < reads.size();r++) {
reads[r].MakeRC(readsRC[r]);
}
ofstream equivOut;
CrucialOpen(equivalenciesFileName, equivOut);
Matrix<int> alignScores;
Matrix<float> alignIdentities;
alignScores.Resize(reads.size(), reads.size());
alignIdentities.Resize(reads.size(), reads.size());
vector<int> scoreMat;
vector<Arrow> pathMat;
int i, j;
int alignScore;
FASTASequence readi, readj;
FASTASequence rcReadi, rcReadj;
for (i = 0; i < reads.size(); i++) {
float maxFrontIdent, maxEndIdent;
int maxFrontIdentIndex, maxEndIdentIndex;
maxFrontIdent = 0; maxEndIdent = 0;
maxFrontIdentIndex = 0;
maxEndIdentIndex = 0;
int maxFrontIdentLength = 0;
int maxEndIdentLength = 0;
int maxFrontLength = 0;
int maxEndLength = 0;
int nmaxFrontLengthIndex = 0;
int maxEndLengthIndex = 0;
float maxFrontLengthIdent = 0;
float maxEndLengthIdent = 0;
int maxFrontLengthIndex = 0;
equivOut << reads[i].GetName();
for (j = 0; j < reads.size(); j++ ){
//
// Store the two ends of the alignment.
//
alignScore = 0;
int rcAlignScore;
Alignment alignment;
Alignment rcAlignment;
Alignment *optAlignment;
if (i != j) {
if (reads[i].length < maxLength and reads[j].length < maxLength) {
alignScore = SWAlign(reads[i], reads[j], SMRTDistanceMatrix, 3, scoreMat, pathMat, alignment, Global);
}
if (reads[i].length < maxLength and reads[j].length < maxLength) {
rcAlignScore = SWAlign(reads[i], readsRC[j], SMRTDistanceMatrix, 3, scoreMat, pathMat, rcAlignment, Global);
}
ComputeAlignmentStats(alignment, reads[i].seq, reads[j].seq, SMRTDistanceMatrix, 3,3 );
ComputeAlignmentStats(rcAlignment, reads[i].seq, readsRC[j].seq, SMRTDistanceMatrix, 3,3 );
if (alignment.pctSimilarity > minIdentity or rcAlignment.pctSimilarity > minIdentity) {
equivOut << " " << reads[j].GetName();
}
}
}
equivOut << endl;
}
return 0;
}
示例11: main
//.........这里部分代码省略.........
ofstream outFileStrm;
if (outFileName != "") {
CrucialOpen(outFileName, outFileStrm, std::ios::out);
outFilePtr = &outFileStrm;
}
GFFFile adapterGffFile;
if (adapterGffFileName != "")
adapterGffFile.ReadAll(adapterGffFileName);
SAMReader<SAMFullReferenceSequence, SAMReadGroup, SAMAlignment> samReader;
FASTAReader fastaReader;
//
// Initialize samReader and fastaReader.
//
samReader.Initialize(samFileName);
fastaReader.Initialize(refFileName);
//
// Configure the file log.
//
string command;
CommandLineParser::CommandLineToString(argc, argv, command);
string log = "Filter sam hits.";
string program = "samFilter";
string versionString = VERSION;
AppendPerforceChangelist(PERFORCE_VERSION_STRING, versionString);
//
// Read necessary input.
//
vector<FASTASequence> references;
fastaReader.ReadAllSequences(references);
// If the SAM file is generated by blasr with -titleTable,
// then references in the SAM are represented by
// their corresponding indices in the title table.
// In that case, we need to convert reference titles in fasta file
// to their corresponding indices in the title table, such that
// references in both SAM and fasta files are represented
// by title table indices and therefore can match.
if (titleTableName != "") {
ConvertTitlesToTitleTableIndices(references, titleTableName);
}
AlignmentSet<SAMFullReferenceSequence, SAMReadGroup, SAMAlignment> alignmentSet;
vector<string> allHeaders = samReader.ReadHeader(alignmentSet);
// Process SAM Header.
string commandLineString;
clp.CommandLineToString(argc, argv, commandLineString);
allHeaders.push_back("@PG\tID:SAMFILTER\tVN:" + versionString + \
"\tCL:" + program + " " + commandLineString);
for (int i = 0; i < allHeaders.size(); i++) {
outFileStrm << allHeaders[i] << endl;
}
//
// The order of references in vector<FASTASequence> references and
// AlignmentSet<, , >alignmentSet.references can be different.
// Rearrange alignmentSet.references such that they are ordered in
// exactly the same way as vector<FASTASequence> references.
//
alignmentSet.RearrangeReferences(references);