本文整理汇总了C++中ContigPaths类的典型用法代码示例。如果您正苦于以下问题:C++ ContigPaths类的具体用法?C++ ContigPaths怎么用?C++ ContigPaths使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了ContigPaths类的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: readPaths
/** Load every contig path stored in a file.
 * Each record is an ID followed by a path. Reading stops at the first
 * record that cannot be extracted, after which the stream is asserted
 * to be at end-of-file.
 * @param inPath the file to read, or "-" to read standard input
 * @param ids [out] when not NULL, receives the string ID of each
 *     path in file order; asserted to be empty on entry
 * @return the paths, in file order
 */
static ContigPaths readPaths(const string& inPath,
        vector<string>* ids = NULL)
{
    const bool wantIDs = ids != NULL;
    if (wantIDs)
        assert(ids->empty());

    // The file stream is always constructed, but it is only checked
    // and used when the input is not standard input.
    ifstream fin(inPath.c_str());
    if (opt::verbose > 0)
        cerr << "Reading `" << inPath << "'..." << endl;
    const bool fromStdin = inPath == "-";
    if (!fromStdin)
        assert_good(fin, inPath);
    istream& in = fromStdin ? cin : fin;

    ContigPaths paths;
    unsigned numRead = 0;
    string name;
    ContigPath record;
    while (in >> name >> record) {
        paths.push_back(record);
        if (wantIDs)
            ids->push_back(name);
        ++numRead;
        // Periodic progress report, every million paths.
        if (opt::verbose > 1 && numRead % 1000000 == 0) {
            cerr << "Read " << numRead << " paths. Using "
                << toSI(getMemoryUsage()) << "B of memory.\n";
        }
    }

    if (opt::verbose > 0) {
        cerr << "Read " << numRead << " paths. Using "
            << toSI(getMemoryUsage()) << "B of memory.\n";
    }
    // Anything other than end-of-file means a record failed to parse.
    assert(in.eof());
    return paths;
}
示例2: seenContigs
/** Flag every contig that occurs in any of the given paths.
 * For each node of each path, seen[node.id()] is set to true. Nodes
 * whose id() is not a valid index into seen are skipped.
 * @param seen [in,out] one flag per contig; only ever set to true
 * @param paths the paths whose nodes are visited
 */
static void seenContigs(vector<bool>& seen, const ContigPaths& paths)
{
    typedef ContigPaths::const_iterator PathIt;
    typedef ContigPath::const_iterator NodeIt;

    for (PathIt path = paths.begin(); path != paths.end(); ++path) {
        for (NodeIt node = path->begin();
                node != path->end(); ++node) {
            const bool inRange = node->id() < seen.size();
            if (inRange)
                seen[node->id()] = true;
        }
    }
}
示例3: markRemovedContigs
/** Flag the contigs that are to be removed.
 * An empty path signals removal: the ID of that path is looked up in
 * g_contigNames and the resulting index is flagged.
 * @param marked [in,out] one flag per contig; only ever set to true
 * @param pathIDs the string ID of each path, parallel to paths
 * @param paths the paths to inspect
 */
static void markRemovedContigs(vector<bool>& marked,
        const vector<string>& pathIDs, const ContigPaths& paths)
{
    for (size_t pathIdx = 0; pathIdx < paths.size(); ++pathIdx) {
        if (!paths[pathIdx].empty())
            continue;
        size_t contigIdx = get(g_contigNames, pathIDs[pathIdx]);
        assert(contigIdx < marked.size());
        marked[contigIdx] = true;
    }
}
示例4: findRepeats
/** Collect the contigs that occur more than once within one solution.
 * The seed is counted once for every solution before the nodes of
 * that solution are counted, so a solution that contains the seed
 * reports the seed as a repeat.
 * @param seed the contig that every solution starts from
 * @param solutions the candidate paths
 * @return the union, over all solutions, of the repeated contigs
 */
static set<ContigID> findRepeats(ContigID seed,
        const ContigPaths& solutions)
{
    typedef map<ContigID, unsigned> Tally;

    set<ContigID> repeats;
    for (ContigPaths::const_iterator sol = solutions.begin();
            sol != solutions.end(); ++sol) {
        // The tally is local to one solution.
        Tally tally;
        tally[seed]++;
        for (ContigPath::const_iterator node = sol->begin();
                node != sol->end(); ++node)
            tally[node->contigIndex()]++;

        for (Tally::const_iterator entry = tally.begin();
                entry != tally.end(); ++entry) {
            if (entry->second > 1)
                repeats.insert(entry->first);
        }
    }
    return repeats;
}
示例5: readPaths
/** Load contig paths from a file and register their ambiguous gaps.
 * Each record is an ID followed by a path. For every ambiguous node
 * that has a neighbour on both sides, an entry keyed by
 * AmbPathConstraint(left, right, gap length) and holding an empty
 * ContigPath is inserted into g_ambpath_contig.
 * @param[in] inPath the filename of the contig paths, or "-" to read
 *     standard input
 * @param[out] ids the string ID of each path; asserted empty on entry
 * @param[out] isAmb for each path, whether such an interior ambiguous
 *     node was found; asserted empty on entry
 * @return the paths, in file order
 */
static ContigPaths readPaths(const string& inPath,
        vector<string>& ids, vector<bool>& isAmb)
{
    typedef graph_traits<Graph>::vertex_descriptor V;
    assert(ids.empty());
    assert(isAmb.empty());
    assert(g_ambpath_contig.empty());

    ifstream fin(inPath.c_str());
    if (opt::verbose > 0)
        cerr << "Reading `" << inPath << "'..." << endl;
    const bool fromStdin = inPath == "-";
    if (!fromStdin)
        assert_good(fin, inPath);
    istream& in = fromStdin ? cin : fin;

    ContigPaths paths;
    string name;
    Path record;
    while (in >> name >> record) {
        paths.push_back(record);
        ids.push_back(name);
        isAmb.push_back(false);

        // Slide a three-node window over the path. Paths of two or
        // fewer nodes never enter the loop.
        for (size_t i = 2; i < record.size(); ++i) {
            ContigPath::value_type left = record[i - 2],
                gap = record[i - 1], right = record[i];
            if (!gap.ambiguous())
                continue;
            // A gap must be flanked by unambiguous nodes.
            assert(!left.ambiguous());
            assert(!right.ambiguous());
            g_ambpath_contig.insert(AmbPath2Contig::value_type(
                        AmbPathConstraint(left, right, gap.length()),
                        ContigPath()));
            isAmb.back() = true;
        }
    }
    // Anything other than end-of-file means a record failed to parse.
    assert(in.eof());
    return paths;
}
示例6: assemblePathGraph
/** Assemble the path overlap graph. */
static void assemblePathGraph(const Lengths& lengths,
PathGraph& pathGraph, ContigPathMap& paths)
{
ContigPaths seedPaths;
assembleDFS(pathGraph, back_inserter(seedPaths));
ContigPaths mergedPaths = mergeSeedPaths(lengths,
paths, seedPaths);
if (opt::verbose > 1)
cout << '\n';
// Replace each path with the merged path.
for (ContigPaths::const_iterator it1 = seedPaths.begin();
it1 != seedPaths.end(); ++it1) {
const ContigPath& path(mergedPaths[it1 - seedPaths.begin()]);
ContigPath pathrc(path);
reverseComplement(pathrc.begin(), pathrc.end());
for (ContigPath::const_iterator it2 = it1->begin();
it2 != it1->end(); ++it2) {
ContigNode seed(*it2);
if (find(path.begin(), path.end(), seed) != path.end()) {
paths[seed.contigIndex()]
= seed.sense() ? pathrc : path;
} else {
// This seed was not included in the merged path.
}
}
}
removeRepeats(paths);
// Remove the subsumed paths.
if (opt::verbose > 0)
cout << "Removing redundant contigs\n";
removeSubsumedPaths(lengths, paths);
outputSortedPaths(paths);
}
示例7: mergeSeedPaths
/** Merge each of the given seed paths.
 * @param lengths passed through to mergePath
 * @param paths passed through to mergePath
 * @param seedPaths the seed paths to merge
 * @return one merged contig path per seed path, in the same order
 */
static ContigPaths mergeSeedPaths(const Lengths& lengths,
        const ContigPathMap& paths, const ContigPaths& seedPaths)
{
    if (opt::verbose > 0)
        cout << "\nMerging paths\n";

    const size_t numSeeds = seedPaths.size();
    ContigPaths merged;
    merged.reserve(numSeeds);
    for (size_t i = 0; i < numSeeds; ++i)
        merged.push_back(mergePath(lengths, paths, seedPaths[i]));
    return merged;
}
示例8: outputGraph
/** Update the overlap graph with the paths and write it out.
 * Each non-empty path is merged into one new vertex named after the
 * path ID. Afterwards the vertex named by each empty path is removed,
 * as are the unambiguous vertices used by each non-empty path. The
 * graph is written to opt::graphPath.
 * @param g [in,out] the overlap graph
 * @param pathIDs the string ID of each path, parallel to paths
 * @param paths the contig paths
 * @param commandLine passed through to write_graph
 */
static void outputGraph(Graph& g,
        const vector<string>& pathIDs, const ContigPaths& paths,
        const string& commandLine)
{
    typedef graph_traits<Graph>::vertex_descriptor V;
    const size_t numPaths = paths.size();

    // First pass: add one vertex per non-empty path. The name table
    // is unlocked only while the new names are added.
    g_contigNames.unlock();
    for (size_t i = 0; i < numPaths; ++i) {
        const ContigPath& path = paths[i];
        const string& id = pathIDs[i];
        if (path.empty())
            continue;
        V u = merge(g, path.begin(), path.end());
        put(vertex_name, g, u, id);
    }
    g_contigNames.lock();

    // Second pass: remove the vertices that the paths replace.
    for (size_t i = 0; i < numPaths; ++i) {
        const ContigPath& path = paths[i];
        const string& id = pathIDs[i];
        if (!path.empty()) {
            // Ambiguous nodes are excluded by the predicate.
            remove_vertex_if(g, path.begin(), path.end(),
                    not1(std::mem_fun_ref(&ContigNode::ambiguous)));
        } else {
            remove_vertex(find_vertex(id, false, g), g);
        }
    }

    // Write the graph and check the stream both before and after.
    const string& graphPath = opt::graphPath;
    assert(!graphPath.empty());
    if (opt::verbose > 0)
        cerr << "Writing `" << graphPath << "'..." << endl;
    ofstream fout(graphPath.c_str());
    assert_good(fout, graphPath);
    write_graph(fout, g, PROGRAM, commandLine);
    assert_good(fout, graphPath);

    if (opt::verbose > 0)
        printGraphStats(cerr, g);
}
示例9: handleEstimate
/** Find a path for the specified distance estimates.
* @param out [out] the solution path
*/
static void handleEstimate(const Graph& g,
const EstimateRecord& er, bool dirIdx,
ContigPath& out)
{
if (er.estimates[dirIdx].empty())
return;
ContigNode origin(er.refID, dirIdx);
ostringstream vout_ss;
ostream bitBucket(NULL);
ostream& vout = opt::verbose > 0 ? vout_ss : bitBucket;
vout << "\n* " << get(vertex_name, g, origin) << '\n';
unsigned minNumPairs = UINT_MAX;
// generate the reachable set
Constraints constraints;
for (Estimates::const_iterator iter
= er.estimates[dirIdx].begin();
iter != er.estimates[dirIdx].end(); ++iter) {
ContigNode v = iter->first;
const DistanceEst& ep = iter->second;
minNumPairs = min(minNumPairs, ep.numPairs);
constraints.push_back(Constraint(v,
ep.distance + allowedError(ep.stdDev)));
}
vout << "Constraints:";
printConstraints(vout, g, constraints) << '\n';
ContigPaths solutions;
unsigned numVisited = 0;
constrainedSearch(g, origin, constraints, solutions, numVisited);
bool tooComplex = numVisited >= opt::maxCost;
bool tooManySolutions = solutions.size() > opt::maxPaths;
set<ContigID> repeats = findRepeats(er.refID, solutions);
if (!repeats.empty()) {
vout << "Repeats:";
for (set<ContigID>::const_iterator it = repeats.begin();
it != repeats.end(); ++it)
vout << ' ' << get(g_contigNames, *it);
vout << '\n';
}
unsigned numPossiblePaths = solutions.size();
if (numPossiblePaths > 0)
vout << "Paths: " << numPossiblePaths << '\n';
for (ContigPaths::iterator solIter = solutions.begin();
solIter != solutions.end();) {
vout << *solIter << '\n';
// Calculate the path distance to each node and see if
// it is within the estimated distance.
map<ContigNode, int> distanceMap
= makeDistanceMap(g, origin, *solIter);
// Remove solutions whose distance estimates are not correct.
unsigned validCount = 0, invalidCount = 0, ignoredCount = 0;
for (Estimates::const_iterator iter
= er.estimates[dirIdx].begin();
iter != er.estimates[dirIdx].end(); ++iter) {
ContigNode v = iter->first;
const DistanceEst& ep = iter->second;
vout << get(vertex_name, g, v) << ',' << ep << '\t';
map<ContigNode, int>::iterator dmIter
= distanceMap.find(v);
if (dmIter == distanceMap.end()) {
// This contig is a repeat.
ignoredCount++;
vout << "ignored\n";
continue;
}
// translate distance by -overlap to match
// coordinate space used by the estimate
int actualDistance = dmIter->second;
int diff = actualDistance - ep.distance;
unsigned buffer = allowedError(ep.stdDev);
bool invalid = (unsigned)abs(diff) > buffer;
bool repeat = repeats.count(v.contigIndex()) > 0;
bool ignored = invalid && repeat;
if (ignored)
ignoredCount++;
else if (invalid)
invalidCount++;
else
validCount++;
vout << "dist: " << actualDistance
<< " diff: " << diff
<< " buffer: " << buffer
<< " n: " << ep.numPairs
<< (ignored ? " ignored" : invalid ? " invalid" : "")
<< '\n';
}
//.........这里部分代码省略.........
示例10: constructAmbiguousPath
/** Build one ambiguous path that agrees with all the given paths.
 * The result is the longest prefix shared by every path, followed,
 * only when the shared suffix is non-empty, by a gap node
 * ContigNode(numN, 'N') and the shared suffix.
 * @param g the graph passed to the length and distance helpers
 * @param origin the node used as the left neighbour of the suffix
 *     when the shared suffix spans the whole longest path
 * @param paths the paths to reconcile; asserted to be non-empty
 * @return the combined path
 */
static ContigPath constructAmbiguousPath(const Graph &g,
        const ContigNode& origin, const ContigPaths& paths)
{
    assert(!paths.empty());
    typedef ContigPaths::const_iterator PathIt;
    const ContigPath& firstSol = paths.front();

    // Neither the prefix nor the suffix can exceed the shortest path.
    size_t min_len = firstSol.size();
    for (PathIt p = paths.begin(); p != paths.end(); ++p)
        min_len = min(min_len, p->size());

    // Grow the shared prefix while every path agrees with the first.
    ContigPath vppath;
    size_t longestPrefix = 0;
    while (longestPrefix < min_len) {
        const ContigNode& candidate = firstSol[longestPrefix];
        PathIt p = paths.begin();
        for (; p != paths.end(); ++p)
            if ((*p)[longestPrefix] != candidate)
                break;
        if (p != paths.end())
            break; // Some path disagrees: the prefix ends here.
        vppath.push_back(candidate);
        ++longestPrefix;
    }

    // Grow the shared suffix from the back, without overlapping the
    // prefix on the shortest path. vspath holds it back to front.
    ContigPath vspath;
    size_t longestSuffix = 0;
    const size_t maxSuffix = min_len - longestPrefix;
    while (longestSuffix < maxSuffix) {
        const size_t fromBack = longestSuffix + 1;
        const ContigNode& candidate
            = firstSol[firstSol.size() - fromBack];
        PathIt p = paths.begin();
        for (; p != paths.end(); ++p)
            if ((*p)[p->size() - fromBack] != candidate)
                break;
        if (p != paths.end())
            break; // Some path disagrees: the suffix ends here.
        vspath.push_back(candidate);
        ++longestSuffix;
    }

    ContigPath out;
    out.reserve(vppath.size() + 1 + vspath.size());
    out.insert(out.end(), vppath.begin(), vppath.end());
    if (longestSuffix == 0)
        return out;

    // Size the gap from the longest of the candidate paths.
    const ContigPath& longestPath(
            *max_element(paths.begin(), paths.end(),
                ComparePathLength(g, origin)));
    unsigned length = calculatePathLength(g, origin, longestPath,
            longestPrefix, longestSuffix);

    // Account for the overlap on the right.
    const ContigNode& beforeSuffix
        = longestSuffix == longestPath.size() ? origin
        : *(longestPath.rbegin() + longestSuffix);
    const ContigNode& suffixStart
        = *(longestPath.rbegin() + longestSuffix - 1);
    int dist = length + getDistance(g, beforeSuffix, suffixStart);

    // Add k-1 because it is the convention.
    int numN = dist + opt::k - 1;
    assert(numN > 0);

    out.push_back(ContigNode(numN, 'N'));
    out.insert(out.end(), vspath.rbegin(), vspath.rend());
    return out;
}
示例11: fillGap
/** Return the consensus sequence of the specified gap.
 * Searches the graph for paths from the source of the constraint to
 * its destination, within apConstraint.dist + opt::distanceError, and
 * updates the matching counter in stats. A non-empty path is returned
 * only when more than one solution was found and align() produced a
 * consensus; every other outcome, including a single solution,
 * returns an empty path.
 * @param g the graph to search
 * @param apConstraint the source, destination and size of the gap
 * @param seen [in,out] passed to markSeen when a consensus is found
 * @param outFasta passed through to align
 * @return the consensus path, or an empty path
 */
static ContigPath fillGap(const Graph& g,
        const AmbPathConstraint& apConstraint,
        vector<bool>& seen,
        ofstream& outFasta)
{
    if (opt::verbose > 1) {
        cerr << "\n* "
            << get(vertex_name, g, apConstraint.source) << ' '
            << apConstraint.dist << "N "
            << get(vertex_name, g, apConstraint.dest) << '\n';
    }

    Constraints constraints;
    constraints.push_back(Constraint(apConstraint.dest,
                apConstraint.dist + opt::distanceError));

    ContigPaths solutions;
    unsigned numVisited = 0;
    constrainedSearch(g, apConstraint.source,
            constraints, solutions, numVisited);
    bool tooComplex = numVisited >= opt::maxCost;

    // Prepend the source node to every solution.
    for (ContigPaths::iterator sol = solutions.begin();
            sol != solutions.end(); ++sol)
        sol->insert(sol->begin(), apConstraint.source);

    bool tooManySolutions = solutions.size() > opt::numBranches;

    if (tooComplex) {
        stats.tooComplex++;
        if (opt::verbose > 1)
            cerr << solutions.size() << " paths (too complex)\n";
        return ContigPath();
    }
    if (tooManySolutions) {
        stats.numTooManySolutions++;
        if (opt::verbose > 1)
            cerr << solutions.size() << " paths (too many)\n";
        return ContigPath();
    }
    if (solutions.empty()) {
        stats.numNoSolutions++;
        if (opt::verbose > 1)
            cerr << "no paths\n";
        return ContigPath();
    }
    if (solutions.size() == 1) {
        // A unique solution is counted as merged, but no consensus
        // path is returned for it.
        if (opt::verbose > 1)
            cerr << "1 path\n" << solutions.front() << '\n';
        stats.numMerged++;
        return ContigPath();
    }

    // Several solutions: try to align them into a consensus.
    assert(solutions.size() > 1);
    if (opt::verbose > 2)
        copy(solutions.begin(), solutions.end(),
                ostream_iterator<ContigPath>(cerr, "\n"));
    else if (opt::verbose > 1)
        cerr << solutions.size() << " paths\n";

    ContigPath consensus(align(g, solutions, outFasta));
    if (consensus.empty()) {
        stats.notMerged++;
        return consensus;
    }
    stats.numMerged++;
    // Mark contigs that are used in a consensus.
    markSeen(seen, solutions, true);
    if (opt::verbose > 1)
        cerr << consensus << '\n';
    return consensus;
}
示例12: alignPair
/* Resolve ambiguous region using pairwise alignment
* (Needleman-Wunsch) ('solutions' contain exactly two paths, from a
* source contig to a dest contig)
*/
static ContigPath alignPair(const Graph& g,
const ContigPaths& solutions, ofstream& out)
{
assert(solutions.size() == 2);
assert(solutions[0].size() > 1);
assert(solutions[1].size() > 1);
assert(solutions[0].front() == solutions[1].front());
assert(solutions[0].back() == solutions[1].back());
ContigPath fstSol(solutions[0].begin()+1, solutions[0].end()-1);
ContigPath sndSol(solutions[1].begin()+1, solutions[1].end()-1);
if (fstSol.empty() || sndSol.empty()) {
// This entire sequence may be deleted.
const ContigPath& sol(fstSol.empty() ? sndSol : fstSol);
assert(!sol.empty());
Sequence consensus(mergePath(g, sol));
assert(consensus.size() > opt::k - 1);
string::iterator first = consensus.begin() + opt::k - 1;
transform(first, consensus.end(), first, ::tolower);
unsigned match = opt::k - 1;
float identity = (float)match / consensus.size();
if (opt::verbose > 2)
cerr << consensus << '\n';
if (opt::verbose > 1)
cerr << identity
<< (identity < opt::identity ? " (too low)\n" : "\n");
if (identity < opt::identity)
return ContigPath();
unsigned coverage = calculatePathProperties(g, sol).coverage;
ContigNode u = outputNewContig(g,
solutions, 1, 1, consensus, coverage, out);
ContigPath path;
path.push_back(solutions.front().front());
path.push_back(u);
path.push_back(solutions.front().back());
return path;
}
Sequence fstPathContig(mergePath(g, fstSol));
Sequence sndPathContig(mergePath(g, sndSol));
if (fstPathContig == sndPathContig) {
// These two paths have identical sequence.
if (fstSol.size() == sndSol.size()) {
// A perfect match must be caused by palindrome.
typedef ContigPath::const_iterator It;
pair<It, It> it = mismatch(
fstSol.begin(), fstSol.end(), sndSol.begin());
assert(it.first != fstSol.end());
assert(it.second != sndSol.end());
assert(*it.first
== get(vertex_complement, g, *it.second));
assert(equal(it.first+1, It(fstSol.end()), it.second+1));
if (opt::verbose > 1)
cerr << "Palindrome: "
<< get(vertex_contig_name, g, *it.first) << '\n';
return solutions[0];
} else {
// The paths are different lengths.
cerr << PROGRAM ": warning: "
"Two paths have identical sequence, which may be "
"caused by a transitive edge in the overlap graph.\n"
<< '\t' << fstSol << '\n'
<< '\t' << sndSol << '\n';
return solutions[fstSol.size() > sndSol.size() ? 0 : 1];
}
}
unsigned minLength = min(
fstPathContig.length(), sndPathContig.length());
unsigned maxLength = max(
fstPathContig.length(), sndPathContig.length());
float lengthRatio = (float)minLength / maxLength;
if (lengthRatio < opt::identity) {
if (opt::verbose > 1)
cerr << minLength << '\t' << maxLength
<< '\t' << lengthRatio << "\t(different length)\n";
return ContigPath();
}
NWAlignment align;
unsigned match = alignGlobal(fstPathContig, sndPathContig,
align);
float identity = (float)match / align.size();
if (opt::verbose > 2)
cerr << align;
if (opt::verbose > 1)
cerr << identity
<< (identity < opt::identity ? " (too low)\n" : "\n");
if (identity < opt::identity)
return ContigPath();
unsigned coverage = calculatePathProperties(g, fstSol).coverage
+ calculatePathProperties(g, sndSol).coverage;
ContigNode u = outputNewContig(g, solutions, 1, 1,
//.........这里部分代码省略.........