本文整理汇总了C++中KmerCounter类的典型用法代码示例。如果您正苦于以下问题:C++ KmerCounter类的具体用法?C++ KmerCounter怎么用?C++ KmerCounter使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了KmerCounter类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: extract_best_seed
kmer_int_type_t IRKE::extract_best_seed(vector<kmer_int_type_t> &kmer_vec,
KmerCounter &kcounter,
float min_connectivity)
{
unsigned int kmer_length = kcounter.get_kmer_length();
unsigned int best_kmer_count = 0;
kmer_int_type_t best_seed = 0;
for (unsigned int i = 0; i < kmer_vec.size(); i++) {
kmer_int_type_t kmer = kmer_vec[i];
unsigned int count = kcounter.get_kmer_count(kmer);
if (count > best_kmer_count && is_good_seed_kmer(kmer, count, kmer_length, min_connectivity)) {
best_kmer_count = count;
best_seed = kmer;
}
}
if (IRKE_COMMON::MONITOR >= 2) {
cerr << "Parallel method found better seed: " << kcounter.get_kmer_string(best_seed) << " with count: "
<< best_kmer_count << endl;
}
return (best_seed);
}
示例2: visitor
vector<kmer_int_type_t> IRKE::build_inchworm_contig_from_seed(kmer_int_type_t kmer, KmerCounter &kcounter,
float min_connectivity, unsigned int &total_counts,
bool)
{
unsigned int kmer_count = kcounter.get_kmer_count(kmer);
/* Extend to the right */
unsigned int kmer_length = kcounter.get_kmer_length();
Kmer_visitor visitor(kmer_length, DOUBLE_STRANDED_MODE);
Path_n_count_pair selected_path_n_pair_forward = inchworm(kcounter, 'F', kmer, visitor, min_connectivity);
visitor.clear();
// add selected path to visitor
vector<kmer_int_type_t> &forward_path = selected_path_n_pair_forward.first;
if (IRKE_COMMON::MONITOR >= 2) {
cerr << "Forward path contains: " << forward_path.size() << " kmers. " << endl;
}
for (unsigned int i = 0; i < forward_path.size(); i++) {
kmer_int_type_t kmer = forward_path[i];
visitor.add(kmer);
if (IRKE_COMMON::MONITOR >= 2) {
cerr << "\tForward path kmer: " << kcounter.get_kmer_string(kmer) << endl;
}
}
/* Extend to the left */
visitor.erase(kmer); // reset the seed
Path_n_count_pair selected_path_n_pair_reverse = inchworm(kcounter, 'R', kmer, visitor, min_connectivity);
if (IRKE_COMMON::MONITOR >= 2) {
vector<kmer_int_type_t> &reverse_path = selected_path_n_pair_reverse.first;
cerr << "Reverse path contains: " << reverse_path.size() << " kmers. " << endl;
for (unsigned int i = 0; i < reverse_path.size(); i++) {
cerr << "\tReverse path kmer: " << kcounter.get_kmer_string(reverse_path[i]) << endl;
}
}
total_counts = selected_path_n_pair_forward.second + selected_path_n_pair_reverse.second + kmer_count;
vector<kmer_int_type_t> &reverse_path = selected_path_n_pair_reverse.first;
vector<kmer_int_type_t> joined_path = _join_forward_n_reverse_paths(reverse_path, kmer, forward_path);
return (joined_path);
}
示例3: traverse_path
void IRKE::traverse_path(KmerCounter &kcounter, Kmer_Occurence_Pair seed_kmer, Kmer_visitor &visitor,
Kmer_visitor &place_holder, float MIN_CONNECTIVITY_RATIO, unsigned int depth)
{
if (IRKE_COMMON::MONITOR >= 3) {
cerr << "traverse_path, depth: " << depth << ", kmer: " << kcounter.get_kmer_string(seed_kmer.first) << endl;
}
// check to see if visited already
if (visitor.exists(seed_kmer.first)) {
// already visited
if (IRKE_COMMON::MONITOR >= 3) {
cout << "\talready visited " << kcounter.get_kmer_string(seed_kmer.first) << endl;
}
return;
}
// check if at the end of the max recursion, and if so, don't visit it but set a placeholder.
if (depth > MAX_RECURSION) {
place_holder.add(seed_kmer.first);
return;
}
visitor.add(seed_kmer.first);
// try each of the forward paths from the kmer:
vector<Kmer_Occurence_Pair> forward_candidates = kcounter.get_forward_kmer_candidates(seed_kmer.first);
for (unsigned int i = 0; i < forward_candidates.size(); i++) {
Kmer_Occurence_Pair kmer = forward_candidates[i];
if (kmer.second && exceeds_min_connectivity(kcounter, seed_kmer, kmer, MIN_CONNECTIVITY_RATIO)) {
traverse_path(kcounter, kmer, visitor, place_holder, MIN_CONNECTIVITY_RATIO, depth + 1);
}
}
// try each of the reverse paths from the kmer:
vector<Kmer_Occurence_Pair> reverse_candidates = kcounter.get_reverse_kmer_candidates(seed_kmer.first);
for (unsigned int i = 0; i < reverse_candidates.size(); i++) {
Kmer_Occurence_Pair kmer = reverse_candidates[i];
if (kmer.second && exceeds_min_connectivity(kcounter, seed_kmer, kmer, MIN_CONNECTIVITY_RATIO)) {
traverse_path(kcounter, kmer, visitor, place_holder, MIN_CONNECTIVITY_RATIO, depth + 1);
}
}
return;
}
示例4: populate_kmer_counter
void populate_kmer_counter(KmerCounter &kcounter, string &kmers_fasta_file)
{
// code largely copied from IRKE.cpp
int i, myTid;
unsigned long sum,
*record_counter = new unsigned long[omp_get_max_threads()];
unsigned long start, end;
// init record counter
for (int i = 0; i < omp_get_max_threads(); i++) {
record_counter[i] = 0;
}
cerr << "-reading Kmer occurences..." << endl;
start = time(NULL);
Fasta_reader fasta_reader(kmers_fasta_file);
#pragma omp parallel private (myTid)
{
myTid = omp_get_thread_num();
record_counter[myTid] = 0;
while (true) {
Fasta_entry fe = fasta_reader.getNext();
if (fe.get_sequence() == "") break;
record_counter[myTid]++;
if (IRKE_COMMON::MONITOR) {
if (myTid == 0 && record_counter[myTid] % 100000 == 0) {
sum = record_counter[0];
for (i = 1; i < omp_get_num_threads(); i++)
sum += record_counter[i];
cerr << "\r [" << sum / 1000000 << "M] Kmers parsed. ";
}
}
string seq = fe.get_sequence();
if (seq.length() != KMER_SIZE) {
cerr << "ERROR: kmer " << seq << " is not of length: " << KMER_SIZE << endl;
continue;
}
kmer_int_type_t kmer = kcounter.get_kmer_intval(seq);
unsigned int count = atoi(fe.get_header().c_str());
kcounter.add_kmer(kmer, count);
}
}
end = time(NULL);
sum = record_counter[0];
for (i = 1; i < omp_get_max_threads(); i++)
sum += record_counter[i];
delete[] record_counter;
cerr << endl << " done parsing " << sum << " Kmers, " << kcounter.size() << " added, taking " << (end - start)
<< " seconds." << endl;
return;
}
示例5: compute_kmer_coverage
vector<unsigned int> compute_kmer_coverage(string& sequence, KmerCounter& kcounter) {
vector<unsigned int> coverage;
if(IRKE_COMMON::MONITOR) {
cerr << "processing sequence: " << sequence << endl;
}
for (int i = 0; i <= (int) sequence.length() - KMER_SIZE; i++) {
// cerr << "i: " << i << ", <= " << sequence.length() - KMER_SIZE << endl;
string kmer = sequence.substr(i, KMER_SIZE);
if(IRKE_COMMON::MONITOR >= 2) {
for (int j = 0; j <= i; j++) {
cerr << " ";
}
cerr << kmer << endl;
}
unsigned int kmer_count = 0;
if(!contains_non_gatc(kmer)) {
kmer_count = kcounter.get_kmer_count(kmer);
}
// Note, in the jellyfish run, we restrain it to min kmer coverage of 2.
// If we don't find a kmer catalogued, it must have a kmer count of 1.
if (kmer_count < 1) {
kmer_count = 1;
}
coverage.push_back(kmer_count);
}
return(coverage);
}
示例6: describe_kmers
void IRKE::describe_kmers(KmerCounter& kcounter) {
// proxy call to Kcounter method.
kcounter.describe_kmers();
return;
}
示例7: reconstruct_path_sequence
string IRKE::reconstruct_path_sequence(KmerCounter& kcounter, vector<kmer_int_type_t>& path, vector<unsigned int>& cov_counter) {
if (path.size() == 0) {
return("");
}
string seq = kcounter.get_kmer_string(path[0]);
cov_counter.push_back( kcounter.get_kmer_count(path[0]) );
for (unsigned int i = 1; i < path.size(); i++) {
string kmer = kcounter.get_kmer_string(path[i]);
seq += kmer.substr(kmer.length()-1, 1);
cov_counter.push_back( kcounter.get_kmer_count(path[i]) );
}
return(seq);
}
示例8: exceeds_min_connectivity
bool IRKE::exceeds_min_connectivity (KmerCounter& kcounter, string kmerA, string kmerB, float min_connectivity) {
kmer_int_type_t valA = kmer_to_intval(kmerA);
kmer_int_type_t valB = kmer_to_intval(kmerB);
Kmer_Occurence_Pair pairA(valA, kcounter.get_kmer_count(valA));
Kmer_Occurence_Pair pairB(valB, kcounter.get_kmer_count(valB));
return exceeds_min_connectivity(kcounter, pairA, pairB, min_connectivity);
}
示例9: compute_kmer_coverage
vector<unsigned int> compute_kmer_coverage(string& sequence, KmerCounter& kcounter) {
if(IRKE_COMMON::MONITOR) {
cerr << "processing sequence: " << sequence << endl;
}
if (sequence.length() < KMER_SIZE)
{
// Can't rely on length() - KMER_SIZE for this as length is unsigned
cerr << "Sequence: " << sequence << "is smaller than " << KMER_SIZE << " base pairs, skipping" << endl;
return vector<unsigned int>();
}
vector<unsigned int> coverage;
for (size_t i = 0; i <= sequence.length() - KMER_SIZE; i++) {
// cerr << "i: " << i << ", <= " << sequence.length() - KMER_SIZE << endl;
string kmer = sequence.substr(i, KMER_SIZE);
if(IRKE_COMMON::MONITOR >= 2) {
for (size_t j = 0; j <= i; j++) {
cerr << " ";
}
cerr << kmer << endl;
}
unsigned int kmer_count = 0;
if(!contains_non_gatc(kmer)) {
kmer_count = kcounter.get_kmer_count(kmer);
}
// Note, in the jellyfish run, we restrain it to min kmer coverage of 2.
// If we don't find a kmer catalogued, it must have a kmer count of 1.
if (kmer_count < 1) {
kmer_count = 1;
}
coverage.push_back(kmer_count);
}
return(coverage);
}
示例10: inchworm_step
Path_n_count_pair IRKE::inchworm_step (KmerCounter& kcounter, char direction, Kmer_Occurence_Pair kmer, Kmer_visitor& visitor,
Kmer_visitor& eliminator, unsigned int inchworm_round, unsigned int depth,
float MIN_CONNECTIVITY_RATIO, unsigned int max_recurse) {
// cout << "inchworm_step" << endl;
if (IRKE_COMMON::MONITOR >= 2) {
cerr << "\rinchworm: " << string(1,direction)
<< " A:" << INCHWORM_ASSEMBLY_COUNTER << " "
<< " rnd:" << inchworm_round << " D:" << depth << " ";
}
// check to see if kmer exists. If not, return empty container
Path_n_count_pair best_path_n_pair;
if ( !kmer.second
|| visitor.exists(kmer.first) // visited
|| eliminator.exists(kmer.first) // eliminated
) {
// base case, already visited or kmer doesn't exist.
//cout << kmer << "already visited or doesn't exist. ending recursion at depth: " << depth << endl;
return(best_path_n_pair);
}
visitor.add(kmer.first);
if (PACMAN && depth > 0) {
// cerr << "pacman eliminated kmer: " << kmer << endl;
eliminator.add(kmer.first);
}
if (depth < max_recurse) {
vector<Kmer_Occurence_Pair> kmer_candidates;
if (direction == 'F') {
// forward search
kmer_candidates = kcounter.get_forward_kmer_candidates(kmer.first);
}
else {
// reverse search
kmer_candidates = kcounter.get_reverse_kmer_candidates(kmer.first);
}
bool tie = true;
unsigned int recurse_cap = max_recurse;
unsigned int best_path_length = 0;
while (tie) {
vector<Path_n_count_pair> paths;
for (unsigned int i = 0; i < kmer_candidates.size(); i++) {
Kmer_Occurence_Pair kmer_candidate = kmer_candidates[i];
if (kmer_candidate.second // ) {
&& !visitor.exists(kmer_candidate.first) // avoid creating already visited kmers since they're unvisited below...
&& exceeds_min_connectivity(kcounter, kmer, kmer_candidate, MIN_CONNECTIVITY_RATIO) ) {
//cout << endl << "\ttrying " << kmer_candidate << endl;
// recursive call here for extension
Path_n_count_pair p = inchworm_step(kcounter, direction, kmer_candidate, visitor, eliminator, inchworm_round, depth+1, MIN_CONNECTIVITY_RATIO, recurse_cap);
paths.push_back(p);
visitor.erase(kmer_candidate.first); // un-visiting
}
} // end for kmer
if (paths.size() > 1) {
sort(paths.begin(), paths.end(), compare);
if (paths[0].second == paths[1].second // same cumulative coverage values for both paths.
&&
// check last kmer to be sure they're different.
// Not interested in breaking ties between identically scoring paths that end up at the same kmer.
paths[0].first[0] != paths[1].first[0]
) {
// got tie, two different paths and two different endpoints:
if (IRKE_COMMON::MONITOR >= 3) {
cerr << "Got tie! " << ", score: " << paths[0].second << ", recurse at: " << recurse_cap << endl;
vector<unsigned int> v;
cerr << reconstruct_path_sequence(kcounter, paths[0].first, v) << endl;
cerr << reconstruct_path_sequence(kcounter, paths[1].first, v) << endl;
}
//.........这里部分代码省略.........
示例11: inchworm
Path_n_count_pair IRKE::inchworm (KmerCounter& kcounter, char direction, kmer_int_type_t kmer, Kmer_visitor& visitor, float min_connectivity) {
// cout << "inchworm" << endl;
Path_n_count_pair entire_path;
unsigned int inchworm_round = 0;
unsigned long num_total_kmers = kcounter.size();
Kmer_visitor eliminator(kcounter.get_kmer_length(), DOUBLE_STRANDED_MODE);
while (true) {
inchworm_round++;
eliminator.clear();
if (inchworm_round > num_total_kmers) {
throw(string ("Error, inchworm rounds have exceeded the number of possible seed kmers"));
}
if (IRKE_COMMON::MONITOR >= 3) {
cerr << endl << "Inchworm round(" << string(1,direction) << "): " << inchworm_round << " searching kmer: " << kmer << endl;
string kmer_str = kcounter.get_kmer_string(kmer);
cerr << kcounter.describe_kmer(kmer_str) << endl;
}
visitor.erase(kmer); // seed kmer must be not visited already.
Kmer_Occurence_Pair kmer_pair(kmer, kcounter.get_kmer_count(kmer));
Path_n_count_pair best_path = inchworm_step(kcounter, direction, kmer_pair, visitor, eliminator, inchworm_round, 0, min_connectivity, MAX_RECURSION);
if (best_path.second > 0) {
// append info to entire path in reverse order, so starts just after seed kmer
vector<kmer_int_type_t>& kmer_list = best_path.first;
unsigned int num_kmers = kmer_list.size();
int first_index = num_kmers - 1;
int last_index = 0;
if (CRAWL) {
last_index = first_index - CRAWL_LENGTH + 1;
if (last_index < 0) {
last_index = 0;
}
}
for (int i = first_index; i >= last_index; i--) {
kmer_int_type_t kmer_extend = kmer_list[i];
entire_path.first.push_back(kmer_extend);
visitor.add(kmer_extend);
entire_path.second += kcounter.get_kmer_count(kmer_extend);
}
kmer = entire_path.first[ entire_path.first.size() -1 ];
}
else {
// no extension possible
break;
}
}
if (IRKE_COMMON::MONITOR >= 3)
cerr << endl;
return(entire_path);
}
示例12: compute_sequence_assemblies
void IRKE::compute_sequence_assemblies(KmerCounter& kcounter, float min_connectivity,
unsigned int MIN_ASSEMBLY_LENGTH, unsigned int MIN_ASSEMBLY_COVERAGE,
bool WRITE_COVERAGE, string COVERAGE_OUTPUT_FILENAME) {
if (! got_sorted_kmers_flag) {
stringstream error;
error << stacktrace() << " Error, must populate_sorted_kmers_list() before computing sequence assemblies" << endl;
throw(error.str());
}
unsigned int kmer_length = kcounter.get_kmer_length();
ofstream coverage_writer;
if (WRITE_COVERAGE) {
coverage_writer.open(COVERAGE_OUTPUT_FILENAME.c_str());
}
vector<Kmer_counter_map_iterator>& kmers = sorted_kmers; //kcounter.get_kmers_sort_descending_counts();
unsigned long init_size = kcounter.size();
// string s = "before.kmers";
// kcounter.dump_kmers_to_file(s);
for (unsigned int i = 0; i < kmers.size(); i++) {
// cerr << "round: " << i << endl;
unsigned long kmer_counter_size = kcounter.size();
if (kmer_counter_size > init_size) {
// string s = "after.kmers";
// kcounter.dump_kmers_to_file(s);
stringstream error;
error << stacktrace() << "Error, Kcounter size has grown from " << init_size
<< " to " << kmer_counter_size << endl;
throw (error.str());
}
kmer_int_type_t kmer = kmers[i]->first;
unsigned int kmer_count = kmers[i]->second;
if (kmer_count == 0) {
continue;
}
if (IRKE_COMMON::MONITOR >= 2) {
cerr << "SEED kmer: " << kcounter.get_kmer_string(kmer) << ", count: " << kmer_count << endl;
}
if (kmer == revcomp_val(kmer, kmer_length)) {
// palindromic kmer, avoid palindromes as seeds
if (IRKE_COMMON::MONITOR >= 2) {
cerr << "SEED kmer: " << kcounter.get_kmer_string(kmer) << " is palidnromic. Skipping. " << endl;
}
continue;
}
if (kmer_count < MIN_SEED_COVERAGE) {
if (IRKE_COMMON::MONITOR >= 2) {
cerr << "-seed has insufficient coverage, skipping" << endl;
}
continue;
}
float entropy = compute_entropy(kmer, kmer_length);
if (entropy < MIN_SEED_ENTROPY) {
if (IRKE_COMMON::MONITOR >= 2) {
cerr << "-skipping seed due to low entropy: " << entropy << endl;
}
continue;
}
/* Extend to the right */
Kmer_visitor visitor(kmer_length, DOUBLE_STRANDED_MODE);
Path_n_count_pair selected_path_n_pair_forward = inchworm(kcounter, 'F', kmer, visitor, min_connectivity);
visitor.clear();
// add selected path to visitor
vector<kmer_int_type_t>& forward_path = selected_path_n_pair_forward.first;
if (IRKE_COMMON::MONITOR >= 2) {
cerr << "Forward path contains: " << forward_path.size() << " kmers. " << endl;
//.........这里部分代码省略.........
示例13: inchworm_step
Path_n_count_pair IRKE::inchworm_step(KmerCounter &kcounter,
char direction,
Kmer_Occurence_Pair kmer,
Kmer_visitor &visitor,
Kmer_visitor &eliminator,
unsigned int inchworm_round,
unsigned int depth,
float MIN_CONNECTIVITY_RATIO,
unsigned int max_recurse)
{
// cout << "inchworm_step" << endl;
if (IRKE_COMMON::MONITOR >= 2) {
cerr << "\rinchworm: " << string(1, direction)
<< " A:" << INCHWORM_ASSEMBLY_COUNTER << " "
<< " rnd:" << inchworm_round << " D:" << depth << " ";
}
// check to see if kmer exists. If not, return empty container
Path_n_count_pair best_path_n_pair;
best_path_n_pair.second = 0; // init
if ( // !kmer.second ||
visitor.exists(kmer.first) // visited
|| eliminator.exists(kmer.first) // eliminated
) {
if (IRKE_COMMON::MONITOR >= 3) {
cerr << "base case, already visited or kmer doesn't exist." << endl;
cerr << kmer.first << " already visited or doesn't exist. ending recursion at depth: " << depth << endl;
}
return (best_path_n_pair);
}
visitor.add(kmer.first);
if (PACMAN && depth > 0) {
// cerr << "pacman eliminated kmer: " << kmer << endl;
eliminator.add(kmer.first);
}
if (depth < max_recurse) {
vector<Kmer_Occurence_Pair> kmer_candidates;
if (direction == 'F') {
// forward search
kmer_candidates = kcounter.get_forward_kmer_candidates(kmer.first);
}
else {
// reverse search
kmer_candidates = kcounter.get_reverse_kmer_candidates(kmer.first);
}
if (IRKE_COMMON::MONITOR >= 3) {
cerr << "Got " << kmer_candidates.size() << " kmer extension candidates." << endl;
}
bool tie = true;
unsigned int recurse_cap = max_recurse;
unsigned int best_path_length = 0;
while (tie) {
// keep trying to break ties if ties encountered.
// this is done by increasing the allowed recursion depth until the tie is broken.
// Recursion depth set via: recurse_cap and incremented if tie is found
vector<Path_n_count_pair> paths; // to collect all the paths rooting from this point
for (unsigned int i = 0; i < kmer_candidates.size(); i++) {
Kmer_Occurence_Pair kmer_candidate = kmer_candidates[i];
if (kmer_candidate.second &&
!visitor.exists(kmer_candidate
.first) // avoid creating already visited kmers since they're unvisited below...
&& exceeds_min_connectivity(kcounter, kmer, kmer_candidate, MIN_CONNECTIVITY_RATIO)) {
//cout << endl << "\ttrying " << kmer_candidate << endl;
// recursive call here for extension
Path_n_count_pair p = inchworm_step(kcounter,
direction,
kmer_candidate,
visitor,
eliminator,
inchworm_round,
depth + 1,
MIN_CONNECTIVITY_RATIO,
recurse_cap);
if (p.first.size() >= 1) {
// only retain paths that include visited nodes.
paths.push_back(p);
//.........这里部分代码省略.........
示例14: inchworm
Path_n_count_pair IRKE::inchworm(KmerCounter &kcounter,
char direction,
kmer_int_type_t kmer,
Kmer_visitor &visitor,
float min_connectivity)
{
// cout << "inchworm" << endl;
Path_n_count_pair entire_path;
entire_path.second = 0; // init cumulative path coverage
unsigned int inchworm_round = 0;
unsigned long num_total_kmers = kcounter.size();
Kmer_visitor eliminator(kcounter.get_kmer_length(), DOUBLE_STRANDED_MODE);
while (true) {
if (IRKE_COMMON::__DEVEL_rand_fracture) {
// terminate extension with probability of __DEVEL_rand_fracture_prob
float prob_to_fracture = rand() / (float) RAND_MAX;
//cerr << "prob: " << prob_to_fracture << endl;
if (prob_to_fracture <= IRKE_COMMON::__DEVEL_rand_fracture_prob) {
// cerr << "Fracturing at iworm round: " << inchworm_round << " given P: " << prob_to_fracture << endl;
return (entire_path);
}
}
inchworm_round++;
eliminator.clear();
if (inchworm_round > num_total_kmers) {
throw (string("Error, inchworm rounds have exceeded the number of possible seed kmers"));
}
if (IRKE_COMMON::MONITOR >= 3) {
cerr << endl << "Inchworm round(" << string(1, direction) << "): " << inchworm_round << " searching kmer: "
<< kmer << endl;
string kmer_str = kcounter.get_kmer_string(kmer);
cerr << kcounter.describe_kmer(kmer_str) << endl;
}
visitor.erase(kmer); // seed kmer must be not visited already.
Kmer_Occurence_Pair kmer_pair(kmer, kcounter.get_kmer_count(kmer));
Path_n_count_pair best_path = inchworm_step(kcounter,
direction,
kmer_pair,
visitor,
eliminator,
inchworm_round,
0,
min_connectivity,
MAX_RECURSION);
vector<kmer_int_type_t> &kmer_list = best_path.first;
unsigned int num_kmers = kmer_list.size();
if ((IRKE_COMMON::__DEVEL_zero_kmer_on_use && num_kmers >= 1) || best_path.second > 0) {
// append info to entire path in reverse order, so starts just after seed kmer
int first_index = num_kmers - 1;
int last_index = 0;
if (CRAWL) {
last_index = first_index - CRAWL_LENGTH + 1;
if (last_index < 0) {
last_index = 0;
}
}
for (int i = first_index; i >= last_index; i--) {
kmer_int_type_t kmer_extend = kmer_list[i];
entire_path.first.push_back(kmer_extend);
visitor.add(kmer_extend);
//entire_path.second += kcounter.get_kmer_count(kmer_extend);
// selected here, zero out:
if (IRKE_COMMON::__DEVEL_zero_kmer_on_use) {
kcounter.clear_kmer(kmer_extend);
}
}
kmer = entire_path.first[entire_path.first.size() - 1];
entire_path.second += best_path.second;
}
else {
// no extension possible
//.........这里部分代码省略.........
示例15: compute_sequence_assemblies
void IRKE::compute_sequence_assemblies(KmerCounter &kcounter, float min_connectivity,
unsigned int MIN_ASSEMBLY_LENGTH, unsigned int MIN_ASSEMBLY_COVERAGE,
bool PARALLEL_IWORM, bool WRITE_COVERAGE, string COVERAGE_OUTPUT_FILENAME)
{
if (!got_sorted_kmers_flag) {
stringstream error;
error << stacktrace() << " Error, must populate_sorted_kmers_list() before computing sequence assemblies"
<< endl;
throw (error.str());
}
//vector<Kmer_counter_map_iterator>& kmers = sorted_kmers;
vector<Kmer_Occurence_Pair> &kmers = sorted_kmers; // note, these are not actually sorted if PARALLEL_IWORM mode.
unsigned long init_size = kcounter.size();
cerr << "Total kcounter hash size: " << init_size << " vs. sorted list size: " << kmers.size() << endl;
unsigned int kmer_length = kcounter.get_kmer_length();
ofstream coverage_writer;
if (WRITE_COVERAGE) {
coverage_writer.open(COVERAGE_OUTPUT_FILENAME.c_str());
}
// string s = "before.kmers";
// kcounter.dump_kmers_to_file(s);
/* -----------------------------------------------------------
Two reconstruction modes. (bhaas, Jan-4-2014)
1. Original (not setting PARALLEL_IWORM): kmer list is sorted descendingly by abundance.
Seed kmer is chosen as most abundant, and contig construction extends from this seed.
2. PARALLEL_IWORM: the kmer list is not sorted. A random kmer (just ordered by hash iterator, nothing special, probably not so random)
is used as a seed for draft contig reconstruction.
The most abundant kmer in the draft contig is chosen as a proper seed.
An inchworm contig is then constructed using this new seed, and reported.
So, in this case, the draft contig in the first phase is just used to find a better seed, from which the inchworm contig is then
properly reconstructed.
--------------------------------------------------------------- */
// try building an inchworm contig from each seed kmer
int myTid;
if (PARALLEL_IWORM) {
omp_set_num_threads(IRKE_COMMON::NUM_THREADS);
}
else {
omp_set_num_threads(1); // turn off multithreading for the contig building.
}
//-----------------------------------------------------------------
// Prep writing to separate inchworm output files for each thread
//-----------------------------------------------------------------
vector<iworm_tmp_file> tmpfiles;
int num_threads = omp_get_max_threads();
cerr << "num threads set to: " << num_threads << endl;
for (int i = 0; i < num_threads; i++) {
iworm_tmp_file tmpfile_struct;
tmpfiles.push_back(tmpfile_struct);
iworm_tmp_file &itmp = tmpfiles[i];
stringstream filename_constructor;
filename_constructor << "tmp.iworm.fa.pid_" << getpid() << ".thread_" << i;
itmp.tmp_filename = new char[100];
strcpy(itmp.tmp_filename, filename_constructor.str().c_str());
itmp.tmp_filename[filename_constructor.str().length()] = '\0';
itmp.fh = new ofstream();
itmp.fh->open(itmp.tmp_filename);
cerr << "Done opening file. " << itmp.tmp_filename << endl;
}
//-------------------
// Build contigs.
//-------------------
#pragma omp parallel for private (myTid) schedule (dynamic, 1000)
for (unsigned int i = 0; i < kmers.size(); i++) {
// cerr << "round: " << i << endl;
myTid = omp_get_thread_num();
unsigned long kmer_counter_size = kcounter.size();
if (kmer_counter_size > init_size) {
// string s = "after.kmers";
//.........这里部分代码省略.........