本文整理汇总了C++中KmerCounter::get_kmer_count方法的典型用法代码示例。如果您正苦于以下问题:C++ KmerCounter::get_kmer_count方法的具体用法?C++ KmerCounter::get_kmer_count怎么用?C++ KmerCounter::get_kmer_count使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类KmerCounter的用法示例。
在下文中一共展示了KmerCounter::get_kmer_count方法的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: extract_best_seed
/**
 * Choose the most abundant k-mer in kmer_vec that also passes is_good_seed_kmer().
 *
 * @param kmer_vec          candidate k-mers to examine
 * @param kcounter          queried for the k-mer length and for each candidate's count
 * @param min_connectivity  forwarded unchanged to is_good_seed_kmer()
 * @return the selected k-mer; 0 when no candidate has a positive count and passes the check
 */
kmer_int_type_t IRKE::extract_best_seed(vector<kmer_int_type_t> &kmer_vec,
                                        KmerCounter &kcounter,
                                        float min_connectivity)
{
    unsigned int k = kcounter.get_kmer_length();

    kmer_int_type_t winner = 0;
    unsigned int winner_count = 0;

    for (vector<kmer_int_type_t>::iterator it = kmer_vec.begin(); it != kmer_vec.end(); ++it) {
        kmer_int_type_t candidate = *it;
        unsigned int candidate_count = kcounter.get_kmer_count(candidate);

        // Only a strictly higher count can displace the current winner; the
        // seed check is not evaluated for candidates that lose on count.
        if (candidate_count <= winner_count) {
            continue;
        }
        if (!is_good_seed_kmer(candidate, candidate_count, k, min_connectivity)) {
            continue;
        }

        winner = candidate;
        winner_count = candidate_count;
    }

    if (IRKE_COMMON::MONITOR >= 2) {
        cerr << "Parallel method found better seed: " << kcounter.get_kmer_string(winner) << " with count: "
             << winner_count << endl;
    }

    return winner;
}
示例2: compute_kmer_coverage
/**
 * Compute the k-mer count at every k-mer start position of a sequence.
 *
 * One entry is produced per window of KMER_SIZE characters. Windows for which
 * contains_non_gatc() is true are not looked up. Any count below 1 is reported as 1.
 *
 * @param sequence  sequence to scan
 * @param kcounter  counter queried for each window
 * @return per-position counts; empty when the sequence is shorter than KMER_SIZE
 */
vector<unsigned int> compute_kmer_coverage(string& sequence, KmerCounter& kcounter) {
    vector<unsigned int> coverage;

    if (IRKE_COMMON::MONITOR) {
        cerr << "processing sequence: " << sequence << endl;
    }

    // Guard explicitly instead of relying on "(int) length - KMER_SIZE" going
    // negative: with an unsigned KMER_SIZE that difference wraps to a huge
    // value and substr() would be called past the end of the sequence.
    const string::size_type kmer_size = static_cast<string::size_type>(KMER_SIZE);
    if (sequence.length() < kmer_size) {
        return coverage;
    }

    const string::size_type last_start = sequence.length() - kmer_size;
    coverage.reserve(last_start + 1);

    for (string::size_type i = 0; i <= last_start; i++) {
        string kmer = sequence.substr(i, kmer_size);

        if (IRKE_COMMON::MONITOR >= 2) {
            // Indent the k-mer so it lines up under its position in the sequence.
            for (string::size_type j = 0; j <= i; j++) {
                cerr << " ";
            }
            cerr << kmer << endl;
        }

        unsigned int kmer_count = 0;
        if (!contains_non_gatc(kmer)) {
            kmer_count = kcounter.get_kmer_count(kmer);
        }

        // Note, in the jellyfish run, we restrain it to min kmer coverage of 2.
        // If we don't find a kmer catalogued, it must have a kmer count of 1.
        if (kmer_count < 1) {
            kmer_count = 1;
        }

        coverage.push_back(kmer_count);
    }

    return coverage;
}
示例3: reconstruct_path_sequence
/**
 * Rebuild the sequence spelled out by a path of k-mers.
 *
 * The result is the full string of the first k-mer followed by the last
 * character of every subsequent k-mer. The count of each k-mer on the path is
 * appended to cov_counter in path order.
 *
 * @param kcounter     supplies the string form and count of each k-mer
 * @param path         k-mers in path order
 * @param cov_counter  receives one count per path entry (appended, not cleared)
 * @return the reconstructed sequence; empty string for an empty path
 */
string IRKE::reconstruct_path_sequence(KmerCounter& kcounter, vector<kmer_int_type_t>& path, vector<unsigned int>& cov_counter) {
    if (path.empty()) {
        return "";
    }

    vector<kmer_int_type_t>::iterator step = path.begin();

    // The first k-mer contributes all of its characters.
    string assembled = kcounter.get_kmer_string(*step);
    cov_counter.push_back(kcounter.get_kmer_count(*step));

    // Every later k-mer contributes only its final character.
    for (++step; step != path.end(); ++step) {
        string next_kmer = kcounter.get_kmer_string(*step);
        assembled.append(next_kmer, next_kmer.length() - 1, 1);
        cov_counter.push_back(kcounter.get_kmer_count(*step));
    }

    return assembled;
}
示例4: build_inchworm_contig_from_seed
/**
 * Build a contig path around a seed k-mer by extending it in both directions.
 *
 * The seed's count is read first. The path is then extended with inchworm() in
 * direction 'F', the visitor is reset and loaded with the forward path, the seed
 * is removed from the visitor again, and the path is extended in direction 'R'.
 *
 * @param kmer              seed k-mer
 * @param kcounter          k-mer counter used for counts, strings and extension
 * @param min_connectivity  forwarded unchanged to inchworm()
 * @param total_counts      output: forward count + reverse count + seed count
 * @param (unnamed bool)    not used by this function
 * @return the result of _join_forward_n_reverse_paths(reverse, seed, forward)
 */
vector<kmer_int_type_t> IRKE::build_inchworm_contig_from_seed(kmer_int_type_t kmer, KmerCounter &kcounter,
                                                              float min_connectivity, unsigned int &total_counts,
                                                              bool)
{
    // Read the seed count before any extension is run.
    unsigned int seed_count = kcounter.get_kmer_count(kmer);

    /* Extend to the right */
    unsigned int k = kcounter.get_kmer_length();
    Kmer_visitor visitor(k, DOUBLE_STRANDED_MODE);
    Path_n_count_pair forward_result = inchworm(kcounter, 'F', kmer, visitor, min_connectivity);

    // Start from an empty visitor and mark exactly the selected forward path.
    visitor.clear();
    vector<kmer_int_type_t> &forward_path = forward_result.first;

    if (IRKE_COMMON::MONITOR >= 2) {
        cerr << "Forward path contains: " << forward_path.size() << " kmers. " << endl;
    }

    for (vector<kmer_int_type_t>::iterator it = forward_path.begin(); it != forward_path.end(); ++it) {
        kmer_int_type_t path_kmer = *it;
        visitor.add(path_kmer);
        if (IRKE_COMMON::MONITOR >= 2) {
            cerr << "\tForward path kmer: " << kcounter.get_kmer_string(path_kmer) << endl;
        }
    }

    /* Extend to the left */
    visitor.erase(kmer); // reset the seed
    Path_n_count_pair reverse_result = inchworm(kcounter, 'R', kmer, visitor, min_connectivity);
    vector<kmer_int_type_t> &reverse_path = reverse_result.first;

    if (IRKE_COMMON::MONITOR >= 2) {
        cerr << "Reverse path contains: " << reverse_path.size() << " kmers. " << endl;
        for (vector<kmer_int_type_t>::iterator it = reverse_path.begin(); it != reverse_path.end(); ++it) {
            cerr << "\tReverse path kmer: " << kcounter.get_kmer_string(*it) << endl;
        }
    }

    total_counts = forward_result.second + reverse_result.second + seed_count;

    vector<kmer_int_type_t> joined_path = _join_forward_n_reverse_paths(reverse_path, kmer, forward_path);
    return joined_path;
}
示例5: exceeds_min_connectivity
/**
 * String overload: converts both k-mers with kmer_to_intval(), pairs each with
 * its count from kcounter, and delegates to the Kmer_Occurence_Pair overload.
 *
 * @param kcounter          counter queried for both k-mer counts
 * @param kmerA             first k-mer, as a string
 * @param kmerB             second k-mer, as a string
 * @param min_connectivity  forwarded unchanged to the pair overload
 * @return whatever the pair overload returns
 */
bool IRKE::exceeds_min_connectivity (KmerCounter& kcounter, string kmerA, string kmerB, float min_connectivity) {
    kmer_int_type_t encoded_a = kmer_to_intval(kmerA);
    kmer_int_type_t encoded_b = kmer_to_intval(kmerB);

    unsigned int count_a = kcounter.get_kmer_count(encoded_a);
    Kmer_Occurence_Pair occurrence_a(encoded_a, count_a);

    unsigned int count_b = kcounter.get_kmer_count(encoded_b);
    Kmer_Occurence_Pair occurrence_b(encoded_b, count_b);

    return exceeds_min_connectivity(kcounter, occurrence_a, occurrence_b, min_connectivity);
}
示例6: compute_kmer_coverage
/**
 * Compute the k-mer count at every k-mer start position of a sequence.
 *
 * One entry is produced per window of KMER_SIZE characters. Windows for which
 * contains_non_gatc() is true are not looked up. Any count below 1 is reported as 1.
 *
 * @param sequence  sequence to scan
 * @param kcounter  counter queried for each window
 * @return per-position counts; an empty vector (after a message on cerr) when
 *         the sequence is shorter than KMER_SIZE
 */
vector<unsigned int> compute_kmer_coverage(string& sequence, KmerCounter& kcounter) {
    if (IRKE_COMMON::MONITOR) {
        cerr << "processing sequence: " << sequence << endl;
    }

    if (sequence.length() < KMER_SIZE)
    {
        // Can't rely on length() - KMER_SIZE for this as length is unsigned
        cerr << "Sequence: " << sequence << " is smaller than " << KMER_SIZE << " base pairs, skipping" << endl;
        return vector<unsigned int>();
    }

    // Safe after the guard above: the difference cannot wrap.
    const size_t last_start = sequence.length() - KMER_SIZE;

    vector<unsigned int> coverage;
    coverage.reserve(last_start + 1);

    for (size_t i = 0; i <= last_start; i++) {
        string kmer = sequence.substr(i, KMER_SIZE);

        if (IRKE_COMMON::MONITOR >= 2) {
            // Indent the k-mer so it lines up under its position in the sequence.
            for (size_t j = 0; j <= i; j++) {
                cerr << " ";
            }
            cerr << kmer << endl;
        }

        unsigned int kmer_count = 0;
        if (!contains_non_gatc(kmer)) {
            kmer_count = kcounter.get_kmer_count(kmer);
        }

        // Note, in the jellyfish run, we restrain it to min kmer coverage of 2.
        // If we don't find a kmer catalogued, it must have a kmer count of 1.
        if (kmer_count < 1) {
            kmer_count = 1;
        }

        coverage.push_back(kmer_count);
    }

    return coverage;
}
示例7: inchworm
/**
 * Extend a path from a starting k-mer in one direction, one round at a time.
 *
 * Each round clears the eliminator, removes the current k-mer from the visitor,
 * and asks inchworm_step() for the best path from it. If that path's count is
 * positive, its k-mers are appended to the result in reverse list order, each is
 * added to the visitor, and its count is added to the running total. The last
 * appended k-mer becomes the starting point of the next round. The loop stops on
 * the first round whose best path has no positive count.
 *
 * When CRAWL is set, only the last CRAWL_LENGTH entries of the step's k-mer list
 * are appended in a round.
 *
 * @param kcounter          k-mer counter used for counts and descriptions
 * @param direction         direction character passed through to inchworm_step()
 * @param kmer              starting k-mer
 * @param visitor           visited-k-mer set, updated as the path grows
 * @param min_connectivity  forwarded unchanged to inchworm_step()
 * @return the accumulated path and its summed k-mer counts
 * @throws string when the round number exceeds kcounter.size()
 */
Path_n_count_pair IRKE::inchworm (KmerCounter& kcounter, char direction, kmer_int_type_t kmer, Kmer_visitor& visitor, float min_connectivity) {
    // cout << "inchworm" << endl;
    Path_n_count_pair entire_path;

    unsigned int inchworm_round = 0;
    unsigned long round_limit = kcounter.size();
    Kmer_visitor eliminator(kcounter.get_kmer_length(), DOUBLE_STRANDED_MODE);

    for (;;) {
        ++inchworm_round;
        eliminator.clear();

        if (inchworm_round > round_limit) {
            throw(string ("Error, inchworm rounds have exceeded the number of possible seed kmers"));
        }

        if (IRKE_COMMON::MONITOR >= 3) {
            cerr << endl << "Inchworm round(" << string(1,direction) << "): " << inchworm_round << " searching kmer: " << kmer << endl;
            string kmer_str = kcounter.get_kmer_string(kmer);
            cerr << kcounter.describe_kmer(kmer_str) << endl;
        }

        visitor.erase(kmer); // seed kmer must be not visited already.

        Kmer_Occurence_Pair kmer_pair(kmer, kcounter.get_kmer_count(kmer));
        Path_n_count_pair best_path = inchworm_step(kcounter, direction, kmer_pair, visitor, eliminator, inchworm_round, 0, min_connectivity, MAX_RECURSION);

        if (!(best_path.second > 0)) {
            // no extension possible
            break;
        }

        // append info to entire path in reverse order, so starts just after seed kmer
        vector<kmer_int_type_t>& step_kmers = best_path.first;
        unsigned int step_size = step_kmers.size();

        int first_index = step_size - 1;
        int last_index = 0;
        if (CRAWL) {
            // Limit this round to the CRAWL_LENGTH entries at the end of the list.
            last_index = first_index - CRAWL_LENGTH + 1;
            if (last_index < 0) {
                last_index = 0;
            }
        }

        int pos = first_index;
        while (pos >= last_index) {
            kmer_int_type_t kmer_extend = step_kmers[pos];
            entire_path.first.push_back(kmer_extend);
            visitor.add(kmer_extend);
            entire_path.second += kcounter.get_kmer_count(kmer_extend);
            --pos;
        }

        // Continue the next round from the k-mer appended last.
        kmer = entire_path.first[ entire_path.first.size() - 1 ];
    }

    if (IRKE_COMMON::MONITOR >= 3) {
        cerr << endl;
    }

    return entire_path;
}
示例8: compute_sequence_assemblies
//.........这里部分代码省略.........
visitor.clear();
// add selected path to visitor
vector<kmer_int_type_t>& forward_path = selected_path_n_pair_forward.first;
if (IRKE_COMMON::MONITOR >= 2) {
cerr << "Forward path contains: " << forward_path.size() << " kmers. " << endl;
}
for (unsigned int i = 0; i < forward_path.size(); i++) {
kmer_int_type_t kmer = forward_path[i];
visitor.add(kmer);
if (IRKE_COMMON::MONITOR >= 2) {
cerr << "\tForward path kmer: " << kcounter.get_kmer_string(kmer) << endl;
}
}
/* Extend to the left */
visitor.erase(kmer); // reset the seed
Path_n_count_pair selected_path_n_pair_reverse = inchworm(kcounter, 'R', kmer, visitor, min_connectivity);
if (IRKE_COMMON::MONITOR >= 2) {
vector<kmer_int_type_t>& reverse_path = selected_path_n_pair_reverse.first;
cerr << "Reverse path contains: " << reverse_path.size() << " kmers. " << endl;
for (unsigned int i = 0; i < reverse_path.size(); i++) {
cerr << "\tReverse path kmer: " << kcounter.get_kmer_string(reverse_path[i]) << endl;
}
}
unsigned int total_counts = selected_path_n_pair_forward.second + selected_path_n_pair_reverse.second + kcounter.get_kmer_count(kmer);
vector<kmer_int_type_t>& reverse_path = selected_path_n_pair_reverse.first;
vector<kmer_int_type_t> joined_path = _join_forward_n_reverse_paths(reverse_path, kmer, forward_path);
// report sequence reconstructed from path.
vector<unsigned int> assembly_base_coverage;
string sequence = reconstruct_path_sequence(kcounter, joined_path, assembly_base_coverage);
unsigned int avg_cov = static_cast<unsigned int> ( (float)total_counts/(sequence.length()-kcounter.get_kmer_length() +1) + 0.5);
/*
cout << "Inchworm-reconstructed sequence, length: " << sequence.length()
<< ", avgCov: " << avg_cov
<< " " << sequence << endl;
*/
if (sequence.length() >= MIN_ASSEMBLY_LENGTH && avg_cov >= MIN_ASSEMBLY_COVERAGE) {
INCHWORM_ASSEMBLY_COUNTER++;
stringstream headerstream;
headerstream << ">a" << INCHWORM_ASSEMBLY_COUNTER << ";" << avg_cov
<< " K: " << kmer_length
<< " length: " << sequence.length();
string header = headerstream.str();
示例9: inchworm
Path_n_count_pair IRKE::inchworm(KmerCounter &kcounter,
char direction,
kmer_int_type_t kmer,
Kmer_visitor &visitor,
float min_connectivity)
{
// cout << "inchworm" << endl;
Path_n_count_pair entire_path;
entire_path.second = 0; // init cumulative path coverage
unsigned int inchworm_round = 0;
unsigned long num_total_kmers = kcounter.size();
Kmer_visitor eliminator(kcounter.get_kmer_length(), DOUBLE_STRANDED_MODE);
while (true) {
if (IRKE_COMMON::__DEVEL_rand_fracture) {
// terminate extension with probability of __DEVEL_rand_fracture_prob
float prob_to_fracture = rand() / (float) RAND_MAX;
//cerr << "prob: " << prob_to_fracture << endl;
if (prob_to_fracture <= IRKE_COMMON::__DEVEL_rand_fracture_prob) {
// cerr << "Fracturing at iworm round: " << inchworm_round << " given P: " << prob_to_fracture << endl;
return (entire_path);
}
}
inchworm_round++;
eliminator.clear();
if (inchworm_round > num_total_kmers) {
throw (string("Error, inchworm rounds have exceeded the number of possible seed kmers"));
}
if (IRKE_COMMON::MONITOR >= 3) {
cerr << endl << "Inchworm round(" << string(1, direction) << "): " << inchworm_round << " searching kmer: "
<< kmer << endl;
string kmer_str = kcounter.get_kmer_string(kmer);
cerr << kcounter.describe_kmer(kmer_str) << endl;
}
visitor.erase(kmer); // seed kmer must be not visited already.
Kmer_Occurence_Pair kmer_pair(kmer, kcounter.get_kmer_count(kmer));
Path_n_count_pair best_path = inchworm_step(kcounter,
direction,
kmer_pair,
visitor,
eliminator,
inchworm_round,
0,
min_connectivity,
MAX_RECURSION);
vector<kmer_int_type_t> &kmer_list = best_path.first;
unsigned int num_kmers = kmer_list.size();
if ((IRKE_COMMON::__DEVEL_zero_kmer_on_use && num_kmers >= 1) || best_path.second > 0) {
// append info to entire path in reverse order, so starts just after seed kmer
int first_index = num_kmers - 1;
int last_index = 0;
if (CRAWL) {
last_index = first_index - CRAWL_LENGTH + 1;
if (last_index < 0) {
last_index = 0;
}
}
for (int i = first_index; i >= last_index; i--) {
kmer_int_type_t kmer_extend = kmer_list[i];
entire_path.first.push_back(kmer_extend);
visitor.add(kmer_extend);
//entire_path.second += kcounter.get_kmer_count(kmer_extend);
// selected here, zero out:
if (IRKE_COMMON::__DEVEL_zero_kmer_on_use) {
kcounter.clear_kmer(kmer_extend);
}
}
kmer = entire_path.first[entire_path.first.size() - 1];
entire_path.second += best_path.second;
}
else {
// no extension possible
//.........这里部分代码省略.........
示例10: compute_sequence_assemblies
//.........这里部分代码省略.........
cerr << "Done opening file. " << itmp.tmp_filename << endl;
}
//-------------------
// Build contigs.
//-------------------
#pragma omp parallel for private (myTid) schedule (dynamic, 1000)
for (unsigned int i = 0; i < kmers.size(); i++) {
// cerr << "round: " << i << endl;
myTid = omp_get_thread_num();
unsigned long kmer_counter_size = kcounter.size();
if (kmer_counter_size > init_size) {
// string s = "after.kmers";
// kcounter.dump_kmers_to_file(s);
stringstream error;
error << stacktrace() << "Error, Kcounter size has grown from " << init_size
<< " to " << kmer_counter_size << endl;
throw (error.str());
}
//kmer_int_type_t kmer = kmers[i]->first;
//unsigned int kmer_count = kmers[i]->second;
kmer_int_type_t kmer = kmers[i].first;
// unsigned int kmer_count = kmers[i].second; // NO!!! Use for sorting, but likely zeroed out in the hashtable after contig construction
unsigned int kmer_count = kcounter.get_kmer_count(kmer);
if (!is_good_seed_kmer(kmer, kmer_count, kmer_length, min_connectivity)) {
continue;
}
// cout << "SEED kmer: " << kcounter.get_kmer_string(kmer) << ", count: " << kmer_count << endl;
if (IRKE_COMMON::MONITOR >= 2) {
cerr << "SEED kmer: " << kcounter.get_kmer_string(kmer) << ", count: " << kmer_count << endl;
}
if (IRKE_COMMON::MONITOR >= 2) {
#pragma omp critical
cerr << "Seed for thread: " << myTid << " is " << kcounter.get_kmer_string(kmer) << " with count: "
<< kmer_count << endl;
}
unsigned int total_counts;
vector<kmer_int_type_t> joined_path =
build_inchworm_contig_from_seed(kmer, kcounter, min_connectivity, total_counts, PARALLEL_IWORM);
if (PARALLEL_IWORM && TWO_PHASE) {
// get a new seed based on the draft contig
// choose the 'good' kmer with highest abundance
kmer_int_type_t new_seed = extract_best_seed(joined_path, kcounter, min_connectivity);
if (kcounter.get_kmer_count(new_seed) == 0) {
continue; // must have been zapped by another thread
}
joined_path =