本文整理汇总了C++中KmerCounter::size方法的典型用法代码示例。如果您正苦于以下问题:C++ KmerCounter::size方法的具体用法?C++ KmerCounter::size怎么用?C++ KmerCounter::size使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类KmerCounter
的用法示例。
在下文中一共展示了KmerCounter::size方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: populate_kmer_counter
void populate_kmer_counter(KmerCounter &kcounter, string &kmers_fasta_file)
{
// code largely copied from IRKE.cpp
int i, myTid;
unsigned long sum,
*record_counter = new unsigned long[omp_get_max_threads()];
unsigned long start, end;
// init record counter
for (int i = 0; i < omp_get_max_threads(); i++) {
record_counter[i] = 0;
}
cerr << "-reading Kmer occurences..." << endl;
start = time(NULL);
Fasta_reader fasta_reader(kmers_fasta_file);
#pragma omp parallel private (myTid)
{
myTid = omp_get_thread_num();
record_counter[myTid] = 0;
while (true) {
Fasta_entry fe = fasta_reader.getNext();
if (fe.get_sequence() == "") break;
record_counter[myTid]++;
if (IRKE_COMMON::MONITOR) {
if (myTid == 0 && record_counter[myTid] % 100000 == 0) {
sum = record_counter[0];
for (i = 1; i < omp_get_num_threads(); i++)
sum += record_counter[i];
cerr << "\r [" << sum / 1000000 << "M] Kmers parsed. ";
}
}
string seq = fe.get_sequence();
if (seq.length() != KMER_SIZE) {
cerr << "ERROR: kmer " << seq << " is not of length: " << KMER_SIZE << endl;
continue;
}
kmer_int_type_t kmer = kcounter.get_kmer_intval(seq);
unsigned int count = atoi(fe.get_header().c_str());
kcounter.add_kmer(kmer, count);
}
}
end = time(NULL);
sum = record_counter[0];
for (i = 1; i < omp_get_max_threads(); i++)
sum += record_counter[i];
delete[] record_counter;
cerr << endl << " done parsing " << sum << " Kmers, " << kcounter.size() << " added, taking " << (end - start)
<< " seconds." << endl;
return;
}
示例2: inchworm
Path_n_count_pair IRKE::inchworm (KmerCounter& kcounter, char direction, kmer_int_type_t kmer, Kmer_visitor& visitor, float min_connectivity) {
// cout << "inchworm" << endl;
Path_n_count_pair entire_path;
unsigned int inchworm_round = 0;
unsigned long num_total_kmers = kcounter.size();
Kmer_visitor eliminator(kcounter.get_kmer_length(), DOUBLE_STRANDED_MODE);
while (true) {
inchworm_round++;
eliminator.clear();
if (inchworm_round > num_total_kmers) {
throw(string ("Error, inchworm rounds have exceeded the number of possible seed kmers"));
}
if (IRKE_COMMON::MONITOR >= 3) {
cerr << endl << "Inchworm round(" << string(1,direction) << "): " << inchworm_round << " searching kmer: " << kmer << endl;
string kmer_str = kcounter.get_kmer_string(kmer);
cerr << kcounter.describe_kmer(kmer_str) << endl;
}
visitor.erase(kmer); // seed kmer must be not visited already.
Kmer_Occurence_Pair kmer_pair(kmer, kcounter.get_kmer_count(kmer));
Path_n_count_pair best_path = inchworm_step(kcounter, direction, kmer_pair, visitor, eliminator, inchworm_round, 0, min_connectivity, MAX_RECURSION);
if (best_path.second > 0) {
// append info to entire path in reverse order, so starts just after seed kmer
vector<kmer_int_type_t>& kmer_list = best_path.first;
unsigned int num_kmers = kmer_list.size();
int first_index = num_kmers - 1;
int last_index = 0;
if (CRAWL) {
last_index = first_index - CRAWL_LENGTH + 1;
if (last_index < 0) {
last_index = 0;
}
}
for (int i = first_index; i >= last_index; i--) {
kmer_int_type_t kmer_extend = kmer_list[i];
entire_path.first.push_back(kmer_extend);
visitor.add(kmer_extend);
entire_path.second += kcounter.get_kmer_count(kmer_extend);
}
kmer = entire_path.first[ entire_path.first.size() -1 ];
}
else {
// no extension possible
break;
}
}
if (IRKE_COMMON::MONITOR >= 3)
cerr << endl;
return(entire_path);
}
示例3: compute_sequence_assemblies
void IRKE::compute_sequence_assemblies(KmerCounter& kcounter, float min_connectivity,
unsigned int MIN_ASSEMBLY_LENGTH, unsigned int MIN_ASSEMBLY_COVERAGE,
bool WRITE_COVERAGE, string COVERAGE_OUTPUT_FILENAME) {
if (! got_sorted_kmers_flag) {
stringstream error;
error << stacktrace() << " Error, must populate_sorted_kmers_list() before computing sequence assemblies" << endl;
throw(error.str());
}
unsigned int kmer_length = kcounter.get_kmer_length();
ofstream coverage_writer;
if (WRITE_COVERAGE) {
coverage_writer.open(COVERAGE_OUTPUT_FILENAME.c_str());
}
vector<Kmer_counter_map_iterator>& kmers = sorted_kmers; //kcounter.get_kmers_sort_descending_counts();
unsigned long init_size = kcounter.size();
// string s = "before.kmers";
// kcounter.dump_kmers_to_file(s);
for (unsigned int i = 0; i < kmers.size(); i++) {
// cerr << "round: " << i << endl;
unsigned long kmer_counter_size = kcounter.size();
if (kmer_counter_size > init_size) {
// string s = "after.kmers";
// kcounter.dump_kmers_to_file(s);
stringstream error;
error << stacktrace() << "Error, Kcounter size has grown from " << init_size
<< " to " << kmer_counter_size << endl;
throw (error.str());
}
kmer_int_type_t kmer = kmers[i]->first;
unsigned int kmer_count = kmers[i]->second;
if (kmer_count == 0) {
continue;
}
if (IRKE_COMMON::MONITOR >= 2) {
cerr << "SEED kmer: " << kcounter.get_kmer_string(kmer) << ", count: " << kmer_count << endl;
}
if (kmer == revcomp_val(kmer, kmer_length)) {
// palindromic kmer, avoid palindromes as seeds
if (IRKE_COMMON::MONITOR >= 2) {
cerr << "SEED kmer: " << kcounter.get_kmer_string(kmer) << " is palidnromic. Skipping. " << endl;
}
continue;
}
if (kmer_count < MIN_SEED_COVERAGE) {
if (IRKE_COMMON::MONITOR >= 2) {
cerr << "-seed has insufficient coverage, skipping" << endl;
}
continue;
}
float entropy = compute_entropy(kmer, kmer_length);
if (entropy < MIN_SEED_ENTROPY) {
if (IRKE_COMMON::MONITOR >= 2) {
cerr << "-skipping seed due to low entropy: " << entropy << endl;
}
continue;
}
/* Extend to the right */
Kmer_visitor visitor(kmer_length, DOUBLE_STRANDED_MODE);
Path_n_count_pair selected_path_n_pair_forward = inchworm(kcounter, 'F', kmer, visitor, min_connectivity);
visitor.clear();
// add selected path to visitor
vector<kmer_int_type_t>& forward_path = selected_path_n_pair_forward.first;
if (IRKE_COMMON::MONITOR >= 2) {
cerr << "Forward path contains: " << forward_path.size() << " kmers. " << endl;
//.........这里部分代码省略.........
示例4: inchworm
Path_n_count_pair IRKE::inchworm(KmerCounter &kcounter,
char direction,
kmer_int_type_t kmer,
Kmer_visitor &visitor,
float min_connectivity)
{
// cout << "inchworm" << endl;
Path_n_count_pair entire_path;
entire_path.second = 0; // init cumulative path coverage
unsigned int inchworm_round = 0;
unsigned long num_total_kmers = kcounter.size();
Kmer_visitor eliminator(kcounter.get_kmer_length(), DOUBLE_STRANDED_MODE);
while (true) {
if (IRKE_COMMON::__DEVEL_rand_fracture) {
// terminate extension with probability of __DEVEL_rand_fracture_prob
float prob_to_fracture = rand() / (float) RAND_MAX;
//cerr << "prob: " << prob_to_fracture << endl;
if (prob_to_fracture <= IRKE_COMMON::__DEVEL_rand_fracture_prob) {
// cerr << "Fracturing at iworm round: " << inchworm_round << " given P: " << prob_to_fracture << endl;
return (entire_path);
}
}
inchworm_round++;
eliminator.clear();
if (inchworm_round > num_total_kmers) {
throw (string("Error, inchworm rounds have exceeded the number of possible seed kmers"));
}
if (IRKE_COMMON::MONITOR >= 3) {
cerr << endl << "Inchworm round(" << string(1, direction) << "): " << inchworm_round << " searching kmer: "
<< kmer << endl;
string kmer_str = kcounter.get_kmer_string(kmer);
cerr << kcounter.describe_kmer(kmer_str) << endl;
}
visitor.erase(kmer); // seed kmer must be not visited already.
Kmer_Occurence_Pair kmer_pair(kmer, kcounter.get_kmer_count(kmer));
Path_n_count_pair best_path = inchworm_step(kcounter,
direction,
kmer_pair,
visitor,
eliminator,
inchworm_round,
0,
min_connectivity,
MAX_RECURSION);
vector<kmer_int_type_t> &kmer_list = best_path.first;
unsigned int num_kmers = kmer_list.size();
if ((IRKE_COMMON::__DEVEL_zero_kmer_on_use && num_kmers >= 1) || best_path.second > 0) {
// append info to entire path in reverse order, so starts just after seed kmer
int first_index = num_kmers - 1;
int last_index = 0;
if (CRAWL) {
last_index = first_index - CRAWL_LENGTH + 1;
if (last_index < 0) {
last_index = 0;
}
}
for (int i = first_index; i >= last_index; i--) {
kmer_int_type_t kmer_extend = kmer_list[i];
entire_path.first.push_back(kmer_extend);
visitor.add(kmer_extend);
//entire_path.second += kcounter.get_kmer_count(kmer_extend);
// selected here, zero out:
if (IRKE_COMMON::__DEVEL_zero_kmer_on_use) {
kcounter.clear_kmer(kmer_extend);
}
}
kmer = entire_path.first[entire_path.first.size() - 1];
entire_path.second += best_path.second;
}
else {
// no extension possible
//.........这里部分代码省略.........
示例5: compute_sequence_assemblies
void IRKE::compute_sequence_assemblies(KmerCounter &kcounter, float min_connectivity,
unsigned int MIN_ASSEMBLY_LENGTH, unsigned int MIN_ASSEMBLY_COVERAGE,
bool PARALLEL_IWORM, bool WRITE_COVERAGE, string COVERAGE_OUTPUT_FILENAME)
{
if (!got_sorted_kmers_flag) {
stringstream error;
error << stacktrace() << " Error, must populate_sorted_kmers_list() before computing sequence assemblies"
<< endl;
throw (error.str());
}
//vector<Kmer_counter_map_iterator>& kmers = sorted_kmers;
vector<Kmer_Occurence_Pair> &kmers = sorted_kmers; // note, these are not actually sorted if PARALLEL_IWORM mode.
unsigned long init_size = kcounter.size();
cerr << "Total kcounter hash size: " << init_size << " vs. sorted list size: " << kmers.size() << endl;
unsigned int kmer_length = kcounter.get_kmer_length();
ofstream coverage_writer;
if (WRITE_COVERAGE) {
coverage_writer.open(COVERAGE_OUTPUT_FILENAME.c_str());
}
// string s = "before.kmers";
// kcounter.dump_kmers_to_file(s);
/* -----------------------------------------------------------
Two reconstruction modes. (bhaas, Jan-4-2014)
1. Original (not setting PARALLEL_IWORM): kmer list is sorted descendingly by abundance.
Seed kmer is chosen as most abundant, and contig construction extends from this seed.
2. PARALLEL_IWORM: the kmer list is not sorted. A random kmer (just ordered by hash iterator, nothing special, probably not so random)
is used as a seed for draft contig reconstruction.
The most abundant kmer in the draft contig is chosen as a proper seed.
An inchworm contig is then constructed using this new seed, and reported.
So, in this case, the draft contig in the first phase is just used to find a better seed, from which the inchworm contig is then
properly reconstructed.
--------------------------------------------------------------- */
// try building an inchworm contig from each seed kmer
int myTid;
if (PARALLEL_IWORM) {
omp_set_num_threads(IRKE_COMMON::NUM_THREADS);
}
else {
omp_set_num_threads(1); // turn off multithreading for the contig building.
}
//-----------------------------------------------------------------
// Prep writing to separate inchworm output files for each thread
//-----------------------------------------------------------------
vector<iworm_tmp_file> tmpfiles;
int num_threads = omp_get_max_threads();
cerr << "num threads set to: " << num_threads << endl;
for (int i = 0; i < num_threads; i++) {
iworm_tmp_file tmpfile_struct;
tmpfiles.push_back(tmpfile_struct);
iworm_tmp_file &itmp = tmpfiles[i];
stringstream filename_constructor;
filename_constructor << "tmp.iworm.fa.pid_" << getpid() << ".thread_" << i;
itmp.tmp_filename = new char[100];
strcpy(itmp.tmp_filename, filename_constructor.str().c_str());
itmp.tmp_filename[filename_constructor.str().length()] = '\0';
itmp.fh = new ofstream();
itmp.fh->open(itmp.tmp_filename);
cerr << "Done opening file. " << itmp.tmp_filename << endl;
}
//-------------------
// Build contigs.
//-------------------
#pragma omp parallel for private (myTid) schedule (dynamic, 1000)
for (unsigned int i = 0; i < kmers.size(); i++) {
// cerr << "round: " << i << endl;
myTid = omp_get_thread_num();
unsigned long kmer_counter_size = kcounter.size();
if (kmer_counter_size > init_size) {
// string s = "after.kmers";
//.........这里部分代码省略.........
示例6: populate_kmer_counter_from_reads
void populate_kmer_counter_from_reads (KmerCounter& kcounter, string& fasta_filename) {
unsigned int kmer_length = kcounter.get_kmer_length();
int i, myTid;
unsigned long sum,
*record_counter = new unsigned long[omp_get_max_threads()];
unsigned long start, end;
// init record counter
for (int i = 0; i < omp_get_max_threads(); i++) {
record_counter[i] = 0;
}
cerr << "-storing Kmers..." << "\n";
start = time(NULL);
Fasta_reader fasta_reader(fasta_filename);
unsigned int entry_num = 0;
#pragma omp parallel private (myTid)
{
myTid = omp_get_thread_num();
record_counter[myTid] = 0;
while (fasta_reader.hasNext()) {
Fasta_entry fe = fasta_reader.getNext();
string accession = fe.get_accession();
#pragma omp atomic
entry_num++;
record_counter[myTid]++;
if (IRKE_COMMON::MONITOR >= 4) {
cerr << "[" << entry_num << "] acc: " << accession << ", by thread no: " << myTid << "\n";;
}
else if (IRKE_COMMON::MONITOR) {
if (myTid == 0 && record_counter[myTid] % 1000 == 0)
{
sum = record_counter[0];
for (i=1; i<omp_get_num_threads(); i++)
sum+= record_counter[i];
cerr << "\r [" << sum << "] sequences parsed. ";
}
}
string seq = fe.get_sequence();
if (seq.length() < KMER_SIZE + 1) {
continue;
}
kcounter.add_sequence(seq);
}
cerr << "\n" << " done parsing " << sum << " sequences, extracted " << kcounter.size() << " kmers, taking " << (end-start) << " seconds." << "\n";
}
return;
}