C++ KmerCounter::size方法代码示例

本文整理汇总了C++中KmerCounter::size方法的典型用法代码示例。如果您正苦于以下问题：C++ KmerCounter::size方法的具体用法？C++ KmerCounter::size怎么用？C++ KmerCounter::size使用的例子？那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类KmerCounter的用法示例。

在下文中一共展示了KmerCounter::size方法的6个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: populate_kmer_counter

void populate_kmer_counter(KmerCounter &kcounter, string &kmers_fasta_file)
{
    // code largely copied from IRKE.cpp
    int i, myTid;
    unsigned long sum,
        *record_counter = new unsigned long[omp_get_max_threads()];
    unsigned long start, end;
    // init record counter
    for (int i = 0; i < omp_get_max_threads(); i++) {
        record_counter[i] = 0;
    }
    cerr << "-reading Kmer occurences..." << endl;
    start = time(NULL);
    Fasta_reader fasta_reader(kmers_fasta_file);
#pragma omp parallel private (myTid)
    {
        myTid = omp_get_thread_num();
        record_counter[myTid] = 0;
        while (true) {
            Fasta_entry fe = fasta_reader.getNext();
            if (fe.get_sequence() == "") break;
            record_counter[myTid]++;
            if (IRKE_COMMON::MONITOR) {
                if (myTid == 0 && record_counter[myTid] % 100000 == 0) {
                    sum = record_counter[0];
                    for (i = 1; i < omp_get_num_threads(); i++)
                        sum += record_counter[i];
                    cerr << "\r [" << sum / 1000000 << "M] Kmers parsed.     ";
                }
            }
            string seq = fe.get_sequence();
            if (seq.length() != KMER_SIZE) {
                cerr << "ERROR: kmer " << seq << " is not of length: " << KMER_SIZE << endl;
                continue;
            }
            kmer_int_type_t kmer = kcounter.get_kmer_intval(seq);
            unsigned int count = atoi(fe.get_header().c_str());
            kcounter.add_kmer(kmer, count);
        }
    }
    end = time(NULL);
    sum = record_counter[0];
    for (i = 1; i < omp_get_max_threads(); i++)
        sum += record_counter[i];
    delete[] record_counter;
    cerr << endl << " done parsing " << sum << " Kmers, " << kcounter.size() << " added, taking " << (end - start)
        << " seconds." << endl;
    return;
}

开发者ID:bowhan，项目名称:trinityrnaseq，代码行数:49，代码来源:fastaToKmerCoverageStats.cpp

示例2: inchworm

Path_n_count_pair IRKE::inchworm (KmerCounter& kcounter, char direction, kmer_int_type_t kmer, Kmer_visitor& visitor, float min_connectivity) {
	
	// cout << "inchworm" << endl;
	
	Path_n_count_pair entire_path;
	
	unsigned int inchworm_round = 0;
	
	unsigned long num_total_kmers = kcounter.size();
	
	Kmer_visitor eliminator(kcounter.get_kmer_length(), DOUBLE_STRANDED_MODE);
	
	while (true) {
		
		inchworm_round++;
		eliminator.clear();
		
		if (inchworm_round > num_total_kmers) {
			throw(string ("Error, inchworm rounds have exceeded the number of possible seed kmers"));
		}
		
		if (IRKE_COMMON::MONITOR >= 3) {
			cerr << endl << "Inchworm round(" << string(1,direction) << "): " << inchworm_round << " searching kmer: " << kmer << endl;
			string kmer_str = kcounter.get_kmer_string(kmer);
			cerr << kcounter.describe_kmer(kmer_str) << endl;
		}
		
		visitor.erase(kmer); // seed kmer must be not visited already.
		
		Kmer_Occurence_Pair kmer_pair(kmer, kcounter.get_kmer_count(kmer));
		Path_n_count_pair best_path = inchworm_step(kcounter, direction, kmer_pair, visitor, eliminator, inchworm_round, 0, min_connectivity, MAX_RECURSION);
		
		if (best_path.second > 0) {
			// append info to entire path in reverse order, so starts just after seed kmer
			vector<kmer_int_type_t>& kmer_list = best_path.first;
			
			unsigned int num_kmers = kmer_list.size();
			int first_index = num_kmers - 1;
			int last_index = 0;
			if (CRAWL) {
				last_index = first_index - CRAWL_LENGTH + 1;
				if (last_index < 0) {
					last_index = 0;
				}
			}
			
			for (int i = first_index; i >= last_index; i--) {
				kmer_int_type_t kmer_extend = kmer_list[i];
				entire_path.first.push_back(kmer_extend);
				visitor.add(kmer_extend);
				entire_path.second += kcounter.get_kmer_count(kmer_extend);

			}
			
			kmer = entire_path.first[ entire_path.first.size() -1 ];
			
		}
		else {
			// no extension possible
			break;
		}
	}
	
	if (IRKE_COMMON::MONITOR >= 3) 
		cerr << endl;
	
	
	return(entire_path);
}

开发者ID:Biocacahuete，项目名称:trinityrnaseq，代码行数:69，代码来源:IRKE.cpp

示例3: compute_sequence_assemblies

void IRKE::compute_sequence_assemblies(KmerCounter& kcounter, float min_connectivity,
									   unsigned int MIN_ASSEMBLY_LENGTH, unsigned int MIN_ASSEMBLY_COVERAGE,
									   bool WRITE_COVERAGE, string COVERAGE_OUTPUT_FILENAME) {
	
    if (! got_sorted_kmers_flag) {
        stringstream error;
        error << stacktrace() << " Error, must populate_sorted_kmers_list() before computing sequence assemblies" << endl;
        throw(error.str());
    }
    

	unsigned int kmer_length = kcounter.get_kmer_length();
	ofstream coverage_writer;
	if (WRITE_COVERAGE) {
		coverage_writer.open(COVERAGE_OUTPUT_FILENAME.c_str());
	}
	
	vector<Kmer_counter_map_iterator>& kmers = sorted_kmers; //kcounter.get_kmers_sort_descending_counts();
	
	unsigned long init_size = kcounter.size();
	
	// string s = "before.kmers";
	// kcounter.dump_kmers_to_file(s);
	
	for (unsigned int i = 0; i < kmers.size(); i++) {
		
		// cerr << "round: " << i << endl;
		
		unsigned long kmer_counter_size = kcounter.size();
		if (kmer_counter_size > init_size) {
			
			// string s = "after.kmers";
			// kcounter.dump_kmers_to_file(s);
			
			stringstream error;
			error << stacktrace() << "Error, Kcounter size has grown from " << init_size
				  << " to " << kmer_counter_size << endl;
			throw (error.str());
		}
		
		
		kmer_int_type_t kmer = kmers[i]->first;
		unsigned int kmer_count = kmers[i]->second;
		
        
		if (kmer_count == 0) {
			continue;
		}
        
        
		if (IRKE_COMMON::MONITOR >= 2) {
			cerr << "SEED kmer: " << kcounter.get_kmer_string(kmer) << ", count: " << kmer_count << endl;
		}

        

		if (kmer == revcomp_val(kmer, kmer_length)) {
			// palindromic kmer, avoid palindromes as seeds
			
            if (IRKE_COMMON::MONITOR >= 2) {
                cerr << "SEED kmer: " << kcounter.get_kmer_string(kmer) << " is palidnromic.  Skipping. " << endl;
            }
            
            continue;
		}
		
        
		if (kmer_count < MIN_SEED_COVERAGE) {
			if (IRKE_COMMON::MONITOR >= 2) {
                cerr << "-seed has insufficient coverage, skipping" << endl;
            }
            
            continue;
		}
		
		
		float entropy = compute_entropy(kmer, kmer_length);
		
		
		if (entropy < MIN_SEED_ENTROPY) {

            if (IRKE_COMMON::MONITOR >= 2) {
                cerr << "-skipping seed due to low entropy: " << entropy << endl;
            }
            
            continue;
		}
		
				
		/* Extend to the right */
		
		Kmer_visitor visitor(kmer_length, DOUBLE_STRANDED_MODE);
		Path_n_count_pair selected_path_n_pair_forward = inchworm(kcounter, 'F', kmer, visitor, min_connectivity); 
		
		visitor.clear();
		// add selected path to visitor
		
		vector<kmer_int_type_t>& forward_path = selected_path_n_pair_forward.first;
		if (IRKE_COMMON::MONITOR >= 2) {
            cerr << "Forward path contains: " << forward_path.size() << " kmers. " << endl;
//.........这里部分代码省略.........

开发者ID:Biocacahuete，项目名称:trinityrnaseq，代码行数:101，代码来源:IRKE.cpp

示例4: inchworm

Path_n_count_pair IRKE::inchworm(KmerCounter &kcounter,
                                 char direction,
                                 kmer_int_type_t kmer,
                                 Kmer_visitor &visitor,
                                 float min_connectivity)
{

    // cout << "inchworm" << endl;

    Path_n_count_pair entire_path;
    entire_path.second = 0; // init cumulative path coverage

    unsigned int inchworm_round = 0;

    unsigned long num_total_kmers = kcounter.size();

    Kmer_visitor eliminator(kcounter.get_kmer_length(), DOUBLE_STRANDED_MODE);

    while (true) {


        if (IRKE_COMMON::__DEVEL_rand_fracture) {

            // terminate extension with probability of __DEVEL_rand_fracture_prob

            float prob_to_fracture = rand() / (float) RAND_MAX;
            //cerr << "prob: " << prob_to_fracture << endl;

            if (prob_to_fracture <= IRKE_COMMON::__DEVEL_rand_fracture_prob) {

                // cerr << "Fracturing at iworm round: " << inchworm_round << " given P: " << prob_to_fracture << endl;

                return (entire_path);
            }
        }

        inchworm_round++;
        eliminator.clear();

        if (inchworm_round > num_total_kmers) {
            throw (string("Error, inchworm rounds have exceeded the number of possible seed kmers"));
        }

        if (IRKE_COMMON::MONITOR >= 3) {
            cerr << endl << "Inchworm round(" << string(1, direction) << "): " << inchworm_round << " searching kmer: "
                << kmer << endl;
            string kmer_str = kcounter.get_kmer_string(kmer);
            cerr << kcounter.describe_kmer(kmer_str) << endl;
        }

        visitor.erase(kmer); // seed kmer must be not visited already.

        Kmer_Occurence_Pair kmer_pair(kmer, kcounter.get_kmer_count(kmer));
        Path_n_count_pair best_path = inchworm_step(kcounter,
                                                    direction,
                                                    kmer_pair,
                                                    visitor,
                                                    eliminator,
                                                    inchworm_round,
                                                    0,
                                                    min_connectivity,
                                                    MAX_RECURSION);

        vector<kmer_int_type_t> &kmer_list = best_path.first;
        unsigned int num_kmers = kmer_list.size();

        if ((IRKE_COMMON::__DEVEL_zero_kmer_on_use && num_kmers >= 1) || best_path.second > 0) {
            // append info to entire path in reverse order, so starts just after seed kmer

            int first_index = num_kmers - 1;
            int last_index = 0;
            if (CRAWL) {
                last_index = first_index - CRAWL_LENGTH + 1;
                if (last_index < 0) {
                    last_index = 0;
                }
            }

            for (int i = first_index; i >= last_index; i--) {
                kmer_int_type_t kmer_extend = kmer_list[i];
                entire_path.first.push_back(kmer_extend);
                visitor.add(kmer_extend);
                //entire_path.second += kcounter.get_kmer_count(kmer_extend);

                // selected here, zero out:


                if (IRKE_COMMON::__DEVEL_zero_kmer_on_use) {
                    kcounter.clear_kmer(kmer_extend);
                }

            }

            kmer = entire_path.first[entire_path.first.size() - 1];

            entire_path.second += best_path.second;

        }
        else {
            // no extension possible
//.........这里部分代码省略.........

开发者ID:bowhan，项目名称:trinityrnaseq，代码行数:101，代码来源:IRKE.cpp

示例5: compute_sequence_assemblies

void IRKE::compute_sequence_assemblies(KmerCounter &kcounter, float min_connectivity,
                                       unsigned int MIN_ASSEMBLY_LENGTH, unsigned int MIN_ASSEMBLY_COVERAGE,
                                       bool PARALLEL_IWORM, bool WRITE_COVERAGE, string COVERAGE_OUTPUT_FILENAME)
{

    if (!got_sorted_kmers_flag) {
        stringstream error;
        error << stacktrace() << " Error, must populate_sorted_kmers_list() before computing sequence assemblies"
            << endl;
        throw (error.str());
    }


    //vector<Kmer_counter_map_iterator>& kmers = sorted_kmers;
    vector<Kmer_Occurence_Pair> &kmers = sorted_kmers;  // note, these are not actually sorted if PARALLEL_IWORM mode.

    unsigned long init_size = kcounter.size();

    cerr << "Total kcounter hash size: " << init_size << " vs. sorted list size: " << kmers.size() << endl;

    unsigned int kmer_length = kcounter.get_kmer_length();
    ofstream coverage_writer;
    if (WRITE_COVERAGE) {
        coverage_writer.open(COVERAGE_OUTPUT_FILENAME.c_str());
    }


    // string s = "before.kmers";
    // kcounter.dump_kmers_to_file(s);

    /* -----------------------------------------------------------
       Two reconstruction modes. (bhaas, Jan-4-2014)

       1.  Original (not setting PARALLEL_IWORM): kmer list is sorted descendingly by abundance.
       Seed kmer is chosen as most abundant, and contig construction extends from this seed.

       2.  PARALLEL_IWORM:  the kmer list is not sorted. A random kmer (just ordered by hash iterator, nothing special, probably not so random)
       is used as a seed for draft contig reconstruction.
       The most abundant kmer in the draft contig is chosen as a proper seed.
       An inchworm contig is then constructed using this new seed, and reported.
       So, in this case, the draft contig in the first phase is just used to find a better seed, from which the inchworm contig is then 
       properly reconstructed.

    --------------------------------------------------------------- */



    // try building an inchworm contig from each seed kmer
    int myTid;


    if (PARALLEL_IWORM) {
        omp_set_num_threads(IRKE_COMMON::NUM_THREADS);
    }
    else {
        omp_set_num_threads(1); // turn off multithreading for the contig building.
    }


    //-----------------------------------------------------------------
    // Prep writing to separate inchworm output files for each thread
    //-----------------------------------------------------------------

    vector<iworm_tmp_file> tmpfiles;
    int num_threads = omp_get_max_threads();
    cerr << "num threads set to: " << num_threads << endl;

    for (int i = 0; i < num_threads; i++) {
        iworm_tmp_file tmpfile_struct;
        tmpfiles.push_back(tmpfile_struct);
        iworm_tmp_file &itmp = tmpfiles[i];

        stringstream filename_constructor;
        filename_constructor << "tmp.iworm.fa.pid_" << getpid() << ".thread_" << i;

        itmp.tmp_filename = new char[100];
        strcpy(itmp.tmp_filename, filename_constructor.str().c_str());
        itmp.tmp_filename[filename_constructor.str().length()] = '\0';

        itmp.fh = new ofstream();
        itmp.fh->open(itmp.tmp_filename);
        cerr << "Done opening file. " << itmp.tmp_filename << endl;
    }


    //-------------------
    // Build contigs.
    //-------------------

#pragma omp parallel for private (myTid) schedule (dynamic, 1000)
    for (unsigned int i = 0; i < kmers.size(); i++) {

        // cerr << "round: " << i << endl;

        myTid = omp_get_thread_num();

        unsigned long kmer_counter_size = kcounter.size();
        if (kmer_counter_size > init_size) {

            // string s = "after.kmers";
//.........这里部分代码省略.........

开发者ID:bowhan，项目名称:trinityrnaseq，代码行数:101，代码来源:IRKE.cpp

示例6: populate_kmer_counter_from_reads

void populate_kmer_counter_from_reads (KmerCounter& kcounter, string& fasta_filename) {
    unsigned int kmer_length = kcounter.get_kmer_length();
    int i, myTid;
    unsigned long sum,
        *record_counter = new unsigned long[omp_get_max_threads()];
    unsigned long start, end;

    // init record counter
    for (int i = 0; i < omp_get_max_threads(); i++) {
        record_counter[i] = 0;
    }


    cerr << "-storing Kmers..." << "\n";
    start = time(NULL);

    Fasta_reader fasta_reader(fasta_filename);

    unsigned int entry_num = 0;

#pragma omp parallel private (myTid)
    {
        myTid = omp_get_thread_num();
        record_counter[myTid] = 0;

        while (fasta_reader.hasNext()) {
            Fasta_entry fe = fasta_reader.getNext();
            string accession = fe.get_accession();

#pragma omp atomic
            entry_num++;
            record_counter[myTid]++;
            
            if (IRKE_COMMON::MONITOR >= 4) {
                cerr << "[" << entry_num << "] acc: " << accession << ", by thread no: " << myTid << "\n";;
            }
            else if (IRKE_COMMON::MONITOR) {
                if (myTid == 0 && record_counter[myTid] % 1000 == 0)
                    {
                        sum = record_counter[0];
                        for (i=1; i<omp_get_num_threads(); i++)
                            sum+= record_counter[i];
                        cerr << "\r [" << sum << "] sequences parsed.     ";
                    }
            }
            
            string seq = fe.get_sequence();

            if (seq.length() < KMER_SIZE + 1) {
                continue;
            }
            kcounter.add_sequence(seq);

        }
        
        cerr << "\n" << " done parsing " << sum << " sequences, extracted " << kcounter.size() << " kmers, taking " << (end-start) << " seconds." << "\n";
        
        
    }


    return;
    
}

开发者ID:，项目名称:，代码行数:64，代码来源:

注：本文中的KmerCounter::size方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。