当前位置: 首页>>代码示例>>C++>>正文


C++ KmerCounter类代码示例

本文整理汇总了C++中KmerCounter的典型用法代码示例。如果您正苦于以下问题:C++ KmerCounter类的具体用法?C++ KmerCounter怎么用?C++ KmerCounter使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了KmerCounter类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: extract_best_seed

kmer_int_type_t IRKE::extract_best_seed(vector<kmer_int_type_t> &kmer_vec,
                                        KmerCounter &kcounter,
                                        float min_connectivity)
{

    unsigned int kmer_length = kcounter.get_kmer_length();

    unsigned int best_kmer_count = 0;
    kmer_int_type_t best_seed = 0;

    for (unsigned int i = 0; i < kmer_vec.size(); i++) {

        kmer_int_type_t kmer = kmer_vec[i];
        unsigned int count = kcounter.get_kmer_count(kmer);

        if (count > best_kmer_count && is_good_seed_kmer(kmer, count, kmer_length, min_connectivity)) {
            best_kmer_count = count;
            best_seed = kmer;
        }
    }

    if (IRKE_COMMON::MONITOR >= 2) {
        cerr << "Parallel method found better seed: " << kcounter.get_kmer_string(best_seed) << " with count: "
            << best_kmer_count << endl;
    }

    return (best_seed);
}
开发者ID:bowhan,项目名称:trinityrnaseq,代码行数:28,代码来源:IRKE.cpp

示例2: visitor

vector<kmer_int_type_t>  IRKE::build_inchworm_contig_from_seed(kmer_int_type_t kmer, KmerCounter &kcounter,
                                                               float min_connectivity, unsigned int &total_counts,
                                                               bool)
{

    unsigned int kmer_count = kcounter.get_kmer_count(kmer);

    /* Extend to the right */

    unsigned int kmer_length = kcounter.get_kmer_length();

    Kmer_visitor visitor(kmer_length, DOUBLE_STRANDED_MODE);
    Path_n_count_pair selected_path_n_pair_forward = inchworm(kcounter, 'F', kmer, visitor, min_connectivity);

    visitor.clear();
    // add selected path to visitor

    vector<kmer_int_type_t> &forward_path = selected_path_n_pair_forward.first;

    if (IRKE_COMMON::MONITOR >= 2) {
        cerr << "Forward path contains: " << forward_path.size() << " kmers. " << endl;
    }


    for (unsigned int i = 0; i < forward_path.size(); i++) {
        kmer_int_type_t kmer = forward_path[i];
        visitor.add(kmer);

        if (IRKE_COMMON::MONITOR >= 2) {
            cerr << "\tForward path kmer: " << kcounter.get_kmer_string(kmer) << endl;
        }

    }


    /* Extend to the left */
    visitor.erase(kmer); // reset the seed

    Path_n_count_pair selected_path_n_pair_reverse = inchworm(kcounter, 'R', kmer, visitor, min_connectivity);
    if (IRKE_COMMON::MONITOR >= 2) {
        vector<kmer_int_type_t> &reverse_path = selected_path_n_pair_reverse.first;
        cerr << "Reverse path contains: " << reverse_path.size() << " kmers. " << endl;
        for (unsigned int i = 0; i < reverse_path.size(); i++) {
            cerr << "\tReverse path kmer: " << kcounter.get_kmer_string(reverse_path[i]) << endl;
        }
    }


    total_counts = selected_path_n_pair_forward.second + selected_path_n_pair_reverse.second + kmer_count;

    vector<kmer_int_type_t> &reverse_path = selected_path_n_pair_reverse.first;

    vector<kmer_int_type_t> joined_path = _join_forward_n_reverse_paths(reverse_path, kmer, forward_path);

    return (joined_path);
}
开发者ID:bowhan,项目名称:trinityrnaseq,代码行数:56,代码来源:IRKE.cpp

示例3: traverse_path

void IRKE::traverse_path(KmerCounter &kcounter, Kmer_Occurence_Pair seed_kmer, Kmer_visitor &visitor,
                         Kmer_visitor &place_holder, float MIN_CONNECTIVITY_RATIO, unsigned int depth)
{

    if (IRKE_COMMON::MONITOR >= 3) {
        cerr << "traverse_path, depth: " << depth << ", kmer: " << kcounter.get_kmer_string(seed_kmer.first) << endl;
    }


    // check to see if visited already
    if (visitor.exists(seed_kmer.first)) {
        // already visited
        if (IRKE_COMMON::MONITOR >= 3) {
            cout << "\talready visited " << kcounter.get_kmer_string(seed_kmer.first) << endl;
        }

        return;
    }

    // check if at the end of the max recursion, and if so, don't visit it but set a placeholder.
    if (depth > MAX_RECURSION) {
        place_holder.add(seed_kmer.first);
        return;
    }

    visitor.add(seed_kmer.first);

    // try each of the forward paths from the kmer:
    vector<Kmer_Occurence_Pair> forward_candidates = kcounter.get_forward_kmer_candidates(seed_kmer.first);

    for (unsigned int i = 0; i < forward_candidates.size(); i++) {
        Kmer_Occurence_Pair kmer = forward_candidates[i];

        if (kmer.second && exceeds_min_connectivity(kcounter, seed_kmer, kmer, MIN_CONNECTIVITY_RATIO)) {

            traverse_path(kcounter, kmer, visitor, place_holder, MIN_CONNECTIVITY_RATIO, depth + 1);
        }
    }

    // try each of the reverse paths from the kmer:
    vector<Kmer_Occurence_Pair> reverse_candidates = kcounter.get_reverse_kmer_candidates(seed_kmer.first);

    for (unsigned int i = 0; i < reverse_candidates.size(); i++) {
        Kmer_Occurence_Pair kmer = reverse_candidates[i];

        if (kmer.second && exceeds_min_connectivity(kcounter, seed_kmer, kmer, MIN_CONNECTIVITY_RATIO)) {

            traverse_path(kcounter, kmer, visitor, place_holder, MIN_CONNECTIVITY_RATIO, depth + 1);
        }
    }


    return;

}
开发者ID:bowhan,项目名称:trinityrnaseq,代码行数:55,代码来源:IRKE.cpp

示例4: populate_kmer_counter

void populate_kmer_counter(KmerCounter &kcounter, string &kmers_fasta_file)
{
    // code largely copied from IRKE.cpp
    int i, myTid;
    unsigned long sum,
        *record_counter = new unsigned long[omp_get_max_threads()];
    unsigned long start, end;
    // init record counter
    for (int i = 0; i < omp_get_max_threads(); i++) {
        record_counter[i] = 0;
    }
    cerr << "-reading Kmer occurences..." << endl;
    start = time(NULL);
    Fasta_reader fasta_reader(kmers_fasta_file);
#pragma omp parallel private (myTid)
    {
        myTid = omp_get_thread_num();
        record_counter[myTid] = 0;
        while (true) {
            Fasta_entry fe = fasta_reader.getNext();
            if (fe.get_sequence() == "") break;
            record_counter[myTid]++;
            if (IRKE_COMMON::MONITOR) {
                if (myTid == 0 && record_counter[myTid] % 100000 == 0) {
                    sum = record_counter[0];
                    for (i = 1; i < omp_get_num_threads(); i++)
                        sum += record_counter[i];
                    cerr << "\r [" << sum / 1000000 << "M] Kmers parsed.     ";
                }
            }
            string seq = fe.get_sequence();
            if (seq.length() != KMER_SIZE) {
                cerr << "ERROR: kmer " << seq << " is not of length: " << KMER_SIZE << endl;
                continue;
            }
            kmer_int_type_t kmer = kcounter.get_kmer_intval(seq);
            unsigned int count = atoi(fe.get_header().c_str());
            kcounter.add_kmer(kmer, count);
        }
    }
    end = time(NULL);
    sum = record_counter[0];
    for (i = 1; i < omp_get_max_threads(); i++)
        sum += record_counter[i];
    delete[] record_counter;
    cerr << endl << " done parsing " << sum << " Kmers, " << kcounter.size() << " added, taking " << (end - start)
        << " seconds." << endl;
    return;
}
开发者ID:bowhan,项目名称:trinityrnaseq,代码行数:49,代码来源:fastaToKmerCoverageStats.cpp

示例5: compute_kmer_coverage

vector<unsigned int> compute_kmer_coverage(string& sequence, KmerCounter& kcounter) {
    vector<unsigned int> coverage;
    if(IRKE_COMMON::MONITOR) {
        cerr << "processing sequence: " << sequence << endl;
    }
    for (int i = 0; i <= (int) sequence.length() - KMER_SIZE; i++) {
        // cerr << "i: " << i << ", <= " << sequence.length() - KMER_SIZE << endl;
        string kmer = sequence.substr(i, KMER_SIZE);
        if(IRKE_COMMON::MONITOR >= 2) {
            for (int j = 0; j <= i; j++) {
                cerr << " ";
            }
            cerr << kmer << endl;
        }
        unsigned int kmer_count = 0;
        if(!contains_non_gatc(kmer)) {
            kmer_count = kcounter.get_kmer_count(kmer);
        }

        // Note, in the jellyfish run, we restrain it to min kmer coverage of 2. 
        // If we don't find a kmer catalogued, it must have a kmer count of 1.
        if (kmer_count < 1) {
            kmer_count = 1;
        }
        
        coverage.push_back(kmer_count);
    }
    return(coverage);
}
开发者ID:djinnome,项目名称:JAMg-1,代码行数:29,代码来源:fastaToKmerCoverageStats.cpp

示例6: describe_kmers

void IRKE::describe_kmers(KmerCounter& kcounter) {
	
	// proxy call to Kcounter method.
	
	kcounter.describe_kmers();
	
	return;
}
开发者ID:Biocacahuete,项目名称:trinityrnaseq,代码行数:8,代码来源:IRKE.cpp

示例7: reconstruct_path_sequence

string IRKE::reconstruct_path_sequence(KmerCounter& kcounter, vector<kmer_int_type_t>& path, vector<unsigned int>& cov_counter) {
	
	if (path.size() == 0) {
		return("");
	}
	
	string seq = kcounter.get_kmer_string(path[0]);
	cov_counter.push_back( kcounter.get_kmer_count(path[0]) );
	
	for (unsigned int i = 1; i < path.size(); i++) {
		string kmer = kcounter.get_kmer_string(path[i]);
		seq += kmer.substr(kmer.length()-1, 1);
		
		cov_counter.push_back( kcounter.get_kmer_count(path[i]) );
	}
	
	return(seq);
}
开发者ID:Biocacahuete,项目名称:trinityrnaseq,代码行数:18,代码来源:IRKE.cpp

示例8: exceeds_min_connectivity

bool IRKE::exceeds_min_connectivity (KmerCounter& kcounter, string kmerA, string kmerB, float min_connectivity) {

	kmer_int_type_t valA = kmer_to_intval(kmerA);
	kmer_int_type_t valB = kmer_to_intval(kmerB);

	Kmer_Occurence_Pair pairA(valA, kcounter.get_kmer_count(valA));
	Kmer_Occurence_Pair pairB(valB, kcounter.get_kmer_count(valB));

	return exceeds_min_connectivity(kcounter, pairA, pairB, min_connectivity);

}
开发者ID:Biocacahuete,项目名称:trinityrnaseq,代码行数:11,代码来源:IRKE.cpp

示例9: compute_kmer_coverage

vector<unsigned int> compute_kmer_coverage(string& sequence, KmerCounter& kcounter) {
    if(IRKE_COMMON::MONITOR) {
        cerr << "processing sequence: " << sequence << endl;
    }
    if (sequence.length() < KMER_SIZE)
    {
        // Can't rely on length() - KMER_SIZE for this as length is unsigned
        cerr << "Sequence: " << sequence << "is smaller than " << KMER_SIZE << " base pairs, skipping" << endl;
	return vector<unsigned int>();
    }

    vector<unsigned int> coverage;
    for (size_t i = 0; i <= sequence.length() - KMER_SIZE; i++) {
        // cerr << "i: " << i << ", <= " << sequence.length() - KMER_SIZE << endl;
        string kmer = sequence.substr(i, KMER_SIZE);
        if(IRKE_COMMON::MONITOR >= 2) {
            for (size_t j = 0; j <= i; j++) {
                cerr << " ";
            }
            cerr << kmer << endl;
        }
        unsigned int kmer_count = 0;
        if(!contains_non_gatc(kmer)) {
            kmer_count = kcounter.get_kmer_count(kmer);
        }

        // Note, in the jellyfish run, we restrain it to min kmer coverage of 2. 
        // If we don't find a kmer catalogued, it must have a kmer count of 1.
        if (kmer_count < 1) {
            kmer_count = 1;
        }
        
        coverage.push_back(kmer_count);
    }
    return(coverage);
}
开发者ID:,项目名称:,代码行数:36,代码来源:

示例10: inchworm_step

Path_n_count_pair IRKE::inchworm_step (KmerCounter& kcounter, char direction, Kmer_Occurence_Pair kmer, Kmer_visitor& visitor,
									   Kmer_visitor& eliminator, unsigned int inchworm_round, unsigned int depth, 
									   float MIN_CONNECTIVITY_RATIO, unsigned int max_recurse) {
	
	// cout << "inchworm_step" << endl;
	
	if (IRKE_COMMON::MONITOR >= 2) {
		cerr << "\rinchworm: " << string(1,direction) 
			 << " A:" << INCHWORM_ASSEMBLY_COUNTER << " "
			 << " rnd:" << inchworm_round << " D:" << depth << "         "; 
	}
	
	// check to see if kmer exists.  If not, return empty container
	Path_n_count_pair best_path_n_pair;
	
		
	if ( !kmer.second
		 || visitor.exists(kmer.first) // visited
		 || eliminator.exists(kmer.first) // eliminated
		 
		 ) {
		
		// base case, already visited or kmer doesn't exist.
		//cout << kmer << "already visited or doesn't exist.  ending recursion at depth: " << depth << endl;
		
		return(best_path_n_pair);
		
	}
	
	visitor.add(kmer.first);
	
	if (PACMAN && depth > 0) {
		// cerr << "pacman eliminated kmer: " << kmer << endl;
		eliminator.add(kmer.first);
	}
	
	
	if (depth < max_recurse) {
		
		vector<Kmer_Occurence_Pair> kmer_candidates;
		if (direction == 'F') {
			// forward search
			kmer_candidates = kcounter.get_forward_kmer_candidates(kmer.first);
		}
		else {
			// reverse search
			kmer_candidates = kcounter.get_reverse_kmer_candidates(kmer.first);
		}
		
		bool tie = true;
		unsigned int recurse_cap = max_recurse;
		unsigned int best_path_length = 0;
		while (tie) {
			vector<Path_n_count_pair> paths;
			
			for (unsigned int i = 0; i < kmer_candidates.size(); i++) {
				Kmer_Occurence_Pair kmer_candidate = kmer_candidates[i];
				
				if (kmer_candidate.second // ) {
					&& !visitor.exists(kmer_candidate.first)  // avoid creating already visited kmers since they're unvisited below...
					&& exceeds_min_connectivity(kcounter, kmer, kmer_candidate, MIN_CONNECTIVITY_RATIO) ) {
					//cout << endl << "\ttrying " << kmer_candidate << endl;
					

                    // recursive call here for extension
					Path_n_count_pair p = inchworm_step(kcounter, direction, kmer_candidate, visitor, eliminator, inchworm_round, depth+1, MIN_CONNECTIVITY_RATIO, recurse_cap);
					
					paths.push_back(p);
					
					visitor.erase(kmer_candidate.first); // un-visiting
					
                }
				
			} // end for kmer
			
			
			if (paths.size() > 1) {
				
				sort(paths.begin(), paths.end(), compare);
				
				if (paths[0].second == paths[1].second   // same cumulative coverage values for both paths.
					&&
					// check last kmer to be sure they're different. 
					// Not interested in breaking ties between identically scoring paths that end up at the same kmer.
					paths[0].first[0] != paths[1].first[0]
					
					) {
					
					// got tie, two different paths and two different endpoints:
					if (IRKE_COMMON::MONITOR >= 3) {
						
						
						cerr << "Got tie! " << ", score: " << paths[0].second << ", recurse at: " << recurse_cap << endl;
						vector<unsigned int> v;
						cerr << reconstruct_path_sequence(kcounter, paths[0].first, v) << endl;
						cerr << reconstruct_path_sequence(kcounter, paths[1].first, v) << endl;
						
						
						
					}
//.........这里部分代码省略.........
开发者ID:Biocacahuete,项目名称:trinityrnaseq,代码行数:101,代码来源:IRKE.cpp

示例11: inchworm

Path_n_count_pair IRKE::inchworm (KmerCounter& kcounter, char direction, kmer_int_type_t kmer, Kmer_visitor& visitor, float min_connectivity) {
	
	// cout << "inchworm" << endl;
	
	Path_n_count_pair entire_path;
	
	unsigned int inchworm_round = 0;
	
	unsigned long num_total_kmers = kcounter.size();
	
	Kmer_visitor eliminator(kcounter.get_kmer_length(), DOUBLE_STRANDED_MODE);
	
	while (true) {
		
		inchworm_round++;
		eliminator.clear();
		
		if (inchworm_round > num_total_kmers) {
			throw(string ("Error, inchworm rounds have exceeded the number of possible seed kmers"));
		}
		
		if (IRKE_COMMON::MONITOR >= 3) {
			cerr << endl << "Inchworm round(" << string(1,direction) << "): " << inchworm_round << " searching kmer: " << kmer << endl;
			string kmer_str = kcounter.get_kmer_string(kmer);
			cerr << kcounter.describe_kmer(kmer_str) << endl;
		}
		
		visitor.erase(kmer); // seed kmer must be not visited already.
		
		Kmer_Occurence_Pair kmer_pair(kmer, kcounter.get_kmer_count(kmer));
		Path_n_count_pair best_path = inchworm_step(kcounter, direction, kmer_pair, visitor, eliminator, inchworm_round, 0, min_connectivity, MAX_RECURSION);
		
		if (best_path.second > 0) {
			// append info to entire path in reverse order, so starts just after seed kmer
			vector<kmer_int_type_t>& kmer_list = best_path.first;
			
			unsigned int num_kmers = kmer_list.size();
			int first_index = num_kmers - 1;
			int last_index = 0;
			if (CRAWL) {
				last_index = first_index - CRAWL_LENGTH + 1;
				if (last_index < 0) {
					last_index = 0;
				}
			}
			
			for (int i = first_index; i >= last_index; i--) {
				kmer_int_type_t kmer_extend = kmer_list[i];
				entire_path.first.push_back(kmer_extend);
				visitor.add(kmer_extend);
				entire_path.second += kcounter.get_kmer_count(kmer_extend);

			}
			
			kmer = entire_path.first[ entire_path.first.size() -1 ];
			
		}
		else {
			// no extension possible
			break;
		}
	}
	
	if (IRKE_COMMON::MONITOR >= 3) 
		cerr << endl;
	
	
	return(entire_path);
}
开发者ID:Biocacahuete,项目名称:trinityrnaseq,代码行数:69,代码来源:IRKE.cpp

示例12: compute_sequence_assemblies

void IRKE::compute_sequence_assemblies(KmerCounter& kcounter, float min_connectivity,
									   unsigned int MIN_ASSEMBLY_LENGTH, unsigned int MIN_ASSEMBLY_COVERAGE,
									   bool WRITE_COVERAGE, string COVERAGE_OUTPUT_FILENAME) {
	
    if (! got_sorted_kmers_flag) {
        stringstream error;
        error << stacktrace() << " Error, must populate_sorted_kmers_list() before computing sequence assemblies" << endl;
        throw(error.str());
    }
    

	unsigned int kmer_length = kcounter.get_kmer_length();
	ofstream coverage_writer;
	if (WRITE_COVERAGE) {
		coverage_writer.open(COVERAGE_OUTPUT_FILENAME.c_str());
	}
	
	vector<Kmer_counter_map_iterator>& kmers = sorted_kmers; //kcounter.get_kmers_sort_descending_counts();
	
	unsigned long init_size = kcounter.size();
	
	// string s = "before.kmers";
	// kcounter.dump_kmers_to_file(s);
	
	for (unsigned int i = 0; i < kmers.size(); i++) {
		
		// cerr << "round: " << i << endl;
		
		unsigned long kmer_counter_size = kcounter.size();
		if (kmer_counter_size > init_size) {
			
			// string s = "after.kmers";
			// kcounter.dump_kmers_to_file(s);
			
			stringstream error;
			error << stacktrace() << "Error, Kcounter size has grown from " << init_size
				  << " to " << kmer_counter_size << endl;
			throw (error.str());
		}
		
		
		kmer_int_type_t kmer = kmers[i]->first;
		unsigned int kmer_count = kmers[i]->second;
		
        
		if (kmer_count == 0) {
			continue;
		}
        
        
		if (IRKE_COMMON::MONITOR >= 2) {
			cerr << "SEED kmer: " << kcounter.get_kmer_string(kmer) << ", count: " << kmer_count << endl;
		}

        

		if (kmer == revcomp_val(kmer, kmer_length)) {
			// palindromic kmer, avoid palindromes as seeds
			
            if (IRKE_COMMON::MONITOR >= 2) {
                cerr << "SEED kmer: " << kcounter.get_kmer_string(kmer) << " is palidnromic.  Skipping. " << endl;
            }
            
            continue;
		}
		
        
		if (kmer_count < MIN_SEED_COVERAGE) {
			if (IRKE_COMMON::MONITOR >= 2) {
                cerr << "-seed has insufficient coverage, skipping" << endl;
            }
            
            continue;
		}
		
		
		float entropy = compute_entropy(kmer, kmer_length);
		
		
		if (entropy < MIN_SEED_ENTROPY) {

            if (IRKE_COMMON::MONITOR >= 2) {
                cerr << "-skipping seed due to low entropy: " << entropy << endl;
            }
            
            continue;
		}
		
				
		/* Extend to the right */
		
		Kmer_visitor visitor(kmer_length, DOUBLE_STRANDED_MODE);
		Path_n_count_pair selected_path_n_pair_forward = inchworm(kcounter, 'F', kmer, visitor, min_connectivity); 
		
		visitor.clear();
		// add selected path to visitor
		
		vector<kmer_int_type_t>& forward_path = selected_path_n_pair_forward.first;
		if (IRKE_COMMON::MONITOR >= 2) {
            cerr << "Forward path contains: " << forward_path.size() << " kmers. " << endl;
//.........这里部分代码省略.........
开发者ID:Biocacahuete,项目名称:trinityrnaseq,代码行数:101,代码来源:IRKE.cpp

示例13: inchworm_step

Path_n_count_pair IRKE::inchworm_step(KmerCounter &kcounter,
                                      char direction,
                                      Kmer_Occurence_Pair kmer,
                                      Kmer_visitor &visitor,
                                      Kmer_visitor &eliminator,
                                      unsigned int inchworm_round,
                                      unsigned int depth,
                                      float MIN_CONNECTIVITY_RATIO,
                                      unsigned int max_recurse)
{

    // cout << "inchworm_step" << endl;

    if (IRKE_COMMON::MONITOR >= 2) {
        cerr << "\rinchworm: " << string(1, direction)
            << " A:" << INCHWORM_ASSEMBLY_COUNTER << " "
            << " rnd:" << inchworm_round << " D:" << depth << "         ";
    }

    // check to see if kmer exists.  If not, return empty container
    Path_n_count_pair best_path_n_pair;
    best_path_n_pair.second = 0; // init

    if ( // !kmer.second ||

        visitor.exists(kmer.first) // visited
            || eliminator.exists(kmer.first) // eliminated

        ) {

        if (IRKE_COMMON::MONITOR >= 3) {
            cerr << "base case, already visited or kmer doesn't exist." << endl;
            cerr << kmer.first << " already visited or doesn't exist.  ending recursion at depth: " << depth << endl;
        }

        return (best_path_n_pair);

    }

    visitor.add(kmer.first);

    if (PACMAN && depth > 0) {
        // cerr << "pacman eliminated kmer: " << kmer << endl;
        eliminator.add(kmer.first);
    }


    if (depth < max_recurse) {

        vector<Kmer_Occurence_Pair> kmer_candidates;
        if (direction == 'F') {
            // forward search
            kmer_candidates = kcounter.get_forward_kmer_candidates(kmer.first);
        }
        else {
            // reverse search
            kmer_candidates = kcounter.get_reverse_kmer_candidates(kmer.first);
        }

        if (IRKE_COMMON::MONITOR >= 3) {
            cerr << "Got " << kmer_candidates.size() << " kmer extension candidates." << endl;
        }

        bool tie = true;
        unsigned int recurse_cap = max_recurse;
        unsigned int best_path_length = 0;
        while (tie) {

            // keep trying to break ties if ties encountered.
            // this is done by increasing the allowed recursion depth until the tie is broken.
            //  Recursion depth set via: recurse_cap and incremented if tie is found


            vector<Path_n_count_pair> paths; // to collect all the paths rooting from this point

            for (unsigned int i = 0; i < kmer_candidates.size(); i++) {
                Kmer_Occurence_Pair kmer_candidate = kmer_candidates[i];

                if (kmer_candidate.second &&

                    !visitor.exists(kmer_candidate
                                        .first)  // avoid creating already visited kmers since they're unvisited below...
                    && exceeds_min_connectivity(kcounter, kmer, kmer_candidate, MIN_CONNECTIVITY_RATIO)) {
                    //cout << endl << "\ttrying " << kmer_candidate << endl;


                    // recursive call here for extension
                    Path_n_count_pair p = inchworm_step(kcounter,
                                                        direction,
                                                        kmer_candidate,
                                                        visitor,
                                                        eliminator,
                                                        inchworm_round,
                                                        depth + 1,
                                                        MIN_CONNECTIVITY_RATIO,
                                                        recurse_cap);

                    if (p.first.size() >= 1) {
                        // only retain paths that include visited nodes.
                        paths.push_back(p);
//.........这里部分代码省略.........
开发者ID:bowhan,项目名称:trinityrnaseq,代码行数:101,代码来源:IRKE.cpp

示例14: inchworm

Path_n_count_pair IRKE::inchworm(KmerCounter &kcounter,
                                 char direction,
                                 kmer_int_type_t kmer,
                                 Kmer_visitor &visitor,
                                 float min_connectivity)
{

    // cout << "inchworm" << endl;

    Path_n_count_pair entire_path;
    entire_path.second = 0; // init cumulative path coverage

    unsigned int inchworm_round = 0;

    unsigned long num_total_kmers = kcounter.size();

    Kmer_visitor eliminator(kcounter.get_kmer_length(), DOUBLE_STRANDED_MODE);

    while (true) {


        if (IRKE_COMMON::__DEVEL_rand_fracture) {

            // terminate extension with probability of __DEVEL_rand_fracture_prob

            float prob_to_fracture = rand() / (float) RAND_MAX;
            //cerr << "prob: " << prob_to_fracture << endl;

            if (prob_to_fracture <= IRKE_COMMON::__DEVEL_rand_fracture_prob) {

                // cerr << "Fracturing at iworm round: " << inchworm_round << " given P: " << prob_to_fracture << endl;

                return (entire_path);
            }
        }

        inchworm_round++;
        eliminator.clear();

        if (inchworm_round > num_total_kmers) {
            throw (string("Error, inchworm rounds have exceeded the number of possible seed kmers"));
        }

        if (IRKE_COMMON::MONITOR >= 3) {
            cerr << endl << "Inchworm round(" << string(1, direction) << "): " << inchworm_round << " searching kmer: "
                << kmer << endl;
            string kmer_str = kcounter.get_kmer_string(kmer);
            cerr << kcounter.describe_kmer(kmer_str) << endl;
        }

        visitor.erase(kmer); // seed kmer must be not visited already.

        Kmer_Occurence_Pair kmer_pair(kmer, kcounter.get_kmer_count(kmer));
        Path_n_count_pair best_path = inchworm_step(kcounter,
                                                    direction,
                                                    kmer_pair,
                                                    visitor,
                                                    eliminator,
                                                    inchworm_round,
                                                    0,
                                                    min_connectivity,
                                                    MAX_RECURSION);

        vector<kmer_int_type_t> &kmer_list = best_path.first;
        unsigned int num_kmers = kmer_list.size();

        if ((IRKE_COMMON::__DEVEL_zero_kmer_on_use && num_kmers >= 1) || best_path.second > 0) {
            // append info to entire path in reverse order, so starts just after seed kmer

            int first_index = num_kmers - 1;
            int last_index = 0;
            if (CRAWL) {
                last_index = first_index - CRAWL_LENGTH + 1;
                if (last_index < 0) {
                    last_index = 0;
                }
            }

            for (int i = first_index; i >= last_index; i--) {
                kmer_int_type_t kmer_extend = kmer_list[i];
                entire_path.first.push_back(kmer_extend);
                visitor.add(kmer_extend);
                //entire_path.second += kcounter.get_kmer_count(kmer_extend);

                // selected here, zero out:


                if (IRKE_COMMON::__DEVEL_zero_kmer_on_use) {
                    kcounter.clear_kmer(kmer_extend);
                }

            }

            kmer = entire_path.first[entire_path.first.size() - 1];

            entire_path.second += best_path.second;

        }
        else {
            // no extension possible
//.........这里部分代码省略.........
开发者ID:bowhan,项目名称:trinityrnaseq,代码行数:101,代码来源:IRKE.cpp

示例15: compute_sequence_assemblies

void IRKE::compute_sequence_assemblies(KmerCounter &kcounter, float min_connectivity,
                                       unsigned int MIN_ASSEMBLY_LENGTH, unsigned int MIN_ASSEMBLY_COVERAGE,
                                       bool PARALLEL_IWORM, bool WRITE_COVERAGE, string COVERAGE_OUTPUT_FILENAME)
{

    if (!got_sorted_kmers_flag) {
        stringstream error;
        error << stacktrace() << " Error, must populate_sorted_kmers_list() before computing sequence assemblies"
            << endl;
        throw (error.str());
    }


    //vector<Kmer_counter_map_iterator>& kmers = sorted_kmers;
    vector<Kmer_Occurence_Pair> &kmers = sorted_kmers;  // note, these are not actually sorted if PARALLEL_IWORM mode.

    unsigned long init_size = kcounter.size();

    cerr << "Total kcounter hash size: " << init_size << " vs. sorted list size: " << kmers.size() << endl;

    unsigned int kmer_length = kcounter.get_kmer_length();
    ofstream coverage_writer;
    if (WRITE_COVERAGE) {
        coverage_writer.open(COVERAGE_OUTPUT_FILENAME.c_str());
    }


    // string s = "before.kmers";
    // kcounter.dump_kmers_to_file(s);

    /* -----------------------------------------------------------
       Two reconstruction modes. (bhaas, Jan-4-2014)

       1.  Original (not setting PARALLEL_IWORM): kmer list is sorted descendingly by abundance.
       Seed kmer is chosen as most abundant, and contig construction extends from this seed.

       2.  PARALLEL_IWORM:  the kmer list is not sorted. A random kmer (just ordered by hash iterator, nothing special, probably not so random)
       is used as a seed for draft contig reconstruction.
       The most abundant kmer in the draft contig is chosen as a proper seed.
       An inchworm contig is then constructed using this new seed, and reported.
       So, in this case, the draft contig in the first phase is just used to find a better seed, from which the inchworm contig is then 
       properly reconstructed.

    --------------------------------------------------------------- */



    // try building an inchworm contig from each seed kmer
    int myTid;


    if (PARALLEL_IWORM) {
        omp_set_num_threads(IRKE_COMMON::NUM_THREADS);
    }
    else {
        omp_set_num_threads(1); // turn off multithreading for the contig building.
    }


    //-----------------------------------------------------------------
    // Prep writing to separate inchworm output files for each thread
    //-----------------------------------------------------------------

    vector<iworm_tmp_file> tmpfiles;
    int num_threads = omp_get_max_threads();
    cerr << "num threads set to: " << num_threads << endl;

    for (int i = 0; i < num_threads; i++) {
        iworm_tmp_file tmpfile_struct;
        tmpfiles.push_back(tmpfile_struct);
        iworm_tmp_file &itmp = tmpfiles[i];

        stringstream filename_constructor;
        filename_constructor << "tmp.iworm.fa.pid_" << getpid() << ".thread_" << i;

        itmp.tmp_filename = new char[100];
        strcpy(itmp.tmp_filename, filename_constructor.str().c_str());
        itmp.tmp_filename[filename_constructor.str().length()] = '\0';

        itmp.fh = new ofstream();
        itmp.fh->open(itmp.tmp_filename);
        cerr << "Done opening file. " << itmp.tmp_filename << endl;
    }


    //-------------------
    // Build contigs.
    //-------------------

#pragma omp parallel for private (myTid) schedule (dynamic, 1000)
    for (unsigned int i = 0; i < kmers.size(); i++) {

        // cerr << "round: " << i << endl;

        myTid = omp_get_thread_num();

        unsigned long kmer_counter_size = kcounter.size();
        if (kmer_counter_size > init_size) {

            // string s = "after.kmers";
//.........这里部分代码省略.........
开发者ID:bowhan,项目名称:trinityrnaseq,代码行数:101,代码来源:IRKE.cpp


注:本文中的KmerCounter类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。