当前位置: 首页>>代码示例>>C++>>正文


C++ SequenceSet::size方法代码示例

本文整理汇总了C++中SequenceSet::size方法的典型用法代码示例。如果您正苦于以下问题:C++ SequenceSet::size方法的具体用法?C++ SequenceSet::size怎么用?C++ SequenceSet::size使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在SequenceSet的用法示例。


在下文中一共展示了SequenceSet::size方法的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: gap_sites

utils::Bitvector gap_sites( SequenceSet const& set, std::string const& gap_chars )
{
    // Edge case.
    if( set.size() == 0 ) {
        return utils::Bitvector();
    }

    // Init result bitvector to all true. Then, for every site that is not a gap, reset to false.
    auto result = utils::Bitvector( set[0].length(), true );

    // Init lookup array.
    auto lookup = utils::CharLookup<bool>( false );
    lookup.set_selection_upper_lower( gap_chars, true );

    // Process all sequences in the set.
    for( auto const& seq : set ) {
        if( seq.length() != result.size() ) {
            throw std::runtime_error(
                "Cannot calculate gap_sites() if SequenceSet is not an alignment."
            );
        }

        // Process the sites of the sequence. If it is not a gap, set it to false in the bitvector.
        // This way, only sites that are all-gap will remain with value `true` in the end.
        for( size_t i = 0; i < seq.length(); ++i ) {
            if( ! lookup[ seq[ i ] ] ) {
                result.set( i, false );
            }
        }
    }

    return result;
}
开发者ID:lczech,项目名称:genesis,代码行数:33,代码来源:functions.cpp

示例2: filter_max_sequence_length

void filter_max_sequence_length( SequenceSet& set, size_t max_length )
{
    size_t index = 0;
    while( index < set.size() ) {
        if( set.at(index).length() > max_length ) {
            set.remove(index);
        } else {
            ++index;
        }
    }
}
开发者ID:lczech,项目名称:genesis,代码行数:11,代码来源:functions.cpp

示例3: filter_min_max_sequence_length

void filter_min_max_sequence_length( SequenceSet& set, size_t min_length, size_t max_length )
{
    size_t index = 0;
    while( index < set.size() ) {
        auto len = set.at(index).length();
        if( len < min_length || len > max_length ) {
            set.remove(index);
        } else {
            ++index;
        }
    }
}
开发者ID:lczech,项目名称:genesis,代码行数:12,代码来源:functions.cpp

示例4: applyThreshold

void Application::applyThreshold()
{
  const int numSeqs=trainingSet.size();
  for(int i=0 ; i<numSeqs ; ++i) {
    EmissionSequence &S=*trainingSet[i];
    const int L=S.length();
    for(int pos=0 ; pos<L ; ++pos) {
      Emission &e=S[pos];
      GSL::Vector &V=e.getContinuous();
      for(int d=0 ; d<D ; ++d) V[d]=V[d]>=threshold ? threshold : 0;
    }
  }
}
开发者ID:bmajoros,项目名称:MUMMIE,代码行数:13,代码来源:threshold.C

示例5: is_alignment

bool is_alignment( SequenceSet const& set )
{
    if( set.size() == 0 ) {
        return true;
    }

    size_t length = set[0].length();
    for( auto& s : set ) {
        if( s.length() != length ) {
            return false;
        }
    }
    return true;
}
开发者ID:lczech,项目名称:genesis,代码行数:14,代码来源:functions.cpp

示例6: writeClusterView

void writeClusterView( string fpath , SequenceSet& allModelSeqs, SequenceSet& allDataSeqs ){

    int numSeq = allDataSeqs.size();
    SequenceSet allCOSeqs (numSeq, Sequence(0));
    vector<int> pos(numSeq, 0);
    while (true) {
        set<int> insertion_ids;
        for (int i = 0; i < numSeq; i ++) {
            if (pos[i] >= allModelSeqs[i].size()) continue;
            if (allModelSeqs[i][pos[i]] == '-') 
                insertion_ids.insert(i);
        }
        if (insertion_ids.size() != 0) {
            // insertion exists
            for (int i = 0; i < numSeq; i ++) {
                if (insertion_ids.find(i)==insertion_ids.end()) // not in set
                    allCOSeqs[i].push_back('-');
                else { // in set
                    allCOSeqs[i].push_back(allDataSeqs[i][pos[i]++]);
                }
            }
        } else { // no insertion
            for (int i = 0; i < numSeq; i ++) 
                allCOSeqs[i].push_back(allDataSeqs[i][pos[i]++]);
        }
        // terminating
        bool terminated = true;
        for (int i = 0; i < numSeq; i ++) 
            if (pos[i] != allModelSeqs[i].size()) {
                terminated = false; 
                break;
            }
        if (terminated) break;
    }
    ofstream co_out (fpath.c_str());
    for (int i = 0; i < numSeq; i ++) {
        for (int j = 0; j < allCOSeqs[i].size(); j++)  {
            co_out << allCOSeqs[i][j];
        }
        co_out << endl;
    }
    co_out.close();
}
开发者ID:JimmyLin192,项目名称:ConvexMSA,代码行数:43,代码来源:MSA_Convex.cpp

示例7: PhylipReader

TEST( Sequence, PhylipReaderAaSequential )
{
    // Skip test if no data availabe.
    NEEDS_TEST_DATA;

    // Load sequence file.
    std::string infile = environment->data_dir + "sequence/aa_3_384_s.phylip";
    SequenceSet sset;
    PhylipReader()
        .label_length( 10 )
        .valid_chars( amino_acid_codes_all() )
        .read( from_file(infile), sset);

    // Check data.
    EXPECT_EQ( 3, sset.size() );
    EXPECT_EQ( 384,          sset[0].length() );
    EXPECT_EQ( "CATH_HUMAN", sset[2].label() );
    EXPECT_EQ( "G-AVTPVKNQ", sset[0].sites().substr( 160, 10 ) );
}
开发者ID:lczech,项目名称:genesis,代码行数:19,代码来源:phylip.cpp

示例8: CVX_ADMM_MSA

Tensor5D CVX_ADMM_MSA (SequenceSet& allSeqs, vector<int>& lenSeqs, int T2, string& dir_path) {
    // 1. initialization
    int numSeq = allSeqs.size();
    vector<Tensor4D> C (numSeq, Tensor4D(0, Tensor(T2, Matrix(NUM_DNA_TYPE,
                        vector<double>(NUM_MOVEMENT, 0.0)))));  
    vector<Tensor4D> W_1 (numSeq, Tensor4D(0, Tensor(T2, Matrix(NUM_DNA_TYPE,
                        vector<double>(NUM_MOVEMENT, 0.0)))));  
    vector<Tensor4D> W_2 (numSeq, Tensor4D(0, Tensor(T2, Matrix(NUM_DNA_TYPE,
                        vector<double>(NUM_MOVEMENT, 0.0)))));  
    vector<Tensor4D> Y (numSeq, Tensor4D(0, Tensor(T2, Matrix(NUM_DNA_TYPE,
                        vector<double>(NUM_MOVEMENT, 0.0)))));  
    tensor5D_init (C, allSeqs, lenSeqs, T2);
    tensor5D_init (W_1, allSeqs, lenSeqs, T2);
    tensor5D_init (W_2, allSeqs, lenSeqs, T2);
    tensor5D_init (Y, allSeqs, lenSeqs, T2);
    set_C (C, allSeqs);

    // 2. ADMM iteration
    int iter = 0;
    double mu = MU;
    double prev_CoZ = MAX_DOUBLE;
    while (iter < MAX_ADMM_ITER) {
        // 2a. Subprogram: FrankWolf Algorithm
        // NOTE: parallelize this for to enable parallelism
#ifdef PARRALLEL_COMPUTING
#pragma omp parallel for
#endif
        for (int n = 0; n < numSeq; n++) 
            first_subproblem (W_1[n], W_2[n], Y[n], C[n], mu, allSeqs[n]);

        // 2b. Subprogram: 
        second_subproblem (W_1, W_2, Y, mu, allSeqs, lenSeqs);
	
        // 2d. update Y: Y += mu * (W_1 - W_2)
        for (int n = 0; n < numSeq; n ++)
            tensor4D_lin_update (Y[n], W_1[n], W_2[n], mu);

        // 2e. print out tracking info
        double CoZ = 0.0;
        for (int n = 0; n < numSeq; n++) 
            CoZ += tensor4D_frob_prod(C[n], W_2[n]);
        double W1mW2 = 0.0;
        for (int n = 0; n < numSeq; n ++) {
            int T1 = W_1[n].size();
            for (int i = 0; i < T1; i ++) 
                for (int j = 0; j < T2; j ++) 
                    for (int d = 0; d < NUM_DNA_TYPE; d ++) 
                        for (int m = 0; m < NUM_MOVEMENT; m ++) {
                            double value = (W_1[n][i][j][d][m] - W_2[n][i][j][d][m]);
                            W1mW2 = max( fabs(value), W1mW2 ) ;
                        }
        }
        ///////////////////////////////////Copy from Main/////////////////////////////////////////
	int T2m = T2;
	Tensor tensor (T2m, Matrix (NUM_DNA_TYPE, vector<double>(NUM_DNA_TYPE, 0.0)));
	Matrix mat_insertion (T2m, vector<double> (NUM_DNA_TYPE, 0.0));
	for (int n = 0; n < numSeq; n ++) {
		int T1 = W_2[n].size();
		for (int i = 0; i < T1; i ++) { 
			for (int j = 0; j < T2m; j ++) {
				for (int d = 0; d < NUM_DNA_TYPE; d ++) {
					for (int m = 0; m < NUM_MOVEMENT; m ++) {
						if (m == DELETION_A or m == MATCH_A)
							tensor[j][d][dna2T3idx('A')] += max(0.0, W_2[n][i][j][d][m]);
						else if (m == DELETION_T or m == MATCH_T)
							tensor[j][d][dna2T3idx('T')] += max(0.0, W_2[n][i][j][d][m]);
						else if (m == DELETION_C or m == MATCH_C)
							tensor[j][d][dna2T3idx('C')] += max(0.0, W_2[n][i][j][d][m]);
						else if (m == DELETION_G or m == MATCH_G)
							tensor[j][d][dna2T3idx('G')] += max(0.0, W_2[n][i][j][d][m]);
						else if (m == DELETION_START or m == MATCH_START)
							tensor[j][d][dna2T3idx('*')] += max(0.0, W_2[n][i][j][d][m]);
						else if (m == DELETION_END or m == MATCH_END)
							tensor[j][d][dna2T3idx('#')] += max(0.0, W_2[n][i][j][d][m]);
						else if (m == INSERTION) 
							mat_insertion[j][d] += max(0.0, W_2[n][i][j][d][m]);
					}
				}
			}
		}
	}
	Trace trace (0, Cell(2)); // 1d: j, 2d: ATCG
	refined_viterbi_algo (trace, tensor, mat_insertion);
	
	Sequence recSeq;
	for (int i = 0; i < trace.size(); i ++) 
		if (trace[i].action != INSERTION) {
			recSeq.push_back(trace[i].acidB);
			if (trace[i].acidB == '#') break;
		}
	////////////////////////////////END copy from MAIN/////////////////////////////////////////////////////
	
	SequenceSet allModelSeqs, allDataSeqs;
        double obj_rounded = 0.0;
        for (int n = 0; n < numSeq; n ++) {
            Sequence model_seq = recSeq, data_seq = allSeqs[n];
            data_seq.erase(data_seq.begin());
            model_seq.erase(model_seq.begin());
            data_seq.erase(data_seq.end()-1);
            model_seq.erase(model_seq.end()-1);
//.........这里部分代码省略.........
开发者ID:JimmyLin192,项目名称:ConvexMSA,代码行数:101,代码来源:MSA_Convex.cpp

示例9: second_subproblem

/* We resolve the second subproblem through sky-plane projection */
Sequence second_subproblem (Tensor5D& W_1, Tensor5D& W_2, Tensor5D& Y, double& mu, SequenceSet& allSeqs, vector<int> lenSeqs) {
    /*{{{*/
    int numSeq = allSeqs.size();
    int T2 = W_2[0][0].size();
    // reinitialize W_2 to all-zero matrix
    for (int n = 0; REINIT_W_ZERO_TOGGLE and n < numSeq; n ++) {
        int T1 = W_2[n].size();
        for (int i = 0; i < T1; i ++)  
            for (int j = 0; j < T2; j ++) 
                for (int d = 0; d < NUM_DNA_TYPE; d ++) 
                    for (int m = 0; m < NUM_MOVEMENT; m ++) 
                        W_2[n][i][j][d][m] = 0.0;
    }

    vector<Tensor4D> delta (numSeq, Tensor4D(0, Tensor(T2, Matrix(NUM_DNA_TYPE,
                        vector<double>(NUM_MOVEMENT, 0.0)))));  
    tensor5D_init (delta, allSeqs, lenSeqs, T2);
    Tensor tensor (T2, Matrix (NUM_DNA_TYPE, vector<double>(NUM_DNA_TYPE, 0.0)));
    Matrix mat_insertion (T2, vector<double>(NUM_DNA_TYPE, 0.0));

    Trace trace (0, Cell(2)); // 1d: j, 2d: ATCG
    int fw_iter = -1;
    while (fw_iter < MAX_2nd_FW_ITER) {
        fw_iter ++;
        // 1. compute delta
#ifdef PARRALLEL_COMPUTING
//#pragma omp parallel for
#endif
        for (int n = 0; n < numSeq; n ++) {
            int T1 = W_2[n].size();
            for (int i = 0; i < T1; i ++) { 
                for (int j = 0; j < T2; j ++) 
                    for (int d = 0; d < NUM_DNA_TYPE; d ++) 
                        for (int m = 0; m < NUM_MOVEMENT; m ++) {
                            delta[n][i][j][d][m] = -1.0* mu * (W_2[n][i][j][d][m] - W_1[n][i][j][d][m]) + Y[n][i][j][d][m];
#ifdef SECOND_SUBPROBLEM_DEBUG
                            if (delta[n][i][j][d][m] > 0)
                                cout <<"delta: " << n << "," << i << "," << j << "," << d  << "," << m << ": "
                                    << delta[n][i][j][d][m] << endl;
#endif
                            if (m == DELETION_A or m == MATCH_A)
                                tensor[j][d][dna2T3idx('A')] += max(0.0, delta[n][i][j][d][m]);
                            else if (m == DELETION_T or m == MATCH_T)
                                tensor[j][d][dna2T3idx('T')] += max(0.0, delta[n][i][j][d][m]);
                            else if (m == DELETION_C or m == MATCH_C)
                                tensor[j][d][dna2T3idx('C')] += max(0.0, delta[n][i][j][d][m]);
                            else if (m == DELETION_G or m == MATCH_G)
                                tensor[j][d][dna2T3idx('G')] += max(0.0, delta[n][i][j][d][m]);
                            else if (m == DELETION_START or m == MATCH_START)
                                tensor[j][d][dna2T3idx('*')] += max(0.0, delta[n][i][j][d][m]);
                            else if (m == DELETION_END or m == MATCH_END)
                                tensor[j][d][dna2T3idx('#')] += max(0.0, delta[n][i][j][d][m]);
                            else if (m == INSERTION) {
                                mat_insertion[j][d] += max(0.0, delta[n][i][j][d][m]);
                            }
                        }
            }
        }
#ifdef SECOND_SUBPROBLEM_DEBUG
        cout << "tensor transition input list:" << endl;
        for (int j = 0; j < T2; j ++) 
            for (int d = 0; d < NUM_DNA_TYPE; d ++) 
                for (int k = 0; k < NUM_DNA_TYPE; k ++) {
                    if (tensor[j][d][k] > 0)
                        cout << "(" << j << ", " << d << ", " << k << ")=" << tensor[j][d][k] << endl;
                }
#endif

        double delta_square = 0.0;
        for (int n = 0; n < numSeq; n ++) 
            delta_square += tensor4D_frob_prod (delta[n], delta[n]);
        //cout << "delta_square: " << delta_square << endl;
        if ( delta_square < 1e-12 ) {
            //cout << "small delta. early stop." << endl;
            break;
        }

        // 2. determine the trace: run viterbi algorithm
        trace.clear();
	refined_viterbi_algo (trace, tensor, mat_insertion);
        Tensor5D S (numSeq, Tensor4D(0, Tensor(T2, Matrix(NUM_DNA_TYPE, vector<double>(NUM_MOVEMENT, 0.0))))); 
        tensor5D_init (S, allSeqs, lenSeqs, T2);

        // 3. recover values for S 
        // 3b. set a number of selected elements to 1
        for (int t = 0; t < trace.size(); t++) {
            int sj = trace[t].location[0];
            int sd = trace[t].location[1];
            int sm = dna2T3idx(trace[t].acidB);
            // cout << trace[t].acidB;
            for (int n = 0; n < numSeq; n ++) {
                int T1 = S[n].size();
                for (int i = 0; i < T1; i ++) {
                    for (int m = 0; m < NUM_MOVEMENT; m ++)
                        if (delta[n][i][sj][sd][m] > 0.0) { 
                            if (m == DEL_BASE_IDX + sm or m == MTH_BASE_IDX + sm)
                                S[n][i][sj][sd][m] = 1.0;
                            else if (m == INSERTION and trace[t].action == INSERTION) {
                                S[n][i][sj][sd][m] = 1.0;
//.........这里部分代码省略.........
开发者ID:JimmyLin192,项目名称:ConvexMSA,代码行数:101,代码来源:MSA_Convex.cpp

示例10: go

int Application::go(int argc,char *argv[])
{
  // Process command line
  CommandLine cmd(argc,argv,"");
  if(cmd.numArgs()!=5) 
    throw "\n\
hack-known-sites [options] <training-dir> <schema.txt> <signal-name> <margin-around-peak> <motifs>\n\
";
  String dir=cmd.arg(0);
  String schemaFile=cmd.arg(1);
  String signalName=cmd.arg(2);
  int margin=cmd.arg(3).asInt();
  String motifs=cmd.arg(4);

  // Misc. initialization
  cout.precision(10);
  cerr.precision(10);
  BOOM::catchFloatOverflow();
  BOOM::Vector<String> &motifV=*motifs.getFields(",");
  Regex r("([^\/]+)\\.fastb");

  // Load schema
  Schema schema(schemaFile);
  sigID=schema.lookupContinuousID(signalName);
  openID=schema.lookupContinuousID("unpaired");
  alphabet=schema.getAlphabet(0);

  // Load the training set
  cerr<<"loading training data"<<endl;
  trainingSet.load(dir,schema);

  // Process sequences
  cerr<<"processing sequences"<<endl;
  BOOM::Vector<float> all, cluster;
  BOOM::Array1D< BOOM::Vector<double> > offsets(2*WINDOW_SIZE+1);
  BOOM::Vector<double> motifClusterSizes, otherClusterSizes;
  const int N=trainingSet.size();
  int nextSeqID=1;
  for(int i=0 ; i<N ; ++i) {
    EmissionSequence &S=*trainingSet[i];
    if(!r.search(S.getFilename())) throw S.getFilename();
    String substrate=r[1];
    int begin;
    double prevX=0;
    double maxX=-1;
    int maxPos;
    const int L=S.length();
    for(int pos=0 ; pos<L ; ++pos) {
      const Emission &e=S[pos];
      const double x=e[sigID];
      all.push_back(e[openID]);
      if(prevX==0 && x>0) { begin=pos; maxX=x; maxPos=pos; }
      else if(prevX>0 && x>0 && x>maxX) { maxX=x; maxPos=pos; }
      else if(prevX>0 && x==0) {
	int from, to;
	if(1) {
	  if(findMotif(maxPos,from,to,motifV,margin,S)) {
	  /*
	  cout<<substrate<<"\tpositive\tgroup\t"<<begin<<"\t"<<pos<<"\t1\t+\t.\n";
	  cout<<substrate<<"\tmotif\tsite\t"<<from<<"\t"<<to<<"\t1\t+\t.\n";
	  */
	  //cout<<maxX<<endl;
	  /*
	  EmissionSequence *subseq=S.getSubsequence(begin,pos-begin);
	  Sequence *seq=subseq->getDiscreteSeq(0);
	  delete subseq;
	  cout<<">"<<nextSeqID++<<endl;
	  seq->printOn(cout,schema.getAlphabet(0));
	  cout<<endl;
	  delete seq;
	  */

	  for(int delta=-WINDOW_SIZE ; delta<=WINDOW_SIZE ; ++delta) {
	    //int x=to+delta;
	    int x=maxPos+delta;
	    if(x<0 || x>=L) continue;
	    double y=S[x][openID];
	    offsets[delta+WINDOW_SIZE].push_back(y);
	  }

	  //motifClusterSizes.push_back(pos-begin);
	  }
	}
	else {//for(int i=begin ; i<pos ; ++i) {
	  /*
	  cout<<substrate<<"\tnegative\tgroup\t"<<begin+1<<"\t"<<pos<<"\t1\t+\t.\n";
	  cout<<substrate<<"\tdecoy\tsite\t"<<maxPos-margin<<"\t"<<maxPos+margin<<"\t1\t+\t.\n";
	  */
	  //cout<<maxX<<endl;
	  /*
	  //EmissionSequence *subseq=S.getSubsequence(begin,pos-begin);
	  if(maxPos>margin && maxPos+margin<L) {
	    EmissionSequence *subseq=S.getSubsequence(maxPos-margin,2*margin);
	    Sequence *seq=subseq->getDiscreteSeq(0);
	    delete subseq;
	    cout<<">"<<nextSeqID++<<endl;
	    seq->printOn(cout,schema.getAlphabet(0));
	    cout<<endl;
	    delete seq;
	  }
//.........这里部分代码省略.........
开发者ID:bmajoros,项目名称:MUMMIE,代码行数:101,代码来源:hack-known-sites.C

示例11: merge_duplicate_sequences

void merge_duplicate_sequences(
    SequenceSet& set,
    MergeDuplicateSequencesCountPolicy count_policy,
    std::string const& counter_prefix
) {
    // Helper to keep track of which sequences (at position i in the set) occured how often.
    struct Duplicate
    {
        size_t index;
        size_t count;
    };

    // Find duplicates and count their occurences.
    // TODO this is a very memory intense step. find an algo that does not need to copy all sites...
    std::unordered_map< std::string, Duplicate > dup_map;
    size_t i = 0;
    while( i < set.size() ) {
        auto& seq = set[i];

        if( dup_map.count( seq.sites() ) == 0 ) {

            // If it is a new sequence, init the map entry and move to the next sequence.
            dup_map[ seq.sites() ].index = i;
            dup_map[ seq.sites() ].count = 1;
            ++i;

        } else {

            // If we already saw that sequence, increment its counter, and remove the sequence
            // from the set. Do not increment i - we deleted a sequence, so staying at the
            // position automatically "moves" to the next one.
            ++dup_map[ seq.sites() ].count;
            set.remove(i);
        }
    }

    // We do not need to relabel sequences.
    if( count_policy == MergeDuplicateSequencesCountPolicy::kDiscard ) {
        return;
    }

    // Relabel using the counts.
    for( size_t j = 0; j < set.size(); ++j ) {
        auto& seq = set[j];

        // The sequence needs to be in the map, as we added it in the previous step.
        // It also needs to have the same index, as we never changed that.
        assert( dup_map.count(seq.sites()) > 0 );
        assert( dup_map[ seq.sites() ].index == j );

        // Append the count to the label.
        auto count = dup_map[ seq.sites() ].count;
        if( count_policy == MergeDuplicateSequencesCountPolicy::kAppendToLabel ) {
            auto new_label = seq.label() + counter_prefix + std::to_string(count);
            seq.label( new_label );

        } else {

            // We already skipped the discard case, so this here should not happen.
            assert( false );
        }
    }
}
开发者ID:lczech,项目名称:genesis,代码行数:63,代码来源:functions.cpp


注:本文中的SequenceSet::size方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。