本文整理汇总了C++中SequenceSet::size方法的典型用法代码示例。如果您正苦于以下问题:C++ SequenceSet::size方法的具体用法?C++ SequenceSet::size怎么用?C++ SequenceSet::size使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类SequenceSet
的用法示例。
在下文中一共展示了SequenceSet::size方法的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: gap_sites
utils::Bitvector gap_sites( SequenceSet const& set, std::string const& gap_chars )
{
// Edge case.
if( set.size() == 0 ) {
return utils::Bitvector();
}
// Init result bitvector to all true. Then, for every site that is not a gap, reset to false.
auto result = utils::Bitvector( set[0].length(), true );
// Init lookup array.
auto lookup = utils::CharLookup<bool>( false );
lookup.set_selection_upper_lower( gap_chars, true );
// Process all sequences in the set.
for( auto const& seq : set ) {
if( seq.length() != result.size() ) {
throw std::runtime_error(
"Cannot calculate gap_sites() if SequenceSet is not an alignment."
);
}
// Process the sites of the sequence. If it is not a gap, set it to false in the bitvector.
// This way, only sites that are all-gap will remain with value `true` in the end.
for( size_t i = 0; i < seq.length(); ++i ) {
if( ! lookup[ seq[ i ] ] ) {
result.set( i, false );
}
}
}
return result;
}
示例2: filter_max_sequence_length
void filter_max_sequence_length( SequenceSet& set, size_t max_length )
{
size_t index = 0;
while( index < set.size() ) {
if( set.at(index).length() > max_length ) {
set.remove(index);
} else {
++index;
}
}
}
示例3: filter_min_max_sequence_length
void filter_min_max_sequence_length( SequenceSet& set, size_t min_length, size_t max_length )
{
size_t index = 0;
while( index < set.size() ) {
auto len = set.at(index).length();
if( len < min_length || len > max_length ) {
set.remove(index);
} else {
++index;
}
}
}
示例4: applyThreshold
void Application::applyThreshold()
{
const int numSeqs=trainingSet.size();
for(int i=0 ; i<numSeqs ; ++i) {
EmissionSequence &S=*trainingSet[i];
const int L=S.length();
for(int pos=0 ; pos<L ; ++pos) {
Emission &e=S[pos];
GSL::Vector &V=e.getContinuous();
for(int d=0 ; d<D ; ++d) V[d]=V[d]>=threshold ? threshold : 0;
}
}
}
示例5: is_alignment
bool is_alignment( SequenceSet const& set )
{
if( set.size() == 0 ) {
return true;
}
size_t length = set[0].length();
for( auto& s : set ) {
if( s.length() != length ) {
return false;
}
}
return true;
}
示例6: writeClusterView
void writeClusterView( string fpath , SequenceSet& allModelSeqs, SequenceSet& allDataSeqs ){
int numSeq = allDataSeqs.size();
SequenceSet allCOSeqs (numSeq, Sequence(0));
vector<int> pos(numSeq, 0);
while (true) {
set<int> insertion_ids;
for (int i = 0; i < numSeq; i ++) {
if (pos[i] >= allModelSeqs[i].size()) continue;
if (allModelSeqs[i][pos[i]] == '-')
insertion_ids.insert(i);
}
if (insertion_ids.size() != 0) {
// insertion exists
for (int i = 0; i < numSeq; i ++) {
if (insertion_ids.find(i)==insertion_ids.end()) // not in set
allCOSeqs[i].push_back('-');
else { // in set
allCOSeqs[i].push_back(allDataSeqs[i][pos[i]++]);
}
}
} else { // no insertion
for (int i = 0; i < numSeq; i ++)
allCOSeqs[i].push_back(allDataSeqs[i][pos[i]++]);
}
// terminating
bool terminated = true;
for (int i = 0; i < numSeq; i ++)
if (pos[i] != allModelSeqs[i].size()) {
terminated = false;
break;
}
if (terminated) break;
}
ofstream co_out (fpath.c_str());
for (int i = 0; i < numSeq; i ++) {
for (int j = 0; j < allCOSeqs[i].size(); j++) {
co_out << allCOSeqs[i][j];
}
co_out << endl;
}
co_out.close();
}
示例7: PhylipReader
TEST( Sequence, PhylipReaderAaSequential )
{
// Skip test if no data availabe.
NEEDS_TEST_DATA;
// Load sequence file.
std::string infile = environment->data_dir + "sequence/aa_3_384_s.phylip";
SequenceSet sset;
PhylipReader()
.label_length( 10 )
.valid_chars( amino_acid_codes_all() )
.read( from_file(infile), sset);
// Check data.
EXPECT_EQ( 3, sset.size() );
EXPECT_EQ( 384, sset[0].length() );
EXPECT_EQ( "CATH_HUMAN", sset[2].label() );
EXPECT_EQ( "G-AVTPVKNQ", sset[0].sites().substr( 160, 10 ) );
}
示例8: CVX_ADMM_MSA
Tensor5D CVX_ADMM_MSA (SequenceSet& allSeqs, vector<int>& lenSeqs, int T2, string& dir_path) {
// 1. initialization
int numSeq = allSeqs.size();
vector<Tensor4D> C (numSeq, Tensor4D(0, Tensor(T2, Matrix(NUM_DNA_TYPE,
vector<double>(NUM_MOVEMENT, 0.0)))));
vector<Tensor4D> W_1 (numSeq, Tensor4D(0, Tensor(T2, Matrix(NUM_DNA_TYPE,
vector<double>(NUM_MOVEMENT, 0.0)))));
vector<Tensor4D> W_2 (numSeq, Tensor4D(0, Tensor(T2, Matrix(NUM_DNA_TYPE,
vector<double>(NUM_MOVEMENT, 0.0)))));
vector<Tensor4D> Y (numSeq, Tensor4D(0, Tensor(T2, Matrix(NUM_DNA_TYPE,
vector<double>(NUM_MOVEMENT, 0.0)))));
tensor5D_init (C, allSeqs, lenSeqs, T2);
tensor5D_init (W_1, allSeqs, lenSeqs, T2);
tensor5D_init (W_2, allSeqs, lenSeqs, T2);
tensor5D_init (Y, allSeqs, lenSeqs, T2);
set_C (C, allSeqs);
// 2. ADMM iteration
int iter = 0;
double mu = MU;
double prev_CoZ = MAX_DOUBLE;
while (iter < MAX_ADMM_ITER) {
// 2a. Subprogram: FrankWolf Algorithm
// NOTE: parallelize this for to enable parallelism
#ifdef PARRALLEL_COMPUTING
#pragma omp parallel for
#endif
for (int n = 0; n < numSeq; n++)
first_subproblem (W_1[n], W_2[n], Y[n], C[n], mu, allSeqs[n]);
// 2b. Subprogram:
second_subproblem (W_1, W_2, Y, mu, allSeqs, lenSeqs);
// 2d. update Y: Y += mu * (W_1 - W_2)
for (int n = 0; n < numSeq; n ++)
tensor4D_lin_update (Y[n], W_1[n], W_2[n], mu);
// 2e. print out tracking info
double CoZ = 0.0;
for (int n = 0; n < numSeq; n++)
CoZ += tensor4D_frob_prod(C[n], W_2[n]);
double W1mW2 = 0.0;
for (int n = 0; n < numSeq; n ++) {
int T1 = W_1[n].size();
for (int i = 0; i < T1; i ++)
for (int j = 0; j < T2; j ++)
for (int d = 0; d < NUM_DNA_TYPE; d ++)
for (int m = 0; m < NUM_MOVEMENT; m ++) {
double value = (W_1[n][i][j][d][m] - W_2[n][i][j][d][m]);
W1mW2 = max( fabs(value), W1mW2 ) ;
}
}
///////////////////////////////////Copy from Main/////////////////////////////////////////
int T2m = T2;
Tensor tensor (T2m, Matrix (NUM_DNA_TYPE, vector<double>(NUM_DNA_TYPE, 0.0)));
Matrix mat_insertion (T2m, vector<double> (NUM_DNA_TYPE, 0.0));
for (int n = 0; n < numSeq; n ++) {
int T1 = W_2[n].size();
for (int i = 0; i < T1; i ++) {
for (int j = 0; j < T2m; j ++) {
for (int d = 0; d < NUM_DNA_TYPE; d ++) {
for (int m = 0; m < NUM_MOVEMENT; m ++) {
if (m == DELETION_A or m == MATCH_A)
tensor[j][d][dna2T3idx('A')] += max(0.0, W_2[n][i][j][d][m]);
else if (m == DELETION_T or m == MATCH_T)
tensor[j][d][dna2T3idx('T')] += max(0.0, W_2[n][i][j][d][m]);
else if (m == DELETION_C or m == MATCH_C)
tensor[j][d][dna2T3idx('C')] += max(0.0, W_2[n][i][j][d][m]);
else if (m == DELETION_G or m == MATCH_G)
tensor[j][d][dna2T3idx('G')] += max(0.0, W_2[n][i][j][d][m]);
else if (m == DELETION_START or m == MATCH_START)
tensor[j][d][dna2T3idx('*')] += max(0.0, W_2[n][i][j][d][m]);
else if (m == DELETION_END or m == MATCH_END)
tensor[j][d][dna2T3idx('#')] += max(0.0, W_2[n][i][j][d][m]);
else if (m == INSERTION)
mat_insertion[j][d] += max(0.0, W_2[n][i][j][d][m]);
}
}
}
}
}
Trace trace (0, Cell(2)); // 1d: j, 2d: ATCG
refined_viterbi_algo (trace, tensor, mat_insertion);
Sequence recSeq;
for (int i = 0; i < trace.size(); i ++)
if (trace[i].action != INSERTION) {
recSeq.push_back(trace[i].acidB);
if (trace[i].acidB == '#') break;
}
////////////////////////////////END copy from MAIN/////////////////////////////////////////////////////
SequenceSet allModelSeqs, allDataSeqs;
double obj_rounded = 0.0;
for (int n = 0; n < numSeq; n ++) {
Sequence model_seq = recSeq, data_seq = allSeqs[n];
data_seq.erase(data_seq.begin());
model_seq.erase(model_seq.begin());
data_seq.erase(data_seq.end()-1);
model_seq.erase(model_seq.end()-1);
//.........这里部分代码省略.........
示例9: second_subproblem
/* We resolve the second subproblem through sky-plane projection */
Sequence second_subproblem (Tensor5D& W_1, Tensor5D& W_2, Tensor5D& Y, double& mu, SequenceSet& allSeqs, vector<int> lenSeqs) {
/*{{{*/
int numSeq = allSeqs.size();
int T2 = W_2[0][0].size();
// reinitialize W_2 to all-zero matrix
for (int n = 0; REINIT_W_ZERO_TOGGLE and n < numSeq; n ++) {
int T1 = W_2[n].size();
for (int i = 0; i < T1; i ++)
for (int j = 0; j < T2; j ++)
for (int d = 0; d < NUM_DNA_TYPE; d ++)
for (int m = 0; m < NUM_MOVEMENT; m ++)
W_2[n][i][j][d][m] = 0.0;
}
vector<Tensor4D> delta (numSeq, Tensor4D(0, Tensor(T2, Matrix(NUM_DNA_TYPE,
vector<double>(NUM_MOVEMENT, 0.0)))));
tensor5D_init (delta, allSeqs, lenSeqs, T2);
Tensor tensor (T2, Matrix (NUM_DNA_TYPE, vector<double>(NUM_DNA_TYPE, 0.0)));
Matrix mat_insertion (T2, vector<double>(NUM_DNA_TYPE, 0.0));
Trace trace (0, Cell(2)); // 1d: j, 2d: ATCG
int fw_iter = -1;
while (fw_iter < MAX_2nd_FW_ITER) {
fw_iter ++;
// 1. compute delta
#ifdef PARRALLEL_COMPUTING
//#pragma omp parallel for
#endif
for (int n = 0; n < numSeq; n ++) {
int T1 = W_2[n].size();
for (int i = 0; i < T1; i ++) {
for (int j = 0; j < T2; j ++)
for (int d = 0; d < NUM_DNA_TYPE; d ++)
for (int m = 0; m < NUM_MOVEMENT; m ++) {
delta[n][i][j][d][m] = -1.0* mu * (W_2[n][i][j][d][m] - W_1[n][i][j][d][m]) + Y[n][i][j][d][m];
#ifdef SECOND_SUBPROBLEM_DEBUG
if (delta[n][i][j][d][m] > 0)
cout <<"delta: " << n << "," << i << "," << j << "," << d << "," << m << ": "
<< delta[n][i][j][d][m] << endl;
#endif
if (m == DELETION_A or m == MATCH_A)
tensor[j][d][dna2T3idx('A')] += max(0.0, delta[n][i][j][d][m]);
else if (m == DELETION_T or m == MATCH_T)
tensor[j][d][dna2T3idx('T')] += max(0.0, delta[n][i][j][d][m]);
else if (m == DELETION_C or m == MATCH_C)
tensor[j][d][dna2T3idx('C')] += max(0.0, delta[n][i][j][d][m]);
else if (m == DELETION_G or m == MATCH_G)
tensor[j][d][dna2T3idx('G')] += max(0.0, delta[n][i][j][d][m]);
else if (m == DELETION_START or m == MATCH_START)
tensor[j][d][dna2T3idx('*')] += max(0.0, delta[n][i][j][d][m]);
else if (m == DELETION_END or m == MATCH_END)
tensor[j][d][dna2T3idx('#')] += max(0.0, delta[n][i][j][d][m]);
else if (m == INSERTION) {
mat_insertion[j][d] += max(0.0, delta[n][i][j][d][m]);
}
}
}
}
#ifdef SECOND_SUBPROBLEM_DEBUG
cout << "tensor transition input list:" << endl;
for (int j = 0; j < T2; j ++)
for (int d = 0; d < NUM_DNA_TYPE; d ++)
for (int k = 0; k < NUM_DNA_TYPE; k ++) {
if (tensor[j][d][k] > 0)
cout << "(" << j << ", " << d << ", " << k << ")=" << tensor[j][d][k] << endl;
}
#endif
double delta_square = 0.0;
for (int n = 0; n < numSeq; n ++)
delta_square += tensor4D_frob_prod (delta[n], delta[n]);
//cout << "delta_square: " << delta_square << endl;
if ( delta_square < 1e-12 ) {
//cout << "small delta. early stop." << endl;
break;
}
// 2. determine the trace: run viterbi algorithm
trace.clear();
refined_viterbi_algo (trace, tensor, mat_insertion);
Tensor5D S (numSeq, Tensor4D(0, Tensor(T2, Matrix(NUM_DNA_TYPE, vector<double>(NUM_MOVEMENT, 0.0)))));
tensor5D_init (S, allSeqs, lenSeqs, T2);
// 3. recover values for S
// 3b. set a number of selected elements to 1
for (int t = 0; t < trace.size(); t++) {
int sj = trace[t].location[0];
int sd = trace[t].location[1];
int sm = dna2T3idx(trace[t].acidB);
// cout << trace[t].acidB;
for (int n = 0; n < numSeq; n ++) {
int T1 = S[n].size();
for (int i = 0; i < T1; i ++) {
for (int m = 0; m < NUM_MOVEMENT; m ++)
if (delta[n][i][sj][sd][m] > 0.0) {
if (m == DEL_BASE_IDX + sm or m == MTH_BASE_IDX + sm)
S[n][i][sj][sd][m] = 1.0;
else if (m == INSERTION and trace[t].action == INSERTION) {
S[n][i][sj][sd][m] = 1.0;
//.........这里部分代码省略.........
示例10: go
int Application::go(int argc,char *argv[])
{
// Process command line
CommandLine cmd(argc,argv,"");
if(cmd.numArgs()!=5)
throw "\n\
hack-known-sites [options] <training-dir> <schema.txt> <signal-name> <margin-around-peak> <motifs>\n\
";
String dir=cmd.arg(0);
String schemaFile=cmd.arg(1);
String signalName=cmd.arg(2);
int margin=cmd.arg(3).asInt();
String motifs=cmd.arg(4);
// Misc. initialization
cout.precision(10);
cerr.precision(10);
BOOM::catchFloatOverflow();
BOOM::Vector<String> &motifV=*motifs.getFields(",");
Regex r("([^\/]+)\\.fastb");
// Load schema
Schema schema(schemaFile);
sigID=schema.lookupContinuousID(signalName);
openID=schema.lookupContinuousID("unpaired");
alphabet=schema.getAlphabet(0);
// Load the training set
cerr<<"loading training data"<<endl;
trainingSet.load(dir,schema);
// Process sequences
cerr<<"processing sequences"<<endl;
BOOM::Vector<float> all, cluster;
BOOM::Array1D< BOOM::Vector<double> > offsets(2*WINDOW_SIZE+1);
BOOM::Vector<double> motifClusterSizes, otherClusterSizes;
const int N=trainingSet.size();
int nextSeqID=1;
for(int i=0 ; i<N ; ++i) {
EmissionSequence &S=*trainingSet[i];
if(!r.search(S.getFilename())) throw S.getFilename();
String substrate=r[1];
int begin;
double prevX=0;
double maxX=-1;
int maxPos;
const int L=S.length();
for(int pos=0 ; pos<L ; ++pos) {
const Emission &e=S[pos];
const double x=e[sigID];
all.push_back(e[openID]);
if(prevX==0 && x>0) { begin=pos; maxX=x; maxPos=pos; }
else if(prevX>0 && x>0 && x>maxX) { maxX=x; maxPos=pos; }
else if(prevX>0 && x==0) {
int from, to;
if(1) {
if(findMotif(maxPos,from,to,motifV,margin,S)) {
/*
cout<<substrate<<"\tpositive\tgroup\t"<<begin<<"\t"<<pos<<"\t1\t+\t.\n";
cout<<substrate<<"\tmotif\tsite\t"<<from<<"\t"<<to<<"\t1\t+\t.\n";
*/
//cout<<maxX<<endl;
/*
EmissionSequence *subseq=S.getSubsequence(begin,pos-begin);
Sequence *seq=subseq->getDiscreteSeq(0);
delete subseq;
cout<<">"<<nextSeqID++<<endl;
seq->printOn(cout,schema.getAlphabet(0));
cout<<endl;
delete seq;
*/
for(int delta=-WINDOW_SIZE ; delta<=WINDOW_SIZE ; ++delta) {
//int x=to+delta;
int x=maxPos+delta;
if(x<0 || x>=L) continue;
double y=S[x][openID];
offsets[delta+WINDOW_SIZE].push_back(y);
}
//motifClusterSizes.push_back(pos-begin);
}
}
else {//for(int i=begin ; i<pos ; ++i) {
/*
cout<<substrate<<"\tnegative\tgroup\t"<<begin+1<<"\t"<<pos<<"\t1\t+\t.\n";
cout<<substrate<<"\tdecoy\tsite\t"<<maxPos-margin<<"\t"<<maxPos+margin<<"\t1\t+\t.\n";
*/
//cout<<maxX<<endl;
/*
//EmissionSequence *subseq=S.getSubsequence(begin,pos-begin);
if(maxPos>margin && maxPos+margin<L) {
EmissionSequence *subseq=S.getSubsequence(maxPos-margin,2*margin);
Sequence *seq=subseq->getDiscreteSeq(0);
delete subseq;
cout<<">"<<nextSeqID++<<endl;
seq->printOn(cout,schema.getAlphabet(0));
cout<<endl;
delete seq;
}
//.........这里部分代码省略.........
示例11: merge_duplicate_sequences
void merge_duplicate_sequences(
SequenceSet& set,
MergeDuplicateSequencesCountPolicy count_policy,
std::string const& counter_prefix
) {
// Helper to keep track of which sequences (at position i in the set) occured how often.
struct Duplicate
{
size_t index;
size_t count;
};
// Find duplicates and count their occurences.
// TODO this is a very memory intense step. find an algo that does not need to copy all sites...
std::unordered_map< std::string, Duplicate > dup_map;
size_t i = 0;
while( i < set.size() ) {
auto& seq = set[i];
if( dup_map.count( seq.sites() ) == 0 ) {
// If it is a new sequence, init the map entry and move to the next sequence.
dup_map[ seq.sites() ].index = i;
dup_map[ seq.sites() ].count = 1;
++i;
} else {
// If we already saw that sequence, increment its counter, and remove the sequence
// from the set. Do not increment i - we deleted a sequence, so staying at the
// position automatically "moves" to the next one.
++dup_map[ seq.sites() ].count;
set.remove(i);
}
}
// We do not need to relabel sequences.
if( count_policy == MergeDuplicateSequencesCountPolicy::kDiscard ) {
return;
}
// Relabel using the counts.
for( size_t j = 0; j < set.size(); ++j ) {
auto& seq = set[j];
// The sequence needs to be in the map, as we added it in the previous step.
// It also needs to have the same index, as we never changed that.
assert( dup_map.count(seq.sites()) > 0 );
assert( dup_map[ seq.sites() ].index == j );
// Append the count to the label.
auto count = dup_map[ seq.sites() ].count;
if( count_policy == MergeDuplicateSequencesCountPolicy::kAppendToLabel ) {
auto new_label = seq.label() + counter_prefix + std::to_string(count);
seq.label( new_label );
} else {
// We already skipped the discard case, so this here should not happen.
assert( false );
}
}
}