当前位置: 首页>>代码示例>>C++>>正文


C++ SuffixArray类代码示例

本文整理汇总了C++中SuffixArray的典型用法代码示例。如果您正苦于以下问题:C++ SuffixArray类的具体用法?C++ SuffixArray怎么用?C++ SuffixArray使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了SuffixArray类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: main

/**
 * Processes input line by line until end of input or an empty line.
 *
 * For each line, spaces are removed in place inside in.str, then build() and
 * build_h() are run. For each length i = 1, 2, ..., in.n the program prints
 * (longest run of consecutive in.h[] entries that are >= i) + 1, stopping at
 * the first i for which no such entry exists. A blank line ends each case.
 *
 * Input is read with fgets() instead of gets(): gets() cannot bound the write
 * into in.str and was removed from the language in C++14.
 */
int main() {
    SuffixArray in;
    // NOTE(review): sizeof(in.str) assumes SuffixArray::str is a fixed-size
    // char array (not a pointer) -- confirm against the struct definition.
    while(fgets(in.str, static_cast<int>(sizeof(in.str)), stdin)
            && in.str[0] != '\0' && in.str[0] != '\n') {
        // Compact the line in place. fgets() keeps the trailing newline, so
        // the newline is treated as the end of the line and never copied.
        int n = 0;
        for(int i = 0; in.str[i] != '\0' && in.str[i] != '\n'; i++)
            if(in.str[i] != ' ')
                in.str[n++] = in.str[i];
        in.str[n] = '\0';

        in.build();
        in.build_h();

        // A line made only of spaces leaves nothing to analyse.
        if(n == 0)
            puts("0");
        for(int i = 1; i <= in.n; i++) {
            // ret: longest run of consecutive h[] values >= i seen so far.
            // cnt: length of the run currently being extended.
            int cnt = 0, ret = 0;
            for(int j = 0; j < in.n; j++) {
                if(in.h[j] >= i)
                    cnt++;
                else
                    ret = max(ret, cnt), cnt = 0;
            }
            ret = max(ret, cnt);
            // No h[] entry reaches i, so no larger i can either.
            if(ret <= 0)
                break;
            printf("%d\n", ret + 1);
        }
        puts("");
    }
    return 0;
}
开发者ID:JohnXinhua,项目名称:UVa,代码行数:31,代码来源:11855+-+Buzzwords.cpp

示例2: validate

// Validate the sampled suffix array values are correct
void SampledSuffixArray::validate(const std::string filename, const BWT* pBWT)
{
    ReadTable* pRT = new ReadTable(filename);
    SuffixArray* pSA = new SuffixArray(pRT, 1);
    
    std::cout << "Validating sampled suffix array entries\n";

    for(size_t i = 0; i < pSA->getSize(); ++i)
    {
        SAElem calc = calcSA(i, pBWT);
        SAElem real = pSA->get(i);
        if(calc.getID() != real.getID() || calc.getPos() != real.getPos())
        {
            std::cout << "Error SA elements do not match for " << i << "\n";
            std::cout << "Calc: " << calc << "\n";
            std::cout << "Real: " << real << "\n";
            exit(1);
        }
    }
    
    std::cout << "All calculate SA values are correct\n";

    delete pRT;
    delete pSA;
}
开发者ID:kimrutherford,项目名称:sga,代码行数:26,代码来源:SampledSuffixArray.cpp

示例3: main

int main()
{
    scanf("%s", buf);
    SuffixArray sa;
    sa.create(buf);
    sa.output();
    
    return 0;
} 
开发者ID:ZhouWeikuan,项目名称:zoj,代码行数:9,代码来源:SuffixArray.cpp

示例4: test1

// Times SuffixArray::DA(ids) with gettimeofday() and prints the elapsed
// seconds to stdout, then collects RepeatSubString entries through
// CaculateRepeatSubString() and prints each one followed by its
// ToString(ws) rendering.
//
// ws  - passed to RepeatSubString::ToString when printing each entry.
// ids - the sequence handed to SuffixArray::DA.
void test1(wordstring& ws, intstring& ids)
{
   struct timeval start;
   struct timeval end;
   SuffixArray sa;
   gettimeofday(&start,NULL);
   sa.DA(ids);
   gettimeofday(&end,NULL);
   // Convert the seconds difference to double before scaling so the
   // microsecond total cannot overflow where tv_sec is a 32-bit integer.
   double dur = 0;
   dur += static_cast<double>(end.tv_sec-start.tv_sec)*1000000+(end.tv_usec-start.tv_usec);
   cout<<dur/1000000<<endl;
   cerr << endl;
   vector<RepeatSubString> repeat;
   CaculateRepeatSubString(sa, repeat, 5);
   // size_t index: avoids comparing a signed int against size().
   for (size_t i = 0; i < repeat.size(); ++i)
   {
       cout << repeat[i] << repeat[i].ToString(ws) << endl;
   }
}
开发者ID:wxwidget,项目名称:suffix,代码行数:19,代码来源:lfs.cpp

示例5: buildIndexForTable

void buildIndexForTable(std::string prefix, const ReadTable* pRT, bool isReverse)
{
    // Create suffix array from read table
    SuffixArray* pSA = new SuffixArray(pRT, opt::numThreads);

    if(opt::validate)
    {
        std::cout << "Validating suffix array\n";
        pSA->validate(pRT);
    }

    std::string bwt_filename = prefix + (!isReverse ? BWT_EXT : RBWT_EXT);
    pSA->writeBWT(bwt_filename, pRT);

    std::string sufidx_filename = prefix + (!isReverse ? SAI_EXT : RSAI_EXT);
    pSA->writeIndex(sufidx_filename);

    delete pSA;
    pSA = NULL;
}
开发者ID:avilella,项目名称:sga,代码行数:20,代码来源:index.cpp

示例6: print_ranks

// Prints a table with one line per index of sa.dp[0]: the character s[row]
// followed by the entry at that index from every table returned by
// sa.get_dp(). Prints nothing when sa.dp is empty.
void
print_ranks(std::string const &s, SuffixArray &sa) {
    int rows = 0;
    if (!sa.dp.empty()) {
        rows = sa.dp[0].size();
    }

    for (int row = 0; row < rows; ++row) {
        printf("%3c", s[row]);
        size_t level = 0;
        while (level < sa.dp.size()) {
            printf("%4d", sa.get_dp(level)[row]);
            ++level;
        }
        printf("\n");
    }
}
开发者ID:dhruvbird,项目名称:genome-diff-compression,代码行数:11,代码来源:suffix_array.hpp

示例7: find_max_length

 // Slides a window over s: while the window extended by one character is
 // accepted by sa.find() the window grows, otherwise its start advances.
 // Returns the final window length, i.e. the number of successful finds.
 static Int find_max_length( const SuffixArray& sa, const String& s ) {
   const int total = s.size();
   Int best = 0;
   int begin = 0;
   while ( begin + best < total ) {
     if ( sa.find(s.substr(begin, best + 1)) ) {
       ++best;
     } else {
       ++begin;
     }
   }
   return best;
 }
开发者ID:sh19910711,项目名称:aoj-solutions,代码行数:12,代码来源:main.cpp

示例8: print_suffix_array

void
print_suffix_array(std::string const &s, SuffixArray &sa, vi_t *plcp = NULL) {
    vi_t pos;
    sa.sorted_indexes(pos);
    for (size_t i = 0; i < pos.size(); ++i) {
        // Limit each line to 60 characters
        if (plcp) {
            printf("%3d: [%2d]: %s\n", pos[i], (*plcp)[pos[i]], s.substr(pos[i], 60).c_str());
        }
        else {
            printf("%3d: %s\n", pos[i], s.substr(pos[i], 60).c_str());
        }
    }
}
开发者ID:dhruvbird,项目名称:genome-diff-compression,代码行数:14,代码来源:suffix_array.hpp

示例9: longest_common_prefix

/**
 * Builds the array of common-prefix lengths between neighbouring entries of
 * a suffix array.
 *
 * The text compared is vs: the first sa.size() - 1 elements of s followed by
 * a single 0. For every k in [0, sa.size() - 1), lcp[k] is the number of
 * leading elements shared by the suffixes of vs starting at sa[k] and
 * sa[k + 1].
 *
 * @param s   indexable sequence with at least sa.size() - 1 elements
 * @param sa  suffix array; sa[i] is read as a start position in vs
 * @return    vector of sa.size() - 1 lengths, or an empty vector when sa is
 *            empty
 */
std::vector<int> longest_common_prefix(const T &s, const SuffixArray &sa){
	const int n = sa.size();
	// An empty suffix array has no neighbouring pairs; without this guard
	// lcp(n - 1) would request a vector of size -1.
	if(n <= 0){ return std::vector<int>(); }
	std::vector<int> vs(n), isa(n), lcp(n - 1);
	for(int i = 0; i + 1 < n; ++i){ vs[i] = s[i]; }
	// isa is the inverse permutation: isa[p] is the index of p within sa.
	for(int i = 0; i < n; ++i){ isa[sa[i]] = i; }
	int h = 0;
	for(int i = 0; i < n; ++i){
		const int j = isa[i];
		if(j > 0){
			const int k = j - 1;
			// The bounds checks keep the scan inside vs even when s itself
			// contains elements equal to the trailing 0.
			while(sa[j] + h < n && sa[k] + h < n &&
			      vs[sa[j] + h] == vs[sa[k] + h]){ ++h; }
			lcp[k] = h;
			// Carry h - 1 over as the starting match length for the next i.
			if(h > 0){ --h; }
		}
	}
	return lcp;
}
开发者ID:logicmachine,项目名称:LibCompetitive-v3,代码行数:17,代码来源:longest_common_prefix.hpp

示例10: addOverlapsSA

/**
 * Add the overlaps of vseq to the graph.
 * Starting from vseq without its last character, the query is looked up in
 * sa and then passed to chop(), for as long as it holds at least
 * opt::minOverlap characters. Each ContigNode is given at most one edge.
 */
static void addOverlapsSA(Graph& g, const SuffixArray& sa,
		ContigNode v, const string& vseq)
{
	assert(!vseq.empty());
	typedef SuffixArray::const_iterator It;
	set<ContigNode> visited;
	string query(vseq, 0, vseq.size() - 1);
	while (query.size() >= opt::minOverlap) {
		const pair<It, It> hits = sa.equal_range(query);
		for (It hit = hits.first; hit != hits.second; ++hit) {
			ContigNode u(hit->second);
			if (!visited.insert(u).second)
				continue;
			// Add the longest overlap between two vertices.
			unsigned overlap = hit->first.size();
			add_edge(u, v, -overlap, static_cast<DG&>(g));
		}
		chop(query);
	}
}
开发者ID:genome-vendor,项目名称:abyss,代码行数:20,代码来源:AdjList.cpp

示例11: RunTest

// For every key of `ngrams`, creates a fresh collector from `index` and
// feeds it the key's elements one at a time through Extend(). The time of
// each Extend() call is added to speedData[pos], where pos is the element's
// offset within the key, and that slot's request count is incremented.
// Prints a "." after every 10000 Extend() calls.
void RunTest(SuffixArray &index, const context_t *context,
             const unordered_map<vector<wid_t>, size_t, phrase_hash> &ngrams, vector<speed_perf_t> &speedData) {
    size_t processed = 0;

    for (const auto &ngram : ngrams) {
        const vector<wid_t> &words = ngram.first;
        Collector *collector = index.NewCollector(context, true);

        for (size_t pos = 0; pos < words.size(); ++pos) {
            double startedAt = GetTime();
            vector<sample_t> samples;
            collector->Extend(words[pos], 1000, samples);
            speedData[pos].seconds += GetElapsedTime(startedAt);
            speedData[pos].requests++;

            ++processed;
            if (processed % 10000 == 0)
                cout << "." << flush;
        }

        delete collector;
    }
}
开发者ID:ModernMT,项目名称:MMT,代码行数:23,代码来源:test_prefixspeed.cpp

示例12: ILCPConstruct

// Fills *ilcp (resized to sa.size()) in two passes.
//
// Pass 1: the text of `sa` is cut into segments at every byte whose value is
// <= 1 and at the end of the text. Each segment starts at the previous cut
// position. A separate SuffixArray is built per segment, and for each j the
// value doc_sa.lcp(j) is stored at text position start + doc_sa.sa(j).
//
// Pass 2: the array is permuted in place so that entry i ends up holding the
// value that pass 1 stored at position sa.sa(i).
void ILCPConstruct(const SuffixArray& sa,
                   std::vector<SuffixArray::Index>* ilcp) {
  typedef SuffixArray::Index Index;
  std::vector<Index>& text_lcp = *ilcp;
  text_lcp.resize(sa.size());
  Index start = 0;
  const char* text = sa.text();
  for (Index i = 0; i <= (Index)sa.size(); ++i) {
    // i == sa.size() closes the final segment.
    if (i == (Index)sa.size() || (unsigned char)text[i] <= 1) {
      const char* doc = text + start;
      Index doc_len = i - start;
      SuffixArray doc_sa(doc, doc_len);
      for (Index j = 0; j < doc_len; ++j) {
        Index p = doc_sa.sa(j);
        Index lcp = doc_sa.lcp(j);
        text_lcp[start + p] = lcp;
      }
      start = i;
    }
  }
  std::vector<bool> visited(sa.size());
  // Permute text_lcp[i] = text_lcp[sa[i]] in place by walking each cycle of
  // the permutation once and swapping along it.
  for (Index i = 0; i < (Index)sa.size(); ++i) {
    if (!visited[i]) {
      // Index rather than int, so the cursor cannot be narrowed.
      Index j = i;
      while (true) {
        visited[j] = true;
        Index to = sa.sa(j);
        if (visited[to]) break;  // cycle closed
        std::swap(text_lcp[j], text_lcp[to]);
        j = to;
      }
    }
    // ilcp[i] = text_lcp[sa.sa(i)];
  }
}
开发者ID:ahartik,项目名称:succinct,代码行数:38,代码来源:ilcp-common.hpp

示例13: parseDupHits

std::string parseDupHits(const StringVector& hitsFilenames, const std::string& out_prefix)
{
    // Load the suffix array index and the reverse suffix array index
    // Note these are not the full suffix arrays
    SuffixArray* pFwdSAI = new SuffixArray(opt::prefix + SAI_EXT);
    SuffixArray* pRevSAI = new SuffixArray(opt::prefix + RSAI_EXT);

    // Load the read table to look up the lengths of the reads and their ids.
    // When rmduping a set of reads, the ReadInfoTable can actually be larger than the
    // BWT if the names of the reads are very long. Previously, when two reads
    // are duplicated, the read with the lexographically lower read name was chosen
    // to be kept. To save memory here, we break ties using the index in the ReadInfoTable
    // instead. This allows us to avoid loading the read names.
    ReadInfoTable* pRIT = new ReadInfoTable(opt::readsFile, pFwdSAI->getNumStrings(), RIO_NUMERICID);

    std::string outFile = out_prefix + ".fa";
    std::string dupFile = out_prefix + ".dups.fa";
    std::ostream* pWriter = createWriter(outFile);
    std::ostream* pDupWriter = createWriter(dupFile);

    size_t substringRemoved = 0;
    size_t identicalRemoved = 0;
    size_t kept = 0;
    size_t buffer_size = SequenceProcessFramework::BUFFER_SIZE;

    // The reads must be output in their original ordering.
    // The hits are in the blocks of buffer_size items. We read
    // buffer_size items from the first hits file, then buffer_size
    // from the second and so on until all the hits have been processed.
    size_t num_files = hitsFilenames.size();
    std::vector<std::istream*> reader_vec(num_files, 0);

    for(size_t i = 0; i < num_files; ++i)
    {
        std::cout << "Opening " << hitsFilenames[i] << "\n";
        reader_vec[i] = createReader(hitsFilenames[i]);
    }

    bool done = false;
    size_t currReaderIdx = 0;
    size_t numRead = 0;
    size_t numReadersDone = 0;
    std::string line;

    while(!done)
    {
        // Parse a line from the current file
        bool valid = getline(*reader_vec[currReaderIdx], line);
        ++numRead;
        // Deal with switching the active reader and the end of files
        if(!valid || numRead == buffer_size)
        {
            // Switch the reader
            currReaderIdx = (currReaderIdx + 1) % num_files;
            numRead = 0;

            // Break once all the readers are invalid
            if(!valid)
            {
                ++numReadersDone;
                if(numReadersDone == num_files)
                {
                    done = true;
                    break;
                }
            }
        }

        // Parse the data
        if(valid)
        {
            std::string id;
            std::string sequence;
            std::string hitsStr;
            size_t readIdx;
            size_t numCopies;
            bool isSubstring;

            std::stringstream parser(line);
            parser >> id;
            parser >> sequence;
            getline(parser, hitsStr);

            OverlapVector ov;
            OverlapCommon::parseHitsString(hitsStr, pRIT, pRIT, pFwdSAI, pRevSAI, true, readIdx, numCopies, ov, isSubstring);
            
            bool isContained = false;
            if(isSubstring)
            {
                ++substringRemoved;
                isContained = true;
            }
            else
            {
                for(OverlapVector::iterator iter = ov.begin(); iter != ov.end(); ++iter)
                {
                    if(iter->isContainment() && iter->getContainedIdx() == 0)
                    {
                        // This read is contained by some other read
                        ++identicalRemoved;
//.........这里部分代码省略.........
开发者ID:cboursnell,项目名称:sga,代码行数:101,代码来源:rmdup.cpp

示例14: buildBWTDisk

// The algorithm is as follows. We create M BWTs for subsets of 
// the input reads. These are created independently and written
// to disk. They are then merged either sequentially or pairwise
// to create the final BWT
void buildBWTDisk(const std::string& in_filename, const std::string& out_prefix, 
                  const std::string& bwt_extension, const std::string& sai_extension,
                  bool doReverse, int numThreads, int numReadsPerBatch, int storageLevel)
{
    size_t MAX_READS_PER_GROUP = numReadsPerBatch;

    SeqReader* pReader = new SeqReader(in_filename);
    SeqRecord record;

    int groupID = 0;
    size_t numReadTotal = 0;

    MergeVector mergeVector;
    MergeItem mergeItem;
    mergeItem.start_index = 0;

    // Phase 1: Compute the initial BWTs
    ReadTable* pCurrRT = new ReadTable;
    bool done = false;
    while(!done)
    {
        done = !pReader->get(record);

        if(!done)
        {
            // the read is valid
            SeqItem item = record.toSeqItem();
            if(doReverse)
                item.seq.reverse();
            pCurrRT->addRead(item);
            ++numReadTotal;
        }

        if(pCurrRT->getCount() >= MAX_READS_PER_GROUP || (done && pCurrRT->getCount() > 0))
        {
            // Compute the SA and BWT for this group
            SuffixArray* pSA = new SuffixArray(pCurrRT, numThreads);

            // Write the BWT to disk                
            std::string bwt_temp_filename = makeTempName(out_prefix, groupID, bwt_extension);
            pSA->writeBWT(bwt_temp_filename, pCurrRT);

            std::string sai_temp_filename = makeTempName(out_prefix, groupID, sai_extension);
            pSA->writeIndex(sai_temp_filename);

            // Push the merge info
            mergeItem.end_index = numReadTotal - 1; // inclusive
            mergeItem.reads_filename = in_filename;
            mergeItem.bwt_filename = bwt_temp_filename;
            mergeItem.sai_filename = sai_temp_filename;
            mergeVector.push_back(mergeItem);

            // Cleanup
            delete pSA;

            // Start the new group
            mergeItem.start_index = numReadTotal;
            ++groupID;
            pCurrRT->clear();
        }
    }
    delete pCurrRT;
    delete pReader;

    // Phase 2: Pairwise merge the BWTs
    int round = 1;
    MergeVector nextMergeRound;
    while(mergeVector.size() > 1)
    {
        std::cout << "Starting round " << round << "\n";
        pReader = new SeqReader(in_filename);
        for(size_t i = 0; i < mergeVector.size(); i+=2)
        {
            if(i + 1 != mergeVector.size())
            {
                std::string bwt_merged_name = makeTempName(out_prefix, groupID, bwt_extension);
                std::string sai_merged_name = makeTempName(out_prefix, groupID, sai_extension);

                MergeItem item1 = mergeVector[i];
                MergeItem item2 = mergeVector[i+1];

                // Perform the actual merge
                int64_t curr_idx = merge(pReader, item1, item2, 
                                         bwt_merged_name, sai_merged_name, 
                                         doReverse, numThreads, storageLevel);

                // pReader now points to the end of item1's block of 
                // reads. Skip item2's reads
                assert(curr_idx == item2.start_index);
                while(curr_idx <= item2.end_index)
                {
                    bool eof = !pReader->get(record);
                    assert(!eof);
                    (void)eof;
                    ++curr_idx;
                }
//.........这里部分代码省略.........
开发者ID:avilella,项目名称:sga,代码行数:101,代码来源:BWTDiskConstruction.cpp

示例15: init_suffix_array

 // Initializes sa from s by calling init(s), build() and buildHeight(),
 // in that order.
 static void init_suffix_array( const String& s, SuffixArray& sa ) {
   sa.init(s);
   sa.build();
   sa.buildHeight();
 }
开发者ID:sh19910711,项目名称:aoj-solutions,代码行数:5,代码来源:main.cpp


注:本文中的SuffixArray类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。