本文整理汇总了C++中SuffixArray类的典型用法代码示例。如果您正苦于以下问题:C++ SuffixArray类的具体用法?C++ SuffixArray怎么用?C++ SuffixArray使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了SuffixArray类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: main
// Reads one line from stdin into a fixed-size character array and drops the
// trailing '\n', mirroring what gets() returned but never writing past the
// end of the buffer. Taking the buffer by array reference lets the compiler
// supply the real capacity N.
//
// A line longer than N - 1 characters is split: the remainder is returned by
// the next call.
//
// Returns false on end-of-file or read error, true otherwise.
template <size_t N>
static bool read_line(char (&buf)[N]) {
    if (fgets(buf, static_cast<int>(N), stdin) == NULL)
        return false;
    // fgets keeps the line terminator; strip it so callers see the bare line.
    for (size_t i = 0; buf[i] != '\0'; i++) {
        if (buf[i] == '\n') {
            buf[i] = '\0';
            break;
        }
    }
    return true;
}

// Processes input lines until end-of-file or an empty line. For each line:
// spaces are removed in place, the suffix array is built, and for every
// length i = 1, 2, ... the program prints (longest run of consecutive
// entries with in.h[j] >= i) + 1, stopping at the first i with no such entry.
// A blank line is printed after each input line's results.
//
// NOTE(review): in.h is presumably the LCP/height array filled by build_h(),
// and in.n the text length set by build() -- confirm against SuffixArray.
int main() {
    SuffixArray in;
    while (read_line(in.str) && in.str[0] != '\0') {
        // Compact the line in place, dropping every space character.
        int n = 0;
        for (int i = 0; in.str[i]; i++)
            if (in.str[i] != ' ')
                in.str[n++] = in.str[i];
        in.str[n] = '\0';

        in.build();
        in.build_h();

        // A line consisting only of spaces yields the single answer "0".
        if (n == 0)
            puts("0");

        for (int i = 1; i <= in.n; i++) {
            // ret = length of the longest run of consecutive h[j] >= i.
            int cnt = 0, ret = 0;
            for (int j = 0; j < in.n; j++) {
                if (in.h[j] >= i)
                    cnt++;
                else
                    ret = max(ret, cnt), cnt = 0;
            }
            ret = max(ret, cnt);
            // No entry reaches i, so no larger i can either.
            if (ret <= 0)
                break;
            printf("%d\n", ret + 1);
        }
        puts("");
    }
    return 0;
}
示例2: validate
// Validate the sampled suffix array values are correct
void SampledSuffixArray::validate(const std::string filename, const BWT* pBWT)
{
ReadTable* pRT = new ReadTable(filename);
SuffixArray* pSA = new SuffixArray(pRT, 1);
std::cout << "Validating sampled suffix array entries\n";
for(size_t i = 0; i < pSA->getSize(); ++i)
{
SAElem calc = calcSA(i, pBWT);
SAElem real = pSA->get(i);
if(calc.getID() != real.getID() || calc.getPos() != real.getPos())
{
std::cout << "Error SA elements do not match for " << i << "\n";
std::cout << "Calc: " << calc << "\n";
std::cout << "Real: " << real << "\n";
exit(1);
}
}
std::cout << "All calculate SA values are correct\n";
delete pRT;
delete pSA;
}
示例3: main
// Reads one whitespace-delimited token from stdin, builds a suffix array
// over it and prints the result.
int main()
{
    // `buf` is defined outside this excerpt.
    // NOTE(review): "%s" carries no field width, so the token length is not
    // bounded here -- buf must be large enough for the longest input.
    scanf("%s", buf);

    SuffixArray sa;
    sa.create(buf);
    sa.output();

    return 0;
}
示例4: test1
// Times SuffixArray::DA over `ids`, prints the elapsed wall-clock seconds to
// stdout, then prints every repeated substring reported by
// CaculateRepeatSubString together with its rendering through `ws`.
//
// NOTE(review): the literal 5 is forwarded to CaculateRepeatSubString as-is;
// its meaning (minimum length? minimum count?) is defined there -- confirm.
void test1(wordstring& ws, intstring& ids)
{
    struct timeval tBegin;
    struct timeval tEnd;
    SuffixArray sa;

    // Only the DA() call is inside the timed region.
    gettimeofday(&tBegin, NULL);
    sa.DA(ids);
    gettimeofday(&tEnd, NULL);

    // Elapsed time in microseconds, reported in seconds.
    const double elapsedUsec =
        (tEnd.tv_sec - tBegin.tv_sec) * 1000000 + (tEnd.tv_usec - tBegin.tv_usec);
    cout << elapsedUsec / 1000000 << endl;
    cerr << endl;

    vector<RepeatSubString> repeat;
    CaculateRepeatSubString(sa, repeat, 5);

    for (size_t k = 0; k < repeat.size(); ++k)
    {
        cout << repeat[k] << repeat[k].ToString(ws) << endl;
    }
}
示例5: buildIndexForTable
// Builds a suffix array over the reads in `pRT` and writes two files named
// after `prefix`: the BWT and the suffix array index. `isReverse` selects the
// reverse-index extensions (RBWT_EXT / RSAI_EXT) instead of the forward ones
// (BWT_EXT / SAI_EXT). When opt::validate is set the suffix array is
// validated against the read table before anything is written.
void buildIndexForTable(std::string prefix, const ReadTable* pRT, bool isReverse)
{
    // Output names depend only on the prefix and the direction.
    std::string bwt_filename = prefix + (isReverse ? RBWT_EXT : BWT_EXT);
    std::string sufidx_filename = prefix + (isReverse ? RSAI_EXT : SAI_EXT);

    // Create suffix array from read table
    SuffixArray* pSA = new SuffixArray(pRT, opt::numThreads);

    if(opt::validate)
    {
        std::cout << "Validating suffix array\n";
        pSA->validate(pRT);
    }

    pSA->writeBWT(bwt_filename, pRT);
    pSA->writeIndex(sufidx_filename);

    delete pSA;
}
示例6: print_ranks
// Prints a table with one row per index r in [0, sa.dp[0].size()): the
// character s[r] followed by the r-th entry of every sa.get_dp(c) column.
// Prints nothing when sa.dp is empty.
void
print_ranks(std::string const &s, SuffixArray &sa) {
    if (sa.dp.empty()) {
        return;
    }

    // The row count is taken from the first dp column.
    const int rows = sa.dp[0].size();
    for (int row = 0; row < rows; ++row) {
        printf("%3c", s[row]);
        for (size_t col = 0; col < sa.dp.size(); ++col) {
            printf("%4d", sa.get_dp(col)[row]);
        }
        printf("\n");
    }
}
示例7: find_max_length
// Returns the length of the longest substring of `s` for which sa.find()
// succeeds.
//
// Keeps a window starting at `start` whose current best length is `best`:
// if the window extended by one character is found, the best length grows;
// otherwise the window start slides right by one. The scan ends when the
// window would reach the end of `s`.
static Int find_max_length( const SuffixArray& sa, const String& s ) {
    const int n = s.size();
    Int best = 0;
    int start = 0;
    while ( start + best < n ) {
        if ( !sa.find(s.substr(start, best + 1)) ) {
            ++start;
            continue;
        }
        ++best;
    }
    return best;
}
示例8: print_suffix_array
void
print_suffix_array(std::string const &s, SuffixArray &sa, vi_t *plcp = NULL) {
vi_t pos;
sa.sorted_indexes(pos);
for (size_t i = 0; i < pos.size(); ++i) {
// Limit each line to 60 characters
if (plcp) {
printf("%3d: [%2d]: %s\n", pos[i], (*plcp)[pos[i]], s.substr(pos[i], 60).c_str());
}
else {
printf("%3d: %s\n", pos[i], s.substr(pos[i], 60).c_str());
}
}
}
示例9: longest_common_prefix
// Computes the LCP array for the suffix array `sa` of the sequence `s`:
// lcp[k] is the length of the common prefix of the suffixes at sa[k] and
// sa[k + 1], so the result has sa.size() - 1 entries (empty when sa has fewer
// than two entries).
//
// The first sa.size() - 1 elements of `s` are copied into a work buffer whose
// last slot stays 0 and acts as an end-of-text sentinel.
//
// NOTE(review): `T` is declared outside this excerpt (presumably a template
// parameter) and SuffixArray is used as an indexable sequence of suffix start
// positions -- confirm against the surrounding file.
std::vector<int> longest_common_prefix(const T &s, const SuffixArray &sa){
    const int n = sa.size();
    // With fewer than two suffixes there are no adjacent pairs. This also
    // keeps lcp from being sized with n - 1 == -1 when sa is empty.
    if(n < 2){ return std::vector<int>(); }

    std::vector<int> vs(n), isa(n), lcp(n - 1);
    for(int i = 0; i + 1 < n; ++i){ vs[i] = s[i]; }
    // isa is the inverse permutation: isa[p] is the rank of the suffix at p.
    for(int i = 0; i < n; ++i){ isa[sa[i]] = i; }

    // Walk the suffixes in text order, carrying the match length h over from
    // the previous suffix minus one.
    int h = 0;
    for(int i = 0; i < n; ++i){
        const int j = isa[i];
        if(j > 0){
            const int k = j - 1;
            // The bounds checks keep the scan inside vs even if the sentinel
            // value also occurs inside s.
            while(sa[j] + h < n && sa[k] + h < n &&
                  vs[sa[j] + h] == vs[sa[k] + h]){ ++h; }
            lcp[k] = h;
            if(h > 0){ --h; }
        }
    }
    return lcp;
}
示例10: addOverlapsSA
/**
 * Add the overlaps of vseq to the graph.
 *
 * Starting from vseq without its last character, each query is looked up in
 * the suffix array and then passed to chop() until it is shorter than
 * opt::minOverlap. For every vertex u among the matches an edge u -> v is
 * added once, using the first (longest) query that matched it.
 */
static void addOverlapsSA(Graph& g, const SuffixArray& sa,
        ContigNode v, const string& vseq)
{
    assert(!vseq.empty());

    typedef SuffixArray::const_iterator It;
    set<ContigNode> seen;

    string query(vseq, 0, vseq.size() - 1);
    while (query.size() >= opt::minOverlap) {
        const pair<It, It> hits = sa.equal_range(query);
        for (It hit = hits.first; hit != hits.second; ++hit) {
            ContigNode u(hit->second);
            // A vertex already seen was matched by an earlier query.
            if (!seen.insert(u).second)
                continue;
            // Add the longest overlap between two vertices.
            unsigned overlap = hit->first.size();
            add_edge(u, v, -overlap, static_cast<DG&>(g));
        }
        chop(query);
    }
}
示例11: RunTest
// Measures Collector::Extend latency per position within a phrase.
//
// For every phrase key in `ngrams` a fresh collector is created and extended
// one word at a time. The time of the i-th Extend call is accumulated into
// speedData[i], so speedData must have at least as many entries as the
// longest phrase. A '.' is written to stdout after every 10000 Extend calls.
void RunTest(SuffixArray &index, const context_t *context,
             const unordered_map<vector<wid_t>, size_t, phrase_hash> &ngrams, vector<speed_perf_t> &speedData) {
    size_t queryCount = 0;

    for (const auto &entry : ngrams) {
        const vector<wid_t> &phrase = entry.first;
        Collector *collector = index.NewCollector(context, true);

        for (size_t pos = 0; pos < phrase.size(); ++pos) {
            double begin = GetTime();

            vector<sample_t> samples;
            collector->Extend(phrase[pos], 1000, samples);

            speedData[pos].seconds += GetElapsedTime(begin);
            speedData[pos].requests++;

            // Progress indicator.
            if (++queryCount % 10000 == 0)
                cout << "." << flush;
        }

        delete collector;
    }
}
示例12: ILCPConstruct
// Fills *ilcp with per-document LCP values arranged in the order of the
// global suffix array `sa`.
//
// Step 1: the text is cut into documents at every byte whose value is <= 1
// (and at the end of the text). A separate suffix array is built for each
// document, and the LCP value of each of its suffixes is stored at that
// suffix's position in the whole text. Because `start` is set to the
// delimiter's index, every document after the first begins with its
// delimiter byte.
//
// Step 2: the values are permuted in place so that entry i ends up holding
// the value previously stored at text position sa.sa(i).
//
// *ilcp is resized to sa.size(); any previous content is overwritten.
void ILCPConstruct(const SuffixArray& sa,
                   std::vector<SuffixArray::Index>* ilcp) {
    typedef SuffixArray::Index Index;
    std::vector<Index>& text_lcp = *ilcp;
    text_lcp.resize(sa.size());

    const char* text = sa.text();
    Index start = 0;
    for (Index i = 0; i <= (Index)sa.size(); ++i) {
        if (i == (Index)sa.size() || (unsigned char)text[i] <= 1) {
            const char* doc = text + start;
            Index doc_len = i - start;
            SuffixArray doc_sa(doc, doc_len);
            for (Index j = 0; j < doc_len; ++j) {
                Index p = doc_sa.sa(j);
                Index lcp = doc_sa.lcp(j);
                // Index by text position, not by rank within the document.
                text_lcp[start + p] = lcp;
            }
            start = i;
        }
    }

    // Apply the permutation text_lcp[i] = text_lcp[sa.sa(i)] in place by
    // following each cycle once; `visited` marks positions already settled.
    std::vector<bool> visited(sa.size());
    for (Index i = 0; i < (Index)sa.size(); ++i) {
        if (visited[i]) continue;
        // Use Index (not int) so large positions are not truncated.
        Index j = i;
        while (true) {
            visited[j] = true;
            Index to = sa.sa(j);
            if (visited[to]) break;
            std::swap(text_lcp[j], text_lcp[to]);
            j = to;
        }
    }
}
示例13: parseDupHits
std::string parseDupHits(const StringVector& hitsFilenames, const std::string& out_prefix)
{
// Load the suffix array index and the reverse suffix array index
// Note these are not the full suffix arrays
SuffixArray* pFwdSAI = new SuffixArray(opt::prefix + SAI_EXT);
SuffixArray* pRevSAI = new SuffixArray(opt::prefix + RSAI_EXT);
// Load the read table to look up the lengths of the reads and their ids.
// When rmduping a set of reads, the ReadInfoTable can actually be larger than the
// BWT if the names of the reads are very long. Previously, when two reads
// are duplicated, the read with the lexographically lower read name was chosen
// to be kept. To save memory here, we break ties using the index in the ReadInfoTable
// instead. This allows us to avoid loading the read names.
ReadInfoTable* pRIT = new ReadInfoTable(opt::readsFile, pFwdSAI->getNumStrings(), RIO_NUMERICID);
std::string outFile = out_prefix + ".fa";
std::string dupFile = out_prefix + ".dups.fa";
std::ostream* pWriter = createWriter(outFile);
std::ostream* pDupWriter = createWriter(dupFile);
size_t substringRemoved = 0;
size_t identicalRemoved = 0;
size_t kept = 0;
size_t buffer_size = SequenceProcessFramework::BUFFER_SIZE;
// The reads must be output in their original ordering.
// The hits are in the blocks of buffer_size items. We read
// buffer_size items from the first hits file, then buffer_size
// from the second and so on until all the hits have been processed.
size_t num_files = hitsFilenames.size();
std::vector<std::istream*> reader_vec(num_files, 0);
for(size_t i = 0; i < num_files; ++i)
{
std::cout << "Opening " << hitsFilenames[i] << "\n";
reader_vec[i] = createReader(hitsFilenames[i]);
}
bool done = false;
size_t currReaderIdx = 0;
size_t numRead = 0;
size_t numReadersDone = 0;
std::string line;
while(!done)
{
// Parse a line from the current file
bool valid = getline(*reader_vec[currReaderIdx], line);
++numRead;
// Deal with switching the active reader and the end of files
if(!valid || numRead == buffer_size)
{
// Switch the reader
currReaderIdx = (currReaderIdx + 1) % num_files;
numRead = 0;
// Break once all the readers are invalid
if(!valid)
{
++numReadersDone;
if(numReadersDone == num_files)
{
done = true;
break;
}
}
}
// Parse the data
if(valid)
{
std::string id;
std::string sequence;
std::string hitsStr;
size_t readIdx;
size_t numCopies;
bool isSubstring;
std::stringstream parser(line);
parser >> id;
parser >> sequence;
getline(parser, hitsStr);
OverlapVector ov;
OverlapCommon::parseHitsString(hitsStr, pRIT, pRIT, pFwdSAI, pRevSAI, true, readIdx, numCopies, ov, isSubstring);
bool isContained = false;
if(isSubstring)
{
++substringRemoved;
isContained = true;
}
else
{
for(OverlapVector::iterator iter = ov.begin(); iter != ov.end(); ++iter)
{
if(iter->isContainment() && iter->getContainedIdx() == 0)
{
// This read is contained by some other read
++identicalRemoved;
//.........这里部分代码省略.........
示例14: buildBWTDisk
// The algorithm is as follows. We create M BWTs for subsets of
// the input reads. These are created independently and written
// to disk. They are then merged either sequentially or pairwise
// to create the final BWT
void buildBWTDisk(const std::string& in_filename, const std::string& out_prefix,
const std::string& bwt_extension, const std::string& sai_extension,
bool doReverse, int numThreads, int numReadsPerBatch, int storageLevel)
{
size_t MAX_READS_PER_GROUP = numReadsPerBatch;
SeqReader* pReader = new SeqReader(in_filename);
SeqRecord record;
int groupID = 0;
size_t numReadTotal = 0;
MergeVector mergeVector;
MergeItem mergeItem;
mergeItem.start_index = 0;
// Phase 1: Compute the initial BWTs
ReadTable* pCurrRT = new ReadTable;
bool done = false;
while(!done)
{
done = !pReader->get(record);
if(!done)
{
// the read is valid
SeqItem item = record.toSeqItem();
if(doReverse)
item.seq.reverse();
pCurrRT->addRead(item);
++numReadTotal;
}
if(pCurrRT->getCount() >= MAX_READS_PER_GROUP || (done && pCurrRT->getCount() > 0))
{
// Compute the SA and BWT for this group
SuffixArray* pSA = new SuffixArray(pCurrRT, numThreads);
// Write the BWT to disk
std::string bwt_temp_filename = makeTempName(out_prefix, groupID, bwt_extension);
pSA->writeBWT(bwt_temp_filename, pCurrRT);
std::string sai_temp_filename = makeTempName(out_prefix, groupID, sai_extension);
pSA->writeIndex(sai_temp_filename);
// Push the merge info
mergeItem.end_index = numReadTotal - 1; // inclusive
mergeItem.reads_filename = in_filename;
mergeItem.bwt_filename = bwt_temp_filename;
mergeItem.sai_filename = sai_temp_filename;
mergeVector.push_back(mergeItem);
// Cleanup
delete pSA;
// Start the new group
mergeItem.start_index = numReadTotal;
++groupID;
pCurrRT->clear();
}
}
delete pCurrRT;
delete pReader;
// Phase 2: Pairwise merge the BWTs
int round = 1;
MergeVector nextMergeRound;
while(mergeVector.size() > 1)
{
std::cout << "Starting round " << round << "\n";
pReader = new SeqReader(in_filename);
for(size_t i = 0; i < mergeVector.size(); i+=2)
{
if(i + 1 != mergeVector.size())
{
std::string bwt_merged_name = makeTempName(out_prefix, groupID, bwt_extension);
std::string sai_merged_name = makeTempName(out_prefix, groupID, sai_extension);
MergeItem item1 = mergeVector[i];
MergeItem item2 = mergeVector[i+1];
// Perform the actual merge
int64_t curr_idx = merge(pReader, item1, item2,
bwt_merged_name, sai_merged_name,
doReverse, numThreads, storageLevel);
// pReader now points to the end of item1's block of
// reads. Skip item2's reads
assert(curr_idx == item2.start_index);
while(curr_idx <= item2.end_index)
{
bool eof = !pReader->get(record);
assert(!eof);
(void)eof;
++curr_idx;
}
//.........这里部分代码省略.........
示例15: init_suffix_array
// Prepares `sa` for queries over `s` by running its three set-up steps in
// order: init(s), build(), buildHeight().
static void init_suffix_array( const String& s, SuffixArray& sa ) {
    sa.init(s);

    sa.build();

    sa.buildHeight();
}