本文整理汇总了 C++ 中 Words::set3 方法的典型用法代码示例。如果您正苦于以下问题：C++ Words::set3 方法的具体用法？C++ Words::set3 怎么用？C++ Words::set3 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 Words 的用法示例。
在下文中一共展示了 Words::set3 方法的 1 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 C++ 代码示例。
示例1:
// langId is language of the query
long long getSynBaseHash64 ( char *qstr , uint8_t langId ) {
Words ww;
ww.set3 ( qstr );
long nw = ww.getNumWords();
long long *wids = ww.getWordIds();
//char **wptrs = ww.getWords();
//long *wlens = ww.getWordLens();
long long baseHash64 = 0LL;
Synonyms syn;
// assume english if unknown to fix 'pandora's tower'
// vs 'pandoras tower' where both words are in both
// english and german so langid is unknown
if ( langId == langUnknown ) langId = langEnglish;
// . store re-written query into here then hash that string
// . this way we can get rid of spaces
//char rebuf[1024];
//char *p = rebuf;
//if ( strstr(qstr,"cheatcodes") )
// log("hey");
// for deduping
HashTableX dups;
if ( ! dups.set ( 8,0,1024,NULL,0,false,0,"qhddup") ) return false;
// scan the words
for ( long i = 0 ; i < nw ; i++ ) {
// skip if not alnum
if ( ! wids[i] ) continue;
// get its synonyms into tmpBuf
char tmpBuf[TMPSYNBUFSIZE];
// . assume niceness of 0 for now
// . make sure to get all synsets!! ('love' has two synsets)
long naids = syn.getSynonyms (&ww,i,langId,tmpBuf,0);
// term freq algo
//long pop = g_speller.getPhrasePopularity(NULL,
// wids[i],
// true,
// langId);
// is it a queryStopWord like "the" or "and"?
bool isQueryStop = ::isQueryStopWord(NULL,0,wids[i]);
// a more restrictive list
bool isStop = ::isStopWord(NULL,0,wids[i]);
if ( ::isCommonQueryWordInEnglish(wids[i]) ) isStop = true;
// find the smallest one
unsigned long long min = wids[i];
//char *minWordPtr = wptrs[i];
//long minWordLen = wlens[i];
// declare up here since we have a goto below
long j;
// add to table too
if ( dups.isInTable ( &min ) ) goto gotdup;
// add to it
if ( ! dups.addKey ( &min ) ) return false;
// now scan the synonyms, they do not include "min" in them
for ( j = 0 ; j < naids ; j++ ) {
// get it
unsigned long long aid64;
aid64 = (unsigned long long)syn.m_aids[j];
// if any syn already hashed then skip it and count
// as a repeated term. we have to do it this way
// rather than just getting the minimum synonym
// word id, because 'love' has two synsets and
// 'like', a synonym of 'love' only has one synset
// and they end up having different minimum synonym
// word ids!!!
if ( dups.isInTable ( &aid64 ) ) break;
// add it. this could fail!
if ( ! dups.addKey ( &aid64 ) ) return false;
// set it?
if ( aid64 >= min ) continue;
// got a new min
min = aid64;
//minWordPtr = syn.m_termPtrs[j];
//minWordLen = syn.m_termLens[j];
// get largest term freq of all synonyms
//long pop2 = g_speller.getPhrasePopularity(NULL,aid64,
// true,langId);
//if ( pop2 > pop ) pop = pop2;
}
// early break out means a hit in dups table
if ( j < naids ) {
gotdup:
// do not count as repeat if query stop word
// because they often repeat
if ( isQueryStop ) continue;
// count # of repeated word forms
//nrwf++;
continue;
}
// hash that now
// do not include stop words in synbasehash so
// 'search the web' != 'search web'
if ( ! isStop ) {
// no! make it order independent so 'search the web'
// equals 'web the search' and 'engine search'
// equals 'search engine'
//baseHash64 <<= 1LL;
baseHash64 ^= min;
}
// count it, but only if not a query stop word like "and"
// or "the" or "a". # of unique word forms.
//.........这里部分代码省略.........