本文整理汇总了 C++ 中 Words::getWordLen 方法的典型用法代码示例。如果您正苦于以下问题：C++ Words::getWordLen 方法的具体用法？C++ Words::getWordLen 怎么用？C++ Words::getWordLen 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 Words 的用法示例。
在下文中一共展示了 Words::getWordLen 方法的 2 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 C++ 代码示例。
示例1: setTitle
//.........这里部分代码省略.........
for ( int32_t i = 0 ; i < oldn && n + 3 < MAX_TIT_CANDIDATES ; i++ ) {
// stop if no root title segments
if ( nr <= 0 ) break;
// get the word info
Words *w = cptrs[i];
int32_t a = as[i];
int32_t b = bs[i];
// init
int32_t lasta = a;
char prev = false;
// char length in bytes
//int32_t charlen = 1;
// see how many we add
int32_t added = 0;
char *skipTo = NULL;
bool qualified = true;
// . scan the words looking for a token
// . sometimes the candidates end in ": " so put in "k < b-1"
// . made this from k<b-1 to k<b to fix
// "Hot Tub Time Machine (2010) - IMDb" to strip IMDb
for ( int32_t k = a ; k < b && n + 3 < MAX_TIT_CANDIDATES; k++){
// get word
char *wp = w->getWord(k);
// skip if not alnum
if ( ! w->isAlnum(k) ) {
// in order for next alnum word to
// qualify for "clipping" if it matches
// the root title, there has to be more
// than just spaces here, some punct.
// otherwise title
// "T. D. Jakes: Biography from Answers.com"
// becomes
// "T. D. Jakes: Biography from"
qualified=isWordQualified(wp,w->getWordLen(k));
continue;
}
// gotta be qualified!
if ( ! qualified ) continue;
// skip if in root title
if ( skipTo && wp < skipTo ) continue;
// does this match any root page title segments?
int32_t j;
for ( j = 0 ; j < nr ; j++ ) {
// . compare to root title
// . break out if we matched!
if ( ! strncmp( wp, rootTitles[j], rootTitleLens[j] ) ) {
break;
}
}
// if we did not match a root title segment,
// keep on chugging
if ( j >= nr ) continue;
// . we got a root title match!
// . skip over
skipTo = wp + rootTitleLens[j];
// must land on qualified punct then!!
int32_t e = k+1;
for ( ; e<b && w->getWord(e)<skipTo ; e++ );
// ok, word #e must be a qualified punct
if ( e<b &&
! isWordQualified(w->getWord(e),w->getWordLen(e)))
// assume no match then!!
continue;
// if we had a previous guy, reset the end of the
// previous candidate
示例2: getBestWindow
//.........这里部分代码省略.........
// . the match at the center of the window is match #"mm", so that
// matches->m_matches[mm] is the Match class
// . set "mi" to it and back up "mi" as long as the preceding match's word number is >= a
for ( mi = mm ; mi > 0 && ms[mi-1].m_wordNum >=a ; mi-- )
;
// now get the score of this excerpt. Also mark all the represented
// query words. Mark the represented query words in the array that
// comes to us. also mark how many times the same word is repeated in
// this summary.
int64_t score = 0LL;
// is a url contained in the summary, that looks bad! punish!
bool hasUrl = false;
// the word count we did above was just an approximate. count it right
wordCount = 0;
// for debug
//char buf[5000];
//char *xp = buf;
SafeBuf xp;
// wtf?
if ( b > nw ) {
b = nw;
}
// first score from the starting match down to a, including match
for ( int32_t i = a ; i < b ; i++ ) {
// debug print out
if ( g_conf.m_logDebugSummary ) {
int32_t len = words->getWordLen(i);
char cs;
for (int32_t k=0;k<len; k+=cs ) {
const char *c = words->getWord(i)+k;
cs = getUtf8CharSize(c);
if ( is_binary_utf8 ( c ) ) {
continue;
}
xp.safeMemcpy ( c , cs );
xp.nullTerm();
}
}
// skip if in bad section, marquee, select, script, style
if ( sp && (sp[i]->m_flags & badFlags) ) {
continue;
}
// don't count just numeric words
if ( words->isNum(i) ) {
continue;
}
// check if there is a url. best way to check for '://'
if ( wids && !wids[i] ) {
const char *wrd = words->getWord(i);
int32_t wrdLen = words->getWordLen(i);
if ( wrdLen == 3 && wrd[0] == ':' && wrd[1] == '/' && wrd[2] == '/' ) {
hasUrl = true;
}
}
// skip if not wid