本文整理汇总了C++中Words::getWord方法的典型用法代码示例。如果您正苦于以下问题：C++ Words::getWord方法的具体用法？C++ Words::getWord怎么用？C++ Words::getWord使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Words的用法示例。
在下文中一共展示了Words::getWord方法的3个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: getBestWindow
// . return the score of the highest-scoring window containing match #m
// . window is defined by the half-open interval [a,b) where a and b are
// word #'s in the Words array indicated by match #m
// . return -1 and set g_errno on error
int64_t Summary::getBestWindow ( Matches *matches, int32_t mm, int32_t *lasta,
int32_t *besta, int32_t *bestb, char *gotIt,
char *retired, int32_t maxExcerptLen ) {
// get the window around match #mm
Match *m = &matches->m_matches[mm];
// what is the word # of match #mm?
int32_t matchWordNum = m->m_wordNum;
// what Words/Pos/Bits classes is this match in?
Words *words = m->m_words;
Section **sp = NULL;
int32_t *pos = m->m_pos->m_pos;
// use "m_swbits" not "m_bits", that is what Bits::setForSummary() uses
const swbit_t *bb = m->m_bits->m_swbits;
// shortcut
if ( m->m_sections ) {
sp = m->m_sections->m_sectionPtrs;
}
int32_t nw = words->getNumWords();
int64_t *wids = words->getWordIds();
nodeid_t *tids = words->getTagIds();
// . sanity check
// . this prevents a core i've seen
if ( matchWordNum >= nw ) {
log("summary: got overflow condition for q=%s",m_q->m_orig);
// assume no best window
*besta = -1;
*bestb = -1;
*lasta = matchWordNum;
return 0;
}
// . we NULLify the section ptrs if we already used the word in another summary.
int32_t badFlags = SEC_SCRIPT|SEC_STYLE|SEC_SELECT|SEC_IN_TITLE;
if ( (bb[matchWordNum] & D_USED) || ( sp && (sp[matchWordNum]->m_flags & badFlags) ) ) {
// assume no best window
*besta = -1;
*bestb = -1;
*lasta = matchWordNum;
return 0;
}
// . "a" is the left fence post of the window (it is a word # in Words)
// . go to the left as far as we can
// . thus we decrement "a"
int32_t a = matchWordNum;
// "posa" is the character position of the END of word #a
int32_t posa = pos[a+1];
int32_t firstFrag = -1;
bool startOnQuote = false;
bool goodStart = false;
int32_t wordCount = 0;
// . decrease "a" as long as we stay within maxExcerptLen
// . avoid duplicating windows by using "lasta", the last "a" of the
// previous call to getBestWindow(). This can happen if our last
// central query term was close to this one.
for ( ; a > 0 && posa - pos[a-1] < maxExcerptLen && a > *lasta; a-- ) {
// . don't include any "dead zone",
// . dead zones have already been used for the summary, and
// we are getting a second/third/... excerpt here now then
// stop if its the start of a sentence, too
// stop before title word
if ( (bb[a-1] & D_USED) || (bb[a] & D_STARTS_SENTENCE) || ( bb[a-1] & D_IN_TITLE )) {
goodStart = true;
break;
}
// don't go beyond an LI, TR, P tag
if ( tids && ( tids[a-1] == TAG_LI ||
tids[a-1] == TAG_TR ||
tids[a-1] == TAG_P ||
tids[a-1] == TAG_DIV ) ) {
goodStart = true;
break;
}
// stop if its the start of a quoted sentence
if ( a+1<nw && (bb[a+1] & D_IN_QUOTES) &&
words->getWord(a)[0] == '\"' ){
startOnQuote = true;
goodStart = true;
break;
}
// find out the first instance of a fragment (comma, etc)
// watch out! because frag also means 's' in there's
if ( ( bb[a] & D_STARTS_FRAG ) && !(bb[a-1] & D_IS_STRONG_CONNECTOR) && firstFrag == -1 ) {
firstFrag = a;
//.........这里部分代码省略.........
示例2: setTitle
//.........这里部分代码省略.........
// advance
n++;
// break out if too many already. save some for below.
if ( n + 20 >= MAX_TIT_CANDIDATES ) {
break;
}
}
//logf(LOG_DEBUG,"title: took2=%" PRId64,gettimeofdayInMilliseconds()-x);
//x = gettimeofdayInMilliseconds();
//int64_t *wids = WW->getWordIds();
// . find the last positive scoring guy
// . do not consider title candidates after "r" if "r" is non-zero
// . FIXES http://larvatusprodeo.net/2009/01/07/partisanship-politics-and-participation/
// the candidate # of the title tag
int32_t tti = -1;
// allow up to 4 tags from each type
char table[512];
// sanity check
if ( getNumXmlNodes() > 512 ) { char *xx=NULL;*xx=0; }
// clear table counts
memset ( table , 0 , 512 );
// the first word
char *wstart = NULL;
if ( NW > 0 ) {
wstart = words->getWord(0);
}
// loop over all "words" in the html body
for ( int32_t i = 0 ; i < NW ; i++ ) {
// come back up here if we encounter another "title-ish" tag
// within our first alleged "title-ish" tag
subloop:
// stop once we are more than 200000 bytes past the first word
if ( words->getWord(i) - wstart > 200000 ) {
break; // 1106
}
// get the tag id minus the back tag bit
nodeid_t tid = tids[i] & BACKBITCOMP;
// pen up and pen down for these comment like tags
if ( tid == TAG_SCRIPT || tid == TAG_STYLE ) {
// ignore "titles" in script or style tags
if ( ! (tids[i] & BACKBIT) ) {
continue;
}
}
/// @todo ALC we should allow more tags than just title/link
// skip if not a good tag.
if (tid != TAG_TITLE && tid != TAG_A) {
continue;
}
// must NOT be a back tag
if ( tids[i] & BACKBIT ) {
示例3: setSummary
//.........这里部分代码省略.........
skip = false;
}
if ( skip ) {
continue;
}
// ask him for the query words he matched
//char gotIt [ MAX_QUERY_WORDS ];
// clear it for him
memset ( gotIt, 0, m_q->m_numWords * sizeof(char) );
// . get score of best window around this match
// . do not allow left post of window to be <= lasta to
// avoid repeating the same window.
int64_t score = getBestWindow (matches, i, &lasta, &a, &b, gotIt, retired, maxExcerptLen);
// USE THIS BUF BELOW TO DEBUG THE ABOVE CODE.
// PRINTS OUT THE SUMMARY
/*
//if ( score >=12000 ) {
char buf[10*1024];
char *xp = buf;
if ( i == 0 )
log (LOG_WARN,"=-=-=-=-=-=-=-=-=-=-=-=-=-=-=");
sprintf(xp, "score=%08" PRId32" a=%05" PRId32" b=%05" PRId32" ",
(int32_t)score,(int32_t)a,(int32_t)b);
xp += strlen(xp);
for ( int32_t j = a; j < b; j++ ){
//int32_t s = scores->m_scores[j];
int32_t s = 0;
if ( s < 0 ) continue;
char e = 1;
int32_t len = words->getWordLen(j);
for(int32_t k=0;k<len;k +=e){
char c = words->m_words[j][k];
//if ( is_binary( c ) ) continue;
*xp = c;
xp++;
}
//p += strlen(p);
if ( s == 0 ) continue;
sprintf ( xp ,"(%" PRId32")",s);
xp += strlen(xp);
}
log (LOG_WARN,"query: summary: %s", buf);
//}
*/
// prints out the best window with the score
/*
char buf[MAX_SUMMARY_LEN];
char *bufPtr = buf;
char *bufPtrEnd = p + MAX_SUMMARY_LEN;
if ( i == 0 )
log (LOG_WARN,"=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=");
int32_t len = 0;
Words *ww = matches->m_matches[i].m_words;
//Sections *ss = matches->m_matches[i].m_sections;
//if ( ss->m_numSections <= 0 ) ss = NULL;
//len=pos->filter(bufPtr, bufPtrEnd, ww, a, b, NULL);
//log(LOG_WARN,"summary: %" PRId32") %s - %" PRId64,i,bufPtr,
//score);
log(LOG_WARN,"summary: %" PRId32") %s - %" PRId64,i,bufPtr,
score);
*/