当前位置: 首页>>代码示例>>C++>>正文


C++ Words::getWord方法代码示例

本文整理汇总了C++中Words::getWord方法的典型用法代码示例。如果您正苦于以下问题:C++ Words::getWord方法的具体用法?C++ Words::getWord怎么用?C++ Words::getWord使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Words的用法示例。


在下文中一共展示了Words::getWord方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: getBestWindow

// . return the score of the highest-scoring window containing match #mm
// . window is defined by the half-open interval [a,b) where a and b are
//   word #'s in the Words array indicated by match #mm
// . return -1 and set g_errno on error
int64_t Summary::getBestWindow ( Matches *matches, int32_t mm, int32_t *lasta,
                                 int32_t *besta, int32_t *bestb, char *gotIt,
                                 char *retired, int32_t maxExcerptLen ) {
	// get the window around match #mm
	Match *m = &matches->m_matches[mm];

	// what is the word # of match #mm?
	int32_t matchWordNum = m->m_wordNum;

	// what Words/Pos/Bits classes is this match in?
	Words *words = m->m_words;
	Section **sp = NULL;
	int32_t *pos = m->m_pos->m_pos;

	// use "m_swbits" not "m_bits", that is what Bits::setForSummary() uses
	const swbit_t *bb = m->m_bits->m_swbits;

	// shortcut
	if ( m->m_sections ) {
		sp = m->m_sections->m_sectionPtrs;
	}

	int32_t nw = words->getNumWords();
	int64_t *wids = words->getWordIds();
	nodeid_t *tids = words->getTagIds();

	// . sanity check
	// . this prevents a core i've seen
	if ( matchWordNum >= nw ) {
		log("summary: got overflow condition for q=%s",m_q->m_orig);

		// assume no best window
		*besta = -1;
		*bestb = -1;
		*lasta = matchWordNum;
		return 0;
	}

	// . we NULLify the section ptrs if we already used the word in another summary.
	int32_t badFlags = SEC_SCRIPT|SEC_STYLE|SEC_SELECT|SEC_IN_TITLE;
	if ( (bb[matchWordNum] & D_USED) || ( sp && (sp[matchWordNum]->m_flags & badFlags) ) ) {
		// assume no best window
		*besta = -1;
		*bestb = -1;
		*lasta = matchWordNum;
		return 0;
	}

	// . "a" is the left fence post of the window (it is a word # in Words)
	// . go to the left as far as we can 
	// . thus we decrement "a"
	int32_t a = matchWordNum;

	// "posa" is the character position of the END of word #a
	int32_t posa = pos[a+1];
	int32_t firstFrag = -1;
	bool startOnQuote = false;
	bool goodStart = false;
	int32_t wordCount = 0;

	// . decrease "a" as long as we stay within maxExcerptLen characters
	// . avoid duplicating windows by using "lasta", the last "a" of the
	//   previous call to getBestWindow(). This can happen if our last
	//   central query term was close to this one.
	for ( ; a > 0 && posa - pos[a-1] < maxExcerptLen && a > *lasta; a-- ) {
		// . don't include any "dead zone", 
		// . dead zones have already been used for the summary, and
		//   we are getting a second/third/... excerpt here now then
		// stop if its the start of a sentence, too
		// stop before title word
		if ( (bb[a-1] & D_USED) || (bb[a] & D_STARTS_SENTENCE) || ( bb[a-1] & D_IN_TITLE )) {
			goodStart = true;
			break;
		}

		// don't go beyond an LI, TR, P or DIV tag
		if ( tids && ( tids[a-1] == TAG_LI ||
		               tids[a-1] == TAG_TR ||
		               tids[a-1] == TAG_P  ||
		               tids[a-1] == TAG_DIV ) ) {
			goodStart = true;
			break;
		}

		// stop if its the start of a quoted sentence
		if ( a+1<nw && (bb[a+1] & D_IN_QUOTES) && 
		     words->getWord(a)[0] == '\"' ){
			startOnQuote = true;
			goodStart    = true;
			break;
		}

		// find out the first instance of a fragment (comma, etc)
		// watch out! because frag also means 's' in there's
		if ( ( bb[a] & D_STARTS_FRAG ) && !(bb[a-1] & D_IS_STRONG_CONNECTOR) && firstFrag == -1 ) {
			firstFrag = a;
//.........这里部分代码省略.........
开发者ID:exename,项目名称:open-source-search-engine,代码行数:101,代码来源:Summary.cpp

示例2: setTitle


//.........这里部分代码省略.........

		// advance
		n++;

		// break out if too many already. save some for below.
		if ( n + 20 >= MAX_TIT_CANDIDATES ) {
			break;
		}
	}

	//logf(LOG_DEBUG,"title: took2=%" PRId64,gettimeofdayInMilliseconds()-x);
	//x = gettimeofdayInMilliseconds();

	//int64_t *wids = WW->getWordIds();
	// . find the last positive scoring guy
	// . do not consider title candidates after "r" if "r" is non-zero
	// . FIXES http://larvatusprodeo.net/2009/01/07/partisanship-politics-and-participation/

	// the candidate # of the title tag
	int32_t tti = -1;

	// allow up to 4 tags from each type
	char table[512];

	// sanity check
	if ( getNumXmlNodes() > 512 ) { char *xx=NULL;*xx=0; }

	// clear table counts
	memset ( table , 0 , 512 );

	// the first word
	char *wstart = NULL;
	if ( NW > 0 ) {
		wstart = words->getWord(0);
	}

	// loop over all "words" in the html body
	for ( int32_t i = 0 ; i < NW ; i++ ) {
		// come back up here if we encounter another "title-ish" tag
		// within our first alleged "title-ish" tag
	subloop:
		// stop once we are more than 200000 chars past the first word
		if ( words->getWord(i) - wstart > 200000 ) {
			break; // 1106
		}

		// get the tag id minus the back tag bit
		nodeid_t tid = tids[i] & BACKBITCOMP;


		// pen up and pen down for these comment like tags
		if ( tid == TAG_SCRIPT || tid == TAG_STYLE ) {
			// ignore "titles" in script or style tags
			if ( ! (tids[i] & BACKBIT) ) {
				continue;
			}
		}

		/// @todo ALC we should allow more tags than just title/link
		// skip if not a good tag.
		if (tid != TAG_TITLE && tid != TAG_A) {
			continue;
		}

		// must NOT be a back tag
		if ( tids[i] & BACKBIT ) {
开发者ID:lemire,项目名称:open-source-search-engine,代码行数:67,代码来源:Title.cpp

示例3: setSummary


//.........这里部分代码省略.........
				skip = false;
			}

			if ( skip ) {
				continue;
			}

			// ask him for the query words he matched
			//char gotIt [ MAX_QUERY_WORDS ];
			// clear it for him
			memset ( gotIt, 0, m_q->m_numWords * sizeof(char) );

			// . get score of best window around this match
			// . do not allow left post of window to be <= lasta to
			//   avoid repeating the same window.
			int64_t score = getBestWindow (matches, i, &lasta, &a, &b, gotIt, retired, maxExcerptLen);
			
			// USE THIS BUF BELOW TO DEBUG THE ABOVE CODE. 
			// PRINTS OUT THE SUMMARY
			/*
			//if ( score >=12000 ) {
			char buf[10*1024];
			char *xp = buf;
			if ( i == 0 )
				log (LOG_WARN,"=-=-=-=-=-=-=-=-=-=-=-=-=-=-=");
			sprintf(xp, "score=%08" PRId32" a=%05" PRId32" b=%05" PRId32" ",
				(int32_t)score,(int32_t)a,(int32_t)b);
			xp += strlen(xp);
			for ( int32_t j = a; j < b; j++ ){
				//int32_t s = scores->m_scores[j];
				int32_t s = 0;
				if ( s < 0 ) continue;
				char e = 1;
				int32_t len = words->getWordLen(j);
				for(int32_t k=0;k<len;k +=e){
					char c = words->m_words[j][k];
					//if ( is_binary( c ) ) continue;
					*xp = c;
					xp++;
				}
				//p += strlen(p);
				if ( s == 0 ) continue;
				sprintf ( xp ,"(%" PRId32")",s);
				xp += strlen(xp);
			}
			log (LOG_WARN,"query: summary: %s", buf);
			//}
			*/

			// prints out the best window with the score
			/*
			char buf[MAX_SUMMARY_LEN];
			  char *bufPtr = buf;
			  char *bufPtrEnd = p + MAX_SUMMARY_LEN;
			  if ( i == 0 )
			  log (LOG_WARN,"=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=");
			  int32_t len = 0;
			  Words *ww  = matches->m_matches[i].m_words;
			  //Sections *ss = matches->m_matches[i].m_sections;
			  //if ( ss->m_numSections <= 0 ) ss = NULL;
			  //len=pos->filter(bufPtr, bufPtrEnd, ww, a, b, NULL);
			  //log(LOG_WARN,"summary: %" PRId32") %s - %" PRId64,i,bufPtr,
			  //score);
			  log(LOG_WARN,"summary: %" PRId32") %s - %" PRId64,i,bufPtr,
			  score);
			*/
开发者ID:exename,项目名称:open-source-search-engine,代码行数:67,代码来源:Summary.cpp


注:本文中的Words::getWord方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。