当前位置: 首页>>代码示例>>C++>>正文


C++ Query::getTermId方法代码示例

本文整理汇总了C++中Query::getTermId方法的典型用法代码示例。如果您正苦于以下问题:C++ Query::getTermId方法的具体用法?C++ Query::getTermId怎么用?C++ Query::getTermId使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在Query的用法示例。


在下文中一共展示了Query::getTermId方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: setCandidates


//.........这里部分代码省略.........
	memset ( tc , 0 , 512 );
	long a = firstPosScore;
	for ( ; a >= 0 ; a-- ) {
		// get the tid
		nodeid_t tid = tids[a];
		// remove back bit, if any
		tid &= BACKBITCOMP;
		// skip if not a tag, or a generic xml tag
		if ( tid <= 1 ) continue;
		// mark it
		if ( words->isBackTag(a) ) tc[tid] |= 0x02;
		else                       tc[tid] |= 0x01;
		// continue if not a full front/back pair
		if ( tc[tid] != 0x03 ) continue;
		// continue if not a "section" type tag (see Scores.cpp)
		if ( tid != TAG_DIV      &&
		     tid != TAG_TEXTAREA &&
                     tid != TAG_TR       &&
                     tid != TAG_TD       &&
                     tid != TAG_TABLE      ) 
			continue;
		// ok we should stop now
		break;
	}		
	// min is 0
	if ( a < 0 ) a = 0;

	// now look for the image urls within this window
	for ( long i = a ; i < lastPosScore ; i++ ) {
		// skip if not <img> tag
		if (tids[i] != TAG_IMG ) continue;
		// get the node num into Xml.cpp::m_nodes[] array
		long nn = words->m_nodes[i];
		// check width to rule out small decorating imgs
		long width = xml->getLong(nn,nn+1,"width", -1 );
		if ( width != -1 && width < 50 ) continue;
		// same with height
		long height = xml->getLong(nn,nn+1, "height", -1 );
		if ( height != -1 && height < 50 ) continue;
		// get the url of the image
		long  srcLen;
		char *src = xml->getString(nn,"src",&srcLen);
		// skip if none
		if ( srcLen <= 2 ) continue;
		// set it to the full url
		Url iu;
		// use "pageUrl" as the baseUrl
		iu.set ( pageUrl , src , srcLen ); 
		// skip if invalid domain or TLD
		if ( iu.getDomainLen() <= 0 ) continue;
		// skip if not from same domain as page url
		//long dlen = pageUrl->getDomainLen();
		//if ( iu.getDomainLen() != dlen ) continue;
		//if(strncmp(iu.getDomain(),pageUrl->getDomain(),dlen))continue
		// get the full url
		char *u    = iu.getUrl();
		long  ulen = iu.getUrlLen();
		// skip common crap
		if ( strncasestr(u,ulen,"logo"           ) ) continue;
		if ( strncasestr(u,ulen,"comment"        ) ) continue;
		if ( strncasestr(u,ulen,"print"          ) ) continue;
		if ( strncasestr(u,ulen,"subscribe"      ) ) continue;
		if ( strncasestr(u,ulen,"header"         ) ) continue;
		if ( strncasestr(u,ulen,"footer"         ) ) continue;
		if ( strncasestr(u,ulen,"menu"           ) ) continue;
		if ( strncasestr(u,ulen,"button"         ) ) continue;
		if ( strncasestr(u,ulen,"banner"         ) ) continue;
		if ( strncasestr(u,ulen,"ad.doubleclick.") ) continue;
		if ( strncasestr(u,ulen,"ads.webfeat."   ) ) continue;
		if ( strncasestr(u,ulen,"xads.zedo."     ) ) continue;

		// save it
		m_imageNodes[m_numImages] = nn;

		// before we lookup the image url to see if it is unique we
		// must first make sure that we have an adequate number of
		// permalinks from this same site with this same hop count.
		// we need at least 10 before we extract image thumbnails.
		char buf[2000];
		// set the query
		Query q;

		// if we do have 10 or more, then we lookup the image url to
		// make sure it is indeed unique
		sprintf ( buf , "gbimage:%s",u);
		// TODO: make sure this is a no-split termid storage thingy
		// in Msg14.cpp
		if ( ! q.set2 ( buf , langUnknown , false ) )
			// return true with g_errno set on error
			return;
		// store the termid
		m_termIds[m_numImages] = q.getTermId(0);

		// advance the counter
		m_numImages++;

		// break if full
		if ( m_numImages >= MAX_IMAGES ) break;
	}
}
开发者ID:UIKit0,项目名称:open-source-search-engine,代码行数:101,代码来源:Images.cpp

示例2: setCandidates

void Images::setCandidates ( Url *pageUrl , Words *words , Xml *xml ,
			     Sections *sections , XmlDoc *xd ) {
	// not valid for now
	m_thumbnailValid = false;
	// reset our array of image node candidates
	m_numImages = 0;
	// flag it
	m_setCalled = true;
	// strange...
	if ( m_imgReply ) { char *xx=NULL;*xx=0; }
	// save this
	m_xml       = xml;
	m_pageUrl   = pageUrl;

	//
	// first add any open graph candidate.
	// basically they page telling us the best image straight up.
	//

	int32_t node2 = -1;
	int32_t startNode = 0;

	// . field can be stuff like "summary","description","keywords",...
	// . if "convertHtmlEntites" is true we change < to &lt; and > to &gt;
	// . <meta property="og:image" content="http://example.com/rock2.jpg"/>
	// . <meta property="og:image" content="http://example.com/rock3.jpg"/>
 ogimgloop:
	char ubuf[2000];
	int32_t ulen = xml->getMetaContent( ubuf, 1999, "og:image", 8, "property", startNode, &node2 );

	// update this in case goto ogimgloop is called
	startNode = node2 + 1;
	// see section below for explanation of what we are storing here...
	if ( node2 >= 0 ) {
		// save it
		m_imageNodes[m_numImages] = node2;
		Query q;
		if ( ulen > MAX_URL_LEN ) goto ogimgloop;
		// set it to the full url
		Url iu;
		// use "pageUrl" as the baseUrl
		iu.set( pageUrl, ubuf, ulen );
		// skip if invalid domain or TLD
		if ( iu.getDomainLen() <= 0 ) goto ogimgloop;
		// for looking it up on disk to see if unique or not
		char buf[2000];
		// if we don't put in quotes it expands '|' into
		// the "PiiPe" operator in Query.cpp
		snprintf ( buf , 1999, "gbimage:\"%s\"",iu.getUrl());
		// TODO: make sure this is a no-split termid storage thingy
		// in Msg14.cpp
		if ( ! q.set2 ( buf , langUnknown , false ) ) return;
		// sanity test
		if ( q.getNumTerms() != 1 ) { char *xx=0;*xx=0; }
		// store the termid
		m_termIds[m_numImages] = q.getTermId(0);
		// advance the counter
		m_numImages++;
		// try to get more graph images if we have some room
		if ( m_numImages + 2 < MAX_IMAGES ) goto ogimgloop;
	}
	


	//m_pageSite  = pageSite;
	// scan the words
	int32_t       nw     = words->getNumWords();
	nodeid_t  *tids   = words->getTagIds();
	int64_t *wids   = words->getWordIds();
	//int32_t      *scores = scoresArg->m_scores;
	Section **sp = NULL; 
	if ( sections ) sp = sections->m_sectionPtrs;
	// not if we don't have any identified sections
	if ( sections && sections->m_numSections <= 0 ) sp = NULL;
	// the positive scored window
	int32_t firstPosScore = -1;
	int32_t lastPosScore  = -1;
	int32_t badFlags = SEC_SCRIPT|SEC_STYLE|SEC_SELECT;
	// find positive scoring window
	for ( int32_t i = 0 ; i < nw ; i++ ) {
		// skip if in bad section
		if ( sp && (sp[i]->m_flags & badFlags) ) continue;
		if ( wids[i]   != 0 ) continue;
		// set first positive scoring guy
		if ( firstPosScore == -1 ) firstPosScore = i;
		// keep track of last guy
		lastPosScore = i;
	}
	// sanity check
	if ( getNumXmlNodes() > 512 ) { char *xx=NULL;*xx=0; }
	// . pedal firstPosScore back until we hit a section boundary
	// . i.e. stop once we hit a front/back tag pair, like <div> and </div>
	char tc[512];
	memset ( tc , 0 , 512 );
	int32_t a = firstPosScore;
	for ( ; a >= 0 ; a-- ) {
		// get the tid
		nodeid_t tid = tids[a];
		// remove back bit, if any
		tid &= BACKBITCOMP;
//.........这里部分代码省略.........
开发者ID:lemire,项目名称:open-source-search-engine,代码行数:101,代码来源:Images.cpp

示例3: getThumbnail

// . returns false if blocked, returns true otherwise
// . sets g_errno on error
bool Images::getThumbnail ( char *pageSite ,
			    long  siteLen  ,
			    long long docId ,
			    XmlDoc *xd ,
			    collnum_t collnum,//char *coll ,
			    //char **statusPtr ,
			    long hopCount,
			    void *state ,
			    void   (*callback)(void *state) ) {
	// sanity check
	if ( ! m_setCalled ) { char *xx=NULL;*xx=0; }
	// we haven't had any error
	m_hadError  = 0;
	// no reason to stop yet
	m_stopDownloading = false;
	// reset here now
	m_i = 0;
	m_j = 0;
	m_phase = 0;

	// sanity check
	if ( ! m_pageUrl ) { char *xx=NULL;*xx=0; }
	// sanity check
	if ( ! pageSite ) { char *xx=NULL;*xx=0; }
	// we need to be a permalink
	//if ( ! isPermalink ) return true;

	// save these
	//m_statusPtr = statusPtr;
	// save this
	m_collnum = collnum;
	m_docId = docId;
	m_callback = callback;
	m_state = state;

	// if this doc is a json diffbot reply it already has the primary
	// image selected so just use that
	m_xd = xd;
	if ( m_xd->m_isDiffbotJSONObject ) 
		return downloadImages();

	// if no candidates, we are done, no error
	if ( m_numImages == 0 ) return true;

	//Vector *v = xd->getTagVector();
	// this will at least have one component, the 0/NULL component
	uint32_t *tph = xd->getTagPairHash32();
	// must not block or error on us
	if ( tph == (void *)-1 ) { char *xx=NULL;*xx=0; }
	// must not error on use?
	if ( ! tph ) { char *xx=NULL;*xx=0; }

	// . see DupDetector.cpp, very similar to this
	// . see how many pages we have from our same site with our same 
	//   html template (and that are permalinks)
	char buf[2000];
	char c = pageSite[siteLen];
	pageSite[siteLen]=0;
	// site MUST NOT start with "http://"
	if ( strncmp ( pageSite , "http://", 7)==0){char*xx=NULL;*xx=0;}
	// this must match what we hash in XmlDoc::hashNoSplit()
	sprintf ( buf , "gbsitetemplate:%lu%s", (unsigned long)*tph,pageSite );
	pageSite[siteLen]=c;
	// TODO: make sure this is a no-split termid storage thingy
	// in Msg14.cpp
	Query q;
	if ( ! q.set2 ( buf , langUnknown , false ) )
		// return true with g_errno set on error
		return true;
	// store the termid
	long long termId = q.getTermId(0);

	key144_t startKey ;
	key144_t endKey   ;
	g_posdb.makeStartKey(&startKey,termId);
	g_posdb.makeEndKey  (&endKey  ,termId);

	// get shard of that (this termlist is sharded by termid -
	// see XmlDoc.cpp::hashNoSplit() where it hashes gbsitetemplate: term)
	long shardNum = g_hostdb.getShardNumByTermId ( &startKey );

	// if ( ! m_msg36.getTermFreq ( m_collnum               ,
	// 			     0                  , // maxAge
	// 			     termId             ,
	// 			     this               ,
	// 			     gotTermFreqWrapper ,
	// 			     MAX_NICENESS       ,
	// 			     true               ,  // exact count?
	// 			     false              ,  // inc count?
	// 			     false              ,  // dec count?
	// 			     false              )) // is split?
	// 	return false;


	// just use msg0 and limit to like 1k or something
	if ( ! m_msg0.getList ( -1    , // hostid
				-1    , // ip
				-1    , // port
//.........这里部分代码省略.........
开发者ID:UIKit0,项目名称:open-source-search-engine,代码行数:101,代码来源:Images.cpp

示例4: sendPageIndexdb


//.........这里部分代码省略.........
		return g_httpServer.sendErrorReply(s,500,mstrerror(g_errno)); 
	}
	// make a state
	State10 *st ;
	try { st = new (State10); }
	catch ( ... ) {
		g_errno = ENOMEM;
		log("PageIndexdb: new(%i): %s", 
		    sizeof(State10),mstrerror(g_errno));
		return g_httpServer.sendErrorReply(s,500,mstrerror(g_errno));}
	mnew ( st , sizeof(State10) , "PageIndexdb" );
	// password, too
	long pwdLen = 0 ;
	char *pwd = r->getString ( "pwd" , &pwdLen );
	if ( pwdLen > 31 ) pwdLen = 31;
	if ( pwdLen > 0 ) strncpy ( st->m_pwd , pwd , pwdLen );
	st->m_pwd[pwdLen]='\0';
	// get # of records to retreive from IndexList
	st->m_numRecs  = r->getLong ( "numRecs" , 100 );
	// use disk, tree, or cache?
	st->m_useDisk  = r->getLong ("ud" , 0 );
	st->m_useTree  = r->getLong ("ut" , 0 );
	st->m_useCache = r->getLong ("uc" , 0 );
	st->m_useDatedb= r->getLong ("ub" , 0 );
	st->m_add      = r->getLong ("add", 0 );
	st->m_del      = r->getLong ("del", 0 );
	// get the termId, if any, from the cgi vars
	st->m_termId = r->getLongLong ("t", 0LL ) ;
	// get docid and score
	st->m_docId  = r->getLongLong ("d", 0LL );
	st->m_score  = r->getLong ("score", 0 );
	// copy query/collection
	memcpy ( st->m_query , query , queryLen );
	st->m_queryLen = queryLen;
	st->m_query [ queryLen ] ='\0';
	//memcpy ( st->m_coll , coll , collLen );
	//st->m_collLen  = collLen;
	//st->m_coll [ collLen ] ='\0';
	st->m_coll = coll;
	// save the TcpSocket
	st->m_socket = s;
	// and if the request is local/internal or not
	st->m_isAdmin = g_collectiondb.isAdmin ( r , s );
	st->m_isLocal = r->isLocal();
	st->m_r.copy ( r );
	// . check for add/delete request
	if ( st->m_add || st->m_del ) {
		key_t startKey = g_indexdb.makeStartKey ( st->m_termId );
		key_t endKey   = g_indexdb.makeEndKey   ( st->m_termId );
		// construct the key to add/delete
		st->m_key = g_indexdb.makeKey ( st->m_termId,
						st->m_score ,
						st->m_docId ,
						st->m_del   );
		// make an RdbList out of the key
		st->m_keyList.set ( (char*)&st->m_key,
				    sizeof(key_t),
				    (char*)&st->m_key,
				    sizeof(key_t),
				    startKey,
				    endKey,
				    0,
				    false,
				    true  );
		log ( LOG_INFO, "build: adding indexdb key to indexdb: "
				"%lx %llx", st->m_key.n1, st->m_key.n0 );
		// call msg1 to add/delete key
		if ( ! st->m_msg1.addList ( &st->m_keyList,
					     RDB_INDEXDB,
					     st->m_coll,
					     st,
					     addedKeyWrapper,
					     false,
					     MAX_NICENESS ) )
			return false;
		// continue to page if no block
		return gotIndexList ( st );
	}

	if ( ! st->m_query[0] ) return gotIndexList(st);

	// . set query class
	// . a boolFlag of 0 means query is not boolean
	Query q;
	q.set2 ( query , langUnknown , true ); // 0 = boolFlag, not boolean!
	// reset 
	st->m_msg36.m_termFreq = 0LL;
	// if query was provided, use that, otherwise use termId
	if ( q.getNumTerms() > 0 ) st->m_termId = q.getTermId(0);
	// skip if nothing
	else return gotTermFreq ( st );
	// get the termfreq of this term!
	if ( ! st->m_msg36.getTermFreq ( coll ,
					 0 , 
					 st->m_termId,
					 st ,
					 gotTermFreqWrapper ) ) return false;
	// otherwise, we didn't block
	return gotTermFreq ( st );
}
开发者ID:BKJackson,项目名称:open-source-search-engine,代码行数:101,代码来源:PageIndexdb.cpp


注:本文中的Query::getTermId方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。