本文整理汇总了C++中Query::getTermId方法的典型用法代码示例。如果您正苦于以下问题：C++ Query::getTermId方法的具体用法？C++ Query::getTermId怎么用？C++ Query::getTermId使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Query的用法示例。
在下文中一共展示了Query::getTermId方法的4个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: setCandidates
//.........这里部分代码省略.........
// NOTE(review): this excerpt starts in the middle of a function; tc, tids,
// words, xml, pageUrl, firstPosScore and lastPosScore are used below but are
// declared in the part that is not shown.
// clear the 512 bytes of per-tag-id flags (0x01 = front tag, 0x02 = back tag)
memset ( tc , 0 , 512 );
// scan backwards, starting at firstPosScore
long a = firstPosScore;
for ( ; a >= 0 ; a-- ) {
// get the tid
nodeid_t tid = tids[a];
// remove back bit, if any
tid &= BACKBITCOMP;
// skip if not a tag, or a generic xml tag
if ( tid <= 1 ) continue;
// mark it: back tags set bit 0x02, every other tag sets bit 0x01
if ( words->isBackTag(a) ) tc[tid] |= 0x02;
else tc[tid] |= 0x01;
// continue if not a full front/back pair (both bits set)
if ( tc[tid] != 0x03 ) continue;
// continue if not a "section" type tag (see Scores.cpp)
if ( tid != TAG_DIV &&
tid != TAG_TEXTAREA &&
tid != TAG_TR &&
tid != TAG_TD &&
tid != TAG_TABLE )
continue;
// ok we should stop now: "a" is the start of the window scanned below
break;
}
// min is 0 (a is -1 here if the loop above never reached the break)
if ( a < 0 ) a = 0;
// now look for the image urls within this window
// NOTE(review): the upper bound is exclusive, so index lastPosScore itself
// is never examined -- confirm that is intended
for ( long i = a ; i < lastPosScore ; i++ ) {
// skip if not <img> tag
if (tids[i] != TAG_IMG ) continue;
// get the node num into Xml.cpp::m_nodes[] array
long nn = words->m_nodes[i];
// check width to rule out small decorating imgs. a value of -1 (the
// last argument passed to getLong, presumably its default when the
// attribute is absent) does not reject the image
long width = xml->getLong(nn,nn+1,"width", -1 );
if ( width != -1 && width < 50 ) continue;
// same with height
long height = xml->getLong(nn,nn+1, "height", -1 );
if ( height != -1 && height < 50 ) continue;
// get the url of the image
// NOTE(review): srcLen is not initialized here; presumably getString()
// always writes it, even when there is no "src" attribute -- confirm
long srcLen;
char *src = xml->getString(nn,"src",&srcLen);
// skip if none (2 characters or fewer)
if ( srcLen <= 2 ) continue;
// set it to the full url
Url iu;
// use "pageUrl" as the baseUrl
iu.set ( pageUrl , src , srcLen );
// skip if invalid domain or TLD
if ( iu.getDomainLen() <= 0 ) continue;
// skip if not from same domain as page url
//long dlen = pageUrl->getDomainLen();
//if ( iu.getDomainLen() != dlen ) continue;
//if(strncmp(iu.getDomain(),pageUrl->getDomain(),dlen))continue
// get the full url
char *u = iu.getUrl();
long ulen = iu.getUrlLen();
// skip common crap: urls containing any of these substrings
// (strncasestr is presumably a case-insensitive search -- confirm)
if ( strncasestr(u,ulen,"logo" ) ) continue;
if ( strncasestr(u,ulen,"comment" ) ) continue;
if ( strncasestr(u,ulen,"print" ) ) continue;
if ( strncasestr(u,ulen,"subscribe" ) ) continue;
if ( strncasestr(u,ulen,"header" ) ) continue;
if ( strncasestr(u,ulen,"footer" ) ) continue;
if ( strncasestr(u,ulen,"menu" ) ) continue;
if ( strncasestr(u,ulen,"button" ) ) continue;
if ( strncasestr(u,ulen,"banner" ) ) continue;
if ( strncasestr(u,ulen,"ad.doubleclick.") ) continue;
if ( strncasestr(u,ulen,"ads.webfeat." ) ) continue;
if ( strncasestr(u,ulen,"xads.zedo." ) ) continue;
// save it. the slot only becomes permanent once m_numImages is
// advanced further down
m_imageNodes[m_numImages] = nn;
// before we lookup the image url to see if it is unique we
// must first make sure that we have an adequate number of
// permalinks from this same site with this same hop count.
// we need at least 10 before we extract image thumbnails.
char buf[2000];
// set the query
Query q;
// if we do have 10 or more, then we lookup the image url to
// make sure it is indeed unique
// NOTE(review): sprintf is unbounded; this only fits if the url is
// shorter than the 2000-byte buf minus the "gbimage:" prefix -- confirm
// the maximum url length guarantees that
sprintf ( buf , "gbimage:%s",u);
// TODO: make sure this is a no-split termid storage thingy
// in Msg14.cpp
if ( ! q.set2 ( buf , langUnknown , false ) )
// give up if the query could not be set. NOTE(review): the older
// comment here said "return true with g_errno set on error", but
// this is a bare return with no value
return;
// store the termid of the query's first term
m_termIds[m_numImages] = q.getTermId(0);
// advance the counter
m_numImages++;
// break if full
if ( m_numImages >= MAX_IMAGES ) break;
}
}
示例2: setCandidates
// . resets the candidate image list, then fills m_imageNodes[]/m_termIds[]
// . first takes candidates from <meta property="og:image"> tags, then scans
//   the words array for a window to search (the rest of that scan is not
//   part of this excerpt)
// . "sections" may be NULL
// . "xd" is not referenced in the visible part of the function
void Images::setCandidates ( Url *pageUrl , Words *words , Xml *xml ,
Sections *sections , XmlDoc *xd ) {
// not valid for now
m_thumbnailValid = false;
// reset our array of image node candidates
m_numImages = 0;
// flag it
m_setCalled = true;
// m_imgReply is expected to be unset here; if it is set, crash on
// purpose by writing through a NULL pointer
if ( m_imgReply ) { char *xx=NULL;*xx=0; }
// save this
m_xml = xml;
m_pageUrl = pageUrl;
//
// first add any open graph candidate.
// basically the page telling us the best image straight up.
//
int32_t node2 = -1;
int32_t startNode = 0;
// . field can be stuff like "summary","description","keywords",...
// . if "convertHtmlEntites" is true we change &lt; to < and &gt; to >
// . <meta property="og:image" content="http://example.com/rock2.jpg"/>
// . <meta property="og:image" content="http://example.com/rock3.jpg"/>
ogimgloop:
char ubuf[2000];
// 8 is the length of "og:image"; 1999 is presumably the max number of
// bytes to store in ubuf -- confirm against Xml::getMetaContent
int32_t ulen = xml->getMetaContent( ubuf, 1999, "og:image", 8, "property", startNode, &node2 );
// update this in case goto ogimgloop is called
startNode = node2 + 1;
// see section below for explanation of what we are storing here...
// NOTE(review): presumably node2 is set to a negative value when no
// (further) matching meta tag exists, which is what ends this loop --
// confirm
if ( node2 >= 0 ) {
// save it. the slot only counts once m_numImages is advanced below, so
// the "goto ogimgloop" skips simply overwrite it on the next pass
m_imageNodes[m_numImages] = node2;
Query q;
// skip over-long urls and look for the next og:image tag
if ( ulen > MAX_URL_LEN ) goto ogimgloop;
// set it to the full url
Url iu;
// use "pageUrl" as the baseUrl
iu.set( pageUrl, ubuf, ulen );
// skip if invalid domain or TLD
if ( iu.getDomainLen() <= 0 ) goto ogimgloop;
// for looking it up on disk to see if unique or not
char buf[2000];
// if we don't put in quotes it expands '|' into
// the "PiiPe" operator in Query.cpp
snprintf ( buf , 1999, "gbimage:\"%s\"",iu.getUrl());
// TODO: make sure this is a no-split termid storage thingy
// in Msg14.cpp
// give up entirely if the query could not be set
if ( ! q.set2 ( buf , langUnknown , false ) ) return;
// sanity test: crash on purpose unless the query has exactly one term
if ( q.getNumTerms() != 1 ) { char *xx=0;*xx=0; }
// store the termid
m_termIds[m_numImages] = q.getTermId(0);
// advance the counter
m_numImages++;
// try to get more graph images if we have some room
// (i.e. only while m_numImages + 2 is still below MAX_IMAGES)
if ( m_numImages + 2 < MAX_IMAGES ) goto ogimgloop;
}
//m_pageSite = pageSite;
// scan the words
int32_t nw = words->getNumWords();
nodeid_t *tids = words->getTagIds();
int64_t *wids = words->getWordIds();
//int32_t *scores = scoresArg->m_scores;
// per-word section pointers, or NULL if no sections are available
Section **sp = NULL;
if ( sections ) sp = sections->m_sectionPtrs;
// not if we don't have any identified sections
if ( sections && sections->m_numSections <= 0 ) sp = NULL;
// the positive scored window
int32_t firstPosScore = -1;
int32_t lastPosScore = -1;
int32_t badFlags = SEC_SCRIPT|SEC_STYLE|SEC_SELECT;
// find positive scoring window
for ( int32_t i = 0 ; i < nw ; i++ ) {
// skip if in bad section
if ( sp && (sp[i]->m_flags & badFlags) ) continue;
// NOTE(review): this skips every position with a non-zero word id, so
// only positions whose word id is 0 can start or end the window --
// confirm that this is the intended condition
if ( wids[i] != 0 ) continue;
// set first positive scoring guy
if ( firstPosScore == -1 ) firstPosScore = i;
// keep track of last guy
lastPosScore = i;
}
// sanity check: crash on purpose if getNumXmlNodes() exceeds 512, the
// size of the tc[] array declared just below
if ( getNumXmlNodes() > 512 ) { char *xx=NULL;*xx=0; }
// . pedal firstPosScore back until we hit a section boundary
// . i.e. stop once we hit a front/back tag pair, like <div> and </div>
char tc[512];
memset ( tc , 0 , 512 );
int32_t a = firstPosScore;
for ( ; a >= 0 ; a-- ) {
// get the tid
nodeid_t tid = tids[a];
// remove back bit, if any
tid &= BACKBITCOMP;
//.........这里部分代码省略.........
示例3: getThumbnail
// . returns false if blocked, returns true otherwise
// . sets g_errno on error
// . "pageSite"/"siteLen": the site string; pageSite[siteLen] is temporarily
//   overwritten with a NUL below and then restored, so the buffer must be
//   writable at that index
// . "docId", "collnum", "xd", "state" and "callback" are saved into members
// . "hopCount" is not referenced in the visible part of the function
// . setCandidates() must have been called first (m_setCalled is checked)
bool Images::getThumbnail ( char *pageSite ,
long siteLen ,
long long docId ,
XmlDoc *xd ,
collnum_t collnum,//char *coll ,
//char **statusPtr ,
long hopCount,
void *state ,
void (*callback)(void *state) ) {
// sanity check: crash on purpose (NULL write) if the set call was skipped
if ( ! m_setCalled ) { char *xx=NULL;*xx=0; }
// we haven't had any error
m_hadError = 0;
// no reason to stop yet
m_stopDownloading = false;
// reset here now
m_i = 0;
m_j = 0;
m_phase = 0;
// sanity check
if ( ! m_pageUrl ) { char *xx=NULL;*xx=0; }
// sanity check
if ( ! pageSite ) { char *xx=NULL;*xx=0; }
// we need to be a permalink
//if ( ! isPermalink ) return true;
// save these
//m_statusPtr = statusPtr;
// save this
m_collnum = collnum;
m_docId = docId;
m_callback = callback;
m_state = state;
// if this doc is a json diffbot reply it already has the primary
// image selected so just use that
m_xd = xd;
if ( m_xd->m_isDiffbotJSONObject )
return downloadImages();
// if no candidates, we are done, no error
if ( m_numImages == 0 ) return true;
//Vector *v = xd->getTagVector();
// this will at least have one component, the 0/NULL component
uint32_t *tph = xd->getTagPairHash32();
// must not block or error on us ((void *)-1 is treated as fatal here)
if ( tph == (void *)-1 ) { char *xx=NULL;*xx=0; }
// must not error on us? (a NULL result is treated as fatal too)
if ( ! tph ) { char *xx=NULL;*xx=0; }
// . see DupDetector.cpp, very similar to this
// . see how many pages we have from our same site with our same
// html template (and that are permalinks)
char buf[2000];
// temporarily NUL-terminate pageSite at siteLen; the saved byte is put
// back right after the sprintf below
char c = pageSite[siteLen];
pageSite[siteLen]=0;
// site MUST NOT start with "http://"
if ( strncmp ( pageSite , "http://", 7)==0){char*xx=NULL;*xx=0;}
// this must match what we hash in XmlDoc::hashNoSplit()
// NOTE(review): sprintf is unbounded; this only fits if siteLen is well
// below the 2000-byte buf -- confirm callers guarantee that
sprintf ( buf , "gbsitetemplate:%lu%s", (unsigned long)*tph,pageSite );
pageSite[siteLen]=c;
// TODO: make sure this is a no-split termid storage thingy
// in Msg14.cpp
Query q;
if ( ! q.set2 ( buf , langUnknown , false ) )
// return true with g_errno set on error
return true;
// store the termid of the query's first term
long long termId = q.getTermId(0);
// build the posdb start/end keys for that termid
key144_t startKey ;
key144_t endKey ;
g_posdb.makeStartKey(&startKey,termId);
g_posdb.makeEndKey (&endKey ,termId);
// get shard of that (this termlist is sharded by termid -
// see XmlDoc.cpp::hashNoSplit() where it hashes gbsitetemplate: term)
long shardNum = g_hostdb.getShardNumByTermId ( &startKey );
// (older Msg36 term-frequency lookup, kept commented out)
// if ( ! m_msg36.getTermFreq ( m_collnum ,
// 0 , // maxAge
// termId ,
// this ,
// gotTermFreqWrapper ,
// MAX_NICENESS ,
// true , // exact count?
// false , // inc count?
// false , // dec count?
// false )) // is split?
// return false;
// just use msg0 and limit to like 1k or something
if ( ! m_msg0.getList ( -1 , // hostid
-1 , // ip
//.........这里部分代码省略.........
示例4: sendPageIndexdb
//.........这里部分代码省略.........
// NOTE(review): this excerpt starts in the middle of a function; s, r, query,
// queryLen and coll are used below but are set up in the part not shown.
return g_httpServer.sendErrorReply(s,500,mstrerror(g_errno));
}
// make a state
State10 *st ;
try { st = new (State10); }
// on allocation failure: set g_errno to ENOMEM, log it and reply with a 500
catch ( ... ) {
g_errno = ENOMEM;
// NOTE(review): "%i" is paired with a sizeof() result (size_t) -- the
// format specifier and argument type may not match on all platforms
log("PageIndexdb: new(%i): %s",
sizeof(State10),mstrerror(g_errno));
return g_httpServer.sendErrorReply(s,500,mstrerror(g_errno));}
// presumably registers the allocation with the memory tracker -- confirm
mnew ( st , sizeof(State10) , "PageIndexdb" );
// password, too: clamp to 31 chars, copy, then NUL-terminate
// (assumes st->m_pwd holds at least 32 bytes -- TODO confirm)
long pwdLen = 0 ;
char *pwd = r->getString ( "pwd" , &pwdLen );
if ( pwdLen > 31 ) pwdLen = 31;
if ( pwdLen > 0 ) strncpy ( st->m_pwd , pwd , pwdLen );
st->m_pwd[pwdLen]='\0';
// get # of records to retrieve from IndexList (100 if not given)
st->m_numRecs = r->getLong ( "numRecs" , 100 );
// use disk, tree, or cache?
st->m_useDisk = r->getLong ("ud" , 0 );
st->m_useTree = r->getLong ("ut" , 0 );
st->m_useCache = r->getLong ("uc" , 0 );
st->m_useDatedb= r->getLong ("ub" , 0 );
// add/delete request flags
st->m_add = r->getLong ("add", 0 );
st->m_del = r->getLong ("del", 0 );
// get the termId, if any, from the cgi vars
st->m_termId = r->getLongLong ("t", 0LL ) ;
// get docid and score
st->m_docId = r->getLongLong ("d", 0LL );
st->m_score = r->getLong ("score", 0 );
// copy query/collection
// NOTE(review): no bound on queryLen is visible here; presumably it was
// checked against the size of st->m_query in the omitted part -- confirm
memcpy ( st->m_query , query , queryLen );
st->m_queryLen = queryLen;
st->m_query [ queryLen ] ='\0';
//memcpy ( st->m_coll , coll , collLen );
//st->m_collLen = collLen;
//st->m_coll [ collLen ] ='\0';
// the collection is assigned directly; the byte-copy above is disabled
st->m_coll = coll;
// save the TcpSocket
st->m_socket = s;
// and if the request is local/internal or not
st->m_isAdmin = g_collectiondb.isAdmin ( r , s );
st->m_isLocal = r->isLocal();
// keep our own copy of the request
st->m_r.copy ( r );
// . check for add/delete request
if ( st->m_add || st->m_del ) {
key_t startKey = g_indexdb.makeStartKey ( st->m_termId );
key_t endKey = g_indexdb.makeEndKey ( st->m_termId );
// construct the key to add/delete (st->m_del is passed as the last arg)
st->m_key = g_indexdb.makeKey ( st->m_termId,
st->m_score ,
st->m_docId ,
st->m_del );
// make an RdbList out of the key; the list points at st->m_key itself
st->m_keyList.set ( (char*)&st->m_key,
sizeof(key_t),
(char*)&st->m_key,
sizeof(key_t),
startKey,
endKey,
0,
false,
true );
// NOTE(review): the message says "adding" even when this is a delete
log ( LOG_INFO, "build: adding indexdb key to indexdb: "
"%lx %llx", st->m_key.n1, st->m_key.n0 );
// call msg1 to add/delete key
// (a false return presumably means it blocked and addedKeyWrapper will
// be called later -- confirm)
if ( ! st->m_msg1.addList ( &st->m_keyList,
RDB_INDEXDB,
st->m_coll,
st,
addedKeyWrapper,
false,
MAX_NICENESS ) )
return false;
// continue to page if no block
return gotIndexList ( st );
}
// no query given: go straight to the index list page
if ( ! st->m_query[0] ) return gotIndexList(st);
// . set query class
// . a boolFlag of 0 means query is not boolean
// NOTE(review): the comments here talk about a boolFlag of 0, but the
// third argument passed is "true"; the return value of set2() is also
// ignored -- confirm both are intended
Query q;
q.set2 ( query , langUnknown , true ); // 0 = boolFlag, not boolean!
// reset
st->m_msg36.m_termFreq = 0LL;
// if query was provided, use that, otherwise use termId
if ( q.getNumTerms() > 0 ) st->m_termId = q.getTermId(0);
// skip if nothing
else return gotTermFreq ( st );
// get the termfreq of this term!
// (a false return presumably means it blocked and gotTermFreqWrapper
// will be called later -- confirm)
if ( ! st->m_msg36.getTermFreq ( coll ,
0 ,
st->m_termId,
st ,
gotTermFreqWrapper ) ) return false;
// otherwise, we didn't block
return gotTermFreq ( st );
}