本文整理汇总了C++中SpiderRequest::getRecSize方法的典型用法代码示例。如果您正苦于以下问题:C++ SpiderRequest::getRecSize方法的具体用法?C++ SpiderRequest::getRecSize怎么用?C++ SpiderRequest::getRecSize使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类SpiderRequest的用法示例。
在下文中一共展示了SpiderRequest::getRecSize方法的1个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: sendPageAddUrl
//.........这里部分代码省略.........
// . should we force it into spiderdb even if already in there
// . use to manually update spider times for a url
// . however, will not remove old scheduled spider times
// . mdw: made force on the default
st1->m_forceRespider = r->getLong("force",1); // 0);
long now = getTimeGlobal();
// . allow 1 submit every 1 hour
// . restrict by submitter domain ip
if ( ! st1->m_isAdmin &&
! canSubmit ( h , now , cr->m_maxAddUrlsPerIpDomPerDay ) ) {
// return error page
g_errno = ETOOEARLY;
return sendReply ( st1 , true );
}
//st1->m_query = r->getString( "qts", &st1->m_queryLen );
// check it, if turing test is enabled for this collection
if ( ! st1->m_isAdmin && cr->m_doTuringTest &&
! g_turingTest.isHuman(r) ) {
// log note so we know it didn't make it
g_msg = " (error: bad answer)";
//log("PageAddUrl:: addurl failed for %s : bad answer",
// iptoa(s->m_ip));
st1->m_goodAnswer = false;
return sendReply ( st1 , true /*addUrl enabled?*/ );
}
//if ( st1->m_queryLen > 0 )
// return getPages( st1 );
// if no url given, just print a blank page
if ( ! url ) return sendReply ( st1 , true );
//
// make a SpiderRequest
//
SpiderRequest *sreq = &st1->m_sreq;
// reset it
sreq->reset();
// make the probable docid
long long probDocId = g_titledb.getProbableDocId ( st1->m_url );
// make one up, like we do in PageReindex.cpp
long firstIp = (probDocId & 0xffffffff);
// . now fill it up
// . TODO: calculate the other values... lazy!!! (m_isRSSExt,
// m_siteNumInlinks,...)
sreq->m_isNewOutlink = 1;
sreq->m_isAddUrl = 1;
sreq->m_addedTime = now;
sreq->m_fakeFirstIp = 1;
sreq->m_probDocId = probDocId;
sreq->m_firstIp = firstIp;
sreq->m_hopCount = 0;
// its valid if root
Url uu; uu.set ( st1->m_url );
if ( uu.isRoot() ) sreq->m_hopCountValid = true;
// too big?
//long len = st1->m_urlLen;
// the url! includes \0
strcpy ( sreq->m_url , st1->m_url );
// call this to set sreq->m_dataSize now
sreq->setDataSize();
// make the key dude -- after setting url
sreq->setKey ( firstIp , 0LL, false );
// need a fake first ip lest we core!
//sreq->m_firstIp = (pdocId & 0xffffffff);
// how to set m_firstIp? i guess addurl can be throttled independently
// of the other urls??? use the hash of the domain for it!
long dlen;
char *dom = getDomFast ( st1->m_url , &dlen );
// fake it for this...
//sreq->m_firstIp = hash32 ( dom , dlen );
// sanity
if ( ! dom ) {
g_errno = EBADURL;
return sendReply ( st1 , true );
}
// shortcut
Msg4 *m = &st1->m_msg4;
// now add that to spiderdb using msg4
if ( ! m->addMetaList ( (char *)sreq ,
sreq->getRecSize() ,
coll ,
st1 , // state
addedStuff ,
MAX_NICENESS ,
RDB_SPIDERDB ) )
// we blocked
return false;
// send back the reply
return sendReply ( st1 , true );
}