本文整理汇总了 C++ 中 SpiderRequest::setDataSize 方法的典型用法代码示例。如果您正苦于以下问题:C++ SpiderRequest::setDataSize 方法的具体用法?C++ SpiderRequest::setDataSize 怎么用?C++ SpiderRequest::setDataSize 使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 SpiderRequest 的用法示例。
在下文中一共展示了 SpiderRequest::setDataSize 方法的 2 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 C++ 代码示例。
示例1: sendPageGet
//.........这里部分代码省略.........
// . we need to match summary here so we need to know this
//bool seq = r->getLong ( "seq" , false );
// restrict to root file?
bool rtq = r->getLong ( "rtq" , false );
// . get the titleRec
// . TODO: redirect client to a better http server to save bandwidth
State2 *st ;
try { st = new (State2); }
catch (... ) {
g_errno = ENOMEM;
log("PageGet: new(%i): %s",
(int)sizeof(State2),mstrerror(g_errno));
return g_httpServer.sendErrorReply(s,500,mstrerror(g_errno));}
mnew ( st , sizeof(State2) , "PageGet1" );
// save the socket and if Host: is local in the Http request Mime
st->m_socket = s;
st->m_isAdmin = g_conf.isCollAdmin ( s , r );
st->m_isLocal = r->isLocal();
st->m_docId = docId;
st->m_printed = false;
// include header ... "this page cached by Gigablast on..."
st->m_includeHeader = r->getLong ("ih" , true );
st->m_includeBaseHref = r->getLong ("ibh" , false );
st->m_queryHighlighting = r->getLong ("qh" , true );
st->m_strip = r->getLong ("strip" , 0 );
st->m_clickAndScroll = r->getLong ("cas" , true );
st->m_cnsPage = r->getLong ("cnsp" , true );
char *langAbbr = r->getString("qlang",NULL);
st->m_langId = langUnknown;
if ( langAbbr ) {
uint8_t langId = getLangIdFromAbbr ( langAbbr );
st->m_langId = langId;
}
strncpy ( st->m_coll , coll , MAX_COLL_LEN+1 );
// store query for query highlighting
st->m_netTestResults = r->getLong ("rnettest", false );
//if( st->m_netTestResults ) {
// mdelete ( st , sizeof(State2) , "PageGet1" );
// delete ( st );
// return sendPageNetResult( s );
//}
if ( q && qlen > 0 ) strcpy ( st->m_q , q );
else st->m_q[0] = '\0';
st->m_qlen = qlen;
//st->m_seq = seq;
st->m_rtq = rtq;
st->m_boolFlag = r->getLong ("bq", 2 /*default is 2*/ );
st->m_isBanned = false;
st->m_noArchive = false;
st->m_socket = s;
st->m_format = r->getReplyFormat();
// default to 0 niceness
st->m_niceness = 0;
st->m_r.copy ( r );
//st->m_cr = cr;
st->m_printDisclaimer = true;
if ( st->m_cnsPage )
st->m_printDisclaimer = false;
if ( st->m_strip ) // ! st->m_evbits.isEmpty() )
st->m_printDisclaimer = false;
// should we cache it?
char useCache = r->getLong ( "usecache" , 1 );
char rcache = r->getLong ( "rcache" , 1 );
char wcache = r->getLong ( "wcache" , 1 );
long cacheAge = r->getLong ( "cacheAge" , 60*60 ); // default one hour
if ( useCache == 0 ) { cacheAge = 0; wcache = 0; }
if ( rcache == 0 ) cacheAge = 0;
// . fetch the TitleRec
// . a max cache age of 0 means not to read from the cache
XmlDoc *xd = &st->m_xd;
// url based?
if ( url ) {
SpiderRequest sreq;
sreq.reset();
strcpy(sreq.m_url, url );
sreq.setDataSize();
// this returns false if "coll" is invalid
if ( ! xd->set4 ( &sreq , NULL , coll , NULL , st->m_niceness ) )
goto hadSetError;
}
// . when getTitleRec() is called it will load the old one
// since XmlDoc::m_setFromTitleRec will be true
// . niceness is 0
// . use st->m_coll since XmlDoc just points to it!
// . this returns false if "coll" is invalid
else if ( ! xd->set3 ( docId , st->m_coll , 0 ) ) {
hadSetError:
mdelete ( st , sizeof(State2) , "PageGet1" );
delete ( st );
g_errno = ENOMEM;
log("PageGet: set3: %s", mstrerror(g_errno));
return g_httpServer.sendErrorReply(s,500,mstrerror(g_errno));
}
// if it blocks while it loads title rec, it will re-call this routine
xd->setCallback ( st , processLoopWrapper );
// good to go!
return processLoop ( st );
}
示例2: sendPageAddUrl
//.........这里部分代码省略.........
// . should we force it into spiderdb even if already in there
// . use to manually update spider times for a url
// . however, will not remove old scheduled spider times
// . mdw: made force on the default
st1->m_forceRespider = r->getLong("force",1); // 0);
long now = getTimeGlobal();
// . allow 1 submit every 1 hour
// . restrict by submitter domain ip
if ( ! st1->m_isAdmin &&
! canSubmit ( h , now , cr->m_maxAddUrlsPerIpDomPerDay ) ) {
// return error page
g_errno = ETOOEARLY;
return sendReply ( st1 , true );
}
//st1->m_query = r->getString( "qts", &st1->m_queryLen );
// check it, if turing test is enabled for this collection
if ( ! st1->m_isAdmin && cr->m_doTuringTest &&
! g_turingTest.isHuman(r) ) {
// log note so we know it didn't make it
g_msg = " (error: bad answer)";
//log("PageAddUrl:: addurl failed for %s : bad answer",
// iptoa(s->m_ip));
st1->m_goodAnswer = false;
return sendReply ( st1 , true /*addUrl enabled?*/ );
}
//if ( st1->m_queryLen > 0 )
// return getPages( st1 );
// if no url given, just print a blank page
if ( ! url ) return sendReply ( st1 , true );
//
// make a SpiderRequest
//
SpiderRequest *sreq = &st1->m_sreq;
// reset it
sreq->reset();
// make the probable docid
long long probDocId = g_titledb.getProbableDocId ( st1->m_url );
// make one up, like we do in PageReindex.cpp
long firstIp = (probDocId & 0xffffffff);
// . now fill it up
// . TODO: calculate the other values... lazy!!! (m_isRSSExt,
// m_siteNumInlinks,...)
sreq->m_isNewOutlink = 1;
sreq->m_isAddUrl = 1;
sreq->m_addedTime = now;
sreq->m_fakeFirstIp = 1;
sreq->m_probDocId = probDocId;
sreq->m_firstIp = firstIp;
sreq->m_hopCount = 0;
// its valid if root
Url uu; uu.set ( st1->m_url );
if ( uu.isRoot() ) sreq->m_hopCountValid = true;
// too big?
//long len = st1->m_urlLen;
// the url! includes \0
strcpy ( sreq->m_url , st1->m_url );
// call this to set sreq->m_dataSize now
sreq->setDataSize();
// make the key dude -- after setting url
sreq->setKey ( firstIp , 0LL, false );
// need a fake first ip lest we core!
//sreq->m_firstIp = (pdocId & 0xffffffff);
// how to set m_firstIp? i guess addurl can be throttled independently
// of the other urls??? use the hash of the domain for it!
long dlen;
char *dom = getDomFast ( st1->m_url , &dlen );
// fake it for this...
//sreq->m_firstIp = hash32 ( dom , dlen );
// sanity
if ( ! dom ) {
g_errno = EBADURL;
return sendReply ( st1 , true );
}
// shortcut
Msg4 *m = &st1->m_msg4;
// now add that to spiderdb using msg4
if ( ! m->addMetaList ( (char *)sreq ,
sreq->getRecSize() ,
coll ,
st1 , // state
addedStuff ,
MAX_NICENESS ,
RDB_SPIDERDB ) )
// we blocked
return false;
// send back the reply
return sendReply ( st1 , true );
}