本文整理汇总了C++中Url::getDomainHash32方法的典型用法代码示例。如果您正苦于以下问题：C++ Url::getDomainHash32方法的具体用法？C++ Url::getDomainHash32怎么用？C++ Url::getDomainHash32使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Url的用法示例。
在下文中一共展示了Url::getDomainHash32方法的2个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: sendPageAnalyze
//......... part of the code is omitted here .........
long isXml = r->getLong("xml",0);
// if got docid, use that
if ( st->m_docId != -1 ) {
if ( ! xd->set3 ( st->m_docId,
st->m_coll,
0 ) ) // niceness
// return error reply if g_errno is set
return sendErrorReply ( st , g_errno );
// make this our callback in case something blocks
xd->setCallback ( st , gotXmlDoc );
xd->m_pbuf = &st->m_wbuf;
// reset this flag
st->m_donePrinting = false;
// . set xd from the old title rec if recycle is true
// . can also use XmlDoc::m_loadFromOldTitleRec flag
//if ( st->m_recycle ) xd->m_recycleContent = true;
xd->m_recycleContent = true;
// force this on
//xd->m_useSiteLinkBuf = true;
//xd->m_usePageLinkBuf = true;
if ( isXml ) xd->m_printInXml = true;
// now tell it to fetch the old title rec
if ( ! xd->loadFromOldTitleRec () )
// return false if this blocks
return false;
return gotXmlDoc ( st );
}
// set this up
SpiderRequest sreq;
sreq.reset();
if ( st->m_u ) strcpy(sreq.m_url,st->m_u);
long firstIp = hash32n(st->m_u);
if ( firstIp == -1 || firstIp == 0 ) firstIp = 1;
// parentdocid of 0
sreq.setKey( firstIp, 0LL, false );
sreq.m_isPageParser = 1;
sreq.m_hopCount = st->m_hopCount;
sreq.m_hopCountValid = 1;
sreq.m_fakeFirstIp = 1;
sreq.m_firstIp = firstIp;
Url nu;
nu.set(sreq.m_url);
sreq.m_domHash32 = nu.getDomainHash32();
sreq.m_siteHash32 = nu.getHostHash32();
// . get provided content if any
// . will be NULL if none provided
// . "content" may contain a MIME
long contentLen = 0;
char *content = r->getString ( "content" , &contentLen , NULL );
// is the "content" url-encoded? default is true.
bool contentIsEncoded = true;
// mark doesn't like to url-encode his content
if ( ! content ) {
content = r->getUnencodedContent ();
contentLen = r->getUnencodedContentLen ();
contentIsEncoded = false;
}
// ensure null
if ( contentLen == 0 ) content = NULL;
//uint8_t contentType = CT_HTML;
//if ( isXml ) contentType = CT_XML;
long ctype = r->getLong("ctype",CT_HTML);
// . use the enormous power of our new XmlDoc class
// . this returns false if blocked
if ( ! xd->set4 ( &sreq ,
NULL ,
st->m_coll ,
// we need this so the term table is set!
&st->m_wbuf , // XmlDoc::m_pbuf
0, // try 0 now! 1 ,//PP_NICENESS ))
content ,
false, // deletefromindex
0, // forced ip
ctype ))
// return error reply if g_errno is set
return sendErrorReply ( st , g_errno );
// make this our callback in case something blocks
xd->setCallback ( st , gotXmlDoc );
// reset this flag
st->m_donePrinting = false;
// prevent a core here in the event we download the page content
xd->m_crawlDelayValid = true;
xd->m_crawlDelay = 0;
// . set xd from the old title rec if recycle is true
// . can also use XmlDoc::m_loadFromOldTitleRec flag
//if ( st->m_recycle ) xd->m_recycleContent = true;
// only recycle if docid is given!!
if ( st->m_recycle ) xd->m_recycleContent = true;
// force this on
//xd->m_useSiteLinkBuf = true;
//xd->m_usePageLinkBuf = true;
if ( isXml ) xd->m_printInXml = true;
return gotXmlDoc ( st );
}
示例2: sendPageParser2
//......... part of the code is omitted here .........
"<br>",
//oips ,
contentParm );
xbuf->safePrintf(
"<center>"
"<input type=submit value=Submit>"
"</center>"
);
// just print the page if no url given
if ( ! st->m_u || ! st->m_u[0] ) return processLoop ( st );
XmlDoc *xd = &st->m_xd;
// set this up
SpiderRequest sreq;
sreq.reset();
strcpy(sreq.m_url,st->m_u);
long firstIp = hash32n(st->m_u);
if ( firstIp == -1 || firstIp == 0 ) firstIp = 1;
// parentdocid of 0
sreq.setKey( firstIp, 0LL, false );
sreq.m_isPageParser = 1;
sreq.m_hopCount = st->m_hopCount;
sreq.m_hopCountValid = 1;
sreq.m_fakeFirstIp = 1;
sreq.m_firstIp = firstIp;
Url nu;
nu.set(sreq.m_url);
sreq.m_domHash32 = nu.getDomainHash32();
sreq.m_siteHash32 = nu.getHostHash32();
// . get provided content if any
// . will be NULL if none provided
// . "content" may contain a MIME
long contentLen = 0;
char *content = r->getString ( "content" , &contentLen , NULL );
// is the "content" url-encoded? default is true.
bool contentIsEncoded = true;
// mark doesn't like to url-encode his content
if ( ! content ) {
content = r->getUnencodedContent ();
contentLen = r->getUnencodedContentLen ();
contentIsEncoded = false;
}
// ensure null
if ( contentLen == 0 ) content = NULL;
uint8_t contentType = CT_HTML;
if ( r->getBool("xml",0) ) contentType = CT_XML;
contentType = r->getLong("ctype",contentType);//CT_HTML);
// if facebook, load xml content from title rec...
bool isFacebook = (bool)strstr(st->m_u,"http://www.facebook.com/");
if ( isFacebook && ! content ) {
long long docId = g_titledb.getProbableDocId(st->m_u);
sprintf(sreq.m_url ,"%llu", docId );
sreq.m_isPageReindex = true;
}
// hack
if ( content ) {
st->m_dbuf.purge();
st->m_dbuf.safeStrcpy(content);
//char *data = strstr(content,"\r\n\r\n");
//long dataPos = 0;
//if ( data ) dataPos = (data + 4) - content;
//st->m_dbuf.convertJSONtoXML(0,dataPos);
//st->m_dbuf.decodeJSON(0);
content = st->m_dbuf.getBufStart();
}
// . use the enormous power of our new XmlDoc class
// . this returns false if blocked
if ( ! xd->set4 ( &sreq ,
NULL ,
st->m_coll ,
&st->m_wbuf ,
0 ,//PP_NICENESS ))
content ,
false, // deletefromindex
0, // forced ip
contentType ))
// return error reply if g_errno is set
return sendErrorReply ( st , g_errno );
// make this our callback in case something blocks
xd->setCallback ( st , processLoop );
// . set xd from the old title rec if recycle is true
// . can also use XmlDoc::m_loadFromOldTitleRec flag
if ( st->m_recycle ) xd->m_recycleContent = true;
return processLoop ( st );
}