本文整理汇总了C++中XmlDoc::set4方法的典型用法代码示例。如果您正苦于以下问题:C++ XmlDoc::set4方法的具体用法?C++ XmlDoc::set4怎么用?C++ XmlDoc::set4使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类XmlDoc的用法示例。
在下文中一共展示了XmlDoc::set4方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: sendPageAnalyze
//.........这里部分代码省略.........
// Excerpt from sendPageAnalyze; the start of the function is not shown.
// Two paths are visible below:
//  1. a docid is known (st->m_docId != -1): set the XmlDoc from the docid
//     with set3() and load the old title rec
//  2. otherwise: build a SpiderRequest from st->m_u and pass it, together
//     with any content found in the request, to XmlDoc::set4()
// Both paths finish by calling gotXmlDoc(st).
// non-zero "xml" parameter turns on xd->m_printInXml further down
long isXml = r->getLong("xml",0);
// if got docid, use that
if ( st->m_docId != -1 ) {
if ( ! xd->set3 ( st->m_docId,
st->m_coll,
0 ) ) // niceness
// return error reply if g_errno is set
return sendErrorReply ( st , g_errno );
// make this our callback in case something blocks
xd->setCallback ( st , gotXmlDoc );
// same buffer that the set4() path below passes as its pbuf argument
xd->m_pbuf = &st->m_wbuf;
// reset this flag
st->m_donePrinting = false;
// . set xd from the old title rec if recycle is true
// . can also use XmlDoc::m_loadFromOldTitleRec flag
// . on this path recycling is forced on regardless of st->m_recycle;
//   the conditional form is the commented-out line below
//if ( st->m_recycle ) xd->m_recycleContent = true;
xd->m_recycleContent = true;
// force this on
//xd->m_useSiteLinkBuf = true;
//xd->m_usePageLinkBuf = true;
if ( isXml ) xd->m_printInXml = true;
// now tell it to fetch the old title rec
if ( ! xd->loadFromOldTitleRec () )
// return false if this blocks
return false;
return gotXmlDoc ( st );
}
// no docid: describe the url with a SpiderRequest instead
// set this up
SpiderRequest sreq;
sreq.reset();
// NOTE(review): st->m_u is NULL-checked for the copy but is handed to
// hash32n() unchecked on the next line -- confirm hash32n accepts NULL or
// that m_u can never be NULL here. The strcpy is also unbounded; confirm
// the url length is validated before this point.
if ( st->m_u ) strcpy(sreq.m_url,st->m_u);
// the url hash stands in for a real first ip (m_fakeFirstIp is set below)
long firstIp = hash32n(st->m_u);
// 0 and -1 are remapped to 1
if ( firstIp == -1 || firstIp == 0 ) firstIp = 1;
// parentdocid of 0
sreq.setKey( firstIp, 0LL, false );
sreq.m_isPageParser = 1;
sreq.m_hopCount = st->m_hopCount;
sreq.m_hopCountValid = 1;
sreq.m_fakeFirstIp = 1;
sreq.m_firstIp = firstIp;
// parse the url to fill in the domain and host hashes of the request
Url nu;
nu.set(sreq.m_url);
sreq.m_domHash32 = nu.getDomainHash32();
sreq.m_siteHash32 = nu.getHostHash32();
// . get provided content if any
// . will be NULL if none provided
// . "content" may contain a MIME
long contentLen = 0;
char *content = r->getString ( "content" , &contentLen , NULL );
// is the "content" url-encoded? default is true.
// NOTE(review): contentIsEncoded is assigned but never read in the lines
// shown here
bool contentIsEncoded = true;
// mark doesn't like to url-encode his content
if ( ! content ) {
content = r->getUnencodedContent ();
contentLen = r->getUnencodedContentLen ();
contentIsEncoded = false;
}
// ensure null
// (contentLen is only used for this check; it is not passed to set4)
if ( contentLen == 0 ) content = NULL;
//uint8_t contentType = CT_HTML;
//if ( isXml ) contentType = CT_XML;
// content type defaults to CT_HTML unless "ctype" is given
long ctype = r->getLong("ctype",CT_HTML);
// . use the enormous power of our new XmlDoc class
// . this returns false if blocked
// NOTE(review): the line above says false means "blocked", yet a false
// return is answered with an error reply built from g_errno -- confirm
// which contract set4() actually has
if ( ! xd->set4 ( &sreq ,
NULL ,
st->m_coll ,
// we need this so the term table is set!
&st->m_wbuf , // XmlDoc::m_pbuf
0, // try 0 now! 1 ,//PP_NICENESS ))
content ,
false, // deletefromindex
0, // forced ip
ctype ))
// return error reply if g_errno is set
return sendErrorReply ( st , g_errno );
// make this our callback in case something blocks
xd->setCallback ( st , gotXmlDoc );
// reset this flag
st->m_donePrinting = false;
// prevent a core here in the event we download the page content
xd->m_crawlDelayValid = true;
xd->m_crawlDelay = 0;
// . set xd from the old title rec if recycle is true
// . can also use XmlDoc::m_loadFromOldTitleRec flag
//if ( st->m_recycle ) xd->m_recycleContent = true;
// only recycle if docid is given!!
// NOTE(review): this is the no-docid path, and the test is on
// st->m_recycle rather than on the docid -- the comment above and the
// code disagree
if ( st->m_recycle ) xd->m_recycleContent = true;
// force this on
//xd->m_useSiteLinkBuf = true;
//xd->m_usePageLinkBuf = true;
if ( isXml ) xd->m_printInXml = true;
return gotXmlDoc ( st );
}
示例2: sendPageGet
//.........这里部分代码省略.........
// Excerpt from sendPageGet; the start of the function is not shown.
// What the visible part does:
//  - allocates a State2 and fills it from the request "r"
//  - points the State2's XmlDoc at the page, by url (set4) when "url" is
//    non-NULL, otherwise by docid (set3)
//  - on a setter failure frees the State2 and sends a 500 reply
//  - otherwise installs processLoopWrapper as the callback and returns
//    whatever processLoop(st) returns
// . we need to match summary here so we need to know this
//bool seq = r->getLong ( "seq" , false );
// restrict to root file?
bool rtq = r->getLong ( "rtq" , false );
// . get the titleRec
// . TODO: redirect client to a better http server to save bandwidth
State2 *st ;
try { st = new (State2); }
catch (... ) {
	g_errno = ENOMEM;
	log("PageGet: new(%i): %s",
	    (int)sizeof(State2),mstrerror(g_errno));
	return g_httpServer.sendErrorReply(s,500,mstrerror(g_errno));}
mnew ( st , sizeof(State2) , "PageGet1" );
// save the socket and if Host: is local in the Http request Mime
st->m_socket = s;
st->m_isAdmin = g_conf.isCollAdmin ( s , r );
st->m_isLocal = r->isLocal();
st->m_docId = docId;
st->m_printed = false;
// include header ... "this page cached by Gigablast on..."
st->m_includeHeader = r->getLong ("ih" , true );
st->m_includeBaseHref = r->getLong ("ibh" , false );
st->m_queryHighlighting = r->getLong ("qh" , true );
st->m_strip = r->getLong ("strip" , 0 );
st->m_clickAndScroll = r->getLong ("cas" , true );
st->m_cnsPage = r->getLong ("cnsp" , true );
// language defaults to unknown unless "qlang" names one
char *langAbbr = r->getString("qlang",NULL);
st->m_langId = langUnknown;
if ( langAbbr ) {
	uint8_t langId = getLangIdFromAbbr ( langAbbr );
	st->m_langId = langId;
}
strncpy ( st->m_coll , coll , MAX_COLL_LEN+1 );
// strncpy() writes no terminator when "coll" fills all MAX_COLL_LEN+1
// bytes, so terminate inside the range it was allowed to write
st->m_coll[MAX_COLL_LEN] = '\0';
st->m_netTestResults = r->getLong ("rnettest", false );
//if( st->m_netTestResults ) {
//	mdelete ( st , sizeof(State2) , "PageGet1" );
//	delete ( st );
//	return sendPageNetResult( s );
//}
// store query for query highlighting
// NOTE(review): unbounded strcpy of the query -- confirm that qlen is
// checked against the size of st->m_q before this point
if ( q && qlen > 0 ) strcpy ( st->m_q , q );
else st->m_q[0] = '\0';
st->m_qlen = qlen;
//st->m_seq = seq;
st->m_rtq = rtq;
st->m_boolFlag = r->getLong ("bq", 2 /*default is 2*/ );
st->m_isBanned = false;
st->m_noArchive = false;
// (already assigned above; kept as in the original)
st->m_socket = s;
st->m_format = r->getReplyFormat();
// default to 0 niceness
st->m_niceness = 0;
st->m_r.copy ( r );
//st->m_cr = cr;
// the disclaimer is printed unless "cnsp" or "strip" is in effect
st->m_printDisclaimer = true;
if ( st->m_cnsPage )
	st->m_printDisclaimer = false;
if ( st->m_strip ) // ! st->m_evbits.isEmpty() )
	st->m_printDisclaimer = false;
// should we cache it?
// NOTE(review): wcache and cacheAge are computed here but not read again
// in the lines shown
char useCache = r->getLong ( "usecache" , 1 );
char rcache = r->getLong ( "rcache" , 1 );
char wcache = r->getLong ( "wcache" , 1 );
long cacheAge = r->getLong ( "cacheAge" , 60*60 ); // default one hour
if ( useCache == 0 ) { cacheAge = 0; wcache = 0; }
if ( rcache == 0 ) cacheAge = 0;
// . fetch the TitleRec
// . a max cache age of 0 means not to read from the cache
XmlDoc *xd = &st->m_xd;
// set the doc from the url when one was given, else from the docid
bool setOk;
if ( url ) {
	SpiderRequest sreq;
	sreq.reset();
	// NOTE(review): unbounded copy -- confirm "url" is length-checked
	// upstream
	strcpy(sreq.m_url, url );
	sreq.setDataSize();
	// this returns false if "coll" is invalid
	setOk = xd->set4 ( &sreq , NULL , coll , NULL , st->m_niceness );
}
else {
	// . when getTitleRec() is called it will load the old one
	//   since XmlDoc::m_setFromTitleRec will be true
	// . niceness is 0
	// . use st->m_coll since XmlDoc just points to it!
	// . this returns false if "coll" is invalid
	setOk = xd->set3 ( docId , st->m_coll , 0 );
}
if ( ! setOk ) {
	// "xd" lives inside "st", so it must not be touched after this
	mdelete ( st , sizeof(State2) , "PageGet1" );
	delete ( st );
	// report the error the setter recorded (e.g. an invalid "coll");
	// only fall back to ENOMEM when none was recorded
	if ( ! g_errno ) g_errno = ENOMEM;
	log("PageGet: set3: %s", mstrerror(g_errno));
	return g_httpServer.sendErrorReply(s,500,mstrerror(g_errno));
}
// if it blocks while it loads title rec, it will re-call this routine
xd->setCallback ( st , processLoopWrapper );
// good to go!
return processLoop ( st );
}
示例3: sendPageParser2
//.........这里部分代码省略.........
"<br>",
//oips ,
contentParm );
// Excerpt from sendPageParser2; the start of the function is not shown.
// Visible steps: finish printing the form's submit button, return early
// via processLoop() when no url was given, otherwise build a SpiderRequest
// for st->m_u, collect any content found in the request, hand both to
// XmlDoc::set4() and continue in processLoop().
xbuf->safePrintf(
"<center>"
"<input type=submit value=Submit>"
"</center>"
);
// just print the page if no url given
if ( ! st->m_u || ! st->m_u[0] ) return processLoop ( st );
XmlDoc *xd = &st->m_xd;
// set this up
SpiderRequest sreq;
sreq.reset();
// NOTE(review): unbounded copy -- confirm the length of st->m_u is
// validated before this point
strcpy(sreq.m_url,st->m_u);
// the url hash stands in for a real first ip (m_fakeFirstIp is set below)
long firstIp = hash32n(st->m_u);
// 0 and -1 are remapped to 1
if ( firstIp == -1 || firstIp == 0 ) firstIp = 1;
// parentdocid of 0
sreq.setKey( firstIp, 0LL, false );
sreq.m_isPageParser = 1;
sreq.m_hopCount = st->m_hopCount;
sreq.m_hopCountValid = 1;
sreq.m_fakeFirstIp = 1;
sreq.m_firstIp = firstIp;
// parse the url to fill in the domain and host hashes of the request
Url nu;
nu.set(sreq.m_url);
sreq.m_domHash32 = nu.getDomainHash32();
sreq.m_siteHash32 = nu.getHostHash32();
// . get provided content if any
// . will be NULL if none provided
// . "content" may contain a MIME
long contentLen = 0;
char *content = r->getString ( "content" , &contentLen , NULL );
// is the "content" url-encoded? default is true.
// NOTE(review): contentIsEncoded is assigned but never read in the lines
// shown here
bool contentIsEncoded = true;
// mark doesn't like to url-encode his content
if ( ! content ) {
content = r->getUnencodedContent ();
contentLen = r->getUnencodedContentLen ();
contentIsEncoded = false;
}
// ensure null
if ( contentLen == 0 ) content = NULL;
// content type: CT_HTML, or CT_XML when "xml" is set; an explicit "ctype"
// overrides either
uint8_t contentType = CT_HTML;
if ( r->getBool("xml",0) ) contentType = CT_XML;
// NOTE(review): getLong() is used as a long elsewhere in this file and is
// narrowed to uint8_t here
contentType = r->getLong("ctype",contentType);//CT_HTML);
// if facebook, load xml content from title rec...
// (strstr matches the prefix anywhere in the url, not only at its start)
bool isFacebook = (bool)strstr(st->m_u,"http://www.facebook.com/");
if ( isFacebook && ! content ) {
// replace the request url with the decimal docid and flag it as a
// reindex; the domain/host hashes above were already taken from the
// original url
long long docId = g_titledb.getProbableDocId(st->m_u);
// NOTE(review): "%llu" is given a signed long long
sprintf(sreq.m_url ,"%llu", docId );
sreq.m_isPageReindex = true;
}
// hack
// copy the content into st->m_dbuf and use that copy from here on
// NOTE(review): the copy goes through safeStrcpy(content) and contentLen
// is not passed -- presumably content holding a NUL byte is cut short;
// confirm
if ( content ) {
st->m_dbuf.purge();
st->m_dbuf.safeStrcpy(content);
//char *data = strstr(content,"\r\n\r\n");
//long dataPos = 0;
//if ( data ) dataPos = (data + 4) - content;
//st->m_dbuf.convertJSONtoXML(0,dataPos);
//st->m_dbuf.decodeJSON(0);
content = st->m_dbuf.getBufStart();
}
// . use the enormous power of our new XmlDoc class
// . this returns false if blocked
// NOTE(review): the line above says false means "blocked", yet a false
// return is answered with an error reply built from g_errno -- confirm
// which contract set4() actually has
if ( ! xd->set4 ( &sreq ,
NULL ,
st->m_coll ,
&st->m_wbuf ,
0 ,//PP_NICENESS ))
content ,
false, // deletefromindex
0, // forced ip
contentType ))
// return error reply if g_errno is set
return sendErrorReply ( st , g_errno );
// make this our callback in case something blocks
xd->setCallback ( st , processLoop );
// . set xd from the old title rec if recycle is true
// . can also use XmlDoc::m_loadFromOldTitleRec flag
if ( st->m_recycle ) xd->m_recycleContent = true;
return processLoop ( st );
}
示例4: void
//.........这里部分代码省略.........
sreq.reset();
strcpy(sreq.m_url, cleanUrl );
// parentdocid of 0
long firstIp = hash32n(cleanUrl);
if ( firstIp == -1 || firstIp == 0 ) firstIp = 1;
sreq.setKey( firstIp,0LL, false );
sreq.m_isInjecting = 1;
sreq.m_isPageInject = 1;
sreq.m_hopCount = hopCount;
sreq.m_hopCountValid = 1;
sreq.m_fakeFirstIp = 1;
sreq.m_firstIp = firstIp;
// shortcut
XmlDoc *xd = &m_xd;
// log it now
//log("inject: injecting doc %s",cleanUrl);
static char s_dummy[3];
// sometims the content is indeed NULL...
if ( newOnly && ! content ) {
// don't let it be NULL because then xmldoc will
// try to download the page!
s_dummy[0] = '\0';
content = s_dummy;
//char *xx=NULL;*xx=0; }
}
// . use the enormous power of our new XmlDoc class
// . this returns false with g_errno set on error
if ( //m_needsSet &&
! xd->set4 ( &sreq ,
NULL ,
m_coll ,
NULL , // pbuf
// give it a niceness of 1, we have to be
// careful since we are a niceness of 0!!!!
niceness, // 1 ,
// inject this content
content ,
deleteIt, // false, // deleteFromIndex ,
forcedIp ,
contentType ,
lastSpidered ,
hasMime )) {
// g_errno should be set if that returned false
if ( ! g_errno ) { char *xx=NULL;*xx=0; }
return true;
}
// do not re-call the set
//m_needsSet = false;
// make this our callback in case something blocks
xd->setCallback ( state , callback );
xd->m_doConsistencyTesting = doConsistencyTesting;
// . set xd from the old title rec if recycle is true
// . can also use XmlDoc::m_loadFromOldTitleRec flag
if ( recycleContent ) xd->m_recycleContent = true;
// othercrap
if ( firstIndexed ) {
xd->m_firstIndexedDate = firstIndexed;
xd->m_firstIndexedDateValid = true;