This page collects typical usage examples of the C++ method SafeBuf::nullTerm. If you have been wondering what C++ SafeBuf::nullTerm does, how to call it, or what real uses of it look like, the curated method examples here may help. You can also explore further usage examples of SafeBuf, the class this method belongs to.
Below are 8 code examples of the SafeBuf::nullTerm method, sorted by popularity by default.
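Before the examples, a minimal sketch of the pattern they all share may help: SafeBuf accumulates raw bytes, and nullTerm() writes a trailing '\0' so the buffer can be passed to C string APIs via getBufStart(). The function below is hypothetical and only illustrates the calls (safePrintf, urlEncode, nullTerm, getBufStart) that recur throughout the real examples.

// Hypothetical illustration of the usual SafeBuf + nullTerm() flow.
bool buildQueryString ( SafeBuf &sb , const char *q ) {
	if ( ! sb.safePrintf ( "/search?q=" ) ) return false;
	if ( ! sb.urlEncode ( q ) ) return false;
	// write the trailing '\0'; like the examples below, check the
	// return value where an out-of-memory failure matters
	if ( ! sb.nullTerm () ) return false;
	log ( "query: built %s" , sb.getBufStart() );
	return true;
}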
Example 1: getCompoundName
bool JsonItem::getCompoundName ( SafeBuf &nameBuf ) {
// reset, but don't free mem etc. just set m_length to 0
nameBuf.reset();
// get its full compound name like "meta.twitter.title"
JsonItem *p = this;
char *lastName = NULL;
char *nameArray[20];
int32_t numNames = 0;
for ( ; p ; p = p->m_parent ) {
// empty name?
if ( ! p->m_name ) continue;
if ( ! p->m_name[0] ) continue;
// dup? can happen with arrays: an object inside an array
// carries the same m_name pointer as its parent, the name of
// the array itself, e.g. "dupname":[{"a":"b"},{"c":"d"}]
if ( p->m_name == lastName ) continue;
// update
lastName = p->m_name;
// add it up
nameArray[numNames++] = p->m_name;
// about to breach nameArray[]? stop collecting names
if ( numNames < 15 ) continue;
log("build: too many names in json tag");
break;
}
// the names were collected leaf-to-root, so emit them in reverse to get the correct root-to-leaf order
for ( int32_t i = 1 ; i <= numNames ; i++ ) {
// copy into our safebuf
if ( ! nameBuf.safeStrcpy ( nameArray[numNames-i]) )
return false;
// separate names with periods
if ( ! nameBuf.pushChar('.') ) return false;
}
// remove last period
nameBuf.removeLastChar('.');
// and null terminate
if ( ! nameBuf.nullTerm() ) return false;
// change all :'s in names to .'s since : is reserved!
char *px = nameBuf.getBufStart();
for ( ; *px ; px++ ) if ( *px == ':' ) *px = '.';
return true;
}
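A hedged usage sketch of the method above: the caller is assumed to hold a leaf JsonItem from an already-parsed document; everything except getCompoundName() itself is hypothetical here.

// Hypothetical caller: ji points at the "title" leaf of a parsed
// {"meta":{"twitter":{"title":"..."}}} document.
SafeBuf nameBuf;
if ( ! ji->getCompoundName ( nameBuf ) )
	return false; // OOM growing the name buffer
// nameBuf now holds the NUL-terminated dotted path, e.g.
// "meta.twitter.title", with any ':' in names rewritten to '.'
log ( "build: compound name = %s" , nameBuf.getBufStart() );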
Example 2: processLoop
//......... portions of this code omitted .........
//p += 5;
}
if ( st->m_strip == 1 )
contentLen = stripHtml( content, contentLen,
(int32_t)xd->m_version, st->m_strip );
// it returns -1 and sets g_errno on error, like OOM
if ( contentLen == -1 ) {
//if ( buf ) mfree ( buf , bufMaxSize , "PageGet2" );
return sendErrorReply ( st , g_errno );
}
Xml xml;
Words ww;
// if no highlighting, skip it
bool queryHighlighting = st->m_queryHighlighting;
if ( st->m_strip == 2 ) queryHighlighting = false;
// do not do term highlighting if json
if ( xd->m_contentType == CT_JSON )
queryHighlighting = false;
if ( xd->m_contentType == CT_STATUS )
queryHighlighting = false;
SafeBuf tmp;
SafeBuf *xb = sb;
if ( format == FORMAT_XML ) xb = &tmp;
if ( format == FORMAT_JSON ) xb = &tmp;
if ( ! queryHighlighting ) {
xb->safeMemcpy ( content , contentLen );
xb->nullTerm();
//p += contentLen ;
}
else {
// get the content as xhtml (should be NULL terminated)
//Words *ww = xd->getWords();
if ( ! xml.set ( content , contentLen , false ,
0 , false , TITLEREC_CURRENT_VERSION ,
false , 0 , CT_HTML ) ) { // niceness is 0
//if ( buf ) mfree ( buf , bufMaxSize , "PageGet2" );
return sendErrorReply ( st , g_errno );
}
if ( ! ww.set ( &xml , true , 0 ) ) { // niceness is 0
//if ( buf ) mfree ( buf , bufMaxSize , "PageGet2" );
return sendErrorReply ( st , g_errno );
}
// sanity check
//if ( ! xd->m_wordsValid ) { char *xx=NULL;*xx=0; }
// how much space left in p?
//avail = bufEnd - p;
Matches m;
m.setQuery ( &qq );
m.addMatches ( &ww );
hilen = hi.set ( xb , // p , avail ,
&ww , &m ,
false /*doStemming?*/ ,
st->m_clickAndScroll ,
thisUrl /*base url for click & scroll*/);
//p += hilen;
log(LOG_DEBUG, "query: Done highlighting cached page content");
}
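Note the pattern in the unhighlighted branch above: safeMemcpy() copies contentLen raw bytes with no terminator, so the following nullTerm() call is what makes the buffer usable as a C string downstream. A minimal standalone sketch of that idiom (the buffer contents and length here are placeholders):

// Sketch of the copy-then-terminate idiom from Example 2.
SafeBuf out;
const char *content    = "raw page bytes";   // placeholder content
int32_t     contentLen = 14;                 // placeholder length
out.safeMemcpy ( content , contentLen );     // copies bytes, no '\0'
out.nullTerm   ();                           // now safe for C string APIs
printf ( "%s\n" , out.getBufStart() );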
Example 3: qaspider1
bool qaspider1 ( ) {
//
// delete the 'qatest123' collection
//
//static bool s_x1 = false;
if ( ! s_flags[0] ) {
s_flags[0] = true;
if ( ! getUrl ( "/admin/delcoll?xml=1&delcoll=qatest123" ) )
return false;
}
//
// add the 'qatest123' collection
//
//static bool s_x2 = false;
if ( ! s_flags[1] ) {
s_flags[1] = true;
if ( ! getUrl ( "/admin/addcoll?addcoll=qatest123&xml=1" ,
// checksum of reply expected
238170006 ) )
return false;
}
// restrict hopcount to 0 or 1 in url filters so we do not spider
// too deep
//static bool s_z1 = false;
if ( ! s_flags[2] ) {
s_flags[2] = true;
SafeBuf sb;
sb.safePrintf("&c=qatest123&"
// make it the custom filter
"ufp=0&"
"fe=%%21ismanualadd+%%26%%26+%%21insitelist&hspl=0&hspl=1&fsf=0.000000&mspr=0&mspi=1&xg=1000&fsp=-3&"
// take out hopcount for now, just test quotas
// "fe1=tag%%3Ashallow+%%26%%26+hopcount%%3C%%3D1&hspl1=0&hspl1=1&fsf1=1.000000&mspr1=1&mspi1=1&xg1=1000&fsp1=3&"
// just one spider out allowed for consistency
"fe1=tag%%3Ashallow+%%26%%26+sitepages%%3C%%3D20&hspl1=0&hspl1=1&fsf1=1.000000&mspr1=1&mspi1=1&xg1=1000&fsp1=45&"
"fe2=default&hspl2=0&hspl2=1&fsf2=1.000000&mspr2=0&mspi2=1&xg2=1000&fsp2=45&"
);
if ( ! getUrl ( "/admin/filters",0,sb.getBufStart()) )
return false;
}
// set the site list to
// a few sites
//static bool s_z2 = false;
if ( ! s_flags[3] ) {
s_flags[3] = true;
SafeBuf sb;
sb.safePrintf("&c=qatest123&format=xml&sitelist=");
sb.urlEncode("tag:shallow site:www.walmart.com\r\n"
"tag:shallow site:http://www.ibm.com/\r\n");
sb.nullTerm();
if ( ! getUrl ("/admin/settings",0,sb.getBufStart() ) )
return false;
}
//
// use the add url interface now
// walmart.com above was not seeded because of the site: directive
// so this will seed it.
//
//static bool s_y2 = false;
if ( ! s_flags[4] ) {
s_flags[4] = true;
SafeBuf sb;
// delim=+++URL:
sb.safePrintf("&c=qatest123"
"&format=json"
"&strip=1"
"&spiderlinks=1"
"&urls=www.walmart.com+ibm.com"
);
// . now a list of websites we want to spider
// . the space is already encoded as +
//sb.urlEncode(s_urls1);
if ( ! getUrl ( "/admin/addurl",0,sb.getBufStart()) )
return false;
}
//
// wait for spidering to stop
//
checkagain:
// wait until spider finishes. check the spider status page
// in json to see when completed
//static bool s_k1 = false;
if ( ! s_flags[5] ) {
// wait 5 seconds, call sleep timer... then call qatest()
//usleep(5000000); // 5 seconds
wait(3.0);
s_flags[5] = true;
return false;
}
//......... portions of this code omitted .........
Example 4: qainject1
//
// the injection qa test suite
//
bool qainject1 ( ) {
//if ( ! s_callback ) s_callback = qainject1;
//
// delete the 'qatest123' collection
//
//static bool s_x1 = false;
if ( ! s_flags[0] ) {
s_flags[0] = true;
if ( ! getUrl ( "/admin/delcoll?xml=1&delcoll=qatest123" ) )
return false;
}
//
// add the 'qatest123' collection
//
//static bool s_x2 = false;
if ( ! s_flags[1] ) {
s_flags[1] = true;
if ( ! getUrl ( "/admin/addcoll?addcoll=qatest123&xml=1" ,
// checksum of reply expected
238170006 ) )
return false;
}
// this only loads once
loadUrls();
long max = s_ubuf2.length()/(long)sizeof(char *);
//max = 1;
//
// inject urls, return false if not done yet
//
//static bool s_x4 = false;
if ( ! s_flags[2] ) {
// TODO: try delimiter-based injection too
//static long s_ii = 0;
for ( ; s_flags[20] < max ; ) {
// inject using html api
SafeBuf sb;
sb.safePrintf("&c=qatest123&deleteurl=0&"
"format=xml&u=");
sb.urlEncode ( s_urlPtrs[s_flags[20]] );
// the content
sb.safePrintf("&hasmime=1");
// sanity
//if ( strstr(s_urlPtrs[s_flags[20]],"wdc.htm") )
// log("hey");
sb.safePrintf("&content=");
sb.urlEncode(s_contentPtrs[s_flags[20]] );
sb.nullTerm();
// pre-inc it in case getUrl() blocks
s_flags[20]++;//ii++;
if ( ! getUrl("/admin/inject",
0, // no idea what crc to expect
sb.getBufStart()) )
return false;
}
s_flags[2] = true;
}
// +the
//static bool s_x5 = false;
if ( ! s_flags[3] ) {
wait(1.5);
s_flags[3] = true;
return false;
}
if ( ! s_flags[16] ) {
s_flags[16] = true;
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=xml&q=%2Bthe",
702467314 ) )
return false;
}
// sports news
//static bool s_x7 = false;
if ( ! s_flags[4] ) {
s_flags[4] = true;
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=xml&"
"q=sports+news",2009472889 ) )
return false;
}
// 'washer & dryer' does some algorithmic synonyms 'washer and dryer'
if ( ! s_flags[15] ) {
s_flags[15] = true;
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=xml&"
"debug=1&q=washer+%26+dryer",9999 ) )
return false;
}
//
// mdw: query reindex test
//
//......... portions of this code omitted .........
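Examples 3 through 5 share a re-entrant test-driver pattern worth noting: each step is guarded by an s_flags slot, and the loop cursor is advanced before getUrl() is called, so that when the call blocks and the function is re-entered by the completion callback, the same URL is not submitted twice. Below is a hypothetical condensed sketch of that state machine; s_flags, max, s_urlPtrs and getUrl() are the names used in the examples above.

// Condensed sketch of the s_flags-driven state machine used by the
// QA tests. getUrl() returns false when it blocks; its completion
// callback re-enters the enclosing function, which skips every step
// already marked done.
bool qaSketch ( ) {
	// a run-once step: mark it done BEFORE the call that may block
	if ( ! s_flags[0] ) {
		s_flags[0] = true;
		if ( ! getUrl ( "/admin/delcoll?xml=1&delcoll=qatest123" ) )
			return false; // blocked; we resume from the top later
	}
	// a step with a cursor: s_flags[20] indexes the next url to inject
	for ( ; s_flags[20] < max ; ) {
		SafeBuf sb;
		sb.safePrintf ( "&c=qatest123&format=xml&u=" );
		sb.urlEncode  ( s_urlPtrs[s_flags[20]] );
		sb.nullTerm   ();
		// pre-increment in case getUrl() blocks and we re-enter
		s_flags[20]++;
		if ( ! getUrl ( "/admin/inject" , 0 , sb.getBufStart() ) )
			return false;
	}
	return true;
}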
Example 5: qaspider2
bool qaspider2 ( ) {
//
// delete the 'qatest123' collection
//
//static bool s_x1 = false;
if ( ! s_flags[0] ) {
s_flags[0] = true;
if ( ! getUrl ( "/admin/delcoll?xml=1&delcoll=qatest123" ) )
return false;
}
//
// add the 'qatest123' collection
//
//static bool s_x2 = false;
if ( ! s_flags[1] ) {
s_flags[1] = true;
if ( ! getUrl ( "/admin/addcoll?addcoll=qatest123&xml=1" ,
// checksum of reply expected
238170006 ) )
return false;
}
// restrict hopcount to 0 or 1 in url filters so we do not spider
// too deep
//static bool s_z1 = false;
if ( ! s_flags[2] ) {
s_flags[2] = true;
SafeBuf sb;
sb.safePrintf("&c=qatest123&"
// make it the custom filter
"ufp=0&"
"fe=%%21ismanualadd+%%26%%26+%%21insitelist&hspl=0&hspl=1&fsf=0.000000&mspr=0&mspi=1&xg=1000&fsp=-3&"
// take out hopcount for now, just test quotas
// "fe1=tag%%3Ashallow+%%26%%26+hopcount%%3C%%3D1&hspl1=0&hspl1=1&fsf1=1.000000&mspr1=1&mspi1=1&xg1=1000&fsp1=3&"
// sitepages is a little fuzzy so take it
// out for this test and use hopcount!!!
//"fe1=tag%%3Ashallow+%%26%%26+sitepages%%3C%%3D20&hspl1=0&hspl1=1&fsf1=1.000000&mspr1=1&mspi1=1&xg1=1000&fsp1=45&"
"fe1=tag%%3Ashallow+%%26%%26+hopcount<%%3D1&hspl1=0&hspl1=1&fsf1=1.000000&mspr1=1&mspi1=1&xg1=1000&fsp1=45&"
"fe2=default&hspl2=0&hspl2=1&fsf2=1.000000&mspr2=0&mspi2=1&xg2=1000&fsp2=45&"
);
if ( ! getUrl ( "/admin/filters",0,sb.getBufStart()) )
return false;
}
// set the site list to
// a few sites
// these should auto seed so no need to use addurl
//static bool s_z2 = false;
if ( ! s_flags[3] ) {
s_flags[3] = true;
SafeBuf sb;
sb.safePrintf("&c=qatest123&format=xml&sitelist=");
sb.urlEncode(//walmart has too many pages at depth 1, so remove it
//"tag:shallow www.walmart.com\r\n"
"tag:shallow http://www.ibm.com/\r\n");
sb.nullTerm();
if ( ! getUrl ("/admin/settings",0,sb.getBufStart() ) )
return false;
}
//
// wait for spidering to stop
//
checkagain:
// wait until spider finishes. check the spider status page
// in json to see when completed
//static bool s_k1 = false;
if ( ! s_flags[4] ) {
//usleep(5000000); // 5 seconds
s_flags[4] = true;
wait(3.0);
return false;
}
if ( ! s_flags[14] ) {
s_flags[14] = true;
if ( ! getUrl ( "/admin/status?format=json&c=qatest123",0) )
return false;
}
//static bool s_k2 = false;
if ( ! s_flags[5] ) {
// ensure spiders are done.
// "Nothing currently available to spider"
if ( s_content && ! strstr ( s_content , "Nothing currently avail" ) ){
s_flags[4] = false;
s_flags[14] = false;
goto checkagain;
}
s_flags[5] = true;
}
//......... portions of this code omitted .........
Example 6: getBestWindow
//......... portions of this code omitted .........
// comes to us. also mark how many times the same word is repeated in
// this summary.
int64_t score = 0LL;
// is a url contained in the summary, that looks bad! punish!
bool hasUrl = false;
// the word count we did above was just an approximate. count it right
wordCount = 0;
// for debug
//char buf[5000];
//char *xp = buf;
SafeBuf xp;
// sanity: clamp the window end to the word count
if ( b > nw ) {
b = nw;
}
// score the words in the window from a up to b, including the match
for ( int32_t i = a ; i < b ; i++ ) {
// debug print out
if ( g_conf.m_logDebugSummary ) {
int32_t len = words->getWordLen(i);
char cs;
for (int32_t k=0;k<len; k+=cs ) {
const char *c = words->getWord(i)+k;
cs = getUtf8CharSize(c);
if ( is_binary_utf8 ( c ) ) {
continue;
}
xp.safeMemcpy ( c , cs );
xp.nullTerm();
}
}
// skip if in bad section, marquee, select, script, style
if ( sp && (sp[i]->m_flags & badFlags) ) {
continue;
}
// don't count just numeric words
if ( words->isNum(i) ) {
continue;
}
// check if there is a url. best way to check for '://'
if ( wids && !wids[i] ) {
const char *wrd = words->getWord(i);
int32_t wrdLen = words->getWordLen(i);
if ( wrdLen == 3 && wrd[0] == ':' && wrd[1] == '/' && wrd[2] == '/' ) {
hasUrl = true;
}
}
// skip if not wid
if ( ! wids[i] ) {
continue;
}
// just make every word 100 pts
int32_t t = 100;
// penalize it if in one of these sections
if ( bb[i] & ( D_IN_PARENS | D_IN_SUP | D_IN_LIST ) ) {
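One detail in the debug loop above is worth isolating: it advances by getUtf8CharSize() rather than one byte, so multi-byte UTF-8 sequences are copied whole and binary noise is skipped. Below is a self-contained restatement of that idiom using the same helpers the excerpt calls; unlike the excerpt, it terminates the buffer once at the end rather than after every character, which is equivalent and cheaper.

// UTF-8-aware copy of one word into a SafeBuf: step k by the byte
// width of each character, never splitting a multi-byte sequence.
void copyUtf8Word ( SafeBuf &xp , const char *word , int32_t len ) {
	char cs;
	for ( int32_t k = 0 ; k < len ; k += cs ) {
		const char *c = word + k;
		cs = getUtf8CharSize ( c );           // 1 to 4 bytes
		if ( is_binary_utf8 ( c ) ) continue; // skip binary junk
		xp.safeMemcpy ( c , cs );
	}
	xp.nullTerm();
}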
Example 7: scrapeQuery
// . "uf" is printf url format to scrape with a %s for the query
// . example: uf="http://www.google.com/search?num=50&q=%s&scoring=d&filter=0";
bool Msg7::scrapeQuery ( ) {
// advance round now in case we return early
m_round++;
GigablastRequest *gr = &m_gr;
// error?
char *qts = gr->m_queryToScrape;
if ( ! qts ) { char *xx=NULL;*xx=0; }
if ( gbstrlen(qts) > 500 ) {
g_errno = EQUERYTOOBIG;
return true;
}
// first encode the query
SafeBuf ebuf;
ebuf.urlEncode ( qts ); // queryUNEncoded );
ebuf.nullTerm();
char *uf;
if ( m_round == 1 )
// set to 1 for debugging
uf="http://www.google.com/search?num=20&"
"q=%s&scoring=d&filter=0";
//uf = "https://startpage.com/do/search?q=%s";
//uf = "http://www.google.com/"
// "/cse?cx=013269018370076798483%3A8eec3papwpi&"
// "ie=UTF-8&q=%s&"
// "num=20";
else
uf="http://www.bing.com/search?q=%s";
// skip bing for now
//if ( m_round == 2 )
// return true;
//if ( m_round == 1 )
// return true;
// make the url we will download
char ubuf[2048];
sprintf ( ubuf , uf , ebuf.getBufStart() );
// log it
log("inject: SCRAPING %s",ubuf);
SpiderRequest sreq;
sreq.reset();
// set the SpiderRequest
strcpy(sreq.m_url, ubuf);
// . tell it to only add the hosts of each outlink for now!
// . that will be passed on to when XmlDoc calls Links::set() i guess
// . xd will not reschedule the scraped url into spiderdb either
sreq.m_isScraping = 1;
sreq.m_fakeFirstIp = 1;
long firstIp = hash32n(ubuf);
if ( firstIp == 0 || firstIp == -1 ) firstIp = 1;
sreq.m_firstIp = firstIp;
// parent docid is 0
sreq.setKey(firstIp,0LL,false);
char *coll2 = gr->m_coll;
CollectionRec *cr = g_collectiondb.getRec ( coll2 );
// forceDEl = false, niceness = 0
m_xd.set4 ( &sreq , NULL , cr->m_coll , NULL , 0 );
//m_xd.m_isScraping = true;
// download without throttling
//m_xd.m_throttleDownload = false;
// disregard this
m_xd.m_useRobotsTxt = false;
// this will tell it to index ahrefs first before indexing
// the doc. but do NOT do this if we are from ahrefs.com
// ourselves to avoid recursive explosion!!
if ( m_useAhrefs )
m_xd.m_useAhrefs = true;
m_xd.m_reallyInjectLinks = true;//gr->m_injectLinks;
//
// rather than just add the links of the page to spiderdb,
// let's inject them!
//
m_xd.setCallback ( this , doneInjectingLinksWrapper );
// niceness is 0
m_linkDedupTable.set(4,0,512,NULL,0,false,0,"ldtab2");
// do we actually inject the links, or just scrape?
if ( ! m_xd.injectLinks ( &m_linkDedupTable ,
NULL,
this ,
doneInjectingLinksWrapper ) )
//......... portions of this code omitted .........
Example 8: addToCookieJar
bool HttpMime::addToCookieJar(Url *currentUrl, SafeBuf *sb) {
/// @note Slightly modified from Netscape HTTP Cookie File format
/// Difference is we only have one column for name/value
// http://www.cookiecentral.com/faq/#3.5
// The layout of Netscape's cookies.txt file is such that each line contains one name-value pair.
// An example cookies.txt file may have an entry that looks like this:
// .netscape.com TRUE / FALSE 946684799 NETSCAPE_ID 100103
//
// Each line represents a single piece of stored information. A tab is inserted between each of the fields.
// From left-to-right, here is what each field represents:
//
// domain - The domain that created AND that can read the variable.
// flag - A TRUE/FALSE value indicating if all machines within a given domain can access the variable. This value is set automatically by the browser, depending on the value you set for domain.
// path - The path within the domain that the variable is valid for.
// secure - A TRUE/FALSE value indicating if a secure connection with the domain is needed to access the variable.
// expiration - The UNIX time that the variable will expire on. UNIX time is defined as the number of seconds since Jan 1, 1970 00:00:00 GMT.
// name/value - The name/value of the variable.
/// @todo ALC we should sort cookie-list
// The user agent SHOULD sort the cookie-list in the following order:
// * Cookies with longer paths are listed before cookies with shorter paths.
// * Among cookies that have equal-length path fields, cookies with earlier creation-times are listed
// before cookies with later creation-times.
// fill in cookies from cookieJar
std::map<std::string, httpcookie_t> oldCookies;
const char *cookieJar = sb->getBufStart();
int32_t cookieJarLen = sb->length();
const char *lineStartPos = cookieJar;
const char *lineEndPos = NULL;
while ((lineEndPos = (const char*)memchr(lineStartPos, '\n', cookieJarLen - (lineStartPos - cookieJar))) != NULL) {
const char *currentPos = lineStartPos;
const char *tabPos = NULL;
unsigned fieldCount = 0;
httpcookie_t cookie = {};
while (fieldCount < 5 && (tabPos = (const char*)memchr(currentPos, '\t', lineEndPos - currentPos)) != NULL) {
switch (fieldCount) {
case 0:
// domain
cookie.m_domain = currentPos;
cookie.m_domainLen = tabPos - currentPos;
break;
case 1:
// flag
if (memcmp(currentPos, "TRUE", 4) != 0) {
cookie.m_defaultDomain = true;
}
break;
case 2: {
// path
cookie.m_path = currentPos;
cookie.m_pathLen = tabPos - currentPos;
} break;
case 3:
// secure
cookie.m_secure = (memcmp(currentPos, "TRUE", 4) == 0);
break;
case 4:
// expiration
break;
}
currentPos = tabPos + 1;
++fieldCount;
}
cookie.m_cookie = currentPos;
cookie.m_cookieLen = lineEndPos - currentPos;
const char *equalPos = (const char *)memchr(cookie.m_cookie, '=', cookie.m_cookieLen);
if (equalPos) {
cookie.m_nameLen = equalPos - cookie.m_cookie;
oldCookies[std::string(cookie.m_cookie, cookie.m_nameLen)] = cookie;
}
lineStartPos = lineEndPos + 1;
}
// we don't need to care about the last line (we always end on \n)
SafeBuf newCookieJar;
// add old cookies
for (auto &pair : oldCookies) {
if (m_cookies.find(pair.first) == m_cookies.end()) {
addCookie(pair.second, *currentUrl, &newCookieJar);
}
}
// add new cookies
for (auto &pair : m_cookies) {
addCookie(pair.second, *currentUrl, &newCookieJar);
}
newCookieJar.nullTerm();
//......... portions of this code omitted .........
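The addCookie() call used above is not part of this excerpt, so the serialization it performs is not shown. As a hedged guess at what one jar line looks like, based purely on the format notes at the top of Example 8 (tab-separated fields, single trailing name=value column), the sketch below writes one entry; the function name and expiration parameter are hypothetical, while the httpcookie_t fields are the ones the parser above reads.

// Illustrative only: serialize one cookie in the modified Netscape
// jar format documented above.
static void writeJarLine ( SafeBuf *jar , const httpcookie_t &c ,
                           long expiration ) {
	jar->safeMemcpy ( c.m_domain , c.m_domainLen );           // domain
	jar->safePrintf ( "\t%s\t" ,
	                  c.m_defaultDomain ? "FALSE" : "TRUE" ); // flag
	jar->safeMemcpy ( c.m_path , c.m_pathLen );               // path
	jar->safePrintf ( "\t%s\t%ld\t" ,
	                  c.m_secure ? "TRUE" : "FALSE" ,
	                  expiration );                           // secure, expiry
	jar->safeMemcpy ( c.m_cookie , c.m_cookieLen );           // name=value
	jar->pushChar ( '\n' );
	jar->nullTerm ();
}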