本文整理汇总了C++中SafeBuf::urlEncode方法的典型用法代码示例。如果您正苦于以下问题:C++ SafeBuf::urlEncode方法的具体用法?C++ SafeBuf::urlEncode怎么用?C++ SafeBuf::urlEncode使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类SafeBuf的用法示例。
在下文中一共展示了SafeBuf::urlEncode方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: deleteUrls
// Sends the delete request for the next test url that has not been
// handled yet.
//
// . each call handles at most ONE url: it builds
//   "/admin/inject?c=qatest123&delete=1&u=<url-encoded url>" and hands it
//   to getUrl() with qatestWrapper as the callback
// . the position in s_urlPtrs[] is kept in a function-local static so
//   successive calls walk the list from index 0 to s_numUrls-1
// . returns whatever getUrl() returns while urls remain, and true once
//   every url has been requested
bool deleteUrls ( ) {
	// index of the next url to delete. persists across calls.
	static long s_ii2 = 0;
	// all urls have been requested already
	if ( s_ii2 >= s_numUrls ) return true;
	// . remember the current slot, THEN advance for the next call
	// . the old code advanced first and then indexed with the new value,
	//   which skipped s_urlPtrs[0] and read s_urlPtrs[s_numUrls]
	//   (one past the end) on the final call
	long cur = s_ii2++;
	// delete using html api
	SafeBuf sb;
	sb.safePrintf( "/admin/inject?c=qatest123&delete=1&u=");
	sb.urlEncode ( s_urlPtrs[cur] );
	return getUrl ( sb.getBufStart() , qatestWrapper );
}
示例2: searchTest2
// ensure search results are consistent
// Runs the next query from s_queries[] so the caller can verify that the
// search results are consistent.
//
// . each call issues at most ONE query: it builds
//   "/search?c=qatest123&qa=1&q=<url-encoded query>" and hands it to
//   getUrl() with doneSearching2 as the callback
// . the position in s_queries[] is kept in s_qi2 (declared outside this
//   function) so successive calls walk the array from index 0 to nq-1
// . returns whatever getUrl() returns while queries remain, and true once
//   every query has been issued
bool searchTest2 () {
	// number of entries in the s_queries[] array
	long nq = sizeof(s_queries)/sizeof(char *);
	// every query has been issued already
	if ( s_qi2 >= nq ) return true;
	// . remember the current slot, THEN advance for the next call
	// . the old code advanced first and then indexed with the new value,
	//   which skipped s_queries[0] and read s_queries[nq]
	//   (one past the end) on the final call
	long cur = s_qi2++;
	// search using html api
	SafeBuf sb;
	// qa=1 tell gb to exclude "variable" or "random" things
	// from the serps so we can checksum it consistently
	sb.safePrintf ( "/search?c=qatest123&qa=1&q=" );
	sb.urlEncode ( s_queries[cur] );
	return getUrl ( sb.getBufStart() , doneSearching2 );
}
示例3: scrapeQuery
// . "uf" is printf url format to scrape with a %s for the query
// . example: uf="http://www.google.com/search?num=50&q=%s&scoring=d&filter=0";
bool Msg7::scrapeQuery ( ) {
// advance round now in case we return early
m_round++;
// error?
if ( m_qbuf.length() > 500 ) {
g_errno = EQUERYTOOBIG;
return true;
}
// first encode the query
SafeBuf ebuf;
ebuf.urlEncode ( m_qbuf.getBufStart() ); // queryUNEncoded );
char *uf;
if ( m_round == 1 )
// set to 1 for debugging
uf="http://www.google.com/search?num=20&"
"q=%s&scoring=d&filter=0";
//uf = "https://startpage.com/do/search?q=%s";
//uf = "http://www.google.com/"
// "/cse?cx=013269018370076798483%3A8eec3papwpi&"
// "ie=UTF-8&q=%s&"
// "num=20";
else
uf="http://www.bing.com/search?q=%s";
// skip bing for now
//if ( m_round == 2 )
// return true;
//if ( m_round == 1 )
// return true;
// make the url we will download
char ubuf[2048];
sprintf ( ubuf , uf , ebuf.getBufStart() );
// log it
log("inject: SCRAPING %s",ubuf);
SpiderRequest sreq;
sreq.reset();
// set the SpiderRequest
strcpy(sreq.m_url, ubuf);
// . tell it to only add the hosts of each outlink for now!
// . that will be passed on to when XmlDoc calls Links::set() i guess
// . xd will not reschedule the scraped url into spiderdb either
sreq.m_isScraping = 1;
sreq.m_fakeFirstIp = 1;
long firstIp = hash32n(ubuf);
if ( firstIp == 0 || firstIp == -1 ) firstIp = 1;
sreq.m_firstIp = firstIp;
// parent docid is 0
sreq.setKey(firstIp,0LL,false);
// forceDEl = false, niceness = 0
m_xd.set4 ( &sreq , NULL , m_coll , NULL , 0 );
//m_xd.m_isScraping = true;
// download without throttling
//m_xd.m_throttleDownload = false;
// disregard this
m_xd.m_useRobotsTxt = false;
// this will tell it to index ahrefs first before indexing
// the doc. but do NOT do this if we are from ahrefs.com
// ourselves to avoid recursive explosion!!
if ( m_useAhrefs )
m_xd.m_useAhrefs = true;
m_xd.m_reallyInjectLinks = m_injectLinks;
//
// rather than just add the links of the page to spiderdb,
// let's inject them!
//
m_xd.setCallback ( this , doneInjectingLinksWrapper );
// niceness is 0
m_linkDedupTable.set(4,0,512,NULL,0,false,0,"ldtab2");
// do we actually inject the links, or just scrape?
if ( ! m_xd.injectLinks ( &m_linkDedupTable ,
NULL,
this ,
doneInjectingLinksWrapper ) )
return false;
// otherwise, just download the google/bing search results so we
// can display them in xml
//else if ( m_xd.getUtf8Content() == (char **)-1 )
// return false;
// print reply..
//printReply();
return true;
//.........这里部分代码省略.........
示例4: qaspider1
bool qaspider1 ( ) {
//
// delete the 'qatest123' collection
//
//static bool s_x1 = false;
if ( ! s_flags[0] ) {
s_flags[0] = true;
if ( ! getUrl ( "/admin/delcoll?xml=1&delcoll=qatest123" ) )
return false;
}
//
// add the 'qatest123' collection
//
//static bool s_x2 = false;
if ( ! s_flags[1] ) {
s_flags[1] = true;
if ( ! getUrl ( "/admin/addcoll?addcoll=qatest123&xml=1" ,
// checksum of reply expected
238170006 ) )
return false;
}
// restrict hopcount to 0 or 1 in url filters so we do not spider
// too deep
//static bool s_z1 = false;
if ( ! s_flags[2] ) {
s_flags[2] = true;
SafeBuf sb;
sb.safePrintf("&c=qatest123&"
// make it the custom filter
"ufp=0&"
"fe=%%21ismanualadd+%%26%%26+%%21insitelist&hspl=0&hspl=1&fsf=0.000000&mspr=0&mspi=1&xg=1000&fsp=-3&"
// take out hopcount for now, just test quotas
// "fe1=tag%%3Ashallow+%%26%%26+hopcount%%3C%%3D1&hspl1=0&hspl1=1&fsf1=1.000000&mspr1=1&mspi1=1&xg1=1000&fsp1=3&"
// just one spider out allowed for consistency
"fe1=tag%%3Ashallow+%%26%%26+sitepages%%3C%%3D20&hspl1=0&hspl1=1&fsf1=1.000000&mspr1=1&mspi1=1&xg1=1000&fsp1=45&"
"fe2=default&hspl2=0&hspl2=1&fsf2=1.000000&mspr2=0&mspi2=1&xg2=1000&fsp2=45&"
);
if ( ! getUrl ( "/admin/filters",0,sb.getBufStart()) )
return false;
}
// set the site list to
// a few sites
//static bool s_z2 = false;
if ( ! s_flags[3] ) {
s_flags[3] = true;
SafeBuf sb;
sb.safePrintf("&c=qatest123&format=xml&sitelist=");
sb.urlEncode("tag:shallow site:www.walmart.com\r\n"
"tag:shallow site:http://www.ibm.com/\r\n");
sb.nullTerm();
if ( ! getUrl ("/admin/settings",0,sb.getBufStart() ) )
return false;
}
//
// use the add url interface now
// walmart.com above was not seeded because of the site: directive
// so this will seed it.
//
//static bool s_y2 = false;
if ( ! s_flags[4] ) {
s_flags[4] = true;
SafeBuf sb;
// delim=+++URL:
sb.safePrintf("&c=qatest123"
"&format=json"
"&strip=1"
"&spiderlinks=1"
"&urls=www.walmart.com+ibm.com"
);
// . now a list of websites we want to spider
// . the space is already encoded as +
//sb.urlEncode(s_urls1);
if ( ! getUrl ( "/admin/addurl",0,sb.getBufStart()) )
return false;
}
//
// wait for spidering to stop
//
checkagain:
// wait until spider finishes. check the spider status page
// in json to see when completed
//static bool s_k1 = false;
if ( ! s_flags[5] ) {
// wait 5 seconds, call sleep timer... then call qatest()
//usleep(5000000); // 5 seconds
wait(3.0);
s_flags[5] = true;
return false;
}
//.........这里部分代码省略.........
示例5: qainject2
bool qainject2 ( ) {
//if ( ! s_callback ) s_callback = qainject2;
//
// delete the 'qatest123' collection
//
//static bool s_x1 = false;
if ( ! s_flags[0] ) {
s_flags[0] = true;
if ( ! getUrl ( "/admin/delcoll?xml=1&delcoll=qatest123" ) )
return false;
}
//
// add the 'qatest123' collection
//
//static bool s_x2 = false;
if ( ! s_flags[1] ) {
s_flags[1] = true;
if ( ! getUrl ( "/admin/addcoll?addcoll=qatest123&xml=1" ,
// checksum of reply expected
238170006 ) )
return false;
}
//
// try delimeter based injecting
//
//static bool s_y2 = false;
if ( ! s_flags[7] ) {
s_flags[7] = true;
SafeBuf sb;
// delim=+++URL:
sb.safePrintf("&c=qatest123&deleteurl=0&"
"delim=%%2B%%2B%%2BURL%%3A&format=xml&u=xyz.com&"
"hasmime=1&content=");
// use injectme3 file
SafeBuf ubuf;
ubuf.load("./injectme3");
sb.urlEncode(ubuf.getBufStart());
if ( ! getUrl ( "/admin/inject",
// check reply, seems to have only a single
// docid in it
-1970198487, sb.getBufStart()) )
return false;
}
// now query check
//static bool s_y4 = false;
if ( ! s_flags[8] ) {
wait(1.5);
s_flags[8] = true;
return false;
}
if ( ! s_flags[14] ) {
s_flags[14] = true;
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=xml&q=%2Bthe",
-1804253505 ) )
return false;
}
//static bool s_y5 = false;
if ( ! s_flags[9] ) {
s_flags[9] = true;
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=xml&q=sports"
"+news&ns=1&tml=20&smxcpl=30&"
"sw=10&showimages=1"
,-1874756636 ) )
return false;
}
//static bool s_y6 = false;
if ( ! s_flags[10] ) {
s_flags[10] = true;
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=xml&q=sports"
"+news&ns=1&tml=20&smxcpl=30&"
"sw=10&showimages=0&hacr=1"
,1651330319 ) )
return false;
}
//static bool s_y7 = false;
if ( ! s_flags[11] ) {
s_flags[11] = true;
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=xml&q=sports"
"+news&ns=1&tml=20&smxcpl=30&"
"sw=10&showimages=0&sc=1"
,-1405546537 ) )
return false;
}
//
// delete the 'qatest123' collection
//
if ( ! s_flags[12] ) {
s_flags[12] = true;
//.........这里部分代码省略.........
示例6: qainject1
//
// the injection qa test suite
//
bool qainject1 ( ) {
//if ( ! s_callback ) s_callback = qainject1;
//
// delete the 'qatest123' collection
//
//static bool s_x1 = false;
if ( ! s_flags[0] ) {
s_flags[0] = true;
if ( ! getUrl ( "/admin/delcoll?xml=1&delcoll=qatest123" ) )
return false;
}
//
// add the 'qatest123' collection
//
//static bool s_x2 = false;
if ( ! s_flags[1] ) {
s_flags[1] = true;
if ( ! getUrl ( "/admin/addcoll?addcoll=qatest123&xml=1" ,
// checksum of reply expected
238170006 ) )
return false;
}
// this only loads once
loadUrls();
long max = s_ubuf2.length()/(long)sizeof(char *);
//max = 1;
//
// inject urls, return false if not done yet
//
//static bool s_x4 = false;
if ( ! s_flags[2] ) {
// TODO: try delimeter based injection too
//static long s_ii = 0;
for ( ; s_flags[20] < max ; ) {
// inject using html api
SafeBuf sb;
sb.safePrintf("&c=qatest123&deleteurl=0&"
"format=xml&u=");
sb.urlEncode ( s_urlPtrs[s_flags[20]] );
// the content
sb.safePrintf("&hasmime=1");
// sanity
//if ( strstr(s_urlPtrs[s_flags[20]],"wdc.htm") )
// log("hey");
sb.safePrintf("&content=");
sb.urlEncode(s_contentPtrs[s_flags[20]] );
sb.nullTerm();
// pre-inc it in case getUrl() blocks
s_flags[20]++;//ii++;
if ( ! getUrl("/admin/inject",
0, // no idea what crc to expect
sb.getBufStart()) )
return false;
}
s_flags[2] = true;
}
// +the
//static bool s_x5 = false;
if ( ! s_flags[3] ) {
wait(1.5);
s_flags[3] = true;
return false;
}
if ( ! s_flags[16] ) {
s_flags[16] = true;
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=xml&q=%2Bthe",
702467314 ) )
return false;
}
// sports news
//static bool s_x7 = false;
if ( ! s_flags[4] ) {
s_flags[4] = true;
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=xml&"
"q=sports+news",2009472889 ) )
return false;
}
// 'washer & dryer' does some algorithmic synonyms 'washer and dryer'
if ( ! s_flags[15] ) {
s_flags[15] = true;
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=xml&"
"debug=1&q=washer+%26+dryer",9999 ) )
return false;
}
//
// mdw: query reindex test
//
//.........这里部分代码省略.........
示例7: qajson
bool qajson ( ) {
//
// delete the 'qatest123' collection
//
//static bool s_x1 = false;
if ( ! s_flags[0] ) {
s_flags[0] = true;
if ( ! getUrl ( "/admin/delcoll?xml=1&delcoll=qatest123" ) )
return false;
}
//
// add the 'qatest123' collection
//
//static bool s_x2 = false;
if ( ! s_flags[1] ) {
s_flags[1] = true;
if ( ! getUrl ( "/admin/addcoll?addcoll=qatest123&xml=1" ,
// checksum of reply expected
238170006 ) )
return false;
}
// add the 50 urls
if ( ! s_flags[3] ) {
s_flags[3] = true;
SafeBuf sb;
sb.safePrintf("&c=qatest123"
"&format=json"
"&strip=1"
"&spiderlinks=0"
"&urls="//www.walmart.com+ibm.com"
);
sb.urlEncode ( s_ubuf4 );
// . now a list of websites we want to spider
// . the space is already encoded as +
if ( ! getUrl ( "/admin/addurl",0,sb.getBufStart()) )
return false;
}
//
// wait for spidering to stop
//
checkagain:
// wait until spider finishes. check the spider status page
// in json to see when completed
//static bool s_k1 = false;
if ( ! s_flags[5] ) {
// wait 5 seconds, call sleep timer... then call qatest()
//usleep(5000000); // 5 seconds
wait(3.0);
s_flags[5] = true;
return false;
}
if ( ! s_flags[15] ) {
s_flags[15] = true;
if ( ! getUrl ( "/admin/status?format=json&c=qatest123",0) )
return false;
}
//static bool s_k2 = false;
if ( ! s_flags[6] ) {
// ensure spiders are done.
// "Nothing currently available to spider"
if ( s_content&&!strstr(s_content,"Nothing currently avail")){
s_flags[5] = false;
s_flags[15] = false;
goto checkagain;
}
s_flags[6] = true;
}
if ( ! s_flags[7] ) {
s_flags[7] = true;
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=xml&"
"q=type%3Ajson+meta.authors%3Appk",
-1310551262 ) )
return false;
}
if ( ! s_flags[8] ) {
s_flags[8] = true;
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=xml&n=100&"
"q=type%3Ajson",
-1310551262 ) )
return false;
}
if ( ! s_flags[9] ) {
s_flags[9] = true;
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=json&"
"q=gbfacetstr%3Ameta.authors",
-1310551262 ) )
//.........这里部分代码省略.........
示例8: qaspider2
bool qaspider2 ( ) {
//
// delete the 'qatest123' collection
//
//static bool s_x1 = false;
if ( ! s_flags[0] ) {
s_flags[0] = true;
if ( ! getUrl ( "/admin/delcoll?xml=1&delcoll=qatest123" ) )
return false;
}
//
// add the 'qatest123' collection
//
//static bool s_x2 = false;
if ( ! s_flags[1] ) {
s_flags[1] = true;
if ( ! getUrl ( "/admin/addcoll?addcoll=qatest123&xml=1" ,
// checksum of reply expected
238170006 ) )
return false;
}
// restrict hopcount to 0 or 1 in url filters so we do not spider
// too deep
//static bool s_z1 = false;
if ( ! s_flags[2] ) {
s_flags[2] = true;
SafeBuf sb;
sb.safePrintf("&c=qatest123&"
// make it the custom filter
"ufp=0&"
"fe=%%21ismanualadd+%%26%%26+%%21insitelist&hspl=0&hspl=1&fsf=0.000000&mspr=0&mspi=1&xg=1000&fsp=-3&"
// take out hopcount for now, just test quotas
// "fe1=tag%%3Ashallow+%%26%%26+hopcount%%3C%%3D1&hspl1=0&hspl1=1&fsf1=1.000000&mspr1=1&mspi1=1&xg1=1000&fsp1=3&"
// sitepages is a little fuzzy so take it
// out for this test and use hopcount!!!
//"fe1=tag%%3Ashallow+%%26%%26+sitepages%%3C%%3D20&hspl1=0&hspl1=1&fsf1=1.000000&mspr1=1&mspi1=1&xg1=1000&fsp1=45&"
"fe1=tag%%3Ashallow+%%26%%26+hopcount<%%3D1&hspl1=0&hspl1=1&fsf1=1.000000&mspr1=1&mspi1=1&xg1=1000&fsp1=45&"
"fe2=default&hspl2=0&hspl2=1&fsf2=1.000000&mspr2=0&mspi2=1&xg2=1000&fsp2=45&"
);
if ( ! getUrl ( "/admin/filters",0,sb.getBufStart()) )
return false;
}
// set the site list to
// a few sites
// these should auto seed so no need to use addurl
//static bool s_z2 = false;
if ( ! s_flags[3] ) {
s_flags[3] = true;
SafeBuf sb;
sb.safePrintf("&c=qatest123&format=xml&sitelist=");
sb.urlEncode(//walmart has too many pages at depth 1, so remove it
//"tag:shallow www.walmart.com\r\n"
"tag:shallow http://www.ibm.com/\r\n");
sb.nullTerm();
if ( ! getUrl ("/admin/settings",0,sb.getBufStart() ) )
return false;
}
//
// wait for spidering to stop
//
checkagain:
// wait until spider finishes. check the spider status page
// in json to see when completed
//static bool s_k1 = false;
if ( ! s_flags[4] ) {
//usleep(5000000); // 5 seconds
s_flags[4] = true;
wait(3.0);
return false;
}
if ( ! s_flags[14] ) {
s_flags[14] = true;
if ( ! getUrl ( "/admin/status?format=json&c=qatest123",0) )
return false;
}
//static bool s_k2 = false;
if ( ! s_flags[5] ) {
// ensure spiders are done.
// "Nothing currently available to spider"
if ( s_content&&!strstr(s_content,"Nothing currently avail")){
s_flags[4] = false;
s_flags[14] = false;
goto checkagain;
}
s_flags[5] = true;
}
//.........这里部分代码省略.........