当前位置: 首页>>代码示例>>C++>>正文


C++ SpiderRequest::setDataSize方法代码示例

本文整理汇总了C++中SpiderRequest::setDataSize方法的典型用法代码示例。如果您正苦于以下问题:C++ SpiderRequest::setDataSize方法的具体用法?C++ SpiderRequest::setDataSize怎么用?C++ SpiderRequest::setDataSize使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在SpiderRequest的用法示例。


在下文中一共展示了SpiderRequest::setDataSize方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: sendPageGet


//.........这里部分代码省略.........
	// . we need to match summary here so we need to know this
	//bool seq = r->getLong ( "seq" , false );
	// restrict to root file?
	bool rtq = r->getLong ( "rtq" , false );

	// . get the titleRec
	// . TODO: redirect client to a better http server to save bandwidth
	State2 *st ;
	try { st = new (State2); }
	catch (... ) {
		g_errno = ENOMEM;
		log("PageGet: new(%i): %s", 
		    (int)sizeof(State2),mstrerror(g_errno));
		return g_httpServer.sendErrorReply(s,500,mstrerror(g_errno));}
	mnew ( st , sizeof(State2) , "PageGet1" );
	// save the socket and if Host: is local in the Http request Mime
	st->m_socket   = s;
	st->m_isAdmin  = g_conf.isCollAdmin ( s , r );
	st->m_isLocal  = r->isLocal();
	st->m_docId    = docId;
	st->m_printed  = false;
	// include header ... "this page cached by Gigablast on..."
	st->m_includeHeader     = r->getLong ("ih"    , true  );
	st->m_includeBaseHref   = r->getLong ("ibh"   , false );
	st->m_queryHighlighting = r->getLong ("qh"    , true  );
	st->m_strip             = r->getLong ("strip" , 0     );
	st->m_clickAndScroll    = r->getLong ("cas"   , true  );
	st->m_cnsPage           = r->getLong ("cnsp"  , true );
	char *langAbbr = r->getString("qlang",NULL);
	st->m_langId = langUnknown;
	if ( langAbbr ) {
		uint8_t langId = getLangIdFromAbbr ( langAbbr );
		st->m_langId = langId;
	}
	strncpy ( st->m_coll , coll , MAX_COLL_LEN+1 );
	// store query for query highlighting
	st->m_netTestResults    = r->getLong ("rnettest", false );
	//if( st->m_netTestResults ) {
	//	mdelete ( st , sizeof(State2) , "PageGet1" );
	//	delete ( st );
	//	return sendPageNetResult( s );
	//}
	if ( q && qlen > 0 ) strcpy ( st->m_q , q );
	else                 st->m_q[0] = '\0';
	st->m_qlen = qlen;
	//st->m_seq      = seq;
	st->m_rtq      = rtq;
	st->m_boolFlag = r->getLong ("bq", 2 /*default is 2*/ );
	st->m_isBanned = false;
	st->m_noArchive = false;
	st->m_socket = s;
	st->m_format = r->getReplyFormat();
	// default to 0 niceness
	st->m_niceness = 0;
	st->m_r.copy ( r );
	//st->m_cr = cr;
	st->m_printDisclaimer = true;
	if ( st->m_cnsPage )
		st->m_printDisclaimer = false;
	if ( st->m_strip ) // ! st->m_evbits.isEmpty() ) 
		st->m_printDisclaimer = false;
	
	// should we cache it?
	char useCache = r->getLong ( "usecache" ,  1 );
	char rcache   = r->getLong ( "rcache"   ,  1 );
	char wcache   = r->getLong ( "wcache"   ,  1 );
	long cacheAge = r->getLong ( "cacheAge" , 60*60 ); // default one hour
	if ( useCache == 0 ) { cacheAge = 0; wcache = 0; }
	if ( rcache   == 0 )   cacheAge = 0; 
	// . fetch the TitleRec
	// . a max cache age of 0 means not to read from the cache
	XmlDoc *xd = &st->m_xd;
	// url based?
	if ( url ) {
		SpiderRequest sreq;
		sreq.reset();
		strcpy(sreq.m_url, url );
		sreq.setDataSize();
		// this returns false if "coll" is invalid
		if ( ! xd->set4 ( &sreq , NULL , coll , NULL , st->m_niceness ) ) 
			goto hadSetError;
	}
	// . when getTitleRec() is called it will load the old one
	//   since XmlDoc::m_setFromTitleRec will be true
	// . niceness is 0
	// . use st->m_coll since XmlDoc just points to it!
	// . this returns false if "coll" is invalid
	else if ( ! xd->set3 ( docId , st->m_coll , 0 ) ) {
	hadSetError:
		mdelete ( st , sizeof(State2) , "PageGet1" );
		delete ( st );
		g_errno = ENOMEM;
		log("PageGet: set3: %s", mstrerror(g_errno));
		return g_httpServer.sendErrorReply(s,500,mstrerror(g_errno));
	}
	// if it blocks while it loads title rec, it will re-call this routine
	xd->setCallback ( st , processLoopWrapper );
	// good to go!
	return processLoop ( st );
}
开发者ID:firatkarakusoglu,项目名称:open-source-search-engine,代码行数:101,代码来源:PageGet.cpp

示例2: sendPageAddUrl


//.........这里部分代码省略.........
	// . should we force it into spiderdb even if already in there
	// . use to manually update spider times for a url
	// . however, will not remove old scheduled spider times
	// . mdw: made force on the default
	st1->m_forceRespider = r->getLong("force",1); // 0);

	long now = getTimeGlobal();

	// . allow 1 submit every 1 hour
	// . restrict by submitter domain ip
	if ( ! st1->m_isAdmin &&
	     ! canSubmit ( h , now , cr->m_maxAddUrlsPerIpDomPerDay ) ) {
		// return error page
		g_errno = ETOOEARLY;
		return sendReply ( st1 , true );
	}


	//st1->m_query = r->getString( "qts", &st1->m_queryLen );


	// check it, if turing test is enabled for this collection
	if ( ! st1->m_isAdmin && cr->m_doTuringTest && 
	     ! g_turingTest.isHuman(r) )  {
		// log note so we know it didn't make it
		g_msg = " (error: bad answer)";
		//log("PageAddUrl:: addurl failed for %s : bad answer",
		//    iptoa(s->m_ip));
		st1->m_goodAnswer = false;
		return sendReply ( st1 , true /*addUrl enabled?*/ );
	}

	//if ( st1->m_queryLen > 0 )
	//	return getPages( st1 );

	// if no url given, just print a blank page
	if ( ! url ) return sendReply (  st1 , true );


	//
	// make a SpiderRequest
	//

	SpiderRequest *sreq = &st1->m_sreq;
	// reset it
	sreq->reset();
	// make the probable docid
	long long probDocId = g_titledb.getProbableDocId ( st1->m_url );
	// make one up, like we do in PageReindex.cpp
	long firstIp = (probDocId & 0xffffffff);
	// . now fill it up
	// . TODO: calculate the other values... lazy!!! (m_isRSSExt, 
	//         m_siteNumInlinks,...)
	sreq->m_isNewOutlink = 1;
	sreq->m_isAddUrl     = 1;
	sreq->m_addedTime    = now;
	sreq->m_fakeFirstIp   = 1;
	sreq->m_probDocId     = probDocId;
	sreq->m_firstIp       = firstIp;
	sreq->m_hopCount      = 0;
	// its valid if root
	Url uu; uu.set ( st1->m_url );
	if ( uu.isRoot() ) sreq->m_hopCountValid = true;
	// too big?
	//long len = st1->m_urlLen;
	// the url! includes \0
	strcpy ( sreq->m_url , st1->m_url );
	// call this to set sreq->m_dataSize now
	sreq->setDataSize();
	// make the key dude -- after setting url
	sreq->setKey ( firstIp , 0LL, false );
	// need a fake first ip lest we core!
	//sreq->m_firstIp = (pdocId & 0xffffffff);
	// how to set m_firstIp? i guess addurl can be throttled independently
	// of the other urls???  use the hash of the domain for it!
	long  dlen;
	char *dom = getDomFast ( st1->m_url , &dlen );
	// fake it for this...
	//sreq->m_firstIp = hash32 ( dom , dlen );
	// sanity
	if ( ! dom ) {
		g_errno = EBADURL;
		return sendReply ( st1 , true );
	}
	// shortcut
	Msg4 *m = &st1->m_msg4;
	// now add that to spiderdb using msg4
	if ( ! m->addMetaList ( (char *)sreq    ,
				sreq->getRecSize() ,
				coll            ,
				st1             , // state
				addedStuff      ,
				MAX_NICENESS    ,
				RDB_SPIDERDB    ) )
		// we blocked
		return false;

	// send back the reply
	return sendReply ( st1 , true );
}
开发者ID:RevBooyah,项目名称:open-source-search-engine,代码行数:101,代码来源:PageAddUrl.cpp


注:本文中的SpiderRequest::setDataSize方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。