当前位置: 首页>>代码示例>>C++>>正文


C++ Url::getDomainHash32方法代码示例

本文整理汇总了C++中Url::getDomainHash32方法的典型用法代码示例。如果您正苦于以下问题:C++ Url::getDomainHash32方法的具体用法?C++ Url::getDomainHash32怎么用?C++ Url::getDomainHash32使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在Url的用法示例。


在下文中一共展示了Url::getDomainHash32方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: sendPageAnalyze


//.........这里部分代码省略.........
	long isXml = r->getLong("xml",0);

	// if got docid, use that
	if ( st->m_docId != -1 ) {
		if ( ! xd->set3 ( st->m_docId,
				  st->m_coll,
				  0 ) ) // niceness
			// return error reply if g_errno is set
			return sendErrorReply ( st , g_errno );
		// make this our callback in case something blocks
		xd->setCallback ( st , gotXmlDoc );
		xd->m_pbuf = &st->m_wbuf;
		// reset this flag
		st->m_donePrinting = false;
		// . set xd from the old title rec if recycle is true
		// . can also use XmlDoc::m_loadFromOldTitleRec flag
		//if ( st->m_recycle ) xd->m_recycleContent = true;
		xd->m_recycleContent = true;
		// force this on
		//xd->m_useSiteLinkBuf = true;
		//xd->m_usePageLinkBuf = true;
		if ( isXml ) xd->m_printInXml = true;
		// now tell it to fetch the old title rec
		if ( ! xd->loadFromOldTitleRec () )
			// return false if this blocks
			return false;
		return gotXmlDoc ( st );
	}			

	// set this up
	SpiderRequest sreq;
	sreq.reset();
	if ( st->m_u ) strcpy(sreq.m_url,st->m_u);
	long firstIp = hash32n(st->m_u);
	if ( firstIp == -1 || firstIp == 0 ) firstIp = 1;
	// parentdocid of 0
	sreq.setKey( firstIp, 0LL, false );
	sreq.m_isPageParser = 1;
	sreq.m_hopCount = st->m_hopCount;
	sreq.m_hopCountValid = 1;
	sreq.m_fakeFirstIp   = 1;
	sreq.m_firstIp = firstIp;
	Url nu;
	nu.set(sreq.m_url);
	sreq.m_domHash32 = nu.getDomainHash32();
	sreq.m_siteHash32 = nu.getHostHash32();

	// . get provided content if any
	// . will be NULL if none provided
	// . "content" may contain a MIME
	long  contentLen = 0;
	char *content = r->getString ( "content" , &contentLen , NULL );
	// is the "content" url-encoded? default is true.
	bool contentIsEncoded = true;
	// mark doesn't like to url-encode his content
	if ( ! content ) {
		content    = r->getUnencodedContent    ();
		contentLen = r->getUnencodedContentLen ();
		contentIsEncoded = false;
	}
	// ensure null
	if ( contentLen == 0 ) content = NULL;

	//uint8_t contentType = CT_HTML;
	//if ( isXml ) contentType = CT_XML;
	long ctype = r->getLong("ctype",CT_HTML);

	// . use the enormous power of our new XmlDoc class
	// . this returns false if blocked
	if ( ! xd->set4 ( &sreq       ,
			  NULL        ,
			  st->m_coll  ,
			  // we need this so the term table is set!
			  &st->m_wbuf        , // XmlDoc::m_pbuf
			  0, // try 0 now! 1 ,//PP_NICENESS ))
			  content ,
			  false, // deletefromindex
			  0, // forced ip
			  ctype ))
		// return error reply if g_errno is set
		return sendErrorReply ( st , g_errno );
	// make this our callback in case something blocks
	xd->setCallback ( st , gotXmlDoc );
	// reset this flag
	st->m_donePrinting = false;
	// prevent a core here in the event we download the page content
	xd->m_crawlDelayValid = true;
	xd->m_crawlDelay = 0;
	// . set xd from the old title rec if recycle is true
	// . can also use XmlDoc::m_loadFromOldTitleRec flag
	//if ( st->m_recycle ) xd->m_recycleContent = true;
	// only recycle if docid is given!!
	if ( st->m_recycle ) xd->m_recycleContent = true;
	// force this on
	//xd->m_useSiteLinkBuf = true;
	//xd->m_usePageLinkBuf = true;
	if ( isXml ) xd->m_printInXml = true;

	return gotXmlDoc ( st );
}
开发者ID:acchou,项目名称:open-source-search-engine,代码行数:101,代码来源:PageParser.cpp

示例2: sendPageParser2


//.........这里部分代码省略.........
		  "<br>",

			  //oips ,
			  contentParm );



	xbuf->safePrintf(
			 "<center>"
			 "<input type=submit value=Submit>"
			 "</center>"
			 );


	// just print the page if no url given
	if ( ! st->m_u || ! st->m_u[0] ) return processLoop ( st );


	XmlDoc *xd = &st->m_xd;
	// set this up
	SpiderRequest sreq;
	sreq.reset();
	strcpy(sreq.m_url,st->m_u);
	long firstIp = hash32n(st->m_u);
	if ( firstIp == -1 || firstIp == 0 ) firstIp = 1;
	// parentdocid of 0
	sreq.setKey( firstIp, 0LL, false );
	sreq.m_isPageParser = 1;
	sreq.m_hopCount = st->m_hopCount;
	sreq.m_hopCountValid = 1;
	sreq.m_fakeFirstIp   = 1;
	sreq.m_firstIp = firstIp;
	Url nu;
	nu.set(sreq.m_url);
	sreq.m_domHash32 = nu.getDomainHash32();
	sreq.m_siteHash32 = nu.getHostHash32();

	// . get provided content if any
	// . will be NULL if none provided
	// . "content" may contain a MIME
	long  contentLen = 0;
	char *content = r->getString ( "content" , &contentLen , NULL );
	// is the "content" url-encoded? default is true.
	bool contentIsEncoded = true;
	// mark doesn't like to url-encode his content
	if ( ! content ) {
		content    = r->getUnencodedContent    ();
		contentLen = r->getUnencodedContentLen ();
		contentIsEncoded = false;
	}
	// ensure null
	if ( contentLen == 0 ) content = NULL;

	uint8_t contentType = CT_HTML;
	if ( r->getBool("xml",0) ) contentType = CT_XML;

	contentType = r->getLong("ctype",contentType);//CT_HTML);


	// if facebook, load xml content from title rec...
	bool isFacebook = (bool)strstr(st->m_u,"http://www.facebook.com/");
	if ( isFacebook && ! content ) {
		long long docId = g_titledb.getProbableDocId(st->m_u);
		sprintf(sreq.m_url ,"%llu", docId );
		sreq.m_isPageReindex = true;
	}

	// hack
	if ( content ) {
		st->m_dbuf.purge();
		st->m_dbuf.safeStrcpy(content);
		//char *data = strstr(content,"\r\n\r\n");
		//long dataPos = 0;
		//if ( data ) dataPos = (data + 4) - content;
		//st->m_dbuf.convertJSONtoXML(0,dataPos);
		//st->m_dbuf.decodeJSON(0);
		content = st->m_dbuf.getBufStart();
	}

	// . use the enormous power of our new XmlDoc class
	// . this returns false if blocked
	if ( ! xd->set4 ( &sreq       ,
			  NULL        ,
			  st->m_coll  ,
			  &st->m_wbuf        ,
			  0 ,//PP_NICENESS ))
			  content ,
			  false, // deletefromindex
			  0, // forced ip
			  contentType ))
		// return error reply if g_errno is set
		return sendErrorReply ( st , g_errno );
	// make this our callback in case something blocks
	xd->setCallback ( st , processLoop );
	// . set xd from the old title rec if recycle is true
	// . can also use XmlDoc::m_loadFromOldTitleRec flag
	if ( st->m_recycle ) xd->m_recycleContent = true;

	return processLoop ( st );
}
开发者ID:acchou,项目名称:open-source-search-engine,代码行数:101,代码来源:PageParser.cpp


注:本文中的Url::getDomainHash32方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。