当前位置: 首页>>代码示例>>C++>>正文


C++ Url::getHostLen方法代码示例

本文整理汇总了C++中Url::getHostLen方法的典型用法代码示例。如果您正苦于以下问题:C++ Url::getHostLen方法的具体用法?C++ Url::getHostLen怎么用?C++ Url::getHostLen使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在Url的用法示例。


在下文中一共展示了Url::getHostLen方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: setTitle

// returns false and sets g_errno on error
bool Title::setTitle ( Xml *xml, Words *words, int32_t maxTitleLen, Query *query,
                       LinkInfo *linkInfo, Url *firstUrl, const char *filteredRootTitleBuf, int32_t filteredRootTitleBufSize,
                       uint8_t contentType, uint8_t langId, int32_t niceness ) {
	// make Msg20.cpp faster if it is just has
	// Msg20Request::m_setForLinkInfo set to true, no need to extricate a title.
	if ( maxTitleLen <= 0 ) {
		return true;
	}

	m_niceness = niceness;
	m_maxTitleLen = maxTitleLen;

	// if this is too big the "first line" algo can be huge!!!
	// and really slow everything way down with a huge title candidate
	int32_t maxTitleWords = 128;

	// assume no title
	reset();

	int32_t NW = words->getNumWords();

	//
	// now get all the candidates
	//

	// . allow up to 100 title CANDIDATES
	// . "as" is the word # of the first word in the candidate
	// . "bs" is the word # of the last word IN the candidate PLUS ONE
	int32_t n = 0;
	int32_t as[MAX_TIT_CANDIDATES];
	int32_t bs[MAX_TIT_CANDIDATES];
	float scores[MAX_TIT_CANDIDATES];
	Words *cptrs[MAX_TIT_CANDIDATES];
	int32_t types[MAX_TIT_CANDIDATES];
	int32_t parent[MAX_TIT_CANDIDATES];

	// record the scoring algos effects
	float  baseScore        [MAX_TIT_CANDIDATES];
	float  noCapsBoost      [MAX_TIT_CANDIDATES];
	float  qtermsBoost      [MAX_TIT_CANDIDATES];
	float  inCommonCandBoost[MAX_TIT_CANDIDATES];

	// reset these
	for ( int32_t i = 0 ; i < MAX_TIT_CANDIDATES ; i++ ) {
		// assume no parent
		parent[i] = -1;
	}

	// xml and words class for each link info, rss item
	Xml   tx[MAX_TIT_CANDIDATES];
	Words tw[MAX_TIT_CANDIDATES];
	int32_t  ti = 0;

	// restrict how many link texts and rss blobs we check for titles
	// because title recs like www.google.com have hundreds and can
	// really slow things down to like 50ms for title generation
	int32_t kcount = 0;
	int32_t rcount = 0;

	//int64_t x = gettimeofdayInMilliseconds();

	// . get every link text
	// . TODO: repeat for linkInfo2, the imported link text
	for ( Inlink *k = NULL; linkInfo && (k = linkInfo->getNextInlink(k)) ; ) {
		// breathe
		QUICKPOLL(m_niceness);
		// fast skip check for link text
		if ( k->size_linkText >= 3 && ++kcount >= 20 ) continue;
		// fast skip check for rss item
		if ( k->size_rssItem > 10 && ++rcount >= 20 ) continue;

		// set Url
		Url u;
		u.set( k->getUrl(), k->size_urlBuf );

		// is it the same host as us?
		bool sh = true;

		// skip if not from same host and should be
		if ( firstUrl->getHostLen() != u.getHostLen() ) {
			sh = false;
		}

		// skip if not from same host and should be
		if ( strncmp( firstUrl->getHost(), u.getHost(), u.getHostLen() ) ) {
			sh = false;
		}

		// get the link text
		if ( k->size_linkText >= 3 ) {
			char *p    = k->getLinkText();
			int32_t  plen = k->size_linkText - 1;
			if ( ! verifyUtf8 ( p , plen ) ) {
				log("title: set4 bad link text from url=%s", k->getUrl());
				continue;
			}

			// now the words.
			if ( !tw[ti].set( k->getLinkText(), k->size_linkText - 1, true, 0 ) ) {
//.........这里部分代码省略.........
开发者ID:lemire,项目名称:open-source-search-engine,代码行数:101,代码来源:Title.cpp

示例2: addCookieHeader

bool HttpMime::addCookieHeader(const char *cookieJar, const char *url, SafeBuf *sb) {
	Url tmpUrl;
	tmpUrl.set(url);

	SafeBuf tmpSb;

	size_t cookieJarLen = strlen(cookieJar);

	const char *lineStartPos = cookieJar;
	const char *lineEndPos = NULL;
	while ((lineEndPos = (const char*)memchr(lineStartPos, '\n', cookieJarLen - (lineStartPos - cookieJar))) != NULL) {
		const char *currentPos = lineStartPos;
		const char *tabPos = NULL;
		unsigned fieldCount = 0;

		bool skipCookie = false;
		const char *domain = NULL;
		int32_t domainLen = 0;
		while (fieldCount < 5 && (tabPos = (const char*)memchr(currentPos, '\t', lineEndPos - currentPos)) != NULL) {
			switch (fieldCount) {
				case 0:
					// domain
					domain = currentPos;
					domainLen = tabPos - currentPos;
					break;
				case 1:
					// flag
					if (memcmp(currentPos, "TRUE", 4) == 0) {
						// allow subdomain
						if (tmpUrl.getHostLen() >= domainLen) {
							if (!endsWith(tmpUrl.getHost(), tmpUrl.getHostLen(), domain, domainLen)) {
								// doesn't end with domain - ignore cookie
								skipCookie = true;
								break;
							}
						} else {
							skipCookie = true;
							break;
						}
					} else {
						// only specific domain
						if (tmpUrl.getHostLen() != domainLen || strncasecmp(domain, tmpUrl.getHost(), domainLen) != 0) {
							// non-matching domain - ignore cookie
							skipCookie = true;
							break;
						}
					}
					break;
				case 2: {
					// path
					const char *path = currentPos;
					int32_t pathLen = tabPos - currentPos;
					if (strncasecmp(path, tmpUrl.getPath(), pathLen) == 0) {
						if (tmpUrl.getPathLen() != pathLen) {
							if (path[pathLen - 1] != '/' && tmpUrl.getPath()[tmpUrl.getPathLen() - 1] != '/') {
								// non-matching path - ignore cookie
								skipCookie = true;
								break;
							}
						}
					} else {
						// non-matching path - ignore cookie
						skipCookie = true;
						break;
					}
				} break;
				case 3:
					// secure

					break;
				case 4:
					// expiration

					break;
			}

			currentPos = tabPos + 1;
			++fieldCount;
		}

		if (!skipCookie) {
			tmpSb.safeMemcpy(currentPos, lineEndPos - currentPos);
			tmpSb.pushChar(';');
		}

		lineStartPos = lineEndPos + 1;
	}
	// we don't need to care about the last line (we always end on \n)

	if (tmpSb.length() > 0) {
		sb->safeStrcpy("Cookie: ");
		sb->safeMemcpy(&tmpSb);
		sb->safeStrcpy("\r\n");
	}

	return true;
}
开发者ID:privacore,项目名称:open-source-search-engine,代码行数:97,代码来源:HttpMime.cpp

示例3: main

int main ( int argc , char *argv[] ) {
	bool addWWW = true;
	bool stripSession = true;
	// check for arguments
	for (int32_t i = 1; i < argc; i++) {
		if (strcmp(argv[i], "-w") == 0)
			addWWW = false;
		else if (strcmp(argv[i], "-s") == 0)
			stripSession = false;
	}
	// initialize
	//g_mem.init(100*1024);
	hashinit();
	//g_conf.m_tfndbExtBits = 23;
 loop:
	// read a url from stddin
	char sbuf[1024];
	if ( ! fgets ( sbuf , 1024 , stdin ) ) exit(1);
	char *s = sbuf;
	char fbuf[1024];
	// decode if we should
	if ( strncmp(s,"http%3A%2F%2F",13) == 0 ||
	     strncmp(s,"https%3A%2F%2F",13) == 0 ) {
		urlDecode(fbuf,s,gbstrlen(s));
		s = fbuf;
	}
	// old url
	printf("###############\n");
	printf("old: %s",s);
	int32_t slen = gbstrlen(s);
	// remove any www. if !addWWW
	if (!addWWW) {
		if (slen >= 4 &&
		    strncasecmp(s, "www.", 4) == 0) {
			slen -= 4;
			memmove(s, &s[4], slen);
		}
		else {
			// get past a ://
			int32_t si = 0;
			while (si < slen &&
			       ( s[si] != ':' ||
				 s[si+1] != '/' ||
				 s[si+2] != '/' ) )
				si++;
			// remove the www.
			if (si + 7 < slen) {
				si += 3;
				if (strncasecmp(&s[si], "www.", 4) == 0) {
					slen -= 4;
					memmove(&s[si], &s[si+4], slen-si);
				}
			}
		}
	}
	// set it
	Url u;
	u.set ( s , slen ,
		addWWW   ,      /*add www?*/
		stripSession ); /*strip session ids?*/
	// print it
	char out[1024*4];
	char *p = out;
	p += sprintf(p,"tld: ");
	gbmemcpy ( p, u.getTLD(),u.getTLDLen());
	p += u.getTLDLen();
	char c = *p;
	*p = '\0';
	printf("%s\n",out);
	*p = c;
	

	// dom
	p = out;
	sprintf ( p , "dom: ");
	p += gbstrlen ( p );
	gbmemcpy ( p , u.getDomain() , u.getDomainLen() );
	p += u.getDomainLen();
	c = *p;
	*p = '\0';
	printf("%s\n",out);
	*p = c;
	// host
	p = out;
	sprintf ( p , "host: ");
	p += gbstrlen ( p );
	gbmemcpy ( p , u.getHost() , u.getHostLen() );
	p += u.getHostLen();
	c = *p;
	*p = '\0';
	printf("%s\n",out);
	*p = c;
	// then the whole url
	printf("url: %s\n", u.getUrl() );

	/*
	int32_t  siteLen;
	char *site = u.getSite ( &siteLen , NULL , false );
	if ( site ) {
		c = site[siteLen];
//.........这里部分代码省略.........
开发者ID:DeadNumbers,项目名称:open-source-search-engine,代码行数:101,代码来源:urlinfo.cpp

示例4: addCookie

void HttpMime::addCookie(const httpcookie_t &cookie, const Url &currentUrl, SafeBuf *cookieJar) {
	// don't add expired cookie into cookie jar
	if (cookie.m_expired) {
		return;
	}

	if (cookie.m_domain) {
		cookieJar->safeMemcpy(cookie.m_domain, cookie.m_domainLen);
		cookieJar->pushChar('\t');
		cookieJar->safeStrcpy(cookie.m_defaultDomain ? "FALSE\t" : "TRUE\t");
	} else {
		cookieJar->safeMemcpy(currentUrl.getHost(), currentUrl.getHostLen());
		cookieJar->pushChar('\t');

		cookieJar->safeStrcpy("FALSE\t");
	}

	if (cookie.m_path) {
		cookieJar->safeMemcpy(cookie.m_path, cookie.m_pathLen);
		cookieJar->pushChar('\t');
	} else {
		if (currentUrl.getPathLen()) {
			cookieJar->safeMemcpy(currentUrl.getPath(), currentUrl.getPathLen());
		} else {
			cookieJar->pushChar('/');
		}
		cookieJar->pushChar('\t');
	}

	if (cookie.m_secure) {
		cookieJar->safeStrcpy("TRUE\t");
	} else {
		cookieJar->safeStrcpy("FALSE\t");
	}

	// we're not using expiration field
	cookieJar->safeStrcpy("0\t");

	int32_t currentLen = cookieJar->length();
	cookieJar->safeMemcpy(cookie.m_cookie, cookie.m_cookieLen);

	// cater for multiline cookie
	const char *currentPos = cookieJar->getBufStart() + currentLen;
	const char *delPosStart = NULL;
	int32_t delLength = 0;
	while (currentPos < cookieJar->getBufPtr() - 1) {
		if (delPosStart) {
			if (is_wspace_a(*currentPos) || *currentPos == '\n' || *currentPos == '\r') {
				++delLength;
			} else {
				break;
			}
		} else {
			if (*currentPos == '\n' || *currentPos == '\r') {
				delPosStart = currentPos;
				++delLength;
			}
		}

		++currentPos;
	}
	cookieJar->removeChunk1(delPosStart, delLength);

	/// @todo ALC handle httpOnly attribute

	cookieJar->pushChar('\n');
}
开发者ID:privacore,项目名称:open-source-search-engine,代码行数:67,代码来源:HttpMime.cpp


注:本文中的Url::getHostLen方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。