本文整理汇总了C++中Url::getHostLen方法的典型用法代码示例。如果您正苦于以下问题：C++ Url::getHostLen方法的具体用法？C++ Url::getHostLen怎么用？C++ Url::getHostLen使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Url的用法示例。
在下文中一共展示了Url::getHostLen方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: setTitle
// returns false and sets g_errno on error
bool Title::setTitle ( Xml *xml, Words *words, int32_t maxTitleLen, Query *query,
LinkInfo *linkInfo, Url *firstUrl, const char *filteredRootTitleBuf, int32_t filteredRootTitleBufSize,
uint8_t contentType, uint8_t langId, int32_t niceness ) {
// make Msg20.cpp faster if it is just has
// Msg20Request::m_setForLinkInfo set to true, no need to extricate a title.
if ( maxTitleLen <= 0 ) {
return true;
}
m_niceness = niceness;
m_maxTitleLen = maxTitleLen;
// if this is too big the "first line" algo can be huge!!!
// and really slow everything way down with a huge title candidate
int32_t maxTitleWords = 128;
// assume no title
reset();
int32_t NW = words->getNumWords();
//
// now get all the candidates
//
// . allow up to 100 title CANDIDATES
// . "as" is the word # of the first word in the candidate
// . "bs" is the word # of the last word IN the candidate PLUS ONE
int32_t n = 0;
int32_t as[MAX_TIT_CANDIDATES];
int32_t bs[MAX_TIT_CANDIDATES];
float scores[MAX_TIT_CANDIDATES];
Words *cptrs[MAX_TIT_CANDIDATES];
int32_t types[MAX_TIT_CANDIDATES];
int32_t parent[MAX_TIT_CANDIDATES];
// record the scoring algos effects
float baseScore [MAX_TIT_CANDIDATES];
float noCapsBoost [MAX_TIT_CANDIDATES];
float qtermsBoost [MAX_TIT_CANDIDATES];
float inCommonCandBoost[MAX_TIT_CANDIDATES];
// reset these
for ( int32_t i = 0 ; i < MAX_TIT_CANDIDATES ; i++ ) {
// assume no parent
parent[i] = -1;
}
// xml and words class for each link info, rss item
Xml tx[MAX_TIT_CANDIDATES];
Words tw[MAX_TIT_CANDIDATES];
int32_t ti = 0;
// restrict how many link texts and rss blobs we check for titles
// because title recs like www.google.com have hundreds and can
// really slow things down to like 50ms for title generation
int32_t kcount = 0;
int32_t rcount = 0;
//int64_t x = gettimeofdayInMilliseconds();
// . get every link text
// . TODO: repeat for linkInfo2, the imported link text
for ( Inlink *k = NULL; linkInfo && (k = linkInfo->getNextInlink(k)) ; ) {
// breathe
QUICKPOLL(m_niceness);
// fast skip check for link text
if ( k->size_linkText >= 3 && ++kcount >= 20 ) continue;
// fast skip check for rss item
if ( k->size_rssItem > 10 && ++rcount >= 20 ) continue;
// set Url
Url u;
u.set( k->getUrl(), k->size_urlBuf );
// is it the same host as us?
bool sh = true;
// skip if not from same host and should be
if ( firstUrl->getHostLen() != u.getHostLen() ) {
sh = false;
}
// skip if not from same host and should be
if ( strncmp( firstUrl->getHost(), u.getHost(), u.getHostLen() ) ) {
sh = false;
}
// get the link text
if ( k->size_linkText >= 3 ) {
char *p = k->getLinkText();
int32_t plen = k->size_linkText - 1;
if ( ! verifyUtf8 ( p , plen ) ) {
log("title: set4 bad link text from url=%s", k->getUrl());
continue;
}
// now the words.
if ( !tw[ti].set( k->getLinkText(), k->size_linkText - 1, true, 0 ) ) {
//.........这里部分代码省略.........
示例2: addCookieHeader
/// Builds a "Cookie: ...\r\n" request header for @p url from the entries in
/// @p cookieJar and appends it to @p sb.
///
/// The jar is parsed as newline-terminated lines made of tab-separated fields,
/// in this order: domain, subdomain flag ("TRUE" allows subdomains), path,
/// secure flag, expiration, and finally the cookie text itself (everything
/// after the fifth tab). Only the domain, flag and path fields are evaluated
/// here; the secure and expiration fields are skipped over.
///
/// @param cookieJar NUL-terminated cookie jar text; every entry must end in '\n'
///                  (a trailing line without '\n' is ignored).
/// @param url       NUL-terminated url the request is going to be sent to.
/// @param sb        buffer receiving the header; untouched if no cookie matches.
/// @return always true.
bool HttpMime::addCookieHeader(const char *cookieJar, const char *url, SafeBuf *sb) {
	Url tmpUrl;
	tmpUrl.set(url);

	// matching cookies are collected here first so that the "Cookie: " prefix
	// is only written when there is at least one of them
	SafeBuf tmpSb;

	size_t cookieJarLen = strlen(cookieJar);
	const char *lineStartPos = cookieJar;
	const char *lineEndPos = NULL;

	while ((lineEndPos = (const char*)memchr(lineStartPos, '\n', cookieJarLen - (lineStartPos - cookieJar))) != NULL) {
		const char *currentPos = lineStartPos;
		const char *tabPos = NULL;
		unsigned fieldCount = 0;
		bool skipCookie = false;

		const char *domain = NULL;
		int32_t domainLen = 0;

		// walk over the five leading fields; afterwards currentPos points at the cookie text
		while (fieldCount < 5 && (tabPos = (const char*)memchr(currentPos, '\t', lineEndPos - currentPos)) != NULL) {
			switch (fieldCount) {
				case 0:
					// domain (compared once the flag field is known)
					domain = currentPos;
					domainLen = tabPos - currentPos;
					break;
				case 1: {
					// flag: the field has to be exactly "TRUE". Checking the length first
					// keeps memcmp from reading past a shorter field.
					const int32_t flagLen = tabPos - currentPos;
					const bool allowSubdomain = (flagLen == 4 && memcmp(currentPos, "TRUE", 4) == 0);

					if (allowSubdomain) {
						// host must end with the cookie domain
						if (tmpUrl.getHostLen() < domainLen ||
						    !endsWith(tmpUrl.getHost(), tmpUrl.getHostLen(), domain, domainLen)) {
							skipCookie = true;
						}
					} else {
						// only the specific domain matches
						if (tmpUrl.getHostLen() != domainLen ||
						    strncasecmp(domain, tmpUrl.getHost(), domainLen) != 0) {
							skipCookie = true;
						}
					}
				} break;
				case 2: {
					// path: the url path has to start with the cookie path
					const char *path = currentPos;
					const int32_t pathLen = tabPos - currentPos;
					const char *urlPath = tmpUrl.getPath();
					const int32_t urlPathLen = tmpUrl.getPathLen();

					if (strncasecmp(path, urlPath, pathLen) != 0) {
						// non-matching path - ignore cookie
						skipCookie = true;
					} else if (urlPathLen != pathLen) {
						// lengths differ: accept only when one of the two paths ends in '/'.
						// An empty path never "ends in '/'"; the length guards avoid indexing [-1].
						const bool cookiePathEndsWithSlash = (pathLen > 0 && path[pathLen - 1] == '/');
						const bool urlPathEndsWithSlash = (urlPathLen > 0 && urlPath[urlPathLen - 1] == '/');
						if (!cookiePathEndsWithSlash && !urlPathEndsWithSlash) {
							// non-matching path - ignore cookie
							skipCookie = true;
						}
					}
				} break;
				case 3:
					// secure (not evaluated)
					break;
				case 4:
					// expiration (not evaluated)
					break;
				default:
					break;
			}

			currentPos = tabPos + 1;
			++fieldCount;
		}

		// a line with fewer than five fields (blank line, comment, truncated entry)
		// has no cookie text to send, so it is dropped instead of being emitted
		if (!skipCookie && fieldCount == 5) {
			tmpSb.safeMemcpy(currentPos, lineEndPos - currentPos);
			tmpSb.pushChar(';');
		}

		lineStartPos = lineEndPos + 1;
	}

	// we don't need to care about the last line (we always end on \n)
	if (tmpSb.length() > 0) {
		sb->safeStrcpy("Cookie: ");
		sb->safeMemcpy(&tmpSb);
		sb->safeStrcpy("\r\n");
	}

	return true;
}
示例3: main
int main ( int argc , char *argv[] ) {
bool addWWW = true;
bool stripSession = true;
// check for arguments
for (int32_t i = 1; i < argc; i++) {
if (strcmp(argv[i], "-w") == 0)
addWWW = false;
else if (strcmp(argv[i], "-s") == 0)
stripSession = false;
}
// initialize
//g_mem.init(100*1024);
hashinit();
//g_conf.m_tfndbExtBits = 23;
loop:
// read a url from stddin
char sbuf[1024];
if ( ! fgets ( sbuf , 1024 , stdin ) ) exit(1);
char *s = sbuf;
char fbuf[1024];
// decode if we should
if ( strncmp(s,"http%3A%2F%2F",13) == 0 ||
strncmp(s,"https%3A%2F%2F",13) == 0 ) {
urlDecode(fbuf,s,gbstrlen(s));
s = fbuf;
}
// old url
printf("###############\n");
printf("old: %s",s);
int32_t slen = gbstrlen(s);
// remove any www. if !addWWW
if (!addWWW) {
if (slen >= 4 &&
strncasecmp(s, "www.", 4) == 0) {
slen -= 4;
memmove(s, &s[4], slen);
}
else {
// get past a ://
int32_t si = 0;
while (si < slen &&
( s[si] != ':' ||
s[si+1] != '/' ||
s[si+2] != '/' ) )
si++;
// remove the www.
if (si + 7 < slen) {
si += 3;
if (strncasecmp(&s[si], "www.", 4) == 0) {
slen -= 4;
memmove(&s[si], &s[si+4], slen-si);
}
}
}
}
// set it
Url u;
u.set ( s , slen ,
addWWW , /*add www?*/
stripSession ); /*strip session ids?*/
// print it
char out[1024*4];
char *p = out;
p += sprintf(p,"tld: ");
gbmemcpy ( p, u.getTLD(),u.getTLDLen());
p += u.getTLDLen();
char c = *p;
*p = '\0';
printf("%s\n",out);
*p = c;
// dom
p = out;
sprintf ( p , "dom: ");
p += gbstrlen ( p );
gbmemcpy ( p , u.getDomain() , u.getDomainLen() );
p += u.getDomainLen();
c = *p;
*p = '\0';
printf("%s\n",out);
*p = c;
// host
p = out;
sprintf ( p , "host: ");
p += gbstrlen ( p );
gbmemcpy ( p , u.getHost() , u.getHostLen() );
p += u.getHostLen();
c = *p;
*p = '\0';
printf("%s\n",out);
*p = c;
// then the whole url
printf("url: %s\n", u.getUrl() );
/*
int32_t siteLen;
char *site = u.getSite ( &siteLen , NULL , false );
if ( site ) {
c = site[siteLen];
//.........这里部分代码省略.........
示例4: addCookie
void HttpMime::addCookie(const httpcookie_t &cookie, const Url ¤tUrl, SafeBuf *cookieJar) {
// don't add expired cookie into cookie jar
if (cookie.m_expired) {
return;
}
if (cookie.m_domain) {
cookieJar->safeMemcpy(cookie.m_domain, cookie.m_domainLen);
cookieJar->pushChar('\t');
cookieJar->safeStrcpy(cookie.m_defaultDomain ? "FALSE\t" : "TRUE\t");
} else {
cookieJar->safeMemcpy(currentUrl.getHost(), currentUrl.getHostLen());
cookieJar->pushChar('\t');
cookieJar->safeStrcpy("FALSE\t");
}
if (cookie.m_path) {
cookieJar->safeMemcpy(cookie.m_path, cookie.m_pathLen);
cookieJar->pushChar('\t');
} else {
if (currentUrl.getPathLen()) {
cookieJar->safeMemcpy(currentUrl.getPath(), currentUrl.getPathLen());
} else {
cookieJar->pushChar('/');
}
cookieJar->pushChar('\t');
}
if (cookie.m_secure) {
cookieJar->safeStrcpy("TRUE\t");
} else {
cookieJar->safeStrcpy("FALSE\t");
}
// we're not using expiration field
cookieJar->safeStrcpy("0\t");
int32_t currentLen = cookieJar->length();
cookieJar->safeMemcpy(cookie.m_cookie, cookie.m_cookieLen);
// cater for multiline cookie
const char *currentPos = cookieJar->getBufStart() + currentLen;
const char *delPosStart = NULL;
int32_t delLength = 0;
while (currentPos < cookieJar->getBufPtr() - 1) {
if (delPosStart) {
if (is_wspace_a(*currentPos) || *currentPos == '\n' || *currentPos == '\r') {
++delLength;
} else {
break;
}
} else {
if (*currentPos == '\n' || *currentPos == '\r') {
delPosStart = currentPos;
++delLength;
}
}
++currentPos;
}
cookieJar->removeChunk1(delPosStart, delLength);
/// @todo ALC handle httpOnly attribute
cookieJar->pushChar('\n');
}