本文整理汇总了C++中CUrl::getUrl方法的典型用法代码示例。如果您正苦于以下问题：C++ CUrl::getUrl方法的具体用法？C++ CUrl::getUrl怎么用？C++ CUrl::getUrl使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类CUrl的用法示例。
在下文中一共展示了CUrl::getUrl方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: doLogin
/**
 * Performs the login request for a task and records a marker cookie on success.
 *
 * The task's login URL is parsed, fetched through HttpProtocol::curl_login,
 * and the outcome is logged. Any result other than the three explicit error
 * codes handled below is treated as a successful login.
 *
 * @param curl    libcurl easy handle forwarded to HttpProtocol::curl_login.
 * @param task    Task whose `loginurl` and `id` are used; may be null.
 * @param urlnode URL node forwarded to HttpProtocol::curl_login.
 * @return 1 when the login is treated as successful; -1 when `task` is null,
 *         no TaskOtherInfo exists for the task, the login URL parses to an
 *         empty URL, or the fetch reports one of the handled error codes.
 */
int FetcherManager::doLogin(CURL *curl, Task *task, UrlNode *urlnode) {
    // Validate `task` first: it is dereferenced (task->id) by the lookup below.
    if (!task) {
        return -1;
    }

    InfoCrawler *infocrawler = InfoCrawler::getInstance();
    TaskOtherInfo *taskother = infocrawler->getTaskScheduleManager()->getTaskOtherInfo(task->id);
    if (!taskother) {
        return -1;
    }

    CUrl url;
    url.parse(task->loginurl);
    // An empty URL after parsing is treated as a malformed login URL.
    if (url.getUrl().empty()) {
        return -1;
    }

    HttpProtocol httpprotocol;
    char downstatistic[512];
    downstatistic[0] = 0; // start as an empty C string; curl_login receives the buffer
    RESPONSE_HEADER rheader;

    // NOTE(review): INFO_LOG_SUFFIX presumably expands to the arguments matching
    // the trailing "%s:%s:%d" of each format string -- confirm against its definition.
    mylog_info(m_pLogGlobalCtrl->infolog, "before login %s - %s:%s:%d", url.getUrl().c_str(), INFO_LOG_SUFFIX);
    int ret = httpprotocol.curl_login(curl, url, urlnode, infocrawler->getConf()->httptimeout, &rheader, downstatistic);
    mylog_info(m_pLogGlobalCtrl->infolog, "after login %s %s %d - %s:%s:%d", url.getUrl().c_str(), downstatistic, ret, INFO_LOG_SUFFIX);

    // A redirect result (HTTP_FETCH_RET_REDIRECT) is intentionally not treated
    // as an error here; the original handling for it was disabled.
    if (ret == HTTP_FETCH_RET_ERROR || ret == HTTP_FETCH_RET_ERROR_INVALIDHOST) {
        // Generic fetch failure or inaccessible host: log and give up.
        mylog_error(m_pLogGlobalCtrl->errorlog, "login fetched %s taskid %d - %s:%s:%d:%d", url.getUrl().c_str(), task->id, INFO_LOG_SUFFIX, ret);
        return -1;
    }
    if (ret == HTTP_FETCH_RET_ERROR_UNACCEPTED) {
        // Response content type was not accepted: log it and give up.
        mylog_error(m_pLogGlobalCtrl->errorlog, "LOGIN fetched %s unaccepted contenttyped %s taskid %d - %s:%s:%d:%d", url.getUrl().c_str(), rheader.contenttype.c_str(), task->id, INFO_LOG_SUFFIX, ret);
        return -1;
    }

    // Success path: flag the task while the marker cookie is being saved.
    taskother->fetchingcookie = true;
    // A static array (not a pointer to a string literal) keeps the storage
    // alive for the program's lifetime and converts to either `char *` or
    // `const char *`, whichever saveCookie expects.
    static char loginok[] = "LOGIN OK";
    saveCookie(task->id, loginok, strlen(loginok));
    taskother->fetchingcookie = false;
    return 1;
}
示例2: fetch
int FetcherManager::fetch() {
InfoCrawler *infocrawler = InfoCrawler::getInstance();
UrlAnalyseManager *urlAnalyseManager = infocrawler->getUrlAnalyseManager();
CURL *curl = curl_easy_init();
curl_easy_setopt(curl, CURLOPT_COOKIEFILE, ""); //just to start the cookie engine
curl_easy_setopt(curl, CURLOPT_SHARE, sh);
while(running()) {
curl_easy_reset(curl);
UrlNode *urlnode = NULL;
bool html_from_outer= false;
urlnode = urlAnalyseManager->getUrlFromOuterHtml();
if (urlnode) {
html_from_outer = true;
} else {
urlnode = urlAnalyseManager->getUrl();
}
if (urlnode == NULL) {
my_sleep(100 * 1000); //0.1s
continue;
}
if (!(urlnode->task))
{
mylog_info(m_pLogGlobalCtrl->infolog, "node task is null %s - %s:%s:%d",urlnode->url,INFO_LOG_SUFFIX);
}
TaskOtherInfo *taskother = infocrawler->getTaskScheduleManager()->getTaskOtherInfo(urlnode->taskid);
int taskbatch = urlnode->taskbatch;
if (urlnode->needtologin) {
//need to login and cookie is null
if (!(infocrawler->getTaskScheduleManager()->getCookieFromTask(urlnode->taskid))) {
if (taskother->fetchingcookie) {
infocrawler->getUrlAnalyseManager()->insertUrl(urlnode);
infocrawler->getTaskScheduleManager()->decreaseTaskUrlNum(urlnode->task,taskbatch);
#ifdef URLMEMCACHEDB
infocrawler->deleteUrlMcLocalThread();
#endif
continue;
} else {
doLogin(curl, urlnode->task, urlnode);
}
}
}
/*if (urlnode->task->sourcetype == SOURCE_TYPE_COMPANY && urlnode->type & URL_TYPE_HOMEPAGE)
{
strcat(urlnode->url, "&event=32698647");
strcpy(urlnode->refererurl, "http://search.china.alibaba.com/tools/validate_redirect.htm?ru=http%253A%252F%252Fsearch.china.alibaba.com%252Fcompany%252Fcompany_search.htm%253Fkeywords%253D%25CA%25D6%25BB%25FA%2526pageSize%253D30%2526n%253Dy%2526showStyle%253Dpopular%2526beginPage%253D4&event=32698647&n=y");
}*/
CUrl url;
url.parse(urlnode->url);
//wrong url format
if (url.getUrl().empty()) {
infocrawler->getTaskScheduleManager()->increaseTaskErrorUrlNum(urlnode->taskid);
infocrawler->getTaskScheduleManager()->decreaseTaskUrlNum(urlnode->task, taskbatch);
infocrawler->getLocalDbManager()->decidesaveFetched(urlnode);
delete urlnode;
#ifdef URLMEMCACHEDB
infocrawler->deleteUrlMcLocalThread();
#endif
continue;
}
Page page;
Buffer *content = create_buffer(DEFAULT_PAGE_BUF_SIZE);
//do fetch
HttpProtocol httpprotocol;
char downstatistic[512] ;
downstatistic[0] = 0;
RESPONSE_HEADER rheader;
// mylog_info(m_pLogGlobalCtrl->infolog, "before fetch %s %s %llu %d %d - %s:%s:%d",url.getUrl().c_str(), urlnode->url, urlnode->id, urlnode->taskid, urlnode->errornum,INFO_LOG_SUFFIX);
//int ret = httpprotocol.fetch(url, content, urlnode, page, infocrawler->getConf()->httptimeout,urlnode->task->tasksendtype);
// int ret = httpprotocol.curl_fetch(curl, url, content, urlnode, infocrawler->getConf()->httptimeout, urlnode->task->tasksendtype, &rheader, downstatistic);
int sendtype = urlnode->task->tasksendtype;
if (urlnode->task->sourcetype == SOURCE_TYPE_COMPANY && urlnode->type & URL_TYPE_HOMEPAGE)
{
/*FILE * f = fopen("ali.txt", "rb");
char line[1024] = {0};
int i = 0;
string cookie;
string post;
while(fgets(line, 1023, f)) {
char *newline = strtrim(line, NULL);
if (i++ == 0) {
cookie = newline;
} else {
post = newline;
}
}
fclose(f);
*/
sendtype = REQUEST_TYPE_GET;
}
int ret = 0;
//.........这里部分代码省略.........