当前位置: 首页>>代码示例>>C++>>正文


C++ CUrl::getUrl方法代码示例

本文整理汇总了C++中CUrl::getUrl方法的典型用法代码示例。如果您正苦于以下问题：C++ CUrl::getUrl方法的具体用法？C++ CUrl::getUrl怎么用？C++ CUrl::getUrl使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类CUrl的用法示例。


在下文中一共展示了CUrl::getUrl方法的2个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: doLogin

/**
 * Performs the login request for a task and, on success, stores a cookie
 * marker for that task.
 *
 * The login URL is taken from task->loginurl and parsed with CUrl; the
 * request itself is issued through HttpProtocol::curl_login using the
 * configured HTTP timeout.
 *
 * @param curl     curl handle forwarded to HttpProtocol::curl_login.
 * @param task     task whose loginurl and id are used; may be NULL.
 * @param urlnode  URL node forwarded to HttpProtocol::curl_login.
 * @return 1 when curl_login returns none of the handled error codes and the
 *         cookie marker has been saved; -1 when task is NULL, no
 *         TaskOtherInfo exists for the task, the login URL does not parse,
 *         or curl_login reports HTTP_FETCH_RET_ERROR,
 *         HTTP_FETCH_RET_ERROR_INVALIDHOST or HTTP_FETCH_RET_ERROR_UNACCEPTED.
 */
int FetcherManager::doLogin(CURL *curl, Task *task, UrlNode *urlnode) {
    // Validate task first: it is dereferenced (task->id) by the lookup below.
    if (!task) {
        return -1;
    }

    InfoCrawler *infocrawler = InfoCrawler::getInstance();
    TaskOtherInfo *taskother = infocrawler->getTaskScheduleManager()->getTaskOtherInfo(task->id);
    if (!taskother) {
        return -1;
    }

    CUrl url;
    url.parse(task->loginurl);

    // An empty URL after parsing is treated as an invalid login URL.
    if (url.getUrl().empty()) {
        return -1;
    }

    HttpProtocol httpprotocol;
    // Buffer handed to curl_login and logged afterwards; start it out empty.
    char downstatistic[512];
    downstatistic[0] = 0;
    RESPONSE_HEADER rheader;

    mylog_info(m_pLogGlobalCtrl->infolog, "before login %s - %s:%s:%d",url.getUrl().c_str(),INFO_LOG_SUFFIX);
    int ret = httpprotocol.curl_login(curl, url, urlnode, infocrawler->getConf()->httptimeout, &rheader, downstatistic);
    mylog_info(m_pLogGlobalCtrl->infolog, "after login  %s %s %d - %s:%s:%d",url.getUrl().c_str(), downstatistic, ret,INFO_LOG_SUFFIX);
    /* if (ret == HTTP_FETCH_RET_REDIRECT) { //redirect
         errorlog("LOGIN ERROR: fetched %s  relocated to %s taskid %d\n", url.getUrl().c_str() ,(char *)page.m_sLocation.c_str(),task->id);
     } else*/
    if (ret == HTTP_FETCH_RET_ERROR) { // fetch error: just discard
        mylog_error(m_pLogGlobalCtrl->errorlog, "login fetched %s taskid %d - %s:%s:%d:%d", url.getUrl().c_str(), task->id,INFO_LOG_SUFFIX,ret);
    } else if (ret == HTTP_FETCH_RET_ERROR_INVALIDHOST) { // invalid host, cannot access
        mylog_error(m_pLogGlobalCtrl->errorlog, "login fetched %s taskid %d - %s:%s:%d:%d", url.getUrl().c_str(), task->id,INFO_LOG_SUFFIX,ret);
    } else if (ret == HTTP_FETCH_RET_ERROR_UNACCEPTED) { // content is invalid, discard
        mylog_error(m_pLogGlobalCtrl->errorlog, "LOGIN fetched %s unaccepted contenttyped %s taskid %d - %s:%s:%d:%d", url.getUrl().c_str(), rheader.contenttype.c_str(), task->id,INFO_LOG_SUFFIX,ret);
    } else {
        // fetchingcookie is raised only while the cookie marker is being saved.
        taskother->fetchingcookie = true;
        // Mutable array rather than char* bound to a string literal: the
        // literal-to-char* conversion is ill-formed since C++11, while an
        // array still decays to char* for saveCookie.
        static char loginok[] = "LOGIN OK";
        saveCookie(task->id, loginok, strlen(loginok));
        taskother->fetchingcookie = false;
        return 1;
    }
    return -1;
}
开发者ID:codrocker,项目名称:bloomServer,代码行数:43,代码来源:FetcherManager.cpp

示例2: fetch

int FetcherManager::fetch() {
    InfoCrawler *infocrawler = InfoCrawler::getInstance();
    UrlAnalyseManager *urlAnalyseManager = infocrawler->getUrlAnalyseManager();

    CURL *curl = curl_easy_init();
    curl_easy_setopt(curl, CURLOPT_COOKIEFILE, ""); //just to start the cookie engine
    curl_easy_setopt(curl, CURLOPT_SHARE, sh);

    while(running()) {
        curl_easy_reset(curl);

        UrlNode *urlnode = NULL;
        bool html_from_outer= false;

        urlnode = urlAnalyseManager->getUrlFromOuterHtml();
        if (urlnode) {
            html_from_outer = true;
        } else {
            urlnode = urlAnalyseManager->getUrl();
        }

        if (urlnode == NULL) {
            my_sleep(100 * 1000); //0.1s
            continue;
        }
        if (!(urlnode->task))
        {
            mylog_info(m_pLogGlobalCtrl->infolog, "node task is null %s - %s:%s:%d",urlnode->url,INFO_LOG_SUFFIX);
        }
        TaskOtherInfo *taskother = infocrawler->getTaskScheduleManager()->getTaskOtherInfo(urlnode->taskid);
        int taskbatch = urlnode->taskbatch;
        if (urlnode->needtologin) {
            //need to login and cookie is null
            if (!(infocrawler->getTaskScheduleManager()->getCookieFromTask(urlnode->taskid))) {
                if (taskother->fetchingcookie) {
                    infocrawler->getUrlAnalyseManager()->insertUrl(urlnode);
                    infocrawler->getTaskScheduleManager()->decreaseTaskUrlNum(urlnode->task,taskbatch);
#ifdef URLMEMCACHEDB
                    infocrawler->deleteUrlMcLocalThread();
#endif
                    continue;
                } else {
                    doLogin(curl, urlnode->task, urlnode);
                }
            }
        }

        /*if (urlnode->task->sourcetype == SOURCE_TYPE_COMPANY && urlnode->type & URL_TYPE_HOMEPAGE)
        {
            strcat(urlnode->url, "&event=32698647");
            strcpy(urlnode->refererurl, "http://search.china.alibaba.com/tools/validate_redirect.htm?ru=http%253A%252F%252Fsearch.china.alibaba.com%252Fcompany%252Fcompany_search.htm%253Fkeywords%253D%25CA%25D6%25BB%25FA%2526pageSize%253D30%2526n%253Dy%2526showStyle%253Dpopular%2526beginPage%253D4&event=32698647&n=y");
        }*/
        CUrl url;
        url.parse(urlnode->url);
        //wrong url format
        if (url.getUrl().empty()) {
            infocrawler->getTaskScheduleManager()->increaseTaskErrorUrlNum(urlnode->taskid);
            infocrawler->getTaskScheduleManager()->decreaseTaskUrlNum(urlnode->task, taskbatch);
            infocrawler->getLocalDbManager()->decidesaveFetched(urlnode);
            delete urlnode;
#ifdef URLMEMCACHEDB
            infocrawler->deleteUrlMcLocalThread();
#endif
            continue;
        }
        Page page;
        Buffer *content = create_buffer(DEFAULT_PAGE_BUF_SIZE);


        //do fetch
        HttpProtocol httpprotocol;
        char downstatistic[512] ;
        downstatistic[0] = 0;
        RESPONSE_HEADER rheader;

//        mylog_info(m_pLogGlobalCtrl->infolog, "before fetch %s %s %llu %d %d  - %s:%s:%d",url.getUrl().c_str(), urlnode->url, urlnode->id, urlnode->taskid, urlnode->errornum,INFO_LOG_SUFFIX);
        //int ret = httpprotocol.fetch(url, content, urlnode, page, infocrawler->getConf()->httptimeout,urlnode->task->tasksendtype);
//        int ret = httpprotocol.curl_fetch(curl, url, content, urlnode, infocrawler->getConf()->httptimeout, urlnode->task->tasksendtype, &rheader, downstatistic);
        int sendtype = urlnode->task->tasksendtype;
        if (urlnode->task->sourcetype == SOURCE_TYPE_COMPANY && urlnode->type & URL_TYPE_HOMEPAGE)
        {
            /*FILE * f = fopen("ali.txt", "rb");
            char line[1024] = {0};
            int i = 0;
            string cookie;
            string post;
            while(fgets(line, 1023, f)) {
            	char *newline = strtrim(line, NULL);
            	if (i++ == 0) {
            		cookie = newline;
            	} else {
            		post = newline;
            	}
            }
            fclose(f);
            */
            sendtype = REQUEST_TYPE_GET;
        }

        int ret = 0;
//.........这里部分代码省略.........
开发者ID:codrocker,项目名称:bloomServer,代码行数:101,代码来源:FetcherManager.cpp


注:本文中的CUrl::getUrl方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。