当前位置: 首页>>代码示例>>Python>>正文


Python Request.meta['url']方法代码示例

本文整理汇总了Python中scrapy.Request.meta['url']方法的典型用法代码示例。如果您正苦于以下问题:Python Request.meta['url']方法的具体用法?Python Request.meta['url']怎么用?Python Request.meta['url']使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在scrapy.Request的用法示例。


在下文中一共展示了Request.meta['url']方法的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: initStartRequests

# 需要导入模块: from scrapy import Request [as 别名]
# 或者: from scrapy.Request import meta['url'] [as 别名]
 def initStartRequests(self, keyword):
     """
     Build the initial search request for *keyword*.

     :param keyword: search term; converted to unicode before use
     :return: a Request whose meta carries 'keyword' and the joined 'url'
     """
     keyword = self.getUnicode(keyword)
     # Work on a copy: the original aliased self.baseURL and mutated the
     # shared URL template in place, leaking state between calls.
     url = list(self.baseURL)
     url[1] = keyword
     fullURL = ''.join(url)  # join once instead of twice
     request = Request(url=fullURL)
     request.meta['keyword'] = keyword
     request.meta['url'] = fullURL
     self.keywordsAndPages[keyword] = 1  # every keyword starts crawling at page 1
     return request
开发者ID:nanchengking,项目名称:searchSpider,代码行数:16,代码来源:haosouSearchSpider.py

示例2: createNextPageRequest

# 需要导入模块: from scrapy import Request [as 别名]
# 或者: from scrapy.Request import meta['url'] [as 别名]
 def createNextPageRequest(self, keyword, pn):
     """
     Build a follow-up search request for one keyword at a result offset.

     :param keyword: search term to query
     :param pn: result offset -- 0 is page 1, 10 is page 2, 20 is page 3, ...
     :return: a Request whose meta carries 'keyword' and the built 'url'
     """
     # Copy the template: the original assigned tem = self.baseURL and then
     # mutated the shared list in place, leaking the last keyword into
     # spider-wide state.
     tem = list(self.baseURL)
     tem[1] = keyword
     url = ''.join(tem) + '&pn=' + str(pn)
     request = Request(url=url)
     request.meta['keyword'] = keyword
     request.meta['url'] = url
     return request
开发者ID:nanchengking,项目名称:searchSpider,代码行数:16,代码来源:baiduSearchSpider.py

示例3: parse

# 需要导入模块: from scrapy import Request [as 别名]
# 或者: from scrapy.Request import meta['url'] [as 别名]
 def parse(self, response):
     """
     Parse one TTPod (天天动听) JSONP search-result page.

     Yields a MusicSearchspiderItem per deduplicated, filtered result and,
     when a full page (50 rows) came back and the page limit allows, a
     Request for the next page.
     """
     self.num += 1  # pages processed so far
     if response.status == 200:
         # The JSONP payload contains bare true/false literals, so these
         # names must be in scope for eval() below.
         true = True  # do NOT delete: needed by eval()
         false = False
         # Strip the jsonp_search(...) wrapper to get the raw JSON text.
         tem = re.findall(r"^jsonp_search\((.*?)\)$", response.body)
         # NOTE(review): eval() on a network response is unsafe; json.loads
         # on the captured group would be the safer choice.
         results = eval(tem[0])['data']
         for result in results:
             item = MusicSearchspiderItem()
             item['platform'] = u"天天动听"
             item['keyword'] = response.meta['keyword']
             item['resultUrl'] = response.url
             # Audition (preview) URL when present, else empty string.
             if 'audition_list' in result.keys():
                 item['targetUrl'] = result['audition_list'][0]['url']
             else:
                 item['targetUrl'] = ''
             # item['program'] = result['song_name']
             item['album'] = ''
             item['author'] = result['singer_name']
             item['createDate'] = datetime.datetime.now()
             item['status'] = 0
             item['processDate'] = datetime.datetime.now()
             item['checkStatus'] = 0
             item['searchTask'] = None if self.searchTaskId == -1 else self.searchTaskId
             item['project'] = None if self.projectId == -1 else self.projectId
             item['program'] = self.program
             if not item['targetUrl'] in self.songsURLS:  # dedupe by target URL
                 if self.filter(targetTitle=item['program'], author=item['author']):  # keyword filter
                     self.songsURLS.add(item['targetUrl'])
                     yield item
         logging.info(u'===这一页有%s条数据===' % results.__len__())
         if results.__len__() == 50:  # a full page implies there may be a next one
             keyword = response.meta['keyword']
             self.keywordsAndPages[keyword] += 1
             pageNum = self.keywordsAndPages[keyword]
             keyword = self.getUnicode(keyword)
             # NOTE(review): this aliases and mutates the shared baseURL
             # template in place; a copy would be safer.
             nextURL = self.baseURL
             nextURL[1] = keyword
             nextURL[3] = str(pageNum)
             nextURL = ''.join(nextURL)
             if pageNum < (self.limit + 1):  # respect the configured page limit
                 logging.info(u"===现在爬取的关键字是: %s===", keyword)
                 logging.info(u"===现在爬取的关键字的page num是: %s===", pageNum)
                 request = Request(url=nextURL)
                 request.meta['keyword'] = keyword
                 request.meta['url'] = nextURL
                 yield request
     else:
         logging.info(response.status)
开发者ID:nanchengking,项目名称:searchSpider,代码行数:51,代码来源:tiantianMusicSearchSpider.py

示例4: parse

# 需要导入模块: from scrapy import Request [as 别名]
# 或者: from scrapy.Request import meta['url'] [as 别名]
 def parse(self, response):
     """
     Parse one NetEase (网易音乐) JSON search-result page.

     Yields a MusicSearchspiderItem per deduplicated, filtered song and,
     when a full page (100 rows) came back and the limit allows, a POST
     Request for the next result offset.
     """
     self.num += 1  # pages processed so far
     if response.status == 200:
         results = json.loads(response.body)['result']['songs']
         for result in results:
             item = MusicSearchspiderItem()
             item['platform'] = u"网易音乐"
             item['keyword'] = response.meta['keyword']
             item['resultUrl'] = response.url
             item['targetUrl'] = ''  # no target URL extracted for this platform
             item['program'] = self.program
             # item['program'] = result['filename']
             item['targetTitle'] = result['name']
             item['album'] = result['album']['name']
             item['author'] = result['artists'][0]['name']
             # Dedup key: program title + artist name.
             item['unique_code'] = item['program'] + item['author']
             item['createDate'] = datetime.datetime.now()
             item['status'] = 0
             item['processDate'] = datetime.datetime.now()
             item['checkStatus'] = 0
             item['searchTask'] = None if self.searchTaskId == -1 else self.searchTaskId
             item['project'] = None if self.projectId == -1 else self.projectId
             if not item['unique_code'] in self.unique_codes:  # dedupe
                 if self.filter(targetTitle=item['targetTitle'], author=item['author']):  # keyword filter
                     self.unique_codes.add(item['unique_code'])
                     yield item
         logging.info(u'===这一页有%s条数据===' % results.__len__())
         if results.__len__() == 100:  # a full page implies there may be a next one
             keyword = response.meta['keyword']
             # This spider pages by result offset, advancing in steps of 100.
             self.keywordsAndPages[keyword] += 100
             pageNum = self.keywordsAndPages[keyword]
             keyword = self.getUnicode(keyword)
             # NOTE(review): aliases and mutates the shared baseURL template.
             nextURL = self.baseURL
             nextURL[1] = keyword
             nextURL[4] = str(pageNum)
             nextURL = ''.join(nextURL)
             if pageNum < self.limit * 100:  # respect the configured page limit
                 logging.info(u"===现在爬取的关键字是: %s===", keyword)
                 logging.info(u"===现在爬取的关键字的page num是: %s===", pageNum)
                 request = Request(url=nextURL)
                 request.meta['keyword'] = keyword
                 request.meta['url'] = nextURL
                 # Same Referer/Cookies headers and POST as the initial request.
                 request.headers.appendlist("Referer", 'http://music.163.com')
                 request.headers.appendlist("Cookies", 'appver=2.0.2')
                 request.method = "POST"
                 yield request
     else:
         logging.info(response.status)
开发者ID:nanchengking,项目名称:searchSpider,代码行数:50,代码来源:wangyiMusicSearchSpider.py

示例5: createNextPageRequest

# 需要导入模块: from scrapy import Request [as 别名]
# 或者: from scrapy.Request import meta['url'] [as 别名]
 def createNextPageRequest(self, keyword, pn):
     """
     Build a follow-up search request for one keyword at a page number.

     :param keyword: search term to query
     :param pn: page number -- 1 is the first page, 2 the second, ...
     :return: a Request whose meta carries 'keyword' and the built 'url'
     """
     # Copy the template: the original assigned tem = self.baseURL and then
     # mutated the shared list in place, leaking the last keyword/page into
     # spider-wide state.
     tem = list(self.baseURL)
     tem[1] = keyword
     tem[3] = str(pn)
     url = ''.join(tem)
     request = Request(url=url)
     request.meta['keyword'] = keyword
     request.meta['url'] = url
     logging.info(u"===进行下一页===")
     return request
开发者ID:nanchengking,项目名称:searchSpider,代码行数:18,代码来源:shenmaSearchSpider.py

示例6: initStartRequests

# 需要导入模块: from scrapy import Request [as 别名]
# 或者: from scrapy.Request import meta['url'] [as 别名]
 def initStartRequests(self, keyword):
     """
     Build the initial search request for *keyword*.

     :param keyword: search term; converted to unicode before use
     :return: a POST Request with the Referer/Cookies headers set, whose
              meta carries 'keyword' and the joined 'url'
     """
     keyword = self.getUnicode(keyword)
     # Work on a copy: the original aliased self.baseURL and mutated the
     # shared URL template in place, leaking state between calls.
     url = list(self.baseURL)
     url[1] = keyword
     fullURL = ''.join(url)  # join once instead of twice
     request = Request(url=fullURL)
     request.meta['keyword'] = keyword
     request.meta['url'] = fullURL
     request.headers.appendlist("Referer", 'http://music.163.com')
     request.headers.appendlist("Cookies", 'appver=2.0.2')
     # NOTE(review): relies on Request.method being writable after
     # construction; Request(url, method="POST") is the supported form.
     request.method = "POST"
     self.keywordsAndPages[keyword] = 0  # offset-based paging starts at 0
     return request
开发者ID:nanchengking,项目名称:searchSpider,代码行数:19,代码来源:wangyiMusicSearchSpider.py

示例7: parse

# 需要导入模块: from scrapy import Request [as 别名]
# 或者: from scrapy.Request import meta['url'] [as 别名]
    def parse(self, response):
        """
        Parse one Kuwo (酷我音乐) HTML search-result page.

        Yields a MusicSearchspiderItem per deduplicated, filtered row and,
        while the pager still shows a next-page link and the page limit is
        not reached, a Request for the next page.
        """
        self.num += 1  # pages processed so far
        if response.status == 200:
            results = response.css("div.list ul li.clearfix")  # one <li> per song row
            for result in results:
                item = MusicSearchspiderItem()
                item['platform'] = u"酷我音乐"
                item['keyword'] = response.meta['keyword']
                item['resultUrl'] = response.url
                # Song link, album title and artist name from the row's cells.
                item['targetUrl'] = self.getUnicode(
                    ''.join(result.xpath("./p[@class='m_name']/a[@title]/@href").extract())).strip()
                # item['program'] = self.getUnicode(
                #     ''.join(result.xpath("./p[@class='m_name']/a[@title]/@title").extract())).strip()
                item['album'] = self.getUnicode(
                    ''.join(result.xpath("./p[@class='a_name']/a[@title]/@title").extract())).strip()
                item['author'] = self.getUnicode(
                    ''.join(result.xpath("./p[@class='s_name']/a[@title]/@title").extract())).strip()
                item['createDate'] = datetime.datetime.now()
                item['status'] = 0
                item['processDate'] = datetime.datetime.now()
                item['checkStatus'] = 0
                item['searchTask'] = None if self.searchTaskId == -1 else self.searchTaskId
                item['project'] = None if self.projectId == -1 else self.projectId
                item['program'] = self.program
                if not item['targetUrl'] in self.songsURLS:  # dedupe by target URL
                    if self.filter(targetTitle=item['program'], author=item['author']):  # keyword filter
                        self.songsURLS.add(item['targetUrl'])
                        yield item

            # Second-to-last pager anchor; its text reads u'下一页' ("next
            # page") only when a further page exists.
            nextA=response.css("div.page a")[-2]
            if nextA.xpath("./text()")[0].extract().strip()==u'下一页':#next-page link present
                keyword = response.meta['keyword']
                self.keywordsAndPages[keyword] += 1
                pageNum = self.keywordsAndPages[keyword]
                nextURL = u'http://sou.kuwo.cn'+self.getUnicode(''.join(
                    nextA.xpath("./@href").extract())).strip()
                if pageNum < (self.limit):  # respect the configured page limit
                    logging.info(u"==现在爬取的关键字是: %s", keyword)
                    logging.info(u"==现在爬取的关键字的page num是: %s", pageNum)
                    request = Request(url=nextURL)
                    request.meta['keyword'] = keyword
                    request.meta['url'] = nextURL
                    yield request
        else:
            logging.info(response.status)
开发者ID:nanchengking,项目名称:searchSpider,代码行数:47,代码来源:kuwoMusicSearchSpider.py

示例8: parse

# 需要导入模块: from scrapy import Request [as 别名]
# 或者: from scrapy.Request import meta['url'] [as 别名]
    def parse(self, response):
        """
        Parse one Xiami (虾米音乐) HTML search-result page.

        Yields a MusicSearchspiderItem per deduplicated, filtered row and,
        while a next-page link exists and the page limit is not reached,
        a Request for the next page.
        """
        self.num += 1  # pages processed so far
        if response.status == 200:
            results = response.css("tbody tr")  # one table row per song
            for result in results:
                item = MusicSearchspiderItem()
                item['platform'] = u"虾米音乐"
                item['keyword'] = response.meta['keyword']
                item['resultUrl'] = response.url
                # Song link, album title and artist name from the row's cells.
                item['targetUrl'] = self.getUnicode(
                    ''.join(result.xpath("./td[@class='song_name']/a[@target]/@href").extract())).strip()
                # item['program'] = self.getUnicode(
                #     ''.join(result.xpath("./td[@class='song_name']/a[@target]/@title").extract())).strip()
                item['album'] = self.getUnicode(
                    ''.join(result.xpath("./td[@class='song_album']/a[@target]/@title").extract())).strip()
                item['author'] = self.getUnicode(
                    ''.join(result.xpath("./td[@class='song_artist']/a[@target]/text()").extract())).strip()
                item['createDate'] = datetime.datetime.now()
                item['status'] = 0
                item['processDate'] = datetime.datetime.now()
                item['checkStatus'] = 0
                item['searchTask'] = None if self.searchTaskId == -1 else self.searchTaskId
                item['project'] = None if self.projectId == -1 else self.projectId
                item['program'] = self.program
                if not item['targetUrl'] in self.songsURLS:  # dedupe by target URL
                    if self.filter(targetTitle=item['program'], author=item['author']):  # keyword filter
                        self.songsURLS.add(item['targetUrl'])
                        yield item

            # A 'p_redirect_l' anchor is present only when a next page exists.
            if response.xpath("//a[@class='p_redirect_l']/@href"):
                keyword = response.meta['keyword']
                self.keywordsAndPages[keyword] += 1
                pageNum = self.keywordsAndPages[keyword]
                nextURL = u'http://www.xiami.com' + self.getUnicode(''.join(
                    response.xpath("//div[@class='all_page']/a[@class='p_redirect_l']/@href").extract())).strip()
                if pageNum < (self.limit):  # respect the configured page limit
                    logging.info(u"==现在爬取的关键字是: %s", keyword)
                    logging.info(u"==现在爬取的关键字的page num是: %s", pageNum)
                    request = Request(url=nextURL)
                    request.meta['keyword'] = keyword
                    request.meta['url'] = nextURL
                    yield request
        else:
            logging.info(response.status)
开发者ID:nanchengking,项目名称:searchSpider,代码行数:46,代码来源:xiamiMusicSearchSpider.py

示例9: parse

# 需要导入模块: from scrapy import Request [as 别名]
# 或者: from scrapy.Request import meta['url'] [as 别名]
    def parse(self, response):
        """
        Parse one Baidu Music (百度音乐) HTML search-result page.

        Yields a MusicSearchspiderItem per deduplicated, filtered row and,
        while a next-page link exists and the page limit is not reached,
        a Request for the next page.
        """
        self.num += 1  # pages processed so far
        if response.status == 200:
            results = response.css("div.song-item")  # one div per song row
            for result in results:
                item = MusicSearchspiderItem()
                item['platform'] = u"百度音乐"
                item['keyword'] = response.meta['keyword']
                # Unlike the sibling spiders, the requested URL is read from
                # request meta rather than response.url.
                item['resultUrl'] = response.meta['url']
                item['targetUrl'] = u"http://music.baidu.com" + self.getUnicode(
                    ''.join(result.xpath("./span[@class='song-title']/a[@data-songdata]/@href").extract())).strip()
                # item['program'] = self.getUnicode(
                #     ''.join(result.xpath("./span[@class='song-title']//text()").extract())).strip()
                item['album'] = self.getUnicode(
                    ''.join(result.xpath("./span[@class='album-title']//text()").extract())).strip()
                item['author'] = self.getUnicode(
                    ''.join(result.xpath("./span[@class='singer']//text()").extract())).strip()
                item['createDate'] = datetime.datetime.now()
                item['status'] = 0
                item['processDate'] = datetime.datetime.now()
                item['checkStatus'] = 0
                item['searchTask'] = None if self.searchTaskId == -1 else self.searchTaskId
                item['project'] = None if self.projectId == -1 else self.projectId
                item['program'] = self.program
                if not item['targetUrl'] in self.songsURLS:  # dedupe by target URL
                    if self.filter(targetTitle=item['program'], author=item['author']):  # keyword filter
                        self.songsURLS.add(item['targetUrl'])
                        yield item

            # A 'page-navigator-next' anchor is present only when a next page exists.
            if response.xpath("//div[@class='page-inner']/a[@class='page-navigator-next']/@href"):
                keyword = response.meta['keyword']
                self.keywordsAndPages[keyword] += 1
                pageNum = self.keywordsAndPages[keyword]
                nextURL = u"http://music.baidu.com" + self.getUnicode(''.join(response.xpath(
                    "//div[@class='page-inner']/a[@class='page-navigator-next']/@href").extract())).strip()
                if pageNum < (self.limit):  # respect the configured page limit
                    logging.info(u"==现在爬取的关键字是: %s", keyword)
                    logging.info(u"==现在爬取的关键字的page num是: %s", pageNum)
                    request = Request(url=nextURL)
                    request.meta['keyword'] = keyword
                    request.meta['url'] = nextURL
                    yield request
        else:
            logging.info(response.status)
开发者ID:nanchengking,项目名称:searchSpider,代码行数:46,代码来源:baiduMusicSearchSpider.py

示例10: parse

# 需要导入模块: from scrapy import Request [as 别名]
# 或者: from scrapy.Request import meta['url'] [as 别名]
 def parse(self, response):
     """
     Parse one Kugou (酷狗音乐) JSON search-result page.

     Yields a MusicSearchspiderItem per deduplicated, filtered song and,
     when a full page (50 rows) came back and the page limit allows, a
     Request for the next page.
     """
     self.num += 1  # pages processed so far
     if response.status == 200:
         # NOTE(review): eval() on a network response is unsafe; json.loads
         # would be the safer choice here.
         results = eval(response.body)['data']['info']
         for result in results:
             item = MusicSearchspiderItem()
             item['platform'] = u"酷狗音乐"
             item['keyword'] = response.meta['keyword']
             item['resultUrl'] = response.url
             item['targetUrl'] = ''  # no target URL extracted for this platform
             item['program'] = self.program
             # item['program'] = result['filename']
             item['album'] = result['album_name']
             item['author'] = result['singername']
             # Dedup key: program title + artist name.
             item['unique_code'] = item['program'] + item['author']
             item['createDate'] = datetime.datetime.now()
             item['status'] = 0
             item['processDate'] = datetime.datetime.now()
             item['checkStatus'] = 0
             item['searchTask'] = None if self.searchTaskId == -1 else self.searchTaskId
             item['project'] = None if self.projectId == -1 else self.projectId
             if not item['unique_code'] in self.unique_codes:  # dedupe
                 if self.filter(targetTitle=item['program'], author=item['author']):  # keyword filter
                     self.unique_codes.add(item['unique_code'])
                     yield item
         logging.info(u'===这一页有%s条数据===' % results.__len__())
         if results.__len__() == 50:  # a full page implies there may be a next one
             keyword = response.meta['keyword']
             self.keywordsAndPages[keyword] += 1
             pageNum = self.keywordsAndPages[keyword]
             keyword = self.getUnicode(keyword)
             # NOTE(review): aliases and mutates the shared baseURL template.
             nextURL = self.baseURL
             nextURL[1] = keyword
             nextURL[3] = str(pageNum)
             nextURL = ''.join(nextURL)
             if pageNum < (self.limit + 1):  # respect the configured page limit
                 logging.info(u"===现在爬取的关键字是: %s===", keyword)
                 logging.info(u"===现在爬取的关键字的page num是: %s===", pageNum)
                 request = Request(url=nextURL)
                 request.meta['keyword'] = keyword
                 request.meta['url'] = nextURL
                 yield request
     else:
         logging.info(response.status)
开发者ID:nanchengking,项目名称:searchSpider,代码行数:46,代码来源:kugouMusicSearchSpider.py


注:本文中的scrapy.Request.meta['url']方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。