本文整理汇总了 Python 中 scrapy.http.request.Request.meta['title'](即 Request 对象 meta 字典中的 'title' 键)的典型用法代码示例。如果您正苦于以下问题：Request.meta['title'] 的具体用法是什么？Request.meta['title'] 怎么用？有哪些 Request.meta['title'] 的使用例子？那么恭喜您，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解其所在类 scrapy.http.request.Request 的用法示例。
在下文中一共展示了 Request.meta['title'] 的 4 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: parse
# 需要导入模块: from scrapy.http.request import Request [as 别名]
# 注意: meta 是 Request 实例上的字典属性, 无法单独导入; 请通过 request.meta['title'] 读写
def parse(self, response):
    """Yield one follow-up request per article teaser on the page.

    Every ``div`` with class ``ARTICLE`` is inspected for an
    ``a.ArticleTitle`` child.  For each teaser a ``Request`` to the
    article's full URL is yielded with ``self.parse_full_post`` as the
    callback; the teaser title and the full URL travel along in
    ``request.meta`` under the keys ``'title'`` and ``'url'``.

    Teasers without a title text node or without an ``href`` are skipped.
    """
    base_url = "http://www.thesaigontimes.vn"
    for post in Selector(response).xpath('//div[@class="ARTICLE"]'):
        titles = post.xpath('a[@class="ArticleTitle"]/text()').extract()
        hrefs = post.xpath('a[@class="ArticleTitle"]/@href').extract()
        if not titles or not hrefs:
            # Indexing an empty result with [0] would raise IndexError and
            # abort the generator, losing every remaining teaser on the page.
            continue
        item_full_url = base_url + hrefs[0]
        request = Request(item_full_url, callback=self.parse_full_post)
        request.meta['title'] = titles[0]
        request.meta['url'] = item_full_url
        yield request
示例2: parse
# 需要导入模块: from scrapy.http.request import Request [as 别名]
# 注意: meta 是 Request 实例上的字典属性, 无法单独导入; 请通过 request.meta['title'] 读写
def parse(self, response):
    """Yield one request per article listed under ``div.post li``.

    For each list item the link target, the link text and the text of the
    ``small`` element are extracted.  The date is pulled out of the
    ``" posted on <word> <date>"`` text, converted from ``'%d %B %Y'`` to
    ``'%Y-%m-%d'``, and passed together with the title to
    ``self.parse_article`` through ``req.meta`` (keys ``'title'`` and
    ``'date'``).

    List items that lack a link, a title or a date element, or whose date
    text does not have the expected shape, are skipped.
    """
    sel = Selector(response)
    for li in sel.css('div.post li'):
        urls = li.css('a::attr(href)').extract()
        titles = li.css('a::text').extract()
        date_texts = li.css('small::text').extract()
        if not (urls and titles and date_texts):
            # An empty extract() result cannot be indexed; skip this item
            # instead of aborting the whole page with IndexError.
            continue
        url, title = urls[0], titles[0]

        match = re.match(r" posted on \w+ (.+)", date_texts[0])
        if match is None:
            # re.match returns None on a mismatch; calling .group on it
            # would raise AttributeError.
            continue
        date = datetime.strptime(match.group(1), '%d %B %Y').strftime('%Y-%m-%d')

        # %-formatting prints the same space-separated line on Python 2 and
        # Python 3, unlike the Python-2-only ``print a, b, c`` statement.
        print("%s %s %s" % (url, title, date))

        req = Request('http://www.listen-to-english.com/%s' % url, callback=self.parse_article)
        req.meta['title'] = title
        req.meta['date'] = date
        yield req
示例3: parse
# 需要导入模块: from scrapy.http.request import Request [as 别名]
# 注意: meta 是 Request 实例上的字典属性, 无法单独导入; 请通过 request.meta['title'] 读写
def parse(self, response):
    """Yield one request per headline under ``div.li-columnised``.

    For each ``div.ts-headline`` the link target and text of
    ``h3.ts-title a`` and the first text node of ``div.body`` are read.
    The date text is parsed as ``'%d %B %Y'`` (full month name) and, if
    that fails, as ``'%d %b %Y'`` (abbreviated month name), then
    reformatted as ``'%Y-%m-%d'``.  Title and date are attached to the
    request in ``req.meta`` (keys ``'title'`` and ``'date'``) and the
    request is handled by ``self.parse_article``.

    Raises:
        ValueError: if the date text matches neither format.
    """
    sel = Selector(response)
    for li in sel.css('div.li-columnised div.ts-headline'):
        url = li.css('h3.ts-title a::attr("href")').extract()[0]
        title = li.css('h3.ts-title a::text').extract()[0]
        date_text = li.css('div.body::text').extract()[0].strip()
        try:
            parsed = datetime.strptime(date_text, '%d %B %Y')
        except ValueError:
            # Only a format mismatch should trigger the fallback; a bare
            # ``except`` would also swallow KeyboardInterrupt/SystemExit.
            parsed = datetime.strptime(date_text, '%d %b %Y')
        date = parsed.strftime('%Y-%m-%d')
        req = Request('http://www.bbc.co.uk' + url, callback=self.parse_article)
        req.meta['title'] = title
        req.meta['date'] = date
        yield req
示例4: parse
# 需要导入模块: from scrapy.http.request import Request [as 别名]
# 注意: meta 是 Request 实例上的字典属性, 无法单独导入; 请通过 request.meta['title'] 读写
def parse(self, response):
    """Follow pagination and yield one request per post link on the page.

    ``self.counter`` is incremented on every call.  If a "next page" link
    is present and the counter is still below ``self.max_page``, a request
    for that page is yielded first, with this method as its callback.
    After that, a request handled by ``self.parse_full_post`` is yielded
    for every post anchor; its ``meta`` carries ``'url'`` (the full post
    URL) and ``'title'``.

    Note that ``meta['title']`` holds the complete ``extract()`` result
    (a list), not a single string.
    """
    self.counter += 1
    base_url = "http://m.vnexpress.net"
    hxs = HtmlXPathSelector(response)

    # Pagination: only the first matching href is followed.
    next_links = hxs.select("//a[@class='right txt_1_1em']/@href").extract()
    if next_links and self.counter < self.max_page:
        yield Request(base_url + next_links[0], self.parse)

    for anchor in hxs.select("//a[@class='block_image_relative ui-link']"):
        full_url = base_url + anchor.select('.//@href').extract()[0]
        follow_up = Request(full_url, callback=self.parse_full_post)
        title_nodes = anchor.select(
            './/h2[@class="h2SdTopHome txt_1_5em"][1]/text()'
        ).extract()
        follow_up.meta['title'] = title_nodes
        follow_up.meta['url'] = full_url
        yield follow_up