This page collects typical code examples of the Request.meta['author'] usage pattern in Python, i.e. carrying author data between callbacks through the meta dict of a scrapy.Request. If you are wondering what Request.meta['author'] is for, how to use it, or what real-world examples look like, the selected code examples below may help. You can also explore further usage examples of the class it belongs to, scrapy.Request.
Three code examples involving Request.meta['author'] are shown below, sorted by popularity by default.
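All three examples rely on the same mechanism: values stored in a Request's meta dict travel with the request and come back as response.meta in the callback that handles the response. A minimal sketch of that round trip is shown below; the spider name, URLs and field values are hypothetical and serve only to illustrate the pattern.

import scrapy

class AuthorMetaDemoSpider(scrapy.Spider):
    # Hypothetical spider, used only to illustrate the meta round trip.
    name = 'author_meta_demo'
    start_urls = ['http://example.com/authors']

    def parse(self, response):
        req = scrapy.Request('http://example.com/authors/1',
                             callback=self.parse_author)
        # Anything stored in request.meta ...
        req.meta['author'] = {'name': 'Jane Doe', 'id': None}
        yield req

    def parse_author(self, response):
        # ... is available again as response.meta in the callback.
        author = response.meta['author']
        self.logger.info('Processing author %s', author['name'])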
Example 1: parse_author_id
# Required import: from scrapy import Request
# (meta['author'] is an entry in a Request instance's meta dict; it is not something that can be imported.)
def parse_author_id(self, response):
    # Strip XML namespaces so the plain MARC-XML XPaths below match.
    response.selector.remove_namespaces()
    for n in response.xpath('//datafield[@tag="035"]'):
        # Only use identifiers whose source (subfield 9) is BAI.
        if n.xpath('subfield[@code="9"]/text()').extract() == ['BAI']:
            name = n.xpath('subfield[@code="a"]/text()').extract()[0]
            if not name:
                # Fall back to the author name carried in the incoming request's meta.
                name = response.meta['author']['name']
            # Search INSPIRE-HEP for the author's 2014-2015 records; this is a Python 2
            # snippet, so non-ASCII characters are escaped as XML character references.
            req = Request("http://inspirehep.net/search?ln=pl&ln=pl&p=author:{0} year:2014->2015&of=xm".format(name.encode('ascii', 'xmlcharrefreplace')), callback=self.parse_name)
            # Forward the record id and author info to the next callback via meta.
            req.meta['recid'] = response.meta['recid']
            req.meta['author'] = response.meta['author']
            req.meta['author']['name'] = unicode(name)
            yield req
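The same hand-off can also be expressed by passing meta to the Request constructor instead of assigning keys on the request afterwards; scrapy.Request accepts a meta keyword argument. A sketch of an equivalent construction for the request above, assuming search_url holds the INSPIRE-HEP URL built in the snippet (note that dict(...) copies the author dict instead of mutating the one shared with the incoming request, a small deviation from the original):

req = Request(
    search_url,  # assumed: the INSPIRE-HEP search URL built above
    callback=self.parse_name,
    meta={'recid': response.meta['recid'],
          'author': dict(response.meta['author'], name=unicode(name))},
)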
Example 2: _process
# Required import: from scrapy import Request
def _process(author, recid):
    # Note: this helper references self.parse_author_id and self.parse_name,
    # so it is meant to be defined inside a spider method where self is in scope.
    name = author.xpath('subfield[@code="a"]/text()').extract()[0]
    print(type(name))
    print(name)
    identifier = author.xpath('subfield[@code="x"]/text()').extract()
    print("{2} === I have name: {0} and id: {1}".format(name.encode('utf-8'), identifier, recid))
    if identifier:
        # The author has an INSPIRE-HEP record id: fetch that record directly.
        print("Checking author")
        req = Request("http://inspirehep.net/record/{0}/export/xm".format(identifier[0]), callback=self.parse_author_id)
    else:
        # No id available: fall back to a name search over 2014-2015 records.
        print("Checking records for author")
        req = Request("http://inspirehep.net/search?ln=pl&ln=pl&p=author:{0} year:2014->2015&of=xm".format(name.encode('ascii', 'xmlcharrefreplace')), callback=self.parse_name)
    # Attach the record id and author info so the callback can read them back
    # from response.meta.
    req.meta['recid'] = recid
    req.meta['author'] = {'name': unicode(name), 'id': identifier}
    return req
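Because _process only builds and returns the Request, the enclosing callback has to yield it. A minimal sketch of how such a helper might be driven; the surrounding parse method and its XPath (MARC author fields 100/700) are assumptions, not part of the original example:

def parse(self, response):
    response.selector.remove_namespaces()
    recid = response.meta['recid']

    def _process(author, recid):
        ...  # helper exactly as shown in Example 2

    # Build one request per author field and let Scrapy schedule them.
    for author in response.xpath('//datafield[@tag="100" or @tag="700"]'):
        yield _process(author, recid)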
Example 3: parse
# Required import: from scrapy import Request
def parse(self, response):
    poetry = response.xpath('//div[@class="main3"]/div[@class="shileft"]')
    title = poetry.xpath('./div[@class="son1"]/h1/text()').extract()
    if len(title) > 0:
        title = str(title[0])
        # Text nodes of the poem block: index 1 holds the dynasty, index 3 the author,
        # and index 5 onwards the poem body itself.
        paragraphs = poetry.xpath('./div[@class="son2"]/p//text()').extract()
        if len(paragraphs) >= 4:
            dynasty = str(paragraphs[1])
            author = str(paragraphs[3])
            rawText = "\r\n".join(paragraphs[5:])
            # Some pages keep the poem text directly under the div rather than in <p> tags.
            canRawText = "\r\n".join(poetry.xpath('./div[@class="son2"]/text()').extract())
            if len(canRawText) > 0:
                rawText += canRawText
            translationUrl = poetry.xpath('./div[@class="son5"]/p/a/@href').extract()
            if len(translationUrl) > 0:
                translationUrl = self.translation_home + translationUrl[0]
                request = Request(url=translationUrl, callback=self.parseTranslation)
                # Double any single quotes (SQL-style escaping) and pass every field
                # to parseTranslation through the request's meta dict.
                request.meta['dynasty'] = dynasty.strip().replace('\'', '\'\'')
                request.meta['title'] = title.strip().replace('\'', '\'\'')
                request.meta['rawText'] = rawText.strip().replace('\'', '\'\'')
                request.meta['author'] = author.strip().replace('\'', '\'\'')
                yield request
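In newer Scrapy versions (1.7 and later), cb_kwargs is the recommended channel for user data like this: the values are delivered to the callback as ordinary keyword arguments, leaving meta to Scrapy's own components. A sketch of how the hand-off in Example 3 could look with cb_kwargs; parseTranslation's body is omitted:

# Inside parse(), in place of the meta assignments above:
request = Request(url=translationUrl, callback=self.parseTranslation,
                  cb_kwargs={'dynasty': dynasty, 'title': title,
                             'rawText': rawText, 'author': author})
yield request

# The callback then receives the values as keyword arguments:
def parseTranslation(self, response, dynasty, title, rawText, author):
    ...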