本文整理汇总了 Python 中 scrapy.loader.processors.MapCompose 方法的典型用法代码示例。如果您正苦于以下问题：Python processors.MapCompose 方法的具体用法？Python processors.MapCompose 怎么用？Python processors.MapCompose 使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块 scrapy.loader.processors 的用法示例。
在下文中一共展示了 processors.MapCompose 方法的 14 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: scrape_product
# 需要导入模块: from scrapy.loader import processors [as 别名]
# 或者: from scrapy.loader.processors import MapCompose [as 别名]
def scrape_product(self, response):
    """Build and return a ``MyItem`` loaded from *response*.

    The loader uses ``MapCompose(remove_tags)`` as its default input
    processor and ``TakeFirst()`` as its default output processor.

    Args:
        response: The response handed to this callback.

    Returns:
        The item produced by ``ItemLoader.load_item()``.
    """
    loader = ItemLoader(item=MyItem(), response=response)
    loader.default_input_processor = MapCompose(remove_tags)
    loader.default_output_processor = TakeFirst()
    # NOTE(review): "selector" looks like a placeholder CSS selector - confirm.
    loader.add_css("my_field", "selector")
    product = loader.load_item()
    return product
示例2: populate_item
# 需要导入模块: from scrapy.loader import processors [as 别名]
# 或者: from scrapy.loader.processors import MapCompose [as 别名]
def populate_item(self, response):
    """Yield one ``MySpiderItem`` loaded from *response*.

    The loader's default input processor is ``MapCompose(remove_tags)``.
    No fields are registered in this template.

    Args:
        response: The response handed to this callback.

    Yields:
        The item produced by ``ItemLoader.load_item()``.
    """
    loader = ItemLoader(item=MySpiderItem(), response=response)
    loader.default_input_processor = MapCompose(remove_tags)
    # Template hook: register fields here, e.g. loader.add_css("", "")
    populated = loader.load_item()
    yield populated
# 3. PAGINATION LEVEL 1
示例3: populate_item
# 需要导入模块: from scrapy.loader import processors [as 别名]
# 或者: from scrapy.loader.processors import MapCompose [as 别名]
def populate_item(self, response):
    """Yield the loaded item, then the result of ``self.paginate``.

    The loader's default input processor is ``MapCompose(remove_tags)``.
    No fields are registered in this template.

    Args:
        response: The response handed to this callback.

    Yields:
        First the item from ``ItemLoader.load_item()``, then whatever
        ``self.paginate(response)`` returns.
    """
    loader = ItemLoader(item=MySpiderItem(), response=response)
    loader.default_input_processor = MapCompose(remove_tags)
    # Template hooks: register fields here, e.g.
    #   loader.add_css("")
    #   loader.add_value("raw", raw)

    # Emit the populated item before moving on to pagination.
    populated = loader.load_item()
    yield populated

    # Pagination is only triggered after the item above has been consumed.
    # NOTE(review): paginate() is defined outside this view; presumably it
    # returns a request for the next page - confirm.
    yield self.paginate(response)
# 3. PAGINATION LEVEL 2
示例4: populate_item
# 需要导入模块: from scrapy.loader import processors [as 别名]
# 或者: from scrapy.loader.processors import MapCompose [as 别名]
def populate_item(self, response):
    """Yield one ``MySpiderItem`` loaded from *response*.

    The loader's default input processor is ``MapCompose(remove_tags)``.
    No fields are registered in this template.

    Args:
        response: The response handed to this callback.

    Yields:
        The item produced by ``ItemLoader.load_item()``.
    """
    loader = ItemLoader(item=MySpiderItem(), response=response)
    loader.default_input_processor = MapCompose(remove_tags)
    # Template hooks: register one add_css("", "") call per field here.
    populated = loader.load_item()
    yield populated
示例5: parse
# 需要导入模块: from scrapy.loader import processors [as 别名]
# 或者: from scrapy.loader.processors import MapCompose [as 别名]
def parse(self, response):
    """Build and return a ``MyItem`` loaded from *response*.

    The loader uses ``MapCompose(remove_tags)`` as its default input
    processor and ``TakeFirst()`` as its default output processor.
    No fields are registered in this template.

    Args:
        response: The response handed to this callback.

    Returns:
        The item produced by ``ItemLoader.load_item()``.
    """
    loader = ItemLoader(item=MyItem(), response=response)
    loader.default_input_processor = MapCompose(remove_tags)
    loader.default_output_processor = TakeFirst()
    # Template hooks: register fields here, e.g.
    #   loader.add_css("my_field", "my_css")
    #   loader.add_xpath("my_field", "my_xpath")
    parsed_item = loader.load_item()
    return parsed_item
示例6: populate_item
# 需要导入模块: from scrapy.loader import processors [as 别名]
# 或者: from scrapy.loader.processors import MapCompose [as 别名]
def populate_item(self, response):
    """Yield one ``MySpiderItem`` loaded from *response*.

    The loader's default input processor is ``MapCompose(remove_tags)``.
    No fields are registered in this template.

    Args:
        response: The response handed to this callback.

    Yields:
        The item produced by ``ItemLoader.load_item()``.
    """
    loader = ItemLoader(item=MySpiderItem(), response=response)
    loader.default_input_processor = MapCompose(remove_tags)
    # Template hook: register fields here, e.g. loader.add_css("field", "")
    populated = loader.load_item()
    yield populated
# 3. PAGINATION LEVEL 2
示例7: parse
# 需要导入模块: from scrapy.loader import processors [as 别名]
# 或者: from scrapy.loader.processors import MapCompose [as 别名]
def parse(self, response):
    """Load a ``MyItem`` and hand it to a follow-up POST request.

    The loader's default input processor is ``MapCompose(remove_tags)``.
    The loaded item is placed in the request's ``meta`` under the key
    ``'item'`` and ``self.populate_field`` is set as the callback.

    Args:
        response: The response handed to this callback.

    Yields:
        A single ``FormRequest``.
    """
    loader = ItemLoader(item=MyItem(), response=response)
    loader.default_input_processor = MapCompose(remove_tags)
    # Template hooks: register one add_css("", "") call per field here.
    partial_item = loader.load_item()

    # The item travels with the request so the callback can keep filling it.
    request_meta = {'item': partial_item}
    # NOTE(review): "POST_URL" and the form data look like placeholders - confirm.
    follow_up = FormRequest(
        "POST_URL",
        formdata={'parameter': 'p'},
        meta=request_meta,
        callback=self.populate_field,
    )
    yield follow_up
示例8: populate_field
# 需要导入模块: from scrapy.loader import processors [as 别名]
# 或者: from scrapy.loader.processors import MapCompose [as 别名]
def populate_field(self, response):
    """Continue loading the item carried in ``response.meta["item"]``.

    The loader's default input processor is ``MapCompose(remove_tags)``.
    No additional fields are registered in this template.

    Args:
        response: The response handed to this callback; its ``meta``
            must contain an ``"item"`` entry.

    Returns:
        The item produced by ``ItemLoader.load_item()``.
    """
    carried_item = response.meta["item"]
    loader = ItemLoader(item=carried_item, response=response)
    loader.default_input_processor = MapCompose(remove_tags)
    # Template hook: register fields here, e.g. loader.add_css("field", "")
    return loader.load_item()
示例9: parse_blog_article
# 需要导入模块: from scrapy.loader import processors [as 别名]
# 或者: from scrapy.loader.processors import MapCompose [as 别名]
def parse_blog_article(self, response):
    """Load a feed entry from a blog article page.

    Content, author, update date, link, path and title are collected
    through a ``FeedEntryItemLoader``. Author names are passed through
    ``MapCompose(str.title)``.

    Args:
        response: The article response; ``response.meta["blog"]`` is
            used to build the entry's path.

    Returns:
        The item produced by the loader's ``load_item()``.
    """
    il = FeedEntryItemLoader(
        response=response,
        remove_elems=[".ad-component", ".wp-caption-text"],
        base_url="https://cms.{}".format(self.name),
        timezone="Europe/Vienna",
        dayfirst=True,
        yearfirst=False,
    )

    # Both selectors feed the same field, in this order.
    for content_selector in ("article > h2", ".storycontent-article"):
        il.add_css("content_html", content_selector)

    # Two candidate locations for the author name; each value is title-cased.
    author_selectors = (
        ".falter-heading ::text",
        ".thinktank-meta > span ::text",
    )
    for author_selector in author_selectors:
        il.add_css("author_name", author_selector, MapCompose(str.title))

    # Only the part of the label matching the dd.mm.yyyy pattern is kept.
    date_pattern = r"(\d{2}\.\d{2}\.\d{4})"
    il.add_css("updated", ".post > .text-label ::text", re=date_pattern)

    il.add_value("link", response.url)
    blog_path = "blog_{}".format(response.meta["blog"])
    il.add_value("path", blog_path)
    il.add_css("title", "article > h1 ::text")
    return il.load_item()
示例10: custom_field
# 需要导入模块: from scrapy.loader import processors [as 别名]
# 或者: from scrapy.loader.processors import MapCompose [as 别名]
def custom_field():
    """Build a ``scrapy.Field`` for free-text values.

    Input processor: ``MapCompose(DataUtils.remove_html)``.
    Output processor: ``Join()``.

    Returns:
        The configured ``scrapy.Field``.
    """
    strip_html = MapCompose(DataUtils.remove_html)
    joiner = Join()
    return scrapy.Field(input_processor=strip_html, output_processor=joiner)
示例11: numeric_field
# 需要导入模块: from scrapy.loader import processors [as 别名]
# 或者: from scrapy.loader.processors import MapCompose [as 别名]
def numeric_field():
    """Build a ``scrapy.Field`` for numeric values.

    Input processor: ``MapCompose(DataUtils.remove_html)``.
    Output processor: ``TakeFirst()``.

    Returns:
        The configured ``scrapy.Field``.
    """
    strip_html = MapCompose(DataUtils.remove_html)
    first_value = TakeFirst()
    return scrapy.Field(input_processor=strip_html, output_processor=first_value)
示例12: price_field
# 需要导入模块: from scrapy.loader import processors [as 别名]
# 或者: from scrapy.loader.processors import MapCompose [as 别名]
def price_field():
    """Build a ``scrapy.Field`` for price values.

    Input processor: a ``MapCompose`` chain of three steps, in order:
    remove every ``'$'`` from string values (other values pass through
    unchanged), ``DataUtils.remove_html``, then ``float``.
    Output processor: ``TakeFirst()``.

    Returns:
        The configured ``scrapy.Field``.
    """
    def strip_dollar(value):
        # isinstance, unlike an exact type comparison, also accepts
        # subclasses of str.
        if isinstance(value, str):
            return value.replace('$', '')
        return value

    return scrapy.Field(
        input_processor=MapCompose(strip_dollar, DataUtils.remove_html, float),
        output_processor=TakeFirst(),
    )
示例13: url_field
# 需要导入模块: from scrapy.loader import processors [as 别名]
# 或者: from scrapy.loader.processors import MapCompose [as 别名]
def url_field():
    """Build a ``scrapy.Field`` for URL values.

    Input processor: ``DataUtils.remove_html`` followed by a slash
    normalisation step. Output processor: ``Join()``.

    Returns:
        The configured ``scrapy.Field``.
    """
    def normalize_slashes(value):
        # Replace each "//" with "/"; this also damages the scheme separator.
        collapsed = value.replace('//', '/')
        # Restore the scheme separator that the step above collapsed.
        with_https = collapsed.replace('https:/', 'https://')
        with_http = with_https.replace('http:/', 'http://')
        # Drop trailing slashes.
        return with_http.rstrip('/')

    cleaner = MapCompose(DataUtils.remove_html, normalize_slashes)
    return scrapy.Field(input_processor=cleaner, output_processor=Join())
示例14: address_field
# 需要导入模块: from scrapy.loader import processors [as 别名]
# 或者: from scrapy.loader.processors import MapCompose [as 别名]
def address_field():
    """Build a ``scrapy.Field`` for street addresses.

    Input processor: ``DataUtils.remove_html`` followed by a step that
    appends a default city and/or state when ``usaddress.parse`` finds
    no ``PlaceName`` / ``StateName`` component in the value.
    Output processor: ``Join()``.

    Returns:
        The configured ``scrapy.Field``.
    """
    def parse_address(value):
        components = usaddress.parse(value)

        def default_or_empty(field, default):
            # A component counts only if its label matches and its text
            # is non-empty.
            found = any(part[1] == field and part[0] for part in components)
            return '' if found else default

        city_suffix = default_or_empty("PlaceName", " Chicago, ")
        state_suffix = default_or_empty("StateName", "IL")
        # NOTE(review): when a city is present but the state is not, "IL"
        # is appended with no separator - confirm this is intended.
        return f'{value}{city_suffix}{state_suffix}'

    address_cleaner = MapCompose(DataUtils.remove_html, parse_address)
    return scrapy.Field(input_processor=address_cleaner, output_processor=Join())