当前位置: 首页>>代码示例>>Python>>正文


Python processors.MapCompose方法代码示例

本文整理汇总了Python中scrapy.loader.processors.MapCompose的典型用法代码示例(MapCompose是一个处理器类,而非方法)。如果您正苦于以下问题:Python processors.MapCompose的具体用法?Python processors.MapCompose怎么用?Python processors.MapCompose使用的例子?那么恭喜您,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解其所在模块scrapy.loader.processors的用法示例。


在下文中一共展示了processors.MapCompose方法的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: scrape_product

# 需要导入模块: from scrapy.loader import processors [as 别名]
# 或者: from scrapy.loader.processors import MapCompose [as 别名]
def scrape_product(self, response):
    """Load a ``MyItem`` from ``response`` and return it.

    The loader is configured with ``MapCompose(remove_tags)`` as its
    default input processor and ``TakeFirst()`` as its default output
    processor before the single CSS-based field is registered.
    """
    loader = ItemLoader(item=MyItem(), response=response)

    # Loader-wide defaults; they apply to fields without their own processors.
    loader.default_input_processor = MapCompose(remove_tags)
    loader.default_output_processor = TakeFirst()

    loader.add_css("my_field", "selector")

    product = loader.load_item()
    return product
开发者ID:zseta,项目名称:scrapy-templates,代码行数:10,代码来源:sitemap_spider.py

示例2: populate_item

# 需要导入模块: from scrapy.loader import processors [as 别名]
# 或者: from scrapy.loader.processors import MapCompose [as 别名]
def populate_item(self, response):
    """Yield one ``MySpiderItem`` loaded from ``response``.

    Only the default input processor (``MapCompose(remove_tags)``) is set;
    no fields are registered in this template.
    """
    loader = ItemLoader(item=MySpiderItem(), response=response)
    loader.default_input_processor = MapCompose(remove_tags)

    # Template placeholder: register fields here, e.g. loader.add_css("", "")
    populated = loader.load_item()
    yield populated

    # 3. PAGINATION LEVEL 1 
开发者ID:zseta,项目名称:scrapy-templates,代码行数:10,代码来源:1fol_pag2scr.py

示例3: populate_item

# 需要导入模块: from scrapy.loader import processors [as 别名]
# 或者: from scrapy.loader.processors import MapCompose [as 别名]
def populate_item(self, response):
    """Yield the loaded ``MySpiderItem``, then the result of paginating.

    Two values are produced, in this order: the item loaded from
    ``response`` and whatever ``self.paginate(response)`` returns.
    """
    loader = ItemLoader(item=MySpiderItem(), response=response)
    loader.default_input_processor = MapCompose(remove_tags)

    # Template placeholders: register fields here, e.g.
    #   loader.add_css("")
    #   loader.add_value("raw", raw)

    # The populated item goes out first ...
    populated = loader.load_item()
    yield populated

    # ... followed by the pagination step for the next page of data.
    next_step = self.paginate(response)
    yield next_step

    # 3. PAGINATION LEVEL 2 
开发者ID:zseta,项目名称:scrapy-templates,代码行数:15,代码来源:1fol2scr_pag.py

示例4: populate_item

# 需要导入模块: from scrapy.loader import processors [as 别名]
# 或者: from scrapy.loader.processors import MapCompose [as 别名]
def populate_item(self, response):
    """Yield one ``MySpiderItem`` loaded from ``response``.

    The loader uses ``MapCompose(remove_tags)`` as its default input
    processor; no fields are registered in this template.
    """
    loader = ItemLoader(item=MySpiderItem(), response=response)
    loader.default_input_processor = MapCompose(remove_tags)

    # Template placeholders: register fields here, e.g. loader.add_css("", "")

    populated = loader.load_item()
    yield populated
开发者ID:zseta,项目名称:scrapy-templates,代码行数:10,代码来源:1fol2scr.py

示例5: parse

# 需要导入模块: from scrapy.loader import processors [as 别名]
# 或者: from scrapy.loader.processors import MapCompose [as 别名]
def parse(self, response):
    """Load a ``MyItem`` from ``response`` and return it.

    Default processors are ``MapCompose(remove_tags)`` on input and
    ``TakeFirst()`` on output; no fields are registered in this template.
    """
    loader = ItemLoader(item=MyItem(), response=response)
    loader.default_input_processor = MapCompose(remove_tags)
    loader.default_output_processor = TakeFirst()

    # Template placeholders: register fields here, e.g.
    #   loader.add_css("my_field", "my_css")
    #   loader.add_xpath("my_field", "my_xpath")

    loaded = loader.load_item()
    return loaded
开发者ID:zseta,项目名称:scrapy-templates,代码行数:11,代码来源:1scr.py

示例6: populate_item

# 需要导入模块: from scrapy.loader import processors [as 别名]
# 或者: from scrapy.loader.processors import MapCompose [as 别名]
def populate_item(self, response):
    """Yield one ``MySpiderItem`` loaded from ``response``.

    ``MapCompose(remove_tags)`` is installed as the loader's default input
    processor; no fields are registered in this template.
    """
    loader = ItemLoader(item=MySpiderItem(), response=response)
    loader.default_input_processor = MapCompose(remove_tags)

    # Template placeholder: register fields here, e.g. loader.add_css("field", "")
    populated = loader.load_item()
    yield populated

    # 3. PAGINATION LEVEL 2 
开发者ID:zseta,项目名称:scrapy-templates,代码行数:10,代码来源:1fol2fol_pag3scr.py

示例7: parse

# 需要导入模块: from scrapy.loader import processors [as 别名]
# 或者: from scrapy.loader.processors import MapCompose [as 别名]
def parse(self, response):
    """Yield a ``FormRequest`` that carries the partially loaded item.

    The item loaded from ``response`` is stored under ``meta['item']`` and
    the request's callback is ``self.populate_field``.
    """
    loader = ItemLoader(item=MyItem(), response=response)
    loader.default_input_processor = MapCompose(remove_tags)

    # Template placeholders: register fields here, e.g. loader.add_css("", "")

    partial_item = loader.load_item()
    follow_up = FormRequest(
        "POST_URL",
        formdata={'parameter': 'p'},
        meta={'item': partial_item},
        callback=self.populate_field,
    )
    yield follow_up
开发者ID:zseta,项目名称:scrapy-templates,代码行数:10,代码来源:post_pass_item.py

示例8: populate_field

# 需要导入模块: from scrapy.loader import processors [as 别名]
# 或者: from scrapy.loader.processors import MapCompose [as 别名]
def populate_field(self, response):
    """Continue loading the item passed in ``response.meta["item"]``.

    A new loader is wrapped around the existing item with
    ``MapCompose(remove_tags)`` as its default input processor, and the
    loaded item is returned.
    """
    carried_item = response.meta["item"]
    loader = ItemLoader(item=carried_item, response=response)
    loader.default_input_processor = MapCompose(remove_tags)

    # Template placeholder: register fields here, e.g. loader.add_css("field", "")
    completed = loader.load_item()
    return completed
开发者ID:zseta,项目名称:scrapy-templates,代码行数:7,代码来源:post_pass_item.py

示例9: parse_blog_article

# 需要导入模块: from scrapy.loader import processors [as 别名]
# 或者: from scrapy.loader.processors import MapCompose [as 别名]
def parse_blog_article(self, response):
    """Build and return a feed entry for a single blog article page.

    Registers content, author, update date, link, path and title on a
    ``FeedEntryItemLoader`` and returns the loaded item. Author names are
    passed through ``MapCompose(str.title)``. The path is derived from
    ``response.meta["blog"]``.
    """
    unwanted_selectors = [".ad-component", ".wp-caption-text"]
    cms_base_url = "https://cms.{}".format(self.name)
    loader = FeedEntryItemLoader(
        response=response,
        remove_elems=unwanted_selectors,
        base_url=cms_base_url,
        timezone="Europe/Vienna",
        dayfirst=True,
        yearfirst=False,
    )

    # Content: the sub-heading is added before the article body.
    loader.add_css("content_html", "article > h2")
    loader.add_css("content_html", ".storycontent-article")

    # Author: two candidate selectors, both title-cased.
    loader.add_css(
        "author_name", ".falter-heading ::text", MapCompose(str.title)
    )
    loader.add_css(
        "author_name", ".thinktank-meta > span ::text", MapCompose(str.title)
    )

    # The regex keeps only a dd.mm.yyyy-shaped date from the label text.
    date_pattern = r"(\d{2}\.\d{2}\.\d{4})"
    loader.add_css("updated", ".post > .text-label ::text", re=date_pattern)

    loader.add_value("link", response.url)
    blog_path = "blog_{}".format(response.meta["blog"])
    loader.add_value("path", blog_path)
    loader.add_css("title", "article > h1 ::text")

    entry = loader.load_item()
    return entry
开发者ID:PyFeeds,项目名称:PyFeeds,代码行数:23,代码来源:falter_at.py

示例10: custom_field

# 需要导入模块: from scrapy.loader import processors [as 别名]
# 或者: from scrapy.loader.processors import MapCompose [as 别名]
def custom_field():
    """Return a ``scrapy.Field`` for general text values.

    Input values go through ``DataUtils.remove_html`` (presumably strips
    HTML markup -- confirm against ``DataUtils``); output is combined with
    ``Join()``.
    """
    strip_html = MapCompose(DataUtils.remove_html)
    combine = Join()
    return scrapy.Field(input_processor=strip_html, output_processor=combine)
开发者ID:In2ItChicago,项目名称:In2ItChicago,代码行数:4,代码来源:event.py

示例11: numeric_field

# 需要导入模块: from scrapy.loader import processors [as 别名]
# 或者: from scrapy.loader.processors import MapCompose [as 别名]
def numeric_field():
    """Return a ``scrapy.Field`` whose output is reduced with ``TakeFirst()``.

    Input values go through ``DataUtils.remove_html`` (presumably strips
    HTML markup -- confirm against ``DataUtils``). No numeric conversion is
    performed here.
    """
    strip_html = MapCompose(DataUtils.remove_html)
    first_value = TakeFirst()
    return scrapy.Field(input_processor=strip_html, output_processor=first_value)
开发者ID:In2ItChicago,项目名称:In2ItChicago,代码行数:4,代码来源:event.py

示例12: price_field

# 需要导入模块: from scrapy.loader import processors [as 别名]
# 或者: from scrapy.loader.processors import MapCompose [as 别名]
def price_field():
    """Return a ``scrapy.Field`` that turns price values into floats.

    Input pipeline, applied in order by ``MapCompose``:

    1. drop every ``'$'`` from string values (non-strings pass through),
    2. ``DataUtils.remove_html`` (presumably strips HTML markup -- confirm
       against ``DataUtils``),
    3. ``float``.

    The output processor is ``TakeFirst()``.
    """
    def strip_dollar_sign(value):
        # isinstance() instead of ``type(value) == str`` so that str
        # subclasses are cleaned as well; other types are left untouched.
        if isinstance(value, str):
            return value.replace('$', '')
        return value

    return scrapy.Field(
        input_processor=MapCompose(
            strip_dollar_sign,
            DataUtils.remove_html,
            float,
        ),
        output_processor=TakeFirst(),
    )
开发者ID:In2ItChicago,项目名称:In2ItChicago,代码行数:7,代码来源:event.py

示例13: url_field

# 需要导入模块: from scrapy.loader import processors [as 别名]
# 或者: from scrapy.loader.processors import MapCompose [as 别名]
def url_field():
    """Return a ``scrapy.Field`` that tidies up URL strings.

    Input values go through ``DataUtils.remove_html`` (presumably strips
    HTML markup -- confirm against ``DataUtils``) and then through a slash
    normalisation step; output is combined with ``Join()``.
    """
    def normalize_slashes(value):
        # Replacing '//' with '/' also breaks the scheme separator, so the
        # two scheme prefixes are restored right afterwards.
        collapsed = value.replace('//', '/')
        https_fixed = collapsed.replace('https:/', 'https://')
        http_fixed = https_fixed.replace('http:/', 'http://')
        # Finally drop any trailing slashes.
        return http_fixed.rstrip('/')

    clean_input = MapCompose(DataUtils.remove_html, normalize_slashes)
    return scrapy.Field(input_processor=clean_input, output_processor=Join())
开发者ID:In2ItChicago,项目名称:In2ItChicago,代码行数:10,代码来源:event.py

示例14: address_field

# 需要导入模块: from scrapy.loader import processors [as 别名]
# 或者: from scrapy.loader.processors import MapCompose [as 别名]
def address_field():
    """Return a ``scrapy.Field`` that completes addresses with city/state.

    Input values go through ``DataUtils.remove_html`` (presumably strips
    HTML markup -- confirm against ``DataUtils``) and then
    ``parse_address``; output is combined with ``Join()``.
    """
    def parse_address(value):
        """Append " Chicago, " and/or "IL" when the address lacks them."""
        tagged_tokens = usaddress.parse(value)

        def default_or_empty(field, default):
            # '' when some non-empty token is already tagged with ``field``;
            # otherwise the fallback text to append.
            for entry in tagged_tokens:
                if entry[1] == field and entry[0]:
                    return ''
            return default

        city_suffix = default_or_empty("PlaceName", " Chicago, ")
        state_suffix = default_or_empty("StateName", "IL")
        return f'{value}{city_suffix}{state_suffix}'

    address_input = MapCompose(DataUtils.remove_html, parse_address)
    return scrapy.Field(input_processor=address_input, output_processor=Join())
开发者ID:In2ItChicago,项目名称:In2ItChicago,代码行数:17,代码来源:event.py


注:本文中的scrapy.loader.processors.MapCompose方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。