当前位置: 首页>>代码示例>>Python>>正文


Python loader.ItemLoader方法代码示例

本文整理汇总了Python中scrapy.loader.ItemLoader方法的典型用法代码示例。如果您正苦于以下问题:Python loader.ItemLoader方法的具体用法?Python loader.ItemLoader怎么用?Python loader.ItemLoader使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在scrapy.loader的用法示例。


在下文中一共展示了loader.ItemLoader方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: parse

# 需要导入模块: from scrapy import loader [as 别名]
# 或者: from scrapy.loader import ItemLoader [as 别名]
def parse(self, response):
    """Collect every quote on the page, then follow each author link.

    Also follows the "Next" pagination link so the whole site is crawled.
    """
    self.logger.info('Parse function called on {}'.format(response.url))

    for quote_sel in response.css('div.quote'):
        # Build the item from the quote <div>; selectors are relative to it.
        ldr = ItemLoader(item=QuoteItem(), selector=quote_sel)
        ldr.add_css('quote_content', '.text::text')
        ldr.add_css('tags', '.tag::text')
        item = ldr.load_item()

        # Visit the author page, carrying the partially-built item along.
        author_href = quote_sel.css('.author + a::attr(href)').get()
        yield response.follow(author_href, self.parse_author,
                              meta={'quote_item': item})

    # Follow pagination, if a "next" link exists.
    for next_link in response.css('li.next a'):
        yield response.follow(next_link, self.parse)
开发者ID:harrywang,项目名称:scrapy-tutorial,代码行数:22,代码来源:quotes_spider.py

示例2: parse_post

# 需要导入模块: from scrapy import loader [as 别名]
# 或者: from scrapy.loader import ItemLoader [as 别名]
def parse_post(self, response):
    """Extract one Facebook post's fields; fetch the reactions page if any.

    The partially-built loader is passed through request meta so the
    reactions callback can finish populating it.
    """
    ldr = ItemLoader(item=FbcrawlItem(), response=response,
                     parent=response.meta['item'])
    ldr.context['lang'] = self.lang
    ldr.add_xpath('source', "//td/div/h3/strong/a/text() | //span/strong/a/text() | //div/div/div/a[contains(@href,'post_id')]/strong/text()")
    ldr.add_xpath('shared_from', '//div[contains(@data-ft,"top_level_post_id") and contains(@data-ft,\'"isShare":1\')]/div/div[3]//strong/a/text()')
    ldr.add_xpath('text', '//div[@data-ft]//p//text() | //div[@data-ft]/div[@class]/div[@class]/text()')

    # Old posts may not expose reactions at all; emit the item directly then.
    reactions_xpath = "//a[contains(@href,'reaction/profile')]/div/div/text()"
    if not response.xpath(reactions_xpath).get():
        yield ldr.load_item()
    else:
        ldr.add_xpath('reactions', reactions_xpath)
        hrefs = response.xpath("//div[contains(@id,'sentence')]/a[contains(@href,'reaction/profile')]/@href")
        reactions_url = response.urljoin(hrefs[0].extract())
        yield scrapy.Request(reactions_url, callback=self.parse_reactions,
                             meta={'item': ldr})
开发者ID:rugantio,项目名称:fbcrawl,代码行数:19,代码来源:fbcrawl.py

示例3: scrape_product

# 需要导入模块: from scrapy import loader [as 别名]
# 或者: from scrapy.loader import ItemLoader [as 别名]
def scrape_product(self, response):
    """Return a product item extracted from the response."""
    loader = ItemLoader(item=MyItem(), response=response)
    # Strip markup on the way in; keep only the first match on the way out.
    loader.default_input_processor = MapCompose(remove_tags)
    loader.default_output_processor = TakeFirst()

    loader.add_css("my_field", "selector")

    return loader.load_item()
开发者ID:zseta,项目名称:scrapy-templates,代码行数:10,代码来源:sitemap_spider.py

示例4: populate_item

# 需要导入模块: from scrapy import loader [as 别名]
# 或者: from scrapy.loader import ItemLoader [as 别名]
def populate_item(self, response):
    """Yield one item scraped from the current detail page."""
    loader = ItemLoader(item=MySpiderItem(), response=response)
    # Strip HTML tags from every extracted value by default.
    loader.default_input_processor = MapCompose(remove_tags)

    # loader.add_css("", "")
    yield loader.load_item()

    # 3. PAGINATION LEVEL 1 
开发者ID:zseta,项目名称:scrapy-templates,代码行数:10,代码来源:1fol_pag2scr.py

示例5: populate_item

# 需要导入模块: from scrapy import loader [as 别名]
# 或者: from scrapy.loader import ItemLoader [as 别名]
def populate_item(self, response):
    """Yield the scraped item, then the follow-up pagination request."""
    loader = ItemLoader(item=MySpiderItem(), response=response)
    loader.default_input_processor = MapCompose(remove_tags)

    # loader.add_css("")
    # loader.add_value("raw", raw)

    # Emit the populated item before requesting the next page of data.
    yield loader.load_item()
    yield self.paginate(response)

    # 3. PAGINATION LEVEL 2 
开发者ID:zseta,项目名称:scrapy-templates,代码行数:15,代码来源:1fol2scr_pag.py

示例6: populate_item

# 需要导入模块: from scrapy import loader [as 别名]
# 或者: from scrapy.loader import ItemLoader [as 别名]
def populate_item(self, response):
    """Yield one item scraped from the current page."""
    loader = ItemLoader(item=MySpiderItem(), response=response)
    # Default input processor removes HTML tags from extracted values.
    loader.default_input_processor = MapCompose(remove_tags)

    # loader.add_css("", "")
    # loader.add_css("", "")

    yield loader.load_item()
开发者ID:zseta,项目名称:scrapy-templates,代码行数:10,代码来源:1fol2scr.py

示例7: parse

# 需要导入模块: from scrapy import loader [as 别名]
# 或者: from scrapy.loader import ItemLoader [as 别名]
def parse(self, response):
    """Return a single item extracted from the response."""
    loader = ItemLoader(item=MyItem(), response=response)
    # Strip markup on input; collapse each field to its first value on output.
    loader.default_input_processor = MapCompose(remove_tags)
    loader.default_output_processor = TakeFirst()

    # loader.add_css("my_field", "my_css")
    # loader.add_xpath("my_field", "my_xpath")

    return loader.load_item()
开发者ID:zseta,项目名称:scrapy-templates,代码行数:11,代码来源:1scr.py

示例8: populate_item

# 需要导入模块: from scrapy import loader [as 别名]
# 或者: from scrapy.loader import ItemLoader [as 别名]
def populate_item(self, response):
    """Yield one item scraped from the current page."""
    loader = ItemLoader(item=MySpiderItem(), response=response)
    loader.default_input_processor = MapCompose(remove_tags)

    # loader.add_css("field", "")
    yield loader.load_item()

    # 3. PAGINATION LEVEL 2 
开发者ID:zseta,项目名称:scrapy-templates,代码行数:10,代码来源:1fol2fol_pag3scr.py

示例9: parse

# 需要导入模块: from scrapy import loader [as 别名]
# 或者: from scrapy.loader import ItemLoader [as 别名]
def parse(self, response):
    """Scrape price/stock from the page and attach bookkeeping metadata."""
    loader = ItemLoader(item=PriceItem(), response=response)
    loader.default_output_processor = TakeFirst()

    # Page-derived fields; the selectors are configured on the spider.
    loader.add_css("price", self.price_css)
    loader.add_css("stock", self.stock_css)

    # Bookkeeping values carried on the request / spider state.
    metadata = {
        "product_id": response.meta.get("product_id"),
        "cron_id": self.cron_id,
        "shop_id": self.shop_id,
        "item_id": str(uuid.uuid1()),
        "updated": str(datetime.datetime.now()),
        "url": response.url,
    }
    for field, value in metadata.items():
        loader.add_value(field, value)

    return loader.load_item()

    # 2. Updating database by calling the backend API 
开发者ID:zseta,项目名称:scrapy-templates,代码行数:20,代码来源:price_crawler.py

示例10: parse

# 需要导入模块: from scrapy import loader [as 别名]
# 或者: from scrapy.loader import ItemLoader [as 别名]
def parse(self, response):
    """POST a form, passing the partially-built item to the callback."""
    loader = ItemLoader(item=MyItem(), response=response)
    loader.default_input_processor = MapCompose(remove_tags)

    # loader.add_css("", "")
    # loader.add_css("", "")
    # loader.add_css("", "")

    # Carry the item through request meta to the next callback.
    yield FormRequest(
        "POST_URL",
        formdata={'parameter': 'p'},
        meta={'item': loader.load_item()},
        callback=self.populate_field,
    )
开发者ID:zseta,项目名称:scrapy-templates,代码行数:10,代码来源:post_pass_item.py

示例11: parse

# 需要导入模块: from scrapy import loader [as 别名]
# 或者: from scrapy.loader import ItemLoader [as 别名]
def parse(self, response):
    """Extract article metadata from a Medium listing page."""
    print('url:', response.url)
    article_xpath = '//div[@class="postArticle postArticle--short js-postArticle js-trackPostPresentation js-trackPostScrolls"]'
    read_more_xpath = './/a[@class="button button--smaller button--chromeless u-baseColor--buttonNormal"]/@href'

    for art in response.xpath(article_xpath):
        # Skip entries without a "read more" link — they are not full posts.
        if not art.xpath(read_more_xpath).extract_first():
            continue
        ldr = ItemLoader(item=MediumItem(), selector=art)
        ldr.default_output_processor = scrapy.loader.processors.TakeFirst()
        ldr.add_css('Title', 'div > h3::text')
        ldr.add_xpath('Name', './/a[@class="ds-link ds-link--styleSubtle link link--darken link--accent u-accentColor--textNormal u-accentColor--textDarken"]/text()')
        ldr.add_css('Read', 'span::attr(title)')
        ldr.add_xpath('Publication', './/a[@class="ds-link ds-link--styleSubtle link--darkenlink--accent u-accentColor--textNormal"]/text()')
        ldr.add_xpath('Claps', './/button[@class="button button--chromeless u-baseColor--buttonNormal js-multirecommendCountButton u-disablePointerEvents"]/text()')
        ldr.add_xpath('Responses', './/a[@class="button button--chromeless u-baseColor--buttonNormal"]/text()')
        ldr.add_value('Page', response.url)
        yield ldr.load_item()
开发者ID:furas,项目名称:python-examples,代码行数:18,代码来源:main.py

示例12: parse

# 需要导入模块: from scrapy import loader [as 别名]
# 或者: from scrapy.loader import ItemLoader [as 别名]
def parse(self, response):
    """Scrape world rank, city, country and congestion level per table row.

    Populates one item per row of the rankings table and yields it.
    """
    # (field name, 1-based column index) pairs in the rankings table.
    cell_xpath = "//div[@id='RankingPage-table']//td[{}]"
    columns = [
        ("world_rank", 1),
        ("city", 3),
        ("country", 4),
        ("congestion_level", 5),
    ]
    # One list of cell texts per column; zip stops at the shortest column.
    extracted = [response.xpath(cell_xpath.format(idx)).getall()
                 for _, idx in columns]
    for row in zip(*extracted):
        loader = ItemLoader(item=TrafficIndexItem())
        for (field, _), value in zip(columns, row):
            loader.add_value(field, value)
        yield loader.load_item()
开发者ID:serhii73,项目名称:place2live,代码行数:20,代码来源:traffic.py

示例13: parse

# 需要导入模块: from scrapy import loader [as 别名]
# 或者: from scrapy.loader import ItemLoader [as 别名]
def parse(self, response):
    """Scrape per-country inflation figures from an HTML rankings table.

    Yields one item per (inflation, date, country) triple, pairing the
    columns positionally via zip (which stops at the shortest list).
    """
    table = bs(response.body, "html.parser").findAll("tbody")[2]

    countries = [
        c["title"] for c in table.findAll("a", href=True, title=True)
    ]
    # BUGFIX: re.findall requires a string, but findAll returns bs4 Tag
    # objects — extract each cell's text before matching the year digits.
    dateCandidates = table.findAll("td", {"data-sort-value": True})
    dates = [re.findall(r'\d+', yr.text)[0] for yr in dateCandidates]

    # BUGFIX: the original called str.replace on Tag objects (AttributeError)
    # and tested isinstance(td, float), which is never true for cell text, so
    # `inflations` was always empty and nothing was yielded. Read each cell's
    # text, normalize the Unicode minus sign, and keep values parseable as
    # floats.
    inflations = []
    for td in table.findAll("td"):
        text = td.text.strip().replace("−", "-")
        try:
            inflations.append(float(text))
        except ValueError:
            continue

    for inflation, country, date in zip(
        inflations, countries, dates,
    ):
        i = ItemLoader(item=InflationItem())
        i.add_value("inflation", inflation)
        i.add_value("date", date)
        i.add_value("country", country)
        yield i.load_item()
开发者ID:serhii73,项目名称:place2live,代码行数:23,代码来源:inflation.py

示例14: parse_item

# 需要导入模块: from scrapy import loader [as 别名]
# 或者: from scrapy.loader import ItemLoader [as 别名]
def parse_item(self, response):
    """Return an item holding every image URL found on the page."""
    loader = ItemLoader(item=ImageItem(), response=response)
    loader.add_css('image_urls', 'img::attr(src)')
    return loader.load_item()
开发者ID:evilcos,项目名称:crawlers,代码行数:6,代码来源:rosi.py

示例15: __init__

# 需要导入模块: from scrapy import loader [as 别名]
# 或者: from scrapy.loader import ItemLoader [as 别名]
def __init__(self, *args, **kwargs):
    """Build ``self.item`` from keyword arguments.

    Raises:
        KeyError: if a keyword is not a declared field on ``Event``.
    """
    loader = ItemLoader(item=Event())
    for field, value in kwargs.items():
        try:
            loader.add_value(field, value)
        except KeyError:
            # ItemLoader raises KeyError for fields not declared on the item.
            raise KeyError(f'{field} is not a valid event field')
    self.item = loader.load_item()
开发者ID:In2ItChicago,项目名称:In2ItChicago,代码行数:10,代码来源:event.py


注:本文中的scrapy.loader.ItemLoader方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。