本文整理汇总了Python中scrapy.loader.ItemLoader类的典型用法代码示例。如果您正苦于以下问题:Python ItemLoader类的具体用法?Python ItemLoader怎么用?Python ItemLoader使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了ItemLoader类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: parse_item
def parse_item(self, response):
    """Populate a PageItem from *response* and return the loaded item.

    The title is read from the cookies of the originating request, the URL
    from the response, and the name from ``self.name``; image URLs are
    extracted from the page with an XPath expression.
    """
    loader = ItemLoader(item=PageItem(), response=response)
    # Plain (non-selector) values, added in a fixed order.
    literal_values = (
        ('title', response.request.cookies['title']),
        ('url', response.url),
        ('name', self.name),
    )
    for field_name, field_value in literal_values:
        loader.add_value(field_name, field_value)
    loader.add_xpath(
        'image_urls',
        '//div[@class="l_effect_img_mid"]/a/img/@src',
    )
    return loader.load_item()
示例2: parse_item
def parse_item(self, response):
    """Populate a PageItem from *response* and return the loaded item.

    The title comes from the cookies of the originating request, the name
    from ``self.name`` and the URL from the response; image URLs are
    extracted from ``<img>`` tags inside top-aligned table cells.
    """
    loader = ItemLoader(item=PageItem(), response=response)
    # Plain (non-selector) values, added in a fixed order.
    literal_values = (
        ('title', response.request.cookies['title']),
        ('name', self.name),
        ('url', response.url),
    )
    for field_name, field_value in literal_values:
        loader.add_value(field_name, field_value)
    loader.add_xpath('image_urls', '//td[@valign="top"]/img/@src')
    return loader.load_item()
示例3: parse_item
def parse_item(self, response):
    """Populate a MeizituItem from *response* and return the loaded item.

    ``name`` and ``tags`` are extracted with the loader's default
    processors; ``image_urls`` is passed an explicit ``Identity()``
    processor; ``url`` is the response URL.
    """
    loader = ItemLoader(item=MeizituItem(), response=response)
    loader.add_xpath('name', '//h2/a/text()')
    loader.add_xpath(
        'tags',
        "//div[@id='maincontent']/div[@class='postmeta clearfix']/div[@class='metaRight']/p",
    )
    loader.add_xpath(
        'image_urls',
        "//div[@id='picture']/p/img/@src",
        Identity(),
    )
    loader.add_value('url', response.url)
    return loader.load_item()
示例4: _parse
def _parse(self, response):
    """Populate a BookmarksItem from *response* and return the loaded item.

    Fields:
        name: the ``<title>`` element of the document head.
        anchors: the ``href`` attribute of every ``<a>`` element.
        description: text nodes that are direct children of ``<body>``.
        last_updated: the time at which the page was parsed.
    """
    loader = ItemLoader(item=BookmarksItem(), response=response)
    loader.add_xpath(u"name", u"/html/head/title")
    # The expression previously ended with a stray apostrophe
    # (``//a/@href'``), which is not a valid XPath.
    loader.add_xpath(u"anchors", u"//a/@href")
    loader.add_xpath(u"description", u"/html/body/text()")
    # add_value() accepts a plain value instead of a selector.  Store an
    # actual timestamp; the original passed the ``datetime.datetime`` class
    # object itself.
    loader.add_value(u"last_updated", datetime.datetime.now())
    return loader.load_item()
示例5: parse_titles
def parse_titles(self, response):
    """Yield a single BlogCategory item built from *response*.

    ``hub`` is taken from ``response.meta['hname']``; the remaining fields
    are extracted with CSS selectors.
    """
    category_loader = ItemLoader(item=BlogCategory(), response=response)
    category_loader.add_value('hub', response.meta['hname'])
    css_rules = (
        ('title', 'div.company_post h1 span::text'),
        ('date', 'div.published::text'),
        ('article', 'div.content::text'),
    )
    for field_name, css_query in css_rules:
        category_loader.add_css(field_name, css_query)
    yield category_loader.load_item()
示例6: parse_rate
def parse_rate(self, response):
    """Build a RateItem whose fields are driven by the RATE_XPATH setting.

    Each key of the ``RATE_XPATH`` dict setting is used as a field name and
    its value as the XPath expression for that field.
    """
    rate_loader = ItemLoader(item=RateItem(), response=response)
    xpath_by_field = self.settings.getdict('RATE_XPATH')
    for field_name, expression in xpath_by_field.items():
        rate_loader.add_xpath(field_name, expression)
    return rate_loader.load_item()
示例7: parse
def parse(self, response):
    """Yield one follow-up request per entry found in *response*.

    For each entry returned by ``self.find_items`` a fresh item is loaded
    with one value per target from ``self.get_targets()``.  The partially
    loaded item is handed to ``gen_request`` together with the value of
    ``self.Meta.detail_path`` and the ``self.parse_details`` callback.
    """
    for entry in self.find_items(response):
        entry_loader = ItemLoader(item=self.item_class())
        for target in self.get_targets():
            extracted = target.get_value(entry, response)
            entry_loader.add_value(target.name, extracted)
        detail_value = self.Meta.detail_path.get_value(entry, response)
        yield gen_request(
            detail_value,
            self.parse_details,
            entry_loader.load_item(),
        )
示例8: parse
def parse(self, response):
    """Build an item whose fields are declared dynamically by the request.

    ``response.meta['fields']`` is expected to map field names to XPath
    expressions.  Entries with a falsy expression are skipped; every other
    entry gets a ``Field`` registered on the item and is then extracted
    from the response.

    Returns the loaded item.
    """
    item = Item()
    loader = ItemLoader(item=item, response=response)
    # ``items()`` works on both Python 2 and 3; ``iteritems()`` does not
    # exist on Python 3 dicts.
    for name, xpath in response.meta['fields'].items():
        if not xpath:
            continue
        # The field must be declared before the loader can populate it.
        # NOTE(review): ``fields`` may be shared at class level, in which
        # case this declaration leaks to other Item instances -- confirm.
        item.fields[name] = Field()
        loader.add_xpath(name, xpath)
    return loader.load_item()
示例9: parse_detail
def parse_detail(self, response):
    """Build a NewsItem from a detail page and return the loaded item.

    The CSS selectors for title, date and author are read from the
    same-named attributes on ``self``; the content selector is read from
    ``self.content``.  The category is taken from ``response.meta["cate"]``.
    """
    news_loader = ItemLoader(NewsItem(), response=response)
    # These three fields share the same "<selector>::text" pattern and the
    # field name matches the attribute holding the selector.
    for field_name in ("title", "date", "auth"):
        news_loader.add_css(field_name, "%s::text" % getattr(self, field_name))
    news_loader.add_css("content", "%s > p::text" % self.content)
    news_loader.add_value("cate", response.meta["cate"])
    return news_loader.load_item()
示例10: test_load_item_using_default_loader
def test_load_item_using_default_loader(self):
    """A loader given an existing item fills in and returns that same item.

    Values already set on the item are kept as-is, while values added
    through the loader are stored as a list.
    """
    original = TestItem()
    original['summary'] = u'lala'

    loader = ItemLoader(item=original)
    loader.add_value('name', u'marta')
    loaded = loader.load_item()

    # The loader must return the very object it was given, not a copy.
    assert loaded is original
    self.assertEqual(loaded['summary'], u'lala')
    self.assertEqual(loaded['name'], [u'marta'])
示例11: parse_content
def parse_content(self, response):
    """Build a BbsDmozItem from *response* and return the loaded item.

    ``url`` is the response URL converted with ``str``; the other fields
    are extracted with XPath expressions looked up in ``self._x_query``.
    """
    loader = ItemLoader(item=BbsDmozItem(), response=response)
    loader.add_value('url', str(response.url))
    # (item field, key into self._x_query) pairs.
    query_keys = (
        ('forum', 'forum'),
        ('poster', 'poster'),
        ('content', 'page_content'),
    )
    for field_name, query_key in query_keys:
        loader.add_xpath(field_name, self._x_query[query_key])
    return loader.load_item()
示例12: parse_stuff
def parse_stuff(self, response):
    """Return a list with one Items_Main item per ``<body>`` match.

    Each item's ``fragment`` field is extracted from the whole response
    (the XPath is absolute), selecting the text of every element that is
    not a ``<script>``.
    """
    collected = []
    # The matched node itself is not used; only the number of matches
    # determines how many items are produced.
    for _body in Selector(response).xpath('//body'):
        fragment_loader = ItemLoader(item=Items_Main(), response=response)
        fragment_loader.add_xpath('fragment', '//*[not(self::script)]/text()')
        collected.append(fragment_loader.load_item())
    return collected
示例13: parse
def parse(self, response):
    """Build a PlantItem from *response* and return the loaded item.

    ``name`` and ``species`` come from the page banner; ``key`` and
    ``value`` come from the post-meta entries of the tabbed info section.
    """
    plant_loader = ItemLoader(item=PlantItem(), response=response)
    # Long expressions are split with adjacent-literal concatenation; the
    # resulting strings are unchanged.
    field_xpaths = (
        (
            'name',
            "//div[@id='bodycontent']/div[@class='post']"
            "/div[@class='pagebanner']/h2/text()",
        ),
        (
            'species',
            "//div[@id='bodycontent']/div[@class='post']"
            "/div[@class='pagebanner']/div[@class='clear resultSpecies']/text()",
        ),
        (
            'key',
            "//div[@id='bodycontent']/div[@class='post']"
            "/div[@class='contents']/div[@id='tabbedinfo']"
            "/div[@class='tabscontain']/div[@class='tabs']"
            "/div[@class='post-meta']/div[@class='post-meta-key']/text()",
        ),
        # 'value' selects every child node of the value cell.  An
        # alternative that was tried here ends in
        # "div[@class='post-meta-value']/a/text()" instead.
        (
            'value',
            "//div[@id='bodycontent']/div[@class='post']"
            "/div[@class='contents']/div[@id='tabbedinfo']"
            "/div[@class='tabscontain']/div[@class='tabs']"
            "/div[@class='post-meta']/div[@class='post-meta-value']/child::node()",
        ),
    )
    for field_name, expression in field_xpaths:
        plant_loader.add_xpath(field_name, expression)
    return plant_loader.load_item()
示例14: parse_event_detail
def parse_event_detail(self, response):
    """Add the players found on a detail page to the event in progress.

    The event and the running list of events are both read from
    ``response.meta``.  One ``players`` value (the extracted cell texts) is
    added per row of the ``sticky-enabled`` table.  The loaded event is
    appended to the ``events`` list in place, and that list is returned.
    """
    event = response.meta['event']
    events = response.meta['events']
    player_rows = response.xpath('//table[@class="sticky-enabled"]/tbody/tr')
    loader = ItemLoader(event)
    for row in player_rows:
        cell_texts = row.xpath('td/text()').extract()
        loader.add_value('players', cell_texts)
    events.append(loader.load_item())
    return events
示例15: parse
def parse(self, response):
    """Yield a single UniprotItem built from *response*.

    The protein name and sequence length are extracted with XPath, the
    accession is the response URL, and the localization is a list holding,
    for each matched ``<ul>``, the link texts of its ``<li>`` entries.
    """
    protein_loader = ItemLoader(item=UniprotItem(), response=response)
    protein_loader.add_xpath(
        'proteinName',
        "//*[@id='page-header']/h2/span/text()",
    )
    protein_loader.add_value('uniprotAccession', response.url)
    protein_loader.add_xpath(
        'uniprotProteinLength',
        "//*[@id='sequences-section']/div[1]/div[2]/div[1]/span[2]/text()",
    )
    location_lists = response.xpath("//*[@id='subcellular_location']/div[1]/ul")
    # One inner list of link texts per matched <ul>.
    localizations = [
        entry.xpath("./li/a/text()").extract() for entry in location_lists
    ]
    protein_loader.add_value('uniprotLocalization', localizations)
    yield protein_loader.load_item()