

Python Request.meta['item'] Code Examples

This article collects typical usage examples of Request.meta['item'] from scrapy.http.request in Python. Strictly speaking, meta is a dict attribute of Request rather than a method; meta['item'] is simply the conventional key under which a partially-built item is passed between callbacks. If you are unsure what Request.meta['item'] is for or how to use it, the curated examples below should help. You can also explore the other usage examples of scrapy.http.request.Request.


The following presents 15 code examples of Request.meta['item'], sorted by popularity by default.
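
All of the examples revolve around the same round trip: Scrapy callbacks share no state, so a partially-filled item is attached to an outgoing Request through its meta dict and picked up again with response.meta['item'] in the next callback. Here is a minimal, self-contained sketch of that pattern; the spider name, URL, and XPath expressions are illustrative assumptions, not taken from any of the projects cited below.

import scrapy


class ProductSpider(scrapy.Spider):
    name = 'products'
    start_urls = ['http://example.com/products']  # placeholder listing page

    def parse(self, response):
        for href in response.xpath('//a[@class="product"]/@href').extract():
            item = {'url': response.urljoin(href)}  # partially-filled item
            request = scrapy.Request(item['url'], callback=self.parse_detail)
            request.meta['item'] = item  # carry the item to the detail page
            yield request

    def parse_detail(self, response):
        item = response.meta['item']  # pick the item back up and complete it
        item['title'] = response.xpath('//h1/text()').extract_first()
        yield item

Since Scrapy 1.7, the documentation recommends the cb_kwargs request argument over meta for passing data between callbacks; meta still works and is what every example below uses. Example 8 is revisited with cb_kwargs further down.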

Example 1: snapdeal_scraper

# Required import: from scrapy.http.request import Request [as alias]
# Or: from scrapy.http.request.Request import meta['item'] [as alias]
	def snapdeal_scraper(self,response):
		item = response.meta['item']
		sel = Selector(response)
		item['Snapdeal_URL']= response.url
		try:
			if sel.xpath("//div[@class='notifyMe-soldout']"):
				ProductName = sel.xpath("//h1[@itemprop='name']/text()").extract()[0].replace(",","")
				item['Snapdeal__ProductName'] =ProductName
				item['Snapdeal_MRP']=item['Snapdeal_SP'] = ''
				item['Snapdeal_Stock'] = 'Out Of Stock'
				
				
			else:
				mrp = sel.xpath("//span[@id='original-price-id']/text()").extract()
				if mrp:
					item['Snapdeal_SP'] = sel.xpath("//span[@id='selling-price-id']/text()").extract()[0]
					item['Snapdeal_MRP'] = sel.xpath("//span[@id='original-price-id']/text()").extract()[0]
				else:
					item['Snapdeal_MRP'] = sel.xpath("//span[@id='selling-price-id']/text()").extract()[0]
					item['Snapdeal_SP'] = ''
					
				item['Snapdeal__ProductName'] = sel.xpath("//h1[@itemprop='name']/text()").extract()[0].replace(",","")
				stock = sel.xpath("//div[@class='notifyMe-soldout']").extract()
				discntnd = sel.xpath("//div[@class='noLongerProduct']").extract()
				if stock or discntnd:
					item['Snapdeal_Stock'] = "Out Of Stock"
				else:
					item['Snapdeal_Stock'] = "In Stock"				
				
		except:			
			item['Snapdeal__ProductName'] =	item['Snapdeal_MRP'] = item['Snapdeal_SP'] = ''
			item['Snapdeal_Stock'] = 'Not Found'
		
		
		try:
			amazon_url = amazon_urls[item['index']]
			request = Request(amazon_url,
								headers={'Referer':'http://amazon.in'},
								callback = self.amazon_scraper)
			request.meta['item'] = item
			request.meta['proxy'] = "http://111.161.126.100:80"
			yield request
			
		except:				
			try:
				flipkart_url = flipkart_urls[item['index']]
				request = Request(flipkart_url,callback = self.flipkart_scraper)
				request.meta['item'] = item
				# request.meta['proxy'] = "http://111.161.126.100:80"
				yield request
		
			except:			
				try:
					paytm_url = paytm_urls[item['index']]
					request = Request(paytm_url,callback = self.paytm_scraper)
					request.meta['item'] = item
					request.meta['proxy'] = "http://111.161.126.100:80"
					yield request
				except:
					self.to_csv(item)
Developer: Diwahars, Project: scrapers, Lines: 62, Source: PopularProductsScraper.py
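
A note on the control flow in Example 1: the nested try/except blocks only catch exceptions raised while building each fallback request (for instance, the URL list missing that index). Once a request is yielded, a download failure never re-enters this method. Below is a hedged sketch of covering that case with Request's errback argument, reusing the example's to_csv as the last resort; the other method and meta key names are hypothetical.

from scrapy import Request


class FallbackMixin(object):

    def request_with_fallbacks(self, urls, item):
        # 'urls' is a candidate list, most-preferred marketplace first
        request = Request(urls[0], callback=self.parse_product,
                          errback=self.on_download_error)
        request.meta['item'] = item
        request.meta['fallbacks'] = urls[1:]
        return request

    def on_download_error(self, failure):
        # failure.request is the request that could not be downloaded
        meta = failure.request.meta
        if meta['fallbacks']:
            # move on to the next marketplace, carrying the same item along
            yield self.request_with_fallbacks(meta['fallbacks'], meta['item'])
        else:
            self.to_csv(meta['item'])  # give up, as the example above does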

Example 2: amazon_marketplace

# Required import: from scrapy.http.request import Request [as alias]
# Or: from scrapy.http.request.Request import meta['item'] [as alias]
	def amazon_marketplace(self,response):
		
		sel = Selector(response)
		item = response.meta['item']
		try:
			sp = sel.xpath("//span[@style='text-decoration: inherit; white-space: nowrap;']/text()").extract()[0].replace(",","")
			shippingcost = sel.xpath("//span[@class='olpShippingPrice']/span/text()").extract()
			if shippingcost:
				sp = str(float(sp) + float(sel.xpath("//span[@class='olpShippingPrice']/span/text()").extract()[0].replace(",","")))	
			
			# compare prices numerically; both values are strings at this point
			if float(sp) > float(item['SP']):
				sp = item['SP']
		except:			
			try:
				flipkart_url = flipkart_urls[item['index']]
				request = Request(flipkart_url,callback = self.flipkart_scraper)
				request.meta['item'] = item
				# request.meta['proxy'] = "http://111.161.126.100:80"
				yield request
		
			except:				
				try:
					paytm_url = paytm_urls[item['index']]
					request = Request(paytm_url,callback = self.paytm_scraper)
					request.meta['item'] = item
					request.meta['proxy'] = "http://111.161.126.100:80"
					yield request
				except:
					self.to_csv(item)
Developer: Diwahars, Project: scrapers, Lines: 31, Source: PopularProductsScraper.py

Example 3: parse_user

# Required import: from scrapy.http.request import Request [as alias]
# Or: from scrapy.http.request.Request import meta['item'] [as alias]
    def parse_user(self, response):
        item = MFWItem()

        item['uid'] = response.meta['uid']
        item['name'] = response.xpath(
            '//div[@class="MAvaName"]/text()').extract_first()
        item['level'] = int(response.xpath(
            '//span[@class="MAvaLevel flt1"]/a/@title').extract_first().split('.')[-1])
        if item['level'] <= 3:
            return
        item['tags'] = response.xpath(
            '//div[@class="its_tags"]//i[contains(@class, "on")]/../@title').extract()
        item['attention'] = [int(i) for i in response.xpath(
            '//div[@class="MAvaMore clearfix"]//a/text()').extract()]
        item['groups'] = response.xpath(
            '//div[@class="MGroupDetail"]//a[@class="name"]/text()').extract()
        item['dynamic'] = response.xpath(
            '//span[@class="time"]/text()').extract()
        item['download'] = []
        infos = response.xpath('//div[@class="common_block relative_info"]/p')
        for info in infos:
            if u'刚刚下载了' in ''.join(info.xpath('text()').extract()):

                item['download'].append({'time': info.xpath(
                    'span[@class="time"]/text()').extract_first(), 'name': info.xpath('a/text()').extract()[-1]})

        item['note'] = {}
        item['path'] = []
        item['review'] = []
        item['together'] = []
        note = response.xpath(u'//a[@title="TA的游记"]/@href').extract_first()
        req = Request(urljoin(response.url, note), callback=self.parse_note)
        req.meta['item'] = item
        yield req
Developer: twelfthing, Project: socool, Lines: 36, Source: mafengwo.py

Example 4: parse_monuments_en

# Required import: from scrapy.http.request import Request [as alias]
# Or: from scrapy.http.request.Request import meta['item'] [as alias]
	def parse_monuments_en(self,response):
		sel=Selector(response)
		monument=sel.xpath('//div[@class="col-50 content-desc"]')
		title=monument.xpath("h2[@class='big sec-color']/text()").extract()
		summary=''.join(monument.xpath("div[@id='idContentScroll']/span/p//text()").extract())
		informationLink=monument.xpath("div[@id='idContentScroll']/span/a/@href").extract()
		item = response.meta['item']
		if len(informationLink)>0:
			item['informationLink_en']=informationLink.pop()
		else:
			item['informationLink_en']=response.url
		if len(title)>0:
			item['name_en']=title.pop()
		else:
			item['name_en']=''
		if len(summary)>0:
			item['description_en']=summary
		else:
			item['description_en']=''
		if len(informationLink)>0:
			item['informationLink']=informationLink.pop()
		else:
			item['informationLink']=response.url
		
		euLink=sel.xpath('//*[@id="eu"]/@href').extract()
		request=Request(self.BASE+str(euLink.pop()),callback=self.parse_monuments_eu)
		request.meta['item']=item
		yield request
Developer: AritzBi, Project: BilbaoTourismScraper, Lines: 30, Source: patrimonioBTurismo_spider.py

Example 5: parse_restaurants_en

# Required import: from scrapy.http.request import Request [as alias]
# Or: from scrapy.http.request.Request import meta['item'] [as alias]
	def parse_restaurants_en(self,response):
		sel=Selector(response)
		item = response.meta['item']
		descriptionpath=sel.xpath("//*[@id='idContentScroll']")
		description=descriptionpath.xpath("span[@itemprop='description']/p//text()").extract()
		timetable=descriptionpath.xpath("span[@itemprop='description']/p[2]//text()").extract()
		timetable2=descriptionpath.xpath("span[@itemprop='description']/p[3]//text()").extract()
		categoryPath=sel.xpath("//*[@id='gastronomy-content']/section[2]/div/section[1]/section/div/ul/li[2]/p[2]")
		category=categoryPath.xpath("a/strong/text()").extract()
		if len(description)>0:
			item['description_en']=' '.join(description)
		else:
			item['description_en']=''
		if len(category)>0:
			item['category_en']=['Restaurant',category.pop()]	
		else:
			item['category_en']=['Restaurant','Others']
		if len(timetable)>0:
			if len(timetable2)>0:
				item['timetable_en']=' '.join([timetable.pop(),timetable2.pop()])
			else:
				item['timetable_en']=timetable.pop()
		else:
			item['timetable_en']=''
		link=response.url
		link=link.replace("/en/","/eu/")
		request=Request(link,callback=self.parse_restaurants_eu)
		request.meta['item']=item
		yield request		
Developer: AritzBi, Project: BilbaoTourismScraper, Lines: 31, Source: restaurantesBTurismo_spider.py

Example 6: parse_history

# Required import: from scrapy.http.request import Request [as alias]
# Or: from scrapy.http.request.Request import meta['item'] [as alias]
 def parse_history(self,response):
     #Parse Price History Table
     house = response.meta['item']
     tax_url = house['tax_url']
     price_history = []
     pattern = r' { "html": "(.*)" }'
     html = re.search(pattern, response.body).group(1)
     html = re.sub(r'\\"', r'"', html)  # Correct escaped quotes
     html = re.sub(r'\\/', r'/', html)  # Correct escaped forward
     if (html != ""):
         soup = BeautifulSoup(html)
         table = soup.find('table')
         table_body = table.find('tbody')
         rows = table_body.find_all('tr')
         for row in rows:
             cols = row.find_all('td')
             cols = [ele for ele in cols]
             cols = cols[:3]
             if (cols[2].find('span') != None):
                 date = cols[0].get_text()
                 event = cols[1].get_text()
                 price = cols[2].find('span').get_text()
                 price_history.append([date, event, price])
         #Store history as JSON string    
         house['price_history'] = json.dumps(price_history)
     tax_request = Request(tax_url, 
                       callback=self.parse_taxes)
     tax_request.meta['item'] = house
     
     return tax_request
Developer: KaroAntonio, Project: Guru, Lines: 32, Source: zillow_spider.py

Example 7: listing_scraper

# Required import: from scrapy.http.request import Request [as alias]
# Or: from scrapy.http.request.Request import meta['item'] [as alias]
  def listing_scraper(self, response):
    """
    Given a page with members, scrape user info
    """
    # Get member info
    x = HtmlXPathSelector(response)

    reqs = []

    for alum_tr in x.select("//table[@id='cid_41_RadGrid1_ctl00']/tbody/tr"):
      tds = alum_tr.select("td")


      if len(tds[1].select("a/@href").extract()) == 0:
        print "GOT A WEIRD RESPONSE WITH BLANK NAMES, handling"
        alum = Alum(
          mid = tds[0].select("text()").extract(),
          #name = tds[1].select("a/text()").extract(),
          preferred_class_year = tds[2].select("text()").extract(),
          city = tds[3].select("text()").extract(),
          state = tds[4].select("text()").extract(),
          page_url =
          "http://wesconnect.wesleyan.edu/s/1318/index.aspx?sid=1318&gid=1&pgid=94&cid=256&mid=" +
          tds[0].select("text()").extract()[0],
          country = tds[5].select("text()").extract()
        )
        req = Request(url = alum['page_url'], callback=self.member_parser)
        req.meta['item'] = alum
        reqs.append(req)
      else:
        alum = Alum(
          page_url = "http://wesconnect.wesleyan.edu" + tds[1].select("a/@href").extract()[0],
          mid = tds[0].select("text()").extract(),
          name = tds[1].select("a/text()").extract(),
          preferred_class_year = tds[2].select("text()").extract(),
          city = tds[3].select("text()").extract(),
          state = tds[4].select("text()").extract(),
          country = tds[5].select("text()").extract()
        )
        req = Request(url = alum['page_url'], callback=self.member_parser)
        req.meta['item'] = alum
        reqs.append(req)

    return reqs
Developer: digideskio, Project: WesPeople.com, Lines: 46, Source: wesconnect_spider.py

Example 8: start_requests

# Required import: from scrapy.http.request import Request [as alias]
# Or: from scrapy.http.request.Request import meta['item'] [as alias]
 def start_requests(self):
     with open('imageURLs.csv') as csvFile:
         reader = csv.DictReader(csvFile)
         for row in reader:
             item = GetimagesprojectItem()
             image_url = row['URL']
             item['image_urls'] = [row['URL'],]
             item['pid'] = row['ID']
             request = Request(image_url,callback = self.parse)
             request.meta['item'] = item
             yield request
Developer: anurag-priyadarshi, Project: getImagesProject, Lines: 13, Source: imageSpider.py
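
For comparison, here is the same CSV-seeded flow rewritten with cb_kwargs, which Scrapy 1.7+ recommends over meta for passing data to callbacks: the item arrives as a plain keyword argument. The file and field names come from Example 8; the spider class around them is a sketch.

import csv

import scrapy


class ImageSpider(scrapy.Spider):
    name = 'images'  # hypothetical spider name

    def start_requests(self):
        with open('imageURLs.csv') as csv_file:
            for row in csv.DictReader(csv_file):
                item = {'image_urls': [row['URL']], 'pid': row['ID']}
                # cb_kwargs delivers 'item' to the callback as a keyword argument
                yield scrapy.Request(row['URL'], callback=self.parse,
                                     cb_kwargs={'item': item})

    def parse(self, response, item):
        yield item  # no response.meta lookup needed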

Example 9: flipkart_scraper

# Required import: from scrapy.http.request import Request [as alias]
# Or: from scrapy.http.request.Request import meta['item'] [as alias]
	def flipkart_scraper(self,response):
		sel = Selector(response)
		item = response.meta['item']		
		item['Flipkart_URL'] = response.url
		
		
		
		if sel.xpath("//h1[@class='title']/text()").extract():
			item['Flipkart_ProductName'] = sel.xpath("//h1[@class='title']/text()").extract()[0].replace(",","")      
			
			
			mrp_xpath = sel.xpath("//span[@class='price']/text()").extract()			
			sp_xpath = sel.xpath("//span[@class='selling-price omniture-field']/text()").extract()
			
			if mrp_xpath and sp_xpath:
				item['Flipkart_MRP'] = mrp_xpath[0].replace(",","").replace("Rs.","")
				item['Flipkart_SP'] = sp_xpath[0].replace(",","").replace("Rs.","")
			elif sp_xpath:
				item['Flipkart_MRP'] = sp_xpath[0].replace(",","").replace("Rs.","")
				item['Flipkart_SP'] = ''
			elif mrp_xpath:
				item['Flipkart_MRP'] = mrp_xpath[0].replace(",","").replace("Rs.","")
				item['Flipkart_SP'] = ''
				
			
			stock = sel.xpath("//div[@class='out-of-stock-status'] | //div[@class='no-sellers-available omniture-field']").extract()					
			
			if not stock :
				item['Flipkart_Stock'] = "In Stock"
			else:
				item['Flipkart_Stock'] = 'Out Of Stock'			
		
		else:
			item['Flipkart_ProductName'] = item['Flipkart_SP'] = item['Flipkart_MRP'] = ''
			item['Flipkart_Stock'] = "Not Found"
		
		print item
		try:			
			paytm_url = paytm_urls[item['index']]
			paytm_url = paytm_url.replace('//paytm.com/shop', '//catalog.paytm.com/v1')
			request = Request(paytm_url,callback = self.paytm_scraper)
			request.meta['item'] = item
			request.meta['proxy'] = "http://111.161.126.100:80"			
			yield request
		
		except:
			self.to_csv(item)
Developer: Diwahars, Project: scrapers, Lines: 49, Source: PopularProductsScraper.py

Example 10: parse_monuments

# Required import: from scrapy.http.request import Request [as alias]
# Or: from scrapy.http.request.Request import meta['item'] [as alias]
	def parse_monuments(self,response):
		sel=Selector(response)
		monument=sel.xpath('//div[@class="col-50 content-desc"]')
		title=monument.xpath("h2[@class='big sec-color']/text()").extract()
		summary=''.join(monument.xpath("div[@id='idContentScroll']/span/p//text()").extract())
		address=monument.xpath("span/text()").re(r'[\w\s,-\/]*\s48\d{3}\s*\w*')
		informationLink=monument.xpath("div[@id='idContentScroll']/span/a/@href").extract()
		images=sel.xpath("//*[@id='CapaImagen_0']/img/@src").extract()
		monumentoReligioso=sel.xpath("//*[@id='see-and-do-content']/section[2]/div/section[1]/section/ul/li/a[@class='sec-bg'][contains(.,'Monumentos religiosos')]").extract()
		if monumentoReligioso:
			category="Monumento Religioso"
			category_en="Religious monument"
			category_eu=""
		else:
			category="Monumento Historico"
			category_en="Historical monument"
			category_eu=""
		item=BuildingItem()
		if len(title)>0:
			item['name']=title.pop()
		else:
			item['name']=''
		if len(summary)>0:
			item['description']=summary
		else:
			item['description']=''
		if len(address)>0:
			item['address']=address.pop().strip()
		else:
			item['address']=''
		if len(informationLink)>0:
			item['informationLink']=informationLink.pop()
		else:
			item['informationLink']=response.url
		if len(images)>0:
			item['image_urls']=[''.join([self.BASE,images.pop()])]
		item['category']=category
		item['category_en']=category_en
		item['category_eu']=category_eu

		enLink=sel.xpath('//*[@id="en"]/@href').extract()
		request=Request(self.BASE+str(enLink.pop()),callback=self.parse_monuments_en)
		request.meta['item']=item

		yield request
Developer: AritzBi, Project: BilbaoTourismScraper, Lines: 47, Source: patrimonioBTurismo_spider.py

Example 11: parse

# Required import: from scrapy.http.request import Request [as alias]
# Or: from scrapy.http.request.Request import meta['item'] [as alias]
    def parse(self, response):
        sel = Selector(response)
        results = sel.xpath("//ul/li[@class='search_result']")
        item = RapGeniusItem()
        items = []

        for result in results:
            title = result.xpath("a/span[@class='title_with_artists']").extract()
            info = remove_html_tags(title[0]).split(u' \u2013 ')
            matches = result.xpath("p").extract()
            print result.xpath("p").extract()
            url = result.xpath("a/@href").extract()
            item['url'] = self.base_url + url[0]
            item['artist'] = info[0].split('\n    ')[1]            # This is hacky!
            item['song_name'] = info[1].splitlines()[0]
            request = Request(item['url'], callback=self.parse_song)  # callback must be a callable, not a string
            request.meta['item'] = item
            yield request
Developer: shipleydaniel, Project: hacktx2013, Lines: 20, Source: rapgenius_spider.py

Example 12: parse_path

# Required import: from scrapy.http.request import Request [as alias]
# Or: from scrapy.http.request.Request import meta['item'] [as alias]
 def parse_path(self, response):
     item = response.meta['item']
     for country in response.xpath('//div[@class="country-block other-block"]'):
         for city in country.xpath('div')[1:]:
             path = {}
             path['country'] = ''.join(country.xpath(
                 './/div[@class="cb-hd"]/h2/text()').extract()).strip()
             path['date'] = city.xpath(
                 './/span[@class="time"]/span/text()').extract_first().replace('.', '-')
             path['city'] = city.xpath('.//h3/span/text()').extract_first()
             if not path['city']:
                 path['city'] = city.xpath('.//div[@class="vertical"]/p/text()').extract_first()[:-3]
             path['pois'] = city.xpath('.//h4/text()').extract()
             item['path'].append(path)
     review = response.xpath(u'//a[@title="TA的点评"]/@href').extract_first()
     req = Request(urljoin(response.url, review),
                   callback=self.parse_review)
     req.meta['item'] = item
     yield req
Developer: twelfthing, Project: socool, Lines: 21, Source: mafengwo.py

Example 13: parse_countries_page

# Required import: from scrapy.http.request import Request [as alias]
# Or: from scrapy.http.request.Request import meta['item'] [as alias]
    def parse_countries_page(self, response):

        countries = response.meta['country_data']

        for c in countries:
            # http://www.dillards.com/
            # country=AG; currency=USD
            # Because the name of the country doesn't appear on the home page we need to extract it from here
            item = ScraperItem()
            item['country'] = c[0]

            request_with_cookies = Request(
                url="http://www.dillards.com/",
                cookies={'currency': c[1], 'country': c[2]},
                callback=self.parse_home_page,
                dont_filter=True
            )
            request_with_cookies.meta['item'] = item
            yield request_with_cookies
Developer: ahsaeldin, Project: projects, Lines: 21, Source: dillards.py

Example 14: parse_review

# Required import: from scrapy.http.request import Request [as alias]
# Or: from scrapy.http.request.Request import meta['item'] [as alias]
    def parse_review(self, response):
        item = response.meta['item']
        if not response.xpath('//div[@class="no_data"]'):
            for div in response.xpath('//div[@id="_j_poilist"]/div'):
                poi = {}
                poi['name'] = div.xpath(
                    './/h3[@class="title"]/a/text()').extract_first()
                poi['comment'] = div.xpath(
                    './/div[@class="poi-rev _j_comment"]/text()').extract_first().strip()
                poi['comment_datetime'] = div.xpath(
                    './/span[@class="time"]/text()').extract_first().strip()
                poi['star'] = int(
                    div.xpath('.//div[@class="review-score"]/span/@class').extract_first()[-1])
                item['review'].append(poi)

        together = response.xpath(u'//a[@title="TA的结伴"]/@href').extract_first()
        req = Request(urljoin(response.url, together),
                      callback=self.parse_together)
        req.meta['item'] = item
        yield req
Developer: twelfthing, Project: socool, Lines: 22, Source: mafengwo.py

Example 15: parse_node

# Required import: from scrapy.http.request import Request [as alias]
# Or: from scrapy.http.request.Request import meta['item'] [as alias]
	def parse_node(self, response, node):
		# Get the link that holds further information
		link= node.xpath('link/text()').extract()
		# Get the event title
		title= node.xpath('title/text()').extract()
		# Get the event description
		description=node.xpath('description/text()').extract()
		# Create an item for each event
		item=EventItem()
		item['title']=title[0]
		# Strip all HTML tags
		description=re.sub('<[^>]*>', '', description[0])
		item['description']=description
		item['informationLink']=link
		# Create a Scrapy Request, indicating which link to analyze and in which method
		request=Request(link[0],callback=self.parse_events_links)
		# Attach the event Item to the request so information keeps accumulating on it
		request.meta['item']=item
		# Issue the request
		yield request
Developer: AritzBi, Project: BilbaoTourismScraper, Lines: 22, Source: kedin_spider.py
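
The parse_node(self, response, node) signature in Example 15 is not an ordinary callback: it belongs to Scrapy's XMLFeedSpider, which calls the method once per matching node of an XML/RSS feed. A minimal sketch of the class around it, assuming a feed URL (the itertag of 'item' matches the link/title/description fields used above):

from scrapy.spiders import XMLFeedSpider


class EventFeedSpider(XMLFeedSpider):
    name = 'kedin'  # hypothetical name, after the source file kedin_spider.py
    start_urls = ['http://www.kedin.es/rss']  # assumed feed URL
    iterator = 'iternodes'  # default streaming XML iterator
    itertag = 'item'  # parse_node runs once per <item> element

    # the parse_node method from Example 15 goes here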


Note: The scrapy.http.request.Request.meta['item'] examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets were selected from open-source projects contributed by various developers; copyright of the source code remains with the original authors. Consult each project's license before using or redistributing the code; please do not reproduce without permission.