This article collects typical usage examples of the Request.meta['item'] pattern on Python's scrapy.http.request.Request class. If you have been wondering what Request.meta['item'] is for, how it works, or what real code that uses it looks like, the curated examples below should help. You can also read further about the enclosing class, scrapy.http.request.Request.
The 15 code examples below all demonstrate the Request.meta['item'] pattern; by default they are sorted by popularity.
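Before diving in, here is a minimal, self-contained sketch of the idiom every example shares: one callback attaches a partially filled item to request.meta['item'], and the callback that handles the follow-up response retrieves it and keeps filling it in. The spider name, URLs, XPath expressions, and field names below are illustrative placeholders, not taken from any of the examples.

import scrapy
from scrapy.http.request import Request


class ChainSpider(scrapy.Spider):
    # Hypothetical spider: the name, URLs, and fields are assumptions for illustration.
    name = 'chain_example'
    start_urls = ['http://example.com/products']

    def parse(self, response):
        # Start an item on the listing page...
        item = {'name': response.xpath('//h1/text()').extract_first()}
        request = Request('http://example.com/products/1/price',
                          callback=self.parse_price)
        # ...and attach it to the request so the next callback can finish it.
        request.meta['item'] = item
        yield request

    def parse_price(self, response):
        # Retrieve the item started in parse() and complete it.
        item = response.meta['item']
        item['price'] = response.xpath('//span[@class="price"]/text()').extract_first()
        yield item

Note that since Scrapy 1.7 the documented way to pass data between callbacks is the cb_kwargs argument to Request; the meta['item'] idiom shown throughout these examples remains common in older codebases.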
Example 1: snapdeal_scraper
# Required import: from scrapy.http.request import Request
# Pattern demonstrated: Request.meta['item']
def snapdeal_scraper(self, response):
    # Retrieve the item passed along by the previous callback.
    item = response.meta['item']
    sel = Selector(response)
    item['Snapdeal_URL'] = response.url
    try:
        if sel.xpath("//div[@class='notifyMe-soldout']"):
            ProductName = sel.xpath("//h1[@itemprop='name']/text()").extract()[0].replace(",", "")
            item['Snapdeal__ProductName'] = ProductName
            item['Snapdeal_MRP'] = item['Snapdeal_SP'] = ''
            item['Snapdeal_Stock'] = 'Out of Stock'
        else:
            mrp = sel.xpath("//span[@id='original-price-id']/text()").extract()
            if mrp:
                item['Snapdeal_SP'] = sel.xpath("//span[@id='selling-price-id']/text()").extract()[0]
                item['Snapdeal_MRP'] = mrp[0]
            else:
                # No separate original price shown: the selling price is the MRP.
                item['Snapdeal_MRP'] = sel.xpath("//span[@id='selling-price-id']/text()").extract()[0]
                item['Snapdeal_SP'] = ''
            item['Snapdeal__ProductName'] = sel.xpath("//h1[@itemprop='name']/text()").extract()[0].replace(",", "")
            stock = sel.xpath("//div[@class='notifyMe-soldout']").extract()
            discntnd = sel.xpath("//div[@class='noLongerProduct']").extract()
            if stock or discntnd:
                item['Snapdeal_Stock'] = "Out Of Stock"
            else:
                item['Snapdeal_Stock'] = "In Stock"
    except Exception:
        item['Snapdeal__ProductName'] = item['Snapdeal_MRP'] = item['Snapdeal_SP'] = ''
        item['Snapdeal_Stock'] = 'Not Found'
    # Chain to the next site, passing the same item along in request.meta;
    # fall back from Amazon to Flipkart to Paytm, and write the item out if
    # every lookup fails.
    try:
        amazon_url = amazon_urls[item['index']]
        request = Request(amazon_url,
                          headers={'Referer': 'http://amazon.in'},
                          callback=self.amazon_scraper)
        request.meta['item'] = item
        request.meta['proxy'] = "http://111.161.126.100:80"
        yield request
    except Exception:
        try:
            flipkart_url = flipkart_urls[item['index']]
            request = Request(flipkart_url, callback=self.flipkart_scraper)
            request.meta['item'] = item
            # request.meta['proxy'] = "http://111.161.126.100:80"
            yield request
        except Exception:
            try:
                paytm_url = paytm_urls[item['index']]
                request = Request(paytm_url, callback=self.paytm_scraper)
                request.meta['item'] = item
                request.meta['proxy'] = "http://111.161.126.100:80"
                yield request
            except Exception:
                self.to_csv(item)
Example 2: amazon_marketplace
# Required import: from scrapy.http.request import Request
# Pattern demonstrated: Request.meta['item']
def amazon_marketplace(self, response):
    sel = Selector(response)
    item = response.meta['item']
    try:
        sp = sel.xpath("//span[@style='text-decoration: inherit; white-space: nowrap;']/text()").extract()[0].replace(",", "")
        shippingcost = sel.xpath("//span[@class='olpShippingPrice']/span/text()").extract()
        if shippingcost:
            # Add the shipping cost to the marketplace selling price.
            sp = str(float(sp) + float(shippingcost[0].replace(",", "")))
        if float(sp) > float(item['SP']):  # compare prices numerically, not as strings
            sp = item['SP']
    except Exception:
        # Amazon marketplace lookup failed; fall back to Flipkart, then Paytm.
        try:
            flipkart_url = flipkart_urls[item['index']]
            request = Request(flipkart_url, callback=self.flipkart_scraper)
            request.meta['item'] = item
            # request.meta['proxy'] = "http://111.161.126.100:80"
            yield request
        except Exception:
            try:
                paytm_url = paytm_urls[item['index']]
                request = Request(paytm_url, callback=self.paytm_scraper)
                request.meta['item'] = item
                request.meta['proxy'] = "http://111.161.126.100:80"
                yield request
            except Exception:
                self.to_csv(item)
Example 3: parse_user
# Required import: from scrapy.http.request import Request
# Pattern demonstrated: Request.meta['item']
def parse_user(self, response):
    item = MFWItem()
    item['uid'] = response.meta['uid']
    item['name'] = response.xpath(
        '//div[@class="MAvaName"]/text()').extract_first()
    item['level'] = int(response.xpath(
        '//span[@class="MAvaLevel flt1"]/a/@title').extract_first().split('.')[-1])
    if item['level'] <= 3:
        return
    item['tags'] = response.xpath(
        '//div[@class="its_tags"]//i[contains(@class, "on")]/../@title').extract()
    item['attention'] = [int(i) for i in response.xpath(
        '//div[@class="MAvaMore clearfix"]//a/text()').extract()]
    item['groups'] = response.xpath(
        '//div[@class="MGroupDetail"]//a[@class="name"]/text()').extract()
    item['dynamic'] = response.xpath(
        '//span[@class="time"]/text()').extract()
    item['download'] = []
    infos = response.xpath('//div[@class="common_block relative_info"]/p')
    for info in infos:
        # u'刚刚下载了' means "just downloaded".
        if u'刚刚下载了' in ''.join(info.xpath('text()').extract()):
            item['download'].append({
                'time': info.xpath('span[@class="time"]/text()').extract_first(),
                'name': info.xpath('a/text()').extract()[-1]})
    item['note'] = {}
    item['path'] = []
    item['review'] = []
    item['together'] = []
    # u'TA的游记' means "their travel notes".
    note = response.xpath(u'//a[@title="TA的游记"]/@href').extract_first()
    req = Request(urljoin(response.url, note), callback=self.parse_note)
    req.meta['item'] = item
    yield req
Example 4: parse_monuments_en
# Required import: from scrapy.http.request import Request
# Pattern demonstrated: Request.meta['item']
def parse_monuments_en(self, response):
    sel = Selector(response)
    monument = sel.xpath('//div[@class="col-50 content-desc"]')
    title = monument.xpath("h2[@class='big sec-color']/text()").extract()
    summary = ''.join(monument.xpath("div[@id='idContentScroll']/span/p//text()").extract())
    informationLink = monument.xpath("div[@id='idContentScroll']/span/a/@href").extract()
    item = response.meta['item']
    if len(informationLink) > 0:
        item['informationLink_en'] = informationLink.pop()
    else:
        item['informationLink_en'] = response.url
    if len(title) > 0:
        item['name_en'] = title.pop()
    else:
        item['name_en'] = ''
    if len(summary) > 0:
        item['description_en'] = summary
    else:
        item['description_en'] = ''
    if len(informationLink) > 0:
        item['informationLink'] = informationLink.pop()
    else:
        item['informationLink'] = response.url
    # Follow the Basque ("eu") version of the page, carrying the item along.
    euLink = sel.xpath('//*[@id="eu"]/@href').extract()
    request = Request(self.BASE + str(euLink.pop()), callback=self.parse_monuments_eu)
    request.meta['item'] = item
    yield request
Example 5: parse_restaurants_en
# Required import: from scrapy.http.request import Request
# Pattern demonstrated: Request.meta['item']
def parse_restaurants_en(self, response):
    sel = Selector(response)
    item = response.meta['item']
    descriptionpath = sel.xpath("//*[@id='idContentScroll']")
    description = descriptionpath.xpath("span[@itemprop='description']/p//text()").extract()
    timetable = descriptionpath.xpath("span[@itemprop='description']/p[2]//text()").extract()
    timetable2 = descriptionpath.xpath("span[@itemprop='description']/p[3]//text()").extract()
    categoryPath = sel.xpath("//*[@id='gastronomy-content']/section[2]/div/section[1]/section/div/ul/li[2]/p[2]")
    category = categoryPath.xpath("a/strong/text()").extract()
    if len(description) > 0:
        item['description_en'] = ' '.join(description)
    else:
        item['description_en'] = ''
    if len(category) > 0:
        item['category_en'] = ['Restaurant', category.pop()]
    else:
        item['category_en'] = ['Restaurant', 'Others']
    if len(timetable) > 0:
        if len(timetable2) > 0:
            item['timetable_en'] = ' '.join([timetable.pop(), timetable2.pop()])
        else:
            item['timetable_en'] = timetable.pop()
    else:
        item['timetable_en'] = ''
    # Switch the URL from the English to the Basque ("eu") version of the page.
    link = response.url
    link = link.replace("/en/", "/eu/")
    request = Request(link, callback=self.parse_restaurants_eu)
    request.meta['item'] = item
    yield request
Example 6: parse_history
# Required import: from scrapy.http.request import Request
# Pattern demonstrated: Request.meta['item']
def parse_history(self, response):
    # Parse the price-history table.
    house = response.meta['item']
    tax_url = house['tax_url']
    price_history = []
    pattern = r' { "html": "(.*)" }'
    html = re.search(pattern, response.body).group(1)
    html = re.sub(r'\\"', r'"', html)  # Correct escaped quotes
    html = re.sub(r'\\/', r'/', html)  # Correct escaped forward slashes
    if html != "":
        soup = BeautifulSoup(html)
        table = soup.find('table')
        table_body = table.find('tbody')
        rows = table_body.find_all('tr')
        for row in rows:
            cols = row.find_all('td')
            cols = [ele for ele in cols]
            cols = cols[:3]
            if cols[2].find('span') is not None:
                date = cols[0].get_text()
                event = cols[1].get_text()
                price = cols[2].find('span').get_text()
                price_history.append([date, event, price])
    # Store the history as a JSON string, then chain to the tax page.
    house['price_history'] = json.dumps(price_history)
    tax_request = Request(tax_url,
                          callback=self.parse_taxes)
    tax_request.meta['item'] = house
    return tax_request
Example 7: listing_scraper
# Required import: from scrapy.http.request import Request
# Pattern demonstrated: Request.meta['item']
def listing_scraper(self, response):
    """
    Given a page with members, scrape user info
    """
    # Get member info
    x = HtmlXPathSelector(response)
    reqs = []
    for alum_tr in x.select("//table[@id='cid_41_RadGrid1_ctl00']/tbody/tr"):
        tds = alum_tr.select("td")
        if len(tds[1].select("a/@href").extract()) == 0:
            print("GOT A WEIRD RESPONSE WITH BLANK NAMES, handling")
            alum = Alum(
                mid=tds[0].select("text()").extract(),
                # name = tds[1].select("a/text()").extract(),
                preferred_class_year=tds[2].select("text()").extract(),
                city=tds[3].select("text()").extract(),
                state=tds[4].select("text()").extract(),
                page_url="http://wesconnect.wesleyan.edu/s/1318/index.aspx?sid=1318&gid=1&pgid=94&cid=256&mid=" +
                         tds[0].select("text()").extract()[0],
                country=tds[5].select("text()").extract()
            )
            req = Request(url=alum['page_url'], callback=self.member_parser)
            req.meta['item'] = alum
            reqs.append(req)
        else:
            alum = Alum(
                page_url="http://wesconnect.wesleyan.edu" + tds[1].select("a/@href").extract()[0],
                mid=tds[0].select("text()").extract(),
                name=tds[1].select("a/text()").extract(),
                preferred_class_year=tds[2].select("text()").extract(),
                city=tds[3].select("text()").extract(),
                state=tds[4].select("text()").extract(),
                country=tds[5].select("text()").extract()
            )
            req = Request(url=alum['page_url'], callback=self.member_parser)
            req.meta['item'] = alum
            reqs.append(req)
    return reqs
Example 8: start_requests
# Required import: from scrapy.http.request import Request
# Pattern demonstrated: Request.meta['item']
def start_requests(self):
    with open('imageURLs.csv') as csvFile:
        reader = csv.DictReader(csvFile)
        for row in reader:
            item = GetimagesprojectItem()
            image_url = row['URL']
            item['image_urls'] = [image_url, ]
            item['pid'] = row['ID']
            request = Request(image_url, callback=self.parse)
            request.meta['item'] = item
            yield request
Example 9: flipkart_scraper
# Required import: from scrapy.http.request import Request
# Pattern demonstrated: Request.meta['item']
def flipkart_scraper(self, response):
    sel = Selector(response)
    item = response.meta['item']
    item['Flipkart_URL'] = response.url
    if sel.xpath("//h1[@class='title']/text()").extract():
        item['Flipkart_ProductName'] = sel.xpath("//h1[@class='title']/text()").extract()[0].replace(",", "")
        mrp_xpath = sel.xpath("//span[@class='price']/text()").extract()
        sp_xpath = sel.xpath("//span[@class='selling-price omniture-field']/text()").extract()
        if mrp_xpath and sp_xpath:
            item['Flipkart_MRP'] = mrp_xpath[0].replace(",", "").replace("Rs.", "")
            item['Flipkart_SP'] = sp_xpath[0].replace(",", "").replace("Rs.", "")
        elif sp_xpath:
            item['Flipkart_MRP'] = sp_xpath[0].replace(",", "").replace("Rs.", "")
            item['Flipkart_SP'] = ''
        elif mrp_xpath:
            item['Flipkart_MRP'] = mrp_xpath[0].replace(",", "").replace("Rs.", "")
            item['Flipkart_SP'] = ''
        stock = sel.xpath("//div[@class='out-of-stock-status'] | //div[@class='no-sellers-available omniture-field']").extract()
        if not stock:
            item['Flipkart_Stock'] = "In Stock"
        else:
            item['Flipkart_Stock'] = 'Out Of Stock'
    else:
        item['Flipkart_ProductName'] = item['Flipkart_SP'] = item['Flipkart_MRP'] = ''
        item['Flipkart_Stock'] = "Not Found"
    print(item)
    # Chain to the Paytm catalog API, carrying the item; write it out on failure.
    try:
        paytm_url = paytm_urls[item['index']]
        paytm_url = paytm_url.replace('//paytm.com/shop', '//catalog.paytm.com/v1')
        request = Request(paytm_url, callback=self.paytm_scraper)
        request.meta['item'] = item
        request.meta['proxy'] = "http://111.161.126.100:80"
        yield request
    except Exception:
        self.to_csv(item)
Example 10: parse_monuments
# Required import: from scrapy.http.request import Request
# Pattern demonstrated: Request.meta['item']
def parse_monuments(self, response):
    sel = Selector(response)
    monument = sel.xpath('//div[@class="col-50 content-desc"]')
    title = monument.xpath("h2[@class='big sec-color']/text()").extract()
    summary = ''.join(monument.xpath("div[@id='idContentScroll']/span/p//text()").extract())
    address = monument.xpath("span/text()").re(r'[\w\s,-\/]*\s48\d{3}\s*\w*')
    informationLink = monument.xpath("div[@id='idContentScroll']/span/a/@href").extract()
    images = sel.xpath("//*[@id='CapaImagen_0']/img/@src").extract()
    monumentoReligioso = sel.xpath("//*[@id='see-and-do-content']/section[2]/div/section[1]/section/ul/li/a[@class='sec-bg'][contains(.,'Monumentos religiosos')]").extract()
    if monumentoReligioso:
        category = "Monumento Religioso"
        category_en = "Religious monument"
        category_eu = ""
    else:
        category = "Monumento Historico"
        category_en = "Historical monument"
        category_eu = ""
    item = BuildingItem()
    if len(title) > 0:
        item['name'] = title.pop()
    else:
        item['name'] = ''
    if len(summary) > 0:
        item['description'] = summary
    else:
        item['description'] = ''
    if len(address) > 0:
        item['address'] = address.pop().strip()
    else:
        item['address'] = ''
    if len(informationLink) > 0:
        item['informationLink'] = informationLink.pop()
    else:
        item['informationLink'] = response.url
    if len(images) > 0:
        item['image_urls'] = [''.join([self.BASE, images.pop()])]
    item['category'] = category
    item['category_en'] = category_en
    item['category_eu'] = category_eu
    # Follow the English ("en") version of the page, carrying the item along.
    enLink = sel.xpath('//*[@id="en"]/@href').extract()
    request = Request(self.BASE + str(enLink.pop()), callback=self.parse_monuments_en)
    request.meta['item'] = item
    yield request
Example 11: parse
# Required import: from scrapy.http.request import Request
# Pattern demonstrated: Request.meta['item']
def parse(self, response):
    sel = Selector(response)
    results = sel.xpath("//ul/li[@class='search_result']")
    for result in results:
        # Create a fresh item per result so each request carries its own copy.
        item = RapGeniusItem()
        title = result.xpath("a/span[@class='title_with_artists']").extract()
        info = remove_html_tags(title[0]).split(u' \u2013 ')
        matches = result.xpath("p").extract()
        print(matches)
        url = result.xpath("a/@href").extract()
        item['url'] = self.base_url + url[0]
        item['artist'] = info[0].split('\n ')[1]  # This is hacky!
        item['song_name'] = info[1].splitlines()[0]
        # Pass the callback itself, not the string 'self.parse_song'.
        request = Request(item['url'], callback=self.parse_song)
        request.meta['item'] = item
        yield request
Example 12: parse_path
# Required import: from scrapy.http.request import Request
# Pattern demonstrated: Request.meta['item']
def parse_path(self, response):
    item = response.meta['item']
    for country in response.xpath('//div[@class="country-block other-block"]'):
        for city in country.xpath('div')[1:]:
            path = {}
            path['country'] = ''.join(country.xpath(
                './/div[@class="cb-hd"]/h2/text()').extract()).strip()
            path['date'] = city.xpath(
                './/span[@class="time"]/span/text()').extract_first().replace('.', '-')
            path['city'] = city.xpath('.//h3/span/text()').extract_first()
            if not path['city']:
                path['city'] = city.xpath('.//div[@class="vertical"]/p/text()').extract_first()[:-3]
            path['pois'] = city.xpath('.//h4/text()').extract()
            item['path'].append(path)
    # u'TA的点评' means "their reviews".
    review = response.xpath(u'//a[@title="TA的点评"]/@href').extract_first()
    req = Request(urljoin(response.url, review),
                  callback=self.parse_review)
    req.meta['item'] = item
    yield req
Example 13: parse_countries_page
# Required import: from scrapy.http.request import Request
# Pattern demonstrated: Request.meta['item']
def parse_countries_page(self, response):
    countries = response.meta['country_data']
    for c in countries:
        # http://www.dillards.com/
        # country=AG; currency=USD
        # Because the country name doesn't appear on the home page, we need to extract it here.
        item = ScraperItem()
        item['country'] = c[0]
        request_with_cookies = Request(
            url="http://www.dillards.com/",
            cookies={'currency': c[1], 'country': c[2]},
            callback=self.parse_home_page,
            dont_filter=True
        )
        request_with_cookies.meta['item'] = item
        yield request_with_cookies
Example 14: parse_review
# Required import: from scrapy.http.request import Request
# Pattern demonstrated: Request.meta['item']
def parse_review(self, response):
    item = response.meta['item']
    if not response.xpath('//div[@class="no_data"]'):
        for div in response.xpath('//div[@id="_j_poilist"]/div'):
            poi = {}
            poi['name'] = div.xpath(
                './/h3[@class="title"]/a/text()').extract_first()
            poi['comment'] = div.xpath(
                './/div[@class="poi-rev _j_comment"]/text()').extract_first().strip()
            poi['comment_datetime'] = div.xpath(
                './/span[@class="time"]/text()').extract_first().strip()
            poi['star'] = int(
                div.xpath('.//div[@class="review-score"]/span/@class').extract_first()[-1])
            item['review'].append(poi)
    # u'TA的结伴' means "their travel companions".
    together = response.xpath(u'//a[@title="TA的结伴"]/@href').extract_first()
    req = Request(urljoin(response.url, together),
                  callback=self.parse_together)
    req.meta['item'] = item
    yield req
Example 15: parse_node
# Required import: from scrapy.http.request import Request
# Pattern demonstrated: Request.meta['item']
def parse_node(self, response, node):
    # Get the link that holds further information.
    link = node.xpath('link/text()').extract()
    # Get the event title.
    title = node.xpath('title/text()').extract()
    # Get the event description.
    description = node.xpath('description/text()').extract()
    # Create an item for each event.
    item = EventItem()
    item['title'] = title[0]
    # Strip all HTML elements from the description.
    description = re.sub('<[^>]*>', '', description[0])
    item['description'] = description
    item['informationLink'] = link
    # Create a Scrapy Request, specifying which link to crawl and which callback handles it.
    request = Request(link[0], callback=self.parse_events_links)
    # Attach the event item to the request so information keeps accumulating on it.
    request.meta['item'] = item
    # Issue the request.
    yield request