本文整理汇总了Python中scrapy.http.request.Request.meta['tax_url']方法的典型用法代码示例。如果您正苦于以下问题:Python Request.meta['tax_url']方法的具体用法?Python Request.meta['tax_url']怎么用?Python Request.meta['tax_url']使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类scrapy.http.request.Request的用法示例。
在下文中一共展示了Request.meta['tax_url']方法的1个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: parse_house
# 需要导入模块: from scrapy.http.request import Request [as 别名]
# 或者: from scrapy.http.request.Request import meta['tax_url'] [as 别名]
def parse_house(self, response):
    """Parse a house listing page and chain a request for its price history.

    Builds a ``HouseItem`` from the page (address, city, state, price, sale
    status, rent zestimate, bedrooms, bathrooms, square footage, lot size,
    id, timestamp), then returns a ``Request`` for the price-history AJAX
    URL. The partially filled item and the tax-history URL travel with the
    request in ``meta`` so ``self.parse_history`` can continue filling it.

    Args:
        response: the downloaded listing page; ``response.url``,
            ``response.xpath`` and ``response.body`` are read.

    Returns:
        A ``Request`` whose callback is ``self.parse_history`` and whose
        ``meta`` holds ``'item'`` (the HouseItem) and ``'tax_url'``.

    Raises:
        IndexError: if an expected node is missing from the page
            (``extract()`` returned too few results).
        AttributeError: if a mandatory regex (city, state, bathrooms,
            square footage, id, ...) does not match the extracted text.
    """

    def by_class(css_class, tail):
        # XPath for elements whose space-separated class list contains
        # css_class, followed by an arbitrary trailing path/predicate.
        return ('//*[contains(concat(" ", normalize-space(@class), " "), " %s ")]%s'
                % (css_class, tail))

    def text_at(xpath, index=0):
        # The index-th string extracted by xpath; IndexError if absent.
        return response.xpath(xpath).extract()[index]

    # Every `request_interval` requests, sleep briefly and pick a new random
    # interval so the crawl cadence looks less mechanical.
    # NOTE(review): time.sleep blocks the whole process while it waits;
    # kept because it is the existing behaviour.
    self.requests += 1
    # `and` instead of the original bitwise `&`; assumes pauseEnabled is a
    # bool, for which the two are equivalent.
    if self.pauseEnabled and self.requests % self.request_interval == 0:
        print("Pause")
        self.request_interval = randint(1, self.max_interval)
        # Divide by a float so the pause keeps its fractional part under
        # Python 2 integer division as well (0.00 - 2.00 seconds).
        pause_time = randint(0, 200) / 100.0
        time.sleep(pause_time)
        print("Paused " + str(pause_time) + "s")

    house = HouseItem()
    house['zillow_url'] = response.url

    # <h1> text is the street address, optionally followed by a comma.
    address_field = text_at('//h1/text()')
    address_match = re.search(r'^(.*?),', address_field)
    if address_match is None:
        house['address'] = address_field
    else:
        house['address'] = address_match.group(1)

    # The <h1><span> text is split as "<city>, <state> ..." by the regexes.
    locality = text_at('//h1/span/text()')
    house['city'] = re.search(r'^(.*?),', locality).group(1)
    house['state'] = re.search(r',\s(.*?)\s', locality).group(1)

    # Keep only digits and dots; this already removes "$" and "," so the
    # original chained str.replace calls were redundant.
    house['price'] = re.sub(r'[^\d.]+', '',
                            text_at(by_class('main-row', '/span/text()')))

    # Prefer the second text node of the status row; when it is blank,
    # fall back to the first <span> inside the row.
    sale_status = text_at(by_class('status-icon-row', '/text()'), 1).strip()
    if sale_status == "":
        sale_status = text_at(by_class('status-icon-row', '/span/text()'))
    house['sale_status'] = sale_status

    # Everything before the first "/" of the zest-value text, without "$"
    # and ","; -1 marks an unavailable estimate.
    zestimate_field = text_at(by_class('zest-value', '/text()'), 1)
    if zestimate_field != 'Unavailable':
        house['rent_zestimate'] = (re.search(r'^(.*?)/', zestimate_field)
                                   .group(1)
                                   .replace(',', "")
                                   .replace('$', ""))
    else:
        house['rent_zestimate'] = -1

    # Leading token of each addr_bbs element; a first element with no
    # whitespace at all is recorded as "Studio".
    bedroom_match = re.search(r'^(.*?)\s', text_at(by_class('addr_bbs', '/text()')))
    if bedroom_match is not None:
        house['bedrooms'] = bedroom_match.group(1)
    else:
        house['bedrooms'] = "Studio"
    house['bathrooms'] = re.search(
        r'^(.*?)\s', text_at(by_class('addr_bbs', '[2]/text()'))).group(1)
    house['sqrft'] = re.search(
        r'^(.*?)\s', text_at(by_class('addr_bbs', '[3]/text()'))
    ).group(1).replace(',', "")

    # A lot field with no digits is stored verbatim; otherwise keep what
    # follows the first whitespace character, with commas removed.
    lot_field = text_at(by_class('zsg-list_square', '/li[1]/text()'))
    if re.search(r'^([^0-9]*)$', lot_field) is not None:
        house['lot_size'] = lot_field
    else:
        house['lot_size'] = re.search(r'\s(.*?)$', lot_field).group(1).replace(',', "")

    # Digits immediately preceding "_zpid" in the page URL.
    house['id'] = re.search(r'/(\d*)_zpid', response.url).group(1)
    house['timestamp'] = datetime.datetime.now().isoformat()

    # Locate the AJAX endpoints for the price and tax history tables.
    # NOTE(review): no parser is named, so BeautifulSoup picks whichever is
    # installed; left unchanged to avoid altering what get_ajax_url sees.
    soup = BeautifulSoup(response.body)
    history_url = get_ajax_url(soup, "z-hdp-price-history")
    tax_url = get_ajax_url(soup, "z-expando-table")
    house['tax_url'] = tax_url

    # Hand the item and the tax URL on to the price-history callback.
    history_request = Request(history_url, callback=self.parse_history)
    history_request.meta['item'] = house
    history_request.meta['tax_url'] = tax_url
    return history_request