当前位置: 首页>>代码示例>>Python>>正文


Python Request.meta['tax_url']方法代码示例

本文整理汇总了Python中scrapy.http.request.Request.meta['tax_url']方法的典型用法代码示例。如果您正苦于以下问题:Python Request.meta['tax_url']方法的具体用法?Python Request.meta['tax_url']怎么用?Python Request.meta['tax_url']使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在scrapy.http.request.Request的用法示例。


在下文中一共展示了Request.meta['tax_url']方法的1个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: parse_house

# Required import: from scrapy.http.request import Request [as alias]
# Note: `meta` is a dict attribute of a Request instance (e.g. request.meta['tax_url']); it is not importable on its own.
 def parse_house(self, response):
     """Build a HouseItem from a house detail page and chain a history request.

     Scrapes address, city, state, price, sale status, rent zestimate,
     bedrooms, bathrooms, square footage, lot size and listing id out of
     ``response`` and stores them (as strings, except for the ``-1``
     sentinel used when the rent zestimate is 'Unavailable') on a new
     ``HouseItem``.

     Args:
         response: the downloaded page; ``response.url`` must contain a
             ``/<digits>_zpid`` segment, which becomes ``house['id']``.

     Returns:
         A ``Request`` for the URL that ``get_ajax_url`` yields for
         "z-hdp-price-history", with ``self.parse_history`` as callback
         and ``meta['item']`` / ``meta['tax_url']`` populated.

     Raises:
         IndexError: an expected text node is absent from the page.
         AttributeError: a required regex does not match its field.
     """
     # Throttle: every `request_interval`-th request, sleep a random 0-2 s
     # and draw a new random interval so the request cadence is irregular.
     # NOTE(review): time.sleep() blocks the calling thread; presumably this
     # stalls every in-flight request of the crawler -- confirm it is wanted.
     self.requests += 1
     # `and` (not bitwise `&`) so the modulo is skipped when pausing is off.
     if self.pauseEnabled and self.requests % self.request_interval == 0:
         print("Pause")
         self.request_interval = randint(1, self.max_interval)
         # Divide by a float: under Python 2, `/ 100` truncates to 0, 1 or 2.
         pause_time = randint(0, 200) / 100.0
         time.sleep(pause_time)
         print("Paused " + str(pause_time) + "s")

     def by_class(name):
         # XPath matching any element whose class list contains `name`.
         return ('//*[contains(concat(" ", normalize-space(@class), " "), " '
                 + name + ' ")]')

     def text_at(xpath, index=0):
         # The index-th string selected by `xpath`; IndexError if missing.
         return response.xpath(xpath).extract()[index]

     house = HouseItem()
     house['zillow_url'] = response.url

     # <h1> holds "street, ..." -- keep only the part before the first comma,
     # or the whole text when there is no comma.
     address_field = text_at('//h1/text()')
     address_match = re.search(r'^(.*?),', address_field)
     if address_match is None:
         house['address'] = address_field
     else:
         house['address'] = address_match.group(1)

     # The <h1><span> text is parsed as "<city>, <state> ...".
     locality = text_at('//h1/span/text()')
     house['city'] = re.search(r'^(.*?),', locality).group(1)
     house['state'] = re.search(r',\s(.*?)\s', locality).group(1)

     # Keep digits and dots only; this already drops '$' and ','.
     non_decimal = re.compile(r'[^\d.]+')
     house['price'] = non_decimal.sub(
         '', text_at(by_class('main-row') + '/span/text()'))

     # The status is the second text node of the row; when that is blank,
     # fall back to the text of the row's first <span>.
     status_row = by_class('status-icon-row')
     sale_status = text_at(status_row + '/text()', 1).strip()
     if sale_status == "":
         sale_status = text_at(status_row + '/span/text()')
     house['sale_status'] = sale_status

     # Value before the first '/', with ',' and '$' removed; -1 is the
     # sentinel for an unavailable estimate.
     zestimate_field = text_at(by_class('zest-value') + '/text()', 1)
     if zestimate_field != 'Unavailable':
         house['rent_zestimate'] = (
             re.search(r'^(.*?)/', zestimate_field).group(1)
             .replace(',', "").replace('$', ""))
     else:
         house['rent_zestimate'] = -1

     # Beds / baths / sqft: leading token of the 1st, 2nd and 3rd addr_bbs
     # elements. A first element without whitespace is treated as a studio.
     addr_bbs = by_class('addr_bbs')
     bedroom_match = re.search(r'^(.*?)\s', text_at(addr_bbs + '/text()'))
     if bedroom_match is not None:
         house['bedrooms'] = bedroom_match.group(1)
     else:
         house['bedrooms'] = "Studio"
     house['bathrooms'] = re.search(
         r'^(.*?)\s', text_at(addr_bbs + '[2]/text()')).group(1)
     house['sqrft'] = re.search(
         r'^(.*?)\s', text_at(addr_bbs + '[3]/text()')).group(1).replace(',', "")

     # Lot size: a digit-free value is stored verbatim; otherwise keep what
     # follows the first whitespace, with thousands separators removed.
     lot_field = text_at(by_class('zsg-list_square') + '/li[1]/text()')
     if re.search(r'^([^0-9]*)$', lot_field) is not None:
         house['lot_size'] = lot_field
     else:
         house['lot_size'] = re.search(
             r'\s(.*?)$', lot_field).group(1).replace(',', "")

     house['id'] = re.search(r'/(\d*)_zpid', response.url).group(1)
     # Local, naive wall-clock time in ISO 8601 format.
     house['timestamp'] = datetime.datetime.now().isoformat()

     # Chain the price-history request; the tax URL travels along in meta
     # (and on the item) for use by later callbacks.
     # NOTE(review): BeautifulSoup is called without an explicit parser, so
     # the parser used depends on what is installed -- consider pinning one.
     soup = BeautifulSoup(response.body)
     history_url = get_ajax_url(soup, "z-hdp-price-history")
     tax_url = get_ajax_url(soup, "z-expando-table")
     history_request = Request(history_url, callback=self.parse_history)
     history_request.meta['item'] = house
     history_request.meta['tax_url'] = tax_url
     house['tax_url'] = tax_url

     return history_request
开发者ID:KaroAntonio,项目名称:Guru,代码行数:63,代码来源:zillow_spider.py


注:本文中的scrapy.http.request.Request.meta['tax_url']方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。