本文整理汇总了Python中lxml.cssselect.CSSSelector.replace方法的典型用法代码示例。如果您正苦于以下问题:Python CSSSelector.replace方法的具体用法?Python CSSSelector.replace怎么用?Python CSSSelector.replace使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类lxml.cssselect.CSSSelector的用法示例。
在下文中一共展示了CSSSelector.replace方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: rent
# 需要导入模块: from lxml.cssselect import CSSSelector [as 别名]
# 或者: from lxml.cssselect.CSSSelector import replace [as 别名]
def rent(self, url):
    """Scrape one ganji.com rental listing page into ``self.fd``.

    Downloads ``url`` and stores the city, owner name, phone-image URL,
    area, price, post time, title and room/hall counts found in the HTML
    under the corresponding ``self.fd`` keys.

    Only the first part of this method is visible in the listing (the tail
    is elided), so the original local variable names are kept on purpose:
    the unseen remainder may still read them.

    Args:
        url: Address of the listing page.  Its host part is used both to
            derive the city key and to build the absolute phone-image URL.

    Raises:
        Aborts with a bare ``raise`` when ``self.mayGetIt(response)`` is
        true, when the city link is missing, when the listing is not a
        private-owner listing, or when no phone image is available.
        NOTE(review): a bare ``raise`` with no exception being handled
        fails with TypeError on Python 2 (RuntimeError on Python 3).  It
        is left unchanged so callers keep seeing the same exception; a
        dedicated exception class would be clearer.
    """
    hc = urlparse(url)[1].replace('.ganji.com', "")
    hc2 = citynameDict_sf.get(hc)
    # Prefer the mapped city name; fall back to the raw sub-domain.
    if hc2:
        self.fd['house_city'] = hc2
    else:
        self.fd['house_city'] = hc

    request = urllib2.Request(url, None, self.header)
    response = urllib2.urlopen(request).read()
    # mayGetIt() is defined elsewhere; a true result discards the record.
    if self.mayGetIt(response):
        self.fd = {}
        raise
    tree = etree.HTML(response)

    city_match = re.search("<span class=\"city\"><a .*?>(.*?)</a>", response)
    if city_match:
        cityname = city_match.group(1)
        self.fd['cityname'] = cityname
    else:
        raise

    self.fd['house_flag'] = 2
    self.fd['house_type'] = 6
    self.fd['house_floor'] = 0
    self.fd['house_topfloor'] = 0

    soup = BeautifulSoup(response)
    detail_mer = soup.find('div', {'class': 'detail_mer'})
    # Not a private-owner listing: abort.
    if u"个人房源" not in str(detail_mer):
        raise
    Dname = detail_mer.find('span', {'class': 'Dname'})
    if Dname:
        self.fd['owner_name'] = str(Dname.string)
    else:
        self.fd['owner_name'] = ""

    ganji_phone_call_class = detail_mer.find('span', {'class': 'ganji_phone_call_class'})
    # str.find() returns -1 (truthy) when the text is absent, so the
    # original truthiness test never reached its ``else`` branch; use a
    # membership test instead.
    if ganji_phone_call_class and 'src=' in str(ganji_phone_call_class):
        self.fd['owner_phone_pic'] = 'http://' + urlparse(url)[1] + ganji_phone_call_class.img['src']
    else:
        self.fd['owner_phone_pic'] = None
    # No contact information: abort.
    if not self.fd['owner_phone_pic']:
        raise

    area_match = re.search(self.house_totalarea_regex, response)
    if area_match:
        house_totalarea = area_match.group(1)
        self.fd['house_area'] = house_totalarea
    else:
        self.fd['house_area'] = None

    price_match = re.search(self.house_price_regex_2, response)
    if price_match:
        house_price = price_match.group(1)
        # "面议" (negotiable) carries no number; store it as 0.
        if house_price == "面议":
            house_price = 0
        self.fd['house_price'] = int(house_price)
    else:
        self.fd['house_price'] = 0

    # CSSSelector returns a list (never None); an empty list or an element
    # without text must fall through to the "use current time" branch
    # instead of raising IndexError/AttributeError.
    pub_time_nodes = CSSSelector('span.pub_time')(tree)
    posttime = None
    if pub_time_nodes and pub_time_nodes[0].text:
        posttime = pub_time_nodes[0].text.strip() or None
    if posttime:
        # The page only supplies month and day ("MM-DD ..."); year, hour
        # and minute are taken from a single snapshot of the local clock.
        now = time.localtime()
        month_day = posttime.split(' ')[0].split('-')
        Y = now.tm_year
        M = int(month_day[0])
        D = int(month_day[1])
        H = now.tm_hour
        Min = now.tm_min
        s = datetime.datetime(Y, M, D, H, Min)
        posttime = str(int(time.mktime(s.timetuple())))
        self.fd['house_posttime'] = posttime
    else:
        s = time.localtime(time.time())
        self.fd['house_posttime'] = str(int(time.mktime(s)))

    # Same list/None handling for the title; a missing or empty heading is
    # stored as None instead of crashing on ``None.replace``.
    title_nodes = CSSSelector("div.detail_title h1")(tree)
    house_title = None
    if title_nodes and title_nodes[0].text:
        house_title = title_nodes[0].text.strip() or None
    if house_title:
        house_title = house_title.replace("(求购)","").replace("(求租)","").replace("(出售)","")
    self.fd['house_title'] = house_title

    room_match = re.search(self.house_room_regex, response)
    if room_match:
        house_room = room_match.group(1)
        self.fd['house_room'] = house_room
    else:
        self.fd['house_room'] = 0

    hall_match = re.search(self.house_hall_regex, response)
    if hall_match:
        house_hall = hall_match.group(1)
        self.fd['house_hall'] = house_hall
    else:
        self.fd['house_hall'] = 0

    toilet_match = re.search(self.house_toilet_regex, response)
    if toilet_match:
        house_toilet = toilet_match.group(1)
    # ......... part of the code is omitted here in the original listing .........
示例2: require
# 需要导入模块: from lxml.cssselect import CSSSelector [as 别名]
# 或者: from lxml.cssselect.CSSSelector import replace [as 别名]
def require(self, url):
    """Scrape one ganji.com "wanted" listing page into ``self.fd``.

    Downloads ``url`` and stores the city, owner name, phone-image URL,
    requested price range, post time, title and room count found in the
    HTML under the corresponding ``self.fd`` keys.  Several fields that
    this page type does not provide are preset to 0.

    Only the first part of this method is visible in the listing (the tail
    is elided), so the original local variable names are kept on purpose:
    the unseen remainder may still read them.

    Args:
        url: Address of the listing page.  Its host part is used both to
            derive the city key and to build the absolute phone-image URL.

    Raises:
        Aborts with a bare ``raise`` when ``self.mayGetIt(response)`` is
        true, when the city link is missing, when the listing is not a
        private-owner listing, or when no phone image is available.
        NOTE(review): a bare ``raise`` with no exception being handled
        fails with TypeError on Python 2 (RuntimeError on Python 3).  It
        is left unchanged so callers keep seeing the same exception; a
        dedicated exception class would be clearer.
    """
    hc = urlparse(url)[1].replace('.ganji.com', "")
    hc2 = citynameDict_sf.get(hc)
    # Prefer the mapped city name; fall back to the raw sub-domain.
    if hc2:
        self.fd['house_city'] = hc2
    else:
        self.fd['house_city'] = hc

    request = urllib2.Request(url, None, self.header)
    response = urllib2.urlopen(request).read()
    # mayGetIt() is defined elsewhere; a true result discards the record.
    if self.mayGetIt(response):
        self.fd = {}
        raise
    tree = etree.HTML(response)

    city_match = re.search("<span class=\"city\"><a .*?>(.*?)</a>", response)
    if city_match:
        cityname = city_match.group(1)
        self.fd['cityname'] = cityname
    else:
        raise

    self.fd['house_flag'] = 4
    self.fd['house_type'] = 6
    self.fd['house_floor'] = 0
    self.fd['house_topfloor'] = 0
    self.fd['house_area'] = 0
    self.fd['house_age'] = 0
    self.fd['house_toward'] = 0
    self.fd['house_fitment'] = 0
    self.fd['house_deposit'] = 0

    soup = BeautifulSoup(response)
    detail_mer = soup.find('div', {'class': 'detail_mer'})
    # Not a private-owner listing: abort.
    if u"个人房源" not in str(detail_mer):
        raise
    Dname = detail_mer.find('span', {'class': 'Dname'})
    if Dname:
        self.fd['owner_name'] = Dname.string
    else:
        self.fd['owner_name'] = None

    ganji_phone_call_class = detail_mer.find('span', {'class': 'ganji_phone_call_class'})
    # str.find() returns -1 (truthy) when the text is absent, so the
    # original truthiness test never reached its ``else`` branch; use a
    # membership test instead.
    if ganji_phone_call_class and 'src=' in str(ganji_phone_call_class):
        self.fd['owner_phone_pic'] = 'http://' + urlparse(url)[1] + ganji_phone_call_class.img['src']
    else:
        self.fd['owner_phone_pic'] = None
    # No contact information: abort.
    if not self.fd['owner_phone_pic']:
        raise

    # Price range: both bounds default to 0 and are overwritten only when
    # the page states "N以上" (N or more), "N以下" (N or less) or "A-B".
    price_min = 0
    price_max = 0
    price_match = re.search(self.house_price_regex_zu, response)
    if price_match:
        house_price_zu = price_match.group(1)
        house_price_zu = house_price_zu.replace('元/月','')
        if "以上" in house_price_zu:
            price_min = int(house_price_zu.replace('以上',''))
        elif "以下" in house_price_zu:
            price_max = int(house_price_zu.replace('以下',''))
        elif "-" in house_price_zu:
            bounds = house_price_zu.split('-')
            price_max = int(bounds[1])
            price_min = int(bounds[0])
    self.fd['house_price_max'] = price_max
    self.fd['house_price'] = price_min

    # CSSSelector returns a list (never None); an empty list or an element
    # without text must fall through to the "use current time" branch
    # instead of raising IndexError/AttributeError.
    pub_time_nodes = CSSSelector('span.pub_time')(tree)
    posttime = None
    if pub_time_nodes and pub_time_nodes[0].text:
        posttime = pub_time_nodes[0].text.strip() or None
    if posttime:
        # The page only supplies month and day ("MM-DD ..."); year, hour
        # and minute are taken from a single snapshot of the local clock.
        now = time.localtime()
        month_day = posttime.split(' ')[0].split('-')
        Y = now.tm_year
        M = int(month_day[0])
        D = int(month_day[1])
        H = now.tm_hour
        Min = now.tm_min
        s = datetime.datetime(Y, M, D, H, Min)
        posttime = str(int(time.mktime(s.timetuple())))
        self.fd['house_posttime'] = posttime
    else:
        s = time.localtime(time.time())
        self.fd['house_posttime'] = str(int(time.mktime(s)))

    # Same list/None handling for the title; a missing or empty heading is
    # stored as None instead of crashing on ``None.replace``.
    title_nodes = CSSSelector("div.detail_title h1")(tree)
    house_title = None
    if title_nodes and title_nodes[0].text:
        house_title = title_nodes[0].text.strip() or None
    if house_title:
        house_title = house_title.replace("(求购)","").replace("(求租)","").replace("(出售)","")
    self.fd['house_title'] = house_title

    room_match = re.search(self.house_room_regex, response)
    if room_match:
        house_room = room_match.group(1)
        self.fd['house_room'] = int(house_room)
    else:
        self.fd['house_room'] = 0
    # ......... part of the code is omitted here in the original listing .........
示例3: sell
# 需要导入模块: from lxml.cssselect import CSSSelector [as 别名]
# 或者: from lxml.cssselect.CSSSelector import replace [as 别名]
#.........这里部分代码省略.........
else:
self.fd['house_price'] = None
posttime=CSSSelector('span.pub_time')(tree)!=None and CSSSelector('span.pub_time')(tree)[0].text.strip() or None
if posttime:
Y=int(time.strftime('%Y', time.localtime()))
M=int(posttime.split(' ')[0].split('-')[0])
D=int(posttime.split(' ')[0].split('-')[1])
s = datetime.datetime(Y,M,D,0,0)
posttime=int(time.mktime(s.timetuple()))
self.fd['posttime'] =posttime
else:
self.fd['posttime'] =None
if re.search(self.house_room_regex, response):
house_room=re.search(self.house_room_regex, response).group(1)
self.fd['house_room'] = house_room
else:
self.fd['house_room'] = '0'
if re.search(self.house_hall_regex, response):
house_hall=re.search(self.house_hall_regex, response).group(1)
self.fd['house_hall'] = house_hall
else:
self.fd['house_hall'] = '0'
if re.search(self.house_toilet_regex, response):
house_toilet=re.search(self.house_toilet_regex, response).group(1)
self.fd['house_toilet'] = house_toilet
else:
self.fd['house_toilet'] = '0'
house_title=CSSSelector("div.detail_title h1")(tree)[0] !=None and CSSSelector("div.detail_title h1")(tree)[0].text.strip() or None
self.fd['house_title'] = house_title.replace("(求购)","").replace("(求租)","").replace("(出售)","")
#描述
detail_box = soup.find('div',{'class':'detail_box'})
if detail_box:
house_desc = str(detail_box('p')[1])
self.fd['house_desc'] = re.sub("<.*?>|\n|\r|\t|联系我时请说明是从赶集网上看到的","",house_desc)
else:
self.fd['house_desc'] = None
d_i = soup.find('ul',{'class':'d_i'})
#小区名
#先处理JS
if re.search(self.xiaoqu_regex, response):
borough_name=re.search(self.xiaoqu_regex, response).group(1)
self.fd['borough_name'] = borough_name
if re.search(self.address_regex, response):
house_addr=re.search(self.address_regex, response).group(1)
self.fd['house_addr'] = house_addr
else:
if d_i.find(text="小区: "):
borough_box = d_i.find(text="小区: ").parent
borough_name = borough_box.find("a")
if borough_name:
self.fd['borough_name'] = borough_name.string
else:
self.fd['borough_name'] = None
#地址
if borough_name and borough_name.nextSibling:
house_addr = borough_name.nextSibling.string
self.fd['house_addr'] = re.sub("\(|\)| ","",house_addr)
else: