本文整理汇总了 Python 中 weboob.browser.filters.standard.CleanText 类的典型用法代码示例。如果您正苦于以下问题:Python CleanText 类的具体用法?Python CleanText 怎么用?Python CleanText 使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了 CleanText 类的 15 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: parse
def parse(self, el):
    """Fill ``self.env`` with the rooms, bedrooms and area of a listing.

    The three keys start out as ``NotLoaded``. Every ``<li>`` of the
    ``item-tags`` list is then classified by its text: items mentioning
    "chambre" feed ``bedrooms``, items mentioning "pièce" feed ``rooms``,
    and any other item is treated as the area.
    """
    tag_items = el.xpath(
        './/div[@class="clearfix"]/ul[has-class("item-tags")]/li'
    )
    for key in ('rooms', 'bedrooms', 'area'):
        self.env[key] = NotLoaded

    for tag in tag_items:
        lowered = CleanText('.')(tag).lower()
        if 'chambre' in lowered:
            key = 'bedrooms'
            extractor = CleanDecimal('./strong')
        elif 'pièce' in lowered:
            key = 'rooms'
            extractor = CleanDecimal('./strong')
        else:
            # No <strong> lookup here: the number is taken from the part of
            # the item's own text that precedes the first space.
            key = 'area'
            extractor = CleanDecimal(
                Regexp(CleanText('.'), r'(\d*\.*\d*) .*')
            )
        self.env[key] = extractor(tag)
示例2: obj_photos
def obj_photos(self):
    """Build the list of photos referenced by the document.

    The number of photos is read from ``//NbPhotos``; photo ``i`` (1-based)
    is read from ``//LienImage<i>``, with the ``w=69&h=52`` size in the link
    replaced by ``w=786&h=481``. Links that do not start with ``http`` are
    prefixed with the site host.

    Returns:
        list: one ``HousingPhoto`` per numbered image link.
    """
    # range() only accepts real integers. CleanDecimal presumably yields a
    # Decimal (TODO confirm), which Python 3 refuses, so coerce explicitly.
    photo_count = int(CleanDecimal('//NbPhotos')(self))

    photos = []
    for i in range(1, photo_count + 1):
        img = CleanText('//LienImage%s' % i, replace=[(u'w=69&h=52', u'w=786&h=481')])(self)
        url = img if img.startswith('http') else u'http://www.entreparticuliers.com%s' % img
        photos.append(HousingPhoto(url))
    return photos
示例3: find_account
def find_account(self, acclabel, accowner):
    """Look up the identifiers of the account matching a label and an owner.

    Anchors whose ``onclick`` mentions ``indiceCompte`` are tried first and
    must match both the label and the owner (compared as sorted, lowercased
    words). Anchors whose ``href`` mentions it are tried next and are
    matched on the label alone.

    Returns ``False`` when the page contains the "indisponible pour le
    moment" paragraph, a three-item list (the two numbers captured from the
    link, then the text of the preceding table cells) on a match, and
    ``None`` when no anchor matches.
    """
    # NOTE(review): the source lost its indentation; both ``return`` statements
    # are assumed to sit inside their ``if``, i.e. the first match wins.

    # first name and last name may not be ordered the same way on market site...
    wanted_owner = sorted(accowner.lower().split())

    # Check if history is present
    if CleanText(default=None).filter(self.doc.xpath('//body/p[contains(text(), "indisponible pour le moment")]')):
        return False

    def collect_ids(anchor, attribute):
        # Capture both numbers from the given link attribute, then append
        # the text of the cells preceding the anchor's enclosing cell.
        found = re.search(r'indiceCompte[^\d]+(\d+).*idRacine[^\d]+(\d+)', Attr('.', attribute)(anchor))
        collected = list(found.groups())
        collected.append(CleanText('./ancestor::td/preceding-sibling::td')(anchor))
        self.logger.debug("assign value to ids: {}".format(collected))
        return collected

    for anchor in self.doc.xpath('//a[contains(@onclick, "indiceCompte")]'):
        self.logger.debug("get investment from onclick")
        label = CleanText('.')(anchor)
        owner_text = CleanText('./ancestor::tr/preceding-sibling::tr[@class="LnMnTiers"][1]')(anchor)
        owner_words = sorted(owner_text.lower().split())
        if label == acclabel and owner_words == wanted_owner:
            return collect_ids(anchor, 'onclick')

    for anchor in self.doc.xpath('//a[contains(@href, "indiceCompte")]'):
        self.logger.debug("get investment from href")
        if CleanText('.')(anchor) == acclabel:
            return collect_ids(anchor, 'href')
示例4: obj_size
def obj_size(self):
    """Return the size announced in the heading next to the files block.

    Only the text after the last comma of the heading is used, with closing
    parentheses removed. Its letters are stripped to get the number and its
    digits, dots and spaces are stripped to get the (uppercased) unit; both
    are handed to ``get_bytes_size``.
    """
    heading = CleanText('//div[has-class("files")]/../h5')(self)
    tail = heading.rsplit(',', 1)[-1].replace(')', '')

    amount = float(re.sub(r'[A-Za-z]', '', tail))
    unit = re.sub(r'[.0-9 ]', '', tail).upper()
    return get_bytes_size(amount, unit)
示例5: obj_details
def obj_details(self):
    """Collect the free-form details of an offer into a dict.

    The dict always gets a ``"creationDate"`` entry parsed (day first) from
    the "Mis en ligne : dd/mm/yyyy" note. An ``"Honoraires"`` entry is added
    when the agency-fees label is present, and every ``<li>`` of the
    description list contributes one ``label -> value`` entry.

    Returns:
        dict: detail name mapped to its extracted value.
    """
    details = {}

    # The pattern is a raw string because it contains ``\d``, which is an
    # invalid escape sequence in a plain literal.
    details["creationDate"] = Date(
        Regexp(
            CleanText(
                '//p[@class="offer-description-notes"]|//p[has-class("darkergrey")]'
            ),
            r'.*Mis en ligne : (\d{2}/\d{2}/\d{4}).*'
        ),
        dayfirst=True
    )(self)

    honoraires = CleanText(
        '//div[has-class("offer-price")]/span[has-class("lbl-agencyfees")]',
        default=None
    )(self)
    if honoraires:
        # Keep what follows the first colon of the label (up to the next
        # colon, if there is one).
        details["Honoraires"] = "{} (TTC, en sus)".format(
            honoraires.split(":")[1].strip()
        )

    for li in XPath('//ul[@itemprop="description"]/li')(self):
        label = CleanText('./div[has-class("criteria-label")]')(li)
        value = CleanText('./div[has-class("criteria-value")]')(li)
        details[label] = value

    return details
示例6: obj_id
def obj_id(self):
    """Build the object id from the link found in the second table cell.

    The ``.html`` suffix is dropped from the link, the link is split on
    ``/``, and the segments at index 2, 3 and 4 are joined with ``|``.
    """
    # NOTE(review): the NotAvailable default is used below as if it were a
    # string; confirm it supports ``replace`` or that the link always exists.
    link = CleanText('./td[2]/a/@href', default=NotAvailable)(self)
    segments = link.replace('.html', '').split('/')
    return '%s|%s|%s' % (segments[2], segments[3], segments[4])
示例7: obj_type
def obj_type(self):
    """Classify the offer from keywords found in its housing URL.

    * "colocation" -> ``POSTS_TYPES.SHARING``
    * "location"   -> ``FURNISHED_RENT`` when the "meublé" criterion of the
      description list reads "oui", ``RENT`` otherwise
    * "vente"      -> ``VIAGER`` when the contact button's
      ``data-offertransactiontype`` is ``'4'``, ``SALE`` otherwise

    Returns ``NotAvailable`` when the URL contains none of these keywords.
    """
    url = BrowserURL('housing', _id=Env('_id'))(self)

    # "colocation" contains "location", so it has to be tested first.
    if 'colocation' in url:
        return POSTS_TYPES.SHARING

    if 'location' in url:
        furnished = False
        for entry in XPath('//ul[@itemprop="description"]/li')(self):
            criterion = CleanText('./div[has-class("criteria-label")]')(entry)
            if criterion.lower() != "meublé":
                continue
            # If the criterion appears several times, the last one wins.
            answer = CleanText('./div[has-class("criteria-value")]')(entry)
            furnished = answer.lower() == 'oui'
        return POSTS_TYPES.FURNISHED_RENT if furnished else POSTS_TYPES.RENT

    if 'vente' in url:
        transaction = Attr(
            '//button[has-class("offer-contact-vertical-phone")][1]',
            'data-offertransactiontype'
        )(self)
        return POSTS_TYPES.VIAGER if transaction == '4' else POSTS_TYPES.SALE

    return NotAvailable
示例8: obj_nb_person
def obj_nb_person(self):
    """Return the serving-size subtitle as a one-element list.

    Leading slashes, every occurrence of "pour" and the surrounding
    whitespace are removed from the subtitle text.
    """
    subtitle = CleanText(
        '//span[@class="bu_cuisine_title_3 bu_cuisine_title_3--subtitle"]'
    )(self)
    without_slash = subtitle.lstrip('/')
    cleaned = without_slash.replace("pour", "").strip()
    return [cleaned]
示例9: obj_label
def obj_label(self):
    """Pick the most specific label available in the 'label' table cell.

    The ``<noscript>`` text wins when it is non-empty. Otherwise the text
    following the ``<br>`` is used, provided it is non-empty and starts with
    the same word as the whole cell text; failing that, the whole cell text
    is returned.
    """
    def cell_text(xpath):
        # Clean text of the nodes selected by ``xpath`` in the first 'label' cell.
        return CleanText(TableCell('label')(self)[0].xpath(xpath))(self)

    whole = CleanText(TableCell('label'))(self)
    after_break = cell_text('./br/following-sibling::text()')
    if not after_break or after_break.split()[0] != whole.split()[0]:
        after_break = whole
    return cell_text('./noscript') or after_break
示例10: obj_utilities
def obj_utilities(self):
    """Tell whether the displayed price includes the charges.

    Returns ``UTILITIES.INCLUDED`` when the price paragraph mentions
    "charges comprises" (case-insensitively), ``UTILITIES.EXCLUDED`` otherwise.
    """
    price_text = CleanText('//p[has-class("OfferTop-price")]')(self).lower()
    return UTILITIES.INCLUDED if "charges comprises" in price_text else UTILITIES.EXCLUDED
示例11: obj_DPE
def obj_DPE(self):
    """Return the energy class shown in the energy summary.

    The class is the first character left in the summary text once "DPE"
    has been removed and the text stripped; it is looked up as an attribute
    of ``ENERGY_CLASS``.

    Returns:
        The matching ``ENERGY_CLASS`` member, or ``NotAvailable`` when the
        summary is missing, holds no class, or holds an unknown one.
    """
    summary = CleanText(
        '//div[has-class("offer-energy-greenhouseeffect-summary")]//div[has-class("energy-summary")]',
        default=""
    )(self)
    # Slice rather than index: a summary holding nothing but the "DPE"
    # caption leaves an empty string, on which [0] would raise IndexError.
    grade = summary.replace("DPE", "").strip()[:1]
    return getattr(ENERGY_CLASS, grade, NotAvailable)
示例12: obj_GES
def obj_GES(self):
    """Return the greenhouse-gas class shown in the greenhouse summary.

    The class is the first character left in the summary text once "GES"
    has been removed and the text stripped; it is looked up as an attribute
    of ``ENERGY_CLASS``.

    Returns:
        The matching ``ENERGY_CLASS`` member, or ``NotAvailable`` when the
        summary is missing, holds no class, or holds an unknown one.
    """
    summary = CleanText(
        '//div[has-class("offer-energy-greenhouseeffect-summary")]//div[has-class("greenhouse-summary")]',
        default=""
    )(self)
    # Slice rather than index: a summary holding nothing but the "GES"
    # caption leaves an empty string, on which [0] would raise IndexError.
    grade = summary.replace("GES", "").strip()[:1]
    return getattr(ENERGY_CLASS, grade, NotAvailable)
示例13: obj_url
def obj_url(self):
    """Return the URL of the job advert.

    The ``og:url`` meta tag is preferred; when it is absent the ``_JobUrl``
    JavaScript variable found in the page scripts is used instead. A value
    that does not start with ``http`` is prefixed with ``www.adecco.fr``.
    """
    # NOTE(review): the source lost its indentation; the ``startswith`` check
    # is read here as applying to both sources, not only to the fallback.
    # NOTE(review): the prefix carries no scheme, so the result is not an
    # absolute URL - confirm this is what callers expect.
    link = CleanText('//meta[@property="og:url"]/@content', default=None)(self)
    if link is None:
        link = JSVar(CleanText('//script'), var='_JobUrl')(self)
    return link if link.startswith('http') else 'www.adecco.fr%s' % link
示例14: next_page
def next_page(self):
    """Return the URL of the next results page, or ``None`` if there is none.

    The total number of pages is asked from the browser, using the query
    string of the ``data-rest-search-request`` attribute. The current page
    number is read from the last ``page=<n>`` found in the current URL.

    Returns:
        The current URL with that page number incremented, or ``None`` when
        the URL carries no page number or the last page has been reached.
    """
    js_datas = CleanText('//div[@id="js-data"]/@data-rest-search-request')(self)
    total_page = self.page.browser.get_total_page(js_datas.split("?")[-1])

    current_url = self.page.url
    # ``\d+`` (not ``\d?``): capture the whole number so that pages >= 10 are
    # read correctly, and require at least one digit so int() cannot fail.
    m = re.match(r".*page=(\d+)", current_url)
    if m is None:
        return None

    following = int(m.group(1)) + 1
    if following > total_page:
        return None

    # Rewrite only the digits that were matched; a plain str.replace would
    # also touch any other "page=<n>" substring (e.g. inside "per_page=10").
    return "%s%d%s" % (current_url[:m.start(1)], following, current_url[m.end(1):])
示例15: get_author
def get_author(self):
    """Return the author's name without its leading "Par " prefix.

    The name is the cleaned text of the element given by
    ``self.get_element_author()``. An empty string is returned when an
    ``AttributeError`` is raised along the way.
    """
    try:
        name = CleanText('.')(self.get_element_author())
        if not name.startswith('Par '):
            return name
        return name.split('Par ', 1)[1]
    except AttributeError:
        return ''