当前位置: 首页>>代码示例>>Python>>正文


Python standard.CleanText类代码示例

本文整理汇总了Python中weboob.browser.filters.standard.CleanText的典型用法代码示例。如果您正苦于以下问题:Python CleanText类的具体用法?Python CleanText怎么用?Python CleanText使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了CleanText类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: parse

            def parse(self, el):
                rooms_bedrooms_area = el.xpath(
                    './/div[@class="clearfix"]/ul[has-class("item-tags")]/li'
                )
                self.env['rooms'] = NotLoaded
                self.env['bedrooms'] = NotLoaded
                self.env['area'] = NotLoaded

                for item in rooms_bedrooms_area:
                    name = CleanText('.')(item)
                    if 'chambre' in name.lower():
                        name = 'bedrooms'
                        value = CleanDecimal('./strong')(item)
                    elif 'pièce' in name.lower():
                        name = 'rooms'
                        value = CleanDecimal('./strong')(item)
                    else:
                        name = 'area'
                        value = CleanDecimal(
                            Regexp(
                                CleanText(
                                    '.'
                                ),
                                r'(\d*\.*\d*) .*'
                            )
                        )(item)
                    self.env[name] = value
开发者ID:P4ncake,项目名称:weboob,代码行数:27,代码来源:pages.py

示例2: obj_photos

 def obj_photos(self):
     photos = []
     for i in range(1, CleanDecimal('//NbPhotos')(self) + 1):
         img = CleanText('//LienImage%s' % i, replace=[(u'w=69&h=52', u'w=786&h=481')])(self)
         url = img if img.startswith('http') else u'http://www.entreparticuliers.com%s' % img
         photos.append(HousingPhoto(url))
     return photos
开发者ID:h4wkmoon,项目名称:weboob,代码行数:7,代码来源:pages.py

示例3: find_account

    def find_account(self, acclabel, accowner):
        accowner = sorted(accowner.lower().split()) # first name and last name may not be ordered the same way on market site...

        # Check if history is present
        if CleanText(default=None).filter(self.doc.xpath('//body/p[contains(text(), "indisponible pour le moment")]')):
            return False

        ids = None
        for a in self.doc.xpath('//a[contains(@onclick, "indiceCompte")]'):
            self.logger.debug("get investment from onclick")

            label = CleanText('.')(a)
            owner = CleanText('./ancestor::tr/preceding-sibling::tr[@class="LnMnTiers"][1]')(a)
            owner = sorted(owner.lower().split())

            if label == acclabel and owner == accowner:
                ids = list(re.search(r'indiceCompte[^\d]+(\d+).*idRacine[^\d]+(\d+)', Attr('.', 'onclick')(a)).groups())
                ids.append(CleanText('./ancestor::td/preceding-sibling::td')(a))
                self.logger.debug("assign value to ids: {}".format(ids))
                return ids

        for a in self.doc.xpath('//a[contains(@href, "indiceCompte")]'):
            self.logger.debug("get investment from href")
            if CleanText('.')(a) == acclabel:
                ids = list(re.search(r'indiceCompte[^\d]+(\d+).*idRacine[^\d]+(\d+)', Attr('.', 'href')(a)).groups())
                ids.append(CleanText('./ancestor::td/preceding-sibling::td')(a))
                self.logger.debug("assign value to ids: {}".format(ids))
                return ids
开发者ID:P4ncake,项目名称:weboob,代码行数:28,代码来源:pages.py

示例4: obj_size

 def obj_size(self):
     rawsize = CleanText('//div[has-class("files")]/../h5')(self)
     s = rawsize.split(',')[-1].replace(')', '')
     nsize = float(re.sub(r'[A-Za-z]', '', s))
     usize = re.sub(r'[.0-9 ]', '', s).upper()
     size = get_bytes_size(nsize, usize)
     return size
开发者ID:P4ncake,项目名称:weboob,代码行数:7,代码来源:pages.py

示例5: obj_details

        def obj_details(self):
            details = {}

            details["creationDate"] = Date(
                Regexp(
                    CleanText(
                        '//p[@class="offer-description-notes"]|//p[has-class("darkergrey")]'
                    ),
                    u'.*Mis en ligne : (\d{2}/\d{2}/\d{4}).*'
                ),
                dayfirst=True
            )(self)

            honoraires = CleanText(
                (
                    '//div[has-class("offer-price")]/span[has-class("lbl-agencyfees")]'
                ),
                default=None
            )(self)
            if honoraires:
                details["Honoraires"] = (
                    "{} (TTC, en sus)".format(
                        honoraires.split(":")[1].strip()
                    )
                )

            for li in XPath('//ul[@itemprop="description"]/li')(self):
                label = CleanText('./div[has-class("criteria-label")]')(li)
                value = CleanText('./div[has-class("criteria-value")]')(li)
                details[label] = value

            return details
开发者ID:P4ncake,项目名称:weboob,代码行数:32,代码来源:pages.py

示例6: obj_id

 def obj_id(self):
     href = CleanText('./td[2]/a/@href', default=NotAvailable)(self)
     spl = href.replace('.html', '').split('/')
     lid = spl[2]
     aid = spl[3]
     sid = spl[4]
     return '%s|%s|%s' % (lid, aid, sid)
开发者ID:laurentb,项目名称:weboob,代码行数:7,代码来源:pages.py

示例7: obj_type

 def obj_type(self):
     url = BrowserURL('housing', _id=Env('_id'))(self)
     if 'colocation' in url:
         return POSTS_TYPES.SHARING
     elif 'location' in url:
         isFurnished = False
         for li in XPath('//ul[@itemprop="description"]/li')(self):
             label = CleanText('./div[has-class("criteria-label")]')(li)
             if label.lower() == "meublé":
                 isFurnished = (
                     CleanText('./div[has-class("criteria-value")]')(li).lower() == 'oui'
                 )
         if isFurnished:
             return POSTS_TYPES.FURNISHED_RENT
         else:
             return POSTS_TYPES.RENT
     elif 'vente' in url:
         offertype = Attr(
             '//button[has-class("offer-contact-vertical-phone")][1]',
             'data-offertransactiontype'
         )(self)
         if offertype == '4':
             return POSTS_TYPES.VIAGER
         else:
             return POSTS_TYPES.SALE
     return NotAvailable
开发者ID:P4ncake,项目名称:weboob,代码行数:26,代码来源:pages.py

示例8: obj_nb_person

 def obj_nb_person(self):
     nb_person = CleanText(
         '//span[@class="bu_cuisine_title_3 bu_cuisine_title_3--subtitle"]'
     )(self)
     nb_person = nb_person.lstrip('/').replace("pour", "").strip()
     return [
         nb_person
     ]
开发者ID:laurentb,项目名称:weboob,代码行数:8,代码来源:pages.py

示例9: obj_label

            def obj_label(self):
                raw_label = CleanText(TableCell('label'))(self)
                label = CleanText(TableCell('label')(self)[0].xpath('./br/following-sibling::text()'))(self)

                if (label and label.split()[0] != raw_label.split()[0]) or not label:
                    label = raw_label

                return CleanText(TableCell('label')(self)[0].xpath('./noscript'))(self) or label
开发者ID:P4ncake,项目名称:weboob,代码行数:8,代码来源:accounthistory.py

示例10: obj_utilities

 def obj_utilities(self):
     price = CleanText(
         '//p[has-class("OfferTop-price")]'
     )(self)
     if "charges comprises" in price.lower():
         return UTILITIES.INCLUDED
     else:
         return UTILITIES.EXCLUDED
开发者ID:P4ncake,项目名称:weboob,代码行数:8,代码来源:pages.py

示例11: obj_DPE

 def obj_DPE(self):
     energy_value = CleanText(
         '//div[has-class("offer-energy-greenhouseeffect-summary")]//div[has-class("energy-summary")]',
         default=""
     )(self)
     if len(energy_value):
         energy_value = energy_value.replace("DPE", "").strip()[0]
     return getattr(ENERGY_CLASS, energy_value, NotAvailable)
开发者ID:P4ncake,项目名称:weboob,代码行数:8,代码来源:pages.py

示例12: obj_GES

 def obj_GES(self):
     greenhouse_value = CleanText(
         '//div[has-class("offer-energy-greenhouseeffect-summary")]//div[has-class("greenhouse-summary")]',
         default=""
     )(self)
     if len(greenhouse_value):
         greenhouse_value = greenhouse_value.replace("GES", "").strip()[0]
     return getattr(ENERGY_CLASS, greenhouse_value, NotAvailable)
开发者ID:P4ncake,项目名称:weboob,代码行数:8,代码来源:pages.py

示例13: obj_url

        def obj_url(self):
            url = CleanText('//meta[@property="og:url"]/@content', default=None)(self)
            if url is None:
                url = JSVar(CleanText('//script'), var='_JobUrl')(self)

            if not url.startswith('http'):
                url = 'www.adecco.fr%s' % url

            return url
开发者ID:P4ncake,项目名称:weboob,代码行数:9,代码来源:pages.py

示例14: next_page

 def next_page(self):
     js_datas = CleanText('//div[@id="js-data"]/@data-rest-search-request')(self)
     total_page = self.page.browser.get_total_page(js_datas.split("?")[-1])
     m = re.match(".*page=(\d?)(?:&.*)?", self.page.url)
     if m:
         current_page = int(m.group(1))
         next_page = current_page + 1
         if next_page <= total_page:
             return self.page.url.replace("page=%d" % current_page, "page=%d" % next_page)
开发者ID:nojhan,项目名称:weboob-devel,代码行数:9,代码来源:pages.py

示例15: get_author

 def get_author(self):
     try:
         author = CleanText('.')(self.get_element_author())
         if author.startswith('Par '):
             return author.split('Par ', 1)[1]
         else:
             return author
     except AttributeError:
         return ''
开发者ID:P4ncake,项目名称:weboob,代码行数:9,代码来源:pages.py


注:本文中的weboob.browser.filters.standard.CleanText类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。