当前位置: 首页>>代码示例>>Python>>正文


Python cssselect.CSSSelector类代码示例

本文整理汇总了 Python 中 lxml.cssselect.CSSSelector 的典型用法代码示例。如果您正苦于以下问题:Python CSSSelector 类的具体用法?Python CSSSelector 怎么用?Python CSSSelector 使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了CSSSelector类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: get_selected_items

    def get_selected_items(self):
        """Yield an ``Item`` for every entry on the "selected items" page.

        The page at ``self.url("selected_items")`` is fetched through
        ``self.session`` and parsed with lxml.  For each
        ``div[headers="th_selected_items"]`` container an ``Item`` is built:

        * ``name`` / ``url`` come from the link inside the title heading, or
          from the heading itself when it contains no link (``url`` is then
          whatever ``get("href")`` returns for the heading);
        * ``icon`` is the ``src`` of the list icon, or ``None`` when the
          container has no icon.

        Containers without a title heading are skipped instead of raising
        ``IndexError`` in the middle of the iteration.
        """
        response = self.session.get(self.url("selected_items"))
        tree = lxml.html.fromstring(response.text)

        # Compile every selector once rather than once per result row.
        item_sel = CSSSelector('div[headers="th_selected_items"]')
        name_sel = CSSSelector("h4.il_ContainerItemTitle")
        link_sel = CSSSelector("a")
        icon_sel = CSSSelector("img.ilListItemIcon")

        for result in item_sel(tree):
            titles = name_sel(result)
            if not titles:
                # Nothing to name this entry with.
                continue
            title = titles[0]

            # Prefer the anchor inside the heading; fall back to the heading.
            links = link_sel(title)
            name = links[0] if links else title

            item = Item()
            item.name = name.text
            item.url = name.get("href")

            icons = icon_sel(result)
            item.icon = icons[0].get("src") if icons else None

            yield item
开发者ID:lukasklein,项目名称:fh-muenster-ilias,代码行数:28,代码来源:ilias.py

示例2: _fetch_from_cache

    def _fetch_from_cache(language, url):
        """Return a CMS page split into titled sections.

        The HTML for ``language``/``url`` is read from ``cache`` when the CMS
        URL is present there; otherwise it is fetched with
        ``utils.get_cms_page`` and stored.  Tables of contents (``.toc``) and
        the ``.page-title`` element are removed from the tree, then the direct
        children of the first ``.cms-content`` element are grouped into
        sections: every child matching ``.section-header`` starts a section
        that runs until the next such child.

        Returns a dict ``{"title": <page title text>, "contents": [...]}``
        where each entry of ``contents`` has the keys ``is_important``,
        ``title``, ``body`` (serialized HTML of the section) and ``icon``
        (``src`` of the i-th ``.title-icon`` element, or ``""``).

        Raises ``IndexError`` when the page has no ``.page-title`` or
        ``.cms-content`` element, or a section header has no ``a[name]``.
        """
        from . import utils

        cms_url = utils.get_cms_url(language, url)

        if cms_url in cache:
            html = cache.get(cms_url)
        else:
            html = utils.get_cms_page(language, url)
            cache.set(cms_url, html)

        parser = etree.HTMLParser()
        tree = etree.parse(StringIO(html), parser).getroot()

        # Removing all table of contents
        for table in CSSSelector('.toc')(tree):
            table.getparent().remove(table)

        title = CSSSelector('.page-title')(tree)[0]
        title.getparent().remove(title)

        elements = list(CSSSelector('.cms-content')(tree)[0])

        # Compile the per-element selectors once instead of inside the loops.
        header_sel = CSSSelector('.section-header')
        anchor_sel = CSSSelector('a[name]')
        important_sel = CSSSelector('.important')

        headers = [i for i, e in enumerate(elements) if header_sel(e)]
        title_icons = list(CSSSelector('.title-icon')(tree))

        # Strip "dir" attributes once; this does not depend on the section
        # being processed, so it does not belong inside the section loop.
        for e in elements:
            if 'dir' in e.attrib:
                del e.attrib['dir']

        page_contents = []

        for i, h in enumerate(headers):
            icon = ""
            if i < len(title_icons) and 'src' in title_icons[i].attrib:
                icon = title_icons[i].attrib['src']

            element = elements[h]
            # A section's body runs up to the next header, or to the end of
            # the content for the last header.
            end = headers[i + 1] if (i + 1) < len(headers) else None
            contents = elements[h + 1:end]

            section_title = anchor_sel(element)[0].text
            # NOTE(review): etree.tostring returns bytes on Python 3 unless
            # encoding="unicode" is passed -- confirm the target interpreter.
            section_body = "".join(
                etree.tostring(c, pretty_print=True, method="html")
                for c in contents
            )

            page_contents.append({
                "is_important": bool(important_sel(element)),
                "title": section_title,
                "body": section_body,
                "icon": icon
            })

        return {
            "title": title.text,
            "contents": page_contents
        }
开发者ID:benrito,项目名称:refugeeinfo.eu,代码行数:60,代码来源:api.py

示例3: process_html

	def process_html(self, html, path):
		"""Record the ids, classes and inline ``<style>`` blocks of one page.

		``html`` is the UTF-8 encoded page source (bytes) and ``path``
		identifies where it came from.  The parsed ``<body>`` is appended to
		``self._bodies``; when ``self.optimize_lookup`` is set, every id and
		class name found in the body is added to ``self._all_ids`` /
		``self._all_classes``.  The text of each inline ``<style>`` tag is
		stored in ``self.blocks`` under ``(line_number, path)``, where
		``line_number`` is the 1-based source line containing the first line
		of the style text.

		Raises ``ParserError`` when the document has no root element and
		``ValueError`` when it does not contain exactly one ``<body>``.
		"""
		parser = etree.HTMLParser(encoding='utf-8')
		# Decode once and use the same text for parsing and for the line
		# search below, so text is never compared against raw bytes.
		text = html.decode('utf-8')
		tree = etree.fromstring(text, parser).getroottree()
		page = tree.getroot()

		if page is None:
			print(repr(html))
			raise ParserError('Could not parse the html')

		lines = text.splitlines()
		body, = CSSSelector('body')(page)
		self._bodies.append(body)
		if self.optimize_lookup:
			for each in body.iter():
				identifier = each.attrib.get('id')
				if identifier:
					self._all_ids.add(identifier)
				classes = each.attrib.get('class')
				if classes:
					for class_ in classes.split():
						self._all_classes.add(class_)

		for style in CSSSelector('style')(page):
			# style.text is None for an empty tag and may be whitespace only;
			# in both cases there is no first line to look for.
			style_lines = (style.text or '').strip().splitlines()
			if not style_lines:
				continue
			first_line = style_lines[0]
			for i, line in enumerate(lines):
				if first_line in line:
					key = (i + 1, path)
					self.blocks[key] = style.text
					break
开发者ID:mikelsons,项目名称:css-dust-cleaner,代码行数:29,代码来源:dustcleaner.py

示例4: detect_withdrawn

 def detect_withdrawn(self, tree, url):
     """Report whether the page in ``tree`` looks like a withdrawn paper.

     The text of the first ``.tablecell.comments`` element is searched
     case-insensitively for "withdrawn".  On a hit a notice mentioning
     ``url`` is printed and ``True`` is returned; otherwise ``False``.
     """
     cells = CSSSelector(".tablecell.comments")(tree)
     if not cells:
         return False

     remark = cells[0].text_content()
     if "withdrawn" not in remark.lower():
         return False

     print("Paper", url, "appears to be withdrawn!")
     return True
开发者ID:seasky100,项目名称:AI-metrics,代码行数:8,代码来源:taxonomy.py

示例5: post_node

def post_node(title, datetime, content):
    """Fill a copy of the ``POST`` template and return it.

    ``title`` becomes the text of the ``.title .text`` element, ``datetime``
    is rendered into the ``.datetime`` element, and the cleaned HTML
    fragments of ``content`` are appended to the ``.content`` element.
    """
    node = copy(POST)

    CSSSelector('.title .text')(node)[0].text = title

    # "%%s" survives strftime as a literal "%s", which is then filled in
    # with the value returned by niceday().
    stamp = datetime.strftime("%H:%M on %A the %%s of %B, %Y") % niceday(datetime)
    CSSSelector('.datetime')(node)[0].text = stamp

    body_el = CSSSelector('.content')(node)[0]
    cleaned = cleaner_trusted.clean_html(content)
    for piece in fragments_fromstring(cleaned):
        body_el.append(piece)

    return node
开发者ID:ahri,项目名称:nodeblog,代码行数:9,代码来源:blog.py

示例6: get_or_create_head

def get_or_create_head(root):
    """Return the <head> element under `root`, creating it when missing.

    A newly created <head> is inserted as the first child of the parent of
    the first <body> element found under `root`.
    """
    found = CSSSelector('head')(root)
    if found:
        return found[0]

    new_head = etree.Element('head')
    first_body = CSSSelector('body')(root)[0]
    container = first_body.getparent()
    container.insert(0, new_head)
    return new_head
开发者ID:abhijitmamarde,项目名称:premailer,代码行数:11,代码来源:premailer.py

示例7: process_html

    def process_html(self, html, url):
        """Collect ids, classes and CSS blocks referenced by one HTML page.

        ``html`` is the page source as text and ``url`` the address it was
        loaded from.  The parsed ``<body>`` is appended to ``self._bodies``;
        when ``self.optimize_lookup`` is set, every id and class name in the
        body is added to ``self._all_ids`` / ``self._all_classes``.

        ``self.blocks`` receives:

        * the text of each inline ``<style>`` tag under
          ``(line_number, url)``, where ``line_number`` is the 1-based source
          line that contains the first line of the style text;
        * the downloaded content of each stylesheet ``<link>`` under
          ``(absolute_url, href)``, with URLs rewritten when
          ``self.preserve_remote_urls`` is set.

        Raises ``ParserError`` when the document has no root element and
        ``ValueError`` when it does not contain exactly one ``<body>``.
        """
        parser = etree.HTMLParser(encoding='utf-8')
        tree = etree.fromstring(html.encode('utf-8'), parser).getroottree()
        page = tree.getroot()

        if page is None:
            print(repr(html))
            raise ParserError('Could not parse the html')

        lines = html.splitlines()
        body, = CSSSelector('body')(page)
        self._bodies.append(body)
        if self.optimize_lookup:
            for each in body.iter():
                identifier = each.attrib.get('id')
                if identifier:
                    self._all_ids.add(identifier)
                classes = each.attrib.get('class')
                if classes:
                    for class_ in classes.split():
                        self._all_classes.add(class_)

        for style in CSSSelector('style')(page):
            try:
                first_line = style.text.strip().splitlines()[0]
            except (IndexError, AttributeError):
                # IndexError: the inline style tag was just whitespace.
                # AttributeError: the style tag has absolutely nothing in
                # it, not even whitespace (style.text is None).
                continue
            for i, line in enumerate(lines):
                if first_line in line:
                    key = (i + 1, url)
                    self.blocks[key] = style.text
                    break

        for link in CSSSelector('link')(page):
            href = link.attrib.get('href')
            if not href:
                # A <link> without an href cannot point at a stylesheet;
                # indexing attrib['href'] here would raise KeyError.
                continue
            if (
                link.attrib.get('rel', '') == 'stylesheet' or
                href.lower().split('?')[0].endswith('.css')
            ):
                link_url = self.make_absolute_url(url, href)
                key = (link_url, href)
                self.blocks[key] = self.download(link_url)
                if self.preserve_remote_urls:
                    self.blocks[key] = self._rewrite_urls(
                        self.blocks[key],
                        link_url
                    )
开发者ID:alanjds,项目名称:mincss,代码行数:51,代码来源:processor.py

示例8: get_submission_dates

    def get_submission_dates(self, arxiv_tree, queried_version):
        """Map version labels to submission dates from an arXiv page.

        Each line of the first ``div.submission-history`` element is matched
        against ``self.version_re``; group 1 is the version label and group 2
        a date in ``'%a, %d %b %Y'`` format.  As soon as the label equal to
        ``queried_version`` is seen, a one-entry dict for that version is
        returned; otherwise the dict of all versions found is returned.
        """
        history = CSSSelector("div.submission-history")(arxiv_tree)[0]
        dates_by_version = {}

        for raw_line in history.text_content().split("\n"):
            found = self.version_re.match(raw_line)
            if not found:
                continue

            version = found.group(1)
            stamp = found.group(2)
            submitted = datetime.datetime.strptime(stamp, '%a, %d %b %Y').date()
            dates_by_version[version] = submitted

            if queried_version == version:
                return {version: submitted}

        return dates_by_version
开发者ID:seasky100,项目名称:AI-metrics,代码行数:15,代码来源:taxonomy.py

示例9: process_html

    def process_html(self, html, url):
        """Collect ids, classes and CSS blocks referenced by one HTML page.

        ``html`` is the page source and ``url`` the address it was loaded
        from.  The parsed ``<body>`` is appended to ``self._bodies``; when
        ``self.optimize_lookup`` is set, every id and class name in the body
        is added to ``self._all_ids`` / ``self._all_classes``.

        ``self.blocks`` receives the text of each inline ``<style>`` tag under
        ``(line_number, url)`` and the downloaded content of each stylesheet
        ``<link>`` under ``(absolute_url, href)``; the latter has its URLs
        rewritten when ``self.preserve_remote_urls`` is set.

        Raises ``ParserError`` when the document has no root element and
        ``ValueError`` when it does not contain exactly one ``<body>``.
        """
        parser = etree.HTMLParser()
        tree = etree.fromstring(html, parser).getroottree()
        page = tree.getroot()

        if page is None:
            # Single-argument call form prints the same text on Python 2
            # and is valid syntax on Python 3.
            print(repr(html))
            raise ParserError("Could not parse the html")

        lines = html.splitlines()
        body, = CSSSelector('body')(page)
        self._bodies.append(body)
        if self.optimize_lookup:
            for each in body.iter():
                identifier = each.attrib.get('id')
                if identifier:
                    self._all_ids.add(identifier)
                classes = each.attrib.get('class')
                if classes:
                    for class_ in classes.split():
                        self._all_classes.add(class_)

        for style in CSSSelector('style')(page):
            # style.text is None for an empty tag and may be whitespace only;
            # in both cases there is no first line to look for.
            style_lines = (style.text or '').strip().splitlines()
            if not style_lines:
                continue
            first_line = style_lines[0]
            for i, line in enumerate(lines):
                if line.count(first_line):
                    key = (i + 1, url)
                    self.blocks[key] = style.text
                    break

        for link in CSSSelector('link')(page):
            href = link.attrib.get('href')
            if not href:
                # A <link> without an href cannot point at a stylesheet;
                # indexing attrib['href'] here would raise KeyError.
                continue
            if (
                link.attrib.get('rel', '') == 'stylesheet' or
                href.lower().split('?')[0].endswith('.css')
            ):
                link_url = self.make_absolute_url(url, href)
                key = (link_url, href)
                self.blocks[key] = self._download(link_url)
                if self.preserve_remote_urls:
                    self.blocks[key] = self._rewrite_urls(
                        self.blocks[key],
                        link_url
                    )
开发者ID:JHei,项目名称:mincss,代码行数:43,代码来源:processor.py

示例10: load_stations

def load_stations(file="stations-converted.json"):
    """Load the station list into ``STATIONS`` and dump measurement CSV rows.

    The JSON in ``file`` is deserialized into the module-level ``STATIONS``.
    Then, once per station, a fixed sequence of IS ARROW pages is walked
    (search form -> sample list -> last table link -> first form link) and
    every row of the CSV behind the final link is printed.  Any failure for
    a station is reported with its id and a traceback; the loop continues.
    """
    global STATIONS

    with open(file) as f:
        STATIONS = anyjson.deserialize(f.read())

    def fetch_tree(address):
        # Download one page and parse it; the pages are decoded as cp1250.
        return fromstring(urllib2.urlopen(address).read().decode("cp1250"))

    for station in STATIONS.values():
        try:
            uri = "http://hydro.chmi.cz/isarrow/object.php?seq=2000855701&chemie=1&biota=1&ukol_p=1&id_objekt=&vod_typ=R&nadmh_sign=%3E&rickm_sign=%3E&rok_od=2007&rok_do=2012&objekty_chemdata=1&matrice=2000868184&typodb=41"
            seq_selector = CSSSelector("form input[name='seq']")
            seq = seq_selector(fetch_tree(uri))[0].value

            uri = (
                "http://hydro.chmi.cz/isarrow/object.php?agenda=POV&objekty_chemdata=1&objekty_biodata=&taxon_tree=&seq="
                + seq
                + "&data_sel=chemdata&chemie=1&biota=1&rok_od=2007&rok_do=2012&matrice=2000868184&typodb=41&tscongrp=&tscon=&data_mez_stanovitelnosti=&data_od=&data_do=&taxon=&send=Chemick%E9+vzorky"
            )
            listing = fetch_tree(uri)

            # The last link of the result table leads to the detail page.
            last_link = CSSSelector("table.tbl a")(listing)[-1]
            uri = "http://hydro.chmi.cz/isarrow/" + last_link.get("href")
            detail = fetch_tree(uri)

            csv_link = detail.xpath("//form[1]//a")[0]
            uri = "http://hydro.chmi.cz/isarrow/" + csv_link.get("href")

            # FIXME: CSV export is now broken on IS ARROW
            # wait for them to fix it or parse from table -- and store relevant data into structure
            for row in csv.reader(urllib2.urlopen(uri)):
                print(row)

        except Exception:
            print("Failed to retrieve values for station " + station["id"])
            import traceback

            traceback.print_exc()
开发者ID:RHoKPrague2012,项目名称:Papadipupi,代码行数:42,代码来源:get_latest_measurment.py

示例11: make_emoji_img_elem

 def make_emoji_img_elem(emoji_span_elem: CSSSelector) -> Dict[str, Any]:
     """Build the ``<img>`` element that replaces an emoji ``<span>``.

     The emoji code is taken from the ``emoji-<code>`` class of the span,
     the image title from its ``title`` attribute and the alt text from its
     text.  The image URL is built from the enclosing scope's ``base_url``
     and ``emojiset``.  The span's tail text is carried over to the new
     element so surrounding text is preserved when the span is replaced.
     """
     # NOTE(review): the annotations look inaccurate -- the argument is used
     # as an element (.get/.text/.tail) and the return value comes from
     # lxml.html.fromstring; left unchanged to keep the interface as is.

     # Convert the emoji spans to img tags.
     classes = emoji_span_elem.get('class')
     # Raw string: "\S" in a plain literal is an invalid escape sequence,
     # which newer Python versions warn about.
     match = re.search(r'emoji-(?P<emoji_code>\S+)', classes)
     # re.search is capable of returning None,
     # but since the parent function should only be called with a valid css element
     # we assert that it does not.
     assert match is not None
     emoji_code = match.group('emoji_code')
     emoji_name = emoji_span_elem.get('title')
     alt_code = emoji_span_elem.text
     image_url = base_url + '/static/generated/emoji/images-%(emojiset)s-64/%(emoji_code)s.png' % {
         'emojiset': emojiset,
         'emoji_code': emoji_code
     }
     img_elem = lxml.html.fromstring(
         '<img alt="%(alt_code)s" src="%(image_url)s" title="%(title)s">' % {
             'alt_code': alt_code,
             'image_url': image_url,
             'title': emoji_name,
         })
     img_elem.set('style', 'height: 20px;')
     img_elem.tail = emoji_span_elem.tail
     return img_elem
开发者ID:umairwaheed,项目名称:zulip,代码行数:24,代码来源:notifications.py

示例12: getView

    def getView(self, document, sheet, media='all', name=None, styleCallback=None):
        """
        Compute the effective style of every element matched by ``sheet``.

        document
            a DOM document, currently an lxml HTML document
        sheet
            a CSS StyleSheet object, currently cssutils sheet
        media: optional
            TODO: view for which media it should be
        name: optional
            TODO: names of sheets only
        styleCallback: optional
            should return css.CSSStyleDeclaration of inline styles, for html
            a style declaration for ``element@style``. Gets one parameter
            ``element`` which is the relevant DOMElement. Defaults to
            ``self.styleattribute``.

        returns style view
            a dict of {DOMElement: css.CSSStyleDeclaration} for html

        Rules whose selector cannot be compiled by ``CSSSelector`` are not
        merged into the view; they are collected and inserted as a
        ``<style>`` element at the start of the document body instead.
        """

        styleCallback = styleCallback or self.styleattribute

        _unmergable_rules = CSSStyleSheet()

        view = {}
        specificities = {}  # needed temporarily

        # TODO: filter rules simpler?, add @media
        rules = (rule for rule in sheet if rule.type == rule.STYLE_RULE)
        for rule in rules:
            for selector in rule.selectorList:
                self.log(0, 'SELECTOR', selector.selectorText)
                # TODO: make this a callback to be able to use other stuff than lxml
                try:
                    cssselector = CSSSelector(selector.selectorText)
                except (ExpressionError, NotImplementedError):
                    # Keep rules lxml cannot evaluate so they can be emitted
                    # verbatim in a <style> element below.
                    _unmergable_rules.add(CSSStyleRule(selectorText=selector.selectorText,
                                                       style=rule.style))
                    continue

                matching = cssselector.evaluate(document)

                for element in matching:
                    if element.tag in self.NONVISUAL_TAGS:
                        continue

                    # add styles for all matching DOM elements
                    self.log(1, 'ELEMENT', id(element), element.text)

                    if element not in view:
                        # add initial empty style declaration
                        view[element] = CSSStyleDeclaration()
                        specificities[element] = {}

                        # and add inline @style if present
                        inlinestyle = styleCallback(element)
                        if inlinestyle:
                            for p in inlinestyle:
                                # set inline style specificity
                                view[element].setProperty(p)
                                specificities[element][p.name] = (1, 0, 0, 0)

                    for p in rule.style:
                        # update style declaration
                        if p not in view[element]:
                            # setProperty needs a new Property object and
                            # MUST NOT reuse the existing Property
                            # which would be the same for all elements!
                            # see Issue #23
                            view[element].setProperty(p.name, p.value, p.priority)
                            specificities[element][p.name] = selector.specificity
                            self.log(2, view[element].getProperty('color'))

                        else:
                            self.log(2, view[element].getProperty('color'))
                            sameprio = (p.priority ==
                                        view[element].getPropertyPriority(p.name))
                            if (not sameprio and bool(p.priority)) or (
                                    sameprio and selector.specificity >=
                                    specificities[element][p.name]):
                                # later, more specific or higher prio
                                view[element].setProperty(p.name, p.value, p.priority)

        _unmergable_css = _unmergable_rules.cssText
        if _unmergable_css:
            style_elem = etree.Element('style')
            # print __name__, _unmergable_css.__repr__()
            style_elem.text = to_unicode(_unmergable_css, 'utf-8')
            # Compare against None explicitly: an lxml element without
            # children is falsy, so "find(...) or document" would bypass an
            # existing but empty <body>.
            body = document.find('body')
            if body is None:
                body = document
            body.insert(0, style_elem)  # add <style> right into body

        return view
开发者ID:lisbitid,项目名称:less_flask,代码行数:92,代码来源:cssinliner.py

示例13: scrape_weather(脚本片段,无函数定义)

import lxml.etree
from lxml.cssselect import CSSSelector
from BeautifulSoup import BeautifulSoup

# Script body (Python 2 syntax): look up the forecast page for the place
# given on the command line and print a few values scraped from it, once
# with lxml and once with BeautifulSoup.
if len(sys.argv) < 2:
    # No location given: print usage to stderr and exit with status 2.
    print >>sys.stderr, 'usage: weather.py CITY, STATE'
    exit(2)

# All command-line words are joined into one query string; passing ``data``
# to urlopen sends it as the request body.
data = urllib.urlencode({'inputstring': ' '.join(sys.argv[1:])})
info = urllib2.urlopen('http://forecast.weather.gov/zipcity.php', data)
content = info.read()

# Solution #1: lxml with a CSS selector and XPath
parser = lxml.etree.HTMLParser(encoding='utf-8')
tree = lxml.etree.fromstring(content, parser)
big = CSSSelector('td.big')(tree)[0]
# The values may be wrapped in a <font> child; descend into it when present.
if big.find('font') is not None:
    big = big.find('font')
print 'Condition:', big.text.strip()
# The temperature is read from the text following the second <br>.
print 'Temperature:', big.findall('br')[1].tail
# Row whose <td> holds a <b>Humidity</b> label; the value is the second cell.
tr = tree.xpath('.//td[b="Humidity"]')[0].getparent()
print 'Humidity:', tr.findall('td')[1].text
print

# Solution #2: the same lookup with BeautifulSoup
soup = BeautifulSoup(content)  # doctest: +SKIP
big = soup.find('td', 'big')
if big.font is not None:
    big = big.font
print 'Condition:', big.contents[0].string.strip()
temp = big.contents[3].string or big.contents[4].string  # can be either
开发者ID:Isaac1989,项目名称:Project_github,代码行数:31,代码来源:scrape_weather.py

示例14: getView

def getView(document, css, media='all', name=None,
            styleCallback=lambda element: None):
    """
    Compute the effective style of every element matched by ``css``.

    document
        a DOM document, currently an lxml HTML document
    css
        a CSS StyleSheet string
    media: optional
        TODO: view for which media it should be
    name: optional
        TODO: names of sheets only
    styleCallback: optional
        should return css.CSSStyleDeclaration of inline styles, for html
        a style declaration for ``element@style``. Gets one parameter
        ``element`` which is the relevant DOMElement

    returns style view
        a dict of {DOMElement: css.CSSStyleDeclaration} for html
    """
    stylesheet = cssutils.parseString(css)

    styles = {}
    origins = {}  # per element: property name -> specificity that set it

    # TODO: filter rules simpler?, add @media
    for rule in stylesheet:
        if rule.type != rule.STYLE_RULE:
            continue

        for selector in rule.selectorList:
            log(0, 'SELECTOR', selector.selectorText)
            # TODO: make this a callback to be able to use other stuff than lxml
            matcher = CSSSelector(selector.selectorText)

            for element in matcher.evaluate(document):
                # add styles for all matching DOM elements
                log(1, 'ELEMENT', id(element), element.text)

                if element not in styles:
                    # start from an empty declaration ...
                    fresh = cssutils.css.CSSStyleDeclaration()
                    styles[element] = fresh
                    origins[element] = {}

                    # ... seeded with the inline @style, if there is one
                    inline = styleCallback(element)
                    if inline:
                        for prop in inline:
                            # inline styles get inline specificity
                            fresh.setProperty(prop)
                            origins[element][prop.name] = (1,0,0,0)

                declaration = styles[element]
                known = origins[element]

                for prop in rule.style:
                    if prop not in declaration:
                        # setProperty needs a new Property object and
                        # MUST NOT reuse the existing Property
                        # which would be the same for all elements!
                        # see Issue #23
                        declaration.setProperty(prop.name, prop.value, prop.priority)
                        known[prop.name] = selector.specificity
                        log(2, declaration.getProperty('color'))
                        continue

                    log(2, declaration.getProperty('color'))
                    sameprio = (prop.priority ==
                                declaration.getPropertyPriority(prop.name))
                    if sameprio:
                        # equal priority: the later or more specific rule wins
                        override = selector.specificity >= known[prop.name]
                    else:
                        # differing priority: only a prioritized value wins
                        override = bool(prop.priority)
                    if override:
                        declaration.setProperty(prop.name, prop.value, prop.priority)

    return styles
开发者ID:Western-Toronto,项目名称:truly_native_john,代码行数:74,代码来源:style.py

示例15: require

    def require(self,url):
        hc= urlparse(url)[1].replace('.ganji.com',"") 
        hc2=citynameDict_sf.get(hc)
        if hc2:
            self.fd['house_city']=hc2
        else:
            self.fd['house_city']=hc
        request = urllib2.Request(url, None, self.header)
        response = urllib2.urlopen(request).read()
        if self.mayGetIt(response):
            self.fd={}
            raise
        tree = etree.HTML(response)
        if re.search("<span class=\"city\"><a .*?>(.*?)</a>", response):
            cityname=re.search("<span class=\"city\"><a .*?>(.*?)</a>", response).group(1)
            self.fd['cityname'] = cityname
        else:
            raise
        
        self.fd['house_flag'] = 4
        self.fd['house_type'] = 6
        self.fd['house_floor'] = 0
        self.fd['house_topfloor'] = 0 
        self.fd['house_area']=0
        self.fd['house_age'] = 0
        self.fd['house_toward'] = 0
        self.fd['house_fitment'] = 0
        self.fd['house_deposit'] = 0
#        self.fd['house_totalarea_max'] = 0
#        self.fd['house_totalarea_min'] = 0
        
        soup =BeautifulSoup(response)
        detail_mer = soup.find('div',{'class':'detail_mer'})
        
        #非个人房源 return
        if u"个人房源"  not in str(detail_mer):raise
        
        Dname = detail_mer.find('span',{'class':'Dname'})
        if Dname:
            self.fd['owner_name'] = Dname.string
        else:
            self.fd['owner_name'] = None
            
        ganji_phone_call_class = detail_mer.find('span',{'class':'ganji_phone_call_class'})
        
        if ganji_phone_call_class:
            self.fd['owner_phone_pic'] = ganji_phone_call_class.contents[0]
            if str(ganji_phone_call_class).find('src='):                
                self.fd['owner_phone_pic'] = 'http://'+urlparse(url)[1]+ganji_phone_call_class.img['src']
            else:
                self.fd['owner_phone_pic'] = None            
        else:
            self.fd['owner_phone_pic'] = None
            
            
        #没有联系方式  return
        if not self.fd['owner_phone_pic']:raise     
        
        if re.search(self.house_price_regex_zu, response):
            house_price_zu = re.search(self.house_price_regex_zu, response).group(1)
            house_price_zu = house_price_zu.replace('元/月','')
            if house_price_zu.find("以上") != -1:
                self.fd['house_price_max'] = 0
                self.fd['house_price'] = int(house_price_zu.replace('以上',''))
            elif house_price_zu.find("以下") != -1:
                self.fd['house_price_max'] = int(house_price_zu.replace('以下',''))
                self.fd['house_price'] = 0
            elif house_price_zu.find("-") != -1:
                self.fd['house_price_max'] = int(house_price_zu.split('-')[1])
                self.fd['house_price'] = int(house_price_zu.split('-')[0])
            else:
                self.fd['house_price_max'] = 0
                self.fd['house_price'] = 0
        else:
            self.fd['house_price_max'] = 0
            self.fd['house_price'] = 0
        
        posttime=CSSSelector('span.pub_time')(tree)!=None and CSSSelector('span.pub_time')(tree)[0].text.strip() or None 
        if posttime:
            Y=int(time.strftime('%Y', time.localtime()))
            M=int(posttime.split(' ')[0].split('-')[0])
            D=int(posttime.split(' ')[0].split('-')[1])
            H=int(time.strftime('%H',time.localtime(time.time())))
            Min=int(time.strftime('%M',time.localtime(time.time())))
            s = datetime.datetime(Y,M,D,H,Min)
            posttime=str(int(time.mktime(s.timetuple())))
            self.fd['house_posttime'] =posttime 
        else:
            s=time.localtime(time.time())
            self.fd['house_posttime'] =str(int(time.mktime(s)))
            
        house_title=CSSSelector("div.detail_title h1")(tree)[0] !=None and CSSSelector("div.detail_title h1")(tree)[0].text.strip() or None
        self.fd['house_title'] = house_title.replace("(求购)","").replace("(求租)","").replace("(出售)","")
        

        if re.search(self.house_room_regex, response):
            house_room=re.search(self.house_room_regex, response).group(1)
            self.fd['house_room'] = int(house_room)
        else:
            self.fd['house_room'] = 0
#.........这里部分代码省略.........
开发者ID:ptphp,项目名称:PyLib,代码行数:101,代码来源:ganji.py


注:本文中的lxml.cssselect.CSSSelector类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。