当前位置: 首页>>代码示例>>Python>>正文


Python cssselect.CSSSelector方法代码示例

本文整理汇总了 Python 中 lxml.cssselect.CSSSelector 类的典型用法代码示例。如果您正苦于以下问题：Python cssselect.CSSSelector 的具体用法是什么？Python cssselect.CSSSelector 怎么用？有哪些 Python cssselect.CSSSelector 的使用示例？那么恭喜您，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块 lxml.cssselect 的用法示例。


在下文中一共展示了cssselect.CSSSelector方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: links_to_style

# 需要导入模块: from lxml import cssselect [as 别名]
# 或者: from lxml.cssselect import CSSSelector [as 别名]
def links_to_style(tree, rewrite_src=None):
    """
    Replace every stylesheet link in the parsed document tree `tree` with
    an inline <style> element holding the contents of the linked stylesheet.

    :param tree: parsed document; it is modified in place.
    :param rewrite_src: optional callable applied to each link's ``href``
        to obtain the location to load. Defaults to using ``href`` as is.
    :returns: the same `tree` object.

    A location with a network component is fetched with ``urlopen``;
    anything else is opened as a local file path. Newlines are stripped
    from the stylesheet before it is embedded.
    """
    if not rewrite_src:
        rewrite_src = lambda x: x

    for link in CSSSelector("link[rel=stylesheet]")(tree):
        src = rewrite_src(link.attrib["href"])
        pieces = parse.urlsplit(src)
        if pieces.netloc:
            response = urlopen(src)
            try:
                style_body = response.read()
            finally:
                # Always release the connection, even if the read fails.
                response.close()
        else:
            with open(src) as local_styles:
                style_body = local_styles.read()
        # urlopen() hands back bytes; normalise to text so the str-based
        # replace() below works for both remote and local stylesheets.
        if isinstance(style_body, bytes):
            style_body = style_body.decode("utf-8")
        # NOTE(review): STYLE is presumably lxml.html.builder.STYLE - confirm.
        style = STYLE(style_body.replace("\n", ""))
        # Keep whatever text followed the <link> in the document.
        style.tail = link.tail
        # lxml's replace(old, new) is called on the parent of the element
        # being swapped out, not on the element itself.
        link.getparent().replace(link, style)

    return tree
开发者ID:codeforboston,项目名称:cornerwise,代码行数:22,代码来源:css_inliner.py

示例2: extract_comments

# 需要导入模块: from lxml import cssselect [as 别名]
# 或者: from lxml.cssselect import CSSSelector [as 别名]
def extract_comments(html):
    """Yield one dict per ``.comment-item`` element found in `html`.

    Each dict carries the keys ``cid``, ``text``, ``time``, ``author``,
    ``votes`` and ``photo``. ``votes`` falls back to the integer ``0`` when
    the item has no ``.like-count.off`` element. Every other selector must
    match at least once inside the item, otherwise ``IndexError`` propagates.
    """
    document = lxml.html.fromstring(html)
    select_items = CSSSelector('.comment-item')
    select_text = CSSSelector('.comment-text-content')
    select_time = CSSSelector('.time')
    select_author = CSSSelector('.user-name')
    select_votes = CSSSelector('.like-count.off')
    select_photo = CSSSelector('.user-photo')

    for comment in select_items(document):
        cid = comment.get('data-cid')
        body = select_text(comment)[0].text_content()
        posted = select_time(comment)[0].text_content().strip()
        author = select_author(comment)[0].text_content()
        vote_nodes = select_votes(comment)
        votes = vote_nodes[0].text_content() if len(vote_nodes) > 0 else 0
        photo = select_photo(comment)[0].get('src')
        yield {
            'cid': cid,
            'text': body,
            'time': posted,
            'author': author,
            'votes': votes,
            'photo': photo,
        }
开发者ID:egbertbouman,项目名称:youtube-comment-downloader,代码行数:18,代码来源:downloader.py

示例3: cssselect

# 需要导入模块: from lxml import cssselect [as 别名]
# 或者: from lxml.cssselect import CSSSelector [as 别名]
def cssselect(self, expr, translator='html'):
        """
        Evaluate the CSS expression `expr` against this element and its
        children and return the list of results.

        Same as
        ``lxml.cssselect.CSSSelector(expr, translator=translator)(self)``.
        Pre-compiling the expression with ``CSSSelector`` can provide a
        substantial speedup when it is used repeatedly.
        """
        # Imported lazily so that the dependency stays optional.
        from lxml.cssselect import CSSSelector
        compiled = CSSSelector(expr, translator=translator)
        return compiled(self)

    ########################################
    ## Link functions
    ######################################## 
开发者ID:JFox,项目名称:aws-lambda-lxml,代码行数:18,代码来源:__init__.py

示例4: pullup_elems

# 需要导入模块: from lxml import cssselect [as 别名]
# 或者: from lxml.cssselect import CSSSelector [as 别名]
def pullup_elems(tree, loader_context):
    """Replace selected ancestors by one of their descendants.

    ``loader_context["pullup_elems"]`` maps a CSS selector to a distance.
    For every element matching the selector, the ancestor that many levels
    above it is replaced in the tree by the element itself, and the element
    takes over the ancestor's tail text.

    If the requested ancestor does not exist, or is the root and so has no
    parent to replace it in, an error is logged and the element is left
    untouched.

    :param tree: parsed document; modified in place.
    :param loader_context: mapping that may contain ``"pullup_elems"``.
    :returns: a one-element list containing `tree`.
    """
    for elem_child, parent_dist in loader_context.get("pullup_elems", {}).items():
        selector = CSSSelector(elem_child)
        for elem in selector(tree):
            parent = elem
            for _ in range(parent_dist):
                parent = parent.getparent()
                if parent is None:
                    # Walked past the root: stop here so the error branch
                    # below reports it instead of raising AttributeError.
                    break
            if parent is not None and parent.getparent() is not None:
                elem.tail = parent.tail
                parent.getparent().replace(parent, elem)
            else:
                logger.error(
                    'Could not find parent with distance {} for selector "{}".'.format(
                        parent_dist, elem_child
                    )
                )

    return [tree]
开发者ID:PyFeeds,项目名称:PyFeeds,代码行数:20,代码来源:loaders.py

示例5: remove_elems

# 需要导入模块: from lxml import cssselect [as 别名]
# 或者: from lxml.cssselect import CSSSelector [as 别名]
def remove_elems(tree, loader_context):
    """Drop unwanted elements from `tree`.

    Elements are chosen by the CSS selectors in
    ``loader_context["remove_elems"]`` (plus ``img`` when the
    ``FEEDS_CONFIG_REMOVE_IMAGES`` setting is on) and by the XPath
    expressions in ``loader_context["remove_elems_xpath"]``.

    :returns: a one-element list containing `tree`.
    """
    extra_selectors = []

    feeds_settings = get_feeds_settings()
    if feeds_settings.getbool("FEEDS_CONFIG_REMOVE_IMAGES"):
        extra_selectors += ["img"]

    # CSS-selected elements first, then the XPath-selected ones.
    css_selectors = loader_context.get("remove_elems", []) + extra_selectors
    for css_expr in css_selectors:
        for match in CSSSelector(css_expr)(tree):
            match.drop_tree()

    for xpath_expr in loader_context.get("remove_elems_xpath", []):
        for match in tree.xpath(xpath_expr):
            match.drop_tree()

    return [tree]
开发者ID:PyFeeds,项目名称:PyFeeds,代码行数:21,代码来源:loaders.py

示例6: convert_iframes

# 需要导入模块: from lxml import cssselect [as 别名]
# 或者: from lxml.cssselect import CSSSelector [as 别名]
def convert_iframes(tree, loader_context):
    """Convert iframes to divs with links to its src.

    convert_iframes() is called after remove_elems() so that unwanted iframes can be
    eliminated first.

    Every ``<iframe>`` that has a ``src`` attribute is replaced by
    ``<div><a href="URL">URL</a></div>``, where URL is the ``src`` joined
    with ``loader_context["base_url"]`` when one is given. Iframes without
    ``src`` are left alone. The replacement keeps the iframe's tail text.

    :param tree: parsed document; modified in place.
    :param loader_context: mapping that may contain ``"base_url"``; may be
        empty or ``None``.
    :returns: a one-element list containing `tree`.
    """
    base_url = loader_context.get("base_url", None) if loader_context else None
    selector = CSSSelector("iframe")
    for elem in selector(tree):
        if "src" not in elem.attrib:
            continue
        url = urljoin(base_url, elem.attrib.pop("src"))
        # Build the replacement from a fixed skeleton and set the URL through
        # the element API. Formatting the URL into markup and parsing it
        # would let quotes, "<" or "&" in the URL alter the resulting HTML.
        elem_new = lxml.html.fragment_fromstring("<div><a></a></div>")
        link = elem_new[0]
        link.set("href", url)
        link.text = url
        elem_new.tail = elem.tail
        elem.getparent().replace(elem, elem_new)

    return [tree]
开发者ID:PyFeeds,项目名称:PyFeeds,代码行数:21,代码来源:loaders.py

示例7: select_one

# 需要导入模块: from lxml import cssselect [as 别名]
# 或者: from lxml.cssselect import CSSSelector [as 别名]
def select_one(tree, expr):
  """Return the first result of CSS expression `expr` on `tree`, or None.

  None is returned when the selector result is not a list or is empty.
  """
  matches = CSSSelector(expr)(tree)
  if not isinstance(matches, list) or len(matches) == 0:
    return None
  return matches[0]
开发者ID:blissland,项目名称:blissflixx,代码行数:9,代码来源:chanutils.py

示例8: select_all

# 需要导入模块: from lxml import cssselect [as 别名]
# 或者: from lxml.cssselect import CSSSelector [as 别名]
def select_all(tree, expr):
  """Return everything the CSS expression `expr` selects in `tree`."""
  return CSSSelector(expr)(tree)
开发者ID:blissland,项目名称:blissflixx,代码行数:5,代码来源:chanutils.py

示例9: get_tree_tag

# 需要导入模块: from lxml import cssselect [as 别名]
# 或者: from lxml.cssselect import CSSSelector [as 别名]
def get_tree_tag(self, selector='', get_one=False, *args, **kwargs):
		"""Run the CSS `selector` against ``self.tree``.

		Returns the first match when `get_one` is true (an empty result
		raises ``IndexError``), otherwise the full result of the selector.
		Extra positional and keyword arguments are accepted and ignored.
		"""
		matches = cssselect.CSSSelector(selector)(self.tree)
		return matches[0] if get_one else matches
开发者ID:AlexMathew,项目名称:scrapple,代码行数:8,代码来源:css.py

示例10: fix_emojis

# 需要导入模块: from lxml import cssselect [as 别名]
# 或者: from lxml.cssselect import CSSSelector [as 别名]
def fix_emojis(content: str, base_url: str, emojiset: str) -> str:
    """Rewrite emoji markup in the HTML string `content`.

    Every ``span.emoji`` element is replaced by an ``<img>`` whose source is
    built from `base_url`, `emojiset` and the ``emoji-<code>`` class of the
    span. Every remaining ``.emoji`` element then loses its ``class``
    attribute and gets a fixed 20px height style.

    :returns: the modified document serialised back to a ``str``.
    """
    def make_emoji_img_elem(
        emoji_span_elem: lxml.html.HtmlElement,
    ) -> lxml.html.HtmlElement:
        # Convert the emoji spans to img tags.
        classes = emoji_span_elem.get('class')
        match = re.search(r'emoji-(?P<emoji_code>\S+)', classes)
        # re.search is capable of returning None,
        # but since the parent function should only be called with a valid css element
        # we assert that it does not.
        assert match is not None
        emoji_code = match.group('emoji_code')
        emoji_name = emoji_span_elem.get('title')
        alt_code = emoji_span_elem.text
        image_url = base_url + f'/static/generated/emoji/images-{emojiset}-64/{emoji_code}.png'
        # Set the attributes through the element API rather than formatting
        # them into markup: a quote or "<" in the span text or title would
        # otherwise change the parsed <img>. str() mirrors the formatting the
        # values previously received, and the attribute order is kept.
        img_elem = lxml.html.fromstring('<img>')
        img_elem.set('alt', str(alt_code))
        img_elem.set('src', image_url)
        img_elem.set('title', str(emoji_name))
        img_elem.set('style', 'height: 20px;')
        img_elem.tail = emoji_span_elem.tail
        return img_elem

    fragment = lxml.html.fromstring(content)
    for elem in fragment.cssselect('span.emoji'):
        parent = elem.getparent()
        img_elem = make_emoji_img_elem(elem)
        parent.replace(elem, img_elem)

    # The spans are gone by now, so whatever still matches ".emoji" only
    # needs its class dropped and the height style applied.
    for realm_emoji in fragment.cssselect('.emoji'):
        del realm_emoji.attrib['class']
        realm_emoji.set('style', 'height: 20px;')

    content = lxml.html.tostring(fragment).decode('utf-8')
    return content
开发者ID:zulip,项目名称:zulip,代码行数:33,代码来源:email_notifications.py

示例11: html_tree_to_text

# 需要导入模块: from lxml import cssselect [as 别名]
# 或者: from lxml.cssselect import CSSSelector [as 别名]
def html_tree_to_text(tree):
    """Flatten the parsed HTML `tree` into plain text.

    ``<style>`` elements and comments are removed from `tree` first (the
    tree is modified in place). The remaining elements are then visited in
    document order: elements in ``_BLOCKTAGS`` or ``_HARDBREAKS`` start a
    new line, ``li`` items get a ``"  * "`` bullet, and an ``href``
    attribute is appended in parentheses after the element's text.

    :returns: the text after ``_rm_excessive_newlines`` and ``_encode_utf8``.
    """
    for style_el in CSSSelector('style')(tree):
        style_el.getparent().remove(style_el)

    for comment in tree.xpath('//comment()'):
        owner = comment.getparent()
        # comment with no parent does not impact produced text
        if owner is not None:
            owner.remove(comment)

    text = ""
    for el in tree.iter():
        tag = el.tag
        el_text = (el.text or '') + (el.tail or '')
        if len(el_text) > 1:
            if tag in _BLOCKTAGS + _HARDBREAKS:
                text += "\n"
            if tag == 'li':
                text += "  * "
            text += el_text.strip() + " "

            # add href to the output
            link_target = el.attrib.get('href')
            if link_target:
                text += "(%s) " % link_target

        # A hard-break element with no text of its own still ends the
        # current line, unless the output is empty or already ends one.
        needs_break = (tag in _HARDBREAKS and text and
                       not text.endswith("\n") and not el_text)
        if needs_break:
            text += "\n"

    cleaned = _rm_excessive_newlines(text)
    return _encode_utf8(cleaned)
开发者ID:mailgun,项目名称:talon,代码行数:36,代码来源:utils.py

示例12: cssselect

# 需要导入模块: from lxml import cssselect [as 别名]
# 或者: from lxml.cssselect import CSSSelector [as 别名]
def cssselect(expr, tree):
    """Return what the CSS expression `expr` selects in `tree`."""
    selector = CSSSelector(expr)
    return selector(tree)
开发者ID:mailgun,项目名称:talon,代码行数:4,代码来源:utils.py

示例13: extract_reply_cids

# 需要导入模块: from lxml import cssselect [as 别名]
# 或者: from lxml.cssselect import CSSSelector [as 别名]
def extract_reply_cids(html):
    """Return the ``data-cid`` attribute of each ``.load-comments`` element
    that is a direct child of a ``.comment-replies-header`` in `html`.
    """
    document = lxml.html.fromstring(html)
    load_links = CSSSelector('.comment-replies-header > .load-comments')(document)
    cids = []
    for node in load_links:
        cids.append(node.get('data-cid'))
    return cids
开发者ID:egbertbouman,项目名称:youtube-comment-downloader,代码行数:6,代码来源:downloader.py

示例14: _scrape_subjurisdiction_paths

# 需要导入模块: from lxml import cssselect [as 别名]
# 或者: from lxml.cssselect import CSSSelector [as 别名]
def _scrape_subjurisdiction_paths(self, html):
        """
        Parse the subjurisdictions page `html` to find paths for counties.

        Returns a list of ``(value, id)`` tuples, one per ``<a>`` element
        matched by ``ul li a``, taken from that element's ``value`` and
        ``id`` attributes.
        """
        document = lxml.html.fromstring(html)
        anchors = CSSSelector('ul li a')(document)
        pairs = []
        for anchor in anchors:
            pairs.append((anchor.get('value'), anchor.get('id')))
        return pairs
开发者ID:openelections,项目名称:clarify,代码行数:10,代码来源:jurisdiction.py

示例15: css

# 需要导入模块: from lxml import cssselect [as 别名]
# 或者: from lxml.cssselect import CSSSelector [as 别名]
def css(expression):
    """ Build a :func:`composable <wex.composed.composable>` callable that
        selects the elements matched by a
        `CSS selector <http://en.wikipedia.org/wiki/Cascading_Style_Sheets#Selector>`_
        expression.

        :param expression: The CSS selector expression.

        The returned callable takes a :class:`wex.response.Response`, a
        list of elements or an individual element as its argument.
    """
    select = map_if_list(CSSSelector(expression))
    return parse | select
开发者ID:eBay,项目名称:wextracto,代码行数:14,代码来源:etree.py


注:本文中的lxml.cssselect.CSSSelector方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。