本文整理汇总了Python中lxml.cssselect.CSSSelector方法的典型用法代码示例。如果您正苦于以下问题:Python cssselect.CSSSelector方法的具体用法?Python cssselect.CSSSelector怎么用?Python cssselect.CSSSelector使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类lxml.cssselect的用法示例。
在下文中一共展示了cssselect.CSSSelector方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: links_to_style
# 需要导入模块: from lxml import cssselect [as 别名]
# 或者: from lxml.cssselect import CSSSelector [as 别名]
def links_to_style(tree, rewrite_src=None):
    """
    Replace all stylesheet links in the parsed document tree `tree` with
    <style> tags containing the contents of the URL.

    :param tree: parsed document tree; it is modified in place.
    :param rewrite_src: optional callable applied to each link's ``href``
        to obtain the location that is actually read. When omitted the
        ``href`` is used unchanged.
    :return: the same `tree` object.

    A location with a network part is fetched with ``urlopen``; anything
    else is opened as a local file path.
    """
    if not rewrite_src:
        rewrite_src = lambda x: x
    for link in CSSSelector("link[rel=stylesheet]")(tree):
        src = rewrite_src(link.attrib["href"])
        pieces = parse.urlsplit(src)
        if pieces.netloc:
            response = urlopen(src)
            try:
                style_body = response.read()
            finally:
                # Release the connection even when the read fails.
                response.close()
        else:
            with open(src) as local_styles:
                style_body = local_styles.read()
        if isinstance(style_body, bytes):
            # A remote read yields bytes, which cannot be combined with the
            # str arguments passed to replace() below.
            # NOTE(review): UTF-8 is assumed here; confirm, or honour the
            # charset announced by the response.
            style_body = style_body.decode("utf-8")
        style = STYLE(style_body.replace("\n", ""))
        # Keep the text that followed the <link>; it would otherwise leave
        # the tree together with the replaced element.
        style.tail = link.tail
        # replace() swaps a *child* of the element it is called on, so the
        # substitution has to go through the link's parent.
        link.getparent().replace(link, style)
    return tree
示例2: extract_comments
# 需要导入模块: from lxml import cssselect [as 别名]
# 或者: from lxml.cssselect import CSSSelector [as 别名]
def extract_comments(html):
    """Yield one dict per ``.comment-item`` element found in `html`.

    Every dict carries the keys ``cid``, ``text``, ``time``, ``author``,
    ``votes`` and ``photo``. ``votes`` is the integer ``0`` when the item has
    no ``.like-count.off`` element, otherwise that element's text content.
    Text, time, author and photo are taken from the first matching
    descendant, so an item lacking one of them raises ``IndexError``.
    """
    document = lxml.html.fromstring(html)
    find_items = CSSSelector('.comment-item')
    find_text = CSSSelector('.comment-text-content')
    find_time = CSSSelector('.time')
    find_author = CSSSelector('.user-name')
    find_votes = CSSSelector('.like-count.off')
    find_photo = CSSSelector('.user-photo')

    for comment in find_items(document):
        cid = comment.get('data-cid')
        body = find_text(comment)[0].text_content()
        posted = find_time(comment)[0].text_content().strip()
        author = find_author(comment)[0].text_content()
        likes = find_votes(comment)
        votes = likes[0].text_content() if likes else 0
        photo = find_photo(comment)[0].get('src')
        yield {
            'cid': cid,
            'text': body,
            'time': posted,
            'author': author,
            'votes': votes,
            'photo': photo,
        }
示例3: cssselect
# 需要导入模块: from lxml import cssselect [as 别名]
# 或者: from lxml.cssselect import CSSSelector [as 别名]
def cssselect(self, expr, translator='html'):
    """
    Evaluate the CSS expression `expr` against this element and its
    children and return a list of the results.

    Shorthand for
    ``lxml.cssselect.CSSSelector(expr, translator=translator)(self)``.
    The expression is compiled on every call, so pre-compiling a selector
    that is used repeatedly can provide a substantial speedup.
    """
    # Imported lazily so that the dependency stays optional.
    from lxml.cssselect import CSSSelector
    compiled = CSSSelector(expr, translator=translator)
    return compiled(self)
########################################
## Link functions
########################################
示例4: pullup_elems
# 需要导入模块: from lxml import cssselect [as 别名]
# 或者: from lxml.cssselect import CSSSelector [as 别名]
def pullup_elems(tree, loader_context):
    """Replace ancestors of selected elements with the elements themselves.

    ``loader_context["pullup_elems"]`` maps a CSS selector to a distance.
    Each element matching the selector takes the place of the ancestor that
    many levels above it and inherits that ancestor's tail text.

    When that ancestor does not exist, or has no parent of its own to
    perform the replacement on, an error is logged and the element is left
    where it is.

    :param tree: parsed document tree; it is modified in place.
    :param loader_context: mapping that may contain ``"pullup_elems"``.
    :return: a one-element list containing `tree`.
    """
    for elem_child, parent_dist in loader_context.get("pullup_elems", {}).items():
        selector = CSSSelector(elem_child)
        for elem in selector(tree):
            parent = elem
            for _ in range(parent_dist):
                parent = parent.getparent()
                if parent is None:
                    # Walked past the root: stop here so the error branch
                    # below reports it instead of failing on None.
                    break
            if parent is not None and parent.getparent() is not None:
                elem.tail = parent.tail
                parent.getparent().replace(parent, elem)
            else:
                logger.error(
                    'Could not find parent with distance {} for selector "{}".'.format(
                        parent_dist, elem_child
                    )
                )
    return [tree]
示例5: remove_elems
# 需要导入模块: from lxml import cssselect [as 别名]
# 或者: from lxml.cssselect import CSSSelector [as 别名]
def remove_elems(tree, loader_context):
    """Drop unwanted elements from `tree`.

    Elements are chosen by the CSS selectors in
    ``loader_context["remove_elems"]`` and the XPath expressions in
    ``loader_context["remove_elems_xpath"]``. ``img`` is added to the CSS
    selectors when the ``FEEDS_CONFIG_REMOVE_IMAGES`` setting is true.

    :return: a one-element list containing `tree`.
    """
    extra_selectors = []
    if get_feeds_settings().getbool("FEEDS_CONFIG_REMOVE_IMAGES"):
        extra_selectors.append("img")

    # Configured selectors run first, then the settings-driven ones.
    css_selectors = loader_context.get("remove_elems", []) + extra_selectors
    for css in css_selectors:
        for match in CSSSelector(css)(tree):
            match.drop_tree()

    for xpath in loader_context.get("remove_elems_xpath", []):
        for match in tree.xpath(xpath):
            match.drop_tree()

    return [tree]
示例6: convert_iframes
# 需要导入模块: from lxml import cssselect [as 别名]
# 或者: from lxml.cssselect import CSSSelector [as 别名]
def convert_iframes(tree, loader_context):
    """Convert iframes to divs with links to its src.

    convert_iframes() is called after remove_elems() so that unwanted iframes can be
    eliminated first.

    Each ``<iframe>`` with a ``src`` attribute is replaced by
    ``<div><a href="URL">URL</a></div>``, where URL is the ``src`` joined
    onto ``loader_context["base_url"]`` (if any). The iframe's tail text is
    carried over. Iframes without ``src`` are left untouched.

    :param tree: parsed document tree; it is modified in place.
    :param loader_context: mapping that may contain ``"base_url"``; may be
        empty or ``None``.
    :return: a one-element list containing `tree`.
    """
    base_url = loader_context.get("base_url", None) if loader_context else None
    selector = CSSSelector("iframe")
    for elem in selector(tree):
        if "src" not in elem.attrib:
            continue
        url = urljoin(base_url, elem.attrib.pop("src"))
        # Build the replacement from a fixed template and set the URL through
        # the element API. Formatting the URL into markup and re-parsing it
        # would decode query parameters such as "&copy=" as entities and
        # would let quotes or "<" in the URL alter the markup.
        elem_new = lxml.html.fragment_fromstring("<div><a></a></div>")
        link = elem_new[0]
        link.set("href", url)
        link.text = url
        elem_new.tail = elem.tail
        elem.getparent().replace(elem, elem_new)
    return [tree]
示例7: select_one
# 需要导入模块: from lxml import cssselect [as 别名]
# 或者: from lxml.cssselect import CSSSelector [as 别名]
def select_one(tree, expr):
    """Return the first result of the CSS expression `expr` applied to `tree`.

    ``None`` is returned unless the selector produces a non-empty list.
    """
    matches = CSSSelector(expr)(tree)
    if not isinstance(matches, list) or len(matches) == 0:
        return None
    return matches[0]
示例8: select_all
# 需要导入模块: from lxml import cssselect [as 别名]
# 或者: from lxml.cssselect import CSSSelector [as 别名]
def select_all(tree, expr):
    """Return whatever the CSS expression `expr` selects in `tree`."""
    return CSSSelector(expr)(tree)
示例9: get_tree_tag
# 需要导入模块: from lxml import cssselect [as 别名]
# 或者: from lxml.cssselect import CSSSelector [as 别名]
def get_tree_tag(self, selector='', get_one=False, *args, **kwargs):
    """Select from ``self.tree`` using the CSS expression `selector`.

    With `get_one` set, the result is indexed at 0 and only that item is
    returned (an empty result therefore raises); otherwise the complete
    result is returned. Extra positional and keyword arguments are accepted
    but not used.
    """
    matches = cssselect.CSSSelector(selector)(self.tree)
    return matches[0] if get_one else matches
示例10: fix_emojis
# 需要导入模块: from lxml import cssselect [as 别名]
# 或者: from lxml.cssselect import CSSSelector [as 别名]
def fix_emojis(content: str, base_url: str, emojiset: str) -> str:
    """Rewrite emoji markup in the HTML `content` and return the new HTML.

    Every ``span.emoji`` element is replaced by an ``<img>`` whose source is
    ``{base_url}/static/generated/emoji/images-{emojiset}-64/{code}.png``,
    where ``code`` comes from the span's ``emoji-<code>`` class. Every
    element still carrying the ``emoji`` class afterwards loses its
    ``class`` attribute and gets an inline height of 20px.

    :param content: HTML markup to process.
    :param base_url: prefix for the generated image URLs.
    :param emojiset: emoji set name used in the image path.
    :return: the serialized document, decoded as UTF-8.
    """
    def make_emoji_img_elem(emoji_span_elem: lxml.html.HtmlElement) -> lxml.html.HtmlElement:
        # Convert the emoji spans to img tags.
        classes = emoji_span_elem.get('class')
        match = re.search(r'emoji-(?P<emoji_code>\S+)', classes)
        # re.search is capable of returning None,
        # but since the parent function should only be called with a valid css element
        # we assert that it does not.
        assert match is not None
        emoji_code = match.group('emoji_code')
        emoji_name = emoji_span_elem.get('title')
        alt_code = emoji_span_elem.text
        image_url = base_url + f'/static/generated/emoji/images-{emojiset}-64/{emoji_code}.png'
        img_elem = lxml.html.fromstring(
            f'<img alt="{alt_code}" src="{image_url}" title="{emoji_name}">')
        img_elem.set('style', 'height: 20px;')
        # Keep the text that followed the span in the document.
        img_elem.tail = emoji_span_elem.tail
        return img_elem

    fragment = lxml.html.fromstring(content)
    for elem in fragment.cssselect('span.emoji'):
        parent = elem.getparent()
        img_elem = make_emoji_img_elem(elem)
        parent.replace(elem, img_elem)

    # The spans are gone by now, so this pass only sees the other elements
    # that carry the emoji class.
    for realm_emoji in fragment.cssselect('.emoji'):
        del realm_emoji.attrib['class']
        realm_emoji.set('style', 'height: 20px;')

    content = lxml.html.tostring(fragment).decode('utf-8')
    return content
示例11: html_tree_to_text
# 需要导入模块: from lxml import cssselect [as 别名]
# 或者: from lxml.cssselect import CSSSelector [as 别名]
def html_tree_to_text(tree):
"""Render the parsed HTML `tree` as text.

`tree` is modified: its ``<style>`` elements and comments are removed
before the text is collected. The text and tail of every remaining
element are then gathered in document order, with a newline emitted for
tags listed in ``_BLOCKTAGS`` / ``_HARDBREAKS``, a bullet marker for
``li`` and any ``href`` value appended in parentheses. The accumulated
string is passed through ``_rm_excessive_newlines`` and the result of
``_encode_utf8`` is returned.
"""
# Strip <style> elements so that CSS source does not end up in the text.
# NOTE(review): lxml's remove() also discards the removed node's tail
# text -- confirm that losing text which directly follows a <style> or a
# comment is intended.
for style in CSSSelector('style')(tree):
style.getparent().remove(style)
for c in tree.xpath('//comment()'):
parent = c.getparent()
# comment with no parent does not impact produced text
if parent is None:
continue
parent.remove(c)
text = ""
for el in tree.iter():
# Text inside the element plus the text that follows its closing tag.
el_text = (el.text or '') + (el.tail or '')
# Combined text of at most one character is not emitted.
if len(el_text) > 1:
if el.tag in _BLOCKTAGS + _HARDBREAKS:
text += "\n"
if el.tag == 'li':
text += " * "
text += el_text.strip() + " "
# add href to the output
href = el.attrib.get('href')
if href:
text += "(%s) " % href
# A hard-break tag without any text of its own still ends the current
# line, unless nothing has been written yet or a newline is already there.
if (el.tag in _HARDBREAKS and text and
not text.endswith("\n") and not el_text):
text += "\n"
# NOTE(review): presumably collapses runs of newlines and encodes the
# result as UTF-8 -- both helpers are defined outside this block; confirm.
retval = _rm_excessive_newlines(text)
return _encode_utf8(retval)
示例12: cssselect
# 需要导入模块: from lxml import cssselect [as 别名]
# 或者: from lxml.cssselect import CSSSelector [as 别名]
def cssselect(expr, tree):
    """Compile the CSS expression `expr` and return what it selects in `tree`."""
    compiled = CSSSelector(expr)
    return compiled(tree)
示例13: extract_reply_cids
# 需要导入模块: from lxml import cssselect [as 别名]
# 或者: from lxml.cssselect import CSSSelector [as 别名]
def extract_reply_cids(html):
    """Return the ``data-cid`` attribute of each reply loader in `html`.

    Reply loaders are the elements matching
    ``.comment-replies-header > .load-comments``. An element without the
    attribute contributes ``None``.
    """
    document = lxml.html.fromstring(html)
    loaders = CSSSelector('.comment-replies-header > .load-comments')(document)
    return [loader.get('data-cid') for loader in loaders]
示例14: _scrape_subjurisdiction_paths
# 需要导入模块: from lxml import cssselect [as 别名]
# 或者: from lxml.cssselect import CSSSelector [as 别名]
def _scrape_subjurisdiction_paths(self, html):
    """
    Parse subjurisdictions_url to find paths for counties.

    Returns a list of ``(value, id)`` attribute pairs, one for every
    element matched by ``ul li a`` in `html`.
    """
    document = lxml.html.fromstring(html)
    anchors = CSSSelector('ul li a')(document)
    return [(anchor.get('value'), anchor.get('id')) for anchor in anchors]
示例15: css
# 需要导入模块: from lxml import cssselect [as 别名]
# 或者: from lxml.cssselect import CSSSelector [as 别名]
def css(expression):
    """ Build a :func:`composable <wex.composed.composable>` callable that
    selects the elements matched by a
    `CSS selector <http://en.wikipedia.org/wiki/Cascading_Style_Sheets#Selector>`_
    expression.

    :param expression: The CSS selector expression.

    The returned callable accepts a :class:`wex.response.Response`, a
    list of elements or an individual element as an argument.
    """
    select = CSSSelector(expression)
    return parse | map_if_list(select)