本文整理汇总了Python中lxml.html.Element方法的典型用法代码示例。如果您正苦于以下问题：Python html.Element方法的具体用法？Python html.Element怎么用？Python html.Element使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块lxml.html的用法示例。
在下文中一共展示了html.Element方法的8个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: get_user_rating
# 需要导入模块: from lxml import html [as 别名]
# 或者: from lxml.html import Element [as 别名]
def get_user_rating(username, data):
    """Build the HTML element that shows a username together with its rating.

    Args:
        username: Text displayed for the user; also passed to
            ``reverse('user_page', args=[username])`` to build the link target.
        data: Per-user record. Only ``data[1]`` is read here and it is treated
            as the rating. A falsy ``data`` means no record is available.

    Returns:
        A plain ``<span>`` containing the username when ``data`` is falsy.
        Otherwise an ``<a class="rate-group">`` pointing at the user page:
        with a truthy rating it contains a ``rate-box`` span (holding an inner
        span whose CSS height encodes the rating progress) followed by a
        ``rating`` span with the username; with a falsy rating it carries the
        username as its own text.
    """
    if not data:
        element = Element('span')
        element.text = username
        return element

    rating = data[1]
    element = Element('a', {'class': 'rate-group', 'href': reverse('user_page', args=[username])})
    if rating:
        rating_css = rating_class(rating)
        rate_box = Element('span', {'class': 'rate-box ' + rating_css})
        # The previous spec '%3.f' means "width 3, precision 0": it rounded the
        # value to a whole number and left-padded it with spaces, so any
        # fractional progress collapsed to 0em or 1em. '%.3f' keeps three
        # decimals. NOTE(review): assumes rating_progress() returns a
        # fractional value -- confirm against its definition.
        rate_box.append(Element('span', {'style': 'height: %.3fem' % rating_progress(rating)}))
        user = Element('span', {'class': 'rating ' + rating_css})
        user.text = username
        element.append(rate_box)
        element.append(user)
    else:
        element.text = username
    return element
示例2: parse_rsc_html
# 需要导入模块: from lxml import html [as 别名]
# 或者: from lxml.html import Element [as 别名]
def parse_rsc_html(htmlstring):
    """Parse messy RSC HTML, repairing known problems before a selector is created.

    The markup is run through ``UnicodeDammit`` to detect its encoding and then
    parsed with a recovering ``HTMLParser``. Afterwards, text that directly
    follows a block-level child of the ``#wrapper`` element (i.e. sits in that
    child's ``tail``) is moved into a new ``<p class="otherpara">`` element,
    together with the non-block siblings that follow it.

    Args:
        htmlstring: The raw HTML document.

    Returns:
        The root element of the parsed and repaired tree.

    Raises:
        UnicodeDecodeError: If ``UnicodeDammit`` produced no unicode markup.
    """
    converted = UnicodeDammit(htmlstring)
    if not converted.unicode_markup:
        # UnicodeDecodeError takes exactly five arguments
        # (encoding, object, start, end, reason); constructing it with only a
        # message raised TypeError instead and left the '%s' unfilled.
        attempted = getattr(converted, 'tried_encodings', None) or ()
        tried = ', '.join(
            str(entry[0] if isinstance(entry, tuple) and entry else entry)
            for entry in attempted
        )
        raw = bytes(htmlstring) if isinstance(htmlstring, (bytes, bytearray)) else b''
        raise UnicodeDecodeError(
            'unknown', raw, 0, len(raw),
            'Failed to detect encoding, tried [%s]' % tried)
    root = fromstring(htmlstring, parser=HTMLParser(recover=True, encoding=converted.original_encoding))
    # Add p.otherpara tags around orphan text
    wrapper = root.get_element_by_id('wrapper')
    newp = None
    for child in wrapper:
        if newp is not None:
            # A paragraph is being collected: close it in front of the next
            # block element, section, or final child; otherwise absorb this
            # sibling into it.
            if child.tag in BLOCK_ELEMENTS or child.get('id', '').startswith('sect') or child.getnext() is None:
                child.addprevious(newp)
                newp = None
            else:
                newp.append(child)
        if newp is None and child.tag in BLOCK_ELEMENTS and child.tail and child.tail.strip():
            # Orphan text after a block element: start a new paragraph for it.
            newp = Element('p', **{'class': 'otherpara'})
            newp.text = child.tail
            child.tail = ''
    if newp is not None:
        # The paragraph was opened on the last child, so the loop never got a
        # chance to insert it; attach it now so the orphan text is not lost.
        wrapper.append(newp)
    return root
示例3: __init__
# 需要导入模块: from lxml import html [as 别名]
# 或者: from lxml.html import Element [as 别名]
def __init__(self, str):
    """Parse ``str`` as HTML and keep the resulting tree in ``self._tree``.

    Parsing uses a recovering ``html.HTMLParser``. If parsing raises
    ``XMLSyntaxError`` or ``ParserError`` the tree falls back to an empty
    ``<div>``. The failure is logged unless the input was falsy or the error
    is the ``ParserError`` whose first argument is ``'Document is empty'``.
    """
    try:
        lenient_parser = html.HTMLParser(recover=True)
        self._tree = html.fromstring(str, parser=lenient_parser)
    except (XMLSyntaxError, ParserError) as error:
        if str:
            # An "empty document" ParserError is expected noise, not worth a log entry.
            is_empty_document = isinstance(error, ParserError) and error.args[0] == 'Document is empty'
            if not is_empty_document:
                logger.exception('Failed to parse HTML string')
        self._tree = html.Element('div')
示例4: get_user
# 需要导入模块: from lxml import html [as 别名]
# 或者: from lxml.html import Element [as 别名]
def get_user(username, data):
    """Build the HTML element that displays a username.

    Args:
        username: Text to display; also passed to
            ``reverse('user_page', args=[username])`` for the link target.
        data: Positional arguments forwarded to
            ``Profile.get_user_css_class``; a falsy value means no record.

    Returns:
        A ``<span>`` element. With truthy ``data`` it carries the CSS class
        from ``Profile.get_user_css_class(*data)`` and wraps an ``<a>`` link
        to the user page; otherwise it simply contains the username as text.
    """
    if data:
        wrapper = Element('span', {'class': Profile.get_user_css_class(*data)})
        anchor = Element('a', {'href': reverse('user_page', args=[username])})
        anchor.text = username
        wrapper.append(anchor)
    else:
        wrapper = Element('span')
        wrapper.text = username
    return wrapper
示例5: lazy_load
# 需要导入模块: from lxml import html [as 别名]
# 或者: from lxml.html import Element [as 别名]
def lazy_load(tree):
    """Rewrite the ``<img>`` elements under ``tree`` in place for deferred loading.

    Images whose ``src`` starts with ``'data'`` or whose ``class`` contains
    ``'-math'`` are left untouched. Every other image gets:

    * a ``<noscript>`` sibling inserted right before it, holding a deep copy
      of the original image (with its tail text cleared);
    * its original ``src`` moved to ``data-src``;
    * ``src`` replaced by the ``static('blank.gif')`` placeholder;
    * ``unveil`` added to its ``class`` attribute.
    """
    placeholder = static('blank.gif')
    for image in tree.xpath('.//img'):
        original_src = image.get('src', '')
        if original_src.startswith('data') or '-math' in image.get('class', ''):
            continue

        # Keep an untouched copy of the image inside <noscript>.
        fallback = html.Element('noscript')
        clone = deepcopy(image)
        clone.tail = ''
        fallback.append(clone)
        image.addprevious(fallback)

        # Swap the real source for the placeholder.
        image.set('data-src', original_src)
        image.set('src', placeholder)

        existing_class = image.get('class')
        if existing_class:
            image.set('class', existing_class + ' unveil')
        else:
            image.set('class', 'unveil')
示例6: fragments_to_tree
# 需要导入模块: from lxml import html [as 别名]
# 或者: from lxml.html import Element [as 别名]
def fragments_to_tree(fragment):
    """Parse an HTML fragment string and return its content wrapped in a ``<div>``.

    The fragment is parsed with a recovering ``html.HTMLParser``. When parsing
    raises ``XMLSyntaxError`` or ``ParserError`` an empty ``<div>`` is
    returned; the failure is logged unless the fragment was falsy or the error
    is the ``ParserError`` whose first argument is ``'Document is empty'``.

    A leading string in the parse result becomes the ``<div>``'s text; the
    remaining items are added as its children.
    """
    container = html.Element('div')
    try:
        pieces = html.fragments_fromstring(fragment, parser=html.HTMLParser(recover=True))
    except (XMLSyntaxError, ParserError) as error:
        if fragment:
            # An "empty document" ParserError is expected noise, not worth a log entry.
            is_empty_document = isinstance(error, ParserError) and error.args[0] == 'Document is empty'
            if not is_empty_document:
                logger.exception('Failed to parse HTML string')
        return container

    starts_with_text = bool(pieces) and isinstance(pieces[0], str)
    if starts_with_text:
        container.text = pieces[0]
        pieces = pieces[1:]
    container.extend(pieces)
    return container
示例7: fragment_fromstring
# 需要导入模块: from lxml import html [as 别名]
# 或者: from lxml.html import Element [as 别名]
def fragment_fromstring(html, create_parent=False,
                        guess_charset=False, parser=None):
    """Parse a string that should contain exactly one HTML element.

    Without ``create_parent`` the input must yield a single element with
    nothing but whitespace after it; that element is returned with its tail
    removed.

    With a truthy ``create_parent`` the parsed content is wrapped in a new
    parent element instead, and leading or trailing text is permitted. The
    parent's tag is ``create_parent`` itself when it is a string, otherwise
    ``'div'``.

    Raises:
        TypeError: If ``html`` is not a string.
        etree.ParserError: In single-element mode, if no element or more than
            one element is found, or if non-whitespace text follows the
            element.
    """
    if not isinstance(html, _strings):
        raise TypeError('string required')

    wrap_in_parent = bool(create_parent)
    pieces = fragments_fromstring(
        html, guess_charset=guess_charset, parser=parser,
        no_leading_text=not wrap_in_parent)

    if not create_parent:
        # Single-element mode: enforce exactly one element and no trailing text.
        if not pieces:
            raise etree.ParserError('No elements found')
        if len(pieces) > 1:
            raise etree.ParserError('Multiple elements found')
        only = pieces[0]
        if only.tail and only.tail.strip():
            raise etree.ParserError('Element followed by text: %r' % only.tail)
        only.tail = None
        return only

    # Wrapper mode: collect everything under a freshly created parent.
    parent_tag = create_parent if isinstance(create_parent, _strings) else 'div'
    container = Element(parent_tag)
    if pieces:
        if isinstance(pieces[0], _strings):
            # A leading string is text that precedes the first element.
            container.text = pieces[0]
            del pieces[0]
        container.extend(pieces)
    return container
示例8: fragment_fromstring
# 需要导入模块: from lxml import html [as 别名]
# 或者: from lxml.html import Element [as 别名]
def fragment_fromstring(html, create_parent=False,
                        guess_charset=None, parser=None):
    """Parse a single HTML element out of ``html``.

    By default the string must contain exactly one element, with only
    whitespace allowed before or after it; anything else is an error.

    When ``create_parent`` is truthy, a parent element is created to hold the
    parsed content and leading/trailing text is accepted. A string value is
    used as the parent's tag name; any other truthy value selects ``'div'``.

    ``guess_charset`` and ``parser`` are forwarded unchanged to
    ``fragments_fromstring``. If ``guess_charset`` is true, the ``chardet``
    library will perform charset guessing on the string.

    Raises:
        TypeError: If ``html`` is not a string.
        etree.ParserError: Without ``create_parent``, if zero or several
            elements are found, or if the element is followed by
            non-whitespace text.
    """
    if not isinstance(html, _strings):
        raise TypeError('string required')

    leading_text_ok = bool(create_parent)
    parsed = fragments_fromstring(
        html,
        guess_charset=guess_charset,
        parser=parser,
        no_leading_text=not leading_text_ok,
    )

    if create_parent:
        if isinstance(create_parent, _strings):
            tag_name = create_parent
        else:
            tag_name = 'div'
        wrapper = Element(tag_name)
        if parsed:
            if isinstance(parsed[0], _strings):
                # The first item is text preceding any element: store it as
                # the wrapper's text and drop it from the child list.
                wrapper.text = parsed.pop(0)
            wrapper.extend(parsed)
        return wrapper

    if not parsed:
        raise etree.ParserError('No elements found')
    if len(parsed) > 1:
        raise etree.ParserError('Multiple elements found')

    element = parsed[0]
    trailing = element.tail
    if trailing and trailing.strip():
        raise etree.ParserError('Element followed by text: %r' % trailing)
    element.tail = None
    return element