本文整理匯總了Python中lxml.html.fragment_fromstring方法的典型用法代碼示例。如果您正苦於以下問題:Python html.fragment_fromstring方法的具體用法?Python html.fragment_fromstring怎麼用?Python html.fragment_fromstring使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在模塊lxml.html的用法示例。
在下文中一共展示了html.fragment_fromstring方法的5個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: parse_html
# 需要導入模塊: from lxml import html [as 別名]
# 或者: from lxml.html import fragment_fromstring [as 別名]
def parse_html(html, cleanup=True):
    """
    Parse an HTML fragment and return it as an lxml element.

    The fragment is always parsed with ``create_parent=True``, so the
    returned element is a wrapping <div> that was not part of the input.

    When ``cleanup`` is true the markup is first run through
    ``cleanup_html``, which is meant to strip surrounding structure such as
    <head>/<body> and remove <ins>/<del> tags; otherwise the markup is
    parsed as given.
    """
    # Only pre-process the markup when the caller asked for it.
    markup = cleanup_html(html) if cleanup else html
    return fragment_fromstring(markup, create_parent=True)
示例2: convert_json_to_html
# 需要導入模塊: from lxml import html [as 別名]
# 或者: from lxml.html import fragment_fromstring [as 別名]
def convert_json_to_html(elements):
    """
    Convert an iterable of JSON-style nodes into an HTML string.

    Every item of ``elements`` is converted with ``_recursive_convert_json``
    and appended to a temporary <div> wrapper. Links inside the wrapper are
    then made absolute against ``base_url`` (a name resolved from outside
    this function), <span> tags are dropped while their content is kept,
    and the serialized markup is passed through
    ``replace_line_breaks_except_pre`` with ``'<br/>'``.

    The returned string has the first 5 and last 6 characters removed,
    i.e. the wrapper's own ``<div>`` / ``</div>`` tags.
    """
    wrapper = html.fragment_fromstring('<div></div>')
    for node in elements:
        wrapper.append(_recursive_convert_json(node))

    wrapper.make_links_absolute(base_url=base_url)

    for span in wrapper.xpath('.//span'):
        span.drop_tag()

    markup = replace_line_breaks_except_pre(
        html.tostring(wrapper, encoding='unicode'),
        '<br/>',
    )

    # Slice off the temporary wrapper: 5 leading and 6 trailing characters.
    # NOTE(review): assumes the helper above leaves the outer <div>...</div>
    # untouched -- confirm against its implementation.
    return markup[len('<div>'):-len('</div>')]
示例3: transform_misused_divs_into_paragraphs
# 需要導入模塊: from lxml import html [as 別名]
# 或者: from lxml.html import fragment_fromstring [as 別名]
def transform_misused_divs_into_paragraphs(self):
    """
    Turn <div> elements that are used like paragraphs into real <p> elements.

    Two passes are made over the <div> elements of ``self._html``:

    1. A <div> whose serialized markup does not match
       ``REGEXES['divToPElementsRe']`` is retagged as <p>.
    2. For every remaining <div>, non-blank text directly inside it (its
       leading text and the tail text of each child) is moved into newly
       created <p> children, and <br> children are dropped.
    """

    def paragraph_with(text):
        # Build a fresh, detached <p> element holding ``text``.
        paragraph = fragment_fromstring('<p/>')
        paragraph.text = text
        return paragraph

    for div in self.tags(self._html, 'div'):
        # transform <div>s that do not contain other block elements into
        # <p>s
        # FIXME: The current implementation ignores all descendants that are not direct children of elem
        # This results in incorrect results in case there is an <img> buried within an <a> for example
        serialized = tostring(div).decode()
        if not REGEXES['divToPElementsRe'].search(serialized):
            div.tag = "p"

    for div in self.tags(self._html, 'div'):
        leading_text = div.text
        if leading_text and leading_text.strip():
            div.text = None
            div.insert(0, paragraph_with(leading_text))

        # Walk the children back to front so that inserting after a child
        # does not shift the positions of children still to be visited.
        for index, node in reversed(list(enumerate(div))):
            trailing_text = node.tail
            if trailing_text and trailing_text.strip():
                node.tail = None
                div.insert(index + 1, paragraph_with(trailing_text))
            if node.tag == 'br':
                node.drop_tree()
示例4: initial_output
# 需要導入模塊: from lxml import html [as 別名]
# 或者: from lxml.html import fragment_fromstring [as 別名]
def initial_output(html_partial=False):
    """
    Build the initial output element from the markup ``'<div/>'``.

    :param html_partial: when true, parse the markup as a fragment;
        otherwise parse it as a full document
    :return: the element produced by ``fragment_fromstring`` or
        ``document_fromstring`` respectively
    """
    if html_partial:
        return fragment_fromstring('<div/>')
    return document_fromstring('<div/>')
示例5: parse_html
# 需要導入模塊: from lxml import html [as 別名]
# 或者: from lxml.html import fragment_fromstring [as 別名]
def parse_html(text):
    """
    Parse ``text`` as an HTML fragment and return the resulting element.

    Parsing is delegated to ``html.fragment_fromstring`` using the
    ``_HTML_PARSER`` parser object defined outside this function. No parent
    element is requested, so the input is expected to form a single element.
    """
    element = html.fragment_fromstring(text, parser=_HTML_PARSER)
    return element