本文整理匯總了Python中bs4.Doctype方法的典型用法代碼示例。如果您正苦於以下問題:Python bs4.Doctype方法的具體用法?Python bs4.Doctype怎麼用?Python bs4.Doctype使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類bs4的用法示例。
在下文中一共展示了bs4.Doctype方法的5個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點贊,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: root
# 需要導入模塊: import bs4 [as 別名]
# 或者: from bs4 import Doctype [as 別名]
def root(tag_name="html", doctype=None, **kwargs):
    """
    Build an empty soup and attach a freshly created root element to it.

    :param tag_name: Name of the root element to create.
    :param doctype: Doctype string placed before the root element; skipped when None.
    :param kwargs: Extra keyword arguments forwarded to soup.new_tag().
    :return: The newly created root tag; its ``soup`` attribute references the owning soup.
    """
    document = parse("")

    # The doctype, when requested, must precede the root element.
    if doctype is not None:
        declaration = bs4.Doctype(doctype)
        document.append(declaration)

    root_tag = document.new_tag(tag_name, **kwargs)
    # Keep a back-reference so callers holding only the tag can reach the soup.
    root_tag.soup = document
    document.append(root_tag)
    return root_tag
示例2: cleanHtmlPage
# 需要導入模塊: import bs4 [as 別名]
# 或者: from bs4 import Doctype [as 別名]
def cleanHtmlPage(self, soup, url=None):
    """
    Reduce a parsed HTML page to a cleaned title and an embeddable body fragment.

    The soup is first passed through ``self.relink``; the title comes from
    ``self.extractTitle`` with every ``self.stripTitle`` entry removed. The
    ``<head>`` is destroyed, ``<body>``/``<html>`` wrappers are unwrapped and
    doctype nodes are removed before the soup is prettified.

    :param soup: Parsed document to clean (modified in place).
    :param url: Optional URL forwarded to ``self.extractTitle``.
    :return: Tuple ``(title, contents)`` of the stripped title and the
        prettified markup with the globally configured inline strings removed.
    """
    soup = self.relink(soup)

    title = self.extractTitle(soup, url)
    # ``stripTitle`` may be a single string or a list/set of strings.
    if isinstance(self.stripTitle, (list, set)):
        for stripTitle in self.stripTitle:
            title = title.replace(stripTitle, "")
    else:
        title = title.replace(self.stripTitle, "")
    title = title.strip()

    if soup.head:
        soup.head.decompose()

    # Since the content we're extracting will be embedded into another page, we want to
    # strip out the <body> and <html> tags. `unwrap()` replaces the tag with its contents,
    # so we end up with just the contents of the <body> tag.
    while soup.body:
        soup.body.unwrap()
    while soup.html:
        soup.html.unwrap()

    # Iterate over a snapshot: extract() removes the node from the soup's
    # child list, and mutating that list mid-iteration would skip the
    # sibling that follows each removed doctype.
    for item in list(soup.children):
        if isinstance(item, bs4.Doctype):
            item.extract()

    contents = soup.prettify()
    for item in common.global_constants.GLOBAL_INLINE_BULLSHIT:
        contents = contents.replace(item, "")
    return title, contents
示例3: is_special_string
# 需要導入模塊: import bs4 [as 別名]
# 或者: from bs4 import Doctype [as 別名]
def is_special_string(obj):
    """Return True if ``obj`` is a bs4 Comment, Declaration, CData, ProcessingInstruction or Doctype."""
    special_types = (
        bs4.Comment,
        bs4.Declaration,
        bs4.CData,
        bs4.ProcessingInstruction,
        bs4.Doctype,
    )
    return isinstance(obj, special_types)
示例4: is_special_string
# 需要導入模塊: import bs4 [as 別名]
# 或者: from bs4 import Doctype [as 別名]
def is_special_string(obj):
    """Return True if ``obj`` is a bs4 Comment, Declaration, CData, ProcessingInstruction or Doctype."""
    # Imported locally, as in the original, so bs4 is only loaded on first call.
    import bs4

    special_types = (
        bs4.Comment,
        bs4.Declaration,
        bs4.CData,
        bs4.ProcessingInstruction,
        bs4.Doctype,
    )
    return isinstance(obj, special_types)
示例5: GenerateHTML
# 需要導入模塊: import bs4 [as 別名]
# 或者: from bs4 import Doctype [as 別名]
def GenerateHTML(self, controller, minify=False, prettify=False):
    """
    Render this module's soup as a standalone HTML fragment.

    A copy of ``self._soup`` is built via ``_CreateSoupWithoutHeadOrBody`` and
    then stripped of doctype/declaration nodes, HTML imports and all
    ``<script>`` elements. Inline ``<style>`` elements and stylesheet
    ``<link>`` elements are rewritten with whatever the controller returns
    for them, or removed when the controller returns a falsy value.

    :param controller: Object providing ``GetHTMLForInlineStylesheet(text)``
        and ``GetHTMLForStylesheetHRef(href)``.
    :param minify: When true, HTML comments are removed as well.
    :param prettify: When true, return ``soup.prettify('utf-8')`` stripped
        instead of the plain unicode rendering.
    :return: The resulting markup with surrounding whitespace stripped.
    """
    soup = _CreateSoupWithoutHeadOrBody(unicode(self._soup))

    # Remove doctype and declaration nodes. Iterate over a snapshot because
    # extract() mutates soup.contents; looping over the live list would skip
    # the node that follows each removed one.
    for node in list(soup.contents):
        if isinstance(node, (bs4.Doctype, bs4.Declaration)):
            node.extract()

    # Remove all imports.
    for imp in soup.findAll('link', rel='import'):
        imp.extract()

    # Remove all script links.
    for script in soup.findAll('script', src=True):
        script.extract()

    # Remove all in-line scripts.
    scripts_inline = soup.findAll('script', src=None)
    for script in scripts_inline:
        script.extract()

    # Process all in-line styles: replace each with the controller's version,
    # or drop it when the controller has nothing for it.
    for style in soup.findAll('style'):
        html = controller.GetHTMLForInlineStylesheet(unicode(style.string))
        if html:
            ns = soup.new_tag('style')
            ns.append(bs4.NavigableString(html))
            style.replaceWith(ns)
        else:
            style.extract()

    # Rewrite all external stylesheet hrefs or remove, as needed.
    for stylesheet_link in soup.findAll('link', rel='stylesheet'):
        html = controller.GetHTMLForStylesheetHRef(stylesheet_link['href'])
        if html:
            # The controller is expected to hand back exactly one <style> element.
            tmp = bs4.BeautifulSoup(html, 'html5lib').findAll('style')
            assert len(tmp) == 1
            stylesheet_link.replaceWith(tmp[0])
        else:
            stylesheet_link.extract()

    # Remove comments if minifying.
    if minify:
        comments = soup.findAll(
            text=lambda text: isinstance(text, bs4.Comment))
        for comment in comments:
            comment.extract()

    if prettify:
        return soup.prettify('utf-8').strip()

    # We are done.
    return unicode(soup).strip()