当前位置: 首页>>代码示例>>Python>>正文


Python bs4.Doctype类代码示例

本文整理汇总了Python中bs4.Doctype类的典型用法代码示例。如果您正苦于以下问题:Python bs4.Doctype类的具体用法是什么?Python bs4.Doctype怎么用?有哪些Python bs4.Doctype的使用例子?那么,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块bs4的用法示例。


下文共展示了bs4.Doctype类的5个代码示例,这些例子默认按受欢迎程度排序。您可以为喜欢或者觉得有用的代码点赞,您的评价将有助于系统推荐出更好的Python代码示例。

示例1: root

# 需要导入模块: import bs4 [as 别名]
# 或者: from bs4 import Doctype [as 别名]
def root(tag_name="html", doctype=None, **kwargs):
    """
    Build an empty soup and attach a newly created root element to it.

    :param tag_name: Name of the root element to create.
    :param doctype: Doctype string; when it is not None, a bs4.Doctype built
        from it is appended to the soup ahead of the root element.
    :param kwargs: Extra keyword arguments forwarded to soup.new_tag().
    :return: The new root tag; the soup it was appended to is stored on the
        tag as its ``soup`` attribute.
    """
    document = parse("")

    has_doctype = doctype is not None
    if has_doctype:
        declaration = bs4.Doctype(doctype)
        document.append(declaration)

    root_tag = document.new_tag(tag_name, **kwargs)
    # Record the owning soup on the tag before attaching the tag to it.
    root_tag.soup = document
    document.append(root_tag)
    return root_tag
开发者ID:man-group,项目名称:PyBloqs,代码行数:18,代码来源:html.py

示例2: cleanHtmlPage

# 需要导入模块: import bs4 [as 别名]
# 或者: from bs4 import Doctype [as 别名]
def cleanHtmlPage(self, soup, url=None):
		"""
		Clean a parsed page and return its title and prettified content.

		The soup is passed through `self.relink()`, the title is obtained from
		`self.extractTitle()` with every `self.stripTitle` substring removed,
		and the <head> tag, the <body>/<html> wrappers and any top-level
		doctype nodes are dropped before the soup is serialised.

		:param soup: Parsed document to clean.
		:param url: Optional URL, forwarded to `self.extractTitle()`.
		:return: Tuple of `(title, contents)`, where `contents` is the
			prettified markup with every GLOBAL_INLINE_BULLSHIT entry removed.
		"""

		soup = self.relink(soup)

		title = self.extractTitle(soup, url)

		# `stripTitle` is either a collection of substrings or a single one.
		if isinstance(self.stripTitle, (list, set)):
			for stripTitle in self.stripTitle:
				title = title.replace(stripTitle, "")
		else:
			title = title.replace(self.stripTitle, "")

		title = title.strip()

		if soup.head:
			soup.head.decompose()

		# Since the content we're extracting will be embedded into another page, we want to
		# strip out the <body> and <html> tags. `unwrap()`  replaces the soup with the contents of the
		# tag it's called on. We end up with just the contents of the <body> tag.
		while soup.body:
			soup.body.unwrap()

		while soup.html:
			soup.html.unwrap()

		# Iterate over a snapshot: `extract()` removes the node from the soup's
		# child list, and mutating that list while iterating it directly would
		# skip the sibling that follows each removed doctype.
		for item in list(soup.children):
			if isinstance(item, bs4.Doctype):
				item.extract()

		contents = soup.prettify()

		for item in common.global_constants.GLOBAL_INLINE_BULLSHIT:
			contents = contents.replace(item, "")

		return title, contents
开发者ID:fake-name,项目名称:ReadableWebProxy,代码行数:42,代码来源:HtmlProcessor.py

示例3: is_special_string

# 需要导入模块: import bs4 [as 别名]
# 或者: from bs4 import Doctype [as 别名]
def is_special_string(obj):
        """Report whether `obj` is one of the bs4 special string node types."""
        special_types = (
            bs4.Comment,
            bs4.Declaration,
            bs4.CData,
            bs4.ProcessingInstruction,
            bs4.Doctype,
        )
        return isinstance(obj, special_types)
开发者ID:facelessuser,项目名称:soupsieve,代码行数:5,代码来源:css_match.py

示例4: is_special_string

# 需要导入模块: import bs4 [as 别名]
# 或者: from bs4 import Doctype [as 别名]
def is_special_string(obj):
        """Tell whether `obj` is a comment, declaration, CDATA, PI or doctype node."""

        # bs4 is imported at call time, inside the function body.
        import bs4

        special_kinds = (
            bs4.Comment,
            bs4.Declaration,
            bs4.CData,
            bs4.ProcessingInstruction,
            bs4.Doctype,
        )
        matches = isinstance(obj, special_kinds)
        return matches
开发者ID:morpheus65535,项目名称:bazarr,代码行数:7,代码来源:css_match.py

示例5: GenerateHTML

# 需要导入模块: import bs4 [as 别名]
# 或者: from bs4 import Doctype [as 别名]
def GenerateHTML(self, controller, minify=False, prettify=False):
    """Serialise a stripped-down copy of this object's HTML.

    A fresh soup is built from `self._soup`; doctype/declaration nodes,
    `<link rel="import">` tags and all `<script>` tags are removed, and
    stylesheets are rewritten through `controller`.

    Args:
      controller: Object providing `GetHTMLForInlineStylesheet(text)` and
          `GetHTMLForStylesheetHRef(href)`. When either returns a falsy
          value, the corresponding element is removed instead of rewritten.
      minify: If true, comment nodes are removed as well.
      prettify: If true, return `soup.prettify('utf-8')` (stripped) instead
          of the plain unicode serialisation.

    Returns:
      The stripped serialised markup.
    """
    soup = _CreateSoupWithoutHeadOrBody(unicode(self._soup))

    # Remove doctype and declaration nodes. Iterate over a snapshot of the
    # child list: extract() removes the node from `soup.contents`, and
    # mutating that list while iterating it would skip the following sibling.
    for x in list(soup.contents):
      if isinstance(x, (bs4.Doctype, bs4.Declaration)):
        x.extract()

    # Remove all imports.
    imports = soup.findAll('link', rel='import')
    for imp in imports:
      imp.extract()

    # Remove all script links.
    scripts_external = soup.findAll('script', src=True)
    for script in scripts_external:
      script.extract()

    # Remove all in-line scripts.
    scripts_inline = soup.findAll('script', src=None)
    for script in scripts_inline:
      script.extract()

    # Process all in-line styles.
    inline_styles = soup.findAll('style')
    for style in inline_styles:
      html = controller.GetHTMLForInlineStylesheet(unicode(style.string))
      if html:
        ns = soup.new_tag('style')
        ns.append(bs4.NavigableString(html))
        style.replaceWith(ns)
      else:
        style.extract()

    # Rewrite all external stylesheet hrefs or remove, as needed.
    stylesheet_links = soup.findAll('link', rel='stylesheet')
    for stylesheet_link in stylesheet_links:
      html = controller.GetHTMLForStylesheetHRef(stylesheet_link['href'])
      if html:
        # The controller's HTML must parse to exactly one <style> element.
        tmp = bs4.BeautifulSoup(html, 'html5lib').findAll('style')
        assert len(tmp) == 1
        stylesheet_link.replaceWith(tmp[0])
      else:
        stylesheet_link.extract()

    # Remove comments if minifying.
    if minify:
      comments = soup.findAll(
          text=lambda text: isinstance(text, bs4.Comment))
      for comment in comments:
        comment.extract()
    if prettify:
      return soup.prettify('utf-8').strip()

    # We are done.
    return unicode(soup).strip()
开发者ID:FSecureLABS,项目名称:Jandroid,代码行数:63,代码来源:parse_html_deps.py


注:本文中的bs4.Doctype方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。