当前位置: 首页>>代码示例>>Python>>正文


Python html5lib.parse方法代码示例

本文整理汇总了Python中pip._vendor.html5lib.parse方法的典型用法代码示例。如果您正苦于以下问题：Python html5lib.parse方法的具体用法？Python html5lib.parse怎么用？Python html5lib.parse使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在pip._vendor.html5lib的用法示例。


在下文中一共展示了html5lib.parse方法的7个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: __init__

# 需要导入模块: from pip._vendor import html5lib [as 别名]
# 或者: from pip._vendor.html5lib import parse [as 别名]
def __init__(self, content, url, headers=None):
        """Store the page data and parse ``content`` with html5lib.

        :param content: The page body; stored on ``self.content`` and handed
            to ``html5lib.parse()``.
        :param url: Stored on ``self.url``.
        :param headers: Optional mapping of response headers; stored on
            ``self.headers``. When it has a ``Content-Type`` entry carrying a
            ``charset`` parameter, that value is passed to the parser as
            ``transport_encoding``.
        """
        # Only the header parameters matter here; the media type is ignored.
        charset = None
        if headers and "Content-Type" in headers:
            _, header_params = cgi.parse_header(headers["Content-Type"])
            charset = header_params.get("charset")

        self.content = content
        self.parsed = html5lib.parse(
            self.content,
            namespaceHTMLElements=False,
            transport_encoding=charset,
        )
        self.url = url
        self.headers = headers
开发者ID:Frank-qlu,项目名称:recruit,代码行数:19,代码来源:index.py

示例2: __init__

# 需要导入模块: from pip._vendor import html5lib [as 别名]
# 或者: from pip._vendor.html5lib import parse [as 别名]
def __init__(self, content, url, headers=None):
        """Keep the page content, URL and headers, and parse the content.

        :param content: The page body; stored on ``self.content`` and parsed
            with ``html5lib.parse()`` into ``self.parsed``.
        :param url: Stored on ``self.url``.
        :param headers: Optional mapping of response headers; stored on
            ``self.headers``. A ``charset`` parameter found in its
            ``Content-Type`` entry is forwarded to the parser.
        """
        encoding = None
        has_content_type = bool(headers) and "Content-Type" in headers
        if has_content_type:
            # parse_header returns (value, params); only params are needed.
            params = cgi.parse_header(headers["Content-Type"])[1]
            encoding = params["charset"] if "charset" in params else None

        self.content = content
        # NOTE(review): this call uses the ``encoding`` keyword, whereas other
        # callers of html5lib.parse use ``transport_encoding`` -- confirm which
        # one the html5lib version in use accepts.
        self.parsed = html5lib.parse(
            self.content,
            namespaceHTMLElements=False,
            encoding=encoding,
        )
        self.url = url
        self.headers = headers
开发者ID:jpush,项目名称:jbox,代码行数:19,代码来源:index.py

示例3: _egg_info_matches

# 需要导入模块: from pip._vendor import html5lib [as 别名]
# 或者: from pip._vendor.html5lib import parse [as 别名]
def _egg_info_matches(egg_info, canonical_name):
    # type: (str, str) -> Optional[str]
    """Return the version portion of ``egg_info``, or None.

    :param egg_info: The string to parse. E.g. foo-2.1
    :param canonical_name: The canonicalized name of the package this
        belongs to.
    :return: Everything after the name/version separator located by
        ``_find_name_version_sep``; None when that helper raises
        ``ValueError`` or when nothing follows the separator.
    """
    try:
        separator_index = _find_name_version_sep(egg_info, canonical_name)
    except ValueError:
        return None
    # An empty remainder means there is no version to report.
    return egg_info[separator_index + 1:] or None
开发者ID:PacktPublishing,项目名称:Mastering-Elasticsearch-7.0,代码行数:18,代码来源:index.py

示例4: _determine_base_url

# 需要导入模块: from pip._vendor import html5lib [as 别名]
# 或者: from pip._vendor.html5lib import parse [as 别名]
def _determine_base_url(document, page_url):
    """Work out which URL relative links in the document resolve against.

    The ``href`` of the first ``<base>`` element that has one is returned.
    When no ``<base>`` element carries an ``href`` attribute, the URL of the
    page itself is returned instead.

    :param document: An HTML document representation. The current
        implementation expects the result of ``html5lib.parse()``.
    :param page_url: The URL of the HTML document.
    """
    base_hrefs = (tag.get("href") for tag in document.findall(".//base"))
    # First href that is present wins; fall back to the page's own URL.
    return next((href for href in base_hrefs if href is not None), page_url)
开发者ID:PacktPublishing,项目名称:Mastering-Elasticsearch-7.0,代码行数:19,代码来源:index.py

示例5: iter_links

# 需要导入模块: from pip._vendor import html5lib [as 别名]
# 或者: from pip._vendor.html5lib import parse [as 别名]
def iter_links(self):
        # type: () -> Iterable[Link]
        """Parse the page and yield a Link for each anchor with an href.

        The page content is parsed with ``html5lib.parse()``. Every ``<a>``
        element with a non-empty ``href`` is joined against the document's
        base URL, cleaned with ``_clean_link`` and yielded as a ``Link``.
        The anchor's ``data-requires-python`` attribute, when non-empty, is
        unescaped and passed along as ``requires_python``.
        """
        page_content = self.content
        transport_encoding = _get_encoding_from_headers(self.headers)
        tree = html5lib.parse(
            page_content,
            transport_encoding=transport_encoding,
            namespaceHTMLElements=False,
        )
        base_url = _determine_base_url(tree, self.url)
        for element in tree.findall(".//a"):
            href = element.get("href")
            if not href:
                # Anchors without a usable href produce no link.
                continue
            link_url = _clean_link(urllib_parse.urljoin(base_url, href))
            raw_requires = element.get('data-requires-python')
            if raw_requires:
                requires_python = unescape(raw_requires)
            else:
                requires_python = None
            yield Link(link_url, self.url, requires_python=requires_python)
开发者ID:PacktPublishing,项目名称:Mastering-Elasticsearch-7.0,代码行数:18,代码来源:index.py

示例6: _determine_base_url

# 需要导入模块: from pip._vendor import html5lib [as 别名]
# 或者: from pip._vendor.html5lib import parse [as 别名]
def _determine_base_url(document, page_url):
    # type: (HTMLElement, str) -> str
    """Return the base URL that applies to the HTML document.

    Each ``<base>`` element in the document is inspected in turn; the first
    one that has an ``href`` attribute supplies the base URL. If none does
    (or there is no ``<base>`` element at all), ``page_url`` is returned.

    :param document: An HTML document representation. The current
        implementation expects the result of ``html5lib.parse()``.
    :param page_url: The URL of the HTML document.
    """
    for base_tag in document.findall(".//base"):
        base_href = base_tag.get("href")
        if base_href is None:
            # A <base> without href says nothing about the base URL.
            continue
        return base_href
    return page_url
开发者ID:pantsbuild,项目名称:pex,代码行数:20,代码来源:collector.py

示例7: parse_links

# 需要导入模块: from pip._vendor import html5lib [as 别名]
# 或者: from pip._vendor.html5lib import parse [as 别名]
def parse_links(page):
    # type: (HTMLPage) -> Iterable[Link]
    """
    Parse the page's HTML and yield a Link for each of its anchor elements.

    ``page.content`` is parsed with ``html5lib.parse()`` using
    ``page.encoding`` as the transport encoding. Each ``<a>`` element is
    handed to ``_create_link_from_element``; elements for which that helper
    returns None are skipped.
    """
    tree = html5lib.parse(
        page.content,
        transport_encoding=page.encoding,
        namespaceHTMLElements=False,
    )

    page_url = page.url
    resolved_base = _determine_base_url(tree, page_url)
    for element in tree.findall(".//a"):
        candidate = _create_link_from_element(
            element,
            page_url=page_url,
            base_url=resolved_base,
        )
        if candidate is not None:
            yield candidate
开发者ID:pantsbuild,项目名称:pex,代码行数:24,代码来源:collector.py


注:本文中的pip._vendor.html5lib.parse方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。