Python html5lib.parse方法代码示例

本文整理汇总了Python中html5lib.parse方法的典型用法代码示例。如果您正苦于以下问题：Python html5lib.parse方法的具体用法？Python html5lib.parse怎么用？Python html5lib.parse使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块html5lib的用法示例。

在下文中一共展示了html5lib.parse方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_static_cache_headers

# 需要导入模块: import html5lib [as 别名]
# 或者: from html5lib import parse [as 别名]
def test_static_cache_headers(conf, requests_session):
    """Test that all scripts included from self-repair have long lived cache headers.

    Fetches the ``/en-US/repair`` page from the configured server, then
    requests every external ``<script src=...>`` it references and asserts
    that the response's Cache-Control header is public, immutable, and has a
    max-age of at least one year.
    """
    # Minimum acceptable max-age in seconds (365 days); constant for the run.
    ONE_YEAR = 31_536_000
    server = conf.getoption("server")

    req = requests_session.get(server + "/en-US/repair")
    req.raise_for_status()
    document = html5lib.parse(req.content, treebuilder="dom")

    for script in document.getElementsByTagName("script"):
        src = script.getAttribute("src")
        if not src:
            # Inline scripts have no src attribute (the DOM returns ""), and
            # joining "" onto the server URL would re-fetch the server root
            # instead of a script, so there is nothing to check here.
            continue
        url = urljoin(server, src)
        script_req = requests_session.get(url)
        script_req.raise_for_status()
        cache_control = parse_cache_control(script_req.headers["cache-control"])
        assert cache_control["public"], f"Cache-control: public for {url}"
        assert cache_control["max-age"] >= ONE_YEAR, f"Cache-control: max-age > 1 year for {url}"
        assert cache_control["immutable"], f"Cache-control: immutable for {url}"

开发者ID:mozilla，项目名称:normandy，代码行数:18，代码来源:test_performance.py

示例2: __init__

# 需要导入模块: import html5lib [as 别名]
# 或者: from html5lib import parse [as 别名]
def __init__(self, url, content, headers):
        """Store the page URL and content, and parse the content as HTML.

        A trailing slash is appended to ``url`` when it is missing.  If
        ``headers`` carries a ``Content-Type`` entry with a ``charset``
        parameter, that charset is passed to html5lib as the transport
        encoding; otherwise no encoding argument is passed at all.
        """
        self._url = url if url.endswith("/") else url + "/"

        charset = None
        if headers and "Content-Type" in headers:
            _content_type, params = cgi.parse_header(headers["Content-Type"])
            charset = params.get("charset")

        self._content = content

        # Only mention transport_encoding when a charset was found, so the
        # call made without one is exactly the two-argument form.
        parse_options = {"namespaceHTMLElements": False}
        if charset is not None:
            parse_options["transport_encoding"] = charset
        self._parsed = html5lib.parse(content, **parse_options)

开发者ID:python-poetry，项目名称:poetry，代码行数:22，代码来源:legacy_repository.py

示例3: link_version

# 需要导入模块: import html5lib [as 别名]
# 或者: from html5lib import parse [as 别名]
def link_version(self, link):  # type: (Link) -> Union[Version, None]
        """Extract the version encoded in a link's filename.

        The filename is first matched against ``wheel_file_re`` (using its
        ``ver`` group); if that fails, the link's stem is matched against
        ``self.VERSION_REGEX`` (using group 2).  Returns ``None`` when neither
        pattern matches or when ``Version.parse`` raises ``ValueError``.
        """
        wheel_match = wheel_file_re.match(link.filename)
        if wheel_match:
            raw_version = wheel_match.group("ver")
        else:
            stem, _ext = link.splitext()
            stem_match = self.VERSION_REGEX.match(stem)
            if not stem_match:
                return None
            raw_version = stem_match.group(2)

        try:
            return Version.parse(raw_version)
        except ValueError:
            return None

开发者ID:python-poetry，项目名称:poetry，代码行数:20，代码来源:legacy_repository.py

示例4: compatible_with

# 需要导入模块: import html5lib [as 别名]
# 或者: from html5lib import parse [as 别名]
def compatible_with(
        cls,
        python_version: PythonVersion,
        impl: Optional[str],
        plats: Optional[List[str]],
    ) -> WheelMatcher:
        """Build an instance for the given Python version, implementation and platforms.

        The required Python version is the dotted form of ``python_version``
        parsed with ``packaging.version.parse``.  Each tag is mapped to its
        position in the chained tag sequence (generic compatible tags first,
        then CPython tags).
        """
        dotted_version = ".".join(map(str, python_version))
        required_python = packaging.version.parse(dotted_version)
        # TODO: Add ABI customization.
        generic_tags = packaging.tags.compatible_tags(python_version, impl, plats)
        cpython_tags = packaging.tags.cpython_tags(python_version, None, plats)
        ordered_tags = itertools.chain(generic_tags, cpython_tags)
        tags = {tag: position for position, tag in enumerate(ordered_tags)}
        return cls(required_python, tags)

开发者ID:sarugaku，项目名称:resolvelib，代码行数:18，代码来源:py2index.py

示例5: collect_best_dist_urls

# 需要导入模块: import html5lib [as 别名]
# 或者: from html5lib import parse [as 别名]
def collect_best_dist_urls(self, name: str) -> Dict[str, str]:
        """Return the best-ranked wheel URL for each version of ``name``.

        Every index in ``self.index_urls`` is queried at ``{index_url}/{name}``
        and each ``<a>`` element on the returned page is inspected.  Links
        that are not ``.whl`` files, and wheels for which ``self.matcher.rank``
        returns ``None``, are ignored.  For each version the URL with the
        smallest ``(rank, url)`` tuple is kept.

        Raises whatever ``requests`` raises for a failed request or a
        non-success HTTP status.
        """
        all_dists: DistListMapping = collections.defaultdict(list)
        for index_url in self.index_urls:
            res = requests.get(f"{index_url}/{name}")
            res.raise_for_status()
            doc = html5lib.parse(res.content, namespaceHTMLElements=False)
            for el in doc.findall(".//a"):
                url = el.attrib.get("href")
                if not url:
                    # An anchor without a target cannot reference a wheel.
                    continue
                filename = urllib.parse.urlsplit(url).path.rsplit("/", 1)[-1]
                # rpartition never fails on names without a dot (unlike
                # unpacking rsplit), so such links are skipped, not fatal.
                wheel_name, dot, ext = filename.rpartition(".")
                if not dot or ext != "whl":
                    continue
                requires_python = el.attrib.get("data-requires-python")
                _, version, tag = _parse_wheel_name(wheel_name)
                rank = self.matcher.rank(tag, requires_python)
                if rank is None:
                    continue
                all_dists[version].append((rank, url))
        urls = {version: min(dists)[1] for version, dists in all_dists.items()}
        logger.info("%d URLs found for %s", len(urls), name)
        return urls

开发者ID:sarugaku，项目名称:resolvelib，代码行数:23，代码来源:py2index.py

示例6: get_text_from_html

# 需要导入模块: import html5lib [as 别名]
# 或者: from html5lib import parse [as 别名]
def get_text_from_html(markup):
    """
    Convert html markup to plain text.

    The markup is parsed with html5lib, the text pieces produced by
    ``get_text_from_element`` are split on whitespace, and the resulting
    words are joined with single spaces.  If anything in that process
    raises, the error is logged and the stripped markup is returned instead.
    """
    try:
        tree = html5lib.parse(markup)
        words = [
            word
            for fragment in get_text_from_element(tree)
            for word in fragment.split()
        ]
        text = u' '.join(words)
    except Exception as exception:  # pylint: disable=broad-except
        # TODO: find out what exceptions might actually occur here, if any.
        # This may be unnecessarily paranoid, given html5lib's fallback behavior.
        log.error("Unparseable answer value markup: '%s' return exception %s", markup, exception)
        text = markup.strip()

    return text

开发者ID:edx，项目名称:edx-analytics-pipeline，代码行数:22，代码来源:answer_dist.py

示例7: parse_payfast_page

# 需要导入模块: import html5lib [as 别名]
# 或者: from html5lib import parse [as 别名]
def parse_payfast_page(response):  # type: (requests.Response) -> Dict[str, str]
    """
    Scrape some data from a PayFast payment page response.

    The response must be served as ``text/html; charset=UTF-8``.  The result
    holds the non-empty session tracker attributes, the payment summary and,
    when the corresponding elements are present, the notice text and the
    wallet payment method / pay button values.
    """
    assert 'text/html; charset=UTF-8' == response.headers['Content-Type']
    doc = html5lib.parse(response.text)  # type: ElementTree

    scraped = {}  # type: Dict[str, str]

    # The session info (empty attribute values are left out):
    tracker = find_id(doc, 'session-tracker')
    for field in ('type', 'id'):
        field_value = tracker.attrib['data-{}'.format(field)]
        if field_value:
            scraped['session_{}'.format(field)] = field_value

    # The payment summary on the left.
    scraped['payment_summary'] = text_collapsed(find_id(doc, 'left-column'))

    content_box = find_id(find_id(doc, 'right-column'), 'contentBox')

    # The error notice, if any:
    notice = find_id_maybe(content_box, 'notice')
    if notice is not None:
        scraped['notice'] = text_lines(notice)

    # The wallet payment completion option, if present:
    wallet_tab = find_id_maybe(content_box, 'waTab')
    if wallet_tab is not None:
        scraped['payment_method'] = wallet_tab.attrib['data-methodkey']
        scraped['pay_button'] = find_id(wallet_tab, 'pay-with-wallet').attrib['value']

    return scraped

开发者ID:PiDelport，项目名称:django-payfast，代码行数:38，代码来源:test_integration_sandbox.py

示例8: do_checkout

# 需要导入模块: import html5lib [as 别名]
# 或者: from html5lib import parse [as 别名]
def do_checkout(
        checkout_data,  # type: Dict[str, str]
        sign_checkout,  # type: bool
):  # type: (...) -> Dict[str, str]
    """
    Common test helper: do a checkout, and assert results.

    `checkout_data` must arrive unsigned; when `sign_checkout` is true a
    'signature' entry is added to it in place before it is posted.

    Return the checkout page's parse.
    """
    # Work out what the page is expected to display.
    raw_amount = checkout_data['amount']
    try:
        expected_amount = '{:.2f}'.format(decimal.Decimal(raw_amount))
    except decimal.InvalidOperation:
        # Amounts that Decimal cannot parse are expected back unmodified.
        expected_amount = raw_amount
    # PayFast strips the item name for display.
    expected_item_name = checkout_data['item_name'].strip()
    summary = '{} Payment total R {} ZAR'.format(expected_item_name, expected_amount)
    # Strip the whole summary too, to handle item names that render empty.
    expected_payment_summary = summary.strip()

    if sign_checkout:
        assert 'signature' not in checkout_data, checkout_data
        checkout_data['signature'] = api.checkout_signature(checkout_data)

    parsed = parse_payfast_page(post_sandbox_checkout(checkout_data))

    expected = {
        'session_type': 'p-sb',
        # The session id is not predictable; only its presence is checked.
        'session_id': parsed.get('session_id', 'MISSING'),
        'payment_summary': expected_payment_summary,
        'payment_method': '1',
        'pay_button': 'Complete Payment',
    }
    assert expected == parsed

    return parsed

开发者ID:PiDelport，项目名称:django-payfast，代码行数:41，代码来源:test_integration_sandbox.py

示例9: test_cache_headers

# 需要导入模块: import html5lib [as 别名]
# 或者: from html5lib import parse [as 别名]
def test_cache_headers(self, conf, requests_session, path, only_readonly):
        """Check that ``path`` is served with a public, positive max-age Cache-Control.

        Paths under ``/api/`` are marked as expected failures before any
        request is made.
        """
        if path.startswith("/api/"):
            pytest.xfail("caching temporarily hidden on api by nginx")
        response = requests_session.get(conf.getoption("server") + path)
        response.raise_for_status()
        header_value = response.headers.get("cache-control")
        assert header_value is not None

        # Split the header into its comma-separated directives.
        directives = list(map(str.strip, header_value.split(",")))
        max_age_directives = [d for d in directives if d.startswith("max-age=")]
        max_age_seconds = int(max_age_directives[0].split("=")[1])
        assert "public" in directives
        assert max_age_seconds > 0

开发者ID:mozilla，项目名称:normandy，代码行数:16，代码来源:test_performance.py

示例10: parse_rss

# 需要导入模块: import html5lib [as 别名]
# 或者: from html5lib import parse [as 别名]
def parse_rss(url=None, **kwargs):
    """Parse the feed at ``url`` and return the ``rssparser.parse`` result.

    The url is first opened with ``fetch(decode(url), **kwargs)``.  If that
    raises ``ValueError`` or ``URLError``, ``url`` itself is handed to
    ``rssparser.parse``.  Otherwise the fetched object is parsed: its content
    is read up front when ``speedparser`` is truthy, else the object is passed
    as-is.  The fetched object is always closed, including when reading or
    parsing raises.
    """
    try:
        f = fetch(decode(url), **kwargs)
    except (ValueError, URLError):
        return rssparser.parse(url)

    try:
        # Read inside the guarded region so a failing read() cannot leak f.
        content = f.read() if speedparser else f
        return rssparser.parse(content)
    finally:
        f.close()

开发者ID:nerevu，项目名称:riko，代码行数:16，代码来源:parsers.py

示例11: xml2etree

# 需要导入模块: import html5lib [as 别名]
# 或者: from html5lib import parse [as 别名]
def xml2etree(f, xml=True, html5=False):
    """Parse ``f`` and return the resulting element tree.

    With ``xml`` truthy, ``etree.parse`` is used.  Otherwise, when
    ``html5parser`` is available, ``html5parser.parse`` handles the input if
    ``html5`` is truthy and ``html.parse`` handles it if not.  Without
    ``html5parser``, the result of ``html.parse`` is wrapped in an
    ``ElementTree``.
    """
    if xml:
        return etree.parse(f)

    if html5 and html5parser:
        return html5parser.parse(f)

    if html5parser:
        return html.parse(f)

    # html5lib's parser returns an Element, so we must convert it into an
    # ElementTree
    return ElementTree(html.parse(f))

开发者ID:nerevu，项目名称:riko，代码行数:15，代码来源:parsers.py

示例12: test_strip_tag

# 需要导入模块: import html5lib [as 别名]
# 或者: from html5lib import parse [as 别名]
def test_strip_tag ():
    """ StripTagFilter given ['b', 'c'] must serialize the sample markup to '<a>barbaz.</a>.' """
    markup = '<a>barbaz<b>foobar</b>.</a><b>foobar</b>.<b attr=1><c></c>'
    document = html5lib.parse (markup)
    walker = getTreeWalker ('etree')
    filtered = StripTagFilter (walker (document), ['b', 'c'])
    rendered = HTMLSerializer ().render (filtered)
    assert rendered == '<a>barbaz.</a>.'

开发者ID:PromyLOPh，项目名称:crocoite，代码行数:7，代码来源:test_html.py

示例13: test_strip_attribute

# 需要导入模块: import html5lib [as 别名]
# 或者: from html5lib import parse [as 别名]
def test_strip_attribute ():
    """ StripAttributeFilter given ['b', 'c', 'd'] must serialize the sample markup to '<a></a><br keep=1>' """
    markup = '<a b=1 c="yes" d></a><br b=2 c="no" d keep=1>'
    document = html5lib.parse (markup)
    walker = getTreeWalker ('etree')
    filtered = StripAttributeFilter (walker (document), ['b', 'c', 'd'])
    rendered = HTMLSerializer ().render (filtered)
    assert rendered == '<a></a><br keep=1>'