本文整理汇总了Python中html5lib.parse方法的典型用法代码示例。如果您正苦于以下问题:Python html5lib.parse方法的具体用法?Python html5lib.parse怎么用?Python html5lib.parse使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块html5lib的用法示例。
在下文中一共展示了html5lib.parse方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_static_cache_headers
# 需要导入模块: import html5lib [as 别名]
# 或者: from html5lib import parse [as 别名]
def test_static_cache_headers(conf, requests_session):
    """Test that all scripts included from self-repair have long lived cache headers.

    Fetches ``/en-US/repair`` from the server named by the ``server`` option,
    then requests every ``<script>`` that carries a ``src`` and asserts that
    the response's ``Cache-Control`` header is public, immutable and has a
    ``max-age`` of at least one year.

    Args:
        conf: Object exposing ``getoption("server")``, the base URL under test.
        requests_session: Session-like object whose ``get(url)`` returns a
            response offering ``raise_for_status()``, ``content`` and
            ``headers``.
    """
    # Seconds in a (365-day) year; loop-invariant, so defined once up front.
    ONE_YEAR = 31_536_000
    server = conf.getoption("server")

    req = requests_session.get(server + "/en-US/repair")
    req.raise_for_status()
    document = html5lib.parse(req.content, treebuilder="dom")

    for script in document.getElementsByTagName("script"):
        src = script.getAttribute("src")
        if not src:
            # Inline scripts have no ``src``. Joining an empty value onto the
            # server URL would just re-request the server root and check the
            # cache headers of the wrong resource, so skip them.
            continue

        url = urljoin(server, src)
        script_req = requests_session.get(url)
        script_req.raise_for_status()

        cache_control = parse_cache_control(script_req.headers["cache-control"])
        assert cache_control["public"], f"Cache-control: public for {url}"
        assert cache_control["max-age"] >= ONE_YEAR, f"Cache-control: max-age > 1 year for {url}"
        assert cache_control["immutable"], f"Cache-control: immutable for {url}"
示例2: __init__
# 需要导入模块: import html5lib [as 别名]
# 或者: from html5lib import parse [as 别名]
def __init__(self, url, content, headers):
    """Record the page URL and raw content, then parse the content as HTML.

    The URL is stored with a trailing slash. When ``headers`` carries a
    ``Content-Type`` entry with a ``charset`` parameter, that charset is
    handed to html5lib as the transport encoding; otherwise html5lib is
    called without an encoding hint.

    Args:
        url: Page URL; a "/" is appended when it does not already end in one.
        content: Page body passed to ``html5lib.parse``.
        headers: Mapping of response headers, or a falsy value when absent.
    """
    self._url = url if url.endswith("/") else url + "/"

    charset = None
    if headers and "Content-Type" in headers:
        _content_type, header_params = cgi.parse_header(headers["Content-Type"])
        charset = header_params.get("charset")

    self._content = content

    # Only pass transport_encoding when a charset was actually declared.
    parse_options = {"namespaceHTMLElements": False}
    if charset is not None:
        parse_options["transport_encoding"] = charset
    self._parsed = html5lib.parse(content, **parse_options)
示例3: link_version
# 需要导入模块: import html5lib [as 别名]
# 或者: from html5lib import parse [as 别名]
def link_version(self, link):  # type: (Link) -> Union[Version, None]
    """Return the version encoded in *link*'s file name, or ``None``.

    The file name is first matched against ``wheel_file_re`` (using its
    ``ver`` group). If that fails, the name without its extension is matched
    against ``self.VERSION_REGEX`` (using group 2). ``None`` is returned when
    neither pattern matches or when ``Version.parse`` raises ``ValueError``.
    """
    wheel_match = wheel_file_re.match(link.filename)
    if wheel_match:
        raw_version = wheel_match.group("ver")
    else:
        stem, _extension = link.splitext()
        name_match = self.VERSION_REGEX.match(stem)
        if not name_match:
            return None
        raw_version = name_match.group(2)

    try:
        return Version.parse(raw_version)
    except ValueError:
        return None
示例4: compatible_with
# 需要导入模块: import html5lib [as 别名]
# 或者: from html5lib import parse [as 别名]
def compatible_with(
    cls,
    python_version: PythonVersion,
    impl: Optional[str],
    plats: Optional[List[str]],
) -> WheelMatcher:
    """Build a matcher for the given Python version, implementation and platforms.

    ``python_version`` is joined with dots and parsed into a version object.
    Tags are collected from ``packaging.tags.compatible_tags`` followed by
    ``packaging.tags.cpython_tags`` and mapped to their position in that
    combined sequence; a tag that appears more than once keeps the position
    of its last occurrence.
    """
    version_text = ".".join(map(str, python_version))
    required_python = packaging.version.parse(version_text)

    # TODO: Add ABI customization.
    candidate_tags = itertools.chain(
        packaging.tags.compatible_tags(python_version, impl, plats),
        packaging.tags.cpython_tags(python_version, None, plats),
    )
    tags = {}
    for position, tag in enumerate(candidate_tags):
        tags[tag] = position

    return cls(required_python, tags)
示例5: collect_best_dist_urls
# 需要导入模块: import html5lib [as 别名]
# 或者: from html5lib import parse [as 别名]
def collect_best_dist_urls(self, name: str) -> Dict[str, str]:
    """Return the best-ranked wheel URL for each version of *name*.

    Every index in ``self.index_urls`` is queried at ``{index_url}/{name}``
    and each ``<a>`` element on the returned page is inspected. Only links
    whose last path segment ends in ``.whl`` are considered. Each wheel is
    ranked with ``self.matcher.rank`` and wheels ranked ``None`` are dropped.
    For every version the URL with the smallest ``(rank, url)`` tuple wins.

    Args:
        name: Project name appended to each index URL.

    Returns:
        Mapping of version to the chosen wheel URL.

    Raises:
        requests.HTTPError: If an index responds with an error status
            (via ``raise_for_status``).
    """
    all_dists: DistListMapping = collections.defaultdict(list)
    for index_url in self.index_urls:
        res = requests.get(f"{index_url}/{name}")
        res.raise_for_status()
        doc = html5lib.parse(res.content, namespaceHTMLElements=False)
        for el in doc.findall(".//a"):
            url = el.attrib.get("href")
            if url is None:
                # An anchor without href cannot point at a distribution.
                continue
            filename = urllib.parse.urlsplit(url).path.rsplit("/", 1)[-1]
            # rpartition never fails to unpack, unlike rsplit(".", 1), which
            # yields a single item for names without a dot (e.g. "../").
            wheel_name, dot, ext = filename.rpartition(".")
            if not dot or ext != "whl":
                continue
            requires_python = el.attrib.get("data-requires-python")
            _, version, tag = _parse_wheel_name(wheel_name)
            rank = self.matcher.rank(tag, requires_python)
            if rank is None:
                continue
            all_dists[version].append((rank, url))
    urls = {version: min(dists)[1] for version, dists in all_dists.items()}
    logger.info("%d URLs found for %s", len(urls), name)
    return urls
示例6: get_text_from_html
# 需要导入模块: import html5lib [as 别名]
# 或者: from html5lib import parse [as 别名]
def get_text_from_html(markup):
    """
    Flatten HTML markup into a single plain-text string.

    Each value produced by ``get_text_from_element`` for the parsed tree is
    split on whitespace and the resulting words are joined with single
    spaces. If anything in that process raises, the error is logged and the
    stripped markup is returned instead.
    """
    try:
        root = html5lib.parse(markup)
        words = [
            word
            for fragment in get_text_from_element(root)
            for word in fragment.split()
        ]
        return u' '.join(words)
    except Exception as exception:  # pylint: disable=broad-except
        # TODO: find out what exceptions might actually occur here, if any.
        # This may be unnecessarily paranoid, given html5lib's fallback behavior.
        log.error("Unparseable answer value markup: '%s' return exception %s", markup, exception)
        return markup.strip()
示例7: parse_payfast_page
# 需要导入模块: import html5lib [as 别名]
# 或者: from html5lib import parse [as 别名]
def parse_payfast_page(response):  # type: (requests.Response) -> Dict[str, str]
    """
    Extract selected fields from a PayFast payment page response.

    Asserts the response's Content-Type, parses its text as HTML and collects,
    in order: non-empty session tracker attributes, the collapsed text of the
    left column, the notice (if present) and the wallet tab's method key and
    pay button value (if the tab is present).
    """
    assert 'text/html; charset=UTF-8' == response.headers['Content-Type']
    doc = html5lib.parse(response.text)  # type: ElementTree

    scraped = {}

    # Session info: only attributes with a non-empty value are recorded.
    tracker = find_id(doc, 'session-tracker')
    for field in ('type', 'id'):
        attr_value = tracker.attrib['data-{}'.format(field)]
        if attr_value:
            scraped['session_{}'.format(field)] = attr_value

    # Payment summary from the left column.
    scraped['payment_summary'] = text_collapsed(find_id(doc, 'left-column'))

    right_column = find_id(doc, 'right-column')
    content_box = find_id(right_column, 'contentBox')

    # Optional error notice.
    notice = find_id_maybe(content_box, 'notice')
    if notice is not None:
        scraped['notice'] = text_lines(notice)

    # Optional wallet payment completion tab.
    wallet_tab = find_id_maybe(content_box, 'waTab')
    if wallet_tab is not None:
        scraped['payment_method'] = wallet_tab.attrib['data-methodkey']
        button = find_id(wallet_tab, 'pay-with-wallet')
        scraped['pay_button'] = button.attrib['value']

    return scraped
示例8: do_checkout
# 需要导入模块: import html5lib [as 别名]
# 或者: from html5lib import parse [as 别名]
def do_checkout(
        checkout_data,  # type: Dict[str, str]
        sign_checkout,  # type: bool
):  # type: (...) -> Dict[str, str]
    """
    Shared test helper: post a sandbox checkout and assert on the parsed page.

    ``checkout_data`` must arrive unsigned. When ``sign_checkout`` is true a
    ``signature`` entry is added to it in place before posting.
    Returns the parsed checkout page.
    """
    # Work out the values the result page is expected to show.
    raw_amount = checkout_data['amount']
    try:
        expected_amount = '{:.2f}'.format(decimal.Decimal(raw_amount))
    except decimal.InvalidOperation:
        # The amount under test may not be Decimal-parseable;
        # in that case it is expected to come back unmodified.
        expected_amount = raw_amount

    # PayFast strips the item name for display.
    expected_item_name = checkout_data['item_name'].strip()
    summary = '{} Payment total R {} ZAR'.format(expected_item_name, expected_amount)
    # Strip to handle item names that render empty.
    expected_payment_summary = summary.strip()

    if sign_checkout:
        assert 'signature' not in checkout_data, checkout_data
        checkout_data['signature'] = api.checkout_signature(checkout_data)

    parsed = parse_payfast_page(post_sandbox_checkout(checkout_data))

    # The session id is not known up front, so it is taken from the parsed
    # page itself (or 'MISSING' so that the comparison fails when absent).
    expected_page = {
        'session_type': 'p-sb',
        'session_id': parsed.get('session_id', 'MISSING'),
        'payment_summary': expected_payment_summary,
        'payment_method': '1',
        'pay_button': 'Complete Payment',
    }
    assert expected_page == parsed
    return parsed
示例9: test_cache_headers
# 需要导入模块: import html5lib [as 别名]
# 或者: from html5lib import parse [as 别名]
def test_cache_headers(self, conf, requests_session, path, only_readonly):
    """Check that *path* is served with a public, positive max-age Cache-Control.

    Paths under ``/api/`` are marked as expected failures before any request
    is made.
    """
    if path.startswith("/api/"):
        pytest.xfail("caching temporarily hidden on api by nginx")

    response = requests_session.get(conf.getoption("server") + path)
    response.raise_for_status()

    header_value = response.headers.get("cache-control")
    assert header_value is not None

    # Split the header into its comma-separated directives.
    directives = [directive.strip() for directive in header_value.split(",")]
    max_age_directives = [d for d in directives if d.startswith("max-age=")]
    max_age_seconds = int(max_age_directives[0].split("=")[1])

    assert "public" in directives
    assert max_age_seconds > 0
示例10: parse_rss
# 需要导入模块: import html5lib [as 别名]
# 或者: from html5lib import parse [as 别名]
def parse_rss(url=None, **kwargs):
    """Parse the feed at *url* with ``rssparser`` and return the result.

    The URL is fetched via ``fetch(decode(url), **kwargs)``. If that raises
    ``ValueError`` or ``URLError``, the URL itself is handed to the parser.
    Otherwise the parser receives the fetched object, or its ``read()``
    result when ``speedparser`` is truthy, and the fetched object is closed
    once parsing finishes.
    """
    try:
        handle = fetch(decode(url), **kwargs)
    except (ValueError, URLError):
        return rssparser.parse(url)

    if speedparser:
        payload = handle.read()
    else:
        payload = handle

    try:
        return rssparser.parse(payload)
    finally:
        handle.close()
示例11: xml2etree
# 需要导入模块: import html5lib [as 别名]
# 或者: from html5lib import parse [as 别名]
def xml2etree(f, xml=True, html5=False):
    """Parse *f* and return the resulting tree.

    Args:
        f: Source handed unchanged to the selected ``parse`` function.
        xml: When true, parse with ``etree.parse``.
        html5: When true (and ``xml`` is false), prefer ``html5parser`` if it
            is available.
    """
    if xml:
        return etree.parse(f)

    if html5parser:
        parser_module = html5parser if html5 else html
        return parser_module.parse(f)

    # Per the original note, the parser used on this path returns an Element,
    # so the result is wrapped to hand back an ElementTree.
    return ElementTree(html.parse(f))
示例12: test_strip_tag
# 需要导入模块: import html5lib [as 别名]
# 或者: from html5lib import parse [as 别名]
def test_strip_tag():
    """Check the rendered output after StripTagFilter is applied for tags b and c."""
    markup = '<a>barbaz<b>foobar</b>.</a><b>foobar</b>.<b attr=1><c></c>'
    document = html5lib.parse(markup)
    walker = getTreeWalker('etree')
    filtered = StripTagFilter(walker(document), ['b', 'c'])
    rendered = HTMLSerializer().render(filtered)
    assert rendered == '<a>barbaz.</a>.'
示例13: test_strip_attribute
# 需要导入模块: import html5lib [as 别名]
# 或者: from html5lib import parse [as 别名]
def test_strip_attribute():
    """Check the rendered output after StripAttributeFilter is applied for b, c and d."""
    markup = '<a b=1 c="yes" d></a><br b=2 c="no" d keep=1>'
    document = html5lib.parse(markup)
    walker = getTreeWalker('etree')
    filtered = StripAttributeFilter(walker(document), ['b', 'c', 'd'])
    rendered = HTMLSerializer().render(filtered)
    assert rendered == '<a></a><br keep=1>'
示例14: get_project_from_pypi
# 需要导入模块: import html5lib [as 别名]
# 或者: from html5lib import parse [as 别名]
def get_project_from_pypi(project, extras):
    """Yield candidates created from the project name and extras.

    Downloads ``https://pypi.org/simple/<project>`` and walks its ``<a>``
    elements. A link is skipped when it has no ``href``, when its
    ``data-requires-python`` specifier excludes ``PYTHON_VERSION``, when it
    is not a ``.whl`` file, when its file name lacks the ``name-version``
    fields, or when the version is invalid.

    Args:
        project: Project name used to build the index URL.
        extras: Passed through unchanged to every ``Candidate``.

    Yields:
        ``Candidate(name, version, url=..., extras=extras)`` for each
        remaining wheel link.
    """
    index_url = "https://pypi.org/simple/{}".format(project)
    data = requests.get(index_url).content
    doc = html5lib.parse(data, namespaceHTMLElements=False)
    for link in doc.findall(".//a"):
        url = link.attrib.get("href")
        if url is None:
            # An anchor without href cannot point at a file.
            continue
        py_req = link.attrib.get("data-requires-python")
        # Skip items that need a different Python version
        if py_req:
            spec = SpecifierSet(py_req)
            if PYTHON_VERSION not in spec:
                continue

        path = urlparse(url).path
        filename = path.rpartition("/")[-1]
        # We only handle wheels
        if not filename.endswith(".whl"):
            continue

        # TODO: Handle compatibility tags?

        # Very primitive wheel filename parsing
        fields = filename[:-4].split("-")
        if len(fields) < 2:
            # No "name-version" pair; unpacking would raise ValueError and
            # abort the whole generator, so ignore the file instead.
            continue
        name, version = fields[:2]
        try:
            version = Version(version)
        except InvalidVersion:
            # Ignore files with invalid versions
            continue

        yield Candidate(name, version, url=url, extras=extras)
示例15: get_metadata_for_wheel
# 需要导入模块: import html5lib [as 别名]
# 或者: from html5lib import parse [as 别名]
def get_metadata_for_wheel(url):
    """Download the wheel at *url* and return its parsed METADATA headers.

    The archive is read fully into memory and scanned for the first member
    whose name ends in ``.dist-info/METADATA``; that member is parsed with
    ``BytesParser`` in headers-only mode.

    Args:
        url: Location of the wheel file to download.

    Returns:
        The parsed message, or an empty ``EmailMessage`` when the archive
        contains no METADATA member.
    """
    data = requests.get(url).content
    with ZipFile(BytesIO(data)) as z:
        for n in z.namelist():
            if n.endswith(".dist-info/METADATA"):
                p = BytesParser()
                # Close the member file once parsing is done instead of
                # leaving the handle open.
                with z.open(n) as metadata_file:
                    return p.parse(metadata_file, headersonly=True)

    # If we didn't find the metadata, return an empty message
    return EmailMessage()