本文整理汇总了 Python 中 requests_html.HTML 类的典型用法代码示例。如果您正苦于以下问题:Python requests_html.HTML 的具体用法?Python requests_html.HTML 怎么用?Python requests_html.HTML 使用的例子?那么恭喜您,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块 requests_html 的用法示例。
在下文中一共展示了 requests_html.HTML 类的 15 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: get_trends
# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTML [as 别名]
def get_trends():
    """Return the trend names listed on Twitter's trends endpoint.

    Requests ``https://twitter.com/i/trends`` through the ``session`` object
    that is in scope, takes the ``module_html`` entry of the JSON reply,
    parses it as HTML and reads the ``data-trend-name`` attribute of every
    ``<li>`` element.

    Returns:
        list: One trend name per ``<li>``, in document order.
    """
    request_headers = {
        "Accept": "application/json, text/javascript, */*; q=0.01",
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/603.3.8 (KHTML, like Gecko) Version/10.1.2 Safari/603.3.8",
        "X-Twitter-Active-User": "yes",
        "X-Requested-With": "XMLHttpRequest",
        "Accept-Language": "en-US",
    }
    response = session.get("https://twitter.com/i/trends", headers=request_headers)
    fragment = response.json()["module_html"]
    # NOTE(review): "bunk" looks like a placeholder url for the parser - confirm.
    document = HTML(html=fragment, url="bunk", default_encoding="utf-8")
    return [item.attrs["data-trend-name"] for item in document.find("li")]
示例2: parse
# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTML [as 别名]
def parse(self, html: HTML) -> [ProxyIP]:
    """Collect every ``ip:port`` pair found in the raw page bytes.

    The raw HTML is decoded as UTF-8 and scanned with one regular
    expression whose two groups capture the dotted-quad address and the
    2-5 digit port.

    Args:
        html: Parsed page; only its ``raw_html`` bytes are used.

    Returns:
        A list with one ``ProxyIP`` per match, in order of appearance.
    """
    page_text = html.raw_html.decode('utf-8')
    address_pattern = r'(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}):(\d{2,5})'
    return [
        ProxyIP(ip=host, port=port_digits)
        for host, port_digits in re.findall(address_pattern, page_text)
    ]
示例3: parse
# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTML [as 别名]
def parse(self, html: HTML) -> [ProxyIP]:
    """Extract proxies from the rows of ``table.proxytbl``.

    The first cell of a row supplies the IP text. The port is the run of
    digits that follows ``//]]> `` in the text of the second cell.

    Args:
        html: Parsed page containing the proxy table.

    Returns:
        A list of ``ProxyIP`` objects; rows that lack either cell or a
        recognisable port are skipped.
    """
    proxies: [ProxyIP] = []
    for row in html.find('table.proxytbl tr'):
        ip_cell = row.find('td:nth-child(1)', first=True)
        port_cell = row.find('td:nth-child(2)', first=True)
        if not (ip_cell and port_cell):
            continue
        try:
            # re.search returns None when the marker is absent, which makes
            # .group raise AttributeError; such rows are dropped.
            port_text = re.search(r'//]]> (\d+)', port_cell.text).group(1)
            proxies.append(ProxyIP(ip=ip_cell.text, port=port_text))
        except AttributeError:
            continue
    return proxies
示例4: parse
# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTML [as 别名]
def parse(self, html: HTML) -> [ProxyIP]:
    """Extract proxies from table rows that carry an ``onmouseover`` attribute.

    For each matching row the text of its first ``.spy14`` element is
    searched for a dotted-quad IP and for a 2-5 digit port that follows a
    colon and a newline.

    Args:
        html: Parsed page containing the proxy table.

    Returns:
        A list of ``ProxyIP`` objects. Rows without a ``.spy14`` element, or
        whose text lacks an IP or a port, are skipped.
    """
    ip_list: [ProxyIP] = []
    for ip_row in html.find('table tr[onmouseover]'):
        ip_port_text_elem = ip_row.find('.spy14', first=True)
        if not ip_port_text_elem:
            continue
        ip_port_text = ip_port_text_elem.text
        ip_match = re.search(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}', ip_port_text)
        port_match = re.search(r':\n(\d{2,5})', ip_port_text)
        # re.search returns None on no match; calling .group on it would
        # raise AttributeError and abort the whole parse, so skip the row.
        if ip_match is None or port_match is None:
            continue
        ip_list.append(ProxyIP(ip=ip_match.group(0), port=port_match.group(1)))
    return ip_list
示例5: parse
# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTML [as 别名]
def parse(self, html: HTML) -> [ProxyIP]:
    """Extract proxies from the ``#tbl_proxy_list`` table.

    Only rows with a ``data-proxy-id`` attribute are considered. The IP is
    rebuilt from the inline script in the first cell, which has the form
    ``document.write('12345678<part>'.substr(8) + '<part>')``; the two
    captured parts are concatenated. The port is the text of the second cell.

    Args:
        html: Parsed page containing the proxy table.

    Returns:
        A list of ``ProxyIP`` objects; rows that are missing a cell or whose
        script does not match exactly once are skipped.
    """
    script_pattern = r"document\.write\('12345678(\d{1,3}\.\d{1,3})'\.substr\(8\) \+ '(\d{1,3}\.\d{1,3}\.\d{1,3})'\)"
    proxies: [ProxyIP] = []
    for row in html.find('#tbl_proxy_list > tbody:nth-child(2) > tr'):
        if 'data-proxy-id' not in row.attrs:
            continue
        script_cell = row.find('td:nth-child(1) > abbr > script', first=True)
        port_cell = row.find('td:nth-child(2)', first=True)
        if not (script_cell and port_cell):
            continue
        matches = re.findall(script_pattern, script_cell.text)
        # Exactly one match is required; zero or several are both rejected.
        if len(matches) != 1:
            continue
        leading_part, trailing_part = matches[0]
        address = leading_part + trailing_part
        proxies.append(ProxyIP(ip=address, port=port_cell.text))
    return proxies
示例6: parse
# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTML [as 别名]
def parse(self, html: HTML) -> [ProxyIP]:
    """Parse a plain-text proxy list with one ``ip:port`` entry per line.

    The raw bytes are decoded as UTF-8 and split on ``\\n``. A line is
    accepted only when it starts with a dotted-quad address followed by a
    colon and a 2-5 digit port.

    Args:
        html: Parsed page, or ``None``.

    Returns:
        A list of ``ProxyIP`` objects; empty when ``html`` is ``None``.
    """
    if html is None:
        return []
    line_pattern = r'(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}):(\d{2,5})'
    proxies: [ProxyIP] = []
    for line in html.raw_html.decode('utf-8').split('\n'):
        # re.match anchors at the start of the line, so blank lines and
        # lines with leading text are rejected.
        found = re.match(line_pattern, line)
        if found is None:
            continue
        host, port_digits = found.groups()
        proxies.append(ProxyIP(ip=host, port=port_digits))
    return proxies
示例7: test_bare_render
# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTML [as 别名]
def test_bare_render():
    """Render a bare HTML snippet and check the script result and the links.

    The script passed to ``render`` returns an object with the viewport
    width, height and device scale factor; the test checks that all three
    keys are present, that an ``html`` element can be found and that the
    anchor's href shows up in ``links``.
    """
    markup = """<a href='https://httpbin.org'>"""
    page = HTML(html=markup)
    viewport_script = (
        "\n"
        "() => {\n"
        "return {\n"
        "width: document.documentElement.clientWidth,\n"
        "height: document.documentElement.clientHeight,\n"
        "deviceScaleFactor: window.devicePixelRatio,\n"
        "}\n"
        "}\n"
    )
    result = page.render(script=viewport_script, reload=False)
    expected_keys = ('width', 'height', 'deviceScaleFactor')
    for key in expected_keys:
        assert key in result
    assert page.find('html')
    assert 'https://httpbin.org' in page.links
示例8: test_bare_arender
# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTML [as 别名]
async def test_bare_arender():
    """Render a bare HTML snippet asynchronously and check the result.

    The function uses ``await`` and therefore has to be a coroutine
    function; it needs an asyncio-aware test runner to execute it.

    The script passed to ``arender`` returns an object with the viewport
    width, height and device scale factor; the test checks that all three
    keys are present, that an ``html`` element can be found and that the
    anchor's href shows up in ``links``.
    """
    doc = """<a href='https://httpbin.org'>"""
    html = HTML(html=doc, async_=True)
    script = (
        "\n"
        "() => {\n"
        "return {\n"
        "width: document.documentElement.clientWidth,\n"
        "height: document.documentElement.clientHeight,\n"
        "deviceScaleFactor: window.devicePixelRatio,\n"
        "}\n"
        "}\n"
    )
    val = await html.arender(script=script, reload=False)
    try:
        for value in ('width', 'height', 'deviceScaleFactor'):
            assert value in val
        assert html.find('html')
        assert 'https://httpbin.org' in html.links
    finally:
        # Close the browser even when an assertion fails so that it is not
        # left running after the test.
        await html.browser.close()
示例9: test_bare_js_async_eval
# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTML [as 别名]
async def test_bare_js_async_eval():
    """Check that inline JavaScript has run after an asynchronous render.

    The function uses ``await`` and therefore has to be a coroutine
    function; it needs an asyncio-aware test runner to execute it.

    The document's inline script replaces the content of ``#replace`` with
    ``yolo``; after ``arender`` the element's text must reflect that.
    """
    doc = (
        '\n'
        '<!DOCTYPE html>\n'
        '<html>\n'
        '<body>\n'
        '<div id="replace">This gets replaced</div>\n'
        '<script type="text/javascript">\n'
        'document.getElementById("replace").innerHTML = "yolo";\n'
        '</script>\n'
        '</body>\n'
        '</html>\n'
    )
    html = HTML(html=doc, async_=True)
    await html.arender()
    try:
        assert html.find('#replace', first=True).text == 'yolo'
    finally:
        # Close the browser even when the assertion fails so that it is not
        # left running after the test.
        await html.browser.close()
示例10: get_description
# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTML [as 别名]
def get_description(problem_code, contest_code):
    """Fetch a problem and format its description for display.

    Requests ``/api/contests/<contest_code>/problems/<problem_code>`` via
    ``request`` and builds a newline-joined text block from the JSON reply.

    Args:
        problem_code: Code of the problem, used in the API path.
        contest_code: Code of the contest, used in the API path.

    Returns:
        A one-element list holding a dict:
        ``{"data": <text>}`` when the reply status is ``"success"``;
        ``{"data": <message>, "code": 404}`` when it is ``"error"``;
        ``{"code": 503}`` when the body is not valid JSON, is not a JSON
        object, or carries any other (or no) status.
    """
    url = f'/api/contests/{contest_code}/problems/{problem_code}'
    resp = request(url=url)
    try:
        resp_json = resp.json()
    except ValueError:
        return [{'code': 503}]

    # A payload that is not an object, or has no "status" key, is treated
    # like an unknown status and falls through to 503 instead of raising.
    status = resp_json.get("status") if isinstance(resp_json, dict) else None

    if status == "success":
        problem = [
            '',
            style_text('Name: ', "BOLD") + resp_json.get('problem_name', ''),
            style_text("Description:", "BOLD"),
            # Strip simple opening and closing tags from the body.
            re.sub(r'(<|<\/)\w+>', '', resp_json.get("body", '')),
            '',
            style_text("Author: ", "BOLD") + resp_json.get('problem_author', ''),
            style_text("Date Added: ", "BOLD") + resp_json.get('date_added', ''),
            style_text("Max Time Limit: ", "BOLD") + f"{resp_json.get('max_timelimit', '')} secs",
            style_text("Source Limit: ", "BOLD") + f"{resp_json.get('source_sizelimit', '')} Bytes",
            style_text("Languages: ", "BOLD") + resp_json.get('languages_supported', ''),
            '',
        ]
        if resp_json.get('tags'):
            # 'tags' is an HTML fragment; the text of each anchor is a tag name.
            problem.append(
                style_text('Tags: ', 'BOLD') +
                " ".join(tag.text for tag in HTML(html=resp_json['tags']).find('a'))
            )
            problem.append('')
        if resp_json.get('editorial_url'):
            problem.append(style_text('Editorial: ', 'BOLD') + resp_json['editorial_url'])
            problem.append('')
        return [{"data": "\n".join(problem)}]

    if status == "error":
        return [{
            'data': 'Problem not found. Use `--search` to search in a specific contest',
            'code': 404
        }]

    return [{'code': 503}]
示例11: test_disconnect_active_sessions_success
# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTML [as 别名]
def test_disconnect_active_sessions_success(self):
    """Disconnecting active sessions should yield the login success message."""
    # Replace auth.request so every call returns a default mock response.
    self.monkeypatch.setattr(auth, 'request', lambda *args, **kwargs: MockHTMLResponse())
    input_tags = "".join(f"<input name='{idx}' value='{idx}' />" for idx in range(6))
    csrf_markup = f'<input id="{CSRF_TOKEN_INPUT_ID}" value="ab" />'
    # NOTE(review): [1:] presumably drops a leading selector character - confirm.
    form_markup = f'<form id="{SESSION_LIMIT_FORM_ID[1:]}">{input_tags}</form>'
    page = HTML(html=csrf_markup + form_markup)
    responses = disconnect_active_sessions(None, page)
    self.assertEqual(responses[0]['data'], LOGIN_SUCCESS_MSG)
示例12: test_disconnect_active_sessions_error
# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTML [as 别名]
def test_disconnect_active_sessions_error(self):
    """A mocked response with status code 500 should produce code 503."""
    # Replace auth.request so every call returns a response with status 500.
    self.monkeypatch.setattr(
        auth, 'request', lambda *args, **kwargs: MockHTMLResponse(status_code=500)
    )
    input_tags = "".join(f"<input name='{idx}' value='{idx}' />" for idx in range(6))
    csrf_markup = f'<input id="{CSRF_TOKEN_INPUT_ID}" value="ab" />'
    # NOTE(review): [1:] presumably drops a leading selector character - confirm.
    form_markup = f'<form id="{SESSION_LIMIT_FORM_ID[1:]}">{input_tags}</form>'
    page = HTML(html=csrf_markup + form_markup)
    responses = disconnect_active_sessions(None, page)
    self.assertEqual(responses[0]['code'], 503)
示例13: __init__
# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTML [as 别名]
def __init__(self, data='<html />', status_code=200, url='', json=""):
    """Set up the attributes of a mock HTML response.

    Args:
        data: Markup that is parsed into the ``html`` attribute.
        status_code: Value stored as ``status_code``.
        url: Suffix appended to ``BASE_URL`` to form ``url``.
        json: Value stored as ``text``.
    """
    self.status_code = status_code
    self.text = json
    self.url = f'{BASE_URL}{url}'
    self.html = HTML(html=data)
示例14: test_html_to_list_valid_html
# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTML [as 别名]
def test_html_to_list_valid_html(self):
    """html_to_list should turn table rows into a list of cell-text lists."""
    table_markup = (
        " "
        "<tr><th>A</th><th>V</th></tr> "
        "<tr><td>a1</td><td>v1</td></tr> "
        "<tr><td>a2</td><td>v2</td></tr> "
    )
    expected_rows = [['A', 'V'], ['a1', 'v1'], ['a2', 'v2']]
    page = HTML(html=table_markup)
    self.assertEqual(html_to_list(page), expected_rows)
示例15: test_get_csrf_token_no_value
# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTML [as 别名]
def test_get_csrf_token_no_value(self):
    """get_csrf_token should give None for an input without a value attribute."""
    page = HTML(html="<input id='a' />")
    token = get_csrf_token(page, "a")
    self.assertIsNone(token)