当前位置: 首页>>代码示例>>Python>>正文


Python requests_html.HTML属性代码示例

本文整理汇总了Python中requests_html.HTML属性的典型用法代码示例。如果您正苦于以下问题：Python requests_html.HTML属性的具体用法？Python requests_html.HTML怎么用？Python requests_html.HTML使用的例子？那么恭喜您，这里精选的属性代码示例或许可以为您提供帮助。您也可以进一步了解该属性所在模块requests_html的用法示例。


在下文中一共展示了requests_html.HTML属性的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: get_trends

# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTML [as 别名]
def get_trends():
    """Return the trend names listed by Twitter's ``/i/trends`` endpoint.

    The endpoint is queried through the module-level ``session``. The JSON
    reply's ``module_html`` entry is parsed as an HTML fragment, and the
    ``data-trend-name`` attribute of every ``<li>`` in it is collected.

    Returns:
        list: The ``data-trend-name`` values, in document order.

    Raises:
        KeyError: If the reply has no ``module_html`` key, or an ``<li>``
            has no ``data-trend-name`` attribute.
    """
    request_headers = {
        "Accept": "application/json, text/javascript, */*; q=0.01",
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/603.3.8 (KHTML, like Gecko) Version/10.1.2 Safari/603.3.8",
        "X-Twitter-Active-User": "yes",
        "X-Requested-With": "XMLHttpRequest",
        "Accept-Language": "en-US",
    }

    response = session.get("https://twitter.com/i/trends", headers=request_headers)
    fragment = response.json()["module_html"]

    # "bunk" is passed as the url; presumably a dummy value, since only the
    # fragment itself is inspected here.
    document = HTML(html=fragment, url="bunk", default_encoding="utf-8")

    return [item.attrs["data-trend-name"] for item in document.find("li")]
开发者ID:bisguzar,项目名称:twitter-scraper,代码行数:24,代码来源:trends.py

示例2: parse

# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTML [as 别名]
def parse(self, html: HTML) -> [ProxyIP]:
        """Collect every ``ip:port`` pair found in the raw page bytes.

        The raw HTML is decoded as UTF-8 and scanned for dotted-quad
        addresses followed by a colon and a 2-5 digit port.

        :param html: parsed page whose ``raw_html`` bytes are scanned
        :return: one ``ProxyIP`` per match, in order of appearance
        """
        decoded = html.raw_html.decode('utf-8')
        proxies: [ProxyIP] = []

        for candidate in re.findall(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d{2,5}', decoded):
            # Split the already-matched "ip:port" token back into its parts.
            address = re.search(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}', candidate).group(0)
            port_number = re.search(r':(\d{2,5})', candidate).group(1)

            if not (address and port_number):
                continue
            proxies.append(ProxyIP(ip=address, port=port_number))

        return proxies
开发者ID:imWildCat,项目名称:scylla,代码行数:19,代码来源:a2u_provider.py

示例3: parse

# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTML [as 别名]
def parse(self, html: HTML) -> [ProxyIP]:
        """Build proxies from the rows of the ``table.proxytbl`` table.

        The first cell of a row supplies the IP text. The port is the digit
        run following ``//]]> `` in the second cell's text. Rows missing
        either cell, or whose port cell does not match, are skipped.

        :param html: parsed page containing the proxy table
        :return: list of ``ProxyIP`` objects, one per usable row
        """
        proxies: [ProxyIP] = []

        for row in html.find('table.proxytbl tr'):
            ip_cell = row.find('td:nth-child(1)', first=True)
            port_cell = row.find('td:nth-child(2)', first=True)

            try:
                if not (ip_cell and port_cell):
                    continue
                # A failed search returns None, so .group raises
                # AttributeError and the row is dropped below.
                port_str = re.search(r'//]]> (\d+)', port_cell.text).group(1)
                proxies.append(ProxyIP(ip=ip_cell.text, port=port_str))
            except AttributeError:
                continue

        return proxies
开发者ID:imWildCat,项目名称:scylla,代码行数:21,代码来源:http_proxy_provider.py

示例4: parse

# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTML [as 别名]
def parse(self, html: HTML) -> [ProxyIP]:
        """Build proxies from the ``.spy14`` cell of each matching table row.

        Rows are selected with ``table tr[onmouseover]``. The cell text must
        contain a dotted-quad IP and a ``:`` + newline + 2-5 digit port.
        Rows without a ``.spy14`` element, or whose text lacks either part,
        are skipped instead of aborting the whole parse.

        :param html: parsed page containing the proxy table
        :return: list of ``ProxyIP`` objects, one per well-formed row
        """
        ip_list: [ProxyIP] = []

        for ip_row in html.find('table tr[onmouseover]'):
            ip_port_text_elem = ip_row.find('.spy14', first=True)
            if not ip_port_text_elem:
                continue

            ip_port_text = ip_port_text_elem.text

            ip_match = re.search(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}', ip_port_text)
            port_match = re.search(r':\n(\d{2,5})', ip_port_text)

            # re.search returns None when nothing matches; calling .group on
            # it would raise AttributeError, so malformed rows are skipped.
            if ip_match is None or port_match is None:
                continue

            ip_list.append(ProxyIP(ip=ip_match.group(0), port=port_match.group(1)))

        return ip_list
开发者ID:imWildCat,项目名称:scylla,代码行数:19,代码来源:spys_one_provider.py

示例5: parse

# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTML [as 别名]
def parse(self, html: HTML) -> [ProxyIP]:
        """Build proxies from the rows of the ``#tbl_proxy_list`` table.

        Only rows carrying a ``data-proxy-id`` attribute are considered. The
        IP is reassembled from the two pieces embedded in the first cell's
        ``document.write(...)`` script; the port is the second cell's text.
        Rows whose script does not match exactly once are skipped.

        :param html: parsed page containing the proxy table
        :return: list of ``ProxyIP`` objects, one per usable row
        """
        proxies: [ProxyIP] = []

        for row in html.find('#tbl_proxy_list > tbody:nth-child(2) > tr'):
            if 'data-proxy-id' not in row.attrs:
                continue

            script_tag = row.find('td:nth-child(1) > abbr > script', first=True)
            port_cell = row.find('td:nth-child(2)', first=True)
            if not (script_tag and port_cell):
                continue

            matches = re.findall(r"document\.write\('12345678(\d{1,3}\.\d{1,3})'\.substr\(8\) \+ '(\d{1,3}\.\d{1,3}\.\d{1,3})'\)", script_tag.text)
            if len(matches) != 1:
                continue

            # The script splits the address in two; join the halves back.
            head, tail = matches[0]
            proxies.append(ProxyIP(ip=head + tail, port=port_cell.text))
        return proxies
开发者ID:imWildCat,项目名称:scylla,代码行数:21,代码来源:proxynova_provider.py

示例6: parse

# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTML [as 别名]
def parse(self, html: HTML) -> [ProxyIP]:
        """Build proxies from a plain-text body with one ``ip:port`` per line.

        The raw bytes are decoded as UTF-8 and split on newlines. Blank
        lines, and lines that do not start with ``ip:port``, are ignored.

        :param html: parsed response, or ``None`` when nothing was fetched
        :return: list of ``ProxyIP`` objects; empty when ``html`` is ``None``
        """
        if html is None:
            return []

        proxies: [ProxyIP] = []

        for line in html.raw_html.decode('utf-8').split('\n'):
            if line.strip() == '':
                continue
            if not re.match(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:(\d{2,5})', line):
                continue

            address = re.search(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}', line).group(0)
            port_number = re.search(r':(\d{2,5})', line).group(1)

            if address and port_number:
                proxies.append(ProxyIP(ip=address, port=port_number))

        return proxies
开发者ID:imWildCat,项目名称:scylla,代码行数:21,代码来源:plain_text_provider.py

示例7: test_bare_render

# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTML [as 别名]
def test_bare_render():
    """Render a bare anchor document and check the script result and links.

    The injected script returns an object with viewport metrics; the test
    asserts that each expected key is present, that an ``html`` element can
    be found afterwards, and that the anchor's URL is among the links.
    """
    html = HTML(html="""<a href='https://httpbin.org'>""")
    script = """
        () => {
            return {
                width: document.documentElement.clientWidth,
                height: document.documentElement.clientHeight,
                deviceScaleFactor: window.devicePixelRatio,
            }
        }
    """
    expected_keys = ('width', 'height', 'deviceScaleFactor')

    val = html.render(script=script, reload=False)
    for key in expected_keys:
        assert key in val

    assert html.find('html')
    assert 'https://httpbin.org' in html.links
开发者ID:psf,项目名称:requests-html,代码行数:20,代码来源:test_requests_html.py

示例8: test_bare_arender

# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTML [as 别名]
async def test_bare_arender():
    """Asynchronously render a bare anchor document and check the result.

    The injected script returns an object with viewport metrics; the test
    asserts that each expected key is present, that an ``html`` element can
    be found afterwards, and that the anchor's URL is among the links.

    Declared ``async`` because the body uses ``await``, which is a
    SyntaxError inside a plain ``def``.
    NOTE(review): running a coroutine test needs an async-aware runner
    (e.g. a pytest asyncio marker) — confirm how this file is collected.
    """
    doc = """<a href='https://httpbin.org'>"""
    html = HTML(html=doc, async_=True)
    script = """
        () => {
            return {
                width: document.documentElement.clientWidth,
                height: document.documentElement.clientHeight,
                deviceScaleFactor: window.devicePixelRatio,
            }
        }
    """
    val = await html.arender(script=script, reload=False)
    try:
        for value in ('width', 'height', 'deviceScaleFactor'):
            assert value in val

        assert html.find('html')
        assert 'https://httpbin.org' in html.links
    finally:
        # Close the browser even when an assertion fails, so it is not leaked.
        await html.browser.close()
开发者ID:psf,项目名称:requests-html,代码行数:21,代码来源:test_requests_html.py

示例9: test_bare_js_async_eval

# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTML [as 别名]
async def test_bare_js_async_eval():
    """Check that inline JavaScript runs during an asynchronous render.

    The document's script replaces the text of ``#replace`` with ``yolo``;
    after ``arender`` the element's text must reflect that change.

    Declared ``async`` because the body uses ``await``, which is a
    SyntaxError inside a plain ``def``.
    NOTE(review): running a coroutine test needs an async-aware runner
    (e.g. a pytest asyncio marker) — confirm how this file is collected.
    """
    doc = """
    <!DOCTYPE html>
    <html>
    <body>
    <div id="replace">This gets replaced</div>

    <script type="text/javascript">
      document.getElementById("replace").innerHTML = "yolo";
    </script>
    </body>
    </html>
    """

    html = HTML(html=doc, async_=True)
    await html.arender()

    try:
        assert html.find('#replace', first=True).text == 'yolo'
    finally:
        # Close the browser even when the assertion fails, so it is not leaked.
        await html.browser.close()
开发者ID:psf,项目名称:requests-html,代码行数:21,代码来源:test_requests_html.py

示例10: get_description

# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTML [as 别名]
def get_description(problem_code, contest_code):
    """Fetch a contest problem and format it as a printable description.

    Args:
        problem_code: code of the problem, interpolated into the API path.
        contest_code: code of the contest, interpolated into the API path.

    Returns:
        A one-element list holding a dict:
        ``{"data": <text>}`` when the API reports ``"success"``;
        ``{"data": <message>, "code": 404}`` when it reports ``"error"``;
        ``{"code": 503}`` when the body is not JSON or the status is
        anything else.
    """
    resp = request(url=f'/api/contests/{contest_code}/problems/{problem_code}')

    try:
        resp_json = resp.json()
    except ValueError:
        return [{'code': 503}]

    status = resp_json["status"]

    if status == "error":
        return [{
            'data': 'Problem not found. Use `--search` to search in a specific contest',
            'code': 404
        }]
    if status != "success":
        return [{'code': 503}]

    lines = [
        '',
        style_text('Name: ', "BOLD") + resp_json.get('problem_name', ''),
        style_text("Description:", "BOLD"),
        # Drop simple opening/closing tags from the problem body.
        re.sub(r'(<|<\/)\w+>', '', resp_json.get("body", '')),
        '',
        style_text("Author: ", "BOLD") + resp_json.get('problem_author', ''),
        style_text("Date Added: ", "BOLD") + resp_json.get('date_added', ''),
        style_text("Max Time Limit: ", "BOLD") + f"{resp_json.get('max_timelimit', '')} secs",
        style_text("Source Limit: ", "BOLD") + f"{resp_json.get('source_sizelimit', '')} Bytes",
        style_text("Languages: ", "BOLD") + resp_json.get('languages_supported', ''),
        ''
    ]

    if resp_json.get('tags'):
        tags_label = style_text('Tags: ', 'BOLD')
        # Tags arrive as HTML; keep only the text of each anchor.
        tag_names = [tag.text for tag in HTML(html=resp_json['tags']).find('a')]
        lines.extend([tags_label + " ".join(tag_names), ''])

    if resp_json.get('editorial_url'):
        lines.extend([style_text('Editorial: ', 'BOLD') + resp_json['editorial_url'], ''])

    return [{"data": "\n".join(lines)}]
开发者ID:sk364,项目名称:codechef-cli,代码行数:42,代码来源:problems.py

示例11: test_disconnect_active_sessions_success

# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTML [as 别名]
def test_disconnect_active_sessions_success(self):
        """Disconnecting with a default mock response yields the login success message."""
        # Replace the auth module's request with a stub returning a default mock response.
        self.monkeypatch.setattr(auth, 'request', lambda *args, **kwargs: MockHTMLResponse())

        fields = "".join(f"<input name='{idx}' value='{idx}' />" for idx in range(6))
        markup = (f'<input id="{CSRF_TOKEN_INPUT_ID}" value="ab" />'
                  f'<form id="{SESSION_LIMIT_FORM_ID[1:]}">{fields}</form>')

        resps = disconnect_active_sessions(None, HTML(html=markup))
        self.assertEqual(resps[0]['data'], LOGIN_SUCCESS_MSG)
开发者ID:sk364,项目名称:codechef-cli,代码行数:14,代码来源:test_auth_entry.py

示例12: test_disconnect_active_sessions_error

# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTML [as 别名]
def test_disconnect_active_sessions_error(self):
        """Disconnecting yields code 503 when the mocked response has status 500."""
        # Replace the auth module's request with a stub returning a 500 response.
        self.monkeypatch.setattr(
            auth, 'request', lambda *args, **kwargs: MockHTMLResponse(status_code=500))

        fields = "".join(f"<input name='{idx}' value='{idx}' />" for idx in range(6))
        markup = (f'<input id="{CSRF_TOKEN_INPUT_ID}" value="ab" />'
                  f'<form id="{SESSION_LIMIT_FORM_ID[1:]}">{fields}</form>')

        resps = disconnect_active_sessions(None, HTML(html=markup))
        self.assertEqual(resps[0]['code'], 503)
开发者ID:sk364,项目名称:codechef-cli,代码行数:14,代码来源:test_auth_entry.py

示例13: __init__

# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTML [as 别名]
def __init__(self, data='<html />', status_code=200, url='', json=""):
        """Set up the response attributes from the given values.

        :param data: markup parsed into the ``html`` attribute
        :param status_code: value stored as ``status_code``
        :param url: suffix appended to ``BASE_URL`` to form ``url``
        :param json: value stored as the ``text`` attribute
        """
        self.html = HTML(html=data)
        self.url = '{}{}'.format(BASE_URL, url)
        self.text = json
        self.status_code = status_code
开发者ID:sk364,项目名称:codechef-cli,代码行数:7,代码来源:utils.py

示例14: test_html_to_list_valid_html

# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTML [as 别名]
def test_html_to_list_valid_html(self):
        """html_to_list turns a header row and two data rows into nested lists."""
        expected_rows = [['A', 'V'], ['a1', 'v1'], ['a2', 'v2']]
        html = HTML(html=" \
            <tr><th>A</th><th>V</th></tr> \
            <tr><td>a1</td><td>v1</td></tr> \
            <tr><td>a2</td><td>v2</td></tr> \
        ")
        actual_rows = html_to_list(html)
        self.assertEqual(actual_rows, expected_rows)
开发者ID:sk364,项目名称:codechef-cli,代码行数:10,代码来源:test_helpers.py

示例15: test_get_csrf_token_no_value

# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTML [as 别名]
def test_get_csrf_token_no_value(self):
        """get_csrf_token gives None for an input element without a value attribute."""
        document = HTML(html="<input id='a' />")
        token = get_csrf_token(document, "a")
        self.assertIsNone(token)
开发者ID:sk364,项目名称:codechef-cli,代码行数:6,代码来源:test_helpers.py


注:本文中的requests_html.HTML属性示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。