当前位置: 首页>>代码示例>>Python>>正文


Python requests_html.HTMLSession方法代码示例

本文整理汇总了Python中requests_html.HTMLSession类的典型用法代码示例。如果您正苦于以下问题:Python requests_html.HTMLSession类的具体用法?Python requests_html.HTMLSession怎么用?Python requests_html.HTMLSession使用的例子?那么恭喜您,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块requests_html的用法示例。


在下文中一共展示了requests_html.HTMLSession类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: getLinks

# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTMLSession [as 别名]
def getLinks(self):
        """Collect the ``og:image`` URLs of the posts listed on a profile page.

        Fetches ``https://instagram.com/<self.username>``, parses the JSON
        embedded in the fifth child ``<script>`` of ``<body>``, extracts every
        post shortcode, then requests each post page and reads the ``content``
        attribute of its ``og:image`` meta tag.

        Returns:
            list: one URL per post page that exposes an ``og:image`` meta tag.
            An empty list is returned when the expected JSON structure is
            missing or when any post-page request/parse step fails.
        """
        session = HTMLSession()
        profile = session.get('https://instagram.com/' + self.username)
        script_text = profile.html.find('body > script:nth-child(5)')[0].text
        # Strip the 21-character prefix and the final character surrounding
        # the JSON (presumably a `window._sharedData = ` assignment and the
        # trailing `;` -- confirm against the live page).
        json_parsed = json.loads(script_text[21:-1])
        try:
            edges = json_parsed['entry_data']['ProfilePage'][0]['graphql']['user']['edge_owner_to_timeline_media']['edges']
            shortcodes = [edge['node']['shortcode'] for edge in edges]
            links = []
            for sc in shortcodes:
                post = session.get('https://instagram.com/p/' + sc + '/?taken-by=' + self.username)
                meta_tags = post.html.find('meta[property="og:image"]')
                if meta_tags:
                    links.append(meta_tags[0].attrs['content'])
            return links
        except Exception:
            # Deliberately best-effort: any lookup or request failure yields an
            # empty result.  `Exception` (rather than a bare `except`) keeps
            # KeyboardInterrupt and SystemExit propagating.
            return []
开发者ID:ThoughtfulDev,项目名称:EagleEye,代码行数:26,代码来源:instagram.py

示例2: test_american_english_dialect_selection

# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTMLSession [as 别名]
def test_american_english_dialect_selection():
    """Check that restricting to the US dialect narrows the pronunciation results.

    The page for a single word is fetched once; the XPath selector of a
    US-only config must match at least one node, and the selector of a
    dialect-agnostic config must match strictly more nodes than that.
    """
    # "mocha" is a word for which Wiktionary has dialect-specified
    # pronunciations for both US and non-US English.
    word = "mocha"
    page = requests_html.HTMLSession().get(_PAGE_TEMPLATE.format(word=word))

    # One config limited to the US dialect, one without any dialect filter.
    us_config = config_factory(key="en", dialect="US | American English")
    any_config = config_factory(key="en")

    # Count the nodes each config's XPath selector matches on the same page.
    us_count = len(page.html.xpath(us_config.pron_xpath_selector))
    any_count = len(page.html.xpath(any_config.pron_xpath_selector))

    # any_count covers US and non-US results; us_count only the US result.
    assert any_count > us_count > 0
开发者ID:kylebgorman,项目名称:wikipron,代码行数:21,代码来源:test_config.py

示例3: make_login_req

# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTMLSession [as 别名]
def make_login_req(username, password, disconnect_sessions):
    """Submit the login form and report the outcome as a list of response dicts.

    Args:
        username: value sent as the form's ``name`` field; also passed to
            ``save_session_cookies``.
        password: value sent as the form's ``pass`` field.
        disconnect_sessions: when the session-limit form is shown, truthy
            means disconnect the active sessions, falsy means log out again.

    Returns:
        list: a list of dicts carrying ``data`` and/or ``code`` keys, or
        whatever ``disconnect_active_sessions`` returns when that path is
        taken.
    """
    with HTMLSession() as session:
        set_session_cookies(session)

        # First request fetches the page that carries the CSRF token.
        landing = request(session=session)
        token = get_csrf_token(landing.html, CSRF_TOKEN_INPUT_ID)
        if not token:
            return [{'data': CSRF_TOKEN_MISSING, 'code': 500}]

        form_data = {
            'name': username,
            'pass': password,
            # The constant's first character is dropped for the form field.
            'form_id': LOGIN_FORM_ID[1:],
            'csrfToken': token
        }

        login_resp = request(session=session, method='POST', data=form_data)
        page = login_resp.html

        if login_resp.status_code != 200:
            return [{'code': 503}]

        if page.find(SESSION_LIMIT_FORM_ID):
            if not disconnect_sessions:
                logout(session=session)
                return [{'data': SESSION_LIMIT_MSG, 'code': 400}]
            resps = disconnect_active_sessions(session, page)
            save_session_cookies(session, username)
            return resps

        if page.find(LOGOUT_BUTTON_CLASS):
            save_session_cookies(session, username)
            return [{'data': LOGIN_SUCCESS_MSG}]

        return [{'data': INCORRECT_CREDS_MSG, 'code': 400}]
开发者ID:sk364,项目名称:codechef-cli,代码行数:35,代码来源:auth.py

示例4: get_session

# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTMLSession [as 别名]
def get_session():
    """Return a new ``HTMLSession``, with cookies loaded if the cookie file exists.

    When ``COOKIES_FILE_PATH`` does not exist the fresh session is returned
    untouched; otherwise ``set_session_cookies`` is applied and the cookie
    jar is loaded, keeping discardable and expired cookies.
    """
    new_session = HTMLSession()
    if not os.path.exists(COOKIES_FILE_PATH):
        return new_session

    set_session_cookies(new_session)
    new_session.cookies.load(ignore_discard=True, ignore_expires=True)
    return new_session
开发者ID:sk364,项目名称:codechef-cli,代码行数:9,代码来源:helpers.py

示例5: test_get_session_cookies

# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTMLSession [as 别名]
def test_get_session_cookies(self):
        """Should return requests_html.HTMLSession instance preloaded with cookies"""
        fake_login()

        session = get_session()
        self.assertIsInstance(session, HTMLSession)
        # assertGreater reports the actual cookie count on failure, unlike
        # assertTrue on a pre-evaluated comparison.
        self.assertGreater(len(session.cookies), 0)
开发者ID:sk364,项目名称:codechef-cli,代码行数:9,代码来源:test_helpers.py

示例6: test_get_session_no_cookies

# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTMLSession [as 别名]
def test_get_session_no_cookies(self):
        """After a fake logout, get_session() yields an HTMLSession holding no cookies."""
        fake_logout()

        fresh = get_session()
        self.assertIsInstance(fresh, HTMLSession)

        cookie_count = len(fresh.cookies)
        self.assertEqual(cookie_count, 0)
开发者ID:sk364,项目名称:codechef-cli,代码行数:9,代码来源:test_helpers.py

示例7: __init__

# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTMLSession [as 别名]
def __init__(self):
        """Initialise the request header dict and the HTMLSession object."""
        user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'

        # Header information.
        self._headers = {'User-Agent': user_agent, 'Accept-Encoding': ''}

        # HTMLSession object.
        self._session = HTMLSession()
开发者ID:snakejordan,项目名称:administrative-divisions-of-China-on-Python,代码行数:15,代码来源:crawler.py

示例8: __init__

# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTMLSession [as 别名]
def __init__(self, session=None, requests_kwargs=None):
        """Store the HTTP session and the extra request keyword arguments.

        Args:
            session: session object to use. When ``None``, a new
                ``HTMLSession`` is created and ``self.default_headers`` is
                merged into its headers; a caller-supplied session is left
                unmodified.
            requests_kwargs: mapping stored as ``self.requests_kwargs``;
                ``None`` is replaced by a new empty dict.
        """
        if session is None:
            session = HTMLSession()
            session.headers.update(self.default_headers)
        self.session = session

        self.requests_kwargs = {} if requests_kwargs is None else requests_kwargs
开发者ID:kevinzg,项目名称:facebook-scraper,代码行数:12,代码来源:facebook_scraper.py

示例9: __init__

# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTMLSession [as 别名]
def __init__(self):
        """Create a new ``HTMLSession`` and keep it as ``self._session``."""
        self._session = HTMLSession() 
开发者ID:tdickman,项目名称:crypto51,代码行数:4,代码来源:mtc.py

示例10: scrape_instagram_tag

# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTMLSession [as 别名]
def scrape_instagram_tag(tag: str, total_count: int=50, existing: set=None):
    """
    Scrape and yield recently tagged instagram photos.

    Args:
        tag: hashtag name inserted into the explore/tags URL.
        total_count: maximum number of photos to yield.
        existing: image URLs to skip; the set is copied, never mutated.

    Yields:
        tuple: ``(url, caption, hashtags, mentions)`` where ``url`` and
        ``caption`` are the ``src`` and ``alt`` attributes of an ``<img>``
        element, and ``hashtags`` / ``mentions`` are the sets of matches of
        ``REGEXES['hashtag']`` / ``REGEXES['username']`` in the caption.
    """
    seen = set() if existing is None else set(existing)

    session = HTMLSession()
    response = session.get(f'https://www.instagram.com/explore/tags/{tag}')

    count = 0
    page = 0

    # NOTE(review): if successive renders surface no unseen images, `count`
    # never advances and this loop does not terminate on its own.
    while count < total_count:
        # Re-render with one more scroll step each pass, then re-query images.
        response.html.render(scrolldown=page)
        images = response.html.xpath('//img[@alt]')
        page += 1
        for image in images:
            # Stop at exactly `total_count` items (the previous `>` comparison
            # let one extra item through).
            if count >= total_count:
                break
            try:
                src, caption = image.attrs['src'], image.attrs['alt']
            except KeyError:
                # Image without the needed attribute: skip it.
                continue
            if src in seen:
                continue
            seen.add(src)
            hashtags = set(REGEXES['hashtag'].findall(caption))
            mentions = set(REGEXES['username'].findall(caption))
            count += 1
            yield src, caption, hashtags, mentions
开发者ID:meetmangukiya,项目名称:instagram-scraper,代码行数:36,代码来源:instagram_scraper.py

示例11: find_links

# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTMLSession [as 别名]
def find_links(self):
        """Walk the result pages for ``self.query`` and queue every valid link.

        Starting from ``self.base_url + self.parameters.format(self.query)``,
        each page is fetched while ``self.is_alive`` is truthy. For every
        ``.b_algo`` element the ``href`` of the first ``<a>`` inside its first
        ``<h2>`` is read; links accepted by ``self.is_valid`` are put on
        ``self.links``. Paging follows ``self.next_page(html)`` until it
        returns a falsy value or a request fails. Finally
        ``self.is_searching`` is set to ``False`` under ``self.lock``.
        """

        session = HTMLSession()
        session.headers['user-agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Safari/537.36'

        url = self.base_url + self.parameters.format(self.query)

        while self.is_alive:
            try:
                html = session.get(url).html
            except Exception:
                # Any request failure ends the search; KeyboardInterrupt and
                # SystemExit are no longer swallowed.
                break

            for result in html.find('.b_algo'):
                # find(..., first=True) yields None when nothing matches, so
                # guard each step instead of chaining the calls.
                heading = result.find('h2', first=True)
                anchor = heading.find('a', first=True) if heading is not None else None
                if anchor is None:
                    continue

                link = anchor.attrs.get('href')
                if link is None:
                    continue

                if self.is_valid(link):
                    self.links.put(link)

            next_page = self.next_page(html)

            if not next_page:
                break

            url = next_page

        with self.lock:
            self.is_searching = False
开发者ID:Pure-L0G1C,项目名称:SQL-scanner,代码行数:35,代码来源:search.py

示例12: session

# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTMLSession [as 别名]
def session(populated_cluster):
    """Return an ``HTMLSession`` whose requests are prefixed with the cluster URL.

    ``populated_cluster["url"]``, with trailing slashes removed, is prepended
    to the URL of every call made through the session's ``request`` method.
    """
    base_url = populated_cluster["url"].rstrip("/")

    html_session = HTMLSession()
    # Capture the original bound method before it is replaced below.
    original_request = html_session.request

    def prefixed_request(method, url, *args, **kwargs):
        return original_request(method, base_url + url, *args, **kwargs)

    html_session.request = prefixed_request
    return html_session
开发者ID:hjacobs,项目名称:kube-web-view,代码行数:13,代码来源:conftest.py

示例13: __init__

# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTMLSession [as 别名]
def __init__(self):
    """Open the database connection, build the HTTP session and create the logger.

    Connection parameters and the User-Agent value come from ``config``;
    the session is created with ``mock_browser=False``.
    """
    db_settings = config.db
    self.connection = db.Connection(
        db_settings["host"],
        db_settings["db"],
        db_settings["user"],
        db_settings["password"],
    )

    http_session = HTMLSession(mock_browser=False)
    http_session.headers['User-Agent'] = config.user_agent
    self.session = http_session

    self.log = Logger()
开发者ID:blekhmanlab,项目名称:rxivist,代码行数:7,代码来源:spider.py

示例14: _scrape_once

# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTMLSession [as 别名]
def _scrape_once(data, config: Config) -> Iterator[WordPronPair]:
    """Yield (word, pronunciation) pairs for one batch of category members.

    For each entry of ``data["query"]["categorymembers"]`` that is not
    filtered out by ``_skip_word`` or ``_skip_date``, the word's page is
    fetched (10-second timeout) and handed to ``config.extract_word_pron``;
    every pair it produces is yielded.
    """
    http = requests_html.HTMLSession()
    for entry in data["query"]["categorymembers"]:
        title = entry["title"]
        timestamp = entry["timestamp"]
        if _skip_word(title, config.no_skip_spaces_word):
            continue
        if _skip_date(timestamp, config.cut_off_date):
            continue
        response = http.get(_PAGE_TEMPLATE.format(word=title), timeout=10)
        for found_word, found_pron in config.extract_word_pron(title, response, config):
            yield found_word, found_pron
开发者ID:kylebgorman,项目名称:wikipron,代码行数:14,代码来源:scrape.py

示例15: __init__

# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTMLSession [as 别名]
def __init__(self):
        """Initialize the worker object.

        Creates a new ``HTMLSession`` and stores it as ``self.session``.
        """

        self.session = HTMLSession() 
开发者ID:imWildCat,项目名称:scylla,代码行数:8,代码来源:worker.py


注:本文中的requests_html.HTMLSession方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。