當前位置: 首頁>>代碼示例>>Python>>正文


Python requests_html.HTMLSession方法代碼示例

本文整理匯總了Python中requests_html.HTMLSession方法的典型用法代碼示例。如果您正苦於以下問題:Python requests_html.HTMLSession方法的具體用法?Python requests_html.HTMLSession怎麼用?Python requests_html.HTMLSession使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在requests_html的用法示例。


在下文中一共展示了requests_html.HTMLSession方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: getLinks

# 需要導入模塊: import requests_html [as 別名]
# 或者: from requests_html import HTMLSession [as 別名]
def getLinks(self):
    """Collect the image URLs of the posts listed on the user's profile page.

    Fetches ``https://instagram.com/<self.username>``, parses the JSON text
    embedded in the ``body > script:nth-child(5)`` element, reads the post
    shortcodes from it and then requests every post page to read its
    ``og:image`` meta tag.

    Returns:
        list: the ``content`` attribute of each post's ``og:image`` tag, in
        the order the posts appear in the profile JSON. ``[]`` is returned
        when walking the JSON or fetching a post page fails.

    Raises:
        Errors from fetching the profile page, from a missing script element
        (``IndexError``) or from invalid JSON propagate to the caller, exactly
        as before.
    """
    session = HTMLSession()
    profile = session.get('https://instagram.com/' + self.username)
    script_text = profile.html.find('body > script:nth-child(5)')[0].text
    # Drop the first 21 characters and the last one; presumably the
    # ``window._sharedData = `` prefix and the trailing ``;`` -- TODO confirm.
    json_parsed = json.loads(script_text[21:-1])
    try:
        edges = json_parsed['entry_data']['ProfilePage'][0]['graphql']['user']['edge_owner_to_timeline_media']['edges']
        # Gather every shortcode first so a malformed node aborts before any
        # post page is requested.
        shortcodes = [edge['node']['shortcode'] for edge in edges]
        links = []
        for shortcode in shortcodes:
            post = session.get('https://instagram.com/p/' + shortcode + '/?taken-by=' + self.username)
            tags = post.html.find('meta[property="og:image"]')
            if tags:
                links.append(tags[0].attrs['content'])
        return links
    except Exception:
        # Best-effort scraping: any ordinary failure yields an empty result.
        # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt and
        # SystemExit propagate.
        return []
開發者ID:ThoughtfulDev,項目名稱:EagleEye,代碼行數:26,代碼來源:instagram.py

示例2: test_american_english_dialect_selection

# 需要導入模塊: import requests_html [as 別名]
# 或者: from requests_html import HTMLSession [as 別名]
def test_american_english_dialect_selection():
    """Check that restricting the dialect to US English narrows the results.

    The page for one word is fetched once; the XPath selector of a US-only
    config must match at least one node, and the selector of a config without
    a dialect must match strictly more nodes.
    """
    # A word for which Wiktionary has dialect-specified pronunciations for
    # both US and non-US English.
    word = "mocha"
    page = requests_html.HTMLSession().get(_PAGE_TEMPLATE.format(word=word))

    # One config limited to the US dialect, one without any dialect filter.
    us_config = config_factory(key="en", dialect="US | American English")
    any_config = config_factory(key="en")

    # Count what each config's XPath selector matches on the same page.
    us_count = len(page.html.xpath(us_config.pron_xpath_selector))
    any_count = len(page.html.xpath(any_config.pron_xpath_selector))

    # any_count covers US and non-US results; us_count only the US ones.
    assert any_count > us_count > 0
開發者ID:kylebgorman,項目名稱:wikipron,代碼行數:21,代碼來源:test_config.py

示例3: make_login_req

# 需要導入模塊: import requests_html [as 別名]
# 或者: from requests_html import HTMLSession [as 別名]
def make_login_req(username, password, disconnect_sessions):
    """Submit the login form and report the outcome as a list of result dicts.

    Args:
        username: value sent as the form's ``name`` field.
        password: value sent as the form's ``pass`` field.
        disconnect_sessions: when the response contains the session-limit
            form, truthy means call ``disconnect_active_sessions``; falsy
            means log out and report the session-limit message.

    Returns:
        list: a list of dicts carrying ``data`` and/or ``code`` keys, or
        whatever ``disconnect_active_sessions`` returns.
    """
    with HTMLSession() as session:
        set_session_cookies(session)

        # First request: obtain the CSRF token the form submission needs.
        form_page = request(session=session)
        token = get_csrf_token(form_page.html, CSRF_TOKEN_INPUT_ID)
        if not token:
            return [{'data': CSRF_TOKEN_MISSING, 'code': 500}]

        payload = {
            'name': username,
            'pass': password,
            # LOGIN_FORM_ID without its first character.
            'form_id': LOGIN_FORM_ID[1:],
            'csrfToken': token,
        }

        login_resp = request(session=session, method='POST', data=payload)
        page = login_resp.html

        if login_resp.status_code != 200:
            return [{'code': 503}]

        if page.find(SESSION_LIMIT_FORM_ID):
            if not disconnect_sessions:
                logout(session=session)
                return [{'data': SESSION_LIMIT_MSG, 'code': 400}]
            results = disconnect_active_sessions(session, page)
            save_session_cookies(session, username)
            return results

        if page.find(LOGOUT_BUTTON_CLASS):
            save_session_cookies(session, username)
            return [{'data': LOGIN_SUCCESS_MSG}]

        return [{'data': INCORRECT_CREDS_MSG, 'code': 400}]
開發者ID:sk364,項目名稱:codechef-cli,代碼行數:35,代碼來源:auth.py

示例4: get_session

# 需要導入模塊: import requests_html [as 別名]
# 或者: from requests_html import HTMLSession [as 別名]
def get_session():
    """Return a new ``HTMLSession``, loading cookies when the cookie file exists.

    When ``COOKIES_FILE_PATH`` does not exist the fresh session is returned
    untouched. Otherwise ``set_session_cookies`` is applied and the cookies
    are loaded, keeping discardable and expired entries.
    """
    new_session = HTMLSession()
    if not os.path.exists(COOKIES_FILE_PATH):
        return new_session

    set_session_cookies(new_session)
    new_session.cookies.load(ignore_discard=True, ignore_expires=True)
    return new_session
開發者ID:sk364,項目名稱:codechef-cli,代碼行數:9,代碼來源:helpers.py

示例5: test_get_session_cookies

# 需要導入模塊: import requests_html [as 別名]
# 或者: from requests_html import HTMLSession [as 別名]
def test_get_session_cookies(self):
    """Should return requests_html.HTMLSession instance preloaded with cookies

    ``fake_login()`` runs first; the session from ``get_session()`` must then
    hold at least one cookie.
    """
    fake_login()

    result = get_session()
    cookie_count = len(result.cookies)

    self.assertIsInstance(result, HTMLSession)
    self.assertTrue(cookie_count > 0)
開發者ID:sk364,項目名稱:codechef-cli,代碼行數:9,代碼來源:test_helpers.py

示例6: test_get_session_no_cookies

# 需要導入模塊: import requests_html [as 別名]
# 或者: from requests_html import HTMLSession [as 別名]
def test_get_session_no_cookies(self):
    """Should return requests_html.HTMLSession instance

    ``fake_logout()`` runs first; the session from ``get_session()`` must then
    hold no cookies.
    """
    fake_logout()

    result = get_session()
    cookie_count = len(result.cookies)

    self.assertIsInstance(result, HTMLSession)
    self.assertEqual(cookie_count, 0)
開發者ID:sk364,項目名稱:codechef-cli,代碼行數:9,代碼來源:test_helpers.py

示例7: __init__

# 需要導入模塊: import requests_html [as 別名]
# 或者: from requests_html import HTMLSession [as 別名]
def __init__(self):
    """Set up the request headers and the HTML session used by this object."""
    user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'

    # Header information.
    self._headers = {'User-Agent': user_agent, 'Accept-Encoding': ''}

    # HTMLSession object.
    self._session = HTMLSession()
開發者ID:snakejordan,項目名稱:administrative-divisions-of-China-on-Python,代碼行數:15,代碼來源:crawler.py

示例8: __init__

# 需要導入模塊: import requests_html [as 別名]
# 或者: from requests_html import HTMLSession [as 別名]
def __init__(self, session=None, requests_kwargs=None):
    """Store the session and the extra request keyword arguments.

    Args:
        session: session object to use. When ``None``, a new ``HTMLSession``
            is created and ``self.default_headers`` is merged into its
            headers; a caller-supplied session is stored untouched.
        requests_kwargs: stored as ``self.requests_kwargs``; ``None`` is
            replaced by a fresh empty dict.
    """
    if session is None:
        session = HTMLSession()
        session.headers.update(self.default_headers)

    self.session = session
    self.requests_kwargs = {} if requests_kwargs is None else requests_kwargs
開發者ID:kevinzg,項目名稱:facebook-scraper,代碼行數:12,代碼來源:facebook_scraper.py

示例9: __init__

# 需要導入模塊: import requests_html [as 別名]
# 或者: from requests_html import HTMLSession [as 別名]
def __init__(self):
        """Create an ``HTMLSession`` and keep it on the instance as ``_session``."""
        self._session = HTMLSession() 
開發者ID:tdickman,項目名稱:crypto51,代碼行數:4,代碼來源:mtc.py

示例10: scrape_instagram_tag

# 需要導入模塊: import requests_html [as 別名]
# 或者: from requests_html import HTMLSession [as 別名]
def scrape_instagram_tag(tag: str, total_count: int=50, existing: set=None):
    """
    Scrape and yield recently tagged instagram photos.

    The tag page is fetched once and then rendered repeatedly, scrolling one
    step further on every pass, until ``total_count`` new images were yielded.

    Args:
        tag: tag name interpolated into the explore/tags URL.
        total_count: maximum number of images to yield. Earlier versions
            yielded ``total_count + 1`` items because of an off-by-one in the
            loop bounds.
        existing: image URLs to skip; the set is copied, never mutated.

    Yields:
        tuple: ``(url, caption, hashtags, mentions)`` where ``url`` and
        ``caption`` are the ``src`` and ``alt`` attributes of an ``<img>`` and
        ``hashtags`` / ``mentions`` are sets of the matches of
        ``REGEXES['hashtag']`` / ``REGEXES['username']`` in the caption.
    """
    seen = set() if existing is None else set(existing)

    session = HTMLSession()
    response = session.get(f'https://www.instagram.com/explore/tags/{tag}')

    count = 0
    page = 0

    # NOTE(review): if further scrolling never reveals unseen images this
    # loop keeps rendering for as long as the consumer iterates -- confirm
    # whether a stall guard is wanted.
    while count < total_count:
        response.html.render(scrolldown=page)
        images = response.html.xpath('//img[@alt]')
        page += 1
        for image in images:
            if count >= total_count:
                break
            try:
                src, caption = image.attrs['src'], image.attrs['alt']
            except KeyError:
                # An <img> lacking one of the attributes is skipped.
                continue
            if src in seen:
                continue
            seen.add(src)
            hashtags = set(REGEXES['hashtag'].findall(caption))
            mentions = set(REGEXES['username'].findall(caption))
            count += 1
            yield src, caption, hashtags, mentions
開發者ID:meetmangukiya,項目名稱:instagram-scraper,代碼行數:36,代碼來源:instagram_scraper.py

示例11: find_links

# 需要導入模塊: import requests_html [as 別名]
# 或者: from requests_html import HTMLSession [as 別名]
def find_links(self):
    """Walk the search result pages and queue every valid result link.

    Starting from ``self.base_url + self.parameters.format(self.query)``, each
    page is fetched while ``self.is_alive`` is truthy. For every ``.b_algo``
    element the ``href`` of the first ``<a>`` inside its first ``<h2>`` is
    read; links accepted by ``self.is_valid`` are put on ``self.links``. The
    loop ends when a fetch fails or ``self.next_page`` returns a falsy value.
    Afterwards ``self.is_searching`` is set to ``False`` under ``self.lock``.
    """
    session = HTMLSession()
    session.headers['user-agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Safari/537.36'

    url = self.base_url + self.parameters.format(self.query)

    while self.is_alive:
        try:
            html = session.get(url).html
        except Exception:
            # Any ordinary fetch failure ends the search; KeyboardInterrupt
            # and SystemExit are no longer swallowed.
            break

        for result in html.find('.b_algo'):
            # ``find(..., first=True)`` yields None when nothing matches, so
            # a result without an <h2> or <a> is skipped instead of raising
            # AttributeError before ``is_searching`` gets reset.
            heading = result.find('h2', first=True)
            anchor = heading.find('a', first=True) if heading is not None else None
            if anchor is None:
                continue

            link = anchor.attrs.get('href')
            if link is None:
                continue

            if self.is_valid(link):
                self.links.put(link)

        next_page = self.next_page(html)

        if not next_page:
            break

        url = next_page

    with self.lock:
        self.is_searching = False
開發者ID:Pure-L0G1C,項目名稱:SQL-scanner,代碼行數:35,代碼來源:search.py

示例12: session

# 需要導入模塊: import requests_html [as 別名]
# 或者: from requests_html import HTMLSession [as 別名]
def session(populated_cluster):
    """Return an ``HTMLSession`` whose requests are prefixed with the cluster URL.

    ``populated_cluster["url"]`` (trailing slashes removed) is prepended to
    the ``url`` argument of every call to the session's ``request``.
    """
    base_url = populated_cluster["url"].rstrip("/")

    client = HTMLSession()
    # Capture the original bound method before it is replaced on the instance.
    original_request = client.request

    def prefixed_request(method, url, *args, **kwargs):
        return original_request(method, base_url + url, *args, **kwargs)

    client.request = prefixed_request
    return client
開發者ID:hjacobs,項目名稱:kube-web-view,代碼行數:13,代碼來源:conftest.py

示例13: __init__

# 需要導入模塊: import requests_html [as 別名]
# 或者: from requests_html import HTMLSession [as 別名]
def __init__(self):
    """Open the database connection, build the HTTP session and the logger.

    Connection parameters come from ``config.db``; the session is created
    with ``mock_browser=False`` and its ``User-Agent`` header is set to
    ``config.user_agent``.
    """
    self.connection = db.Connection(
        config.db["host"],
        config.db["db"],
        config.db["user"],
        config.db["password"],
    )

    http_session = HTMLSession(mock_browser=False)
    http_session.headers['User-Agent'] = config.user_agent
    self.session = http_session

    self.log = Logger()
開發者ID:blekhmanlab,項目名稱:rxivist,代碼行數:7,代碼來源:spider.py

示例14: _scrape_once

# 需要導入模塊: import requests_html [as 別名]
# 或者: from requests_html import HTMLSession [as 別名]
def _scrape_once(data, config: Config) -> Iterator[WordPronPair]:
    """Yield (word, pronunciation) pairs for the category members in ``data``.

    Each entry of ``data["query"]["categorymembers"]`` is skipped when
    ``_skip_word`` or ``_skip_date`` rejects it. Otherwise the word's page is
    fetched with a 10 second timeout and every pair produced by
    ``config.extract_word_pron`` is yielded.
    """
    session = requests_html.HTMLSession()
    for member in data["query"]["categorymembers"]:
        title = member["title"]
        timestamp = member["timestamp"]
        # The date check only runs when the word check did not already skip.
        if _skip_word(title, config.no_skip_spaces_word):
            continue
        if _skip_date(timestamp, config.cut_off_date):
            continue
        page_url = _PAGE_TEMPLATE.format(word=title)
        response = session.get(page_url, timeout=10)
        for extracted_word, pron in config.extract_word_pron(title, response, config):
            yield extracted_word, pron
開發者ID:kylebgorman,項目名稱:wikipron,代碼行數:14,代碼來源:scrape.py

示例15: __init__

# 需要導入模塊: import requests_html [as 別名]
# 或者: from requests_html import HTMLSession [as 別名]
def __init__(self):
        """Initialize the worker object.

        Creates an ``HTMLSession`` and stores it as ``self.session``.
        """

        self.session = HTMLSession() 
開發者ID:imWildCat,項目名稱:scylla,代碼行數:8,代碼來源:worker.py


注:本文中的requests_html.HTMLSession方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。