本文整理汇总了Python中requests_html.HTMLSession方法的典型用法代码示例。如果您正苦于以下问题：Python requests_html.HTMLSession方法的具体用法？Python requests_html.HTMLSession怎么用？Python requests_html.HTMLSession使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块requests_html的用法示例。
在下文中一共展示了requests_html.HTMLSession方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: getLinks
# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTMLSession [as 别名]
def getLinks(self):
    """Collect the ``og:image`` URLs of the posts listed on a profile page.

    Fetches ``https://instagram.com/<self.username>``, parses the JSON text
    embedded in the ``body > script:nth-child(5)`` element, gathers the
    shortcode of every edge under ``edge_owner_to_timeline_media`` and then
    requests each post page to read its ``og:image`` meta tag.

    Returns:
        list: the ``content`` attribute of each post's ``og:image`` meta tag,
        in the order the edges appear. Posts without such a tag are skipped.
        An empty list is returned when anything in the extraction step
        raises (missing JSON keys, a failed post request, ...).

    Raises:
        IndexError: if the profile page has no matching ``<script>`` element.
        ValueError: if the script text is not valid JSON after trimming.
    """
    # The context manager closes the session (and its pooled connections)
    # on every exit path, including the early ``return []``.
    with HTMLSession() as session:
        r = session.get('https://instagram.com/' + self.username)
        script_text = r.html.find('body > script:nth-child(5)')[0].text
        # Strip the 21-character prefix and the final character that wrap
        # the JSON payload (presumably a ``window._sharedData = `` assignment
        # and a trailing ``;`` -- TODO confirm against the live page).
        json_parsed = json.loads(script_text[21:-1])
        try:
            edges = json_parsed['entry_data']['ProfilePage'][0]['graphql']['user']['edge_owner_to_timeline_media']['edges']
            shortcodes = [edge['node']['shortcode'] for edge in edges]
            links = []
            for sc in shortcodes:
                r = session.get('https://instagram.com/p/' + sc + '/?taken-by=' + self.username)
                img = r.html.find('meta[property="og:image"]')
                if img:
                    links.append(img[0].attrs['content'])
            return links
        except Exception:
            # Deliberate best-effort: any failure yields an empty result.
            # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt
            # and SystemExit propagate.
            return []
示例2: test_american_english_dialect_selection
# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTMLSession [as 别名]
def test_american_english_dialect_selection():
    """The US-only selector must match fewer results than the dialect-agnostic one, but at least one."""
    # "mocha" is a word for which Wiktionary has dialect-specified
    # pronunciations for both US and non-US English.
    page_url = _PAGE_TEMPLATE.format(word="mocha")
    response = requests_html.HTMLSession().get(page_url)

    # One config restricted to the US dialect, one without any restriction.
    us_config = config_factory(key="en", dialect="US | American English")
    any_config = config_factory(key="en")

    # Count the matches of each config's XPath selector on the same page.
    us_count = len(response.html.xpath(us_config.pron_xpath_selector))
    any_count = len(response.html.xpath(any_config.pron_xpath_selector))

    # any_count covers US and non-US results; us_count only the US ones.
    assert any_count > us_count > 0
示例3: make_login_req
# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTMLSession [as 别名]
def make_login_req(username, password, disconnect_sessions):
    """Submit the login form and report the outcome as a list of response dicts.

    Args:
        username: value sent as the form's ``name`` field; also passed to
            ``save_session_cookies`` on success.
        password: value sent as the form's ``pass`` field.
        disconnect_sessions: when the session-limit form is shown, truthy
            means disconnect the active sessions, falsy means log out again.

    Returns:
        list: dicts carrying ``data`` and/or ``code`` keys describing the
        result (missing CSRF token, session limit, success, bad credentials,
        or a bare ``503`` for a non-200 login response).
    """
    with HTMLSession() as session:
        set_session_cookies(session)

        # The first request only serves to obtain the CSRF token.
        landing = request(session=session)
        token = get_csrf_token(landing.html, CSRF_TOKEN_INPUT_ID)
        if not token:
            return [{'data': CSRF_TOKEN_MISSING, 'code': 500}]

        payload = {
            'name': username,
            'pass': password,
            # The first character of the constant is dropped
            # (presumably a CSS ``#`` prefix -- verify against its definition).
            'form_id': LOGIN_FORM_ID[1:],
            'csrfToken': token
        }
        resp = request(session=session, method='POST', data=payload)
        page = resp.html

        if resp.status_code != 200:
            return [{'code': 503}]

        if page.find(SESSION_LIMIT_FORM_ID):
            if not disconnect_sessions:
                logout(session=session)
                return [{'data': SESSION_LIMIT_MSG, 'code': 400}]
            resps = disconnect_active_sessions(session, page)
            save_session_cookies(session, username)
            return resps

        if page.find(LOGOUT_BUTTON_CLASS):
            save_session_cookies(session, username)
            return [{'data': LOGIN_SUCCESS_MSG}]

        return [{'data': INCORRECT_CREDS_MSG, 'code': 400}]
示例4: get_session
# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTMLSession [as 别名]
def get_session():
    """Create an HTMLSession, restoring cookies when the cookie file exists.

    Returns:
        HTMLSession: a new session. If a file exists at ``COOKIES_FILE_PATH``,
        ``set_session_cookies`` is applied and the cookie jar is loaded with
        discarded and expired cookies included.
    """
    new_session = HTMLSession()
    if not os.path.exists(COOKIES_FILE_PATH):
        return new_session

    set_session_cookies(new_session)
    new_session.cookies.load(ignore_discard=True, ignore_expires=True)
    return new_session
示例5: test_get_session_cookies
# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTMLSession [as 别名]
def test_get_session_cookies(self):
    """After a fake login, get_session() returns an HTMLSession holding at least one cookie."""
    fake_login()
    restored = get_session()
    self.assertIsInstance(restored, HTMLSession)
    cookie_count = len(restored.cookies)
    self.assertTrue(cookie_count > 0)
示例6: test_get_session_no_cookies
# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTMLSession [as 别名]
def test_get_session_no_cookies(self):
    """After a fake logout, get_session() returns an HTMLSession with an empty cookie jar."""
    fake_logout()
    fresh = get_session()
    self.assertIsInstance(fresh, HTMLSession)
    cookie_count = len(fresh.cookies)
    self.assertEqual(cookie_count, 0)
示例7: __init__
# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTMLSession [as 别名]
def __init__(self):
    """Set up the default request headers and a fresh HTMLSession."""
    #: Header information: a desktop Chrome User-Agent and an empty
    #: Accept-Encoding value.
    self._headers = dict([
        ('User-Agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'),
        ('Accept-Encoding', ''),
    ])
    #: The HTMLSession object.
    self._session = HTMLSession()
示例8: __init__
# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTMLSession [as 别名]
def __init__(self, session=None, requests_kwargs=None):
    """Store the session and per-request keyword arguments.

    Args:
        session: session object to use. When ``None``, a new HTMLSession is
            created and ``self.default_headers`` is merged into its headers;
            a caller-supplied session is stored untouched.
        requests_kwargs: mapping stored as ``self.requests_kwargs``. When
            ``None``, a fresh empty dict is used.
    """
    if session is not None:
        self.session = session
    else:
        self.session = HTMLSession()
        self.session.headers.update(self.default_headers)

    self.requests_kwargs = {} if requests_kwargs is None else requests_kwargs
示例9: __init__
# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTMLSession [as 别名]
def __init__(self):
    """Create the HTMLSession stored on this instance as ``_session``."""
    html_session = HTMLSession()
    self._session = html_session
示例10: scrape_instagram_tag
# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTMLSession [as 别名]
def scrape_instagram_tag(tag: str, total_count: int=50, existing: set=None):
    """
    Scrape and yield recently tagged instagram photos.

    Args:
        tag: hashtag name inserted into the ``/explore/tags/<tag>`` URL.
        total_count: maximum number of photos to yield.
        existing: image URLs to skip; the set itself is not modified.

    Yields:
        tuple: ``(url, caption, hashtags, mentions)`` where ``url`` and
        ``caption`` are the image's ``src`` and ``alt`` attributes, and
        ``hashtags`` / ``mentions`` are the sets of matches of
        ``REGEXES['hashtag']`` / ``REGEXES['username']`` in the caption.
    """
    if existing is None:
        existing = set()

    url = f'https://www.instagram.com/explore/tags/{tag}'
    session = HTMLSession()
    req = session.get(url)

    # Work on a copy so the caller's set is left untouched.
    imgs = set(existing)
    count = 0
    page = 0

    # NOTE(review): if rendering stops producing unseen images before
    # ``total_count`` is reached, this loop keeps re-rendering indefinitely.
    # Strict ``<`` so that exactly ``total_count`` items are yielded, not one more.
    while count < total_count:
        # Each pass scrolls one step further than the previous one.
        req.html.render(scrolldown=page)
        images = req.html.xpath('//img[@alt]')
        page += 1
        for image in images:
            if count >= total_count:
                break
            try:
                src, caption = image.attrs['src'], image.attrs['alt']
            except KeyError:
                # Image element without a usable ``src``/``alt``: skip it.
                continue
            if src in imgs:
                continue
            imgs.add(src)
            hashtags = set(REGEXES['hashtag'].findall(caption))
            mentions = set(REGEXES['username'].findall(caption))
            count += 1
            yield src, caption, hashtags, mentions
示例11: find_links
# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTMLSession [as 别名]
def find_links(self):
    """Walk the search result pages and queue every valid result link.

    Starting from ``self.base_url + self.parameters.format(self.query)``,
    each page's ``.b_algo`` elements are scanned for the ``href`` of the
    link inside their ``<h2>``. Links accepted by ``self.is_valid`` are put
    on ``self.links``. Paging continues through ``self.next_page(html)``
    until it returns a falsy value, a request fails, or ``self.is_alive``
    becomes false. Finally ``self.is_searching`` is set to ``False`` while
    holding ``self.lock``.
    """
    session = HTMLSession()
    session.headers['user-agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Safari/537.36'
    url = self.base_url + self.parameters.format(self.query)

    while self.is_alive:
        try:
            html = session.get(url).html
        except Exception:
            # Best-effort: a failed request ends the search. ``Exception``
            # (not a bare ``except``) lets KeyboardInterrupt/SystemExit through.
            break

        for result in html.find('.b_algo'):
            try:
                # ``find(..., first=True)`` returns None when nothing matches,
                # so a result without <h2> or <a> raises AttributeError here;
                # an anchor without ``href`` raises KeyError.
                link = result.find('h2', first=True).find('a', first=True).attrs['href']
            except (AttributeError, KeyError):
                continue

            if self.is_valid(link):
                self.links.put(link)

        next_page = self.next_page(html)
        if not next_page:
            break
        url = next_page

    with self.lock:
        self.is_searching = False
示例12: session
# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTMLSession [as 别名]
def session(populated_cluster):
    """Return an HTMLSession whose request URLs are prefixed with the cluster URL.

    Args:
        populated_cluster: mapping with a ``"url"`` entry; trailing slashes
            are stripped before it is used as the prefix.

    Returns:
        HTMLSession: a session whose ``request`` attribute is replaced by a
        wrapper that prepends the prefix to the ``url`` argument and
        forwards everything else unchanged.
    """
    base_url = populated_cluster["url"].rstrip("/")
    html_session = HTMLSession()
    # Capture the original bound method before it is shadowed below.
    send = html_session.request

    def prefixed_request(method, url, *args, **kwargs):
        return send(method, base_url + url, *args, **kwargs)

    html_session.request = prefixed_request
    return html_session
示例13: __init__
# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTMLSession [as 别名]
def __init__(self):
    """Open the database connection, prepare the HTTP session and create a logger.

    Connection parameters are read from ``config.db``; the session is built
    with ``mock_browser=False`` and then given ``config.user_agent`` as its
    ``User-Agent`` header.
    """
    db_settings = config.db
    self.connection = db.Connection(
        db_settings["host"],
        db_settings["db"],
        db_settings["user"],
        db_settings["password"],
    )

    html_session = HTMLSession(mock_browser=False)
    html_session.headers['User-Agent'] = config.user_agent
    self.session = html_session

    self.log = Logger()
示例14: _scrape_once
# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTMLSession [as 别名]
def _scrape_once(data, config: Config) -> Iterator[WordPronPair]:
    """Yield (word, pronunciation) pairs for the category members in ``data``.

    Args:
        data: mapping whose ``data["query"]["categorymembers"]`` is an
            iterable of entries with ``"title"`` and ``"timestamp"`` keys.
        config: supplies the skip settings (``no_skip_spaces_word``,
            ``cut_off_date``) and the ``extract_word_pron`` callable.

    Yields:
        Each ``(word, pron)`` pair produced by ``config.extract_word_pron``
        for the members that are not skipped.
    """
    session = requests_html.HTMLSession()
    members = data["query"]["categorymembers"]
    for member in members:
        title = member["title"]
        timestamp = member["timestamp"]
        # The date check only runs when the word check did not already skip.
        if _skip_word(title, config.no_skip_spaces_word):
            continue
        if _skip_date(timestamp, config.cut_off_date):
            continue
        page_url = _PAGE_TEMPLATE.format(word=title)
        response = session.get(page_url, timeout=10)
        for entry, pron in config.extract_word_pron(title, response, config):
            yield entry, pron
示例15: __init__
# 需要导入模块: import requests_html [as 别名]
# 或者: from requests_html import HTMLSession [as 别名]
def __init__(self):
    """Initialize the worker object with a new HTMLSession stored as ``session``."""
    html_session = HTMLSession()
    self.session = html_session