This article collects typical usage examples of the cloudscraper.create_scraper method in Python. If you have been wondering what exactly cloudscraper.create_scraper does, how to use it, or what it looks like in real code, the curated examples below may help. You can also explore further usage examples from the cloudscraper module that provides it.
The sections below present 14 code examples of the cloudscraper.create_scraper method, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
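Before diving into the examples, here is a minimal sketch of the pattern they all share: create_scraper() returns an object that behaves like a requests.Session but transparently handles Cloudflare's anti-bot challenge. The URL below is only a placeholder:

import cloudscraper

# The scraper is a drop-in replacement for a requests.Session:
# get/post, cookies and headers all work the same way.
scraper = cloudscraper.create_scraper(browser={'browser': 'chrome', 'mobile': False})
resp = scraper.get('https://example.com')  # placeholder URL
print(resp.status_code, len(resp.text))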
Example 1: cloudflare_get
# Required module: import cloudscraper [as alias]
# Or: from cloudscraper import create_scraper [as alias]
def cloudflare_get(url, cookies={}, proxies=None):
    retry = 6
    from JavHelper.core.javlibrary import JavLibraryScraper
    while retry > 0:
        try:
            cookies.update(JavLibraryScraper.load_local_cookies())  # update cloudflare cookies when updating
            res = cloudscraper.create_scraper().get(url, cookies=cookies, proxies=proxies)
            #print(res.text)
            return res
        #except cloudscraper.exceptions.CloudflareIUAMError:
        except Exception as e:
            print(f'cloudflare get failed on {e}, retrying')
            retry = retry - 1
            sleep(5)
    raise Exception(f'cloudflare get {url} failed')
Example 2: main
# Required module: import cloudscraper [as alias]
# Or: from cloudscraper import create_scraper [as alias]
def main():
    # Create structure
    os.makedirs(FLAGS.output, exist_ok=True)

    # Cloudflare scraper
    scraper = cloudscraper.create_scraper()

    # Download inscriptions
    with concurrent.futures.ThreadPoolExecutor(max_workers=FLAGS.connections) as executor:
        future_to_phi = (executor.submit(load_phi_id, text_i, FLAGS.timeout, FLAGS.output, scraper) for text_i in
                         range(1, FLAGS.max_phi_id))
        for future in tqdm(concurrent.futures.as_completed(future_to_phi), total=FLAGS.max_phi_id):
            try:
                future.result()
            except:
                pass
Example 3: query
# Required module: import cloudscraper [as alias]
# Or: from cloudscraper import create_scraper [as alias]
def query(self):
    """
    Query the API for subdomains and match them against the target domain
    """
    # Bypass the Cloudflare check
    scraper = cloudscraper.create_scraper()
    scraper.proxies = self.get_proxy(self.source)
    url = self.addr + self.domain
    try:
        resp = scraper.get(url, timeout=self.timeout)
    except Exception as e:
        logger.log('ERROR', e.args)
        return
    if resp.status_code != 200:
        return
    subdomains = self.match_subdomains(self.domain, str(resp.json()))
    # Merge the discovered subdomains into the overall results
    self.subdomains = self.subdomains.union(subdomains)
Example 4: get_urls_async
# Required module: import cloudscraper [as alias]
# Or: from cloudscraper import create_scraper [as alias]
def get_urls_async(urls, configfile, dbfile, scraper=False):
    if not scraper:
        scraper = cloudscraper.create_scraper(browser={'browser': 'chrome', 'mobile': False})

    results = []

    def load_url(url):
        return get_url(url, configfile, dbfile, scraper)

    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
        future_to_url = {executor.submit(load_url, url): url for url in urls}
        for future in concurrent.futures.as_completed(future_to_url):
            future_to_url[future]
            try:
                results.append(future.result())
            except Exception:
                pass
    return [results, scraper]
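A note on this example's design: the scraper is returned alongside the results so a caller can feed it back into the next call and reuse the already-solved Cloudflare clearance instead of creating a fresh scraper each time. A rough usage sketch under that assumption (the paths and URLs below are placeholders; get_urls_async and its configfile/dbfile arguments come from the surrounding project):

configfile, dbfile = 'config.ini', 'database.db'  # hypothetical paths supplied by the surrounding project
first_batch = ['https://example.org/a', 'https://example.org/b']  # placeholder URLs
results, scraper = get_urls_async(first_batch, configfile, dbfile)
# Passing the returned scraper back in skips creating (and re-clearing) a new one:
more_results, scraper = get_urls_async(['https://example.org/c'], configfile, dbfile, scraper=scraper)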
Example 5: json_download
# Required module: import cloudscraper [as alias]
# Or: from cloudscraper import create_scraper [as alias]
def json_download(self, page_id):
    headers = {
        'User-Agent':
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
        'Accept-Encoding': 'gzip, deflate'
    }

    sess = requests.session()
    sess = cloudscraper.create_scraper(sess)

    search_url = "http://www.mangaeden.com/api/chapter/{0}/".format(page_id)
    connection = sess.get(search_url, headers=headers)

    if connection.status_code != 200:
        print("Whoops! Seems like I can't connect to website.")
        print("It's showing : %s" % connection)
        print("Run this script with the --verbose argument and report the issue along with log file on Github.")
        sys.exit(1)
    else:
        json_data = connection.content
        return json_data
Example 6: json_download
# Required module: import cloudscraper [as alias]
# Or: from cloudscraper import create_scraper [as alias]
def json_download(self, chapter_id):
    headers = {
        'User-Agent':
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
        'Accept-Encoding': 'gzip, deflate'
    }

    sess = requests.session()
    sess = cloudscraper.create_scraper(sess)

    search_url = "http://www.mangaeden.com/api/manga/{0}/".format(chapter_id)
    connection = sess.get(search_url, headers=headers)

    if connection.status_code != 200:
        print("Whoops! Seems like I can't connect to website.")
        print("It's showing : %s" % connection)
        print("Run this script with the --verbose argument and report the issue along with log file on Github.")
        sys.exit(1)
    else:
        json_data = connection.content
        return json_data
Example 7: __init__
# Required module: import cloudscraper [as alias]
# Or: from cloudscraper import create_scraper [as alias]
def __init__(self, manga_url, download_directory, chapter_range, **kwargs):
    self.scraper = cloudscraper.create_scraper()
    conversion = kwargs.get("conversion")
    keep_files = kwargs.get("keep_files")
    self.logging = kwargs.get("log_flag")
    self.sorting = kwargs.get("sorting_order")
    self.manga_url = manga_url + '/'
    self.print_index = kwargs.get("print_index")

    if 'manga' in manga_url:
        self.comic_id = str(str(manga_url).split("/")[-1])
        self.full_series(comic_id=self.comic_id, sorting=self.sorting, download_directory=download_directory,
                         chapter_range=chapter_range, conversion=conversion, keep_files=keep_files)
    if 'lecture-en-ligne' in manga_url:
        self.comic_id = str(str(manga_url).split("/")[-2])
        chapter_path = re.sub(re.compile(r'.*japscan.to'), '', str(self.manga_url))
        self.single_chapter(chapter_path, comic_id=self.comic_id, download_directory=download_directory,
                            scraper=self.scraper)
Example 8: query
# Required module: import cloudscraper [as alias]
# Or: from cloudscraper import create_scraper [as alias]
def query(self):
    # Bypass the Cloudflare check
    scraper = cloudscraper.create_scraper()
    scraper.proxies = self.get_proxy(self.source)
    url = self.addr + self.domain
    try:
        resp = scraper.get(url, timeout=self.timeout)
    except Exception as e:
        logger.log('ERROR', e.args)
        return
    if resp.status_code != 200:
        return
    subdomains = self.match_subdomains(self.domain, str(resp.json()))
    # Merge the discovered subdomains into the overall results
    self.subdomains = self.subdomains.union(subdomains)
Example 9: __init__
# Required module: import cloudscraper [as alias]
# Or: from cloudscraper import create_scraper [as alias]
def __init__(self):
    self._destroyed = False
    self.executor = futures.ThreadPoolExecutor(max_workers=2)

    # Initialize cloudscraper
    self.scraper = cloudscraper.create_scraper(
        browser={
            'browser': 'firefox',
            'mobile': False
        }
    )

    # Must resolve these fields inside `read_novel_info`
    self.novel_title = 'N/A'
    self.novel_author = 'N/A'
    self.novel_cover = None
    self.is_rtl = False

    # Each item must contain these keys:
    # `id` - 1 based index of the volume
    # `title` - the volume title (can be ignored)
    self.volumes = []

    # Each item must contain these keys:
    # `id` - 1 based index of the chapter
    # `title` - the title name
    # `volume` - the volume id of this chapter
    # `volume_title` - the volume title (can be ignored)
    # `url` - the link where to download the chapter
    self.chapters = []

    # Other stuff - not necessary to resolve from the crawler instance.
    self.home_url = ''
    self.novel_url = ''
    self.last_visited_url = None
# end def
Example 10: check_updates
# Required module: import cloudscraper [as alias]
# Or: from cloudscraper import create_scraper [as alias]
def check_updates():
    try:
        logger.info('Checking latest version')
        pypi_short_url = 'http://bit.ly/2yYyFGd'
        scraper = cloudscraper.create_scraper()
        res = scraper.get(pypi_short_url, timeout=5)
        latest_version = res.json()['info']['version']
        if get_value() != latest_version:
            new_version_news(latest_version)
        # end if
    except Exception:
        logger.warn('Failed to check for update')
    # end try
# end def
Example 11: __init__
# Required module: import cloudscraper [as alias]
# Or: from cloudscraper import create_scraper [as alias]
def __init__(self):
    self.scrapper = cloudscraper.create_scraper()
    super().__init__()
Example 12: json_download
# Required module: import cloudscraper [as alias]
# Or: from cloudscraper import create_scraper [as alias]
def json_download(self, manga_language):
    print("Downloading The Latest Data Set...")

    headers = {
        'User-Agent':
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
        'Accept-Encoding': 'gzip, deflate'
    }

    sess = requests.session()
    sess = cloudscraper.create_scraper(sess)

    search_url = "http://www.mangaeden.com/api/list/{0}/".format(manga_language)
    connection = sess.get(search_url, headers=headers)

    if connection.status_code != 200:
        print("Whoops! Seems like I can't connect to website.")
        print("It's showing : %s" % connection)
        print("Run this script with the --verbose argument and report the issue along with log file on Github.")
        sys.exit(1)
    else:
        json_data = connection.content
        # print(json_data)
        try:
            # Let's save the JSON data
            with open("Manga_Eden_Data.json", "wb") as write_file:
                write_file.write(json_data)
        except Exception as WriteError:
            print("Couldn't make Cache : {0}".format(WriteError))
            pass
        return json_data
Example 13: __init__
# Required module: import cloudscraper [as alias]
# Or: from cloudscraper import create_scraper [as alias]
def __init__(self):
    self.path = Path('Interface/AddOns')
    self.configPath = Path('WTF/CurseBreaker.json')
    self.cachePath = Path('WTF/CurseBreaker.cache')
    self.clientType = 'wow_retail'
    self.waCompanionVersion = 110
    self.config = None
    self.cfIDs = None
    self.cfDirs = None
    self.cfCache = {}
    self.wowiCache = {}
    self.checksumCache = {}
    self.scraper = cloudscraper.create_scraper()
Example 14: user_login
# Required module: import cloudscraper [as alias]
# Or: from cloudscraper import create_scraper [as alias]
def user_login(self, username, password, **kwargs):
    session_cookie = ""
    headers = kwargs.get("headers")
    if not headers:
        headers = {
            'User-Agent':
                'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
            'Accept-Encoding': 'gzip, deflate',
            'referer': 'https://bato.to/'
        }

    print("Getting Auth Token...")
    page_source, update_cookie = globalFunctions.GlobalFunctions().page_downloader(
        manga_url="https://bato.to/forums/index.php?app=core&module=global&section=login")
    soup_parse = page_source.find_all('input', {'type': 'hidden'})
    auth_token = str([x['value'] for x in soup_parse][0]).strip()

    payload = {
        'auth_key': auth_token,
        'ips_username': username,
        'ips_password': password,
        'rememberMe': '1'
    }

    sess = requests.session()
    sess = cloudscraper.create_scraper(sess)

    print('Trying To Log In...')
    connection = sess.post("https://bato.to/forums/index.php?app=core&module=global&section=login&do=process",
                           headers=headers, data=payload, cookies=kwargs.get("cookies"))

    if connection.status_code != 200:
        print("Whoops! Seems like I can't connect to website.")
        print("It's showing : %s" % connection)
        print("Run this script with the --verbose argument and report the issue along with log file on Github.")
        sys.exit(1)
    else:
        page_source = BeautifulSoup(connection.text.encode("utf-8"), "html.parser")
        if "logout" in str(page_source):
            print("Successfully Logged In!")
        else:
            print("Couldn't Log You In. Please Check Your Credentials Again!")
        session_cookie = sess.cookies

    return session_cookie