This article collects typical usage examples of the cloudscraper.create_scraper method in Python. If you have been wondering what exactly cloudscraper.create_scraper does, how to use it, or what it looks like in real code, the curated examples below may help. You can also explore further usage examples from the cloudscraper module that provides it.
The sections below present 14 code examples of the cloudscraper.create_scraper method, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
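Before diving into the examples, here is a minimal sketch of the pattern they all share: create_scraper() returns an object that behaves like a requests.Session but transparently handles Cloudflare's anti-bot challenge. The URL below is only a placeholder:

import cloudscraper

# The scraper is a drop-in replacement for a requests.Session:
# get/post, cookies and headers all work the same way.
scraper = cloudscraper.create_scraper(browser={'browser': 'chrome', 'mobile': False})
resp = scraper.get('https://example.com')  # placeholder URL
print(resp.status_code, len(resp.text))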
Example 1: cloudflare_get
# Required module: import cloudscraper [as alias]
# Or: from cloudscraper import create_scraper [as alias]
def cloudflare_get(url, cookies={}, proxies=None):
    retry = 6
    from JavHelper.core.javlibrary import JavLibraryScraper
    while retry > 0:
        try:
            cookies.update(JavLibraryScraper.load_local_cookies())  # update cloudflare cookies when updating
            res = cloudscraper.create_scraper().get(url, cookies=cookies, proxies=proxies)
            #print(res.text)
            return res
        #except cloudscraper.exceptions.CloudflareIUAMError:
        except Exception as e:
            print(f'cloudflare get failed on {e}, retrying')
            retry = retry - 1
            sleep(5)
    raise Exception(f'cloudflare get {url} failed')
Example 2: main
# Required module: import cloudscraper [as alias]
# Or: from cloudscraper import create_scraper [as alias]
def main():
    # Create structure
    os.makedirs(FLAGS.output, exist_ok=True)

    # Cloudflare scraper
    scraper = cloudscraper.create_scraper()

    # Download inscriptions
    with concurrent.futures.ThreadPoolExecutor(max_workers=FLAGS.connections) as executor:
        future_to_phi = (executor.submit(load_phi_id, text_i, FLAGS.timeout, FLAGS.output, scraper) for text_i in
                         range(1, FLAGS.max_phi_id))
        for future in tqdm(concurrent.futures.as_completed(future_to_phi), total=FLAGS.max_phi_id):
            try:
                future.result()
            except:
                pass
Example 3: query
# Required module: import cloudscraper [as alias]
# Or: from cloudscraper import create_scraper [as alias]
def query(self):
    """
    Query the API for subdomains and match them against the target domain
    """
    # Bypass the Cloudflare check
    scraper = cloudscraper.create_scraper()
    scraper.proxies = self.get_proxy(self.source)
    url = self.addr + self.domain
    try:
        resp = scraper.get(url, timeout=self.timeout)
    except Exception as e:
        logger.log('ERROR', e.args)
        return
    if resp.status_code != 200:
        return
    subdomains = self.match_subdomains(self.domain, str(resp.json()))
    # Merge the discovered subdomains into the overall results
    self.subdomains = self.subdomains.union(subdomains)
Example 4: get_urls_async
# Required module: import cloudscraper [as alias]
# Or: from cloudscraper import create_scraper [as alias]
def get_urls_async(urls, configfile, dbfile, scraper=False):
    if not scraper:
        scraper = cloudscraper.create_scraper(browser={'browser': 'chrome', 'mobile': False})

    results = []

    def load_url(url):
        return get_url(url, configfile, dbfile, scraper)

    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
        future_to_url = {executor.submit(load_url, url): url for url in urls}
        for future in concurrent.futures.as_completed(future_to_url):
            future_to_url[future]
            try:
                results.append(future.result())
            except Exception:
                pass
    return [results, scraper]
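A note on this example's design: the scraper is returned alongside the results so a caller can feed it back into the next call and reuse the already-solved Cloudflare clearance instead of creating a fresh scraper each time. A rough usage sketch under that assumption (the paths and URLs below are placeholders; get_urls_async and its configfile/dbfile arguments come from the surrounding project):

configfile, dbfile = 'config.ini', 'database.db'  # hypothetical paths supplied by the surrounding project
first_batch = ['https://example.org/a', 'https://example.org/b']  # placeholder URLs
results, scraper = get_urls_async(first_batch, configfile, dbfile)
# Passing the returned scraper back in skips creating (and re-clearing) a new one:
more_results, scraper = get_urls_async(['https://example.org/c'], configfile, dbfile, scraper=scraper)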
Example 5: json_download
# Required module: import cloudscraper [as alias]
# Or: from cloudscraper import create_scraper [as alias]
def json_download(self, page_id):
    headers = {
        'User-Agent':
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
        'Accept-Encoding': 'gzip, deflate'
    }

    sess = requests.session()
    sess = cloudscraper.create_scraper(sess)

    search_url = "http://www.mangaeden.com/api/chapter/{0}/".format(page_id)
    connection = sess.get(search_url, headers=headers)

    if connection.status_code != 200:
        print("Whoops! Seems like I can't connect to website.")
        print("It's showing : %s" % connection)
        print("Run this script with the --verbose argument and report the issue along with log file on Github.")
        sys.exit(1)
    else:
        json_data = connection.content
        return json_data
Example 6: json_download
# Required module: import cloudscraper [as alias]
# Or: from cloudscraper import create_scraper [as alias]
def json_download(self, chapter_id):
    headers = {
        'User-Agent':
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
        'Accept-Encoding': 'gzip, deflate'
    }

    sess = requests.session()
    sess = cloudscraper.create_scraper(sess)

    search_url = "http://www.mangaeden.com/api/manga/{0}/".format(chapter_id)
    connection = sess.get(search_url, headers=headers)

    if connection.status_code != 200:
        print("Whoops! Seems like I can't connect to website.")
        print("It's showing : %s" % connection)
        print("Run this script with the --verbose argument and report the issue along with log file on Github.")
        sys.exit(1)
    else:
        json_data = connection.content
        return json_data
Example 7: __init__
# Required module: import cloudscraper [as alias]
# Or: from cloudscraper import create_scraper [as alias]
def __init__(self, manga_url, download_directory, chapter_range, **kwargs):
    self.scraper = cloudscraper.create_scraper()
    conversion = kwargs.get("conversion")
    keep_files = kwargs.get("keep_files")
    self.logging = kwargs.get("log_flag")
    self.sorting = kwargs.get("sorting_order")
    self.manga_url = manga_url + '/'
    self.print_index = kwargs.get("print_index")

    if 'manga' in manga_url:
        self.comic_id = str(str(manga_url).split("/")[-1])
        self.full_series(comic_id=self.comic_id, sorting=self.sorting, download_directory=download_directory,
                         chapter_range=chapter_range, conversion=conversion, keep_files=keep_files)
    if 'lecture-en-ligne' in manga_url:
        self.comic_id = str(str(manga_url).split("/")[-2])
        chapter_path = re.sub(re.compile(r'.*japscan.to'), '', str(self.manga_url))
        self.single_chapter(chapter_path, comic_id=self.comic_id, download_directory=download_directory,
                            scraper=self.scraper)
Example 8: query
# Required module: import cloudscraper [as alias]
# Or: from cloudscraper import create_scraper [as alias]
def query(self):
    # Bypass the Cloudflare check
    scraper = cloudscraper.create_scraper()
    scraper.proxies = self.get_proxy(self.source)
    url = self.addr + self.domain
    try:
        resp = scraper.get(url, timeout=self.timeout)
    except Exception as e:
        logger.log('ERROR', e.args)
        return
    if resp.status_code != 200:
        return
    subdomains = self.match_subdomains(self.domain, str(resp.json()))
    # Merge the discovered subdomains into the overall results
    self.subdomains = self.subdomains.union(subdomains)
Example 9: __init__
# Required module: import cloudscraper [as alias]
# Or: from cloudscraper import create_scraper [as alias]
def __init__(self):
    self._destroyed = False
    self.executor = futures.ThreadPoolExecutor(max_workers=2)

    # Initialize cloudscraper
    self.scraper = cloudscraper.create_scraper(
        browser={
            'browser': 'firefox',
            'mobile': False
        }
    )

    # Must resolve these fields inside `read_novel_info`
    self.novel_title = 'N/A'
    self.novel_author = 'N/A'
    self.novel_cover = None
    self.is_rtl = False

    # Each item must contain these keys:
    # `id` - 1 based index of the volume
    # `title` - the volume title (can be ignored)
    self.volumes = []

    # Each item must contain these keys:
    # `id` - 1 based index of the chapter
    # `title` - the title name
    # `volume` - the volume id of this chapter
    # `volume_title` - the volume title (can be ignored)
    # `url` - the link where to download the chapter
    self.chapters = []

    # Other stuff - not necessary to resolve from the crawler instance.
    self.home_url = ''
    self.novel_url = ''
    self.last_visited_url = None
# end def
Example 10: check_updates
# Required module: import cloudscraper [as alias]
# Or: from cloudscraper import create_scraper [as alias]
def check_updates():
    try:
        logger.info('Checking latest version')
        pypi_short_url = 'http://bit.ly/2yYyFGd'
        scraper = cloudscraper.create_scraper()
        res = scraper.get(pypi_short_url, timeout=5)
        latest_version = res.json()['info']['version']
        if get_value() != latest_version:
            new_version_news(latest_version)
        # end if
    except Exception:
        logger.warn('Failed to check for update')
    # end try
# end def
Example 11: __init__
# Required module: import cloudscraper [as alias]
# Or: from cloudscraper import create_scraper [as alias]
def __init__(self):
    self.scrapper = cloudscraper.create_scraper()
    super().__init__()
Example 12: json_download
# Required module: import cloudscraper [as alias]
# Or: from cloudscraper import create_scraper [as alias]
def json_download(self, manga_language):
    print("Downloading The Latest Data Set...")

    headers = {
        'User-Agent':
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
        'Accept-Encoding': 'gzip, deflate'
    }

    sess = requests.session()
    sess = cloudscraper.create_scraper(sess)

    search_url = "http://www.mangaeden.com/api/list/{0}/".format(manga_language)
    connection = sess.get(search_url, headers=headers)

    if connection.status_code != 200:
        print("Whoops! Seems like I can't connect to website.")
        print("It's showing : %s" % connection)
        print("Run this script with the --verbose argument and report the issue along with log file on Github.")
        sys.exit(1)
    else:
        json_data = connection.content
        # print(json_data)
        try:
            # Let's save the JSON data
            with open("Manga_Eden_Data.json", "wb") as write_file:
                write_file.write(json_data)
        except Exception as WriteError:
            print("Couldn't make Cache : {0}".format(WriteError))
            pass
        return json_data
Example 13: __init__
# Required module: import cloudscraper [as alias]
# Or: from cloudscraper import create_scraper [as alias]
def __init__(self):
    self.path = Path('Interface/AddOns')
    self.configPath = Path('WTF/CurseBreaker.json')
    self.cachePath = Path('WTF/CurseBreaker.cache')
    self.clientType = 'wow_retail'
    self.waCompanionVersion = 110
    self.config = None
    self.cfIDs = None
    self.cfDirs = None
    self.cfCache = {}
    self.wowiCache = {}
    self.checksumCache = {}
    self.scraper = cloudscraper.create_scraper()
Example 14: user_login
# Required module: import cloudscraper [as alias]
# Or: from cloudscraper import create_scraper [as alias]
def user_login(self, username, password, **kwargs):
    session_cookie = ""
    headers = kwargs.get("headers")
    if not headers:
        headers = {
            'User-Agent':
                'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
            'Accept-Encoding': 'gzip, deflate',
            'referer': 'https://bato.to/'
        }

    print("Getting Auth Token...")
    page_source, update_cookie = globalFunctions.GlobalFunctions().page_downloader(
        manga_url="https://bato.to/forums/index.php?app=core&module=global&section=login")
    soup_parse = page_source.find_all('input', {'type': 'hidden'})
    auth_token = str([x['value'] for x in soup_parse][0]).strip()

    payload = {
        'auth_key': auth_token,
        'ips_username': username,
        'ips_password': password,
        'rememberMe': '1'
    }

    sess = requests.session()
    sess = cloudscraper.create_scraper(sess)

    print('Trying To Log In...')
    connection = sess.post("https://bato.to/forums/index.php?app=core&module=global&section=login&do=process",
                           headers=headers, data=payload, cookies=kwargs.get("cookies"))

    if connection.status_code != 200:
        print("Whoops! Seems like I can't connect to website.")
        print("It's showing : %s" % connection)
        print("Run this script with the --verbose argument and report the issue along with log file on Github.")
        sys.exit(1)
    else:
        page_source = BeautifulSoup(connection.text.encode("utf-8"), "html.parser")
        if "logout" in str(page_source):
            print("Successfully Logged In!")
        else:
            print("Couldn't Log You In. Please Check Your Credentials Again!")
        session_cookie = sess.cookies

    return session_cookie