

Python cloudscraper.create_scraper Method Code Examples

This article collects typical usage examples of the Python method cloudscraper.create_scraper. If you are unsure what cloudscraper.create_scraper does or how to use it, the curated code examples below may help. You can also explore other usage examples from the cloudscraper module, to which this method belongs.


The following shows 14 code examples of the cloudscraper.create_scraper method, sorted by popularity by default.
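
Before the examples, a minimal sketch of the basic pattern may be helpful. create_scraper() returns a drop-in replacement for a requests.Session that transparently solves Cloudflare's anti-bot (IUAM) JavaScript challenge; the URL below is only a placeholder.

import cloudscraper

# create_scraper() returns a requests.Session subclass; use it like a normal session.
scraper = cloudscraper.create_scraper()

# The Cloudflare challenge, if any, is solved before the response is returned.
resp = scraper.get('https://example.com')
print(resp.status_code)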

Example 1: cloudflare_get

# Required import: import cloudscraper [as alias]
# Or: from cloudscraper import create_scraper [as alias]
# Also required here: from time import sleep
def cloudflare_get(url, cookies=None, proxies=None):
    retry = 6
    cookies = dict(cookies or {})  # copy instead of mutating a shared default dict
    from JavHelper.core.javlibrary import JavLibraryScraper
    while retry > 0:
        try:
            cookies.update(JavLibraryScraper.load_local_cookies())  # refresh cloudflare cookies on each attempt
            res = cloudscraper.create_scraper().get(url, cookies=cookies, proxies=proxies)
            return res
        except Exception as e:
            print(f'cloudflare get failed on {e}, retrying')
            retry -= 1
            sleep(5)

    raise Exception(f'cloudflare get {url} failed')
Developer: ddd354, Project: JAVOneStop, Lines: 18, Source: requester_proxy.py

Example 2: main

# Required import: import cloudscraper [as alias]
# Or: from cloudscraper import create_scraper [as alias]
# Also required here: import os, concurrent.futures; from tqdm import tqdm (FLAGS and load_phi_id come from the surrounding project)
def main():
  # Create structure
  os.makedirs(FLAGS.output, exist_ok=True)

  # Cloudflare scraper
  scraper = cloudscraper.create_scraper()

  # Download inscriptions
  with concurrent.futures.ThreadPoolExecutor(max_workers=FLAGS.connections) as executor:
    future_to_phi = (executor.submit(load_phi_id, text_i, FLAGS.timeout, FLAGS.output, scraper) for text_i in
                     range(1, FLAGS.max_phi_id))
    for future in tqdm(concurrent.futures.as_completed(future_to_phi), total=FLAGS.max_phi_id):
      try:
        future.result()
      except Exception:
        pass  # skip inscriptions that failed to download
Developer: sommerschield, Project: ancient-text-restoration, Lines: 18, Source: phi_download.py

Example 3: query

# Required import: import cloudscraper [as alias]
# Or: from cloudscraper import create_scraper [as alias]
def query(self):
        """
        Query the API for subdomains and match them against the target domain
        """
        # Bypass the Cloudflare check
        scraper = cloudscraper.create_scraper()
        scraper.proxies = self.get_proxy(self.source)
        url = self.addr + self.domain
        try:
            resp = scraper.get(url, timeout=self.timeout)
        except Exception as e:
            logger.log('ERROR', e.args)
            return
        if resp.status_code != 200:
            return
        subdomains = self.match_subdomains(self.domain, str(resp.json()))
        # Merge the subdomain search results
        self.subdomains = self.subdomains.union(subdomains) 
Developer: shmilylty, Project: OneForAll, Lines: 20, Source: bufferover.py

Example 4: get_urls_async

# Required import: import cloudscraper [as alias]
# Or: from cloudscraper import create_scraper [as alias]
# Also required here: import concurrent.futures
def get_urls_async(urls, configfile, dbfile, scraper=False):
    if not scraper:
        scraper = cloudscraper.create_scraper(browser={'browser': 'chrome', 'mobile': False})
    results = []

    def load_url(url):
        return get_url(url, configfile, dbfile, scraper)

    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
        future_to_url = {executor.submit(load_url, url): url for url in urls}
        for future in concurrent.futures.as_completed(future_to_url):
            try:
                results.append(future.result())
            except Exception:
                pass
    return [results, scraper] 
Developer: rix1337, Project: RSScrawler, Lines: 19, Source: url.py
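
Example 4 passes a browser profile to create_scraper and reuses one scraper across many requests. A minimal sketch of that reuse pattern, with placeholder URLs: the browser dict shapes the User-Agent cloudscraper presents, and reusing the session keeps the solved-challenge cookies, so the challenge is not re-solved for every request.

scraper = cloudscraper.create_scraper(browser={'browser': 'chrome', 'mobile': False})
for url in ['https://example.com/a', 'https://example.com/b']:  # placeholder URLs
    resp = scraper.get(url)  # same session, so challenge cookies are reused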

Example 5: json_download

# Required import: import cloudscraper [as alias]
# Or: from cloudscraper import create_scraper [as alias]
# Also required here: import requests, sys
def json_download(self, page_id):
        headers = {
            'User-Agent':
                'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
            'Accept-Encoding': 'gzip, deflate'
        }

        sess = requests.session()
        sess = cloudscraper.create_scraper(sess)

        search_url = "http://www.mangaeden.com/api/chapter/{0}/".format(page_id)

        connection = sess.get(search_url, headers=headers)
        if connection.status_code != 200:
            print("Whoops! Seems like I can't connect to website.")
            print("It's showing : %s" % connection)
            print("Run this script with the --verbose argument and report the issue along with log file on Github.")
            sys.exit(1)
        else:
            json_data = connection.content

            return json_data 
Developer: Xonshiz, Project: comic-dl, Lines: 24, Source: mangaChapterDownload.py
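
Examples 5, 6, 12, and 14 wrap a pre-built requests session instead of letting create_scraper build one. create_scraper accepts an existing session via its first parameter (sess), so cookies, headers, and adapters already configured on it carry over. A minimal sketch:

import requests
import cloudscraper

sess = requests.session()
sess.headers.update({'Accept-Encoding': 'gzip, deflate'})  # pre-configured state carries over
scraper = cloudscraper.create_scraper(sess=sess)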

Example 6: json_download

# Required import: import cloudscraper [as alias]
# Or: from cloudscraper import create_scraper [as alias]
# Also required here: import requests, sys
def json_download(self, chapter_id):
        headers = {
            'User-Agent':
                'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
            'Accept-Encoding': 'gzip, deflate'
        }

        sess = requests.session()
        sess = cloudscraper.create_scraper(sess)

        search_url = "http://www.mangaeden.com/api/manga/{0}/".format(chapter_id)

        connection = sess.get(search_url, headers=headers)
        if connection.status_code != 200:
            print("Whoops! Seems like I can't connect to website.")
            print("It's showing : %s" % connection)
            print("Run this script with the --verbose argument and report the issue along with log file on Github.")
            sys.exit(1)
        else:
            json_data = connection.content

            return json_data 
Developer: Xonshiz, Project: comic-dl, Lines: 24, Source: mangaChapters.py

Example 7: __init__

# Required import: import cloudscraper [as alias]
# Or: from cloudscraper import create_scraper [as alias]
# Also required here: import re
def __init__(self, manga_url, download_directory, chapter_range, **kwargs):
        self.scraper = cloudscraper.create_scraper()
        conversion = kwargs.get("conversion")
        keep_files = kwargs.get("keep_files")
        self.logging = kwargs.get("log_flag")
        self.sorting = kwargs.get("sorting_order")
        self.manga_url = manga_url + '/'
        self.print_index = kwargs.get("print_index")

        if 'manga' in manga_url:
            self.comic_id = str(str(manga_url).split("/")[-1])
            self.full_series(comic_id=self.comic_id, sorting=self.sorting, download_directory=download_directory,
                             chapter_range=chapter_range, conversion=conversion, keep_files=keep_files)

        if 'lecture-en-ligne' in manga_url:
            self.comic_id = str(str(manga_url).split("/")[-2])
            chapter_path = re.sub(re.compile(r'.*japscan.to'), '', str(self.manga_url))
            self.single_chapter(chapter_path, comic_id=self.comic_id, download_directory=download_directory,
                                scraper=self.scraper)  # bare 'scraper' is undefined in this scope
Developer: Xonshiz, Project: comic-dl, Lines: 21, Source: japscan.py

Example 8: query

# Required import: import cloudscraper [as alias]
# Or: from cloudscraper import create_scraper [as alias]
def query(self):
        # Bypass the Cloudflare check
        scraper = cloudscraper.create_scraper()
        scraper.proxies = self.get_proxy(self.source)
        url = self.addr + self.domain
        try:
            resp = scraper.get(url, timeout=self.timeout)
        except Exception as e:
            logger.log('ERROR', e.args)
            return
        if resp.status_code != 200:
            return
        subdomains = self.match_subdomains(self.domain, str(resp.json()))
        # Merge the subdomain search results
        self.subdomains = self.subdomains.union(subdomains) 
Developer: shmilylty, Project: OneForAll, Lines: 17, Source: threatcrowd.py

Example 9: __init__

# Required import: import cloudscraper [as alias]
# Or: from cloudscraper import create_scraper [as alias]
# Also required here: from concurrent import futures
def __init__(self):
        self._destroyed = False
        self.executor = futures.ThreadPoolExecutor(max_workers=2)

        # Initialize cloudscraper
        self.scraper = cloudscraper.create_scraper(
            browser={
                'browser': 'firefox',
                'mobile': False
            }
        )

        # Must resolve these fields inside `read_novel_info`
        self.novel_title = 'N/A'
        self.novel_author = 'N/A'
        self.novel_cover = None
        self.is_rtl = False

        # Each item must contain these keys:
        # `id` - 1 based index of the volume
        # `title` - the volume title (can be ignored)
        self.volumes = []

        # Each item must contain these keys:
        # `id` - 1 based index of the chapter
        # `title` - the title name
        # `volume` - the volume id of this chapter
        # `volume_title` - the volume title (can be ignored)
        # `url` - the link where to download the chapter
        self.chapters = []

        # Other fields - not necessary to resolve from the crawler instance.
        self.home_url = ''
        self.novel_url = ''
        self.last_visited_url = None
    # end def 
Developer: dipu-bd, Project: lightnovel-crawler, Lines: 38, Source: crawler.py

Example 10: check_updates

# Required import: import cloudscraper [as alias]
# Or: from cloudscraper import create_scraper [as alias]
def check_updates():
    try:
        logger.info('Checking latest version')
        pypi_short_url = 'http://bit.ly/2yYyFGd'
        scraper = cloudscraper.create_scraper()
        res = scraper.get(pypi_short_url, timeout=5)
        latest_version = res.json()['info']['version']
        if get_value() != latest_version:
            new_version_news(latest_version)
        # end if
    except Exception:
        logger.warn('Failed to check for update')
    # end try
# end def 
Developer: dipu-bd, Project: lightnovel-crawler, Lines: 16, Source: update_checker.py
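
Example 10 reads the latest release from the PyPI JSON API (res.json()['info']['version']) behind a shortened URL. A sketch querying the canonical PyPI endpoint directly; the package name lightnovel-crawler is an assumption based on the project this example comes from.

scraper = cloudscraper.create_scraper()
# https://pypi.org/pypi/<package>/json is PyPI's JSON API endpoint.
res = scraper.get('https://pypi.org/pypi/lightnovel-crawler/json', timeout=5)
latest_version = res.json()['info']['version']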

Example 11: __init__

# Required import: import cloudscraper [as alias]
# Or: from cloudscraper import create_scraper [as alias]
def __init__(self):
        self.scrapper = cloudscraper.create_scraper()
        super().__init__() 
Developer: ychenracing, Project: Spiders, Lines: 5, Source: flhhkk_spider.py

Example 12: json_download

# Required import: import cloudscraper [as alias]
# Or: from cloudscraper import create_scraper [as alias]
# Also required here: import requests, sys
def json_download(self, manga_language):
        print("Downloading The Latest Data Set...")
        headers = {
            'User-Agent':
                'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
            'Accept-Encoding': 'gzip, deflate'
        }

        sess = requests.session()
        sess = cloudscraper.create_scraper(sess)

        search_url = "http://www.mangaeden.com/api/list/{0}/".format(manga_language)

        connection = sess.get(search_url, headers=headers)
        if connection.status_code != 200:
            print("Whoops! Seems like I can't connect to website.")
            print("It's showing : %s" % connection)
            print("Run this script with the --verbose argument and report the issue along with log file on Github.")
            sys.exit(1)
        else:
            json_data = connection.content
            try:
                # Let's save the JSON data
                with open("Manga_Eden_Data.json", "wb") as write_file:
                    write_file.write(json_data)
            except Exception as WriteError:
                print("Couldn't make Cache : {0}".format(WriteError))
                pass

            return json_data 
Developer: Xonshiz, Project: comic-dl, Lines: 33, Source: mangaSearch.py

Example 13: __init__

# Required import: import cloudscraper [as alias]
# Or: from cloudscraper import create_scraper [as alias]
# Also required here: from pathlib import Path
def __init__(self):
        self.path = Path('Interface/AddOns')
        self.configPath = Path('WTF/CurseBreaker.json')
        self.cachePath = Path('WTF/CurseBreaker.cache')
        self.clientType = 'wow_retail'
        self.waCompanionVersion = 110
        self.config = None
        self.cfIDs = None
        self.cfDirs = None
        self.cfCache = {}
        self.wowiCache = {}
        self.checksumCache = {}
        self.scraper = cloudscraper.create_scraper() 
Developer: AcidWeb, Project: CurseBreaker, Lines: 15, Source: Core.py

Example 14: user_login

# Required import: import cloudscraper [as alias]
# Or: from cloudscraper import create_scraper [as alias]
# Also required here: import requests, sys; from bs4 import BeautifulSoup
def user_login(self, username, password, **kwargs):
        session_cookie = ""

        headers = kwargs.get("headers")
        if not headers:
            headers = {
                'User-Agent':
                    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
                'Accept-Encoding': 'gzip, deflate',
                'referer': 'https://bato.to/'
            }
        print("Getting Auth Token...")
        page_source, update_cookie = globalFunctions.GlobalFunctions().page_downloader(
            manga_url="https://bato.to/forums/index.php?app=core&module=global&section=login")

        soup_parse = page_source.find_all('input', {'type': 'hidden'})
        auth_token = str([x['value'] for x in soup_parse][0]).strip()

        payload = {
            'auth_key': auth_token,
            'ips_username': username,
            'ips_password': password,
            'rememberMe': '1'
        }

        sess = requests.session()
        sess = cloudscraper.create_scraper(sess)

        print('Trying To Log In...')
        connection = sess.post("https://bato.to/forums/index.php?app=core&module=global&section=login&do=process",
                               headers=headers, data=payload, cookies=kwargs.get("cookies"))
        if connection.status_code != 200:
            print("Whoops! Seems like I can't connect to website.")
            print("It's showing : %s" % connection)
            print("Run this script with the --verbose argument and report the issue along with log file on Github.")
            sys.exit(1)
        else:
            page_source = BeautifulSoup(connection.text.encode("utf-8"), "html.parser")
            if "logout" in str(page_source):
                print("Successfully Logged In!")
            else:
                print("Couldn't Log You In. Please Check Your Credentials Again!")
            session_cookie = sess.cookies

        return session_cookie 
Developer: Xonshiz, Project: comic-dl, Lines: 47, Source: batoto.py


Note: The cloudscraper.create_scraper examples in this article were compiled by 純淨天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets were selected from open-source projects contributed by many developers; copyright in the source code remains with the original authors. For distribution and use, please refer to each project's license. Do not repost without permission.