This article collects typical usage examples of the cfscrape.create_scraper method in Python. If you have been wondering what cfscrape.create_scraper does, how to call it, or what real-world usage looks like, the curated examples below should help. You can also explore further usage examples from the cfscrape module itself.
The section below presents 15 code examples of cfscrape.create_scraper, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
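Before the individual examples, here is a minimal, self-contained sketch of the pattern most of them share: create_scraper() returns a drop-in replacement for a requests session that solves Cloudflare's JavaScript challenge transparently. The target URL below is only a placeholder.

import cfscrape

# create_scraper returns a requests.Session subclass that handles
# Cloudflare's "I'm Under Attack Mode" challenge on the first request
scraper = cfscrape.create_scraper()

# after that it is used exactly like a normal requests session
response = scraper.get("https://example.com")
print(response.status_code)
print(response.text[:200])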
Example 1: run
# Required import: import cfscrape [as alias]
# Or: from cfscrape import create_scraper [as alias]
def run(self):
    global ier
    global ual
    x = flag
    self.target = target
    us = ue[x]
    try:
        s = cfscrape.create_scraper()
        c = s.get_cookie_string("http://" + self.target, user_agent=us)
        c = str(c).split("'")[1].split("'")[0]
        ual.append(us + ':' + c)
    except:
        pass
    self.target = None
    us = None
    x = None
    ier += 1
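Example 1 calls get_cookie_string on a scraper instance and then slices the string representation of the result to extract the cookie. For reference, cfscrape also exposes get_cookie_string at module level and returns a (cookie_string, user_agent) pair directly, which avoids the string parsing; the sketch below is illustrative only and the domain is a placeholder.

import cfscrape

# returns the Cookie header value plus the User-Agent the challenge was solved with;
# both must be reused together or Cloudflare rejects the clearance cookie
cookie_string, user_agent = cfscrape.get_cookie_string("http://example.com")
headers = {"Cookie": cookie_string, "User-Agent": user_agent}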
Example 2: PhishtankOSINT
# Required import: import cfscrape [as alias]
# Or: from cfscrape import create_scraper [as alias]
def PhishtankOSINT(phishtank_file, ConfPHISHTANK_url, ConfPHISHTANK_keep, SrcDir, PROXY, LOG):
    # Get phishtank OSINT JSON file
    proxies = {'http': PROXY, 'https': PROXY}
    LOG.info("Retrieving Phishtank's JSON file... Could take several minutes...")
    # resp = requests.get(url=ConfPHISHTANK_url, proxies=proxies, allow_redirects=True)
    # Using CloudFlare Scraper
    scraper = cfscrape.create_scraper()
    resp = scraper.get(ConfPHISHTANK_url, proxies=proxies, allow_redirects=True, timeout=(10, 20))
    # download PhishTank JSON file
    if str(resp.status_code) == "403":
        LOG.error("PhishTank refused your connection (HTTP 403 code). Maybe Cloudflare asking for a captcha? Or there is an API key problem.")
        sys.exit(0)
    if str(resp.status_code) != "509":
        with open(phishtank_file, "wb") as file:
            file.write(resp.content)
        LOG.info("Phishtank's file retrieved. Proceeding to extraction...")
    # Error if download limit exceeded
    else:
        LOG.error("PhishTank download limit exceeded. Can't download JSON file. Maybe you should use an API key?")
        sys.exit(0)
Example 3: getuserstatus
# Required import: import cfscrape [as alias]
# Or: from cfscrape import create_scraper [as alias]
def getuserstatus(session=''):
    status = 'Guest'
    user1 = 'Guest'
    if session == '':
        session = cfscrape.create_scraper()
        with open('cookies') as f:
            cookies = requests.utils.cookiejar_from_dict(pickle.load(f))
        session = requests.session()
        session.cookies = cookies
        del session.cookies['c_visitor']
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.2; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0',
               'Connection': 'keep-alive'}
    site = session.get('https://www.crunchyroll.com/acct/membership', headers=headers, verify=True).text
    # open('tempfile','w').write(site.encode('UTF-8'))
    if re.search(re.escape(' ga(\'set\', \'dimension5\', \'registered\');'), site):
        status = 'Free Member'
    elif re.search(re.escape(' ga(\'set\', \'dimension6\', \'premium\');'), site):
        status = 'Premium Member'
    elif re.search(re.escape(' ga(\'set\', \'dimension6\', \'premiumplus\');'), site):
        status = 'Premium+ Member'
    if status != 'Guest':
        user1 = re.findall('<a href="/user/(.+)" ', site).pop()
    return [status, user1]
Example 4: page_downloader
# Required import: import cfscrape [as alias]
# Or: from cfscrape import create_scraper [as alias]
def page_downloader(self, page_url, **kwargs):
    headers = {
        'User-Agent':
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
        'Accept-Encoding': 'gzip, deflate'
    }
    sess = requests.session()
    sess = cfscrape.create_scraper(sess)
    connection = sess.get(page_url, headers=headers, cookies=kwargs.get("cookies"))
    if connection.status_code != 200:
        print("Whoops! Seems like I can't connect to website.")
        print("It's showing : %s" % connection)
        print("Run this script with the --verbose argument and report the issue along with log file on Github.")
        sys.exit(1)
    else:
        page_source = BeautifulSoup(connection.content, "html.parser")  # text.encode("utf-8")
        connection_cookies = sess.cookies
        return page_source, connection_cookies
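Example 4 passes an existing requests session into create_scraper, which lets the scraper inherit the cookies, headers, and adapters already attached to that session. A minimal sketch of the same idea using the documented sess keyword (URL and cookie values are placeholders):

import requests
import cfscrape

session = requests.session()
session.cookies.set("existing_cookie", "value")  # state to be preserved

# the scraper reuses the given session instead of creating a fresh one
scraper = cfscrape.create_scraper(sess=session)
response = scraper.get("https://example.com")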
Example 5: _handle_connect
# Required import: import cfscrape [as alias]
# Or: from cfscrape import create_scraper [as alias]
def _handle_connect(self):
    if self.last_retry is not None and time() - self.last_retry >= 60:
        logger.debug('Last reconnection was more than 60 seconds ago. Resetting retry counter.')
        self.retry_fail = 0
    else:
        self.last_retry = time()
    connection = Connection(self.url, Session())
    hub = connection.register_hub(BittrexParameters.HUB)
    connection.received += self._on_debug
    connection.error += self.on_error
    hub.client.on(BittrexParameters.MARKET_DELTA, self._on_public)
    hub.client.on(BittrexParameters.SUMMARY_DELTA, self._on_public)
    hub.client.on(BittrexParameters.SUMMARY_DELTA_LITE, self._on_public)
    hub.client.on(BittrexParameters.BALANCE_DELTA, self._on_private)
    hub.client.on(BittrexParameters.ORDER_DELTA, self._on_private)
    self.connection = BittrexConnection(connection, hub)
    thread = Thread(target=self._connection_handler, name=OtherConstants.SOCKET_CONNECTION_THREAD)
    thread.daemon = True
    self.threads.append(thread)
    thread.start()
Example 6: search
# Required import: import cfscrape [as alias]
# Or: from cfscrape import create_scraper [as alias]
def search(url):
    headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36'}
    try:
        if not re.match(r'http(s?)\:', url):
            url = 'http://' + url
            scraper = cfscrape.create_scraper()
            data = scraper.get(url, headers=headers)
        else:
            scraper = cfscrape.create_scraper()
            data = scraper.get(url, headers=headers)
    except:
        print("Hey buddy, pass a real address please!")
        exit(1)
        return
    if data.status_code == 200:
        soup = BeautifulSoup(data.text, 'html.parser')
        for link in soup.title:
            return link
    else:
        print("We had a problem with the URL!")
        exit(1)
Example 7: getAnyRun
# Required import: import cfscrape [as alias]
# Or: from cfscrape import create_scraper [as alias]
def getAnyRun(sha256):
    """
    Retrieves information from AnyRun Service
    :param sha256: hash value
    :return info: info object
    """
    info = {'anyrun_available': False}
    if sha256 == "-":
        return info
    try:
        if args.debug:
            print("[D] Querying Anyrun")
        cfscraper = cfscrape.create_scraper()
        response = cfscraper.get(URL_ANYRUN % sha256, proxies=connections.PROXY)
        if args.debug:
            print("[D] Anyrun Response Code: %s" % response.status_code)
        if response.status_code == 200:
            info['anyrun_available'] = True
    except ConnectionError as e:
        print("Error while accessing AnyRun: connection failed")
        if args.debug:
            traceback.print_exc()
    except Exception as e:
        print("Error while accessing AnyRun")
        if args.debug:
            traceback.print_exc()
    return info
Example 8: _handle_connect
# Required import: import cfscrape [as alias]
# Or: from cfscrape import create_scraper [as alias]
def _handle_connect(self):
    connection = Connection(self.url, Session())
    hub = connection.register_hub(BittrexParameters.HUB)
    connection.received += self._on_debug
    connection.error += self.on_error
    hub.client.on(BittrexParameters.MARKET_DELTA, self._on_public)
    hub.client.on(BittrexParameters.SUMMARY_DELTA, self._on_public)
    hub.client.on(BittrexParameters.SUMMARY_DELTA_LITE, self._on_public)
    hub.client.on(BittrexParameters.BALANCE_DELTA, self._on_private)
    hub.client.on(BittrexParameters.ORDER_DELTA, self._on_private)
    self.connection = BittrexConnection(connection, hub)
    thread = Thread(target=self._connection_handler, daemon=True, name='SocketConnectionThread')
    self.threads.append(thread)
    thread.start()
Example 9: main
# Required import: import cfscrape [as alias]
# Or: from cfscrape import create_scraper [as alias]
def main(email):
    req = requests.get("https://hacked-emails.com/api?q=%s" % email)
    if "jschl-answer" in req.text:
        print("Cloudflare detected... Solving challenge.")
        scraper = cfscrape.create_scraper()
        req = scraper.get("https://hacked-emails.com/api?q=%s" % email)
        print(req.text)
        if "jschl-answer" in req.text:
            return {}
    data = json.loads(req.text.encode('UTF-8'))
    return data
Example 10: _connection_handler
# Required import: import cfscrape [as alias]
# Or: from cfscrape import create_scraper [as alias]
def _connection_handler(self):
    def _get_err_msg(exception):
        error_message = 'Exception = {}, Message = <{}>'.format(type(exception), exception)
        return error_message

    if str(type(Session())) == OtherConstants.CF_SESSION_TYPE:
        logger.info('Establishing connection to Bittrex through {}.'.format(self.url))
        logger.info('cfscrape detected, will try to bypass Cloudflare if enabled.')
    else:
        logger.info('Establishing connection to Bittrex through {}.'.format(self.url))
    try:
        self.connection.conn.start()
    except TimeoutError as e:
        self.control_queue.put(ReconnectEvent(_get_err_msg(e)))
    except WebSocketConnectionClosedByUser:
        logger.info(InfoMessages.SUCCESSFUL_DISCONNECT)
    except WebSocketConnectionClosedException as e:
        self.control_queue.put(ReconnectEvent(_get_err_msg(e)))
    except TimeoutErrorUrlLib as e:
        self.control_queue.put(ReconnectEvent(_get_err_msg(e)))
    except WebSocketTimeoutException as e:
        self.control_queue.put(ReconnectEvent(_get_err_msg(e)))
    except ConnectionError:
        pass
        # Commenting it for the time being. It should be handled in _handle_subscribe.
        # event = ReconnectEvent(None)
        # self.control_queue.put(event)
    except Exception as e:
        logger.error(ErrorMessages.UNHANDLED_EXCEPTION.format(_get_err_msg(e)))
        self.disconnect()
        # event = ReconnectEvent(None)
        # self.control_queue.put(event)
Example 11: get_cf_cookie
# Required import: import cfscrape [as alias]
# Or: from cfscrape import create_scraper [as alias]
def get_cf_cookie(domain, user_agent):
    try:
        s = cfscrape.create_scraper()
        c = s.get_cookie_string("http://" + domain, user_agent=user_agent)
        return {user_agent: str(c).split("'")[1].split("'")[0]}
    except:
        return {}
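Example 11 returns the Cloudflare clearance cookie keyed by the user agent that obtained it. One plausible way to consume that mapping with plain requests is sketched below; get_cf_cookie is the function defined above, while the domain and user agent string are placeholders.

import requests

result = get_cf_cookie("example.com", "Mozilla/5.0")
for user_agent, cookie_string in result.items():
    # the cookie is only honoured together with the user agent that solved the challenge
    headers = {"User-Agent": user_agent, "Cookie": cookie_string}
    response = requests.get("http://example.com", headers=headers)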
Example 12: get
# Required import: import cfscrape [as alias]
# Or: from cfscrape import create_scraper [as alias]
def get(hostname, request, header, compression, timeout):
    data = None
    if hostname == Configuration().hostnames[constants.BITFINEX]:
        scraper = cfscrape.create_scraper()
        data = scraper.get("http://" + hostname + request).content.decode("utf-8")
    else:
        header['Accept-Encoding'] = compression
        con = http.client.HTTPSConnection(hostname, timeout=timeout)
        con.request(constants.GET, request, "", header)
        response = con.getresponse()
        data = decompress(response.read(), compression, header['Accept-Charset'])
        con.close()
    logger = logging.getLogger(logerName())
    # TODO
    # throw this out and rework it
    if verbose():
        logger.debug(constants.DELIMITER)
        logger.debug("REQUEST ")
        logger.debug("Method: GET ")
        logger.debug("Hostname: " + str(hostname))
        logger.debug("Request: " + str(request))
        logger.debug("HTTP head: " + str(header))
        logger.debug("")
        logger.debug("RESPONSE ")
        logger.debug("JSON: " + str(data))
        logger.debug(constants.DELIMITER)
    return data

# TODO COPY LIKE HELL
Example 13: get_hls_hotlink
# Required import: import cfscrape [as alias]
# Or: from cfscrape import create_scraper [as alias]
def get_hls_hotlink(self, channel):
    try:
        scraper = cfscrape.create_scraper()
        bsoup = Soup(scraper.get(self.channel_dict[channel.lower()]).content, 'html.parser')
        for s in bsoup.findAll("script"):
            if "player.setup" in str(s):
                return s.next_element.split(" file: ")[1].split(',')[0].strip("\'")
    except KeyError:
        print("\n\033[1;31;49mInvalid channel name!\n")
        return 1
Example 14: PhishstatsOSINT
# Required import: import cfscrape [as alias]
# Or: from cfscrape import create_scraper [as alias]
def PhishstatsOSINT(phishstats_file, ConfPHISHSTATS_url, ConfPHISHSTATS_keep, PROXY, SearchString, LOG):
    global HTMLText
    try:
        proxies = {'http': PROXY, 'https': PROXY}
        try:
            # If more than one search word
            if ',' in SearchString:
                SearchString_list = [SearchString.strip(' ') for SearchString in SearchString.split(',')]
                print(SearchString_list)
            else:
                SearchString_list = [SearchString]
        except:
            err = sys.exc_info()
            LOG.error("SearchString error " + str(err))
        # Using CloudFlare Scraper
        scraper = cfscrape.create_scraper()
        r = scraper.get(ConfPHISHSTATS_url + "(title,like,~" + SearchString + "~)", timeout=(10, 20))
        # download Phishstats' JSON file
        with open(phishstats_file, "wb") as file:
            file.write(r.content)
        LOG.info("Phishstats' file retrieved. Proceeding to extraction...")
    except requests.exceptions.ConnectTimeout as e:
        LOG.error("Error while connecting to Phishstats: {}".format(e))
        pass
    except Exception as e:
        LOG.error("Phishstats connection error: {}".format(e))
        sys.exit(0)
        pass
    # Parse Phishstats result
Example 15: page_downloader
# Required import: import cfscrape [as alias]
# Or: from cfscrape import create_scraper [as alias]
def page_downloader(url, scrapper_delay=5, **kwargs):
    headers = kwargs.get("headers")
    received_cookies = kwargs.get("cookies")
    if not headers:
        headers = {
            'User-Agent':
                'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
            'Accept-Encoding': 'gzip, deflate'
        }
    sess = session()
    sess = create_scraper(sess, delay=scrapper_delay)
    connection = sess.get(url, headers=headers, cookies=received_cookies)
    if connection.status_code != 200:
        print("Whoops! Seems like I can't connect to website.")
        print("It's showing : %s" % connection)
        print("Run this script with the --verbose argument and report the issue along with log file on Github.")
        # raise Warning("can't connect to website %s" % manga_url)
        return False, None, None
    else:
        page_source = BeautifulSoup(connection.text.encode("utf-8"), "html.parser")
        connection_cookies = sess.cookies
        return True, page_source, received_cookies
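Example 15 forwards a scrapper_delay argument into create_scraper. cfscrape accepts a delay keyword that controls how long the scraper waits before submitting the solved challenge, since Cloudflare's challenge page expects a pause of several seconds. A minimal sketch, with the URL as a placeholder:

import cfscrape

# wait roughly 10 seconds before answering the Cloudflare challenge
scraper = cfscrape.create_scraper(delay=10)
response = scraper.get("https://example.com")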