本文整理匯總了Python中scrapy.utils.request.request_fingerprint方法的典型用法代碼示例。如果您正苦於以下問題:Python request.request_fingerprint方法的具體用法?Python request.request_fingerprint怎麼用?Python request.request_fingerprint使用的例子?那麼,這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類scrapy.utils.request的用法示例。
在下文中一共展示了request.request_fingerprint方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: request_seen
# 需要導入模塊: from scrapy.utils import request [as 別名]
# 或者: from scrapy.utils.request import request_fingerprint [as 別名]
def request_seen(self, request):
    """Report whether ``request`` was seen before, recording it when it is new.

    The lookup is delegated to ``is_request_seen``. For a request that has
    not been seen, a ``Seen`` record holding its fingerprint, its URL and
    ``datetime.now()`` is added to ``session`` and committed.

    Returns:
        The value produced by ``is_request_seen(request)``.

    Raises:
        Whatever ``session.add`` / ``session.commit`` raise; the session is
        rolled back first and the error is then re-raised.
    """
    already_seen = is_request_seen(request)
    if already_seen:
        log.msg('[seen] "%s" is seen. Skipping.' % request.url, log.INFO)
        return already_seen

    log.msg('New URL: %s. Adding it to seen database' % request.url, log.DEBUG)
    record = Seen(
        fingerprint=request_fingerprint(request),
        url=request.url,
        last_crawl_time=datetime.now(),
    )
    try:
        session.add(record)
        session.commit()
    except:
        # Deliberately broad: undo the pending write for any failure, then
        # let the original error propagate unchanged.
        session.rollback()
        raise
    finally:
        # The session is closed whether or not the commit succeeded.
        session.close()
    return already_seen
示例2: request_seen
# 需要導入模塊: from scrapy.utils import request [as 別名]
# 或者: from scrapy.utils.request import request_fingerprint [as 別名]
def request_seen(self, request):
    """Return True if the request's fingerprint was already recorded.

    The fingerprint comes from ``self.request_fingerprint`` and is added to
    the set stored under ``self.key`` on ``self.server``.

    Parameters
    ----------
    request : scrapy.http.Request

    Returns
    -------
    bool
        True when nothing new was added, i.e. the fingerprint already existed.
    """
    fingerprint = self.request_fingerprint(request)
    # ``sadd`` reports how many values were added; zero means the
    # fingerprint was already a member of the set.
    newly_added = self.server.sadd(self.key, fingerprint)
    return newly_added == 0
示例3: _extract_key_info
# 需要導入模塊: from scrapy.utils import request [as 別名]
# 或者: from scrapy.utils.request import request_fingerprint [as 別名]
def _extract_key_info(self, request):
    """Work out the Qiniu bucket and key for the resource behind ``request``.

    When ``request.meta`` carries a truthy ``'qiniu_key_generator'``, it is
    called with the request URL and must return a mapping with ``'bucket'``
    and ``'key'`` entries; a falsy bucket falls back to ``self.bucket``.
    Otherwise the bucket is ``self.bucket`` and the key is
    ``self.key_prefix`` followed by the request fingerprint.

    Returns:
        dict: ``{'bucket': ..., 'key': ...}``.
    """
    from scrapy.utils.request import request_fingerprint

    make_key = request.meta.get('qiniu_key_generator')
    if not make_key:
        # No custom generator: derive the key from the request fingerprint.
        return {
            'bucket': self.bucket,
            'key': '%s%s' % (self.key_prefix, request_fingerprint(request)),
        }

    generated = make_key(request.url)
    return {
        'bucket': generated['bucket'] or self.bucket,
        'key': generated['key'],
    }
示例4: _request_key
# 需要導入模塊: from scrapy.utils import request [as 別名]
# 或者: from scrapy.utils.request import request_fingerprint [as 別名]
def _request_key(self, request):
    """Return the request's fingerprint after passing it through ``to_bytes``."""
    fingerprint = request_fingerprint(request)
    return to_bytes(fingerprint)
示例5: request_seen
# 需要導入模塊: from scrapy.utils import request [as 別名]
# 或者: from scrapy.utils.request import request_fingerprint [as 別名]
def request_seen(self, request):
    """Record the request's fingerprint per crawl and report if it was known.

    The fingerprint is added to the set named ``self.key + ":" + crawlid``
    on ``self.server``, where ``crawlid`` is read from ``request.meta``.
    The set's expiry is reset to ``self.timeout`` on every call.

    Returns:
        bool: ``not added`` - True when ``sadd`` reported nothing added.

    Raises:
        KeyError: if ``request.meta`` has no ``'crawlid'`` entry.
    """
    fingerprint = request_fingerprint(request)
    crawl_id = request.meta['crawlid']
    crawl_key = self.key + ":" + crawl_id
    was_added = self.server.sadd(crawl_key, fingerprint)
    # Refresh the expiry whether or not the fingerprint was new.
    self.server.expire(crawl_key, self.timeout)
    return not was_added
示例6: is_request_seen
# 需要導入模塊: from scrapy.utils import request [as 別名]
# 或者: from scrapy.utils.request import request_fingerprint [as 別名]
def is_request_seen(request):
    """Return the scalar result of an existence query for this request.

    The query asks whether any ``Seen.fingerprint`` equals the fingerprint
    of ``request``, and is run through the module-level ``session``.
    """
    fingerprint = request_fingerprint(request)
    fingerprint_exists = exists().where(Seen.fingerprint == fingerprint)
    return session.query(fingerprint_exists).scalar()
示例7: request_seen
# 需要導入模塊: from scrapy.utils import request [as 別名]
# 或者: from scrapy.utils.request import request_fingerprint [as 別名]
def request_seen(self, request):
    """Return True if the request was already seen for its task.

    The task id is read from ``request._plusmeta['taskid']``. When present,
    the request fingerprint is added to the set named
    ``self.key.format(taskid)`` on ``self.server``.

    Returns:
        bool: True when ``sadd`` added nothing (fingerprint already known);
        False when the fingerprint is new or the request has no task id.
    """
    tid = request._plusmeta.get('taskid')
    if not tid:
        # No task id means there is no per-task set to consult. Return an
        # explicit False instead of falling off the end with None, so every
        # path yields a bool (still falsy for existing callers).
        return False
    fp = self.request_fingerprint(request)
    added = self.server.sadd(self.key.format(tid), fp)
    return added == 0
示例8: request_fingerprint
# 需要導入模塊: from scrapy.utils import request [as 別名]
# 或者: from scrapy.utils.request import request_fingerprint [as 別名]
def request_fingerprint(self, request):
    """Return the fingerprint of ``request``.

    Delegates to the ``request_fingerprint`` function that is in scope
    outside this method.
    """
    fingerprint = request_fingerprint(request)
    return fingerprint
示例9: test_request
# 需要導入模塊: from scrapy.utils import request [as 別名]
# 或者: from scrapy.utils.request import request_fingerprint [as 別名]
def test_request(self):
    """Check that ``self.url_01`` and ``self.url_02`` share one fingerprint.

    A bare ``Request`` is built for each URL and the two fingerprints are
    asserted to be equal.
    """
    first_fingerprint = request.request_fingerprint(Request(url=self.url_01))
    second_fingerprint = request.request_fingerprint(Request(url=self.url_02))
    self.assertEqual(first_fingerprint, second_fingerprint)
示例10: get_request_finger
# 需要導入模塊: from scrapy.utils import request [as 別名]
# 或者: from scrapy.utils.request import request_fingerprint [as 別名]
def get_request_finger(url):
    """Return the fingerprint of a bare ``Request`` built for ``url``.

    Per the original author's note, the fingerprint allows the URL's
    parameters to appear in any order.

    :param url: URL to fingerprint.
    :return: the value of ``request.request_fingerprint`` for that request.
    """
    return request.request_fingerprint(Request(url=url))
示例11: _request_key
# 需要導入模塊: from scrapy.utils import request [as 別名]
# 或者: from scrapy.utils.request import request_fingerprint [as 別名]
def _request_key(self, request):
    """Return the request's fingerprint, used unchanged as its key."""
    key = request_fingerprint(request)
    return key
示例12: _get_request_path
# 需要導入模塊: from scrapy.utils import request [as 別名]
# 或者: from scrapy.utils.request import request_fingerprint [as 別名]
def _get_request_path(self, spider, request):
    """Build the cache path for ``request``.

    The path is ``<self.cachedir>/<spider.name>/<first two characters of
    the fingerprint>/<fingerprint>``.
    """
    fingerprint = request_fingerprint(request)
    shard = fingerprint[:2]
    return os.path.join(self.cachedir, spider.name, shard, fingerprint)
示例13: request_seen
# 需要導入模塊: from scrapy.utils import request [as 別名]
# 或者: from scrapy.utils.request import request_fingerprint [as 別名]
def request_seen(self, request):
    """Return True if the request's fingerprint was already recorded.

    A new fingerprint is added to ``self.fingerprints`` and, when
    ``self.file`` is truthy, also written to it followed by ``os.linesep``.

    Returns:
        bool: True for a known fingerprint, False for a new one.
    """
    fp = self.request_fingerprint(request)
    if fp in self.fingerprints:
        return True
    self.fingerprints.add(fp)
    if self.file:
        self.file.write(fp + os.linesep)
    # Return an explicit False rather than falling off the end with None,
    # so both paths yield a bool (still falsy for existing callers).
    return False
示例14: request_fingerprint
# 需要導入模塊: from scrapy.utils import request [as 別名]
# 或者: from scrapy.utils.request import request_fingerprint [as 別名]
def request_fingerprint(self, request):
    """Compute the fingerprint for a request.

    This is a thin wrapper around the ``request_fingerprint`` function that
    is in scope outside this method.

    Parameters
    ----------
    request : scrapy.http.Request

    Returns
    -------
    str
    """
    fingerprint = request_fingerprint(request)
    return fingerprint
示例15: _process_request
# 需要導入模塊: from scrapy.utils import request [as 別名]
# 或者: from scrapy.utils.request import request_fingerprint [as 別名]
def _process_request(self, request, info):
    """Return a Deferred for the result of ``request``, sharing work per fingerprint.

    The request's ``callback``/``errback`` are removed from the request and
    attached to the Deferred handed back to the caller instead. Three cases
    are handled, keyed by the request fingerprint:

    * the fingerprint is in ``info.downloaded``: the cached result is
      replayed through the callbacks;
    * the fingerprint is in ``info.downloading``: the caller's Deferred is
      queued in ``info.waiting`` and returned;
    * otherwise: the fingerprint is added to ``info.downloading`` and the
      download chain is started via ``self.media_to_download``.
    """
    fp = request_fingerprint(request)
    # Use an identity function when the request has no callback.
    cb = request.callback or (lambda _: _)
    eb = request.errback
    # Detach the handlers from the request; below they are attached only to
    # the Deferreds built in this method.
    request.callback = None
    request.errback = None
    # Return cached result if request was already seen
    if fp in info.downloaded:
        return defer_result(info.downloaded[fp]).addCallbacks(cb, eb)
    # Otherwise, wait for result
    wad = Deferred().addCallbacks(cb, eb)
    # NOTE(review): assumes info.waiting[fp] yields a list even for a new
    # fingerprint (e.g. a defaultdict(list)) - confirm against its definition.
    info.waiting[fp].append(wad)
    # Check if request is downloading right now to avoid doing it twice
    if fp in info.downloading:
        return wad
    # Download request checking media_to_download hook output first
    info.downloading.add(fp)
    dfd = mustbe_deferred(self.media_to_download, request, info)
    dfd.addCallback(self._check_media_to_download, request, info)
    # addBoth: this step runs for both a result and a failure.
    dfd.addBoth(self._cache_result_and_execute_waiters, fp, info)
    # Log any failure still in the chain at this point.
    dfd.addErrback(lambda f: logger.error(
        f.value, exc_info=failure_to_exc_info(f), extra={'spider': info.spider})
    )
    return dfd.addBoth(lambda _: wad)  # it must return wad at last