本文整理汇总了Python中scrapy.utils.request.request_fingerprint方法的典型用法代码示例。如果您正苦于以下问题:Python request.request_fingerprint方法的具体用法?Python request.request_fingerprint怎么用?Python request.request_fingerprint使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块scrapy.utils.request的用法示例。
在下文中一共展示了request.request_fingerprint方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: request_seen
# 需要导入模块: from scrapy.utils import request [as 别名]
# 或者: from scrapy.utils.request import request_fingerprint [as 别名]
def request_seen(self, request):
    """Report whether ``request`` was seen before, recording it when it is new.

    The answer comes from ``is_request_seen``. For a request that was not
    seen, a ``Seen`` row holding its fingerprint, URL and the current time is
    added and committed through the module-level ``session``.

    :param request: the request being checked; its ``url`` is logged and stored.
    :return: the value returned by ``is_request_seen`` for this request.
    :raises: whatever ``session.add`` / ``session.commit`` raise; the session
        is rolled back first and is closed in every case.
    """
    already_seen = is_request_seen(request)
    if already_seen:
        log.msg('[seen] "%s" is seen. Skipping.' % request.url, log.INFO)
        return already_seen

    log.msg('New URL: %s. Adding it to seen database' % request.url, log.DEBUG)
    record = Seen(
        fingerprint=request_fingerprint(request),
        url=request.url,
        last_crawl_time=datetime.now(),
    )
    try:
        session.add(record)
        session.commit()
    except:  # noqa: E722 - undo the pending write on any failure, then re-raise
        session.rollback()
        raise
    finally:
        session.close()
    return already_seen
示例2: request_seen
# 需要导入模块: from scrapy.utils import request [as 别名]
# 或者: from scrapy.utils.request import request_fingerprint [as 别名]
def request_seen(self, request):
    """Tell whether the request's fingerprint was already in the set.

    The fingerprint is added to the set stored under ``self.key`` as a side
    effect, so a second call with the same request reports it as seen.

    Parameters
    ----------
    request : scrapy.http.Request

    Returns
    -------
    bool
        True when nothing new was added, i.e. the fingerprint already existed.
    """
    fingerprint = self.request_fingerprint(request)
    # ``sadd`` reports how many values it added; zero means it was there already.
    return self.server.sadd(self.key, fingerprint) == 0
示例3: _extract_key_info
# 需要导入模块: from scrapy.utils import request [as 别名]
# 或者: from scrapy.utils.request import request_fingerprint [as 别名]
def _extract_key_info(self, request):
    """
    From the request for the resource to download, obtain the bucket and key
    used when the resource is uploaded to Qiniu.

    When ``request.meta`` carries a truthy ``'qiniu_key_generator'``, it is
    called with the request URL and must return a mapping with ``'bucket'``
    and ``'key'``; a falsy bucket falls back to ``self.bucket``. Otherwise the
    key is ``self.key_prefix`` followed by the request fingerprint.

    :param request: the download request.
    :return: dict with the entries ``'bucket'`` and ``'key'``.
    """
    from scrapy.utils.request import request_fingerprint

    make_key = request.meta.get('qiniu_key_generator')
    if not make_key:
        return {
            'bucket': self.bucket,
            'key': '%s%s' % (self.key_prefix, request_fingerprint(request)),
        }

    generated = make_key(request.url)
    return {
        'bucket': generated['bucket'] or self.bucket,
        'key': generated['key'],
    }
示例4: _request_key
# 需要导入模块: from scrapy.utils import request [as 别名]
# 或者: from scrapy.utils.request import request_fingerprint [as 别名]
def _request_key(self, request):
    """Return the fingerprint of ``request`` after passing it through ``to_bytes``."""
    fingerprint = request_fingerprint(request)
    return to_bytes(fingerprint)
示例5: request_seen
# 需要导入模块: from scrapy.utils import request [as 别名]
# 或者: from scrapy.utils.request import request_fingerprint [as 别名]
def request_seen(self, request):
    """Tell whether this request was already recorded for its crawl.

    The fingerprint is added to the set named ``self.key + ":" + crawlid``
    (``crawlid`` is read from ``request.meta``), and that key's expiry is
    then set to ``self.timeout`` on every call.

    :param request: request whose ``meta`` contains ``'crawlid'``.
    :return: True when ``sadd`` reported that nothing was added.
    :raises KeyError: if ``request.meta`` has no ``'crawlid'`` entry.
    """
    fingerprint = request_fingerprint(request)
    crawl_key = self.key + ":" + request.meta['crawlid']
    was_added = self.server.sadd(crawl_key, fingerprint)
    self.server.expire(crawl_key, self.timeout)
    return not was_added
示例6: is_request_seen
# 需要导入模块: from scrapy.utils import request [as 别名]
# 或者: from scrapy.utils.request import request_fingerprint [as 别名]
def is_request_seen(request):
    """Query ``session`` for whether a ``Seen`` row has this request's fingerprint.

    :param request: the request to look up.
    :return: the scalar result of the ``exists()`` query.
    """
    fingerprint = request_fingerprint(request)
    match_exists = exists().where(Seen.fingerprint == fingerprint)
    return session.query(match_exists).scalar()
示例7: request_seen
# 需要导入模块: from scrapy.utils import request [as 别名]
# 或者: from scrapy.utils.request import request_fingerprint [as 别名]
def request_seen(self, request):
    """Tell whether the request was already recorded for its task.

    The task id is read from ``request._plusmeta['taskid']``. The fingerprint
    is added to the set whose name is ``self.key`` formatted with that id.

    :param request: request carrying a ``_plusmeta`` mapping.
    :return: True if the fingerprint already existed, False if it was newly
        added, and None when the request has no (truthy) task id.
    """
    task_id = request._plusmeta.get('taskid')
    if not task_id:
        # Without a task id nothing is recorded and no verdict is given.
        return None

    fingerprint = self.request_fingerprint(request)
    task_key = self.key.format(task_id)
    return self.server.sadd(task_key, fingerprint) == 0
示例8: request_fingerprint
# 需要导入模块: from scrapy.utils import request [as 别名]
# 或者: from scrapy.utils.request import request_fingerprint [as 别名]
def request_fingerprint(self, request):
    """Return the fingerprint of ``request`` from the module-level ``request_fingerprint``."""
    fingerprint = request_fingerprint(request)
    return fingerprint
示例9: test_request
# 需要导入模块: from scrapy.utils import request [as 别名]
# 或者: from scrapy.utils.request import request_fingerprint [as 别名]
def test_request(self):
    """
    Test requests: the fingerprints of ``self.url_01`` and ``self.url_02``
    are expected to be equal.
    :return:
    """
    first, second = (
        request.request_fingerprint(Request(url=url))
        for url in (self.url_01, self.url_02)
    )
    self.assertEqual(first, second)
示例10: get_request_finger
# 需要导入模块: from scrapy.utils import request [as 别名]
# 或者: from scrapy.utils.request import request_fingerprint [as 别名]
def get_request_finger(url):
    """
    Get the fingerprint of a URL (its parameters may be in any order)
    :param url: URL used to build the ``Request`` that is fingerprinted
    :return: the value returned by ``request.request_fingerprint``
    """
    return request.request_fingerprint(Request(url=url))
示例11: _request_key
# 需要导入模块: from scrapy.utils import request [as 别名]
# 或者: from scrapy.utils.request import request_fingerprint [as 别名]
def _request_key(self, request):
    """Return the fingerprint of ``request``, used as its key."""
    fingerprint = request_fingerprint(request)
    return fingerprint
示例12: _get_request_path
# 需要导入模块: from scrapy.utils import request [as 别名]
# 或者: from scrapy.utils.request import request_fingerprint [as 别名]
def _get_request_path(self, spider, request):
    """Build the path for ``request`` under ``self.cachedir``.

    The path is ``<cachedir>/<spider.name>/<first two fingerprint chars>/<fingerprint>``.

    :param spider: object whose ``name`` becomes a path component.
    :param request: request whose fingerprint names the final component.
    :return: the joined path string.
    """
    fingerprint = request_fingerprint(request)
    shard = fingerprint[:2]
    return os.path.join(self.cachedir, spider.name, shard, fingerprint)
示例13: request_seen
# 需要导入模块: from scrapy.utils import request [as 别名]
# 或者: from scrapy.utils.request import request_fingerprint [as 别名]
def request_seen(self, request):
    """Tell whether ``request`` was seen before, remembering it when it is new.

    A new fingerprint is added to ``self.fingerprints`` and, when
    ``self.file`` is truthy, also written to it followed by ``os.linesep``.

    :param request: the request to check.
    :return: True if the fingerprint was already in ``self.fingerprints``,
        False otherwise (always a bool, never an implicit None).
    """
    fp = self.request_fingerprint(request)
    if fp in self.fingerprints:
        return True
    self.fingerprints.add(fp)
    if self.file:
        self.file.write(fp + os.linesep)
    # Explicit result for the "new request" path instead of falling off the
    # end of the function and returning None.
    return False
示例14: request_fingerprint
# 需要导入模块: from scrapy.utils import request [as 别名]
# 或者: from scrapy.utils.request import request_fingerprint [as 别名]
def request_fingerprint(self, request):
    """Compute the fingerprint of the given request.

    Delegates to the module-level ``request_fingerprint`` function.

    Parameters
    ----------
    request : scrapy.http.Request

    Returns
    -------
    str
    """
    fingerprint = request_fingerprint(request)
    return fingerprint
示例15: _process_request
# 需要导入模块: from scrapy.utils import request [as 别名]
# 或者: from scrapy.utils.request import request_fingerprint [as 别名]
def _process_request(self, request, info):
    """Handle one media request, reusing cached or in-flight results by fingerprint.

    :param request: the request to process; its ``callback`` and ``errback``
        are saved and then reset to None on the request itself.
    :param info: state object providing ``downloaded`` (fingerprint -> result),
        ``waiting`` (fingerprint -> list of Deferreds), ``downloading``
        (set of fingerprints) and ``spider``.
    :return: a Deferred wired to the saved callback/errback.
    """
    fingerprint = request_fingerprint(request)
    on_success = request.callback or (lambda _: _)
    on_failure = request.errback
    # Clear the handlers on the request; the saved copies are attached to the
    # Deferreds created below.
    request.callback = None
    request.errback = None

    # A result already cached for this fingerprint is handed back directly.
    if fingerprint in info.downloaded:
        cached = info.downloaded[fingerprint]
        return defer_result(cached).addCallbacks(on_success, on_failure)

    # No cached result yet: register a Deferred that waits for it.
    waiter = Deferred().addCallbacks(on_success, on_failure)
    info.waiting[fingerprint].append(waiter)

    # The same fingerprint is being downloaded right now; do not start it twice.
    if fingerprint in info.downloading:
        return waiter

    # Start the download, consulting the media_to_download hook output first.
    info.downloading.add(fingerprint)
    download = mustbe_deferred(self.media_to_download, request, info)
    download.addCallback(self._check_media_to_download, request, info)
    download.addBoth(self._cache_result_and_execute_waiters, fingerprint, info)
    download.addErrback(
        lambda failure: logger.error(
            failure.value,
            exc_info=failure_to_exc_info(failure),
            extra={'spider': info.spider},
        )
    )
    # Whatever happened above, the chain must end by yielding the waiter.
    return download.addBoth(lambda _: waiter)