当前位置: 首页>>代码示例>>Python>>正文


Python request.request_fingerprint方法代码示例

本文整理汇总了Python中scrapy.utils.request.request_fingerprint方法的典型用法代码示例。如果您正苦于以下问题：Python request.request_fingerprint方法的具体用法？Python request.request_fingerprint怎么用？Python request.request_fingerprint使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块scrapy.utils.request的用法示例。


在下文中一共展示了request.request_fingerprint方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: request_seen

# 需要导入模块: from scrapy.utils import request [as 别名]
# 或者: from scrapy.utils.request import request_fingerprint [as 别名]
def request_seen(self, request):
    """Tell whether ``request`` is already recorded, recording it if not.

    The lookup is delegated to ``is_request_seen``.  For a request that is
    not yet recorded, a ``Seen`` row holding its fingerprint, URL and the
    current time is added and committed through ``session``.

    Returns whatever ``is_request_seen`` returned for the request.
    """
    already_seen = is_request_seen(request)

    if already_seen:
        log.msg('[seen] "%s" is seen. Skipping.' % request.url, log.INFO)
        return already_seen

    log.msg('New URL: %s. Adding it to seen database' % request.url, log.DEBUG)
    record = Seen(
        fingerprint=request_fingerprint(request),
        url=request.url,
        last_crawl_time=datetime.now(),
    )
    try:
        session.add(record)
        session.commit()
    except:
        # Deliberately catches everything: roll back, then re-raise the
        # original error untouched.
        session.rollback()
        raise
    finally:
        # The session is closed whether or not the commit succeeded.
        session.close()

    return already_seen
开发者ID:eren,项目名称:sozlukcrawler,代码行数:22,代码来源:dupefilter.py

示例2: request_seen

# 需要导入模块: from scrapy.utils import request [as 别名]
# 或者: from scrapy.utils.request import request_fingerprint [as 别名]
def request_seen(self, request):
    """Check whether ``request`` has been seen, registering it otherwise.

    The request's fingerprint is passed to ``self.server.sadd`` under
    ``self.key``; the call's result is compared with zero.

    Parameters
    ----------
    request : scrapy.http.Request

    Returns
    -------
    bool
        True when ``sadd`` reported that nothing was added.

    """
    fingerprint = self.request_fingerprint(request)
    # sadd reports how many values were inserted; zero means the
    # fingerprint was already a member.
    inserted_count = self.server.sadd(self.key, fingerprint)
    return inserted_count == 0
开发者ID:wistbean,项目名称:learn_python3_spider,代码行数:18,代码来源:dupefilter.py

示例3: _extract_key_info

# 需要导入模块: from scrapy.utils import request [as 别名]
# 或者: from scrapy.utils.request import request_fingerprint [as 别名]
def _extract_key_info(self, request):
    """
    Derive, from the request for the resource being downloaded, the bucket
    and key to use when uploading that resource to Qiniu.

    When ``request.meta`` carries a ``qiniu_key_generator`` callable, it is
    called with the request URL and its ``bucket`` / ``key`` entries are
    used (an empty bucket falls back to ``self.bucket``).  Otherwise the
    key is ``self.key_prefix`` followed by the request fingerprint.
    """
    from scrapy.utils.request import request_fingerprint

    make_key = request.meta.get('qiniu_key_generator')
    if not make_key:
        return {
            'bucket': self.bucket,
            'key': '%s%s' % (self.key_prefix, request_fingerprint(request)),
        }

    generated = make_key(request.url)
    return {
        'bucket': generated['bucket'] or self.bucket,
        'key': generated['key'],
    }
开发者ID:haizi-zh,项目名称:scrapy-qiniu,代码行数:18,代码来源:impl.py

示例4: _request_key

# 需要导入模块: from scrapy.utils import request [as 别名]
# 或者: from scrapy.utils.request import request_fingerprint [as 别名]
def _request_key(self, request):
    """Return the fingerprint of ``request`` passed through ``to_bytes``."""
    fingerprint = request_fingerprint(request)
    return to_bytes(fingerprint)
开发者ID:invanalabs,项目名称:invana-bot,代码行数:4,代码来源:mongodb.py

示例5: request_seen

# 需要导入模块: from scrapy.utils import request [as 别名]
# 或者: from scrapy.utils.request import request_fingerprint [as 别名]
def request_seen(self, request):
    """Report whether ``request`` was already seen for its crawl id.

    The fingerprint is added via ``self.server.sadd`` under a key built
    from ``self.key`` and ``request.meta['crawlid']``, and
    ``self.server.expire`` is then called on that key with
    ``self.timeout`` on every call.

    Returns the negation of what ``sadd`` returned.
    """
    fingerprint = request_fingerprint(request)
    crawl_id = request.meta['crawlid']
    crawl_key = self.key + ":" + crawl_id

    was_added = self.server.sadd(crawl_key, fingerprint)
    self.server.expire(crawl_key, self.timeout)

    return not was_added
开发者ID:istresearch,项目名称:scrapy-cluster,代码行数:10,代码来源:redis_dupefilter.py

示例6: is_request_seen

# 需要导入模块: from scrapy.utils import request [as 别名]
# 或者: from scrapy.utils.request import request_fingerprint [as 别名]
def is_request_seen(request):
    """Return the scalar result of an EXISTS query on ``Seen.fingerprint``.

    The query checks for a ``Seen`` row whose fingerprint equals the
    fingerprint of ``request``.
    """
    fingerprint = request_fingerprint(request)
    seen_exists = exists().where(Seen.fingerprint == fingerprint)
    return session.query(seen_exists).scalar()
开发者ID:eren,项目名称:sozlukcrawler,代码行数:4,代码来源:utils.py

示例7: request_seen

# 需要导入模块: from scrapy.utils import request [as 别名]
# 或者: from scrapy.utils.request import request_fingerprint [as 别名]
def request_seen(self, request):
    """Check ``request`` against the fingerprint set of its task.

    The task id is read from ``request._plusmeta['taskid']``.  Without a
    (truthy) task id nothing is recorded and ``None`` is returned.
    Otherwise the fingerprint is added with ``self.server.sadd`` under
    ``self.key`` formatted with the task id, and the result is True when
    ``sadd`` returned zero.
    """
    task_id = request._plusmeta.get('taskid')
    if not task_id:
        return None

    fingerprint = self.request_fingerprint(request)
    task_key = self.key.format(task_id)
    return self.server.sadd(task_key, fingerprint) == 0
开发者ID:cilame,项目名称:vrequest,代码行数:8,代码来源:py_my_scrapy_redis_server.py

示例8: request_fingerprint

# 需要导入模块: from scrapy.utils import request [as 别名]
# 或者: from scrapy.utils.request import request_fingerprint [as 别名]
def request_fingerprint(self, request):
    """Return the fingerprint of ``request``.

    Delegates to the ``request_fingerprint`` name in the enclosing scope.
    """
    fingerprint = request_fingerprint(request)
    return fingerprint
开发者ID:cilame,项目名称:vrequest,代码行数:4,代码来源:py_my_scrapy_redis_server.py

示例9: test_request

# 需要导入模块: from scrapy.utils import request [as 别名]
# 或者: from scrapy.utils.request import request_fingerprint [as 别名]
def test_request(self):
    """
    Check that the requests built from ``self.url_01`` and ``self.url_02``
    produce the same fingerprint.
    :return:
    """
    first_fingerprint = request.request_fingerprint(Request(url=self.url_01))
    second_fingerprint = request.request_fingerprint(Request(url=self.url_02))

    self.assertEqual(first_fingerprint, second_fingerprint)
开发者ID:zhanghe06,项目名称:news_spider,代码行数:14,代码来源:test_finger.py

示例10: get_request_finger

# 需要导入模块: from scrapy.utils import request [as 别名]
# 或者: from scrapy.utils.request import request_fingerprint [as 别名]
def get_request_finger(url):
    """
    Return the fingerprint of a ``Request`` built from ``url``.

    The original note says the order of URL parameters does not matter;
    that property comes from ``request.request_fingerprint`` and is not
    established here.
    :param url:
    :return:
    """
    return request.request_fingerprint(Request(url=url))
开发者ID:zhanghe06,项目名称:news_spider,代码行数:10,代码来源:url.py

示例11: _request_key

# 需要导入模块: from scrapy.utils import request [as 别名]
# 或者: from scrapy.utils.request import request_fingerprint [as 别名]
def _request_key(self, request):
    """Return the key for ``request``: its fingerprint, unchanged."""
    cache_key = request_fingerprint(request)
    return cache_key
开发者ID:wistbean,项目名称:learn_python3_spider,代码行数:4,代码来源:httpcache.py

示例12: _get_request_path

# 需要导入模块: from scrapy.utils import request [as 别名]
# 或者: from scrapy.utils.request import request_fingerprint [as 别名]
def _get_request_path(self, spider, request):
    """Build the path for ``request`` under ``self.cachedir``.

    Layout: ``<cachedir>/<spider.name>/<first two fingerprint chars>/<fingerprint>``.
    """
    fingerprint = request_fingerprint(request)
    shard = fingerprint[:2]
    return os.path.join(self.cachedir, spider.name, shard, fingerprint)
开发者ID:wistbean,项目名称:learn_python3_spider,代码行数:5,代码来源:httpcache.py

示例13: request_seen

# 需要导入模块: from scrapy.utils import request [as 别名]
# 或者: from scrapy.utils.request import request_fingerprint [as 别名]
def request_seen(self, request):
    """Report whether ``request`` was seen before, recording it if not.

    The request's fingerprint is looked up in ``self.fingerprints``.  A new
    fingerprint is added to that set and, when ``self.file`` is set, also
    appended to the file as one line.

    Returns
    -------
    bool
        True if the fingerprint was already known, False otherwise.
    """
    fp = self.request_fingerprint(request)
    if fp in self.fingerprints:
        return True
    self.fingerprints.add(fp)
    if self.file:
        # Use a plain '\n', not os.linesep: a text-mode file already
        # translates '\n' to the platform line ending, so writing
        # os.linesep would produce '\r\r\n' on Windows.
        self.file.write(fp + '\n')
    # Explicit falsy result instead of implicitly falling through to None.
    return False
开发者ID:wistbean,项目名称:learn_python3_spider,代码行数:9,代码来源:dupefilters.py

示例14: request_fingerprint

# 需要导入模块: from scrapy.utils import request [as 别名]
# 或者: from scrapy.utils.request import request_fingerprint [as 别名]
def request_fingerprint(self, request):
    """Compute the fingerprint of the given request.

    Delegates to the ``request_fingerprint`` name in the enclosing scope.

    Parameters
    ----------
    request : scrapy.http.Request

    Returns
    -------
    str

    """
    fingerprint = request_fingerprint(request)
    return fingerprint
开发者ID:wistbean,项目名称:learn_python3_spider,代码行数:15,代码来源:dupefilter.py

示例15: _process_request

# 需要导入模块: from scrapy.utils import request [as 别名]
# 或者: from scrapy.utils.request import request_fingerprint [as 别名]
def _process_request(self, request, info):
        """Download ``request`` at most once per fingerprint and route the
        outcome to the request's own callback/errback.

        The request's ``callback`` and ``errback`` are detached (set to
        ``None``) and attached to a Deferred instead.  Three cases follow:

        * fingerprint already in ``info.downloaded``: the cached value is
          wrapped with ``defer_result`` and chained to the callbacks;
        * fingerprint already in ``info.downloading``: a new waiter
          Deferred is queued in ``info.waiting`` and returned;
        * otherwise: the fingerprint is marked as downloading and the
          download chain is started; the returned Deferred ends by yielding
          the waiter Deferred.

        NOTE(review): the waiter is presumably fired by
        ``_cache_result_and_execute_waiters``, which is defined elsewhere -
        confirm there.
        """
        fp = request_fingerprint(request)
        # Fall back to an identity callback when the request has none.
        cb = request.callback or (lambda _: _)
        eb = request.errback
        # Detach the handlers from the request; they are attached to the
        # Deferreds created below instead.
        request.callback = None
        request.errback = None

        # Return cached result if request was already seen
        if fp in info.downloaded:
            return defer_result(info.downloaded[fp]).addCallbacks(cb, eb)

        # Otherwise, wait for result: queue a waiter Deferred carrying the
        # request's handlers under this fingerprint.
        wad = Deferred().addCallbacks(cb, eb)
        info.waiting[fp].append(wad)

        # Check if request is downloading right now to avoid doing it twice
        if fp in info.downloading:
            return wad

        # Download request checking media_to_download hook output first
        info.downloading.add(fp)
        dfd = mustbe_deferred(self.media_to_download, request, info)
        dfd.addCallback(self._check_media_to_download, request, info)
        # addBoth: runs for both a success result and a failure.
        dfd.addBoth(self._cache_result_and_execute_waiters, fp, info)
        # Log any failure still present at this point in the chain.
        dfd.addErrback(lambda f: logger.error(
            f.value, exc_info=failure_to_exc_info(f), extra={'spider': info.spider})
        )
        return dfd.addBoth(lambda _: wad)  # it must return wad at last
开发者ID:wistbean,项目名称:learn_python3_spider,代码行数:30,代码来源:media.py


注:本文中的scrapy.utils.request.request_fingerprint方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。