當前位置: 首頁>>代碼示例>>Python>>正文


Python request.request_fingerprint方法代碼示例

本文整理匯總了Python中scrapy.utils.request.request_fingerprint方法的典型用法代碼示例。如果您正苦於以下問題:Python request.request_fingerprint方法的具體用法?Python request.request_fingerprint怎麼用?Python request.request_fingerprint使用的例子?那麼, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在scrapy.utils.request的用法示例。


在下文中一共展示了request.request_fingerprint方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: request_seen

# 需要導入模塊: from scrapy.utils import request [as 別名]
# 或者: from scrapy.utils.request import request_fingerprint [as 別名]
def request_seen(self, request):
        """Report whether ``request`` is already recorded, storing it if not.

        A request that has not been seen yet is saved as a ``Seen`` row
        (fingerprint, URL and the current time) before returning.

        :param request: the request being checked; its ``url`` is logged.
        :return: the result of ``is_request_seen(request)``.
        """
        already_seen = is_request_seen(request)

        if already_seen:
            log.msg('[seen] "%s" is seen. Skipping.' % request.url, log.INFO)
            return already_seen

        log.msg('New URL: %s. Adding it to seen database' % request.url, log.DEBUG)
        record = Seen(
            fingerprint=request_fingerprint(request),
            url=request.url,
            last_crawl_time=datetime.now(),
        )
        try:
            session.add(record)
            session.commit()
        except:
            # Undo the partial transaction, then let the caller see the error.
            session.rollback()
            raise
        finally:
            session.close()

        return already_seen
開發者ID:eren,項目名稱:sozlukcrawler,代碼行數:22,代碼來源:dupefilter.py

示例2: request_seen

# 需要導入模塊: from scrapy.utils import request [as 別名]
# 或者: from scrapy.utils.request import request_fingerprint [as 別名]
def request_seen(self, request):
        """Tell whether the request's fingerprint was already stored.

        Parameters
        ----------
        request : scrapy.http.Request

        Returns
        -------
        bool
            True when ``sadd`` reports that nothing was added.

        """
        fingerprint = self.request_fingerprint(request)
        # sadd reports how many values were inserted; 0 means already present.
        newly_added = self.server.sadd(self.key, fingerprint)
        is_duplicate = newly_added == 0
        return is_duplicate
開發者ID:wistbean,項目名稱:learn_python3_spider,代碼行數:18,代碼來源:dupefilter.py

示例3: _extract_key_info

# 需要導入模塊: from scrapy.utils import request [as 別名]
# 或者: from scrapy.utils.request import request_fingerprint [as 別名]
def _extract_key_info(self, request):
        """
        從欲下載資源的request中, 獲得資源上傳七牛時的bucket和key
        """
        from scrapy.utils.request import request_fingerprint

        key_generator = request.meta.get('qiniu_key_generator')
        if key_generator:
            tmp = key_generator(request.url)
            bucket = tmp['bucket'] or self.bucket
            key = tmp['key']
        else:
            bucket = self.bucket
            key = '%s%s' % (self.key_prefix, request_fingerprint(request))

        return {'bucket': bucket, 'key': key} 
開發者ID:haizi-zh,項目名稱:scrapy-qiniu,代碼行數:18,代碼來源:impl.py

示例4: _request_key

# 需要導入模塊: from scrapy.utils import request [as 別名]
# 或者: from scrapy.utils.request import request_fingerprint [as 別名]
def _request_key(self, request):
        """Return the request's fingerprint passed through ``to_bytes``."""
        fingerprint = request_fingerprint(request)
        return to_bytes(fingerprint)
開發者ID:invanalabs,項目名稱:invana-bot,代碼行數:4,代碼來源:mongodb.py

示例5: request_seen

# 需要導入模塊: from scrapy.utils import request [as 別名]
# 或者: from scrapy.utils.request import request_fingerprint [as 別名]
def request_seen(self, request):
        """Store the request's fingerprint under its crawl id and report duplicates.

        The fingerprint is added via ``sadd`` to the key made of ``self.key``,
        a colon and the request's ``crawlid`` meta value; ``expire`` is then
        called on that key with ``self.timeout``.

        :param request: request whose ``meta['crawlid']`` selects the key.
        :return: ``not added``, where ``added`` is what ``sadd`` returned.
        """
        fingerprint = request_fingerprint(request)
        crawl_key = self.key + ":" + request.meta['crawlid']

        was_added = self.server.sadd(crawl_key, fingerprint)
        self.server.expire(crawl_key, self.timeout)

        return not was_added
開發者ID:istresearch,項目名稱:scrapy-cluster,代碼行數:10,代碼來源:redis_dupefilter.py

示例6: is_request_seen

# 需要導入模塊: from scrapy.utils import request [as 別名]
# 或者: from scrapy.utils.request import request_fingerprint [as 別名]
def is_request_seen(request):
    """Return the scalar result of an EXISTS query on ``Seen.fingerprint``."""
    fingerprint = request_fingerprint(request)
    fingerprint_exists = exists().where(Seen.fingerprint == fingerprint)
    return session.query(fingerprint_exists).scalar()
開發者ID:eren,項目名稱:sozlukcrawler,代碼行數:4,代碼來源:utils.py

示例7: request_seen

# 需要導入模塊: from scrapy.utils import request [as 別名]
# 或者: from scrapy.utils.request import request_fingerprint [as 別名]
def request_seen(self, request):
        """Check the per-task fingerprint store for ``request``.

        :param request: request carrying a ``_plusmeta`` mapping.
        :return: ``True`` if ``sadd`` added nothing for this task,
            ``False`` if the fingerprint was just added, ``None`` when the
            request has no truthy ``taskid``.
        """
        task_id = request._plusmeta.get('taskid')
        if not task_id:
            # No task id: nothing is recorded and the result stays None.
            return None
        fingerprint = self.request_fingerprint(request)
        task_key = self.key.format(task_id)
        return self.server.sadd(task_key, fingerprint) == 0
開發者ID:cilame,項目名稱:vrequest,代碼行數:8,代碼來源:py_my_scrapy_redis_server.py

示例8: request_fingerprint

# 需要導入模塊: from scrapy.utils import request [as 別名]
# 或者: from scrapy.utils.request import request_fingerprint [as 別名]
def request_fingerprint(self, request):
        """Delegate to the imported ``request_fingerprint`` function."""
        fingerprint = request_fingerprint(request)
        return fingerprint
開發者ID:cilame,項目名稱:vrequest,代碼行數:4,代碼來源:py_my_scrapy_redis_server.py

示例9: test_request

# 需要導入模塊: from scrapy.utils import request [as 別名]
# 或者: from scrapy.utils.request import request_fingerprint [as 別名]
def test_request(self):
        """
        Check that the two test URLs yield equal request fingerprints.
        :return:
        """
        fingerprint_a = request.request_fingerprint(Request(url=self.url_01))
        fingerprint_b = request.request_fingerprint(Request(url=self.url_02))

        self.assertEqual(fingerprint_a, fingerprint_b)
開發者ID:zhanghe06,項目名稱:news_spider,代碼行數:14,代碼來源:test_finger.py

示例10: get_request_finger

# 需要導入模塊: from scrapy.utils import request [as 別名]
# 或者: from scrapy.utils.request import request_fingerprint [as 別名]
def get_request_finger(url):
    """
    Get the fingerprint of a URL (query parameters may be in any order).
    :param url: URL to fingerprint
    :return: fingerprint of a ``Request`` built from ``url``
    """
    return request.request_fingerprint(Request(url=url))
開發者ID:zhanghe06,項目名稱:news_spider,代碼行數:10,代碼來源:url.py

示例11: _request_key

# 需要導入模塊: from scrapy.utils import request [as 別名]
# 或者: from scrapy.utils.request import request_fingerprint [as 別名]
def _request_key(self, request):
        """Use the request's fingerprint as its key."""
        cache_key = request_fingerprint(request)
        return cache_key
開發者ID:wistbean,項目名稱:learn_python3_spider,代碼行數:4,代碼來源:httpcache.py

示例12: _get_request_path

# 需要導入模塊: from scrapy.utils import request [as 別名]
# 或者: from scrapy.utils.request import request_fingerprint [as 別名]
def _get_request_path(self, spider, request):
        """Build the path for ``request`` under ``self.cachedir``.

        The path is ``<cachedir>/<spider name>/<first two fingerprint
        characters>/<fingerprint>``.
        """
        fingerprint = request_fingerprint(request)
        shard = fingerprint[:2]
        path_parts = (self.cachedir, spider.name, shard, fingerprint)
        return os.path.join(*path_parts)
開發者ID:wistbean,項目名稱:learn_python3_spider,代碼行數:5,代碼來源:httpcache.py

示例13: request_seen

# 需要導入模塊: from scrapy.utils import request [as 別名]
# 或者: from scrapy.utils.request import request_fingerprint [as 別名]
def request_seen(self, request):
        """Return whether the request's fingerprint has been seen before.

        An unseen fingerprint is added to ``self.fingerprints`` and, when
        ``self.file`` is truthy, appended to that file on its own line.

        :param request: request to check.
        :return: ``True`` if the fingerprint was already known, else ``False``.
        """
        fp = self.request_fingerprint(request)
        if fp in self.fingerprints:
            return True
        self.fingerprints.add(fp)
        if self.file:
            # NOTE(review): if the file is opened in text mode, os.linesep
            # is translated again on Windows; "\n" may be intended. Confirm
            # against how self.file is opened.
            self.file.write(fp + os.linesep)
        # Explicit bool instead of an implicit None for unseen requests.
        return False
開發者ID:wistbean,項目名稱:learn_python3_spider,代碼行數:9,代碼來源:dupefilters.py

示例14: request_fingerprint

# 需要導入模塊: from scrapy.utils import request [as 別名]
# 或者: from scrapy.utils.request import request_fingerprint [as 別名]
def request_fingerprint(self, request):
        """Compute the fingerprint of ``request``.

        Parameters
        ----------
        request : scrapy.http.Request

        Returns
        -------
        str

        """
        fingerprint = request_fingerprint(request)
        return fingerprint
開發者ID:wistbean,項目名稱:learn_python3_spider,代碼行數:15,代碼來源:dupefilter.py

示例15: _process_request

# 需要導入模塊: from scrapy.utils import request [as 別名]
# 或者: from scrapy.utils.request import request_fingerprint [as 別名]
def _process_request(self, request, info):
        """Return a deferred for ``request``, sharing work per fingerprint.

        Requests are keyed by their fingerprint.  A fingerprint already in
        ``info.downloaded`` is answered from that cache; one already in
        ``info.downloading`` only registers a waiter; otherwise the download
        is started through ``self.media_to_download``.

        :param request: request to process; its ``callback`` and ``errback``
            are captured and then set to ``None``.
        :param info: object holding the ``downloaded``, ``waiting`` and
            ``downloading`` collections plus the ``spider`` used for logging.
        :return: a deferred with the request's callback/errback attached.
        """
        fp = request_fingerprint(request)
        # Fall back to an identity callback when the request has none.
        cb = request.callback or (lambda _: _)
        eb = request.errback
        # The callbacks were captured above and are attached to the deferreds
        # built below, so they are cleared on the request itself.
        request.callback = None
        request.errback = None

        # Return cached result if request was already seen
        if fp in info.downloaded:
            return defer_result(info.downloaded[fp]).addCallbacks(cb, eb)

        # Otherwise, wait for result
        wad = Deferred().addCallbacks(cb, eb)
        info.waiting[fp].append(wad)

        # Check if request is downloading right now to avoid doing it twice
        if fp in info.downloading:
            return wad

        # Download request checking media_to_download hook output first
        info.downloading.add(fp)
        dfd = mustbe_deferred(self.media_to_download, request, info)
        dfd.addCallback(self._check_media_to_download, request, info)
        # addBoth: runs on both the success and the failure path.
        dfd.addBoth(self._cache_result_and_execute_waiters, fp, info)
        # Log any failure still present at this point in the chain.
        dfd.addErrback(lambda f: logger.error(
            f.value, exc_info=failure_to_exc_info(f), extra={'spider': info.spider})
        )
        return dfd.addBoth(lambda _: wad)  # it must return wad at last
開發者ID:wistbean,項目名稱:learn_python3_spider,代碼行數:30,代碼來源:media.py


注:本文中的scrapy.utils.request.request_fingerprint方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。