當前位置: 首頁>>代碼示例>>Python>>正文


Python FronteraScheduler.process_spider_output方法代碼示例

本文整理匯總了Python中frontera.contrib.scrapy.schedulers.frontier.FronteraScheduler.process_spider_output方法的典型用法代碼示例。如果您正苦於以下問題:Python FronteraScheduler.process_spider_output方法的具體用法?Python FronteraScheduler.process_spider_output怎麽用?Python FronteraScheduler.process_spider_output使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在frontera.contrib.scrapy.schedulers.frontier.FronteraScheduler的用法示例。


在下文中一共展示了FronteraScheduler.process_spider_output方法的4個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。

示例1: test_process_spider_output

# 需要導入模塊: from frontera.contrib.scrapy.schedulers.frontier import FronteraScheduler [as 別名]
# 或者: from frontera.contrib.scrapy.schedulers.frontier.FronteraScheduler import process_spider_output [as 別名]
 def test_process_spider_output(self):
     """process_spider_output must pass items through unchanged while routing
     the requests to the frontier manager and updating the crawl stats.
     """
     i1 = {'name': 'item', 'item': 'i1'}
     i2 = {'name': 'item', 'item': 'i2'}
     result = [r1, r2, r3, i1, i2]
     resp = Response(fr1.url, request=Request(fr1.url, meta={'frontier_request': fr1}))
     crawler = FakeCrawler()
     fs = FronteraScheduler(crawler, manager=FakeFrontierManager)
     fs.open(Spider)
     # Sort with an explicit key: dicts are not orderable in Python 3, so a
     # bare sorted() over dicts raises TypeError (the other examples in this
     # file already use a key for the same comparison).
     item_key = lambda i: i['item']
     assert sorted(fs.process_spider_output(resp, result, Spider), key=item_key) == \
         sorted([i1, i2], key=item_key)
     # The response itself is forwarded to the frontier manager...
     assert isinstance(fs.frontier.manager.responses[0], FResponse)
     assert fs.frontier.manager.responses[0].url == resp.url
     # ...and every request from the spider output becomes a frontier link.
     assert set([request.url for request in fs.frontier.manager.links]) == set([r1.url, r2.url, r3.url])
     assert all([isinstance(request, FRequest) for request in fs.frontier.manager.links])
     assert fs.stats_manager.stats.get_value('frontera/crawled_pages_count') == 1
     assert fs.stats_manager.stats.get_value('frontera/crawled_pages_count/200') == 1
     assert fs.stats_manager.stats.get_value('frontera/links_extracted_count') == 3
開發者ID:RaoUmer,項目名稱:frontera,代碼行數:18,代碼來源:test_frontera_scheduler.py

示例2: test_process_spider_output

# 需要導入模塊: from frontera.contrib.scrapy.schedulers.frontier import FronteraScheduler [as 別名]
# 或者: from frontera.contrib.scrapy.schedulers.frontier.FronteraScheduler import process_spider_output [as 別名]
 def test_process_spider_output(self):
     """Verify that items flow through the scheduler untouched while the
     requests are handed to the frontier manager and reflected in stats.
     """
     item_one = {"name": "item", "item": "i1"}
     item_two = {"name": "item", "item": "i2"}
     spider_result = [r1, r2, r3, item_one, item_two]
     response = Response(fr1.url, request=Request(fr1.url, meta={b"frontier_request": fr1}))
     scheduler = FronteraScheduler(FakeCrawler(), manager=FakeFrontierManager)
     scheduler.open(Spider)
     # Dicts are unorderable, so compare under a deterministic key.
     by_item = lambda entry: sorted(entry["item"])
     produced = sorted(scheduler.process_spider_output(response, spider_result, Spider), key=by_item)
     assert produced == sorted([item_one, item_two], key=by_item)
     manager = scheduler.frontier.manager
     # The crawled response was forwarded to the frontier manager.
     assert isinstance(manager.responses[0], FResponse)
     assert manager.responses[0].url == response.url
     # Every request from the spider output was turned into a frontier link.
     assert {request.url for request in manager.links} == {r1.url, r2.url, r3.url}
     assert all(isinstance(request, FRequest) for request in manager.links)
     stats = scheduler.stats_manager.stats
     assert stats.get_value("frontera/crawled_pages_count") == 1
     assert stats.get_value("frontera/crawled_pages_count/200") == 1
     assert stats.get_value("frontera/links_extracted_count") == 3
開發者ID:voith,項目名稱:frontera,代碼行數:20,代碼來源:test_frontera_scheduler.py

示例3: test_process_spider_output

# 需要導入模塊: from frontera.contrib.scrapy.schedulers.frontier import FronteraScheduler [as 別名]
# 或者: from frontera.contrib.scrapy.schedulers.frontier.FronteraScheduler import process_spider_output [as 別名]
    def test_process_spider_output(self):
        """process_spider_output should yield only the items, divert every
        request to the frontier manager, and update the crawl statistics.
        """
        i1 = {'name': 'item', 'item': 'i1'}
        i2 = {'name': 'item', 'item': 'i2'}
        items = [i1, i2]
        requests = [r1, r2, r3]
        result = list(requests)
        result.extend(items)
        resp = Response(fr1.url, request=Request(fr1.url, meta={b'frontier_request': fr1}))
        crawler = FakeCrawler()
        fs = FronteraScheduler(crawler, manager=FakeFrontierManager)
        spider = Spider(name="testing")
        fs.open(spider)
        # Only the items come back out; the requests are consumed by the frontier.
        out_items = list(fs.process_spider_output(resp, result, spider))
        assert len(out_items) == len(items)
        # Every input request reached the frontier manager as a frontier link.
        # (A single set comparison — the original asserted this twice.)
        assert {request.url for request in fs.frontier.manager.links} == {r.url for r in requests}
        assert all(isinstance(request, FRequest) for request in fs.frontier.manager.links)
        # The crawled response itself was forwarded to the frontier manager.
        assert isinstance(fs.frontier.manager.responses[0], FResponse)
        assert fs.frontier.manager.responses[0].url == resp.url
        assert fs.stats_manager.stats.get_value('frontera/crawled_pages_count') == 1
        assert fs.stats_manager.stats.get_value('frontera/crawled_pages_count/200') == 1
        assert fs.stats_manager.stats.get_value('frontera/links_extracted_count') == 3
開發者ID:scrapinghub,項目名稱:frontera,代碼行數:25,代碼來源:test_frontera_scheduler.py

示例4: test_process_spider_output

# 需要導入模塊: from frontera.contrib.scrapy.schedulers.frontier import FronteraScheduler [as 別名]
# 或者: from frontera.contrib.scrapy.schedulers.frontier.FronteraScheduler import process_spider_output [as 別名]
 def test_process_spider_output(self):
     """In this scheduler version both requests and items are re-emitted:
     the requests (first, in input order) are also registered as frontier
     links, and the crawl stats are updated accordingly.
     """
     first_item = {'name': 'item', 'item': 'i1'}
     second_item = {'name': 'item', 'item': 'i2'}
     no_requests = 3
     spider_result = [r1, r2, r3, first_item, second_item]
     response = Response(fr1.url, request=Request(fr1.url, meta={b'frontier_request': fr1}))
     scheduler = FronteraScheduler(FakeCrawler(), manager=FakeFrontierManager)
     scheduler.open(Spider)
     produced = list(scheduler.process_spider_output(response, spider_result, Spider))
     # Everything is re-emitted: requests followed by items.
     assert len(produced) == len(spider_result)
     emitted_requests = produced[:no_requests]
     assert {r.url for r in emitted_requests} == {r.url for r in spider_result[:no_requests]}
     # Dicts are unorderable, so compare the item tail under a key.
     item_key = lambda entry: sorted(entry['item'])
     emitted_items = produced[no_requests:]
     assert sorted(emitted_items, key=item_key) == sorted([first_item, second_item], key=item_key)
     manager = scheduler.frontier.manager
     # The response was forwarded to the frontier manager...
     assert isinstance(manager.responses[0], FResponse)
     assert manager.responses[0].url == response.url
     # ...and each request also became a frontier link.
     assert {request.url for request in manager.links} == {r1.url, r2.url, r3.url}
     assert all(isinstance(request, FRequest) for request in manager.links)
     stats = scheduler.stats_manager.stats
     assert stats.get_value('frontera/crawled_pages_count') == 1
     assert stats.get_value('frontera/crawled_pages_count/200') == 1
     assert stats.get_value('frontera/links_extracted_count') == 3
開發者ID:Preetwinder,項目名稱:frontera,代碼行數:25,代碼來源:test_frontera_scheduler.py


注:本文中的frontera.contrib.scrapy.schedulers.frontier.FronteraScheduler.process_spider_output方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。