

Python engine.Pomp Class Code Examples

This article collects typical usage examples of the Python class pomp.core.engine.Pomp. If you are wondering what exactly the Pomp class does, how to use it, or what real Pomp code looks like, the curated examples below should help.


The sections below present 15 code examples of the Pomp class, ordered by popularity by default.
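Before the test-suite excerpts, here is a minimal sketch of the workflow they all share: define a crawler, hand the engine a downloader, and call pump(). Only the pomp.core.engine.Pomp import path comes from this article; the BaseCrawler and UrllibDownloader import paths and the `response` parameter name are assumptions inferred from the examples below, so verify them against your installed pomp version.

from pomp.core.base import BaseCrawler      # assumed import path, not verified
from pomp.core.engine import Pomp           # import path stated in this article
from pomp.contrib import UrllibDownloader   # assumed import path; see the urllib examples below


class MyCrawler(BaseCrawler):
    # seed request(s) the engine starts from; a real URL would be fetched here
    ENTRY_REQUESTS = 'http://localhost/'

    def extract_items(self, response):
        # parse the response and return/yield items; None means "nothing extracted"
        return None

    def next_requests(self, response):
        # return follow-up requests found in the response, or None to stop crawling
        return None


pomp = Pomp(
    downloader=UrllibDownloader(),
    pipelines=[],  # pipelines would receive every extracted item
)
pomp.pump(MyCrawler())

pump() drives the whole crawl loop: requests pass through the downloader (and any middlewares), responses come back to the crawler, and the loop continues until next_requests stops producing work, which is exactly the cycle each test below asserts on.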

Example 1: test_concurrent_urllib_downloader

    def test_concurrent_urllib_downloader(self):
        req_resp_middleware = RequestResponseMiddleware(
            prefix_url=self.httpd.location,
            request_factory=UrllibHttpRequest,
        )

        collect_middleware = CollectRequestResponseMiddleware()

        downloader = ConcurrentUrllibDownloader(
            middlewares=[collect_middleware]
        )

        downloader.middlewares.insert(0, req_resp_middleware)

        pomp = Pomp(
            downloader=downloader,
            pipelines=[],
        )

        class Crawler(DummyCrawler):
            ENTRY_REQUESTS = '/root'

        pomp.pump(Crawler())

        assert_set_equal(
            set([r.url.replace(self.httpd.location, '')
                for r in collect_middleware.requests]),
            set(self.httpd.sitemap.keys())
        )
Developer: sibelius, Project: pomp, Lines: 29, Source: test_contrib_concurrent.py

Example 2: test_concurrent_crawler

    def test_concurrent_crawler(self):
        req_resp_middleware = RequestResponseMiddleware(
            prefix_url=self.httpd.location,
            request_factory=lambda x: x,
        )

        collect_middleware = CollectRequestResponseMiddleware()

        downloader = ConcurrentUrllibDownloader(
            pool_size=2,
        )

        pomp = Pomp(
            downloader=downloader,
            middlewares=(
                req_resp_middleware,
                UrllibAdapterMiddleware(),
                collect_middleware,
            ),
            pipelines=[],
        )

        pomp.pump(ConcurrentCrawler(
            pool_size=2,
            worker_class=MockedCrawlerWorker,
        ))

        assert \
            set([r.url.replace(self.httpd.location, '')
                for r in collect_middleware.requests]) == \
            set(self.httpd.sitemap.keys())
Developer: estin, Project: pomp, Lines: 31, Source: test_contrib_concurrent.py

Example 3: do_simple_test

    def do_simple_test(self, queue=None):
        req_resp_middleware = RequestResponseMiddleware(
            prefix_url=self.httpd.location,
            request_factory=TwistedHttpRequest,
        )
        collect_middleware = CollectRequestResponseMiddleware()
        downloader = TwistedDownloader(
            reactor, middlewares=[collect_middleware])

        downloader.middlewares.insert(0, req_resp_middleware)

        pomp = Pomp(
            downloader=downloader,
            pipelines=[PrintPipeline()],
            queue=queue,
        )

        class Crawler(DummyCrawler):
            ENTRY_REQUESTS = '/root'

        done_defer = defer.Deferred()
        d = pomp.pump(Crawler())

        d.add_callback(done_defer.callback)

        def check(x):
            assert_set_equal(
                set([r.url.replace(self.httpd.location, '')
                    for r in collect_middleware.requests]),
                set(self.httpd.sitemap.keys())
            )

        done_defer.addCallback(check)
        return done_defer
Developer: sibelius, Project: pomp, Lines: 34, Source: test_contrib_twisted.py

Example 4: test_exceptions

    def test_exceptions(self):

        req_resp_middleware = RequestResponseMiddleware(
            prefix_url='invalid url',
            request_factory=TwistedHttpRequest,
        )
        collect_middleware = CollectRequestResponseMiddleware()

        downloader = TwistedDownloader(
            reactor, middlewares=[collect_middleware])

        downloader.middlewares.insert(0, req_resp_middleware)

        pomp = Pomp(
            downloader=downloader,
            pipelines=[PrintPipeline()],
        )

        class Crawler(DummyCrawler):
            ENTRY_REQUESTS = '/root'

        done_defer = defer.Deferred()
        d = pomp.pump(Crawler())

        d.add_callback(done_defer.callback)

        def check(x):
            assert len(collect_middleware.exceptions) == 1
            assert isinstance(
                collect_middleware.exceptions[0], BaseDownloadException)

        done_defer.addCallback(check)
        return done_defer
Developer: sibelius, Project: pomp, Lines: 33, Source: test_contrib_twisted.py

Example 5: test_concurrent_downloader

    def test_concurrent_downloader(self):
        req_resp_middleware = RequestResponseMiddleware(
            prefix_url='http://localhost',
            request_factory=UrllibHttpRequest,
        )

        collect_middleware = CollectRequestResponseMiddleware()

        downloader = ConcurrentDownloader(
            pool_size=5,
            worker_class=MockedDownloadWorker,
            worker_kwargs=None,
        )

        pomp = Pomp(
            downloader=downloader,
            middlewares=(req_resp_middleware, collect_middleware),
            pipelines=[],
        )

        class Crawler(DummyCrawler):
            ENTRY_REQUESTS = '/root'

        pomp.pump(Crawler())

        assert \
            set([r.url.replace('http://localhost', '')
                for r in collect_middleware.requests]) == \
            set(MockedDownloadWorker.sitemap.keys())
Developer: estin, Project: pomp, Lines: 29, Source: test_contrib_concurrent.py

Example 6: test_exception_on_crawler_worker

    def test_exception_on_crawler_worker(self):
        req_resp_middleware = RequestResponseMiddleware(
            prefix_url=self.httpd.location,
            request_factory=lambda x: x,
        )

        collect_middleware = CollectRequestResponseMiddleware()

        downloader = ConcurrentUrllibDownloader(
            pool_size=2,
        )

        pomp = Pomp(
            downloader=downloader,
            middlewares=(
                req_resp_middleware,
                UrllibAdapterMiddleware(),
                collect_middleware,
            ),
            pipelines=[],
        )

        pomp.pump(ConcurrentCrawler(
            pool_size=2,
            worker_class=MockedCrawlerWorkerWithException,
        ))

        assert len(collect_middleware.requests) == 1
        assert len(collect_middleware.exceptions) == 1
Developer: estin, Project: pomp, Lines: 29, Source: test_contrib_concurrent.py

Example 7: test_urllib_downloader

    def test_urllib_downloader(self):
        req_resp_middleware = RequestResponseMiddleware(
            prefix_url=self.httpd.location,
            request_factory=lambda x: x,
        )

        collect_middleware = CollectRequestResponseMiddleware()

        downloader = UrllibDownloader()

        pomp = Pomp(
            downloader=downloader,
            middlewares=(
                req_resp_middleware,
                UrllibAdapterMiddleware(),
                collect_middleware,
            ),
            pipelines=[],
        )

        class Crawler(DummyCrawler):
            ENTRY_REQUESTS = '/root'

        pomp.pump(Crawler())

        assert \
            set([r.url.replace(self.httpd.location, '')
                for r in collect_middleware.requests]) == \
            set(self.httpd.sitemap.keys())
Developer: estin, Project: pomp, Lines: 29, Source: test_contrib_urllib.py

Example 8: test_queue_crawler

    def test_queue_crawler(self):
        road = RoadPipeline()

        class SimpleQueue(BaseQueue):

            def __init__(self):
                self.requests = []

            def get_requests(self):
                try:
                    return self.requests.pop()
                except IndexError:
                    return  # empty queue

            def put_requests(self, request):
                self.requests.append(request)

        queue = SimpleQueue()

        pomp = Pomp(
            downloader=DummyDownloader(middlewares=[url_to_request_middl]),
            pipelines=[
                road,
            ],
            queue=queue,
        )

        pomp.pump(Crawler())

        assert_equal(set([item.url for item in road.collection]), set([
            'http://python.org/1',
            'http://python.org/1/trash',
            'http://python.org/2',
        ]))
Developer: danielnaab, Project: pomp, Lines: 34, Source: test_simple_crawler.py

Example 9: test_thread_pooled_downloader

    def test_thread_pooled_downloader(self):
        req_resp_middleware = RequestResponseMiddleware(
            prefix_url=self.httpd.location,
            request_factory=lambda x: x,
        )

        collect_middleware = CollectRequestResponseMiddleware()

        downloader = ThreadedDownloader(
            middlewares=[UrllibAdapterMiddleware(), collect_middleware]
        )

        downloader.middlewares.insert(0, req_resp_middleware)

        pomp = Pomp(
            downloader=downloader,
            pipelines=[],
        )

        class Crawler(DummyCrawler):
            ENTRY_REQUESTS = '/root'

        pomp.pump(Crawler())

        assert_set_equal(
            set([r.url.replace(self.httpd.location, '')
                for r in collect_middleware.requests]),
            set(self.httpd.sitemap.keys())
        )
Developer: sibelius, Project: pomp, Lines: 29, Source: test_contrib_urllib.py

Example 10: test_exception_on_downloader_worker

    def test_exception_on_downloader_worker(self):
        req_resp_middleware = RequestResponseMiddleware(
            prefix_url='http://localhost',
            request_factory=UrllibHttpRequest,
        )

        collect_middleware = CollectRequestResponseMiddleware()

        downloader = ConcurrentDownloader(
            pool_size=5,
            worker_class=MockedDownloadWorkerWithException,
            worker_kwargs=None,
        )

        pomp = Pomp(
            downloader=downloader,
            middlewares=(req_resp_middleware, collect_middleware),
            pipelines=[],
        )

        class Crawler(DummyCrawler):
            ENTRY_REQUESTS = '/root'

        pomp.pump(Crawler())

        assert len(collect_middleware.requests) == 1
        assert len(collect_middleware.exceptions) == 1
Developer: estin, Project: pomp, Lines: 27, Source: test_contrib_concurrent.py

Example 11: test_queue_get_requests_with_count

    def test_queue_get_requests_with_count(self):

        class DummyDownloaderWithWorkers(DummyDownloader):

            def get_workers_count(self):
                return 5

        class SimpleQueue(BaseQueue):

            def __init__(self):
                self.requests = []

            def get_requests(self, count=None):
                # The downloader reports five workers, so the engine
                # asks the queue for up to five requests at a time
                assert count == 5
                try:
                    return self.requests.pop()
                except IndexError:
                    return  # empty queue

            def put_requests(self, request):
                self.requests.append(request)

        pomp = Pomp(
            downloader=DummyDownloaderWithWorkers(),
            middlewares=(url_to_request_middl, ),
        )

        # override internal queue with own
        pomp.queue = SimpleQueue()

        pomp.pump(Crawler())
Developer: estin, Project: pomp, Lines: 32, Source: test_simple_crawler.py

Example 12: test_exception_on_processing_response

def test_exception_on_processing_response():

    collect_middleware = CollectRequestResponseMiddleware()
    pomp = Pomp(
        downloader=DummyDownloader(
            middlewares=[RaiseOnResponseMiddleware(), collect_middleware],
        ),
    )

    pomp.pump(Crawler())

    assert_equal(len(collect_middleware.exceptions), 1)
    assert_equal(len(collect_middleware.requests), 1)
    assert_equal(len(collect_middleware.responses), 1)
Developer: danielnaab, Project: pomp, Lines: 10, Source: test_middleware.py

Example 13: crawler_worker

def crawler_worker(crawler_class, source_queue, stop_event):
    pid = os.getpid()
    log.debug('Start crawler worker: %s', pid)
    pomp = Pomp(
        downloader=UrllibDownloader(timeout=3),
        pipelines=[],
        queue=WrappedQueue(source_queue, stop_event),
    )
    pomp.pump(crawler_class())
    log.debug('Stop crawler worker: %s', pid)
    return True
Developer: danielnaab, Project: pomp, Lines: 11, Source: e03_queue.py

Example 14: test_exception_on_processing_response_callback

def test_exception_on_processing_response_callback():
    class CrawlerWithException(Crawler):
        def extract_items(self, *args, **kwargs):
            raise Exception("some exception on extract items")

    collect_middleware = CollectRequestResponseMiddleware()
    pomp = Pomp(downloader=DummyDownloader(middlewares=[collect_middleware]))

    pomp.pump(CrawlerWithException())

    assert_equal(len(collect_middleware.exceptions), 1)
    assert_equal(len(collect_middleware.requests), 1)
    assert_equal(len(collect_middleware.responses), 1)
Developer: danielnaab, Project: pomp, Lines: 13, Source: test_middleware.py

Example 15: test_crawler_return_none

    def test_crawler_return_none(self):

        class CrawlerWithoutItems(BaseCrawler):
            ENTRY_REQUESTS = 'http://localhost/'

            def extract_items(self, *args, **kwargs):
                pass

            def next_requests(self, *args, **kwargs):
                pass

        pomp = Pomp(
            downloader=DummyDownloader(middlewares=[url_to_request_middl]),
        )
        pomp.pump(CrawlerWithoutItems())
Developer: danielnaab, Project: pomp, Lines: 15, Source: test_simple_crawler.py


Note: The pomp.core.engine.Pomp class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their authors; copyright of the source code remains with the original authors, and any distribution or use should follow the corresponding project's License. Do not reproduce without permission.