本文整理汇总了 Python 中 pomp.core.engine.Pomp.pump 方法的典型用法代码示例。如果您正苦于以下问题：Python Pomp.pump 方法的具体用法？Python Pomp.pump 怎么用？Python Pomp.pump 使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 pomp.core.engine.Pomp 的用法示例。
在下文中一共展示了Pomp.pump方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_concurrent_crawler
# 需要导入模块: from pomp.core.engine import Pomp [as 别名]
# 或者: from pomp.core.engine.Pomp import pump [as 别名]
def test_concurrent_crawler(self):
    """Pump a ``ConcurrentCrawler`` and compare requested paths to the sitemap.

    Both the downloader and the crawler are created with ``pool_size=2``.
    The test passes when the set of request URLs recorded by the collecting
    middleware, with ``self.httpd.location`` removed from each URL, equals
    the key set of ``self.httpd.sitemap``.
    """
    url_prefixer = RequestResponseMiddleware(
        prefix_url=self.httpd.location,
        request_factory=lambda x: x,
    )
    collector = CollectRequestResponseMiddleware()
    fetcher = ConcurrentUrllibDownloader(pool_size=2)

    # Order matters: prefix the URL, adapt for urllib, then record.
    middleware_chain = (url_prefixer, UrllibAdapterMiddleware(), collector)
    engine = Pomp(
        downloader=fetcher,
        middlewares=middleware_chain,
        pipelines=[],
    )

    crawler = ConcurrentCrawler(pool_size=2, worker_class=MockedCrawlerWorker)
    engine.pump(crawler)

    requested_paths = {
        request.url.replace(self.httpd.location, '')
        for request in collector.requests
    }
    assert requested_paths == set(self.httpd.sitemap.keys())
示例2: test_exception_on_downloader_worker
# 需要导入模块: from pomp.core.engine import Pomp [as 别名]
# 或者: from pomp.core.engine.Pomp import pump [as 别名]
def test_exception_on_downloader_worker(self):
    """Pump through a downloader built on the exception-flavoured worker.

    The ``ConcurrentDownloader`` is created with
    ``MockedDownloadWorkerWithException`` as its worker class. After the
    pump, the collecting middleware must hold exactly one request and
    exactly one exception.
    """
    url_prefixer = RequestResponseMiddleware(
        prefix_url='http://localhost',
        request_factory=UrllibHttpRequest,
    )
    collector = CollectRequestResponseMiddleware()
    fetcher = ConcurrentDownloader(
        pool_size=5,
        worker_class=MockedDownloadWorkerWithException,
        worker_kwargs=None,
    )
    engine = Pomp(
        downloader=fetcher,
        middlewares=(url_prefixer, collector),
        pipelines=[],
    )

    class Crawler(DummyCrawler):
        # Single entry point; the prefix middleware is given 'http://localhost'.
        ENTRY_REQUESTS = '/root'

    engine.pump(Crawler())

    assert len(collector.requests) == 1
    assert len(collector.exceptions) == 1
示例3: test_concurrent_downloader
# 需要导入模块: from pomp.core.engine import Pomp [as 别名]
# 或者: from pomp.core.engine.Pomp import pump [as 别名]
def test_concurrent_downloader(self):
    """Pump through a ``ConcurrentDownloader`` backed by a mocked worker.

    The test passes when the set of request URLs recorded by the collecting
    middleware, with ``'http://localhost'`` removed from each URL, equals
    the key set of ``MockedDownloadWorker.sitemap``.
    """
    url_prefixer = RequestResponseMiddleware(
        prefix_url='http://localhost',
        request_factory=UrllibHttpRequest,
    )
    collector = CollectRequestResponseMiddleware()
    fetcher = ConcurrentDownloader(
        pool_size=5,
        worker_class=MockedDownloadWorker,
        worker_kwargs=None,
    )
    engine = Pomp(
        downloader=fetcher,
        middlewares=(url_prefixer, collector),
        pipelines=[],
    )

    class Crawler(DummyCrawler):
        ENTRY_REQUESTS = '/root'

    engine.pump(Crawler())

    requested_paths = {
        request.url.replace('http://localhost', '')
        for request in collector.requests
    }
    assert requested_paths == set(MockedDownloadWorker.sitemap.keys())
示例4: test_exception_on_crawler_worker
# 需要导入模块: from pomp.core.engine import Pomp [as 别名]
# 或者: from pomp.core.engine.Pomp import pump [as 别名]
def test_exception_on_crawler_worker(self):
    """Pump a ``ConcurrentCrawler`` built on the exception-flavoured worker.

    The crawler is created with ``MockedCrawlerWorkerWithException`` as its
    worker class. After the pump, the collecting middleware must hold
    exactly one request and exactly one exception.
    """
    url_prefixer = RequestResponseMiddleware(
        prefix_url=self.httpd.location,
        request_factory=lambda x: x,
    )
    collector = CollectRequestResponseMiddleware()
    fetcher = ConcurrentUrllibDownloader(pool_size=2)

    # Order matters: prefix the URL, adapt for urllib, then record.
    middleware_chain = (url_prefixer, UrllibAdapterMiddleware(), collector)
    engine = Pomp(
        downloader=fetcher,
        middlewares=middleware_chain,
        pipelines=[],
    )

    crawler = ConcurrentCrawler(
        pool_size=2,
        worker_class=MockedCrawlerWorkerWithException,
    )
    engine.pump(crawler)

    assert len(collector.requests) == 1
    assert len(collector.exceptions) == 1
示例5: test_urllib_downloader
# 需要导入模块: from pomp.core.engine import Pomp [as 别名]
# 或者: from pomp.core.engine.Pomp import pump [as 别名]
def test_urllib_downloader(self):
    """Pump a crawler through ``UrllibDownloader`` and check requested paths.

    The test passes when the set of request URLs recorded by the collecting
    middleware, with ``self.httpd.location`` removed from each URL, equals
    the key set of ``self.httpd.sitemap``.
    """
    url_prefixer = RequestResponseMiddleware(
        prefix_url=self.httpd.location,
        request_factory=lambda x: x,
    )
    collector = CollectRequestResponseMiddleware()
    fetcher = UrllibDownloader()

    # Order matters: prefix the URL, adapt for urllib, then record.
    middleware_chain = (url_prefixer, UrllibAdapterMiddleware(), collector)
    engine = Pomp(
        downloader=fetcher,
        middlewares=middleware_chain,
        pipelines=[],
    )

    class Crawler(DummyCrawler):
        ENTRY_REQUESTS = '/root'

    engine.pump(Crawler())

    requested_paths = {
        request.url.replace(self.httpd.location, '')
        for request in collector.requests
    }
    assert requested_paths == set(self.httpd.sitemap.keys())
示例6: test_queue_crawler
# 需要导入模块: from pomp.core.engine import Pomp [as 别名]
# 或者: from pomp.core.engine.Pomp import pump [as 别名]
def test_queue_crawler(self):
    """Pump a crawler through an engine that uses a custom request queue.

    A list-backed ``SimpleQueue`` is handed to ``Pomp`` via ``queue=``.
    After the pump, the URLs of the items gathered by ``RoadPipeline`` must
    be exactly the three expected ``python.org`` URLs.
    """
    road = RoadPipeline()

    class SimpleQueue(BaseQueue):
        """Minimal in-memory queue: ``put`` appends, ``get`` pops the tail."""

        def __init__(self):
            self.requests = []

        def get_requests(self, count=None):
            # ``count`` is accepted but ignored so this queue works both with
            # callers that use ``get_requests()`` and with callers that pass
            # ``count=...`` (as the queue in
            # test_queue_get_requests_with_count expects).
            try:
                return self.requests.pop()
            except IndexError:
                return None  # empty queue

        def put_requests(self, request):
            self.requests.append(request)

    queue = SimpleQueue()
    pomp = Pomp(
        downloader=DummyDownloader(middlewares=[url_to_request_middl]),
        pipelines=[
            road,
        ],
        queue=queue,
    )

    pomp.pump(Crawler())

    assert_equal(
        {item.url for item in road.collection},
        {
            'http://python.org/1',
            'http://python.org/1/trash',
            'http://python.org/2',
        },
    )
示例7: test_thread_pooled_downloader
# 需要导入模块: from pomp.core.engine import Pomp [as 别名]
# 或者: from pomp.core.engine.Pomp import pump [as 别名]
def test_thread_pooled_downloader(self):
    """Pump a crawler through ``ThreadedDownloader`` and check requested paths.

    Middlewares are attached to the downloader rather than to ``Pomp``. The
    test passes when the set of recorded request URLs, with
    ``self.httpd.location`` removed from each, equals the key set of
    ``self.httpd.sitemap``.
    """
    url_prefixer = RequestResponseMiddleware(
        prefix_url=self.httpd.location,
        request_factory=lambda x: x,
    )
    collector = CollectRequestResponseMiddleware()
    fetcher = ThreadedDownloader(
        middlewares=[UrllibAdapterMiddleware(), collector],
    )
    # Put the URL-prefixing middleware at the head of the downloader's list.
    fetcher.middlewares.insert(0, url_prefixer)

    engine = Pomp(downloader=fetcher, pipelines=[])

    class Crawler(DummyCrawler):
        ENTRY_REQUESTS = '/root'

    engine.pump(Crawler())

    requested_paths = {
        request.url.replace(self.httpd.location, '')
        for request in collector.requests
    }
    assert_set_equal(requested_paths, set(self.httpd.sitemap.keys()))
示例8: test_concurrent_urllib_downloader
# 需要导入模块: from pomp.core.engine import Pomp [as 别名]
# 或者: from pomp.core.engine.Pomp import pump [as 别名]
def test_concurrent_urllib_downloader(self):
    """Pump a crawler through ``ConcurrentUrllibDownloader``.

    Middlewares are attached to the downloader rather than to ``Pomp``. The
    test passes when the set of recorded request URLs, with
    ``self.httpd.location`` removed from each, equals the key set of
    ``self.httpd.sitemap``.
    """
    url_prefixer = RequestResponseMiddleware(
        prefix_url=self.httpd.location,
        request_factory=UrllibHttpRequest,
    )
    collector = CollectRequestResponseMiddleware()
    fetcher = ConcurrentUrllibDownloader(middlewares=[collector])
    # Put the URL-prefixing middleware at the head of the downloader's list.
    fetcher.middlewares.insert(0, url_prefixer)

    engine = Pomp(downloader=fetcher, pipelines=[])

    class Crawler(DummyCrawler):
        ENTRY_REQUESTS = '/root'

    engine.pump(Crawler())

    requested_paths = {
        request.url.replace(self.httpd.location, '')
        for request in collector.requests
    }
    assert_set_equal(requested_paths, set(self.httpd.sitemap.keys()))
示例9: test_queue_get_requests_with_count
# 需要导入模块: from pomp.core.engine import Pomp [as 别名]
# 或者: from pomp.core.engine.Pomp import pump [as 别名]
def test_queue_get_requests_with_count(self):
    """Check the ``count`` passed to ``get_requests`` on a custom queue.

    The downloader subclass reports five workers through
    ``get_workers_count``; the queue asserts that every ``get_requests``
    call receives ``count == 5``.
    """

    class DummyDownloaderWithWorkers(DummyDownloader):
        def get_workers_count(self):
            return 5

    class SimpleQueue(BaseQueue):
        """List-backed queue that validates the ``count`` it is asked for."""

        def __init__(self):
            self.requests = []

        def get_requests(self, count=None):
            # The requested count must equal the value returned by
            # ``get_workers_count`` above.
            assert count == 5
            try:
                pending = self.requests.pop()
            except IndexError:
                return None  # empty queue
            else:
                return pending

        def put_requests(self, request):
            self.requests.append(request)

    engine = Pomp(
        downloader=DummyDownloaderWithWorkers(),
        middlewares=(url_to_request_middl,),
    )
    # Replace the engine's queue attribute with the checking queue.
    engine.queue = SimpleQueue()

    engine.pump(Crawler())
示例10: test_exception_on_processing_response
# 需要导入模块: from pomp.core.engine import Pomp [as 别名]
# 或者: from pomp.core.engine.Pomp import pump [as 别名]
def test_exception_on_processing_response():
    """Pump with ``RaiseOnResponseMiddleware`` ahead of the collector.

    After the pump, the collecting middleware must hold exactly one
    exception, one request and one response.
    """
    collector = CollectRequestResponseMiddleware()
    fetcher = DummyDownloader(
        middlewares=[RaiseOnResponseMiddleware(), collector],
    )
    engine = Pomp(downloader=fetcher)

    engine.pump(Crawler())

    # Same checks, in the same order, for each recorded collection.
    for recorded in ('exceptions', 'requests', 'responses'):
        assert_equal(len(getattr(collector, recorded)), 1)
示例11: crawler_worker
# 需要导入模块: from pomp.core.engine import Pomp [as 别名]
# 或者: from pomp.core.engine.Pomp import pump [as 别名]
def crawler_worker(crawler_class, source_queue, stop_event):
    """Run one crawler instance through its own ``Pomp`` engine.

    Args:
        crawler_class: Callable invoked with no arguments to build the
            crawler that is pumped.
        source_queue: Passed as the first argument to ``WrappedQueue``.
        stop_event: Passed as the second argument to ``WrappedQueue``.

    Returns:
        ``True`` once ``pump`` has returned.
    """
    worker_pid = os.getpid()
    log.debug('Start crawler worker: %s', worker_pid)

    fetcher = UrllibDownloader(timeout=3)
    request_queue = WrappedQueue(source_queue, stop_event)
    engine = Pomp(downloader=fetcher, pipelines=[], queue=request_queue)
    engine.pump(crawler_class())

    log.debug('Stop crawler worker: %s', worker_pid)
    return True
示例12: test_exception_on_processing_response_callback
# 需要导入模块: from pomp.core.engine import Pomp [as 别名]
# 或者: from pomp.core.engine.Pomp import pump [as 别名]
def test_exception_on_processing_response_callback():
    """Pump a crawler whose ``extract_items`` always raises.

    After the pump, the collecting middleware must hold exactly one
    exception, one request and one response.
    """

    class CrawlerWithException(Crawler):
        def extract_items(self, *args, **kwargs):
            raise Exception("some exception on extract items")

    collector = CollectRequestResponseMiddleware()
    fetcher = DummyDownloader(middlewares=[collector])
    engine = Pomp(downloader=fetcher)

    engine.pump(CrawlerWithException())

    # Same checks, in the same order, for each recorded collection.
    for recorded in ('exceptions', 'requests', 'responses'):
        assert_equal(len(getattr(collector, recorded)), 1)
示例13: test_crawler_return_none
# 需要导入模块: from pomp.core.engine import Pomp [as 别名]
# 或者: from pomp.core.engine.Pomp import pump [as 别名]
def test_crawler_return_none(self):
    """Pump a crawler whose callbacks both return ``None``.

    There are no assertions; the test passes if ``pump`` returns without
    raising.
    """

    class CrawlerWithoutItems(BaseCrawler):
        ENTRY_REQUESTS = 'http://localhost/'

        def extract_items(self, *args, **kwargs):
            return None

        def next_requests(self, *args, **kwargs):
            return None

    fetcher = DummyDownloader(middlewares=[url_to_request_middl])
    engine = Pomp(downloader=fetcher)
    engine.pump(CrawlerWithoutItems())
示例14: do_simple_test
# 需要导入模块: from pomp.core.engine import Pomp [as 别名]
# 或者: from pomp.core.engine.Pomp import pump [as 别名]
def do_simple_test(self, queue=None):
    """Pump a crawler through ``TwistedDownloader`` and return a Deferred.

    Args:
        queue: Forwarded unchanged to ``Pomp`` as its ``queue`` argument.

    Returns:
        A ``defer.Deferred`` that is fired from the pump result's callback
        and then runs the sitemap check: recorded request URLs, with
        ``self.httpd.location`` removed from each, must equal the key set
        of ``self.httpd.sitemap``.
    """
    url_prefixer = RequestResponseMiddleware(
        prefix_url=self.httpd.location,
        request_factory=TwistedHttpRequest,
    )
    collector = CollectRequestResponseMiddleware()
    fetcher = TwistedDownloader(reactor, middlewares=[collector])
    # Put the URL-prefixing middleware at the head of the downloader's list.
    fetcher.middlewares.insert(0, url_prefixer)

    engine = Pomp(
        downloader=fetcher,
        pipelines=[PrintPipeline()],
        queue=queue,
    )

    class Crawler(DummyCrawler):
        ENTRY_REQUESTS = '/root'

    finished = defer.Deferred()
    pump_result = engine.pump(Crawler())
    # NOTE(review): the pump result exposes ``add_callback`` while the
    # Deferred created above uses ``addCallback`` -- presumably ``pump``
    # returns pomp's own deferred type; confirm.
    pump_result.add_callback(finished.callback)

    def check(_result):
        requested_paths = {
            request.url.replace(self.httpd.location, '')
            for request in collector.requests
        }
        assert_set_equal(requested_paths, set(self.httpd.sitemap.keys()))

    finished.addCallback(check)
    return finished
示例15: test_exceptions
# 需要导入模块: from pomp.core.engine import Pomp [as 别名]
# 或者: from pomp.core.engine.Pomp import pump [as 别名]
def test_exceptions(self):
    """Pump through ``TwistedDownloader`` with ``prefix_url='invalid url'``.

    Returns:
        A ``defer.Deferred`` that is fired from the pump result's callback
        and then checks that the collecting middleware recorded exactly one
        exception and that it is a ``BaseDownloadException``.
    """
    url_prefixer = RequestResponseMiddleware(
        prefix_url='invalid url',
        request_factory=TwistedHttpRequest,
    )
    collector = CollectRequestResponseMiddleware()
    fetcher = TwistedDownloader(reactor, middlewares=[collector])
    # Put the URL-prefixing middleware at the head of the downloader's list.
    fetcher.middlewares.insert(0, url_prefixer)

    engine = Pomp(
        downloader=fetcher,
        pipelines=[PrintPipeline()],
    )

    class Crawler(DummyCrawler):
        ENTRY_REQUESTS = '/root'

    finished = defer.Deferred()
    pump_result = engine.pump(Crawler())
    # NOTE(review): the pump result exposes ``add_callback`` while the
    # Deferred created above uses ``addCallback`` -- presumably ``pump``
    # returns pomp's own deferred type; confirm.
    pump_result.add_callback(finished.callback)

    def check(_result):
        assert len(collector.exceptions) == 1
        assert isinstance(collector.exceptions[0], BaseDownloadException)

    finished.addCallback(check)
    return finished