本文整理汇总了Python中pyspider.libs.multiprocessing_queue.Queue类的典型用法代码示例。如果您正苦于以下问题：Python Queue类的具体用法？Python Queue怎么用？Python Queue使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Queue类的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: setUpClass
def setUpClass(self):
    """Bring up everything the fetcher tests talk to.

    Starts httpbin in a subprocess (port 14887), a Fetcher with its XML-RPC
    server (port 24444) and main loop in threads, a ``pyproxy`` child process
    (port 14830) and, when the binary can be launched, a ``phantomjs`` child
    process (port 25555). ``self.phantomjs`` is ``None`` when launching
    phantomjs raises ``OSError``.
    """
    # NOTE(review): imported but not referenced here -- presumably needed for
    # its import-time side effects; confirm.
    import tests.data_test_webpage
    import httpbin

    xmlrpc_port = 24444

    self.httpbin_thread = utils.run_in_subprocess(
        httpbin.app.run, port=14887, passthrough_errors=False)
    self.httpbin = 'http://127.0.0.1:14887'

    # Bounded queues wired into the fetcher under test.
    self.inqueue = Queue(10)
    self.outqueue = Queue(10)
    fetcher = Fetcher(self.inqueue, self.outqueue)
    fetcher.phantomjs_proxy = '127.0.0.1:25555'
    self.fetcher = fetcher

    self.rpc = xmlrpc_client.ServerProxy('http://localhost:%d' % xmlrpc_port)
    self.xmlrpc_thread = utils.run_in_thread(fetcher.xmlrpc_run, port=xmlrpc_port)
    self.thread = utils.run_in_thread(fetcher.run)

    proxy_command = [
        'pyproxy',
        '--username=binux',
        '--password=123456',
        '--port=14830',
        '--debug',
    ]
    self.proxy_thread = subprocess.Popen(proxy_command, close_fds=True)
    self.proxy = '127.0.0.1:14830'

    phantomjs_command = [
        'phantomjs',
        os.path.join(os.path.dirname(__file__),
                     '../pyspider/fetcher/phantomjs_fetcher.js'),
        '25555',
    ]
    try:
        phantomjs = subprocess.Popen(phantomjs_command)
    except OSError:
        # Launch failed (e.g. binary not available): record that there is no
        # phantomjs process.
        phantomjs = None
    self.phantomjs = phantomjs

    # Give the freshly started servers a moment before tests run.
    time.sleep(0.5)
示例2: setUpClass
def setUpClass(self):
    """Start the helper servers and the Fetcher used by the fetcher tests.

    httpbin runs in a subprocess on port 14887; the Fetcher's XML-RPC server
    (port 24444) and main loop run in threads; ``pyproxy`` (port 14830) and
    ``phantomjs`` (port 25555) run as child processes. If phantomjs cannot be
    launched (``OSError``), ``self.phantomjs`` is set to ``None``.
    """
    # NOTE(review): not referenced below -- presumably imported for side
    # effects; confirm.
    import tests.data_test_webpage
    import httpbin

    rpc_port = 24444

    self.httpbin_thread = utils.run_in_subprocess(httpbin.app.run, port=14887)
    self.httpbin = "http://127.0.0.1:14887"

    self.inqueue = Queue(10)
    self.outqueue = Queue(10)
    fetcher = Fetcher(self.inqueue, self.outqueue)
    fetcher.phantomjs_proxy = "127.0.0.1:25555"
    self.fetcher = fetcher

    rpc_url = "http://localhost:%d" % rpc_port
    self.rpc = xmlrpc_client.ServerProxy(rpc_url)
    self.xmlrpc_thread = utils.run_in_thread(fetcher.xmlrpc_run, port=rpc_port)
    self.thread = utils.run_in_thread(fetcher.run)

    pyproxy_argv = ["pyproxy", "--username=binux", "--password=123456"]
    pyproxy_argv += ["--port=14830", "--debug"]
    self.proxy_thread = subprocess.Popen(pyproxy_argv, close_fds=True)
    self.proxy = "127.0.0.1:14830"

    script_path = os.path.join(
        os.path.dirname(__file__), "../pyspider/fetcher/phantomjs_fetcher.js"
    )
    try:
        child = subprocess.Popen(["phantomjs", script_path, "25555"])
    except OSError:
        # phantomjs could not be launched; remember that it is absent.
        child = None
    self.phantomjs = child

    # Short pause so the background services can come up.
    time.sleep(0.5)
示例3: setUpClass
def setUpClass(self):
    """Recreate ``./data/tests`` and run a Scheduler in a background thread.

    Opens task/project/result databases at the class-level paths, creates the
    three bounded queues the scheduler is wired to, builds an XML-RPC client
    for ``self.scheduler_xmlrpc_port`` and then starts the scheduler (plus its
    XML-RPC server thread) via ``run_in_thread``.
    """
    shutil.rmtree("./data/tests", ignore_errors=True)
    os.makedirs("./data/tests")

    def open_taskdb():
        return taskdb.TaskDB(self.taskdb_path)

    def open_projectdb():
        return projectdb.ProjectDB(self.projectdb_path)

    def open_resultdb():
        return resultdb.ResultDB(self.resultdb_path)

    # Handles used directly by the test methods.
    self.taskdb = open_taskdb()
    self.projectdb = open_projectdb()
    self.resultdb = open_resultdb()

    self.newtask_queue = Queue(10)
    self.status_queue = Queue(10)
    self.scheduler2fetcher = Queue(10)

    rpc_url = "http://localhost:%d" % self.scheduler_xmlrpc_port
    self.rpc = xmlrpc_client.ServerProxy(rpc_url)

    def scheduler_main():
        # The scheduler gets its own database handles, opened inside the
        # thread that runs it.
        options = dict(
            taskdb=open_taskdb(),
            projectdb=open_projectdb(),
            newtask_queue=self.newtask_queue,
            status_queue=self.status_queue,
            out_queue=self.scheduler2fetcher,
            data_path="./data/tests/",
            resultdb=open_resultdb(),
        )
        worker = Scheduler(**options)

        overrides = (
            ("UPDATE_PROJECT_INTERVAL", 0.1),
            ("LOOP_INTERVAL", 0.1),
            ("INQUEUE_LIMIT", 10),
            ("DELETE_TIME", 0),
            ("DEFAULT_RETRY_DELAY", {"": 5}),
            ("_last_tick", int(time.time())),  # not dispatch cronjob
        )
        for attr, value in overrides:
            setattr(worker, attr, value)

        self.xmlrpc_thread = run_in_thread(worker.xmlrpc_run, port=self.scheduler_xmlrpc_port)
        worker.run()

    self.process = run_in_thread(scheduler_main)
    # Let the scheduler thread start before the first test runs.
    time.sleep(1)
示例4: setUpClass
def setUpClass(self):
    """Recreate ``./data/tests/`` and run a Processor in a background thread.

    Creates the project database handle and four bounded queues, then starts
    a thread that builds the Processor, shortens its project-check interval
    to 0.1 and enters its run loop.
    """
    shutil.rmtree('./data/tests/', ignore_errors=True)
    os.makedirs('./data/tests/')

    def open_projectdb():
        return projectdb.ProjectDB(self.projectdb_path)

    self.projectdb = open_projectdb()

    # Same creation order as the attribute names listed here.
    for queue_attr in ('in_queue', 'status_queue', 'newtask_queue', 'result_queue'):
        setattr(self, queue_attr, Queue(10))

    def processor_main():
        # The processor gets its own project database handle.
        self.processor = Processor(
            open_projectdb(),
            self.in_queue,
            self.status_queue,
            self.newtask_queue,
            self.result_queue,
        )
        manager = self.processor.project_manager
        manager.CHECK_PROJECTS_INTERVAL = 0.1
        self.processor.run()

    self.process = run_in_thread(processor_main)
    # Let the processor thread start before the first test runs.
    time.sleep(1)
示例5: setUpClass
def setUpClass(self):
    """Recreate ``./data/tests/`` and run a ResultWorker in a background thread.

    The class keeps one result database handle for assertions; the worker is
    given a second handle and consumes from ``self.inqueue``.
    """
    shutil.rmtree('./data/tests/', ignore_errors=True)
    os.makedirs('./data/tests/')

    def open_resultdb():
        return resultdb.ResultDB(self.resultdb_path)

    self.resultdb = open_resultdb()
    self.inqueue = Queue(10)

    def worker_main():
        worker = ResultWorker(open_resultdb(), self.inqueue)
        # Published on the class before the loop starts running.
        self.result_worker = worker
        worker.run()

    self.process = run_in_thread(worker_main)
    # Let the worker thread start before the first test runs.
    time.sleep(1)
示例6: TestScheduler
class TestScheduler(unittest.TestCase):
    """Tests that drive a Scheduler running in a background thread.

    The scheduler is fed through ``newtask_queue`` / ``status_queue``, emits
    on ``scheduler2fetcher`` and is inspected through the XML-RPC proxy
    ``self.rpc``. Test names carry numeric prefixes (``test_10_``,
    ``test_20_`` ...); presumably they depend on running in that order --
    confirm.
    """

    # On-disk locations handed to TaskDB / ProjectDB / ResultDB in setUpClass.
    taskdb_path = './data/tests/task.db'
    projectdb_path = './data/tests/project.db'
    resultdb_path = './data/tests/result.db'
    # NOTE(review): not referenced in the visible part of this class.
    check_project_time = 1
    # Port shared by the scheduler's XML-RPC server and the client proxy.
    scheduler_xmlrpc_port = 23333

    @classmethod
    def setUpClass(self):
        """Recreate ./data/tests and start the scheduler plus its XML-RPC server."""
        shutil.rmtree('./data/tests', ignore_errors=True)
        os.makedirs('./data/tests')

        def get_taskdb():
            return taskdb.TaskDB(self.taskdb_path)
        self.taskdb = get_taskdb()

        def get_projectdb():
            return projectdb.ProjectDB(self.projectdb_path)
        self.projectdb = get_projectdb()

        def get_resultdb():
            return resultdb.ResultDB(self.resultdb_path)
        self.resultdb = get_resultdb()

        # Bounded queues connecting the tests to the scheduler.
        self.newtask_queue = Queue(10)
        self.status_queue = Queue(10)
        self.scheduler2fetcher = Queue(10)
        self.rpc = xmlrpc_client.ServerProxy('http://localhost:%d' % self.scheduler_xmlrpc_port)

        def run_scheduler():
            # The scheduler receives its own database handles, opened inside
            # the thread that runs it.
            scheduler = Scheduler(taskdb=get_taskdb(), projectdb=get_projectdb(),
                                  newtask_queue=self.newtask_queue, status_queue=self.status_queue,
                                  out_queue=self.scheduler2fetcher, data_path="./data/tests/",
                                  resultdb=get_resultdb())
            # Small intervals/limits overriding the scheduler's own settings
            # for the test run.
            scheduler.UPDATE_PROJECT_INTERVAL = 0.1
            scheduler.LOOP_INTERVAL = 0.1
            scheduler.INQUEUE_LIMIT = 10
            scheduler.DELETE_TIME = 0
            scheduler.DEFAULT_RETRY_DELAY = {'': 5}
            scheduler._last_tick = int(time.time())  # not dispatch cronjob
            self.xmlrpc_thread = run_in_thread(scheduler.xmlrpc_run, port=self.scheduler_xmlrpc_port)
            scheduler.run()

        self.process = run_in_thread(run_scheduler)
        # Give the scheduler thread time to start.
        time.sleep(1)

    @classmethod
    def tearDownClass(self):
        """Stop the scheduler, remove test data and check that ports are closed."""
        if self.process.is_alive():
            # Ask the scheduler to quit over XML-RPC, then wait up to 5s.
            self.rpc._quit()
            self.process.join(5)
        self.xmlrpc_thread.join()
        assert not self.process.is_alive()
        shutil.rmtree('./data/tests', ignore_errors=True)
        time.sleep(1)

        # No listener may be left on these ports. 23333 is this class's
        # XML-RPC port; NOTE(review): 5000/24444/25555 presumably belong to
        # other components' tests -- confirm.
        assert not utils.check_port_open(5000)
        assert not utils.check_port_open(self.scheduler_xmlrpc_port)
        assert not utils.check_port_open(24444)
        assert not utils.check_port_open(25555)

    def test_10_new_task_ignore(self):
        '''
        A task for a project that has not been created leaves the scheduler empty.

        task_queue = [ ]
        '''
        self.newtask_queue.put({
            'taskid': 'taskid',
            'project': 'test_project',
            'url': 'url'
        })  # unknown project: test_project
        self.assertEqual(self.rpc.size(), 0)
        self.assertEqual(len(self.rpc.get_active_tasks()), 0)

    def test_20_new_project(self):
        '''
        Insert 'test_project' into the project database with status TODO.

        task_queue = [ ]
        '''
        self.projectdb.insert('test_project', {
            'name': 'test_project',
            'group': 'group',
            'status': 'TODO',
            'script': 'import time\nprint(time.time())',
            'comments': 'test project',
            'rate': 1.0,
            'burst': 10,
        })

    def test_30_update_project(self):
        '''
        Switching the project to DEBUG makes the scheduler emit a task.

        task_queue = [ ]
        '''
        # Local alias for the stdlib queue module; inside this method it
        # shadows any module-level name `Queue`.
        from six.moves import queue as Queue
        # Nothing is emitted within 1s while the project is still TODO.
        with self.assertRaises(Queue.Empty):
            task = self.scheduler2fetcher.get(timeout=1)
        self.projectdb.update('test_project', status="DEBUG")
        time.sleep(0.1)
        # Ask the scheduler, over XML-RPC, to reload projects.
        self.rpc.update_project()

        task = self.scheduler2fetcher.get(timeout=10)
        self.assertIsNotNone(task)
#.........这里部分代码省略.........
示例7: TestFetcher
class TestFetcher(unittest.TestCase):
    """Tests for Fetcher against a local httpbin server.

    Class-level fixtures start httpbin, the fetcher (with its XML-RPC
    server), a pyproxy process and, if it can be launched, phantomjs.
    """

    # Template task; each test deep-copies it and fills in 'url'.
    sample_task_http = {
        'taskid': 'taskid',
        'project': 'project',
        'url': '',
        'fetch': {
            'method': 'GET',
            'headers': {
                'Cookie': 'a=b',
                'a': 'b'
            },
            'cookies': {
                'c': 'd',
            },
            'timeout': 60,
            'save': 'abc',
        },
        'process': {
            'callback': 'callback',
            'save': [1, 2, 3],
        },
    }

    @classmethod
    def setUpClass(self):
        """Start httpbin, the fetcher threads, pyproxy and (optionally) phantomjs."""
        # NOTE(review): not referenced below -- presumably imported for its
        # import-time side effects; confirm.
        import tests.data_test_webpage
        import httpbin

        self.httpbin_thread = utils.run_in_subprocess(httpbin.app.run, port=14887, passthrough_errors=False)
        self.httpbin = 'http://127.0.0.1:14887'

        self.inqueue = Queue(10)
        self.outqueue = Queue(10)
        self.fetcher = Fetcher(self.inqueue, self.outqueue)
        self.fetcher.phantomjs_proxy = '127.0.0.1:25555'
        # XML-RPC client and server share port 24444.
        self.rpc = xmlrpc_client.ServerProxy('http://localhost:%d' % 24444)
        self.xmlrpc_thread = utils.run_in_thread(self.fetcher.xmlrpc_run, port=24444)
        self.thread = utils.run_in_thread(self.fetcher.run)
        # Despite the attribute name, this is a child process (Popen).
        self.proxy_thread = subprocess.Popen(['pyproxy', '--username=binux',
                                              '--password=123456', '--port=14830',
                                              '--debug'], close_fds=True)
        self.proxy = '127.0.0.1:14830'
        try:
            self.phantomjs = subprocess.Popen(['phantomjs',
                os.path.join(os.path.dirname(__file__),
                    '../pyspider/fetcher/phantomjs_fetcher.js'),
                '25555'])
        except OSError:
            # phantomjs could not be launched; tearDownClass checks for None.
            self.phantomjs = None
        # Give the background services time to come up.
        time.sleep(0.5)

    @classmethod
    def tearDownClass(self):
        """Stop every helper process/thread and check that the ports are closed."""
        self.proxy_thread.terminate()
        self.proxy_thread.wait()
        self.httpbin_thread.terminate()
        self.httpbin_thread.join()

        if self.phantomjs:
            self.phantomjs.kill()
            self.phantomjs.wait()
        # Ask the fetcher to quit over XML-RPC, then wait for its run loop.
        self.rpc._quit()
        self.thread.join()

        # 24444, 25555 and 14887 are the ports used in setUpClass.
        # NOTE(review): 5000 and 23333 presumably belong to other components'
        # tests -- confirm.
        assert not utils.check_port_open(5000)
        assert not utils.check_port_open(23333)
        assert not utils.check_port_open(24444)
        assert not utils.check_port_open(25555)
        assert not utils.check_port_open(14887)

        time.sleep(1)

    def test_10_http_get(self):
        """GET /get: status, url and 'save' round-trip; headers and cookies are sent."""
        request = copy.deepcopy(self.sample_task_http)
        request['url'] = self.httpbin+'/get'
        result = self.fetcher.sync_fetch(request)
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 200, result)
        self.assertEqual(response.orig_url, request['url'])
        self.assertEqual(response.save, request['fetch']['save'])
        self.assertIsNotNone(response.json, response.content)
        # The custom header arrives, and the Cookie header carries both the
        # explicit 'Cookie' header value and the entry from 'cookies'.
        self.assertEqual(response.json['headers'].get('A'), 'b', response.json)
        self.assertIn('c=d', response.json['headers'].get('Cookie'), response.json)
        self.assertIn('a=b', response.json['headers'].get('Cookie'), response.json)

    def test_15_http_post(self):
        """POST /post with body 'binux': status, url and 'save' round-trip."""
        request = copy.deepcopy(self.sample_task_http)
        request['url'] = self.httpbin+'/post'
        request['fetch']['method'] = 'POST'
        request['fetch']['data'] = 'binux'
        request['fetch']['cookies'] = {'c': 'd'}
        result = self.fetcher.sync_fetch(request)
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.orig_url, request['url'])
        self.assertEqual(response.save, request['fetch']['save'])
        self.assertIsNotNone(response.json, response.content)
#.........这里部分代码省略.........
示例8: TestProcessor
class TestProcessor(unittest.TestCase):
    """Tests for ResultWorker running in a background thread.

    NOTE(review): the class is named ``TestProcessor`` although every test
    exercises ``ResultWorker``; the name is kept so test selection by name
    keeps working.

    Results are handed to the worker through ``self.inqueue`` as
    ``(task, result)`` tuples and verified through ``self.resultdb``, a
    second handle on the same database file.
    """

    # Database file shared by the worker's handle and the test's handle.
    resultdb_path = './data/tests/result.db'

    @classmethod
    def setUpClass(self):
        """Recreate ./data/tests/ and start the result worker thread."""
        shutil.rmtree('./data/tests/', ignore_errors=True)
        os.makedirs('./data/tests/')

        def get_resultdb():
            return resultdb.ResultDB(self.resultdb_path)
        self.resultdb = get_resultdb()
        self.inqueue = Queue(10)

        def run_result_worker():
            # The worker gets its own database handle, opened in its thread.
            self.result_worker = ResultWorker(get_resultdb(), self.inqueue)
            self.result_worker.run()
        self.process = run_in_thread(run_result_worker)
        # Give the worker thread time to start.
        time.sleep(1)

    @classmethod
    def tearDownClass(self):
        """Stop the worker (waiting up to 2s) and remove the test data."""
        if self.process.is_alive():
            self.result_worker.quit()
            self.process.join(2)
        assert not self.process.is_alive()
        shutil.rmtree('./data/tests/', ignore_errors=True)

    def test_10_bad_result(self):
        """An empty result for a task without taskid/url must not be stored."""
        self.inqueue.put(({'project': 'test_project'}, {}))
        # The worker consumes the queue asynchronously; without this pause the
        # "nothing stored" assertions run before it has seen the item and pass
        # regardless of what the worker does.
        time.sleep(0.1)
        self.resultdb._list_project()
        self.assertEqual(len(self.resultdb.projects), 0)
        self.assertEqual(self.resultdb.count('test_project'), 0)

    def test_10_bad_result_2(self):
        """A non-empty result for a task without taskid/url must not be stored."""
        self.inqueue.put(({'project': 'test_project'}, {'a': 'b'}))
        # Same reason as in test_10_bad_result: let the worker process the
        # item before asserting that nothing was written.
        time.sleep(0.1)
        self.resultdb._list_project()
        self.assertEqual(len(self.resultdb.projects), 0)
        self.assertEqual(self.resultdb.count('test_project'), 0)

    def test_20_insert_result(self):
        """A complete task with a dict result is stored and can be read back."""
        data = {
            'a': 'b'
        }
        self.inqueue.put(({
            'project': 'test_project',
            'taskid': 'id1',
            'url': 'url1'
        }, data))
        # Wait for the worker to pick up and persist the result.
        time.sleep(0.5)
        self.resultdb._list_project()
        self.assertEqual(len(self.resultdb.projects), 1)
        self.assertEqual(self.resultdb.count('test_project'), 1)

        result = self.resultdb.get('test_project', 'id1')
        self.assertEqual(result['result'], data)

    def test_30_overwrite(self):
        """A second result for the same taskid replaces the stored one."""
        self.inqueue.put(({
            'project': 'test_project',
            'taskid': 'id1',
            'url': 'url1'
        }, "abc"))
        time.sleep(0.1)
        result = self.resultdb.get('test_project', 'id1')
        self.assertEqual(result['result'], "abc")

    def test_40_insert_list(self):
        """A list result is stored and read back unchanged."""
        self.inqueue.put(({
            'project': 'test_project',
            'taskid': 'id2',
            'url': 'url1'
        }, ['a', 'b']))
        time.sleep(0.1)
        result = self.resultdb.get('test_project', 'id2')
        self.assertEqual(result['result'], ['a', 'b'])
示例9: TestProcessor
class TestProcessor(unittest.TestCase):
    """Tests for Processor running in a background thread.

    Tasks are handed to the processor as ``(task, fetch_result)`` tuples on
    ``self.in_queue``; its output is observed on ``self.status_queue`` and
    ``self.newtask_queue``.
    """

    # Project database file shared by the test's and the processor's handles.
    projectdb_path = './data/tests/project.db'

    @classmethod
    def setUpClass(self):
        """Recreate ./data/tests/ and start the processor thread."""
        shutil.rmtree('./data/tests/', ignore_errors=True)
        os.makedirs('./data/tests/')

        def get_projectdb():
            return projectdb.ProjectDB(self.projectdb_path)
        self.projectdb = get_projectdb()
        self.in_queue = Queue(10)
        self.status_queue = Queue(10)
        self.newtask_queue = Queue(10)
        self.result_queue = Queue(10)

        def run_processor():
            # The processor gets its own project database handle.
            self.processor = Processor(get_projectdb(), self.in_queue,
                                       self.status_queue, self.newtask_queue, self.result_queue)
            # Shorten the project-check interval for the test run.
            self.processor.project_manager.CHECK_PROJECTS_INTERVAL = 0.1
            self.processor.run()
        self.process = run_in_thread(run_processor)
        # Give the processor thread time to start.
        time.sleep(1)

    @classmethod
    def tearDownClass(self):
        """Stop the processor (waiting up to 2s) and remove the test data."""
        if self.process.is_alive():
            self.processor.quit()
            self.process.join(2)
        assert not self.process.is_alive()
        shutil.rmtree('./data/tests/', ignore_errors=True)

    def test_10_update_project(self):
        """A newly inserted project becomes visible; unknown projects yield a failed status."""
        self.assertIsNone(self.processor.project_manager.get('test_project'))
        self.projectdb.insert('test_project', {
            'name': 'test_project',
            'group': 'group',
            'status': 'TODO',
            'script': inspect.getsource(sample_handler),
            'comments': 'test project',
            'rate': 1.0,
            'burst': 10,
        })
        self.assertIsNone(self.processor.project_manager.get('not_exists'))
        self.assertIsNotNone(self.processor.project_manager.get('test_project'))

        # Task addressed to a project that was never inserted.
        task = {
            "process": {
                "callback": "on_start"
            },
            "project": "not_exists",
            "taskid": "data:,on_start",
            "url": "data:,on_start"
        }
        self.in_queue.put((task, {}))
        time.sleep(1)
        self.assertFalse(self.status_queue.empty())
        # Drain the queue; `status` ends up holding the last message.
        while not self.status_queue.empty():
            status = self.status_queue.get()
        self.assertEqual(status['track']['process']['ok'], False)
        self.assertIsNone(self.processor.project_manager.get('not_exists'))

    def test_20_broken_project(self):
        """A project with an unparsable script is loaded with an 'exception' entry."""
        self.assertIsNone(self.processor.project_manager.get('test_broken_project'))
        self.projectdb.insert('test_broken_project', {
            'name': 'test_broken_project',
            'group': 'group',
            'status': 'DEBUG',
            # Only the first 10 characters of the handler source: a
            # deliberately truncated script.
            'script': inspect.getsource(sample_handler)[:10],
            'comments': 'test project',
            'rate': 1.0,
            'burst': 10,
        })
        self.assertIsNone(self.processor.project_manager.get('not_exists'))
        self.assertIsNotNone(self.processor.project_manager.get('test_broken_project'))
        project_data = self.processor.project_manager.get('test_broken_project')
        self.assertIsNotNone(project_data.get('exception'))

    def test_30_new_task(self):
        """Processing an on_start task for 'test_project' produces a status message."""
        self.assertTrue(self.status_queue.empty())
        self.assertTrue(self.newtask_queue.empty())
        task = {
            "process": {
                "callback": "on_start"
            },
            "project": "test_project",
            "taskid": "data:,on_start",
            "url": "data:,on_start"
        }
        # Hand-built fetch result paired with the task.
        fetch_result = {
            "orig_url": "data:,on_start",
            "content": "on_start",
            "headers": {},
            "status_code": 200,
            "url": "data:,on_start",
            "time": 0,
        }
        self.in_queue.put((task, fetch_result))
        time.sleep(1)
        self.assertFalse(self.status_queue.empty())
#.........这里部分代码省略.........
示例10: TestScheduler
class TestScheduler(unittest.TestCase):
    """Tests that drive a Scheduler running in a background thread.

    The scheduler is fed through ``newtask_queue`` / ``status_queue``, emits
    on ``scheduler2fetcher`` and is inspected through the XML-RPC proxy
    ``self.rpc``. Test names carry numeric prefixes; presumably they depend
    on running in that order -- confirm.
    """

    # On-disk locations handed to TaskDB / ProjectDB / ResultDB in setUpClass.
    taskdb_path = "./data/tests/task.db"
    projectdb_path = "./data/tests/project.db"
    resultdb_path = "./data/tests/result.db"
    # NOTE(review): not referenced in the visible part of this class.
    check_project_time = 1
    # Port shared by the scheduler's XML-RPC server and the client proxy.
    scheduler_xmlrpc_port = 23333

    @classmethod
    def setUpClass(self):
        """Recreate ./data/tests and start the scheduler plus its XML-RPC server."""
        shutil.rmtree("./data/tests", ignore_errors=True)
        os.makedirs("./data/tests")

        def get_taskdb():
            return taskdb.TaskDB(self.taskdb_path)

        self.taskdb = get_taskdb()

        def get_projectdb():
            return projectdb.ProjectDB(self.projectdb_path)

        self.projectdb = get_projectdb()

        def get_resultdb():
            return resultdb.ResultDB(self.resultdb_path)

        self.resultdb = get_resultdb()

        # Bounded queues connecting the tests to the scheduler.
        self.newtask_queue = Queue(10)
        self.status_queue = Queue(10)
        self.scheduler2fetcher = Queue(10)
        self.rpc = xmlrpc_client.ServerProxy("http://localhost:%d" % self.scheduler_xmlrpc_port)

        def run_scheduler():
            # The scheduler receives its own database handles, opened inside
            # the thread that runs it.
            scheduler = Scheduler(
                taskdb=get_taskdb(),
                projectdb=get_projectdb(),
                newtask_queue=self.newtask_queue,
                status_queue=self.status_queue,
                out_queue=self.scheduler2fetcher,
                data_path="./data/tests/",
                resultdb=get_resultdb(),
            )
            # Small intervals/limits overriding the scheduler's own settings
            # for the test run.
            scheduler.UPDATE_PROJECT_INTERVAL = 0.1
            scheduler.LOOP_INTERVAL = 0.1
            scheduler.INQUEUE_LIMIT = 10
            scheduler.DELETE_TIME = 0
            scheduler.DEFAULT_RETRY_DELAY = {"": 5}
            scheduler._last_tick = int(time.time())  # not dispatch cronjob
            self.xmlrpc_thread = run_in_thread(scheduler.xmlrpc_run, port=self.scheduler_xmlrpc_port)
            scheduler.run()

        self.process = run_in_thread(run_scheduler)
        # Give the scheduler thread time to start.
        time.sleep(1)

    @classmethod
    def tearDownClass(self):
        """Stop the scheduler, remove test data and check that ports are closed."""
        if self.process.is_alive():
            # Ask the scheduler to quit over XML-RPC, then wait up to 5s.
            self.rpc._quit()
            self.process.join(5)
        self.xmlrpc_thread.join()
        assert not self.process.is_alive()
        shutil.rmtree("./data/tests", ignore_errors=True)
        time.sleep(1)

        # No listener may be left on these ports. 23333 is this class's
        # XML-RPC port; NOTE(review): 5000/24444/25555 presumably belong to
        # other components' tests -- confirm.
        assert not utils.check_port_open(5000)
        assert not utils.check_port_open(self.scheduler_xmlrpc_port)
        assert not utils.check_port_open(24444)
        assert not utils.check_port_open(25555)

    def test_10_new_task_ignore(self):
        """
        A task for a project that has not been created leaves the scheduler empty.

        task_queue = [ ]
        """
        self.newtask_queue.put(
            {"taskid": "taskid", "project": "test_project", "url": "url"}
        )  # unknown project: test_project
        self.assertEqual(self.rpc.size(), 0)
        self.assertEqual(len(self.rpc.get_active_tasks()), 0)

    def test_20_new_project(self):
        """
        Insert 'test_project' into the project database with status TODO.

        task_queue = [ ]
        """
        self.projectdb.insert(
            "test_project",
            {
                "name": "test_project",
                "group": "group",
                "status": "TODO",
                "script": "import time\nprint(time.time())",
                "comments": "test project",
                "rate": 1.0,
                "burst": 10,
            },
        )

    def test_30_update_project(self):
        """
        task_queue = [ ]
        """
#.........这里部分代码省略.........
示例11: TestFetcher
class TestFetcher(unittest.TestCase):
    """Tests for Fetcher against a local httpbin server.

    Class-level fixtures start httpbin, the fetcher (with its XML-RPC
    server), a pyproxy process and, if it can be launched, phantomjs.
    """

    # Template task; each test deep-copies it and fills in "url".
    sample_task_http = {
        "taskid": "taskid",
        "project": "project",
        "url": "",
        "fetch": {
            "method": "GET",
            "headers": {"Cookie": "a=b", "a": "b"},
            "cookies": {"c": "d"},
            "timeout": 60,
            "save": "abc",
        },
        "process": {"callback": "callback", "save": [1, 2, 3]},
    }

    @classmethod
    def setUpClass(self):
        """Start httpbin, the fetcher threads, pyproxy and (optionally) phantomjs."""
        # NOTE(review): not referenced below -- presumably imported for its
        # import-time side effects; confirm.
        import tests.data_test_webpage
        import httpbin

        self.httpbin_thread = utils.run_in_subprocess(httpbin.app.run, port=14887)
        self.httpbin = "http://127.0.0.1:14887"

        self.inqueue = Queue(10)
        self.outqueue = Queue(10)
        self.fetcher = Fetcher(self.inqueue, self.outqueue)
        self.fetcher.phantomjs_proxy = "127.0.0.1:25555"
        # XML-RPC client and server share port 24444.
        self.rpc = xmlrpc_client.ServerProxy("http://localhost:%d" % 24444)
        self.xmlrpc_thread = utils.run_in_thread(self.fetcher.xmlrpc_run, port=24444)
        self.thread = utils.run_in_thread(self.fetcher.run)
        # Despite the attribute name, this is a child process (Popen).
        self.proxy_thread = subprocess.Popen(
            ["pyproxy", "--username=binux", "--password=123456", "--port=14830", "--debug"], close_fds=True
        )
        self.proxy = "127.0.0.1:14830"
        try:
            self.phantomjs = subprocess.Popen(
                [
                    "phantomjs",
                    os.path.join(os.path.dirname(__file__), "../pyspider/fetcher/phantomjs_fetcher.js"),
                    "25555",
                ]
            )
        except OSError:
            # phantomjs could not be launched; tearDownClass checks for None.
            self.phantomjs = None
        # Give the background services time to come up.
        time.sleep(0.5)

    @classmethod
    def tearDownClass(self):
        """Stop every helper process/thread and check that the ports are closed."""
        self.proxy_thread.terminate()
        self.proxy_thread.wait()
        self.httpbin_thread.terminate()
        self.httpbin_thread.join()

        if self.phantomjs:
            self.phantomjs.kill()
            self.phantomjs.wait()
        # Ask the fetcher to quit over XML-RPC, then wait for its run loop.
        self.rpc._quit()
        self.thread.join()

        # 24444, 25555 and 14887 are the ports used in setUpClass.
        # NOTE(review): 5000 and 23333 presumably belong to other components'
        # tests -- confirm.
        assert not utils.check_port_open(5000)
        assert not utils.check_port_open(23333)
        assert not utils.check_port_open(24444)
        assert not utils.check_port_open(25555)
        assert not utils.check_port_open(14887)

        time.sleep(1)

    def test_10_http_get(self):
        """GET /get: status, url and 'save' round-trip; headers and cookies are sent."""
        request = copy.deepcopy(self.sample_task_http)
        request["url"] = self.httpbin + "/get"
        result = self.fetcher.sync_fetch(request)
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 200, result)
        self.assertEqual(response.orig_url, request["url"])
        self.assertEqual(response.save, request["fetch"]["save"])
        self.assertIsNotNone(response.json, response.content)
        # The custom header arrives, and the Cookie header carries both the
        # explicit "Cookie" header value and the entry from "cookies".
        self.assertEqual(response.json["headers"].get("A"), "b", response.json)
        self.assertIn("c=d", response.json["headers"].get("Cookie"), response.json)
        self.assertIn("a=b", response.json["headers"].get("Cookie"), response.json)

    def test_15_http_post(self):
        """POST /post with body 'binux': same checks as GET plus the form field."""
        request = copy.deepcopy(self.sample_task_http)
        request["url"] = self.httpbin + "/post"
        request["fetch"]["method"] = "POST"
        request["fetch"]["data"] = "binux"
        request["fetch"]["cookies"] = {"c": "d"}
        result = self.fetcher.sync_fetch(request)
        response = rebuild_response(result)

        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.orig_url, request["url"])
        self.assertEqual(response.save, request["fetch"]["save"])
        self.assertIsNotNone(response.json, response.content)

        # The body "binux" shows up as a form key with an empty value.
        self.assertEqual(response.json["form"].get("binux"), "")
        self.assertEqual(response.json["headers"].get("A"), "b", response.json)
        self.assertIn("c=d", response.json["headers"].get("Cookie"), response.json)
        self.assertIn("a=b", response.json["headers"].get("Cookie"), response.json)
#.........这里部分代码省略.........