本文整理汇总了 Python 中 hubstorage.HubstorageClient.get_job 方法的典型用法代码示例。如果您正苦于以下问题：Python HubstorageClient.get_job 方法的具体用法？Python HubstorageClient.get_job 怎么用？Python HubstorageClient.get_job 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 hubstorage.HubstorageClient 的用法示例。
在下文中一共展示了HubstorageClient.get_job方法的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: get_job
# 需要导入模块: from hubstorage import HubstorageClient [as 别名]
# 或者: from hubstorage.HubstorageClient import get_job [as 别名]
def get_job(job):
    """Fetch the Hubstorage job described by *job*.

    ``get_job_specs`` turns *job* into a ``(jobid, apikey)`` pair; the job is
    then looked up through a client authenticated with that key.

    Raises:
        NotFoundException: if the fetched job's metadata is falsy (empty).
    """
    jobid, apikey = get_job_specs(job)
    client = HubstorageClient(auth=apikey)
    fetched = client.get_job(jobid)
    if fetched.metadata:
        return fetched
    # Falsy metadata is treated as "no such job".
    raise NotFoundException("Job {} does not exist".format(jobid))
示例2: test_retrier_catches_badstatusline_and_429
# 需要导入模块: from hubstorage import HubstorageClient [as 别名]
# 或者: from hubstorage.HubstorageClient import get_job [as 别名]
def test_retrier_catches_badstatusline_and_429(self):
    """get_job must survive two connection errors followed by one HTTP 429.

    The mocked API raises ``ConnectionError`` on the first two requests,
    answers 429 on the third and 200 with the job metadata afterwards, so
    four requests in total are expected.
    """
    # Prepare
    client = HubstorageClient(auth=self.auth, endpoint=self.endpoint, max_retries=3)
    job_metadata = {'project': self.projectid, 'spider': self.spidername, 'state': 'pending'}
    # One-element list so the nested callback can mutate the counter.
    attempts_count = [0]

    def request_callback(request):
        attempts_count[0] += 1
        attempt = attempts_count[0]
        if attempt <= 2:
            raise ConnectionError("Connection aborted.", BadStatusLine("''"))
        if attempt == 3:
            return (429, {}, {})
        payload = dict(job_metadata)
        return (200, {}, json.dumps(payload))

    self.mock_api(callback=request_callback)

    # Act
    job_key = '%s/%s/%s' % (self.projectid, self.spiderid, 42)
    job = client.get_job(job_key)

    # Assert
    self.assertEqual(dict(job_metadata), dict(job.metadata))
    self.assertEqual(attempts_count[0], 4)
示例3: test_allows_msgpack
# 需要导入模块: from hubstorage import HubstorageClient [as 别名]
# 或者: from hubstorage.HubstorageClient import get_job [as 别名]
def test_allows_msgpack(monkeypatch, msgpack_available, path, expected_result):
    """Check which job resources report msgpack support for *path*.

    ``MSGPACK_AVAILABLE`` is patched to *msgpack_available*.  For items, logs
    and samples ``_allows_mpack(path)`` must be identical to
    ``msgpack_available and expected_result``; for requests, metadata and
    jobq it must be ``False``.
    """
    monkeypatch.setattr("hubstorage.resourcetype.MSGPACK_AVAILABLE", msgpack_available)
    job = HubstorageClient().get_job("2222000/1/1")
    mpack_capable = (job.items, job.logs, job.samples)
    for resource in mpack_capable:
        assert resource._allows_mpack(path) is (msgpack_available and expected_result)
    # These resources must report no msgpack support for this path.
    assert job.requests._allows_mpack(path) is False
    assert job.metadata._allows_mpack(path) is False
    assert job.jobq._allows_mpack(path) is False
示例4: test_auth
# 需要导入模块: from hubstorage import HubstorageClient [as 别名]
# 或者: from hubstorage.HubstorageClient import get_job [as 别名]
def test_auth(self):
    """Exercise access without auth and with project-level auth.

    A client created without global auth must get HTTP 401 for every
    operation attempted through it.  A project obtained from that same
    client with explicit auth must be able to push a job and fetch it back.
    """
    # client without global auth set
    hsc = HubstorageClient(endpoint=self.hsclient.endpoint)
    self.assertEqual(hsc.auth, None)

    # check no-auth access: every one of these calls must be rejected
    unauthenticated_calls = (
        lambda: hsc.push_job(self.projectid, self.spidername),
        lambda: hsc.get_project(self.projectid).push_job(self.spidername),
        lambda: hsc.get_job((self.projectid, 1, 1)).items.list(),
        lambda: hsc.get_project(self.projectid).get_job((self.projectid, 1, 1)).items.list(),
    )
    for call in unauthenticated_calls:
        try:
            call()
        except HTTPError as exc:
            # assertEqual, not assertTrue: assertTrue(status, 401) treats 401
            # as the failure message and passes for any non-zero status code.
            self.assertEqual(exc.response.status_code, 401)
        else:
            self.fail('401 not raised')

    # create project with auth
    auth = self.hsclient.auth
    project = hsc.get_project(self.projectid, auth)
    self.assertEqual(project.auth, auth)
    job = project.push_job(self.spidername)
    samejob = project.get_job(job.key)
    self.assertEqual(samejob.key, job.key)
示例5: fetch_and_save_items
# 需要导入模块: from hubstorage import HubstorageClient [as 别名]
# 或者: from hubstorage.HubstorageClient import get_job [as 别名]
def fetch_and_save_items():
    """Save the items of every job in each configured spider's last-job summary.

    For each entry of ``SPIDERS`` the spider id is resolved in project
    ``SH_PROJECT``, its ``lastjobsummary`` is iterated, and the item values
    of every listed job are handed to ``save_items`` together with the
    spider's ``institution_name``.
    """
    client = HubstorageClient(auth=API_KEY)
    project = client.get_project(SH_PROJECT)
    for spider in SPIDERS:
        print("\nworking on spider {}".format(spider['spider_name']))
        spider_id = project.ids.spider(spider['spider_name'])
        for entry in project.spiders.lastjobsummary(spiderid=spider_id):
            job_key = entry['key']
            print(job_key)
            item_values = client.get_job(job_key).items.iter_values()
            save_items(item_values, spider['institution_name'])
示例6: test_get_job_does_retry
# 需要导入模块: from hubstorage import HubstorageClient [as 别名]
# 或者: from hubstorage.HubstorageClient import get_job [as 别名]
def test_get_job_does_retry(self):
    """get_job must return the job metadata after exactly three requests.

    NOTE(review): presumably ``make_request_callback(2, ...)`` fails the
    first two requests before succeeding -- confirm against the helper.
    """
    # Prepare
    client = HubstorageClient(auth=self.auth, endpoint=self.endpoint, max_retries=3)
    job_metadata = {'project': self.projectid, 'spider': self.spidername, 'state': 'pending'}
    callback, attempts_count = self.make_request_callback(2, job_metadata)
    self.mock_api(callback=callback)

    # Act
    job_key = '%s/%s/%s' % (self.projectid, self.spiderid, 42)
    job = client.get_job(job_key)

    # Assert
    fetched_metadata = dict(job.metadata)
    self.assertEqual(dict(job_metadata), fetched_metadata)
    self.assertEqual(attempts_count[0], 3)
示例7: test_metadata_save_does_retry
# 需要导入模块: from hubstorage import HubstorageClient [as 别名]
# 或者: from hubstorage.HubstorageClient import get_job [as 别名]
def test_metadata_save_does_retry(self):
    """Saving job metadata must take exactly three POST requests.

    GET and POST are mocked with separate callbacks so that only the POST
    attempts are counted by the final assertion.
    """
    # Prepare
    client = HubstorageClient(auth=self.auth, endpoint=self.endpoint, max_retries=3)
    job_metadata = {'project': self.projectid, 'spider': self.spidername, 'state': 'pending'}
    callback_get, attempts_count_get = self.make_request_callback(0, job_metadata)
    callback_post, attempts_count_post = self.make_request_callback(2, job_metadata)
    self.mock_api(method=GET, callback=callback_get)
    self.mock_api(method=POST, callback=callback_post)

    # Act
    job_key = '%s/%s/%s' % (self.projectid, self.spiderid, 42)
    job = client.get_job(job_key)
    job.metadata['foo'] = 'bar'
    job.metadata.save()

    # Assert
    self.assertEqual(attempts_count_post[0], 3)
示例8: test_api_delete_can_be_set_to_non_idempotent
# 需要导入模块: from hubstorage import HubstorageClient [as 别名]
# 或者: from hubstorage.HubstorageClient import get_job [as 别名]
def test_api_delete_can_be_set_to_non_idempotent(self):
    """A DELETE issued with ``is_idempotent=False`` must not be retried.

    Exactly one DELETE attempt is expected, and the resulting ``HTTPError``
    must reach the caller.
    """
    # Prepare
    client = HubstorageClient(auth=self.auth, endpoint=self.endpoint, max_retries=3)
    job_metadata = {'project': self.projectid, 'spider': self.spidername, 'state': 'pending'}
    callback_delete, attempts_count_delete = self.make_request_callback(2, job_metadata)
    self.mock_api(method=DELETE, callback=callback_delete)

    # Act
    job_key = '%s/%s/%s' % (self.projectid, self.spiderid, 42)
    job = client.get_job(job_key)
    err = None
    try:
        job.metadata.apidelete('/my/non/idempotent/delete/', is_idempotent=False)
    except HTTPError as caught:
        # Keep a reference: the "as" name is cleared when the handler exits.
        err = caught

    # Assert
    self.assertEqual(attempts_count_delete[0], 1)
    self.assertIsNotNone(err)
示例9: test_retrier_does_not_catch_unwanted_exception
# 需要导入模块: from hubstorage import HubstorageClient [as 别名]
# 或者: from hubstorage.HubstorageClient import get_job [as 别名]
def test_retrier_does_not_catch_unwanted_exception(self):
    """An HTTP 403 must surface after a single request.

    The callback is built with ``http_error_status=403``; the test expects
    one attempt only, an ``HTTPError`` carrying status 403, and no metadata.
    """
    # Prepare
    client = HubstorageClient(auth=self.auth, endpoint=self.endpoint, max_retries=2)
    job_metadata = {'project': self.projectid, 'spider': self.spidername, 'state': 'pending'}
    callback, attempts_count = self.make_request_callback(3, job_metadata, http_error_status=403)
    self.mock_api(callback=callback)

    # Act
    job = None
    metadata = None
    err = None
    job_key = '%s/%s/%s' % (self.projectid, self.spiderid, 42)
    try:
        job = client.get_job(job_key)
        metadata = dict(job.metadata)
    except HTTPError as caught:
        err = caught

    # Assert
    self.assertIsNone(metadata)
    self.assertIsNotNone(err)
    self.assertEqual(err.response.status_code, 403)
    self.assertEqual(attempts_count[0], 1)
示例10: test_get_job_does_fails_on_too_many_retries
# 需要导入模块: from hubstorage import HubstorageClient [as 别名]
# 或者: from hubstorage.HubstorageClient import get_job [as 别名]
def test_get_job_does_fails_on_too_many_retries(self):
    """get_job must give up once the retry budget is exhausted.

    With ``max_retries=2`` and ``max_retry_time=1`` the test expects three
    requests in total, an ``HTTPError`` with status 504, and no metadata.
    """
    # Prepare
    client = HubstorageClient(auth=self.auth, endpoint=self.endpoint, max_retries=2, max_retry_time=1)
    job_metadata = {'project': self.projectid, 'spider': self.spidername, 'state': 'pending'}
    callback, attempts_count = self.make_request_callback(3, job_metadata)
    self.mock_api(callback=callback)

    # Act
    job = None
    metadata = None
    err = None
    job_key = '%s/%s/%s' % (self.projectid, self.spiderid, 42)
    try:
        job = client.get_job(job_key)
        metadata = dict(job.metadata)
    except HTTPError as caught:
        err = caught

    # Assert
    self.assertIsNone(metadata)
    self.assertIsNotNone(err)
    self.assertEqual(err.response.status_code, 504)
    self.assertEqual(attempts_count[0], 3)
示例11: main
# 需要导入模块: from hubstorage import HubstorageClient [as 别名]
# 或者: from hubstorage.HubstorageClient import get_job [as 别名]
def main(argv):
    """Print every item of a Hubstorage job as one JSON document per line.

    Args:
        argv: command-line arguments without the program name.  Recognised
            options are ``-h``, ``-k``/``--apikey`` and ``-s``/``--spider``;
            the ``-s`` value is passed unchanged to ``HubstorageClient.get_job``.

    Exits with status 2 when the options cannot be parsed; ``-h`` prints the
    usage line and exits.
    """
    usage = 'alljobs.py -k <API Key> -s <ProjectID\\Spider>'
    apikey = ''
    spider = ''
    try:
        # "hk:s:" declares the -k/-s short options handled below.  The
        # previous spec "hi:o" did not, so both raised GetoptError.
        opts, _args = getopt.getopt(argv, "hk:s:", ["apikey=", "spider="])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print(usage)
            sys.exit()
        elif opt in ("-k", "--apikey"):
            apikey = arg
        elif opt in ("-s", "--spider"):
            spider = arg
    hc = HubstorageClient(auth=apikey)
    for item in hc.get_job(spider).items.list():
        print(json.dumps(item))
示例12: HubstorageClient
# 需要导入模块: from hubstorage import HubstorageClient [as 别名]
# 或者: from hubstorage.HubstorageClient import get_job [as 别名]
#coding=UTF-8
from hubstorage import HubstorageClient
# Dump the field values of every item of job 53883/1/5 (Python 2 script).
# NOTE(review): the API key is hard-coded in source -- consider loading it
# from the environment or a config file instead.
hc = HubstorageClient(auth='bc2aa25cc40f4ed4b03988e8e0b9e89e')
project = hc.get_project('53883')
itemslist = hc.get_job('53883/1/5').items.list()
itemslist_size = itemslist.__sizeof__()
for element in itemslist:
    # Drop the bookkeeping keys before printing; a missing key raises KeyError.
    for internal_key in ('_type', '_cached_page_id', '_template'):
        del element[internal_key]
    # Each ``fields`` is a (key, value) pair; both members are visited.
    for fields in element.iteritems():
        for values in fields:
            if isinstance(values, basestring):
                print(values)
            else:
                # Non-string members are expected to support pop().
                print(values.pop())
示例13: __init__
# 需要导入模块: from hubstorage import HubstorageClient [as 别名]
# 或者: from hubstorage.HubstorageClient import get_job [as 别名]
def __init__(self, project: str, spider: str):
    """Bind ``self.job`` to the first job listed in the project's jobq for *spider*.

    The API key is read from ``shub_cfg``.  ``next`` raises ``StopIteration``
    if the jobq listing is empty.
    """
    client = HubstorageClient(auth=shub_cfg.get('apikey'))
    job_listing = client.get_project(project).jobq.list(spider=spider)
    first_entry = next(job_listing)
    self.job = client.get_job(first_entry.get('key'))
示例14: SystemTest
# 需要导入模块: from hubstorage import HubstorageClient [as 别名]
# 或者: from hubstorage.HubstorageClient import get_job [as 别名]
class SystemTest(HSTestCase):
MAGICN = 1211
def setUp(self):
    """Create the panel, runner and scraper clients used by the tests."""
    # super() takes *this* class: super(HSTestCase, self) would start the
    # lookup after HSTestCase and skip any setUp it defines itself.
    super(SystemTest, self).setUp()
    endpoint = self.hsclient.endpoint
    # Panel - no client auth, only project auth using user auth token
    self.panelclient = HubstorageClient(endpoint=endpoint)
    self.panelproject = self.panelclient.get_project(self.projectid, auth=self.auth)
    # Runner - client uses global auth to poll jobq
    self.runnerclient = HubstorageClient(endpoint=endpoint, auth=self.auth)
    # Scraper - uses job level auth, no global or project auth available
    self.scraperclient = HubstorageClient(endpoint=endpoint)
def test_succeed_with_close_reason(self):
    """A job run with close_reason='all-good' must end finished with that reason."""
    p = self.panelproject
    pushed = p.jobq.push(self.spidername)
    # check pending state
    # builtin next() instead of the Python-2-only .next() iterator method
    job = next(p.get_jobs(self.spiderid))
    self.assertEqual(job.metadata.get('state'), 'pending')
    # consume msg from runner
    self._run_runner(pushed, close_reason='all-good')
    # query again from panel
    job = next(p.get_jobs(self.spiderid))
    self.assertEqual(job.metadata.get('state'), 'finished')
    self.assertEqual(job.metadata.get('close_reason'), 'all-good')
    # the logs total is expected to be four times the items/requests total
    self.assertEqual(job.items.stats()['totals']['input_values'], self.MAGICN)
    self.assertEqual(job.logs.stats()['totals']['input_values'], self.MAGICN * 4)
    self.assertEqual(job.requests.stats()['totals']['input_values'], self.MAGICN)
def test_succeed_without_close_reason(self):
    """A job run with close_reason=None must end finished with 'no_reason'."""
    p = self.panelproject
    pushed = p.jobq.push(self.spidername)
    # check pending state
    # builtin next() instead of the Python-2-only .next() iterator method
    job = next(p.get_jobs(self.spiderid))
    self.assertEqual(job.metadata.get('state'), 'pending')
    # consume msg from runner
    self._run_runner(pushed, close_reason=None)
    # query again from panel
    job = next(p.get_jobs(self.spiderid))
    self.assertEqual(job.metadata.get('state'), 'finished')
    self.assertEqual(job.metadata.get('close_reason'), 'no_reason')
    # the logs total is expected to be four times the items/requests total
    self.assertEqual(job.items.stats()['totals']['input_values'], self.MAGICN)
    self.assertEqual(job.logs.stats()['totals']['input_values'], self.MAGICN * 4)
    self.assertEqual(job.requests.stats()['totals']['input_values'], self.MAGICN)
def test_scraper_failure(self):
    """A job run with an exception as close_reason must end finished with 'failed'."""
    p = self.panelproject
    pushed = p.jobq.push(self.spidername)
    # check pending state
    # builtin next() instead of the Python-2-only .next() iterator method
    job = next(p.get_jobs(self.spiderid))
    self.assertEqual(job.metadata.get('state'), 'pending')
    # consume msg from runner
    self._run_runner(pushed, close_reason=IOError('no more resources, ha!'))
    # query again from panel
    job = next(p.get_jobs(self.spiderid))
    self.assertEqual(job.metadata.get('state'), 'finished')
    self.assertEqual(job.metadata.get('close_reason'), 'failed')
    # MAGICN per log level messages plus one of last failure
    stats = job.logs.stats()
    self.assertTrue(stats)
    self.assertEqual(stats['totals']['input_values'], self.MAGICN * 4 + 1)
def _run_runner(self, pushed, close_reason):
    """Start a job through the runner client, run the scraper, then close the job.

    The job is marked failed when the scraper raises and finished otherwise;
    the runner client is closed afterwards.  *pushed* is accepted but not
    used by this method.
    """
    running_job = self.runnerclient.start_job(self.projectid)
    self.assertFalse(running_job.metadata.get('stop_requested'))
    running_job.metadata.update(host='localhost', slot=1)
    self.assertEqual(running_job.metadata.get('state'), 'running')
    # run scraper
    try:
        self._run_scraper(running_job.key, running_job.jobauth, close_reason=close_reason)
    except Exception as failure:
        running_job.failed(message=str(failure))
        # logging from runner must append and never remove messages logged
        # by scraper
        self.assertTrue(running_job.logs.batch_append)
    else:
        running_job.finished()
    # NOTE(review): the flattened source does not show whether close()
    # belonged to the else branch; it is placed at method level here.
    self.runnerclient.close()
def _run_scraper(self, jobkey, jobauth, close_reason=None):
httpmethods = 'GET PUT POST DELETE HEAD OPTIONS TRACE CONNECT'.split()
job = self.scraperclient.get_job(jobkey, auth=jobauth)
for idx in xrange(self.MAGICN):
iid = job.items.write({'uuid': idx})
job.logs.debug('log debug %s' % idx, idx=idx)
job.logs.info('log info %s' % idx, idx=idx)
job.logs.warn('log warn %s' % idx, idx=idx)
job.logs.error('log error %s' % idx, idx=idx)
sid = job.samples.write([idx, idx, idx])
rid = job.requests.add(
url='http://test.com/%d' % idx,
status=random.randint(100, 1000),
method=random.choice(httpmethods),
rs=random.randint(0, 100000),
duration=random.randint(0, 1000),
parent=random.randrange(0, idx + 1) if idx > 10 else None,
ts=millitime() + random.randint(100, 100000),
#.........这里部分代码省略.........