当前位置: 首页>>代码示例>>Python>>正文


Python HubstorageClient.get_job方法代码示例

本文整理汇总了Python中hubstorage.HubstorageClient.get_job方法的典型用法代码示例。如果您正苦于以下问题：Python HubstorageClient.get_job方法的具体用法？Python HubstorageClient.get_job怎么用？Python HubstorageClient.get_job使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类hubstorage.HubstorageClient的用法示例。


在下文中一共展示了HubstorageClient.get_job方法的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: get_job

# 需要导入模块: from hubstorage import HubstorageClient [as 别名]
# 或者: from hubstorage.HubstorageClient import get_job [as 别名]
def get_job(job):
    """Return the Hubstorage job object identified by *job*.

    The job id and API key are obtained from ``get_job_specs(job)``.

    Raises:
        NotFoundException: if the fetched job has falsy metadata.
    """
    jobid, apikey = get_job_specs(job)
    client = HubstorageClient(auth=apikey)
    fetched = client.get_job(jobid)
    if fetched.metadata:
        return fetched
    raise NotFoundException("Job {} does not exist".format(jobid))
开发者ID:jdemaeyer,项目名称:shub,代码行数:9,代码来源:utils.py

示例2: test_retrier_catches_badstatusline_and_429

# 需要导入模块: from hubstorage import HubstorageClient [as 别名]
# 或者: from hubstorage.HubstorageClient import get_job [as 别名]
    def test_retrier_catches_badstatusline_and_429(self):
        """Metadata is read after two connection errors and one 429 response."""
        # Prepare
        client = HubstorageClient(auth=self.auth, endpoint=self.endpoint, max_retries=3)
        job_metadata = {'project': self.projectid, 'spider': self.spidername, 'state': 'pending'}

        # A one-element list lets the nested callback mutate the counter.
        attempts_count = [0]

        def request_callback(request):
            attempts_count[0] += 1
            attempt = attempts_count[0]

            # Attempts 1-2: connection error; attempt 3: HTTP 429; then success.
            if attempt <= 2:
                raise ConnectionError("Connection aborted.", BadStatusLine("''"))
            if attempt == 3:
                return (429, {}, {})
            return (200, {}, json.dumps(dict(job_metadata)))

        self.mock_api(callback=request_callback)

        # Act
        jobkey = '%s/%s/%s' % (self.projectid, self.spiderid, 42)
        job = client.get_job(jobkey)

        # Assert
        self.assertEqual(dict(job_metadata), dict(job.metadata))
        self.assertEqual(attempts_count[0], 4)
开发者ID:stummjr,项目名称:python-hubstorage,代码行数:28,代码来源:test_retry.py

示例3: test_allows_msgpack

# 需要导入模块: from hubstorage import HubstorageClient [as 别名]
# 或者: from hubstorage.HubstorageClient import get_job [as 别名]
def test_allows_msgpack(monkeypatch, msgpack_available, path, expected_result):
    """Check ``_allows_mpack(path)`` on every resource of a job.

    items/logs/samples must follow ``msgpack_available and expected_result``;
    requests/metadata/jobq must always report ``False``.
    """
    monkeypatch.setattr("hubstorage.resourcetype.MSGPACK_AVAILABLE", msgpack_available)
    job = HubstorageClient().get_job("2222000/1/1")

    for resource in [job.items, job.logs, job.samples]:
        allowed = resource._allows_mpack(path)
        assert allowed is (msgpack_available and expected_result)

    # Resources looked up one at a time, in the same order as before.
    for name in ('requests', 'metadata', 'jobq'):
        assert getattr(job, name)._allows_mpack(path) is False
开发者ID:scrapinghub,项目名称:python-hubstorage,代码行数:11,代码来源:test_project.py

示例4: test_auth

# 需要导入模块: from hubstorage import HubstorageClient [as 别名]
# 或者: from hubstorage.HubstorageClient import get_job [as 别名]
    def test_auth(self):
        """Unauthenticated calls fail with HTTP 401; project-level auth works."""
        # client without global auth set
        hsc = HubstorageClient(endpoint=self.hsclient.endpoint)
        self.assertEqual(hsc.auth, None)

        # check no-auth access: every one of these calls must be rejected
        unauthenticated_calls = [
            lambda: hsc.push_job(self.projectid, self.spidername),
            lambda: hsc.get_project(self.projectid).push_job(self.spidername),
            lambda: hsc.get_job((self.projectid, 1, 1)).items.list(),
            lambda: hsc.get_project(self.projectid).get_job(
                (self.projectid, 1, 1)).items.list(),
        ]
        for call in unauthenticated_calls:
            try:
                call()
            except HTTPError as exc:
                # assertEqual, not assertTrue: assertTrue(x, 401) treats 401 as
                # the failure message and passes for any truthy status code.
                self.assertEqual(exc.response.status_code, 401)
            else:
                self.fail('401 not raised')

        # create project with auth
        auth = self.hsclient.auth
        project = hsc.get_project(self.projectid, auth)
        self.assertEqual(project.auth, auth)
        job = project.push_job(self.spidername)
        samejob = project.get_job(job.key)
        self.assertEqual(samejob.key, job.key)
开发者ID:tpeng,项目名称:python-hubstorage,代码行数:43,代码来源:test_project.py

示例5: fetch_and_save_items

# 需要导入模块: from hubstorage import HubstorageClient [as 别名]
# 或者: from hubstorage.HubstorageClient import get_job [as 别名]
def fetch_and_save_items():
    """Pass the items of each spider's summarized jobs to ``save_items``.

    For every entry of ``SPIDERS``, the jobs reported by
    ``project.spiders.lastjobsummary`` are fetched and their item values are
    saved under the spider's ``institution_name``.
    """
    hc = HubstorageClient(auth=API_KEY)
    project = hc.get_project(SH_PROJECT)
    for spider in SPIDERS:
        spider_name = spider['spider_name']
        print("\nworking on spider {}".format(spider_name))
        spider_id = project.ids.spider(spider_name)
        for element in project.spiders.lastjobsummary(spiderid=spider_id):
            job_key = element['key']
            print(job_key)
            job_items = hc.get_job(job_key).items.iter_values()
            save_items(job_items, spider['institution_name'])
开发者ID:manolo-rocks,项目名称:manolo_scraper,代码行数:14,代码来源:save_items.py

示例6: test_get_job_does_retry

# 需要导入模块: from hubstorage import HubstorageClient [as 别名]
# 或者: from hubstorage.HubstorageClient import get_job [as 别名]
    def test_get_job_does_retry(self):
        """Job metadata is read and the mocked API is called three times."""
        # Prepare
        client = HubstorageClient(auth=self.auth, endpoint=self.endpoint, max_retries=3)
        job_metadata = {'project': self.projectid, 'spider': self.spidername, 'state': 'pending'}
        # presumably the first argument is the number of failing attempts --
        # confirm against make_request_callback
        request_callback, attempts = self.make_request_callback(2, job_metadata)

        self.mock_api(callback=request_callback)

        # Act
        jobkey = '%s/%s/%s' % (self.projectid, self.spiderid, 42)
        job = client.get_job(jobkey)

        # Assert
        self.assertEqual(dict(job_metadata), dict(job.metadata))
        self.assertEqual(attempts[0], 3)
开发者ID:stummjr,项目名称:python-hubstorage,代码行数:16,代码来源:test_retry.py

示例7: test_metadata_save_does_retry

# 需要导入模块: from hubstorage import HubstorageClient [as 别名]
# 或者: from hubstorage.HubstorageClient import get_job [as 别名]
    def test_metadata_save_does_retry(self):
        """Saving job metadata calls the mocked POST endpoint three times."""
        # Prepare
        client = HubstorageClient(auth=self.auth, endpoint=self.endpoint, max_retries=3)
        job_metadata = {'project': self.projectid, 'spider': self.spidername, 'state': 'pending'}
        # presumably the first argument is the number of failing attempts --
        # confirm against make_request_callback
        get_callback, _get_attempts = self.make_request_callback(0, job_metadata)
        post_callback, post_attempts = self.make_request_callback(2, job_metadata)

        self.mock_api(method=GET, callback=get_callback)
        self.mock_api(method=POST, callback=post_callback)

        # Act
        jobkey = '%s/%s/%s' % (self.projectid, self.spiderid, 42)
        job = client.get_job(jobkey)
        job.metadata['foo'] = 'bar'
        job.metadata.save()

        # Assert
        self.assertEqual(post_attempts[0], 3)
开发者ID:stummjr,项目名称:python-hubstorage,代码行数:19,代码来源:test_retry.py

示例8: test_api_delete_can_be_set_to_non_idempotent

# 需要导入模块: from hubstorage import HubstorageClient [as 别名]
# 或者: from hubstorage.HubstorageClient import get_job [as 别名]
    def test_api_delete_can_be_set_to_non_idempotent(self):
        """A DELETE flagged ``is_idempotent=False`` is attempted once and raises."""
        # Prepare
        client = HubstorageClient(auth=self.auth, endpoint=self.endpoint, max_retries=3)
        job_metadata = {'project': self.projectid, 'spider': self.spidername, 'state': 'pending'}
        delete_callback, delete_attempts = self.make_request_callback(2, job_metadata)

        self.mock_api(method=DELETE, callback=delete_callback)

        # Act
        jobkey = '%s/%s/%s' % (self.projectid, self.spiderid, 42)
        job = client.get_job(jobkey)

        raised = None
        try:
            job.metadata.apidelete('/my/non/idempotent/delete/', is_idempotent=False)
        except HTTPError as exc:
            raised = exc

        # Assert
        self.assertEqual(delete_attempts[0], 1)
        self.assertIsNotNone(raised)
开发者ID:stummjr,项目名称:python-hubstorage,代码行数:22,代码来源:test_retry.py

示例9: test_retrier_does_not_catch_unwanted_exception

# 需要导入模块: from hubstorage import HubstorageClient [as 别名]
# 或者: from hubstorage.HubstorageClient import get_job [as 别名]
    def test_retrier_does_not_catch_unwanted_exception(self):
        """An HTTP 403 surfaces as HTTPError after a single attempt."""
        # Prepare
        client = HubstorageClient(auth=self.auth, endpoint=self.endpoint, max_retries=2)
        job_metadata = {'project': self.projectid, 'spider': self.spidername, 'state': 'pending'}
        request_callback, attempts = self.make_request_callback(
            3, job_metadata, http_error_status=403)

        self.mock_api(callback=request_callback)

        # Act
        metadata = None
        raised = None
        jobkey = '%s/%s/%s' % (self.projectid, self.spiderid, 42)
        try:
            job = client.get_job(jobkey)
            metadata = dict(job.metadata)
        except HTTPError as exc:
            raised = exc

        # Assert
        self.assertIsNone(metadata)
        self.assertIsNotNone(raised)
        self.assertEqual(raised.response.status_code, 403)
        self.assertEqual(attempts[0], 1)
开发者ID:stummjr,项目名称:python-hubstorage,代码行数:23,代码来源:test_retry.py

示例10: test_get_job_does_fails_on_too_many_retries

# 需要导入模块: from hubstorage import HubstorageClient [as 别名]
# 或者: from hubstorage.HubstorageClient import get_job [as 别名]
    def test_get_job_does_fails_on_too_many_retries(self):
        """Reading metadata raises HTTPError 504 after three attempts."""
        # Prepare
        client = HubstorageClient(auth=self.auth, endpoint=self.endpoint, max_retries=2, max_retry_time=1)
        job_metadata = {'project': self.projectid, 'spider': self.spidername, 'state': 'pending'}
        request_callback, attempts = self.make_request_callback(3, job_metadata)

        self.mock_api(callback=request_callback)

        # Act
        metadata = None
        raised = None
        jobkey = '%s/%s/%s' % (self.projectid, self.spiderid, 42)
        try:
            job = client.get_job(jobkey)
            metadata = dict(job.metadata)
        except HTTPError as exc:
            raised = exc

        # Assert
        self.assertIsNone(metadata)
        self.assertIsNotNone(raised)
        self.assertEqual(raised.response.status_code, 504)
        self.assertEqual(attempts[0], 3)
开发者ID:scrapinghub,项目名称:python-hubstorage,代码行数:23,代码来源:test_retry.py

示例11: main

# 需要导入模块: from hubstorage import HubstorageClient [as 别名]
# 或者: from hubstorage.HubstorageClient import get_job [as 别名]
def main(argv):
    """Print every item of a job as one JSON document per line.

    Args:
        argv: command-line arguments without the program name. Recognized
            options are ``-k``/``--apikey`` (value passed as client auth),
            ``-s``/``--spider`` (value passed to ``get_job``) and ``-h``
            (print usage and exit).

    Exits with status 2 after printing the usage line when the options cannot
    be parsed.
    """
    usage = 'alljobs.py -k <API Key> -s <ProjectID\\Spider>'
    apikey = ''
    spider = ''

    try:
        # "k:" and "s:" must be declared, otherwise getopt rejects the short
        # options handled below. "i:" and "o" are still accepted (and
        # ignored) so existing invocations keep working.
        opts, _args = getopt.getopt(argv, "hi:ok:s:", ["apikey=", "spider="])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print(usage)
            sys.exit()
        elif opt in ("-k", "--apikey"):
            apikey = arg
        elif opt in ("-s", "--spider"):
            spider = arg

    hc = HubstorageClient(auth=apikey)
    for item in hc.get_job(spider).items.list():
        print(json.dumps(item))
开发者ID:watogo,项目名称:BDA,代码行数:26,代码来源:allitems.py

示例12: HubstorageClient

# 需要导入模块: from hubstorage import HubstorageClient [as 别名]
# 或者: from hubstorage.HubstorageClient import get_job [as 别名]
#coding=UTF-8
from hubstorage import HubstorageClient
# Dump the field names and values of every item of job 53883/1/5.
# NOTE(review): the API key below is hard-coded in source; consider loading it
# from configuration or the environment instead.
hc = HubstorageClient(auth='bc2aa25cc40f4ed4b03988e8e0b9e89e')
project = hc.get_project('53883')
itemslist = hc.get_job('53883/1/5').items.list()
itemslist_size = itemslist.__sizeof__()
for element in itemslist:
    # Drop the bookkeeping fields before printing; a missing key raises.
    for meta_key in ('_type', '_cached_page_id', '_template'):
        del element[meta_key]
    for pair in element.iteritems():
        # Each pair is (field name, field value); both parts are printed.
        for part in pair:
            if isinstance(part, basestring):
                print(part)
            else:
                # Non-string parts are printed via their last popped element.
                print(part.pop())
开发者ID:watogo,项目名称:BDA,代码行数:20,代码来源:itemstest.py

示例13: __init__

# 需要导入模块: from hubstorage import HubstorageClient [as 别名]
# 或者: from hubstorage.HubstorageClient import get_job [as 别名]
 def __init__(self, project: str, spider: str):
     """Bind ``self.job`` to the first job the jobq lists for *spider*.

     The API key is read from ``shub_cfg``. ``next`` raises ``StopIteration``
     when the jobq listing yields nothing.
     """
     client = HubstorageClient(auth=shub_cfg.get('apikey'))
     listing = client.get_project(project).jobq.list(spider=spider)
     first_entry = next(listing)
     self.job = client.get_job(first_entry.get('key'))
开发者ID:ranisalt,项目名称:cagr-api,代码行数:6,代码来源:hub.py

示例14: SystemTest

# 需要导入模块: from hubstorage import HubstorageClient [as 别名]
# 或者: from hubstorage.HubstorageClient import get_job [as 别名]
class SystemTest(HSTestCase):

    MAGICN = 1211

    def setUp(self):
        """Create the panel, runner and scraper clients used by the tests."""
        # Start the MRO lookup after SystemTest (not after HSTestCase), so a
        # setUp defined on HSTestCase itself is not skipped.
        super(SystemTest, self).setUp()
        endpoint = self.hsclient.endpoint
        # Panel - no client auth, only project auth using user auth token
        self.panelclient = HubstorageClient(endpoint=endpoint)
        self.panelproject = self.panelclient.get_project(self.projectid, auth=self.auth)
        # Runner - client uses global auth to poll jobq
        self.runnerclient = HubstorageClient(endpoint=endpoint, auth=self.auth)
        # Scraper - uses job level auth, no global or project auth available
        self.scraperclient = HubstorageClient(endpoint=endpoint)

    def test_succeed_with_close_reason(self):
        """A job run with close reason 'all-good' ends finished with full stats."""
        project = self.panelproject
        pushed = project.jobq.push(self.spidername)
        # check pending state
        pending = project.get_jobs(self.spiderid).next()
        self.assertEqual(pending.metadata.get('state'), 'pending')
        # consume msg from runner
        self._run_runner(pushed, close_reason='all-good')
        # query again from panel
        done = project.get_jobs(self.spiderid).next()
        for field, expected in (('state', 'finished'), ('close_reason', 'all-good')):
            self.assertEqual(done.metadata.get(field), expected)
        # Resources are looked up one at a time, in items/logs/requests order.
        expected_totals = (
            ('items', self.MAGICN),
            ('logs', self.MAGICN * 4),
            ('requests', self.MAGICN),
        )
        for name, total in expected_totals:
            stats = getattr(done, name).stats()
            self.assertEqual(stats['totals']['input_values'], total)

    def test_succeed_without_close_reason(self):
        """A job run without a close reason ends finished as 'no_reason'."""
        project = self.panelproject
        pushed = project.jobq.push(self.spidername)
        # check pending state
        pending = project.get_jobs(self.spiderid).next()
        self.assertEqual(pending.metadata.get('state'), 'pending')
        # consume msg from runner
        self._run_runner(pushed, close_reason=None)
        # query again from panel
        done = project.get_jobs(self.spiderid).next()
        for field, expected in (('state', 'finished'), ('close_reason', 'no_reason')):
            self.assertEqual(done.metadata.get(field), expected)
        # Resources are looked up one at a time, in items/logs/requests order.
        expected_totals = (
            ('items', self.MAGICN),
            ('logs', self.MAGICN * 4),
            ('requests', self.MAGICN),
        )
        for name, total in expected_totals:
            stats = getattr(done, name).stats()
            self.assertEqual(stats['totals']['input_values'], total)

    def test_scraper_failure(self):
        """A run given an IOError close reason ends finished as 'failed'."""
        project = self.panelproject
        pushed = project.jobq.push(self.spidername)
        # check pending state
        pending = project.get_jobs(self.spiderid).next()
        self.assertEqual(pending.metadata.get('state'), 'pending')
        # consume msg from runner
        failure = IOError('no more resources, ha!')
        self._run_runner(pushed, close_reason=failure)
        # query again from panel
        done = project.get_jobs(self.spiderid).next()
        for field, expected in (('state', 'finished'), ('close_reason', 'failed')):
            self.assertEqual(done.metadata.get(field), expected)
        # MAGICN per log level messages plus one of last failure
        log_stats = done.logs.stats()
        self.assertTrue(log_stats)
        self.assertEqual(log_stats['totals']['input_values'], self.MAGICN * 4 + 1)

    def _run_runner(self, pushed, close_reason):
        """Start a job with the runner client, run the scraper, then close it.

        The job is marked failed when the scraper raises and finished
        otherwise; the runner client is closed afterwards.

        Args:
            pushed: not used in this method's body.
            close_reason: forwarded unchanged to ``_run_scraper``.
        """
        job = self.runnerclient.start_job(self.projectid)
        self.assertFalse(job.metadata.get('stop_requested'))
        job.metadata.update(host='localhost', slot=1)
        self.assertEqual(job.metadata.get('state'), 'running')
        # run scraper
        try:
            self._run_scraper(job.key, job.jobauth, close_reason=close_reason)
        except Exception as exc:
            # Record the scraper's exception text as the failure message.
            job.failed(message=str(exc))
            # logging from runner must append and never remove messages logged
            # by scraper
            self.assertTrue(job.logs.batch_append)
        else:
            job.finished()

        self.runnerclient.close()

    def _run_scraper(self, jobkey, jobauth, close_reason=None):
        httpmethods = 'GET PUT POST DELETE HEAD OPTIONS TRACE CONNECT'.split()
        job = self.scraperclient.get_job(jobkey, auth=jobauth)
        for idx in xrange(self.MAGICN):
            iid = job.items.write({'uuid': idx})
            job.logs.debug('log debug %s' % idx, idx=idx)
            job.logs.info('log info %s' % idx, idx=idx)
            job.logs.warn('log warn %s' % idx, idx=idx)
            job.logs.error('log error %s' % idx, idx=idx)
            sid = job.samples.write([idx, idx, idx])
            rid = job.requests.add(
                url='http://test.com/%d' % idx,
                status=random.randint(100, 1000),
                method=random.choice(httpmethods),
                rs=random.randint(0, 100000),
                duration=random.randint(0, 1000),
                parent=random.randrange(0, idx + 1) if idx > 10 else None,
                ts=millitime() + random.randint(100, 100000),
#.........这里部分代码省略.........
开发者ID:OSadovy,项目名称:python-hubstorage,代码行数:103,代码来源:test_system.py


注:本文中的hubstorage.HubstorageClient.get_job方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。