本文整理汇总了Python中hubstorage.HubstorageClient类的典型用法代码示例。如果您正苦于以下问题:Python HubstorageClient类的具体用法?Python HubstorageClient怎么用?Python HubstorageClient使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了HubstorageClient类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_retrier_catches_badstatusline_and_429
def test_retrier_catches_badstatusline_and_429(self):
    """Check that ``get_job`` survives two connection errors and one 429.

    The mocked API raises ``ConnectionError`` (wrapping ``BadStatusLine``)
    on the first two calls, answers 429 on the third call and 200 with the
    job metadata on every later call. The test expects four calls in total.
    """
    # Prepare
    client = HubstorageClient(auth=self.auth, endpoint=self.endpoint, max_retries=3)
    job_metadata = {'project': self.projectid, 'spider': self.spidername, 'state': 'pending'}
    # A one-element list lets the nested callback mutate the counter.
    attempts_count = [0]

    def request_callback(request):
        attempts_count[0] += 1
        seen = attempts_count[0]
        if seen <= 2:
            raise ConnectionError("Connection aborted.", BadStatusLine("''"))
        if seen != 3:
            return (200, {}, json.dumps(dict(job_metadata)))
        return (429, {}, {})

    self.mock_api(callback=request_callback)

    # Act
    job_key = '%s/%s/%s' % (self.projectid, self.spiderid, 42)
    job = client.get_job(job_key)

    # Assert
    self.assertEqual(dict(job_metadata), dict(job.metadata))
    self.assertEqual(attempts_count[0], 4)
示例2: get_job
def get_job(job):
    """Fetch a job through Hubstorage, failing when it has no metadata.

    ``job`` is handed to ``get_job_specs``, which yields the job id and the
    API key used to build the client.

    Raises:
        NotFoundException: if the fetched job's ``metadata`` is falsy.
    """
    jobid, apikey = get_job_specs(job)
    fetched = HubstorageClient(auth=apikey).get_job(jobid)
    if fetched.metadata:
        return fetched
    raise NotFoundException("Job {} does not exist".format(jobid))
示例3: test_allows_msgpack
def test_allows_msgpack(monkeypatch, msgpack_available, path, expected_result):
    """Check which job resources report msgpack support for ``path``.

    ``items``, ``logs`` and ``samples`` must return
    ``msgpack_available and expected_result``; ``requests``, ``metadata``
    and ``jobq`` must always return ``False``.
    """
    monkeypatch.setattr("hubstorage.resourcetype.MSGPACK_AVAILABLE", msgpack_available)
    job = HubstorageClient().get_job("2222000/1/1")

    expected = msgpack_available and expected_result
    for resource in (job.items, job.logs, job.samples):
        assert resource._allows_mpack(path) is expected

    # These resources never allow msgpack, whatever the flag says.
    for name in ("requests", "metadata", "jobq"):
        assert getattr(job, name)._allows_mpack(path) is False
示例4: test_connect_retry
def test_connect_retry(self):
    """Check that ``max_retries`` reaches the session's HTTP adapter.

    A job is pushed in the ``running`` state and its metadata verified
    before the adapter's ``max_retries`` value is compared with 2.
    """
    client = HubstorageClient(auth=self.auth, endpoint=self.endpoint, max_retries=2)
    job = client.push_job(self.projectid, self.spidername, state='running')
    metadata = job.metadata
    # client.auth is passed as the failure message to ease debugging.
    self.assertEqual(metadata.get('state'), u'running', client.auth)
    metadata.expire()
    http_adapter = client.session.adapters['http://']
    self.assertEqual(http_adapter.max_retries, 2)
示例5: setUp
def setUp(self):
    """Create the panel, runner and scraper clients against one endpoint."""
    super(HSTestCase, self).setUp()
    hs_endpoint = self.hsclient.endpoint

    # Panel - no client auth, only project auth using user auth token
    self.panelclient = HubstorageClient(endpoint=hs_endpoint)
    self.panelproject = self.panelclient.get_project(self.projectid, auth=self.auth)

    # Runner - client uses global auth to poll jobq
    self.runnerclient = HubstorageClient(endpoint=hs_endpoint, auth=self.auth)

    # Scraper - uses job level auth, no global or project auth available
    self.scraperclient = HubstorageClient(endpoint=hs_endpoint)
示例6: fetch_and_save_items
def fetch_and_save_items():
    """Save the items of every job in each configured spider's last-job summary.

    For each entry of ``SPIDERS`` the spider id is resolved, the project's
    ``lastjobsummary`` is queried, and each listed job's items are passed
    to ``save_items`` together with the spider's institution name.
    """
    client = HubstorageClient(auth=API_KEY)
    project = client.get_project(SH_PROJECT)
    for spider in SPIDERS:
        spider_name = spider['spider_name']
        print("\nworking on spider {}".format(spider_name))
        spider_id = project.ids.spider(spider_name)
        for entry in project.spiders.lastjobsummary(spiderid=spider_id):
            job_key = entry['key']
            print(job_key)
            items = client.get_job(job_key).items.iter_values()
            save_items(items, spider['institution_name'])
示例7: test_get_job_does_retry
def test_get_job_does_retry(self):
    """Check that ``get_job`` is retried until the mocked API answers.

    The test expects three calls and metadata equal to the prepared dict.
    """
    # Prepare
    client = HubstorageClient(auth=self.auth, endpoint=self.endpoint, max_retries=3)
    job_metadata = {'project': self.projectid, 'spider': self.spidername, 'state': 'pending'}
    # NOTE(review): the first argument is presumably the number of failing
    # responses before a success - confirm against make_request_callback.
    callback, attempts_count = self.make_request_callback(2, job_metadata)
    self.mock_api(callback=callback)

    # Act
    job_key = '%s/%s/%s' % (self.projectid, self.spiderid, 42)
    job = client.get_job(job_key)

    # Assert
    self.assertEqual(dict(job_metadata), dict(job.metadata))
    self.assertEqual(attempts_count[0], 3)
示例8: __init__
def __init__(self, crawler):
    """Read the HS_* settings and open the panel and Hubstorage clients.

    Also connects ``self.close_spider`` to the ``spider_closed`` signal.
    """
    settings = crawler.settings

    # Connection settings.
    self.hs_endpoint = settings.get("HS_ENDPOINT")
    self.hs_auth = self._get_config(settings, "HS_AUTH")
    env_project_id = os.environ.get('SCRAPY_PROJECT_ID')
    self.hs_projectid = self._get_config(settings, "HS_PROJECTID", env_project_id)
    self.hs_frontier = self._get_config(settings, "HS_FRONTIER")
    self.hs_consume_from_slot = self._get_config(settings, "HS_CONSUME_FROM_SLOT")

    # Tunables with defaults.
    self.hs_number_of_slots = settings.getint("HS_NUMBER_OF_SLOTS", DEFAULT_HS_NUMBER_OF_SLOTS)
    self.hs_max_links = settings.getint("HS_MAX_LINKS", DEFAULT_MAX_LINKS)
    self.hs_start_job_enabled = settings.getbool("HS_START_JOB_ENABLED", False)
    self.hs_start_job_on_reason = settings.getlist("HS_START_JOB_ON_REASON", ['finished'])

    # Remote handles: panel project plus the Hubstorage project frontier.
    panel_connection = Connection(self.hs_auth)
    self.panel_project = panel_connection[self.hs_projectid]
    self.hsclient = HubstorageClient(auth=self.hs_auth, endpoint=self.hs_endpoint)
    self.project = self.hsclient.get_project(self.hs_projectid)
    self.fclient = self.project.frontier

    # Per-run bookkeeping.
    self.new_links = defaultdict(set)
    self.batch_ids = []

    crawler.signals.connect(self.close_spider, signals.spider_closed)

    # Make sure the logger for hubstorage.batchuploader is configured
    logging.basicConfig()
示例9: __init__
def __init__(self, crawler):
    """Read the HS_* settings from ``crawler`` and open the remote clients.

    The numeric settings fall back to their defaults when the configured
    value cannot be converted by ``int`` (``ValueError``). The old panel
    connection is only created when ``HS_START_JOB_NEW_PANEL`` is falsy.
    """
    self.crawler = crawler
    settings = crawler.settings

    self.hs_endpoint = settings.get("HS_ENDPOINT")
    self.hs_auth = self._get_config(crawler, "HS_AUTH")
    self.hs_projectid = self._get_config(crawler, "HS_PROJECTID")
    self.hs_frontier = self._get_config(crawler, "HS_FRONTIER")
    self.hs_consume_from_slot = self._get_config(crawler, "HS_CONSUME_FROM_SLOT")

    try:
        slot_count = int(settings.get("HS_NUMBER_OF_SLOTS", DEFAULT_HS_NUMBER_OF_SLOTS))
    except ValueError:
        slot_count = DEFAULT_HS_NUMBER_OF_SLOTS
    self.hs_number_of_slots = slot_count

    try:
        link_limit = int(settings.get("HS_MAX_LINKS", DEFAULT_MAX_LINKS))
    except ValueError:
        link_limit = DEFAULT_MAX_LINKS
    self.hs_max_links = link_limit

    self.hs_start_job_enabled = settings.get("HS_START_JOB_ENABLED", False)
    self.hs_start_job_on_reason = settings.get("HS_START_JOB_ON_REASON", ['finished'])
    self.hs_start_job_new_panel = settings.get("HS_START_JOB_NEW_PANEL", False)

    if not self.hs_start_job_new_panel:
        panel_connection = Connection(self.hs_auth)
        self.oldpanel_project = panel_connection[self.hs_projectid]

    self.hsclient = HubstorageClient(auth=self.hs_auth, endpoint=self.hs_endpoint)
    self.project = self.hsclient.get_project(self.hs_projectid)
    self.fclient = self.project.frontier

    self.new_links = defaultdict(set)
    self.batch_ids = []

    crawler.signals.connect(self.close_spider, signals.spider_closed)

    # Make sure the logger for hubstorage.batchuploader is configured
    logging.basicConfig()
示例10: test_metadata_save_does_retry
def test_metadata_save_does_retry(self):
    """Check that saving job metadata retries the POST request.

    GET and POST are mocked separately; the test expects the POST callback
    to have been invoked three times after ``job.metadata.save()``.
    """
    # Prepare
    client = HubstorageClient(auth=self.auth, endpoint=self.endpoint, max_retries=3)
    job_metadata = {'project': self.projectid, 'spider': self.spidername, 'state': 'pending'}
    # The GET attempt counter is not inspected by this test.
    get_callback, _get_attempts = self.make_request_callback(0, job_metadata)
    post_callback, post_attempts = self.make_request_callback(2, job_metadata)
    self.mock_api(method=GET, callback=get_callback)
    self.mock_api(method=POST, callback=post_callback)

    # Act
    job_key = '%s/%s/%s' % (self.projectid, self.spiderid, 42)
    job = client.get_job(job_key)
    job.metadata['foo'] = 'bar'
    job.metadata.save()

    # Assert
    self.assertEqual(post_attempts[0], 3)
示例11: test_debug_queries
def test_debug_queries(self):
    """Check that a debug-enabled client records the queries it performs.

    The query list starts empty; after listing the project's jobs for one
    spider it must hold exactly one entry with method ``GET``, a positive
    ``time`` and a ``url`` key.
    """
    self.hsclient = HubstorageClient(auth=self.auth, endpoint=self.endpoint, debug=True)
    self.assertEqual(self.hsclient.queries, [])
    self.project = self.hsclient.get_project(self.projectid)
    # Consume the iterator so the request is actually issued.
    list(self.project.get_jobs(self.spiderid))
    self.assertEqual(len(self.hsclient.queries), 1)
    q = self.hsclient.queries[0]
    self.assertEqual(q['method'], 'GET')
    # ``assert_`` is a deprecated alias that was removed in Python 3.12;
    # the dedicated assertions also report the offending values on failure.
    self.assertGreater(q['time'], 0)
    self.assertIn('url', q)
示例12: test_collection_store_and_delete_are_retried
def test_collection_store_and_delete_are_retried(self):
    """Check that collection ``set`` and ``delete`` requests are retried.

    Two POST mocks are registered, one for paths matching ``/.*/deleted``
    and one catch-all; each callback is expected to be hit three times.
    """
    # Prepare
    client = HubstorageClient(auth=self.auth, endpoint=self.endpoint, max_retries=3)
    post_callback, post_attempts = self.make_request_callback(2, [])
    delete_callback, delete_attempts = self.make_request_callback(2, [])
    self.mock_api(method=POST, callback=delete_callback, url_match='/.*/deleted')
    # /!\ default regexp matches all paths, has to be added last
    self.mock_api(method=POST, callback=post_callback)

    # Act
    store = client.get_project(self.projectid).collections.new_store('foo')
    store.set({'_key': 'bar', 'content': 'value'})
    store.delete('baz')

    # Assert
    self.assertEqual(post_attempts[0], 3)
    self.assertEqual(delete_attempts[0], 3)
示例13: test_push_job_does_not_retry
def test_push_job_does_not_retry(self):
    """Check that ``push_job`` gives up after a single failed attempt.

    The test expects an ``HTTPError`` whose response status is 504, no job
    object, and exactly one call to the mocked POST endpoint.
    """
    # Prepare
    client = HubstorageClient(auth=self.auth, endpoint=self.endpoint, max_retries=3)
    callback, attempts_count = self.make_request_callback(2, {'key': '1/2/3'})
    self.mock_api(POST, callback=callback)

    # Act
    job = None
    err = None
    try:
        job = client.push_job(self.projectid, self.spidername)
    except HTTPError as exc:
        err = exc

    # Assert
    self.assertIsNone(job)
    self.assertIsNotNone(err)
    self.assertEqual(err.response.status_code, 504)
    self.assertEqual(attempts_count[0], 1)
示例14: __init__
def __init__(self, auth, project_id, frontier, batch_size=0, flush_interval=30):
    """Open the project's Hubstorage frontier and initialise link counters.

    ``batch_size`` and ``flush_interval`` are copied onto the frontier
    client as ``batch_size`` and ``batch_interval``; ``frontier`` is stored
    unchanged.
    """
    self._hs_client = HubstorageClient(auth=auth)
    project = self._hs_client.get_project(project_id)
    self._hcf = project.frontier
    self._hcf.batch_size = batch_size
    self._hcf.batch_interval = flush_interval

    self._frontier = frontier
    self._links_count = defaultdict(int)
    self._links_to_flush_count = defaultdict(int)
    self._hcf_retries = 10
    self.logger = logging.getLogger("hubstorage-wrapper")
示例15: test_api_delete_can_be_set_to_non_idempotent
def test_api_delete_can_be_set_to_non_idempotent(self):
    """Check that ``apidelete(..., is_idempotent=False)`` is not retried.

    The test expects an ``HTTPError`` and exactly one call to the mocked
    DELETE endpoint.
    """
    # Prepare
    client = HubstorageClient(auth=self.auth, endpoint=self.endpoint, max_retries=3)
    job_metadata = {'project': self.projectid, 'spider': self.spidername, 'state': 'pending'}
    delete_callback, delete_attempts = self.make_request_callback(2, job_metadata)
    self.mock_api(method=DELETE, callback=delete_callback)

    # Act
    job_key = '%s/%s/%s' % (self.projectid, self.spiderid, 42)
    job = client.get_job(job_key)
    err = None
    try:
        job.metadata.apidelete('/my/non/idempotent/delete/', is_idempotent=False)
    except HTTPError as exc:
        err = exc

    # Assert
    self.assertEqual(delete_attempts[0], 1)
    self.assertIsNotNone(err)