This article collects typical usage examples of the Python class dlstats.fetchers._commons.Datasets. If you have been wondering what the Datasets class is for, or how it is used in practice, the selected examples here should help.
The 15 code examples below show the Datasets class in context, ordered by popularity.
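All of the examples share the same basic flow: construct a Datasets object with provider and dataset metadata, register dimension codes, attach an iterator of series to dataset.series.data_iterator, then persist everything with update_database(). Here is a minimal sketch of that flow, pieced together from the examples below; note that db (an open MongoDB database handle, as in the test cases) and my_series_iterator are placeholder assumptions for illustration, not part of the library:

    from datetime import datetime
    from dlstats.fetchers._commons import Fetcher, Datasets

    # `db` is assumed to be an existing MongoDB database handle, as in the tests.
    f = Fetcher(provider_name="p1", db=db)

    d = Datasets(provider_name="p1",
                 dataset_code="d1",
                 name="d1 Name",
                 last_update=datetime.now(),
                 doc_href="http://www.example.com",
                 fetcher=f,
                 is_load_previous_version=False)

    # Register known codes for a dimension: update_entry(dimension, key, value).
    d.dimension_list.update_entry("Country", "AFG", "AFG")

    # Any object exposing __next__ and yielding series documents will do;
    # the tests below use FakeDatas or an empty iterator.
    d.series.data_iterator = my_series_iterator  # placeholder iterator

    result = d.update_database()  # returns the dataset id, or None if nothing was recorded

As Examples 1 and 3 show, update_database() returns None when the iterator yields no series, and a non-None result once the dataset is actually recorded.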
Example 1: test_not_recordable_dataset
def test_not_recordable_dataset(self):
    # nosetests -s -v dlstats.tests.fetchers.test__commons:DBDatasetTestCase.test_not_recordable_dataset

    self._collections_is_empty()

    f = Fetcher(provider_name="p1",
                db=self.db)

    d = Datasets(provider_name="p1",
                 dataset_code="d1",
                 name="d1 Name",
                 last_update=datetime.now(),
                 doc_href="http://www.example.com",
                 fetcher=f,
                 is_load_previous_version=False)

    d.dimension_list.update_entry("Scale", "Billions", "Billions")
    d.dimension_list.update_entry("country", "AFG", "AFG")

    class EmptySeriesIterator():
        def __next__(self):
            raise StopIteration

    datas = EmptySeriesIterator()
    d.series.data_iterator = datas

    id = d.update_database()
    self.assertIsNone(id)

    self.assertEqual(self.db[constants.COL_DATASETS].count(), 0)
Example 2: upsert_dataset
def upsert_dataset(self, dataset_code):
    settings = self._get_datasets_settings()[dataset_code]

    dataset = Datasets(provider_name=self.provider_name,
                       dataset_code=dataset_code,
                       name=settings["name"],
                       doc_href='http://www.bea.gov',
                       fetcher=self)

    url = settings["metadata"]["url"]
    filename = settings["metadata"]["filename"]
    sheet_name = settings["metadata"]["sheet_name"]
    sheet = self._get_sheet(url, filename, sheet_name)
    fetcher_data = BeaData(dataset, url=url, sheet=sheet)

    if dataset.last_update and fetcher_data.release_date >= dataset.last_update and not self.force_update:
        comments = "update-date[%s]" % fetcher_data.release_date
        raise errors.RejectUpdatedDataset(provider_name=self.provider_name,
                                          dataset_code=dataset_code,
                                          comments=comments)

    dataset.last_update = fetcher_data.release_date
    dataset.series.data_iterator = fetcher_data

    return dataset.update_database()
Example 3: test_unique_constraint
def test_unique_constraint(self):
    # nosetests -s -v dlstats.tests.fetchers.test__commons:DBDatasetTestCase.test_unique_constraint

    self._collections_is_empty()

    f = Fetcher(provider_name="p1",
                db=self.db)

    d = Datasets(provider_name="p1",
                 dataset_code="d1",
                 name="d1 Name",
                 last_update=datetime.now(),
                 doc_href="http://www.example.com",
                 fetcher=f,
                 is_load_previous_version=False)

    d.dimension_list.update_entry("Country", "AFG", "AFG")
    d.dimension_list.update_entry("Scale", "Billions", "Billions")

    datas = FakeDatas(provider_name="p1",
                      dataset_code="d1",
                      fetcher=f)
    d.series.data_iterator = datas

    result = d.update_database()
    self.assertIsNotNone(result)
    self.assertEqual(self.db[constants.COL_DATASETS].count(), 1)

    with self.assertRaises(DuplicateKeyError):
        existing_dataset = dict(provider_name="p1", dataset_code="d1")
        self.db[constants.COL_DATASETS].insert(existing_dataset)
Example 4: upsert_dataset
def upsert_dataset(self, dataset_code):
    start = time.time()
    logger.info("upsert dataset[%s] - START" % (dataset_code))

    # TODO: check whether the dataset already exists, and update it if so
    dataset = Datasets(provider_name=self.provider_name,
                       dataset_code=dataset_code,
                       name=None,
                       doc_href=self.provider.website,
                       last_update=datetime.now(),
                       fetcher=self)

    _data = ECB_Data(dataset=dataset)
    dataset.series.data_iterator = _data

    result = dataset.update_database()
    _data = None

    end = time.time() - start
    logger.info("upsert dataset[%s] - END - time[%.3f seconds]" % (dataset_code, end))

    return result
Example 5: test_constructor
def test_constructor(self):
    # nosetests -s -v dlstats.tests.fetchers.test__commons:DatasetTestCase.test_constructor

    with self.assertRaises(ValueError):
        Datasets(is_load_previous_version=False)

    f = Fetcher(provider_name="p1", is_indexes=False)

    d = Datasets(provider_name="p1",
                 dataset_code="d1",
                 name="d1 Name",
                 doc_href="http://www.example.com",
                 fetcher=f,
                 is_load_previous_version=False)
    d.dimension_list.update_entry("country", "country", "country")

    self.assertTrue(isinstance(d.series, Series))
    self.assertTrue(isinstance(d.dimension_list, CodeDict))
    self.assertTrue(isinstance(d.attribute_list, CodeDict))

    bson = d.bson
    self.assertEqual(bson['provider_name'], "p1")
    self.assertEqual(bson["dataset_code"], "d1")
    self.assertEqual(bson["name"], "d1 Name")
    self.assertEqual(bson["doc_href"], "http://www.example.com")
    self.assertTrue(isinstance(bson["dimension_list"], dict))
    self.assertTrue(isinstance(bson["attribute_list"], dict))
    self.assertIsNone(bson["last_update"])
    self.assertEqual(bson["slug"], "p1-d1")

    # TODO: last_update
    d.last_update = datetime.now()
Example 6: upsert_dataset
def upsert_dataset(self, dataset_code):
    start = time.time()
    logger.info("upsert dataset[%s] - START" % (dataset_code))

    # TODO: check whether the dataset already exists, and update it if so
    dataset = Datasets(provider_name=self.provider_name,
                       dataset_code=dataset_code,
                       name=DATASETS[dataset_code]['name'],
                       doc_href=DATASETS[dataset_code]['doc_href'],
                       last_update=datetime.now(),
                       fetcher=self)

    _data = FED_Data(dataset=dataset,
                     url=DATASETS[dataset_code]['url'])
    dataset.series.data_iterator = _data

    result = dataset.update_database()
    _data = None

    end = time.time() - start
    logger.info("upsert dataset[%s] - END - time[%.3f seconds]" % (dataset_code, end))

    return result
Example 7: upsert_dataset
def upsert_dataset(self, dataset_code, datas=None):
    start = time.time()
    logger.info("upsert dataset[%s] - START" % (dataset_code))

    if not DATASETS.get(dataset_code):
        raise Exception("This dataset is unknown: " + dataset_code)

    dataset = Datasets(provider_name=self.provider_name,
                       dataset_code=dataset_code,
                       name=DATASETS[dataset_code]['name'],
                       doc_href=DATASETS[dataset_code]['doc_href'],
                       fetcher=self)

    fetcher_data = OECD_Data(dataset)
    dataset.series.data_iterator = fetcher_data
    dataset.update_database()

    end = time.time() - start
    logger.info("upsert dataset[%s] - END-BEFORE-METAS - time[%.3f seconds]" % (dataset_code, end))

    self.update_metas(dataset_code)

    end = time.time() - start
    logger.info("upsert dataset[%s] - END - time[%.3f seconds]" % (dataset_code, end))
Example 8: upsert_dataset
def upsert_dataset(self, dataset_code):
    """Update the database for the selected dataset"""

    self.get_selected_datasets()

    doc = self.db[constants.COL_DATASETS].find_one(
        {'provider_name': self.provider_name, 'dataset_code': dataset_code},
        {'dataset_code': 1, 'last_update': 1})

    dataset_settings = self.selected_datasets[dataset_code]

    if doc and doc['last_update'] >= dataset_settings['last_update']:
        comments = "update-date[%s]" % doc['last_update']
        raise errors.RejectUpdatedDataset(provider_name=self.provider_name,
                                          dataset_code=dataset_code,
                                          comments=comments)

    dataset = Datasets(provider_name=self.provider_name,
                       dataset_code=dataset_code,
                       name=dataset_settings["name"],
                       doc_href=dataset_settings["metadata"].get("doc_href"),
                       last_update=dataset_settings["last_update"],
                       fetcher=self)

    dataset.series.data_iterator = EurostatData(dataset)

    return dataset.update_database()
Example 9: test_process_series_data
def test_process_series_data(self):
    # nosetests -s -v dlstats.tests.fetchers.test__commons:DBSeriesTestCase.test_process_series_data

    self._collections_is_empty()

    provider_name = "p1"
    dataset_code = "d1"
    dataset_name = "d1 name"

    f = Fetcher(provider_name=provider_name,
                db=self.db)

    d = Datasets(provider_name=provider_name,
                 dataset_code=dataset_code,
                 name=dataset_name,
                 last_update=datetime.now(),
                 doc_href="http://www.example.com",
                 fetcher=f,
                 is_load_previous_version=False)

    d.dimension_list.update_entry("Scale", "Billions", "Billions")
    d.dimension_list.update_entry("Country", "AFG", "AFG")

    s = Series(provider_name=f.provider_name,
               dataset_code=dataset_code,
               last_update=datetime(2013, 10, 28),
               bulk_size=1,
               fetcher=f)

    datas = FakeDatas(provider_name=provider_name,
                      dataset_code=dataset_code,
                      fetcher=f)
    s.data_iterator = datas

    d.series = s
    d.update_database()

    # Count all series
    self.assertEqual(self.db[constants.COL_SERIES].count(), datas.max_record)

    # Count series for this provider and dataset
    series = self.db[constants.COL_SERIES].find({'provider_name': f.provider_name,
                                                 "dataset_code": dataset_code})
    self.assertEqual(series.count(), datas.max_record)

    tags.update_tags(self.db,
                     provider_name=f.provider_name, dataset_code=dataset_code,
                     col_name=constants.COL_SERIES)

    # Count series for this provider and dataset whose key is in keys[]
    series = self.db[constants.COL_SERIES].find({'provider_name': f.provider_name,
                                                 "dataset_code": dataset_code,
                                                 "key": {"$in": datas.keys}})
    self.assertEqual(series.count(), datas.max_record)

    for doc in series:
        self.assertTrue("tags" in doc)
        self.assertTrue(len(doc['tags']) > 0)
Example 10: upsert_dataset
def upsert_dataset(self, dataset_code):
    self.get_selected_datasets()

    self.dataset_settings = self.selected_datasets[dataset_code]

    dataset = Datasets(provider_name=self.provider_name,
                       dataset_code=dataset_code,
                       name=self.dataset_settings["name"],
                       last_update=self.dataset_settings['last_update'],
                       fetcher=self)

    url = self.dataset_settings['metadata']['url']
    dataset.series.data_iterator = BDF_Data(dataset, url)

    return dataset.update_database()
Example 11: upsert_gem
def upsert_gem(self, dataset_code):
    d = DATASETS[dataset_code]
    url = d['url']

    dataset = Datasets(provider_name=self.provider_name,
                       dataset_code=dataset_code,
                       name=d['name'],
                       doc_href=d['doc_href'],
                       fetcher=self)

    gem_data = GemData(dataset, url)
    dataset.last_update = gem_data.release_date
    dataset.series.data_iterator = gem_data

    dataset.update_database()
Example 12: upsert_dataset
def upsert_dataset(self, dataset_code):
    dataset = Datasets(provider_name=self.provider_name,
                       dataset_code=dataset_code,
                       name="My Dataset Name",
                       last_update=clean_datetime(),
                       fetcher=self)

    fetcher_data = DUMMY_Data(dataset)
    dataset.series.data_iterator = fetcher_data

    return dataset.update_database()
Example 13: load_datasets_update
def load_datasets_update(self):
    for d in self._parse_agenda():
        if d['dataflow_key'] in self.datasets_filter:
            dataset = Datasets(provider_name=self.provider_name,
                               dataset_code=d['dataflow_key'],
                               name=d['name'],
                               last_update=d['last_update'],
                               fetcher=self)
            url = d['url']
            dataset.series.data_iterator = BDF_Data(dataset, url)
            dataset.update_database()

            msg = "get update - provider[%s] - dataset[%s] - last-update-dataset[%s]"
            logger.info(msg % (self.provider_name, d['dataflow_key'], d['last_update']))
Example 14: upsert_dataset
def upsert_dataset(self, dataset_code):
    self._load_structure()

    dataset = Datasets(provider_name=self.provider_name,
                       dataset_code=dataset_code,
                       name=None,
                       doc_href=self.provider.website,
                       last_update=utils.clean_datetime(),
                       fetcher=self)

    _data = ECB_Data(dataset=dataset)
    dataset.series.data_iterator = _data

    return dataset.update_database()
Example 15: _common_tests
def _common_tests(self):
    self._collections_is_empty()

    url = DATASETS[self.dataset_code]['url']

    self.filepath = get_filepath(self.dataset_code)
    self.assertTrue(os.path.exists(self.filepath))

    httpretty.register_uri(httpretty.GET,
                           url,
                           body=mock_streaming(self.filepath),
                           status=200,
                           content_type='application/octet-stream;charset=UTF-8',
                           streaming=True)

    # provider.update_database
    self.fetcher.provider.update_database()
    provider = self.db[constants.COL_PROVIDERS].find_one({"name": self.fetcher.provider_name})
    self.assertIsNotNone(provider)

    # upsert_data_tree
    self.fetcher.upsert_data_tree()
    provider = self.db[constants.COL_PROVIDERS].find_one({"name": self.fetcher.provider_name})
    self.assertIsNotNone(provider['data_tree'])

    dataset = Datasets(provider_name=self.fetcher.provider_name,
                       dataset_code=self.dataset_code,
                       name=DATASETS[self.dataset_code]['name'],
                       doc_href=DATASETS[self.dataset_code]['doc_href'],
                       fetcher=self.fetcher)

    fetcher_data = bis.BIS_Data(dataset,
                                url=url,
                                filename=DATASETS[self.dataset_code]['filename'],
                                store_filepath=os.path.dirname(self.filepath))

    dataset.series.data_iterator = fetcher_data
    dataset.update_database()

    self.dataset = self.db[constants.COL_DATASETS].find_one({'provider_name': self.fetcher.provider_name,
                                                             "dataset_code": self.dataset_code})
    self.assertIsNotNone(self.dataset)
    self.assertEqual(len(self.dataset["dimension_list"]), DATASETS[self.dataset_code]["dimensions_count"])

    series = self.db[constants.COL_SERIES].find({'provider_name': self.fetcher.provider_name,
                                                 "dataset_code": self.dataset_code})
    self.assertEqual(series.count(), SERIES_COUNT)