This article collects typical usage examples of the Python method datalad.api.Dataset.create. If you are unsure what Dataset.create does, how to call it, or what it looks like in practice, the curated code examples below may help. You can also read further about the containing class, datalad.api.Dataset.
The following shows 5 code examples of the Dataset.create method, ordered roughly by popularity.
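Before turning to the collected examples, here is a minimal sketch of the basic Dataset.create pattern they all rely on: creating a dataset at a path and then creating a registered subdataset inside it. The temporary directory and the names 'demo' and 'sub' are purely illustrative, and exact keyword arguments may differ between DataLad versions.

import tempfile
from os.path import join as opj

from datalad.api import Dataset

# work in a throwaway directory (illustrative only)
topdir = tempfile.mkdtemp()

# create a brand new dataset at the given path
ds = Dataset(opj(topdir, 'demo')).create()

# create a subdataset; it gets registered in the superdataset
subds = ds.create('sub')

# save any remaining changes in the superdataset
ds.save(message='register subdataset')

In the tests below the same pattern appears with force=True, which allows creating a dataset in a directory that already contains files.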
Example 1: test_basic_metadata
# Required import: from datalad.api import Dataset [as alias]
# Or: from datalad.api.Dataset import create [as alias]
def test_basic_metadata(path):
    ds = Dataset(opj(path, 'origin'))
    meta = get_metadata(ds)
    assert_equal(sorted(meta[0].keys()),
                 ['@context', 'dcterms:conformsTo'])
    ds.create(force=True, save=False)
    # with subdataset
    sub = ds.create('sub', force=True)
    ds.save()
    meta = get_metadata(ds)
    assert_equal(
        sorted(meta[0].keys()),
        ['@context', '@id', 'availableFrom', 'dcterms:conformsTo',
         'dcterms:modified', 'type', 'version'])
    assert_equal(meta[0]['type'], 'Dataset')
    # clone and get relationship info in metadata
    sibling = install(opj(path, 'sibling'), source=opj(path, 'origin'))
    sibling_meta = get_metadata(sibling)
    assert_equal(sibling_meta[0]['@id'], ds.id)
    # origin should learn about the clone
    sibling.repo.push(remote='origin', refspec='git-annex')
    meta = get_metadata(ds)
    assert_equal([m['@id'] for m in meta[0]['availableFrom']],
                 [m['@id'] for m in sibling_meta[0]['availableFrom']])
    meta = get_metadata(ds, guess_type=True)
    # without aggregation there is no trace of subdatasets in the metadata
    assert_not_in('dcterms:hasPart', meta[0])
Example 2: test_aggregate_with_missing_or_duplicate_id
# Required import: from datalad.api import Dataset [as alias]
# Or: from datalad.api.Dataset import create [as alias]
def test_aggregate_with_missing_or_duplicate_id(path):
    # a hierarchy of three (super/sub)datasets, each with some native metadata
    ds = Dataset(opj(path, 'origin')).create(force=True)
    subds = ds.create('sub', force=True)
    subds.remove(opj('.datalad', 'config'), if_dirty='ignore')
    assert_false(exists(opj(subds.path, '.datalad', 'config')))
    subsubds = subds.create('subsub', force=True)
    # aggregate from bottom to top, guess native data, no compacting of graph
    # should yield 6 metadata sets: one implicit and one native per dataset,
    # plus a second native set for the topmost dataset
    aggregate_metadata(ds, guess_native_type=True, recursive=True)
    # now only ask the top superdataset, no recursion, just reading from the cache
    meta = get_metadata(
        ds, guess_type=False, ignore_subdatasets=False, ignore_cache=False)
    # and we know nothing about subsub
    for name in ('grandchild_äöü東',):
        assert_true(sum([s.get('name', '') == assure_unicode(name) for s in meta]))
    # but search should not fail
    with swallow_outputs():
        res1 = list(search_('.', regex=True, dataset=ds))
    assert res1
    # and let's make sure we would not fail if a dataset is duplicated by
    # installing the same dataset twice
    subds_clone = ds.install(source=subds.path, path="subds2")
    with swallow_outputs():
        res2 = list(search_('.', regex=True, dataset=ds))
Example 3: test_aggregation
# Required import: from datalad.api import Dataset [as alias]
# Or: from datalad.api.Dataset import create [as alias]
def test_aggregation(path):
    with chpwd(path):
        assert_raises(InsufficientArgumentsError, aggregate_metadata, None)
    # a hierarchy of three (super/sub)datasets, each with some native metadata
    ds = Dataset(opj(path, 'origin')).create(force=True)
    # before anything aggregated we would get nothing and only a log warning
    with swallow_logs(new_level=logging.WARNING) as cml:
        assert_equal(list(query_aggregated_metadata('all', ds, [])), [])
        assert_re_in('.*Found no aggregated metadata.*update', cml.out)
    ds.config.add('datalad.metadata.nativetype', 'frictionless_datapackage',
                  where='dataset')
    subds = ds.create('sub', force=True)
    subds.config.add('datalad.metadata.nativetype', 'frictionless_datapackage',
                     where='dataset')
    subsubds = subds.create('subsub', force=True)
    subsubds.config.add('datalad.metadata.nativetype', 'frictionless_datapackage',
                        where='dataset')
    ds.add('.', recursive=True)
    ok_clean_git(ds.path)
    # aggregate metadata from all subdatasets into any superdataset, including
    # intermediate ones
    res = ds.aggregate_metadata(recursive=True, update_mode='all')
    # we get a success report for both subdatasets and the superdataset,
    # and they get saved
    assert_result_count(res, 6)
    assert_result_count(res, 3, status='ok', action='aggregate_metadata')
    assert_result_count(res, 3, status='ok', action='save')
    # nice and tidy
    ok_clean_git(ds.path)

    # quick test of aggregate report
    aggs = ds.metadata(get_aggregates=True)
    # one for each dataset
    assert_result_count(aggs, 3)
    # mother also reports the layout version
    assert_result_count(aggs, 1, path=ds.path, layout_version=1)

    # store clean direct result
    origres = ds.metadata(recursive=True)
    # basic sanity check
    assert_result_count(origres, 6)
    assert_result_count(origres, 3, type='dataset')
    assert_result_count(origres, 3, type='file')  # now that we have annex.key
    # three different IDs
    assert_equal(3, len(set([s['dsid'] for s in origres if s['type'] == 'dataset'])))
    # and we know about all three datasets
    for name in ('MOTHER_äöü東', 'child_äöü東', 'grandchild_äöü東'):
        assert_true(
            sum([s['metadata']['frictionless_datapackage']['name'] ==
                 assure_unicode(name) for s in origres
                 if s['type'] == 'dataset']))

    # now clone the beast to simulate a new user installing an empty dataset
    clone = install(
        opj(path, 'clone'), source=ds.path,
        result_xfm='datasets', return_type='item-or-list')
    # ID mechanism works
    assert_equal(ds.id, clone.id)

    # get fresh metadata
    cloneres = clone.metadata()
    # basic sanity check
    assert_result_count(cloneres, 2)
    assert_result_count(cloneres, 1, type='dataset')
    assert_result_count(cloneres, 1, type='file')

    # now loop over the previous results from the direct metadata query of
    # origin and make sure we get the exact same stuff from the clone
    _compare_metadata_helper(origres, clone)

    # now obtain a subdataset in the clone, should make no difference
    assert_status('ok', clone.install('sub', result_xfm=None, return_type='list'))
    _compare_metadata_helper(origres, clone)

    # test search in search tests, not all over the place
    ## query smoke test
    assert_result_count(clone.search('mother', mode='egrep'), 1)
    assert_result_count(clone.search('(?i)MoTHER', mode='egrep'), 1)

    child_res = clone.search('child', mode='egrep')
    assert_result_count(child_res, 2)
    for r in child_res:
        if r['type'] == 'dataset':
            assert_in(
                r['query_matched']['frictionless_datapackage.name'],
                r['metadata']['frictionless_datapackage']['name'])
Example 4: supers
# Required import: from datalad.api import Dataset [as alias]
# Or: from datalad.api.Dataset import create [as alias]
class supers(SuprocBenchmarks):
    """
    Benchmarks on common operations on collections of datasets using datalad API
    """

    timeout = 3600
    # need to assure that we are working in a different repository now
    # see https://github.com/datalad/datalad/issues/1512
    # might not be sufficient due to side effects between tests and
    # thus getting into the same situation
    ds_count = 0

    def setup_cache(self):
        # creating in CWD so things get removed when ASV is done
        ds_path = create_test_dataset("testds1", spec='2/-2/-2', seed=0)[0]
        # Will store into a tarfile since otherwise install -r is way too slow
        # to be invoked for every benchmark
        tarfile_path = opj(osp.dirname(ds_path), 'testds1.tar')
        with tarfile.open(tarfile_path, "w") as tar:
            # F.CK -- Python tarfile can't later extract those because key dirs are
            # read-only. For now just a workaround - make it all writeable
            from datalad.utils import rotree
            rotree('testds1', ro=False, chmod_files=False)
            tar.add('testds1', recursive=True)
        rmtree('testds1')
        return tarfile_path

    def setup(self, tarfile_path):
        import tarfile
        tempdir = osp.dirname(tarfile_path)
        with tarfile.open(tarfile_path) as tar:
            tar.extractall(tempdir)

        # TODO -- remove this abomination after https://github.com/datalad/datalad/issues/1512 is fixed
        epath = opj(tempdir, 'testds1')
        epath_unique = epath + str(self.__class__.ds_count)
        os.rename(epath, epath_unique)
        self.__class__.ds_count += 1
        self.ds = Dataset(epath_unique)
        print("Finished setup for %s" % tempdir)

    def teardown(self, tarfile_path):
        for path in [self.ds.path + '_', self.ds.path]:
            print("Cleaning up %s" % path)
            if osp.exists(path):
                rmtree(path)

    def time_installr(self, tarfile_path):
        # somewhat duplicating setup but lazy to do different one for now
        assert install(self.ds.path + '_', source=self.ds.path, recursive=True)

    def time_createadd(self, tarfile_path):
        assert self.ds.create('newsubds')

    def time_createadd_to_dataset(self, tarfile_path):
        subds = create(opj(self.ds.path, 'newsubds'))
        self.ds.add(subds.path)

    def time_ls(self, tarfile_path):
        ls(self.ds.path)

    def time_ls_recursive(self, tarfile_path):
        ls(self.ds.path, recursive=True)

    def time_ls_recursive_long_all(self, tarfile_path):
        ls(self.ds.path, recursive=True, long_=True, all_=True)

    # TODO: since doesn't really allow to uninstall top level ds... bleh ;)
    #def time_uninstall(self, tarfile_path):
    #    uninstall(self.ds.path, recursive=True)

    def time_remove(self, tarfile_path):
        remove(self.ds.path, recursive=True)
Example 5: test_aggregation
# Required import: from datalad.api import Dataset [as alias]
# Or: from datalad.api.Dataset import create [as alias]
def test_aggregation(path):
    with chpwd(path):
        assert_raises(InsufficientArgumentsError, aggregate_metadata, None)
    # a hierarchy of three (super/sub)datasets, each with some native metadata
    ds = Dataset(opj(path, 'origin')).create(force=True)
    subds = ds.create('sub', force=True)
    subsubds = subds.create('subsub', force=True)
    # aggregate from bottom to top, guess native data, no compacting of graph
    # should yield 6 metadata sets: one implicit and one native per dataset,
    # plus a second native set for the topmost dataset
    aggregate_metadata(ds, guess_native_type=True, recursive=True)
    # now only ask the top superdataset, no recursion, just reading from the cache
    meta = get_metadata(
        ds, guess_type=False, ignore_subdatasets=False, ignore_cache=False)
    assert_equal(len(meta), 10)
    # same schema
    assert_equal(
        10,
        sum([s.get('@context', {'@vocab': None})['@vocab'] == 'http://schema.org/'
             for s in meta]))
    # three different IDs
    assert_equal(3, len(set([s.get('@id') for s in meta])))
    # and we know about all three datasets
    for name in ('mother_äöü東', 'child_äöü東', 'grandchild_äöü東'):
        assert_true(sum([s.get('name', None) == assure_unicode(name) for s in meta]))

    #print(meta)
    assert_equal(
        # first implicit, then two natives, then aggregate
        meta[3]['dcterms:hasPart']['@id'],
        subds.id)
    success = False
    for m in meta:
        p = m.get('dcterms:hasPart', {})
        if p.get('@id', None) == subsubds.id:
            assert_equal(opj('sub', 'subsub'), p.get('location', None))
            success = True
    assert_true(success)

    # save the toplevel dataset only (see below)
    ds.save('with aggregated meta data', all_changes=True)

    # now clone the beast to simulate a new user installing an empty dataset
    clone = install(opj(path, 'clone'), source=ds.path)
    # ID mechanism works
    assert_equal(ds.id, clone.id)

    # get fresh metadata, the implicit one for the top-most dataset should
    # differ, but the rest should not
    clonemeta = get_metadata(
        clone, guess_type=False, ignore_subdatasets=False, ignore_cache=False)

    # make sure the implicit md for the topmost comes first
    assert_equal(clonemeta[0]['@id'], clone.id)
    assert_equal(clonemeta[0]['@id'], ds.id)
    assert_equal(clone.repo.get_hexsha(), ds.repo.get_hexsha())
    assert_equal(clonemeta[0]['version'], ds.repo.get_hexsha())
    # all but the implicit one are identical
    assert_equal(clonemeta[1:], meta[1:])
    # the implicit md of the clone should list a dataset ID for its subds,
    # although it has not been obtained!
    assert_equal(
        clonemeta[3]['dcterms:hasPart']['@id'],
        subds.id)

    # now obtain a subdataset in the clone and the IDs should be updated
    clone.install('sub')
    partial = get_metadata(clone, guess_type=False, ignore_cache=True)
    # ids don't change
    assert_equal(partial[0]['@id'], clonemeta[0]['@id'])
    # datasets are properly connected
    assert_equal(partial[1]['dcterms:hasPart']['@id'],
                 partial[2]['@id'])

    # query smoke test
    if os.environ.get('DATALAD_TESTS_NONETWORK'):
        raise SkipTest
    assert_equal(len(list(clone.search('mother'))), 1)
    assert_equal(len(list(clone.search('MoTHER'))), 1)  # case insensitive

    child_res = list(clone.search('child'))
    assert_equal(len(child_res), 2)

    # little helper to match names
    def assert_names(res, names, path=clone.path):
        assert_equal(list(map(itemgetter(0), res)),
                     [opj(path, n) for n in names])
    # should yield (location, report) tuples
    assert_names(child_res, ['sub', 'sub/subsub'])

    # result should be identical to invoking search from the api,
    # and search_ should spit the locations out
    with swallow_outputs() as cmo:
        res = list(search_('child', dataset=clone))
        assert_equal(res, child_res)
        assert_in(res[0][0], cmo.out)
    # and overarching search_ just for smoke testing of processing outputs
    # and not puking (e.g. under PY3)
    with swallow_outputs() as cmo:
        assert list(search_('.', regex=True, dataset=clone))
#......... part of this example's code is omitted here .........