This page collects typical usage examples of the Python method datalad.distribution.dataset.Dataset.create. If you are unsure what Dataset.create does, how to call it, or what real usage looks like, the curated code examples below may help. You can also read further about the containing class, datalad.distribution.dataset.Dataset.
A total of 15 code examples of Dataset.create are shown below, sorted by popularity by default.
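Before the extracted test snippets, here is a minimal, self-contained sketch of the basic Dataset.create() pattern they all build on. The temporary working directory is an illustrative assumption; in the snippets below the path argument is supplied by the test harness instead.

import os.path as op
import tempfile

from datalad.distribution.dataset import Dataset

# illustrative working directory (assumption); the test snippets below
# receive their paths from the test harness instead
workdir = tempfile.mkdtemp()

# create a top-level dataset (the directory is created if needed)
ds = Dataset(op.join(workdir, 'demo')).create()
assert ds.is_installed()

# create a subdataset through the superdataset; it is registered and committed
subds = ds.create(op.join('code', 'analysis'))
assert op.join('code', 'analysis') in ds.subdatasets(result_xfm='relpaths')

# save any remaining changes across the hierarchy
ds.save(recursive=True)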
Example 1: test_add_recursive
# Required import: from datalad.distribution.dataset import Dataset [as alias]
# Or: from datalad.distribution.dataset.Dataset import create [as alias]
def test_add_recursive(path):
# make simple hierarchy
parent = Dataset(path).create()
assert_repo_status(parent.path)
sub1 = parent.create(op.join('down', 'sub1'))
assert_repo_status(parent.path)
sub2 = parent.create('sub2')
# the next one makes the parent dirty
subsub = sub2.create('subsub')
assert_repo_status(parent.path, modified=['sub2'])
res = parent.save()
assert_repo_status(parent.path)
# now add content deep in the hierarchy
create_tree(subsub.path, {'new': 'empty'})
assert_repo_status(parent.path, modified=['sub2'])
# recursive add should not even touch sub1, because
# it knows that it is clean
res = parent.save(recursive=True)
# the key action is done
assert_result_count(
res, 1, path=op.join(subsub.path, 'new'), action='add', status='ok')
# saved all the way up
assert_result_count(res, 3, action='save', status='ok')
assert_repo_status(parent.path)
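Note that the decorator line supplying the path argument is stripped from these extracted snippets. In DataLad's own test suite such paths are typically injected by a temporary-directory decorator; the following is a sketch of that assumed harness, not part of the original example.

# assumed test harness (not shown in the extracted snippets): a decorator
# such as with_tempfile injects a temporary path argument
from datalad.tests.utils import with_tempfile

@with_tempfile(mkdir=True)
def test_add_recursive(path):
    ...  # body as shown in Example 1 above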
Example 2: test_reaggregate_with_unavailable_objects
# Required import: from datalad.distribution.dataset import Dataset [as alias]
# Or: from datalad.distribution.dataset.Dataset import create [as alias]
def test_reaggregate_with_unavailable_objects(path):
base = Dataset(opj(path, 'origin')).create(force=True)
# force all metadata objects into the annex
with open(opj(base.path, '.datalad', '.gitattributes'), 'w') as f:
f.write(
'** annex.largefiles=nothing\nmetadata/objects/** annex.largefiles=anything\n')
sub = base.create('sub', force=True)
subsub = base.create(opj('sub', 'subsub'), force=True)
base.add('.', recursive=True)
ok_clean_git(base.path)
base.aggregate_metadata(recursive=True, update_mode='all')
ok_clean_git(base.path)
objpath = opj('.datalad', 'metadata', 'objects')
objs = list(sorted(base.repo.find(objpath)))
# we have 3x2 metadata sets (dataset/files) under annex
eq_(len(objs), 6)
eq_(all(base.repo.file_has_content(objs)), True)
# drop all object content
base.drop(objs, check=False)
eq_(all(base.repo.file_has_content(objs)), False)
ok_clean_git(base.path)
# now re-aggregate, the state hasn't changed, so the file names will
# be the same
base.aggregate_metadata(recursive=True, update_mode='all', force_extraction=True)
eq_(all(base.repo.file_has_content(objs)), True)
# and there are no new objects
eq_(
objs,
list(sorted(base.repo.find(objpath)))
)
Example 3: test_publish_aggregated
# Required import: from datalad.distribution.dataset import Dataset [as alias]
# Or: from datalad.distribution.dataset.Dataset import create [as alias]
def test_publish_aggregated(path):
base = Dataset(opj(path, 'origin')).create(force=True)
# force all metadata objects into the annex
with open(opj(base.path, '.datalad', '.gitattributes'), 'w') as f:
f.write(
'** annex.largefiles=nothing\nmetadata/objects/** annex.largefiles=anything\n')
base.create('sub', force=True)
base.add('.', recursive=True)
ok_clean_git(base.path)
base.aggregate_metadata(recursive=True, update_mode='all')
ok_clean_git(base.path)
# create sibling and publish to it
spath = opj(path, 'remote')
base.create_sibling(
name="local_target",
sshurl="ssh://localhost",
target_dir=spath)
base.publish('.', to='local_target', transfer_data='all')
remote = Dataset(spath)
objpath = opj('.datalad', 'metadata', 'objects')
objs = list(sorted(base.repo.find(objpath)))
# all object files are present in both datasets
eq_(all(base.repo.file_has_content(objs)), True)
eq_(all(remote.repo.file_has_content(objs)), True)
# and we can squeeze the same metadata out
eq_(
[{k: v for k, v in i.items() if k not in ('path', 'refds', 'parentds')}
for i in base.metadata('sub')],
[{k: v for k, v in i.items() if k not in ('path', 'refds', 'parentds')}
for i in remote.metadata('sub')],
)
Example 4: test_aggregate_with_unavailable_objects_from_subds
# Required import: from datalad.distribution.dataset import Dataset [as alias]
# Or: from datalad.distribution.dataset.Dataset import create [as alias]
def test_aggregate_with_unavailable_objects_from_subds(path, target):
base = Dataset(opj(path, 'origin')).create(force=True)
# force all metadata objects into the annex
with open(opj(base.path, '.datalad', '.gitattributes'), 'w') as f:
f.write(
'** annex.largefiles=nothing\nmetadata/objects/** annex.largefiles=anything\n')
sub = base.create('sub', force=True)
subsub = base.create(opj('sub', 'subsub'), force=True)
base.add('.', recursive=True)
ok_clean_git(base.path)
base.aggregate_metadata(recursive=True, update_mode='all')
ok_clean_git(base.path)
# now make that a subdataset of a new one, so aggregation needs to get the
# metadata objects first:
super = Dataset(target).create()
super.install("base", source=base.path)
ok_clean_git(super.path)
clone = Dataset(opj(super.path, "base"))
ok_clean_git(clone.path)
objpath = opj('.datalad', 'metadata', 'objects')
objs = [o for o in sorted(clone.repo.get_annexed_files(with_content_only=False)) if o.startswith(objpath)]
eq_(len(objs), 6)
eq_(all(clone.repo.file_has_content(objs)), False)
# now aggregate should get those metadata objects
super.aggregate_metadata(recursive=True, update_mode='all',
force_extraction=False)
eq_(all(clone.repo.file_has_content(objs)), True)
Example 5: test_basic_aggregate
# Required import: from datalad.distribution.dataset import Dataset [as alias]
# Or: from datalad.distribution.dataset.Dataset import create [as alias]
def test_basic_aggregate(path):
# TODO give datasets some more metadata to actually aggregate stuff
base = Dataset(opj(path, 'origin')).create(force=True)
sub = base.create('sub', force=True)
#base.metadata(sub.path, init=dict(homepage='this'), apply2global=True)
subsub = base.create(opj('sub', 'subsub'), force=True)
base.add('.', recursive=True)
ok_clean_git(base.path)
# we will first aggregate the middle dataset on its own, this will
# serve as a smoke test for the reuse of metadata objects later on
sub.aggregate_metadata()
base.save()
ok_clean_git(base.path)
base.aggregate_metadata(recursive=True, update_mode='all')
ok_clean_git(base.path)
direct_meta = base.metadata(recursive=True, return_type='list')
# lose the deepest dataset
sub.uninstall('subsub', check=False)
# now we should be able to re-aggregate metadata and lose nothing,
# because we can aggregate the aggregated metadata of subsub from sub
base.aggregate_metadata(recursive=True, update_mode='all')
# same result for the aggregate query as for the (saved) direct query
agg_meta = base.metadata(recursive=True, return_type='list')
for d, a in zip(direct_meta, agg_meta):
print(d['path'], a['path'])
assert_dict_equal(d, a)
# now we can throw away the subdataset tree and lose no metadata
base.uninstall('sub', recursive=True, check=False)
assert(not sub.is_installed())
ok_clean_git(base.path)
# same result for the aggregate query as for the (saved) direct query
agg_meta = base.metadata(recursive=True, return_type='list')
for d, a in zip(direct_meta, agg_meta):
assert_dict_equal(d, a)
Example 6: test_dirty
# Required import: from datalad.distribution.dataset import Dataset [as alias]
# Or: from datalad.distribution.dataset.Dataset import create [as alias]
def test_dirty(path):
for mode in _dirty_modes:
# does nothing without a dataset
handle_dirty_dataset(None, mode)
# placeholder, but not yet created
ds = Dataset(path)
# unknown mode
assert_raises(ValueError, handle_dirty_dataset, ds, 'MADEUP')
# not yet created is very dirty
assert_raises(RuntimeError, handle_dirty_dataset, ds, 'fail')
handle_dirty_dataset(ds, 'ignore')
assert_raises(RuntimeError, handle_dirty_dataset, ds, 'save-before')
# should yield a clean repo
ds.create()
orig_state = ds.repo.get_hexsha()
_check_all_clean(ds, orig_state)
# tainted: untracked
with open(opj(ds.path, 'something'), 'w') as f:
f.write('some')
# we don't want to auto-add untracked files by saving (anymore)
assert_raises(AssertionError, _check_auto_save, ds, orig_state)
# tainted: staged
ds.repo.add('something', git=True)
orig_state = _check_auto_save(ds, orig_state)
# tainted: submodule
# not added to super on purpose!
subds = ds.create('subds')
_check_all_clean(subds, subds.repo.get_hexsha())
ok_clean_git(ds.path)
# subdataset must be added as a submodule!
assert_equal(ds.subdatasets(result_xfm='relpaths'), ['subds'])
Example 7: test_ls_uninstalled
# Required import: from datalad.distribution.dataset import Dataset [as alias]
# Or: from datalad.distribution.dataset.Dataset import create [as alias]
def test_ls_uninstalled(path):
ds = Dataset(path)
ds.create()
ds.create('sub')
ds.uninstall('sub', check=False)
with swallow_outputs() as cmo:
ls([path], recursive=True)
assert_in('not installed', cmo.out)
Example 8: make_demo_hierarchy_datasets
# Required import: from datalad.distribution.dataset import Dataset [as alias]
# Or: from datalad.distribution.dataset.Dataset import create [as alias]
def make_demo_hierarchy_datasets(path, tree):
created_ds = []
for node, items in tree.items():
node_path = opj(path, node)
if isinstance(items, dict):
ds = make_demo_hierarchy_datasets(node_path, items)
created_ds.append(ds)
topds = Dataset(path)
if not topds.is_installed():
topds.create(force=True)
# TODO this farce would not be necessary if add() could add subdatasets
for ds in created_ds:
_install_subds_inplace(ds=topds, path=ds.path, relativepath=relpath(ds.path, topds.path))
ds.save()
return topds
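A hypothetical call to this helper might look like the following; the tree layout and target path are invented for illustration, and the names the helper itself relies on (Dataset, opj, relpath, _install_subds_inplace) are assumed to be in scope.

# hypothetical demo tree: every dict-valued node becomes a (sub)dataset
demo_tree = {
    'ds1': {
        'ds1_sub': {},
    },
    'ds2': {},
}
topds = make_demo_hierarchy_datasets('/tmp/demo-hierarchy', demo_tree)
assert topds.is_installed()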
Example 9: test_subdataset_save
# Required import: from datalad.distribution.dataset import Dataset [as alias]
# Or: from datalad.distribution.dataset.Dataset import create [as alias]
def test_subdataset_save(path):
parent = Dataset(path).create()
sub = parent.create('sub')
assert_repo_status(parent.path)
create_tree(parent.path, {
"untracked": 'ignore',
'sub': {
"new": "wanted"}})
sub.save('new')
# defined state: one untracked, modified (but clean in itself) subdataset
assert_repo_status(sub.path)
assert_repo_status(parent.path, untracked=['untracked'], modified=['sub'])
# `save sub` does not save the parent!!
with chpwd(parent.path):
assert_status('notneeded', save(dataset=sub.path))
assert_repo_status(parent.path, untracked=['untracked'], modified=['sub'])
# `save -u .` saves the state change in the subdataset,
# but leaves any untracked content alone
with chpwd(parent.path):
assert_status('ok', parent.save(updated=True))
assert_repo_status(parent.path, untracked=['untracked'])
# get back to the original modified state and check that -S behaves in
# exactly the same way
create_tree(parent.path, {
'sub': {
"new2": "wanted2"}})
sub.save('new2')
assert_repo_status(parent.path, untracked=['untracked'], modified=['sub'])
Example 10: test_aggregate_removal
# Required import: from datalad.distribution.dataset import Dataset [as alias]
# Or: from datalad.distribution.dataset.Dataset import create [as alias]
def test_aggregate_removal(path):
base = Dataset(opj(path, 'origin')).create(force=True)
# force all metadata objects into the annex
with open(opj(base.path, '.datalad', '.gitattributes'), 'w') as f:
f.write(
'** annex.largefiles=nothing\nmetadata/objects/** annex.largefiles=anything\n')
sub = base.create('sub', force=True)
subsub = sub.create(opj('subsub'), force=True)
base.add('.', recursive=True)
base.aggregate_metadata(recursive=True, update_mode='all')
ok_clean_git(base.path)
res = base.metadata(get_aggregates=True)
assert_result_count(res, 3)
assert_result_count(res, 1, path=subsub.path)
# check that we only have object files that are listed in agginfo
eq_(_get_contained_objs(base), _get_referenced_objs(base))
# now delete the deepest subdataset to test cleanup of aggregated objects
# in the top-level ds
base.remove(opj('sub', 'subsub'), check=False)
# now aggregation has to detect that subsub is not simply missing, but gone
# for good
base.aggregate_metadata(recursive=True, update_mode='all')
ok_clean_git(base.path)
# internally consistent state
eq_(_get_contained_objs(base), _get_referenced_objs(base))
# info on subsub was removed at all levels
res = base.metadata(get_aggregates=True)
assert_result_count(res, 0, path=subsub.path)
assert_result_count(res, 2)
res = sub.metadata(get_aggregates=True)
assert_result_count(res, 0, path=subsub.path)
assert_result_count(res, 1)
Example 11: test_create_subdataset_hierarchy_from_top
# Required import: from datalad.distribution.dataset import Dataset [as alias]
# Or: from datalad.distribution.dataset.Dataset import create [as alias]
def test_create_subdataset_hierarchy_from_top(path):
# how it would look to overlay a subdataset hierarchy onto
# an existing directory tree
ds = Dataset(op.join(path, 'origin')).create(force=True)
# we got a dataset ....
ok_(ds.is_installed())
# ... but it has untracked content
ok_(ds.repo.dirty)
subds = ds.create(u"ds-" + OBSCURE_FILENAME, force=True)
ok_(subds.is_installed())
ok_(subds.repo.dirty)
subsubds = subds.create('subsub', force=True)
ok_(subsubds.is_installed())
ok_(subsubds.repo.dirty)
ok_(ds.id != subds.id != subsubds.id)
ds.save(updated=True, recursive=True)
# 'file*' in each repo was untracked before and should remain as such
# (we don't want a #1419 resurrection)
ok_(ds.repo.dirty)
ok_(subds.repo.dirty)
ok_(subsubds.repo.dirty)
# if we add these three, we should get clean
ds.save([
'file1',
op.join(subds.path, 'file2'),
op.join(subsubds.path, 'file3')])
assert_repo_status(ds.path)
ok_(ds.id != subds.id != subsubds.id)
Example 12: make_demo_hierarchy_datasets
# Required import: from datalad.distribution.dataset import Dataset [as alias]
# Or: from datalad.distribution.dataset.Dataset import create [as alias]
def make_demo_hierarchy_datasets(path, tree, parent=None):
if parent is None:
parent = Dataset(path).create(force=True)
for node, items in tree.items():
if isinstance(items, dict):
node_path = opj(path, node)
nodeds = parent.create(node_path, force=True)
make_demo_hierarchy_datasets(node_path, items, parent=nodeds)
return parent
Example 13: test_create_sub
# Required import: from datalad.distribution.dataset import Dataset [as alias]
# Or: from datalad.distribution.dataset.Dataset import create [as alias]
def test_create_sub(path):
ds = Dataset(path)
ds.create()
# 1. create sub and add to super:
subds = ds.create(op.join("some", "what", "deeper"))
ok_(isinstance(subds, Dataset))
ok_(subds.is_installed())
assert_repo_status(subds.path, annex=True)
assert_in(
'submodule.some/what/deeper.datalad-id={}'.format(
subds.id),
ds.repo._git_custom_command(
'',
['git', 'config', '--file', '.gitmodules', '--list'])[0]
)
# subdataset is known to superdataset:
assert_in(op.join("some", "what", "deeper"),
ds.subdatasets(result_xfm='relpaths'))
# and was committed:
assert_repo_status(ds.path)
# subds finds superdataset
ok_(subds.get_superdataset() == ds)
# 2. create sub without adding to super:
subds2 = Dataset(op.join(path, "someother")).create()
ok_(isinstance(subds2, Dataset))
ok_(subds2.is_installed())
assert_repo_status(subds2.path, annex=True)
# unknown to superdataset:
assert_not_in("someother", ds.subdatasets(result_xfm='relpaths'))
# 3. create sub via super:
subds3 = ds.create("third", no_annex=True)
ok_(isinstance(subds3, Dataset))
ok_(subds3.is_installed())
assert_repo_status(subds3.path, annex=False)
assert_in("third", ds.subdatasets(result_xfm='relpaths'))
Example 14: test_create
# Required import: from datalad.distribution.dataset import Dataset [as alias]
# Or: from datalad.distribution.dataset.Dataset import create [as alias]
def test_create(path):
ds = Dataset(path)
ds.create(
description="funny",
# custom git init option
initopts=dict(shared='world'))
ok_(ds.is_installed())
assert_repo_status(ds.path, annex=True)
# check default backend
eq_(ds.config.get("annex.backends"), 'MD5E')
eq_(ds.config.get("core.sharedrepository"), '2')
runner = Runner()
# check description in `info`
cmd = ['git', 'annex', 'info']
cmlout = runner.run(cmd, cwd=path)
assert_in('funny [here]', cmlout[0])
# check dataset ID
eq_(ds.config.get_value('datalad.dataset', 'id'),
ds.id)
Example 15: test_update_strategy
# Required import: from datalad.distribution.dataset import Dataset [as alias]
# Or: from datalad.distribution.dataset.Dataset import create [as alias]
def test_update_strategy(path):
base = Dataset(opj(path, 'origin')).create(force=True)
# force all metadata objects into the annex
with open(opj(base.path, '.datalad', '.gitattributes'), 'w') as f:
f.write(
'** annex.largefiles=nothing\nmetadata/objects/** annex.largefiles=anything\n')
sub = base.create('sub', force=True)
subsub = sub.create(opj('subsub'), force=True)
base.add('.', recursive=True)
ok_clean_git(base.path)
# we start clean
for ds in base, sub, subsub:
eq_(len(_get_contained_objs(ds)), 0)
# aggregate the base dataset only, nothing below changes
base.aggregate_metadata()
eq_(len(_get_contained_objs(base)), 2)
for ds in sub, subsub:
eq_(len(_get_contained_objs(ds)), 0)
# aggregate the entire tree, but by default this only updates
# the top-level dataset with all objects; none of the leaf
# or intermediate datasets gets touched
base.aggregate_metadata(recursive=True)
eq_(len(_get_contained_objs(base)), 6)
eq_(len(_get_referenced_objs(base)), 6)
for ds in sub, subsub:
eq_(len(_get_contained_objs(ds)), 0)
res = base.metadata(get_aggregates=True)
assert_result_count(res, 3)
# it is impossible to query an intermediate or leaf dataset
# for metadata
for ds in sub, subsub:
assert_status(
'impossible',
ds.metadata(get_aggregates=True, on_failure='ignore'))
# get the full metadata report
target_meta = base.metadata(return_type='list')
# now redo full aggregation, this time updating all
# (intermediate) datasets
base.aggregate_metadata(recursive=True, update_mode='all')
eq_(len(_get_contained_objs(base)), 6)
eq_(len(_get_contained_objs(sub)), 4)
eq_(len(_get_contained_objs(subsub)), 2)
# it is now OK to query an intermediate or leaf dataset
# for metadata
for ds in sub, subsub:
assert_status(
'ok',
ds.metadata(get_aggregates=True, on_failure='ignore'))
# all of that has no impact on the reported metadata
eq_(target_meta, base.metadata(return_type='list'))