This article collects typical usage examples of the Python method datalad.distribution.dataset.Dataset.save. If you are wondering what Dataset.save does, how to call it, or where to find working examples, the curated code samples below may help. You can also explore the containing class, datalad.distribution.dataset.Dataset, for further usage examples.
The following 15 code examples of Dataset.save are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
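Before the collected test excerpts, here is a minimal, self-contained sketch of the basic Dataset.save pattern that these examples exercise. It is an illustrative sketch, not taken from the DataLad test suite; the dataset path and file name are arbitrary placeholders, and only arguments that also appear in the examples below (path, message, to_git, recursive, updated) are referenced.

from datalad.distribution.dataset import Dataset

# Create a dataset in a placeholder directory (force=True allows creation
# in a directory that already contains content, as in the tests below).
ds = Dataset('/tmp/demo-dataset').create(force=True)

# Put some content into the worktree.
with open(ds.pathobj / 'notes.txt', 'w') as f:
    f.write('some content\n')

# Record the new content. to_git=True would commit the file directly to git
# instead of the annex, recursive=True would descend into subdatasets, and
# updated=True would restrict the save to already-tracked files.
ds.save(path='notes.txt', message='add notes')

Dataset.save returns a list of result records (compare the assert_result_count checks in the examples), and the same functionality is also available as a standalone save() command that accepts a dataset= argument, as several examples below demonstrate.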
Example 1: test_add_files
# Required import: from datalad.distribution.dataset import Dataset
# Or: from datalad.distribution.dataset.Dataset import save
def test_add_files(path):
    ds = Dataset(path).create(force=True)
    test_list_1 = ['test_annex.txt']
    test_list_2 = ['test.txt']
    test_list_3 = ['test1.dat', 'test2.dat']
    test_list_4 = [op.join('dir', 'testindir'),
                   op.join('dir', OBSCURE_FILENAME)]
    for arg in [(test_list_1[0], False),
                (test_list_2[0], True),
                (test_list_3, False),
                (test_list_4, False)]:
        # special case 4: give the dir:
        if arg[0] == test_list_4:
            result = ds.save('dir', to_git=arg[1])
            status = ds.repo.annexstatus(['dir'])
        else:
            result = ds.save(arg[0], to_git=arg[1])
            for a in assure_list(arg[0]):
                assert_result_count(result, 1, path=text_type(ds.pathobj / a))
            status = ds.repo.get_content_annexinfo(
                ut.Path(p) for p in assure_list(arg[0]))
        for f, p in iteritems(status):
            if arg[1]:
                assert p.get('key', None) is None, f
            else:
                assert p.get('key', None) is not None, f
Example 2: test_gh2043p1
# Required import: from datalad.distribution.dataset import Dataset
# Or: from datalad.distribution.dataset.Dataset import save
def test_gh2043p1(path):
    # this test documents the interim agreement on what should happen
    # in the case documented in gh-2043
    ds = Dataset(path).create(force=True)
    ds.save('1')
    assert_repo_status(ds.path, untracked=['2', '3'])
    ds.unlock('1')
    assert_repo_status(
        ds.path,
        # on windows we are in an unlocked branch by default, hence
        # we would see no change
        modified=[] if on_windows else ['1'],
        untracked=['2', '3'])
    # save(.) should recommit the unlocked file, and not touch anything else
    # this tests the second issue in #2043
    with chpwd(path):
        # only save modified bits
        save(path='.', updated=True)
    # state of the file (unlocked/locked) is committed as well, and the
    # test doesn't lock the file again
    assert_repo_status(ds.path, untracked=['2', '3'])
    with chpwd(path):
        # but when a path is given, anything that matches this path,
        # untracked or not, is added/saved
        save(path='.')
    # state of the file (unlocked/locked) is committed as well, and the
    # test doesn't lock the file again
    assert_repo_status(ds.path)
Example 3: test_aggregate_with_unavailable_objects_from_subds
# Required import: from datalad.distribution.dataset import Dataset
# Or: from datalad.distribution.dataset.Dataset import save
def test_aggregate_with_unavailable_objects_from_subds(path, target):
    base = Dataset(opj(path, 'origin')).create(force=True)
    # force all metadata objects into the annex
    with open(opj(base.path, '.datalad', '.gitattributes'), 'w') as f:
        f.write(
            '** annex.largefiles=nothing\nmetadata/objects/** annex.largefiles=anything\n')
    sub = base.create('sub', force=True)
    subsub = base.create(opj('sub', 'subsub'), force=True)
    base.save(recursive=True)
    ok_clean_git(base.path)
    base.aggregate_metadata(recursive=True, update_mode='all')
    ok_clean_git(base.path)
    # now make that a subdataset of a new one, so aggregation needs to get the
    # metadata objects first:
    super = Dataset(target).create()
    super.install("base", source=base.path)
    ok_clean_git(super.path)
    clone = Dataset(opj(super.path, "base"))
    ok_clean_git(clone.path)
    objpath = opj('.datalad', 'metadata', 'objects')
    objs = [o for o in sorted(clone.repo.get_annexed_files(with_content_only=False))
            if o.startswith(objpath)]
    eq_(len(objs), 6)
    eq_(all(clone.repo.file_has_content(objs)), False)
    # now aggregate should get those metadata objects
    super.aggregate_metadata(recursive=True, update_mode='all',
                             force_extraction=False)
    eq_(all(clone.repo.file_has_content(objs)), True)
Example 4: test_symlinked_relpath
# Required import: from datalad.distribution.dataset import Dataset
# Or: from datalad.distribution.dataset.Dataset import save
def test_symlinked_relpath(path):
    # initially ran into on OSX https://github.com/datalad/datalad/issues/2406
    os.makedirs(op.join(path, "origin"))
    dspath = op.join(path, "linked")
    os.symlink('origin', dspath)
    ds = Dataset(dspath).create()
    create_tree(dspath, {
        "mike1": 'mike1',  # will be added from topdir
        "later": "later",  # later from within subdir
        "d": {
            "mike2": 'mike2',  # to be added within subdir
        }
    })
    # in the root of ds
    with chpwd(dspath):
        ds.repo.add("mike1", git=True)
        ds.save(message="committing", path="./mike1")
    # Let's also do it with a subdirectory as CWD, to check that relative
    # paths given to a plain command (not a dataset method) are treated as
    # relative to CWD
    with chpwd(op.join(dspath, 'd')):
        save(dataset=ds.path,
             message="committing",
             path="mike2")
        later = op.join(op.pardir, "later")
        ds.repo.add(later, git=True)
        save(dataset=ds.path, message="committing", path=later)
    assert_repo_status(dspath)
Example 5: test_reaggregate_with_unavailable_objects
# Required import: from datalad.distribution.dataset import Dataset
# Or: from datalad.distribution.dataset.Dataset import save
def test_reaggregate_with_unavailable_objects(path):
    base = Dataset(opj(path, 'origin')).create(force=True)
    # force all metadata objects into the annex
    with open(opj(base.path, '.datalad', '.gitattributes'), 'w') as f:
        f.write(
            '** annex.largefiles=nothing\nmetadata/objects/** annex.largefiles=anything\n')
    sub = base.create('sub', force=True)
    subsub = base.create(opj('sub', 'subsub'), force=True)
    base.save(recursive=True)
    ok_clean_git(base.path)
    base.aggregate_metadata(recursive=True, update_mode='all')
    ok_clean_git(base.path)
    objpath = opj('.datalad', 'metadata', 'objects')
    objs = list(sorted(base.repo.find(objpath)))
    # we have 3x2 metadata sets (dataset/files) under annex
    eq_(len(objs), 6)
    eq_(all(base.repo.file_has_content(objs)), True)
    # drop all object content
    base.drop(objs, check=False)
    eq_(all(base.repo.file_has_content(objs)), False)
    ok_clean_git(base.path)
    # now re-aggregate, the state hasn't changed, so the file names will
    # be the same
    base.aggregate_metadata(recursive=True, update_mode='all', force_extraction=True)
    eq_(all(base.repo.file_has_content(objs)), True)
    # and there are no new objects
    eq_(
        objs,
        list(sorted(base.repo.find(objpath)))
    )
Example 6: test_add_recursive
# Required import: from datalad.distribution.dataset import Dataset
# Or: from datalad.distribution.dataset.Dataset import save
def test_add_recursive(path):
    # make simple hierarchy
    parent = Dataset(path).create()
    assert_repo_status(parent.path)
    sub1 = parent.create(op.join('down', 'sub1'))
    assert_repo_status(parent.path)
    sub2 = parent.create('sub2')
    # the next one makes the parent dirty
    subsub = sub2.create('subsub')
    assert_repo_status(parent.path, modified=['sub2'])
    res = parent.save()
    assert_repo_status(parent.path)
    # now add content deep in the hierarchy
    create_tree(subsub.path, {'new': 'empty'})
    assert_repo_status(parent.path, modified=['sub2'])
    # recursive add should not even touch sub1, because
    # it knows that it is clean
    res = parent.save(recursive=True)
    # the key action is done
    assert_result_count(
        res, 1, path=op.join(subsub.path, 'new'), action='add', status='ok')
    # saved all the way up
    assert_result_count(res, 3, action='save', status='ok')
    assert_repo_status(parent.path)
Example 7: test_aggregate_removal
# Required import: from datalad.distribution.dataset import Dataset
# Or: from datalad.distribution.dataset.Dataset import save
def test_aggregate_removal(path):
    base = Dataset(opj(path, 'origin')).create(force=True)
    # force all metadata objects into the annex
    with open(opj(base.path, '.datalad', '.gitattributes'), 'w') as f:
        f.write(
            '** annex.largefiles=nothing\nmetadata/objects/** annex.largefiles=anything\n')
    sub = base.create('sub', force=True)
    subsub = sub.create(opj('subsub'), force=True)
    base.save(recursive=True)
    base.aggregate_metadata(recursive=True, update_mode='all')
    ok_clean_git(base.path)
    res = base.metadata(get_aggregates=True)
    assert_result_count(res, 3)
    assert_result_count(res, 1, path=subsub.path)
    # check that we only have object files that are listed in agginfo
    eq_(_get_contained_objs(base), _get_referenced_objs(base))
    # now delete the deepest subdataset to test cleanup of aggregated objects
    # in the top-level ds
    base.remove(opj('sub', 'subsub'), check=False)
    # now aggregation has to detect that subsub is not simply missing, but gone
    # for good
    base.aggregate_metadata(recursive=True, update_mode='all')
    ok_clean_git(base.path)
    # internally consistent state
    eq_(_get_contained_objs(base), _get_referenced_objs(base))
    # info on subsub was removed at all levels
    res = base.metadata(get_aggregates=True)
    assert_result_count(res, 0, path=subsub.path)
    assert_result_count(res, 2)
    res = sub.metadata(get_aggregates=True)
    assert_result_count(res, 0, path=subsub.path)
    assert_result_count(res, 1)
Example 8: test_publish_aggregated
# Required import: from datalad.distribution.dataset import Dataset
# Or: from datalad.distribution.dataset.Dataset import save
def test_publish_aggregated(path):
    base = Dataset(opj(path, 'origin')).create(force=True)
    # force all metadata objects into the annex
    with open(opj(base.path, '.datalad', '.gitattributes'), 'w') as f:
        f.write(
            '** annex.largefiles=nothing\nmetadata/objects/** annex.largefiles=anything\n')
    base.create('sub', force=True)
    base.save(recursive=True)
    ok_clean_git(base.path)
    base.aggregate_metadata(recursive=True, update_mode='all')
    ok_clean_git(base.path)
    # create sibling and publish to it
    spath = opj(path, 'remote')
    base.create_sibling(
        name="local_target",
        sshurl="ssh://localhost",
        target_dir=spath)
    base.publish('.', to='local_target', transfer_data='all')
    remote = Dataset(spath)
    objpath = opj('.datalad', 'metadata', 'objects')
    objs = list(sorted(base.repo.find(objpath)))
    # all object files are present in both datasets
    eq_(all(base.repo.file_has_content(objs)), True)
    eq_(all(remote.repo.file_has_content(objs)), True)
    # and we can squeeze the same metadata out
    eq_(
        [{k: v for k, v in i.items() if k not in ('path', 'refds', 'parentds')}
         for i in base.metadata('sub')],
        [{k: v for k, v in i.items() if k not in ('path', 'refds', 'parentds')}
         for i in remote.metadata('sub')],
    )
Example 9: test_basic_aggregate
# Required import: from datalad.distribution.dataset import Dataset
# Or: from datalad.distribution.dataset.Dataset import save
def test_basic_aggregate(path):
    # TODO give datasets some more metadata to actually aggregate stuff
    base = Dataset(opj(path, 'origin')).create(force=True)
    sub = base.create('sub', force=True)
    #base.metadata(sub.path, init=dict(homepage='this'), apply2global=True)
    subsub = base.create(opj('sub', 'subsub'), force=True)
    base.add('.', recursive=True)
    ok_clean_git(base.path)
    # we will first aggregate the middle dataset on its own, this will
    # serve as a smoke test for the reuse of metadata objects later on
    sub.aggregate_metadata()
    base.save()
    ok_clean_git(base.path)
    base.aggregate_metadata(recursive=True, update_mode='all')
    ok_clean_git(base.path)
    direct_meta = base.metadata(recursive=True, return_type='list')
    # lose the deepest dataset
    sub.uninstall('subsub', check=False)
    # now we should be able to reaggregate metadata, and lose nothing
    # because we can aggregate aggregated metadata of subsub from sub
    base.aggregate_metadata(recursive=True, update_mode='all')
    # same result for the aggregate query as for the (saved) direct query
    agg_meta = base.metadata(recursive=True, return_type='list')
    for d, a in zip(direct_meta, agg_meta):
        print(d['path'], a['path'])
        assert_dict_equal(d, a)
    # now we can throw away the subdataset tree, and lose no metadata
    base.uninstall('sub', recursive=True, check=False)
    assert(not sub.is_installed())
    ok_clean_git(base.path)
    # same result for the aggregate query as for the (saved) direct query
    agg_meta = base.metadata(recursive=True, return_type='list')
    for d, a in zip(direct_meta, agg_meta):
        assert_dict_equal(d, a)
Example 10: test_create_subdataset_hierarchy_from_top
# Required import: from datalad.distribution.dataset import Dataset
# Or: from datalad.distribution.dataset.Dataset import save
def test_create_subdataset_hierarchy_from_top(path):
    # how it would look to overlay a subdataset hierarchy onto
    # an existing directory tree
    ds = Dataset(op.join(path, 'origin')).create(force=True)
    # we got a dataset ....
    ok_(ds.is_installed())
    # ... but it has untracked content
    ok_(ds.repo.dirty)
    subds = ds.create(u"ds-" + OBSCURE_FILENAME, force=True)
    ok_(subds.is_installed())
    ok_(subds.repo.dirty)
    subsubds = subds.create('subsub', force=True)
    ok_(subsubds.is_installed())
    ok_(subsubds.repo.dirty)
    ok_(ds.id != subds.id != subsubds.id)
    ds.save(updated=True, recursive=True)
    # 'file*' in each repo was untracked before and should remain as such
    # (we don't want a #1419 resurrection)
    ok_(ds.repo.dirty)
    ok_(subds.repo.dirty)
    ok_(subsubds.repo.dirty)
    # if we add these three, we should get clean
    ds.save([
        'file1',
        op.join(subds.path, 'file2'),
        op.join(subsubds.path, 'file3')])
    assert_repo_status(ds.path)
    ok_(ds.id != subds.id != subsubds.id)
Example 11: test_get_metadata
# Required import: from datalad.distribution.dataset import Dataset
# Or: from datalad.distribution.dataset.Dataset import save
def test_get_metadata(path):
    ds = Dataset(path).create(force=True)
    ds.save()
    meta = MetadataExtractor(ds, [])._get_dataset_metadata()
    assert_equal(
        dumps(meta, sort_keys=True, indent=2),
        """\
{
  "citation": "Cool (2016)",
  "conformsto": "http://docs.datalad.org/metadata.html#v0-1",
  "description": "A text with arbitrary length and content that can span multiple\\nparagraphs (this is a new one)",
  "fundedby": "BMBFGQ1411, NSF 1429999",
  "homepage": "http://studyforrest.org",
  "issuetracker": "https://github.com/psychoinformatics-de/studyforrest-data-phase2/issues",
  "license": [
    "CC0",
    "The person who associated a work with this deed has dedicated the work to the public domain by waiving all of his or her rights to the work worldwide under copyright law, including all related and neighboring rights, to the extent allowed by law.\\nYou can copy, modify, distribute and perform the work, even for commercial purposes, all without asking permission."
  ],
  "maintainer": [
    "Mike One <[email protected]>",
    "Anna Two <[email protected]>"
  ],
  "name": "studyforrest_phase2",
  "sameas": "http://dx.doi.org/10.5281/zenodo.48421",
  "shortdescription": "Basic summary",
  "version": "1.0.0-rc3"
}""")
Example 12: test_add_mimetypes
# Required import: from datalad.distribution.dataset import Dataset
# Or: from datalad.distribution.dataset.Dataset import save
def test_add_mimetypes(path):
    ds = Dataset(path).create(force=True)
    ds.repo.add('.gitattributes')
    ds.repo.commit('added attributes to git explicitly')
    # now test that those files will go into git/annex correspondingly
    # WINDOWS FAILURE NEXT
    __not_tested__ = ds.save(['file.txt', 'empty'])
    assert_repo_status(path, untracked=['file2.txt'])
    # But we should be able to force adding file to annex when desired
    ds.save('file2.txt', to_git=False)
    # check annex file status
    annexinfo = ds.repo.get_content_annexinfo()
    for path, in_annex in (
            # Empty one considered to be application/octet-stream
            # i.e. non-text
            ('empty', True),
            ('file.txt', False),
            ('file2.txt', True)):
        # low-level API report -> repo path reference, no ds path
        p = ds.repo.pathobj / path
        assert_in(p, annexinfo)
        if in_annex:
            assert_in('key', annexinfo[p], p)
        else:
            assert_not_in('key', annexinfo[p], p)
Example 13: check_renamed_file
# Required import: from datalad.distribution.dataset import Dataset
# Or: from datalad.distribution.dataset.Dataset import save
def check_renamed_file(recursive, no_annex, path):
    ds = Dataset(path).create(no_annex=no_annex)
    create_tree(path, {'old': ''})
    ds.repo.add('old')
    ds.repo._git_custom_command(['old', 'new'], ['git', 'mv'])
    ds.save(recursive=recursive)
    assert_repo_status(path)
Example 14: test_encoding
# Required import: from datalad.distribution.dataset import Dataset
# Or: from datalad.distribution.dataset.Dataset import save
def test_encoding(path):
    staged = OBSCURE_FILENAME + u'_staged'
    untracked = OBSCURE_FILENAME + u'_untracked'
    ds = Dataset(path).create(force=True)
    ds.repo.add(staged)
    assert_repo_status(ds.path, added=[staged], untracked=[untracked])
    ds.save(updated=True)
    assert_repo_status(ds.path, untracked=[untracked])
Example 15: test_save_hierarchy
# Required import: from datalad.distribution.dataset import Dataset
# Or: from datalad.distribution.dataset.Dataset import save
def test_save_hierarchy(path):
    # this test doesn't use the API's `remove` to avoid circularities
    ds = make_demo_hierarchy_datasets(path, demo_hierarchy)
    ds.add('.', recursive=True)
    ok_clean_git(ds.path)
    ds_bb = Dataset(opj(ds.path, 'b', 'bb'))
    ds_bba = Dataset(opj(ds_bb.path, 'bba'))
    ds_bbaa = Dataset(opj(ds_bba.path, 'bbaa'))
    # introduce a change at the lowest level
    ds_bbaa.repo.remove('file_bbaa')
    for d in (ds, ds_bb, ds_bba, ds_bbaa):
        ok_(d.repo.dirty)
    # need to give the file specifically, otherwise it will simply just preserve
    # staged changes
    ds_bb.save(path=opj(ds_bbaa.path, 'file_bbaa'))
    # it has saved all changes in the subtrees spanned
    # by the given datasets, but nothing else
    for d in (ds_bb, ds_bba, ds_bbaa):
        ok_clean_git(d.path)
    ok_(ds.repo.dirty)
    # now with two modified repos
    d = Dataset(opj(ds.path, 'd'))
    da = Dataset(opj(d.path, 'da'))
    da.repo.remove('file_da')
    db = Dataset(opj(d.path, 'db'))
    db.repo.remove('file_db')
    # generator
    d.save(recursive=True)
    for d in (d, da, db):
        ok_clean_git(d.path)
    ok_(ds.repo.dirty)
    # and now with files all over the place and saving
    # all the way to the root
    aa = Dataset(opj(ds.path, 'a', 'aa'))
    aa.repo.remove('file_aa')
    ba = Dataset(opj(ds.path, 'b', 'ba'))
    ba.repo.remove('file_ba')
    bb = Dataset(opj(ds.path, 'b', 'bb'))
    bb.repo.remove('file_bb')
    c = Dataset(opj(ds.path, 'c'))
    c.repo.remove('file_c')
    ca = Dataset(opj(ds.path, 'c', 'ca'))
    ca.repo.remove('file_ca')
    d = Dataset(opj(ds.path, 'd'))
    d.repo.remove('file_d')
    ds.save(
        # append trailing slashes to the path to indicate that we want to
        # have the staged content in the dataset saved, rather than only the
        # subdataset state in the respective superds.
        # an alternative would have been to pass `save` annotated paths of
        # type {'path': dspath, 'process_content': True} for each dataset
        # in question, but here we want to test how this would most likely
        # be used from cmdline
        path=[opj(p, '')
              for p in (aa.path, ba.path, bb.path, c.path, ca.path, d.path)],
        super_datasets=True)