This page collects typical usage examples of the Python method datalad.distribution.dataset.Dataset.remove. If you have been wondering what exactly Dataset.remove does, how to use it, or what real-world uses of it look like, the curated code examples here may help. You can also explore the containing class, datalad.distribution.dataset.Dataset, in more detail.
Four code examples of Dataset.remove are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
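Before turning to the collected examples, here is a minimal sketch of a typical Dataset.remove call, assuming a DataLad version compatible with the examples below (they use the older create()/add()/check= API); the dataset location /tmp/demo_ds is purely hypothetical:

from datalad.distribution.dataset import Dataset

# create a throwaway superdataset at a hypothetical location
ds = Dataset('/tmp/demo_ds').create()
# ... and a subdataset inside it
sub = ds.create('sub')

# remove the subdataset again; check=False bypasses the data-availability
# check, mirroring how remove() is called in the examples below
ds.remove('sub', check=False)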
Example 1: test_aggregate_removal
# Required import: from datalad.distribution.dataset import Dataset [as alias]
# Or alternatively: from datalad.distribution.dataset.Dataset import remove [as alias]
def test_aggregate_removal(path):
    base = Dataset(opj(path, 'origin')).create(force=True)
    # force all metadata objects into the annex
    with open(opj(base.path, '.datalad', '.gitattributes'), 'w') as f:
        f.write(
            '** annex.largefiles=nothing\nmetadata/objects/** annex.largefiles=anything\n')
    sub = base.create('sub', force=True)
    subsub = sub.create(opj('subsub'), force=True)
    base.add('.', recursive=True)
    base.aggregate_metadata(recursive=True, update_mode='all')
    ok_clean_git(base.path)
    res = base.metadata(get_aggregates=True)
    assert_result_count(res, 3)
    assert_result_count(res, 1, path=subsub.path)
    # check that we only have object files that are listed in agginfo
    eq_(_get_contained_objs(base), _get_referenced_objs(base))
    # now delete the deepest subdataset to test cleanup of aggregated objects
    # in the top-level ds
    base.remove(opj('sub', 'subsub'), check=False)
    # now aggregation has to detect that subsub is not simply missing, but gone
    # for good
    base.aggregate_metadata(recursive=True, update_mode='all')
    ok_clean_git(base.path)
    # internally consistent state
    eq_(_get_contained_objs(base), _get_referenced_objs(base))
    # info on subsub was removed at all levels
    res = base.metadata(get_aggregates=True)
    assert_result_count(res, 0, path=subsub.path)
    assert_result_count(res, 2)
    res = sub.metadata(get_aggregates=True)
    assert_result_count(res, 0, path=subsub.path)
    assert_result_count(res, 1)
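Example 1 also relies on two helpers, _get_contained_objs and _get_referenced_objs, which are defined elsewhere in DataLad's test suite and not shown on this page. The following is only a rough, hypothetical sketch of what such helpers might look like; the import location of load_ds_aggregate_db and the choice of 'dataset_info'/'content_info' as location keys are assumptions based on Example 4 below, so the real implementation may differ:

from os.path import join as opj

from datalad.metadata.metadata import load_ds_aggregate_db  # assumed import path


def _get_contained_objs(ds):
    # hypothetical: metadata object files tracked in the dataset's git index
    objroot = opj('.datalad', 'metadata', 'objects')
    return set(opj(ds.path, f)
               for f in ds.repo.get_indexed_files()
               if f.startswith(objroot))


def _get_referenced_objs(ds):
    # hypothetical: object files referenced by the aggregated metadata database
    db = load_ds_aggregate_db(ds, abspath=True)
    return set(
        props[key]
        for props in db.values()
        for key in ('dataset_info', 'content_info')  # assumed location keys
        if key in props)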
Example 2: test_rerun_outofdate_tree
# Required import: from datalad.distribution.dataset import Dataset [as alias]
# Or alternatively: from datalad.distribution.dataset.Dataset import remove [as alias]
def test_rerun_outofdate_tree(path):
    ds = Dataset(path).create()
    input_file = opj(path, "foo")
    output_file = opj(path, "out")
    with open(input_file, "w") as f:
        f.write("abc\ndef")
    ds.add("foo", to_git=True)
    # Create initial run.
    ds.run('grep def foo > out')
    eq_('def\n', open(output_file).read())
    # Change tree so that it is no longer compatible.
    ds.remove("foo")
    # Now rerunning should fail because foo no longer exists.
    assert_raises(CommandError, ds.rerun, revision="HEAD~")
Example 3: test_get_modified_subpaths
# Required import: from datalad.distribution.dataset import Dataset [as alias]
# Or alternatively: from datalad.distribution.dataset.Dataset import remove [as alias]
def test_get_modified_subpaths(path):
    ds = Dataset(path).create(force=True)
    suba = ds.create('ba', force=True)
    subb = ds.create('bb', force=True)
    subsub = ds.create(opj('bb', 'bba', 'bbaa'), force=True)
    ds.save(recursive=True)
    ok_clean_git(path)
    orig_base_commit = ds.repo.repo.commit().hexsha
    # nothing was modified compared to the status quo, output must be empty
    eq_([],
        list(get_modified_subpaths(
            [dict(path=ds.path)],
            ds, orig_base_commit)))
    # modify one subdataset
    create_tree(subsub.path, {'added': 'test'})
    subsub.save('added')
    # it will replace the requested path with the path of the closest
    # submodule that is modified
    assert_result_count(
        get_modified_subpaths(
            [dict(path=ds.path)],
            ds, orig_base_commit),
        1,
        type='dataset', path=subb.path)
    # make another one dirty
    create_tree(suba.path, {'added': 'test'})
    # now a single query path will result in the two modified subdatasets
    assert_result_count(
        get_modified_subpaths(
            [dict(path=ds.path)],
            ds, orig_base_commit),
        2,
        type='dataset')
    # now save up top; this saves the new state of subb, but keeps suba dirty
    ds.save(subb.path, recursive=True)
    # now if we ask for what was last saved, we only get the new state of subb
    assert_result_count(
        get_modified_subpaths(
            [dict(path=ds.path)],
            ds,
            'HEAD~1..HEAD'),
        1,
        type='dataset', path=subb.path)
    # comparing the working tree to HEAD will report the dirty suba instead
    assert_result_count(
        get_modified_subpaths(
            [dict(path=ds.path)],
            ds,
            'HEAD'),
        1,
        type='dataset', path=suba.path)
    # add/save everything, become clean
    ds.save(recursive=True)
    ok_clean_git(path)
    # nothing is reported as modified
    assert_result_count(
        get_modified_subpaths(
            [dict(path=ds.path)],
            ds,
            'HEAD'),
        0)
    # but looking all the way back, we find all changes
    assert_result_count(
        get_modified_subpaths(
            [dict(path=ds.path)],
            ds,
            orig_base_commit),
        2,
        type='dataset')
    # now we ask specifically for the file we added to subsub above
    query = [dict(path=opj(subsub.path, 'added'))]
    res = list(get_modified_subpaths(query, ds, orig_base_commit))
    # we only get this one result back, and not all the submodule state changes
    # that were also saved in the superdatasets
    assert_result_count(res, 1)
    assert_result_count(
        res, 1, type='file', path=opj(subsub.path, 'added'), state='added')
    # but if we are only looking at the last saved change (suba), our query
    # will not return anything
    res = get_modified_subpaths(query, ds, 'HEAD^')
    assert_result_count(res, 0)
    # deal with removal (force it, to avoid an insufficient-copies error)
    ds.remove(suba.path, check=False)
    ok_clean_git(path)
    res = list(get_modified_subpaths([dict(path=ds.path)], ds, 'HEAD~1..HEAD'))
    # removed submodule + .gitmodules update
    assert_result_count(res, 2)
    assert_result_count(
        res, 1,
        type_src='dataset', path=suba.path)
Example 4: _update_ds_agginfo
# Required import: from datalad.distribution.dataset import Dataset [as alias]
# Or alternatively: from datalad.distribution.dataset.Dataset import remove [as alias]
def _update_ds_agginfo(refds_path, ds_path, subds_paths, incremental, agginfo_db, to_save):
    """Perform metadata aggregation for ds and a given list of subdataset paths

    Parameters
    ----------
    refds_path : str
      Absolute path to the reference dataset that aggregate_metadata() was
      called on.
    ds_path : str
      Absolute path to the dataset to have its aggregate info updated.
    subds_paths : list(str)
      Sequence of absolute paths of subdatasets of the to-be-updated dataset,
      whose agginfo shall be updated within the to-be-updated dataset.
      Any subdataset that is not listed here is assumed to be gone (i.e. no
      longer a subdataset at all, not just not locally installed).
    incremental : bool
      If set, the update will not remove any information on datasets not
      listed in subds_paths.
    agginfo_db : dict
      Dictionary with all information on aggregate metadata of all datasets.
      Keys are absolute paths of datasets.
    to_save : list
      List of paths to save eventually. This function will add new paths as
      necessary.
    """
    ds = Dataset(ds_path)
    # load existing aggregate info dict
    # makes sure all file/dataset paths become absolute
    # TODO take from cache, once used in _get_dsinfo_from_aggmetadata()
    agginfo_fpath, agg_base_path = get_ds_aggregate_db_locations(ds)
    ds_agginfos = load_ds_aggregate_db(ds, abspath=True)
    # object locations referenced initially
    objlocs_was = set(ai[k]
                      for ai in ds_agginfos.values()
                      for k in location_keys
                      if k in ai)
    # track which objects need to be copied (each item is a from/to tuple)
    objs2copy = []
    # for each subdataset (any depth level)
    procds_paths = [ds.path] + subds_paths
    for dpath in procds_paths:
        ds_dbinfo = agginfo_db.get(dpath, {}).copy()
        # relative path of the current dataset within the dataset we are updating
        drelpath = op.relpath(dpath, start=ds.path)
        for loclabel in location_keys:
            # TODO filepath_info is obsolete
            if loclabel == 'filepath_info' and drelpath == op.curdir:
                # do not write a file list into the dataset it is from
                if 'filepath_info' in ds_dbinfo:
                    del ds_dbinfo['filepath_info']
                continue
            # abspath to object
            objloc = ds_dbinfo.get(loclabel, None)
            if objloc is None:
                continue
            # XXX needs to change when layout of object store is changed
            # current is ./datalad/metadata/objects/{hash}/{hash}
            target_objpath = op.join(agg_base_path, *objloc.split(os.sep)[-3:])
            # make sure we copy the file from its current location to where it
            # is needed in this dataset
            objs2copy.append((
                # this needs to turn into an absolute path
                # `dpath` will be relative to the reference dataset
                #op.normpath(op.join(ds.path, dpath, op.dirname(agginfo_relpath), objloc)),
                objloc,
                target_objpath))
            # now build needed local relpath
            ds_dbinfo[loclabel] = target_objpath
        # (re)assign in case record is new
        ds_agginfos[dpath] = ds_dbinfo
    # remove all entries for which we do not (no longer) have a corresponding
    # subdataset to take care of
    if not incremental:
        ds_agginfos = {k: v
                       for k, v in ds_agginfos.items()
                       if k in procds_paths}
    # set of metadata objects now referenced
    objlocs_is = set(
        ai[k]
        for sdsrpath, ai in ds_agginfos.items()
        for k in location_keys
        if k in ai)
    objs2add = objlocs_is
    # yoh: we apparently do need to filter the ones to remove - I did
    # "git reset --hard HEAD^" and
    # aggregate-metadata failed upon next run trying to remove
    # a file unknown to git. I am yet to figure out why that
    # mattered (hopefully not that reflog is used somehow)
    objs2remove = []
    for obj in objlocs_was.difference(objlocs_is):
        if op.lexists(obj):
            objs2remove.append(obj)
        else:
            # not really a warning, we don't need it anymore, it is already gone
            lgr.debug(
                "To-be-deleted metadata object not found, skip deletion (%s)",
                obj
            )
    #.........the rest of the code is omitted here.........