This article collects typical usage examples of the Dataset.save method from Python's datalad.api. If you are wondering how exactly to use Python's Dataset.save, how to call it, or what real-world examples of it look like, the hand-picked code samples below may help. You can also explore further usage examples of the class this method belongs to, datalad.api.Dataset.
Below are 15 code examples of the Dataset.save method, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
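Before diving into the examples, here is a minimal sketch of the typical create-modify-save cycle around Dataset.save. This sketch is not taken from the examples below; it assumes DataLad is installed, and the path and file name are hypothetical:

from datalad.api import Dataset

ds = Dataset('/tmp/demo_ds')          # hypothetical location for a new dataset
ds.create()                           # initialize the dataset (git + git-annex)
with open('/tmp/demo_ds/notes.txt', 'w') as f:
    f.write('some content')           # add some content to track
ds.save(message="add notes")          # record the change in the dataset's history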
Example 1: test_addurls_dry_run
# Required import: from datalad.api import Dataset [as alias]
# Alternatively: from datalad.api.Dataset import save [as alias]
def test_addurls_dry_run(path):
    ds = Dataset(path).create(force=True)
    with chpwd(path):
        json_file = "links.json"
        with open(json_file, "w") as jfh:
            json.dump([{"url": "URL/a.dat", "name": "a", "subdir": "foo"},
                       {"url": "URL/b.dat", "name": "b", "subdir": "bar"},
                       {"url": "URL/c.dat", "name": "c", "subdir": "foo"}],
                      jfh)
        ds.save(message="setup")
        with swallow_logs(new_level=logging.INFO) as cml:
            ds.addurls(json_file,
                       "{url}",
                       "{subdir}//{_url_filename_root}",
                       dry_run=True)
            for dir_ in ["foo", "bar"]:
                assert_in("Would create a subdataset at {}".format(dir_),
                          cml.out)
            assert_in(
                "Would download URL/a.dat to {}".format(
                    os.path.join(path, "foo", "BASE")),
                cml.out)
            assert_in("Metadata: {}".format([u"name=a", u"subdir=foo"]),
                      cml.out)
Example 2: test_audio
# Required import: from datalad.api import Dataset [as alias]
# Alternatively: from datalad.api.Dataset import save [as alias]
def test_audio(path):
    ds = Dataset(path).create()
    ds.config.add('datalad.metadata.nativetype', 'audio', where='dataset')
    copy(
        opj(dirname(dirname(dirname(__file__))), 'tests', 'data', 'audio.mp3'),
        path)
    ds.save()
    ok_clean_git(ds.path)
    res = ds.aggregate_metadata()
    assert_status('ok', res)
    res = ds.metadata('audio.mp3')
    assert_result_count(res, 1)
    # from this extractor
    meta = res[0]['metadata']['audio']
    for k, v in target.items():
        eq_(meta[k], v)
    assert_in('@context', meta)
    uniques = ds.metadata(
        reporton='datasets', return_type='item-or-list')['metadata']['datalad_unique_content_properties']
    # the test file has it, but uniques have it blanked out, because the extractor
    # considers it worthless for discovering whole datasets
    assert_in('bitrate', meta)
    eq_(uniques['audio']['bitrate'], None)
    # the 'date' field carries no value, hence it gets excluded from the unique report
    assert_in('date', meta)
    assert(not meta['date'])
    assert_not_in('date', uniques['audio'])
Example 3: test_ignore_nondatasets
# Required import: from datalad.api import Dataset [as alias]
# Alternatively: from datalad.api.Dataset import save [as alias]
def test_ignore_nondatasets(path):
    # we want to ignore the version/commits for this test
    def _kill_time(meta):
        for m in meta:
            for k in ('version', 'shasum'):
                if k in m:
                    del m[k]
        return meta

    ds = Dataset(path).create()
    meta = _kill_time(ds.metadata(reporton='datasets', on_failure='ignore'))
    n_subm = 0
    # placing another repo in the dataset has no effect on metadata
    for cls, subpath in ((GitRepo, 'subm'), (AnnexRepo, 'annex_subm')):
        subm_path = opj(ds.path, subpath)
        r = cls(subm_path, create=True)
        with open(opj(subm_path, 'test'), 'w') as f:
            f.write('test')
        r.add('test')
        r.commit('some')
        assert_true(Dataset(subm_path).is_installed())
        assert_equal(meta, _kill_time(ds.metadata(reporton='datasets', on_failure='ignore')))
        # making it a submodule has no effect either
        ds.save(subpath)
        assert_equal(len(ds.subdatasets()), n_subm + 1)
        assert_equal(meta, _kill_time(ds.metadata(reporton='datasets', on_failure='ignore')))
        n_subm += 1
Example 4: test_basic_metadata
# Required import: from datalad.api import Dataset [as alias]
# Alternatively: from datalad.api.Dataset import save [as alias]
def test_basic_metadata(path):
    ds = Dataset(opj(path, 'origin'))
    meta = get_metadata(ds)
    assert_equal(sorted(meta[0].keys()),
                 ['@context', 'dcterms:conformsTo'])
    ds.create(force=True, save=False)
    # with subdataset
    sub = ds.create('sub', force=True)
    ds.save()
    meta = get_metadata(ds)
    assert_equal(
        sorted(meta[0].keys()),
        ['@context', '@id', 'availableFrom', 'dcterms:conformsTo',
         'dcterms:modified', 'type', 'version'])
    assert_equal(meta[0]['type'], 'Dataset')
    # clone and get relationship info in metadata
    sibling = install(opj(path, 'sibling'), source=opj(path, 'origin'))
    sibling_meta = get_metadata(sibling)
    assert_equal(sibling_meta[0]['@id'], ds.id)
    # origin should learn about the clone
    sibling.repo.push(remote='origin', refspec='git-annex')
    meta = get_metadata(ds)
    assert_equal([m['@id'] for m in meta[0]['availableFrom']],
                 [m['@id'] for m in sibling_meta[0]['availableFrom']])
    meta = get_metadata(ds, guess_type=True)
    # without aggregation there is no trace of subdatasets in the metadata
    assert_not_in('dcterms:hasPart', meta[0])
Example 5: test_addurls
# Required import: from datalad.api import Dataset [as alias]
# Alternatively: from datalad.api.Dataset import save [as alias]
def test_addurls(self, path):
    ds = Dataset(path).create(force=True)

    def get_annex_commit_counts():
        return int(
            ds.repo.repo.git.rev_list("--count", "git-annex").strip())

    n_annex_commits = get_annex_commit_counts()

    with chpwd(path):
        ds.addurls(self.json_file, "{url}", "{name}")

        filenames = ["a", "b", "c"]
        for fname in filenames:
            ok_exists(fname)

        for (fname, meta), subdir in zip(ds.repo.get_metadata(filenames),
                                         ["foo", "bar", "foo"]):
            assert_dict_equal(meta,
                              {"subdir": [subdir], "name": [fname]})

        # Ignore this check if we're faking dates because that disables
        # batch mode.
        if not os.environ.get('DATALAD_FAKE__DATES'):
            # We should have two new commits on the git-annex branch: one for
            # the added urls and one for the added metadata.
            eq_(n_annex_commits + 2, get_annex_commit_counts())

        # Add to already existing links, overwriting.
        with swallow_logs(new_level=logging.DEBUG) as cml:
            ds.addurls(self.json_file, "{url}", "{name}",
                       ifexists="overwrite")
            for fname in filenames:
                assert_in("Removing {}".format(os.path.join(path, fname)),
                          cml.out)

        # Add to already existing links, skipping.
        assert_in_results(
            ds.addurls(self.json_file, "{url}", "{name}", ifexists="skip"),
            action="addurls",
            status="notneeded")

        # Adding to already existing links works, as long as the content is the same.
        ds.addurls(self.json_file, "{url}", "{name}")

        # But it fails if something has changed.
        ds.unlock("a")
        with open("a", "w") as ofh:
            ofh.write("changed")
        ds.save("a")
        assert_raises(IncompleteResultsError,
                      ds.addurls,
                      self.json_file, "{url}", "{name}")
Example 6: test_bf2458
# Required import: from datalad.api import Dataset [as alias]
# Alternatively: from datalad.api.Dataset import save [as alias]
def test_bf2458(src, dst):
    ds = Dataset(src).create(force=True)
    ds.save(to_git=False)

    # now clone (empty) into new dst
    clone = install(source=ds.path, path=dst)
    # content is not here
    eq_(clone.repo.whereis('dummy'), [ds.config.get('annex.uuid')])
    # check that plain metadata access does not `get` stuff
    clone.metadata('.', on_failure='ignore')
    eq_(clone.repo.whereis('dummy'), [ds.config.get('annex.uuid')])
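A short note on the to_git=False argument used above (my reading, not stated in the original example): it asks save to put the file content under git-annex rather than committing it directly to git, which is why the fresh clone knows about 'dummy' but does not hold its content. A minimal sketch under that assumption, with a hypothetical path:

from datalad.api import Dataset

ds = Dataset('/tmp/demo_annex')       # hypothetical path
ds.create()
with open('/tmp/demo_annex/blob.dat', 'w') as f:
    f.write('payload')
ds.save(to_git=False)                 # content is annexed instead of going into git directly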
Example 7: check_api
# Required import: from datalad.api import Dataset [as alias]
# Alternatively: from datalad.api.Dataset import save [as alias]
def check_api(no_annex, path):
    ds = Dataset(path).create(force=True, no_annex=no_annex)
    ds.save()
    ok_clean_git(ds.path)

    processed_extractors, skipped_extractors = [], []
    for extractor_ep in iter_entry_points('datalad.metadata.extractors'):
        # we need to be able to query for metadata, even if there is none
        # from any extractor
        try:
            extractor_cls = extractor_ep.load()
        except Exception as exc:
            exc_ = str(exc)
            skipped_extractors += [exc_]
            continue
        extractor = extractor_cls(
            ds, paths=['file.dat'])
        meta = extractor.get_metadata(
            dataset=True,
            content=True)
        # we also get something for the dataset and something for the content,
        # even if either of the two is empty
        assert_equal(len(meta), 2)
        dsmeta, contentmeta = meta
        assert (isinstance(dsmeta, dict))
        assert hasattr(contentmeta, '__len__') or isgenerator(contentmeta)
        # verify that the generator does not blow up and has an entry for our
        # precious file
        cm = dict(contentmeta)
        # datalad_core does provide some (not really) information about our
        # precious file
        if extractor_ep.name == 'datalad_core':
            assert 'file.dat' in cm
        elif extractor_ep.name == 'annex':
            if not no_annex:
                # verify the correct key, which is the same for all files of 0 size
                assert_equal(
                    cm['file.dat']['key'],
                    'MD5E-s0--d41d8cd98f00b204e9800998ecf8427e.dat'
                )
            else:
                # no metadata on that file
                assert not cm
        processed_extractors.append(extractor_ep.name)

    assert "datalad_core" in processed_extractors, \
        "Should have managed to find at least the core extractor"
    if skipped_extractors:
        raise SkipTest(
            "Not fully tested/succeeded since some extractors failed"
            " to load:\n%s" % ("\n".join(skipped_extractors)))
Example 8: test_zip_archive
# Required import: from datalad.api import Dataset [as alias]
# Alternatively: from datalad.api.Dataset import save [as alias]
def test_zip_archive(path):
    ds = Dataset(opj(path, 'ds')).create(force=True, no_annex=True)
    ds.save()
    with chpwd(path):
        ds.export_archive(filename='my', archivetype='zip')
        assert_true(os.path.exists('my.zip'))
        custom1_md5 = md5sum('my.zip')
        time.sleep(1.1)
        ds.export_archive(filename='my', archivetype='zip')
        assert_equal(md5sum('my.zip'), custom1_md5)

    # should be able to export without us cd'ing to that ds directory
    ds.export_archive(filename=ds.path, archivetype='zip')
    default_name = 'datalad_{}.zip'.format(ds.id)
    assert_true(os.path.exists(os.path.join(ds.path, default_name)))
Example 9: test_archive
# Required import: from datalad.api import Dataset [as alias]
# Alternatively: from datalad.api.Dataset import save [as alias]
def test_archive(path):
    ds = Dataset(opj(path, 'ds')).create(force=True)
    ds.save()
    committed_date = ds.repo.get_commit_date()
    default_outname = opj(path, 'datalad_{}.tar.gz'.format(ds.id))
    with chpwd(path):
        res = list(ds.export_archive())
        assert_status('ok', res)
        assert_result_count(res, 1)
        assert(isabs(res[0]['path']))
    assert_true(os.path.exists(default_outname))
    custom_outname = opj(path, 'myexport.tar.gz')
    # feed in without extension
    ds.export_archive(filename=custom_outname[:-7])
    assert_true(os.path.exists(custom_outname))
    custom1_md5 = md5sum(custom_outname)
    # encodes the original archive filename -> different checksum, despite
    # same content
    assert_not_equal(md5sum(default_outname), custom1_md5)
    # should really sleep so if they stop using time.time - we know
    time.sleep(1.1)
    ds.export_archive(filename=custom_outname)
    # should not encode mtime, so should be identical
    assert_equal(md5sum(custom_outname), custom1_md5)

    def check_contents(outname, prefix):
        with tarfile.open(outname) as tf:
            nfiles = 0
            for ti in tf:
                # any annex links resolved
                assert_false(ti.issym())
                ok_startswith(ti.name, prefix + '/')
                assert_equal(ti.mtime, committed_date)
                if '.datalad' not in ti.name:
                    # ignore any files in .datalad for this test to not be
                    # susceptible to changes in how much meta info we generate
                    nfiles += 1
            # we have exactly four files (includes .gitattributes for default
            # MD5E backend), and expect no content for any directory
            assert_equal(nfiles, 4)

    check_contents(default_outname, 'datalad_%s' % ds.id)
    check_contents(custom_outname, 'myexport')

    # now lose some content
    ds.drop('file_up', check=False)
    assert_raises(IOError, ds.export_archive, filename=opj(path, 'my'))
    ds.export_archive(filename=opj(path, 'partial'), missing_content='ignore')
    assert_true(os.path.exists(opj(path, 'partial.tar.gz')))
Example 10: test_exif
# Required import: from datalad.api import Dataset [as alias]
# Alternatively: from datalad.api.Dataset import save [as alias]
def test_exif(path):
    ds = Dataset(path).create()
    ds.config.add('datalad.metadata.nativetype', 'exif', where='dataset')
    copy(
        opj(dirname(dirname(dirname(__file__))), 'tests', 'data', 'exif.jpg'),
        path)
    ds.save()
    ok_clean_git(ds.path)
    res = ds.aggregate_metadata()
    assert_status('ok', res)
    res = ds.metadata('exif.jpg')
    assert_result_count(res, 1)
    # from this extractor
    meta = res[0]['metadata']['exif']
    for k, v in target.items():
        eq_(meta[k], v)
    assert_in('@context', meta)
Example 11: test_tarball
# Required import: from datalad.api import Dataset [as alias]
# Alternatively: from datalad.api.Dataset import save [as alias]
def test_tarball(path):
    ds = Dataset(opj(path, 'ds')).create(force=True)
    ds.save(all_changes=True)
    committed_date = ds.repo.get_committed_date()
    with chpwd(path):
        _mod, tarball1 = ds.export('tarball')
        assert(not isabs(tarball1))
        tarball1 = opj(path, tarball1)
    default_outname = opj(path, 'datalad_{}.tar.gz'.format(ds.id))
    assert_equal(tarball1, default_outname)
    assert_true(os.path.exists(default_outname))
    custom_outname = opj(path, 'myexport.tar.gz')
    # feed in without extension
    ds.export('tarball', output=custom_outname[:-7])
    assert_true(os.path.exists(custom_outname))
    custom1_md5 = md5sum(custom_outname)
    # encodes the original tarball filename -> different checksum, despite
    # same content
    assert_not_equal(md5sum(default_outname), custom1_md5)
    # should really sleep so if they stop using time.time - we know
    time.sleep(1.1)
    ds.export('tarball', output=custom_outname)
    # should not encode mtime, so should be identical
    assert_equal(md5sum(custom_outname), custom1_md5)

    def check_contents(outname, prefix):
        with tarfile.open(outname) as tf:
            nfiles = 0
            for ti in tf:
                # any annex links resolved
                assert_false(ti.issym())
                ok_startswith(ti.name, prefix + '/')
                assert_equal(ti.mtime, committed_date)
                if '.datalad' not in ti.name:
                    # ignore any files in .datalad for this test to not be
                    # susceptible to changes in how much meta info we generate
                    nfiles += 1
            # we have exactly three files, and expect no content for any directory
            assert_equal(nfiles, 3)

    check_contents(default_outname, 'datalad_%s' % ds.id)
    check_contents(custom_outname, 'myexport')
Example 12: test_dont_trip_over_missing_subds
# Required import: from datalad.api import Dataset [as alias]
# Alternatively: from datalad.api.Dataset import save [as alias]
def test_dont_trip_over_missing_subds(path):
    ds1 = Dataset(opj(path, 'ds1')).create()
    ds2 = Dataset(opj(path, 'ds2')).create()
    subds2 = ds1.install(source=ds2.path, path='subds2')
    assert_true(subds2.is_installed())
    assert_in('subds2', ds1.get_subdatasets())
    subds2.uninstall()
    assert_in('subds2', ds1.get_subdatasets())
    assert_false(subds2.is_installed())
    # this will deinit the submodule
    ds1.save(files=['subds2'])

    # see if it wants to talk to github (and fail), or if it trips over something
    # before
    assert_raises(gh.BadCredentialsException, ds1.create_sibling_github, 'bogus', recursive=True, github_user='')
    # inject remote config prior to the run
    assert_not_in('github', ds1.repo.get_remotes())
    # fail on existing
    ds1.repo.add_remote('github', 'http://nothere')
    assert_raises(ValueError, ds1.create_sibling_github, 'bogus', recursive=True, github_user='')
    # talk to github when existing is OK
    assert_raises(gh.BadCredentialsException, ds1.create_sibling_github, 'bogus', recursive=True, github_user='', existing='reconfigure')
    # return happy emptiness when all is skipped
    assert_equal(ds1.create_sibling_github('bogus', recursive=True, github_user='', existing='skip'), [])
Example 13: test_addurls_subdataset
# Required import: from datalad.api import Dataset [as alias]
# Alternatively: from datalad.api.Dataset import save [as alias]
def test_addurls_subdataset(self, path):
    ds = Dataset(path).create(force=True)
    with chpwd(path):
        for save in True, False:
            label = "save" if save else "nosave"
            ds.addurls(self.json_file, "{url}",
                       "{subdir}-" + label + "//{name}",
                       save=save)

            subdirs = ["{}-{}".format(d, label) for d in ["foo", "bar"]]
            subdir_files = dict(zip(subdirs, [["a", "c"], ["b"]]))

            for subds, fnames in subdir_files.items():
                for fname in fnames:
                    ok_exists(op.join(subds, fname))

            if save:
                assert_repo_status(path)
            else:
                # The datasets are created and saved ...
                assert_repo_status(path, modified=subdirs)
                # but the downloaded files aren't.
                for subds, fnames in subdir_files.items():
                    assert_repo_status(subds, added=fnames)

        # Now save the "--nosave" changes and check that we have
        # all the subdatasets.
        ds.save()
        eq_(set(subdatasets(dataset=ds, recursive=True,
                            result_xfm="relpaths")),
            {"foo-save", "bar-save", "foo-nosave", "bar-nosave"})

        # We don't try to recreate existing subdatasets.
        with swallow_logs(new_level=logging.DEBUG) as cml:
            ds.addurls(self.json_file, "{url}", "{subdir}-nosave//{name}")
            assert_in("Not creating subdataset at existing path", cml.out)
Example 14: test_within_ds_file_search
# Required import: from datalad.api import Dataset [as alias]
# Alternatively: from datalad.api.Dataset import save [as alias]
def test_within_ds_file_search(path):
    try:
        import mutagen
    except ImportError:
        raise SkipTest
    ds = Dataset(path).create(force=True)
    # override default and search for datasets and files for this test
    for m in ('egrep', 'textblob', 'autofield'):
        ds.config.add(
            'datalad.search.index-{}-documenttype'.format(m), 'all',
            where='dataset')
    ds.config.add('datalad.metadata.nativetype', 'audio', where='dataset')
    makedirs(opj(path, 'stim'))
    for src, dst in (
            ('audio.mp3', opj('stim', 'stim1.mp3')),):
        copy(
            opj(dirname(dirname(__file__)), 'tests', 'data', src),
            opj(path, dst))
    ds.save()
    ok_file_under_git(path, opj('stim', 'stim1.mp3'), annexed=True)
    # If it is not under annex, the addition of metadata below silently does
    # not do anything
    ds.repo.set_metadata(
        opj('stim', 'stim1.mp3'), init={'importance': 'very'})
    ds.aggregate_metadata()
    ok_clean_git(ds.path)
    # basic sanity check on the metadata structure of the dataset
    dsmeta = ds.metadata('.', reporton='datasets')[0]['metadata']
    for src in ('audio',):
        # something for each one
        assert_in(src, dsmeta)
        # each src declares its own context
        assert_in('@context', dsmeta[src])
        # we have a unique content metadata summary for each src
        assert_in(src, dsmeta['datalad_unique_content_properties'])

    # test default behavior
    with swallow_outputs() as cmo:
        ds.search(show_keys='name', mode='textblob')
        assert_in("""\
id
meta
parentds
path
type
""", cmo.out)

    target_out = """\
annex.importance
annex.key
audio.bitrate
audio.duration(s)
audio.format
audio.music-Genre
audio.music-album
audio.music-artist
audio.music-channels
audio.music-sample_rate
audio.name
audio.tracknumber
datalad_core.id
datalad_core.refcommit
id
parentds
path
type
"""

    # check generated autofield index keys
    with swallow_outputs() as cmo:
        ds.search(mode='autofield', show_keys='name')
        # it is impossible to assess what is different from that dump
        assert_in(target_out, cmo.out)

    assert_result_count(ds.search('blablob#'), 0)
    # now check that we can discover things from the aggregated metadata
    for mode, query, hitpath, matched in (
            ('egrep',
             ':mp3',
             opj('stim', 'stim1.mp3'),
             {'audio.format': 'mp3'}),
            # same as above; the leading ':' is stripped, which means "ALL FIELDS"
            ('egrep',
             'mp3',
             opj('stim', 'stim1.mp3'),
             {'audio.format': 'mp3'}),
            # same as above, but with AND condition
            # get both matches
            ('egrep',
             ['mp3', 'type:file'],
             opj('stim', 'stim1.mp3'),
             {'type': 'file', 'audio.format': 'mp3'}),
            # case insensitive search
            ('egrep',
             'mp3',
             opj('stim', 'stim1.mp3'),
             {'audio.format': 'mp3'}),
            # field selection by expression
            ('egrep',
#......... part of this example's code is omitted on the source page .........
Example 15: test_aggregation
# Required import: from datalad.api import Dataset [as alias]
# Alternatively: from datalad.api.Dataset import save [as alias]
def test_aggregation(path):
    with chpwd(path):
        assert_raises(InsufficientArgumentsError, aggregate_metadata, None)
    # a hierarchy of three (super/sub)datasets, each with some native metadata
    ds = Dataset(opj(path, 'origin')).create(force=True)
    # before anything is aggregated we would get nothing and only a log warning
    with swallow_logs(new_level=logging.WARNING) as cml:
        assert_equal(list(query_aggregated_metadata('all', ds, [])), [])
        assert_re_in('.*Found no aggregated metadata.*update', cml.out)
    ds.config.add('datalad.metadata.nativetype', 'frictionless_datapackage',
                  where='dataset')
    subds = ds.create('sub', force=True)
    subds.config.add('datalad.metadata.nativetype', 'frictionless_datapackage',
                     where='dataset')
    subsubds = subds.create('subsub', force=True)
    subsubds.config.add('datalad.metadata.nativetype', 'frictionless_datapackage',
                        where='dataset')
    ds.save(recursive=True)
    ok_clean_git(ds.path)
    # aggregate metadata from all subdatasets into any superdataset, including
    # intermediate ones
    res = ds.aggregate_metadata(recursive=True, update_mode='all')
    # we get a success report for both subdatasets and the superdataset,
    # and they get saved
    assert_result_count(res, 6)
    assert_result_count(res, 3, status='ok', action='aggregate_metadata')
    assert_result_count(res, 3, status='ok', action='save')
    # nice and tidy
    ok_clean_git(ds.path)

    # quick test of aggregate report
    aggs = ds.metadata(get_aggregates=True)
    # one for each dataset
    assert_result_count(aggs, 3)
    # mother also reports the layout version
    assert_result_count(aggs, 1, path=ds.path, layout_version=1)

    # store clean direct result
    origres = ds.metadata(recursive=True)
    # basic sanity check
    assert_result_count(origres, 6)
    assert_result_count(origres, 3, type='dataset')
    assert_result_count(origres, 3, type='file')  # Now that we have annex.key
    # three different IDs
    assert_equal(3, len(set([s['dsid'] for s in origres if s['type'] == 'dataset'])))
    # and we know about all three datasets
    for name in ('MOTHER_äöü東', 'child_äöü東', 'grandchild_äöü東'):
        assert_true(
            sum([s['metadata']['frictionless_datapackage']['name'] \
                 == assure_unicode(name) for s in origres
                 if s['type'] == 'dataset']))

    # now clone the beast to simulate a new user installing an empty dataset
    clone = install(
        opj(path, 'clone'), source=ds.path,
        result_xfm='datasets', return_type='item-or-list')
    # ID mechanism works
    assert_equal(ds.id, clone.id)

    # get fresh metadata
    cloneres = clone.metadata()
    # basic sanity check
    assert_result_count(cloneres, 2)
    assert_result_count(cloneres, 1, type='dataset')
    assert_result_count(cloneres, 1, type='file')

    # now loop over the previous results from the direct metadata query of
    # origin and make sure we get the exact same stuff from the clone
    _compare_metadata_helper(origres, clone)

    # now obtain a subdataset in the clone, should make no difference
    assert_status('ok', clone.install('sub', result_xfm=None, return_type='list'))
    _compare_metadata_helper(origres, clone)

    # test search in search tests, not all over the place
    ## query smoke test
    assert_result_count(clone.search('mother', mode='egrep'), 1)
    assert_result_count(clone.search('(?i)MoTHER', mode='egrep'), 1)

    child_res = clone.search('child', mode='egrep')
    assert_result_count(child_res, 2)
    for r in child_res:
        if r['type'] == 'dataset':
            assert_in(
                r['query_matched']['frictionless_datapackage.name'],
                r['metadata']['frictionless_datapackage']['name'])