This article collects typical usage examples of the datalad.api.Dataset class in Python. If you are wondering what the Dataset class is for, how to use it, or what real-world code using it looks like, the curated examples below may help.
The following shows 15 code examples of the Dataset class, drawn from real projects and ordered roughly by popularity.
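Before the examples, here is a minimal sketch of the basic Dataset workflow (create a dataset, add a file, record it). The path used is hypothetical, and the snippet assumes a working datalad and git-annex installation:

import os.path as op
from datalad.api import Dataset

# create a brand-new dataset at a (hypothetical) location
ds = Dataset('/tmp/demo_ds').create()
# put a file into the dataset's work tree
with open(op.join(ds.path, 'file.dat'), 'w') as f:
    f.write('some content')
# record the new file in the dataset's history
ds.save(message="add file.dat")
assert ds.is_installed()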
Example 1: test_addurls_dry_run
def test_addurls_dry_run(path):
    ds = Dataset(path).create(force=True)

    with chpwd(path):
        json_file = "links.json"
        with open(json_file, "w") as jfh:
            json.dump([{"url": "URL/a.dat", "name": "a", "subdir": "foo"},
                       {"url": "URL/b.dat", "name": "b", "subdir": "bar"},
                       {"url": "URL/c.dat", "name": "c", "subdir": "foo"}],
                      jfh)

        ds.save(message="setup")

        with swallow_logs(new_level=logging.INFO) as cml:
            ds.addurls(json_file,
                       "{url}",
                       "{subdir}//{_url_filename_root}",
                       dry_run=True)

            for dir_ in ["foo", "bar"]:
                assert_in("Would create a subdataset at {}".format(dir_),
                          cml.out)
            assert_in(
                "Would download URL/a.dat to {}".format(
                    os.path.join(path, "foo", "BASE")),
                cml.out)
            assert_in("Metadata: {}".format([u"name=a", u"subdir=foo"]),
                      cml.out)
Example 2: test_ignore_nondatasets
def test_ignore_nondatasets(path):
    # we want to ignore the version/commits for this test
    def _kill_time(meta):
        for m in meta:
            for k in ('version', 'dcterms:modified'):
                if k in m:
                    del m[k]
        return meta

    ds = Dataset(path).create()
    meta = _kill_time(get_metadata(ds))
    n_subm = 0
    # placing another repo in the dataset has no effect on metadata
    for cls, subpath in ((GitRepo, 'subm'), (AnnexRepo, 'annex_subm')):
        subm_path = opj(ds.path, subpath)
        r = cls(subm_path, create=True)
        with open(opj(subm_path, 'test'), 'w') as f:
            f.write('test')
        r.add('test')
        r.commit('some')
        assert_true(Dataset(subm_path).is_installed())
        assert_equal(meta, _kill_time(get_metadata(ds)))
        # making it a submodule has no effect either
        ds.add(subpath)
        assert_equal(len(ds.get_subdatasets()), n_subm + 1)
        assert_equal(meta, _kill_time(get_metadata(ds)))
        n_subm += 1
Example 3: test_addurls_url_parts
def test_addurls_url_parts(self, path):
    ds = Dataset(path).create(force=True)
    with chpwd(path):
        ds.addurls(self.json_file, "{url}", "{_url0}/{_url_basename}")

        for fname in ["a.dat", "b.dat", "c.dat"]:
            ok_exists(op.join("udir", fname))
Example 4: test_aggregate_with_missing_or_duplicate_id
def test_aggregate_with_missing_or_duplicate_id(path):
    # a hierarchy of three (super/sub)datasets, each with some native metadata
    ds = Dataset(opj(path, 'origin')).create(force=True)
    subds = ds.create('sub', force=True)
    subds.remove(opj('.datalad', 'config'), if_dirty='ignore')
    assert_false(exists(opj(subds.path, '.datalad', 'config')))
    subsubds = subds.create('subsub', force=True)
    # aggregate from bottom to top, guess native data, no compacting of graph
    # should yield 6 meta data sets, one implicit, and one native per dataset
    # and a second native set for the topmost dataset
    aggregate_metadata(ds, guess_native_type=True, recursive=True)
    # now only ask the top superdataset, no recursion, just reading from the cache
    meta = get_metadata(
        ds, guess_type=False, ignore_subdatasets=False, ignore_cache=False)
    # the subsub dataset should be known in the aggregated metadata
    for name in ('grandchild_äöü東',):
        assert_true(sum([s.get('name', '') == assure_unicode(name) for s in meta]))
    # but search should not fail
    with swallow_outputs():
        res1 = list(search_('.', regex=True, dataset=ds))
    assert res1

    # and check that nothing fails if a dataset is duplicated by installing
    # the same dataset a second time
    subds_clone = ds.install(source=subds.path, path="subds2")
    with swallow_outputs():
        res2 = list(search_('.', regex=True, dataset=ds))
Example 5: test_dont_trip_over_missing_subds
def test_dont_trip_over_missing_subds(path):
    ds1 = Dataset(opj(path, 'ds1')).create()
    ds2 = Dataset(opj(path, 'ds2')).create()
    subds2 = ds1.install(
        source=ds2.path, path='subds2',
        result_xfm='datasets', return_type='item-or-list')
    assert_true(subds2.is_installed())
    assert_in('subds2', ds1.subdatasets(result_xfm='relpaths'))
    subds2.uninstall()
    assert_in('subds2', ds1.subdatasets(result_xfm='relpaths'))
    assert_false(subds2.is_installed())

    # see if it wants to talk to github (and fail), or if it trips over
    # something before that
    assert_raises(gh.BadCredentialsException,
                  ds1.create_sibling_github, 'bogus', recursive=True,
                  github_login='disabledloginfortesting')
    # inject remote config prior to the run
    assert_not_in('github', ds1.repo.get_remotes())
    # fail on existing
    ds1.repo.add_remote('github', 'http://nothere')
    assert_raises(ValueError,
                  ds1.create_sibling_github, 'bogus', recursive=True,
                  github_login='disabledloginfortesting')
    # talk to github when existing is OK
    assert_raises(gh.BadCredentialsException,
                  ds1.create_sibling_github, 'bogus', recursive=True,
                  github_login='disabledloginfortesting', existing='reconfigure')
    # return happy emptiness when all is skipped
    assert_equal(
        ds1.create_sibling_github(
            'bogus', recursive=True,
            github_login='disabledloginfortesting', existing='skip'),
        [])
Example 6: test_bf2458
def test_bf2458(src, dst):
    ds = Dataset(src).create(force=True)
    ds.save(to_git=False)

    # now clone (empty) into new dst
    clone = install(source=ds.path, path=dst)
    # content is not here
    eq_(clone.repo.whereis('dummy'), [ds.config.get('annex.uuid')])
    # check that plain metadata access does not `get` stuff
    clone.metadata('.', on_failure='ignore')
    eq_(clone.repo.whereis('dummy'), [ds.config.get('annex.uuid')])
Example 7: test_addurls_repindex
def test_addurls_repindex(self, path):
    ds = Dataset(path).create(force=True)

    with chpwd(path):
        with assert_raises(IncompleteResultsError) as raised:
            ds.addurls(self.json_file, "{url}", "{subdir}")
        assert_in("There are file name collisions", str(raised.exception))

        ds.addurls(self.json_file, "{url}", "{subdir}-{_repindex}")

        for fname in ["foo-0", "bar-0", "foo-1"]:
            ok_exists(fname)
Example 8: test_addurls_metafail
def test_addurls_metafail(self, path):
    ds = Dataset(path).create(force=True)

    # Force failure by passing a non-existent file name to annex.
    fn = ds.repo.set_metadata_

    def set_meta(_, **kwargs):
        for i in fn("wreaking-havoc-and-such", **kwargs):
            yield i

    with chpwd(path), patch.object(ds.repo, 'set_metadata_', set_meta):
        with assert_raises(IncompleteResultsError):
            ds.addurls(self.json_file, "{url}", "{name}")
Example 9: check_api
def check_api(no_annex, path):
    ds = Dataset(path).create(force=True, no_annex=no_annex)
    ds.add('.')
    ok_clean_git(ds.path)

    processed_extractors, skipped_extractors = [], []
    for extractor_ep in iter_entry_points('datalad.metadata.extractors'):
        # we need to be able to query for metadata, even if there is none
        # from any extractor
        try:
            extractor_cls = extractor_ep.load()
        except Exception as exc:
            exc_ = str(exc)
            skipped_extractors += [exc_]
            continue
        extractor = extractor_cls(
            ds, paths=['file.dat'])
        meta = extractor.get_metadata(
            dataset=True,
            content=True)
        # we also get something for the dataset and something for the content
        # even if any of the two is empty
        assert_equal(len(meta), 2)
        dsmeta, contentmeta = meta

        assert isinstance(dsmeta, dict)
        assert hasattr(contentmeta, '__len__') or isgenerator(contentmeta)
        # verify that generator does not blow and has an entry for our
        # precious file
        cm = dict(contentmeta)
        # datalad_core does provide some (not really) information about our
        # precious file
        if extractor_ep.name == 'datalad_core':
            assert 'file.dat' in cm
        elif extractor_ep.name == 'annex':
            if not no_annex:
                # verify correct key, which is the same for all files of 0 size
                assert_equal(
                    cm['file.dat']['key'],
                    'MD5E-s0--d41d8cd98f00b204e9800998ecf8427e.dat'
                )
            else:
                # no metadata on that file
                assert not cm
        processed_extractors.append(extractor_ep.name)
    assert "datalad_core" in processed_extractors, \
        "Should have managed to find at least the core extractor"
    if skipped_extractors:
        raise SkipTest(
            "Not fully tested/succeeded since some extractors failed"
            " to load:\n%s" % ("\n".join(skipped_extractors)))
Example 10: test_reproin_largely_smoke
def test_reproin_largely_smoke(tmpdir, heuristic, invocation):
    is_bids = True if heuristic == 'reproin' else False
    arg = "--random-seed 1 -f %s -c dcm2niix -o %s" \
          % (heuristic, tmpdir)
    if is_bids:
        arg += " -b"
    arg += " --datalad "
    args = (
        arg + invocation
    ).split(' ')

    # Test some safeguards
    if invocation == "--files %s" % TESTS_DATA_PATH:
        # Multiple subjects must not be specified -- only a single one could
        # be overridden from the command line
        with pytest.raises(ValueError):
            runner(args + ['--subjects', 'sub1', 'sub2'])

        if heuristic != 'reproin':
            # no other heuristic has mighty infotoids atm
            with pytest.raises(NotImplementedError):
                runner(args)
            return

    runner(args)
    ds = Dataset(str(tmpdir))
    assert ds.is_installed()
    assert not ds.repo.dirty
    head = ds.repo.get_hexsha()

    # and if we rerun -- should fail
    lgr.info(
        "RERUNNING, expecting to FAIL since the same everything "
        "and -c specified so we did conversion already"
    )
    with pytest.raises(RuntimeError):
        runner(args)

    # but there should be nothing new
    assert not ds.repo.dirty
    assert head == ds.repo.get_hexsha()

    # unless we pass the 'overwrite' flag
    runner(args + ['--overwrite'])
    # but the result should be exactly the same, so it still should be clean
    # and at the same commit
    assert ds.is_installed()
    assert not ds.repo.dirty
    assert head == ds.repo.get_hexsha()
Example 11: test_addurls_subdataset
def test_addurls_subdataset(self, path):
    ds = Dataset(path).create(force=True)

    with chpwd(path):
        for save in True, False:
            label = "save" if save else "nosave"
            hexsha_before = ds.repo.get_hexsha()
            ds.addurls(self.json_file, "{url}",
                       "{subdir}-" + label + "//{name}",
                       save=save)
            hexsha_after = ds.repo.get_hexsha()

            for fname in ["foo-{}/a", "bar-{}/b", "foo-{}/c"]:
                ok_exists(fname.format(label))

            assert_true(save ^ (hexsha_before == hexsha_after))
            assert_true(save ^ ds.repo.dirty)

        # Now save the "--nosave" changes and check that we have
        # all the subdatasets.
        ds.add(".")
        eq_(set(subdatasets(ds, recursive=True,
                            result_xfm="relpaths")),
            {"foo-save", "bar-save", "foo-nosave", "bar-nosave"})

        # We don't try to recreate existing subdatasets.
        with swallow_logs(new_level=logging.DEBUG) as cml:
            ds.addurls(self.json_file, "{url}", "{subdir}-nosave//{name}")
            assert_in("Not creating subdataset at existing path", cml.out)
Example 12: test_exif
def test_exif(path):
    ds = Dataset(path).create()
    ds.config.add('datalad.metadata.nativetype', 'exif', where='dataset')
    copy(
        opj(dirname(dirname(dirname(__file__))), 'tests', 'data', 'exif.jpg'),
        path)
    ds.save()
    ok_clean_git(ds.path)
    res = ds.aggregate_metadata()
    assert_status('ok', res)
    res = ds.metadata('exif.jpg')
    assert_result_count(res, 1)
    # from this extractor
    meta = res[0]['metadata']['exif']
    for k, v in target.items():
        eq_(meta[k], v)

    assert_in('@context', meta)
Example 13: test_zip_archive
def test_zip_archive(path):
    ds = Dataset(opj(path, 'ds')).create(force=True, no_annex=True)
    ds.save()

    with chpwd(path):
        ds.export_archive(filename='my', archivetype='zip')
        assert_true(os.path.exists('my.zip'))
        custom1_md5 = md5sum('my.zip')
        time.sleep(1.1)
        ds.export_archive(filename='my', archivetype='zip')
        assert_equal(md5sum('my.zip'), custom1_md5)

    # should be able to export without us cd'ing to that ds directory
    ds.export_archive(filename=ds.path, archivetype='zip')
    default_name = 'datalad_{}.zip'.format(ds.id)
    assert_true(os.path.exists(os.path.join(ds.path, default_name)))
Example 14: test_audio
def test_audio(path):
    ds = Dataset(path).create()
    ds.config.add('datalad.metadata.nativetype', 'audio', where='dataset')
    copy(
        opj(dirname(dirname(dirname(__file__))), 'tests', 'data', 'audio.mp3'),
        path)
    ds.add('.')
    ok_clean_git(ds.path)
    res = ds.aggregate_metadata()
    assert_status('ok', res)
    res = ds.metadata('audio.mp3')
    assert_result_count(res, 1)
    # from this extractor
    meta = res[0]['metadata']['audio']
    for k, v in target.items():
        eq_(meta[k], v)

    assert_in('@context', meta)

    uniques = ds.metadata(
        reporton='datasets',
        return_type='item-or-list')['metadata']['datalad_unique_content_properties']
    # the test file has it, but uniques have it blanked out, because the extractor
    # considers it worthless for discovering whole datasets
    assert_in('bitrate', meta)
    eq_(uniques['audio']['bitrate'], None)

    # the 'date' field carries no value, hence gets excluded from the unique report
    assert_in('date', meta)
    assert(not meta['date'])
    assert_not_in('date', uniques['audio'])
Example 15: test_basic_metadata
def test_basic_metadata(path):
    ds = Dataset(opj(path, 'origin'))
    meta = get_metadata(ds)
    assert_equal(sorted(meta[0].keys()),
                 ['@context', 'dcterms:conformsTo'])
    ds.create(force=True, save=False)
    # with subdataset
    sub = ds.create('sub', force=True)
    ds.save()
    meta = get_metadata(ds)
    assert_equal(
        sorted(meta[0].keys()),
        ['@context', '@id', 'availableFrom', 'dcterms:conformsTo',
         'dcterms:modified', 'type', 'version'])
    assert_equal(meta[0]['type'], 'Dataset')

    # clone and get relationship info in metadata
    sibling = install(opj(path, 'sibling'), source=opj(path, 'origin'))
    sibling_meta = get_metadata(sibling)
    assert_equal(sibling_meta[0]['@id'], ds.id)
    # origin should learn about the clone
    sibling.repo.push(remote='origin', refspec='git-annex')
    meta = get_metadata(ds)
    assert_equal([m['@id'] for m in meta[0]['availableFrom']],
                 [m['@id'] for m in sibling_meta[0]['availableFrom']])

    meta = get_metadata(ds, guess_type=True)
    # without aggregation there is no trace of subdatasets in the metadata
    assert_not_in('dcterms:hasPart', meta[0])