This article collects typical usage examples of the Python method datalad.api.Dataset.aggregate_metadata. If you are wondering how Dataset.aggregate_metadata is used in practice, the curated code examples below may help. You can also look further into usage examples of the containing class, datalad.api.Dataset.
The following presents 4 code examples of the Dataset.aggregate_metadata method, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps recommend better Python code examples.
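Before the examples, here is a minimal sketch of the typical call pattern, pieced together from the test code below. The dataset path and the choice of the 'audio' extractor are illustrative assumptions; the config/save/aggregate/metadata calls mirror what the examples in this article use, and a DataLad version that still ships Dataset.aggregate_metadata is assumed.

# Minimal usage sketch (assumes a DataLad version providing Dataset.aggregate_metadata)
from datalad.api import Dataset

ds = Dataset('/tmp/demo_ds').create()        # hypothetical location for the dataset
# enable a native metadata extractor for this dataset (here 'audio', as in Example 1)
ds.config.add('datalad.metadata.nativetype', 'audio', where='dataset')
# ... add some files to the dataset here ...
ds.save()                                    # commit the added content
res = ds.aggregate_metadata()                # extract and aggregate metadata
# query the aggregated metadata, e.g. at the dataset level
dsmeta = ds.metadata('.', reporton='datasets')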
Example 1: test_audio
# Required import: from datalad.api import Dataset [as alias]
# Or: from datalad.api.Dataset import aggregate_metadata [as alias]
def test_audio(path):
ds = Dataset(path).create()
ds.config.add('datalad.metadata.nativetype', 'audio', where='dataset')
copy(
opj(dirname(dirname(dirname(__file__))), 'tests', 'data', 'audio.mp3'),
path)
ds.add('.')
ok_clean_git(ds.path)
res = ds.aggregate_metadata()
assert_status('ok', res)
res = ds.metadata('audio.mp3')
assert_result_count(res, 1)
# from this extractor
meta = res[0]['metadata']['audio']
for k, v in target.items():
eq_(meta[k], v)
assert_in('@context', meta)
uniques = ds.metadata(
reporton='datasets', return_type='item-or-list')['metadata']['datalad_unique_content_properties']
# test file has it, but uniques have it blanked out, because the extractor considers it worthless
# for discovering whole datasets
assert_in('bitrate', meta)
eq_(uniques['audio']['bitrate'], None)
# the 'date' field carries no value, hence it gets excluded from the unique report
assert_in('date', meta)
assert(not meta['date'])
assert_not_in('date', uniques['audio'])
Example 2: test_exif
# Required import: from datalad.api import Dataset [as alias]
# Or: from datalad.api.Dataset import aggregate_metadata [as alias]
def test_exif(path):
ds = Dataset(path).create()
ds.config.add('datalad.metadata.nativetype', 'exif', where='dataset')
copy(
opj(dirname(dirname(dirname(__file__))), 'tests', 'data', 'exif.jpg'),
path)
ds.save()
ok_clean_git(ds.path)
res = ds.aggregate_metadata()
assert_status('ok', res)
res = ds.metadata('exif.jpg')
assert_result_count(res, 1)
# from this extractor
meta = res[0]['metadata']['exif']
for k, v in target.items():
eq_(meta[k], v)
assert_in('@context', meta)
Example 3: test_within_ds_file_search
# Required import: from datalad.api import Dataset [as alias]
# Or: from datalad.api.Dataset import aggregate_metadata [as alias]
def test_within_ds_file_search(path):
try:
import mutagen
except ImportError:
raise SkipTest
ds = Dataset(path).create(force=True)
# override default and search for datasets and files for this test
for m in ('egrep', 'textblob', 'autofield'):
ds.config.add(
'datalad.search.index-{}-documenttype'.format(m), 'all',
where='dataset')
ds.config.add('datalad.metadata.nativetype', 'audio', where='dataset')
makedirs(opj(path, 'stim'))
for src, dst in (
('audio.mp3', opj('stim', 'stim1.mp3')),):
copy(
opj(dirname(dirname(__file__)), 'tests', 'data', src),
opj(path, dst))
ds.save()
ok_file_under_git(path, opj('stim', 'stim1.mp3'), annexed=True)
# If the file is not under annex, the addition of metadata below silently
# does nothing
ds.repo.set_metadata(
opj('stim', 'stim1.mp3'), init={'importance': 'very'})
ds.aggregate_metadata()
ok_clean_git(ds.path)
# basic sanity check on the metadata structure of the dataset
dsmeta = ds.metadata('.', reporton='datasets')[0]['metadata']
for src in ('audio',):
# something for each one
assert_in(src, dsmeta)
# each src declares its own context
assert_in('@context', dsmeta[src])
# we have a unique content metadata summary for each src
assert_in(src, dsmeta['datalad_unique_content_properties'])
# test default behavior
with swallow_outputs() as cmo:
ds.search(show_keys='name', mode='textblob')
assert_in("""\
id
meta
parentds
path
type
""", cmo.out)
target_out = """\
annex.importance
annex.key
audio.bitrate
audio.duration(s)
audio.format
audio.music-Genre
audio.music-album
audio.music-artist
audio.music-channels
audio.music-sample_rate
audio.name
audio.tracknumber
datalad_core.id
datalad_core.refcommit
id
parentds
path
type
"""
# check generated autofield index keys
with swallow_outputs() as cmo:
ds.search(mode='autofield', show_keys='name')
# it is impossible to assess what is different from that dump
assert_in(target_out, cmo.out)
assert_result_count(ds.search('blablob#'), 0)
# now check that we can discover things from the aggregated metadata
for mode, query, hitpath, matched in (
('egrep',
':mp3',
opj('stim', 'stim1.mp3'),
{'audio.format': 'mp3'}),
# same as above, but without the leading ':' (which indicates "ALL FIELDS")
('egrep',
'mp3',
opj('stim', 'stim1.mp3'),
{'audio.format': 'mp3'}),
# same as above, but with AND condition
# get both matches
('egrep',
['mp3', 'type:file'],
opj('stim', 'stim1.mp3'),
{'type': 'file', 'audio.format': 'mp3'}),
# case insensitive search
('egrep',
'mp3',
opj('stim', 'stim1.mp3'),
{'audio.format': 'mp3'}),
# field selection by expression
('egrep',
#.........part of the code omitted here.........
Example 4: test_aggregation
# Required import: from datalad.api import Dataset [as alias]
# Or: from datalad.api.Dataset import aggregate_metadata [as alias]
def test_aggregation(path):
with chpwd(path):
assert_raises(InsufficientArgumentsError, aggregate_metadata, None)
# a hierarchy of three (super/sub)datasets, each with some native metadata
ds = Dataset(opj(path, 'origin')).create(force=True)
# before anything is aggregated we would get nothing and only a log warning
with swallow_logs(new_level=logging.WARNING) as cml:
assert_equal(list(query_aggregated_metadata('all', ds, [])), [])
assert_re_in('.*Found no aggregated metadata.*update', cml.out)
ds.config.add('datalad.metadata.nativetype', 'frictionless_datapackage',
where='dataset')
subds = ds.create('sub', force=True)
subds.config.add('datalad.metadata.nativetype', 'frictionless_datapackage',
where='dataset')
subsubds = subds.create('subsub', force=True)
subsubds.config.add('datalad.metadata.nativetype', 'frictionless_datapackage',
where='dataset')
ds.add('.', recursive=True)
ok_clean_git(ds.path)
# aggregate metadata from all subdatasets into any superdataset, including
# intermediate ones
res = ds.aggregate_metadata(recursive=True, update_mode='all')
# we get a success report for both subdatasets and the superdataset,
# and they get saved
assert_result_count(res, 6)
assert_result_count(res, 3, status='ok', action='aggregate_metadata')
assert_result_count(res, 3, status='ok', action='save')
# nice and tidy
ok_clean_git(ds.path)
# quick test of aggregate report
aggs = ds.metadata(get_aggregates=True)
# one for each dataset
assert_result_count(aggs, 3)
# the mother dataset also reports the layout version
assert_result_count(aggs, 1, path=ds.path, layout_version=1)
# store clean direct result
origres = ds.metadata(recursive=True)
# basic sanity check
assert_result_count(origres, 6)
assert_result_count(origres, 3, type='dataset')
assert_result_count(origres, 3, type='file') # Now that we have annex.key
# three different IDs
assert_equal(3, len(set([s['dsid'] for s in origres if s['type'] == 'dataset'])))
# and we know about all three datasets
for name in ('MOTHER_äöü東', 'child_äöü東', 'grandchild_äöü東'):
assert_true(
sum([s['metadata']['frictionless_datapackage']['name'] \
== assure_unicode(name) for s in origres
if s['type'] == 'dataset']))
# now clone the beast to simulate a new user installing an empty dataset
clone = install(
opj(path, 'clone'), source=ds.path,
result_xfm='datasets', return_type='item-or-list')
# ID mechanism works
assert_equal(ds.id, clone.id)
# get fresh metadata
cloneres = clone.metadata()
# basic sanity check
assert_result_count(cloneres, 2)
assert_result_count(cloneres, 1, type='dataset')
assert_result_count(cloneres, 1, type='file')
# now loop over the previous results from the direct metadata query of
# origin and make sure we get the exact same stuff from the clone
_compare_metadata_helper(origres, clone)
# now obtain a subdataset in the clone, should make no difference
assert_status('ok', clone.install('sub', result_xfm=None, return_type='list'))
_compare_metadata_helper(origres, clone)
# test search in search tests, not all over the place
## query smoke test
assert_result_count(clone.search('mother', mode='egrep'), 1)
assert_result_count(clone.search('(?i)MoTHER', mode='egrep'), 1)
child_res = clone.search('child', mode='egrep')
assert_result_count(child_res, 2)
for r in child_res:
if r['type'] == 'dataset':
assert_in(
r['query_matched']['frictionless_datapackage.name'],
r['metadata']['frictionless_datapackage']['name'])