

Python api.Dataset Class Code Examples

This article collects typical usage examples of the Python class datalad.api.Dataset. If you are wondering what the Dataset class does, or how to use it in practice, the curated examples below should help.


The following presents 15 code examples of the Dataset class, ordered by popularity. Each example is an excerpt from an open-source test suite, so the surrounding imports are omitted; the source file is noted after each snippet.
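Before the examples, here is a minimal sketch of the pattern most of them share. It only uses calls that appear in the examples below; the dataset path is a placeholder:

    import os

    from datalad.api import Dataset

    # '/tmp/my_dataset' is a placeholder path for this sketch
    ds = Dataset('/tmp/my_dataset')
    ds.create()                      # initialize a new dataset (git + git-annex repo)

    # add some content and record it in the dataset's history
    with open(os.path.join(ds.path, 'file.dat'), 'w') as f:
        f.write('some content')
    ds.save(message="add file.dat")

    assert ds.is_installed()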

Example 1: test_addurls_dry_run

def test_addurls_dry_run(path):
    ds = Dataset(path).create(force=True)

    with chpwd(path):
        json_file = "links.json"
        with open(json_file, "w") as jfh:
            json.dump([{"url": "URL/a.dat", "name": "a", "subdir": "foo"},
                       {"url": "URL/b.dat", "name": "b", "subdir": "bar"},
                       {"url": "URL/c.dat", "name": "c", "subdir": "foo"}],
                      jfh)

        ds.save(message="setup")

        with swallow_logs(new_level=logging.INFO) as cml:
            ds.addurls(json_file,
                       "{url}",
                       "{subdir}//{_url_filename_root}",
                       dry_run=True)

            for dir_ in ["foo", "bar"]:
                assert_in("Would create a subdataset at {}".format(dir_),
                          cml.out)
            assert_in(
                "Would download URL/a.dat to {}".format(
                    os.path.join(path, "foo", "BASE")),
                cml.out)

            assert_in("Metadata: {}".format([u"name=a", u"subdir=foo"]),
                      cml.out)
Developer: datalad, Project: datalad, Lines: 29, Source: test_addurls.py
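A note on the filename template above: in addurls, a `//` in the filename format (here `{subdir}//{_url_filename_root}`) marks a subdataset boundary, which is why the dry run reports that it would create subdatasets at foo and bar. Plain `{...}` fields are filled from each record in the JSON file, while names starting with an underscore, such as `{_url_filename_root}`, are derived from the URL itself.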

Example 2: test_ignore_nondatasets

def test_ignore_nondatasets(path):
    # we want to ignore the version/commits for this test
    def _kill_time(meta):
        for m in meta:
            for k in ('version', 'dcterms:modified'):
                if k in m:
                    del m[k]
        return meta

    ds = Dataset(path).create()
    meta = _kill_time(get_metadata(ds))
    n_subm = 0
    # placing another repo in the dataset has no effect on metadata
    for cls, subpath in ((GitRepo, 'subm'), (AnnexRepo, 'annex_subm')):
        subm_path = opj(ds.path, subpath)
        r = cls(subm_path, create=True)
        with open(opj(subm_path, 'test'), 'w') as f:
            f.write('test')
        r.add('test')
        r.commit('some')
        assert_true(Dataset(subm_path).is_installed())
        assert_equal(meta, _kill_time(get_metadata(ds)))
        # making it a submodule has no effect either
        ds.add(subpath)
        assert_equal(len(ds.get_subdatasets()), n_subm + 1)
        assert_equal(meta, _kill_time(get_metadata(ds)))
        n_subm += 1
Developer: debanjum, Project: datalad, Lines: 27, Source: test_base.py

Example 3: test_addurls_url_parts

    def test_addurls_url_parts(self, path):
        ds = Dataset(path).create(force=True)
        with chpwd(path):
            ds.addurls(self.json_file, "{url}", "{_url0}/{_url_basename}")

            for fname in ["a.dat", "b.dat", "c.dat"]:
                ok_exists(op.join("udir", fname))
Developer: datalad, Project: datalad, Lines: 7, Source: test_addurls.py

Example 4: test_aggregate_with_missing_or_duplicate_id

def test_aggregate_with_missing_or_duplicate_id(path):
    # a hierarchy of three (super/sub)datasets, each with some native metadata
    ds = Dataset(opj(path, 'origin')).create(force=True)
    subds = ds.create('sub', force=True)
    subds.remove(opj('.datalad', 'config'), if_dirty='ignore')
    assert_false(exists(opj(subds.path, '.datalad', 'config')))
    subsubds = subds.create('subsub', force=True)
    # aggregate from bottom to top, guess native data, no compacting of graph
    # should yield 6 metadata sets, one implicit, and one native per dataset
    # and a second native set for the topmost dataset
    aggregate_metadata(ds, guess_native_type=True, recursive=True)
    # now only ask the top superdataset, no recursion, just reading from the cache
    meta = get_metadata(
        ds, guess_type=False, ignore_subdatasets=False, ignore_cache=False)
    # and we should now know about subsub
    for name in ('grandchild_äöü東',):
        assert_true(sum([s.get('name', '') == assure_unicode(name) for s in meta]))

    # but search should not fail
    with swallow_outputs():
        res1 = list(search_('.', regex=True, dataset=ds))
    assert res1

    # and check that we don't fail when a dataset ID is duplicated because
    # the same dataset was installed twice
    subds_clone = ds.install(source=subds.path, path="subds2")
    with swallow_outputs():
        res2 = list(search_('.', regex=True, dataset=ds))
Developer: debanjum, Project: datalad, Lines: 28, Source: test_base.py

Example 5: test_dont_trip_over_missing_subds

def test_dont_trip_over_missing_subds(path):
    ds1 = Dataset(opj(path, 'ds1')).create()
    ds2 = Dataset(opj(path, 'ds2')).create()
    subds2 = ds1.install(
        source=ds2.path, path='subds2',
        result_xfm='datasets', return_type='item-or-list')
    assert_true(subds2.is_installed())
    assert_in('subds2', ds1.subdatasets(result_xfm='relpaths'))
    subds2.uninstall()
    assert_in('subds2', ds1.subdatasets(result_xfm='relpaths'))
    assert_false(subds2.is_installed())
    # see if it wants to talk to github (and fail), or if it trips over something
    # before
    assert_raises(gh.BadCredentialsException,
        ds1.create_sibling_github, 'bogus', recursive=True,
        github_login='disabledloginfortesting')
    # inject remote config prior to the run
    assert_not_in('github', ds1.repo.get_remotes())
    # fail on existing
    ds1.repo.add_remote('github', 'http://nothere')
    assert_raises(ValueError,
        ds1.create_sibling_github, 'bogus', recursive=True,
        github_login='disabledloginfortesting')
    # talk to github when existing is OK
    assert_raises(gh.BadCredentialsException,
        ds1.create_sibling_github, 'bogus', recursive=True,
        github_login='disabledloginfortesting', existing='reconfigure')
    # return happy emptiness when all is skipped
    assert_equal(
        ds1.create_sibling_github(
            'bogus', recursive=True,
            github_login='disabledloginfortesting', existing='skip'),
        [])
Developer: datalad, Project: datalad, Lines: 33, Source: test_create_github.py

Example 6: test_bf2458

def test_bf2458(src, dst):
    ds = Dataset(src).create(force=True)
    ds.save(to_git=False)

    # now clone (without content) into new dst
    clone = install(source=ds.path, path=dst)
    # content is not here
    eq_(clone.repo.whereis('dummy'), [ds.config.get('annex.uuid')])
    # check that plain metadata access does not `get` stuff
    clone.metadata('.', on_failure='ignore')
    eq_(clone.repo.whereis('dummy'), [ds.config.get('annex.uuid')])
Developer: datalad, Project: datalad, Lines: 11, Source: test_base.py
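The whereis call used here reports which annex repositories (identified by UUID) hold a file's content. Right after install only the origin's annex UUID is listed, and the test verifies that a plain metadata query leaves that list unchanged, i.e. it does not silently get the file content into the clone.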

Example 7: test_addurls_repindex

    def test_addurls_repindex(self, path):
        ds = Dataset(path).create(force=True)

        with chpwd(path):
            with assert_raises(IncompleteResultsError) as raised:
                ds.addurls(self.json_file, "{url}", "{subdir}")
            assert_in("There are file name collisions", str(raised.exception))

            ds.addurls(self.json_file, "{url}", "{subdir}-{_repindex}")

            for fname in ["foo-0", "bar-0", "foo-1"]:
                ok_exists(fname)
Developer: datalad, Project: datalad, Lines: 12, Source: test_addurls.py
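The first addurls call fails because several records map to the same file name ({subdir} alone yields two "foo" entries). Appending {_repindex}, a counter over records that would otherwise collide, disambiguates them, producing foo-0, bar-0, and foo-1.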

Example 8: test_addurls_metafail

    def test_addurls_metafail(self, path):
        ds = Dataset(path).create(force=True)

        # Force failure by passing a non-existent file name to annex.
        fn = ds.repo.set_metadata_

        def set_meta(_, **kwargs):
            for i in fn("wreaking-havoc-and-such", **kwargs):
                yield i

        with chpwd(path), patch.object(ds.repo, 'set_metadata_', set_meta):
            with assert_raises(IncompleteResultsError):
                ds.addurls(self.json_file, "{url}", "{name}")
Developer: datalad, Project: datalad, Lines: 13, Source: test_addurls.py

Example 9: check_api

def check_api(no_annex, path):
    ds = Dataset(path).create(force=True, no_annex=no_annex)
    ds.add('.')
    ok_clean_git(ds.path)

    processed_extractors, skipped_extractors = [], []
    for extractor_ep in iter_entry_points('datalad.metadata.extractors'):
        # we need to be able to query for metadata, even if there is none
        # from any extractor
        try:
            extractor_cls = extractor_ep.load()
        except Exception as exc:
            exc_ = str(exc)
            skipped_extractors += [exc_]
            continue
        extractor = extractor_cls(
            ds, paths=['file.dat'])
        meta = extractor.get_metadata(
            dataset=True,
            content=True)
        # we also get something for the dataset and something for the content
        # even if any of the two is empty
        assert_equal(len(meta), 2)
        dsmeta, contentmeta = meta
        assert (isinstance(dsmeta, dict))
        assert hasattr(contentmeta, '__len__') or isgenerator(contentmeta)
        # verify that the generator does not blow up and has an entry for
        # our precious file
        cm = dict(contentmeta)
        # datalad_core does provide some (not really) information about our
        # precious file
        if extractor_ep.name == 'datalad_core':
            assert 'file.dat' in cm
        elif extractor_ep.name == 'annex':
            if not no_annex:
                # verify correct key, which is the same for all files of 0 size
                assert_equal(
                    cm['file.dat']['key'],
                    'MD5E-s0--d41d8cd98f00b204e9800998ecf8427e.dat'
                )
            else:
                # no metadata on that file
                assert not cm
        processed_extractors.append(extractor_ep.name)
    assert "datalad_core" in processed_extractors, \
        "Should have managed to find at least the core extractor extractor"
    if skipped_extractors:
        raise SkipTest(
            "Not fully tested/succeeded since some extractors failed"
            " to load:\n%s" % ("\n".join(skipped_extractors)))
Developer: hanke, Project: datalad, Lines: 50, Source: test_base.py
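check_api discovers extractors through the setuptools entry-point group 'datalad.metadata.extractors' (see the iter_entry_points call above), so third-party packages can plug in their own extractor classes. A hypothetical registration in such a package's setup.py might look like this (mypkg, mypkg.extractor, MyExtractor, and myextractor are made-up names):

    from setuptools import setup

    setup(
        name='mypkg',
        # hypothetical example: expose mypkg.extractor.MyExtractor to datalad
        entry_points={
            'datalad.metadata.extractors': [
                'myextractor = mypkg.extractor:MyExtractor',
            ],
        },
    )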

Example 10: test_reproin_largely_smoke

def test_reproin_largely_smoke(tmpdir, heuristic, invocation):
    is_bids = heuristic == 'reproin'
    arg = "--random-seed 1 -f %s -c dcm2niix -o %s" \
          % (heuristic, tmpdir)
    if is_bids:
        arg += " -b"
    arg += " --datalad "
    args = (
        arg + invocation
    ).split(' ')

    # Test some safeguards
    if invocation == "--files %s" % TESTS_DATA_PATH:
        # Multiple subjects must not be specified -- only a single one could
        # be overridden from the command line
        with pytest.raises(ValueError):
            runner(args + ['--subjects', 'sub1', 'sub2'])

        if heuristic != 'reproin':
            # no other heuristic has the mighty infotoids yet
            with pytest.raises(NotImplementedError):
                runner(args)
            return
    runner(args)
    ds = Dataset(str(tmpdir))
    assert ds.is_installed()
    assert not ds.repo.dirty
    head = ds.repo.get_hexsha()

    # and if we rerun -- should fail
    lgr.info(
        "RERUNNING, expecting to FAIL since the same everything "
        "and -c specified so we did conversion already"
    )
    with pytest.raises(RuntimeError):
        runner(args)

    # but there should be nothing new
    assert not ds.repo.dirty
    assert head == ds.repo.get_hexsha()

    # unless we pass 'overwrite' flag
    runner(args + ['--overwrite'])
    # but result should be exactly the same, so it still should be clean
    # and at the same commit
    assert ds.is_installed()
    assert not ds.repo.dirty
    assert head == ds.repo.get_hexsha()
Developer: cni-md, Project: heudiconv, Lines: 48, Source: test_heuristics.py

Example 11: test_addurls_subdataset

    def test_addurls_subdataset(self, path):
        ds = Dataset(path).create(force=True)

        with chpwd(path):
            for save in True, False:
                label = "save" if save else "nosave"
                hexsha_before = ds.repo.get_hexsha()
                ds.addurls(self.json_file, "{url}",
                           "{subdir}-" + label + "//{name}",
                           save=save)
                hexsha_after = ds.repo.get_hexsha()

                for fname in ["foo-{}/a", "bar-{}/b", "foo-{}/c"]:
                    ok_exists(fname.format(label))

                assert_true(save ^ (hexsha_before == hexsha_after))
                assert_true(save ^ ds.repo.dirty)

            # Now save the "--nosave" changes and check that we have
            # all the subdatasets.
            ds.add(".")
            eq_(set(subdatasets(ds, recursive=True,
                                result_xfm="relpaths")),
                {"foo-save", "bar-save", "foo-nosave", "bar-nosave"})

            # We don't try to recreate existing subdatasets.
            with swallow_logs(new_level=logging.DEBUG) as cml:
                ds.addurls(self.json_file, "{url}", "{subdir}-nosave//{name}")
                assert_in("Not creating subdataset at existing path", cml.out)
Developer: hanke, Project: datalad, Lines: 29, Source: test_addurls.py

Example 12: test_exif

def test_exif(path):
    ds = Dataset(path).create()
    ds.config.add('datalad.metadata.nativetype', 'exif', where='dataset')
    copy(
        opj(dirname(dirname(dirname(__file__))), 'tests', 'data', 'exif.jpg'),
        path)
    ds.save()
    ok_clean_git(ds.path)
    res = ds.aggregate_metadata()
    assert_status('ok', res)
    res = ds.metadata('exif.jpg')
    assert_result_count(res, 1)
    # from this extractor
    meta = res[0]['metadata']['exif']
    for k, v in target.items():
        eq_(meta[k], v)

    assert_in('@context', meta)
Developer: datalad, Project: datalad, Lines: 18, Source: test_exif.py

Example 13: test_zip_archive

def test_zip_archive(path):
    ds = Dataset(opj(path, 'ds')).create(force=True, no_annex=True)
    ds.save()
    with chpwd(path):
        ds.export_archive(filename='my', archivetype='zip')
        assert_true(os.path.exists('my.zip'))
        custom1_md5 = md5sum('my.zip')
        time.sleep(1.1)
        ds.export_archive(filename='my', archivetype='zip')
        assert_equal(md5sum('my.zip'), custom1_md5)

    # should be able to export without us cd'ing to that ds directory
    ds.export_archive(filename=ds.path, archivetype='zip')
    default_name = 'datalad_{}.zip'.format(ds.id)
    assert_true(os.path.exists(os.path.join(ds.path, default_name)))
Developer: datalad, Project: datalad, Lines: 15, Source: test_export_archive.py

Example 14: test_audio

def test_audio(path):
    ds = Dataset(path).create()
    ds.config.add('datalad.metadata.nativetype', 'audio', where='dataset')
    copy(
        opj(dirname(dirname(dirname(__file__))), 'tests', 'data', 'audio.mp3'),
        path)
    ds.add('.')
    ok_clean_git(ds.path)
    res = ds.aggregate_metadata()
    assert_status('ok', res)
    res = ds.metadata('audio.mp3')
    assert_result_count(res, 1)

    # from this extractor
    meta = res[0]['metadata']['audio']
    for k, v in target.items():
        eq_(meta[k], v)

    assert_in('@context', meta)

    uniques = ds.metadata(
        reporton='datasets',
        return_type='item-or-list',
    )['metadata']['datalad_unique_content_properties']
    # test file has it, but uniques have it blanked out, because the extractor considers it worthless
    # for discovering whole datasets
    assert_in('bitrate', meta)
    eq_(uniques['audio']['bitrate'], None)

    # the 'date' field carries no value, hence gets excluded from the unique report
    assert_in('date', meta)
    assert(not meta['date'])
    assert_not_in('date', uniques['audio'])
Developer: hanke, Project: datalad, Lines: 31, Source: test_audio.py

Example 15: test_basic_metadata

def test_basic_metadata(path):
    ds = Dataset(opj(path, 'origin'))
    meta = get_metadata(ds)
    assert_equal(sorted(meta[0].keys()),
                 ['@context', 'dcterms:conformsTo'])
    ds.create(force=True, save=False)
    # with subdataset
    sub = ds.create('sub', force=True)
    ds.save()
    meta = get_metadata(ds)
    assert_equal(
        sorted(meta[0].keys()),
        ['@context', '@id', 'availableFrom', 'dcterms:conformsTo',
         'dcterms:modified', 'type', 'version'])
    assert_equal(meta[0]['type'], 'Dataset')
    # clone and get relationship info in metadata
    sibling = install(opj(path, 'sibling'), source=opj(path, 'origin'))
    sibling_meta = get_metadata(sibling)
    assert_equal(sibling_meta[0]['@id'], ds.id)
    # origin should learn about the clone
    sibling.repo.push(remote='origin', refspec='git-annex')
    meta = get_metadata(ds)
    assert_equal([m['@id'] for m in meta[0]['availableFrom']],
                 [m['@id'] for m in sibling_meta[0]['availableFrom']])
    meta = get_metadata(ds, guess_type=True)
    # without aggregation there is no trace of subdatasets in the metadata
    assert_not_in('dcterms:hasPart', meta[0])
Developer: debanjum, Project: datalad, Lines: 27, Source: test_base.py


Note: The datalad.api.Dataset class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by the community, and copyright of the source code remains with the original authors; consult the corresponding project's License before distributing or reusing the code. Do not reproduce without permission.