This article collects typical usage examples of the Python method sklearn.datasets.fetch_openml. If you are unsure what datasets.fetch_openml does, how to call it, or what real-world uses look like, the curated code examples below may help. You can also explore further usage of the containing module, sklearn.datasets.
The following shows 15 code examples of datasets.fetch_openml, sorted by popularity by default. Votes from readers who find an example useful feed back into the ranking and help surface better Python examples.
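Before diving into the examples, here is a minimal sketch of a typical fetch_openml call; the mnist_784 dataset name is just a common illustration and is not used by the examples below:

from sklearn.datasets import fetch_openml

# Fetch a dataset from openml.org by name and version; return_X_y=True
# returns the feature matrix and target directly instead of a Bunch.
X, y = fetch_openml(name='mnist_784', version=1, return_X_y=True)
print(X.shape, y.shape)  # (70000, 784) (70000,)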
Example 1: test_fetch_openml_cache
# Required import: from sklearn import datasets [as alias]
# or: from sklearn.datasets import fetch_openml [as alias]
def test_fetch_openml_cache(monkeypatch, gzip_response, tmpdir):
    def _mock_urlopen_raise(request):
        raise ValueError('This mechanism intends to test correct cache '
                         'handling. As such, urlopen should never be '
                         'accessed. URL: %s' % request.get_full_url())

    data_id = 2
    cache_directory = str(tmpdir.mkdir('scikit_learn_data'))
    _monkey_patch_webbased_functions(
        monkeypatch, data_id, gzip_response)
    # First fetch hits the (mocked) network and populates the cache.
    X_fetched, y_fetched = fetch_openml(data_id=data_id, cache=True,
                                        data_home=cache_directory,
                                        return_X_y=True)
    # From here on, any network access must fail loudly.
    monkeypatch.setattr(sklearn.datasets.openml, 'urlopen',
                        _mock_urlopen_raise)
    # The second fetch must be served entirely from the on-disk cache.
    X_cached, y_cached = fetch_openml(data_id=data_id, cache=True,
                                      data_home=cache_directory,
                                      return_X_y=True)
    np.testing.assert_array_equal(X_fetched, X_cached)
    np.testing.assert_array_equal(y_fetched, y_cached)
Example 2: main
# Required import: from sklearn import datasets [as alias]
# or: from sklearn.datasets import fetch_openml [as alias]
def main():
    from sklearn import preprocessing
    from sklearn.datasets import fetch_openml as fetch_mldata
    from sklearn.model_selection import train_test_split

    db_name = 'diabetes'
    data_set = fetch_mldata(db_name)
    data_set.data = preprocessing.normalize(data_set.data)

    # Map the string labels to +1 / -1 for the binary classifiers below.
    tmp = data_set.target
    tmpL = [1 if i == "tested_positive" else -1 for i in tmp]
    data_set.target = tmpL

    X_train, X_test, y_train, y_test = train_test_split(
        data_set.data, data_set.target, test_size=0.4)

    mlelm = MLELM(hidden_units=(10, 30, 200)).fit(X_train, y_train)
    elm = ELM(200).fit(X_train, y_train)

    print("MLELM Accuracy %0.3f " % mlelm.score(X_test, y_test))
    print("ELM Accuracy %0.3f " % elm.score(X_test, y_test))
Example 3: test_warn_ignore_attribute
# Required import: from sklearn import datasets [as alias]
# or: from sklearn.datasets import fetch_openml [as alias]
def test_warn_ignore_attribute(monkeypatch, gzip_response):
    data_id = 40966
    expected_row_id_msg = "target_column={} has flag is_row_identifier."
    expected_ignore_msg = "target_column={} has flag is_ignore."
    _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response)
    # single column test
    assert_warns_message(UserWarning, expected_row_id_msg.format('MouseID'),
                         fetch_openml, data_id=data_id,
                         target_column='MouseID',
                         cache=False)
    assert_warns_message(UserWarning, expected_ignore_msg.format('Genotype'),
                         fetch_openml, data_id=data_id,
                         target_column='Genotype',
                         cache=False)
    # multi column test
    assert_warns_message(UserWarning, expected_row_id_msg.format('MouseID'),
                         fetch_openml, data_id=data_id,
                         target_column=['MouseID', 'class'],
                         cache=False)
    assert_warns_message(UserWarning, expected_ignore_msg.format('Genotype'),
                         fetch_openml, data_id=data_id,
                         target_column=['Genotype', 'class'],
                         cache=False)
Example 4: load_mauna_loa_atmospheric_co2
# Required import: from sklearn import datasets [as alias]
# or: from sklearn.datasets import fetch_openml [as alias]
def load_mauna_loa_atmospheric_co2():
    ml_data = fetch_openml(data_id=41187)
    months = []
    ppmv_sums = []
    counts = []

    y = ml_data.data[:, 0]  # year
    m = ml_data.data[:, 1]  # month
    month_float = y + (m - 1) / 12
    ppmvs = ml_data.target

    for month, ppmv in zip(month_float, ppmvs):
        if not months or month != months[-1]:
            months.append(month)
            ppmv_sums.append(ppmv)
            counts.append(1)
        else:
            # aggregate monthly sum to produce average
            ppmv_sums[-1] += ppmv
            counts[-1] += 1

    months = np.asarray(months).reshape(-1, 1)
    avg_ppmvs = np.asarray(ppmv_sums) / counts
    return months, avg_ppmvs
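The running-sum loop above simply averages all readings that fall in the same month. For comparison, a minimal pandas sketch of the same aggregation; this assumes the (year, month) column layout used above, and note that pandas and the as_frame argument are not part of the original example:

import pandas as pd
from sklearn.datasets import fetch_openml

# as_frame requires a reasonably recent scikit-learn release.
ml_data = fetch_openml(data_id=41187, as_frame=False)
df = pd.DataFrame({
    'month': ml_data.data[:, 0] + (ml_data.data[:, 1] - 1) / 12,
    'ppmv': ml_data.target,
})
# sort=False keeps the original chronological order of first appearance.
monthly = df.groupby('month', sort=False)['ppmv'].mean()
months = monthly.index.to_numpy().reshape(-1, 1)
avg_ppmvs = monthly.to_numpy()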
Example 5: main
# Required import: from sklearn import datasets [as alias]
# or: from sklearn.datasets import fetch_openml [as alias]
def main():
    from sklearn import preprocessing
    from sklearn.datasets import fetch_openml as fetch_mldata
    from sklearn.model_selection import KFold, cross_val_score

    db_name = 'australian'
    hid_nums = [100, 200, 300]

    data_set = fetch_mldata(db_name)
    data_set.data = preprocessing.normalize(data_set.data)
    data_set.target = [1 if i == 1 else -1
                       for i in data_set.target.astype(int)]

    for hid_num in hid_nums:
        print(hid_num, end=' ')
        e = ELM(hid_num)
        ave = 0
        # average accuracy over 10 repetitions of shuffled 5-fold CV
        for i in range(10):
            cv = KFold(n_splits=5, shuffle=True)
            scores = cross_val_score(
                e, data_set.data, data_set.target,
                cv=cv, scoring='accuracy', n_jobs=-1)
            ave += scores.mean()
        ave /= 10
        print("Accuracy: %0.3f " % (ave))
Example 6: main
# Required import: from sklearn import datasets [as alias]
# or: from sklearn.datasets import fetch_openml [as alias]
def main():
    from sklearn import preprocessing
    from sklearn.datasets import fetch_openml as fetch_mldata
    from sklearn.model_selection import cross_val_score

    db_name = 'iris'
    hid_num = 1000

    data_set = fetch_mldata(db_name, version=1)
    data_set.data = preprocessing.scale(data_set.data)
    data_set.target = preprocessing.LabelEncoder().fit_transform(data_set.target)

    print(db_name)

    # The estimators draw random hidden weights, so repeated runs still
    # differ even though cv=5 uses a deterministic, unshuffled split.
    print('ECOBELM', hid_num)
    e = ECOBELM(hid_num, c=2**5)
    ave = 0
    for i in range(10):
        scores = cross_val_score(
            e, data_set.data, data_set.target, cv=5, scoring='accuracy')
        ave += scores.mean()
    ave /= 10
    print("Accuracy: %0.2f " % (ave))

    print('ELM', hid_num)
    e = ELM(hid_num)
    ave = 0
    for i in range(10):
        scores = cross_val_score(
            e, data_set.data, data_set.target, cv=5, scoring='accuracy')
        ave += scores.mean()
    ave /= 10
    print("Accuracy: %0.2f " % (ave))
Example 7: getdataset
# Required import: from sklearn import datasets [as alias]
# or: from sklearn.datasets import fetch_openml [as alias]
def getdataset(datasetname, onehot_encode_strings=True):
    # load
    dataset = fetch_openml(datasetname)
    # get X and y
    X = dshape(dataset.data)
    try:
        target = dshape(dataset.target)
    except Exception:
        print("WARNING: No target found. Taking last column of data matrix as target")
        target = X[:, -1]
        X = X[:, :-1]
    if (
        len(target.shape) > 1 and target.shape[1] > X.shape[1]
    ):  # some mldata sets are mixed up...
        X = target
        target = dshape(dataset.data)
    if len(X.shape) == 1 or X.shape[1] <= 1:
        for k in dataset.keys():
            if k != "data" and k != "target" and len(dataset[k]) == X.shape[1]:
                X = np.hstack((X, dshape(dataset[k])))
    # one-hot encode categorical (string) columns
    if onehot_encode_strings:
        cat_ft = [
            i
            for i in range(X.shape[1])
            if "str" in str(type(unpack(X[0, i])))
            or "unicode" in str(type(unpack(X[0, i])))
        ]
        if len(cat_ft):
            for i in cat_ft:
                X[:, i] = tonumeric(X[:, i])
            # NOTE: categorical_features was removed in scikit-learn 0.22;
            # this call requires an older release (see the sketch below).
            X = OneHotEncoder(categorical_features=cat_ft).fit_transform(X)
    # if sparse, make dense
    try:
        X = X.toarray()
    except AttributeError:
        pass
    # convert y to monotonically increasing ints
    y = tonumeric(target).astype(int)
    return np.nan_to_num(X.astype(float)), y
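OneHotEncoder lost its categorical_features argument in scikit-learn 0.22, so on newer releases the call above fails. A hedged equivalent wraps the encoder in a ColumnTransformer; cat_ft is assumed to be the list of categorical column indices built in the example:

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# Encode only the columns in cat_ft and pass the rest through. Like the
# old categorical_features behavior, encoded columns are stacked first.
ct = ColumnTransformer([('onehot', OneHotEncoder(), cat_ft)],
                       remainder='passthrough')
X = ct.fit_transform(X)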
Example 8: fetch_employee_salaries
# Required import: from sklearn import datasets [as alias]
# or: from sklearn.datasets import fetch_openml [as alias]
def fetch_employee_salaries():
    """Fetch the employee_salaries dataset.

    The employee_salaries dataset contains information about annual salaries
    (year 2016) for more than 9,000 employees of Montgomery County
    (Maryland, US).

    Returns
    -------
    dict
        A dictionary containing:

        - a short description of the dataset (under the ``DESCR`` key)
        - the tabular data (under the ``data`` key)
        - the target (under the ``target`` key)

    References
    ----------
    https://catalog.data.gov/dataset/employee-salaries-2016
    """
    data = fetch_openml(data_id=42125, as_frame=True)
    data.data['Current Annual Salary'] = data['target']
    return data
    # link dead.
    # return fetch_dataset(EMPLOYEE_SALARIES_CONFIG, show_progress=False)
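A hypothetical usage sketch for the helper above; the attribute names come from the Bunch that fetch_openml returns:

salaries = fetch_employee_salaries()
print(salaries.DESCR[:200])  # short dataset description
X = salaries.data            # DataFrame; salary column appended by the helper
y = salaries.target          # annual salary target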
Example 9: _test_features_list
# Required import: from sklearn import datasets [as alias]
# or: from sklearn.datasets import fetch_openml [as alias]
def _test_features_list(data_id):
    # XXX Test is intended to verify/ensure correct decoding behavior
    # Not usable with sparse data or datasets that have columns marked as
    # {row_identifier, ignore}
    def decode_column(data_bunch, col_idx):
        col_name = data_bunch.feature_names[col_idx]
        if col_name in data_bunch.categories:
            # XXX: This would be faster with np.take, although it does not
            # handle missing values fast (also not with mode='wrap')
            cat = data_bunch.categories[col_name]
            result = [None if is_scalar_nan(idx) else cat[int(idx)]
                      for idx in data_bunch.data[:, col_idx]]
            return np.array(result, dtype='O')
        else:
            # non-nominal attribute
            return data_bunch.data[:, col_idx]

    data_bunch = fetch_openml(data_id=data_id, cache=False, target_column=None)

    # also obtain decoded arff
    data_description = _get_data_description_by_id(data_id, None)
    sparse = data_description['format'].lower() == 'sparse_arff'
    if sparse is True:
        raise ValueError('This test is not intended for sparse data, to keep '
                         'code relatively simple')
    data_arff = _download_data_arff(data_description['file_id'],
                                    sparse, None, False)
    data_downloaded = np.array(list(data_arff['data']), dtype='O')

    for i in range(len(data_bunch.feature_names)):
        # XXX: Test per column, as this makes it easier to avoid problems with
        # missing values
        np.testing.assert_array_equal(data_downloaded[:, i],
                                      decode_column(data_bunch, i))
Example 10: test_fetch_openml_notarget
# Required import: from sklearn import datasets [as alias]
# or: from sklearn.datasets import fetch_openml [as alias]
def test_fetch_openml_notarget(monkeypatch, gzip_response):
    data_id = 61
    target_column = None
    expected_observations = 150
    expected_features = 5

    _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response)
    data = fetch_openml(data_id=data_id, target_column=target_column,
                        cache=False)
    # With target_column=None, all columns land in .data and .target is None.
    assert data.data.shape == (expected_observations, expected_features)
    assert data.target is None
Example 11: test_fetch_openml_inactive
# Required import: from sklearn import datasets [as alias]
# or: from sklearn.datasets import fetch_openml [as alias]
def test_fetch_openml_inactive(monkeypatch, gzip_response):
    # fetch inactive dataset by id
    data_id = 40675
    _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response)
    glas2 = assert_warns_message(
        UserWarning, "Version 1 of dataset glass2 is inactive,", fetch_openml,
        data_id=data_id, cache=False)
    assert glas2.data.shape == (163, 9)
    # fetch inactive dataset by name and version
    glas2_by_version = assert_warns_message(
        UserWarning, "Version 1 of dataset glass2 is inactive,", fetch_openml,
        data_id=None, name="glass2", version=1, cache=False)
    assert int(glas2_by_version.details['id']) == data_id
Example 12: test_fetch_nonexiting
# Required import: from sklearn import datasets [as alias]
# or: from sklearn.datasets import fetch_openml [as alias]
def test_fetch_nonexiting(monkeypatch, gzip_response):
    # there is no active version of glass2
    data_id = 40675
    _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response)
    # Note that we only want to search by name (not data id)
    assert_raise_message(ValueError, "No active dataset glass2 found",
                         fetch_openml, name='glass2', cache=False)
Example 13: test_string_attribute
# Required import: from sklearn import datasets [as alias]
# or: from sklearn.datasets import fetch_openml [as alias]
def test_string_attribute(monkeypatch, gzip_response):
    data_id = 40945
    _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response)
    # single column test
    assert_raise_message(ValueError,
                         'STRING attributes are not yet supported',
                         fetch_openml, data_id=data_id, cache=False)
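For reference, data_id=40945 is the Titanic dataset. Later scikit-learn releases can load its string columns when a DataFrame is requested; a hedged sketch, assuming a release where fetch_openml accepts as_frame:

from sklearn.datasets import fetch_openml

# String columns come back as pandas object/category columns instead of
# raising the ValueError asserted in the test above.
titanic = fetch_openml(data_id=40945, as_frame=True)
print(titanic.data.dtypes)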
Example 14: test_dataset_with_openml_error
# Required import: from sklearn import datasets [as alias]
# or: from sklearn.datasets import fetch_openml [as alias]
def test_dataset_with_openml_error(monkeypatch, gzip_response):
    data_id = 1
    _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response)
    assert_warns_message(
        UserWarning,
        "OpenML registered a problem with the dataset. It might be unusable. "
        "Error:",
        fetch_openml, data_id=data_id, cache=False
    )
Example 15: test_dataset_with_openml_warning
# Required import: from sklearn import datasets [as alias]
# or: from sklearn.datasets import fetch_openml [as alias]
def test_dataset_with_openml_warning(monkeypatch, gzip_response):
    data_id = 3
    _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response)
    assert_warns_message(
        UserWarning,
        "OpenML raised a warning on the dataset. It might be unusable. "
        "Warning:",
        fetch_openml, data_id=data_id, cache=False
    )