This article collects typical usage examples of the Python method sklearn.datasets.fetch_openml. If you are unsure what datasets.fetch_openml does, how to call it, or what real-world uses look like, the curated code examples below may help. You can also explore further usage of the containing module, sklearn.datasets.
The following shows 15 code examples of datasets.fetch_openml, sorted by popularity by default. Votes from readers who find an example useful feed back into the ranking and help surface better Python examples.
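Before diving into the examples, here is a minimal sketch of a typical fetch_openml call; the mnist_784 dataset name is just a common illustration and is not used by the examples below:

from sklearn.datasets import fetch_openml

# Fetch a dataset from openml.org by name and version; return_X_y=True
# returns the feature matrix and target directly instead of a Bunch.
X, y = fetch_openml(name='mnist_784', version=1, return_X_y=True)
print(X.shape, y.shape)  # (70000, 784) (70000,)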
Example 1: test_fetch_openml_cache
# Required import: from sklearn import datasets [as alias]
# or: from sklearn.datasets import fetch_openml [as alias]
def test_fetch_openml_cache(monkeypatch, gzip_response, tmpdir):
    def _mock_urlopen_raise(request):
        raise ValueError('This mechanism intends to test correct cache '
                         'handling. As such, urlopen should never be '
                         'accessed. URL: %s' % request.get_full_url())

    data_id = 2
    cache_directory = str(tmpdir.mkdir('scikit_learn_data'))
    _monkey_patch_webbased_functions(
        monkeypatch, data_id, gzip_response)
    # First fetch hits the (mocked) network and populates the cache.
    X_fetched, y_fetched = fetch_openml(data_id=data_id, cache=True,
                                        data_home=cache_directory,
                                        return_X_y=True)
    # From here on, any network access must fail loudly.
    monkeypatch.setattr(sklearn.datasets.openml, 'urlopen',
                        _mock_urlopen_raise)
    # The second fetch must be served entirely from the on-disk cache.
    X_cached, y_cached = fetch_openml(data_id=data_id, cache=True,
                                      data_home=cache_directory,
                                      return_X_y=True)
    np.testing.assert_array_equal(X_fetched, X_cached)
    np.testing.assert_array_equal(y_fetched, y_cached)
Example 2: main
# Required import: from sklearn import datasets [as alias]
# or: from sklearn.datasets import fetch_openml [as alias]
def main():
    from sklearn import preprocessing
    from sklearn.datasets import fetch_openml as fetch_mldata
    from sklearn.model_selection import train_test_split

    db_name = 'diabetes'
    data_set = fetch_mldata(db_name)
    data_set.data = preprocessing.normalize(data_set.data)

    # Map the string labels to +1 / -1 for the binary classifiers below.
    tmp = data_set.target
    tmpL = [1 if i == "tested_positive" else -1 for i in tmp]
    data_set.target = tmpL

    X_train, X_test, y_train, y_test = train_test_split(
        data_set.data, data_set.target, test_size=0.4)

    mlelm = MLELM(hidden_units=(10, 30, 200)).fit(X_train, y_train)
    elm = ELM(200).fit(X_train, y_train)

    print("MLELM Accuracy %0.3f " % mlelm.score(X_test, y_test))
    print("ELM Accuracy %0.3f " % elm.score(X_test, y_test))
Example 3: test_warn_ignore_attribute
# Required import: from sklearn import datasets [as alias]
# or: from sklearn.datasets import fetch_openml [as alias]
def test_warn_ignore_attribute(monkeypatch, gzip_response):
    data_id = 40966
    expected_row_id_msg = "target_column={} has flag is_row_identifier."
    expected_ignore_msg = "target_column={} has flag is_ignore."
    _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response)
    # single column test
    assert_warns_message(UserWarning, expected_row_id_msg.format('MouseID'),
                         fetch_openml, data_id=data_id,
                         target_column='MouseID',
                         cache=False)
    assert_warns_message(UserWarning, expected_ignore_msg.format('Genotype'),
                         fetch_openml, data_id=data_id,
                         target_column='Genotype',
                         cache=False)
    # multi column test
    assert_warns_message(UserWarning, expected_row_id_msg.format('MouseID'),
                         fetch_openml, data_id=data_id,
                         target_column=['MouseID', 'class'],
                         cache=False)
    assert_warns_message(UserWarning, expected_ignore_msg.format('Genotype'),
                         fetch_openml, data_id=data_id,
                         target_column=['Genotype', 'class'],
                         cache=False)
Example 4: load_mauna_loa_atmospheric_co2
# Required import: from sklearn import datasets [as alias]
# or: from sklearn.datasets import fetch_openml [as alias]
def load_mauna_loa_atmospheric_co2():
    ml_data = fetch_openml(data_id=41187)
    months = []
    ppmv_sums = []
    counts = []

    y = ml_data.data[:, 0]  # year
    m = ml_data.data[:, 1]  # month
    month_float = y + (m - 1) / 12
    ppmvs = ml_data.target

    for month, ppmv in zip(month_float, ppmvs):
        if not months or month != months[-1]:
            months.append(month)
            ppmv_sums.append(ppmv)
            counts.append(1)
        else:
            # aggregate monthly sum to produce average
            ppmv_sums[-1] += ppmv
            counts[-1] += 1

    months = np.asarray(months).reshape(-1, 1)
    avg_ppmvs = np.asarray(ppmv_sums) / counts
    return months, avg_ppmvs
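The running-sum loop above simply averages all readings that fall in the same month. For comparison, a minimal pandas sketch of the same aggregation; this assumes the (year, month) column layout used above, and note that pandas and the as_frame argument are not part of the original example:

import pandas as pd
from sklearn.datasets import fetch_openml

# as_frame requires a reasonably recent scikit-learn release.
ml_data = fetch_openml(data_id=41187, as_frame=False)
df = pd.DataFrame({
    'month': ml_data.data[:, 0] + (ml_data.data[:, 1] - 1) / 12,
    'ppmv': ml_data.target,
})
# sort=False keeps the original chronological order of first appearance.
monthly = df.groupby('month', sort=False)['ppmv'].mean()
months = monthly.index.to_numpy().reshape(-1, 1)
avg_ppmvs = monthly.to_numpy()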
Example 5: main
# Required import: from sklearn import datasets [as alias]
# or: from sklearn.datasets import fetch_openml [as alias]
def main():
    from sklearn import preprocessing
    from sklearn.datasets import fetch_openml as fetch_mldata
    from sklearn.model_selection import KFold, cross_val_score

    db_name = 'australian'
    hid_nums = [100, 200, 300]

    data_set = fetch_mldata(db_name)
    data_set.data = preprocessing.normalize(data_set.data)
    data_set.target = [1 if i == 1 else -1
                       for i in data_set.target.astype(int)]

    for hid_num in hid_nums:
        print(hid_num, end=' ')
        e = ELM(hid_num)
        ave = 0
        # average accuracy over 10 repetitions of shuffled 5-fold CV
        for i in range(10):
            cv = KFold(n_splits=5, shuffle=True)
            scores = cross_val_score(
                e, data_set.data, data_set.target,
                cv=cv, scoring='accuracy', n_jobs=-1)
            ave += scores.mean()
        ave /= 10
        print("Accuracy: %0.3f " % (ave))
Example 6: main
# Required import: from sklearn import datasets [as alias]
# or: from sklearn.datasets import fetch_openml [as alias]
def main():
    from sklearn import preprocessing
    from sklearn.datasets import fetch_openml as fetch_mldata
    from sklearn.model_selection import cross_val_score

    db_name = 'iris'
    hid_num = 1000

    data_set = fetch_mldata(db_name, version=1)
    data_set.data = preprocessing.scale(data_set.data)
    data_set.target = preprocessing.LabelEncoder().fit_transform(data_set.target)

    print(db_name)

    # The estimators draw random hidden weights, so repeated runs still
    # differ even though cv=5 uses a deterministic, unshuffled split.
    print('ECOBELM', hid_num)
    e = ECOBELM(hid_num, c=2**5)
    ave = 0
    for i in range(10):
        scores = cross_val_score(
            e, data_set.data, data_set.target, cv=5, scoring='accuracy')
        ave += scores.mean()
    ave /= 10
    print("Accuracy: %0.2f " % (ave))

    print('ELM', hid_num)
    e = ELM(hid_num)
    ave = 0
    for i in range(10):
        scores = cross_val_score(
            e, data_set.data, data_set.target, cv=5, scoring='accuracy')
        ave += scores.mean()
    ave /= 10
    print("Accuracy: %0.2f " % (ave))
Example 7: getdataset
# Required import: from sklearn import datasets [as alias]
# or: from sklearn.datasets import fetch_openml [as alias]
def getdataset(datasetname, onehot_encode_strings=True):
    # load
    dataset = fetch_openml(datasetname)
    # get X and y
    X = dshape(dataset.data)
    try:
        target = dshape(dataset.target)
    except Exception:
        print("WARNING: No target found. Taking last column of data matrix as target")
        target = X[:, -1]
        X = X[:, :-1]
    if (
        len(target.shape) > 1 and target.shape[1] > X.shape[1]
    ):  # some mldata sets are mixed up...
        X = target
        target = dshape(dataset.data)
    if len(X.shape) == 1 or X.shape[1] <= 1:
        for k in dataset.keys():
            if k != "data" and k != "target" and len(dataset[k]) == X.shape[1]:
                X = np.hstack((X, dshape(dataset[k])))
    # one-hot encode categorical (string) columns
    if onehot_encode_strings:
        cat_ft = [
            i
            for i in range(X.shape[1])
            if "str" in str(type(unpack(X[0, i])))
            or "unicode" in str(type(unpack(X[0, i])))
        ]
        if len(cat_ft):
            for i in cat_ft:
                X[:, i] = tonumeric(X[:, i])
            # NOTE: categorical_features was removed in scikit-learn 0.22;
            # this call requires an older release (see the sketch below).
            X = OneHotEncoder(categorical_features=cat_ft).fit_transform(X)
    # if sparse, make dense
    try:
        X = X.toarray()
    except AttributeError:
        pass
    # convert y to monotonically increasing ints
    y = tonumeric(target).astype(int)
    return np.nan_to_num(X.astype(float)), y
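OneHotEncoder lost its categorical_features argument in scikit-learn 0.22, so on newer releases the call above fails. A hedged equivalent wraps the encoder in a ColumnTransformer; cat_ft is assumed to be the list of categorical column indices built in the example:

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# Encode only the columns in cat_ft and pass the rest through. Like the
# old categorical_features behavior, encoded columns are stacked first.
ct = ColumnTransformer([('onehot', OneHotEncoder(), cat_ft)],
                       remainder='passthrough')
X = ct.fit_transform(X)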
Example 8: fetch_employee_salaries
# Required import: from sklearn import datasets [as alias]
# or: from sklearn.datasets import fetch_openml [as alias]
def fetch_employee_salaries():
    """Fetch the employee_salaries dataset.

    The employee_salaries dataset contains information about annual salaries
    (year 2016) for more than 9,000 employees of Montgomery County
    (Maryland, US).

    Returns
    -------
    dict
        A dictionary containing:

        - a short description of the dataset (under the ``DESCR`` key)
        - the tabular data (under the ``data`` key)
        - the target (under the ``target`` key)

    References
    ----------
    https://catalog.data.gov/dataset/employee-salaries-2016
    """
    data = fetch_openml(data_id=42125, as_frame=True)
    data.data['Current Annual Salary'] = data['target']
    return data
    # link dead.
    # return fetch_dataset(EMPLOYEE_SALARIES_CONFIG, show_progress=False)
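A hypothetical usage sketch for the helper above; the attribute names come from the Bunch that fetch_openml returns:

salaries = fetch_employee_salaries()
print(salaries.DESCR[:200])  # short dataset description
X = salaries.data            # DataFrame; salary column appended by the helper
y = salaries.target          # annual salary target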
Example 9: _test_features_list
# Required import: from sklearn import datasets [as alias]
# or: from sklearn.datasets import fetch_openml [as alias]
def _test_features_list(data_id):
    # XXX Test is intended to verify/ensure correct decoding behavior
    # Not usable with sparse data or datasets that have columns marked as
    # {row_identifier, ignore}
    def decode_column(data_bunch, col_idx):
        col_name = data_bunch.feature_names[col_idx]
        if col_name in data_bunch.categories:
            # XXX: This would be faster with np.take, although it does not
            # handle missing values fast (also not with mode='wrap')
            cat = data_bunch.categories[col_name]
            result = [None if is_scalar_nan(idx) else cat[int(idx)]
                      for idx in data_bunch.data[:, col_idx]]
            return np.array(result, dtype='O')
        else:
            # non-nominal attribute
            return data_bunch.data[:, col_idx]

    data_bunch = fetch_openml(data_id=data_id, cache=False, target_column=None)

    # also obtain decoded arff
    data_description = _get_data_description_by_id(data_id, None)
    sparse = data_description['format'].lower() == 'sparse_arff'
    if sparse is True:
        raise ValueError('This test is not intended for sparse data, to keep '
                         'code relatively simple')
    data_arff = _download_data_arff(data_description['file_id'],
                                    sparse, None, False)
    data_downloaded = np.array(list(data_arff['data']), dtype='O')

    for i in range(len(data_bunch.feature_names)):
        # XXX: Test per column, as this makes it easier to avoid problems with
        # missing values
        np.testing.assert_array_equal(data_downloaded[:, i],
                                      decode_column(data_bunch, i))
Example 10: test_fetch_openml_notarget
# Required import: from sklearn import datasets [as alias]
# or: from sklearn.datasets import fetch_openml [as alias]
def test_fetch_openml_notarget(monkeypatch, gzip_response):
    data_id = 61
    target_column = None
    expected_observations = 150
    expected_features = 5

    _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response)
    data = fetch_openml(data_id=data_id, target_column=target_column,
                        cache=False)
    # With target_column=None, all columns land in .data and .target is None.
    assert data.data.shape == (expected_observations, expected_features)
    assert data.target is None
Example 11: test_fetch_openml_inactive
# Required import: from sklearn import datasets [as alias]
# or: from sklearn.datasets import fetch_openml [as alias]
def test_fetch_openml_inactive(monkeypatch, gzip_response):
    # fetch inactive dataset by id
    data_id = 40675
    _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response)
    glas2 = assert_warns_message(
        UserWarning, "Version 1 of dataset glass2 is inactive,", fetch_openml,
        data_id=data_id, cache=False)
    assert glas2.data.shape == (163, 9)
    # fetch inactive dataset by name and version
    glas2_by_version = assert_warns_message(
        UserWarning, "Version 1 of dataset glass2 is inactive,", fetch_openml,
        data_id=None, name="glass2", version=1, cache=False)
    assert int(glas2_by_version.details['id']) == data_id
Example 12: test_fetch_nonexiting
# Required import: from sklearn import datasets [as alias]
# or: from sklearn.datasets import fetch_openml [as alias]
def test_fetch_nonexiting(monkeypatch, gzip_response):
    # there is no active version of glass2
    data_id = 40675
    _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response)
    # Note that we only want to search by name (not data id)
    assert_raise_message(ValueError, "No active dataset glass2 found",
                         fetch_openml, name='glass2', cache=False)
Example 13: test_string_attribute
# Required import: from sklearn import datasets [as alias]
# or: from sklearn.datasets import fetch_openml [as alias]
def test_string_attribute(monkeypatch, gzip_response):
    data_id = 40945
    _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response)
    # single column test
    assert_raise_message(ValueError,
                         'STRING attributes are not yet supported',
                         fetch_openml, data_id=data_id, cache=False)
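For reference, data_id=40945 is the Titanic dataset. Later scikit-learn releases can load its string columns when a DataFrame is requested; a hedged sketch, assuming a release where fetch_openml accepts as_frame:

from sklearn.datasets import fetch_openml

# String columns come back as pandas object/category columns instead of
# raising the ValueError asserted in the test above.
titanic = fetch_openml(data_id=40945, as_frame=True)
print(titanic.data.dtypes)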
Example 14: test_dataset_with_openml_error
# Required import: from sklearn import datasets [as alias]
# or: from sklearn.datasets import fetch_openml [as alias]
def test_dataset_with_openml_error(monkeypatch, gzip_response):
    data_id = 1
    _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response)
    assert_warns_message(
        UserWarning,
        "OpenML registered a problem with the dataset. It might be unusable. "
        "Error:",
        fetch_openml, data_id=data_id, cache=False
    )
Example 15: test_dataset_with_openml_warning
# Required import: from sklearn import datasets [as alias]
# or: from sklearn.datasets import fetch_openml [as alias]
def test_dataset_with_openml_warning(monkeypatch, gzip_response):
    data_id = 3
    _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response)
    assert_warns_message(
        UserWarning,
        "OpenML raised a warning on the dataset. It might be unusable. "
        "Warning:",
        fetch_openml, data_id=data_id, cache=False
    )