This article collects typical usage examples of the Python method sklearn.model_selection.RepeatedKFold. If you have been wondering what model_selection.RepeatedKFold does or how to use it, the curated code examples below may help. You can also explore the containing module, sklearn.model_selection, for further usage examples.
The following shows 8 code examples of model_selection.RepeatedKFold, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
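Before turning to the examples, here is a minimal, self-contained sketch of the basic pattern they all share: construct a RepeatedKFold instance and iterate over the (train, test) index pairs it yields. The toy array and the n_splits/n_repeats values below are illustrative only.

import numpy as np
from sklearn.model_selection import RepeatedKFold

X = np.arange(12).reshape(6, 2)  # 6 toy samples with 2 features each
rkf = RepeatedKFold(n_splits=3, n_repeats=2, random_state=0)
# 3 folds repeated twice -> 6 (train, test) index pairs in total
for train_index, test_index in rkf.split(X):
    print(train_index, test_index)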
Example 1: test_2d_y
# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import RepeatedKFold [as alias]
def test_2d_y():
    # smoke test for 2d y and multi-label
    n_samples = 30
    rng = np.random.RandomState(1)
    X = rng.randint(0, 3, size=(n_samples, 2))
    y = rng.randint(0, 3, size=(n_samples,))
    y_2d = y.reshape(-1, 1)
    y_multilabel = rng.randint(0, 2, size=(n_samples, 3))
    groups = rng.randint(0, 3, size=(n_samples,))
    splitters = [LeaveOneOut(), LeavePOut(p=2), KFold(), StratifiedKFold(),
                 RepeatedKFold(), RepeatedStratifiedKFold(),
                 ShuffleSplit(), StratifiedShuffleSplit(test_size=.5),
                 GroupShuffleSplit(), LeaveOneGroupOut(),
                 LeavePGroupsOut(n_groups=2), GroupKFold(), TimeSeriesSplit(),
                 PredefinedSplit(test_fold=groups)]
    for splitter in splitters:
        # 1d and 2d targets must be accepted; multi-label targets either
        # work or raise a clear ValueError for stratified splitters
        list(splitter.split(X, y, groups))
        list(splitter.split(X, y_2d, groups))
        try:
            list(splitter.split(X, y_multilabel, groups))
        except ValueError as e:
            allowed_target_types = ('binary', 'multiclass')
            msg = "Supported target types are: {}. Got 'multilabel".format(
                allowed_target_types)
            assert msg in str(e)
Example 2: generate_kfold
# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import RepeatedKFold [as alias]
def generate_kfold(X, y=None, n_splits=5, random_state=0, stratified=False, n_repeats=1):
    # Return a list of [train_index, test_index] pairs, using stratified
    # splitters when requested and repeated splitters when n_repeats > 1.
    if stratified and (y is not None):
        if n_repeats > 1:
            kf = RepeatedStratifiedKFold(n_splits=n_splits, n_repeats=n_repeats, random_state=random_state)
        else:
            kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=random_state)
        kf.get_n_splits(X, y)
        return [[train_index, test_index] for train_index, test_index in kf.split(X, y)]
    else:
        if n_repeats > 1:
            kf = RepeatedKFold(n_splits=n_splits, n_repeats=n_repeats, random_state=random_state)
        else:
            kf = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)
        kf.get_n_splits(X)
        return [[train_index, test_index] for train_index, test_index in kf.split(X)]
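A short usage sketch for the generate_kfold helper above, assuming numpy and the sklearn splitters are imported as indicated; the toy data and parameter values are illustrative, and the final check simply confirms that one index pair is returned per split.

import numpy as np

X = np.arange(20).reshape(10, 2)
y = np.array([0, 1] * 5)
# stratified splits, repeated twice -> 5 * 2 = 10 [train, test] index pairs
folds = generate_kfold(X, y, n_splits=5, random_state=0, stratified=True, n_repeats=2)
assert len(folds) == 10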
Example 3: test_repeated_cv_value_errors
# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import RepeatedKFold [as alias]
def test_repeated_cv_value_errors():
    # n_repeats is not an integer or is <= 0
    for cv in (RepeatedKFold, RepeatedStratifiedKFold):
        assert_raises(ValueError, cv, n_repeats=0)
        assert_raises(ValueError, cv, n_repeats=1.5)
Example 4: test_repeated_kfold_determinstic_split
# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import RepeatedKFold [as alias]
def test_repeated_kfold_determinstic_split():
    X = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]]
    random_state = 258173307
    rkf = RepeatedKFold(
        n_splits=2,
        n_repeats=2,
        random_state=random_state)
    # split should produce the same, deterministic splits on each call
    for _ in range(3):
        splits = rkf.split(X)
        train, test = next(splits)
        assert_array_equal(train, [2, 4])
        assert_array_equal(test, [0, 1, 3])

        train, test = next(splits)
        assert_array_equal(train, [0, 1, 3])
        assert_array_equal(test, [2, 4])

        train, test = next(splits)
        assert_array_equal(train, [0, 1])
        assert_array_equal(test, [2, 3, 4])

        train, test = next(splits)
        assert_array_equal(train, [2, 3, 4])
        assert_array_equal(test, [0, 1])

        assert_raises(StopIteration, next, splits)
Example 5: test_get_n_splits_for_repeated_kfold
# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import RepeatedKFold [as alias]
def test_get_n_splits_for_repeated_kfold():
    n_splits = 3
    n_repeats = 4
    rkf = RepeatedKFold(n_splits, n_repeats)
    expected_n_splits = n_splits * n_repeats
    assert_equal(expected_n_splits, rkf.get_n_splits())
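In other words, get_n_splits reports the total number of train/test iterations that split will yield, i.e. n_splits * n_repeats; for instance:

rkf = RepeatedKFold(n_splits=3, n_repeats=4)
print(rkf.get_n_splits())  # 12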
Example 6: __init__
# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import RepeatedKFold [as alias]
def __init__(self, points, values, rbf_kwds=dict(), cv_kwds=dict(ns=5, nr=1)):
    """
    Parameters
    ----------
    points, values : see :class:`Rbf`
    rbf_kwds : dict
        keyword arguments for ``Rbf(points, values, **rbf_kwds)``
    cv_kwds : dict
        cross-validation parameters: `ns` = `n_splits`, `nr` = `n_repeats`
        (see sklearn.model_selection.RepeatedKFold)
    """
    self.points = points
    self.values = values
    self.rbf_kwds = rbf_kwds
    self.cv_kwds = cv_kwds
Example 7: cv
# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import RepeatedKFold [as alias]
def cv(self, params):
    """K-fold repeated CV.

    Split the data (points, values) randomly into K parts ("folds", K =
    ``ns`` in ``self.cv_kwds``) along axis 0 and use each part once as the
    test set, the rest as the training set. For example, with ``ns=5``:
    split into 5 parts at random indices, then fit 5 times, each time using
    4/5 of the data for training and 1/5 (one fold) for testing, giving 5
    fit errors. Optionally repeat ``nr`` times with different random
    splits, for ``nr * ns`` fit errors in total.

    Each time, build an Rbf interpolator with ``self.rbf_kwds``, fit, and
    return the fit error (scalar sum of squares from :meth:`Rbf.fit_error`).

    Parameters
    ----------
    params : seq of length 1 or 2
        | params[0] = p
        | params[1] = r (optional)

    Returns
    -------
    errs : 1d array (nr*ns,)
        direct fit error from each fold
    """
    ns = self.cv_kwds['ns']
    nr = self.cv_kwds['nr']
    errs = np.empty((ns*nr,), dtype=float)
    folds = RepeatedKFold(n_splits=ns, n_repeats=nr)
    for ii, tup in enumerate(folds.split(self.points)):
        idxs_train, idxs_test = tup
        rbfi = self._get_rbfi(params,
                              self.points[idxs_train, ...],
                              self.values[idxs_train, ...])
        errs[ii] = rbfi.fit_error(self.points[idxs_test, ...],
                                  self.values[idxs_test, ...])
    return errs
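The docstring above promises ``nr * ns`` fit errors in total; the same counting can be checked with RepeatedKFold directly, outside the class. The random points array below is illustrative only.

import numpy as np
from sklearn.model_selection import RepeatedKFold

points = np.random.rand(20, 3)
ns, nr = 5, 2
folds = RepeatedKFold(n_splits=ns, n_repeats=nr)
n_fits = sum(1 for _ in folds.split(points))
print(n_fits)  # ns * nr = 10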
Example 8: fit
# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import RepeatedKFold [as alias]
def fit(self, X, y, savepath=None, refit=True, iid=True):
    rst = dict()
    param_dict = self._get_grid_param_dict()
    if savepath is None:
        savepath = os.getcwd()
    estimator_name = self._estimator_name
    param_dict = self._search_space_generator(param_dict)
    # Fall back to a default RepeatedKFold splitter if no CV scheme was given
    if self.cv is None:
        self.cv = ms.RepeatedKFold()
    model = GridSearchCV(self.estimator, param_dict, scoring=self.scoring, cv=self.cv, refit=refit,
                         iid=iid, n_jobs=self.n_jobs, verbose=2)
    try:
        rst[estimator_name] = model.fit(X, y)
    except Exception:
        log.error('Hyperparameter optimization failed, likely due to an inappropriate domain of values to'
                  ' optimize one or more parameters over. Please check your input file and the sklearn docs'
                  ' for the model you are optimizing for the domain of correct values')
        exit()
    best_estimator = rst[estimator_name].best_estimator_
    self._save_output(savepath, rst)
    return best_estimator
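For context, here is a minimal, hedged sketch of the same idea outside the class: passing a RepeatedKFold instance as the cv argument of GridSearchCV. The Ridge estimator, parameter grid, and toy data are illustrative assumptions, not part of the original project, and the deprecated iid argument is omitted.

import numpy as np
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV, RepeatedKFold

X = np.random.rand(50, 4)
y = np.random.rand(50)
cv = RepeatedKFold(n_splits=5, n_repeats=2, random_state=0)
search = GridSearchCV(Ridge(), {'alpha': [0.1, 1.0, 10.0]},
                      scoring='neg_mean_squared_error', cv=cv)
search.fit(X, y)
print(search.best_estimator_)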