本文整理汇总了Python中sklearn.model_selection.check_cv方法的典型用法代码示例。如果您正苦于以下问题:Python model_selection.check_cv方法的具体用法?Python model_selection.check_cv怎么用?Python model_selection.check_cv使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块sklearn.model_selection的用法示例。
在下文中一共展示了model_selection.check_cv方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_cv_iterable_wrapper
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import check_cv [as 别名]
def test_cv_iterable_wrapper():
    """Check that ``check_cv`` turns an iterable of splits into a reusable splitter."""

    def _all_splits(splitter):
        # Materialise every (train, test) pair produced by one ``split`` call.
        return list(splitter.split(X, y))

    wrapped_plain = check_cv(KFold(n_splits=5).split(X, y))
    # The wrapper stores the splits it was given, so calling ``split``
    # repeatedly must keep yielding the same result.
    np.testing.assert_equal(_all_splits(wrapped_plain),
                            _all_splits(wrapped_plain))

    # Same check for splits that came from a shuffled KFold: once wrapped,
    # they are replayed identically on each call.
    wrapped_shuffled = check_cv(KFold(n_splits=5, shuffle=True).split(X, y))
    # np.testing.assert_equal compares the nested lists recursively.
    np.testing.assert_equal(_all_splits(wrapped_shuffled),
                            _all_splits(wrapped_shuffled))

    # The shuffled splits are expected to differ from the unshuffled ones.
    try:
        np.testing.assert_equal(_all_splits(wrapped_plain),
                                _all_splits(wrapped_shuffled))
    except AssertionError:
        splits_are_equal = False
    else:
        splits_are_equal = True

    assert not splits_are_equal, (
        "If the splits are randomized, "
        "successive calls to split should yield different results")
示例2: check_cv
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import check_cv [as 别名]
def check_cv(cv=3, y=None, classifier=False):
    """Dask-aware counterpart of ``sklearn.model_selection.check_cv``.

    Delegates to scikit-learn unless ``y`` is a dask collection *and* ``cv``
    is an integer; in that case the target type is computed through dask and
    the splitter is built here.

    Parameters
    ----------
    cv : int, cross-validator, iterable or None, default=3
        ``None`` is treated as 3.
    y : array-like or dask collection, optional
        Target values.
    classifier : bool, default=False
        Whether stratified splitting should be considered.

    Returns
    -------
    A cross-validator: ``StratifiedKFold(cv)`` when ``classifier`` is true and
    the target type is "binary" or "multiclass", ``KFold(cv)`` otherwise, or
    whatever ``model_selection.check_cv`` returns on the delegated path.
    """
    n_folds = 3 if cv is None else cv

    # Per the original note: when ``cv`` is not an integer, scikit-learn's
    # implementation does not touch ``y``, so a dask object can be passed on.
    handle_here = is_dask_collection(y) and isinstance(n_folds, numbers.Integral)
    if not handle_here:
        return model_selection.check_cv(n_folds, y, classifier=classifier)

    use_stratified = False
    if classifier:
        # ``y`` is a dask object, so the target type has to be computed.
        target_type = delayed(type_of_target, pure=True)(y).compute()
        use_stratified = target_type in ("binary", "multiclass")

    return StratifiedKFold(n_folds) if use_stratified else KFold(n_folds)
示例3: _set_cv
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import check_cv [as 别名]
def _set_cv(cv, X, y, classifier):
    """Return a checked cross-validator for either sklearn-0.17 or sklearn>=0.18.

    The result is a `sklearn.cross_validation._PartitionIterator` or a
    `sklearn.model_selection.BaseCrossValidator`, depending on which
    scikit-learn API is in use (signalled by the module-level ``SK18`` flag).

    Parameters
    ----------
    cv : int, `_PartitionIterator` or `BaseCrossValidator`
        The CV object or int to check. If an int, will be converted
        into the appropriate class of crossvalidator.

    X : pd.DataFrame or np.ndarray, shape(n_samples, n_features)
        The dataframe or np.ndarray being fit in the grid search.
        Only forwarded on the pre-0.18 code path.

    y : np.ndarray, shape(n_samples,)
        The target being fit in the grid search.

    classifier : bool
        Whether the estimator being fit is a classifier

    Returns
    -------
    `_PartitionIterator` or `BaseCrossValidator`
    """
    if SK18:
        # model_selection.check_cv takes no X, and ``classifier`` is
        # keyword-only in later scikit-learn releases, so pass it by name.
        return check_cv(cv, y, classifier=classifier)
    # Legacy cross_validation.check_cv(cv, X, y, classifier) signature.
    return check_cv(cv, X, y, classifier)
示例4: check_cv
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import check_cv [as 别名]
def check_cv(cv: Union[int, Iterable, BaseCrossValidator] = 5,
             y: Optional[Union[pd.Series, np.ndarray]] = None,
             stratified: bool = False,
             random_state: int = 0):
    """Build a cross-validator, shuffling by default for integer ``cv``.

    Parameters
    ----------
    cv : int, iterable, cross-validator or None, default=5
        ``None`` is treated as 5. An integer is turned into a shuffled
        ``KFold`` / ``StratifiedKFold`` with that many splits; anything else
        is handed to ``sklearn.model_selection.check_cv``.
    y : pd.Series or np.ndarray, optional
        Target values, used to decide whether stratification applies.
    stratified : bool, default=False
        Request stratified folds. Only honoured for integer ``cv`` when ``y``
        is given and its target type is 'binary' or 'multiclass'.
    random_state : int, default=0
        Seed passed to the shuffled splitters created here.

    Returns
    -------
    A cross-validator instance.
    """
    if cv is None:
        cv = 5

    if isinstance(cv, numbers.Integral):
        if stratified and (y is not None) and (type_of_target(y) in ('binary', 'multiclass')):
            return StratifiedKFold(cv, shuffle=True, random_state=random_state)
        return KFold(cv, shuffle=True, random_state=random_state)

    # ``classifier`` is keyword-only in current scikit-learn releases;
    # passing it positionally raises TypeError there.
    return model_selection.check_cv(cv, y, classifier=stratified)
示例5: test_check_cv
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import check_cv [as 别名]
def test_check_cv():
    """Check which splitter ``check_cv`` picks for the different target types."""
    X = np.ones(9)

    # No target and classifier=False: expected to match a plain KFold(3).
    # np.testing.assert_equal recursively compares lists of lists.
    cv = check_cv(3, classifier=False)
    np.testing.assert_equal(list(KFold(3).split(X)), list(cv.split(X)))

    y_binary = np.array([0, 1, 0, 1, 0, 0, 1, 1, 1])
    y_multiclass = np.array([0, 1, 0, 1, 2, 1, 2, 0, 2])
    # The 2d column-vector form of the multiclass target must work too.
    y_multiclass_2d = y_multiclass.reshape(-1, 1)

    # Binary / multiclass targets with classifier=True: StratifiedKFold(3).
    for target in (y_binary, y_multiclass, y_multiclass_2d):
        cv = check_cv(3, target, classifier=True)
        np.testing.assert_equal(list(StratifiedKFold(3).split(X, target)),
                                list(cv.split(X, target)))

    # Guard against the stratified and plain splits coinciding on this data,
    # which would make the comparisons above meaningless.
    stratified_train = next(StratifiedKFold(3).split(X, y_multiclass_2d))[0]
    plain_train = next(KFold(3).split(X, y_multiclass_2d))[0]
    assert not np.all(stratified_train == plain_train)

    X = np.ones(5)
    y_multilabel = np.array([[0, 0, 0, 0], [0, 1, 1, 0], [0, 0, 0, 1],
                             [1, 1, 0, 1], [0, 0, 1, 0]])
    y_multioutput = np.array([[1, 2], [0, 3], [0, 0], [3, 1], [2, 0]])

    # Multilabel / multioutput targets: expected to match plain KFold(3).
    for target in (y_multilabel, y_multioutput):
        cv = check_cv(3, target, classifier=True)
        np.testing.assert_equal(list(KFold(3).split(X)), list(cv.split(X)))

    assert_raises(ValueError, check_cv, cv="lolo")
示例6: test_check_cv_default_warn
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import check_cv [as 别名]
def test_check_cv_default_warn():
    """Check the FutureWarning emitted when ``cv`` is left unspecified."""
    # Test that warnings are raised. Will be removed in 0.22
    for positional_args in ((), (None,)):
        assert_warns_message(FutureWarning, CV_WARNING, check_cv,
                             *positional_args)
    # An explicit value must not warn.
    assert_no_warnings(check_cv, cv=5)
示例7: fit
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import check_cv [as 别名]
def fit(self, X, y=None, groups=None, **fit_params):
    """
    Run cross-validated ``predict_proba`` for the estimator and store the results

    Sets ``self.cv`` (the checked cross-validator), ``self.y_pred_``,
    ``self.y_true`` and ``self.test_idx_``.

    Args
    ----
    X : array-like, shape = [n_samples, n_features]
        Training vector, where n_samples is the number of samples and
        n_features is the number of features

    y : array-like, shape = [n_samples] or [n_samples, n_output], optional
        Target relative to X for classification or regression;
        None for unsupervised learning

    groups : array-like, shape = [n_samples], optional
        Training vector groups for cross-validation

    **fit_params : dict of string -> object
        Parameters passed on to ``cross_val_predict``

    Raises
    ------
    ValueError
        If ``y`` has more than two unique values, or ``self.scoring`` is None.
    """
    # Normalise the cv specification into a cross-validator object.
    estimator_is_classifier = is_classifier(self.estimator)
    self.cv = check_cv(self.cv, y, classifier=estimator_is_classifier)

    # Only a binary response is supported.
    n_unique_targets = len(np.unique(y))
    if n_unique_targets > 2:
        raise ValueError('Only a binary response vector is currently supported')

    # A scoring metric must have been specified.
    if self.scoring is None:
        raise ValueError('No score function is defined')

    # Cross-validated predictions.
    self.y_pred_ = cross_val_predict(
        estimator=self.estimator,
        X=X,
        y=y,
        groups=groups,
        cv=self.cv,
        method='predict_proba',
        n_jobs=self.n_jobs,
        **fit_params)
    self.y_true = y

    # Keep the test indices of every fold produced by the cross-validator.
    fold_test_indices = []
    for fold in self.cv.split(X, y, groups):
        fold_test_indices.append(fold[1])
    self.test_idx_ = fold_test_indices
示例8: _check_cv_non_float
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import check_cv [as 别名]
def _check_cv_non_float(self, y):
    """Resolve ``self.cv`` via ``check_cv``, using ``self.stratified`` as the classifier flag."""
    use_stratified = self.stratified
    return check_cv(self.cv, y=y, classifier=use_stratified)
示例9: check_cv
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import check_cv [as 别名]
def check_cv(self, y):
    """Resolve which cross validation strategy is used.

    A float ``self.cv`` is handled by ``_check_cv_float``; anything else goes
    to ``_check_cv_non_float``. ``y`` is only forwarded (converted to numpy
    when possible) if ``self.stratified`` is set; otherwise ``None`` is passed.
    """
    if self.stratified:
        # Try to convert y to numpy for sklearn's check_cv; if the conversion
        # doesn't work, still try with the object as given.
        try:
            y_arr = to_numpy(y)
        except (AttributeError, TypeError):
            y_arr = y
    else:
        y_arr = None

    if not self._is_float(self.cv):
        return self._check_cv_non_float(y_arr)
    return self._check_cv_float()
示例10: __call__
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import check_cv [as 别名]
def __call__(self, dataset, y=None, groups=None):
    """Split ``dataset`` into a training and a validation subset.

    Only the first split yielded by the resolved cross-validator is used.

    Parameters
    ----------
    dataset
        Object whose length ``get_len`` can determine and that
        ``torch.utils.data.Subset`` can index.
    y : optional
        Targets; required when ``self.stratified`` is set.
    groups : optional
        Forwarded to the cross-validator's ``split``.

    Returns
    -------
    (dataset_train, dataset_valid) : tuple of ``torch.utils.data.Subset``

    Raises
    ------
    ValueError
        If stratification is requested without a suitable ``y``, or if
        ``dataset`` and ``y`` differ in length.
    """
    bad_y_error = ValueError(
        "Stratified CV requires explicitly passing a suitable y.")

    if (y is None) and self.stratified:
        raise bad_y_error

    cv = self.check_cv(y)
    # Stratification was requested but the resolved splitter is not stratified.
    if self.stratified and not self._is_stratified(cv):
        raise bad_y_error

    # pylint: disable=invalid-name
    n_samples = get_len(dataset)
    if y is not None:
        n_targets = get_len(y)
        if n_samples != n_targets:
            raise ValueError("Cannot perform a CV split if dataset and y "
                             "have different lengths.")

    # The splitter works on positional indices; y is only supplied when the
    # splitter is a stratified one.
    split_args = [np.arange(n_samples)]
    if self._is_stratified(cv):
        split_args.append(to_numpy(y))

    first_split = next(iter(cv.split(*split_args, groups=groups)))
    idx_train, idx_valid = first_split

    dataset_train = torch.utils.data.Subset(dataset, idx_train)
    dataset_valid = torch.utils.data.Subset(dataset, idx_valid)
    return dataset_train, dataset_valid
示例11: check_cv
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import check_cv [as 别名]
def check_cv(cv=3, X=None, y=None, classifier=False):
    """Input checker utility for building a cross-validator.

    Thin wrapper that forwards to ``sklearn_check_cv``.

    Parameters
    ----------
    * `cv` [integer, cross-validation generator or an iterable, default=`3`]:
        Determines the cross-validation splitting strategy.
        Possible inputs for cv are:

        - integer, to specify the number of folds.
        - An object to be used as a cross-validation generator.
        - An iterable yielding train/test splits.

        For integer/None inputs, if classifier is True and `y` is either
        binary or multiclass, `StratifiedKFold` used. In all other
        cases, `KFold` is used.

    * `X` [optional]:
        Accepted in the signature but not used by this function.

    * `y` [array-like, optional]:
        The target variable for supervised learning problems.

    * `classifier` [boolean, default=`False`]:
        Whether the task is a classification task, in which case
        stratified `KFold` will be used.

    Returns
    -------
    * `checked_cv` [a cross-validator instance]:
        The return value is a cross-validator which generates the train/test
        splits via the `split` method.

    Note
    ----
    This method is backported from scikit-learn 0.18.
    """
    checked_cv = sklearn_check_cv(cv, y=y, classifier=classifier)
    return checked_cv
示例12: _check_cv
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import check_cv [as 别名]
def _check_cv(self, y):
    """Overrides base class _check_cv

    Resolves ``self.cv`` with ``classifier=True`` for a 1-dimensional target
    and raises ``NotImplementedError`` for any other target shape.
    """
    # Squeezed target should be 1-dimensional
    target_is_single_column = len(y.shape) == 1
    if target_is_single_column:
        return check_cv(self.cv, y=y, classifier=True)

    raise NotImplementedError("StackedClassifier does not currently "
                              "support multi-column classification "
                              "problems. If your target is a one-hot "
                              "encoded multi-class problem, please "
                              "recast it to a single column.")
示例13: our_check_cv
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import check_cv [as 别名]
def our_check_cv(cv, X, y, classifier):
    """Resolve ``cv`` and eagerly materialise its splits.

    Parameters
    ----------
    cv : int, cross-validator or iterable
        Cross-validation specification handed to ``base_check_cv``.
    X, y : array-like
        Data and target the splits are generated for.
    classifier : bool
        Whether stratified splitting should be considered.

    Returns
    -------
    (n_splits, splits) : tuple
        Number of splits and the list of ``(train, test)`` index pairs.
    """
    # ``classifier`` is keyword-only in current scikit-learn releases.
    ret = base_check_cv(cv, y, classifier=classifier)
    # Use the public ``get_n_splits`` API: not every splitter exposes an
    # ``n_splits`` attribute (e.g. wrapped iterables of splits).
    n_splits = ret.get_n_splits(X, y)
    return n_splits, list(ret.split(X, y=y))
示例14: test_cv_iterable_wrapper
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import check_cv [as 别名]
def test_cv_iterable_wrapper():
    """Check ``_CVIterableWrapper`` / ``check_cv`` on old-style CV objects and split iterables."""

    def _all_splits(splitter):
        # Materialise every (train, test) pair produced by one ``split`` call.
        return list(splitter.split(X, y))

    y_multiclass = np.array([0, 1, 0, 1, 2, 1, 2, 0, 2])

    # Importing the legacy module may warn; record the warnings instead of
    # letting them surface.
    with warnings.catch_warnings(record=True):
        from sklearn.cross_validation import StratifiedKFold as OldSKF

    cv = OldSKF(y_multiclass, n_folds=3)
    wrapped_old_skf = _CVIterableWrapper(cv)

    # Check if split works correctly
    np.testing.assert_equal(list(cv), list(wrapped_old_skf.split()))

    # Check if get_n_splits works correctly
    assert_equal(len(cv), wrapped_old_skf.get_n_splits())

    wrapped_plain = check_cv(KFold(n_splits=5).split(X, y))
    # The wrapper stores the splits it was given, so calling ``split``
    # repeatedly must keep yielding the same result.
    np.testing.assert_equal(_all_splits(wrapped_plain),
                            _all_splits(wrapped_plain))

    # Same check for splits that came from a shuffled KFold: once wrapped,
    # they are replayed identically on each call.
    wrapped_shuffled = check_cv(KFold(n_splits=5, shuffle=True).split(X, y))
    # np.testing.assert_equal compares the nested lists recursively.
    np.testing.assert_equal(_all_splits(wrapped_shuffled),
                            _all_splits(wrapped_shuffled))

    # The shuffled splits are expected to differ from the unshuffled ones.
    try:
        np.testing.assert_equal(_all_splits(wrapped_plain),
                                _all_splits(wrapped_shuffled))
    except AssertionError:
        splits_are_equal = False
    else:
        splits_are_equal = True

    assert_false(splits_are_equal, "If the splits are randomized, "
                 "successive calls to split should yield different results")
示例15: test_check_cv
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import check_cv [as 别名]
def test_check_cv():
    """Check which splitter ``check_cv`` picks, including old-style CV objects."""
    X = np.ones(9)

    # No target and classifier=False: expected to match a plain KFold(3).
    # np.testing.assert_equal recursively compares lists of lists.
    cv = check_cv(3, classifier=False)
    np.testing.assert_equal(list(KFold(3).split(X)), list(cv.split(X)))

    y_binary = np.array([0, 1, 0, 1, 0, 0, 1, 1, 1])
    y_multiclass = np.array([0, 1, 0, 1, 2, 1, 2, 0, 2])
    # The 2d column-vector form of the multiclass target must work too.
    y_multiclass_2d = y_multiclass.reshape(-1, 1)

    # Binary / multiclass targets with classifier=True: StratifiedKFold(3).
    for target in (y_binary, y_multiclass, y_multiclass_2d):
        cv = check_cv(3, target, classifier=True)
        np.testing.assert_equal(list(StratifiedKFold(3).split(X, target)),
                                list(cv.split(X, target)))

    # Guard against the stratified and plain splits coinciding on this data,
    # which would make the comparisons above meaningless.
    stratified_train = next(StratifiedKFold(3).split(X, y_multiclass_2d))[0]
    plain_train = next(KFold(3).split(X, y_multiclass_2d))[0]
    assert_false(np.all(stratified_train == plain_train))

    X = np.ones(5)
    y_multilabel = np.array([[0, 0, 0, 0], [0, 1, 1, 0], [0, 0, 0, 1],
                             [1, 1, 0, 1], [0, 0, 1, 0]])
    y_multioutput = np.array([[1, 2], [0, 3], [0, 0], [3, 1], [2, 0]])

    # Multilabel / multioutput targets: expected to match plain KFold(3).
    for target in (y_multilabel, y_multioutput):
        cv = check_cv(3, target, classifier=True)
        np.testing.assert_equal(list(KFold(3).split(X)), list(cv.split(X)))

    # Check if the old style classes are wrapped to have a split method
    X = np.ones(9)
    y_multiclass = np.array([0, 1, 0, 1, 2, 1, 2, 0, 2])
    cv1 = check_cv(3, y_multiclass, classifier=True)

    # Importing the legacy module may warn; record the warnings instead of
    # letting them surface.
    with warnings.catch_warnings(record=True):
        from sklearn.cross_validation import StratifiedKFold as OldSKF

    cv2 = check_cv(OldSKF(y_multiclass, n_folds=3))
    np.testing.assert_equal(list(cv1.split(X, y_multiclass)),
                            list(cv2.split()))

    assert_raises(ValueError, check_cv, cv="lolo")