本文整理汇总了 Python 中 sklearn.model_selection.KFold 的典型用法代码示例。如果您正苦于以下问题：Python model_selection.KFold 的具体用法？Python model_selection.KFold 怎么用？Python model_selection.KFold 使用的例子？那么，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解 KFold 所在模块 sklearn.model_selection 的用法示例。
在下文中一共展示了model_selection.KFold方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_kfold_no_shuffle
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import KFold [as 别名]
def test_kfold_no_shuffle():
    """Check on toy data that unshuffled KFold preserves the data ordering."""
    samples = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]]

    # Each case pairs the data to split with the expected
    # (test indices, train indices) of every fold, in iteration order.
    cases = [
        # Four samples: two folds of two samples each.
        (samples[:-1], [([0, 1], [2, 3]), ([2, 3], [0, 1])]),
        # Five samples: the first test fold is expected to hold three samples.
        (samples, [([0, 1, 2], [3, 4]), ([3, 4], [0, 1, 2])]),
    ]

    for data, expected_folds in cases:
        fold_iter = KFold(2).split(data)
        for expected_test, expected_train in expected_folds:
            train, test = next(fold_iter)
            assert_array_equal(test, expected_test)
            assert_array_equal(train, expected_train)
示例2: test_TargetEncoder
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import KFold [as 别名]
def test_TargetEncoder(generate_data):
    """Check that TargetEncoder returns one column per categorical input.

    The shape check is run three times: without CV, with CV through
    ``fit_transform()``, and with CV through separate ``fit()`` and
    ``transform()`` calls.
    """
    frame = generate_data()
    # Categorical columns: every non-target column with < 100 distinct values.
    cat_cols = [
        col for col in frame.columns
        if col != TARGET_COL and frame[col].nunique() < 100
    ]

    # 1) No cross-validation.
    encoder = TargetEncoder()
    encoded = encoder.fit_transform(frame[cat_cols], frame[TARGET_COL])
    print('Without CV:\n{}'.format(encoded.head()))
    assert encoded.shape[1] == len(cat_cols)

    folds = KFold(n_splits=N_FOLD, shuffle=True, random_state=RANDOM_SEED)

    # 2) Cross-validated encoding in a single fit_transform() call.
    encoder = TargetEncoder(cv=folds)
    encoded = encoder.fit_transform(frame[cat_cols], frame[TARGET_COL])
    print('With CV (fit_transform()):\n{}'.format(encoded.head()))
    assert encoded.shape[1] == len(cat_cols)

    # 3) Cross-validated encoder, fitted and applied in two steps.
    encoder = TargetEncoder(cv=folds)
    encoder.fit(frame[cat_cols], frame[TARGET_COL])
    encoded = encoder.transform(frame[cat_cols])
    print('With CV (fit() and transform() separately):\n{}'.format(encoded.head()))
    assert encoded.shape[1] == len(cat_cols)
示例3: test_FrequencyEncoder
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import KFold [as 别名]
def test_FrequencyEncoder(generate_data):
    """Check that FrequencyEncoder returns one column per categorical input.

    The shape check is run three times: without CV, with CV through
    ``fit_transform()``, and with CV through separate ``fit()`` and
    ``transform()`` calls.
    """
    frame = generate_data()
    # Categorical columns: every non-target column with < 100 distinct values.
    cat_cols = [
        col for col in frame.columns
        if col != TARGET_COL and frame[col].nunique() < 100
    ]

    # 1) No cross-validation.
    encoder = FrequencyEncoder()
    encoded = encoder.fit_transform(frame[cat_cols])
    print('Without CV:\n{}'.format(encoded.head()))
    assert encoded.shape[1] == len(cat_cols)

    folds = KFold(n_splits=N_FOLD, shuffle=True, random_state=RANDOM_SEED)

    # 2) Cross-validated encoding in a single fit_transform() call.
    encoder = FrequencyEncoder(cv=folds)
    encoded = encoder.fit_transform(frame[cat_cols])
    print('With CV (fit_transform()):\n{}'.format(encoded.head()))
    assert encoded.shape[1] == len(cat_cols)

    # 3) Cross-validated encoder, fitted and applied in two steps.
    encoder = FrequencyEncoder(cv=folds)
    encoder.fit(frame[cat_cols])
    encoded = encoder.transform(frame[cat_cols])
    print('With CV (fit() and transform() separately):\n{}'.format(encoded.head()))
    assert encoded.shape[1] == len(cat_cols)
示例4: split_trials
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import KFold [as 别名]
def split_trials(trial_ids, n_splits=5, rng_seed=0):
    """
    Assign each trial to the training or testing set of every fold.

    :param trial_ids: trials to partition; passed unchanged to ``KFold.split``
    :type trial_ids: array-like
    :param n_splits: one split used for testing; remaining splits used for training
    :type n_splits: int
    :param rng_seed: set random state for shuffling trials; ``None`` disables
        shuffling
    :type rng_seed: int or None
    :return: list of dicts of indices with keys `train` and `test`, one dict
        per fold in the order ``KFold.split`` yields them
    """
    from sklearn.model_selection import KFold

    # Shuffle only when a seed is supplied, so `random_state` is never set
    # on a non-shuffling splitter.
    kf = KFold(n_splits=n_splits, random_state=rng_seed,
               shuffle=rng_seed is not None)
    return [{'train': train_idx, 'test': test_idx}
            for train_idx, test_idx in kf.split(trial_ids)]
示例5: kfold
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import KFold [as 别名]
def kfold(self, k=5, stratify=False, shuffle=True, seed=33):
    """K-Folds cross validation iterator.

    Parameters
    ----------
    k : int, default 5
        Number of folds.
    stratify : bool, default False
        Use ``StratifiedKFold`` instead of ``KFold``.
    shuffle : bool, default True
        Whether the splitter shuffles before splitting.
    seed : int, default 33
        Random state for the splitter; only used when ``shuffle`` is true.

    Yields
    -------
    X_train, y_train, X_test, y_test, train_index, test_index
    """
    splitter_cls = StratifiedKFold if stratify else KFold
    # Recent scikit-learn releases raise ValueError when `random_state` is
    # set while `shuffle` is False, so the seed is only forwarded when it
    # can actually take effect.
    kf = splitter_cls(n_splits=k, shuffle=shuffle,
                      random_state=seed if shuffle else None)

    for train_index, test_index in kf.split(self.X_train, self.y_train):
        X_train, y_train = idx(self.X_train, train_index), self.y_train[train_index]
        X_test, y_test = idx(self.X_train, test_index), self.y_train[test_index]
        yield X_train, y_train, X_test, y_test, train_index, test_index
示例6: test_skip
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import KFold [as 别名]
def test_skip():
    """Check split.Skip(2, KFold(5)) on ten rows.

    The wrapper must report three splits and yield the test folds
    [4, 5], [6, 7] and [8, 9] before being exhausted.
    """
    frame = pd.DataFrame()
    frame['id'] = np.arange(10)

    skipper = split.Skip(2, KFold(5))
    fold_iter = skipper.split(frame)
    assert skipper.get_n_splits() == 3

    for expected_test in ([4, 5], [6, 7], [8, 9]):
        _, test_index = next(fold_iter)
        assert np.array_equal(test_index, np.array(expected_test))

    # No fourth fold may be produced.
    with pytest.raises(StopIteration):
        next(fold_iter)
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import KFold [as 别名]
def test_nth():
    """Check that split.Nth(n, KFold(5)) yields exactly one fold.

    On ten rows, n=3 must give test indices [4, 5] and n=1 must give
    [0, 1]; in both cases the iterator is exhausted after that fold.
    """
    frame = pd.DataFrame()
    frame['id'] = np.arange(10)

    for position, expected_test in ((3, [4, 5]), (1, [0, 1])):
        picker = split.Nth(position, KFold(5))
        fold_iter = picker.split(frame)
        assert picker.get_n_splits() == 1

        _, test_index = next(fold_iter)
        assert np.array_equal(test_index, np.array(expected_test))

        with pytest.raises(StopIteration):
            next(fold_iter)
示例8: test_2d_y
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import KFold [as 别名]
def test_2d_y():
    """Smoke test: splitters must cope with 1-d, 2-d column and multilabel y."""
    n_samples = 30
    rng = np.random.RandomState(1)
    # The draw order below determines the generated data; do not reorder.
    X = rng.randint(0, 3, size=(n_samples, 2))
    y = rng.randint(0, 3, size=(n_samples,))
    y_2d = y.reshape(-1, 1)
    y_multilabel = rng.randint(0, 2, size=(n_samples, 3))
    groups = rng.randint(0, 3, size=(n_samples,))

    splitters = [
        LeaveOneOut(),
        LeavePOut(p=2),
        KFold(),
        StratifiedKFold(),
        RepeatedKFold(),
        RepeatedStratifiedKFold(),
        ShuffleSplit(),
        StratifiedShuffleSplit(test_size=.5),
        GroupShuffleSplit(),
        LeaveOneGroupOut(),
        LeavePGroupsOut(n_groups=2),
        GroupKFold(),
        TimeSeriesSplit(),
        PredefinedSplit(test_fold=groups),
    ]

    # The only ValueError tolerated for a multilabel target.
    allowed_target_types = ('binary', 'multiclass')
    expected_msg = "Supported target types are: {}. Got 'multilabel".format(
        allowed_target_types)

    for splitter in splitters:
        for target in (y, y_2d):
            list(splitter.split(X, target, groups))
        try:
            list(splitter.split(X, y_multilabel, groups))
        except ValueError as err:
            assert expected_msg in str(err)
示例9: test_shuffle_kfold
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import KFold [as 别名]
def test_shuffle_kfold():
    """Check that shuffled KFold really shuffles yet still covers every index."""
    plain = KFold(3)
    shuffled_seed0 = KFold(3, shuffle=True, random_state=0)
    shuffled_seed1 = KFold(3, shuffle=True, random_state=1)

    X = np.ones(300)
    covered = np.zeros(300)

    # Walk the three splitters in lockstep, one fold at a time.
    parallel_folds = zip(
        plain.split(X), shuffled_seed0.split(X), shuffled_seed1.split(X))
    for (train_plain, _), (train_s0, test_s0), (train_s1, _) in parallel_folds:
        # No two training sets of the same fold may overlap completely.
        for first, second in combinations((train_plain, train_s0, train_s1), 2):
            assert_not_equal(len(np.intersect1d(first, second)),
                             len(train_plain))

        # Mark the indices seen in the test folds of the seed-0 splitter.
        covered[test_s0] = 1

    # Across all folds, every index must have appeared in a test fold.
    assert_equal(sum(covered), 300)
示例10: test_cv_iterable_wrapper
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import KFold [as 别名]
def test_cv_iterable_wrapper():
    """Check that check_cv makes a split iterator repeatable.

    Uses the module-level ``X`` and ``y``.
    """
    def replay(wrapped_cv):
        # Materialise all (train, test) pairs of one split() call.
        return list(wrapped_cv.split(X, y))

    # Deterministic splits: two split() calls on the wrapper must agree.
    plain_wrapped = check_cv(KFold(n_splits=5).split(X, y))
    np.testing.assert_equal(replay(plain_wrapped), replay(plain_wrapped))

    # Randomized splits: the wrapper is still expected to return the same
    # folds on every call (np.testing.assert_equal compares nested lists).
    shuffled_wrapped = check_cv(KFold(n_splits=5, shuffle=True).split(X, y))
    np.testing.assert_equal(replay(shuffled_wrapped), replay(shuffled_wrapped))

    # The randomized folds must nevertheless differ from the plain ones.
    try:
        np.testing.assert_equal(replay(plain_wrapped), replay(shuffled_wrapped))
    except AssertionError:
        splits_are_equal = False
    else:
        splits_are_equal = True
    assert not splits_are_equal, (
        "If the splits are randomized, "
        "successive calls to split should yield different results")
示例11: test_cross_val_score_mask
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import KFold [as 别名]
def test_cross_val_score_mask():
    """Test that cross_val_score works with boolean masks.

    Scores from a plain ``KFold(5)`` are compared with scores from the same
    folds expressed as boolean train/test masks; both must be equal.
    """
    svm = SVC(kernel="linear")
    iris = load_iris()
    X, y = iris.data, iris.target

    # Reference scores from integer index splits.
    scores_indices = cross_val_score(svm, X, y, cv=KFold(5))

    cv_masks = []
    for train, test in KFold(5).split(X, y):
        # Builtin `bool` is used because the `np.bool` alias was removed
        # from NumPy (AttributeError on NumPy >= 1.24).
        mask_train = np.zeros(len(y), dtype=bool)
        mask_test = np.zeros(len(y), dtype=bool)
        mask_train[train] = True
        mask_test[test] = True
        # Append the masks, not the index arrays, so the boolean-mask path
        # is what cross_val_score actually receives.
        cv_masks.append((mask_train, mask_test))

    scores_masks = cross_val_score(svm, X, y, cv=cv_masks)
    assert_array_equal(scores_indices, scores_masks)
示例12: test_learning_curve_with_boolean_indices
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import KFold [as 别名]
def test_learning_curve_with_boolean_indices():
    """Check learning_curve sizes and mean scores with a 3-fold KFold."""
    X, y = make_classification(
        n_samples=30,
        n_features=1,
        n_informative=1,
        n_redundant=0,
        n_classes=2,
        n_clusters_per_class=1,
        random_state=0,
    )
    mock_estimator = MockImprovingEstimator(20)
    folds = KFold(n_splits=3)

    curve = learning_curve(mock_estimator, X, y, cv=folds,
                           train_sizes=np.linspace(0.1, 1.0, 10))
    train_sizes, train_scores, test_scores = curve

    # Ten sizes from 2 to 20 samples are expected.
    assert_array_equal(train_sizes, np.linspace(2, 20, 10))

    # Scores are averaged over the folds (axis 1) before comparison.
    mean_train = train_scores.mean(axis=1)
    mean_test = test_scores.mean(axis=1)
    assert_array_almost_equal(mean_train, np.linspace(1.9, 1.0, 10))
    assert_array_almost_equal(mean_test, np.linspace(0.1, 1.0, 10))
# 0.23. warning about tol not having its correct default value.
示例13: check_cross_val_predict_binary
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import KFold [as 别名]
def check_cross_val_predict_binary(est, X, y, method):
    """Helper for tests of cross_val_predict with binary classification.

    Builds the expected predictions by fitting a clone of ``est`` on each
    fold of an unshuffled 3-fold split, then checks that cross_val_predict
    reproduces them for several encodings of ``y``.
    """
    cv = KFold(n_splits=3, shuffle=False)

    # Shape of the expected output.
    if y.ndim != 1:
        exp_shape = y.shape
    elif method == 'decision_function':
        exp_shape = (len(X),)
    else:
        exp_shape = (len(X), 2)

    # Fill in the expected predictions fold by fold.
    expected_predictions = np.zeros(exp_shape)
    for train_idx, test_idx in cv.split(X, y):
        # `est` is deliberately rebound to the freshly fitted clone.
        est = clone(est).fit(X[train_idx], y[train_idx])
        predict = getattr(est, method)
        expected_predictions[test_idx] = predict(X[test_idx])

    # Shifted and string-typed labels must give the same predictions.
    label_variants = [y, y + 1, y - 2, y.astype('str')]
    for labels in label_variants:
        actual = cross_val_predict(est, X, labels, method=method, cv=cv)
        assert_allclose(actual, expected_predictions)
示例14: test_deprecated_grid_search_iid
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import KFold [as 别名]
def test_deprecated_grid_search_iid():
    """Check when GridSearchCV emits the `iid` deprecation warning."""
    depr_message = ("The default of the `iid` parameter will change from True "
                    "to False in version 0.22")
    X, y = make_blobs(n_samples=54, random_state=0, centers=2)

    def make_grid(cv):
        # Same estimator and single-candidate grid; only the CV differs.
        return GridSearchCV(SVC(gamma='scale', random_state=0),
                            param_grid={'C': [10]}, cv=cv)

    # cv=3: no warning with equally sized test sets.
    search = make_grid(3)
    assert_no_warnings(search.fit, X, y)

    # cv=5: warning because 54 % 5 != 0.
    search = make_grid(5)
    assert_warns_message(DeprecationWarning, depr_message, search.fit, X, y)

    # cv=2: warning because stratification into two classes and 27 % 2 != 0.
    search = make_grid(2)
    assert_warns_message(DeprecationWarning, depr_message, search.fit, X, y)

    # KFold(2): no warning because no stratification and 54 % 2 == 0.
    search = make_grid(KFold(2))
    assert_no_warnings(search.fit, X, y)
示例15: test_empty_cv_iterator_error
# 需要导入模块: from sklearn import model_selection [as 别名]
# 或者: from sklearn.model_selection import KFold [as 别名]
def test_empty_cv_iterator_error():
    """Check that fitting with an exhausted CV iterator raises ValueError.

    Uses the module-level ``X`` and ``y``.
    """
    # Create the split generator and drain it so that no folds are left.
    cv = KFold(n_splits=3).split(X)
    for _ in cv:
        pass

    train_size = 100
    search = RandomizedSearchCV(Ridge(), {'alpha': [1e-3, 1e-2, 1e-1]},
                                cv=cv, n_jobs=-1)

    # Fitting with the empty iterator must fail with this message.
    expected_error = pytest.raises(
        ValueError,
        match='No fits were performed. '
              'Was the CV iterator empty\\? '
              'Were there no candidates\\?')
    with expected_error:
        search.fit(X[:train_size], y[:train_size])