This article collects typical usage examples of Python's sklearn.model_selection.RepeatedStratifiedKFold. If you are wondering what model_selection.RepeatedStratifiedKFold does or how to use it, the curated code examples below may help. You can also explore the containing module, sklearn.model_selection, for further usage examples.
The following presents 12 code examples of model_selection.RepeatedStratifiedKFold, sorted by popularity by default.
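Before the collected examples, here is a minimal, self-contained sketch of typical RepeatedStratifiedKFold usage; the toy data and split parameters below are illustrative choices, not taken from any of the examples that follow:

import numpy as np
from sklearn.model_selection import RepeatedStratifiedKFold

# Toy binary target with 5 samples per class (illustrative values only).
X = np.arange(20).reshape(10, 2)
y = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])

# 5 stratified folds, repeated 2 times => 5 * 2 = 10 (train, test) index pairs.
rskf = RepeatedStratifiedKFold(n_splits=5, n_repeats=2, random_state=0)
print(rskf.get_n_splits())  # 10

for train_index, test_index in rskf.split(X, y):
    # Each test fold preserves the class proportions of `y` as closely as possible.
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]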
Example 1: test_2d_y
# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import RepeatedStratifiedKFold [as alias]
def test_2d_y():
    # smoke test for 2d y and multi-label
    n_samples = 30
    rng = np.random.RandomState(1)
    X = rng.randint(0, 3, size=(n_samples, 2))
    y = rng.randint(0, 3, size=(n_samples,))
    y_2d = y.reshape(-1, 1)
    y_multilabel = rng.randint(0, 2, size=(n_samples, 3))
    groups = rng.randint(0, 3, size=(n_samples,))
    splitters = [LeaveOneOut(), LeavePOut(p=2), KFold(), StratifiedKFold(),
                 RepeatedKFold(), RepeatedStratifiedKFold(),
                 ShuffleSplit(), StratifiedShuffleSplit(test_size=.5),
                 GroupShuffleSplit(), LeaveOneGroupOut(),
                 LeavePGroupsOut(n_groups=2), GroupKFold(), TimeSeriesSplit(),
                 PredefinedSplit(test_fold=groups)]
    for splitter in splitters:
        list(splitter.split(X, y, groups))
        list(splitter.split(X, y_2d, groups))
        try:
            list(splitter.split(X, y_multilabel, groups))
        except ValueError as e:
            allowed_target_types = ('binary', 'multiclass')
            msg = "Supported target types are: {}. Got 'multilabel".format(
                allowed_target_types)
            assert msg in str(e)
Example 2: execute
# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import RepeatedStratifiedKFold [as alias]
def execute():
    env = Environment(
        train_dataset=get_toy_classification_data(),
        results_path="HyperparameterHunterAssets",
        metrics=["roc_auc_score"],
        cv_type=RepeatedStratifiedKFold,
        cv_params=dict(n_splits=5, n_repeats=2, random_state=32),
        runs=2,
        # Just instantiate `Environment` with your list of callbacks, and go about business as usual
        experiment_callbacks=[printer_callback(), confusion_matrix_oof()],
        # In addition to `printer_callback` made above, we're also adding the `confusion_matrix_oof` callback
        # This, and other callbacks, can be found in `hyperparameter_hunter.callbacks.recipes`
    )
    experiment = CVExperiment(
        model_initializer=XGBClassifier,
        model_init_params={},
        model_extra_params=dict(fit=dict(verbose=False)),
    )
Example 3: rdm_lda_kfold
# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import RepeatedStratifiedKFold [as alias]
def rdm_lda_kfold(x, labels):
    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
    from sklearn.model_selection import RepeatedStratifiedKFold
    from sklearn.model_selection import cross_val_score
    lda = LinearDiscriminantAnalysis(solver='lsqr', shrinkage='auto')
    folding = RepeatedStratifiedKFold(n_splits=3, n_repeats=3)
    objects = numpy.unique(labels)
    pairs = list(itertools.combinations(objects, 2))
    npairs = len(pairs)
    utv = numpy.full([npairs, ], numpy.nan)
    for p in trange(npairs, desc='pairs', leave=False, ascii=True):
        pair = pairs[p]
        pair_mask = numpy.isin(labels, pair)
        x_pair = x[pair_mask, :]
        labels_pair = labels[pair_mask]
        scores = cross_val_score(lda, x_pair, labels_pair, cv=folding)
        utv[p] = scores.mean()
    return utv
Example 4: generate_kfold
# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import RepeatedStratifiedKFold [as alias]
def generate_kfold(X, y=None, n_splits=5, random_state=0, stratified=False, n_repeats=1):
    if stratified and (y is not None):
        if n_repeats > 1:
            kf = RepeatedStratifiedKFold(n_splits=n_splits, n_repeats=n_repeats, random_state=random_state)
        else:
            kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=random_state)
        kf.get_n_splits(X, y)
        return [[train_index, test_index] for train_index, test_index in kf.split(X, y)]
    else:
        if n_repeats > 1:
            kf = RepeatedKFold(n_splits=n_splits, n_repeats=n_repeats, random_state=random_state)
        else:
            kf = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)
        kf.get_n_splits(X)
        return [[train_index, test_index] for train_index, test_index in kf.split(X)]
Example 5: test_repeated_cv_value_errors
# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import RepeatedStratifiedKFold [as alias]
def test_repeated_cv_value_errors():
    # n_repeats is not integer or <= 0
    for cv in (RepeatedKFold, RepeatedStratifiedKFold):
        assert_raises(ValueError, cv, n_repeats=0)
        assert_raises(ValueError, cv, n_repeats=1.5)
Example 6: test_get_n_splits_for_repeated_stratified_kfold
# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import RepeatedStratifiedKFold [as alias]
def test_get_n_splits_for_repeated_stratified_kfold():
    n_splits = 3
    n_repeats = 4
    rskf = RepeatedStratifiedKFold(n_splits, n_repeats)
    expected_n_splits = n_splits * n_repeats
    assert_equal(expected_n_splits, rskf.get_n_splits())
Example 7: test_repeated_stratified_kfold_determinstic_split
# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import RepeatedStratifiedKFold [as alias]
def test_repeated_stratified_kfold_determinstic_split():
    X = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]]
    y = [1, 1, 1, 0, 0]
    random_state = 1944695409
    rskf = RepeatedStratifiedKFold(
        n_splits=2,
        n_repeats=2,
        random_state=random_state)

    # split should produce same and deterministic splits on
    # each call
    for _ in range(3):
        splits = rskf.split(X, y)
        train, test = next(splits)
        assert_array_equal(train, [1, 4])
        assert_array_equal(test, [0, 2, 3])

        train, test = next(splits)
        assert_array_equal(train, [0, 2, 3])
        assert_array_equal(test, [1, 4])

        train, test = next(splits)
        assert_array_equal(train, [2, 3])
        assert_array_equal(test, [0, 1, 4])

        train, test = next(splits)
        assert_array_equal(train, [0, 1, 4])
        assert_array_equal(test, [2, 3])

        assert_raises(StopIteration, next, splits)
Example 8: execute
# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import RepeatedStratifiedKFold [as alias]
def execute():
    env = Environment(
        train_dataset=get_toy_classification_data(),
        results_path="HyperparameterHunterAssets",
        metrics=["roc_auc_score"],
        cv_type=RepeatedStratifiedKFold,
        cv_params=dict(n_splits=3, n_repeats=2, random_state=32),
        do_full_save=do_full_save,
    )

    experiment_0 = CVExperiment(
        model_initializer=XGBClassifier, model_init_params=dict(subsample=0.01)
    )
    # Pro Tip: By setting XGBoost's subsample ridiculously low, we can get bad scores on purpose

    # Upon completion of this Experiment, we see a warning that not all result files will be saved
    # This is because the final score of the Experiment was below our threshold of 0.75
    # Specifically, we skipped saving prediction files (OOF, holdout, test, or in-fold), and the heartbeat file
    # What did get saved are the Experiment's key information, leaderboard position, and description file
    # These are saved to allow us to use the information for future hyperparameter optimization, and to detect repeated Experiments
    # Additionally, the Experiment's script backup is saved, but that's because it's one of the first things that happens
    # For even finer control over what gets saved, use `do_full_save` together with `file_blacklist`

    # Now, let's perform another Experiment that does a bit better than our intentionally miserable one
    experiment_1 = CVExperiment(
        model_initializer=XGBClassifier, model_init_params=dict(subsample=0.5)
    )
    # Our second Experiment was executed in the same Environment, so it was still subject to the `do_full_save` constraint
    # However, because it scored above 0.75 (hopefully), all of the result files were saved
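The `do_full_save` callable referenced above is not defined in this snippet. A minimal sketch, assuming the `experiment_result` dict layout shown in Example 9 below and the 0.75 roc_auc_score threshold described in the comments:

def do_full_save(experiment_result):
    # Save all result files only when the out-of-fold ROC AUC clears the threshold.
    return experiment_result["final_evaluations"]["oof"]["roc_auc_score"] > 0.75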
Example 9: env_0
# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import RepeatedStratifiedKFold [as alias]
def env_0():
    def do_full_save(experiment_result):
        return experiment_result["final_evaluations"]["oof"]["roc_auc_score"] > 0.75

    return Environment(
        train_dataset=get_toy_classification_data(),
        results_path=assets_dir,
        metrics=["roc_auc_score"],
        cv_type=RepeatedStratifiedKFold,
        cv_params=dict(n_splits=3, n_repeats=2, random_state=32),
        do_full_save=do_full_save,
    )
Example 10: env_3
# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import RepeatedStratifiedKFold [as alias]
def env_3():
    def printer_callback():
        def printer_helper(_rep, _fold, _run, last_evaluation_results):
            print(f"{_rep}.{_fold}.{_run} {last_evaluation_results}")

        return lambda_callback(
            on_exp_start=printer_helper,
            on_exp_end=printer_helper,
            on_rep_start=printer_helper,
            on_rep_end=printer_helper,
            on_fold_start=printer_helper,
            on_fold_end=printer_helper,
            on_run_start=printer_helper,
            on_run_end=printer_helper,
        )

    return Environment(
        train_dataset=get_toy_classification_data(),
        results_path=assets_dir,
        metrics=["roc_auc_score"],
        holdout_dataset=get_toy_classification_data(),
        cv_type=RepeatedStratifiedKFold,
        cv_params=dict(n_splits=3, n_repeats=2, random_state=32),
        runs=2,
        experiment_callbacks=[
            printer_callback(),
            confusion_matrix_oof(),
            confusion_matrix_holdout(),
        ],
    )
Example 11: test_experiment_callbacks_setter_value_error
# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import RepeatedStratifiedKFold [as alias]
def test_experiment_callbacks_setter_value_error(env_fixture_0):
    with pytest.raises(ValueError, match="experiment_callbacks must be LambdaCallback instances.*"):
        env_fixture_0.experiment_callbacks = [RepeatedStratifiedKFold]
Example 12: find_best_threshold
# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import RepeatedStratifiedKFold [as alias]
def find_best_threshold(
    y_pred: np.ndarray,
    y_true: np.ndarray,
    metric_fn: Callable = metrics.roc_auc_score,
    num_splits: int = 5,
    num_repeats: int = 1,
    random_state: int = 42,
):
    """@TODO: Docs. Contribution is welcome."""
    rkf = RepeatedStratifiedKFold(
        n_splits=num_splits, n_repeats=num_repeats, random_state=random_state
    )
    fold_thresholds = []
    fold_metrics = {k: [] for k in _BINARY_PER_CLASS_METRICS.copy()}

    for train_index, test_index in rkf.split(y_true, y_true):
        y_pred_train, y_pred_test = y_pred[train_index], y_pred[test_index]
        y_true_train, y_true_test = y_true[train_index], y_true[test_index]

        best_threshold = find_best_split_threshold(
            y_pred_train, y_true_train, metric=metric_fn
        )
        best_predictions = (y_pred_test >= best_threshold).astype(int)

        for metric_name in fold_metrics.keys():
            try:
                metric_value = metrics.__dict__[metric_name](
                    y_true_test, best_predictions
                )
            except ValueError:
                metric_value = 0.0
            fold_metrics[metric_name].append(metric_value)
        fold_thresholds.append(best_threshold)

    fold_best_threshold = np.mean(fold_thresholds)
    for metric_name in fold_metrics:
        fold_metrics[metric_name] = np.mean(fold_metrics[metric_name])
    return fold_best_threshold, fold_metrics