This page collects typical usage examples of Python's sklearn.model_selection.PredefinedSplit. If you are wondering what PredefinedSplit is for, how to call it, or what real-world uses look like, the curated code samples below may help. You can also explore further usage examples from its parent module, sklearn.model_selection.
The examples below show 9 uses of model_selection.PredefinedSplit, ordered roughly by popularity.
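Before the collected examples, here is a minimal, self-contained sketch of the class's basic behaviour (the array values and variable names are illustrative, not taken from the examples below): each entry of test_fold assigns its sample to a test fold, and entries of -1 keep that sample in every training set and out of every test set.

import numpy as np
from sklearn.model_selection import PredefinedSplit

X = np.arange(12).reshape(6, 2)
test_fold = np.array([-1, -1, 0, 0, 1, 1])   # samples 0 and 1 are train-only (illustrative)
ps = PredefinedSplit(test_fold)
print(ps.get_n_splits())                     # 2 -- one split per non-negative fold value
for train_idx, test_idx in ps.split():
    print(train_idx, test_idx)               # [0 1 4 5] [2 3], then [0 1 2 3] [4 5]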
Example 1: test_2d_y

# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import PredefinedSplit [as alias]
def test_2d_y():
    # smoke test for 2d y and multi-label
    n_samples = 30
    rng = np.random.RandomState(1)
    X = rng.randint(0, 3, size=(n_samples, 2))
    y = rng.randint(0, 3, size=(n_samples,))
    y_2d = y.reshape(-1, 1)
    y_multilabel = rng.randint(0, 2, size=(n_samples, 3))
    groups = rng.randint(0, 3, size=(n_samples,))
    splitters = [LeaveOneOut(), LeavePOut(p=2), KFold(), StratifiedKFold(),
                 RepeatedKFold(), RepeatedStratifiedKFold(),
                 ShuffleSplit(), StratifiedShuffleSplit(test_size=.5),
                 GroupShuffleSplit(), LeaveOneGroupOut(),
                 LeavePGroupsOut(n_groups=2), GroupKFold(), TimeSeriesSplit(),
                 PredefinedSplit(test_fold=groups)]
    for splitter in splitters:
        list(splitter.split(X, y, groups))
        list(splitter.split(X, y_2d, groups))
        try:
            list(splitter.split(X, y_multilabel, groups))
        except ValueError as e:
            allowed_target_types = ('binary', 'multiclass')
            msg = "Supported target types are: {}. Got 'multilabel".format(
                allowed_target_types)
            assert msg in str(e)
Example 2: test_predefinedsplit_with_kfold_split

# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import PredefinedSplit [as alias]
def test_predefinedsplit_with_kfold_split():
    # Check that PredefinedSplit can reproduce a split generated by KFold.
    folds = -1 * np.ones(10)
    kf_train = []
    kf_test = []
    for i, (train_ind, test_ind) in enumerate(KFold(5, shuffle=True).split(X)):
        kf_train.append(train_ind)
        kf_test.append(test_ind)
        folds[test_ind] = i
    ps_train = []
    ps_test = []
    ps = PredefinedSplit(folds)
    # n_splits is simply the number of unique folds
    assert_equal(len(np.unique(folds)), ps.get_n_splits())
    for train_ind, test_ind in ps.split():
        ps_train.append(train_ind)
        ps_test.append(test_ind)
    assert_array_equal(ps_train, kf_train)
    assert_array_equal(ps_test, kf_test)
Example 3: test_predefinedsplit_with_kfold_split

# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import PredefinedSplit [as alias]
def test_predefinedsplit_with_kfold_split():
    # Check that PredefinedSplit can reproduce a split generated by KFold.
    folds = np.full(10, -1.)
    kf_train = []
    kf_test = []
    for i, (train_ind, test_ind) in enumerate(KFold(5, shuffle=True).split(X)):
        kf_train.append(train_ind)
        kf_test.append(test_ind)
        folds[test_ind] = i
    ps = PredefinedSplit(folds)
    # n_splits is simply the number of unique folds
    assert_equal(len(np.unique(folds)), ps.get_n_splits())
    ps_train, ps_test = zip(*ps.split())
    assert_array_equal(ps_train, kf_train)
    assert_array_equal(ps_test, kf_test)
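A hedged follow-up sketch of why this equivalence matters in practice: you can record a shuffled KFold assignment once and later reproduce exactly the same splits through PredefinedSplit. X_demo below is an illustrative stand-in for the module-level X used by the tests above.

import numpy as np
from sklearn.model_selection import KFold, PredefinedSplit

X_demo = np.arange(20).reshape(10, 2)
folds = np.full(10, -1)
for i, (_, test_idx) in enumerate(KFold(5, shuffle=True, random_state=0).split(X_demo)):
    folds[test_idx] = i          # remember which fold each sample was tested in

ps = PredefinedSplit(folds)      # yields the same 5 test sets, in fold order
assert ps.get_n_splits() == 5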
Example 4: test_predefined_split

# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import PredefinedSplit [as alias]
def test_predefined_split(self, cv_split_cls, data):
    from sklearn.model_selection import PredefinedSplit
    indices = (data.y > 0).astype(int)
    split = PredefinedSplit(indices)
    dataset_train, dataset_valid = cv_split_cls(split)(data)
    y_train = data_from_dataset(dataset_train)[1]
    y_valid = data_from_dataset(dataset_valid)[1]
    assert (y_train > 0).all()
    assert (y_valid == 0).all()
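Example 4 relies on test fixtures (cv_split_cls, data, data_from_dataset), so here is a hedged sketch of the same idea in plain scikit-learn terms, with made-up labels: building test_fold from (y > 0) means the first split puts every positive-label sample in training and every zero-label sample in the validation fold.

import numpy as np
from sklearn.model_selection import PredefinedSplit

y = np.array([0, 1, 2, 0, 3, 0])               # illustrative labels
ps = PredefinedSplit((y > 0).astype(int))      # fold 0 = zero labels, fold 1 = positive labels
train_idx, valid_idx = next(iter(ps.split()))  # first split: test fold 0
assert (y[train_idx] > 0).all()
assert (y[valid_idx] == 0).all()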
Example 5: main

# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import PredefinedSplit [as alias]
def main(data_dir, log_dir, source='xl-1542M-k40', n_train=500000, n_valid=10000, n_jobs=None, verbose=False):
    train_texts, train_labels = load_split(data_dir, source, 'train', n=n_train)
    valid_texts, valid_labels = load_split(data_dir, source, 'valid', n=n_valid)
    test_texts, test_labels = load_split(data_dir, source, 'test')

    vect = TfidfVectorizer(ngram_range=(1, 2), min_df=5, max_features=2**21)
    train_features = vect.fit_transform(train_texts)
    valid_features = vect.transform(valid_texts)
    test_features = vect.transform(test_texts)

    model = LogisticRegression(solver='liblinear')
    params = {'C': [1/64, 1/32, 1/16, 1/8, 1/4, 1/2, 1, 2, 4, 8, 16, 32, 64]}
    # The first n_train rows are train-only (-1); the next n_valid rows form the single validation fold (0).
    split = PredefinedSplit([-1] * n_train + [0] * n_valid)
    search = GridSearchCV(model, params, cv=split, n_jobs=n_jobs, verbose=verbose, refit=False)
    search.fit(sparse.vstack([train_features, valid_features]), train_labels + valid_labels)
    model = model.set_params(**search.best_params_)
    model.fit(train_features, train_labels)
    valid_accuracy = model.score(valid_features, valid_labels) * 100.
    test_accuracy = model.score(test_features, test_labels) * 100.
    data = {
        'source': source,
        'n_train': n_train,
        'valid_accuracy': valid_accuracy,
        'test_accuracy': test_accuracy,
    }
    print(data)
    json.dump(data, open(os.path.join(log_dir, f'{source}.json'), 'w'))
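The PredefinedSplit construction in Example 5 is the standard single-hold-out pattern for GridSearchCV. Below is a self-contained, hedged sketch of the same pattern with synthetic data; every name here is illustrative rather than taken from the detector code above.

import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV, PredefinedSplit

rng = np.random.RandomState(0)
X_train, y_train = rng.randn(80, 5), rng.randint(0, 2, 80)
X_valid, y_valid = rng.randn(20, 5), rng.randint(0, 2, 20)

# -1 marks train-only rows; 0 marks the single validation fold.
split = PredefinedSplit([-1] * len(X_train) + [0] * len(X_valid))
search = GridSearchCV(LogisticRegression(solver='liblinear'),
                      {'C': [0.1, 1, 10]}, cv=split, refit=False)
search.fit(np.vstack([X_train, X_valid]), np.concatenate([y_train, y_valid]))
print(search.best_params_)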
Example 6: test_objectmapper

# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import PredefinedSplit [as alias]
def test_objectmapper(self):
    df = pdml.ModelFrame([])

    # Splitter Classes
    self.assertIs(df.model_selection.KFold, ms.KFold)
    self.assertIs(df.model_selection.GroupKFold, ms.GroupKFold)
    self.assertIs(df.model_selection.StratifiedKFold, ms.StratifiedKFold)
    self.assertIs(df.model_selection.LeaveOneGroupOut, ms.LeaveOneGroupOut)
    self.assertIs(df.model_selection.LeavePGroupsOut, ms.LeavePGroupsOut)
    self.assertIs(df.model_selection.LeaveOneOut, ms.LeaveOneOut)
    self.assertIs(df.model_selection.LeavePOut, ms.LeavePOut)
    self.assertIs(df.model_selection.ShuffleSplit, ms.ShuffleSplit)
    self.assertIs(df.model_selection.GroupShuffleSplit,
                  ms.GroupShuffleSplit)
    # self.assertIs(df.model_selection.StratifiedShuffleSplit,
    #               ms.StratifiedShuffleSplit)
    self.assertIs(df.model_selection.PredefinedSplit, ms.PredefinedSplit)
    self.assertIs(df.model_selection.TimeSeriesSplit, ms.TimeSeriesSplit)

    # Splitter Functions

    # Hyper-parameter optimizers
    self.assertIs(df.model_selection.GridSearchCV, ms.GridSearchCV)
    self.assertIs(df.model_selection.RandomizedSearchCV, ms.RandomizedSearchCV)
    self.assertIs(df.model_selection.ParameterGrid, ms.ParameterGrid)
    self.assertIs(df.model_selection.ParameterSampler, ms.ParameterSampler)

    # Model validation
Example 7: test_objectmapper_abbr

# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import PredefinedSplit [as alias]
def test_objectmapper_abbr(self):
    df = pdml.ModelFrame([])

    # Splitter Classes
    self.assertIs(df.ms.KFold, ms.KFold)
    self.assertIs(df.ms.GroupKFold, ms.GroupKFold)
    self.assertIs(df.ms.StratifiedKFold, ms.StratifiedKFold)
    self.assertIs(df.ms.LeaveOneGroupOut, ms.LeaveOneGroupOut)
    self.assertIs(df.ms.LeavePGroupsOut, ms.LeavePGroupsOut)
    self.assertIs(df.ms.LeaveOneOut, ms.LeaveOneOut)
    self.assertIs(df.ms.LeavePOut, ms.LeavePOut)
    self.assertIs(df.ms.ShuffleSplit, ms.ShuffleSplit)
    self.assertIs(df.ms.GroupShuffleSplit,
                  ms.GroupShuffleSplit)
    # self.assertIs(df.ms.StratifiedShuffleSplit,
    #               ms.StratifiedShuffleSplit)
    self.assertIs(df.ms.PredefinedSplit, ms.PredefinedSplit)
    self.assertIs(df.ms.TimeSeriesSplit, ms.TimeSeriesSplit)

    # Splitter Functions

    # Hyper-parameter optimizers
    self.assertIs(df.ms.GridSearchCV, ms.GridSearchCV)
    self.assertIs(df.ms.RandomizedSearchCV, ms.RandomizedSearchCV)
    self.assertIs(df.ms.ParameterGrid, ms.ParameterGrid)
    self.assertIs(df.ms.ParameterSampler, ms.ParameterSampler)

    # Model validation
Example 8: test_cross_validator_with_default_params

# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import PredefinedSplit [as alias]
def test_cross_validator_with_default_params():
    n_samples = 4
    n_unique_groups = 4
    n_splits = 2
    p = 2
    n_shuffle_splits = 10  # (the default value)

    X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
    X_1d = np.array([1, 2, 3, 4])
    y = np.array([1, 1, 2, 2])
    groups = np.array([1, 2, 3, 4])
    loo = LeaveOneOut()
    lpo = LeavePOut(p)
    kf = KFold(n_splits)
    skf = StratifiedKFold(n_splits)
    lolo = LeaveOneGroupOut()
    lopo = LeavePGroupsOut(p)
    ss = ShuffleSplit(random_state=0)
    ps = PredefinedSplit([1, 1, 2, 2])  # n_splits = number of unique folds = 2

    loo_repr = "LeaveOneOut()"
    lpo_repr = "LeavePOut(p=2)"
    kf_repr = "KFold(n_splits=2, random_state=None, shuffle=False)"
    skf_repr = "StratifiedKFold(n_splits=2, random_state=None, shuffle=False)"
    lolo_repr = "LeaveOneGroupOut()"
    lopo_repr = "LeavePGroupsOut(n_groups=2)"
    ss_repr = ("ShuffleSplit(n_splits=10, random_state=0, "
               "test_size=None, train_size=None)")
    ps_repr = "PredefinedSplit(test_fold=array([1, 1, 2, 2]))"

    n_splits_expected = [n_samples, comb(n_samples, p), n_splits, n_splits,
                         n_unique_groups, comb(n_unique_groups, p),
                         n_shuffle_splits, 2]

    for i, (cv, cv_repr) in enumerate(zip(
            [loo, lpo, kf, skf, lolo, lopo, ss, ps],
            [loo_repr, lpo_repr, kf_repr, skf_repr, lolo_repr, lopo_repr,
             ss_repr, ps_repr])):
        # Test if get_n_splits works correctly
        assert_equal(n_splits_expected[i], cv.get_n_splits(X, y, groups))

        # Test if the cross-validator works as expected even if
        # the data is 1d
        np.testing.assert_equal(list(cv.split(X, y, groups)),
                                list(cv.split(X_1d, y, groups)))

        # Test that train, test indices returned are integers
        for train, test in cv.split(X, y, groups):
            assert_equal(np.asarray(train).dtype.kind, 'i')
            assert_equal(np.asarray(test).dtype.kind, 'i')

        # Test if the repr works without any errors
        assert_equal(cv_repr, repr(cv))

    # ValueError for get_n_splits methods
    msg = "The 'X' parameter should not be None."
    assert_raise_message(ValueError, msg,
                         loo.get_n_splits, None, y, groups)
    assert_raise_message(ValueError, msg,
                         lpo.get_n_splits, None, y, groups)
Example 9: test_cross_validator_with_default_params

# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import PredefinedSplit [as alias]
def test_cross_validator_with_default_params():
    n_samples = 4
    n_unique_groups = 4
    n_splits = 2
    p = 2
    n_shuffle_splits = 10  # (the default value)

    X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
    X_1d = np.array([1, 2, 3, 4])
    y = np.array([1, 1, 2, 2])
    groups = np.array([1, 2, 3, 4])
    loo = LeaveOneOut()
    lpo = LeavePOut(p)
    kf = KFold(n_splits)
    skf = StratifiedKFold(n_splits)
    lolo = LeaveOneGroupOut()
    lopo = LeavePGroupsOut(p)
    ss = ShuffleSplit(random_state=0)
    ps = PredefinedSplit([1, 1, 2, 2])  # n_splits = number of unique folds = 2

    loo_repr = "LeaveOneOut()"
    lpo_repr = "LeavePOut(p=2)"
    kf_repr = "KFold(n_splits=2, random_state=None, shuffle=False)"
    skf_repr = "StratifiedKFold(n_splits=2, random_state=None, shuffle=False)"
    lolo_repr = "LeaveOneGroupOut()"
    lopo_repr = "LeavePGroupsOut(n_groups=2)"
    ss_repr = ("ShuffleSplit(n_splits=10, random_state=0, "
               "test_size='default',\n train_size=None)")
    ps_repr = "PredefinedSplit(test_fold=array([1, 1, 2, 2]))"

    n_splits_expected = [n_samples, comb(n_samples, p), n_splits, n_splits,
                         n_unique_groups, comb(n_unique_groups, p),
                         n_shuffle_splits, 2]

    for i, (cv, cv_repr) in enumerate(zip(
            [loo, lpo, kf, skf, lolo, lopo, ss, ps],
            [loo_repr, lpo_repr, kf_repr, skf_repr, lolo_repr, lopo_repr,
             ss_repr, ps_repr])):
        # Test if get_n_splits works correctly
        assert_equal(n_splits_expected[i], cv.get_n_splits(X, y, groups))

        # Test if the cross-validator works as expected even if
        # the data is 1d
        np.testing.assert_equal(list(cv.split(X, y, groups)),
                                list(cv.split(X_1d, y, groups)))

        # Test that train, test indices returned are integers
        for train, test in cv.split(X, y, groups):
            assert_equal(np.asarray(train).dtype.kind, 'i')
            assert_equal(np.asarray(test).dtype.kind, 'i')

        # Test if the repr works without any errors
        assert_equal(cv_repr, repr(cv))

    # ValueError for get_n_splits methods
    msg = "The 'X' parameter should not be None."
    assert_raise_message(ValueError, msg,
                         loo.get_n_splits, None, y, groups)
    assert_raise_message(ValueError, msg,
                         lpo.get_n_splits, None, y, groups)
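A small hedged sketch isolating the PredefinedSplit-specific assertions checked in Examples 8 and 9: with test_fold = [1, 1, 2, 2] there are two unique folds, so get_n_splits() is 2 and the repr echoes the test_fold array.

from sklearn.model_selection import PredefinedSplit

ps = PredefinedSplit([1, 1, 2, 2])
assert ps.get_n_splits() == 2
print(repr(ps))   # PredefinedSplit(test_fold=array([1, 1, 2, 2]))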