本文整理汇总了Python中sklearn.model_selection模块的典型用法代码示例。如果您正苦于以下问题：Python sklearn.model_selection的具体用法？Python sklearn.model_selection怎么用？Python sklearn.model_selection使用的例子？那么，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该模块所在包sklearn的用法示例。
在下文中一共展示了sklearn.model_selection方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _create_classifier
# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import model_selection [as 别名]
def _create_classifier(self, num_threads, y):
    """Build an unfitted grid search over a class-balanced, probabilistic SVC.

    The search grid is taken from ``self.component_config``: the ``"C"``
    entry is used as-is and every entry of ``"kernels"`` is coerced to
    ``str``. Candidates are scored with ``'f1_weighted'``.

    :param num_threads: forwarded to ``GridSearchCV`` as ``n_jobs``.
    :param y: training labels; only passed to ``self._num_cv_splits`` to
        pick the number of cross-validation folds.
    :return: the configured (not yet fitted) ``GridSearchCV`` instance.
    """
    from sklearn.model_selection import GridSearchCV
    from sklearn.svm import SVC

    config = self.component_config
    c_values = config["C"]
    # sklearn wants plain ``str`` kernel names rather than other
    # string-like instances, hence the explicit coercion.
    kernel_names = [str(kernel) for kernel in config["kernels"]]
    param_grid = [{"C": c_values, "kernel": kernel_names}]

    # Fold count is delegated to ``_num_cv_splits`` (original note: aim
    # for 5 examples in each fold).
    n_folds = self._num_cv_splits(y)

    base_estimator = SVC(C=1, probability=True, class_weight='balanced')
    return GridSearchCV(
        base_estimator,
        param_grid=param_grid,
        n_jobs=num_threads,
        cv=n_folds,
        scoring='f1_weighted',
        verbose=1,
    )
示例2: _cv_len
# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import model_selection [as 别名]
def _cv_len(cv, X, y):
    """Return the number of splits produced by a cross-validation object.

    Works for both cross-validation APIs, selected by the module-level
    ``SK18`` flag.

    Parameters
    ----------
    cv : `sklearn.cross_validation._PartitionIterator` or `sklearn.model_selection.BaseCrossValidator`
        The cv object whose length is wanted. When ``SK18`` is falsy the
        length is ``len(cv)``; otherwise it is ``cv.get_n_splits(X, y)``.

    X : pd.DataFrame or np.ndarray, shape(n_samples, n_features)
        The data being fit in the grid search.

    y : np.ndarray, shape(n_samples,)
        The target being fit in the grid search.

    Returns
    -------
    int
    """
    if SK18:
        return cv.get_n_splits(X, y)
    return len(cv)
示例3: stratified_kfold_indices
# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import model_selection [as 别名]
def stratified_kfold_indices(samples, **xval_kw):
    """Return a list of ``(train_idx, test_idx)`` stratified K-fold splits.

    TODO: check xval label frequency

    Args:
        samples: object providing ``len()``, ``encoded_1d().values`` (used
            as the stratification labels) and ``group_ids``.
        **xval_kw: keyword arguments forwarded to the splitter constructor.
            The special key ``type`` selects the splitter and is removed
            before forwarding; it must be ``'StratifiedGroupKFold'``
            (default) or ``'StratifiedKFold'``.

    Returns:
        list: one ``(train_idx, test_idx)`` pair per fold, as yielded by
        the splitter's ``split`` method.

    Raises:
        ValueError: if ``type`` names an unsupported splitter. Previously
            this case fell through and failed with an UnboundLocalError.
    """
    type_ = xval_kw.pop('type', 'StratifiedGroupKFold')
    # Validate before doing any work so a typo in ``type`` produces a
    # clear error instead of an unbound-variable failure at return time.
    if type_ not in ('StratifiedGroupKFold', 'StratifiedKFold'):
        raise ValueError(
            'Unknown cross-validation type %r; expected '
            "'StratifiedGroupKFold' or 'StratifiedKFold'" % (type_,))

    from sklearn import model_selection

    # The splitters only need the number of rows, so X carries no features.
    X = np.empty((len(samples), 0))
    y = samples.encoded_1d().values
    groups = samples.group_ids

    if type_ == 'StratifiedGroupKFold':
        assert groups is not None, 'StratifiedGroupKFold requires group ids'
        # FIXME: The StratifiedGroupKFold could be implemented better.
        splitter = sklearn_utils.StratifiedGroupKFold(**xval_kw)
        return list(splitter.split(X=X, y=y, groups=groups))

    splitter = model_selection.StratifiedKFold(**xval_kw)
    return list(splitter.split(X=X, y=y))
示例4: gen_crossval_idxs
# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import model_selection [as 别名]
def gen_crossval_idxs(problem, n_folds=2):
    """Yield ``(train_idx, test_idx)`` pairs for stratified cross-validation.

    Args:
        problem: object whose ``ds`` attribute provides ``target`` (the
            labels to stratify on), ``name`` (used in the progress label)
            and optionally ``nids``.
        n_folds (int): number of folds to generate.

    Yields:
        tuple: ``(train_idx, test_idx)`` for each fold.

    Notes:
        A fixed seed (43432) is used so the folds are reproducible.
        When ``problem.ds`` has no ``nids`` the folds come from
        scikit-learn's ``StratifiedKFold``. ``sklearn.model_selection`` is
        preferred; the legacy ``sklearn.cross_validation`` module (removed
        in scikit-learn 0.20) is only used when the newer one is missing.
    """
    y = problem.ds.target
    rng = 43432
    if hasattr(problem.ds, 'nids'):
        # Ensure that an individual does not appear in both the train
        # and the test dataset
        from ibeis_cnn.dataset import stratified_kfold_label_split
        labels = problem.ds.nids
        _iter = stratified_kfold_label_split(y, labels, n_folds=n_folds, rng=rng)
    else:
        try:
            from sklearn.model_selection import StratifiedKFold
        except ImportError:
            # Very old scikit-learn: fall back to the original API, where
            # the labels are given to the constructor and the object
            # itself is iterated.
            import sklearn.cross_validation
            _iter = sklearn.cross_validation.StratifiedKFold(
                y, n_folds=n_folds, shuffle=True, random_state=rng)
        else:
            import numpy as np
            skf = StratifiedKFold(n_splits=n_folds, shuffle=True,
                                  random_state=rng)
            # Only the number of samples matters for X here.
            _iter = skf.split(X=np.empty(len(y)), y=y)
    msg = 'cross-val test on %s' % (problem.ds.name)
    progiter = ut.ProgIter(_iter, length=n_folds, lbl=msg)
    for train_idx, test_idx in progiter:
        yield train_idx, test_idx
# @ut.reloadable_class
示例5: __init__
# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import model_selection [as 别名]
def __init__(self,
             component_config: Dict[Text, Any] = None,
             clf: 'sklearn.model_selection.GridSearchCV' = None,
             le: Optional['sklearn.preprocessing.LabelEncoder'] = None
             ) -> None:
    """Construct a new intent classifier using the sklearn framework.

    :param component_config: configuration handed to the parent
        constructor.
    :param clf: classifier object to store on ``self.clf`` (may be
        ``None``).
    :param le: label encoder to reuse; a new ``LabelEncoder`` is created
        when omitted.
    """
    from sklearn.preprocessing import LabelEncoder

    super(SklearnIntentClassifier, self).__init__(component_config)

    # Keep a caller-supplied encoder, otherwise start with a fresh one.
    self.le = LabelEncoder() if le is None else le
    self.clf = clf

    _sklearn_numpy_warning_fix()
示例6: _create_classifier
# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import model_selection [as 别名]
def _create_classifier(self, num_threads, y):
    """Build an unfitted grid search over a class-balanced, probabilistic SVC.

    The grid (``"C"``, ``"gamma"`` and ``"kernels"``) and the scoring
    function (``'scoring_function'``) are all read from
    ``self.component_config``; kernel entries are coerced to ``str``.

    :param num_threads: forwarded to ``GridSearchCV`` as ``n_jobs``.
    :param y: training labels; only passed to ``self._num_cv_splits`` to
        pick the number of cross-validation folds.
    :return: the configured (not yet fitted) ``GridSearchCV`` instance.
    """
    from sklearn.model_selection import GridSearchCV
    from sklearn.svm import SVC

    config = self.component_config
    c_values = config["C"]
    raw_kernels = config["kernels"]
    gamma_values = config["gamma"]

    # sklearn wants plain ``str`` kernel names rather than other
    # string-like instances, hence the explicit coercion.
    param_grid = [{
        "C": c_values,
        "gamma": gamma_values,
        "kernel": [str(kernel) for kernel in raw_kernels],
    }]

    # Fold count is delegated to ``_num_cv_splits`` (original note: aim
    # for 5 examples in each fold).
    n_folds = self._num_cv_splits(y)

    base_estimator = SVC(C=1, probability=True, class_weight='balanced')
    return GridSearchCV(
        base_estimator,
        param_grid=param_grid,
        n_jobs=num_threads,
        cv=n_folds,
        scoring=self.component_config['scoring_function'],
        verbose=1,
    )
示例7: __init__
# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import model_selection [as 别名]
def __init__(
    self,
    component_config: Optional[Dict[Text, Any]] = None,
    clf: "sklearn.model_selection.GridSearchCV" = None,
    le: Optional["sklearn.preprocessing.LabelEncoder"] = None,
) -> None:
    """Construct a new intent classifier using the sklearn framework.

    :param component_config: configuration handed to the parent
        constructor.
    :param clf: classifier object to store on ``self.clf`` (may be
        ``None``).
    :param le: label encoder to reuse; a new ``LabelEncoder`` is created
        when omitted.
    """
    from sklearn.preprocessing import LabelEncoder

    super().__init__(component_config)

    # Keep a caller-supplied encoder, otherwise start with a fresh one.
    self.le = LabelEncoder() if le is None else le
    self.clf = clf
示例8: __init__
# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import model_selection [as 别名]
def __init__(self,
             component_config=None,  # type: Dict[Text, Any]
             clf=None,  # type: sklearn.model_selection.GridSearchCV
             le=None  # type: sklearn.preprocessing.LabelEncoder
             ):
    # type: (...) -> None
    """Construct a new intent classifier using the sklearn framework.

    :param component_config: configuration handed to the parent
        constructor.
    :param clf: classifier object to store on ``self.clf`` (may be
        ``None``).
    :param le: label encoder to reuse; a new ``LabelEncoder`` is created
        when omitted.
    """
    from sklearn.preprocessing import LabelEncoder

    super(SklearnIntentClassifier, self).__init__(component_config)

    # Keep a caller-supplied encoder, otherwise start with a fresh one.
    self.le = LabelEncoder() if le is None else le
    self.clf = clf

    _sklearn_numpy_warning_fix()
示例9: _set_cv
# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import model_selection [as 别名]
def _set_cv(cv, X, y, classifier):
    """Normalise ``cv`` through ``check_cv`` for the sklearn API in use.

    The result is a `sklearn.cross_validation._PartitionIterator` or a
    `sklearn.model_selection.BaseCrossValidator`, depending on whether
    sklearn-0.17 or sklearn-0.18 is being used (module-level ``SK18`` flag).

    Parameters
    ----------
    cv : int, `_PartitionIterator` or `BaseCrossValidator`
        The CV object or int to check. If an int, will be converted
        into the appropriate class of crossvalidator.

    X : pd.DataFrame or np.ndarray, shape(n_samples, n_features)
        The data being fit in the grid search. Only forwarded to
        ``check_cv`` when ``SK18`` is falsy.

    y : np.ndarray, shape(n_samples,)
        The target being fit in the grid search.

    classifier : bool
        Whether the estimator being fit is a classifier

    Returns
    -------
    `_PartitionIterator` or `BaseCrossValidator`
    """
    if SK18:
        # The newer call made here does not take X.
        return check_cv(cv, y, classifier)
    return check_cv(cv, X, y, classifier)
示例10: build_split_dict
# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import model_selection [as 别名]
def build_split_dict(X: pd.DataFrame, split_obj: Type[BaseCrossValidator]) -> dict:
    """
    Collect per-fold metadata describing how ``split_obj`` partitions ``X``.

    Parameters
    ----------
    X: pd.DataFrame
        The training dataset that will be split during cross-validation.
    split_obj: Type[sklearn.model_selection.BaseCrossValidator]
        The cross-validation object that returns train, test indices for splitting.

    Returns
    -------
    split_metadata: Dict[str,Any]
        For every fold (numbered from 1): the ``X.index`` labels at the first
        and last train/test positions, plus the train and test sizes.
    """
    split_metadata: Dict[str, Any] = {}
    for fold_no, (train_ind, test_ind) in enumerate(split_obj.split(X), start=1):
        prefix = f"fold-{fold_no}"
        # Index labels at the first/last position of each partition.
        split_metadata[f"{prefix}-train-start"] = X.index[train_ind[0]]
        split_metadata[f"{prefix}-train-end"] = X.index[train_ind[-1]]
        split_metadata[f"{prefix}-test-start"] = X.index[test_ind[0]]
        split_metadata[f"{prefix}-test-end"] = X.index[test_ind[-1]]
        # Partition sizes.
        split_metadata[f"{prefix}-n-train"] = len(train_ind)
        split_metadata[f"{prefix}-n-test"] = len(test_ind)
    return split_metadata
示例11: testdata_smk
# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import model_selection [as 别名]
def testdata_smk(*args, **kwargs):
    """
    Build an SMK query request over the ``PZ_MTEST`` test annotations.

    The annotations are divided by the first fold of a 4-fold, unshuffled
    stratified split on their name ids: the train part becomes the
    database annotations and the test part the query annotations.

    Args:
        *args: ignored.
        **kwargs: merged into the request config, overriding the default
            ``{'num_words': 1000}``.

    Returns:
        tuple: ``(ibs, smk, qreq_)``

    Notes:
        ``sklearn.model_selection`` is preferred; the legacy
        ``sklearn.cross_validation`` module (removed in scikit-learn 0.20)
        is only used when the newer one is missing.

    >>> from ibeis.algo.smk.smk_pipeline import *  # NOQA
    >>> kwargs = {}
    """
    import ibeis
    ibs, aid_list = ibeis.testdata_aids(defaultdb='PZ_MTEST')
    nid_list = np.array(ibs.annots(aid_list).nids)
    try:
        from sklearn.model_selection import StratifiedKFold
    except ImportError:
        # Very old scikit-learn: keep the original call unchanged.
        import sklearn.cross_validation
        rng = ut.ensure_rng(0)
        xvalkw = dict(n_folds=4, shuffle=False, random_state=rng)
        folds = sklearn.cross_validation.StratifiedKFold(nid_list, **xvalkw)
    else:
        # No random_state here: the split is unshuffled, and recent
        # scikit-learn rejects a random_state when shuffle is False.
        skf = StratifiedKFold(n_splits=4, shuffle=False)
        folds = skf.split(X=np.empty(len(nid_list)), y=nid_list)
    # Only the first fold is used.
    train_idx, test_idx = six.next(iter(folds))
    daids = ut.take(aid_list, train_idx)
    qaids = ut.take(aid_list, test_idx)
    config = {
        'num_words': 1000,
    }
    config.update(**kwargs)
    qreq_ = SMKRequest(ibs, qaids, daids, config)
    smk = qreq_.smk
    #qreq_ = ibs.new_query_request(qaids, daids, cfgdict={'pipeline_root': 'smk', 'proot': 'smk'})
    #qreq_ = ibs.new_query_request(qaids, daids, cfgdict={})
    return ibs, smk, qreq_
示例12: subsplit_indices
# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import model_selection [as 别名]
def subsplit_indices(samples, subset_idx, **xval_kw):
    """Split an existing subset of ``samples`` into stratified K-folds.

    Args:
        samples: object providing ``encoded_1d().values`` (stratification
            labels) and ``group_ids``, both indexable by ``subset_idx``.
        subset_idx: indices (into ``samples``) of the subset to split; it
            is itself indexed with the fold indices, so it is expected to
            be array-like with fancy-indexing support.
        **xval_kw: keyword arguments forwarded to the splitter constructor.
            ``n_splits`` defaults to 3. The special key ``type`` selects
            the splitter and is removed before forwarding; it must be
            ``'StratifiedGroupKFold'`` (default) or ``'StratifiedKFold'``.

    Returns:
        list: ``(train_idx, test_idx)`` pairs expressed in the coordinates
        of the original ``samples`` (i.e. values drawn from ``subset_idx``).

    Raises:
        ValueError: if ``type`` names an unsupported splitter. Previously
            this case fell through and failed with an UnboundLocalError.
    """
    # Work on a copy so the defaults below are applied uniformly.
    xval_kw_ = dict(xval_kw)
    xval_kw_.setdefault('n_splits', 3)
    type_ = xval_kw_.pop('type', 'StratifiedGroupKFold')
    # Validate before doing any work so a typo in ``type`` produces a
    # clear error instead of an unbound-variable failure further down.
    if type_ not in ('StratifiedGroupKFold', 'StratifiedKFold'):
        raise ValueError(
            'Unknown cross-validation type %r; expected '
            "'StratifiedGroupKFold' or 'StratifiedKFold'" % (type_,))

    from sklearn import model_selection

    # The splitters only need the number of rows, so X carries no features.
    X = np.empty((len(subset_idx), 0))
    y = samples.encoded_1d().values[subset_idx]
    groups = samples.group_ids[subset_idx]

    if type_ == 'StratifiedGroupKFold':
        assert groups is not None, 'StratifiedGroupKFold requires group ids'
        # FIXME: The StratifiedGroupKFold could be implemented better.
        splitter = sklearn_utils.StratifiedGroupKFold(**xval_kw_)
        rel_skf_list = list(splitter.split(X=X, y=y, groups=groups))
    else:
        splitter = model_selection.StratifiedKFold(**xval_kw_)
        rel_skf_list = list(splitter.split(X=X, y=y))

    # map back into original coords
    skf_list = [(subset_idx[rel_idx1], subset_idx[rel_idx2])
                for rel_idx1, rel_idx2 in rel_skf_list]

    # Sanity check: every mapped index must come from the requested subset.
    for idx1, idx2 in skf_list:
        assert len(np.intersect1d(subset_idx, idx1)) == len(idx1)
        assert len(np.intersect1d(subset_idx, idx2)) == len(idx2)
    return skf_list
示例13: _create_classifier
# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import model_selection [as 别名]
def _create_classifier(
    self, num_threads: int, y
) -> "sklearn.model_selection.GridSearchCV":
    """Build an unfitted grid search over a class-balanced, probabilistic SVC.

    The grid (``"C"``, ``"gamma"`` and ``"kernels"``) and the scoring
    function (``"scoring_function"``) are all read from
    ``self.component_config``; kernel entries are coerced to ``str``.

    :param num_threads: forwarded to ``GridSearchCV`` as ``n_jobs``.
    :param y: training labels; only passed to ``self._num_cv_splits`` to
        pick the number of cross-validation folds.
    :return: the configured (not yet fitted) ``GridSearchCV`` instance.
    """
    from sklearn.model_selection import GridSearchCV
    from sklearn.svm import SVC

    config = self.component_config
    c_values = config["C"]
    raw_kernels = config["kernels"]
    gamma_values = config["gamma"]

    # sklearn wants plain ``str`` kernel names rather than other
    # string-like instances, hence the explicit coercion.
    kernel_names = [str(kernel) for kernel in raw_kernels]
    param_grid = [{"C": c_values, "gamma": gamma_values, "kernel": kernel_names}]

    # Fold count is delegated to ``_num_cv_splits`` (original note: aim
    # for 5 examples in each fold).
    n_folds = self._num_cv_splits(y)

    base_estimator = SVC(C=1, probability=True, class_weight="balanced")
    # NOTE(review): ``iid`` appears to have been dropped from newer
    # scikit-learn releases -- confirm against the pinned version before
    # upgrading.
    return GridSearchCV(
        base_estimator,
        param_grid=param_grid,
        n_jobs=num_threads,
        cv=n_folds,
        scoring=self.component_config["scoring_function"],
        verbose=1,
        iid=False,
    )
示例14: test_Shap
# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import model_selection [as 别名]
def test_Shap(self):
    """Smoke-test ``KernelExplainer`` on the iris and adult datasets.

    Fits k-nearest-neighbours and linear-SVC classifiers on iris and a
    k-nearest-neighbours classifier on adult, runs the explainer on each
    and prints the results. Nothing is asserted; the test passes as long
    as no call raises.
    """
    def _accuracy_pct(model, features, labels):
        # Percentage of rows whose prediction matches the label.
        return 100*np.sum(model.predict(features) == labels)/len(labels)

    def _report(title, explainer, values):
        # Print the explanation, the first expected value and the first
        # entry of the explanation, in that order.
        print(title)
        print(values)
        print(explainer.explainer.expected_value[0])
        print(values[0])

    np.random.seed(1)
    X_train, X_test, Y_train, Y_test = train_test_split(
        *shap.datasets.iris(), test_size=0.2, random_state=0)

    # K-nearest neighbors
    iris_knn = sklearn.neighbors.KNeighborsClassifier()
    iris_knn.fit(X_train, Y_train)
    print("Accuracy = {0}%".format(_accuracy_pct(iris_knn, X_test, Y_test)))

    knn_explainer = KernelExplainer(iris_knn.predict_proba, X_train)

    # Explain a single prediction from the test set
    single_values = knn_explainer.explain_instance(X_test.iloc[0,:])  # TODO test against original SHAP Lib
    _report('knn X_test iloc_0', knn_explainer, single_values)

    # Explain all the predictions in the test set
    all_values = knn_explainer.explain_instance(X_test)
    _report('knn X_test', knn_explainer, all_values)

    # SV machine with a linear kernel
    svc_linear = sklearn.svm.SVC(kernel='linear', probability=True)
    svc_linear.fit(X_train, Y_train)
    print("Accuracy = {0}%".format(_accuracy_pct(svc_linear, X_test, Y_test)))

    # Explain all the predictions in the test set
    svc_explainer = KernelExplainer(svc_linear.predict_proba, X_train)
    svc_values = svc_explainer.explain_instance(X_test)
    _report('svc X_test', svc_explainer, svc_values)

    # Tabular example on the adult dataset
    np.random.seed(1)
    X, y = shap.datasets.adult()
    X_train, X_valid, y_train, y_valid = sklearn.model_selection.train_test_split(
        X, y, test_size=0.2, random_state=7)
    adult_knn = sklearn.neighbors.KNeighborsClassifier()
    adult_knn.fit(X_train, y_train)

    def _second_column_proba(x):
        # Second column of ``predict_proba``.
        return adult_knn.predict_proba(x)[:,1]

    # A single row of per-column training medians is the background data.
    median_row = X_train.median().values.reshape((1,X_train.shape[1]))
    adult_explainer = KernelExplainer(_second_column_proba, median_row)
    shap_values_single = adult_explainer.explain_instance(X.iloc[0,:], nsamples=1000)
    print('Shap Tabular Example')
    print(adult_explainer.explainer.expected_value)
    print(shap_values_single)
    print("Invoked Shap KernelExplainer")
示例15: gridsearch_ratio_thresh
# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import model_selection [as 别名]
def gridsearch_ratio_thresh(matches):
    """Grid-search the ``ratio_thresh`` parameter by ROC AUC over ``matches``.

    Fifty thresholds evenly spaced in [0.6, 0.7] are scored on all matches
    and the AUC curve is plotted. The search is then repeated on the
    training part of each of 10 stratified folds, and the mean of the
    per-fold best thresholds is printed.

    Args:
        matches: sequence of match objects. Each must provide
            ``annot1['nid']`` and ``annot2['nid']`` (a pair is a positive
            example when they are equal), ``fs`` and ``ratio_test_flags``.

    Notes:
        * ``sklearn.model_selection`` is imported explicitly; importing
          ``sklearn`` and ``sklearn.metrics`` alone does not guarantee the
          submodule attribute exists.
        * ``StratifiedKFold`` is created without ``random_state``: the
          split is not shuffled, so the seed had no effect, and recent
          scikit-learn rejects a seed when ``shuffle`` is False.
        * The function returns nothing and ends in ``utool.embed()``.
    """
    import sklearn
    import sklearn.metrics
    import sklearn.model_selection
    import vtool_ibeis as vt
    # Param search for vsone
    import plottool_ibeis as pt
    pt.qt4ensure()

    skf = sklearn.model_selection.StratifiedKFold(n_splits=10)

    # Positive examples are pairs whose two annotations share a name id.
    y = np.array([m.annot1['nid'] == m.annot2['nid'] for m in matches])

    basis = {'ratio_thresh': np.linspace(.6, .7, 50).tolist()}
    grid = ut.all_dict_combinations(basis)
    xdata = np.array(ut.take_column(grid, 'ratio_thresh'))

    def _ratio_thresh(y_true, match_list):
        # Try and find optional ratio threshold: one AUC per grid point.
        auc_list = []
        for cfgdict in ut.ProgIter(grid, lbl='gridsearch'):
            y_score = [
                match.fs.compress(match.ratio_test_flags(cfgdict)).sum()
                for match in match_list
            ]
            auc = sklearn.metrics.roc_auc_score(y_true, y_score)
            auc_list.append(auc)
        auc_list = np.array(auc_list)
        return auc_list

    # Search over the full data set and plot the AUC curve.
    auc_list = _ratio_thresh(y, matches)
    pt.plot(xdata, auc_list)
    subx, suby = vt.argsubmaxima(auc_list, xdata)
    best_ratio_thresh = subx[suby.argmax()]

    # Repeat the search on the training part of each fold.
    skf_results = []
    for train_idx, test_idx in skf.split(matches, y):
        match_list_ = ut.take(matches, train_idx)
        y_true = y.take(train_idx)
        auc_list = _ratio_thresh(y_true, match_list_)
        subx, suby = vt.argsubmaxima(auc_list, xdata, maxima_thresh=.8)
        best_ratio_thresh = subx[suby.argmax()]
        skf_results.append(best_ratio_thresh)
    print('skf_results.append = %r' % (np.mean(skf_results),))

    # NOTE(review): debugging hook kept from the original; it appears to
    # open an interactive session -- confirm it is still wanted.
    import utool
    utool.embed()