This article collects typical usage examples of the is_classifier function from Python's sklearn.base module. If you are wondering what is_classifier actually does, how to call it, and what it looks like in real code, the curated examples below should help.
The following presents 15 code examples of the is_classifier function, drawn from open-source projects and ordered roughly by popularity.
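Before the examples, a minimal sketch of what is_classifier reports; the two estimators below are standard scikit-learn classes chosen purely for illustration:

from sklearn.base import is_classifier
from sklearn.linear_model import LinearRegression, LogisticRegression

# is_classifier checks the estimator's _estimator_type tag
print(is_classifier(LogisticRegression()))  # True  (classifier)
print(is_classifier(LinearRegression()))    # False (regressor)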
Example 1: test_is_classifier
def test_is_classifier():
    svc = SVC()
    assert_true(is_classifier(svc))
    assert_true(is_classifier(GridSearchCV(svc, {'C': [0.1, 1]})))
    assert_true(is_classifier(Pipeline([('svc', svc)])))
    assert_true(is_classifier(Pipeline(
        [('svc_cv', GridSearchCV(svc, {'C': [0.1, 1]}))])))
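A self-contained variant of this test, assuming scikit-learn >= 0.18 import paths and plain assert statements in place of the deprecated assert_true helper:

from sklearn.base import is_classifier
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC

def test_is_classifier():
    svc = SVC()
    # is_classifier follows the _estimator_type tag through meta-estimators and pipelines
    assert is_classifier(svc)
    assert is_classifier(GridSearchCV(svc, {'C': [0.1, 1]}))
    assert is_classifier(Pipeline([('svc', svc)]))
    assert is_classifier(Pipeline([('svc_cv', GridSearchCV(svc, {'C': [0.1, 1]}))]))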
Example 2: fit
def fit(self, X, y):
    if is_classifier(self):
        self.classes_, y = np.unique(y, return_inverse=True)
        self.num_classes_ = len(self.classes_)
    else:
        self.num_classes_ = -1

    # Split data into train/val
    X_train, X_val, y_train, y_val = train_test_split(
        X,
        y,
        test_size=self.holdout_split,
        random_state=self.random_state,
        stratify=y if is_classifier(self) else None,
    )

    # Define attributes
    self.attributes_ = EBMUtils.gen_attributes(self.col_types, self.col_n_bins)

    # Build EBM allocation code
    if is_classifier(self):
        model_type = "classification"
    else:
        model_type = "regression"

    self.intercept_ = 0
    self.attribute_sets_ = []
    self.attribute_set_models_ = []

    main_attr_indices = [[x] for x in range(len(self.attributes_))]
    main_attr_sets = EBMUtils.gen_attribute_sets(main_attr_indices)
    with closing(
        NativeEBM(
            self.attributes_,
            main_attr_sets,
            X_train,
            y_train,
            X_val,
            y_val,
            num_inner_bags=self.feature_step_n_inner_bags,
            num_classification_states=self.num_classes_,
            model_type=model_type,
            training_scores=None,
            validation_scores=None,
        )
    ) as native_ebm:
        # Train main effects
        self._fit_main(native_ebm, main_attr_sets)

        # Build interaction terms
        self.inter_indices_ = self._build_interactions(native_ebm)

    self.staged_fit_interactions(X, y, self.inter_indices_)
    return self
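Inside a fit method like this one, is_classifier(self) only returns True if the concrete subclass declares its estimator type. A minimal sketch of how inheriting from ClassifierMixin or RegressorMixin drives the two branches; the Toy* class names are made up for illustration:

from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin, is_classifier

class ToyEBMClassifier(BaseEstimator, ClassifierMixin):   # hypothetical subclass
    pass

class ToyEBMRegressor(BaseEstimator, RegressorMixin):     # hypothetical subclass
    pass

# ClassifierMixin sets _estimator_type = "classifier", which is exactly what is_classifier checks
print(is_classifier(ToyEBMClassifier()))  # True  -> classification branch, classes_ derived from y
print(is_classifier(ToyEBMRegressor()))   # False -> regression branch, num_classes_ = -1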
Example 3: _fit
def _fit(self, X, y, parameter_iterable):
    """Actual fitting, performing the search over parameters."""
    self.scorer_ = check_scoring(self.estimator, scoring=self.scoring)

    X, y = indexable(X, y)
    cv = check_cv(self.cv, X, y, classifier=is_classifier(self.estimator))

    base_estimator = clone(self.estimator)

    best = best_parameters(base_estimator, cv, X, y, parameter_iterable,
                           self.scorer_, self.fit_params, self.iid)
    best = best.compute()

    self.best_params_ = best.parameters
    self.best_score_ = best.mean_validation_score

    if isinstance(base_estimator, Pipeline):
        base_estimator = base_estimator.to_sklearn().compute()

    if self.refit:
        # fit the best estimator using the entire dataset
        # clone first to work around broken estimators
        best_estimator = base_estimator.set_params(**best.parameters)
        if y is not None:
            self.best_estimator_ = best_estimator.fit(X, y, **self.fit_params)
        else:
            self.best_estimator_ = best_estimator.fit(X, **self.fit_params)

    return self
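This example passes X to check_cv, which matches the pre-0.18 sklearn.cross_validation signature. A sketch of the equivalent call against the modern sklearn.model_selection API, with a throwaway dataset standing in for the search object's attributes:

import numpy as np
from sklearn.base import is_classifier
from sklearn.model_selection import check_cv
from sklearn.svm import SVC

estimator = SVC()
X, y = np.random.rand(20, 3), np.array([0, 1] * 10)

# Modern signature: check_cv(cv, y, classifier=...); X is no longer an argument.
cv = check_cv(5, y, classifier=is_classifier(estimator))
for train_idx, test_idx in cv.split(X, y):
    pass  # fit and score one fold here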
Example 4: permutation_test_score
def permutation_test_score(estimator, X, y, groups=None, cv=None,
                           n_permutations=100, n_jobs=1, random_state=0,
                           verbose=0, scoring=None):
    """
    Evaluate the significance of a cross-validated score with permutations,
    as in test 1 of [Ojala2010]_.

    A modification of scikit-learn's original permutation_test_score: the
    p-value is evaluated outside this function, so that the permutation
    scores can be reused elsewhere.

    .. [Ojala2010] Ojala and Garriga. Permutation Tests for Studying
       Classifier Performance. The Journal of Machine Learning Research
       (2010) vol. 11
    """
    X, y, groups = indexable(X, y, groups)

    cv = check_cv(cv, y, classifier=is_classifier(estimator))
    scorer = check_scoring(estimator, scoring=scoring)
    random_state = check_random_state(random_state)

    # We clone the estimator to make sure that all the folds are
    # independent, and that it is pickle-able.
    permutation_scores = Parallel(n_jobs=n_jobs, verbose=verbose)(
        delayed(_permutation_test_score)(
            clone(estimator), X, _shuffle(y, groups, random_state),
            groups, cv, scorer)
        for _ in range(n_permutations))
    permutation_scores = np.array(permutation_scores)
    return permutation_scores
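Because this variant returns the raw permutation scores instead of a p-value, the caller computes the p-value itself. A sketch of that step, using the same estimate as scikit-learn and [Ojala2010]; the score argument is assumed to be the cross-validated score on the unpermuted labels:

import numpy as np

def permutation_pvalue(score, permutation_scores):
    # Fraction of permutations that score at least as well as the real labels,
    # with the +1 corrections from Ojala & Garriga (2010).
    n_permutations = len(permutation_scores)
    return (np.sum(permutation_scores >= score) + 1.0) / (n_permutations + 1)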
Example 5: cross_val_score
def cross_val_score(estimator, X, y=None, groups=None, scoring=None, cv=None,
                    n_jobs=1, verbose=0, fit_params=None,
                    pre_dispatch='2*n_jobs'):
    """
    Evaluate a score by cross-validation
    """
    if not isinstance(scoring, (list, tuple)):
        scoring = [scoring]

    X, y, groups = indexable(X, y, groups)

    cv = check_cv(cv, y, classifier=is_classifier(estimator))
    splits = list(cv.split(X, y, groups))
    scorer = [check_scoring(estimator, scoring=s) for s in scoring]
    # We clone the estimator to make sure that all the folds are
    # independent, and that it is pickle-able.
    parallel = Parallel(n_jobs=n_jobs, verbose=verbose,
                        pre_dispatch=pre_dispatch)
    scores = parallel(delayed(_fit_and_score)(clone(estimator), X, y, scorer,
                                              train, test, verbose, None,
                                              fit_params)
                      for train, test in splits)

    group_order = []
    if hasattr(cv, 'groups'):
        group_order = [np.array(cv.groups)[test].tolist()[0] for _, test in splits]
    return np.squeeze(np.array(scores)), group_order
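A usage sketch of this helper; since scoring may be a list, each fold contributes one value per scorer. The call assumes the module's _fit_and_score returns those values in scorer order, and the estimator is chosen purely for illustration:

import numpy as np
from sklearn.linear_model import LogisticRegression

X = np.random.rand(40, 4)
y = np.array([0, 1] * 20)

scores, group_order = cross_val_score(
    LogisticRegression(), X, y,
    scoring=['accuracy', 'roc_auc'],  # a list of scorers is accepted here
    cv=5,
)
# scores: one row per fold, one column per scorer; group_order stays empty
# unless the CV splitter exposes a .groups attribute.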
Example 6: score_fn
def score_fn(est, X, y, drop_indices):
    if is_classifier(est):
        prob = EBMUtils.classifier_predict_proba(X, est, drop_indices)
        # Negate AUC so that lower is better for both model types
        return -1.0 * roc_auc_score(y, prob[:, 1])
    else:
        pred = EBMUtils.regressor_predict(X, est, drop_indices)
        return mean_squared_error(y, pred)
Example 7: build_graph
def build_graph(estimator, cv, scorer, candidate_params, X, y=None,
                groups=None, fit_params=None, iid=True, refit=True,
                error_score='raise', return_train_score=True, cache_cv=True):

    X, y, groups = to_indexable(X, y, groups)
    cv = check_cv(cv, y, is_classifier(estimator))
    # "pairwise" estimators require a different graph for CV splitting
    is_pairwise = getattr(estimator, '_pairwise', False)

    dsk = {}
    X_name, y_name, groups_name = to_keys(dsk, X, y, groups)
    n_splits = compute_n_splits(cv, X, y, groups)

    if fit_params:
        # A mapping of {name: (name, graph-key)}
        param_values = to_indexable(*fit_params.values(), allow_scalars=True)
        fit_params = {k: (k, v) for (k, v) in
                      zip(fit_params, to_keys(dsk, *param_values))}
    else:
        fit_params = {}

    fields, tokens, params = normalize_params(candidate_params)
    main_token = tokenize(normalize_estimator(estimator), fields, params,
                          X_name, y_name, groups_name, fit_params, cv,
                          error_score == 'raise', return_train_score)

    cv_name = 'cv-split-' + main_token
    dsk[cv_name] = (cv_split, cv, X_name, y_name, groups_name,
                    is_pairwise, cache_cv)

    if iid:
        weights = 'cv-n-samples-' + main_token
        dsk[weights] = (cv_n_samples, cv_name)
    else:
        weights = None

    scores = do_fit_and_score(dsk, main_token, estimator, cv_name, fields,
                              tokens, params, X_name, y_name, fit_params,
                              n_splits, error_score, scorer,
                              return_train_score)

    cv_results = 'cv-results-' + main_token
    candidate_params_name = 'cv-parameters-' + main_token
    dsk[candidate_params_name] = (decompress_params, fields, params)
    dsk[cv_results] = (create_cv_results, scores, candidate_params_name,
                       n_splits, error_score, weights)
    keys = [cv_results]

    if refit:
        best_params = 'best-params-' + main_token
        dsk[best_params] = (get_best_params, candidate_params_name, cv_results)
        best_estimator = 'best-estimator-' + main_token
        if fit_params:
            fit_params = (dict, (zip, list(fit_params.keys()),
                                 list(pluck(1, fit_params.values()))))
        dsk[best_estimator] = (fit_best, clone(estimator), best_params,
                               X_name, y_name, fit_params)
        keys.append(best_estimator)

    return dsk, keys, n_splits
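The returned (dsk, keys, n_splits) triple is a plain dask task graph. A heavily hedged sketch of how a caller might evaluate it with dask's threaded scheduler; it assumes build_graph and its dask-searchcv helpers are importable and that the graph keys are ordered as built above:

import numpy as np
from dask.threaded import get as dask_get
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import check_scoring

X = np.random.rand(30, 3)
y = np.array([0, 1] * 15)
estimator = LogisticRegression()
candidate_params = [{'C': 0.1}, {'C': 1.0}]
scorer = check_scoring(estimator, scoring='accuracy')

dsk, keys, n_splits = build_graph(estimator, 3, scorer, candidate_params, X, y)
results = dask_get(dsk, keys)   # evaluate the whole graph
cv_results = results[0]         # dict in the usual cv_results_ format
best_estimator = results[1]     # present because refit defaults to True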
Example 8: explain_local
def explain_local(self, X, y=None, name=None):
    # Produce feature value pairs for each instance.
    # Values are the model graph score per respective attribute set.
    if name is None:
        name = gen_name_from_class(self)

    X, y, _, _ = unify_data(X, y, self.feature_names, self.feature_types)
    instances = self.preprocessor_.transform(X)

    scores_gen = EBMUtils.scores_by_attrib_set(
        instances, self.attribute_sets_, self.attribute_set_models_
    )

    n_rows = instances.shape[0]
    data_dicts = []
    for _ in range(n_rows):
        data_dict = {
            "type": "univariate",
            "names": [],
            "scores": [],
            "values": [],
            "extra": {
                "names": ["Intercept"],
                "scores": [self.intercept_],
                "values": [1],
            },
        }
        data_dicts.append(data_dict)

    for set_idx, attribute_set, scores in scores_gen:
        for row_idx in range(n_rows):
            feature_name = self.feature_names[set_idx]
            data_dicts[row_idx]["names"].append(feature_name)
            data_dicts[row_idx]["scores"].append(scores[row_idx])
            if attribute_set["n_attributes"] == 1:
                data_dicts[row_idx]["values"].append(
                    X[row_idx, attribute_set["attributes"][0]]
                )
            else:
                data_dicts[row_idx]["values"].append("")

    if is_classifier(self):
        scores = EBMUtils.classifier_predict_proba(instances, self)[:, 1]
    else:
        scores = EBMUtils.regressor_predict(instances, self)

    for row_idx in range(n_rows):
        data_dicts[row_idx]["perf"] = perf_dict(y, scores, row_idx)

    selector = gen_local_selector(instances, y, scores)
    internal_obj = {"overall": None, "specific": data_dicts}

    return EBMExplanation(
        "local",
        internal_obj,
        feature_names=self.feature_names,
        feature_types=self.feature_types,
        name=name,
        selector=selector,
    )
Example 9: benchmark
def benchmark(clf, X, y, cv=None):
    X, y = check_arrays(X, y, sparse_format='csr', allow_lists=True)
    cv = check_cv(cv, X, y, classifier=is_classifier(clf))

    # learning_curve_ = learning_curve(clf, X_all, y_all, cv=cv)

    train_times = []
    test_times = []
    confusion_matrices = []
    confusion_matrix_indices = []
    coefs = []
    for train, test in cv:
        X_train, y_train = X[train], y[train]
        X_test, y_test = X[test], y[test]

        t0 = time()
        clf.fit(X_train, y_train)
        train_times.append(time() - t0)

        t0 = time()
        y_pred = clf.predict(X_test)
        test_times.append(time() - t0)

        confusion_matrices.append(confusion_matrix(y_test, y_pred))
        confusion_matrix_indices.append(np.array([
            [test[pred] for pred in true]
            for true in confusion_matrix_instances(y_test, y_pred)
        ]))
        coefs.append(clf.coef_)

    return dict(
        train_times=np.array(train_times),
        test_times=np.array(test_times),
        confusion_matrices=np.array(confusion_matrices),
        confusion_matrix_indices=np.array(confusion_matrix_indices),
        coefs=np.array(coefs),
    )
Example 10: _is_classifier
def _is_classifier(self):
    """Whether the underlying model is a classifier.

    Return:
        (boolean) whether `self.model` is a classifier
    """
    return is_classifier(self.model) or hasattr(self.model, 'predict_proba')
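The hasattr fallback matters when the wrapped model is not a scikit-learn estimator at all. A small sketch showing how the two checks differ; BareProbModel is invented for illustration:

from sklearn.base import is_classifier

class BareProbModel:
    """Not an sklearn estimator: no _estimator_type attribute."""
    def predict_proba(self, X):
        return [[0.5, 0.5] for _ in X]

model = BareProbModel()
print(is_classifier(model))             # False: _estimator_type is missing
print(hasattr(model, 'predict_proba'))  # True: duck typing still flags it as a classifier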
Example 11: add_del_cv
def add_del_cv(df, predictors, target, model, scoring='roc_auc', cv1=None,
               n_folds=8, n_jobs=-1, start=[], selmax=None, selmin=1,
               min_ratio=1e-7, max_steps=10, verbosity=0):
    """ Forward-Backward (ADD-DEL) selection using model.

    Parameters
    ----------

    Returns
    -------
    selected: list
        selected predictors

    Example
    -------

    References
    ----------
    """
    def test_to_break(selected, selected_curr, to_break):
        if set(selected) == set(selected_curr):
            to_break += 1
        else:
            to_break = 0
        return to_break

    X, y, _ = df_xyf(df, predictors=predictors, target=target)
    cv1 = cross_validation.check_cv(
        cv1, X=X, y=y,
        classifier=is_classifier(model))

    selected_curr = start
    to_break = 0

    for i_step in xrange(max_steps):
        selected = forward_cv(
            df, predictors, target, model, scoring=scoring,
            cv1=cv1, n_folds=n_folds, n_jobs=n_jobs,
            start=selected_curr, selmax=selmax,
            min_ratio=min_ratio, verbosity=verbosity - 1)
        to_break = test_to_break(selected, selected_curr, to_break)
        selected_curr = selected

        if verbosity > 0:
            print('forward:', ' '.join(selected_curr))
        if to_break > 1:
            break

        selected = backward_cv(
            df, selected_curr, target, model, scoring=scoring,
            cv1=cv1, n_folds=n_folds, n_jobs=n_jobs, selmin=selmin,
            min_ratio=min_ratio, verbosity=verbosity - 1)
        to_break = test_to_break(selected, selected_curr, to_break)
        selected_curr = selected

        if verbosity > 0:
            print('backward:', ' '.join(selected_curr))
        if to_break > 0:
            break

    return selected_curr
Example 12: transform
def transform(self, X, y=None):
    cv = check_cv(self.cv, y, classifier=is_classifier(self.estimator))

    X_prob = np.zeros((X.shape[0], self.n_classes))
    X_pred = np.zeros(X.shape[0])

    for estimator, (_, test) in zip(self.estimators_, cv.split(X)):
        X_prob[test] = estimator.predict_proba(X[test])
        X_pred[test] = estimator.predict(X[test])
    return np.hstack([X_prob, np.array([X_pred]).T])
Example 13: fit
def fit(self, X, y):
    y_labels = self._get_labels(y)
    cv = check_cv(self.cv, y_labels, classifier=is_classifier(self.estimator))
    self.estimators_ = []

    for train, _ in cv.split(X, y_labels):
        self.estimators_.append(
            clone(self.estimator).fit(X[train], y_labels[train])
        )
    return self
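Example 13's fit and Example 12's transform form the two halves of an out-of-fold prediction transformer, a common stacking pattern. A hedged usage sketch; CVProbaTransformer is a purely hypothetical name for the class assumed to carry these methods along with estimator, cv, n_classes and _get_labels attributes:

import numpy as np
from sklearn.linear_model import LogisticRegression

X = np.random.rand(50, 4)
y = np.array([0, 1] * 25)

trans = CVProbaTransformer(estimator=LogisticRegression(), cv=5, n_classes=2)  # hypothetical class
trans.fit(X, y)              # one fitted clone of the base estimator per CV fold
X_meta = trans.transform(X)  # columns: class probabilities followed by the predicted label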
Example 14: _set_cv
def _set_cv(cv, estimator=None, X=None, y=None):
    """Set the default cross-validation depending on whether clf is
    a classifier or a regressor."""
    from sklearn.base import is_classifier

    # Detect whether classification or regression
    if estimator in ['classifier', 'regressor']:
        est_is_classifier = estimator == 'classifier'
    else:
        est_is_classifier = is_classifier(estimator)

    # Setup CV
    if check_version('sklearn', '0.18'):
        from sklearn import model_selection as models
        from sklearn.model_selection import (check_cv, StratifiedKFold, KFold)
        if isinstance(cv, (int, np.int)):
            XFold = StratifiedKFold if est_is_classifier else KFold
            cv = XFold(n_splits=cv)
        elif isinstance(cv, str):
            if not hasattr(models, cv):
                raise ValueError('Unknown cross-validation')
            cv = getattr(models, cv)
            cv = cv()
        cv = check_cv(cv=cv, y=y, classifier=est_is_classifier)
    else:
        from sklearn import cross_validation as models
        from sklearn.cross_validation import (check_cv, StratifiedKFold, KFold)
        if isinstance(cv, (int, np.int)):
            if est_is_classifier:
                cv = StratifiedKFold(y=y, n_folds=cv)
            else:
                cv = KFold(n=len(y), n_folds=cv)
        elif isinstance(cv, str):
            if not hasattr(models, cv):
                raise ValueError('Unknown cross-validation')
            cv = getattr(models, cv)
            if cv.__name__ not in ['KFold', 'LeaveOneOut']:
                raise NotImplementedError('CV cannot be defined with str for'
                                          ' sklearn < .017.')
            cv = cv(len(y))
        cv = check_cv(cv=cv, X=X, y=y, classifier=est_is_classifier)

    # Extract train and test set to retrieve them at predict time
    if hasattr(cv, 'split'):
        cv_splits = [(train, test) for train, test in
                     cv.split(X=np.zeros_like(y), y=y)]
    else:
        # XXX support sklearn.cross_validation cv
        cv_splits = [(train, test) for train, test in cv]

    if not np.all([len(train) for train, _ in cv_splits]):
        raise ValueError('Some folds do not have any train epochs.')

    return cv, cv_splits
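A short usage sketch of this helper, assuming it is imported from the module above. Passing the string 'classifier' skips estimator inspection, while passing an actual estimator lets is_classifier decide:

import numpy as np

y = np.array([0, 1] * 10)

# 5-fold CV; 'classifier' forces a stratified splitter without needing an estimator instance
cv, cv_splits = _set_cv(5, estimator='classifier', X=None, y=y)
print(len(cv_splits))   # 5 cached (train, test) index pairs for use at predict time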
Example 15: fit
def fit(self, X, y):
    """Actual fitting, performing the search over parameters."""
    parameter_iterable = ParameterSampler(self.param_distributions,
                                          self.n_iter,
                                          random_state=self.random_state)

    estimator = self.estimator
    cv = self.cv

    n_samples = _num_samples(X)
    X, y = indexable(X, y)

    if y is not None:
        if len(y) != n_samples:
            raise ValueError('Target variable (y) has a different number '
                             'of samples (%i) than data (X: %i samples)'
                             % (len(y), n_samples))
    cv = check_cv(cv, X, y, classifier=is_classifier(estimator))

    if self.verbose > 0:
        if isinstance(parameter_iterable, Sized):
            n_candidates = len(parameter_iterable)
            print("Fitting {0} folds for each of {1} candidates, totalling"
                  " {2} fits".format(len(cv), n_candidates,
                                     n_candidates * len(cv)))

    base_estimator = clone(self.estimator)

    pre_dispatch = self.pre_dispatch

    out = Parallel(
        n_jobs=self.n_jobs, verbose=self.verbose,
        pre_dispatch=pre_dispatch
    )(
        delayed(cv_fit_and_score)(clone(base_estimator), X, y, self.scoring,
                                  parameters, cv=cv)
        for parameters in parameter_iterable)

    best = sorted(out, reverse=True)[0]

    self.best_params_ = best[1]
    self.best_score_ = best[0]

    if self.refit:
        # fit the best estimator using the entire dataset
        # clone first to work around broken estimators
        best_estimator = clone(base_estimator).set_params(
            **best[1])
        if y is not None:
            best_estimator.fit(X, y, **self.fit_params)
        else:
            best_estimator.fit(X, **self.fit_params)
        self.best_estimator_ = best_estimator
    return self
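The best candidate is picked by sorting the tuples returned by cv_fit_and_score; each is assumed to start with the mean validation score followed by the parameter dict. A tiny sketch of that selection step with made-up values:

out = [(0.81, {'C': 0.1}), (0.86, {'C': 1.0}), (0.79, {'C': 10.0})]
best = sorted(out, reverse=True)[0]
print(best)  # (0.86, {'C': 1.0}) -> becomes best_score_ and best_params_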