当前位置: 首页>>代码示例>>Python>>正文


Python base.is_classifier函数代码示例

本文整理汇总了Python中sklearn.base.is_classifier函数的典型用法代码示例。如果您正苦于以下问题:Python is_classifier函数的具体用法?Python is_classifier怎么用?Python is_classifier使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了is_classifier函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_is_classifier

def test_is_classifier():
    """Check that ``is_classifier`` is true for an SVC and for wrappers around it.

    The wrappers covered are a grid search, a pipeline, and a pipeline that
    contains a grid search.
    """
    svc = SVC()
    candidates = (
        svc,
        GridSearchCV(svc, {'C': [0.1, 1]}),
        Pipeline([('svc', svc)]),
        Pipeline([('svc_cv', GridSearchCV(svc, {'C': [0.1, 1]}))]),
    )
    for candidate in candidates:
        assert_true(is_classifier(candidate))
开发者ID:AlexisMignon,项目名称:scikit-learn,代码行数:7,代码来源:test_base.py

示例2: fit

    def fit(self, X, y):
        """Fit main effects, then interaction terms, on ``(X, y)``.

        For a classifier the labels are first replaced by their index into
        the sorted unique classes (stored in ``classes_``). The data is split
        into train/validation parts, a ``NativeEBM`` is built on that split to
        fit main effects and select interaction indices, and finally
        ``staged_fit_interactions`` is run on the full data.

        Returns:
            self
        """
        classifying = is_classifier(self)
        if classifying:
            # np.unique(return_inverse=True) yields the classes and the
            # integer-encoded labels in one pass.
            self.classes_, y = np.unique(y, return_inverse=True)
            self.num_classes_ = len(self.classes_)
            model_type = "classification"
        else:
            # -1 is the value handed to NativeEBM when not classifying.
            self.num_classes_ = -1
            model_type = "regression"

        # Hold out a validation part; stratify only for classification.
        X_train, X_val, y_train, y_val = train_test_split(
            X,
            y,
            test_size=self.holdout_split,
            random_state=self.random_state,
            stratify=(y if classifying else None),
        )

        self.attributes_ = EBMUtils.gen_attributes(self.col_types, self.col_n_bins)

        # Reset fitted state before training.
        self.intercept_ = 0
        self.attribute_sets_ = []
        self.attribute_set_models_ = []

        # One single-attribute set per attribute for the main effects.
        singleton_indices = [[idx] for idx in range(len(self.attributes_))]
        main_attr_sets = EBMUtils.gen_attribute_sets(singleton_indices)

        booster = NativeEBM(
            self.attributes_,
            main_attr_sets,
            X_train,
            y_train,
            X_val,
            y_val,
            num_inner_bags=self.feature_step_n_inner_bags,
            num_classification_states=self.num_classes_,
            model_type=model_type,
            training_scores=None,
            validation_scores=None,
        )
        # closing() guarantees the native object is closed after use.
        with closing(booster) as native_ebm:
            self._fit_main(native_ebm, main_attr_sets)
            self.inter_indices_ = self._build_interactions(native_ebm)

        self.staged_fit_interactions(X, y, self.inter_indices_)

        return self
开发者ID:caskeep,项目名称:interpret,代码行数:53,代码来源:ebm.py

示例3: _fit

    def _fit(self, X, y, parameter_iterable):
        """Search ``parameter_iterable`` for the best parameters and optionally refit.

        Stores ``scorer_``, ``best_params_`` and ``best_score_`` on the
        instance; when ``self.refit`` is true, also fits the estimator with
        the best parameters on all the data and stores it as
        ``best_estimator_``.

        Returns:
            self
        """
        self.scorer_ = check_scoring(self.estimator, scoring=self.scoring)
        X, y = indexable(X, y)
        cv = check_cv(self.cv, X, y, classifier=is_classifier(self.estimator))
        base_estimator = clone(self.estimator)

        # Build the search and evaluate it immediately.
        winner = best_parameters(
            base_estimator, cv, X, y, parameter_iterable,
            self.scorer_, self.fit_params, self.iid,
        ).compute()

        self.best_params_ = winner.parameters
        self.best_score_ = winner.mean_validation_score

        # A Pipeline is converted through to_sklearn() and computed before
        # any refit.
        if isinstance(base_estimator, Pipeline):
            base_estimator = base_estimator.to_sklearn().compute()

        if not self.refit:
            return self

        # Fit the best configuration on the entire dataset; y is only passed
        # when it was supplied.
        best_estimator = base_estimator.set_params(**winner.parameters)
        fit_args = (X,) if y is None else (X, y)
        self.best_estimator_ = best_estimator.fit(*fit_args, **self.fit_params)
        return self
开发者ID:konggas,项目名称:dasklearn,代码行数:27,代码来源:grid_search.py

示例4: permutation_test_score

def permutation_test_score(estimator, X, y, groups=None, cv=None,
                           n_permutations=100, n_jobs=1, random_state=0,
                           verbose=0, scoring=None):
    """
    Compute cross-validated scores on permuted targets, as in test 1 of
    [Ojala2010]_.

    Variant of sklearn's permutation test score function that returns only
    the array of permutation scores, leaving the p-value computation to the
    caller so the scores can be reused.

    Returns
    -------
    permutation_scores : ndarray
        One entry per permutation (``n_permutations`` in total).


    .. [Ojala2010] Ojala and Garriga. Permutation Tests for Studying Classifier
                   Performance.  The Journal of Machine Learning Research (2010)
                   vol. 11

    """
    X, y, groups = indexable(X, y, groups)

    cv = check_cv(cv, y, classifier=is_classifier(estimator))
    scorer = check_scoring(estimator, scoring=scoring)
    random_state = check_random_state(random_state)

    pool = Parallel(n_jobs=n_jobs, verbose=verbose)
    # Each task gets its own clone of the estimator and a freshly shuffled y,
    # so permutations are independent and the estimator stays pickle-able.
    tasks = (
        delayed(_permutation_test_score)(
            clone(estimator), X, _shuffle(y, groups, random_state),
            groups, cv, scorer)
        for _ in range(n_permutations)
    )
    return np.array(pool(tasks))
开发者ID:oesteban,项目名称:mriqc,代码行数:32,代码来源:_validation.py

示例5: cross_val_score

def cross_val_score(estimator, X, y=None, groups=None, scoring=None, cv=None,
                    n_jobs=1, verbose=0, fit_params=None,
                    pre_dispatch='2*n_jobs'):
    """
    Evaluate one or several scores by cross-validation.

    ``scoring`` may be a single scoring spec or a list/tuple of them; a single
    spec is wrapped in a list. Returns a tuple ``(scores, group_order)`` where
    ``scores`` is the squeezed array of per-fold results and ``group_order``
    lists, for each fold, the first group of its test samples (empty when the
    cv object has no ``groups`` attribute).
    """
    scoring = scoring if isinstance(scoring, (list, tuple)) else [scoring]

    X, y, groups = indexable(X, y, groups)

    cv = check_cv(cv, y, classifier=is_classifier(estimator))
    # Materialize the splits: they are needed both for fitting and for the
    # group bookkeeping below.
    splits = list(cv.split(X, y, groups))
    scorer = [check_scoring(estimator, scoring=spec) for spec in scoring]

    # Clone the estimator per fold so folds are independent and pickle-able.
    run_folds = Parallel(n_jobs=n_jobs, verbose=verbose,
                         pre_dispatch=pre_dispatch)
    fold_scores = run_folds(
        delayed(_fit_and_score)(clone(estimator), X, y, scorer,
                                train_idx, test_idx, verbose, None,
                                fit_params)
        for train_idx, test_idx in splits
    )

    group_order = []
    if hasattr(cv, 'groups'):
        for _, test_idx in splits:
            fold_groups = np.array(cv.groups)[test_idx]
            group_order.append(fold_groups.tolist()[0])
    return np.squeeze(np.array(fold_scores)), group_order
开发者ID:oesteban,项目名称:mriqc,代码行数:27,代码来源:_validation.py

示例6: score_fn

 def score_fn(est, X, y, drop_indices):
     """Return a lower-is-better score: negated ROC AUC or mean squared error.

     Classifiers are scored by the negated ROC AUC of the second
     probability column; everything else by mean squared error.
     """
     # NOTE(review): `est` is only used for the classifier check; predictions
     # are made with `estimator`, which is not defined in this function and
     # presumably comes from the enclosing scope -- confirm this is intended.
     if not is_classifier(est):
         predictions = EBMUtils.regressor_predict(X, estimator, drop_indices)
         return mean_squared_error(y, predictions)

     probabilities = EBMUtils.classifier_predict_proba(X, estimator, drop_indices)
     return -1.0 * roc_auc_score(y, probabilities[:, 1])
开发者ID:caskeep,项目名称:interpret,代码行数:7,代码来源:ebm.py

示例7: build_graph

def build_graph(estimator, cv, scorer, candidate_params, X, y=None,
                groups=None, fit_params=None, iid=True, refit=True,
                error_score='raise', return_train_score=True, cache_cv=True):
    """Build the task graph for a cross-validated parameter search.

    Entries are added to a fresh dict ``dsk`` for the CV split, the optional
    per-split sample counts (when ``iid`` is true), the fit-and-score tasks,
    the candidate parameters, the CV results and, when ``refit`` is true, the
    best parameters and the refitted best estimator.

    Returns
    -------
    dsk : dict
        The graph built by this function.
    keys : list
        The CV-results key, followed by the best-estimator key when
        ``refit`` is true.
    n_splits
        The value returned by ``compute_n_splits(cv, X, y, groups)``.
    """

    X, y, groups = to_indexable(X, y, groups)
    cv = check_cv(cv, y, is_classifier(estimator))
    # "pairwise" estimators require a different graph for CV splitting
    is_pairwise = getattr(estimator, '_pairwise', False)

    dsk = {}
    # to_keys receives dsk; presumably it registers the data in the graph and
    # returns the names under which it is stored -- TODO confirm.
    X_name, y_name, groups_name = to_keys(dsk, X, y, groups)
    n_splits = compute_n_splits(cv, X, y, groups)

    if fit_params:
        # A mapping of {name: (name, graph-key)}
        param_values = to_indexable(*fit_params.values(), allow_scalars=True)
        fit_params = {k: (k, v) for (k, v) in
                      zip(fit_params, to_keys(dsk, *param_values))}
    else:
        fit_params = {}

    fields, tokens, params = normalize_params(candidate_params)
    # main_token is the suffix shared by every key name created below.
    main_token = tokenize(normalize_estimator(estimator), fields, params,
                          X_name, y_name, groups_name, fit_params, cv,
                          error_score == 'raise', return_train_score)

    cv_name = 'cv-split-' + main_token
    dsk[cv_name] = (cv_split, cv, X_name, y_name, groups_name,
                    is_pairwise, cache_cv)

    if iid:
        # Per-split sample counts, passed to create_cv_results as `weights`.
        weights = 'cv-n-samples-' + main_token
        dsk[weights] = (cv_n_samples, cv_name)
    else:
        weights = None

    scores = do_fit_and_score(dsk, main_token, estimator, cv_name, fields,
                              tokens, params, X_name, y_name, fit_params,
                              n_splits, error_score, scorer,
                              return_train_score)

    cv_results = 'cv-results-' + main_token
    candidate_params_name = 'cv-parameters-' + main_token
    dsk[candidate_params_name] = (decompress_params, fields, params)
    dsk[cv_results] = (create_cv_results, scores, candidate_params_name,
                       n_splits, error_score, weights)
    keys = [cv_results]

    if refit:
        best_params = 'best-params-' + main_token
        dsk[best_params] = (get_best_params, candidate_params_name, cv_results)
        best_estimator = 'best-estimator-' + main_token
        if fit_params:
            # Re-express the {name: (name, graph-key)} mapping as a
            # (dict, (zip, names, graph-keys)) task tuple; presumably it is
            # evaluated into a plain dict when the graph runs -- TODO confirm.
            fit_params = (dict, (zip, list(fit_params.keys()),
                                list(pluck(1, fit_params.values()))))
        # The refit uses a clone of the estimator, not the original object.
        dsk[best_estimator] = (fit_best, clone(estimator), best_params,
                               X_name, y_name, fit_params)
        keys.append(best_estimator)

    return dsk, keys, n_splits
开发者ID:jcrist,项目名称:dask-learn,代码行数:60,代码来源:model_selection.py

示例8: explain_local

    def explain_local(self, X, y=None, name=None):
        """Build a per-instance ("local") explanation for the rows of ``X``.

        For every row, collects the feature names, the model score of each
        attribute set and the raw value of the attribute (empty string for
        sets with more than one attribute), plus the intercept under
        ``"extra"`` and a ``"perf"`` entry from ``perf_dict``.

        Returns:
            An ``EBMExplanation`` of type ``"local"``.
        """
        if name is None:
            name = gen_name_from_class(self)

        X, y, _, _ = unify_data(X, y, self.feature_names, self.feature_types)
        instances = self.preprocessor_.transform(X)
        scores_gen = EBMUtils.scores_by_attrib_set(
            instances, self.attribute_sets_, self.attribute_set_models_
        )

        n_rows = instances.shape[0]
        # One independent record per row; each starts with only the intercept.
        data_dicts = [
            {
                "type": "univariate",
                "names": [],
                "scores": [],
                "values": [],
                "extra": {
                    "names": ["Intercept"],
                    "scores": [self.intercept_],
                    "values": [1],
                },
            }
            for _ in range(n_rows)
        ]

        for set_idx, attribute_set, set_scores in scores_gen:
            for row_idx, row_dict in enumerate(data_dicts):
                row_dict["names"].append(self.feature_names[set_idx])
                row_dict["scores"].append(set_scores[row_idx])
                if attribute_set["n_attributes"] == 1:
                    # Single-attribute set: report the raw feature value.
                    value = X[row_idx, attribute_set["attributes"][0]]
                else:
                    value = ""
                row_dict["values"].append(value)

        # Overall prediction per row: second probability column for
        # classifiers, plain prediction otherwise.
        if is_classifier(self):
            scores = EBMUtils.classifier_predict_proba(instances, self)[:, 1]
        else:
            scores = EBMUtils.regressor_predict(instances, self)

        for row_idx, row_dict in enumerate(data_dicts):
            row_dict["perf"] = perf_dict(y, scores, row_idx)

        selector = gen_local_selector(instances, y, scores)
        internal_obj = {"overall": None, "specific": data_dicts}

        return EBMExplanation(
            "local",
            internal_obj,
            feature_names=self.feature_names,
            feature_types=self.feature_types,
            name=name,
            selector=selector,
        )
开发者ID:caskeep,项目名称:interpret,代码行数:60,代码来源:ebm.py

示例9: benchmark

def benchmark(clf, X, y, cv=None):
    """Fit and evaluate ``clf`` on every CV fold, recording timings and results.

    Returns a dict of arrays with one entry per fold: ``train_times``,
    ``test_times``, ``confusion_matrices``, ``confusion_matrix_indices``
    (test-set positions mapped back to original sample indices) and ``coefs``
    (the fitted ``clf.coef_``).
    """
    X, y = check_arrays(X, y, sparse_format='csr', allow_lists=True)
    cv = check_cv(cv, X, y, classifier=is_classifier(clf))

    fit_durations = []
    predict_durations = []
    matrices = []
    matrix_indices = []
    fold_coefs = []

    for train, test in cv:
        X_train, y_train = X[train], y[train]
        X_test, y_test = X[test], y[test]

        started = time()
        clf.fit(X_train, y_train)
        fit_durations.append(time() - started)

        started = time()
        y_pred = clf.predict(X_test)
        predict_durations.append(time() - started)

        matrices.append(confusion_matrix(y_test, y_pred))
        # Translate positions within the test fold into original indices.
        instances = confusion_matrix_instances(y_test, y_pred)
        matrix_indices.append(
            np.array([[test[pred] for pred in true] for true in instances])
        )

        fold_coefs.append(clf.coef_)

    return {
        'train_times': np.array(fit_durations),
        'test_times': np.array(predict_durations),
        'confusion_matrices': np.array(matrices),
        'confusion_matrix_indices': np.array(matrix_indices),
        'coefs': np.array(fold_coefs),
    }
开发者ID:EdwardBetts,项目名称:twitter-sentiment,代码行数:35,代码来源:evaluation.py

示例10: _is_classifier

    def _is_classifier(self):
        """Tell whether the wrapped model should be treated as a classifier.

        A model counts as a classifier when ``is_classifier`` says so or when
        it exposes a ``predict_proba`` attribute.

        Return:
            (boolean) whether `self.model` is a classifier
        """
        if is_classifier(self.model):
            return True
        return hasattr(self.model, 'predict_proba')
开发者ID:RamyaGuru,项目名称:matminer,代码行数:7,代码来源:base.py

示例11: add_del_cv

def add_del_cv(df, predictors, target, model, scoring='roc_auc', cv1=None,
               n_folds=8, n_jobs=-1, start=[], selmax=None, selmin=1,
               min_ratio=1e-7, max_steps=10, verbosity=0):
    """ Forward-Backward (ADD-DEL) selection using model.

    Alternates a forward pass (``forward_cv``) and a backward pass
    (``backward_cv``) for at most ``max_steps`` rounds, stopping early once
    the selected set stops changing.

    Parameters
    ----------
    df, predictors, target
        Passed to ``df_xyf`` to build X and y, and forwarded to the
        forward/backward passes.
    model
        Estimator forwarded to the passes; also decides (via
        ``is_classifier``) how ``cv1`` is resolved by ``check_cv``.
    scoring, cv1, n_folds, n_jobs, selmax, selmin, min_ratio
        Forwarded to ``forward_cv`` / ``backward_cv``.
    start : list
        Initial selection. It is copied, so the caller's list (and the
        shared default) is never aliased.
    max_steps : int
        Maximum number of forward+backward rounds.
    verbosity : int
        When > 0 the selection is printed after each pass; the passes
        themselves are called with ``verbosity - 1``.

    Returns
    -------
    selected: list
        selected predictors

    Example
    -------
    References
    ----------
    """
    def test_to_break(selected, selected_curr, to_break):
        # Count consecutive passes that left the selection unchanged.
        if set(selected) == set(selected_curr):
            return to_break + 1
        return 0

    X, y, _ = df_xyf(df, predictors=predictors, target=target)
    cv1 = cross_validation.check_cv(
            cv1, X=X, y=y,
            classifier=is_classifier(model))

    # Copy so the mutable default / caller's list cannot be modified through
    # this alias by the selection passes.
    selected_curr = list(start)
    to_break = 0

    # range (not the Python-2-only xrange) so this runs on Python 3 as well.
    for i_step in range(max_steps):
        selected = forward_cv(
                        df, predictors, target, model, scoring=scoring,
                        cv1=cv1, n_folds=n_folds, n_jobs=n_jobs,
                        start=selected_curr, selmax=selmax,
                        min_ratio=min_ratio, verbosity=verbosity-1)
        to_break = test_to_break(selected, selected_curr, to_break)
        selected_curr = selected
        if verbosity > 0:
            print('forward:', ' '.join(selected_curr))
        # After a forward pass, stop only on two unchanged passes in a row.
        if to_break > 1:
            break
        selected = backward_cv(
                        df, selected_curr, target, model, scoring=scoring,
                        cv1=cv1, n_folds=n_folds, n_jobs=n_jobs, selmin=selmin,
                        min_ratio=min_ratio, verbosity=verbosity-1)
        to_break = test_to_break(selected, selected_curr, to_break)
        selected_curr = selected
        if verbosity > 0:
            print('backward:', ' '.join(selected_curr))
        # After a backward pass, a single unchanged pass is enough to stop.
        if to_break > 0:
            break

    return selected_curr
开发者ID:orazaro,项目名称:kgml,代码行数:57,代码来源:feature_selection.py

示例12: transform

 def transform(self, X, y=None):
     """Stack out-of-fold class probabilities and predictions as features.

     Each fitted estimator in ``self.estimators_`` is paired with one CV
     fold and predicts on that fold's test indices only.

     Returns:
         Array with ``self.n_classes`` probability columns followed by one
         prediction column, one row per row of ``X``.
     """
     cv = check_cv(self.cv, y, classifier=is_classifier(self.estimator))

     X_prob = np.zeros((X.shape[0], self.n_classes))
     X_pred = np.zeros(X.shape[0])

     # Pass y to split(): stratified splitters (chosen by check_cv for a
     # classifier with class labels) require it, and splitters that do not
     # need y ignore it.
     for estimator, (_, test) in zip(self.estimators_, cv.split(X, y)):
         X_prob[test] = estimator.predict_proba(X[test])
         X_pred[test] = estimator.predict(X[test])
     # Append the predictions as a single trailing column.
     return np.hstack([X_prob, np.array([X_pred]).T])
开发者ID:mengli,项目名称:PcmAudioRecorder,代码行数:10,代码来源:pipline.py

示例13: fit

 def fit(self, X, y):
     """Fit one clone of ``self.estimator`` per CV training fold.

     Labels come from ``self._get_labels(y)``; the fitted clones are stored
     in ``self.estimators_`` in fold order.

     Returns:
         self
     """
     labels = self._get_labels(y)
     splitter = check_cv(self.cv, labels, classifier=is_classifier(self.estimator))

     fitted = self.estimators_ = []
     for train_idx, _ in splitter.split(X, labels):
         fold_model = clone(self.estimator)
         fitted.append(fold_model.fit(X[train_idx], labels[train_idx]))
     return self
开发者ID:mengli,项目名称:PcmAudioRecorder,代码行数:10,代码来源:pipline.py

示例14: _set_cv

def _set_cv(cv, estimator=None, X=None, y=None):
    """Set the default cross-validation depending on whether clf is classifier
    or regressor.

    Parameters
    ----------
    cv : int | str | object
        An integer number of folds (Python or NumPy integer), the name of a
        cross-validation class in the sklearn module in use, or anything
        ``check_cv`` accepts.
    estimator : object | 'classifier' | 'regressor'
        Either an estimator, inspected with ``is_classifier``, or one of the
        two literal strings.
    X, y
        Data and target used to build and materialize the splits.

    Returns
    -------
    cv : object
        The resolved cross-validation object.
    cv_splits : list of (train, test)
        The materialized splits.

    Raises
    ------
    ValueError
        If ``cv`` names an unknown cross-validation class, or if a fold has
        an empty training set.
    NotImplementedError
        On the legacy sklearn branch, for named classes other than
        ``KFold`` and ``LeaveOneOut``.
    """

    from sklearn.base import is_classifier

    # Detect whether classification or regression
    if estimator in ['classifier', 'regressor']:
        est_is_classifier = estimator == 'classifier'
    else:
        est_is_classifier = is_classifier(estimator)

    # np.integer covers NumPy integer scalars; the former `np.int` alias of
    # the builtin int no longer exists in recent NumPy releases.
    int_types = (int, np.integer)

    # Setup CV
    if check_version('sklearn', '0.18'):
        from sklearn import model_selection as models
        from sklearn.model_selection import (check_cv, StratifiedKFold, KFold)
        if isinstance(cv, int_types):
            XFold = StratifiedKFold if est_is_classifier else KFold
            cv = XFold(n_splits=cv)
        elif isinstance(cv, str):
            if not hasattr(models, cv):
                raise ValueError('Unknown cross-validation')
            cv = getattr(models, cv)
            cv = cv()
        cv = check_cv(cv=cv, y=y, classifier=est_is_classifier)
    else:
        # Legacy API: splitters are built from the data itself.
        from sklearn import cross_validation as models
        from sklearn.cross_validation import (check_cv, StratifiedKFold, KFold)
        if isinstance(cv, int_types):
            if est_is_classifier:
                cv = StratifiedKFold(y=y, n_folds=cv)
            else:
                cv = KFold(n=len(y), n_folds=cv)
        elif isinstance(cv, str):
            if not hasattr(models, cv):
                raise ValueError('Unknown cross-validation')
            cv = getattr(models, cv)
            if cv.__name__ not in ['KFold', 'LeaveOneOut']:
                raise NotImplementedError('CV cannot be defined with str for'
                                          ' sklearn < .017.')
            cv = cv(len(y))
        cv = check_cv(cv=cv, X=X, y=y, classifier=est_is_classifier)

    # Extract train and test set to retrieve them at predict time
    if hasattr(cv, 'split'):
        # A zero array shaped like y stands in for X when generating splits.
        cv_splits = [(train, test) for train, test in
                     cv.split(X=np.zeros_like(y), y=y)]
    else:
        # XXX support sklearn.cross_validation cv
        cv_splits = [(train, test) for train, test in cv]

    if not np.all([len(train) for train, _ in cv_splits]):
        raise ValueError('Some folds do not have any train epochs.')

    return cv, cv_splits
开发者ID:annapasca,项目名称:mne-python,代码行数:54,代码来源:base.py

示例15: fit

    def fit(self, X, y):
        """Actual fitting,  performing the search over parameters.

        Samples ``self.n_iter`` parameter settings from
        ``self.param_distributions``, scores each one with
        ``cv_fit_and_score`` and records the best as ``best_params_`` /
        ``best_score_``. When ``self.refit`` is true, a fresh clone with the
        best parameters is fitted on all the data and stored as
        ``best_estimator_``.

        Returns:
            self

        Raises:
            ValueError: if ``y`` is given and its length differs from the
                number of samples in ``X``.
        """

        parameter_iterable = ParameterSampler(self.param_distributions,
                                              self.n_iter,
                                              random_state=self.random_state)
        estimator = self.estimator
        cv = self.cv

        n_samples = _num_samples(X)
        X, y = indexable(X, y)

        if y is not None:
            if len(y) != n_samples:
                raise ValueError('Target variable (y) has a different number '
                                 'of samples (%i) than data (X: %i samples)'
                                 % (len(y), n_samples))
        cv = check_cv(cv, X, y, classifier=is_classifier(estimator))

        if self.verbose > 0:
            if isinstance(parameter_iterable, Sized):
                n_candidates = len(parameter_iterable)
                # NOTE(review): len(cv) assumes the resolved cv object is
                # sized -- confirm for the sklearn version in use.
                print("Fitting {0} folds for each of {1} candidates, totalling"
                      " {2} fits".format(len(cv), n_candidates,
                                         n_candidates * len(cv)))

        base_estimator = clone(self.estimator)

        pre_dispatch = self.pre_dispatch

        out = Parallel(
            n_jobs=self.n_jobs, verbose=self.verbose,
            pre_dispatch=pre_dispatch
        )(
            delayed(cv_fit_and_score)(clone(base_estimator), X, y, self.scoring,
                                      parameters, cv=cv)
            for parameters in parameter_iterable)

        # Pick the highest score by comparing scores only. Sorting the whole
        # result tuples would, on tied scores, fall through to comparing the
        # parameter entries, which fails for dicts on Python 3.
        best = max(out, key=lambda result: result[0])
        self.best_params_ = best[1]
        self.best_score_ = best[0]

        if self.refit:
            # fit the best estimator using the entire dataset
            # clone first to work around broken estimators
            best_estimator = clone(base_estimator).set_params(
                **best[1])
            if y is not None:
                best_estimator.fit(X, y, **self.fit_params)
            else:
                best_estimator.fit(X, **self.fit_params)
            self.best_estimator_ = best_estimator

        return self
开发者ID:MD2Korg,项目名称:cStress-model,代码行数:54,代码来源:puffMarker.py


注:本文中的sklearn.base.is_classifier函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。