Python sklearn.model_selection方法代码示例

本文整理汇总了Python中sklearn.model_selection方法的典型用法代码示例。如果您正苦于以下问题：Python sklearn.model_selection方法的具体用法？Python sklearn.model_selection怎么用？Python sklearn.model_selection使用的例子？那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn的用法示例。

在下文中一共展示了sklearn.model_selection方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: _create_classifier

# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import model_selection [as 别名]
def _create_classifier(self, num_threads, y):
        from sklearn.model_selection import GridSearchCV
        from sklearn.svm import SVC

        C = self.component_config["C"]
        kernels = self.component_config["kernels"]
        # dirty str fix because sklearn is expecting
        # str not instance of basestr...
        tuned_parameters = [{"C": C,
                             "kernel": [str(k) for k in kernels]}]

        # aim for 5 examples in each fold

        cv_splits = self._num_cv_splits(y)

        return GridSearchCV(SVC(C=1,
                                probability=True,
                                class_weight='balanced'),
                            param_grid=tuned_parameters,
                            n_jobs=num_threads,
                            cv=cv_splits,
                            scoring='f1_weighted',
                            verbose=1)

开发者ID:crownpku，项目名称:Rasa_NLU_Chi，代码行数:25，代码来源:sklearn_intent_classifier.py

示例2: _cv_len

# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import model_selection [as 别名]
def _cv_len(cv, X, y):
    """This method computes the length of a cross validation
    object, agnostic of whether sklearn-0.17 or sklearn-0.18
    is being used.

    Parameters
    ----------

    cv : `sklearn.cross_validation._PartitionIterator` or `sklearn.model_selection.BaseCrossValidator`
        The cv object from which to extract length. If using
        sklearn-0.17, this can be computed by calling `len` on
        ``cv``, else it's computed with `cv.get_n_splits(X, y)`.

    X : pd.DataFrame or np.ndarray, shape(n_samples, n_features)
        The dataframe or np.ndarray being fit in the grid search.

    y : np.ndarray, shape(n_samples,)
        The target being fit in the grid search.

    Returns
    -------

    int
    """
    return len(cv) if not SK18 else cv.get_n_splits(X, y)

开发者ID:tgsmith61591，项目名称:skutil，代码行数:27，代码来源:fixes.py

示例3: stratified_kfold_indices

# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import model_selection [as 别名]
def stratified_kfold_indices(samples, **xval_kw):
        """
        TODO: check xval label frequency


        """
        from sklearn import model_selection

        X = np.empty((len(samples), 0))
        y = samples.encoded_1d().values
        groups = samples.group_ids

        type_ = xval_kw.pop('type', 'StratifiedGroupKFold')
        if type_ == 'StratifiedGroupKFold':
            assert groups is not None
            # FIXME: The StratifiedGroupKFold could be implemented better.
            splitter = sklearn_utils.StratifiedGroupKFold(**xval_kw)
            skf_list = list(splitter.split(X=X, y=y, groups=groups))
        elif type_ == 'StratifiedKFold':
            splitter = model_selection.StratifiedKFold(**xval_kw)
            skf_list = list(splitter.split(X=X, y=y))
        return skf_list

开发者ID:Erotemic，项目名称:ibeis，代码行数:24，代码来源:clf_helpers.py

示例4: gen_crossval_idxs

# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import model_selection [as 别名]
def gen_crossval_idxs(problem, n_folds=2):
        y = problem.ds.target
        rng = 43432
        if hasattr(problem.ds, 'nids'):
            # Ensure that an individual does not appear in both the train
            # and the test dataset
            from ibeis_cnn.dataset import stratified_kfold_label_split
            labels = problem.ds.nids
            _iter = stratified_kfold_label_split(y, labels, n_folds=n_folds, rng=rng)
        else:
            xvalkw = dict(n_folds=n_folds, shuffle=True, random_state=rng)
            import sklearn.cross_validation
            skf = sklearn.cross_validation.StratifiedKFold(y, **xvalkw)
            _iter = skf
            #import sklearn.model_selection
            #skf = sklearn.model_selection.StratifiedKFold(**xvalkw)
            #_iter = skf.split(X=np.empty(len(y)), y=y)
        msg = 'cross-val test on %s' % (problem.ds.name)
        progiter = ut.ProgIter(_iter, length=n_folds, lbl=msg)
        for train_idx, test_idx in progiter:
            yield train_idx, test_idx


# @ut.reloadable_class

开发者ID:Erotemic，项目名称:ibeis，代码行数:26，代码来源:classify_shark.py

示例5: init

# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import model_selection [as 别名]
def __init__(self,
                 component_config: Dict[Text, Any] = None,
                 clf: 'sklearn.model_selection.GridSearchCV' = None,
                 le: Optional['sklearn.preprocessing.LabelEncoder'] = None
                 ) -> None:
        """Construct a new intent classifier using the sklearn framework."""
        from sklearn.preprocessing import LabelEncoder

        super(SklearnIntentClassifier, self).__init__(component_config)

        if le is not None:
            self.le = le
        else:
            self.le = LabelEncoder()
        self.clf = clf

        _sklearn_numpy_warning_fix()

开发者ID:weizhenzhao，项目名称:rasa_nlu，代码行数:19，代码来源:sklearn_intent_classifier.py

示例6: _create_classifier

# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import model_selection [as 别名]
def _create_classifier(self, num_threads, y):
        from sklearn.model_selection import GridSearchCV
        from sklearn.svm import SVC

        C = self.component_config["C"]
        kernels = self.component_config["kernels"]
        gamma = self.component_config["gamma"]
        # dirty str fix because sklearn is expecting
        # str not instance of basestr...
        tuned_parameters = [{"C": C,
                             "gamma": gamma,
                             "kernel": [str(k) for k in kernels]}]

        # aim for 5 examples in each fold

        cv_splits = self._num_cv_splits(y)

        return GridSearchCV(SVC(C=1,
                                probability=True,
                                class_weight='balanced'),
                            param_grid=tuned_parameters,
                            n_jobs=num_threads,
                            cv=cv_splits,
                            scoring=self.component_config['scoring_function'],
                            verbose=1)

开发者ID:weizhenzhao，项目名称:rasa_nlu，代码行数:27，代码来源:sklearn_intent_classifier.py

示例7: init

# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import model_selection [as 别名]
def __init__(
        self,
        component_config: Optional[Dict[Text, Any]] = None,
        clf: "sklearn.model_selection.GridSearchCV" = None,
        le: Optional["sklearn.preprocessing.LabelEncoder"] = None,
    ) -> None:
        """Construct a new intent classifier using the sklearn framework."""
        from sklearn.preprocessing import LabelEncoder

        super().__init__(component_config)

        if le is not None:
            self.le = le
        else:
            self.le = LabelEncoder()
        self.clf = clf

开发者ID:botfront，项目名称:rasa-for-botfront，代码行数:18，代码来源:sklearn_intent_classifier.py

示例8: init

# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import model_selection [as 别名]
def __init__(self,
                 component_config=None,  # type: Dict[Text, Any]
                 clf=None,  # type: sklearn.model_selection.GridSearchCV
                 le=None  # type: sklearn.preprocessing.LabelEncoder
                 ):
        # type: (...) -> None
        """Construct a new intent classifier using the sklearn framework."""
        from sklearn.preprocessing import LabelEncoder

        super(SklearnIntentClassifier, self).__init__(component_config)

        if le is not None:
            self.le = le
        else:
            self.le = LabelEncoder()
        self.clf = clf

        _sklearn_numpy_warning_fix()

开发者ID:crownpku，项目名称:Rasa_NLU_Chi，代码行数:20，代码来源:sklearn_intent_classifier.py

示例9: _set_cv

# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import model_selection [as 别名]
def _set_cv(cv, X, y, classifier):
    """This method returns either a `sklearn.cross_validation._PartitionIterator` or 
    `sklearn.model_selection.BaseCrossValidator` depending on whether sklearn-0.17
    or sklearn-0.18 is being used.

    Parameters
    ----------

    cv : int, `_PartitionIterator` or `BaseCrossValidator`
        The CV object or int to check. If an int, will be converted
        into the appropriate class of crossvalidator.

    X : pd.DataFrame or np.ndarray, shape(n_samples, n_features)
        The dataframe or np.ndarray being fit in the grid search.

    y : np.ndarray, shape(n_samples,)
        The target being fit in the grid search.

    classifier : bool
        Whether the estimator being fit is a classifier

    Returns
    -------

    `_PartitionIterator` or `BaseCrossValidator`
    """
    return check_cv(cv, X, y, classifier) if not SK18 else check_cv(cv, y, classifier)

开发者ID:tgsmith61591，项目名称:skutil，代码行数:29，代码来源:fixes.py

示例10: build_split_dict

# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import model_selection [as 别名]
def build_split_dict(X: pd.DataFrame, split_obj: Type[BaseCrossValidator]) -> dict:
        """
        Get dictionary of cross-validation training dataset split metadata

        Parameters
        ----------
        X: pd.DataFrame
            The training dataset that will be split during cross-validation.
        split_obj: Type[sklearn.model_selection.BaseCrossValidator]
            The cross-validation object that returns train, test indices for splitting.

        Returns
        -------
        split_metadata: Dict[str,Any]
            Dictionary of cross-validation train/test split metadata
        """
        split_metadata: Dict[str, Any] = dict()
        for i, (train_ind, test_ind) in enumerate(split_obj.split(X)):
            split_metadata.update(
                {
                    f"fold-{i+1}-train-start": X.index[train_ind[0]],
                    f"fold-{i+1}-train-end": X.index[train_ind[-1]],
                    f"fold-{i+1}-test-start": X.index[test_ind[0]],
                    f"fold-{i+1}-test-end": X.index[test_ind[-1]],
                }
            )
            split_metadata.update({f"fold-{i+1}-n-train": len(train_ind)})
            split_metadata.update({f"fold-{i+1}-n-test": len(test_ind)})
        return split_metadata

开发者ID:equinor，项目名称:gordo，代码行数:31，代码来源:build_model.py

示例11: testdata_smk

# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import model_selection [as 别名]
def testdata_smk(*args, **kwargs):
    """
    >>> from ibeis.algo.smk.smk_pipeline import *  # NOQA
    >>> kwargs = {}
    """
    import ibeis
    import sklearn
    import sklearn.cross_validation
    # import sklearn.model_selection
    ibs, aid_list = ibeis.testdata_aids(defaultdb='PZ_MTEST')
    nid_list = np.array(ibs.annots(aid_list).nids)
    rng = ut.ensure_rng(0)
    xvalkw = dict(n_folds=4, shuffle=False, random_state=rng)

    skf = sklearn.cross_validation.StratifiedKFold(nid_list, **xvalkw)
    train_idx, test_idx = six.next(iter(skf))
    daids = ut.take(aid_list, train_idx)
    qaids = ut.take(aid_list, test_idx)

    config = {
        'num_words': 1000,
    }
    config.update(**kwargs)
    qreq_ = SMKRequest(ibs, qaids, daids, config)
    smk = qreq_.smk
    #qreq_ = ibs.new_query_request(qaids, daids, cfgdict={'pipeline_root': 'smk', 'proot': 'smk'})
    #qreq_ = ibs.new_query_request(qaids, daids, cfgdict={})
    return ibs, smk, qreq_

开发者ID:Erotemic，项目名称:ibeis，代码行数:30，代码来源:smk_pipeline.py

示例12: subsplit_indices

# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import model_selection [as 别名]
def subsplit_indices(samples, subset_idx, **xval_kw):
        """ split an existing set """
        from sklearn import model_selection

        X = np.empty((len(subset_idx), 0))
        y = samples.encoded_1d().values[subset_idx]
        groups = samples.group_ids[subset_idx]

        xval_kw_ = xval_kw.copy()
        if 'n_splits' not in xval_kw_:
            xval_kw_['n_splits'] = 3
        type_ = xval_kw_.pop('type', 'StratifiedGroupKFold')
        if type_ == 'StratifiedGroupKFold':
            assert groups is not None
            # FIXME: The StratifiedGroupKFold could be implemented better.
            splitter = sklearn_utils.StratifiedGroupKFold(**xval_kw_)
            rel_skf_list = list(splitter.split(X=X, y=y, groups=groups))
        elif type_ == 'StratifiedKFold':
            splitter = model_selection.StratifiedKFold(**xval_kw_)
            rel_skf_list = list(splitter.split(X=X, y=y))

        # map back into original coords
        skf_list = [(subset_idx[rel_idx1], subset_idx[rel_idx2])
                    for rel_idx1, rel_idx2 in rel_skf_list]

        for idx1, idx2 in skf_list:
            assert len(np.intersect1d(subset_idx, idx1)) == len(idx1)
            assert len(np.intersect1d(subset_idx, idx2)) == len(idx2)
            # assert
        return skf_list

开发者ID:Erotemic，项目名称:ibeis，代码行数:32，代码来源:clf_helpers.py

示例13: _create_classifier

# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import model_selection [as 别名]
def _create_classifier(
        self, num_threads: int, y
    ) -> "sklearn.model_selection.GridSearchCV":
        from sklearn.model_selection import GridSearchCV
        from sklearn.svm import SVC

        C = self.component_config["C"]
        kernels = self.component_config["kernels"]
        gamma = self.component_config["gamma"]
        # dirty str fix because sklearn is expecting
        # str not instance of basestr...
        tuned_parameters = [
            {"C": C, "gamma": gamma, "kernel": [str(k) for k in kernels]}
        ]

        # aim for 5 examples in each fold

        cv_splits = self._num_cv_splits(y)

        return GridSearchCV(
            SVC(C=1, probability=True, class_weight="balanced"),
            param_grid=tuned_parameters,
            n_jobs=num_threads,
            cv=cv_splits,
            scoring=self.component_config["scoring_function"],
            verbose=1,
            iid=False,
        )

开发者ID:botfront，项目名称:rasa-for-botfront，代码行数:30，代码来源:sklearn_intent_classifier.py

示例14: test_Shap

# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import model_selection [as 别名]
def test_Shap(self):

        np.random.seed(1)
        X_train, X_test, Y_train, Y_test = train_test_split(*shap.datasets.iris(), test_size=0.2, random_state=0)

        # K-nearest neighbors
        knn = sklearn.neighbors.KNeighborsClassifier()
        knn.fit(X_train, Y_train)
        v = 100*np.sum(knn.predict(X_test) == Y_test)/len(Y_test)
        print("Accuracy = {0}%".format(v))

        # Explain a single prediction from the test set
        shapexplainer = KernelExplainer(knn.predict_proba, X_train)
        shap_values = shapexplainer.explain_instance(X_test.iloc[0,:])  # TODO test against original SHAP Lib
        print('knn X_test iloc_0')
        print(shap_values)
        print(shapexplainer.explainer.expected_value[0])
        print(shap_values[0])

        # Explain all the predictions in the test set
        shap_values = shapexplainer.explain_instance(X_test)
        print('knn X_test')
        print(shap_values)
        print(shapexplainer.explainer.expected_value[0])
        print(shap_values[0])

        # SV machine with a linear kernel
        svc_linear = sklearn.svm.SVC(kernel='linear', probability=True)
        svc_linear.fit(X_train, Y_train)
        v = 100*np.sum(svc_linear.predict(X_test) == Y_test)/len(Y_test)
        print("Accuracy = {0}%".format(v))

        # Explain all the predictions in the test set
        shapexplainer = KernelExplainer(svc_linear.predict_proba, X_train)
        shap_values = shapexplainer.explain_instance(X_test)
        print('svc X_test')
        print(shap_values)
        print(shapexplainer.explainer.expected_value[0])
        print(shap_values[0])

        np.random.seed(1)
        X,y = shap.datasets.adult()
        X_train, X_valid, y_train, y_valid = sklearn.model_selection.train_test_split(X, y, test_size=0.2, random_state=7)

        knn = sklearn.neighbors.KNeighborsClassifier()
        knn.fit(X_train, y_train)

        f = lambda x: knn.predict_proba(x)[:,1]
        med = X_train.median().values.reshape((1,X_train.shape[1]))
        shapexplainer = KernelExplainer(f, med)
        shap_values_single = shapexplainer.explain_instance(X.iloc[0,:], nsamples=1000)
        print('Shap Tabular Example')
        print(shapexplainer.explainer.expected_value)
        print(shap_values_single)
        print("Invoked Shap KernelExplainer")

开发者ID:IBM，项目名称:AIX360，代码行数:57，代码来源:test_shap.py

示例15: gridsearch_ratio_thresh

# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import model_selection [as 别名]
def gridsearch_ratio_thresh(matches):
    import sklearn
    import sklearn.metrics
    import vtool_ibeis as vt
    # Param search for vsone
    import plottool_ibeis as pt
    pt.qt4ensure()

    skf = sklearn.model_selection.StratifiedKFold(n_splits=10,
                                                  random_state=119372)

    y = np.array([m.annot1['nid'] == m.annot2['nid'] for m in matches])

    basis = {'ratio_thresh': np.linspace(.6, .7, 50).tolist()}
    grid = ut.all_dict_combinations(basis)
    xdata = np.array(ut.take_column(grid, 'ratio_thresh'))

    def _ratio_thresh(y_true, match_list):
        # Try and find optional ratio threshold
        auc_list = []
        for cfgdict in ut.ProgIter(grid, lbl='gridsearch'):
            y_score = [
                match.fs.compress(match.ratio_test_flags(cfgdict)).sum()
                for match in match_list
            ]
            auc = sklearn.metrics.roc_auc_score(y_true, y_score)
            auc_list.append(auc)
        auc_list = np.array(auc_list)
        return auc_list

    auc_list = _ratio_thresh(y, matches)
    pt.plot(xdata, auc_list)
    subx, suby = vt.argsubmaxima(auc_list, xdata)
    best_ratio_thresh = subx[suby.argmax()]

    skf_results = []
    y_true = y
    for train_idx, test_idx in skf.split(matches, y):
        match_list_ = ut.take(matches, train_idx)
        y_true = y.take(train_idx)
        auc_list = _ratio_thresh(y_true, match_list_)
        subx, suby = vt.argsubmaxima(auc_list, xdata, maxima_thresh=.8)
        best_ratio_thresh = subx[suby.argmax()]
        skf_results.append(best_ratio_thresh)
    print('skf_results.append = %r' % (np.mean(skf_results),))
    import utool
    utool.embed()

开发者ID:Erotemic，项目名称:ibeis，代码行数:49，代码来源:old_vsone.py

注：本文中的sklearn.model_selection方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。