

Python model_selection.StratifiedKFold Method Code Examples

This article collects typical usage examples of Python's sklearn.model_selection.StratifiedKFold. If you are wondering what model_selection.StratifiedKFold does, how to use it, or what real-world usage looks like, the curated code examples below may help. You can also explore further usage examples from the containing module, sklearn.model_selection.


Below are 15 code examples of model_selection.StratifiedKFold, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
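
Before the collected examples, here is a minimal, self-contained sketch of the basic StratifiedKFold API; the toy X and y below are purely illustrative and not taken from any project referenced on this page.

import numpy as np
from sklearn.model_selection import StratifiedKFold

# Toy data: 12 samples with imbalanced binary labels (8 of class 0, 4 of class 1).
X = np.arange(24).reshape(12, 2)
y = np.array([0] * 8 + [1] * 4)

# Each of the 4 folds keeps roughly the same class ratio as the full data set.
skf = StratifiedKFold(n_splits=4, shuffle=True, random_state=0)
for fold, (train_idx, test_idx) in enumerate(skf.split(X, y)):
    print(fold, "train:", train_idx, "test:", test_idx)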

Example 1: test_stratified_kfold_ratios

# Required module: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import StratifiedKFold [as alias]
def test_stratified_kfold_ratios():
    # Check that stratified kfold preserves class ratios in individual splits
    # Repeat with shuffling turned off and on
    n_samples = 1000
    X = np.ones(n_samples)
    y = np.array([4] * int(0.10 * n_samples) +
                 [0] * int(0.89 * n_samples) +
                 [1] * int(0.01 * n_samples))

    for shuffle in (False, True):
        for train, test in StratifiedKFold(5, shuffle=shuffle).split(X, y):
            assert_almost_equal(np.sum(y[train] == 4) / len(train), 0.10, 2)
            assert_almost_equal(np.sum(y[train] == 0) / len(train), 0.89, 2)
            assert_almost_equal(np.sum(y[train] == 1) / len(train), 0.01, 2)
            assert_almost_equal(np.sum(y[test] == 4) / len(test), 0.10, 2)
            assert_almost_equal(np.sum(y[test] == 0) / len(test), 0.89, 2)
            assert_almost_equal(np.sum(y[test] == 1) / len(test), 0.01, 2) 
Author: PacktPublishing, Project: Mastering-Elasticsearch-7.0, Lines: 19, Source: test_split.py

Example 2: k_fold

# Required module: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import StratifiedKFold [as alias]
def k_fold(dataset, folds):
    skf = StratifiedKFold(folds, shuffle=True, random_state=12345)

    test_indices, train_indices = [], []
    for _, idx in skf.split(torch.zeros(len(dataset)), dataset.data.y):
        test_indices.append(torch.from_numpy(idx).to(torch.long))

    val_indices = [test_indices[i - 1] for i in range(folds)]

    for i in range(folds):
        train_mask = torch.ones(len(dataset), dtype=torch.bool)
        train_mask[test_indices[i]] = 0
        train_mask[val_indices[i]] = 0
        train_indices.append(train_mask.nonzero().view(-1))

    return train_indices, test_indices, val_indices 
Author: rusty1s, Project: pytorch_geometric, Lines: 18, Source: train_eval.py
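
A hedged usage sketch for the k_fold helper above, assuming the function defined in Example 2 is in scope: the _ToyDataset class below is a hypothetical stand-in that only mimics what the function actually touches (len(dataset) and dataset.data.y); a real caller would pass a torch_geometric dataset instead.

import torch
from types import SimpleNamespace

class _ToyDataset:
    """Hypothetical stand-in exposing len() and .data.y like a PyG dataset."""
    def __init__(self, y):
        self.data = SimpleNamespace(y=y)
    def __len__(self):
        return self.data.y.numel()

dataset = _ToyDataset(torch.tensor([0, 1] * 50))        # 100 samples, 2 classes
train_idx, test_idx, val_idx = k_fold(dataset, folds=10)
print(len(train_idx), train_idx[0].shape, test_idx[0].shape)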

Example 3: _split_fold10

# Required module: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import StratifiedKFold [as alias]
def _split_fold10(self, labels, fold_idx=0, seed=0, shuffle=True):
        '''10-fold split'''
        assert 0 <= fold_idx < 10, "fold_idx must be from 0 to 9."

        skf = StratifiedKFold(n_splits=10, shuffle=shuffle, random_state=seed)
        idx_list = []
        for idx in skf.split(np.zeros(len(labels)), labels):    # split(x, y)
            idx_list.append(idx)
        train_idx, valid_idx = idx_list[fold_idx]

        print("train_set : test_set = %d : %d" %
              (len(train_idx), len(valid_idx)))

        return train_idx, valid_idx 
Author: dmlc, Project: dgl, Lines: 18, Source: dataloader.py

Example 4: kfold

# Required module: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import StratifiedKFold [as alias]
def kfold(self, k=5, stratify=False, shuffle=True, seed=33):
        """K-Folds cross validation iterator.

        Parameters
        ----------
        k : int, default 5
        stratify : bool, default False
        shuffle : bool, default True
        seed : int, default 33

        Yields
        -------
        X_train, y_train, X_test, y_test, train_index, test_index
        """
        if stratify:
            kf = StratifiedKFold(n_splits=k, random_state=seed, shuffle=shuffle)
        else:
            kf = KFold(n_splits=k, random_state=seed, shuffle=shuffle)

        for train_index, test_index in kf.split(self.X_train, self.y_train):
            X_train, y_train = idx(self.X_train, train_index), self.y_train[train_index]
            X_test, y_test = idx(self.X_train, test_index), self.y_train[test_index]
            yield X_train, y_train, X_test, y_test, train_index, test_index 
Author: rushter, Project: heamy, Lines: 25, Source: dataset.py

Example 5: cvsplit

# Required module: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import StratifiedKFold [as alias]
def cvsplit(fold, totalfold, mydict):
  '''Get the train/test split for one fold.
  fold is the index of the fold to return, from 0 to totalfold-1;
  totalfold is the number of cross-validation folds;
  mydict is the dict returned by readlabel.'''
  skf = StratifiedKFold(n_splits=totalfold)  # shuffle defaults to False, which is fine here
  #readdicom(mydict)
  # Materialize keys/values as lists so they can be indexed and split (Python 3).
  y = list(mydict.values())
  x = list(mydict.keys())
  count = 0
  for train, test in skf.split(x,y):
    print(len(train), len(test))
    if count == fold:
      #print test
      return train, test
    count += 1 
Author: wentaozhu, Project: deep-mil-for-whole-mammogram-classification, Lines: 18, Source: inbreast.py
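
A brief, hedged usage sketch of cvsplit, assuming the function defined in Example 5 is in scope; the file names and labels in the toy dict stand in for the output of readlabel and are invented for illustration.

# Toy stand-in for the dict that readlabel would return: file name -> label.
mydict = {'case%03d.dcm' % i: (0 if i < 12 else 1) for i in range(20)}

# Fetch the train/test index split for fold 0 of a 4-fold cross-validation.
train_idx, test_idx = cvsplit(fold=0, totalfold=4, mydict=mydict)
print(len(train_idx), len(test_idx))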

Example 6: test_2d_y

# Required module: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import StratifiedKFold [as alias]
def test_2d_y():
    # smoke test for 2d y and multi-label
    n_samples = 30
    rng = np.random.RandomState(1)
    X = rng.randint(0, 3, size=(n_samples, 2))
    y = rng.randint(0, 3, size=(n_samples,))
    y_2d = y.reshape(-1, 1)
    y_multilabel = rng.randint(0, 2, size=(n_samples, 3))
    groups = rng.randint(0, 3, size=(n_samples,))
    splitters = [LeaveOneOut(), LeavePOut(p=2), KFold(), StratifiedKFold(),
                 RepeatedKFold(), RepeatedStratifiedKFold(),
                 ShuffleSplit(), StratifiedShuffleSplit(test_size=.5),
                 GroupShuffleSplit(), LeaveOneGroupOut(),
                 LeavePGroupsOut(n_groups=2), GroupKFold(), TimeSeriesSplit(),
                 PredefinedSplit(test_fold=groups)]
    for splitter in splitters:
        list(splitter.split(X, y, groups))
        list(splitter.split(X, y_2d, groups))
        try:
            list(splitter.split(X, y_multilabel, groups))
        except ValueError as e:
            allowed_target_types = ('binary', 'multiclass')
            msg = "Supported target types are: {}. Got 'multilabel".format(
                allowed_target_types)
            assert msg in str(e) 
Author: PacktPublishing, Project: Mastering-Elasticsearch-7.0, Lines: 27, Source: test_split.py

Example 7: test_shuffle_stratifiedkfold

# Required module: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import StratifiedKFold [as alias]
def test_shuffle_stratifiedkfold():
    # Check that shuffling is happening when requested, and for proper
    # sample coverage
    X_40 = np.ones(40)
    y = [0] * 20 + [1] * 20
    kf0 = StratifiedKFold(5, shuffle=True, random_state=0)
    kf1 = StratifiedKFold(5, shuffle=True, random_state=1)
    for (_, test0), (_, test1) in zip(kf0.split(X_40, y),
                                      kf1.split(X_40, y)):
        assert_not_equal(set(test0), set(test1))
    check_cv_coverage(kf0, X_40, y, groups=None, expected_n_splits=5)

    # Ensure that we shuffle each class's samples with different
    # random_state in StratifiedKFold
    # See https://github.com/scikit-learn/scikit-learn/pull/13124
    X = np.arange(10)
    y = [0] * 5 + [1] * 5
    kf1 = StratifiedKFold(5, shuffle=True, random_state=0)
    kf2 = StratifiedKFold(5, shuffle=True, random_state=1)
    test_set1 = sorted([tuple(s[1]) for s in kf1.split(X, y)])
    test_set2 = sorted([tuple(s[1]) for s in kf2.split(X, y)])
    assert test_set1 != test_set2 
Author: PacktPublishing, Project: Mastering-Elasticsearch-7.0, Lines: 24, Source: test_split.py

Example 8: test_cross_val_predict_unbalanced

# Required module: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import StratifiedKFold [as alias]
def test_cross_val_predict_unbalanced():
    X, y = make_classification(n_samples=100, n_features=2, n_redundant=0,
                               n_informative=2, n_clusters_per_class=1,
                               random_state=1)
    # Change the first sample to a new class
    y[0] = 2
    clf = LogisticRegression(random_state=1)
    cv = StratifiedKFold(n_splits=2, random_state=1)
    train, test = list(cv.split(X, y))
    yhat_proba = cross_val_predict(clf, X, y, cv=cv, method="predict_proba")
    assert y[test[0]][0] == 2  # sanity check for further assertions
    assert np.all(yhat_proba[test[0]][:, 2] == 0)
    assert np.all(yhat_proba[test[0]][:, 0:1] > 0)
    assert np.all(yhat_proba[test[1]] > 0)
    assert_array_almost_equal(yhat_proba.sum(axis=1), np.ones(y.shape),
                              decimal=12) 
Author: PacktPublishing, Project: Mastering-Elasticsearch-7.0, Lines: 18, Source: test_validation.py

Example 9: test_grid_search_groups

# Required module: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import StratifiedKFold [as alias]
def test_grid_search_groups():
    # Check if ValueError (when groups is None) propagates to GridSearchCV
    # And also check if groups is correctly passed to the cv object
    rng = np.random.RandomState(0)

    X, y = make_classification(n_samples=15, n_classes=2, random_state=0)
    groups = rng.randint(0, 3, 15)

    clf = LinearSVC(random_state=0)
    grid = {'C': [1]}

    group_cvs = [LeaveOneGroupOut(), LeavePGroupsOut(2), GroupKFold(),
                 GroupShuffleSplit()]
    for cv in group_cvs:
        gs = GridSearchCV(clf, grid, cv=cv)
        assert_raise_message(ValueError,
                             "The 'groups' parameter should not be None.",
                             gs.fit, X, y)
        gs.fit(X, y, groups=groups)

    non_group_cvs = [StratifiedKFold(), StratifiedShuffleSplit()]
    for cv in non_group_cvs:
        gs = GridSearchCV(clf, grid, cv=cv)
        # Should not raise an error
        gs.fit(X, y) 
Author: PacktPublishing, Project: Mastering-Elasticsearch-7.0, Lines: 27, Source: test_search.py
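
As a standalone counterpart to the test above, here is a minimal, hedged sketch of passing an explicit StratifiedKFold instance as the cv argument of GridSearchCV; the data and parameter grid are illustrative only.

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV, StratifiedKFold

X, y = make_classification(n_samples=100, n_features=5, random_state=0)

# An explicit stratified splitter instead of the integer shortcut cv=5.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
gs = GridSearchCV(LogisticRegression(max_iter=1000), {'C': [0.1, 1.0, 10.0]}, cv=cv)
gs.fit(X, y)
print(gs.best_params_, round(gs.best_score_, 3))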

Example 10: stratified_kfold_indices

# Required module: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import StratifiedKFold [as alias]
def stratified_kfold_indices(samples, **xval_kw):
        """
        TODO: check xval label frequency


        """
        from sklearn import model_selection

        X = np.empty((len(samples), 0))
        y = samples.encoded_1d().values
        groups = samples.group_ids

        type_ = xval_kw.pop('type', 'StratifiedGroupKFold')
        if type_ == 'StratifiedGroupKFold':
            assert groups is not None
            # FIXME: The StratifiedGroupKFold could be implemented better.
            splitter = sklearn_utils.StratifiedGroupKFold(**xval_kw)
            skf_list = list(splitter.split(X=X, y=y, groups=groups))
        elif type_ == 'StratifiedKFold':
            splitter = model_selection.StratifiedKFold(**xval_kw)
            skf_list = list(splitter.split(X=X, y=y))
        return skf_list 
Author: Erotemic, Project: ibeis, Lines: 24, Source: clf_helpers.py

Example 11: setup

# Required module: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import StratifiedKFold [as alias]
def setup(pblm):
        import sklearn.datasets
        iris = sklearn.datasets.load_iris()

        pblm.primary_task_key = 'iris'
        pblm.default_data_key = 'learn(all)'
        pblm.default_clf_key = 'RF'

        X_df = pd.DataFrame(iris.data, columns=iris.feature_names)
        samples = MultiTaskSamples(X_df.index)
        samples.apply_indicators(
            {'iris': {name: iris.target == idx
                      for idx, name in enumerate(iris.target_names)}})
        samples.X_dict = {'learn(all)': X_df}

        pblm.samples = samples
        pblm.xval_kw['type'] = 'StratifiedKFold' 
Author: Erotemic, Project: ibeis, Lines: 19, Source: clf_helpers.py

Example 12: kfold_cv

# Required module: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import StratifiedKFold [as alias]
def kfold_cv(clf_type, data_sets: [DataSet], fold_count=5, repetitions=5, n_jobs=-1, parallel_verbose=1, persist=True):
    """
    Do a kfold cross validation with a SVM classifier.
    :param data_sets: list of data sets
    :param fold_count: count of folds to be made and hence also runs
    :return: a Statistics object
    """
    log.info('Starting {!s}-fold cv. Set count: {!s}'.format(fold_count, len(data_sets)))
    parallel = Parallel(n_jobs=n_jobs, verbose=parallel_verbose)

    skf = StratifiedKFold(n_splits=fold_count, shuffle=True)
    stats_list = parallel(delayed(_fit_and_score)(clf, domains, labels, train_index, test_index, i, data_set_id, fold_count)
                          for domains, labels, data_set_id, clf in _data_sets_generator(data_sets, clf_type)
                          for i in range(repetitions)
                          for train_index, test_index in skf.split(domains, labels)
                          )
    where = settings.EVAL_FOLDER + '/' + '{!s}fold_cv_{!s}_{!s}rep_{!s}sets_{!s}.pkl'.format(fold_count, clf_type, repetitions, len(data_sets),
                                                                                                settings.NOW_STR)
    return _serialize_cv_results(stats_list, persist, where) 
Author: fanci-dga-detection, Project: fanci, Lines: 21, Source: eval_train_test.py

Example 13: example_of_cross_validation_using_model_selection

# Required module: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import StratifiedKFold [as alias]
def example_of_cross_validation_using_model_selection(raw_data, labels, num_subjects, num_epochs_per_subj):
    # NOTE: this method does not work for sklearn.svm.SVC with precomputed kernel
    # when the kernel matrix is computed in portions; also, this method only works
    # for self-correlation, i.e. correlation between the same data matrix.

    # no shrinking, set C=1
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1, gamma='auto')
    #logit_clf = LogisticRegression()
    clf = Classifier(svm_clf, epochs_per_subj=num_epochs_per_subj)
    # doing leave-one-subject-out cross validation
    # no shuffling in cv
    skf = model_selection.StratifiedKFold(n_splits=num_subjects,
                                          shuffle=False)
    scores = model_selection.cross_val_score(clf, list(zip(raw_data, raw_data)),
                                             y=labels,
                                             cv=skf)
    print(scores)
    logger.info(
        'the overall cross validation accuracy is %.2f' %
        np.mean(scores)
    ) 
Author: brainiak, Project: brainiak, Lines: 23, Source: classification.py

Example 14: _sfn

# Required module: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import StratifiedKFold [as alias]
def _sfn(data, mask, myrad, bcast_var):
    """Score classifier on searchlight data using cross-validation.

    The classifier is in `bcast_var[2]`. The labels are in `bcast_var[0]`. The
    number of cross-validation folds is in `bcast_var[1]`.
    """
    clf = bcast_var[2]
    masked_data = data[0][mask, :].T
    # print(l[0].shape, mask.shape, data.shape)
    skf = model_selection.StratifiedKFold(n_splits=bcast_var[1],
                                          shuffle=False)
    accuracy = np.mean(model_selection.cross_val_score(clf, masked_data,
                                                       y=bcast_var[0],
                                                       cv=skf,
                                                       n_jobs=1))
    return accuracy 
Author: brainiak, Project: brainiak, Lines: 18, Source: mvpa_voxelselector.py

Example 15: _get_stratified_crossval_split

# Required module: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import StratifiedKFold [as alias]
def _get_stratified_crossval_split(stimuli, fixations, split_count, included_splits, random=True, stratified_attributes=None):
    from sklearn.model_selection import StratifiedKFold
    labels = []
    for attribute_name in stratified_attributes:
        attribute_data = np.array(stimuli.attributes[attribute_name])
        if attribute_data.ndim == 1:
            attribute_data = attribute_data[:, np.newaxis]
        labels.append(attribute_data)
    labels = np.vstack(labels)
    X = np.ones((len(stimuli), 1))

    rst = np.random.RandomState(42)

    inds = []
    k_fold = StratifiedKFold(n_splits=split_count, shuffle=random, random_state=rst)
    for i, (train_index, test_index) in enumerate(k_fold.split(X, labels)):
        if i in included_splits:
            inds.extend(test_index)

    stimuli, fixations = create_subset(stimuli, fixations, inds)
    return stimuli, fixations 
Author: matthias-k, Project: pysaliency, Lines: 23, Source: filter_datasets.py


Note: The sklearn.model_selection.StratifiedKFold examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by many developers; copyright of the source code belongs to the original authors, and redistribution and use must follow the corresponding projects' licenses. Please do not reproduce this article without permission.