Python cross_validation.StratifiedKFold方法代码示例

本文整理汇总了 Python 中 sklearn.cross_validation.StratifiedKFold 方法的典型用法代码示例。如果您正苦于以下问题：Python cross_validation.StratifiedKFold 方法的具体用法？Python cross_validation.StratifiedKFold 怎么用？Python cross_validation.StratifiedKFold 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块 sklearn.cross_validation 的用法示例。


示例1: validation

# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import StratifiedKFold [as 别名]
def validation(self, X, Y, kind):
        """Estimate accuracy with stratified k-fold cross-validation.

        For every fold, ``self.fit`` is called with the training samples,
        the training labels and the held-out samples; the value it returns
        is compared element-wise with the held-out labels to get the fold
        accuracy.

        Parameters
        ----------
        X, Y : objects indexable by integer index arrays (e.g. numpy arrays)
            Samples and their class labels.
        kind : unused in this method; kept for interface compatibility.

        Returns
        -------
        The mean of the per-fold accuracies.

        Notes
        -----
        ``fold_n`` is not defined here and must exist in the enclosing scope.
        """
        import numpy as np  # local import: only needed for the score buffer

        print('validating...')
        folds = list(StratifiedKFold(Y, n_folds=fold_n, random_state=0))
        # The per-fold buffer was never created before being indexed, which
        # raised NameError on the first fold; allocate one slot per fold.
        score = np.zeros(len(folds))
        for j, (train_idx, test_idx) in enumerate(folds):
            # Single pre-formatted string: same output under Python 2 and 3.
            print('%s -fold' % (j + 1))
            X_train = X[train_idx]
            y_train = Y[train_idx]
            X_test = X[test_idx]
            y_test = Y[test_idx]

            res = self.fit(X_train, y_train, X_test)
            # Fraction of held-out samples predicted correctly.
            cur = sum(y_test == res) * 1.0 / len(res)
            score[j] = cur
        print('%s %s' % (score, score.mean()))
        return score.mean()

示例2: test_slice_on_dimension

# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import StratifiedKFold [as 别名]
def test_slice_on_dimension(self):
        """Slice an iris experiment along a dimension and compare with references.

        Builds a ``per.Experiment`` over the iris data with a random-forest
        grid, an SVC grid, one row-subsetting strategy and stratified k-fold
        cross-validation, then passes the stringified trials of each slice to
        ``self.__compare_to_ref_pkl`` (presumably a comparison against a
        stored reference pickle -- the helper is not visible here).
        """
        iris = datasets.load_iris()
        y = iris.target
        M = iris.data
        # One dict per classifier; each list holds the values to try for that
        # keyword.
        clfs = [{'clf': RandomForestClassifier, 
                 'n_estimators': [10, 100], 
                 'max_depth': [1, 10],
                 'random_state': [0]}, 
                 {'clf': SVC, 'kernel': ['linear', 'rbf'], 
                  'random_state': [0]}]        
        subsets = [{'subset': per.SubsetRandomRowsActualDistribution, 
                    'subset_size': [20, 40, 60, 80, 100],
                    'random_state': [0]}]
        cvs = [{'cv': StratifiedKFold}]
        exp = per.Experiment(M, y, clfs, subsets, cvs)
        # NOTE(review): the call opened on the next line is never closed in
        # this listing -- its arguments and the closing ``).trials]`` appear to
        # have been lost, so the block does not parse as shown. Restore them
        # from the original project before running.
        result = [str(trial) for trial in exp.slice_on_dimension(
        self.__compare_to_ref_pkl(result, 'slice_on_dimension_clf')
        # NOTE(review): this second call parses, but a leading argument may
        # have been lost here as well -- confirm against the original project.
        result = [str(trial) for trial  in exp.slice_on_dimension(
                {'subset_size': 60}).trials]
        self.__compare_to_ref_pkl(result, 'slice_on_dimension_subset_params') 

示例3: test_slice_by_best_score

# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import StratifiedKFold [as 别名]
def test_slice_by_best_score(self):
        """Map each trial of an iris experiment to its average score.

        Builds a ``per.Experiment`` over the iris data (random-forest and SVC
        grids, one row-subsetting strategy, stratified k-fold) and hands a
        ``{str(trial): trial.average_score()}`` mapping to
        ``self.__compare_to_ref_pkl`` (presumably a comparison against a
        stored reference pickle -- the helper is not visible here).
        """
        iris = datasets.load_iris()
        y = iris.target
        M = iris.data
        # One dict per classifier; each list holds the values to try for that
        # keyword.
        clfs = [{'clf': RandomForestClassifier, 
                 'n_estimators': [10, 100], 
                 'max_depth': [1, 10],
                 'random_state': [0]}, 
                 {'clf': SVC, 'kernel': ['linear', 'rbf'],
                  'random_state': [0]}]        
        subsets = [{'subset': per.SubsetRandomRowsActualDistribution, 
                    'subset_size': [20, 40],
                    'random_state': [0]}]
        cvs = [{'cv': StratifiedKFold}]
        exp = per.Experiment(M, y, clfs, subsets, cvs)
        # NOTE(review): the dict comprehension on the next line is cut off
        # after ``in`` -- the iterable that yields the trials and the closing
        # ``}`` are missing, so the block does not parse as shown. Restore them
        # from the original project before running.
        result = {str(trial): trial.average_score() for trial in 
        self.__compare_to_ref_pkl(result, 'slice_by_best_score') 

示例4: test_make_csv

# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import StratifiedKFold [as 别名]
def test_make_csv(self):
        """Run ``Experiment.make_csv`` on a generated test matrix.

        The experiment combines a random-forest grid, an SVC grid (with
        ``probability`` enabled), a ``SubsetSweepNumRows`` subset
        specification and stratified k-fold cross-validation with 2 and 3
        folds.
        """
        M, y = uft.generate_test_matrix(1000, 5, 2, random_state=0)

        # Each specification maps a keyword to the list of values to try.
        forest_spec = {
            'clf': RandomForestClassifier,
            'n_estimators': [10, 100],
            'max_depth': [5, 25],
            'random_state': [0],
        }
        svc_spec = {
            'clf': SVC,
            'kernel': ['linear', 'rbf'],
            'probability': [True],
            'random_state': [0],
        }
        subset_spec = {
            'subset': per.SubsetSweepNumRows,
            'num_rows': [[100, 200]],
            'random_state': [0],
        }
        cv_spec = {'cv': StratifiedKFold, 'n_folds': [2, 3]}

        exp = per.Experiment(
            M, y,
            clfs=[forest_spec, svc_spec],
            subsets=[subset_spec],
            cvs=[cv_spec])
        result_path = exp.make_csv()

示例5: test_report_complex

# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import StratifiedKFold [as 别名]
def test_report_complex(self):
        """Request a per-classifier report for an experiment on generated data.

        Builds a ``per.Experiment`` (random-forest and SVC grids, one
        row-subsetting strategy, stratified k-fold) on a matrix from
        ``uft.generate_test_matrix`` and asks it for a report along the
        ``per.CLF`` dimension, then adds a heading to ``self.report``.
        """
        M, y = uft.generate_test_matrix(100, 5, 2)
        # One dict per classifier; each list holds the values to try for that
        # keyword.
        clfs = [{'clf': RandomForestClassifier, 
                 'n_estimators': [10, 100], 
                 'max_depth': [1, 10],
                 'random_state': [0]}, 
                 {'clf': SVC, 
                  'kernel': ['linear', 'rbf'], 
                  'probability': [True],
                  'random_state': [0]}]        
        subsets = [{'subset': per.SubsetRandomRowsActualDistribution, 
                    'subset_size': [20, 40, 60, 80, 100],
                    'random_state': [0]}]
        cvs = [{'cv': StratifiedKFold}]
        exp = per.Experiment(M, y, clfs, subsets, cvs)
        # NOTE(review): the ``make_report`` call on the next line is cut off
        # after a trailing comma -- its remaining arguments and the closing
        # parenthesis are missing, so the block does not parse as shown.
        # Restore them from the original project before running.
        _, rep = exp.make_report(dimension=per.CLF, return_report_object=True, 
        self.report.add_heading('test_report_complex', 1)

示例6: test_toy_data

# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import StratifiedKFold [as 别名]
def test_toy_data(name, clf):
    """Cross-validate ``clf`` on the toy classification data and report it.

    The data comes from ``classification_data()``. For each of the 5
    stratified folds the classifier is refitted on the training part; the
    accuracy of ``predict`` and the ROC AUC of column 1 of ``predict_proba``
    are recorded on the held-out part. Mean and standard deviation of both
    metrics are printed and returned.

    Returns a dict with keys ``name``, ``acc_mean``, ``acc_std``,
    ``auc_mean`` and ``auc_std``.
    """
    X, y = classification_data()
    k_folds = 5
    splitter = StratifiedKFold(y, k_folds, random_state=1234)

    fold_acc = []
    fold_auc = []
    for train, test in splitter:
        x_fit, x_val = X[train, :], X[test, :]
        y_fit, y_val = y[train], y[test]
        clf.fit(x_fit, y_fit)
        predicted = clf.predict(x_val)
        proba_col1 = clf.predict_proba(x_val)[:, 1]
        fold_acc.append(np.mean(predicted == y_val))
        fold_auc.append(roc_auc_score(y_val, proba_col1))

    summary = {
        'name': name,
        'acc_mean': np.mean(fold_acc),
        'acc_std': np.std(fold_acc),
        'auc_mean': np.mean(fold_auc),
        'auc_std': np.std(fold_auc),
    }

    # Every print has a single argument, so the parenthesised form emits the
    # same text whether print is a statement or a function.
    print(name)
    print('accuracy: {0:.3f} +/- {1:.3f}'.format(
        summary['acc_mean'], summary['acc_std']))
    print('auc: {0:.3f} +/- {1:.3f}'.format(
        summary['auc_mean'], summary['auc_std']))
    print('-' * 80)
    return summary

示例7: get_weights

# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import StratifiedKFold [as 别名]
def get_weights():
    """Find blending weights for the stored per-model validation predictions.

    The validation labels are the labels of the held-out part of each of the
    5 stratified folds, concatenated in fold order. One prediction array is
    loaded per file returned by ``utils.get_prediction_files()`` (read as
    comma-separated text from ``consts.BLEND_PATH``). ``fmin_cobyla`` then
    minimises ``error`` over the weights, starting from all ones and subject
    to ``constraint``.

    Returns
    -------
    The weight vector returned by ``fmin_cobyla``, one entry per prediction
    file.
    """
    # Read validation labels
    _, labels, _, _, _ = utils.load_data()
    skf = StratifiedKFold(labels, n_folds=5, random_state=23)
    # Held-out indices of all folds, in fold order.
    test_index = np.concatenate([test_idx for _, test_idx in skf])
    val_labels = labels[test_index]
    # Read predictions on validation set
    val_predictions = []
    prediction_files = utils.get_prediction_files()
    for preds_file in prediction_files:
        vp = np.genfromtxt(os.path.join(consts.BLEND_PATH, preds_file), delimiter=',')
        # Keep every loaded array: without this the list stayed empty and the
        # optimiser was run with no predictions to blend.
        val_predictions.append(vp)
    # Minimize blending function
    p0 = [1.] * len(prediction_files)
    p = fmin_cobyla(error, p0, args=(val_predictions, val_labels), cons=[constraint], rhoend=1e-5)

    return p

示例8: make_blender_cv

# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import StratifiedKFold [as 别名]
def make_blender_cv(classifier, x, y, calibrate=False):
    """Produce out-of-fold class probabilities and log-loss scores.

    ``classifier`` is fitted on the training part of each of 5 stratified
    folds and ``predict_proba`` is evaluated on the held-out part.

    Parameters
    ----------
    classifier : estimator providing ``fit`` and ``predict_proba``.
    x, y : arrays indexable by integer index arrays
        Feature rows and their labels.
    calibrate : bool, default False
        When true, the classifier is wrapped in an isotonic
        ``CalibratedClassifierCV`` whose inner folds come from
        ``get_cv(y[train_index])``.

    Returns
    -------
    scores : list
        ``log_loss`` of every fold.
    predictions : array or None
        Fold predictions stacked along axis 0 in fold order (not in the
        original row order); ``None`` if no fold was produced.
    """
    skf = StratifiedKFold(y, n_folds=5, random_state=23)
    scores, fold_predictions = [], []
    for train_index, test_index in skf:
        if calibrate:
            # Make training and calibration
            calibrated_classifier = CalibratedClassifierCV(classifier, method='isotonic', cv=get_cv(y[train_index]))
            fitted_classifier = calibrated_classifier.fit(x[train_index, :], y[train_index])
        else:
            # Without this branch the plain fit always replaced the
            # calibrated model, so ``calibrate=True`` had no effect.
            fitted_classifier = classifier.fit(x[train_index, :], y[train_index])
        preds = fitted_classifier.predict_proba(x[test_index, :])

        # Free memory
        calibrated_classifier, fitted_classifier = None, None

        scores.append(log_loss(y[test_index], preds))
        fold_predictions.append(preds)
    # One concatenation at the end instead of re-copying the growing array
    # on every fold.
    predictions = np.concatenate(fold_predictions, axis=0) if fold_predictions else None
    return scores, predictions

示例9: create_cv_id

# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import StratifiedKFold [as 别名]
def create_cv_id(target, n_folds_ = 5, cv_id_name=cv_id_name, seed=407):
    """Assign a cross-validation fold id to every row and save it to disk.

    Fold ids are taken from a shuffled ``StratifiedKFold`` on
    ``target['target']``. If that fails, rows are assigned with a shuffled
    plain ``KFold`` over ``len(target)`` rows instead. The resulting integer
    array is written with ``np.save`` to ``INPUT_PATH + cv_id_name``.

    Parameters
    ----------
    target : object supporting ``target['target']`` and ``len(target)``.
    n_folds_ : int, default 5
        Number of folds.
    cv_id_name : str
        File name appended to ``INPUT_PATH``.
    seed : int, default 407
        Random state for both splitters.
    """
    # NOTE(review): the lines that separated the two strategies are missing
    # from this listing (it did not parse as shown). The indentation suggests
    # a try/except fallback, which is what is reconstructed here -- confirm
    # against the original project.
    try:
        a = StratifiedKFold(target['target'], n_folds=n_folds_, shuffle=True, random_state=seed)
        cv_index = a.test_folds
        print('Done StratifiedKFold')
    except Exception:
        cv_index = np.empty(len(target))
        a = KFold(len(target), n_folds=n_folds_, shuffle=True, random_state=seed)
        # i is (train_indices, test_indices); tag every test row with its fold.
        for idx, i in enumerate(a):
            cv_index[i[1]] = idx
        cv_index = cv_index.astype(int)
        print('Done Kfold')
    np.save(INPUT_PATH + cv_id_name, cv_index)

######### Utils #########

#feature listを渡してデータを作成するutil関数 

示例10: naive_bayes

# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import StratifiedKFold [as 别名]
def naive_bayes(pos_samples, neg_samples, n_folds = 2):
    """Cross-validate an NLTK naive Bayes classifier.

    The positive and negative samples are concatenated and split with a
    shuffled, stratified ``n_folds``-fold scheme keyed on each sample's
    label (the second item of every sample). One classifier is trained per
    fold and scored on the held-out samples.

    Returns a tuple ``(accuracy, classifier, train_samples, test_samples)``:
    the accuracy summed over the folds and divided by ``n_folds``, followed
    by the classifier and the train/test samples of the last fold.
    """
    samples = np.array(pos_samples + neg_samples)
    labels = [label for _, label in samples]
    folds = cross_validation.StratifiedKFold(labels, n_folds=n_folds, shuffle=True)

    accuracy_sum = 0.0
    for train_idx, test_idx in folds:
        train_samples, test_samples = samples[train_idx], samples[test_idx]
        classifier = nltk.NaiveBayesClassifier.train(train_samples)
        accuracy_sum += nltk.classify.util.accuracy(classifier, test_samples)

    accuracy = accuracy_sum / n_folds
    return (accuracy, classifier, train_samples, test_samples)

示例11: _validate_link_reconstruction

# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import StratifiedKFold [as 别名]
def _validate_link_reconstruction(self, samples, lbs):
        """Score link reconstruction with 2-fold cross-validated F1.

        Each sample's feature is the element-wise absolute difference
        between entries 0 and 1 along the second axis of
        ``self.make_features(samples)``. A logistic regression is fitted on
        one shuffled stratified fold and evaluated on the other.

        Parameters
        ----------
        samples : passed unchanged to ``self.make_features``.
        lbs : array indexable by integer index arrays
            One label per sample.

        Returns
        -------
        Mean ``f1_score`` over the two folds.
        """
        feat = self.make_features(samples)
        feat = np.abs(feat[:, 0] - feat[:, 1])

        clf = LogisticRegression()
        # Support both StratifiedKFold call styles: first the one that takes
        # the labels and ``n_folds`` in the constructor; if that signature is
        # rejected with TypeError, the one that takes ``n_splits`` and
        # produces folds through ``split``. The ``try:`` line was missing, so
        # the block did not parse.
        try:
            cv = StratifiedKFold(lbs, n_folds=2, shuffle=True)
            parts = cv
        except TypeError:
            cv = StratifiedKFold(n_splits=2, shuffle=True)
            parts = cv.split(feat, lbs)

        val_score = []
        for tr, te in parts:
            model = clf.fit(feat[tr], lbs[tr])
            p = model.predict(feat[te])
            val_score.append(f1_score(lbs[te], p))
        return np.mean(val_score)

示例12: _validate_node_classify

# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import StratifiedKFold [as 别名]
def _validate_node_classify(self, samples, lbs):
        """Score node classification with 2-fold cross-validated F1.

        The feature of a sample is entry 0 along the second axis of
        ``self.make_features(samples)``. A class-weight-balanced logistic
        regression is fitted on one shuffled stratified fold and evaluated
        on the other.

        Parameters
        ----------
        samples : passed unchanged to ``self.make_features``.
        lbs : array indexable by integer index arrays
            One label per sample; must have as many entries as there are
            feature rows.

        Returns
        -------
        Mean ``f1_score`` over the two folds.
        """
        # note that the 1-st dimension of feat is for each node in each sample (time, node1, node2, ...)
        feat = self.make_features(samples)[:, 0]
        assert len(feat) == len(lbs)

        clf = LogisticRegression(class_weight='balanced')
        # Support both StratifiedKFold call styles: first the one that takes
        # the labels and ``n_folds`` in the constructor; if that signature is
        # rejected with TypeError, the one that takes ``n_splits`` and
        # produces folds through ``split``. The ``try:`` line was missing, so
        # the block did not parse.
        try:
            cv = StratifiedKFold(lbs, n_folds=2, shuffle=True)
            parts = cv
        except TypeError:
            cv = StratifiedKFold(n_splits=2, shuffle=True)
            parts = cv.split(feat, lbs)

        val_score = []
        for tr, te in parts:
            model = clf.fit(feat[tr], lbs[tr])
            p = model.predict(feat[te])
            val_score.append(f1_score(lbs[te], p))
        return np.mean(val_score)

示例13: compute_svm_score_nestedCV

# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import StratifiedKFold [as 别名]
def compute_svm_score_nestedCV(K, y, n_folds, scoring='accuracy',
                               param_grid=None, random_state=None):
    """Nested cross-validation score of an SVM on a precomputed kernel.

    The outer loop splits the samples into ``n_folds`` shuffled stratified
    folds. For every outer training set, a ``GridSearchCV`` over
    ``param_grid`` (with its own stratified folds) selects the SVM
    parameters, and the selected model is scored on the outer test fold.

    Parameters
    ----------
    K : 2-D array indexed as ``K[rows, :][:, cols]``
        Precomputed kernel matrix over all samples.
    y : array indexable by integer index arrays
        Class labels.
    n_folds : int
        Number of folds for both the outer and the inner split.
    scoring : passed to ``GridSearchCV``; default ``'accuracy'``.
    param_grid : list of dict, optional
        Grid for ``GridSearchCV``; defaults to 20 log-spaced values of ``C``
        between 1e-5 and 1e5.
    random_state : optional
        Forwarded to both ``StratifiedKFold`` splitters.

    Returns
    -------
    Mean of the outer-fold scores.
    """
    if param_grid is None:
        # Built per call so no mutable object is shared between calls.
        param_grid = [{'C': np.logspace(-5, 5, 20)}]

    # NOTE(review): both StratifiedKFold calls were cut off after a trailing
    # comma in this listing. They are closed here with
    # ``random_state=random_state``, which with the default ``None`` matches
    # the splitter's own default. ``shuffle=True`` on the inner split is an
    # assumption -- confirm against the original project.
    cv = StratifiedKFold(y, n_folds=n_folds, shuffle=True,
                         random_state=random_state)
    scores = np.zeros(n_folds)
    for i, (train, test) in enumerate(cv):
        cvclf = SVC(kernel='precomputed')
        y_train = y[train]
        cvcv = StratifiedKFold(y_train, n_folds=n_folds,
                               shuffle=True, random_state=random_state)
        clf = GridSearchCV(cvclf, param_grid=param_grid, scoring=scoring,
                           cv=cvcv, n_jobs=1)
        # Train on the train-by-train kernel block ...
        clf.fit(K[:, train][train, :], y_train)
        # ... and score on the test-by-train block.
        scores[i] = clf.score(K[test, :][:, train], y[test])

    return scores.mean()

示例14: compute_svm_score_nestedCV

# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import StratifiedKFold [as 别名]
def compute_svm_score_nestedCV(K, y, n_folds,
                               param_grid=None, scoring='accuracy',
                               random_state=None):
    """Compute cross-validated score of SVM using precomputed kernel.

    The outer loop splits the samples into ``n_folds`` shuffled stratified
    folds. For every outer training set, a ``GridSearchCV`` over
    ``param_grid`` (with its own stratified folds) selects the SVM
    parameters, and the selected model is scored on the outer test fold.

    Parameters
    ----------
    K : 2-D array indexed as ``K[rows, :][:, cols]``
        Precomputed kernel matrix over all samples.
    y : array indexable by integer index arrays
        Class labels.
    n_folds : int
        Number of folds for both the outer and the inner split.
    param_grid : list of dict, optional
        Grid for ``GridSearchCV``; defaults to 25 log-spaced values of ``C``
        between 1e-5 and 1e5.
    scoring : passed to ``GridSearchCV``.
    random_state : optional
        Forwarded to both ``StratifiedKFold`` splitters.

    Returns
    -------
    Mean of the outer-fold scores.
    """
    # NOTE(review): in this listing the docstring was never closed, the body
    # used ``scoring`` without defining it, and both StratifiedKFold calls
    # were cut off after a trailing comma. ``scoring`` and ``random_state``
    # are added as trailing keyword parameters so existing positional calls
    # keep working. The ``'accuracy'`` default and ``shuffle=True`` on the
    # inner split are assumptions -- confirm against the original project.
    if param_grid is None:
        # Built per call so no mutable object is shared between calls.
        param_grid = [{'C': np.logspace(-5, 5, 25)}]

    cv = StratifiedKFold(y, n_folds=n_folds, shuffle=True,
                         random_state=random_state)
    scores = np.zeros(n_folds)
    for i, (train, test) in enumerate(cv):
        cvclf = SVC(kernel='precomputed')
        y_train = y[train]
        cvcv = StratifiedKFold(y_train, n_folds=n_folds,
                               shuffle=True, random_state=random_state)
        clf = GridSearchCV(cvclf, param_grid=param_grid, scoring=scoring,
                           cv=cvcv, n_jobs=1)
        # Train on the train-by-train kernel block ...
        clf.fit(K[train, :][:, train], y_train)
        # ... and score on the test-by-train block.
        scores[i] = clf.score(K[test, :][:, train], y[test])

    return scores.mean()

示例15: fit_layer

# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import StratifiedKFold [as 别名]
def fit_layer(self, layer_idx, X, y):
        """Fit layer ``layer_idx`` and, recursively, every layer after it.

        The last layer is fitted directly on ``X``. Any earlier layer is
        first fitted fold by fold (stratified folds built from ``y`` and
        ``self.cv``) so that every row of ``X`` gets an output from a model
        that did not see it. Those outputs, with the first entry along axis 1
        dropped and the remaining axes flattened, become the input of the
        next layer. The layer is then refitted on all of ``X`` before
        recursing.

        Parameters
        ----------
        layer_idx : int
            Index into ``self.layers``.
        X : array with ``shape[0]`` rows, indexable by integer index arrays.
        y : array of labels, indexable by integer index arrays.
        """
        n_layers = len(self.layers)
        if layer_idx >= n_layers:
            # NOTE(review): the body of this branch and the ``else:`` below
            # were missing from the listing (it did not parse). A plain
            # return is assumed for an out-of-range index -- confirm against
            # the original project.
            return

        layer = self.layers[layer_idx]
        if layer_idx == n_layers - 1:
            layer.fit(X, y)
            return

        # One output column per classifier and per class except the first,
        # matching the ``out[:, 1:, :]`` slice below.
        n_classes = len(set(y)) - 1
        n_classifiers = len(layer)
        output = np.zeros((X.shape[0], n_classes * n_classifiers))
        skf = cross_validation.StratifiedKFold(y, self.cv)
        for tra, tst in skf:
            layer.fit(X[tra], y[tra])
            out = layer.output(X[tst], mode=self.mode)
            output[tst, :] = out[:, 1:, :].reshape(
                out.shape[0], (out.shape[1] - 1) * out.shape[2])

        # Refit on all rows so the stored layer is trained on the full data.
        layer.fit(X, y)
        self.fit_layer(layer_idx + 1, output, y)
