当前位置: 首页>>代码示例>>Python>>正文


Python cross_validation.KFold方法代码示例

本文整理汇总了Python中sklearn.cross_validation.KFold方法的典型用法代码示例。如果您正苦于以下问题:Python cross_validation.KFold方法的具体用法?Python cross_validation.KFold怎么用?Python cross_validation.KFold使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sklearn.cross_validation的用法示例。


在下文中一共展示了cross_validation.KFold方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: crossValidation

# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import KFold [as 别名]
def crossValidation(X, y, cvFolds, estimator):
    """Score ``estimator`` with shuffled K-fold cross-validation.

    Parameters
    ----------
    X : predictors; rows are selected with ``X[rows, :]``, so a 2-D array
        is expected.
    y : target, indexed with the same row indices as ``X``.
    cvFolds : number of folds.
    estimator : object exposing ``fit(X, y)`` and ``predict(X)``.

    Returns
    -------
    Array of shape ``(cvFolds, 1)`` holding the R squared of each fold.
    """
    r2 = np.zeros((cvFolds, 1))
    # A fixed random_state keeps the shuffled split the same on every call.
    kf = KFold(len(X), n_folds=cvFolds, shuffle=True, random_state=30)
    for cv_j, (train_index, test_index) in enumerate(kf):
        train_X, test_X = X[train_index, :], X[test_index, :]
        train_y, test_y = y[train_index], y[test_index]
        # Fit the estimator supplied by the caller, not a module-level object.
        estimator.fit(train_X, train_y)
        r2[cv_j] = r2_score(test_y, estimator.predict(test_X))
    return r2
    
#parameters: 'X' the predictors, 'y' the target, 'cvFolds' number of folds, 'estimator' machine learning algorithm 
#returns: the R squared for each fold 
开发者ID:h-cel,项目名称:ClimateVegetationDynamics_GrangerCausality,代码行数:19,代码来源:GC_script.py

示例2: nestedCrossValidation

# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import KFold [as 别名]
def nestedCrossValidation(X, y, cvFolds, estimator):
    """Nested cross-validation: grid-search ``alpha`` inside each outer fold.

    For every outer fold a ``GridSearchCV`` (``cv=cvFolds``, scoring
    ``'mean_squared_error'``) is fitted on the training rows, and its
    ``best_estimator_`` is scored with R squared on the held-out rows.

    Parameters
    ----------
    X : predictors; rows are selected with ``X[rows, :]``.
    y : target, indexed with the same row indices as ``X``.
    cvFolds : number of folds, used for both the outer and the inner split.
    estimator : estimator handed to ``GridSearchCV``; it is searched over
        an ``alpha`` parameter.

    Returns
    -------
    Array of shape ``(cvFolds, 1)`` holding the R squared of each outer fold.
    """
    outer_folds = KFold(len(X), n_folds=cvFolds, shuffle=True, random_state=30)
    # Powers of ten from 1e-7 upwards; note the grid jumps from 1e7 to 1e9.
    search_space = {'alpha': [1e-07, 1e-06, 1e-05, 0.0001, 0.001, 0.01, 0.1,
                              1, 10, 100, 1000, 10000, 100000, 1000000,
                              10000000, 1000000000]}
    scores = np.zeros((cvFolds, 1))
    for fold_no, (train_rows, test_rows) in enumerate(outer_folds):
        X_fit, X_holdout = X[train_rows, :], X[test_rows, :]
        y_fit, y_holdout = y[train_rows], y[test_rows]
        search = GridSearchCV(estimator, param_grid=search_space, verbose=0,
                              cv=cvFolds, scoring='mean_squared_error')
        search.fit(X_fit, y_fit)
        predicted = search.best_estimator_.predict(X_holdout)
        scores[fold_no] = r2_score(y_holdout, predicted)
    return scores
    
#%% main script 
开发者ID:h-cel,项目名称:ClimateVegetationDynamics_GrangerCausality,代码行数:20,代码来源:GC_script.py

示例3: create_cv_id

# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import KFold [as 别名]
def create_cv_id(target, n_folds_=5, cv_id_name=cv_id_name, seed=407):
    """Assign a fold id to every row of ``target`` and save the ids to disk.

    A stratified split on ``target['target']`` is tried first; if building
    it raises, a plain shuffled K-fold over ``len(target)`` rows is used
    instead.  The resulting array is written with ``np.save`` to
    ``INPUT_PATH + cv_id_name``.

    Parameters
    ----------
    target : object supporting ``len()`` and ``target['target']``.
    n_folds_ : number of folds.
    cv_id_name : file name appended to ``INPUT_PATH``; the default is taken
        from the module-level ``cv_id_name`` when the function is defined.
    seed : random state used for shuffling.

    Returns
    -------
    None
    """
    try:
        a = StratifiedKFold(target['target'], n_folds=n_folds_, shuffle=True,
                            random_state=seed)
        cv_index = a.test_folds
    except Exception:
        # Best-effort fallback; ``Exception`` (rather than a bare ``except``)
        # lets KeyboardInterrupt / SystemExit propagate.
        cv_index = np.empty(len(target))
        a = KFold(len(target), n_folds=n_folds_, shuffle=True,
                  random_state=seed)
        for fold_id, (_, test_rows) in enumerate(a):
            cv_index[test_rows] = fold_id
        cv_index = cv_index.astype(int)
        print('Done Kfold')
    else:
        print('Done StratifiedKFold')

    np.save(INPUT_PATH + cv_id_name, cv_index)
    return

######### Utils #########

# Utility function that builds the data from a given feature list
开发者ID:ikki407,项目名称:stacking,代码行数:21,代码来源:base.py

示例4: fit_blending_model

# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import KFold [as 别名]
def fit_blending_model(self, X_blend, y):
    """Fit one copy of the blending model per cross-validation fold.

    The samples are split into ``self.n_folds`` folds; for each fold a copy
    of ``self.blending_model`` is fitted on that fold's training rows.  The
    fitted copies are stored, in fold order, in ``self.blending_model_cv``.

    Parameters
    ----------
    X_blend : features for the blending model, indexable by row indices.
    y : targets; ``y.shape[0]`` gives the number of samples.
    """
    if self.verbose:
        description = "%s" % self.blending_model.__repr__()
        print('Fitting Blending Model:\n%s' % description)

    self.blending_model_cv = []
    splits = KFold(y.shape[0], self.n_folds)

    # Only the training indices are used; the held-out part is ignored here.
    for fold_no, (train_rows, _) in enumerate(splits):
        if self.verbose:
            print('Fold %d' % fold_no)

        # NOTE(review): presumably ``copy.copy`` (shallow) -- confirm the
        # model holds no mutable state shared between the per-fold copies.
        fold_model = copy(self.blending_model)
        fold_model.fit(X_blend[train_rows], y[train_rows])

        self.blending_model_cv.append(fold_model)
开发者ID:dustinstansbury,项目名称:stacked_generalization,代码行数:24,代码来源:stacked_generalizer.py

示例5: run_model

# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import KFold [as 别名]
def run_model(model,dtrain,predictor_var,target,scoring_method='mean_squared_error'):
    """Cross-validate ``model`` and then refit it on a year-based split.

    First a 5-fold ``cross_val_score`` is run over all of ``dtrain``.  Then
    ``model`` is fitted on the rows with ``Year < 2000`` and asked to predict
    the rows with ``Year > 1999``.  Neither the scores nor the predictions
    are returned or printed; the visible effect is that ``model`` ends up
    fitted on the pre-2000 rows.

    Parameters
    ----------
    model : estimator with ``fit`` and ``predict``.
    dtrain : table indexed by column name; must contain a ``'Year'`` column.
    predictor_var : column selection used as model input.
    target : column selection used as model target.
    scoring_method : ``scoring`` argument forwarded to ``cross_val_score``.
    """
    folds = KFold(len(dtrain), 5)
    # Result currently unused; kept because the call was part of the routine.
    cv_scores = cross_val_score(model, dtrain[predictor_var], dtrain[target],
                                cv=folds, scoring=scoring_method)

    # Temporal hold-out: train on years before 2000, validate on the rest.
    before_2000 = dtrain[dtrain['Year'] < 2000]
    after_1999 = dtrain[dtrain['Year'] > 1999]

    fit_inputs = before_2000[predictor_var]
    holdout_inputs = after_1999[predictor_var]
    model.fit(fit_inputs, before_2000[target])
    pred_for_val = model.predict(holdout_inputs)
开发者ID:analyticsvidhya,项目名称:The_Ultimate_Student_Hunt,代码行数:19,代码来源:code.py

示例6: test_kfold_no_shuffle

# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import KFold [as 别名]
def test_kfold_no_shuffle():
    """Manually check that KFold preserves data ordering on toy datasets."""

    def check_folds(n_samples, expected_folds):
        # Pull folds one at a time and compare them, test indices first.
        fold_iter = iter(cval.KFold(n_samples, 2))
        for expected_train, expected_test in expected_folds:
            train_idx, test_idx = next(fold_iter)
            assert_array_equal(test_idx, expected_test)
            assert_array_equal(train_idx, expected_train)

    # Even split: two folds of two samples each.
    check_folds(4, [([2, 3], [0, 1]),
                    ([0, 1], [2, 3])])

    # Uneven split: the first fold receives the extra sample.
    check_folds(5, [([3, 4], [0, 1, 2]),
                    ([0, 1, 2], [3, 4])])
开发者ID:alvarobartt,项目名称:twitter-stock-recommendation,代码行数:21,代码来源:test_cross_validation.py

示例7: test_shuffle_kfold

# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import KFold [as 别名]
def test_shuffle_kfold():
    """Check shuffled KFold test folds are not contiguous blocks and that
    together they cover every index exactly once."""
    n_samples = 300
    ind = np.arange(n_samples)
    kf = cval.KFold(n_samples, 3, shuffle=True, random_state=0)

    # The three test folds an unshuffled split would have produced.
    contiguous_blocks = (np.arange(100), np.arange(100, 200),
                         np.arange(200, 300))

    seen = []
    for _, test in kf:
        picked = ind[test]
        for block in contiguous_blocks:
            assert_true(np.any(block != picked))
        seen.append(picked)

    all_folds = np.concatenate(seen)
    all_folds.sort()
    assert_array_equal(all_folds, ind)
开发者ID:alvarobartt,项目名称:twitter-stock-recommendation,代码行数:21,代码来源:test_cross_validation.py

示例8: test_predefinedsplit_with_kfold_split

# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import KFold [as 别名]
def test_predefinedsplit_with_kfold_split():
    """Check that PredefinedSplit can reproduce a split generated by KFold."""
    fold_of_sample = -1 * np.ones(10)
    kf_train, kf_test = [], []
    shuffled = cval.KFold(10, 5, shuffle=True)
    for fold_no, (train_ind, test_ind) in enumerate(shuffled):
        kf_train.append(train_ind)
        kf_test.append(test_ind)
        # Record which fold tests each sample so the split can be replayed.
        fold_of_sample[test_ind] = fold_no

    replayed = list(cval.PredefinedSplit(fold_of_sample))
    ps_train = [train_ind for train_ind, _ in replayed]
    ps_test = [test_ind for _, test_ind in replayed]

    assert_array_equal(ps_train, kf_train)
    assert_array_equal(ps_test, kf_test)
开发者ID:alvarobartt,项目名称:twitter-stock-recommendation,代码行数:19,代码来源:test_cross_validation.py

示例9: test_cross_val_score_mask

# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import KFold [as 别名]
def test_cross_val_score_mask():
    """Test that cross_val_score works with boolean masks.

    The scores obtained from KFold's integer index arrays must equal the
    scores obtained when the same folds are passed as boolean masks.
    """
    svm = SVC(kernel="linear")
    iris = load_iris()
    X, y = iris.data, iris.target
    cv_indices = cval.KFold(len(y), 5)
    scores_indices = cval.cross_val_score(svm, X, y, cv=cv_indices)
    cv_indices = cval.KFold(len(y), 5)
    cv_masks = []
    for train, test in cv_indices:
        # Builtin ``bool`` is used because the ``np.bool`` alias no longer
        # exists in current NumPy releases.
        mask_train = np.zeros(len(y), dtype=bool)
        mask_test = np.zeros(len(y), dtype=bool)
        mask_train[train] = True
        mask_test[test] = True
        # Pass the masks themselves, otherwise masks are never exercised.
        cv_masks.append((mask_train, mask_test))
    scores_masks = cval.cross_val_score(svm, X, y, cv=cv_masks)
    assert_array_equal(scores_indices, scores_masks)
开发者ID:alvarobartt,项目名称:twitter-stock-recommendation,代码行数:19,代码来源:test_cross_validation.py

示例10: test_cross_val_generator_with_indices

# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import KFold [as 别名]
def test_cross_val_generator_with_indices():
    """Check every CV generator yields index arrays, not boolean masks.

    Both the train and the test part of each split must have a non-boolean
    dtype and must be usable to index ``X`` and ``y``.
    """
    X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
    y = np.array([1, 1, 2, 2])
    labels = np.array([1, 2, 3, 4])
    # No ``indices`` argument is passed: the generators are used as-is.
    loo = cval.LeaveOneOut(4)
    lpo = cval.LeavePOut(4, 2)
    kf = cval.KFold(4, 2)
    skf = cval.StratifiedKFold(y, 2)
    lolo = cval.LeaveOneLabelOut(labels)
    lopo = cval.LeavePLabelOut(labels, 2)
    ps = cval.PredefinedSplit([1, 1, 2, 2])
    ss = cval.ShuffleSplit(2)
    for cv in [loo, lpo, kf, skf, lolo, lopo, ss, ps]:
        for train, test in cv:
            assert_not_equal(np.asarray(train).dtype.kind, 'b')
            # Check the test part too, not the train part a second time.
            assert_not_equal(np.asarray(test).dtype.kind, 'b')
            # Indexing must succeed; the results themselves are not needed.
            X[train], X[test]
            y[train], y[test]
开发者ID:alvarobartt,项目名称:twitter-stock-recommendation,代码行数:21,代码来源:test_cross_validation.py

示例11: test_check_cv_return_types

# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import KFold [as 别名]
def test_check_cv_return_types():
    """Check which splitter class ``check_cv`` returns for each target kind."""

    def expect_type(cv, splitter_cls):
        assert_true(isinstance(cv, splitter_cls))

    X = np.ones((9, 2))

    # Not a classifier: plain KFold.
    expect_type(cval.check_cv(3, X, classifier=False), cval.KFold)

    # Classifier with binary or multiclass targets: stratified folds.
    y_binary = np.array([0, 1, 0, 1, 0, 0, 1, 1, 1])
    expect_type(cval.check_cv(3, X, y_binary, classifier=True),
                cval.StratifiedKFold)

    y_multiclass = np.array([0, 1, 0, 1, 2, 1, 2, 0, 2])
    expect_type(cval.check_cv(3, X, y_multiclass, classifier=True),
                cval.StratifiedKFold)

    # Classifier with multilabel or multioutput targets: plain KFold again.
    X = np.ones((5, 2))
    y_multilabel = [[1, 0, 1], [1, 1, 0], [0, 0, 0], [0, 1, 1], [1, 0, 0]]
    expect_type(cval.check_cv(3, X, y_multilabel, classifier=True),
                cval.KFold)

    y_multioutput = np.array([[1, 2], [0, 3], [0, 0], [3, 1], [2, 0]])
    expect_type(cval.check_cv(3, X, y_multioutput, classifier=True),
                cval.KFold)
开发者ID:alvarobartt,项目名称:twitter-stock-recommendation,代码行数:23,代码来源:test_cross_validation.py

示例12: evaluate_cross_validation

# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import KFold [as 别名]
def evaluate_cross_validation(clf, X, y, K):
    """Run shuffled K-fold cross-validation and print the scores.

    Parameters
    ----------
    clf : estimator passed to ``cross_val_score``.
    X : samples.
    y : targets; ``len(y)`` gives the number of samples.
    K : number of folds.

    Prints the per-fold scores, then their mean and the value of
    ``sem(scores)`` (presumably the standard error of the mean -- confirm
    against the file's imports).  Returns None.
    """
    # create a k-fold cross validation iterator
    cv = KFold(len(y), K, shuffle=True, random_state=0)
    # No ``scoring`` is given, so the estimator's own ``score`` method is used.
    scores = cross_val_score(clf, X, y, cv=cv)
    # A single formatted argument prints the same text on Python 2 and 3.
    print("Scores:  {0}".format(scores))
    print("Mean score: {0:.3f} (+/-{1:.3f})".format(np.mean(scores), sem(scores)))


# Confusion Matrix and Results 
开发者ID:its-izhar,项目名称:Emotion-Recognition-Using-SVMs,代码行数:12,代码来源:Train Classifier and Test Video Feed.py

示例13: folding

# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import KFold [as 别名]
def folding(y, n_folds):
    """Build a ``KFold`` splitter over the ``y.size`` samples of ``y``.

    Parameters
    ----------
    y : array-like exposing ``.size``.
    n_folds : number of folds.

    Returns
    -------
    The ``KFold`` object, constructed with ``random_state=0``.
    """
    return KFold(y.size, n_folds=n_folds, random_state=0)
开发者ID:MKLab-ITI,项目名称:news-popularity-prediction,代码行数:6,代码来源:ranking.py

示例14: acc

# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import KFold [as 别名]
def acc(predict_file):
    """Compute 10-fold accuracy and the best threshold of each fold.

    ``predict_file`` is read as whitespace-separated rows.  For every fold
    the best threshold is chosen on the training rows with
    ``find_best_threshold`` and then evaluated on the test rows with
    ``eval_acc``.

    The split is fixed at 6000 samples, so the file must provide rows for
    indices 0..5999.

    Parameters
    ----------
    predict_file : path of the text file holding one prediction per line.

    Returns
    -------
    (accuracy, thd) : two lists with one entry per fold -- the accuracy on
        the fold's test rows and the threshold selected for that fold.
    """
    print("...Computing accuracy.")
    folds = KFold(n=6000, n_folds=10, shuffle=False)
    thresholds = np.arange(-1.0, 1.0, 0.005)

    with open(predict_file, "r") as f:
        # Build a real list: on Python 3 ``map`` is lazy and ``np.array``
        # would wrap the iterator in a 0-d object array instead of rows.
        predicts = np.array([line.split() for line in f])

    accuracy = []
    thd = []
    for idx, (train, test) in enumerate(folds):
        logging.info("processing fold {}...".format(idx))
        best_thresh = find_best_threshold(thresholds, predicts[train])
        accuracy.append(eval_acc(best_thresh, predicts[test]))
        thd.append(best_thresh)
    return accuracy, thd
开发者ID:tornadomeet,项目名称:mxnet-face,代码行数:17,代码来源:lfw.py

示例15: main

# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import KFold [as 别名]
def main(argv):
    """Split a triples file into 10 train/valid/test folds on disk.

    The triples are read with ``read_triples`` from the path given as the
    single positional argument.  For each of the 10 shuffled folds, the
    fold's test indices form the test set; the remaining triples are split
    again into train and validation, with the validation set as large as
    the test set.  Each set is written as tab-separated lines to
    ``folds/<fold_no>/nations_{train,valid,test}.tsv``.

    Parameters
    ----------
    argv : list of command-line arguments (without the program name).
    """
    def formatter(prog):
        return argparse.HelpFormatter(prog, max_help_position=100, width=200)

    def to_lines(rows):
        # One "subject<TAB>predicate<TAB>object" line per triple.
        return ['{}\t{}\t{}\n'.format(s, p, o) for [s, p, o] in rows]

    argparser = argparse.ArgumentParser('K-Folder for Knowledge Graphs', formatter_class=formatter)
    argparser.add_argument('triples', action='store', type=str, default=None)

    args = argparser.parse_args(argv)

    triples = read_triples(args.triples)
    triples_np = np.array(triples)

    kf = KFold(n=len(triples), n_folds=10, random_state=0, shuffle=True)

    for fold_no, (train_idx, test_idx) in enumerate(kf):
        train_valid_triples = triples_np[train_idx]
        test_triples = triples_np[test_idx]

        # The dummy array of ones only satisfies train_test_split's
        # signature; its two outputs are discarded.
        train_triples, valid_triples, _, _ = train_test_split(train_valid_triples,
                                                              np.ones(train_valid_triples.shape[0]),
                                                              test_size=len(test_triples), random_state=0)

        # Format everything before touching the file system.
        outputs = [('train', to_lines(train_triples)),
                   ('valid', to_lines(valid_triples)),
                   ('test', to_lines(test_triples))]

        fold_dir = 'folds/{}'.format(fold_no)
        if not os.path.exists(fold_dir):
            # makedirs also creates the parent ``folds`` directory if needed.
            os.makedirs(fold_dir)

        for split_name, lines in outputs:
            with open('{}/nations_{}.tsv'.format(fold_dir, split_name), 'w') as f:
                f.writelines(lines)
开发者ID:uclnlp,项目名称:inferbeddings,代码行数:43,代码来源:make_folds.py


注:本文中的sklearn.cross_validation.KFold方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。