当前位置: 首页>>代码示例>>Python>>正文


Python KFold.split方法代码示例

本文整理汇总了Python中sklearn.model_selection.KFold.split方法的典型用法代码示例。如果您正苦于以下问题:Python KFold.split方法的具体用法?Python KFold.split怎么用?Python KFold.split使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sklearn.model_selection.KFold的用法示例。


在下文中一共展示了KFold.split方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: validateseq2

# 需要导入模块: from sklearn.model_selection import KFold [as 别名]
# 用法: kf = KFold(n_splits=...); kf.split(X) —— split 是 KFold 的实例方法,不能通过 from ... import 单独导入
def validateseq2(X_all, y, features, clf, score, v = False, esr=50, sk=5):
    """Run a two-stage K-fold validation and return the mean AUC.

    Stage 1 fits ``clf`` on every fold of the ``CreateGroup == 306`` window
    and averages its positive-class probability for the
    ``CreateGroup == 336`` rows into a ``Prob_x`` column.  Stage 2
    cross-validates ``clf`` on the 336 window using ``features`` plus
    ``Prob_x`` and scores each held-out fold with ``roc_auc_score``.

    The user lists for both windows are taken from the module-level
    ``target_order`` frame (filtered on ``o_day_series``).

    Args:
        X_all: DataFrame holding ``user_id``, ``CreateGroup``, ``buy`` and
            the feature columns.
        y: Unused by this function.
        features: List of feature column names.  It is not modified.
        clf: Estimator whose ``fit`` accepts ``eval_set``, ``eval_metric``,
            ``verbose`` and ``early_stopping_rounds`` and which provides
            ``predict_proba``.
        score: Unused by this function.
        v: Forwarded to ``clf.fit`` as ``verbose``.
        esr: Forwarded to ``clf.fit`` as ``early_stopping_rounds``.
        sk: Number of K-fold splits used in both stages.

    Returns:
        Tuple ``(mean AUC over the stage-2 folds, clf)``; ``clf`` is left
        fitted on the last stage-2 fold.
    """
    # Validation window: users with an order in [274, 336).
    temp_user = target_order[(target_order.o_day_series < 336) & (target_order.o_day_series >= 274)][['user_id']].drop_duplicates().reset_index(drop=True)
    temp_user['CreateGroup'] = 336
    print('before delete: {}'.format(X_all.shape))
    X = temp_user.merge(X_all,on=['user_id','CreateGroup'],how = 'left')
    print('after delete: {}'.format(X.shape))
    # Earlier window used to build the stacked feature: users in [215, 306).
    temp_user = target_order[(target_order.o_day_series < 306) & (target_order.o_day_series >= 215)][['user_id']].drop_duplicates().reset_index(drop=True)
    temp_user['CreateGroup'] = 306
    print('before delete: {}'.format(X_all.shape))
    X2 = temp_user.merge(X_all,on=['user_id','CreateGroup'],how = 'left')
    # Report the frame that was just built (the original printed X.shape again).
    print('after delete: {}'.format(X2.shape))
    kf = KFold(n_splits=sk)
    print(len(features))
    X['Prob_x'] = 0
    for train_index, test_index in kf.split(X2):
        # KFold yields positional indices, so use .iloc (.ix was removed
        # from pandas); merge() returns a fresh 0..n-1 index, so the rows
        # selected are the same ones .ix used to pick.
        train_rows, test_rows = X2.iloc[train_index], X2.iloc[test_index]
        X_train, X_test = train_rows[features], test_rows[features]
        y_train, y_test = train_rows.buy, test_rows.buy
        clf.fit(X_train,y_train, eval_set = [(X_train, y_train), (X_test, y_test)], eval_metric='auc', verbose=v, early_stopping_rounds=esr)
        # Average the fold models' probabilities for the validation window.
        X['Prob_x'] = X['Prob_x'] + clf.predict_proba(X[features])[:,1]/sk
    Performance = []
    # Work on a copy so the caller's list does not silently grow 'Prob_x'.
    stacked_features = list(features) + ['Prob_x']
    for train_index, test_index in kf.split(X):
        train_rows, test_rows = X.iloc[train_index], X.iloc[test_index]
        X_train, X_test = train_rows[stacked_features], test_rows[stacked_features]
        y_train, y_test = train_rows.buy, test_rows.buy
        clf.fit(X_train,y_train, eval_set = [(X_train, y_train), (X_test, y_test)], eval_metric='auc', verbose=v, early_stopping_rounds=esr)
        pred = clf.predict_proba(X_test)[:,1]
        Performance.append(roc_auc_score(y_test,pred))
    print("Mean Score: {}".format(np.mean(Performance)))
    return np.mean(Performance),clf
开发者ID:ethanww,项目名称:JData-2018,代码行数:33,代码来源:Run_12.py

示例2: test_cross_val_multiscore

# 需要导入模块: from sklearn.model_selection import KFold [as 别名]
# 用法: kf = KFold(n_splits=...); kf.split(X) —— split 是 KFold 的实例方法,不能通过 from ... import 单独导入
def test_cross_val_multiscore():
    """Test cross_val_multiscore for computing scores on decoding over time."""
    from sklearn.model_selection import KFold, StratifiedKFold, cross_val_score
    from sklearn.linear_model import LogisticRegression, LinearRegression

    # compare to cross-val-score
    X = np.random.rand(20, 3)
    y = np.arange(20) % 2
    clf = LogisticRegression()
    # No random_state here: it only has an effect when shuffle=True, and
    # recent scikit-learn raises ValueError when it is set without shuffling.
    cv = KFold(2)
    assert_array_equal(cross_val_score(clf, X, y, cv=cv),
                       cross_val_multiscore(clf, X, y, cv=cv))

    # Test with search light
    X = np.random.rand(20, 4, 3)
    y = np.arange(20) % 2
    clf = SlidingEstimator(LogisticRegression(), scoring='accuracy')
    scores_acc = cross_val_multiscore(clf, X, y, cv=cv)
    assert_array_equal(np.shape(scores_acc), [2, 3])

    # check values
    scores_acc_manual = list()
    for train, test in cv.split(X, y):
        clf.fit(X[train], y[train])
        scores_acc_manual.append(clf.score(X[test], y[test]))
    assert_array_equal(scores_acc, scores_acc_manual)

    # check scoring metric
    # raise an error if scoring is defined at cross-val-score level and
    # search light, because search light does not return a 1-dimensional
    # prediction.
    assert_raises(ValueError, cross_val_multiscore, clf, X, y, cv=cv,
                  scoring='roc_auc')
    clf = SlidingEstimator(LogisticRegression(), scoring='roc_auc')
    scores_auc = cross_val_multiscore(clf, X, y, cv=cv, n_jobs=1)
    scores_auc_manual = list()
    for train, test in cv.split(X, y):
        clf.fit(X[train], y[train])
        scores_auc_manual.append(clf.score(X[test], y[test]))
    assert_array_equal(scores_auc, scores_auc_manual)

    # indirectly test that cross_val_multiscore rightly detects the type of
    # estimator and generates a StratifiedKFold for classifiers and a KFold
    # otherwise
    X = np.random.randn(1000, 3)
    # Labels are sorted (500 zeros then 500 ones), so an unstratified
    # 2-fold split trains on a single class in each fold.
    y = np.r_[np.zeros(500), np.ones(500)]
    clf = LogisticRegression(random_state=0)
    reg = LinearRegression()
    for cross_val in (cross_val_score, cross_val_multiscore):
        manual = cross_val(clf, X, y, cv=StratifiedKFold(2))
        auto = cross_val(clf, X, y, cv=2)
        assert_array_equal(manual, auto)
        assert_raises(ValueError, cross_val, clf, X, y, cv=KFold(2))

        manual = cross_val(reg, X, y, cv=KFold(2))
        auto = cross_val(reg, X, y, cv=2)
        assert_array_equal(manual, auto)
开发者ID:HSMin,项目名称:mne-python,代码行数:59,代码来源:test_base.py

示例3: predict2

# 需要导入模块: from sklearn.model_selection import KFold [as 别名]
# 用法: kf = KFold(n_splits=...); kf.split(X) —— split 是 KFold 的实例方法,不能通过 from ... import 单独导入
def predict2(X_all, X_new, features, clf, score, v = False, esr=50, sk=3, fn='submission'):
    """Train the two-stage stacked model and write submission files.

    Stage 1 fits ``clf`` on each fold of the ``CreateGroup == 306`` window
    and averages its positive-class probability into a ``Prob_x`` column
    for both the ``CreateGroup == 336`` window and the prediction window
    (``CreateGroup == 366``).  Stage 2 cross-validates ``clf`` on the 336
    window with ``features`` plus ``Prob_x``, scores each fold with
    ``roc_auc_score`` and averages the fold models' probabilities for the
    prediction window into ``Prob``.

    Two CSV files are written to the working directory:
    ``prob_<fn>.csv`` (``user_id``, ``Prob`` for all rows, sorted by
    descending ``Prob``) and ``<fn>.csv`` (``user_id``, ``pred_date`` for
    the first 50000 rows).  ``pred_date`` is a uniformly random day offset
    in ``[0, 15)`` added to 2017-08-31; it is not predicted by the model.

    The user lists for all windows are taken from the module-level
    ``target_order`` frame (filtered on ``o_day_series``).

    Args:
        X_all: DataFrame with ``user_id``, ``CreateGroup``, ``buy`` and the
            feature columns for the training windows.
        X_new: DataFrame with ``user_id``, ``CreateGroup`` and the feature
            columns for the prediction window.
        features: List of feature column names.  It is not modified.
        clf: Estimator whose ``fit`` accepts ``eval_set``, ``eval_metric``,
            ``verbose`` and ``early_stopping_rounds`` and which provides
            ``predict_proba`` and ``feature_importances_``.
        score: Unused by this function.
        v: Forwarded to ``clf.fit`` as ``verbose``.
        esr: Forwarded to ``clf.fit`` as ``early_stopping_rounds``.
        sk: Number of K-fold splits used in both stages.
        fn: Base name of the CSV files to write.

    Returns:
        Tuple ``(mean AUC over the stage-2 folds, clf)``.
    """
    # Validation window: users with an order in [274, 336).
    temp_user = target_order[(target_order.o_day_series < 336) & (target_order.o_day_series >= 274)][['user_id']].drop_duplicates().reset_index(drop=True)
    temp_user['CreateGroup'] = 336
    print('before delete: {}'.format(X_all.shape))
    X = temp_user.merge(X_all,on=['user_id','CreateGroup'],how = 'left')
    print('after delete: {}'.format(X.shape))

    # Prediction window: users with an order in [292, 366).
    temp_user = target_order[(target_order.o_day_series < 366) &
                             (target_order.o_day_series >= 366 - 74)][['user_id']].drop_duplicates().reset_index(drop=True)
    temp_user['CreateGroup'] = 366
    # NOTE(review): `in` on a Series tests the index labels, not the values;
    # if these debug prints were meant to look for user ids, use `.values`.
    print(-1 in temp_user.user_id)
    print(4366 in temp_user.user_id)
    print('before delete: {}'.format(X_new.shape))
    X_new = temp_user.merge(X_new,on=['user_id','CreateGroup'],how = 'left')

    # Earlier window used to build the stacked feature: users in [215, 306).
    temp_user = target_order[(target_order.o_day_series < 306) & (target_order.o_day_series >= 215)][['user_id']].drop_duplicates().reset_index(drop=True)
    temp_user['CreateGroup'] = 306
    print('before delete: {}'.format(X_all.shape))
    X2 = temp_user.merge(X_all,on=['user_id','CreateGroup'],how = 'left')
    print('Train: {}'.format(X_new.shape))

    kf = KFold(n_splits=sk)
    print(len(features))
    Performance = []
    X_new['Prob'] = 0
    X_new['Prob_x'] = 0
    X['Prob_x'] = 0
    for train_index, test_index in kf.split(X2):
        # KFold yields positional indices, so use .iloc (.ix was removed
        # from pandas); merge() returns a fresh 0..n-1 index, so the rows
        # selected are the same ones .ix used to pick.
        train_rows, test_rows = X2.iloc[train_index], X2.iloc[test_index]
        X_train, X_test = train_rows[features], test_rows[features]
        y_train, y_test = train_rows.buy, test_rows.buy
        clf.fit(X_train,y_train, eval_set = [(X_train, y_train), (X_test, y_test)], eval_metric='auc', verbose=v, early_stopping_rounds=esr)
        X_new['Prob_x'] = X_new['Prob_x'] + clf.predict_proba(X_new[features])[:,1]/sk
        X['Prob_x'] = X['Prob_x'] + clf.predict_proba(X[features])[:,1]/sk
    # Work on a copy so the caller's list does not silently grow 'Prob_x'.
    stacked_features = list(features) + ['Prob_x']

    for train_index, test_index in kf.split(X):
        train_rows, test_rows = X.iloc[train_index], X.iloc[test_index]
        X_train, X_test = train_rows[stacked_features], test_rows[stacked_features]
        y_train, y_test = train_rows.buy, test_rows.buy
        clf.fit(X_train,y_train, eval_set = [(X_train, y_train), (X_test, y_test)], eval_metric='auc', verbose=v, early_stopping_rounds=esr)
        pred = clf.predict_proba(X_test)[:,1]
        X_new['Prob'] = X_new['Prob'] + clf.predict_proba(X_new[stacked_features])[:,1]/sk
        Performance.append(roc_auc_score(y_test,pred))
    print("Mean Score: {}".format(np.mean(Performance)))
    # Importances of the model fitted on the last stage-2 fold, largest first.
    importantlist = [[importance, name] for name, importance in zip(stacked_features, clf.feature_importances_)]
    print(sorted(importantlist)[::-1])
    first_day = datetime.datetime.strptime('2017-08-31 00:00:00', '%Y-%m-%d %H:%M:%S')
    X_new['Days'] = np.random.randint(15,size=len(X_new))
    X_new['pred_date'] = X_new['Days'].apply(lambda x: (datetime.timedelta(days=x) + first_day).strftime("%Y-%m-%d"))
    X_new.sort_values(by = ['Prob'], ascending = False, inplace = True)
    X_new[['user_id','Prob']].to_csv('prob_{}.csv'.format(fn), index = None)
    X_new[['user_id','pred_date']][:50000].to_csv('{}.csv'.format(fn), index = None)
    return np.mean(Performance),clf
开发者ID:ethanww,项目名称:JData-2018,代码行数:58,代码来源:Run_12.py

示例4: cross_validate

# 需要导入模块: from sklearn.model_selection import KFold [as 别名]
# 用法: kf = KFold(n_splits=...); kf.split(X) —— split 是 KFold 的实例方法,不能通过 from ... import 单独导入
    def cross_validate(self, values_labels, folds=10, processes=1):
        """
        Trains and tests the model against folds of labeled data.

        :Parameters:
            values_labels : [( `<feature_values>`, `<label>` )]
                an iterable of labeled data Where <values_labels> is an ordered
                collection of predictive values that correspond to the
                `Feature` s provided to the constructor
            folds : `int`
                The number of folds the labeled data is split into.
            processes : `int`
                When set to 1, cross-validation will run in the parent thread.
                For any other value, a :class:`multiprocessing.Pool` will
                be created; a falsy value uses ``cpu_count()`` workers.

        :Returns:
            ``self.info['statistics']`` after it has been fit on the
            score/label pairs gathered from every fold.
        """
        # KFold.split() needs len() and the folds are built by position, so
        # materialise the input in case the caller passed a plain iterator.
        values_labels = list(values_labels)
        folds_i = KFold(n_splits=folds, shuffle=True,
                        random_state=0)
        # One (fold number, training rows, test rows) task per fold.
        fold_tasks = ((i, [values_labels[j] for j in train_i],
                       [values_labels[j] for j in test_i])
                      for i, (train_i, test_i) in enumerate(
                          folds_i.split(values_labels)))
        if processes == 1:
            results = map(self._cross_score, fold_tasks)
        else:
            pool = Pool(processes=processes or cpu_count())
            try:
                # pool.map blocks until every fold has been scored.
                results = pool.map(self._cross_score, fold_tasks)
            finally:
                # Release the worker processes even when a fold fails.
                pool.terminate()
                pool.join()
        agg_score_labels = []
        for score_labels in results:
            agg_score_labels.extend(score_labels)

        self.info['statistics'].fit(agg_score_labels)

        return self.info['statistics']
开发者ID:wiki-ai,项目名称:revscoring,代码行数:35,代码来源:model.py

示例5: calculate_roc

# 需要导入模块: from sklearn.model_selection import KFold [as 别名]
# 用法: kf = KFold(n_splits=...); kf.split(X) —— split 是 KFold 的实例方法,不能通过 from ... import 单独导入
def calculate_roc(thresholds, embeddings1, embeddings2, actual_issame, nrof_folds=10):
    """Compute fold-averaged TPR/FPR curves and per-fold accuracy.

    The squared Euclidean distance between each row pair of ``embeddings1``
    and ``embeddings2`` is evaluated against every value in ``thresholds``
    through ``calculate_accuracy``.  For each of the ``nrof_folds``
    unshuffled folds, the threshold with the highest training accuracy is
    chosen and its accuracy on the held-out part is recorded.

    Returns:
        ``(tpr, fpr, accuracy)``: ``tpr`` and ``fpr`` are averaged over the
        folds (one value per threshold); ``accuracy`` has one value per fold.
    """
    assert(embeddings1.shape[0] == embeddings2.shape[0])
    assert(embeddings1.shape[1] == embeddings2.shape[1])
    pair_count = min(len(actual_issame), embeddings1.shape[0])
    threshold_count = len(thresholds)
    splitter = KFold(n_splits=nrof_folds, shuffle=False)

    tprs = np.zeros((nrof_folds, threshold_count))
    fprs = np.zeros((nrof_folds, threshold_count))
    accuracy = np.zeros((nrof_folds))

    # Squared Euclidean distance of every embedding pair.
    dist = np.sum(np.square(np.subtract(embeddings1, embeddings2)), 1)
    pair_ids = np.arange(pair_count)

    for fold, (train_set, test_set) in enumerate(splitter.split(pair_ids)):
        train_dist, train_same = dist[train_set], actual_issame[train_set]
        test_dist, test_same = dist[test_set], actual_issame[test_set]

        # Training accuracy of every candidate threshold on this fold.
        train_acc = np.zeros((threshold_count))
        for k, candidate in enumerate(thresholds):
            _, _, train_acc[k] = calculate_accuracy(candidate, train_dist, train_same)
        best = np.argmax(train_acc)

        # ROC points for every threshold on the held-out part.
        for k, candidate in enumerate(thresholds):
            tprs[fold, k], fprs[fold, k], _ = calculate_accuracy(candidate, test_dist, test_same)

        # Held-out accuracy at the threshold that was best on training data.
        _, _, accuracy[fold] = calculate_accuracy(thresholds[best], test_dist, test_same)

    tpr = np.mean(tprs, 0)
    fpr = np.mean(fprs, 0)
    return tpr, fpr, accuracy
开发者ID:kissthink,项目名称:facenet_regonistant,代码行数:31,代码来源:facenet.py

示例6: compute_matrices_for_gradient_totalcverr

# 需要导入模块: from sklearn.model_selection import KFold [as 别名]
# 用法: kf = KFold(n_splits=...); kf.split(X) —— split 是 KFold 的实例方法,不能通过 from ... import 单独导入
 def compute_matrices_for_gradient_totalcverr(self, train_x, train_y, train_z):
     """Precompute the per-fold kernel and distance matrices.

     When the corresponding ``*_use_median`` flag is set, the width of
     ``kernelX`` / ``kernelY`` is first set from the kernel's median
     heuristic on ``train_x`` / ``train_y``.

     Returns a list of 8 lists, each with ``self.K_folds`` entries, in this
     row order: Kx_tst_tr, Kx_tr_tr, Kx_tst_tst, Ky_tst_tr, Ky_tr_tr,
     Ky_tst_tst, D_tst_tr, D_tr_tr (the D rows are squared Euclidean
     distances on ``train_z``).
     """
     if self.kernelX_use_median:
         self.kernelX.set_width(float(self.kernelX.get_sigma_median_heuristic(train_x)))
     if self.kernelY_use_median:
         self.kernelY.set_width(float(self.kernelY.get_sigma_median_heuristic(train_y)))
     splitter = KFold( n_splits=self.K_folds)
     # Nested comprehensions give every cell its own placeholder object;
     # list multiplication would alias the same inner list across rows.
     matrix_results = [[[None] for _ in range(self.K_folds)] for _ in range(8)]
     # Only the number of rows matters for the split, so a dummy column is enough.
     placeholder = np.ones((self.num_samples,1))
     for fold, (train_index, test_index) in enumerate(splitter.split(placeholder)):
         X_tr, X_tst = train_x[train_index], train_x[test_index]
         Y_tr, Y_tst = train_y[train_index], train_y[test_index]
         Z_tr, Z_tst = train_z[train_index], train_z[test_index]
         fold_matrices = (
             self.kernelX.kernel(X_tst, X_tr),      # Kx_tst_tr
             self.kernelX.kernel(X_tr, X_tr),       # Kx_tr_tr
             self.kernelX.kernel(X_tst, X_tst),     # Kx_tst_tst
             self.kernelY.kernel(Y_tst, Y_tr),      # Ky_tst_tr
             self.kernelY.kernel(Y_tr, Y_tr),       # Ky_tr_tr
             self.kernelY.kernel(Y_tst,Y_tst),      # Ky_tst_tst
             cdist(Z_tst, Z_tr, 'sqeuclidean'),     # D_tst_tr: square distance matrix
             cdist(Z_tr, Z_tr, 'sqeuclidean'),      # D_tr_tr: square distance matrix
         )
         for row, matrix in zip(matrix_results, fold_matrices):
             row[fold] = matrix
     return matrix_results
开发者ID:oxmlcs,项目名称:kerpy,代码行数:28,代码来源:TwoStepCondTestObject.py

示例7: predict_model_kfold

# 需要导入模块: from sklearn.model_selection import KFold [as 别名]
# 用法: kf = KFold(n_splits=...); kf.split(X) —— split 是 KFold 的实例方法,不能通过 from ... import 单独导入
def predict_model_kfold(name,path,features_type,label_name,data):
    """Cross-validate a balanced random forest and return it refit on all data.

    A ``BalancedRandomForestClassifier`` is evaluated with shuffled 10-fold
    cross-validation on ``data``.  The per-fold results returned by
    ``create_all_eval_results`` are averaged and appended, together with
    identifying fields, as a new row to the module-level
    ``results_all_projects`` frame.

    Args:
        name: Project name; forwarded to ``create_all_eval_results`` and
            stored in the results row.
        path: Unused by this function.
        features_type: Feature-set label; forwarded and stored like ``name``.
        label_name: Unused by this function; the label column is the
            hard-coded ``'hasBug'``.
        data: DataFrame with a ``'hasBug'`` column and the feature columns.

    Returns:
        The classifier after a final fit on the complete data set.
    """
    global results_all_projects

    # `shuffle` is keyword-only in current scikit-learn; KFold(10, True)
    # raises TypeError there.
    kfold = KFold(n_splits=10, shuffle=True)
    #RandomForest -I 1000 -K 0 -S 1 -num-slots 1
    model = BalancedRandomForestClassifier(n_estimators=1000,max_depth=5)
    index = 0
    all_predictions = 0
    x = data.drop('hasBug', axis=1)
    y = data['hasBug']
    num_of_bugs = data.loc[data['hasBug'] == 1].shape[0]
    num_of_all_instances = data.shape[0]
    bug_precent = float(num_of_bugs) / float(num_of_all_instances)
    for train, test in kfold.split(data):
        index += 1
        prediction_train = model.fit(x.iloc[train], y.iloc[train]).predict(x.iloc[test])
        # presumably a numeric array of metrics (`.tolist()` is called on the
        # accumulated sum below) -- confirm against create_all_eval_results
        all_predictions += create_all_eval_results(False,y.iloc[test],prediction_train,name,"training",features_type,num_of_bugs,num_of_all_instances,bug_precent,None)

    # Average the accumulated per-fold results.
    all_predictions /= index
    start_list = [name,"training",features_type,"sklearn - python"]
    result_list = start_list+ all_predictions.tolist()

    results_all_projects.loc[len(results_all_projects)] = result_list

    model.fit(x,y)
    return model
开发者ID:amir9979,项目名称:Debugger,代码行数:28,代码来源:from_weka_to_python.py

示例8: _iter_test_masks

# 需要导入模块: from sklearn.model_selection import KFold [as 别名]
# 用法: kf = KFold(n_splits=...); kf.split(X) —— split 是 KFold 的实例方法,不能通过 from ... import 单独导入
    def _iter_test_masks(self, X, y=None, groups=None):
        """Yield one boolean test mask per fold, splitting inside each group.

        Every distinct value in ``groups`` is split independently with a
        ``KFold`` of ``self.n_splits`` folds; the i-th mask marks the i-th
        test part of every group.

        Args:
            X: Array-like whose ``shape[0]`` gives the number of samples.
            y: Unused by this method.
            groups: Array of group labels, one per sample.  When ``None``,
                all samples are treated as a single group.

        Yields:
            A boolean array of length ``n_samples``.  The same array object
            is reused and overwritten on every iteration, so copy it if it
            must outlive the next step.
        """
        n_samples = X.shape[0]

        # if groups is not specified, an entire data is specified as one group
        if groups is None:
            groups = np.zeros(n_samples, dtype=int)

        # constants
        indices = np.arange(n_samples)
        test_fold = np.empty(n_samples, dtype=bool)
        rng = check_random_state(self.random_state)
        group_indices = np.unique(groups)

        # Keyword arguments: everything after n_splits is keyword-only in
        # current scikit-learn.  The random state is only passed when
        # shuffling, because KFold rejects it otherwise.
        cv = KFold(n_splits=self.n_splits, shuffle=self.shuffle,
                   random_state=rng if self.shuffle else None)

        # Sample indices of each group, computed once, and one lazy split
        # iterator per group.
        group_members = [indices[groups == g] for g in group_indices]
        iters = [cv.split(members) for members in group_members]

        # generate training and test splits
        for _ in range(self.n_splits):
            test_fold[:] = False
            for members, group_iter in zip(group_members, iters):
                _, group_test_i = next(group_iter)
                # Map positions inside the group back to sample indices.
                test_fold[members[group_test_i]] = True
            yield test_fold
开发者ID:tkamishima,项目名称:kamrecsys,代码行数:30,代码来源:split.py

示例9: original_data

# 需要导入模块: from sklearn.model_selection import KFold [as 别名]
# 用法: kf = KFold(n_splits=...); kf.split(X) —— split 是 KFold 的实例方法,不能通过 from ... import 单独导入
def original_data():
    """Print the cross-validated mean F1 score for every target/algorithm pair.

    For each entry of ``TARGETS`` and ``ALGORITHMS`` the original data set
    is read, split with a shuffled ``KFold`` of ``CROSS_VALIDATION_K``
    folds, standardised per fold (the scaler is fitted on the training
    part only) and passed to the algorithm module's ``runClassifier``.
    """
    for target in TARGETS:
        for algo_str in ALGORITHMS:
            algorithm = importlib.import_module('src.multi_class.' + algo_str)
            encoded_data = input_preproc.readFromDataset(
                INPUT_DIR + ORIGINAL_DATA_FILE,
                INPUT_COLS['original'],
                target
            )

            # Every column except the target is a predictor.
            predictor_columns = encoded_data.columns.difference([target])
            X = np.array(encoded_data[predictor_columns])
            y = np.array(encoded_data[target])

            splitter = KFold(n_splits=CROSS_VALIDATION_K, shuffle=True)
            fold_f1_scores = []
            for train_index, test_index in splitter.split(X):
                scaler = preprocessing.StandardScaler()
                # Fit the scaler on the training fold, then reuse it for the test fold.
                X_train = pd.DataFrame(scaler.fit_transform(X[train_index]))
                X_test = scaler.transform(X[test_index])

                _, _, fold_f1, _ = algorithm.runClassifier(
                    X_train, X_test, y[train_index], y[test_index])
                fold_f1_scores.append(fold_f1)

            final_f1 = sum(fold_f1_scores) / len(fold_f1_scores)
            print("\n================================")
            print("%s, %s, F1 Score: %.6f" % (target, algo_str, final_f1))
            print("================================\n")
开发者ID:cassinius,项目名称:right-to-forget-data,代码行数:33,代码来源:iMLBatchProcessing.py

示例10: test_multiclass_classification

# 需要导入模块: from sklearn.model_selection import KFold [as 别名]
# 用法: kf = KFold(n_splits=...); kf.split(X) —— split 是 KFold 的实例方法,不能通过 from ... import 单独导入
def test_multiclass_classification():
    """Check XGBClassifier on iris with 2-fold CV under several predict options."""
    from sklearn.datasets import load_iris
    from sklearn.model_selection import KFold

    def check_pred(preds, labels, output_margin):
        # With output_margin each prediction is a row of per-class margins,
        # so the predicted class is its argmax; otherwise it is the label.
        if output_margin:
            wrong = sum(1 for pred, label in zip(preds, labels)
                        if pred.argmax() != label)
        else:
            wrong = sum(1 for pred, label in zip(preds, labels)
                        if pred != label)
        err = wrong / float(len(preds))
        assert err < 0.4

    iris = load_iris()
    X, y = iris['data'], iris['target']
    # (extra predict() keyword arguments, whether the output is a margin)
    predict_cases = [
        ({}, False),
        # test other params in XGBClassifier().fit
        ({'output_margin': True, 'ntree_limit': 3}, True),
        ({'output_margin': True, 'ntree_limit': 0}, True),
        ({'output_margin': False, 'ntree_limit': 3}, False),
    ]
    splitter = KFold(n_splits=2, shuffle=True, random_state=rng)
    for train_index, test_index in splitter.split(X, y):
        xgb_model = xgb.XGBClassifier().fit(X[train_index], y[train_index])
        # Run every prediction first, then validate them in the same order.
        outputs = [(xgb_model.predict(X[test_index], **kwargs), is_margin)
                   for kwargs, is_margin in predict_cases]
        labels = y[test_index]
        for preds, is_margin in outputs:
            check_pred(preds, labels, output_margin=is_margin)
开发者ID:dmlc,项目名称:xgboost,代码行数:35,代码来源:test_with_sklearn.py

示例11: Get_KFolds

# 需要导入模块: from sklearn.model_selection import KFold [as 别名]
# 用法: kf = KFold(n_splits=...); kf.split(X) —— split 是 KFold 的实例方法,不能通过 from ... import 单独导入
def Get_KFolds(data, y_label, num_folds, scale):
    """Split ``data`` into ``num_folds`` folds of training and test rows.

    Each fold is ``[[train_x, train_y], [test_x, test_y]]``, where every
    element is a plain list of rows/values taken from the frame returned
    by ``Normalize_Scale(data, scale)``; ``y_label`` names the target
    column and all remaining columns are features.

    Returns:
        A list with one such fold per ``KFold`` split.
    """
    folds = []
    splitter = KFold(n_splits = num_folds)
    for train_index, test_index in splitter.split(data):
        # NOTE(review): the scaled frame does not depend on the fold; the
        # call stays inside the loop because it is unclear from here whether
        # Normalize_Scale has side effects -- confirm before hoisting.
        scaled = Normalize_Scale(data,scale)
        feature_rows = scaled.drop([y_label], axis=1).values
        target_values = scaled[y_label].values

        # Training part of the fold
        training = [[feature_rows[i] for i in train_index],
                    [target_values[i] for i in train_index]]

        # Test part of the fold
        test = [[feature_rows[i] for i in test_index],
                [target_values[i] for i in test_index]]

        folds.append([training, test])

    return folds
开发者ID:Andymic,项目名称:Machine-Learning,代码行数:34,代码来源:API.py

示例12: hyperopt_obj

# 需要导入模块: from sklearn.model_selection import KFold [as 别名]
# 用法: kf = KFold(n_splits=...); kf.split(X) —— split 是 KFold 的实例方法,不能通过 from ... import 单独导入
 def hyperopt_obj(self,param,train_X,train_y):
     """Hyperopt objective: mean 3-fold cross-validated MSE for ``param``.

     ``param['max_depth']`` and ``param['num_round']`` are converted to
     ``int`` in place, so the caller's dict is modified.  Besides the
     cross-validation, a model is trained on the full data and its
     training R2 plus the mean cross-validated R2 are printed.

     Returns:
         ``{'loss': <mean fold MSE>, 'status': STATUS_OK}``.
     """
     splitter = KFold(n_splits = 3)
     fold_mse = []
     fold_r2 = []
     # The sampled values may not be ints, so coerce them before training.
     for key in ['max_depth','num_round']:
         param[key] = int(param[key])
     for train_ind, test_ind in splitter.split(train_X):
         fit_x, fit_y = train_X[train_ind], train_y[train_ind]
         held_x, held_y = train_X[test_ind], train_y[test_ind]
         fit_matrix = xgb.DMatrix(fit_x, label = fit_y)
         held_matrix = xgb.DMatrix(held_x)
         booster = xgb.train(param, fit_matrix, num_boost_round=int(param['num_round']))
         held_pred = booster.predict(held_matrix)
         fold_mse.append(mean_squared_error(held_y, held_pred))
         fold_r2.append(r2_score(held_y, held_pred))
     # Fit on the whole training set to report the in-sample score.
     full_matrix = xgb.DMatrix(train_X, label = train_y)
     print('training score:')
     booster = xgb.train(param, full_matrix, num_boost_round= int(param['num_round']))
     full_pred = booster.predict(xgb.DMatrix(train_X))
     print(str(r2_score(train_y, full_pred)))
     print(np.mean(fold_r2))
     print('\n')
     return {'loss':np.mean(fold_mse),'status': STATUS_OK}
开发者ID:Matafight,项目名称:Kaggle,代码行数:29,代码来源:stacking.py

示例13: computing_cv_accuracy_LDA

# 需要导入模块: from sklearn.model_selection import KFold [as 别名]
# 用法: kf = KFold(n_splits=...); kf.split(X) —— split 是 KFold 的实例方法,不能通过 from ... import 单独导入
def computing_cv_accuracy_LDA(in_path=None, cv_n_fold=10):
    """Print the cross-validated u65 and u80 scores of scikit-learn's LDA.

    The data set is read from ``in_path`` with ``pd.read_csv`` (or from
    ``export_data_set('iris.data')`` when ``in_path`` is ``None``); the
    last column is the label and all other columns are features.  A
    shuffled ``KFold`` with ``cv_n_fold`` folds is used, and a prediction
    only contributes to the score when it contains the true label.
    """
    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

    def u65(mod_Y):
        return 1.6 / mod_Y - 0.6 / mod_Y ** 2

    def u80(mod_Y):
        return 2.2 / mod_Y - 1.2 / mod_Y ** 2

    if in_path is None:
        data = export_data_set('iris.data')
    else:
        data = pd.read_csv(in_path)
    print("-----DATA SET TRAINING---", in_path)
    X = data.iloc[:, :-1].values
    y = np.array(data.iloc[:, -1].tolist())
    splitter = KFold(n_splits=cv_n_fold, random_state=None, shuffle=True)
    lda = LinearDiscriminantAnalysis(solver="svd", store_covariance=True)
    mean_u65, mean_u80 = 0, 0
    for idx_train, idx_test in splitter.split(y):
        print("---k-FOLD-new-executing--")
        lda.fit(X[idx_train], y[idx_train])
        fold_size = len(idx_test)
        sum_u65, sum_u80 = 0, 0
        for i, (sample, truth) in enumerate(zip(X[idx_test], y[idx_test])):
            evaluate = lda.predict([sample])
            print("-----TESTING-----", i)
            if truth in evaluate:
                # The utility is discounted by the size of the predicted set.
                predicted_size = len(evaluate)
                sum_u65 += u65(predicted_size)
                sum_u80 += u80(predicted_size)
        mean_u65 += sum_u65 / fold_size
        mean_u80 += sum_u80 / fold_size
    print("--->", mean_u65 / cv_n_fold, mean_u80 / cv_n_fold)
开发者ID:sdestercke,项目名称:classifip,代码行数:34,代码来源:qdatest.py

示例14: computing_cv_accuracy_imprecise

# 需要导入模块: from sklearn.model_selection import KFold [as 别名]
# 用法: kf = KFold(n_splits=...); kf.split(X) —— split 是 KFold 的实例方法,不能通过 from ... import 单独导入
def computing_cv_accuracy_imprecise(in_path=None, ell_optimal=0.1, cv_n_fold=10):
    """Print the cross-validated u65 and u80 scores of ``LinearDiscriminant``.

    The data set is read from ``in_path`` with ``pd.read_csv`` (or from
    ``export_data_set('iris.data')`` when ``in_path`` is ``None``); the
    last column is the label and all other columns are features.  The
    model is trained on every fold of a shuffled ``KFold`` with
    ``ell=ell_optimal``, and a prediction only contributes to the score
    when the returned set contains the true label.
    """
    def u65(mod_Y):
        return 1.6 / mod_Y - 0.6 / mod_Y ** 2

    def u80(mod_Y):
        return 2.2 / mod_Y - 1.2 / mod_Y ** 2

    if in_path is None:
        data = export_data_set('iris.data')
    else:
        data = pd.read_csv(in_path)
    print("-----DATA SET TRAINING---", in_path)
    X = data.iloc[:, :-1].values
    y = np.array(data.iloc[:, -1].tolist())
    mean_u65, mean_u80 = 0, 0
    lqa = LinearDiscriminant(init_matlab=True)
    splitter = KFold(n_splits=cv_n_fold, random_state=None, shuffle=True)
    for idx_train, idx_test in splitter.split(y):
        held_out_X, held_out_y = X[idx_test], y[idx_test]
        lqa.learn(X[idx_train], y[idx_train], ell=ell_optimal)
        sum_u65, sum_u80 = 0, 0
        n_test, _ = held_out_X.shape
        for i, (sample, truth) in enumerate(zip(held_out_X, held_out_y)):
            print("--TESTING-----", i, ell_optimal)
            evaluate, _ = lqa.evaluate(sample)
            print(evaluate, "-----", truth)
            if truth in evaluate:
                # The utility is discounted by the size of the predicted set.
                predicted_size = len(evaluate)
                sum_u65 += u65(predicted_size)
                sum_u80 += u80(predicted_size)
        mean_u65 += sum_u65 / n_test
        mean_u80 += sum_u80 / n_test
    mean_u65 = mean_u65 / cv_n_fold
    mean_u80 = mean_u80 / cv_n_fold
    print("--ell-->", ell_optimal, "--->", mean_u65, mean_u80)
开发者ID:sdestercke,项目名称:classifip,代码行数:34,代码来源:qdatest.py

示例15: split_data

# 需要导入模块: from sklearn.model_selection import KFold [as 别名]
# 用法: kf = KFold(n_splits=...); kf.split(X) —— split 是 KFold 的实例方法,不能通过 from ... import 单独导入
def split_data(root_path, num_splits=4):
    """Write K-fold train/test file lists for the masks under ``root_path``.

    All ``*.mhd``, ``*.hdr`` and ``*.nii`` files in ``<root_path>/masks``
    are collected (sorted within each extension) and split with an
    unshuffled ``KFold``.  For fold ``n`` the base names of the selected
    files are written, one per row, to
    ``<root_path>/split_lists/train_split_<n>.csv`` and
    ``<root_path>/split_lists/test_split_<n>.csv``.

    Args:
        root_path: Directory that contains the ``masks`` sub-directory.
        num_splits: Number of folds to create.

    Raises:
        AssertionError: If no mask file is found.
        OSError: If the output directory cannot be created.
    """
    from os.path import isdir

    mask_dir = join(root_path,'masks')
    mask_list = []
    for ext in ('*.mhd', '*.hdr', '*.nii'):
        mask_list.extend(sorted(glob(join(mask_dir,ext))))

    assert len(mask_list) != 0, 'Unable to find any files in {}'.format(mask_dir)

    outdir = join(root_path,'split_lists')
    try:
        mkdir(outdir)
    except OSError:
        # An already existing directory is fine; any other failure would
        # only resurface later as a confusing error from open().
        if not isdir(outdir):
            raise

    kf = KFold(n_splits=num_splits)
    for n, (train_index, test_index) in enumerate(kf.split(mask_list)):
        for prefix, fold_index in (('train_split_', train_index),
                                   ('test_split_', test_index)):
            # The csv module needs a text-mode file opened with newline=''
            # on Python 3; binary mode ('wb') makes writerow() fail there.
            with open(join(outdir, prefix + str(n) + '.csv'), 'w', newline='') as csvfile:
                writer = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
                writer.writerows([basename(mask_list[i])] for i in fold_index)
开发者ID:legendhua,项目名称:SegCaps,代码行数:27,代码来源:load_3D_data.py


注:本文中的sklearn.model_selection.KFold.split方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。