This article collects typical usage examples of the Python method sklearn.model_selection.KFold.split. If you are unsure what KFold.split does, how to call it, or what it looks like in real code, the curated examples below should help. You may also want to read more about the containing class, sklearn.model_selection.KFold.
The following shows 15 code examples of KFold.split, sorted by popularity by default.
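Before the project examples, here is a minimal sketch of the canonical KFold.split pattern. The synthetic data and the LogisticRegression model are placeholders chosen for illustration only; they do not come from any of the projects below.

# Minimal KFold.split sketch (illustrative placeholders, not from the examples below).
import numpy as np
from sklearn.model_selection import KFold
from sklearn.linear_model import LogisticRegression

X = np.random.rand(100, 5)          # 100 samples, 5 features
y = np.random.randint(0, 2, 100)    # binary labels

kf = KFold(n_splits=5, shuffle=True, random_state=0)
scores = []
for train_index, test_index in kf.split(X):
    # split() yields positional index arrays, so plain NumPy indexing works here
    clf = LogisticRegression().fit(X[train_index], y[train_index])
    scores.append(clf.score(X[test_index], y[test_index]))
print(np.mean(scores))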
Example 1: validateseq2
# Required import: from sklearn.model_selection import KFold [as alias]
# Or: from sklearn.model_selection.KFold import split [as alias]
def validateseq2(X_all, y, features, clf, score, v=False, esr=50, sk=5):
    # Note: target_order, np and roc_auc_score come from the enclosing module in the source project.
    temp_user = target_order[(target_order.o_day_series < 336) & (target_order.o_day_series >= 274)][['user_id']].drop_duplicates().reset_index(drop=True)
    temp_user['CreateGroup'] = 336
    print('before delete: {}'.format(X_all.shape))
    X = temp_user.merge(X_all, on=['user_id', 'CreateGroup'], how='left')
    print('after delete: {}'.format(X.shape))
    temp_user = target_order[(target_order.o_day_series < 306) & (target_order.o_day_series >= 215)][['user_id']].drop_duplicates().reset_index(drop=True)
    temp_user['CreateGroup'] = 306
    print('before delete: {}'.format(X_all.shape))
    X2 = temp_user.merge(X_all, on=['user_id', 'CreateGroup'], how='left')
    print('after delete: {}'.format(X2.shape))
    kf = KFold(n_splits=sk)
    print(len(features))
    X['Prob_x'] = 0
    # First stage: out-of-fold models trained on the earlier window (X2) add a
    # stacked probability feature to the later window (X).
    for train_index, test_index in kf.split(X2):
        X_train, X_test = X2.iloc[train_index, :], X2.iloc[test_index, :]
        X_train, X_test = X_train[features], X_test[features]
        y_train, y_test = X2.iloc[train_index, :].buy, X2.iloc[test_index, :].buy
        clf.fit(X_train, y_train, eval_set=[(X_train, y_train), (X_test, y_test)],
                eval_metric='auc', verbose=v, early_stopping_rounds=esr)
        X['Prob_x'] = X['Prob_x'] + clf.predict_proba(X[features])[:, 1] / sk
    Performance = []
    features.append('Prob_x')
    # Second stage: k-fold validation on the later window, including the stacked feature.
    for train_index, test_index in kf.split(X):
        X_train, X_test = X.iloc[train_index, :], X.iloc[test_index, :]
        X_train, X_test = X_train[features], X_test[features]
        y_train, y_test = X.iloc[train_index, :].buy, X.iloc[test_index, :].buy
        clf.fit(X_train, y_train, eval_set=[(X_train, y_train), (X_test, y_test)],
                eval_metric='auc', verbose=v, early_stopping_rounds=esr)
        pred = clf.predict_proba(X_test)[:, 1]
        Performance.append(roc_auc_score(y_test, pred))
    print("Mean Score: {}".format(np.mean(Performance)))
    return np.mean(Performance), clf
Example 2: test_cross_val_multiscore
# Required import: from sklearn.model_selection import KFold [as alias]
# Or: from sklearn.model_selection.KFold import split [as alias]
def test_cross_val_multiscore():
    """Test cross_val_multiscore for computing scores on decoding over time."""
    from sklearn.model_selection import KFold, StratifiedKFold, cross_val_score
    from sklearn.linear_model import LogisticRegression, LinearRegression

    # np, SlidingEstimator, cross_val_multiscore, assert_array_equal and
    # assert_raises are imported at module level in the original test file.
    # compare to cross-val-score
    X = np.random.rand(20, 3)
    y = np.arange(20) % 2
    clf = LogisticRegression()
    cv = KFold(2, random_state=0)
    assert_array_equal(cross_val_score(clf, X, y, cv=cv),
                       cross_val_multiscore(clf, X, y, cv=cv))

    # Test with search light
    X = np.random.rand(20, 4, 3)
    y = np.arange(20) % 2
    clf = SlidingEstimator(LogisticRegression(), scoring='accuracy')
    scores_acc = cross_val_multiscore(clf, X, y, cv=cv)
    assert_array_equal(np.shape(scores_acc), [2, 3])

    # check values
    scores_acc_manual = list()
    for train, test in cv.split(X, y):
        clf.fit(X[train], y[train])
        scores_acc_manual.append(clf.score(X[test], y[test]))
    assert_array_equal(scores_acc, scores_acc_manual)

    # check scoring metric
    # raise an error if scoring is defined at cross-val-score level and
    # search light, because search light does not return a 1-dimensional
    # prediction.
    assert_raises(ValueError, cross_val_multiscore, clf, X, y, cv=cv,
                  scoring='roc_auc')
    clf = SlidingEstimator(LogisticRegression(), scoring='roc_auc')
    scores_auc = cross_val_multiscore(clf, X, y, cv=cv, n_jobs=1)
    scores_auc_manual = list()
    for train, test in cv.split(X, y):
        clf.fit(X[train], y[train])
        scores_auc_manual.append(clf.score(X[test], y[test]))
    assert_array_equal(scores_auc, scores_auc_manual)

    # indirectly test that cross_val_multiscore rightly detects the type of
    # estimator and generates a StratifiedKFold for classifiers and a KFold
    # otherwise
    X = np.random.randn(1000, 3)
    y = np.r_[np.zeros(500), np.ones(500)]
    clf = LogisticRegression(random_state=0)
    reg = LinearRegression()
    for cross_val in (cross_val_score, cross_val_multiscore):
        manual = cross_val(clf, X, y, cv=StratifiedKFold(2))
        auto = cross_val(clf, X, y, cv=2)
        assert_array_equal(manual, auto)
        assert_raises(ValueError, cross_val, clf, X, y, cv=KFold(2))
        manual = cross_val(reg, X, y, cv=KFold(2))
        auto = cross_val(reg, X, y, cv=2)
        assert_array_equal(manual, auto)
Example 3: predict2
# Required import: from sklearn.model_selection import KFold [as alias]
# Or: from sklearn.model_selection.KFold import split [as alias]
def predict2(X_all, X_new, features, clf, score, v=False, esr=50, sk=3, fn='submission'):
    # Note: target_order, np, datetime and roc_auc_score come from the enclosing module in the source project.
    temp_user = target_order[(target_order.o_day_series < 336) & (target_order.o_day_series >= 274)][['user_id']].drop_duplicates().reset_index(drop=True)
    temp_user['CreateGroup'] = 336
    print('before delete: {}'.format(X_all.shape))
    X = temp_user.merge(X_all, on=['user_id', 'CreateGroup'], how='left')
    print('after delete: {}'.format(X.shape))
    temp_user = target_order[(target_order.o_day_series < 366) &
                             (target_order.o_day_series >= 366 - 74)][['user_id']].drop_duplicates().reset_index(drop=True)
    temp_user['CreateGroup'] = 366
    print(-1 in temp_user.user_id)
    print(4366 in temp_user.user_id)
    print('before delete: {}'.format(X_new.shape))
    X_new = temp_user.merge(X_new, on=['user_id', 'CreateGroup'], how='left')
    temp_user = target_order[(target_order.o_day_series < 306) & (target_order.o_day_series >= 215)][['user_id']].drop_duplicates().reset_index(drop=True)
    temp_user['CreateGroup'] = 306
    print('before delete: {}'.format(X_all.shape))
    X2 = temp_user.merge(X_all, on=['user_id', 'CreateGroup'], how='left')
    print('Train: {}'.format(X_new.shape))
    kf = KFold(n_splits=sk)
    print(len(features))
    Performance = []
    X_new['Prob'] = 0
    X_new['Prob_x'] = 0
    X['Prob_x'] = 0
    # First stage: out-of-fold models on the earlier window (X2) provide a stacked
    # probability feature for both the validation window (X) and the test set (X_new).
    for train_index, test_index in kf.split(X2):
        X_train, X_test = X2.iloc[train_index, :], X2.iloc[test_index, :]
        X_train, X_test = X_train[features], X_test[features]
        y_train, y_test = X2.iloc[train_index, :].buy, X2.iloc[test_index, :].buy
        clf.fit(X_train, y_train, eval_set=[(X_train, y_train), (X_test, y_test)],
                eval_metric='auc', verbose=v, early_stopping_rounds=esr)
        X_new['Prob_x'] = X_new['Prob_x'] + clf.predict_proba(X_new[features])[:, 1] / sk
        X['Prob_x'] = X['Prob_x'] + clf.predict_proba(X[features])[:, 1] / sk
    features.append('Prob_x')
    # Second stage: k-fold models on the validation window produce the final probabilities.
    for train_index, test_index in kf.split(X):
        X_train, X_test = X.iloc[train_index, :], X.iloc[test_index, :]
        X_train, X_test = X_train[features], X_test[features]
        y_train, y_test = X.iloc[train_index, :].buy, X.iloc[test_index, :].buy
        clf.fit(X_train, y_train, eval_set=[(X_train, y_train), (X_test, y_test)],
                eval_metric='auc', verbose=v, early_stopping_rounds=esr)
        pred = clf.predict_proba(X_test)[:, 1]
        X_new['Prob'] = X_new['Prob'] + clf.predict_proba(X_new[features])[:, 1] / sk
        Performance.append(roc_auc_score(y_test, pred))
    print("Mean Score: {}".format(np.mean(Performance)))
    importantlist = []
    for i, j in zip(features, clf.feature_importances_):
        importantlist.append([j, i])
    print(sorted(importantlist)[::-1])
    first_day = datetime.datetime.strptime('2017-08-31 00:00:00', '%Y-%m-%d %H:%M:%S')
    X_new['Days'] = np.random.randint(15, size=len(X_new))
    X_new['pred_date'] = X_new['Days'].apply(lambda x: (datetime.timedelta(days=x) + first_day).strftime("%Y-%m-%d"))
    X_new.sort_values(by=['Prob'], ascending=False, inplace=True)
    X_new[['user_id', 'Prob']].to_csv('prob_{}.csv'.format(fn), index=None)
    X_new[['user_id', 'pred_date']][:50000].to_csv('{}.csv'.format(fn), index=None)
    return np.mean(Performance), clf
Example 4: cross_validate
# Required import: from sklearn.model_selection import KFold [as alias]
# Or: from sklearn.model_selection.KFold import split [as alias]
def cross_validate(self, values_labels, folds=10, processes=1):
    """
    Trains and tests the model against folds of labeled data.

    :Parameters:
        values_labels : [( `<feature_values>`, `<label>` )]
            an iterable of labeled data where `<feature_values>` is an ordered
            collection of predictive values that correspond to the
            `Feature` s provided to the constructor
        folds : `int`
            the number of cross-validation folds to split the data into
        processes : `int`
            When set to 1, cross-validation will run in the parent thread.
            When set to 2 or greater, a :class:`multiprocessing.Pool` will
            be created.
    """
    folds_i = KFold(n_splits=folds, shuffle=True,
                    random_state=0)
    if processes == 1:
        mapper = map
    else:
        # Pool and cpu_count come from multiprocessing in the source module.
        pool = Pool(processes=processes or cpu_count())
        mapper = pool.map
    results = mapper(self._cross_score,
                     ((i, [values_labels[i] for i in train_i],
                       [values_labels[i] for i in test_i])
                      for i, (train_i, test_i) in enumerate(
                          folds_i.split(values_labels))))
    agg_score_labels = []
    for score_labels in results:
        agg_score_labels.extend(score_labels)

    self.info['statistics'].fit(agg_score_labels)

    return self.info['statistics']
Example 5: calculate_roc
# Required import: from sklearn.model_selection import KFold [as alias]
# Or: from sklearn.model_selection.KFold import split [as alias]
def calculate_roc(thresholds, embeddings1, embeddings2, actual_issame, nrof_folds=10):
    assert(embeddings1.shape[0] == embeddings2.shape[0])
    assert(embeddings1.shape[1] == embeddings2.shape[1])
    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
    nrof_thresholds = len(thresholds)
    k_fold = KFold(n_splits=nrof_folds, shuffle=False)

    tprs = np.zeros((nrof_folds, nrof_thresholds))
    fprs = np.zeros((nrof_folds, nrof_thresholds))
    accuracy = np.zeros((nrof_folds))

    diff = np.subtract(embeddings1, embeddings2)
    dist = np.sum(np.square(diff), 1)
    indices = np.arange(nrof_pairs)

    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):
        # Find the best threshold for the fold
        acc_train = np.zeros((nrof_thresholds))
        for threshold_idx, threshold in enumerate(thresholds):
            _, _, acc_train[threshold_idx] = calculate_accuracy(threshold, dist[train_set], actual_issame[train_set])
        best_threshold_index = np.argmax(acc_train)
        for threshold_idx, threshold in enumerate(thresholds):
            tprs[fold_idx, threshold_idx], fprs[fold_idx, threshold_idx], _ = calculate_accuracy(threshold, dist[test_set], actual_issame[test_set])
        _, _, accuracy[fold_idx] = calculate_accuracy(thresholds[best_threshold_index], dist[test_set], actual_issame[test_set])

    tpr = np.mean(tprs, 0)
    fpr = np.mean(fprs, 0)
    return tpr, fpr, accuracy
Example 6: compute_matrices_for_gradient_totalcverr
# Required import: from sklearn.model_selection import KFold [as alias]
# Or: from sklearn.model_selection.KFold import split [as alias]
def compute_matrices_for_gradient_totalcverr(self, train_x, train_y, train_z):
    if self.kernelX_use_median:
        sigmax = self.kernelX.get_sigma_median_heuristic(train_x)
        self.kernelX.set_width(float(sigmax))
    if self.kernelY_use_median:
        sigmay = self.kernelY.get_sigma_median_heuristic(train_y)
        self.kernelY.set_width(float(sigmay))
    kf = KFold(n_splits=self.K_folds)
    matrix_results = [[[None] for _ in range(self.K_folds)] for _ in range(8)]
    # xx=[[None]*10]*6 will give the same id to xx[0][0] and xx[1][0] etc. as
    # this command simply copied [None] many times. But the above gives different ids.
    count = 0
    for train_index, test_index in kf.split(np.ones((self.num_samples, 1))):
        X_tr, X_tst = train_x[train_index], train_x[test_index]
        Y_tr, Y_tst = train_y[train_index], train_y[test_index]
        Z_tr, Z_tst = train_z[train_index], train_z[test_index]
        matrix_results[0][count] = self.kernelX.kernel(X_tst, X_tr)   # Kx_tst_tr
        matrix_results[1][count] = self.kernelX.kernel(X_tr, X_tr)    # Kx_tr_tr
        matrix_results[2][count] = self.kernelX.kernel(X_tst, X_tst)  # Kx_tst_tst
        matrix_results[3][count] = self.kernelY.kernel(Y_tst, Y_tr)   # Ky_tst_tr
        matrix_results[4][count] = self.kernelY.kernel(Y_tr, Y_tr)    # Ky_tr_tr
        matrix_results[5][count] = self.kernelY.kernel(Y_tst, Y_tst)  # Ky_tst_tst
        matrix_results[6][count] = cdist(Z_tst, Z_tr, 'sqeuclidean')  # D_tst_tr: square distance matrix
        matrix_results[7][count] = cdist(Z_tr, Z_tr, 'sqeuclidean')   # D_tr_tr: square distance matrix
        count = count + 1
    return matrix_results
Example 7: predict_model_kfold
# Required import: from sklearn.model_selection import KFold [as alias]
# Or: from sklearn.model_selection.KFold import split [as alias]
def predict_model_kfold(name, path, features_type, label_name, data):
    kfold = KFold(n_splits=10, shuffle=True)
    # RandomForest -I 1000 -K 0 -S 1 -num-slots 1
    model = BalancedRandomForestClassifier(n_estimators=1000, max_depth=5)
    index = 0
    size = data.shape[0]
    all_predictions = 0
    x = data.drop('hasBug', axis=1)
    y = data['hasBug']
    num_of_bugs = data.loc[data['hasBug'] == 1].shape[0]
    num_of_all_instances = data.shape[0]
    bug_precent = float(num_of_bugs) / float(num_of_all_instances)
    for train, test in kfold.split(data):
        index += 1
        prediction_train = model.fit(x.iloc[train], y.iloc[train]).predict(x.iloc[test])
        all_predictions += create_all_eval_results(False, y.iloc[test], prediction_train, name, "training", features_type,
                                                   num_of_bugs, num_of_all_instances, bug_precent, None)
    all_predictions /= index
    start_list = [name, "training", features_type, "sklearn - python"]
    result_list = start_list + all_predictions.tolist()
    global results_all_projects
    results_all_projects.loc[len(results_all_projects)] = result_list
    model.fit(x, y)
    return model
Example 8: _iter_test_masks
# Required import: from sklearn.model_selection import KFold [as alias]
# Or: from sklearn.model_selection.KFold import split [as alias]
def _iter_test_masks(self, X, y=None, groups=None):
    # yields mask array for test splits
    n_samples = X.shape[0]
    # if groups is not specified, the entire data set is treated as one group
    if groups is None:
        groups = np.zeros(n_samples, dtype=int)
    # constants
    indices = np.arange(n_samples)
    test_fold = np.empty(n_samples, dtype=bool)
    rng = check_random_state(self.random_state)
    group_indices = np.unique(groups)
    iters = np.empty(group_indices.shape[0], dtype=object)
    # generate one KFold iterator per group
    cv = KFold(n_splits=self.n_splits, shuffle=self.shuffle, random_state=rng)
    for i, g in enumerate(group_indices):
        group_member = indices[groups == g]
        iters[i] = cv.split(group_member)
    # generate training and test splits
    for fold in range(self.n_splits):
        test_fold[:] = False
        for i, g in enumerate(group_indices):
            group_train_i, group_test_i = next(iters[i])
            test_fold[indices[groups == g][group_test_i]] = True
        yield test_fold
Example 9: original_data
# Required import: from sklearn.model_selection import KFold [as alias]
# Or: from sklearn.model_selection.KFold import split [as alias]
def original_data():
    for target in TARGETS:
        for algo_str in ALGORITHMS:
            algorithm = importlib.import_module('src.multi_class.' + algo_str)
            encoded_data = input_preproc.readFromDataset(
                INPUT_DIR + ORIGINAL_DATA_FILE,
                INPUT_COLS['original'],
                target
            )
            # Split into predictors and target
            X = np.array(encoded_data[encoded_data.columns.difference([target])])
            y = np.array(encoded_data[target])
            kf = KFold(n_splits=CROSS_VALIDATION_K, shuffle=True)
            f1s = []
            for train_index, test_index in kf.split(X):
                X_train, y_train = X[train_index], y[train_index]
                X_test, y_test = X[test_index], y[test_index]
                scaler = preprocessing.StandardScaler()
                X_train = pd.DataFrame(scaler.fit_transform(X_train))  # , columns=X_train.columns)
                X_test = scaler.transform(X_test)
                precision, recall, f1_score, accuracy = algorithm.runClassifier(X_train, X_test, y_train, y_test)
                f1s.append(f1_score)
            final_f1 = sum(f1s) / len(f1s)
            print("\n================================")
            print("%s, %s, F1 Score: %.6f" % (target, algo_str, final_f1))
            print("================================\n")
Example 10: test_multiclass_classification
# Required import: from sklearn.model_selection import KFold [as alias]
# Or: from sklearn.model_selection.KFold import split [as alias]
def test_multiclass_classification():
    from sklearn.datasets import load_iris
    from sklearn.model_selection import KFold

    def check_pred(preds, labels, output_margin):
        if output_margin:
            err = sum(1 for i in range(len(preds))
                      if preds[i].argmax() != labels[i]) / float(len(preds))
        else:
            err = sum(1 for i in range(len(preds))
                      if preds[i] != labels[i]) / float(len(preds))
        assert err < 0.4

    iris = load_iris()
    y = iris['target']
    X = iris['data']
    kf = KFold(n_splits=2, shuffle=True, random_state=rng)
    for train_index, test_index in kf.split(X, y):
        xgb_model = xgb.XGBClassifier().fit(X[train_index], y[train_index])
        preds = xgb_model.predict(X[test_index])
        # test other params in XGBClassifier().fit
        preds2 = xgb_model.predict(X[test_index], output_margin=True,
                                   ntree_limit=3)
        preds3 = xgb_model.predict(X[test_index], output_margin=True,
                                   ntree_limit=0)
        preds4 = xgb_model.predict(X[test_index], output_margin=False,
                                   ntree_limit=3)
        labels = y[test_index]
        check_pred(preds, labels, output_margin=False)
        check_pred(preds2, labels, output_margin=True)
        check_pred(preds3, labels, output_margin=True)
        check_pred(preds4, labels, output_margin=False)
Example 11: Get_KFolds
# Required import: from sklearn.model_selection import KFold [as alias]
# Or: from sklearn.model_selection.KFold import split [as alias]
def Get_KFolds(data, y_label, num_folds, scale):
    # Creates num_folds folds from the train/test set, each with its own training and test set
    folds = []
    kf = KFold(n_splits=num_folds)
    for train_index, test_index in kf.split(data):
        training = []
        test = []
        tempdf = Normalize_Scale(data, scale)
        train_x = tempdf.drop([y_label], axis=1).values
        train_y = tempdf[y_label].values
        # Creates a training set within the fold
        x = []
        y = []
        for index in train_index:
            x.append(train_x[index])
            y.append(train_y[index])
        training = [x, y]
        # Creates a test set within the fold
        x = []
        y = []
        for index in test_index:
            x.append(train_x[index])
            y.append(train_y[index])
        test = [x, y]
        folds.append([training, test])
    return folds
Example 12: hyperopt_obj
# Required import: from sklearn.model_selection import KFold [as alias]
# Or: from sklearn.model_selection.KFold import split [as alias]
def hyperopt_obj(self, param, train_X, train_y):
    # 3-fold cross-validation error
    # ret = xgb.cv(param, dtrain, num_boost_round=param['num_round'])
    kf = KFold(n_splits=3)
    errors = []
    r2 = []
    int_params = ['max_depth', 'num_round']
    for item in int_params:
        param[item] = int(param[item])
    for train_ind, test_ind in kf.split(train_X):
        train_valid_x, train_valid_y = train_X[train_ind], train_y[train_ind]
        test_valid_x, test_valid_y = train_X[test_ind], train_y[test_ind]
        dtrain = xgb.DMatrix(train_valid_x, label=train_valid_y)
        dtest = xgb.DMatrix(test_valid_x)
        pred_model = xgb.train(param, dtrain, num_boost_round=int(param['num_round']))
        pred_test = pred_model.predict(dtest)
        errors.append(mean_squared_error(test_valid_y, pred_test))
        r2.append(r2_score(test_valid_y, pred_test))
    all_dtrain = xgb.DMatrix(train_X, label=train_y)
    print('training score:')
    pred_model = xgb.train(param, all_dtrain, num_boost_round=int(param['num_round']))
    all_dtest = xgb.DMatrix(train_X)
    pred_train = pred_model.predict(all_dtest)
    print(str(r2_score(train_y, pred_train)))
    print(np.mean(r2))
    print('\n')
    return {'loss': np.mean(errors), 'status': STATUS_OK}
Example 13: computing_cv_accuracy_LDA
# Required import: from sklearn.model_selection import KFold [as alias]
# Or: from sklearn.model_selection.KFold import split [as alias]
def computing_cv_accuracy_LDA(in_path=None, cv_n_fold=10):
    def u65(mod_Y):
        return 1.6 / mod_Y - 0.6 / mod_Y ** 2

    def u80(mod_Y):
        return 2.2 / mod_Y - 1.2 / mod_Y ** 2

    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
    data = export_data_set('iris.data') if in_path is None else pd.read_csv(in_path)
    print("-----DATA SET TRAINING---", in_path)
    X = data.iloc[:, :-1].values
    y = np.array(data.iloc[:, -1].tolist())
    kf = KFold(n_splits=cv_n_fold, random_state=None, shuffle=True)
    lda = LinearDiscriminantAnalysis(solver="svd", store_covariance=True)
    mean_u65, mean_u80 = 0, 0
    for idx_train, idx_test in kf.split(y):
        print("---k-FOLD-new-executing--")
        X_cv_train, y_cv_train = X[idx_train], y[idx_train]
        X_cv_test, y_cv_test = X[idx_test], y[idx_test]
        lda.fit(X_cv_train, y_cv_train)
        n_test = len(idx_test)
        sum_u65, sum_u80 = 0, 0
        for i, test in enumerate(X_cv_test):
            evaluate = lda.predict([test])
            print("-----TESTING-----", i)
            if y_cv_test[i] in evaluate:
                sum_u65 += u65(len(evaluate))
                sum_u80 += u80(len(evaluate))
        mean_u65 += sum_u65 / n_test
        mean_u80 += sum_u80 / n_test
    print("--->", mean_u65 / cv_n_fold, mean_u80 / cv_n_fold)
Example 14: computing_cv_accuracy_imprecise
# Required import: from sklearn.model_selection import KFold [as alias]
# Or: from sklearn.model_selection.KFold import split [as alias]
def computing_cv_accuracy_imprecise(in_path=None, ell_optimal=0.1, cv_n_fold=10):
    def u65(mod_Y):
        return 1.6 / mod_Y - 0.6 / mod_Y ** 2

    def u80(mod_Y):
        return 2.2 / mod_Y - 1.2 / mod_Y ** 2

    data = export_data_set('iris.data') if in_path is None else pd.read_csv(in_path)
    print("-----DATA SET TRAINING---", in_path)
    X = data.iloc[:, :-1].values
    y = np.array(data.iloc[:, -1].tolist())
    mean_u65, mean_u80 = 0, 0
    lqa = LinearDiscriminant(init_matlab=True)
    kf = KFold(n_splits=cv_n_fold, random_state=None, shuffle=True)
    for idx_train, idx_test in kf.split(y):
        X_cv_train, y_cv_train = X[idx_train], y[idx_train]
        X_cv_test, y_cv_test = X[idx_test], y[idx_test]
        lqa.learn(X_cv_train, y_cv_train, ell=ell_optimal)
        sum_u65, sum_u80 = 0, 0
        n_test, _ = X_cv_test.shape
        for i, test in enumerate(X_cv_test):
            print("--TESTING-----", i, ell_optimal)
            evaluate, _ = lqa.evaluate(test)
            print(evaluate, "-----", y_cv_test[i])
            if y_cv_test[i] in evaluate:
                sum_u65 += u65(len(evaluate))
                sum_u80 += u80(len(evaluate))
        mean_u65 += sum_u65 / n_test
        mean_u80 += sum_u80 / n_test
    mean_u65 = mean_u65 / cv_n_fold
    mean_u80 = mean_u80 / cv_n_fold
    print("--ell-->", ell_optimal, "--->", mean_u65, mean_u80)
Example 15: split_data
# Required import: from sklearn.model_selection import KFold [as alias]
# Or: from sklearn.model_selection.KFold import split [as alias]
def split_data(root_path, num_splits=4):
    mask_list = []
    for ext in ('*.mhd', '*.hdr', '*.nii'):
        mask_list.extend(sorted(glob(join(root_path, 'masks', ext))))

    assert len(mask_list) != 0, 'Unable to find any files in {}'.format(join(root_path, 'masks'))

    outdir = join(root_path, 'split_lists')
    try:
        mkdir(outdir)
    except OSError:
        pass

    kf = KFold(n_splits=num_splits)
    n = 0
    for train_index, test_index in kf.split(mask_list):
        # Open in text mode with newline='' so the csv module works under Python 3
        # (the original used 'wb', which is Python 2 style).
        with open(join(outdir, 'train_split_' + str(n) + '.csv'), 'w', newline='') as csvfile:
            writer = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
            for i in train_index:
                writer.writerow([basename(mask_list[i])])
        with open(join(outdir, 'test_split_' + str(n) + '.csv'), 'w', newline='') as csvfile:
            writer = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
            for i in test_index:
                writer.writerow([basename(mask_list[i])])
        n += 1