本文整理汇总了Python中sklearn.cross_validation.StratifiedKFold类的典型用法代码示例。如果您正苦于以下问题:Python cross_validation.StratifiedKFold的具体用法?Python cross_validation.StratifiedKFold怎么用?Python cross_validation.StratifiedKFold使用的例子?那么,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块sklearn.cross_validation的用法示例。
在下文中一共展示了cross_validation.StratifiedKFold类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: validation
# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import StratifiedKFold [as 别名]
def validation(self, X, Y, kind):
    """Estimate accuracy with 2-fold stratified cross-validation.

    For every fold, ``self.fit`` is called with the training rows, the
    training labels and the held-out rows; whatever it returns is compared
    element-wise with the held-out labels.

    Args:
        X: Samples, indexed with the fold index arrays (``X[train_idx]``).
        Y: Labels, indexed the same way and used for stratification.
        kind: Not read by this method; kept so existing callers still work.

    Returns:
        The mean of the per-fold accuracies.
    """
    # Single-argument print calls produce the same output on Python 2 and 3.
    print('validating...')
    fold_n = 2
    folds = StratifiedKFold(Y, n_folds=fold_n, random_state=0)
    score = np.zeros(fold_n)
    for j, (train_idx, test_idx) in enumerate(folds):
        print('%d -fold' % (j + 1))
        res = self.fit(X[train_idx], Y[train_idx], X[test_idx])
        # Fraction of held-out samples whose prediction equals the label.
        score[j] = np.mean(Y[test_idx] == res)
    mean_score = score.mean()
    print('%s %s' % (score, mean_score))
    return mean_score
示例2: test_slice_on_dimension
# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import StratifiedKFold [as 别名]
def test_slice_on_dimension(self):
    """Slice an iris experiment by classifier and by subset parameters.

    Each slice is rendered as a list of ``str(trial)`` and passed to
    ``__compare_to_ref_pkl`` together with the name of its reference.
    """
    dataset = datasets.load_iris()
    labels = dataset.target
    features = dataset.data

    forest_grid = {'clf': RandomForestClassifier,
                   'n_estimators': [10, 100],
                   'max_depth': [1, 10],
                   'random_state': [0]}
    svc_grid = {'clf': SVC,
                'kernel': ['linear', 'rbf'],
                'random_state': [0]}
    subset_grid = {'subset': per.SubsetRandomRowsActualDistribution,
                   'subset_size': [20, 40, 60, 80, 100],
                   'random_state': [0]}
    cv_grid = {'cv': StratifiedKFold}
    experiment = per.Experiment(features, labels,
                                [forest_grid, svc_grid],
                                [subset_grid],
                                [cv_grid])

    # Slice 1: keep only the trials that use the random forest.
    forest_slice = experiment.slice_on_dimension(per.CLF,
                                                 RandomForestClassifier)
    rendered = [str(trial) for trial in forest_slice.trials]
    self.__compare_to_ref_pkl(rendered, 'slice_on_dimension_clf')

    # Slice 2: keep only the trials whose subset size is 60.
    size_slice = experiment.slice_on_dimension(per.SUBSET_PARAMS,
                                               {'subset_size': 60})
    rendered = [str(trial) for trial in size_slice.trials]
    self.__compare_to_ref_pkl(rendered, 'slice_on_dimension_subset_params')
示例3: test_slice_by_best_score
# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import StratifiedKFold [as 别名]
def test_slice_by_best_score(self):
    """Run an iris experiment and compare its best-score slice to a reference.

    The slice over ``per.CLF_PARAMS`` is turned into a mapping from
    ``str(trial)`` to ``trial.average_score()`` before the comparison.
    """
    dataset = datasets.load_iris()
    labels = dataset.target
    features = dataset.data

    forest_grid = {'clf': RandomForestClassifier,
                   'n_estimators': [10, 100],
                   'max_depth': [1, 10],
                   'random_state': [0]}
    svc_grid = {'clf': SVC,
                'kernel': ['linear', 'rbf'],
                'random_state': [0]}
    subset_grid = {'subset': per.SubsetRandomRowsActualDistribution,
                   'subset_size': [20, 40],
                   'random_state': [0]}
    cv_grid = {'cv': StratifiedKFold}
    experiment = per.Experiment(features, labels,
                                [forest_grid, svc_grid],
                                [subset_grid],
                                [cv_grid])
    experiment.run()

    best = experiment.slice_by_best_score(per.CLF_PARAMS)
    scores_by_trial = {}
    for trial in best.trials:
        trial_name = str(trial)
        scores_by_trial[trial_name] = trial.average_score()
    self.__compare_to_ref_pkl(scores_by_trial, 'slice_by_best_score')
示例4: test_make_csv
# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import StratifiedKFold [as 别名]
def test_make_csv(self):
    """Build an experiment on a generated test matrix and call ``make_csv``."""
    features, labels = uft.generate_test_matrix(1000, 5, 2, random_state=0)

    forest_grid = {'clf': RandomForestClassifier,
                   'n_estimators': [10, 100],
                   'max_depth': [5, 25],
                   'random_state': [0]}
    svc_grid = {'clf': SVC,
                'kernel': ['linear', 'rbf'],
                'probability': [True],
                'random_state': [0]}
    subset_grid = {'subset': per.SubsetSweepNumRows,
                   'num_rows': [[100, 200]],
                   'random_state': [0]}
    cv_grid = {'cv': StratifiedKFold,
               'n_folds': [2, 3]}

    experiment = per.Experiment(features, labels,
                                clfs=[forest_grid, svc_grid],
                                subsets=[subset_grid],
                                cvs=[cv_grid])
    # The returned path is captured but not inspected in the visible code.
    result_path = experiment.make_csv()
示例5: test_report_complex
# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import StratifiedKFold [as 别名]
def test_report_complex(self):
    """Create a per-classifier report and attach it to ``self.report``.

    The report object returned by ``make_report`` is added as a subreport
    under a level-1 heading named ``test_report_complex``.
    """
    features, labels = uft.generate_test_matrix(100, 5, 2)

    forest_grid = {'clf': RandomForestClassifier,
                   'n_estimators': [10, 100],
                   'max_depth': [1, 10],
                   'random_state': [0]}
    svc_grid = {'clf': SVC,
                'kernel': ['linear', 'rbf'],
                'probability': [True],
                'random_state': [0]}
    subset_grid = {'subset': per.SubsetRandomRowsActualDistribution,
                   'subset_size': [20, 40, 60, 80, 100],
                   'random_state': [0]}
    cv_grid = {'cv': StratifiedKFold}

    experiment = per.Experiment(features, labels,
                                [forest_grid, svc_grid],
                                [subset_grid],
                                [cv_grid])
    # Only the second element of the returned pair is used.
    made = experiment.make_report(dimension=per.CLF,
                                  return_report_object=True,
                                  verbose=False)
    _, subreport = made
    self.report.add_heading('test_report_complex', 1)
    self.report.add_subreport(subreport)
示例6: test_toy_data
# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import StratifiedKFold [as 别名]
def test_toy_data(name, clf):
    """Cross-validate ``clf`` on the toy classification data and report scores.

    The data from ``classification_data()`` is split into 5 stratified
    folds. For each fold the classifier is fitted on the training part, and
    accuracy and ROC AUC are computed on the held-out part. The AUC uses
    column 1 of ``predict_proba``.

    Args:
        name: Label printed before the scores and echoed in the result.
        clf: Estimator providing ``fit``, ``predict`` and ``predict_proba``.

    Returns:
        A dict with keys ``name``, ``acc_mean``, ``acc_std``, ``auc_mean``
        and ``auc_std``.
    """
    X, y = classification_data()
    k_folds = 5
    cv = StratifiedKFold(y, k_folds, random_state=1234)

    acc, auc = [], []
    for train, test in cv:
        xt, xv, yt, yv = X[train, :], X[test, :], y[train], y[test]
        clf.fit(xt, yt)
        yhat = clf.predict(xv)
        proba = clf.predict_proba(xv)[:, 1]
        acc.append(np.mean(yhat == yv))
        auc.append(roc_auc_score(yv, proba))

    acc_mean, acc_std = np.mean(acc), np.std(acc)
    auc_mean, auc_std = np.mean(auc), np.std(auc)

    # Single-argument print calls produce the same output on Python 2 and 3.
    print(name)
    print('accuracy: {0:.3f} +/- {1:.3f}'.format(acc_mean, acc_std))
    print('auc: {0:.3f} +/- {1:.3f}'.format(auc_mean, auc_std))
    print('-' * 80)

    return {'name': name,
            'acc_mean': acc_mean,
            'acc_std': acc_std,
            'auc_mean': auc_mean,
            'auc_std': auc_std}
示例7: get_weights
# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import StratifiedKFold [as 别名]
def get_weights():
    """Compute blending weights for the stored validation predictions.

    The labels are reordered to follow the held-out indices of a 5-fold
    stratified split, one prediction array is loaded per prediction file,
    and ``fmin_cobyla`` minimizes ``error`` under ``constraint``, starting
    from a weight of 1.0 per file.

    Returns:
        The result of ``fmin_cobyla``.
    """
    # Validation labels, in the order the folds hold them out.
    _, labels, _, _, _ = utils.load_data()
    folds = StratifiedKFold(labels, n_folds=5, random_state=23)
    held_out = np.concatenate([test_idx for _, test_idx in folds])
    val_labels = labels[held_out]

    # One array of validation predictions per prediction file.
    prediction_files = utils.get_prediction_files()
    val_predictions = [
        np.genfromtxt(os.path.join(consts.BLEND_PATH, file_name), delimiter=',')
        for file_name in prediction_files
    ]

    # Minimize the blending error starting from equal weights.
    initial_weights = [1.] * len(prediction_files)
    return fmin_cobyla(error, initial_weights,
                       args=(val_predictions, val_labels),
                       cons=[constraint], rhoend=1e-5)
示例8: make_blender_cv
# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import StratifiedKFold [as 别名]
def make_blender_cv(classifier, x, y, calibrate=False):
    """Collect out-of-fold probabilities and log-loss scores over 5 folds.

    Args:
        classifier: Estimator that is fitted on each training fold.
        x: Feature matrix, indexed as ``x[idx, :]``.
        y: Labels, used for stratification and for scoring.
        calibrate: When true, ``classifier`` is wrapped in an isotonic
            ``CalibratedClassifierCV`` whose ``cv`` comes from ``get_cv``
            applied to the training labels of the fold.

    Returns:
        ``(scores, predictions)``: the list of per-fold log-loss values and
        the per-fold probability arrays stacked along axis 0 in fold order
        (``None`` if there were no folds).
    """
    folds = StratifiedKFold(y, n_folds=5, random_state=23)
    scores = []
    fold_predictions = []
    for train_index, test_index in folds:
        x_train, y_train = x[train_index, :], y[train_index]
        if calibrate:
            # Training and calibration happen inside the wrapper.
            model = CalibratedClassifierCV(classifier, method='isotonic',
                                           cv=get_cv(y_train))
        else:
            model = classifier
        fitted = model.fit(x_train, y_train)
        preds = fitted.predict_proba(x[test_index, :])

        # Drop the local references to the fitted model before the next fold.
        model = fitted = None
        gc.collect()

        scores.append(log_loss(y[test_index], preds))
        fold_predictions.append(preds)

    if fold_predictions:
        predictions = np.concatenate(fold_predictions, axis=0)
    else:
        predictions = None
    return scores, predictions
示例9: create_cv_id
# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import StratifiedKFold [as 别名]
def create_cv_id(target, n_folds_=5, cv_id_name=cv_id_name, seed=407):
    """Assign a fold id to every row of ``target`` and save the ids.

    A shuffled ``StratifiedKFold`` on ``target['target']`` is tried first
    and its ``test_folds`` attribute is used as the fold ids. If that fails,
    a shuffled plain ``KFold`` over ``len(target)`` rows is used instead.
    The ids are written with ``np.save`` to ``INPUT_PATH + cv_id_name``.

    Args:
        target: Object supporting ``target['target']`` and ``len(target)``.
        n_folds_: Number of folds.
        cv_id_name: File name appended to ``INPUT_PATH``.
        seed: Random state passed to the splitter.
    """
    try:
        skf = StratifiedKFold(target['target'], n_folds=n_folds_,
                              shuffle=True, random_state=seed)
        cv_index = skf.test_folds
    except Exception:
        # Best-effort fallback to unstratified folds. Exception (not a bare
        # except) so KeyboardInterrupt and SystemExit still propagate.
        cv_index = np.empty(len(target), dtype=int)
        kf = KFold(len(target), n_folds=n_folds_, shuffle=True,
                   random_state=seed)
        for fold_id, (_, test_idx) in enumerate(kf):
            cv_index[test_idx] = fold_id
        print('Done Kfold')
    else:
        print('Done StratifiedKFold')
    np.save(INPUT_PATH + cv_id_name, cv_index)
######### Utils #########
#feature listを渡してデータを作成するutil関数
示例10: naive_bayes
# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import StratifiedKFold [as 别名]
def naive_bayes(pos_samples, neg_samples, n_folds = 2):
    """Train an NLTK naive Bayes classifier with stratified n-fold validation.

    Args:
        pos_samples: List of positive samples; each sample unpacks into a
            ``(words, label)`` pair.
        neg_samples: List of negative samples in the same format.
        n_folds: Number of stratified, shuffled folds.

    Returns:
        ``(accuracy, classifier, train_samples, test_samples)``: the sum of
        the fold accuracies divided by ``n_folds``, followed by the
        classifier and the train/test samples of the last fold.
    """
    samples = np.array(pos_samples + neg_samples)
    labels = [tag for (_words, tag) in samples]
    folds = cross_validation.StratifiedKFold(labels, n_folds=n_folds,
                                             shuffle=True)

    fold_accuracies = []
    for train_rows, test_rows in folds:
        train_samples = samples[train_rows]
        test_samples = samples[test_rows]
        classifier = nltk.NaiveBayesClassifier.train(train_samples)
        fold_accuracies.append(
            nltk.classify.util.accuracy(classifier, test_samples))

    accuracy = sum(fold_accuracies, 0.0) / n_folds
    return (accuracy, classifier, train_samples, test_samples)
示例11: _validate_link_reconstruction
# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import StratifiedKFold [as 别名]
def _validate_link_reconstruction(self, samples, lbs):
    """Score link reconstruction with 2-fold logistic regression.

    The feature of a sample is the absolute difference between columns 0
    and 1 of ``self.make_features(samples)``. A logistic regression is
    fitted on each training fold and evaluated with ``f1_score`` on the
    held-out fold.

    Returns:
        The mean F1 score over the folds.
    """
    pair_feat = self.make_features(samples)
    feat = np.abs(pair_feat[:, 0] - pair_feat[:, 1])
    clf = LogisticRegression()

    # First try the constructor that takes the labels and ``n_folds``. On
    # TypeError, fall back to the ``n_splits`` / ``split()`` form.
    try:
        parts = StratifiedKFold(lbs, n_folds=2, shuffle=True)
    except TypeError:
        splitter = StratifiedKFold(n_splits=2, shuffle=True)
        parts = splitter.split(feat, lbs)

    val_score = []
    for tr, te in parts:
        fitted = clf.fit(feat[tr], lbs[tr])
        predicted = fitted.predict(feat[te])
        fold_f1 = f1_score(lbs[te], predicted)
        val_score.append(fold_f1)
    return np.mean(val_score)
示例12: _validate_node_classify
# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import StratifiedKFold [as 别名]
def _validate_node_classify(self, samples, lbs):
    """Score node classification with 2-fold balanced logistic regression.

    Column 0 of ``self.make_features(samples)`` is used as the feature
    matrix. A class-weight-balanced logistic regression is fitted on each
    training fold and evaluated with ``f1_score`` on the held-out fold.

    Returns:
        The mean F1 score over the folds.
    """
    # Axis 1 of the features runs over the nodes of each sample
    # (time, node1, node2, ...); only the first entry is kept.
    all_feat = self.make_features(samples)
    feat = all_feat[:, 0]
    assert len(feat) == len(lbs)
    clf = LogisticRegression(class_weight='balanced')

    # First try the constructor that takes the labels and ``n_folds``. On
    # TypeError, fall back to the ``n_splits`` / ``split()`` form.
    try:
        parts = StratifiedKFold(lbs, n_folds=2, shuffle=True)
    except TypeError:
        splitter = StratifiedKFold(n_splits=2, shuffle=True)
        parts = splitter.split(feat, lbs)

    val_score = []
    for tr, te in parts:
        fitted = clf.fit(feat[tr], lbs[tr])
        predicted = fitted.predict(feat[te])
        fold_f1 = f1_score(lbs[te], predicted)
        val_score.append(fold_f1)
    return np.mean(val_score)
示例13: compute_svm_score_nestedCV
# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import StratifiedKFold [as 别名]
def compute_svm_score_nestedCV(K, y, n_folds, scoring='accuracy',
                               random_state=None,
                               param_grid=None):
    """Compute a nested cross-validated score of a precomputed-kernel SVM.

    The outer loop is a shuffled stratified split of ``y``. On each outer
    training set a ``GridSearchCV`` over ``param_grid`` is run with its own
    shuffled stratified split, and the fitted search is scored on the outer
    test set.

    Args:
        K: Precomputed kernel matrix, indexed by sample on both axes.
        y: Labels, indexed with the fold index arrays.
        n_folds: Number of folds for both the outer and the inner split.
        scoring: Scoring passed to ``GridSearchCV``.
        random_state: Random state used for both splits.
        param_grid: Grid passed to ``GridSearchCV``. ``None`` (the default)
            means ``[{'C': np.logspace(-5, 5, 20)}]``.

    Returns:
        The mean of the outer-fold scores.
    """
    # Build the default per call rather than sharing one mutable default
    # object across all calls.
    if param_grid is None:
        param_grid = [{'C': np.logspace(-5, 5, 20)}]

    cv = StratifiedKFold(y, n_folds=n_folds, shuffle=True,
                         random_state=random_state)
    scores = np.zeros(n_folds)
    for i, (train, test) in enumerate(cv):
        cvclf = SVC(kernel='precomputed')
        y_train = y[train]
        cvcv = StratifiedKFold(y_train, n_folds=n_folds,
                               shuffle=True,
                               random_state=random_state)
        clf = GridSearchCV(cvclf, param_grid=param_grid, scoring=scoring,
                           cv=cvcv, n_jobs=1)
        # Train block: kernel between training samples (rows first, which
        # selects the same sub-matrix as columns first).
        clf.fit(K[train, :][:, train], y_train)
        # Test block: kernel between test rows and training columns.
        scores[i] = clf.score(K[test, :][:, train], y[test])
    return scores.mean()
示例14: compute_svm_score_nestedCV
# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import StratifiedKFold [as 别名]
def compute_svm_score_nestedCV(K, y, n_folds,
                               scoring=balanced_accuracy_scoring,
                               random_state=None,
                               param_grid=None):
    """Compute cross-validated score of SVM using precomputed kernel.

    The outer loop is a shuffled stratified split of ``y``. On each outer
    training set a ``GridSearchCV`` over ``param_grid`` is run with its own
    shuffled stratified split, and the fitted search is scored on the outer
    test set.

    Args:
        K: Precomputed kernel matrix, indexed by sample on both axes.
        y: Labels, indexed with the fold index arrays.
        n_folds: Number of folds for both the outer and the inner split.
        scoring: Scoring passed to ``GridSearchCV``.
        random_state: Random state used for both splits.
        param_grid: Grid passed to ``GridSearchCV``. ``None`` (the default)
            means ``[{'C': np.logspace(-5, 5, 25)}]``.

    Returns:
        The mean of the outer-fold scores.
    """
    # Build the default per call rather than sharing one mutable default
    # object across all calls.
    if param_grid is None:
        param_grid = [{'C': np.logspace(-5, 5, 25)}]

    cv = StratifiedKFold(y, n_folds=n_folds, shuffle=True,
                         random_state=random_state)
    scores = np.zeros(n_folds)
    for i, (train, test) in enumerate(cv):
        cvclf = SVC(kernel='precomputed')
        y_train = y[train]
        cvcv = StratifiedKFold(y_train, n_folds=n_folds,
                               shuffle=True,
                               random_state=random_state)
        clf = GridSearchCV(cvclf, param_grid=param_grid, scoring=scoring,
                           cv=cvcv, n_jobs=1)
        # Train block: kernel between training samples.
        clf.fit(K[train, :][:, train], y_train)
        # Test block: kernel between test rows and training columns.
        scores[i] = clf.score(K[test, :][:, train], y[test])
    return scores.mean()
示例15: fit_layer
# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import StratifiedKFold [as 别名]
def fit_layer(self, layer_idx, X, y):
    """Fit layer ``layer_idx`` and, recursively, every layer after it.

    The last layer is fitted directly on ``(X, y)``. Any earlier layer first
    produces out-of-fold outputs over a stratified split with ``self.cv``
    folds. Those outputs, with index 0 of axis 1 dropped and the remaining
    two axes flattened, become the input of the next layer. The layer is
    then refitted on all of ``(X, y)``. An index past the last layer does
    nothing.
    """
    n_layers = len(self.layers)
    if layer_idx >= n_layers:
        return

    layer = self.layers[layer_idx]
    if layer_idx == n_layers - 1:
        layer.fit(X, y)
        return

    # One output column per classifier and per class, minus the dropped one.
    kept_classes = len(set(y)) - 1
    width = kept_classes * len(layer)
    output = np.zeros((X.shape[0], width))

    for tra, tst in cross_validation.StratifiedKFold(y, self.cv):
        layer.fit(X[tra], y[tra])
        out = layer.output(X[tst], mode=self.mode)
        n_rows = out.shape[0]
        n_cols = (out.shape[1] - 1) * out.shape[2]
        output[tst, :] = out[:, 1:, :].reshape(n_rows, n_cols)

    # Refit on the full data, then train the next layer on the
    # out-of-fold outputs.
    layer.fit(X, y)
    self.fit_layer(layer_idx + 1, output, y)