本文整理汇总了Python中sklearn.cross_validation.permutation_test_score函数的典型用法代码示例。如果您正苦于以下问题:Python permutation_test_score函数的具体用法?Python permutation_test_score怎么用?Python permutation_test_score使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了permutation_test_score函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_permutation_test_score_allow_nans
def test_permutation_test_score_allow_nans():
    """Check that permutation_test_score allows input data with NaNs.

    A 10-row float matrix with one all-NaN row is run through a pipeline whose
    first step is a mean imputer. The test makes no assertions: it passes when
    the call completes without raising.
    """
    X = np.arange(200, dtype=np.float64).reshape(10, -1)
    X[2, :] = np.nan
    # Floor division keeps the repeat count an integer even when "/" is true
    # division (Python 3 or ``from __future__ import division``).
    y = np.repeat([0, 1], X.shape[0] // 2)
    p = Pipeline([
        ("imputer", Imputer(strategy="mean", missing_values="NaN")),
        ("classifier", MockClassifier()),
    ])
    cval.permutation_test_score(p, X, y, cv=5)
示例2: test_permutation_score
def test_permutation_score():
    """Exercise permutation_test_score on iris with a linear SVC.

    Covers, in order: dense input, the ``labels`` argument, sparse input,
    a custom scorer built with ``make_scorer``, and finally targets that are
    replaced by a cyclic 0/1/2 pattern.
    """
    dataset = load_iris()
    X = dataset.data
    X_sparse = coo_matrix(X)
    y = dataset.target
    svm = SVC(kernel="linear")
    cv = cval.StratifiedKFold(y, 2)

    def run_with_labels(estimator, data, folds):
        # Accuracy run with constant labels and a fixed seed.
        return cval.permutation_test_score(
            estimator, data, y,
            n_permutations=30, cv=folds, scoring="accuracy",
            labels=np.ones(y.size), random_state=0,
        )

    score, scores, pvalue = cval.permutation_test_score(
        svm, X, y, n_permutations=30, cv=cv, scoring="accuracy")
    assert_greater(score, 0.9)
    assert_almost_equal(pvalue, 0.0, 1)

    score_label, _, pvalue_label = run_with_labels(svm, X, cv)
    assert_true(score_label == score)
    assert_true(pvalue_label == pvalue)

    # check that we obtain the same results with a sparse representation
    svm_sparse = SVC(kernel="linear")
    cv_sparse = cval.StratifiedKFold(y, 2)
    score_label, _, pvalue_label = run_with_labels(svm_sparse, X_sparse, cv_sparse)
    assert_true(score_label == score)
    assert_true(pvalue_label == pvalue)

    # test with custom scoring object
    def custom_score(y_true, y_pred):
        n_right = (y_true == y_pred).sum()
        n_wrong = (y_true != y_pred).sum()
        return (n_right - n_wrong) / y_true.shape[0]

    scorer = make_scorer(custom_score)
    score, _, pvalue = cval.permutation_test_score(
        svm, X, y, n_permutations=100, scoring=scorer, cv=cv, random_state=0)
    assert_almost_equal(score, 0.93, 2)
    assert_almost_equal(pvalue, 0.01, 3)

    # set random y
    y = np.mod(np.arange(len(y)), 3)
    score, scores, pvalue = cval.permutation_test_score(
        svm, X, y, n_permutations=30, cv=cv, scoring="accuracy")
    assert_less(score, 0.5)
    assert_greater(pvalue, 0.2)
示例3: permutation_cross_validation
def permutation_cross_validation(estimator, X, y, n_fold=3, isshuffle=True, cvmeth='shufflesplit', score_type='r2', n_perm=1000):
    """
    Evaluate the significance of a cross-validated score by permutations.
    -------------------------------------------------
    Parameters:
        estimator: linear model estimator
        X: IV; a 1-D array is expanded to a single column
        y: DV; a 1-D array is expanded to a single column
        n_fold: fold number cross validation. For 'shufflesplit' it sets
                the test fraction to 1/n_fold.
        isshuffle: whether KFold shuffles the samples (only used when
                   cvmeth is 'kfold')
        cvmeth: kfold or shufflesplit.
                shufflesplit is the random permutation cross-validation
                iterator (100 iterations, random_state 0)
        score_type: scoring type, 'r2' as default
        n_perm: permutation numbers
    Return:
        score: model scores
        permutation_scores: model scores when permutation labels
        pvalues: p value of permutation scores
    Raises:
        ValueError: if cvmeth is neither 'kfold' nor 'shufflesplit'
        Exception: if sklearn cannot be imported

    X and y are both standardised with preprocessing.scale before scoring.
    """
    # Reject an unknown iterator name up front; otherwise no CV object would
    # be built and the failure would surface later as an unbound variable.
    if cvmeth not in ('kfold', 'shufflesplit'):
        raise ValueError(
            "cvmeth must be 'kfold' or 'shufflesplit', got %r" % (cvmeth,))
    try:
        from sklearn import cross_validation, preprocessing
    except ImportError:
        raise Exception('To call this function, please install sklearn')
    if X.ndim == 1:
        X = np.expand_dims(X, axis=1)
    if y.ndim == 1:
        y = np.expand_dims(y, axis=1)
    X = preprocessing.scale(X)
    y = preprocessing.scale(y)
    if cvmeth == 'kfold':
        cvmethod = cross_validation.KFold(y.shape[0], n_fold, shuffle=isshuffle)
    else:
        testsize = 1.0 / n_fold
        cvmethod = cross_validation.ShuffleSplit(y.shape[0], n_iter=100, test_size=testsize, random_state=0)
    score, permutation_scores, pvalues = cross_validation.permutation_test_score(
        estimator, X, y, scoring=score_type, cv=cvmethod, n_permutations=n_perm)
    return score, permutation_scores, pvalues
示例4: automatic_bernulli
def automatic_bernulli():
    """Report ExtraTrees accuracy and a permutation test for 3..15 features.

    Reads the signs CSV, uses the 'fight' column as target (shuffled in place
    before anything is fitted) and every column except 'match', 'city',
    'date' and 'fight' as features. For each feature count the mean accuracy
    over 1000 shuffled 6-fold cross-validations is printed, followed by the
    permutation-test score and p-value.
    """
    frame = pd.read_csv('/home/vasiliy/Study/StadiumProject/Classifier/signs.csv', sep=';')
    Y = np.array(frame['fight'].get_values())
    np.random.shuffle(Y)
    frame.drop(['match', 'city', 'date', 'fight'], 1, inplace=True)
    X = frame.as_matrix()
    # NOTE(review): nesting was reconstructed; both reports are assumed to run
    # once per feature count -- confirm against the original file.
    for features_number in range(3, 16):
        X_new = SelectKBest(f_classif, k=features_number).fit_transform(X, Y)
        classifier = ExtraTreesClassifier()
        super_means = []
        for _ in range(1000):
            kf = KFold(len(X_new), n_folds=6, shuffle=True)
            fold_accuracies = []
            for training, testing in kf:
                classifier.fit(X_new[training], Y[training])
                predicted = classifier.predict(X_new[testing])
                fold_accuracies.append(np.mean(predicted == Y[testing]))
            super_means.append(np.mean(fold_accuracies))
        print(' '.join([
            'features_number=',
            str(features_number),
            'Mean accuracy: {:.1%} '.format(np.mean(super_means)),
        ]))
        # The permutation test reuses the last KFold object built above.
        score, permutation_scores, pvalue = permutation_test_score(
            classifier, X_new, Y, scoring="accuracy", cv=kf,
            n_permutations=len(Y), n_jobs=1)
        print("Classification score %s (pvalue : %s)" % (score, pvalue))
示例5: classify
def classify(x, y, classifier='lda', kern='rbf', n_folds=10, rep=10, kind='sf', n_jobs=1, n_knn=3, n_perm=0, n_tree=100,
             cvkind='skfold'):
    """Classify features, optionally with a permutation test.

    Parameters
    ----------
    x : features; passed through ``checkfeat(x, y)`` and then unpacked as a
        2-D (n_epoch, n_feat) array.
    y : class labels.
    classifier : an int or str is resolved through ``classifier_choice``
        (together with kern, n_knn, n_tree and uniform class priors); any
        other object is used directly as the classifier.
    n_folds, rep, cvkind : cross-validation settings forwarded to
        ``classify_fcn`` / ``crossval_choice``.
    kind : 'mf' classifies all features together, 'sf' each feature alone.
    n_jobs : forwarded to the scoring helpers.
    n_perm : 0 runs plain cross-validation; any other value runs
        ``permutation_test_score`` with that many permutations.

    Returns
    -------
    (100 * da, 100 * all_scores, permutation_scores, pvalue)
        ``pvalue`` is always a list. Without permutations,
        ``permutation_scores`` is 0 and ``pvalue`` is ``[0]``; with
        permutations, ``all_scores`` is 0.

    Raises
    ------
    ValueError
        If ``kind`` is neither 'mf' nor 'sf'.
    """
    # An unknown kind would otherwise leave ``da`` undefined and fail with a
    # NameError at the return statement.
    if kind not in ('mf', 'sf'):
        raise ValueError("kind must be 'mf' or 'sf', got %r" % (kind,))

    # Check format :
    x = checkfeat(x, y)
    n_epoch, n_feat = x.shape
    # Uniform priors; the float literal avoids integer division giving 0.
    n_class = len(n.unique(y))
    priors = n.array([1.0 / n_class] * n_class)

    # - Classifier's choice :
    if isinstance(classifier, (int, str)):
        clf = classifier_choice(classifier, kern=kern, n_knn=n_knn, n_tree=n_tree, priors=priors)
    else:
        clf = classifier

    if n_perm == 0:
        # - Cross validation only, no statistical evaluation :
        permutation_scores, pvalue = 0, [[0]]
        if kind == 'mf':  # Multi feature classification
            da, all_scores, cv_model = classify_fcn(x, y, clf, n_folds=n_folds, rep=rep, n_jobs=n_jobs,
                                                    cvkind=cvkind)
        else:  # Single features classification
            da = n.zeros((1, n_feat))
            all_scores = n.zeros((rep, n_folds, n_feat))
            for k in range(n_feat):
                da[:, k], all_scores[:, :, k], cv_model = classify_fcn(x[:, k], y, clf, n_folds=n_folds, rep=rep,
                                                                       n_jobs=n_jobs, cvkind=cvkind)
    else:
        # - Statistical evaluation :
        all_scores = 0
        cv_model = crossval_choice(y, cvkind=cvkind, n_folds=n_folds, rndstate=0)
        if kind == 'mf':  # Multi feature classification
            da, permutation_scores, pvalue = cross_validation.permutation_test_score(clf, x, y, scoring="accuracy",
                                                                                     cv=cv_model,
                                                                                     n_permutations=n_perm,
                                                                                     n_jobs=n_jobs)
            # The p-value is a scalar here; wrap it so the shared return
            # expression ``list(pvalue[0])`` works as in the other branches.
            pvalue = [[pvalue]]
        else:  # Single features classification
            permutation_scores = n.zeros((n_perm, n_feat))
            da = n.zeros((1, n_feat))
            pvalue = n.zeros((1, n_feat))
            for k in range(n_feat):
                da[0, k], permutation_scores[:, k], pvalue[0, k] = cross_validation.permutation_test_score(
                    clf, x[:, k], y, scoring="accuracy", cv=cv_model, n_permutations=n_perm, n_jobs=n_jobs)
    return 100*da, 100*all_scores, permutation_scores, list(pvalue[0])
示例6: computeScore
def computeScore(svm, X, y, cv):
    """Run a 100-permutation accuracy test, print the result and return it.

    Returns the (score, permutation_scores, pvalue) triple produced by
    ``permutation_test_score``.
    """
    outcome = permutation_test_score(
        svm, X, y,
        scoring='accuracy',
        cv=cv,
        n_permutations=100,
        n_jobs=1,
    )
    score, permutation_scores, pvalue = outcome
    print("Classification score %s (pvalue: %s)" % (score, pvalue))
    return score, permutation_scores, pvalue
示例7: check_trop_score
def check_trop_score(X_data, trop_clusters):
    """Permutation-test a KMeans clustering against the given cluster labels.

    KMeans is configured with as many clusters as there are distinct values
    in ``trop_clusters``; scoring uses ``rand_linker`` over a 3-iteration
    bootstrap with a 0.7 train size. Returns (score, permutation scores,
    p-value).
    """
    resampler = Bootstrap(X_data.shape[0], n_iter=3, train_size=0.7)
    clusterer = KMeans(n_clusters=len(set(trop_clusters)))
    true_score, perm_scores, p_value = permutation_test_score(
        clusterer,
        X_data,
        y=trop_clusters,
        n_permutations=100,
        n_jobs=20,
        scoring=rand_linker,
        cv=resampler,
    )
    return true_score, perm_scores, p_value
示例8: test_permutation_test_score
def test_permutation_test_score(self):
    """ModelFrame's permutation_test_score must match the direct sklearn call."""
    import sklearn.svm as svm

    iris = datasets.load_iris()
    frame = pdml.ModelFrame(iris)
    estimator = svm.SVC(kernel=str('linear'), C=1)

    # Same estimator and fold count through the accessor and through the
    # module-level function.
    actual = frame.cross_validation.permutation_test_score(estimator, cv=5)
    reference = cv.permutation_test_score(estimator, iris.data, y=iris.target, cv=5)

    self.assertEqual(len(actual), 3)
    self.assertEqual(actual[0], reference[0])
    self.assert_numpy_array_almost_equal(actual[1], reference[1])
    self.assertEqual(actual[2], reference[2])
示例9: test_permutation_score
def test_permutation_score():
    """Exercise permutation_test_score with a positional score function.

    ``zero_one_score`` and the CV iterator are passed positionally. The test
    covers dense input, the ``labels`` argument, sparse input, and targets
    replaced by a cyclic 0/1/2 pattern.
    """
    dataset = load_iris()
    X = dataset.data
    X_sparse = coo_matrix(X)
    y = dataset.target
    svm = SVC(kernel='linear')
    cv = cval.StratifiedKFold(y, 2)

    def run_with_labels(estimator, data, folds):
        # Constant labels and a fixed seed, as in the plain run otherwise.
        return cval.permutation_test_score(
            estimator, data, y, zero_one_score, folds,
            labels=np.ones(y.size), random_state=0)

    score, scores, pvalue = cval.permutation_test_score(
        svm, X, y, zero_one_score, cv)
    assert_greater(score, 0.9)
    np.testing.assert_almost_equal(pvalue, 0.0, 1)

    score_label, _, pvalue_label = run_with_labels(svm, X, cv)
    assert_true(score_label == score)
    assert_true(pvalue_label == pvalue)

    # check that we obtain the same results with a sparse representation
    svm_sparse = SVC(kernel='linear')
    cv_sparse = cval.StratifiedKFold(y, 2, indices=True)
    score_label, _, pvalue_label = run_with_labels(svm_sparse, X_sparse, cv_sparse)
    assert_true(score_label == score)
    assert_true(pvalue_label == pvalue)

    # set random y
    y = np.mod(np.arange(len(y)), 3)
    score, scores, pvalue = cval.permutation_test_score(
        svm, X, y, zero_one_score, cv)
    assert_less(score, 0.5)
    assert_greater(pvalue, 0.4)
示例10: permutation
def permutation():
    """Permutation-test six classifier / feature-set pairings and plot them.

    Builds four feature matrices from the loaded recording (unreduced,
    SelectKBest with k=27, the channels returned by ``data.goodCell``, and a
    38-component PCA), scores each pairing with ``permutation_test_score``,
    draws the scores as bars with the mean permutation scores as a line, and
    finally prints the score, mean permutation score and p-value left over
    from the last pairing.
    """
    mat_path = 'data/n228_bcdefgh.mat'
    recording = data.load(mat_path)
    X, y = data.build(recording, range(0, 96), 'fr1', 17)

    # Univariate Feature Selection
    Xa = SelectKBest(f_classif, k=27).fit(X, y).transform(X)

    # Select good cell with heuristic (this also rebinds y)
    channel = data.goodCell(recording)
    Xb, y = data.build(recording, channel, 'fr1', 17)

    # PCA dimensionality reduction
    Xc = PCA(n_components=38).fit_transform(X)

    feature_sets = [X, Xa, Xb, X, Xc, Xa]
    pNB = PoissonNB()
    gNB = GaussianNB()
    classifiers = [pNB, pNB, pNB, gNB, gNB, gNB]
    # Names of the six pairings, in order; not used by the plot below.
    label = ['Poisson Unreduced', 'Poisson Univariate Reduction', 'Poisson Heuristic Reduction', 'Gaussion No reduction', 'Gaussian PCA reduction', 'Gaussian Univariate Reduction']

    scores = []
    perm_scores = []
    p_value = []
    for clf, features in zip(classifiers, feature_sets):
        score, permutation_score, pvalue = permutation_test_score(
            clf, features, y,
            cv=StratifiedKFold(y, n_folds=3, shuffle=True, random_state=42),
            n_permutations=100, n_jobs=-1, random_state=42,
            scoring=make_scorer(error_distance, greater_is_better=False))
        scores.append(score)
        perm_scores.append(np.mean(permutation_score))
        p_value.append(pvalue)

    ind = np.arange(len(scores))
    plt.bar(ind, scores)
    plt.plot(ind, perm_scores)
    plt.show()

    # These report the values from the final loop iteration only.
    print("Average Distance between real location and predicted location")
    print(score)
    print("Chance Performance, from permutation")
    print(np.mean(permutation_score))
    print("p-value")
    print(pvalue)
示例11: handle_bayes
def handle_bayes():
    """Permutation-test a Gaussian naive Bayes model on six selected signs.

    Reads the signs CSV, takes the six columns listed in ``signs`` as
    features and the 'fight' column as target, then prints the
    cross-validated accuracy and its permutation p-value (one permutation
    per sample, shuffled 6-fold cross-validation).
    """
    input_data = pd.read_csv('/home/vasiliy/Study/StadiumProject/Classifier/signs.csv', sep=';')
    signs = ['ms_median_sadness', 'likes', 'ms_min_anger', 'ms_min_disgust', 'ms_min_fear', 'ms_avg_anger']
    X = input_data[signs].as_matrix()
    Y = np.array(input_data['fight'].get_values())
    classifier = GaussianNB()
    # Size the folds by the number of samples. Using len(signs) (the number
    # of feature names) would restrict cross-validation to the first six rows.
    kf = KFold(len(X), n_folds=6, shuffle=True)
    # No manual pre-fitting: permutation_test_score fits its own clones of
    # the estimator on every fold.
    score, permutation_scores, pvalue = permutation_test_score(
        classifier, X, Y, scoring="accuracy", cv=kf, n_permutations=len(Y), n_jobs=1)
    print("Classification score %s (pvalue : %s)" % (score, pvalue))
示例12: test_permutation_score
def test_permutation_score():
    """Exercise permutation_test_score on iris with a linear SVC.

    Covers dense input, the ``labels`` argument, an F-beta scorer built with
    ``make_scorer``, sparse input, targets replaced by a cyclic 0/1/2
    pattern, and the deprecated ``score_func`` keyword.
    """
    dataset = load_iris()
    X = dataset.data
    X_sparse = coo_matrix(X)
    y = dataset.target
    svm = SVC(kernel='linear')
    cv = cval.StratifiedKFold(y, 2)

    def run_with_labels(estimator, data, folds, scoring):
        # Constant labels and a fixed seed for a reproducible p-value.
        return cval.permutation_test_score(
            estimator, data, y, cv=folds, scoring=scoring,
            labels=np.ones(y.size), random_state=0)

    score, scores, pvalue = cval.permutation_test_score(
        svm, X, y, cv=cv, scoring="accuracy")
    assert_greater(score, 0.9)
    assert_almost_equal(pvalue, 0.0, 1)

    score_label, _, pvalue_label = run_with_labels(svm, X, cv, "accuracy")
    assert_true(score_label == score)
    assert_true(pvalue_label == pvalue)

    # test with custom scoring object
    scorer = make_scorer(fbeta_score, beta=2)
    score_label, _, pvalue_label = run_with_labels(svm, X, cv, scorer)
    assert_almost_equal(score_label, .97, 2)
    assert_almost_equal(pvalue_label, 0.01, 3)

    # check that we obtain the same results with a sparse representation
    svm_sparse = SVC(kernel='linear')
    cv_sparse = cval.StratifiedKFold(y, 2)
    score_label, _, pvalue_label = run_with_labels(
        svm_sparse, X_sparse, cv_sparse, "accuracy")
    assert_true(score_label == score)
    assert_true(pvalue_label == pvalue)

    # set random y
    y = np.mod(np.arange(len(y)), 3)
    score, scores, pvalue = cval.permutation_test_score(
        svm, X, y, cv=cv, scoring="accuracy")
    assert_less(score, 0.5)
    assert_greater(pvalue, 0.2)

    # test with deprecated interface; warnings are recorded, not shown
    with warnings.catch_warnings(record=True):
        score, scores, pvalue = cval.permutation_test_score(
            svm, X, y, score_func=accuracy_score, cv=cv)
    assert_less(score, 0.5)
    assert_greater(pvalue, 0.2)
示例13: bayes_classification
def bayes_classification(permutation, test):
    """Cross-validate Gaussian naive Bayes for 3..29 selected features.

    Parameters
    ----------
    permutation : if truthy, the 'fight' targets are shuffled in place before
        any feature selection or fitting.
    test : if truthy, the permutation-test score and p-value are printed for
        every feature count.

    Returns
    -------
    list
        Mean 6-fold accuracy for each feature count, in increasing order of
        the feature count.
    """
    input_data = pd.read_csv('/home/vasiliy/Study/StadiumProject/Classifier/signs.csv', sep=';')
    Y = np.array(input_data['fight'].get_values())
    if permutation:
        np.random.shuffle(Y)
    data_array = input_data.drop(['match', 'city', 'date', 'fight'], 1).as_matrix()

    output_data = []
    # NOTE(review): nesting was reconstructed; the permutation test is assumed
    # to run once per feature count -- confirm against the original file.
    for features_number in range(3, 30):
        X_new = SelectKBest(f_classif, k=features_number).fit_transform(data_array, Y)
        classifier = GaussianNB()
        kf = KFold(len(X_new), n_folds=6, shuffle=True)
        means = []
        for training, testing in kf:
            classifier.fit(X_new[training], Y[training])
            # score() already returns the fold accuracy; no separate
            # predict() call is needed.
            means.append(classifier.score(X_new[testing], Y[testing]))
        output_data.append(np.mean(means))
        score, permutation_scores, pvalue = permutation_test_score(
            classifier, X_new, Y, scoring="accuracy", cv=kf, n_permutations=len(Y), n_jobs=1)
        if test:
            print("Classification score %s (pvalue : %s)" % (score, pvalue))
    return output_data
示例14: GaussianNB
#print "The different cross_scores: ", cross_score_LDA
#### Naive bayes ####
# Script excerpt: X_scl, y, loo, cv, n_jobs, cross_val_score and
# permutation_test_score come from earlier in the script (not shown here).
from sklearn.naive_bayes import GaussianNB
ngb = GaussianNB()
# Cross-validated accuracy with the ``loo`` iterator.
# NOTE(review): n_jobs is hard-coded to 8 here while the permutation test
# below uses the ``n_jobs`` variable -- confirm this is intended.
cross_score_NB = cross_val_score(ngb, X_scl, y, scoring="accuracy", cv = loo,
n_jobs = 8, verbose = True)
print "Cross val score: ", cross_score_NB.mean()
print "The different cross_scores: ", cross_score_NB
# Significance of the accuracy: 2000 label permutations, using the ``cv``
# iterator rather than ``loo``.
score_NB, permutation_score_NB, pvalue_NB = permutation_test_score(ngb, X_scl, y,
scoring="accuracy", cv = cv, n_permutations = 2000,
n_jobs = n_jobs, verbose = True)
print 'Classification score:', score_NB, 'p-value:', pvalue_NB
#### SVM ####
# Same cross-validated accuracy report for a linear SVM.
from sklearn.svm import LinearSVC
svc = LinearSVC()
cross_score_SVM = cross_val_score(svc, X_scl, y, scoring="accuracy", cv = loo,
n_jobs = 8, verbose = True)
print "Cross val score: ", cross_score_SVM.mean()
print "The different cross_scores: ", cross_score_SVM
score_SVM, permutation_score_SVM, pvalue_SVM = permutation_test_score(svc, X, y,
示例15: complex_networks_mapping_uri_data
#.........这里部分代码省略.........
(n, subjid, thresh_dens)
el = nx.read_edgelist(os.path.join(directory, g_name),
nodetype=int)
g = nx.Graph()
# there are 148 regions, or nodes
g.add_nodes_from(range(148))
g.add_edges_from(el.edges())
graphs.append(g)
subjects.append(subj_name)
classes.append(subjid)
vects.append(complex_network_mapping(graphs[-1]))
print "Graph built for subject %s and class %s." % \
(subj_name, subjid)
# Reordering data for the leave-one-subject-out cross-validation
# The four parallel lists are interleaved: slot 2*i takes element i of the
# first half and slot 2*i + 1 takes element i of the second half.
nm_graphs = [None] * len(graphs)
nm_classes = [None] * len(classes)
nm_subjects = [None] * len(subjects)
nm_vects = [None] * len(vects)
# NOTE(review): "len(...) / 2" is used as a range bound and as a list index,
# so this relies on integer division -- confirm before running under true
# division.
for i in range(len(graphs) / 2):
nm_graphs[i*2] = graphs[i]
nm_graphs[i*2 + 1] = graphs[(len(graphs) / 2) + i]
nm_classes[i*2] = classes[i]
nm_classes[i*2 + 1] = classes[(len(classes) / 2) + i]
nm_subjects[i*2] = subjects[i]
nm_subjects[i*2 + 1] = subjects[(len(subjects) / 2) + i]
nm_vects[i*2] = vects[i]
nm_vects[i*2 + 1] = vects[(len(vects) / 2) + i]
print nm_subjects
print nm_classes
nm_vects = np.array(nm_vects)
# nm_vects = np.where(nm_vects == inf, 10, nm_vects)
# nm_vects = np.where(nm_vects == nan, 10, nm_vects)
# Standardise the feature vectors and print the result with its mean and max.
ss = StandardScaler()
X = ss.fit_transform(nm_vects)
print X
print np.mean(X)
print np.max(X)
# Hyper-parameter grids for the SVC searches; the second adds a sigmoid kernel.
tuned_parameters = [{'kernel': ['rbf'], 'gamma': [1e-3, 1e-4],
'C': [1, 10, 100, 1000]},
{'kernel': ['linear'], 'C': [1, 10, 100, 1000]}]
tuned_parameters2 = [{'kernel': ['rbf'], 'gamma': [1e-3, 1e-4],
'C': [1, 10, 100, 1000]},
{'kernel': ['sigmoid'], 'gamma': [1e-3, 1e-4],
'C': [1, 10, 100, 1000]},
{'kernel': ['linear'], 'C': [1, 10, 100, 1000]}]
from sklearn.grid_search import GridSearchCV
# ``niter`` (the fold count) is defined outside this excerpt.
clf = GridSearchCV(SVC(C=1), tuned_parameters, cv=KFold(len(nm_classes), niter, shuffle=False))
# NOTE(review): clf2 is built but not used in the lines visible here.
clf2 = GridSearchCV(SVC(C=1), tuned_parameters2, cv=KFold(len(nm_classes), niter, shuffle=False))
clf.fit(X, np.array(nm_classes))
# The best parameters are evaluated and discarded; clf is then rebound to a
# fixed linear SVC, so the search result is not used below.
clf.best_params_
clf = SVC(C=100, kernel='linear')
print "Now getting cross validation "
cvr = SVC(C=1000, gamma=.001, kernel='rbf')
# NOTE(review): the next two calls are identical, and each cv_scores value is
# overwritten by the following assignment; only the SGD scores are printed.
cv_scores = cross_val_score(cvr, X, np.array(nm_classes),
cv=KFold(len(nm_classes),
niter, shuffle=False))
cv_scores = cross_val_score(cvr, X, np.array(nm_classes),
cv=KFold(len(nm_classes),
niter, shuffle=False))
cv_scores = cross_val_score(clf, X, np.array(nm_classes),
cv=KFold(len(nm_classes),
niter, shuffle=False))
from sklearn.linear_model import SGDClassifier
clfGD = SGDClassifier(loss='log')
clfGD.fit(X, np.array(nm_classes))
cv_scores = cross_val_score(clfGD, X, np.array(nm_classes),
cv=KFold(len(nm_classes),
niter, shuffle=False))
print cv_scores
print np.mean(cv_scores)
print("Accuracy: %0.2f (+/- %0.2f)" %
(cv_scores.mean(), cv_scores.std() * 2))
# Baseline: the same cross-validation with a dummy classifier.
from sklearn.dummy import DummyClassifier
null_scores = cross_val_score(DummyClassifier(), X, np.array(nm_classes),
cv=KFold(len(nm_classes),
niter, shuffle=False))
print null_scores.mean()
from sklearn.cross_validation import permutation_test_score
# NOTE(review): permutation_test_score is documented to return a
# (score, permutation_scores, pvalue) tuple, so the .mean() call below would
# presumably fail on a tuple -- confirm and unpack the result if so.
null_scores_perm = permutation_test_score(cvr, X, np.array(nm_classes),
cv=KFold(len(nm_classes),
niter, shuffle=False))
print null_scores_perm.mean()