This article collects typical usage examples of the Python method sklearn.discriminant_analysis.LinearDiscriminantAnalysis.predict_proba. If you have been asking yourself how LinearDiscriminantAnalysis.predict_proba is used in practice, the curated examples below may help. You can also read more about the containing class, sklearn.discriminant_analysis.LinearDiscriminantAnalysis.
Below are 10 code examples of LinearDiscriminantAnalysis.predict_proba, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python examples.
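Before the collected examples, here is a minimal, self-contained sketch of the method on synthetic toy data (two Gaussian-like blobs, chosen only for illustration):

import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# Two 2-D blobs, one per class.
X = np.array([[0.0, 0.0], [0.5, 0.2], [-0.3, 0.1],
              [4.0, 4.0], [4.2, 3.8], [3.7, 4.1]])
y = np.array([0, 0, 0, 1, 1, 1])

clf = LinearDiscriminantAnalysis().fit(X, y)
# One posterior probability per class; each row sums to 1.
print(clf.predict_proba([[2.0, 2.0]]))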
Example 1: LinearDiscriminantAnalysisPredictor
# Required import:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
class LinearDiscriminantAnalysisPredictor(PredictorBase):
    '''
    Linear Discriminant Analysis
    '''

    def __init__(self, animal_type):
        self.animal_type = animal_type
        self.clf = LinearDiscriminantAnalysis()

    def fit(self, X_train, y_train):
        self.clf.fit(X_train, y_train)

    def predict(self, X_test):
        predictions = self.clf.predict_proba(X_test)
        predictions_df = self.bundle_predictions(predictions)
        return predictions_df

    def find_best_params(self):
        parameters = {'solver': ['svd', 'lsqr', 'eigen']}
        lda = LinearDiscriminantAnalysis()
        # Note: the grid_search module was removed in scikit-learn 0.20;
        # current versions provide GridSearchCV in sklearn.model_selection.
        clf = grid_search.GridSearchCV(lda, parameters)
        train_data = get_data('../data/train.csv')
        train_data = select_features(train_data, self.animal_type)
        X = train_data.drop(['OutcomeType'], axis=1)
        y = train_data['OutcomeType']
        clf.fit(X, y)
        print(clf.best_params_)
Author: paul-reiners, Project: kaggle-shelter-animal-outcomes, Lines: 30, Source: linear_descriminant_analysis_predictor.py
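Since the grid_search module above no longer exists in current scikit-learn, here is a minimal sketch of the same solver search against the sklearn.model_selection API; the make_classification data is a synthetic stand-in for the project's shelter-animal features:

from sklearn.datasets import make_classification
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.model_selection import GridSearchCV

X, y = make_classification(n_samples=200, n_features=10, random_state=0)

# 'svd' ignores shrinkage, so the solver is the only parameter searched here.
search = GridSearchCV(LinearDiscriminantAnalysis(),
                      {'solver': ['svd', 'lsqr', 'eigen']})
search.fit(X, y)
print(search.best_params_)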
Example 2: test_lda_predict
def test_lda_predict():
    # Test LDA classification.
    # This checks that LDA implements fit and predict and returns correct
    # values for simple toy data.
    for test_case in solver_shrinkage:
        solver, shrinkage = test_case
        clf = LinearDiscriminantAnalysis(solver=solver, shrinkage=shrinkage)
        y_pred = clf.fit(X, y).predict(X)
        assert_array_equal(y_pred, y, "solver %s" % solver)

        # Assert that it works with 1D data
        y_pred1 = clf.fit(X1, y).predict(X1)
        assert_array_equal(y_pred1, y, "solver %s" % solver)

        # Test probability estimates
        y_proba_pred1 = clf.predict_proba(X1)
        assert_array_equal((y_proba_pred1[:, 1] > 0.5) + 1, y,
                           "solver %s" % solver)
        y_log_proba_pred1 = clf.predict_log_proba(X1)
        assert_array_almost_equal(np.exp(y_log_proba_pred1), y_proba_pred1,
                                  8, "solver %s" % solver)

        # Primarily test for commit 2f34950 -- "reuse" of priors
        y_pred3 = clf.fit(X, y3).predict(X)
        # LDA shouldn't be able to separate those
        assert_true(np.any(y_pred3 != y3), "solver %s" % solver)

    # Test invalid shrinkages
    clf = LinearDiscriminantAnalysis(solver="lsqr", shrinkage=-0.2231)
    assert_raises(ValueError, clf.fit, X, y)
    clf = LinearDiscriminantAnalysis(solver="eigen", shrinkage="dummy")
    assert_raises(ValueError, clf.fit, X, y)
    clf = LinearDiscriminantAnalysis(solver="svd", shrinkage="auto")
    assert_raises(NotImplementedError, clf.fit, X, y)
    # Test unknown solver
    clf = LinearDiscriminantAnalysis(solver="dummy")
    assert_raises(ValueError, clf.fit, X, y)
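The validation checks above imply the accepted values: shrinkage must be None, 'auto', or a float in [0, 1], and only the 'lsqr' and 'eigen' solvers support it. A small sketch of valid combinations on synthetic data:

from sklearn.datasets import make_classification
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# make_classification adds redundant (collinear) features by default,
# which is exactly the situation shrinkage is meant to handle.
X, y = make_classification(n_samples=100, n_features=20, random_state=0)

# 'auto' selects the shrinkage intensity with the Ledoit-Wolf estimator.
auto = LinearDiscriminantAnalysis(solver='lsqr', shrinkage='auto').fit(X, y)
# A fixed intensity interpolates between the empirical covariance (0)
# and the identity scaled by the average variance (1).
fixed = LinearDiscriminantAnalysis(solver='eigen', shrinkage=0.3).fit(X, y)
print(auto.predict_proba(X[:2]))
print(fixed.predict_proba(X[:2]))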
Example 3: main
def main():
    """Read the train/test data and write a submission file."""
    df = pd.read_csv("train.csv")
    # Encode the result label
    le = LabelEncoder().fit(df.species)
    labels = le.transform(df.species)
    classes = list(le.classes_)
    print(classes)
    # Drop extra fields
    df = df.drop(['species', 'id'], 1)
    # Train/test split using stratified sampling (pre-0.18
    # sklearn.cross_validation API: the split object is iterated directly;
    # only the last of the 10 splits is kept)
    sss = StratifiedShuffleSplit(labels, 10, test_size=0.2, random_state=23)
    for train_index, test_index in sss:
        x_train, x_test = df.values[train_index], df.values[test_index]
        y_train, y_test = labels[train_index], labels[test_index]
    # Classification algorithm
    # classification(x_train, y_train, x_test, y_test)

    # Predict the test set
    favorite_clf = LinearDiscriminantAnalysis()
    favorite_clf.fit(x_train, y_train)
    test = pd.read_csv('test.csv')
    test_ids = test.id
    test = test.drop(['id'], axis=1)
    test_predictions = favorite_clf.predict_proba(test)
    print(test_predictions)
    # Format the DataFrame: one probability column per class
    submission = pd.DataFrame(test_predictions, columns=classes)
    submission.insert(0, 'id', test_ids)
    # Export the submission
    submission.to_csv('submission.csv', index=False)
Example 4: processTraining
def processTraining(cvtrainx, cvtrainy, cvevalx, prob=False):
    print(cvtrainx[0])
    # cvevalx = [' '.join(s) for s in cvevalx]
    print(cvevalx[0])
    tfv = TfidfVectorizer(min_df=10, max_features=None,
                          strip_accents='unicode', analyzer=mytokenlizer,
                          ngram_range=(1, 5), use_idf=1, smooth_idf=1,
                          sublinear_tf=1, stop_words='english')
    cvtrainx = tfv.fit_transform(cvtrainx)
    cvevalx = tfv.transform(cvevalx)
    # Reduce the sparse TF-IDF matrix to a dense 600-dimensional space
    tsvd = TruncatedSVD(n_components=600, random_state=2016)
    cvtrainx = tsvd.fit_transform(cvtrainx)
    cvevalx = tsvd.transform(cvevalx)
    print(len(tfv.get_feature_names()))
    print(tfv.get_feature_names()[0:10])
    clf = LinearDiscriminantAnalysis()
    clf.fit(cvtrainx, cvtrainy)
    if prob:
        predictValue = clf.predict_proba(cvevalx)
    else:
        predictValue = clf.predict(cvevalx)
    return predictValue
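The TF-IDF to TruncatedSVD to LDA chain above is a common way to make LDA usable on text: LDA needs a dense, modest-dimensional input, which the SVD step provides. A hedged sketch of the same chain as a scikit-learn Pipeline, with the default word tokenizer standing in for the project's mytokenlizer and the sizes scaled down for a toy corpus:

from sklearn.pipeline import make_pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

texts = ["the quick brown fox", "foxes are quick and brown",
         "a quick fox jumped", "brown foxes jump quickly",
         "lazy dogs sleep all day", "dogs are lazy and sleepy",
         "the dog naps all day", "sleepy dogs lie around"] * 5
labels = [0, 0, 0, 0, 1, 1, 1, 1] * 5

# min_df and n_components are far smaller than above, to fit the tiny corpus.
pipe = make_pipeline(
    TfidfVectorizer(ngram_range=(1, 2), sublinear_tf=True),
    TruncatedSVD(n_components=2, random_state=2016),
    LinearDiscriminantAnalysis(),
)
pipe.fit(texts, labels)
print(pipe.predict_proba(["quick foxes"]))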
Example 5: range
mc_logloss = []
mc_train_pred = []
for i_mc in range(params['n_monte_carlo']):
    cv_n = params['cv_n']
    # Pre-0.18 cross_validation API: labels go to the constructor and the
    # fold object is iterated directly.
    kf = StratifiedKFold(target.values, n_folds=cv_n, shuffle=True,
                         random_state=i_mc ** 3)
    xgboost_rounds = []

    for cv_train_index, cv_test_index in kf:
        X_train, X_test = train[cv_train_index, :], train[cv_test_index, :]
        y_train, y_test = (target.iloc[cv_train_index].values,
                           target.iloc[cv_test_index].values)

        lda.fit(X_train, y_train)
        # Out-of-fold probability for the positive class
        predicted_results = lda.predict_proba(X_test)[:, 1]
        train_predictions[cv_test_index] = predicted_results

    print('logloss score ', log_loss(target.values, train_predictions))
    mc_logloss.append(log_loss(target.values, train_predictions))
    mc_train_pred.append(train_predictions)

mc_train_pred = np.mean(np.array(mc_train_pred), axis=0)
mc_logloss_mean.append(np.mean(mc_logloss))
mc_logloss_sd.append(np.std(mc_logloss))
print('The logloss range is: %.5f to %.5f' %
      (mc_logloss_mean[-1] - mc_logloss_sd[-1], mc_logloss_mean[-1] + mc_logloss_sd[-1]))
print_results.append('The logloss range is: %.5f to %.5f' %
                     (mc_logloss_mean[-1] - mc_logloss_sd[-1], mc_logloss_mean[-1] + mc_logloss_sd[-1]))
print('For ', mc_logloss)
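For reference, a minimal sketch of the same out-of-fold loop against the current sklearn.model_selection API, where the fold generator takes the data in split() instead of the constructor; the random data here is a stand-in for the example's train/target arrays:

import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

rng = np.random.RandomState(0)
train = rng.randn(100, 5)
target = rng.randint(0, 2, size=100)

train_predictions = np.zeros(100)
lda = LinearDiscriminantAnalysis()
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
for cv_train_index, cv_test_index in kf.split(train, target):
    lda.fit(train[cv_train_index], target[cv_train_index])
    # Out-of-fold probability for the positive class
    train_predictions[cv_test_index] = lda.predict_proba(train[cv_test_index])[:, 1]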
Example 6: test_model
print "Random Forest"
test_model(model)
model_lda = LinearDiscriminantAnalysis()
print "LDA"
test_model(model_lda)
use_prediction = False
raw_test_data, test_labels = readDataMultipleFiles([3])
test_data_matrix, test_data_matrices, test_labels, test_labels_binary = buildMatricesAndLabels(raw_test_data, test_labels, scaling_functions)
test_predictions = []
for features in test_data_matrix:
if not use_prediction:
test_predictions.append(model_lda.decision_function([features])[0]) # score for classes_[1]
else:
test_predictions.append(model_lda.predict_proba([features])[0])
for i in range(target_count):
print sum(test_labels_binary[i])
thresholds_for_bci = multiclassRoc(test_predictions, test_labels_binary)
# model = SVC(C=1000, kernel="poly", degree=2)
# print "SVM"
# test_model(model)
# pickle.Pickler(file("U:\\data\\test\\5_targets\\model0.pkl", "w")).dump(model_lda)
# pickle.Pickler(file("U:\\data\\test\\5_targets\\model0_mm.pkl", "w")).dump(min_max)
# pickle.Pickler(file("U:\\data\\test\\5_targets\\model0_thresh.pkl", "w")).dump(thresholds_for_bci)
# print model_lda.coef_
Example 7: discriminatePlot
# ......... (part of the code is omitted here) .........
print('LDA Weights:')
print('DFA1:', ldaMod.coef_[0, :])
if nClasses > 2:
    print('DFA2:', ldaMod.coef_[1, :])
if nClasses > 3:
    print('DFA3:', ldaMod.coef_[2, :])

# Obtain fits in this rotated space for display purposes
ldaMod.fit(Xrr, yGood)
qdaMod.fit(Xrr, yGood)
rfMod.fit(Xrr, yGood)
XrrMean = Xrr.mean(0)

# Make a mesh for plotting
x1, x2 = np.meshgrid(np.arange(-6.0, 6.0, 0.1), np.arange(-6.0, 6.0, 0.1))
xm1 = np.reshape(x1, -1)
xm2 = np.reshape(x2, -1)
nxm = np.size(xm1)
Xm = np.zeros((nxm, Xrr.shape[1]))
Xm[:, 0] = xm1
if Xrr.shape[1] > 1:
    Xm[:, 1] = xm2
for ix in range(2, Xrr.shape[1]):
    Xm[:, ix] = np.squeeze(np.ones((nxm, 1))) * XrrMean[ix]

XmcLDA = np.zeros((nxm, 4))  # RGBA values for color for LDA
XmcQDA = np.zeros((nxm, 4))  # RGBA values for color for QDA
XmcRF = np.zeros((nxm, 4))   # RGBA values for color for RF

# Predict values on the mesh for plotting based on the first two DFs
yPredLDA = ldaMod.predict_proba(Xm)
yPredQDA = qdaMod.predict_proba(Xm)
yPredRF = rfMod.predict_proba(Xm)

# Transform the predictions into color codes
maxLDA = yPredLDA.max()
for ix in range(nxm):
    cWeight = yPredLDA[ix, :]  # Prob for all classes
    cWinner = (cWeight == cWeight.max()).astype('float')  # Winner takes all
    # XmcLDA[ix,:] = np.dot(cWeight, cClasses)/nClasses
    XmcLDA[ix, :] = np.dot(cWinner, cClasses)
    XmcLDA[ix, 3] = cWeight.max() / maxLDA

# Plot the probability surface
plt.figure(facecolor='white', figsize=(10, 3))
plt.subplot(131)
Zplot = XmcLDA.reshape(np.shape(x1)[0], np.shape(x1)[1], 4)
plt.imshow(Zplot, zorder=0, extent=[-6, 6, -6, 6], origin='lower',
           interpolation='none', aspect='auto')
if nClasses > 2:
    plt.scatter(Xrr[:, 0], Xrr[:, 1], c=cValGood, s=40, zorder=1)
else:
    plt.scatter(Xrr, (np.random.rand(Xrr.size) - 0.5) * 12.0, c=cValGood, s=40, zorder=1)
plt.title('%s: LDA pC %.0f %%' % (titleStr, (ldaScores.mean() * 100.0)))
plt.axis('square')
plt.xlim((-6, 6))
plt.ylim((-6, 6))
plt.xlabel('DFA 1')
plt.ylabel('DFA 2')

# Transform the predictions into color codes
maxQDA = yPredQDA.max()
Example 8: LinearDiscriminantAnalysis
lda = LinearDiscriminantAnalysis()
lda.fit(output, labels)
print(lda.predict([[-0.8, -1]]))
y_pred = lda.predict(output)
print(labels)
print(y_pred)
mcc = matthews_corrcoef(labels, y_pred)
print("MCC=" + str(mcc))

# Plotting the LDA contour
nx, ny = 200, 100
x_min, x_max = np.amin(output[:, 0]), np.amax(output[:, 0])
y_min, y_max = np.amin(output[:, 1]), np.amax(output[:, 1])
xx, yy = np.meshgrid(np.linspace(x_min, x_max, nx), np.linspace(y_min, y_max, ny))
Z = lda.predict_proba(np.c_[xx.ravel(), yy.ravel()])
Z = Z[:, 1].reshape(xx.shape)
plt.contour(xx, yy, Z, [0.5], linewidths=5, colors='k', linestyles='dashed')

# Plotting the LDA means
plt.plot(lda.means_[0][0], lda.means_[0][1], 'o', color='black', markersize=10)
plt.plot(lda.means_[1][0], lda.means_[1][1], 'o', color='black', markersize=10)
plt.title('LDA with MDS and Gaussian Mixture')

# Plot red and green data
output_red = output[0:26]
output_green = output[27:52]
plt.scatter(output_red[:, 0], output_red[:, 1], color='r')
plt.scatter(output_green[:, 0], output_green[:, 1], color='g')
plt.show()
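A side note on the 0.5 contour drawn above: for a two-class LDA, the decision score is the log posterior odds, so the 0.5 probability contour coincides with decision_function(x) = 0. A small sketch of that equivalence on synthetic data, assuming current scikit-learn behavior where the binary predict_proba is the logistic sigmoid of the decision score:

import numpy as np
from scipy.special import expit
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

rng = np.random.RandomState(0)
X = np.vstack([rng.randn(50, 2), rng.randn(50, 2) + [3, 3]])
y = np.repeat([0, 1], 50)

lda = LinearDiscriminantAnalysis().fit(X, y)
scores = lda.decision_function(X)
# P(class 1 | x) = sigmoid(decision score), so score 0 maps to probability 0.5.
assert np.allclose(lda.predict_proba(X)[:, 1], expit(scores))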
Example 9: TrialClassificationWithPhysiology
def TrialClassificationWithPhysiology(phys_filename, trial_types, plot_results=False):
    BlockAB_stress_trial_inds = np.ravel(np.nonzero(trial_types == 1))
    BlockAB_reg_trial_inds = np.ravel(np.nonzero(trial_types == 0))
    num_trials = len(trial_types)

    phys_features = dict()
    sp.io.loadmat(phys_filename, phys_features)
    ibi_reg_mean = np.ravel(phys_features['ibi_reg_mean'])
    ibi_stress_mean = np.ravel(phys_features['ibi_stress_mean'])
    pupil_reg_mean = np.ravel(phys_features['pupil_reg_mean'])
    pupil_stress_mean = np.ravel(phys_features['pupil_stress_mean'])

    ibi = np.zeros([num_trials, 1])
    ibi[BlockAB_reg_trial_inds] = ibi_reg_mean.reshape((len(BlockAB_reg_trial_inds), 1))
    ibi[BlockAB_stress_trial_inds] = ibi_stress_mean.reshape((len(BlockAB_stress_trial_inds), 1))
    pupil = np.zeros([num_trials, 1])
    pupil[BlockAB_reg_trial_inds] = pupil_reg_mean.reshape((len(BlockAB_reg_trial_inds), 1))
    pupil[BlockAB_stress_trial_inds] = pupil_stress_mean.reshape((len(BlockAB_stress_trial_inds), 1))
    ibi = ibi - np.nanmean(ibi)
    pupil = pupil - np.nanmean(pupil)

    # Trial classification with physiological data
    X_phys = np.hstack((ibi, pupil))
    svc = LinearDiscriminantAnalysis(solver='eigen', shrinkage='auto')
    # svc = SVC(kernel='linear', C=0.5, probability=True, random_state=0)
    # svc = LogisticRegression(C=1.0, penalty='l1')
    svc.fit(X_phys, trial_types)
    y_pred = svc.predict(X_phys)
    classif_rate = np.mean(y_pred.ravel() == trial_types.ravel()) * 100

    xx = np.linspace(0.8 * np.min(ibi), 1.2 * np.max(ibi), 100)
    yy = np.linspace(0.8 * np.min(pupil), 1.2 * np.max(pupil), 100)
    xx, yy = np.meshgrid(xx, yy)
    Xfull = np.c_[xx.ravel(), yy.ravel()]
    probas = svc.predict_proba(Xfull)
    n_classes = np.unique(y_pred).size
    class_labels = ['Regular', 'Stress']
    cmap = plt.get_cmap('bwr')

    if plot_results:
        plt.figure()
        for k in range(n_classes):
            plt.subplot(1, n_classes, k + 1)
            plt.title(class_labels[k])
            imshow_handle = plt.imshow(probas[:, k].reshape((100, 100)),
                                       vmin=0.1, vmax=0.9,
                                       extent=(0.8 * np.min(ibi), 1.2 * np.max(ibi),
                                               0.8 * np.min(pupil), 1.2 * np.max(pupil)),
                                       origin='lower', aspect='auto', cmap=cmap)
            if k == 0:
                plt.xlabel('IBI')
                plt.ylabel('Pupil')
            plt.xticks(())
            plt.yticks(())
            plt.axis('tight')
            idx = (y_pred == k)
            if idx.any():
                plt.scatter(X_phys[idx, 0], X_phys[idx, 1], marker='o', color='k')
        ax = plt.axes([0.15, 0.04, 0.7, 0.05])
        plt.colorbar(imshow_handle, cax=ax, orientation='horizontal')
        plt.title('LDA Classification with Physiological Data: %f correct' % (classif_rate))
        plt.show()

    return ibi, pupil
Example 10: test_lda_predict_proba
def test_lda_predict_proba(solver, n_classes):
    def generate_dataset(n_samples, centers, covariances, random_state=None):
        """Generate multivariate normal data given some centers and
        covariances"""
        rng = check_random_state(random_state)
        X = np.vstack([rng.multivariate_normal(mean, cov,
                                               size=n_samples // len(centers))
                       for mean, cov in zip(centers, covariances)])
        y = np.hstack([[clazz] * (n_samples // len(centers))
                       for clazz in range(len(centers))])
        return X, y

    blob_centers = np.array([[0, 0], [-10, 40], [-30, 30]])[:n_classes]
    blob_stds = np.array([[[10, 10], [10, 100]]] * len(blob_centers))
    X, y = generate_dataset(
        n_samples=90000, centers=blob_centers, covariances=blob_stds,
        random_state=42
    )
    lda = LinearDiscriminantAnalysis(solver=solver, store_covariance=True,
                                     shrinkage=None).fit(X, y)
    # check that the empirical means and covariances are close enough to the
    # ones used to generate the data
    assert_allclose(lda.means_, blob_centers, atol=1e-1)
    assert_allclose(lda.covariance_, blob_stds[0], atol=1)

    # implement the method to compute the probability given in "The Elements
    # of Statistical Learning" (cf. p. 127, Sect. 4.4.5, "Logistic Regression
    # or LDA?")
    precision = linalg.inv(blob_stds[0])
    alpha_k = []
    alpha_k_0 = []
    for clazz in range(len(blob_centers) - 1):
        alpha_k.append(
            np.dot(precision,
                   (blob_centers[clazz] - blob_centers[-1])[:, np.newaxis]))
        alpha_k_0.append(
            np.dot(-0.5 * (blob_centers[clazz] +
                           blob_centers[-1])[np.newaxis, :], alpha_k[-1]))

    sample = np.array([[-22, 22]])

    def discriminant_func(sample, coef, intercept, clazz):
        return np.exp(intercept[clazz] + np.dot(sample, coef[clazz]))

    prob = np.array([float(
        discriminant_func(sample, alpha_k, alpha_k_0, clazz) /
        (1 + sum([discriminant_func(sample, alpha_k, alpha_k_0, clazz)
                  for clazz in range(n_classes - 1)]))) for clazz in range(
                      n_classes - 1)])

    prob_ref = 1 - np.sum(prob)

    # check the consistency of the computed probabilities:
    # all probabilities should sum to one
    prob_ref_2 = float(
        1 / (1 + sum([discriminant_func(sample, alpha_k, alpha_k_0, clazz)
                      for clazz in range(n_classes - 1)]))
    )
    assert prob_ref == pytest.approx(prob_ref_2)

    # check that the probabilities computed by LDA are close to the
    # theoretical probabilities
    assert_allclose(lda.predict_proba(sample),
                    np.hstack([prob, prob_ref])[np.newaxis],
                    atol=1e-2)
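For reference, the computation this test reconstructs: with shared covariance Sigma, class means mu_k, reference class K, and equal class priors (the generated dataset is balanced, so the log-prior term drops out), The Elements of Statistical Learning, Sect. 4.4.5, gives the LDA posterior in the softmax form that the alpha_k and alpha_k_0 arrays implement:

a_k = \Sigma^{-1}(\mu_k - \mu_K), \qquad
a_{k0} = -\tfrac{1}{2}\,(\mu_k + \mu_K)^{\top}\,\Sigma^{-1}(\mu_k - \mu_K)

P(G = k \mid X = x) = \frac{\exp\left(a_{k0} + x^{\top} a_k\right)}
{1 + \sum_{\ell=1}^{K-1} \exp\left(a_{\ell 0} + x^{\top} a_\ell\right)},
\qquad
P(G = K \mid X = x) = \frac{1}{1 + \sum_{\ell=1}^{K-1} \exp\left(a_{\ell 0} + x^{\top} a_\ell\right)}

Here discriminant_func evaluates exp(a_{k0} + x^T a_k), prob holds the first K-1 posteriors, and prob_ref recovers the reference-class posterior both as 1 minus their sum and directly from the denominator, which is why the two must agree.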