本文整理汇总了Python中sklearn.linear_model.RidgeClassifier.decision_function方法的典型用法代码示例。如果您正苦于以下问题：Python RidgeClassifier.decision_function方法的具体用法？Python RidgeClassifier.decision_function怎么用？Python RidgeClassifier.decision_function使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.linear_model.RidgeClassifier的用法示例。
在下文中一共展示了RidgeClassifier.decision_function方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: of
# 需要导入模块: from sklearn.linear_model import RidgeClassifier [as 别名]
# 或者: from sklearn.linear_model.RidgeClassifier import decision_function [as 别名]
# NOTE(review): this snippet starts mid-script; the statements down to the
# np.append call presumably form the body of a cross-validation loop that
# yields train_index / test_index -- the loop header is not visible, confirm.
# Slice the feature matrix, the targets and X_den into this split.
X_train, X_test = X[train_index], X[test_index]
y_train, y_test = y[train_index], y[test_index]
# X_den_train / X_den_test are not used by any statement shown below.
X_den_train, X_den_test = X_den[train_index], X_den[test_index]
# Fit every classifier on the same training split.
clf_mNB.fit(X_train, y_train)
clf_ridge.fit(X_train, y_train)
clf_SGD.fit(X_train, y_train)
clf_lSVC.fit(X_train, y_train)
clf_SVC.fit(X_train, y_train)
# Score the held-out split with each classifier.
# NOTE(review): only the mNB and SVC values come from predict_proba; the
# ridge, SGD and lSVC values come from decision_function, which in
# scikit-learn returns signed confidence scores rather than probabilities,
# so the prob_* names are misleading for those three.
prob_mNB = clf_mNB.predict_proba(X_test)
prob_ridge = clf_ridge.decision_function(X_test)
prob_SGD = clf_SGD.decision_function(X_test)
prob_lSVC = clf_lSVC.decision_function(X_test)
prob_SVC = clf_SVC.predict_proba(X_test)
# Element-wise sum of the five score arrays, appended as new rows of z.
# NOTE(review): the sum mixes probabilities with unbounded margins without
# any rescaling -- confirm this weighting is intended.
z_temp = (prob_mNB + prob_ridge + prob_SGD + prob_lSVC + prob_SVC)
z = np.append(z, z_temp, axis=0)
# Drop row 0 of z; per the original author it is a placeholder row of zeros
# from the array's creation (the creation itself is not visible here).
z = np.delete(z, 0, 0)
# Per the original author z then has shape (n_samples, n_categories); each
# element is the summed classifier score for one (sample, category) pair
# (a mix of probabilities and decision scores, see the note above).
print z
print 'z shape: ', z.shape
示例2: get_ridge_plot
# 需要导入模块: from sklearn.linear_model import RidgeClassifier [as 别名]
# 或者: from sklearn.linear_model.RidgeClassifier import decision_function [as 别名]
def get_ridge_plot(best_param_, experiment_,
                   param_keys_, param_vals_,
                   png_folder,
                   png_fname,
                   score_threshold=0.8):
    """Save a three-panel diagnostic figure for a RidgeClassifier.

    The left panel shows the regularization path (one line per coefficient,
    plus the intercept, against alpha on a log axis). The right panels show
    a histogram and an empirical CDF of ``decision_function`` on the test
    data.

    Args:
        best_param_: dict of RidgeClassifier parameters; must contain
            ``'alpha'`` and ``'fit_intercept'``. It is not modified.
        experiment_: object providing ``get_train_data()``,
            ``get_test_data()``, ``get_proba(clf, X)`` and
            ``get_data_col_name()``. The training features must expose
            ``.columns`` (presumably a pandas DataFrame -- confirm).
        param_keys_: parameter-grid names; must include ``'alpha'``.
        param_vals_: parameter-grid values, aligned with ``param_keys_``;
            the ``'alpha'`` entry is the sequence of alphas to sweep.
        png_folder: folder below the configured ``data.path`` directory.
        png_fname: file name of the PNG to write.
        score_threshold: accepted for interface compatibility; unused.

    Returns:
        True once the figure has been written.
    """
    alphas = list(dict(zip(param_keys_, param_vals_))['alpha'])

    # Sweep on a private copy so the caller's dict keeps its best alpha.
    params = dict(best_param_)
    best_alpha = params['alpha']
    fit_intercept = params['fit_intercept']

    X_train, y_train = experiment_.get_train_data()
    n_terms = len(X_train.columns.values) + 1  # intercept + one per feature

    clf = RidgeClassifier()
    clf.set_params(**params)
    clf.fit(X_train, y_train)

    # Row i holds [intercept, coef_1, ..., coef_n] for alphas[i].
    coefs = np.zeros((len(alphas), n_terms))
    train_scores = []
    for i, alpha in enumerate(alphas):
        params['alpha'] = alpha
        clf.set_params(**params)
        clf.fit(X_train, y_train)
        if fit_intercept:
            coefs[i, :] = np.append(clf.intercept_, clf.coef_)
        else:
            # No intercept is fitted: leave column 0 at zero and store the
            # feature coefficients in the remaining columns.
            coefs[i, 1:] = np.ravel(clf.coef_)
        # Kept so get_proba is still called once per alpha, as before; the
        # collected values are not used further in this function.
        train_scores.append(experiment_.get_proba(clf, X_train))
    del X_train, y_train  # release the training data before plotting

    gs = GridSpec(2, 2)
    ax1 = plt.subplot(gs[:, 0])
    ax2 = plt.subplot(gs[0, 1])
    ax3 = plt.subplot(gs[1, 1])
    try:
        # Panel 1: regularization path.
        labels = ['intercept'] + list(experiment_.get_data_col_name())
        alpha_axis = np.array(alphas)
        for col in range(coefs.shape[1]):
            ax1.plot(alpha_axis, coefs[:, col], label=labels[col])
        ax1.legend(loc='best')
        ax1.set_xscale('log')
        ax1.set_title("Regularization Path:%1.3e" % (best_alpha))
        ax1.set_xlabel("alpha", fontsize=10)

        # Panel 2: histogram of the decision scores on the test data.
        # NOTE(review): clf is the model fitted with the LAST alpha of the
        # sweep, not best_alpha -- confirm which model these panels should
        # describe.
        X_test, _ = experiment_.get_test_data()
        score = clf.decision_function(X_test)
        sns.distplot(score, kde=False, rug=False, ax=ax2)
        ax2.set_title("PDF : Decision_Function")

        # Panel 3: empirical CDF. density=True replaces the removed
        # normed=True; the curve is rescaled by its maximum below.
        num_bins = 100
        try:
            counts, bin_edges = np.histogram(score, bins=num_bins,
                                             density=True)
        except Exception:
            # Best-effort fallback to NumPy's default binning.
            counts, bin_edges = np.histogram(score, density=True)
        cdf = np.cumsum(counts)
        ax3.plot(bin_edges[1:], cdf / cdf.max())
        ax3.set_title("CDF")
        ax3.set_xlabel("Decision_Function:Confidence_Score", fontsize=10)

        png_fname = os.path.join(Config.get_string('data.path'),
                                 png_folder, png_fname)
        plt.tight_layout()
        plt.savefig(png_fname)
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close()
    return True
示例3: zip
# 需要导入模块: from sklearn.linear_model import RidgeClassifier [as 别名]
# 或者: from sklearn.linear_model.RidgeClassifier import decision_function [as 别名]
# NOTE(review): this snippet starts mid-script and its original indentation
# was lost; clf_rdg, X_new, docs_new, data_train and label_filtering are
# defined outside the lines shown.
print
# Disabled alternative: predict directly with the classifier, which does not
# apply the multi-label threshold used further down.
# predicted = clf_rdg.predict(X_new)
# for doc, category in zip(docs_new, predicted):
# print '%r => %s' % (doc, data_train.target_names[int(category)])
# print
####################################
# Multi-label prediction based on the ridge classifier's decision_function
# scores.
print clf_rdg
pred_decision = clf_rdg.decision_function(X_new)
print pred_decision
print
# Filter the decision scores with threshold 0.1.
# NOTE(review): label_filtering is not shown; judging by the loop below it
# presumably returns, per document, a sequence of (score, class_index)
# pairs -- confirm.
pred_decision_filtered = label_filtering(pred_decision, 0.1)
print pred_decision_filtered
print
# Print each document followed by its retained labels.
for doc, labels in zip(docs_new, pred_decision_filtered):
print doc
for label in labels:
# label[1] indexes data_train.target_names; label[0] is the score (per the
# original comment).
print data_train.target_names[label[1]], label[0]
print
示例4: classify
# 需要导入模块: from sklearn.linear_model import RidgeClassifier [as 别名]
# 或者: from sklearn.linear_model.RidgeClassifier import decision_function [as 别名]
def classify(granularity=10):
trainDir = path.join(GEOTEXT_HOME, 'processed_data/' + str(granularity).strip() + '_clustered/')
testDir = path.join(GEOTEXT_HOME, 'processed_data/test')
data_train = load_files(trainDir, encoding=encoding)
target = data_train.target
data_test = load_files(testDir, encoding=encoding)
categories = data_train.target_names
def size_mb(docs):
return sum(len(s.encode(encoding)) for s in docs) / 1e6
data_train_size_mb = size_mb(data_train.data)
data_test_size_mb = size_mb(data_test.data)
print("%d documents - %0.3fMB (training set)" % (
len(data_train.data), data_train_size_mb))
print("%d documents - %0.3fMB (test set)" % (
len(data_test.data), data_test_size_mb))
print("%d categories" % len(categories))
print()
# split a training set and a test set
y_train = data_train.target
y_test = data_test.target
print("Extracting features from the training dataset using a sparse vectorizer")
t0 = time()
vectorizer = TfidfVectorizer(use_idf=True, norm='l2', binary=False, sublinear_tf=True, min_df=2, max_df=1.0, ngram_range=(1, 1), stop_words='english')
X_train = vectorizer.fit_transform(data_train.data)
duration = time() - t0
print("done in %fs at %0.3fMB/s" % (duration, data_train_size_mb / duration))
print("n_samples: %d, n_features: %d" % X_train.shape)
print()
print("Extracting features from the test dataset using the same vectorizer")
t0 = time()
X_test = vectorizer.transform(data_test.data)
duration = time() - t0
print("done in %fs at %0.3fMB/s" % (duration, data_test_size_mb / duration))
print("n_samples: %d, n_features: %d" % X_test.shape)
print()
chi = False
if chi:
k = 500000
print("Extracting %d best features by a chi-squared test" % 0)
t0 = time()
ch2 = SelectKBest(chi2, k=k)
X_train = ch2.fit_transform(X_train, y_train)
X_test = ch2.transform(X_test)
print("done in %fs" % (time() - t0))
print()
feature_names = np.asarray(vectorizer.get_feature_names())
# clf = LinearSVC(loss='l2', penalty='l2', dual=True, tol=1e-3)
clf = RidgeClassifier(tol=1e-2, solver="auto")
print('_' * 80)
print("Training: ")
print(clf)
t0 = time()
clf.fit(X_train, y_train)
train_time = time() - t0
print("train time: %0.3fs" % train_time)
t0 = time()
pred = clf.predict(X_test)
scores = clf.decision_function(X_test)
print scores.shape
print pred.shape
test_time = time() - t0
print("test time: %0.3fs" % test_time)
# score = metrics.f1_score(y_test, pred)
# print("f1-score: %0.3f" % score)
if hasattr(clf, 'coef_'):
print("dimensionality: %d" % clf.coef_.shape[1])
print("density: %f" % density(clf.coef_))
print("top 10 keywords per class:")
for i, category in enumerate(categories):
top10 = np.argsort(clf.coef_[i])[-10:]
print("%s: %s" % (category, " ".join(feature_names[top10])))
sumMeanDistance = 0
sumMedianDistance = 0
distances = []
confidences = []
randomConfidences = []
for i in range(0, len(pred)):
user = path.basename(data_test.filenames[i])
location = userLocation[user].split(',')
lat = float(location[0])
lon = float(location[1])
prediction = categories[pred[i]]
confidence = scores[i][pred[i]] - mean(scores[i])
#.........这里部分代码省略.........
示例5: of
# 需要导入模块: from sklearn.linear_model import RidgeClassifier [as 别名]
# 或者: from sklearn.linear_model.RidgeClassifier import decision_function [as 别名]
# NOTE(review): this snippet starts mid-script; the statements down to the
# np.append call presumably form the body of a cross-validation loop over
# the training set that yields train_index / test_index -- the loop header
# is not visible, confirm.
# Split the training data once more into an inner train part and test part.
X_train_train, X_train_test = X_train[train_index], X_train[test_index]
y_train_train, y_train_test = y_train[train_index], y_train[test_index]
# X_den_train, X_den_test = X_den[train_index], X_den[test_index]
# Fit the active classifiers on the inner training part (kNN is disabled).
clf_mNB.fit(X_train_train, y_train_train)
# clf_kNN.fit(X_train_train, y_train_train)
clf_ridge.fit(X_train_train, y_train_train)
clf_lSVC.fit(X_train_train, y_train_train)
clf_SVC.fit(X_train_train, y_train_train)
# Score the inner test part with each classifier.
# NOTE(review): only the mNB and SVC values come from predict_proba; the
# ridge and lSVC values come from decision_function, which in scikit-learn
# returns signed confidence scores rather than probabilities, so the prob_*
# names are misleading for those two.
prob_mNB = clf_mNB.predict_proba(X_train_test)
# prob_kNN = clf_kNN.predict_proba(X_train_test)
prob_ridge = clf_ridge.decision_function(X_train_test)
prob_lSVC = clf_lSVC.decision_function(X_train_test)
prob_SVC = clf_SVC.predict_proba(X_train_test)
# Combine the models' scores; the commented-out lines are alternative
# combinations that were tried.
# z_temp = prob_lSVC
# z_temp = (prob_ridge + prob_lSVC)
# NOTE(review): the active sum mixes probabilities with unbounded margins
# without any rescaling -- confirm this weighting is intended.
z_temp = (prob_mNB + prob_ridge + prob_lSVC + prob_SVC)
z = np.append(z, z_temp, axis=0)
# Drop row 0 of z; per the original author it is a placeholder row of zeros
# from the array's creation (the creation itself is not visible here).
z = np.delete(z, 0, 0)
# Per the original author z then has shape (n_samples, n_categories); each
# element is the summed classifier score for one (sample, category) pair
# (a mix of probabilities and decision scores, see the note above).
# Possible preprocessing on z