本文整理汇总了 Python 中 sklearn.svm.LinearSVC.predict_proba 方法的典型用法代码示例。如果您正苦于以下问题:Python LinearSVC.predict_proba 方法的具体用法?Python LinearSVC.predict_proba 怎么用?Python LinearSVC.predict_proba 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 sklearn.svm.LinearSVC 的用法示例。
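在下文中一共展示了 LinearSVC.predict_proba 方法的 7 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。注意:scikit-learn 的 LinearSVC 本身并未实现 predict_proba 方法;如需概率输出,可改用 decision_function 的得分,或使用 CalibratedClassifierCV 对其进行概率校准。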
示例1: doench_on_fold
# 需要导入模块: from sklearn.svm import LinearSVC [as 别名]
# 或者: from sklearn.svm.LinearSVC import predict_proba [as 别名]
def _nonzero_feature_mask(coef):
    """Return a flat boolean mask of the features whose coefficient is non-zero.

    If the L1 penalty zeroed every coefficient, the first feature is switched
    on so the downstream classifier still has one column to train on.
    """
    mask = (coef != 0.0).flatten()
    if not mask.any():
        mask[0] = True
    return mask


def doench_on_fold(feature_sets, train, test, y, y_all, X, dim, dimsum, learn_options):
    """Select features with an L1 linear SVM, then fit an L2 logistic regression.

    The SVM penalty ``C`` is picked by an inner 10-fold cross-validation,
    stratified on the encoded "Target gene" column, that maximises the mean
    ROC AUC of the logistic regression trained on the selected features.

    Parameters
    ----------
    feature_sets, dim, dimsum
        Not used by this function.
    train, test
        Indexers selecting the training / held-out rows of ``X``, ``y`` and
        the ``.values`` of ``y_all`` columns.
    y
        Targets the logistic regression is fitted on.
    y_all
        Table providing a "Target gene" column and the column named by
        ``learn_options["binary target name"]``.
    X
        Feature matrix, indexed as ``X[rows][:, column_mask]``.
    learn_options
        Mapping; only the ``"binary target name"`` key is read.

    Returns
    -------
    (y_pred, clf)
        ``y_pred`` is the second ``predict_proba`` column for ``X[test]``,
        kept two-dimensional; ``clf`` is the fitted logistic regression.
    """
    auto_class_weight = None  # 'auto'/None
    verbose = False
    n_folds = 10
    # Geometric grid of 45 candidate values for the SVM's C parameter.
    penalty = [
        0.005 * pow(1.15, x) for x in range(0, 45)
    ]  # ian's code: tvals = [0.005*pow(1.15,x) for x in range(0,45)]
    y_bin = y_all[learn_options["binary target name"]].values[:, None]

    label_encoder = sklearn.preprocessing.LabelEncoder()
    label_encoder.fit(y_all["Target gene"].values[train])
    gene_classes = label_encoder.transform(y_all["Target gene"].values[train])
    # NOTE(review): sklearn.cross_validation is the legacy scikit-learn API;
    # left as-is because the rest of the project targets it - confirm before
    # upgrading scikit-learn.
    cv = sklearn.cross_validation.StratifiedKFold(gene_classes, n_folds=n_folds, shuffle=True)

    cv_results = np.zeros((n_folds, len(penalty)))
    for j, (train_inner, test_inner) in enumerate(cv):
        for i, c in enumerate(penalty):
            # fit an L1-penalized SVM classifier
            clf = LinearSVC(penalty="l1", C=c, dual=False, class_weight=auto_class_weight)
            clf.fit(X[train][train_inner], y_bin[train][train_inner].flatten())

            # pass features with non-zero coeff to Logistic with l2 penalty
            selected = _nonzero_feature_mask(clf.coef_)
            clf = LogisticRegression(penalty="l2", class_weight=auto_class_weight)
            clf.fit(X[train][train_inner][:, selected], y[train][train_inner].flatten())
            y_test = clf.predict_proba(X[train][test_inner][:, selected])[:, 1]

            fpr, tpr, _ = sklearn.metrics.roc_curve(y_bin[train][test_inner], y_test)
            # ``np.nan in arr`` can never be true (NaN != NaN), so test with isnan.
            assert not np.isnan(fpr).any(), "found nan fpr"
            assert not np.isnan(tpr).any(), "found nan tpr"
            roc_auc = sklearn.metrics.auc(fpr, tpr)
            if verbose:
                print("%s %s %s" % (j, i, roc_auc))
            cv_results[j][i] = roc_auc

    # The penalty is chosen on the mean AUC across folds; the line printed
    # below reports the per-penalty median.
    best_penalty = penalty[np.argmax(np.mean(cv_results, axis=0))]
    print("best AUC for penalty:  %s" % (np.median(cv_results, axis=0),))

    # Refit on the whole training split with the selected penalty.
    clf = LinearSVC(penalty="l1", C=best_penalty, dual=False, class_weight=auto_class_weight)
    clf.fit(X[train], y_bin[train].flatten())
    selected = _nonzero_feature_mask(clf.coef_)
    clf = LogisticRegression(penalty="l2", class_weight=auto_class_weight)
    clf.fit(X[train][:, selected], y[train].flatten())
    y_pred = clf.predict_proba(X[test][:, selected])[:, 1:2]
    return y_pred, clf
示例2: run
# 需要导入模块: from sklearn.svm import LinearSVC [as 别名]
# 或者: from sklearn.svm.LinearSVC import predict_proba [as 别名]
def run(input_train, input_test, output_name):
    """
    Trains a linear SVM on the training file and writes the test item IDs,
    sorted by descending classifier score, as a csv for Kaggle submission
    -------
    input_train : 'full path of the training file'
    input_test : 'full path of the testing file'
    output_name : 'full path of the output file'
    """
    data = pd.read_table(input_train)
    test = pd.read_table(input_test)
    testItemIds = test.itemid
    response = data.is_blocked

    # One-hot encode the subcategory column of both files.
    dummies = sparse.csc_matrix(pd.get_dummies(data.subcategory))
    pretestdummies = pd.get_dummies(test.subcategory)
    # NOTE(review): presumably these two categories occur only in the test
    # file and are dropped so the columns line up with training - confirm.
    testdummies = sparse.csc_matrix(pretestdummies.drop(['Растения', 'Товары для компьютера'],axis=1))

    words = np.array(data.description,str)
    testwords = np.array(test.description,str)
    del data, test

    # The vocabulary is built from the train and test descriptions together.
    vect = text.CountVectorizer(decode_error = u'ignore', strip_accents='unicode', ngram_range=(1,2))
    corpus = np.concatenate((words, testwords))
    vect.fit(corpus)
    counts = vect.transform(words)
    features = sparse.hstack((dummies,counts))

    clf = LinearSVC()
    clf.fit(features, response)

    testcounts = vect.transform(testwords)
    testFeatures = sparse.hstack((testdummies,testcounts))
    # LinearSVC does not implement predict_proba; the scores are only used to
    # rank the items, so the signed distance to the hyperplane is used instead.
    predicted_scores = clf.decision_function(testFeatures)

    # ``with`` closes the file even if a write fails part-way through.
    with open(output_name,'w') as f:
        f.write("id\n")
        for pred_score, item_id in sorted(zip(predicted_scores, testItemIds), reverse = True):
            f.write("%d\n" % (item_id))
示例3: test_calibration_multiclass
# 需要导入模块: from sklearn.svm import LinearSVC [as 别名]
# 或者: from sklearn.svm.LinearSVC import predict_proba [as 别名]
def test_calibration_multiclass():
    """Check CalibratedClassifierCV on multiclass problems."""

    def softmax(scores):
        # Naive mapping from OvR decision scores to row-normalised weights.
        # NOTE(review): the exponent is negated compared with a conventional
        # softmax; kept exactly as the test was written.
        exps = np.exp(-scores)
        return exps / exps.sum(axis=1).reshape(-1, 1)

    # Part 1: a classifier that only implements decision_function.
    svm = LinearSVC()
    features, class_idx = make_blobs(n_samples=100, n_features=2,
                                     random_state=42, centers=3,
                                     cluster_std=3.0)
    # String labels, to check that CalibratedClassifierCV copes with
    # categorical targets.
    labels = np.array(['a', 'b', 'c'])[class_idx]

    # Even rows train, odd rows test.
    X_train, X_test = features[::2], features[1::2]
    y_train, y_test = labels[::2], labels[1::2]

    svm.fit(X_train, y_train)
    for method in ('isotonic', 'sigmoid'):
        calibrated = CalibratedClassifierCV(svm, method=method, cv=2)
        calibrated.fit(X_train, y_train)
        probas = calibrated.predict_proba(X_test)

        # Every row of calibrated probabilities must sum to one.
        row_sums = np.sum(probas, axis=1)
        assert_array_almost_equal(row_sums, np.ones(len(X_test)))

        # The calibrated log-loss must not exceed the log-loss of the
        # softmax-ed raw decision function.
        raw_scores = svm.decision_function(X_test)
        uncalibrated_log_loss = log_loss(y_test, softmax(raw_scores))
        calibrated_log_loss = log_loss(y_test, probas)
        assert_greater_equal(uncalibrated_log_loss, calibrated_log_loss)

    # Part 2: calibration must strictly lower the log-loss of a
    # RandomForestClassifier.
    features, targets = make_blobs(n_samples=100, n_features=2,
                                   random_state=42, cluster_std=3.0)
    X_train, X_test = features[::2], features[1::2]
    y_train, y_test = targets[::2], targets[1::2]

    forest = RandomForestClassifier(n_estimators=10, random_state=42)
    forest.fit(X_train, y_train)
    forest_loss = log_loss(y_test, forest.predict_proba(X_test))

    for method in ('isotonic', 'sigmoid'):
        calibrated = CalibratedClassifierCV(forest, method=method, cv=3)
        calibrated.fit(X_train, y_train)
        calibrated_loss = log_loss(y_test, calibrated.predict_proba(X_test))
        assert_greater(forest_loss, calibrated_loss)
示例4: ctr_svm
# 需要导入模块: from sklearn.svm import LinearSVC [as 别名]
# 或者: from sklearn.svm.LinearSVC import predict_proba [as 别名]
def ctr_svm(model='sklearn-clicklog', from_cache=False, train_dataset_length=100000, test_dataset_length=100000):
    """
    Fit an L1-penalised linear SVM on the dataset and print the train and
    test log-loss.

    LinearSVC does not implement predict_proba (which is why this function
    used to fail), so the SVM is wrapped in CalibratedClassifierCV, which
    fits a probability calibrator on top of its decision function.
    """
    # Imported here so the block does not rely on a file-level import.
    from sklearn.calibration import CalibratedClassifierCV

    TRAIN_FILE, TEST_FILE = create_dataset(model, from_cache, train_dataset_length, test_dataset_length)

    svm = LinearSVC(
        penalty='l1',
        loss='squared_hinge',
        dual=False,
        tol=0.0001,
        C=1.0,
        multi_class='ovr',
        fit_intercept=True,
        intercept_scaling=1,
        class_weight=None,
        verbose=1,
        random_state=None,
        max_iter=1000,
    )
    # The calibrated wrapper exposes predict_proba, which log_loss needs.
    prediction_model = CalibratedClassifierCV(svm)

    x_train, y_train = clean_data(TRAIN_FILE)
    x_test, y_test = clean_data(TEST_FILE)

    with Timer('fit model'):
        prediction_model.fit(x_train, y_train)

    with Timer('evaluate model'):
        y_prediction_train = prediction_model.predict_proba(x_train)
        y_prediction_test = prediction_model.predict_proba(x_test)

        loss_train = log_loss(y_train, y_prediction_train)
        loss_test = log_loss(y_test, y_prediction_test)

    print('loss_train: %s' % loss_train)
    print('loss_test: %s' % loss_test)
示例5: GridSearchCV
# 需要导入模块: from sklearn.svm import LinearSVC [as 别名]
# 或者: from sklearn.svm.LinearSVC import predict_proba [as 别名]
'random_state':[0]}]
n_jobs_ = 1
num_cv_ = 5
# Grid-search the LinearSVC hyper-parameters in ``parameters``, scored by F1
# with num_cv_-fold cross-validation.
clf_cv = GridSearchCV(LinearSVC(),
                      parameters,
                      scoring="f1",
                      cv=num_cv_, n_jobs=n_jobs_,
                      verbose=10)
clf_cv.fit(X_train, y_train)
print(clf_cv.best_params_)

# Refit a fresh estimator with the best parameters and drop the search object.
clf = LinearSVC()
clf.set_params(**clf_cv.best_params_)
del clf_cv
clf.fit(X_train, y_train)

# The attribute is spelled "predict_proba"; the previous "predict_prob" check
# could never succeed for any estimator.
if hasattr(clf, "predict_proba"):
    prob_pos = clf.predict_proba(X_test)[:, 1]
else:  # use decision function
    prob_pos = clf.decision_function(X_test)
    # Min-max scale the decision scores into [0, 1].
    prob_pos = (prob_pos - prob_pos.min()) / (prob_pos.max() - prob_pos.min())
print(prob_pos)

# scikit-learn's confusion matrix has true labels on the rows and predicted
# labels on the columns, in sorted label order, so the binary layout is
# [[TN, FP], [FN, TP]].
[[TN, FP], [FN, TP]] = metrics.confusion_matrix(y_test, clf.predict(X_test))
accuracy = float(TP + TN) / float(TP + FP + FN + TN)
# Report 0.0 instead of raising ZeroDivisionError when a denominator is empty.
precision = float(TP) / float(TP + FP) if (TP + FP) else 0.0
recall = float(TP) / float(TP + FN) if (TP + FN) else 0.0
f = 2.0 * precision * recall / (precision + recall) if (precision + recall) else 0.0
print("accuracy=%1.5e, precision=%1.5e, recall=%1.5e, f=%1.5e" % (accuracy, precision, recall, f))
示例6: len
# 需要导入模块: from sklearn.svm import LinearSVC [as 别名]
# 或者: from sklearn.svm.LinearSVC import predict_proba [as 别名]
# Excerpt: fit the classifier on the 'train' split, then score the 'test' split.
# NOTE(review): the indentation of this excerpt was lost; nesting is not shown.
svc.fit(features['train'],labels['train'])
# Evaluate the classifier on the test split.
prediction = svc.predict(features['test'])
# With use_linear set, pseudo-probabilities are derived from decision_function
# instead of calling predict_proba.
if use_linear:
proba = svc.decision_function(features['test'])
# A 1-D result is expanded to two columns: (score, 1 - score).
# NOTE(review): `1 - proba` is applied to raw decision scores, not to
# probabilities - confirm this is the intended second column.
if len(proba.shape)==1:
proba = np.vstack((proba,1-proba)).transpose()
# Each row is passed through a logistic sigmoid and then divided by its sum.
for i in range(len(proba)):
proba[i] = 1 / (1 + np.exp(-proba[i]))
proba[i] = proba[i] / sum(proba[i])
# proba_sorted is only referenced by the disabled check below.
proba_sorted = sorted(proba[i],reverse=True)
# if proba_sorted[0] < 0.4 or proba_sorted[0] / proba_sorted[1] < 2:
# prediction[i] = 0
else:
proba = svc.predict_proba(features['test'])
print 'Accuracy %.2f%%' % (svc.score(features['test'],labels['test'])*100)
# Optionally plot one weight vector per row of svc.coef_.
if plot_weights:
# Colours are looked up with key i+1, so key 0 is not used by the loop below
# and a coef_ with more than 6 rows would raise KeyError.
colors = {0:'#ffffff',1:'#ff0000',2:'#00ff00',3:'#0000ff',4:'#ffff00',5:'#ff00ff',6:'#00ffff'}
import matplotlib.pyplot as plt
for i in range(len(svc.coef_)):
w = svc.coef_[i]
plt.plot(np.arange(len(w)),w,lw=2,color=colors[i+1])
# NOTE(review): no label= is passed to plt.plot above, so the legend
# presumably has no entries - confirm.
plt.legend(loc='upper left')
plt.show()
# Write the results to outputFile.
# NOTE(review): `file` shadows the Python 2 builtin, and the handle is not
# closed within this excerpt.
file = open(outputFile,'w')
file.write('labels ')
示例7: precision_score
# 需要导入模块: from sklearn.svm import LinearSVC [as 别名]
# 或者: from sklearn.svm.LinearSVC import predict_proba [as 别名]
# Excerpt: record per-class statistics for the current iteration `itr`.
# NOTE(review): the indentation of this excerpt was lost; nesting is not shown.
pl.show()
# Per-class precision and recall; both results are indexed by class below.
pre = precision_score(test_label, preds, average=None)
rec = recall_score(test_label, preds, average=None)
# The loop is hard-coded to class indices 0-5.
# NOTE(review): presumably `cm` is the confusion matrix, so cm[k,k] is the
# count of correct predictions for class k - confirm.
k=0
while k<6:
acc_type[k][itr].append(cm[k,k])
precision_type[k][itr].append(pre[k])
recall_type[k][itr].append(rec[k])
k += 1
'''
#entropy based example selection block
#compute entropy for each instance and rank
label_pr = np.sort(clf.predict_proba(validate_data)) #sort in ascending order
preds = clf.predict(validate_data)
res = []
for h,i,pr in zip(validate,preds,label_pr):
#entropy = np.sum(-p*math.log(p,2) for p in pr if p!=0)
if len(pr)<2:
margin = 1
else:
margin = pr[-1]-pr[-2]
#margin = 1 - margin
#margin *= p_x[h]
res.append([h,i,margin])
#print 'iter', itr, 'wrong #', len(wrong)
'''
#Entropy-based, sort and pick the one with largest H