本文整理汇总了Python中sklearn.linear_model.logistic.LogisticRegression.predict_proba方法的典型用法代码示例。如果您正苦于以下问题:Python LogisticRegression.predict_proba方法的具体用法?Python LogisticRegression.predict_proba怎么用?Python LogisticRegression.predict_proba使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.linear_model.logistic.LogisticRegression
的用法示例。
在下文中一共展示了LogisticRegression.predict_proba方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: main
# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict_proba [as 别名]
def main():
    """Train a LogisticRegression CWI classifier on one annotator's data and
    write thresholded positive-class predictions to ``<annotator>.pred``.

    Command-line args: --threshold (decision cutoff on P(class 1)),
    --annotator (training-file id), --penalty (l1/l2 regularisation).
    """
    scriptdir = os.path.dirname(os.path.realpath(__file__))
    parser = argparse.ArgumentParser(description="Skeleton for features and classifier for CWI-2016--optimisation of threshhold")
    parser.add_argument('--threshold', type=float, default=0.5)
    parser.add_argument('--annotator', type=str, default="03")
    parser.add_argument('--penalty', type=str, choices=["l1", "l2"], default="l1")
    args = parser.parse_args()

    current_single_ann = scriptdir + "/../data/cwi_training/cwi_training_" + args.annotator + ".lbl.conll"
    testfile = scriptdir + "/../data/cwi_testing/cwi_testing.txt.lbl.conll"
    # Collect raw feature dicts for both splits and vectorize them together so
    # train and test share a single feature space.
    # (Fixed the accidental double underscore in the original local name.)
    X_dict_train, y_train, v_train = feats_and_classify.collect_features(current_single_ann, vectorize=False)
    X_dict_test, y_test, v_test = feats_and_classify.collect_features(testfile, vectorize=False)
    featdicts = list(X_dict_train) + list(X_dict_test)
    vect = DictVectorizer()
    X = vect.fit_transform(featdicts).toarray()
    X_train = X[:len(y_train)]
    X_test = X[len(y_train):]

    maxent = LogisticRegression(penalty=args.penalty)
    maxent.fit(X_train, y_train)
    y_pred_proba = maxent.predict_proba(X_test)
    # Emit "1" when the positive-class probability clears the tuned threshold.
    ypred_i = ["1" if pair[1] >= args.threshold else "0" for pair in y_pred_proba]
    # Bug fix: use a context manager so the output file is closed even if the
    # write raises (original used a bare open()/close() pair).
    with open(args.annotator + ".pred", mode="w") as fout:
        print("\n".join(ypred_i), file=fout)
    sys.exit(0)
示例2: __init__
# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict_proba [as 别名]
class LogReg:
    """Self-driving logistic-regression pipeline: on construction it loads the
    train/test CSVs, fits a balanced LogisticRegression, and computes class
    probabilities for the test rows."""

    def __init__(self):
        self.load_data()
        self.clf = LogisticRegression(class_weight = 'balanced')
        self.train()
        self.predict()

    def load_data(self):
        """Read both CSVs; column 0 holds the label (train) / row id (test),
        the remaining columns are features."""
        df_train = pd.read_csv('./data/train.csv', header=0)
        df_test = pd.read_csv('./data/test.csv', header=0)
        train_arr = df_train.values
        test_arr = df_test.values
        self.train_X = train_arr[:, 1:]
        self.train_Y = train_arr[:, 0]
        self.test_X = test_arr[:, 1:]
        self.test_ID = test_arr[:, 0]

    def train(self):
        """Fit the classifier on the loaded training split."""
        self.clf.fit(self.train_X, self.train_Y)

    def predict(self):
        """Store per-class probabilities for the test split on self.test_Y."""
        self.test_Y = self.clf.predict_proba(self.test_X)

    def get_training_accuracy(self):
        """Mean accuracy on the training data."""
        return self.clf.score(self.train_X, self.train_Y)

    def store_result(self):
        """Write (Id, P(class 1)) pairs as the submission CSV."""
        df_out = pd.DataFrame()
        df_out['Id'] = self.test_ID
        df_out['Action'] = self.test_Y[:, 1]
        df_out.to_csv('./data/results/c1_result.csv', index=False)
示例3: test_logreg_predict_proba_multinomial
# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict_proba [as 别名]
def test_logreg_predict_proba_multinomial():
    """Multinomial probabilities must beat both OvR and raw per-class
    sigmoids in log-loss on a 3-class problem."""
    X, y = make_classification(n_samples=10, n_features=20, random_state=0,
                               n_classes=3, n_informative=10)

    # A model trained with the true cross-entropy loss should give a smaller
    # log-loss than the one-vs-rest formulation.
    multinomial = LogisticRegression(multi_class="multinomial", solver="lbfgs")
    multinomial.fit(X, y)
    multinomial_loss = log_loss(y, multinomial.predict_proba(X))

    ovr = LogisticRegression(multi_class="ovr", solver="lbfgs")
    ovr.fit(X, y)
    ovr_loss = log_loss(y, ovr.predict_proba(X))
    assert_greater(ovr_loss, multinomial_loss)

    # Softmax probabilities should likewise beat independent logistic
    # transforms of the same decision values.
    multinomial_loss = log_loss(y, multinomial.predict_proba(X))
    sigmoid_loss = log_loss(y, multinomial._predict_proba_lr(X))
    assert_greater(sigmoid_loss, multinomial_loss)
示例4: test_nnet
# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict_proba [as 别名]
def test_nnet(n_samples=200, n_features=7, distance=0.8, complete=False):
    """
    Smoke-test the nnet estimators on a generated binary sample.

    :param complete: if True, all possible combinations will be checked, and quality is printed
    """
    X, y = generate_sample(n_samples=n_samples, n_features=n_features, distance=distance)
    # Every network architecture under test.
    nn_types = [
        nnet.SimpleNeuralNetwork,
        nnet.MLPClassifier,
        nnet.SoftmaxNeuralNetwork,
        nnet.RBFNeuralNetwork,
        nnet.PairwiseNeuralNetwork,
        nnet.PairwiseSoftplusNeuralNetwork,
    ]
    if complete:
        # checking all possible combinations
        for loss in nnet.losses:
            for NNType in nn_types:
                for trainer in nnet.trainers:
                    nn = NNType(layers=[5], loss=loss, trainer=trainer, random_state=42, epochs=100)
                    nn.fit(X, y )
                    print(roc_auc_score(y, nn.predict_proba(X)[:, 1]), nn)
        # Logistic regression printed as a baseline for comparison.
        lr = LogisticRegression().fit(X, y)
        print(lr, roc_auc_score(y, lr.predict_proba(X)[:, 1]))
        # Intentional failure so the printed comparison is surfaced by pytest.
        assert 0 == 1, "Let's see and compare results"
    else:
        # checking combinations of losses, nn_types, trainers, most of them are used once during tests.
        attempts = max(len(nnet.losses), len(nnet.trainers), len(nn_types))
        # Random shifts rotate which (loss, trainer) pairs get exercised.
        losses_shift = numpy.random.randint(10)
        trainers_shift = numpy.random.randint(10)
        for attempt in range(attempts):
            # each combination is tried 3 times. before raising exception
            retry_attempts = 3
            for retry_attempt in range(retry_attempts):
                loss = list(nnet.losses.keys())[(attempt + losses_shift) % len(nnet.losses)]
                trainer = list(nnet.trainers.keys())[(attempt + trainers_shift) % len(nnet.trainers)]
                nn_type = nn_types[attempt % len(nn_types)]
                # A fresh random_state per retry so a retry is not a replay.
                nn = nn_type(layers=[5], loss=loss, trainer=trainer, random_state=42 + retry_attempt, epochs=200)
                print(nn)
                nn.fit(X, y)
                quality = roc_auc_score(y, nn.predict_proba(X)[:, 1])
                # computed_loss is evaluated for its side effect of exercising
                # compute_loss; the value itself is not asserted on.
                computed_loss = nn.compute_loss(X, y)
                if quality > 0.8:
                    break
                else:
                    print('attempt {} : {}'.format(retry_attempt, quality))
                    if retry_attempt == retry_attempts - 1:
                        raise RuntimeError('quality of model is too low: {} {}'.format(quality, nn))
示例5: fit_model_2
# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict_proba [as 别名]
def fit_model_2(self, lol = .07, toWrite = False):
    """Fit an L1-penalised LogisticRegression across each pre-built CV fold,
    printing the log-loss per fold, and optionally pickle the last model.

    :param lol: inverse regularisation strength C for LogisticRegression.
    :param toWrite: when True, dump the (last-fold) model to model2/model.pkl.
    """
    model = LogisticRegression(C = lol, penalty = 'l1', tol = 1e-6)
    for data in self.cv_data:
        X_train, X_test, Y_train, Y_test = data
        # Rebalance classes in the training fold before fitting.
        X_train, Y_train = self.balance_data(X_train, Y_train)
        model.fit(X_train, Y_train)
        # Score on the positive-class probability only.
        pred = model.predict_proba(X_test)[:, 1]
        print("Model 2 Score: %f" % (logloss(Y_test, pred),))
    if toWrite:
        # Bug fix: pickle requires a *binary* stream — the original opened the
        # file in text mode ('w'), which fails on Python 3. The context
        # manager also guarantees the handle is closed on error.
        with open('model2/model.pkl', 'wb') as f2:
            pickle.dump(model, f2)
示例6: test_multinomial_binary_probabilities
# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict_proba [as 别名]
def test_multinomial_binary_probabilities():
    """On a binary problem, multinomial LR probabilities must equal the
    softmax of the (+decision, -decision) pair."""
    X, y = make_classification()
    model = LogisticRegression(multi_class='multinomial', solver='saga')
    model.fit(X, y)

    scores = model.decision_function(X)
    observed = model.predict_proba(X)

    # Softmax over {-score, +score}: P(class 1) = e^s / (e^s + e^-s).
    pos_proba = np.exp(scores) / (np.exp(scores) + np.exp(-scores))
    expected = np.c_[1 - pos_proba, pos_proba]
    assert_almost_equal(observed, expected)
示例7: test_predict_iris
# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict_proba [as 别名]
def test_predict_iris():
    """Test logistic regression with the iris dataset"""
    n_samples, _ = iris.data.shape
    target = iris.target_names[iris.target]

    clf = LogisticRegression(C=len(iris.data)).fit(iris.data, target)
    # The fitted classes must be exactly the distinct target names.
    assert_array_equal(np.unique(target), clf.classes_)

    # Hard predictions should be highly accurate on the training data.
    hard_pred = clf.predict(iris.data)
    assert_greater(np.mean(hard_pred == target), .95)

    # Probabilities must sum to one per row, and their argmax must agree
    # with the hard predictions' accuracy level.
    probs = clf.predict_proba(iris.data)
    assert_array_almost_equal(probs.sum(axis=1), np.ones(n_samples))
    argmax_pred = iris.target_names[probs.argmax(axis=1)]
    assert_greater(np.mean(argmax_pred == target), .95)
示例8: test_nnet
# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict_proba [as 别名]
def test_nnet(n_samples=200, n_features=5, distance=0.5, complete=False):
    """Smoke-test the nnet estimators on a separable two-blob problem.

    :param complete: if True, every (loss, network type, trainer) combination
        is trained and its ROC AUC printed; otherwise a rotating subset is
        trained and checked against a minimum-quality threshold.
    """
    X, y = make_blobs(
        n_samples=n_samples,
        n_features=5,
        centers=[numpy.ones(n_features) * distance, -numpy.ones(n_features) * distance],
    )
    nn_types = [
        nnet.SimpleNeuralNetwork,
        nnet.MultiLayerNetwork,
        nnet.SoftmaxNeuralNetwork,
        nnet.RBFNeuralNetwork,
        nnet.PairwiseNeuralNetwork,
        nnet.PairwiseSoftplusNeuralNetwork,
    ]
    if complete:
        # checking all possible combinations
        for loss in nnet.losses:
            for NNType in nn_types:
                for trainer in nnet.trainers:
                    nn = NNType(layers=[5], loss=loss, trainer=trainer, random_state=42)
                    nn.fit(X, y, epochs=100)
                    print(roc_auc_score(y, nn.predict_proba(X)[:, 1]), nn)
        # Logistic regression printed as a baseline for comparison.
        lr = LogisticRegression().fit(X, y)
        print(lr, roc_auc_score(y, lr.predict_proba(X)[:, 1]))
        # Intentional failure so the printed comparison is surfaced by pytest.
        assert 0 == 1, "Let's see and compare results"
    else:
        # checking combinations of losses, nn_types, trainers, most of them are used once during tests.
        attempts = max(len(nnet.losses), len(nnet.trainers), len(nn_types))
        attempts = 4  # deliberately capped below the full count to keep the test fast
        losses_shift = numpy.random.randint(10)
        trainers_shift = numpy.random.randint(10)
        for attempt in range(attempts):
            # Bug fix: dict.keys() is a non-subscriptable view on Python 3;
            # materialise with list() before indexing (the companion test_nnet
            # in this file already does this).
            loss = list(nnet.losses.keys())[(attempt + losses_shift) % len(nnet.losses)]
            trainer = list(nnet.trainers.keys())[(attempt + trainers_shift) % len(nnet.trainers)]
            nn_type = nn_types[attempt % len(nn_types)]
            nn = nn_type(layers=[5], loss=loss, trainer=trainer, random_state=42)
            print(nn)
            nn.fit(X, y, epochs=200)
            assert roc_auc_score(y, nn.predict_proba(X)[:, 1]) > 0.8, "quality of model is too low: {}".format(nn)
示例9: WebsiteMatchConfidencePredictor
# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict_proba [as 别名]
class WebsiteMatchConfidencePredictor(object):
    """Logistic-regression scorer for how confidently a URL matches a
    scraped website, using make_features() as the feature extractor."""

    def __init__(self):
        self.model = LogisticRegression()

    def fit(self, urls, websites, y):
        """
        :param urls: list of urls
        :param websites: list of corresponding scraped websites
        :param y: list of corresponding booleans - matches or not
        """
        feature_rows = [make_features(u, w) for u, w in zip(urls, websites)]
        self.model.fit(feature_rows, y)

    def predict(self, url, website):
        """Return predict_proba output for a single (url, website) pair."""
        # NOTE(review): a single sample is handed straight to predict_proba,
        # which expects 2-D input — presumably make_features returns a
        # compatible structure; confirm against its definition.
        feature_row = make_features(url, website)
        return self.model.predict_proba(feature_row)
示例10: main
# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict_proba [as 别名]
def main():
    """Train per-variant ("bcd"/"cd") L2 LogisticRegression models on AMT
    annotation features, predict SAME/OMISSION labels plus probabilities for
    a new data dump, and print a tab-separated report."""
    parser = argparse.ArgumentParser(description="""Export AMT""")
    parser.add_argument('--input', default="../res/dga_extendedamt_simplemajority.tsv")
    parser.add_argument('--dump_to_predict', default="../res/dga_data_october2016.tsv")
    parser.add_argument('--embeddings', default="/Users/hmartine/data/glove.6B/glove.6B.50d.txt")
    args = parser.parse_args()

    E = load_embeddings(args.embeddings)
    predarrays = {}
    variants = ["bcd","cd"]
    for variant in variants:
        #1 collect features for train
        trainfeatures, labels, vec = collect_features(args.input,embeddings=E,variant=variant,vectorize=False)
        maxent = LogisticRegression(penalty='l2')
        #TODO collect features for new data
        #TODO proper vectorization
        # NOTE(review): `trainingbow` is not defined in this function —
        # presumably a module-level global; confirm.
        dumpfeatdicts = features_from_dump(args.dump_to_predict,variant=variant,embeddings=E,bowfilter=trainingbow)
        #dumpfeats = vec.fit_transform(dumpfeatdicts)
        # Fit the vectorizer on the training dicts only, then reuse it to
        # transform the dump so both share one feature space.
        vec = DictVectorizer()
        X_train = vec.fit_transform(trainfeatures)
        maxent.fit(X_train,labels)
        X_test = vec.transform(dumpfeatdicts)
        # Class 0 maps to "SAME", anything else to "OMISSION".
        predarrays[variant+"_pred_label"] = ["SAME" if x == 0 else "OMISSION" for x in maxent.predict(X_test)]
        # Keep only P(class 1), formatted to two significant digits.
        predarrays[variant + "_pred_prob"] = ['{:.2}'.format(y) for x,y in maxent.predict_proba(X_test)]
        #maxent.fit(np.array(allfeatures[:len(labels)]),labels)
        #print(maxent.predict(allfeatures[len(labels):]))

    # predict using {features, features without lenght} --> instance 'variants' properly
    #TODO compare prediction similarity
    #TODO provide an output format with labels and probs for both feature templates
    frame = read_dump(args.dump_to_predict)
    # Sorted keys yield bcd_pred_label, bcd_pred_prob, cd_pred_label, cd_pred_prob.
    keyindices = sorted(predarrays.keys())
    # NOTE(review): the header order (Ref TitleRef URLRef Target ...) does not
    # match the zipped column order below (Ref, Target, TitleRef, URLRef, ...);
    # confirm which ordering downstream consumers expect.
    header = "Index Ref TitleRef URLRef Target TitleTarget URLTarget Source Contains BCD_label BCD_prob CD_label CD_prob".replace(" ","\t")
    print(header)
    for a in zip([str(x) for x in range(len(frame.Ref))],list(frame.Ref),list(frame.Target),list(frame.TitleRef),list(frame.URLRef),list(frame.TitleTarget),list(frame.URLTarget),list(frame.Source),list(frame.Contains),predarrays[keyindices[0]],predarrays[keyindices[1]],predarrays[keyindices[2]],predarrays[keyindices[3]]):
        print("\t".join(a))
示例11: test_nnet
# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict_proba [as 别名]
def test_nnet(n_samples=200, n_features=5, distance=0.5):
    """Train every (loss, network type, trainer) combination on a two-blob
    sample, print each ROC AUC plus a logistic-regression baseline, then fail
    deliberately so the printed comparison is shown."""
    blob_centers = [numpy.ones(n_features) * distance,
                    - numpy.ones(n_features) * distance]
    X, y = make_blobs(n_samples=n_samples, n_features=5, centers=blob_centers)

    network_classes = [
        nnet.SimpleNeuralNetwork,
        nnet.MultiLayerNetwork,
        nnet.SoftmaxNeuralNetwork,
        nnet.RBFNeuralNetwork,
        nnet.PairwiseNeuralNetwork,
        nnet.PairwiseSoftplusNeuralNetwork,
    ]

    for loss_name in nnet.losses:
        for network_cls in network_classes:
            for trainer_name in nnet.trainers:
                net = network_cls(layers=[5], loss=loss_name,
                                  trainer=trainer_name, random_state=42)
                net.fit(X, y, stages=100, verbose=nnet.SILENT)
                print(roc_auc_score(y, net.predict_proba(X)[:, 1]), net)

    baseline = LogisticRegression().fit(X, y)
    print(baseline, roc_auc_score(y, baseline.predict_proba(X)[:, 1]))
    # Intentional failure: forces the test runner to surface the output above.
    assert 0 == 1
示例12: readData
# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict_proba [as 别名]
from sklearn.metrics import classification_report
import time

# Measure total wall-clock run time of the experiment.
start_time = time.time()
path = "E:/Desktop/Image/SVMData/gender_wechat_scale.txt"
x, y = readData(path)
average = 0
testNum = 10
clf = LogisticRegression()
# Bug fix: was the Python-2 statement `print clf`; every other print in this
# script already uses the Python-3 call form.
print(clf)
# Repeat train/test splits and average plain accuracy over testNum runs.
for i in range(0, testNum):
    x_train, x_test, y_train, y_test = train_test_split(x, y)
    clf = LogisticRegression()
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)
    p = np.mean(y_pred == y_test)
    print(p)
    average += p
# Positive-class probabilities from the last split drive the PR analysis.
# NOTE(review): assumed to run once after the loop (indentation was lost in
# this snippet) — confirm against the original script.
answer = clf.predict_proba(x_test)[:, 1]
precision, recall, thresholds = precision_recall_curve(y_test, answer)
report = answer > 0.5
print(classification_report(y_test, report, target_names = ['neg', 'pos']))
print("average precision:", average / testNum)
print("time spent:", time.time() - start_time)
示例13: train
# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict_proba [as 别名]
def train():
    """Fit a LogisticRegression on the assembled weather/training features,
    print held-out ROC AUC, refit on all data, and return the classifier
    together with the normalisation statistics (mean, std)."""
    weather = load_weather()
    training = load_training()
    X, y = assemble_X_y(training, weather)
    # mean/std are returned so callers can normalise new data identically.
    mean, std = normalize(X)
    #y = assemble_y(training)
    # Disabled alternative model: a lasagne/nolearn NeuralNet configuration
    # kept for reference as dead code inside a string literal.
    '''
    input_size = len(X[0])
    learning_rate = theano.shared(np.float32(0.1))
    net = NeuralNet(
    layers=[
    ('input', InputLayer),
    ('hidden1', DenseLayer),
    ('dropout1', DropoutLayer),
    ('hidden2', DenseLayer),
    ('dropout2', DropoutLayer),
    ('output', DenseLayer),
    ],
    # layer parameters:
    input_shape=(None, input_size),
    hidden1_num_units=256,
    dropout1_p=0.4,
    hidden2_num_units=256,
    dropout2_p=0.4,
    output_nonlinearity=sigmoid,
    output_num_units=1,
    # optimization method:
    update=nesterov_momentum,
    update_learning_rate=learning_rate,
    update_momentum=0.9,
    # Decay the learning rate
    on_epoch_finished=[
    AdjustVariable(learning_rate, target=0, half_life=4),
    ],
    # This is silly, but we don't want a stratified K-Fold here
    # To compensate we need to pass in the y_tensor_type and the loss.
    regression=True,
    y_tensor_type = T.imatrix,
    objective_loss_function = binary_crossentropy,
    max_epochs=32,
    eval_size=0.1,
    verbose=1,
    )
    '''
    clf = LogisticRegression(C = 10)
    #clf = svm.SVC()
    X, y = shuffle(X, y, random_state=123)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.33)
    clf.fit(X_train, y_train)
    # Held-out evaluation on the positive-class probability.
    probas = clf.predict_proba(X_test)[:,1]
    print("ROC score", metrics.roc_auc_score(np.ravel(y_test), probas))
    print("fitting...")
    # Refit on the full data set for the returned model.
    clf.fit(X, y)
    #clf.fit(X[:100, :], y[:100])
    #Tracer()()
    #probas = clf.predict(X[:100, :])[:,1]
    #y_pred = (probas > 0.5).astype(int)
    #print(np.abs(y_pred-y[:100]).sum())
    return clf, mean, std
示例14: train_model
# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict_proba [as 别名]
def train_model(clf_factory, X, Y, name, plot=False):
    """
    Trains and saves model to disk.

    Fits one LogisticRegression per shuffle split, records train/test error,
    per-class one-vs-rest PR and ROC statistics, dumps the last classifier
    via joblib, and returns (mean train error, mean test error, stacked
    confusion matrices).

    NOTE(review): `clf_factory` is accepted but never used — the classifier
    is hard-coded below; confirm intent.
    """
    labels = np.unique(Y)
    # NOTE(review): n_iterations/test_fraction/indices are pre-0.15 sklearn
    # ShuffleSplit keywords, and the object is iterated directly — this
    # function targets a legacy sklearn; confirm the pinned version.
    cv = ShuffleSplit( n=len(X), n_iterations=1, test_fraction=0.3, indices=True, random_state=0)
    #print "cv = ",cv
    train_errors = []
    test_errors = []
    scores = []
    pr_scores, precisions, recalls, thresholds = defaultdict(list), defaultdict(list), defaultdict(list), defaultdict(list)
    roc_scores, tprs, fprs = defaultdict(list), defaultdict(list) ,defaultdict(list)
    clfs = [] # just to later get the median
    cms = []
    for train, test in cv:
        X_train, y_train = X[train], Y[train]
        X_test, y_test = X[test], Y[test]
        # `clf` is made global so the joblib.dump below (and any outside code)
        # sees the last fitted model.
        global clf
        clf = LogisticRegression()
        clf.fit(X_train, y_train)
        clfs.append(clf)
        train_score = clf.score(X_train, y_train)
        test_score = clf.score(X_test, y_test)
        scores.append(test_score)
        train_errors.append(1 - train_score)
        test_errors.append(1 - test_score)
        y_pred = clf.predict(X_test)
        cm = confusion_matrix(y_test, y_pred)
        cms.append(cm)
        # One-vs-rest PR and ROC statistics per class label.
        for label in labels:
            y_label_test = np.asarray(y_test == label, dtype=int)
            proba = clf.predict_proba(X_test)
            proba_label = proba[:, label]
            precision, recall, pr_thresholds = precision_recall_curve(
                y_label_test, proba_label)
            pr_scores[label].append(auc(recall, precision))
            precisions[label].append(precision)
            recalls[label].append(recall)
            thresholds[label].append(pr_thresholds)
            fpr, tpr, roc_thresholds = roc_curve(y_label_test, proba_label)
            roc_scores[label].append(auc(fpr, tpr))
            tprs[label].append(tpr)
            fprs[label].append(fpr)
    if plot:
        for label in labels:
            #print("Plotting %s"%genre_list[label])
            # Pick the split with the median ROC score for plotting.
            scores_to_sort = roc_scores[label]
            # NOTE(review): len(...) / 2 is a float on Python 3 and fails as a
            # list index — Python-2-era code.
            median = np.argsort(scores_to_sort)[len(scores_to_sort) / 2]
            desc = "%s %s" % (name, genre_list[label])
            #plot_pr(pr_scores[label][median], desc, precisions[label][median],recalls[label][median], label='%s vs rest' % genre_list[label])
            #plot_roc(roc_scores[label][median], desc, tprs[label][median],fprs[label][median], label='%s vs rest' % genre_list[label])
    # NOTE(review): on Python 3 np.asarray(dict.values()) wraps the view in a
    # 0-d object array — also Python-2-era.
    all_pr_scores = np.asarray(pr_scores.values()).flatten()
    summary = (np.mean(scores), np.std(scores),np.mean(all_pr_scores), np.std(all_pr_scores))
    print("%.3f\t%.3f\t%.3f\t%.3f\t" % summary)
    #save the trained model to disk
    joblib.dump(clf, 'saved_model_fft/my_model.pkl')
    return np.mean(train_errors), np.mean(test_errors), np.asarray(cms)
示例15: train_model
# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict_proba [as 别名]
def train_model(X, Y, name, plot=False):
    """
    train_model(vector, vector, name[, plot=False])

    Trains and saves model to disk.

    Fits one LogisticRegression per shuffle split, records train/test error
    and per-class one-vs-rest ROC curves, optionally plots them, dumps the
    last classifier via joblib, and returns
    (mean train error, mean test error, stacked confusion matrices).
    """
    labels = np.unique(Y)
    # Bug fix: was the Python-2 statement `print labels`.
    print(labels)
    # NOTE(review): ShuffleSplit(n=..., n_iter=...) is the pre-0.18 sklearn
    # API that yields (train, test) index pairs when iterated directly —
    # confirm the pinned sklearn version.
    cv = ShuffleSplit(n=len(X), n_iter=1, test_size=0.3, random_state=0)
    train_errors = []
    test_errors = []
    scores = []
    pr_scores = defaultdict(list)
    precisions, recalls, thresholds = defaultdict(list), defaultdict(list), defaultdict(list)
    roc_scores = defaultdict(list)
    tprs = defaultdict(list)
    fprs = defaultdict(list)
    clfs = []  # for the median
    cms = []
    for train, test in cv:
        X_train, y_train = X[train], Y[train]
        X_test, y_test = X[test], Y[test]
        clf = LogisticRegression()
        clf.fit(X_train, y_train)
        clfs.append(clf)
        train_score = clf.score(X_train, y_train)
        test_score = clf.score(X_test, y_test)
        scores.append(test_score)
        train_errors.append(1 - train_score)
        test_errors.append(1 - test_score)
        y_pred = clf.predict(X_test)
        cm = confusion_matrix(y_test, y_pred)
        cms.append(cm)
        # One-vs-rest ROC statistics per class label.
        for label in labels:
            y_label_test = np.asarray(y_test == label, dtype=int)
            proba = clf.predict_proba(X_test)
            proba_label = proba[:, label]
            fpr, tpr, roc_thresholds = roc_curve(y_label_test, proba_label)
            roc_scores[label].append(auc(fpr, tpr))
            tprs[label].append(tpr)
            fprs[label].append(fpr)
    if plot:
        for label in labels:
            # Pick the split with the median ROC score for plotting.
            scores_to_sort = roc_scores[label]
            # Bug fix: integer division — len(...) / 2 is a float on Python 3
            # and cannot be used as a list index.
            median = np.argsort(scores_to_sort)[len(scores_to_sort) // 2]
            desc = "%s %s" % (name, genre_list[label])
            plot_roc_curves(roc_scores[label][median], desc, tprs[label][median],fprs[label][median], label='%s vs rest' % genre_list[label])
    # Bug fix: dict.values() is a view on Python 3; materialise it before
    # handing it to numpy. (pr_scores is never appended to in this variant,
    # so the PR columns of the summary are NaN — behavior kept as-is.)
    all_pr_scores = np.asarray(list(pr_scores.values())).flatten()
    summary = (np.mean(scores), np.std(scores), np.mean(all_pr_scores), np.std(all_pr_scores))
    #print("%.3f\t%.3f\t%.3f\t%.3f\t" % summary)
    #save the trained model to disk
    joblib.dump(clf, 'saved_model/model_ceps.pkl')
    return np.mean(train_errors), np.mean(test_errors), np.asarray(cms)