本文整理汇总了Python中sklearn.linear_model.logistic.LogisticRegression.predict方法的典型用法代码示例。如果您正苦于以下问题:Python LogisticRegression.predict方法的具体用法?Python LogisticRegression.predict怎么用?Python LogisticRegression.predict使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.linear_model.logistic.LogisticRegression
的用法示例。
在下文中一共展示了LogisticRegression.predict方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: mlogistic
# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict [as 别名]
def mlogistic():
    """Toy text-classification demo.

    Trains a LogisticRegression on TF-IDF features of three short strings
    and predicts labels for two held-out strings, printing each stage.

    NOTE(review): reconstructed from a scraped, indentation-less Python 2
    snippet; converted to Python 3 ``print()``. Relies on TfidfVectorizer
    and LogisticRegression being imported at module level.
    """
    X = []
    # First three strings are the training samples.
    X.append("fuck you")
    X.append("fuck you all")
    X.append("hello everyone")
    # Last two strings are the test samples.
    X.append("fuck me")
    X.append("hello boy")
    # Labels for the three training samples (1 = offensive, presumably).
    y = [1, 1, 0]
    vectorizer = TfidfVectorizer()
    # Fit TF-IDF on the training sentences only.
    X_train = vectorizer.fit_transform(X[:-2])
    print(X_train)
    # Transform the test sentences with the *already fitted* vectorizer.
    X_test = vectorizer.transform(X[-2:])
    print(X_test)
    # Train a logistic-regression classifier.
    classifier = LogisticRegression()
    classifier.fit(X_train, y)
    # Predict labels for the test samples.
    predictions = classifier.predict(X_test)
    print(predictions)
示例2: main
# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict [as 别名]
def main():
    """Cross-validate a LogisticRegression CWI classifier.

    Loads labels from 20 single-annotator files, then for every CV fold
    samples one annotator's label at random per instance, trains, and
    accumulates accuracy/precision/recall/F1 over all folds and iterations.

    NOTE(review): reconstructed indentation from a scraped, flattened
    snippet — nesting of the loops is a best-effort reading; confirm
    against the original repository.
    """
    scriptdir = os.path.dirname(os.path.realpath(__file__))
    default_pool = scriptdir + "/../data/cwi_training/cwi_training.txt.lbl.conll"
    parser = argparse.ArgumentParser(description="Skeleton for features and classifier for CWI-2016--optimisation of threshhold")
    parser.add_argument('--iterations', type=int, default=5)
    args = parser.parse_args()

    all_feats = []
    all_labels = defaultdict(list)
    scores = defaultdict(list)
    # Collect per-annotator labels: all_labels[i] holds the 20 annotator
    # labels for instance i.
    for idx in "01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17 18 19 20".split(" "):
        current_single_ann = scriptdir + "/../data/cwi_training/cwi_training_" + idx + ".lbl.conll"
        f_current, labels_current, v_current = feats_and_classify.collect_features(
            current_single_ann, vectorize=False, generateFeatures=False)
        for instance_index, l in enumerate(labels_current):
            all_labels[instance_index].append(l)

    # Feature matrix is built from annotator 01's file only.
    current_single_ann = scriptdir + "/../data/cwi_training/cwi_training_01.lbl.conll"
    feats, labels_current, v_current = feats_and_classify.collect_features(
        current_single_ann, vectorize=True, generateFeatures=True)

    for it in range(args.iterations):
        for TrainIndices, TestIndices in cross_validation.KFold(
                n=feats.shape[0], n_folds=10, shuffle=True, random_state=None):
            maxent = LogisticRegression(penalty='l2')
            TrainX_i = feats[TrainIndices]
            # Per-instance label is drawn from a random annotator each time.
            Trainy_i = [all_labels[x][random.randrange(0, 20)] for x in TrainIndices]
            TestX_i = feats[TestIndices]
            Testy_i = [all_labels[x][random.randrange(0, 20)] for x in TestIndices]
            maxent.fit(TrainX_i, Trainy_i)
            ypred_i = maxent.predict(TestX_i)
            # NOTE(review): sklearn metrics expect (y_true, y_pred); these
            # calls pass (y_pred, y_true), which swaps precision and recall.
            # Kept as-is to preserve the original numbers — confirm intent.
            acc = accuracy_score(ypred_i, Testy_i)
            pre = precision_score(ypred_i, Testy_i)
            rec = recall_score(ypred_i, Testy_i)
            # shared task uses f1 of *accuracy* and recall!
            f1 = 2 * acc * rec / (acc + rec)
            scores["Accuracy"].append(acc)
            scores["F1"].append(f1)
            scores["Precision"].append(pre)
            scores["Recall"].append(rec)

    print("--")
    for key in sorted(scores.keys()):
        currentmetric = np.array(scores[key])
        print("%s : %0.2f (+/- %0.2f)" % (key, currentmetric.mean(), currentmetric.std()))
    print("--")
    sys.exit(0)
示例3: classify_logistic
# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict [as 别名]
def classify_logistic(train_features, train_labels, test_features):
    """Fit a LogisticRegression and return predictions for test_features.

    Side effect: unless the module-level TEST flag is set or SAVE is falsy,
    the fitted model is persisted via save_pickle("logistic", clf).

    NOTE(review): indentation reconstructed from a flattened snippet.
    """
    global SAVE
    clf = LogisticRegression()
    clf.fit(train_features, train_labels)
    if not TEST and SAVE:
        save_pickle("logistic", clf)
    return clf.predict(test_features)
示例4: test_liblinear_decision_function_zero
# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict [as 别名]
def test_liblinear_decision_function_zero():
    """Test negative prediction when decision_function values are zero.

    Liblinear predicts the positive class when decision_function values
    are zero. This is a test to verify that we do not do the same.
    See Issue: https://github.com/scikit-learn/scikit-learn/issues/3600
    and the PR https://github.com/scikit-learn/scikit-learn/pull/3623
    """
    X, y = make_classification(n_samples=5, n_features=5, random_state=0)
    clf = LogisticRegression(fit_intercept=False)
    clf.fit(X, y)
    # Dummy data such that the decision function becomes zero.
    X = np.zeros((5, 5))
    assert_array_equal(clf.predict(X), np.zeros(5))
示例5: clazzify
# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict [as 别名]
def clazzify(train_mat, test_mat, true_train_labels):
    """Train an L1-penalised LogisticRegression and label the test matrix.

    Returns a (predicted_test_labels, fitted_model) tuple; progress is
    reported through the logging module.
    """
    # Learn.
    logging.info('learning...')
    model = LogisticRegression(random_state=17, penalty='l1')
    model.fit(train_mat, true_train_labels)
    logging.info('finished learning.')
    # Test.
    logging.info('testing')
    predicted_test_labels = model.predict(test_mat)
    logging.info('finished testing')
    return predicted_test_labels, model
示例6: test_predict_iris
# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict [as 别名]
def test_predict_iris():
    """Test logistic regression with the iris dataset."""
    n_samples, n_features = iris.data.shape
    # Use string class names as targets.
    target = iris.target_names[iris.target]
    clf = LogisticRegression(C=len(iris.data)).fit(iris.data, target)
    assert_array_equal(np.unique(target), clf.classes_)
    pred = clf.predict(iris.data)
    assert_greater(np.mean(pred == target), .95)
    # predict_proba rows must sum to 1, and argmax must agree with predict.
    probabilities = clf.predict_proba(iris.data)
    assert_array_almost_equal(probabilities.sum(axis=1), np.ones(n_samples))
    pred = iris.target_names[probabilities.argmax(axis=1)]
    assert_greater(np.mean(pred == target), .95)
示例7: generate_submission
# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict [as 别名]
def generate_submission():
    """Train on the Titanic data and write a Kaggle submission file.

    Relies on module-level globals ``train``, ``test`` and ``predictors``;
    rebinds the globals ``alg``, ``predictions`` and ``submission``.
    """
    global alg, predictions, submission
    # The columns we'll use to predict the target.
    # Initialize the algorithm class.
    alg = LogisticRegression(random_state=1)
    # Train the algorithm using all the training data.
    alg.fit(train[predictors], train["Survived"])
    # Make predictions using the test set.
    predictions = alg.predict(test[predictors])
    # Create a new dataframe with only the columns Kaggle wants from the dataset.
    submission = pandas.DataFrame({
        "PassengerId": test["PassengerId"],
        "Survived": predictions
    })
    submission.to_csv("kaggle.csv", index=False)
    # Bug fix: the message previously said "kaggele.csv" while the file
    # actually written is "kaggle.csv".
    print("kaggle.csv is generated")
示例8: test_multinomial_binary
# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict [as 别名]
def test_multinomial_binary():
    """Test multinomial LR on a binary problem."""
    target = (iris.target > 0).astype(np.intp)
    target = np.array(["setosa", "not-setosa"])[target]
    clf = LogisticRegression(solver='lbfgs', multi_class='multinomial')
    clf.fit(iris.data, target)
    # Binary multinomial collapses to a single coefficient row.
    assert_equal(clf.coef_.shape, (1, iris.data.shape[1]))
    assert_equal(clf.intercept_.shape, (1,))
    assert_array_equal(clf.predict(iris.data), target)

    mlr = LogisticRegression(solver='lbfgs', multi_class='multinomial',
                             fit_intercept=False)
    mlr.fit(iris.data, target)
    # NOTE(review): `mlr` is fitted but the check below re-uses `clf`;
    # possibly intended to be mlr.predict_log_proba — confirm upstream.
    pred = clf.classes_[np.argmax(clf.predict_log_proba(iris.data), axis=1)]
    assert_greater(np.mean(pred == target), .9)
示例9: LogisticRegressionSMSFilteringExample
# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict [as 别名]
def LogisticRegressionSMSFilteringExample():
    """SMS spam-filtering example: TF-IDF + LogisticRegression.

    Reads the SMSSpamCollection file, splits it into train/test, fits a
    classifier and prints the first five test messages with their
    predicted class.

    NOTE(review): reconstructed from a flattened Python 2 snippet
    (``xrange``/``print`` converted to Python 3). ``sklearn.cross_validation``
    is a long-removed module — newer sklearn uses sklearn.model_selection.
    """
    import numpy as np
    import pandas as pd
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.linear_model.logistic import LogisticRegression
    from sklearn.cross_validation import train_test_split, cross_val_score

    df = pd.read_csv('C:/Users/Ahmad/Documents/Mastering ML with Scikitlearn/ml/DataSets/smsspamcollection/SMSSpamCollection', delimiter='\t', header=None)
    # Column 1 holds the message text, column 0 the ham/spam label.
    X_train_raw, X_test_raw, y_train, y_test = train_test_split(df[1], df[0])
    vectorizer = TfidfVectorizer()
    X_train = vectorizer.fit_transform(X_train_raw)
    # Transform the test messages with the fitted vectorizer.
    X_test = vectorizer.transform(X_test_raw)
    classifier = LogisticRegression()
    classifier.fit(X_train, y_train)
    predictions = classifier.predict(X_test)
    for i in range(0, 5):
        print(X_test_raw.values.tolist()[i], "\r\n Classification: ", predictions[i])
示例10: __init__
# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict [as 别名]
class mentoryWEB:
    """Wraps a TF-IDF + LogisticRegression pipeline trained from a TSV file.

    The file is expected to have the label in column 0 and the text in
    column 1 (no header).
    """

    def __init__(self, file):
        # Bigram TF-IDF, capped vocabulary, L2-normalised vectors.
        self.vect = TfidfVectorizer(max_df=0.25, stop_words=None, max_features=2500,
                                    ngram_range=(1, 2), use_idf=True, norm='l2')
        df = pd.read_csv(file, delimiter='\t', header=None)
        X_train_raw, y_train = df[1], df[0]
        X_train = self.vect.fit_transform(X_train_raw)
        self.clf = LogisticRegression(penalty='l2', C=10)
        self.clf.fit(X_train, y_train)

    def test(self, string):
        """Return the predicted label for a single raw text string."""
        X_test = self.vect.transform([string])
        prediction = self.clf.predict(X_test)
        return prediction[0]
示例11: makeClassificationAndMeasureAccuracy
# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict [as 别名]
def makeClassificationAndMeasureAccuracy(genre_wise_train_data, genre_wise_test_data, meta_dict):
    """Train one LogisticRegression per genre and measure its test accuracy.

    Feature vectors are the TAGS values from meta_dict for each file name.
    Returns a dict mapping genre -> accuracy in [0, 1].
    """
    accuracy_for_genre = dict()
    for genre in genre_wise_train_data:
        meta_dict_for_genre = meta_dict[genre]
        train_data, train_result = genre_wise_train_data[genre]
        test_data, test_result = genre_wise_test_data[genre]
        # Build feature vectors from the TAGS metadata of each file.
        train_data = [list(meta_dict_for_genre[file_name][TAGS].values()) for file_name in train_data]
        test_data = [list(meta_dict_for_genre[file_name][TAGS].values()) for file_name in test_data]
        log_r = LogisticRegression()
        log_r.fit(train_data, train_result)
        accuracy = 0.0
        for i in range(len(test_data)):
            # NOTE(review): passes a 1-D sample to predict(); newer sklearn
            # requires a 2-D array (predict([test_data[i]])) — confirm the
            # targeted sklearn version.
            label = int(log_r.predict(test_data[i]))
            if label == test_result[i]:
                accuracy += 1.0
        accuracy = accuracy / len(test_data)
        accuracy_for_genre[genre] = accuracy
    return accuracy_for_genre
示例12: test_multinomial_binary
# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict [as 别名]
def test_multinomial_binary():
    # Test multinomial LR on a binary problem.
    target = (iris.target > 0).astype(np.intp)
    target = np.array(["setosa", "not-setosa"])[target]
    for solver in ['lbfgs', 'newton-cg', 'sag']:
        clf = LogisticRegression(solver=solver, multi_class='multinomial',
                                 random_state=42, max_iter=2000)
        clf.fit(iris.data, target)
        # Binary multinomial collapses to a single coefficient row.
        assert_equal(clf.coef_.shape, (1, iris.data.shape[1]))
        assert_equal(clf.intercept_.shape, (1,))
        assert_array_equal(clf.predict(iris.data), target)

        mlr = LogisticRegression(solver=solver, multi_class='multinomial',
                                 random_state=42, fit_intercept=False)
        mlr.fit(iris.data, target)
        # NOTE(review): `mlr` is fitted but the check re-uses `clf` —
        # possibly intended to be mlr.predict_log_proba; confirm upstream.
        pred = clf.classes_[np.argmax(clf.predict_log_proba(iris.data),
                                      axis=1)]
        assert_greater(np.mean(pred == target), .9)
示例13: test_multinomial_logistic_regression_string_inputs
# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict [as 别名]
def test_multinomial_logistic_regression_string_inputs():
    # Test with string labels for LogisticRegression(CV)
    n_samples, n_features, n_classes = 50, 5, 3
    X_ref, y = make_classification(n_samples=n_samples, n_features=n_features,
                                   n_classes=n_classes, n_informative=3,
                                   random_state=0)
    y_str = LabelEncoder().fit(['bar', 'baz', 'foo']).inverse_transform(y)
    # For numerical labels, let y values be taken from set (-1, 0, 1)
    y = np.array(y) - 1
    # Test for string labels
    lr = LogisticRegression(solver='lbfgs', multi_class='multinomial')
    lr_cv = LogisticRegressionCV(solver='lbfgs', multi_class='multinomial')
    lr_str = LogisticRegression(solver='lbfgs', multi_class='multinomial')
    lr_cv_str = LogisticRegressionCV(solver='lbfgs', multi_class='multinomial')

    lr.fit(X_ref, y)
    lr_cv.fit(X_ref, y)
    lr_str.fit(X_ref, y_str)
    lr_cv_str.fit(X_ref, y_str)

    # String-labelled fits must learn the same coefficients as numeric ones.
    assert_array_almost_equal(lr.coef_, lr_str.coef_)
    assert_equal(sorted(lr_str.classes_), ['bar', 'baz', 'foo'])
    assert_array_almost_equal(lr_cv.coef_, lr_cv_str.coef_)
    assert_equal(sorted(lr_str.classes_), ['bar', 'baz', 'foo'])
    assert_equal(sorted(lr_cv_str.classes_), ['bar', 'baz', 'foo'])

    # The predictions should be in original labels
    assert_equal(sorted(np.unique(lr_str.predict(X_ref))),
                 ['bar', 'baz', 'foo'])
    assert_equal(sorted(np.unique(lr_cv_str.predict(X_ref))),
                 ['bar', 'baz', 'foo'])

    # Make sure class weights can be given with string labels
    lr_cv_str = LogisticRegression(
        solver='lbfgs', class_weight={'bar': 1, 'baz': 2, 'foo': 0},
        multi_class='multinomial').fit(X_ref, y_str)
    # Zero weight on 'foo' removes it from the predictions.
    assert_equal(sorted(np.unique(lr_cv_str.predict(X_ref))), ['bar', 'baz'])
示例14: classify
# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict [as 别名]
def classify(data_set_df, user_info_df, feat_set_name, features=None, label='gender',
             classifier=None, reg_param=1.0, selection=False, num_feat=20, sel_method='LR',
             cv=10):
    """K-fold cross-validated classification of user attributes.

    Filters the feature matrix against user_info_df, optionally performs
    per-fold feature selection, scores a LogisticRegression (or the given
    classifier) across folds, and appends per-fold results and confusion
    matrices to CSV files under param.EXPERIMENT_PATH.

    Returns (mean_cv_score, miss_classification_rate).

    NOTE(review): reconstructed from a flattened Python 2 snippet — the
    exact nesting (especially the placement of miss_clf_rate) is a
    best-effort reading; ``print``/``map`` were made Python-3 safe.
    """
    instance_num = len(data_set_df.columns)
    df_filtered, y_v = pc.get_filtered_x_y(data_set_df, user_info_df, label)
    x = df_filtered if features is None else df_filtered.loc[features]
    # Drop rows/columns that are entirely missing.
    x = x.dropna(how='all', axis=0)
    x = x.dropna(how='all', axis=1)
    if x.isnull().any().any() or (x == np.inf).any().any() or (x == -np.inf).any().any():
        # Impute NaN/inf features before fitting.
        x_imp = pc.fill_nan_features(x)
    else:
        x_imp = x
    # list(...) keeps the original Py2 map-as-list indexing semantics on Py3.
    y_filtered = y_v[list(map(int, x.columns.values))]
    clf = LogisticRegression(C=reg_param) if classifier is None else classifier
    cv_num = min(len(y_filtered), cv)
    score_mean = 0.0
    miss_clf_rate = 1.0
    if cv_num > 1 and len(y_filtered.unique()) > 1:
        kf = KFold(y_filtered.shape[0], n_folds=cv_num, shuffle=True)
        fold = 0
        result_str = ""
        matrix_str = ""
        for tr_index, te_index in kf:
            fold += 1
            # Samples are columns in x_imp, hence the transposes.
            x_train, x_test = x_imp.T.iloc[tr_index], x_imp.T.iloc[te_index]
            y_train, y_test = y_filtered.iloc[tr_index], y_filtered.iloc[te_index]
            if selection:
                if sel_method == 'LR' or 'RF' in sel_method:
                    feat_index = fimp.feature_selection(x_train.T, user_info_df, num_feat,
                                                        method=sel_method, label=label)
                else:
                    x_tr_df, x_te_df = x.T.iloc[tr_index].T, x.T.iloc[te_index].T
                    feat_index = fimp.feature_selection(x_tr_df, user_info_df, num_feat,
                                                        method=sel_method, label=label)
                x_train = x_train.loc[:, feat_index].values
                x_test = x_test.loc[:, feat_index].values
            try:
                clf.fit(x_train, y_train)
                score = clf.score(x_test, y_test)
                score_mean += score
                result_str += "%s, %s, %s, %s, %s, %s, %s, %s, %s, %s\n" \
                    % (label, True if param.FILL_SUFFIX in feat_set_name else False,
                       True if param.SCALING_SUFFIX in feat_set_name else False, selection, 'LR',
                       reg_param, cv, fold, x_train.shape[1], score)
                cf_mat = confusion_matrix(y_test, clf.predict(x_test),
                                          labels=range(len(info.LABEL_CATEGORY[label])))
                matrix_str += np.array_str(cf_mat) + "\n"
            except ValueError:
                # Best-effort: a fold whose training data is degenerate is skipped.
                pass
        print(result_str)
        file_name = "%s/new_%s.csv" % (param.EXPERIMENT_PATH, feat_set_name)
        with open(file_name, mode='a') as f:
            f.write(result_str)
        file_name = "%s/new_%s_mat.csv" % (param.EXPERIMENT_PATH, feat_set_name)
        with open(file_name, mode='a') as f:
            f.write(matrix_str)
        if fold > 0:
            score_mean = score_mean / fold
            miss_clf_rate = (float(instance_num - len(y_filtered)) / instance_num)
    return score_mean, miss_clf_rate
示例15: crossval
# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict [as 别名]
def crossval(features, labels, variant, printcoeffs=False):
    """10-fold cross-validation of an L2 LogisticRegression.

    Prints "<variant> <mean accuracy> (<mean F1>)" and, when printcoeffs
    is set, refits on all data and dumps the top/bottom coefficient names
    accumulated in TotalCoeffCounter.

    NOTE(review): reconstructed from a flattened snippet; the original's
    large commented-out dummy-baseline / coefficient-tracking sections are
    summarised here rather than reproduced verbatim.
    """
    maxent = LogisticRegression(penalty='l2')
    dummyclass = DummyClassifier("most_frequent")
    # Alternatives tried upstream: SGDClassifier(penalty='l1'), Perceptron(penalty='l1').
    maxent.fit(features, labels)  # only needed for feature inspection; CV refits per fold
    scores = defaultdict(list)
    TotalCoeffCounter = Counter()
    for TrainIndices, TestIndices in cross_validation.KFold(
            n=features.shape[0], n_folds=10, shuffle=False, random_state=None):
        TrainX_i = features[TrainIndices]
        Trainy_i = labels[TrainIndices]
        TestX_i = features[TestIndices]
        Testy_i = labels[TestIndices]
        dummyclass.fit(TrainX_i, Trainy_i)
        maxent.fit(TrainX_i, Trainy_i)
        ypred_i = maxent.predict(TestX_i)
        ydummypred_i = dummyclass.predict(TestX_i)
        # NOTE(review): metric args are (y_pred, y_true) — reversed from
        # sklearn's (y_true, y_pred) convention; accuracy is unaffected but
        # F1's precision/recall components swap. Kept as-is.
        acc = accuracy_score(ypred_i, Testy_i)
        f1 = f1_score(ypred_i, Testy_i, pos_label=1)
        scores["Accuracy"].append(acc)
        scores["F1"].append(f1)
        # (disabled upstream: precision/recall and dummy-baseline scores,
        # per-fold coefficient accumulation into TotalCoeffCounter,
        # pervasive positive/negative feature intersection)
    print("%s %.2f (%.2f)" % (variant, np.array(scores["Accuracy"]).mean(),
                              np.array(scores["F1"]).mean()))
    if printcoeffs:
        maxent.fit(features, labels)  # fit on everything
        coeffs_total = list(maxent.coef_[0])
        # NOTE(review): TotalCoeffCounter is never populated in this
        # version, so both loops below print nothing — confirm upstream.
        for (key, value) in TotalCoeffCounter.most_common()[:20]:
            print(key, value)
        print("---")
        for (key, value) in TotalCoeffCounter.most_common()[-20:]:
            print(key, value)