This article collects typical usage examples of the Python method sklearn.linear_model.LogisticRegressionCV.predict. If you are wondering what LogisticRegressionCV.predict does, how to call it, or what it looks like in real code, the curated examples below should help. You can also explore further usage examples of the containing class, sklearn.linear_model.LogisticRegressionCV.
The following shows 15 code examples of the LogisticRegressionCV.predict method, sorted by popularity by default.
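Before the collected snippets, here is a minimal, self-contained sketch of the fit/predict workflow that all of them follow. It uses a synthetic dataset; the make_classification parameters, Cs=10, and cv=5 are illustrative choices, not values taken from any of the projects below.

import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegressionCV
from sklearn.model_selection import train_test_split

# Toy binary-classification data (shapes and random_state are arbitrary).
X, y = make_classification(n_samples=500, n_features=10, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# LogisticRegressionCV picks the regularization strength C by internal cross-validation.
clf = LogisticRegressionCV(Cs=10, cv=5)
clf.fit(X_train, y_train)

# predict() returns hard class labels for the new samples.
y_pred = clf.predict(X_test)
print("Test accuracy:", np.mean(y_pred == y_test))

When class probabilities are needed instead of labels, predict_proba() can be used in place of predict(), as Example 11 below does inside its returned closure.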
Example 1: Fraud
# Required import: from sklearn.linear_model import LogisticRegressionCV [as alias]
# Or: from sklearn.linear_model.LogisticRegressionCV import predict [as alias]
class Fraud(object):
    def __init__(self):
        self.model = None
        self.fitted = False

    def fit(self, jsonfile, target=0.3):
        self.model = LogisticRegressionCV(cv=15, scoring='recall')
        # featurize_data() and oversample() are helpers defined elsewhere in the project.
        X, y = featurize_data(jsonfile)
        # Balance the classes
        X_oversample, y_oversample = oversample(X, y, target)
        print(X_oversample, y_oversample)
        # Fit the model
        self.model.fit(X_oversample, y_oversample)
        self.fitted = True

    def predict(self, X_test):
        return self.model.predict(X_test)[0]

    def save_model(self, picklefile):
        with open(picklefile, 'wb') as f:  # pickle requires binary mode
            pickle.dump(self.model, f)

    def load_model(self, picklefile):
        with open(picklefile, 'rb') as f:
            self.model = pickle.load(f)
        self.fitted = True
Example 2: logistic_test
# Required import: from sklearn.linear_model import LogisticRegressionCV [as alias]
# Or: from sklearn.linear_model.LogisticRegressionCV import predict [as alias]
def logistic_test(train_data, train_labels, test_data, test_labels, cv=False):
    # Perform logistic regression.
    clf = LogisticRegressionCV() if cv else LogisticRegression()
    clf.fit(train_data, train_labels)
    predicted_labels = clf.predict(test_data)
    # Count true positives, true negatives, false positives, false negatives.
    tp, tn, fp, fn = 0, 0, 0, 0
    for predicted, actual in zip(predicted_labels, test_labels):
        if predicted == 1 and actual == 1:
            tp += 1
        if predicted == 0 and actual == 0:
            tn += 1
        if predicted == 1 and actual == 0:
            fp += 1
        if predicted == 0 and actual == 1:
            fn += 1
    # Compute statistics.
    accuracy = (tp + tn) / (tp + tn + fp + fn)
    precision = 0 if (tp + fp) == 0 else tp / (tp + fp)
    recall = 0 if (tp + fn) == 0 else tp / (tp + fn)
    # Print report.
    print("Correctly classified {}/{}".format(tp + tn, tp + tn + fp + fn))
    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall:", recall)
    print("tp: {}; tn: {}; fp: {}; fn: {}".format(tp, tn, fp, fn))
    return accuracy
Example 3: logistic_test_using_cosine
# Required import: from sklearn.linear_model import LogisticRegressionCV [as alias]
# Or: from sklearn.linear_model.LogisticRegressionCV import predict [as alias]
def logistic_test_using_cosine(score_feature=False):
    logger.info('using cosine features in logistic regression')
    if score_feature:
        logger.info('also use score feature')
    # Candidate regularization strengths for cross-validation.
    Cs = [2**t for t in range(0, 10, 1)]
    Cs.extend([3**t for t in range(1, 10, 1)])
    snli2cosine = SNLI2Cosine('/home/junfeng/word2vec/GoogleNews-vectors-negative300.bin')
    logger.info('loading snli data ...')
    train_df = pd.read_csv('./snli/snli_1.0/snli_1.0_train.txt', delimiter='\t')
    train_df = train_df[pd.notnull(train_df.sentence2)]
    train_df = train_df[train_df.gold_label != '-']
    train_df = train_df[:(len(train_df) // 3)]  # keep the first third of the training data
    train_df.reset_index(inplace=True)
    test_df = pd.read_csv('./snli/snli_1.0/snli_1.0_test.txt', delimiter='\t')
    test_df = test_df[pd.notnull(test_df.sentence2)]
    test_df = test_df[test_df.gold_label != '-']
    test_df.reset_index(inplace=True)
    X_train, train_labels, X_test, test_labels = snli2cosine.calculate_cosine_features(train_df, test_df)
    if score_feature:
        y_train_proba, y_test_proba = joblib.load('./snli/logistic_score_snli.pkl')
        X_train = np.concatenate([X_train, y_train_proba.reshape((-1, 1))], axis=1)
        X_test = np.concatenate([X_test, y_test_proba.reshape((-1, 1))], axis=1)
    logger.info('X_train.shape: {0}'.format(X_train.shape))
    logger.info('X_test.shape: {0}'.format(X_test.shape))
    logreg = LogisticRegressionCV(Cs=Cs, cv=3, n_jobs=10, random_state=919)
    logreg.fit(X_train, train_labels)
    logger.info('best C is {0}'.format(logreg.C_))
    y_test_predicted = logreg.predict(X_test)
    acc = accuracy_score(test_labels, y_test_predicted)
    logger.info('test data predicted accuracy: {0}'.format(acc))
Example 4: classify_maxEnt
# Required import: from sklearn.linear_model import LogisticRegressionCV [as alias]
# Or: from sklearn.linear_model.LogisticRegressionCV import predict [as alias]
def classify_maxEnt(train_X, train_Y, test_X):
    print("Classifying using Maximum Entropy ...")
    maxEnt = LogisticRegressionCV()
    maxEnt.fit(train_X, train_Y)
    yHat = maxEnt.predict(test_X)
    return yHat
Developer: shalinc | Project: ML-Sentiment-Analysis-of-Movie-Reviews-from-Twitter | Lines: 10 | Source: sentiment_analysis.py
Example 5: classify_maxEnt_twitter
# Required import: from sklearn.linear_model import LogisticRegressionCV [as alias]
# Or: from sklearn.linear_model.LogisticRegressionCV import predict [as alias]
def classify_maxEnt_twitter(train_X, train_Y, test_X, test_Y):
    print("Classifying using Maximum Entropy ...")
    maxEnt = LogisticRegressionCV()
    maxEnt.fit(train_X, train_Y)
    yHat = maxEnt.predict(test_X)
    conf_mat = confusion_matrix(test_Y, yHat)
    print(conf_mat)
    # Accuracy is the fraction of correctly classified samples on the confusion matrix diagonal.
    Accuracy = sum(conf_mat.diagonal()) / np.sum(conf_mat)
    print("Accuracy: ", Accuracy)
    evaluate_classifier(conf_mat)
Developer: shalinc | Project: ML-Sentiment-Analysis-of-Movie-Reviews-from-Twitter | Lines: 13 | Source: sentiment_analysis.py
Example 6: classify
# Required import: from sklearn.linear_model import LogisticRegressionCV [as alias]
# Or: from sklearn.linear_model.LogisticRegressionCV import predict [as alias]
def classify(self, mp, x_train, y_train, x_test):
    x_train = sm.add_constant(x_train)
    x_test = sm.add_constant(x_test)
    clf = LogisticRegressionCV(verbose=1, cv=5)
    log_to_info('Fitting a Logistic Regression to labeled training data...')
    clf = clf.fit(x_train, y_train)
    log_to_info('Training details')
    log_to_info('Classifier parameters: {}'.format(clf.get_params()))
    log_to_info('On training: {}'.format(clf.score(x_train, y_train) * 100.0))
    log_to_info('Predicting test value')
    y_test = clf.predict(x_test)
    log_to_info('Done!')
    return y_test
Example 7: SentenceClassifier
# Required import: from sklearn.linear_model import LogisticRegressionCV [as alias]
# Or: from sklearn.linear_model.LogisticRegressionCV import predict [as alias]
class SentenceClassifier(BaseEstimator, ClassifierMixin):
    def __init__(self,
                 sents_shuffle=False,
                 doc2vec=gensim.models.doc2vec.Doc2Vec()):
        # Store every constructor argument as an instance attribute.
        argdict = locals()
        argdict.pop('argdict', None)
        argdict.pop('self', None)
        vars(self).update(argdict)

    def fit(self, X, y):
        self.sents_train = X
        self.Y_train = y
        return self

    def doc2vec_set(self, all_docs):
        # Reset any previously trained weights before rebuilding the vocabulary.
        if hasattr(self.doc2vec, 'syn0'):
            self.doc2vec.reset_weights()
            delattr(self.doc2vec, 'syn0')
        self.doc2vec.build_vocab(all_docs)
        self.doc2vec.train(all_docs)

    def predict(self, X):
        self.sents_test = X
        self.sents_all = self.sents_train + self.sents_test
        if self.sents_shuffle:
            s_indexs = list(range(len(self.sents_all)))
            random.shuffle(s_indexs)
            s_invers_indexs = list(range(len(s_indexs)))
            for n in range(len(s_indexs)):
                s_invers_indexs[s_indexs[n]] = n
            sents_all = [self.sents_all[n] for n in s_indexs]
        else:
            sents_all = self.sents_all
        all_docs = list(LabeledListSentence(sents_all))
        self.doc2vec_set(all_docs)
        # Infer document vectors for train and test sentences, then fit the classifier.
        self.X_train = [self.doc2vec.infer_vector(s) for s in self.sents_train]
        self.X_test = [self.doc2vec.infer_vector(s) for s in self.sents_test]
        self.logistic = LogisticRegressionCV(class_weight='balanced')
        self.logistic.fit(self.X_train, self.Y_train)
        Y_test_predict = self.logistic.predict(self.X_test)
        return Y_test_predict
Example 8: try_all_k_best
# Required import: from sklearn.linear_model import LogisticRegressionCV [as alias]
# Or: from sklearn.linear_model.LogisticRegressionCV import predict [as alias]
def try_all_k_best(max=13):
    # Appends results to module-level lists: choices, pred, acc, prec, reca.
    for k in range(1, max + 1):
        data = featureFormat(my_dataset, features_list, sort_keys=True)
        labels, features = targetFeatureSplit(data)
        features_train, features_test, labels_train, labels_test = \
            train_test_split(features, labels, test_size=0.3, random_state=42)
        selector = SelectKBest(k=k)
        features_train = selector.fit_transform(features_train, labels_train)
        features_test = selector.transform(features_test)
        choices.append(selector.transform(np.array(features_list[1:]).reshape(1, -1)))
        lr_cv = LogisticRegressionCV()
        lr_cv.fit(features_train, labels_train)
        pred.append(lr_cv.predict(features_test))
        acc.append(accuracy_score(labels_test, pred[k-1]))
        prec.append(precision_score(labels_test, pred[k-1]))
        reca.append(recall_score(labels_test, pred[k-1]))
Example 9: load_bos_2012_partition
# Required import: from sklearn.linear_model import LogisticRegressionCV [as alias]
# Or: from sklearn.linear_model.LogisticRegressionCV import predict [as alias]
def load_bos_2012_partition():
    data = Dataset.load_dataset(mrg_data=False)
    train_secs = [0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14]
    test_secs = [9, 11, 13, 15]
    train_auxs, train_idxs = [], []
    test_auxs, test_idxs = [], []
    for i, aux in enumerate(data.auxs):
        if aux.type == 'do':  # Bos only considered do-VPE
            section = None  # first find the section the aux belongs to
            for sec in sorted(data.section_ends):
                if aux.sentnum < data.section_ends[sec]:
                    section = sec
                    break
            if section in train_secs:
                train_auxs.append(aux)
                train_idxs.append(i)
            if section in test_secs:
                test_auxs.append(aux)
                test_idxs.append(i)
    data.X = np.array(data.X)
    data.Y = np.array(data.Y)
    train_X = data.X[train_idxs]
    train_Y = data.Y[train_idxs]
    test_X = data.X[test_idxs]
    test_Y = data.Y[test_idxs]
    train_X, train_Y = Dataset.oversample(train_X, train_Y, 5)
    print('Training classifier...')
    classifier = LogisticRegressionCV()
    classifier.fit(vstack_csr_vecs(train_X), train_Y)
    predictions = classifier.predict(vstack_csr_vecs(test_X))
    print("Results acquired from using our algorithm on Bos' train-test split:")
    print(accuracy_results(test_Y, predictions))
Example 10: bos_train_test_split
# Required import: from sklearn.linear_model import LogisticRegressionCV [as alias]
# Or: from sklearn.linear_model.LogisticRegressionCV import predict [as alias]
def bos_train_test_split():
    data = Dataset.load_dataset(mrg_data=False)
    train = range(0, 15)
    test = range(20, 25)
    train_auxs, test_auxs = [], []
    train_idxs, test_idxs = [], []
    for i, aux in enumerate(data.auxs):
        section = find_section(aux.sentnum, data.section_ends)
        if section in train:
            train_auxs.append(aux)
            train_idxs.append(i)
        if section in test:
            test_auxs.append(aux)
            test_idxs.append(i)
    data.X = np.array(data.X)
    data.Y = np.array(data.Y)
    train_X = data.X[train_idxs]
    train_Y = data.Y[train_idxs]
    test_X = data.X[test_idxs]
    test_Y = data.Y[test_idxs]
    train_X, train_Y = Dataset.oversample(train_X, train_Y, 5)
    print('Training classifier...')
    classifier = LogisticRegressionCV()
    classifier.fit(vstack_csr_vecs(train_X), train_Y)
    predictions = classifier.predict(vstack_csr_vecs(test_X))
    print('Results acquired from using our algorithm on the Bos train-test split:')
    print(accuracy_results(test_Y, predictions))
    save_end_to_end(test_Y, predictions)
Example 11: build_clf
# Required import: from sklearn.linear_model import LogisticRegressionCV [as alias]
# Or: from sklearn.linear_model.LogisticRegressionCV import predict [as alias]
def build_clf(x, y, fe):
    # Use the first two thirds for training and the rest for validation.
    n = len(x)
    x_train = x[:n * 2 // 3]
    x_val = x[n * 2 // 3:]
    y_train = y[:n * 2 // 3]
    y_val = y[n * 2 // 3:]
    x_train_ts = fe.fit_transform(x_train, y_train)
    x_val_ts = fe.transform(x_val)
    scaler = StandardScaler()
    x_train_ts = scaler.fit_transform(x_train_ts)
    x_val_ts = scaler.transform(x_val_ts)
    # Build a single predefined train/validation split to pass as cv.
    train = np.arange(len(x_train_ts))
    val = np.arange(len(x_val_ts)) + len(train)
    x = np.vstack((x_train_ts, x_val_ts))
    y = np.hstack((y_train, y_val))
    cv = ((train, val),)
    clf = LogisticRegressionCV(Cs=21, cv=cv, n_jobs=-1)
    clf.fit(x, y)
    y_pred = clf.predict(x_val_ts)
    acc = accuracy_score(y_val, y_pred)
    print('{}'.format(acc))
    print(confusion_matrix(y_val, y_pred))

    def new_clf(x):
        x_ts = scaler.transform(x)
        y_pred = clf.predict_proba(x_ts)[:, 1]
        return y_pred
    return new_clf
Example 12: plot_decision_boundary
# Required import: from sklearn.linear_model import LogisticRegressionCV [as alias]
# Or: from sklearn.linear_model.LogisticRegressionCV import predict [as alias]
def plot_decision_boundary(pred_func, X, y):
    # Set min and max values and give it some padding
    x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
    y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
    h = 0.01
    # Generate a grid of points with distance h between them
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    # Predict the function value for the whole grid
    Z = pred_func(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    # Plot the contour and training examples
    plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral)
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Spectral)
    plt.show()

if __name__ == '__main__':
    np.random.seed(0)
    X, y = make_moons(200, noise=0.20)
    plt.scatter(X[:, 0], X[:, 1], s=40, c=y, cmap=plt.cm.Spectral)
    # Train the logistic regression classifier
    clf = LogisticRegressionCV()
    clf.fit(X, y)
    # Plot the decision boundary
    plot_decision_boundary(lambda x: clf.predict(x), X, y)
    plt.title("Logistic Regression")
Example 13: classification_report
# Required import: from sklearn.linear_model import LogisticRegressionCV [as alias]
# Or: from sklearn.linear_model.LogisticRegressionCV import predict [as alias]
# Script fragment: tree, rf, Xtrain, Xtest, ytrain, ytest, and feat_labels are defined earlier in the source script.
ytrain_predict = tree.predict(Xtrain)
ytest_predict = tree.predict(Xtest)
print(classification_report(y_true=ytrain, y_pred=ytrain_predict))
rf_feat_importances = pd.Series(rf.feature_importances_, index=feat_labels).sort_values(ascending=False)
lrcv = LogisticRegressionCV(Cs=[0.001, 0.01, 0.1, 1, 10, 100], cv=5, scoring='roc_auc')
lrcv.fit(Xtrain, ytrain)
ytrain_predict = lrcv.predict(Xtrain)
ytest_predict = lrcv.predict(Xtest)
print(classification_report(y_true=ytrain, y_pred=ytrain_predict))
print(classification_report(y_true=ytest, y_pred=ytest_predict))
lr_feat_importances = pd.Series(lrcv.coef_[0], index=feat_labels).sort_values(ascending=False)
feat_importances = pd.concat([rf_feat_importances, lr_feat_importances], axis=1, keys=['rf', 'lr'])
feat_importances.sort_values(by='rf', inplace=True, ascending=False)
feat_importances['abs_lr'] = np.abs(feat_importances.lr)
# ===============================================
feat_labels = [u'source_ads-bing', u'source_ads-google', u'source_ads-yahoo', u'source_ads_facebook',
Example 14: make_classification
# Required import: from sklearn.linear_model import LogisticRegressionCV [as alias]
# Or: from sklearn.linear_model.LogisticRegressionCV import predict [as alias]
X, y = make_classification(n_samples=1000, n_informative=5, n_redundant=6, random_state=4)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=4)
scaler = StandardScaler().fit(X_train)
X_train_standard = scaler.transform(X_train)
X_test_standard = scaler.transform(X_test)
# specify classifiers
ridge = RidgeClassifierCV(alphas=np.logspace(-3, 1, 20))
lasso = LogisticRegressionCV(Cs=np.logspace(-3, 1, num=20))
forest = RandomForestClassifier(n_estimators=100, n_jobs=-1)
# train the classifiers
ridge.fit(X_train_standard, y_train)
lasso.fit(X_train_standard, y_train)
forest.fit(X_train, y_train)
# predicted values
ridge_preds = ridge.predict(X_test_standard)
lasso_preds = lasso.predict(X_test_standard)
forest_preds = forest.predict(X_test)
# confusion matrices
c1 = confusion_matrix(y_test, ridge_preds)
c2 = confusion_matrix(y_test, lasso_preds)
c3 = confusion_matrix(y_test, forest_preds)
# build a plot to compare results
preds = [ridge_preds, lasso_preds, forest_preds]
names = ["Ridge", "Lasso", "Random Forest"]
confusion_plot(y_test, preds, model_names=names, fname="images/confusion_plot.png")
Example 15: accuracy
# Required import: from sklearn.linear_model import LogisticRegressionCV [as alias]
# Or: from sklearn.linear_model.LogisticRegressionCV import predict [as alias]
import sys
import argparse

def accuracy(y_true, y_pred):
    return np.mean(y_true == y_pred)

if __name__ == '__main__':
    df = pd.read_csv(sys.argv[1])
    label = sys.argv[2]
    print('shuffling')
    df = df.sample(df.shape[0])  # .head(100000)
    print('Truth Distribution Overall', df[label].value_counts())
    y = df[label]
    try:
        df = df.drop(['orig_cid2', 'old_cid', 'msisdn', 'gend'], axis=1)
    except KeyError:
        pass
    X = df.values.astype(float)  # as_matrix() is deprecated in newer pandas
    X_Train, X_Test, y_train, y_test = train_test_split(X, y)
    print('Labels Distribution Test', y_test.value_counts())
    # clf = LogisticRegressionCV()
    # clf = LogisticRegressionCV(penalty='l2', cv=5, n_jobs=-1)
    clf = LogisticRegressionCV(solver='liblinear', penalty='l1', cv=3)
    clf.fit(X_Train, y_train)
    y_pred = clf.predict(X_Test)
    print("%s Test Accuracy %.4f" % ('CVLogistic', accuracy(y_test, y_pred)))
    print('ROC: %f' % roc_auc_score(y_test, y_pred))
    print('LogLoss: %f' % log_loss(y_test, y_pred))