本文整理汇总了Python中sklearn.linear_model.LogisticRegressionCV.fit方法的典型用法代码示例。如果您正苦于以下问题:Python LogisticRegressionCV.fit方法的具体用法?Python LogisticRegressionCV.fit怎么用?Python LogisticRegressionCV.fit使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.linear_model.LogisticRegressionCV
的用法示例。
在下文中一共展示了LogisticRegressionCV.fit方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: LogitSelector
# 需要导入模块: from sklearn.linear_model import LogisticRegressionCV [as 别名]
# 或者: from sklearn.linear_model.LogisticRegressionCV import fit [as 别名]
def LogitSelector(x, y, cv, niter, njob):
    """Select features via L1-penalized logistic regression, then evaluate.

    Fits an L1 ``LogisticRegressionCV`` to find the non-zero coefficients,
    keeps those columns only, and scores a plain ``LogisticRegression`` on
    ``niter`` shuffle-split rounds.

    Parameters
    ----------
    x : array-like of shape (n_samples, n_features)
    y : array-like of labels (binarized internally)
    cv : int, number of CV folds for the selector (also sets the test size)
    niter : int, number of shuffle-split evaluation rounds
    njob : int, parallel jobs for the CV fit

    Returns
    -------
    Mdc record with the refit model, the selected column indices and the
    mean accuracy / precision / recall / f1 / AUC over the rounds.
    """
    # Float literal so the test fraction is not truncated to 0 by
    # Python 2 integer division (1 / cv == 0 for any cv > 1 there).
    t_size = 1.0 / cv
    lb = prep.LabelBinarizer()
    y = lb.fit_transform(y).ravel()
    model = LogisticRegressionCV(penalty='l1', solver='liblinear', refit=False, cv=cv, n_jobs=njob)
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', UserWarning)
        warnings.simplefilter('ignore', ConvergenceWarning)
        model.fit(x, y)
    # Indices of features whose L1 coefficient survived regularization.
    columns = np.arange(x.shape[1])[model.coef_.ravel() != 0]
    accu = []
    prec = []
    rec = []
    f1 = []
    au = []
    cls = LogisticRegression()
    gn_cvset = (Cvset(x[i][:, columns], y[i], x[j][:, columns], y[j])
                for (i, j) in ShuffleSplit(len(y), n_iter=niter, test_size=t_size))
    for cvt in gn_cvset:
        cls.fit(cvt.xtr, cvt.ytr)
        # Predict once per round instead of once per metric (predict is
        # deterministic, so the scores are unchanged).
        ypred = cls.predict(cvt.xte)
        accu.append(accuracy_score(cvt.yte, ypred))
        prec.append(precision_score(cvt.yte, ypred))
        rec.append(recall_score(cvt.yte, ypred))
        f1.append(f1_score(cvt.yte, ypred))
        au.append(__Auc(cls, cvt.xte, cvt.yte))
    # Final refit on the full data restricted to the selected columns.
    cls.fit(x[:, columns], y)
    return Mdc(model=cls, idx=columns, accu=np.mean(accu),
               prec=np.mean(prec), rec=np.mean(rec), f1=np.mean(f1),
               au=np.mean(au))
示例2: classify
# 需要导入模块: from sklearn.linear_model import LogisticRegressionCV [as 别名]
# 或者: from sklearn.linear_model.LogisticRegressionCV import fit [as 别名]
# NOTE(review): Python 2 code (print statements, `except Exception, e`).
def classify(_char):
    """Train a per-character LogisticRegressionCV classifier from DB samples.

    Returns silently (trains nothing) when there are too few samples, only
    one class present, or the prepared train/test split comes back empty.
    """
    print 'to fetch data'
    start_time = time.time()
    # Require at least 10 verified samples of this character before training.
    char_count = Character.objects.filter(char=_char, is_correct=1).count()
    if char_count < 10:
        return
    char_lst = Character.objects.filter(char=_char)
    y, X, ty, tX, t_charid_lst, test_accuracy_lst = prepare_data_with_database(char_lst)
    if len(y) == 0 or len(ty) == 0:
        return
    if 1 == len(set(y)) or len(y) < 10:
        return
    # presumably appends negative samples to X/y in place — TODO confirm
    # against fetch_negative_samples' definition.
    fetch_negative_samples(_char, X, y)
    # Re-check: the augmented data must still have both classes and >= 50 rows.
    if len(y) == 0 or len(ty) == 0:
        return
    if 1 == len(set(y)) or len(y) < 50:
        return
    print "fetch data done, spent %s seconds." % int(time.time() - start_time)
    start_time = time.time()
    print "traning: data size: %d" % len(y)
    model = LogisticRegressionCV(cv=5, solver='liblinear', n_jobs=1)
    try:
        model.fit(X, y)
        print "training done, spent %s seconds." % int(time.time() - start_time)
        #print 'params: '
        #for k, v in model.get_params().iteritems():
        #    print '\t', k, ' : ', v
        # Training-set accuracy only; no held-out evaluation happens here.
        print 'score: ', model.score(X, y)
    except Exception, e:
        print 'except: ', e
        traceback.print_exc()
        return
示例3: lr_with_scale2
# 需要导入模块: from sklearn.linear_model import LogisticRegressionCV [as 别名]
# 或者: from sklearn.linear_model.LogisticRegressionCV import fit [as 别名]
def lr_with_scale2():
    """
    Submission: lr_with_scale2_0704_03.csv
    E_val:
    E_in: 0.878996
    E_out: 0.8768131004917349
    """
    from sklearn.linear_model import LogisticRegressionCV
    from sklearn.preprocessing import StandardScaler
    from sklearn.pipeline import Pipeline

    X, y = dataset.load_train()

    # Standardize features before the regularized fit.
    raw_scaler = StandardScaler()
    raw_scaler.fit(X)
    X_scaled = raw_scaler.transform(X)

    clf = LogisticRegressionCV(Cs=50, cv=5, scoring='roc_auc', n_jobs=-1,
                               class_weight='auto')
    clf.fit(X_scaled, y)
    logger.debug('Best C: %f', clf.C_[0])
    logger.debug('Cs: %s', clf.Cs_)
    # `scores_` is a dict of per-class score arrays, not a number; the
    # original '%f' placeholder raised TypeError when the record was
    # formatted, so log it with %s instead.
    logger.debug('Grid scores: %s', clf.scores_)
    logger.debug('Ein: %f', Util.auc_score(clf, X_scaled, y))

    # Ship scaler + classifier together so prediction applies both steps.
    IO.dump_submission(Pipeline([('scale_raw', raw_scaler),
                                 ('lr', clf)]), 'lr_with_scale2_0704_03')
示例4: logistic_test
# 需要导入模块: from sklearn.linear_model import LogisticRegressionCV [as 别名]
# 或者: from sklearn.linear_model.LogisticRegressionCV import fit [as 别名]
def logistic_test(train_data, train_labels, test_data, test_labels, cv=False):
# Perform logistic regression.
clf = LogisticRegressionCV() if cv else LogisticRegression()
clf.fit(train_data, train_labels)
predicted_labels = clf.predict(test_data)
# Count true positives, true negatives, false positives, false negatives.
tp, tn, fp, fn = 0, 0, 0, 0
for predicted, actual in zip(predicted_labels, test_labels):
if predicted == 1 and actual == 1:
tp += 1
if predicted == 0 and actual == 0:
tn += 1
if predicted == 1 and actual == 0:
fp += 1
if predicted == 0 and actual == 1:
fn += 1
# Compute statistics.
accuracy = (tp + tn) / (tp + tn + fp +fn)
precision = 0 if (tp + fp) == 0 else tp / (tp + fp)
recall = 0 if (tp + fn) == 0 else tp / (tp + fn)
# Print report.
print "Correctly classified {}/{}".format(tp + tn, tp + tn + fp +fn)
print "Accuracy:", accuracy
print "Precision:", precision
print "Recall:", recall
print "tp: {}; tn: {}; fp: {}; fn {}".format(tp, tn, fp, fn)
return accuracy
示例5: LogitSelector
# 需要导入模块: from sklearn.linear_model import LogisticRegressionCV [as 别名]
# 或者: from sklearn.linear_model.LogisticRegressionCV import fit [as 别名]
def LogitSelector(x, y, cv, njob):
    """L1-based feature selection followed by stratified k-fold evaluation.

    Keeps the columns whose L1 coefficients are non-zero, cross-validates a
    plain LogisticRegression restricted to those columns, and returns the
    averaged metrics plus a final model refit on all samples.

    NOTE(review): relies on the pre-0.18 sklearn API — StratifiedKFold built
    from labels and re-iterated twice — and on Python 2's list-returning
    `map` (np.average needs a sequence, not an iterator).
    """
    lb = prep.LabelBinarizer()
    y = lb.fit_transform(y).ravel()
    cls = LogisticRegression()
    # NOTE(review): __Auc is defined but never called — auc_eval below
    # inlines the same computation. Dead code, kept byte-identical.
    def __Auc(xte, yte):
        ypo = cls.predict_proba(xte)
        flt_auc = roc_auc_score(yte, ypo[:,1])
        return flt_auc
    skf = StratifiedKFold(y, n_folds=cv)
    # fit_intercept=False: the selector ranks raw feature weights only.
    model = LogisticRegressionCV(penalty='l1', solver='liblinear', fit_intercept=False, cv=cv, n_jobs=njob)
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', UserWarning)
        warnings.simplefilter('ignore', ConvergenceWarning)
        model.fit(x, y)
    # Indices of features whose L1 coefficient survived regularization.
    columns = np.arange(x.shape[1])[model.coef_.ravel() != 0]
    # mdl_eval(metric) -> fold scorer: refit `cls` on the train indices and
    # apply `metric` to the predictions on the test indices.
    mdl_eval = lambda func: lambda idx_tr, idx_te: func(y[idx_te], cls.fit(x[idx_tr][:,columns], y[idx_tr]).predict(x[idx_te][:,columns]))
    # Same shape, but scoring AUC on the positive-class probabilities.
    auc_eval = lambda idx_tr, idx_te: roc_auc_score(y[idx_te], cls.fit(x[idx_tr][:,columns], y[idx_tr]).predict_proba(x[idx_te][:,columns])[:,1])
    # Average a metric across all folds (unzips the (train, test) pairs so
    # map calls the scorer with two positional index arrays).
    res_eval = lambda func: np.average(map(mdl_eval(func), *zip(*[(idx_tr, idx_te) for idx_tr, idx_te in skf])))
    accu = res_eval(accuracy_score)
    prec = res_eval(precision_score)
    rec = res_eval(recall_score)
    f1 = res_eval(f1_score)
    au = np.average(map(auc_eval, *zip(*[(idx_tr, idx_te) for idx_tr, idx_te in skf])))
    # Final model refit on all samples, selected columns only.
    cls.fit(x[:,columns], y)
    return Mdc(model=cls, idx=columns, accu=accu, prec=prec, rec=rec, f1=f1, au=au)
示例6: train
# 需要导入模块: from sklearn.linear_model import LogisticRegressionCV [as 别名]
# 或者: from sklearn.linear_model.LogisticRegressionCV import fit [as 别名]
def train(trainingData, pklFile):
    """Fit a LogisticRegressionCV on `trainingData` and persist it.

    Parameters
    ----------
    trainingData : 2-D array whose last column is the label and whose
        preceding columns are the features.
    pklFile : output path for the pickled model; when empty, a fresh
        `learntModel/learntModel.pkl` is created.

    Returns
    -------
    dict with the fitted intercept, coefficients, chosen C and the
    training-set accuracy.
    """
    # ========================================================================= #
    # =============== STEP 1. DEFINE OUTPUT LEARNT MODEL FILE ================= #
    # ========================================================================= #
    if (pklFile == ''):
        # `&&` (not `&`): the original `&` backgrounded the rm, so mkdir
        # raced against it and could be deleted by the still-running rm.
        os.system('rm -rf learntModel && mkdir learntModel')
        pklFile = 'learntModel/learntModel.pkl'

    # ========================================================================= #
    # ================= STEP 2. PREPARE AND FORMATTING DATA =================== #
    # ========================================================================= #
    NUMBER_OF_FEATURES = len(trainingData[0]) - 1
    x = trainingData[:, range(0, NUMBER_OF_FEATURES)]
    y = trainingData[:, NUMBER_OF_FEATURES]

    # ========================================================================= #
    # ===== STEP 3. PERFORM FITTING WITH THE BEST ALPHA AND SAVE THE MODEL ==== #
    # ========================================================================= #
    # Candidate inverse-regularization strengths searched by the CV.
    # (The original also declared minSquareError / targetAlpha but never
    # used them; removed.)
    alphas = np.logspace(-10, -2, 500)
    clf = LogisticRegressionCV(Cs=alphas)
    clf.fit(x, y)
    joblib.dump(clf, pklFile)

    return {"intercept": clf.intercept_, "coef": clf.coef_, "alpha": clf.C_, "accuracy": clf.score(x, y)}
示例7: optimal_l2
# 需要导入模块: from sklearn.linear_model import LogisticRegressionCV [as 别名]
# 或者: from sklearn.linear_model.LogisticRegressionCV import fit [as 别名]
def optimal_l2(X, y):
    """Return the optimal L2 regularization strength (C_) for logistic
    regression, chosen by 10-fold cross-validation over 50 candidates."""
    model = LogisticRegressionCV(Cs=50, cv=10)
    return model.fit(X, y).C_
示例8: make_predictions
# 需要导入模块: from sklearn.linear_model import LogisticRegressionCV [as 别名]
# 或者: from sklearn.linear_model.LogisticRegressionCV import fit [as 别名]
def make_predictions():
    """Fit LogisticRegressionCV on global PCA features, report validation
    log loss, score the test images and write a Kaggle submission CSV.

    Relies on module-level globals: trainX, train, valX, val, IMG_DIR,
    STD_SIZE and prepare_test_data. Python 2 code (print statement).
    """
    # Fit Logistic Regression Model
    # NOTE(review): scoring='log_loss' was renamed 'neg_log_loss' in later
    # sklearn releases — confirm against the pinned version.
    logreg = LogisticRegressionCV(scoring='log_loss', n_jobs=-1, verbose=1, random_state=6156)
    logreg.fit(X=trainX, y=train['y'].values)
    # Validate
    pred_pr = logreg.predict_proba(valX)
    loss = log_loss(y_true=val['y'].values, y_pred=pred_pr)
    print "Validation log loss:", loss
    # Get Test predictions
    img_files = [os.path.join(IMG_DIR, f) for f in os.listdir(IMG_DIR)]
    # Reuse cached PCA features when present; otherwise recompute them.
    if os.path.isfile('test_pca.csv'):
        test_pca = pd.read_csv('test_pca.csv', dtype={'id' : str})
    else:
        test_pca = prepare_test_data(img_files, STD_SIZE)
    # First column of test_pca is the id; the remaining columns are features.
    test_predictions = logreg.predict_proba(test_pca.values[:, 1:])
    # Image id = the digit characters of the filename.
    id_s = [re.sub('\D', '', f) for f in img_files]
    df_id = pd.DataFrame({'id' : id_s})
    # Eight class-probability columns -> col1..col8.
    col_names = ['col'+str(i) for i in range(1, 9)]
    df_yhat = pd.DataFrame(data=test_predictions, columns=col_names)
    df_id_yhat = pd.concat([test_pca['id'], df_yhat], axis=1)
    # Left-join so every listed image gets a row; images with no prediction
    # fall back to the uniform probability 1/8 over the 8 classes.
    yhat = df_id.merge(df_id_yhat, on='id', how='left')
    yhat.fillna(1./8, inplace=True)
    yhat.to_csv('kaggle_430_2pm.csv', index=False)
示例9: lr_with_fs
# 需要导入模块: from sklearn.linear_model import LogisticRegressionCV [as 别名]
# 或者: from sklearn.linear_model.LogisticRegressionCV import fit [as 别名]
def lr_with_fs():
    """
    Submission: lr_with_fs_0620_02.csv
    E_val: <missing>
    E_in: 0.856252488379
    E_out: 0.8552577388980213
    """
    from sklearn.linear_model import LogisticRegressionCV
    from sklearn.preprocessing import StandardScaler
    from sklearn.pipeline import Pipeline

    # Cached training matrices.
    X = util.fetch(util.cache_path('train_X_before_2014-08-01_22-00-47'))
    y = util.fetch(util.cache_path('train_y_before_2014-08-01_22-00-47'))

    # First standardization pass over the raw feature matrix.
    raw_scaler = StandardScaler()
    raw_scaler.fit(X)

    # Prune features with the cached RFE selector, then standardize again
    # on the reduced feature set.
    rfe = util.fetch(util.cache_path('feature_selection.RFE.21'))
    X_pruned = rfe.transform(raw_scaler.transform(X))
    new_scaler = StandardScaler()
    new_scaler.fit(X_pruned)
    X_new = new_scaler.transform(X_pruned)

    # CV-tuned logistic regression on the pruned, rescaled features.
    clf = LogisticRegressionCV(cv=10, scoring='roc_auc', n_jobs=-1)
    clf.fit(X_new, y)
    print(auc_score(clf, X_new, y))

    # Ship the full preprocessing chain with the classifier.
    to_submission(Pipeline([('scale_raw', raw_scaler),
                            ('rfe', rfe),
                            ('scale_new', new_scaler),
                            ('lr', clf)]), 'lr_with_fs_0620_02')
示例10: mdl_1d_cat
# 需要导入模块: from sklearn.linear_model import LogisticRegressionCV [as 别名]
# 或者: from sklearn.linear_model.LogisticRegressionCV import fit [as 别名]
def mdl_1d_cat(x, y):
    """builds univariate model to calculate AUC

    Returns (auc, data_uri) where data_uri is a base64-embedded PNG of the
    categorical plot.
    """
    # Bucket high-cardinality numeric features before one-hot encoding.
    if x.nunique() > 10 and com.is_numeric_dtype(x):
        x = sb_cutz(x)
    # One-hot encode the feature (NaN gets its own dummy column).
    series = pd.get_dummies(x, dummy_na=True)
    lr = LogisticRegressionCV(scoring='roc_auc')
    lr.fit(series, y)
    try:
        # Probability of the positive (last) class.
        preds = (lr.predict_proba(series)[:, -1])
        #preds = (preds > preds.mean()).astype(int)
    except ValueError:
        # NOTE(review): drops into the IPython debugger and leaves `preds`
        # unbound — the roc_auc_score call below would then raise NameError
        # in a non-interactive run.
        Tracer()()
    # Render the categorical plot into an in-memory PNG.
    plot = plot_cat(x, y)
    imgdata = BytesIO()
    plot.savefig(imgdata)
    imgdata.seek(0)
    aucz = roc_auc_score(y, preds)
    # Embed the PNG as a URL-quoted base64 data URI.
    cmatrix = 'data:image/png;base64,' + \
        quote(base64.b64encode(imgdata.getvalue()))
    plt.close()
    return aucz, cmatrix
示例11: Fraud
# 需要导入模块: from sklearn.linear_model import LogisticRegressionCV [as 别名]
# 或者: from sklearn.linear_model.LogisticRegressionCV import fit [as 别名]
class Fraud(object):
def __init__(self):
self.model = None
self.fitted = False
def fit(self, jsonfile, target=0.3):
self.model = LogisticRegressionCV(cv=15, scoring='recall')
X, y = featurize_data(jsonfile)
# Balance the classes
X_oversample, y_oversample = oversample(X, y, target)
print X_oversample, y_oversample
# Fit the model
self.model.fit(X_oversample, y_oversample)
self.fitted = True
def predict(self, X_test):
return self.model.predict(X_test)[0]
def save_model(self, picklefile):
with open(picklefile, 'w') as f:
pickle.dump(self.model, f)
def load_model(self, picklefile):
with open(picklefile, 'r') as f:
self.model = pickle.load(f)
self.fitted = True
示例12: logistic_test_using_cosine
# 需要导入模块: from sklearn.linear_model import LogisticRegressionCV [as 别名]
# 或者: from sklearn.linear_model.LogisticRegressionCV import fit [as 别名]
def logistic_test_using_cosine(score_feature=False):
    """Train/evaluate logistic regression on SNLI word2vec cosine features.

    When `score_feature` is True, a previously computed per-pair score is
    appended as an extra feature column. Python 2 code (the 1/3 subsample
    relies on integer division).
    """
    logger.info('using cosine features in logistic regression')
    if score_feature:
        logger.info('also use score feature')
    # Candidate C grid: powers of 2 plus powers of 3.
    Cs = [2**t for t in range(0, 10, 1)]
    Cs.extend([3**t for t in range(1, 10, 1)])
    snli2cosine = SNLI2Cosine('/home/junfeng/word2vec/GoogleNews-vectors-negative300.bin')
    logger.info('loading snli data ...')
    train_df = pd.read_csv('./snli/snli_1.0/snli_1.0_train.txt', delimiter='\t')
    # Drop pairs with a missing second sentence or no gold label ('-').
    train_df = train_df[pd.notnull(train_df.sentence2)]
    train_df = train_df[train_df.gold_label != '-']
    # Keep only the first third of the training data.
    train_df = train_df[:(len(train_df) / 3)]
    train_df.reset_index(inplace=True)
    test_df = pd.read_csv('./snli/snli_1.0/snli_1.0_test.txt', delimiter='\t')
    test_df = test_df[pd.notnull(test_df.sentence2)]
    test_df = test_df[test_df.gold_label != '-']
    test_df.reset_index(inplace=True)
    X_train, train_labels, X_test, test_labels = snli2cosine.calculate_cosine_features(train_df, test_df)
    if score_feature:
        # Append the cached logistic-score column as one more feature.
        y_train_proba, y_test_proba = joblib.load('./snli/logistic_score_snli.pkl')
        # y_train_proba = y_train_proba.flatten()
        # y_test_proba = y_test_proba.flatten()
        X_train = np.concatenate([X_train, y_train_proba.reshape((-1, 1))], axis=1)
        X_test = np.concatenate([X_test, y_test_proba.reshape((-1, 1))], axis=1)
    logger.info('X_train.shape: {0}'.format(X_train.shape))
    logger.info('X_test.shape: {0}'.format(X_test.shape))
    logreg = LogisticRegressionCV(Cs=Cs, cv=3, n_jobs=10, random_state=919)
    logreg.fit(X_train, train_labels)
    logger.info('best C is {0}'.format(logreg.C_))
    y_test_predicted = logreg.predict(X_test)
    acc = accuracy_score(test_labels, y_test_predicted)
    logger.info('test data predicted accuracy: {0}'.format(acc))
示例13: classify_maxEnt
# 需要导入模块: from sklearn.linear_model import LogisticRegressionCV [as 别名]
# 或者: from sklearn.linear_model.LogisticRegressionCV import fit [as 别名]
def classify_maxEnt(train_X, train_Y, test_X):
    """Train a maximum-entropy classifier (CV-tuned logistic regression) on
    the training set and return its predictions for `test_X`."""
    print("Classifying using Maximum Entropy ...")
    model = LogisticRegressionCV()
    model.fit(train_X, train_Y)
    return model.predict(test_X)
开发者ID:shalinc,项目名称:ML-Sentiment-Analysis-of-Movie-Reviews-from-Twitter,代码行数:10,代码来源:sentiment_analysis.py
示例14: build_classifier_lr
# 需要导入模块: from sklearn.linear_model import LogisticRegressionCV [as 别名]
# 或者: from sklearn.linear_model.LogisticRegressionCV import fit [as 别名]
def build_classifier_lr(data, labels, regularization='l2', **kwargs):
    """Fit and return a class-balanced logistic regression.

    For 'l1'/'l2' the inverse regularization strength is tuned over 100
    candidates by 10-fold CV; any other value trains an effectively
    unregularized model (very large C). Extra keyword arguments are
    forwarded to the estimator.
    """
    if regularization in ('l1', 'l2'):
        model = LogisticRegressionCV(penalty=regularization, Cs=100, cv=10, solver='liblinear', refit=False,
                                     n_jobs=10, verbose=1, class_weight='balanced', **kwargs)
    else:
        # lambda = 1/C: if C->inf lambda -> 0. So if we want no regularization we need to set C to a high value
        model = LogisticRegression(C=100000000., class_weight='balanced', solver='liblinear', n_jobs=10,
                                   verbose=1, **kwargs)
    model.fit(data, labels)
    return model
示例15: fitModels
# 需要导入模块: from sklearn.linear_model import LogisticRegressionCV [as 别名]
# 或者: from sklearn.linear_model.LogisticRegressionCV import fit [as 别名]
def fitModels(training_data, training_labels, test_data, test_labels):
    """Train and report a LogisticRegressionCV and a LinearSVC; return both.

    Each model is fitted on the training split and evaluated on the test
    split via the getReport / getPRF helpers. Other classifiers
    (RandomForest, GBDT, NB, SVM, DecisionTree) can be evaluated the same
    way if needed.
    """
    print('=========fitModels========:')

    # --- Logistic regression with built-in CV over C ---
    print('LogisticRegression: ')
    lr = LogisticRegressionCV()
    lr.fit(training_data, training_labels)  # train the model
    print(lr)
    getReport(lr, test_data, test_labels)
    print('='*50)

    # --- Linear support-vector classifier ---
    print('LinearSVC: ')
    linSVC = LinearSVC()
    linSVC.fit(training_data, training_labels)  # train the model
    predict_labels = linSVC.predict(test_data)  # predict on the test split
    getPRF(predict_labels, test_labels)
    print('='*50)

    return lr, linSVC