本文整理汇总了Python中sklearn.linear_model.logistic.LogisticRegression类的典型用法代码示例。如果您正苦于以下问题：Python LogisticRegression类的具体用法？Python LogisticRegression怎么用？Python LogisticRegression使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了LogisticRegression类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_regularization_path
def test_regularization_path(self):
    """Compare ``LogisticRegressionL1`` with scikit-learn's L1 logistic fit.

    A small binary classification sample is generated, ``LogisticRegressionL1``
    is fitted over a grid of lambdas, and scikit-learn's
    ``LogisticRegression(penalty='l1')`` is refitted over a grid of ``C``
    values. The intercept and coefficients of the last scikit-learn fit must
    match ``logitfitL1.coef_`` to one decimal place.
    """
    # Check results using logistic path
    num_samples, num_feat = 10, 5
    X, y = make_classification(
        n_samples=num_samples,
        n_features=num_feat,
        n_informative=3,
        n_classes=2,
        random_state=0,
        weights=[0.5, 0.5],
    )

    # Layout handed to LogisticRegressionL1: the features, then a column of
    # ones, then the labels in the last column.
    matrix = np.zeros((num_samples, num_feat + 2))
    matrix[:, :-2] = X
    matrix[:, -2] = np.ones(num_samples)
    matrix[:, -1] = y

    # Betas to test
    logitfitL1 = LogisticRegressionL1()
    lambda_grid = np.exp(-1 * np.linspace(1, 17, 200))
    path = logitfitL1.fit(matrix, lambda_grid)

    # Sklearn: grid of C values starting at the smallest C that yields a
    # non-empty model.
    c_grid = l1_min_c(X, y, loss='log') * np.logspace(0, 3)

    # Computing regularization path using sklearn
    sk_model = LogisticRegression(C=1.0, penalty='l1', tol=1e-6)
    sk_coef_path = []
    for c_value in c_grid:
        sk_model.set_params(C=c_value)
        sk_model.fit(X, y)
        sk_coef_path.append(sk_model.coef_.ravel().copy())

    # Only the fit for the last (largest) C is compared.
    skbetas = np.append(sk_model.intercept_[0], sk_model.coef_)
    np.testing.assert_almost_equal(skbetas, logitfitL1.coef_, 1)
示例2: __init__
class LogReg:
    """Class-balanced logistic regression over CSV train/test files.

    Constructing an instance loads both CSV files, fits the classifier on
    the training rows and computes class probabilities for the test rows.
    """

    def __init__(self):
        self.load_data()
        self.clf = LogisticRegression(class_weight='balanced')
        self.train()
        self.predict()

    def load_data(self):
        """Read ``./data/train.csv`` and ``./data/test.csv`` into arrays.

        Column 0 of the training file becomes ``train_Y`` and column 0 of
        the test file becomes ``test_ID``; the remaining columns of each
        file are the feature matrices ``train_X`` and ``test_X``.
        """
        train_values = pd.read_csv('./data/train.csv', header=0).values
        test_values = pd.read_csv('./data/test.csv', header=0).values

        self.train_X = train_values[:, 1:]
        self.train_Y = train_values[:, 0]
        self.test_X = test_values[:, 1:]
        self.test_ID = test_values[:, 0]

    def train(self):
        """Fit the classifier on the loaded training data."""
        self.clf.fit(self.train_X, self.train_Y)

    def predict(self):
        """Store the per-class probabilities of the test rows in ``test_Y``."""
        self.test_Y = self.clf.predict_proba(self.test_X)

    def get_training_accuracy(self):
        """Return the classifier's score on its own training data."""
        return self.clf.score(self.train_X, self.train_Y)

    def store_result(self):
        """Write test ids and second-column probabilities to the result CSV."""
        submission = pd.DataFrame()
        submission['Id'] = self.test_ID
        # Column 1 of predict_proba's output is written as 'Action'.
        submission['Action'] = self.test_Y[:, 1]
        submission.to_csv('./data/results/c1_result.csv', index=False)
示例3: predictWithThreshold
def predictWithThreshold(datadir, threshold, penalty_type='l2'):
    """Fit and evaluate a thresholded classifier for every entry of *datadir*.

    For each entry (visited in reverse-sorted order) the features of
    ``train.conll`` and ``all.conll`` are collected. The first
    ``len(train features)`` rows of ``all.conll`` are used for fitting and
    the remaining rows for evaluation through ``pred_for_threshold``.
    The mean and standard deviation of every metric are printed; nothing is
    returned.

    :param datadir: directory prefix; it is concatenated directly with the
        entry name, so it has to end with a path separator.
    :param threshold: forwarded to ``pred_for_threshold``.
    :param penalty_type: ``penalty`` argument of ``LogisticRegression``.
    """
    metric_names = ("F1", "Recall", "Accuracy", "Precision")
    maxent = LogisticRegression(penalty=penalty_type)
    scores = defaultdict(list)

    for subdir in sorted(os.listdir(datadir), reverse=True):
        prefix = datadir + subdir
        trainfeatures, _, _ = feats_and_classify.collect_features(prefix + '/train.conll')
        features, labels, _ = feats_and_classify.collect_features(prefix + '/all.conll')

        n_train = len(trainfeatures)
        train_idx = np.array(range(n_train))
        test_idx = np.array(range(n_train, len(features)))

        maxent.fit(features[train_idx], labels[train_idx])
        _, score = pred_for_threshold(maxent, features[test_idx], labels[test_idx], threshold)

        # score is indexed positionally, in the order given by metric_names.
        for position, metric in enumerate(metric_names):
            scores[metric].append(score[position])

    print("\n--")
    for key in sorted(scores):
        values = np.array(scores[key])
        print("%s : %0.2f (+/- %0.2f)" % (key, values.mean(), values.std()))
    print("--")
示例4: cvWithThreshold
def cvWithThreshold(X, y_current_tr, y_current_te, threshold, regularization='l2'):
    """Run 10-fold stratified cross-validation with a decision threshold.

    Folds are stratified on ``y_current_tr``, which also supplies the
    training labels; ``y_current_te`` supplies the labels the held-out rows
    are scored against through ``pred_for_threshold``.

    :param X: feature matrix, indexed by the fold index arrays.
    :param y_current_tr: labels used for stratification and fitting.
    :param y_current_te: labels used for evaluation.
    :param threshold: forwarded to ``pred_for_threshold``.
    :param regularization: ``penalty`` argument of ``LogisticRegression``.
    :return: dict mapping metric name to ``(mean, std)`` over the folds.
    """
    metric_names = ("F1", "Recall", "Accuracy", "Precision")
    scores = defaultdict(list)
    maxent = LogisticRegression(penalty=regularization)
    folds = cross_validation.StratifiedKFold(y_current_tr, n_folds=10, shuffle=False, random_state=None)

    for fold, (train_idx, test_idx) in enumerate(folds, 1):
        # Progress indicator rewritten in place on one line.
        print('\r'+str(fold), end="")
        maxent.fit(X[train_idx], y_current_tr[train_idx])
        _, score = pred_for_threshold(maxent, X[test_idx], y_current_te[test_idx], threshold)
        # score is indexed positionally, in the order given by metric_names.
        for position, metric in enumerate(metric_names):
            scores[metric].append(score[position])

    out_dict = {}
    print("\n--")
    for key in sorted(scores):
        values = np.array(scores[key])
        mean, spread = values.mean(), values.std()
        out_dict[key] = (mean, spread)
        print("%s : %0.2f (+/- %0.2f)" % (key, mean, spread))
    print("--")
    return out_dict
示例5: main
def main():
    """Train on one annotator's labels and write thresholded test predictions.

    Command-line options:
      --threshold  probability cut-off for predicting "1" (default 0.5)
      --annotator  two-digit annotator id selecting the training file
                   (default "03")
      --penalty    "l1" or "l2" regularisation (default "l1")

    Training and test feature dicts are vectorised together so both share
    one feature space. One prediction per line ("1" or "0") is written to
    ``<annotator>.pred`` in the current directory, then the process exits
    with status 0.
    """
    scriptdir = os.path.dirname(os.path.realpath(__file__))
    parser = argparse.ArgumentParser(description="Skeleton for features and classifier for CWI-2016--optimisation of threshhold")
    parser.add_argument('--threshold', type=float, default=0.5)
    parser.add_argument('--annotator', type=str, default="03")
    parser.add_argument('--penalty', type=str, choices=["l1", "l2"], default="l1")
    args = parser.parse_args()

    current_single_ann = scriptdir+"/../data/cwi_training/cwi_training_"+args.annotator+".lbl.conll"
    testfile = scriptdir+"/../data/cwi_testing/cwi_testing.txt.lbl.conll"
    X__dict_train, y_train, v_train = feats_and_classify.collect_features(current_single_ann, vectorize=False)
    X_dict_test, y_test, v_test = feats_and_classify.collect_features(testfile, vectorize=False)

    # Fit the vectorizer on train + test together, then split the rows back
    # apart at the number of training labels.
    featdicts = list(X__dict_train + X_dict_test)
    vect = DictVectorizer()
    X = vect.fit_transform(featdicts).toarray()
    X_train = X[:len(y_train)]
    X_test = X[len(y_train):]

    maxent = LogisticRegression(penalty=args.penalty)
    maxent.fit(X_train, y_train)
    y_pred_proba = maxent.predict_proba(X_test)
    # Column 1 of predict_proba is compared against the threshold.
    ypred_i = ["1" if pair[1] >= args.threshold else "0" for pair in y_pred_proba]

    # The context manager closes the file even if writing fails.
    with open(args.annotator+".pred", mode="w") as fout:
        print("\n".join(ypred_i), file=fout)
    sys.exit(0)
示例6: main
def main():
    """Extract image features for four classes and fit a final classifier.

    Standard, SURF visual-word and TAS features are computed for the images
    returned by ``get_labels``. ``log_classify`` selects the features to
    use, and a ``LogisticRegression`` is fitted on them.
    """
    classes = ['chimp', 'corvette', 'tokyo', 'goldengatebridge']
    images, labels = get_labels(classes)

    # Second argument of get_visual_words.
    k = 256
    # The dict literal evaluates its values in order, so the extractors run
    # in the sequence Std, SURF, TAS.
    feature_dict = {
        'Std': get_standard_features(images),
        'SURF': get_visual_words(images, k),
        'TAS': get_tas_features(images),
        # 'Zernike': zernike_features
    }

    best_features = log_classify(feature_dict, labels)
    classifier = LogisticRegression()
    classifier.fit(best_features, labels)
示例7: mlogistic
def mlogistic():
    """Toy text-classification demo: TF-IDF features + logistic regression.

    The first three sentences are the training samples (labels in ``y``);
    the last two are the test samples. The TF-IDF matrices and the
    predicted labels are printed.

    ``print`` is called with a single parenthesised argument so the function
    runs unchanged on both Python 2 and Python 3.
    """
    X = [
        # The first three sentences are the training samples.
        "fuck you",
        "fuck you all",
        "hello everyone",
        # The last two sentences are the test samples.
        "fuck me",
        "hello boy",
    ]
    # y holds the labels of the training samples.
    y = [1, 1, 0]

    vectorizer = TfidfVectorizer()
    # Fit the TF-IDF transform on the first three sentences.
    X_train = vectorizer.fit_transform(X[:-2])
    print(X_train)
    # Transform the last two sentences with the TF-IDF fitted above.
    X_test = vectorizer.transform(X[-2:])
    print(X_test)

    # Train the logistic regression model.
    classifier = LogisticRegression()
    classifier.fit(X_train, y)

    # Predict the labels of the test samples.
    predictions = classifier.predict(X_test)
    print(predictions)
示例8: test_logreg_cv_penalty
def test_logreg_cv_penalty():
    """Check that LogisticRegressionCV passes the penalty to its final fit.

    With a single candidate ``C`` and the same L1 penalty and solver, the
    cross-validated model must have as many non-zero coefficients as a plain
    ``LogisticRegression``.
    """
    X, y = make_classification(n_samples=50, n_features=20, random_state=0)

    cv_model = LogisticRegressionCV(penalty="l1", Cs=[1.0], solver='liblinear')
    cv_model.fit(X, y)

    plain_model = LogisticRegression(penalty="l1", C=1.0, solver='liblinear')
    plain_model.fit(X, y)

    nonzero_cv = np.count_nonzero(cv_model.coef_)
    nonzero_plain = np.count_nonzero(plain_model.coef_)
    assert_equal(nonzero_cv, nonzero_plain)
示例9: main
def main():
    """Cross-validate a classifier against randomly sampled annotator labels.

    The labels of annotators 01-20 are collected per instance. Features are
    generated once from annotator 01's file. For ``--iterations`` rounds
    (default 5) of shuffled 10-fold cross-validation, each instance's
    training and test label is drawn at random from its annotator labels.
    The mean and standard deviation of every metric are printed and the
    process exits with status 0.

    The scikit-learn metrics are called as ``(y_true, y_pred)``. The earlier
    ``(y_pred, y_true)`` order left accuracy unchanged but swapped the
    reported precision and recall.
    """
    scriptdir = os.path.dirname(os.path.realpath(__file__))
    training_prefix = scriptdir+"/../data/cwi_training/cwi_training_"
    parser = argparse.ArgumentParser(description="Skeleton for features and classifier for CWI-2016--optimisation of threshhold")
    parser.add_argument('--iterations', type=int, default=5)
    args = parser.parse_args()

    annotator_ids = "01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17 18 19 20".split(" ")
    n_annotators = len(annotator_ids)

    # all_labels[i] lists the label given to instance i by each annotator,
    # in annotator order.
    all_labels = defaultdict(list)
    scores = defaultdict(list)

    for idx in annotator_ids:
        current_single_ann = training_prefix+idx+".lbl.conll"
        _, labels_current, _ = feats_and_classify.collect_features(current_single_ann, vectorize=False, generateFeatures=False)
        for instance_index, label in enumerate(labels_current):
            all_labels[instance_index].append(label)

    # Features are generated once, from annotator 01's file.
    current_single_ann = training_prefix+"01.lbl.conll"
    feats, labels_current, v_current = feats_and_classify.collect_features(current_single_ann, vectorize=True, generateFeatures=True)

    for it in range(args.iterations):
        for TrainIndices, TestIndices in cross_validation.KFold(n=feats.shape[0], n_folds=10, shuffle=True, random_state=None):
            maxent = LogisticRegression(penalty='l2')

            TrainX_i = feats[TrainIndices]
            Trainy_i = [all_labels[x][random.randrange(0, n_annotators)] for x in TrainIndices]

            TestX_i = feats[TestIndices]
            Testy_i = [all_labels[x][random.randrange(0, n_annotators)] for x in TestIndices]

            maxent.fit(TrainX_i, Trainy_i)
            ypred_i = maxent.predict(TestX_i)

            # scikit-learn metrics take the ground truth first.
            acc = accuracy_score(Testy_i, ypred_i)
            pre = precision_score(Testy_i, ypred_i)
            rec = recall_score(Testy_i, ypred_i)
            # shared task uses f1 of *accuracy* and recall!
            f1 = 2 * acc * rec / (acc + rec)

            scores["Accuracy"].append(acc)
            scores["F1"].append(f1)
            scores["Precision"].append(pre)
            scores["Recall"].append(rec)

    print("--")
    for key in sorted(scores):
        currentmetric = np.array(scores[key])
        print("%s : %0.2f (+/- %0.2f)" % (key, currentmetric.mean(), currentmetric.std()))
    print("--")
    sys.exit(0)
示例10: classify_logistic
def classify_logistic(train_features, train_labels, test_features):
    """Fit a logistic regression and return its predictions for the test set.

    The fitted model is handed to ``save_pickle`` under the name
    ``"logistic"`` when the module-level flag ``SAVE`` is truthy and ``TEST``
    is falsy.

    :param train_features: training feature matrix.
    :param train_labels: training labels.
    :param test_features: feature matrix to predict on.
    :return: the result of ``predict`` on ``test_features``.
    """
    model = LogisticRegression()
    model.fit(train_features, train_labels)

    if not TEST and SAVE:
        save_pickle("logistic", model)

    predictions = model.predict(test_features)
    return predictions
示例11: my_module
def my_module(rt, params, inputs, outputs):
    """Fit a logistic regression on pickled data and pickle the fitted model.

    :param rt: unused in this function.
    :param params: unused in this function.
    :param inputs: object whose ``X`` and ``Y`` attributes are paths to the
        pickled training features and labels.
    :param outputs: object whose ``MODEL`` attribute is the path the fitted
        model is pickled to.

    Files are opened in binary mode, which pickle requires on Python 3, and
    through context managers so they are closed even when loading or
    dumping fails.
    """
    # NOTE(review): pickle.load can execute arbitrary code contained in the
    # file; only run this on inputs from a trusted source.
    with open(inputs.X, 'rb') as x_file:
        X = pickle.load(x_file)
    with open(inputs.Y, 'rb') as y_file:
        Y = pickle.load(y_file)

    model = LogisticRegression()
    model.fit(X, Y)

    with open(outputs.MODEL, 'wb') as model_file:
        pickle.dump(model, model_file)
    print("Done")
示例12: test_dtype_match
def test_dtype_match():
    """Check that float32 input is not upcast to float64 by the solver.

    For every solver / multi_class combination the fitted coefficients must
    keep the dtype of the input (dense float32, sparse float32 and dense
    float64), and the float32 and float64 fits must agree numerically.
    """
    # Test that np.float32 input data is not cast to np.float64 when possible
    features = np.array(X)
    targets = np.array(Y1)
    X_32, y_32 = features.astype(np.float32), targets.astype(np.float32)
    X_64, y_64 = features.astype(np.float64), targets.astype(np.float64)
    X_sparse_32 = sp.csr_matrix(X, dtype=np.float32)

    for solver in ['newton-cg']:
        for multi_class in ['ovr', 'multinomial']:
            options = dict(solver=solver, multi_class=multi_class)

            # Check type consistency
            model_32 = LogisticRegression(**options)
            model_32.fit(X_32, y_32)
            assert_equal(model_32.coef_.dtype, X_32.dtype)

            # check consistency with sparsity
            model_32_sparse = LogisticRegression(**options)
            model_32_sparse.fit(X_sparse_32, y_32)
            assert_equal(model_32_sparse.coef_.dtype, X_sparse_32.dtype)

            # Check accuracy consistency
            model_64 = LogisticRegression(**options)
            model_64.fit(X_64, y_64)
            assert_equal(model_64.coef_.dtype, X_64.dtype)
            assert_almost_equal(model_32.coef_,
                                model_64.coef_.astype(np.float32))
示例13: test_dtype_match
def test_dtype_match():
    """Skipped test: float32 input should not be upcast to float64.

    The test raises ``SkipTest`` immediately. The checks below the raise are
    kept so the test can be re-enabled by deleting that single statement.
    """
    # Disabled to unblock the 0.19.2 release. See:
    # https://github.com/scikit-learn/scikit-learn/issues/11438
    raise SkipTest()

    # Everything below is intentionally unreachable while the test is
    # disabled.
    # Test that np.float32 input data is not cast to np.float64 when possible
    features = np.array(X)
    targets = np.array(Y1)
    X_32, y_32 = features.astype(np.float32), targets.astype(np.float32)
    X_64, y_64 = features.astype(np.float64), targets.astype(np.float64)
    X_sparse_32 = sp.csr_matrix(X, dtype=np.float32)

    for solver in ['newton-cg']:
        for multi_class in ['ovr', 'multinomial']:
            options = dict(solver=solver, multi_class=multi_class)

            # Check type consistency
            model_32 = LogisticRegression(**options)
            model_32.fit(X_32, y_32)
            assert_equal(model_32.coef_.dtype, X_32.dtype)

            # check consistency with sparsity
            model_32_sparse = LogisticRegression(**options)
            model_32_sparse.fit(X_sparse_32, y_32)
            assert_equal(model_32_sparse.coef_.dtype, X_sparse_32.dtype)

            # Check accuracy consistency
            model_64 = LogisticRegression(**options)
            model_64.fit(X_64, y_64)
            assert_equal(model_64.coef_.dtype, X_64.dtype)
            assert_almost_equal(model_32.coef_,
                                model_64.coef_.astype(np.float32))
示例14: test_logreg_l1_sparse_data
def test_logreg_l1_sparse_data():
    """Check L1 logistic regression on sparse data across solvers.

    liblinear and saga must agree on the coefficients, the appended noise
    and constant features must get zero weight, and saga must give the same
    result on the sparse matrix and on its dense copy.
    """
    # Because liblinear penalizes the intercept and saga does not, we do not
    # fit the intercept to make it possible to compare the coefficients of
    # the two models at convergence.
    rng = np.random.RandomState(42)
    n_samples = 50
    base, y = make_classification(n_samples=n_samples, n_features=20,
                                  random_state=0)
    noise = rng.normal(scale=0.1, size=(n_samples, 3))
    constant = np.zeros(shape=(n_samples, 2))

    # The last five columns are the three noise and two constant features.
    dense = np.concatenate((base, noise, constant), axis=1)
    dense[dense < 1] = 0
    X_sparse = sparse.csr_matrix(dense)

    common = dict(penalty="l1", C=1.0, fit_intercept=False, tol=1e-10)
    saga_options = dict(common, solver='saga', max_iter=1000)

    lr_liblinear = LogisticRegression(solver='liblinear', **common)
    lr_liblinear.fit(X_sparse, y)

    lr_saga = LogisticRegression(**saga_options)
    lr_saga.fit(X_sparse, y)
    assert_array_almost_equal(lr_saga.coef_, lr_liblinear.coef_)

    # Noise and constant features should be regularized to zero by the l1
    # penalty
    expected_zeros = np.zeros(5)
    assert_array_almost_equal(lr_liblinear.coef_[0, -5:], expected_zeros)
    assert_array_almost_equal(lr_saga.coef_[0, -5:], np.zeros(5))

    # Check that solving on the sparse and dense data yield the same results
    lr_saga_dense = LogisticRegression(**saga_options)
    lr_saga_dense.fit(X_sparse.toarray(), y)
    assert_array_almost_equal(lr_saga.coef_, lr_saga_dense.coef_)
示例15: test_nnet
def test_nnet(n_samples=200, n_features=7, distance=0.8, complete=False):
    """
    Fit the network classes listed in ``nn_types`` on a generated sample and
    check their ROC AUC on that same sample.

    :param n_samples: forwarded to ``generate_sample``
    :param n_features: forwarded to ``generate_sample``
    :param distance: forwarded to ``generate_sample``
    :param complete: if True, all possible combinations will be checked, and quality is printed
    :raises RuntimeError: in the default mode, when a combination does not
        reach ROC AUC above 0.8 within three attempts
    :raises AssertionError: always, at the end of the ``complete`` mode, so
        that the printed results get shown
    """
    X, y = generate_sample(n_samples=n_samples, n_features=n_features, distance=distance)

    nn_types = [
        nnet.SimpleNeuralNetwork,
        nnet.MLPClassifier,
        nnet.SoftmaxNeuralNetwork,
        nnet.RBFNeuralNetwork,
        nnet.PairwiseNeuralNetwork,
        nnet.PairwiseSoftplusNeuralNetwork,
    ]

    if complete:
        # checking all possible combinations
        for loss in nnet.losses:
            for NNType in nn_types:
                for trainer in nnet.trainers:
                    nn = NNType(layers=[5], loss=loss, trainer=trainer, random_state=42, epochs=100)
                    nn.fit(X, y )
                    print(roc_auc_score(y, nn.predict_proba(X)[:, 1]), nn)

        # Logistic regression on the same data, printed as a baseline.
        lr = LogisticRegression().fit(X, y)
        print(lr, roc_auc_score(y, lr.predict_proba(X)[:, 1]))

        # Deliberate failure so the printed output is displayed.
        assert 0 == 1, "Let's see and compare results"
    else:
        # checking combinations of losses, nn_types, trainers, most of them are used once during tests.
        attempts = max(len(nnet.losses), len(nnet.trainers), len(nn_types))
        # Random offsets change which loss and trainer get paired with each
        # network type from run to run.
        losses_shift = numpy.random.randint(10)
        trainers_shift = numpy.random.randint(10)
        for attempt in range(attempts):
            # each combination is tried 3 times before raising an exception
            retry_attempts = 3
            for retry_attempt in range(retry_attempts):
                loss = list(nnet.losses.keys())[(attempt + losses_shift) % len(nnet.losses)]
                trainer = list(nnet.trainers.keys())[(attempt + trainers_shift) % len(nnet.trainers)]

                nn_type = nn_types[attempt % len(nn_types)]

                # Each retry uses a different random_state.
                nn = nn_type(layers=[5], loss=loss, trainer=trainer, random_state=42 + retry_attempt, epochs=200)
                print(nn)
                nn.fit(X, y)
                quality = roc_auc_score(y, nn.predict_proba(X)[:, 1])
                # The result is unused; the call only checks that
                # compute_loss runs on the fitted network.
                computed_loss = nn.compute_loss(X, y)
                if quality > 0.8:
                    break
                else:
                    print('attempt {} : {}'.format(retry_attempt, quality))
                    if retry_attempt == retry_attempts - 1:
                        raise RuntimeError('quality of model is too low: {} {}'.format(quality, nn))