本文整理汇总了 Python 中 sklearn.svm.LinearSVC.set_params 方法的典型用法代码示例。如果您正苦于以下问题：Python LinearSVC.set_params 方法的具体用法？Python LinearSVC.set_params 怎么用？Python LinearSVC.set_params 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 sklearn.svm.LinearSVC 的用法示例。
在下文中一共展示了 LinearSVC.set_params 方法的 8 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: train_classifier
# 需要导入模块: from sklearn.svm import LinearSVC [as 别名]
# 或者: from sklearn.svm.LinearSVC import set_params [as 别名]
def train_classifier(train_X, train_y, dev_X, dev_y):
    """Pick ``C`` for a ``LinearSVC`` on a dev split, then refit on train + dev.

    Every candidate from the ``'C_opts'`` config entry is fitted on the
    training split and ranked by micro-averaged F1 on the dev split.  The
    best candidate is finally refitted on the stacked train + dev data.

    :param train_X: Training feature matrix.
    :param train_y: Training labels.
    :param dev_X: Development (validation) feature matrix.
    :param dev_y: Development labels.
    :return: ``(clsf, scaler)``: the refitted classifier and the
        ``StandardScaler``.  The scaler is only fitted when
        ``conf['normalize']`` is truthy.
    """
    conf = get_config()

    # Normalize data
    scaler = StandardScaler()
    if conf['normalize']:
        train_X = scaler.fit_transform(train_X)
        dev_X = scaler.transform(dev_X)

    # Explore param classifier
    clsf = LinearSVC(random_state=0)
    # NOTE(review): eval() runs arbitrary code from the configuration value.
    # Kept because the option may legitimately hold an expression; only use
    # with trusted configuration.
    C_opts = eval(conf['C_opts'])
    # Allocate an explicit float buffer.  np.zeros_like(C_opts) would inherit
    # the dtype of the candidates, so a list of integer C values would
    # truncate every F1 score (a value in [0, 1]) down to 0.
    scores = np.zeros(len(C_opts), dtype=float)
    for i, c in enumerate(C_opts):
        print('exploring {0}-th param'.format(i))
        clsf.set_params(C=c).fit(train_X, train_y)
        pred_y = clsf.predict(dev_X)
        _, _, f1, _ = precision_recall_fscore_support(
            dev_y, pred_y, average='micro')
        scores[i] = f1

    # Refit the winning configuration on all available labelled data.
    best_c = C_opts[scores.argmax()]
    clsf.set_params(C=best_c).fit(
        np.vstack((train_X, dev_X)), np.hstack((train_y, dev_y)))
    return clsf, scaler
示例2: test_grid_search_correct_score_results
# 需要导入模块: from sklearn.svm import LinearSVC [as 别名]
# 或者: from sklearn.svm.LinearSVC import set_params [as 别名]
def test_grid_search_correct_score_results():
    """Check that ``GridSearchCV`` stores the right per-split scores.

    For the ``f1`` and ``roc_auc`` scorers, the expected result keys must be
    present in ``cv_results_`` and every stored split score must match a
    score recomputed by hand on the same stratified split.
    """
    n_splits = 3
    clf = LinearSVC(random_state=0)
    X, y = make_blobs(random_state=0, centers=2)
    Cs = [.1, 1, 10]
    for score in ['f1', 'roc_auc']:
        grid_search = GridSearchCV(clf, {'C': Cs}, scoring=score, cv=n_splits)
        results = grid_search.fit(X, y).cv_results_

        # Test scorer names
        result_keys = list(results.keys())
        split_keys = tuple("split%d_test_score" % cv_i
                           for cv_i in range(n_splits))
        expected_keys = ("mean_test_score", "rank_test_score") + split_keys
        assert_true(all(in1d(expected_keys, result_keys)))

        cv = StratifiedKFold(n_splits=n_splits)
        n_splits = grid_search.n_splits_
        for candidate_i, C in enumerate(Cs):
            clf.set_params(C=C)
            # Scores the grid search recorded for this candidate, one per split.
            cv_scores = np.array([
                grid_search.cv_results_['split%d_test_score' % s][candidate_i]
                for s in range(n_splits)
            ])
            for i, (train, test) in enumerate(cv.split(X, y)):
                clf.fit(X[train], y[train])
                if score == "f1":
                    correct_score = f1_score(y[test], clf.predict(X[test]))
                elif score == "roc_auc":
                    dec = clf.decision_function(X[test])
                    correct_score = roc_auc_score(y[test], dec)
                assert_almost_equal(correct_score, cv_scores[i])
示例3: test_grid_search_score_consistency
# 需要导入模块: from sklearn.svm import LinearSVC [as 别名]
# 或者: from sklearn.svm.LinearSVC import set_params [as 别名]
def test_grid_search_score_consistency():
    """Check that grid-search fold scores match manually recomputed scores.

    Written against the ``grid_scores_`` attribute and the
    ``StratifiedKFold(n_folds=..., y=...)`` constructor, whose instance is
    iterated directly to obtain ``(train, test)`` index pairs.
    """
    clf = LinearSVC(random_state=0)
    X, y = make_blobs(random_state=0, centers=2)
    Cs = [.1, 1, 10]
    for score in ['f1', 'roc_auc']:
        grid_search = GridSearchCV(clf, {'C': Cs}, scoring=score)
        grid_search.fit(X, y)
        cv = StratifiedKFold(n_folds=3, y=y)
        for C, grid_entry in zip(Cs, grid_search.grid_scores_):
            clf.set_params(C=C)
            # get the separate runs from grid scores
            fold_scores = grid_entry[2]
            for fold, (train, test) in enumerate(cv):
                clf.fit(X[train], y[train])
                if score == "f1":
                    correct_score = f1_score(y[test], clf.predict(X[test]))
                elif score == "roc_auc":
                    dec = clf.decision_function(X[test])
                    correct_score = roc_auc_score(y[test], dec)
                assert_almost_equal(correct_score, fold_scores[fold])
示例4: test_grid_search_correct_score_results
# 需要导入模块: from sklearn.svm import LinearSVC [as 别名]
# 或者: from sklearn.svm.LinearSVC import set_params [as 别名]
def test_grid_search_correct_score_results():
    """Check that ``dcv.GridSearchCV`` stores the right per-split scores.

    For the ``f1`` and ``roc_auc`` scorers, the expected result keys must be
    present in ``cv_results_`` and every stored split score must match a
    score recomputed by hand on the same stratified split.
    """
    n_splits = 3
    clf = LinearSVC(random_state=0)
    X, y = make_blobs(random_state=0, centers=2)
    Cs = [.1, 1, 10]
    for score in ['f1', 'roc_auc']:
        # XXX: It seems there's some global shared state in LinearSVC - fitting
        # multiple `SVC` instances in parallel using threads sometimes results
        # in wrong results. This only happens with threads, not processes/sync.
        # For now, we'll fit using the sync scheduler.
        grid_search = dcv.GridSearchCV(clf, {'C': Cs}, scoring=score,
                                       cv=n_splits, scheduler='sync')
        cv_results = grid_search.fit(X, y).cv_results_

        # Test scorer names
        result_keys = list(cv_results.keys())
        split_keys = tuple("split%d_test_score" % cv_i
                           for cv_i in range(n_splits))
        expected_keys = ("mean_test_score", "rank_test_score") + split_keys
        assert all(in1d(expected_keys, result_keys))

        cv = StratifiedKFold(n_splits=n_splits)
        n_splits = grid_search.n_splits_
        for candidate_i, C in enumerate(Cs):
            clf.set_params(C=C)
            # Scores the grid search recorded for this candidate, one per split.
            cv_scores = np.array([
                grid_search.cv_results_['split%d_test_score' % s][candidate_i]
                for s in range(n_splits)
            ])
            for i, (train, test) in enumerate(cv.split(X, y)):
                clf.fit(X[train], y[train])
                if score == "f1":
                    correct_score = f1_score(y[test], clf.predict(X[test]))
                elif score == "roc_auc":
                    dec = clf.decision_function(X[test])
                    correct_score = roc_auc_score(y[test], dec)
                assert_almost_equal(correct_score, cv_scores[i])
示例5: run_fold
# 需要导入模块: from sklearn.svm import LinearSVC [as 别名]
# 或者: from sklearn.svm.LinearSVC import set_params [as 别名]
def run_fold(path, fold_i, C_list=None):
    """Train and evaluate a linear SVM on one pre-split cross-validation fold.

    Loads ``train.pkl``, ``valid.pkl`` and ``test.pkl`` from
    ``<path>/fold<fold_i>/``, standardizes all three with statistics taken
    from the training split, then either trains with the single given ``C``
    or selects ``C`` from ``C_list`` on the validation split.

    :param path: Directory containing the ``fold<i>`` sub-directories.
    :param fold_i: Integer index of the fold to run.
    :param C_list: Candidate values for ``C``; defaults to ``[1.0]``.  With a
        single candidate no validation search is done and the reported
        validation error is ``-1.``.
    :return: ``(valid_error, test_error, C)`` where ``C`` is the value held
        by the final model.
    """
    # Avoid a shared mutable default argument.
    if C_list is None:
        C_list = [1.0]

    # NOTE(review): pickle.load can execute arbitrary code; only point this
    # at trusted fold files.
    # Open in binary mode and close each handle deterministically (the
    # original left three files open and read pickles in text mode).
    splits = {}
    for split_name in ('train', 'valid', 'test'):
        split_path = '%s/fold%i/%s.pkl' % (path, fold_i, split_name)
        with open(split_path, 'rb') as handle:
            splits[split_name] = pickle.load(handle)
    train, valid, test = splits['train'], splits['valid'], splits['test']

    # Standardize every split with the statistics of the training split.
    preproc = Standardize(train.X)
    train.X = preproc.apply(train.X)
    valid.X = preproc.apply(valid.X)
    test.X = preproc.apply(test.X)

    train_y = process_labels(train.y)
    valid_y = process_labels(valid.y)
    test_y = process_labels(test.y)

    svm = LinearSVC(C=1.0, loss='l2', penalty='l2')
    if len(C_list) == 1:
        svm.set_params(C=C_list[0])
        svm = retrain_svm(svm, (train.X, train_y), (valid.X, valid_y))
        valid_error = -1.
    else:
        (svm, valid_error) = cross_validate_svm(
            svm, (train.X, train_y), (valid.X, valid_y), C_list)
        svm = retrain_svm(svm, (train.X, train_y), (valid.X, valid_y))

    test_error = compute_test_error(svm, (test.X, test_y))
    print('Fold %i: valid_error = %f\t test_error = %f' % (fold_i, valid_error, test_error))
    return (valid_error, test_error, svm.C)
示例6: GridSearchCV
# 需要导入模块: from sklearn.svm import LinearSVC [as 别名]
# 或者: from sklearn.svm.LinearSVC import set_params [as 别名]
'penalty':['l1'],
'dual':[False],
'tol':[1.0e-3],
'random_state':[0]}]
n_jobs_ = 1
num_cv_ = 5

# Search the parameter grid with cross-validated F1, then train a fresh
# LinearSVC with the winning parameters on the full training set.
clf_cv = GridSearchCV(LinearSVC(),
                      parameters,
                      scoring="f1",
                      cv=num_cv_, n_jobs=n_jobs_,
                      verbose=10)
clf_cv.fit(X_train, y_train)
print(clf_cv.best_params_)

clf = LinearSVC()
clf.set_params(**clf_cv.best_params_)
del clf_cv
clf.fit(X_train, y_train)

# The guard must test the same attribute that is called ("predict_proba");
# the original tested the misspelled "predict_prob", which never matches.
if hasattr(clf, "predict_proba"):
    prob_pos = clf.predict_proba(X_test)[:, 1]
else:  # use decision function
    prob_pos = clf.decision_function(X_test)
    # Min-max scale the decision values into [0, 1].
    prob_pos = \
        (prob_pos - prob_pos.min()) / (prob_pos.max() - prob_pos.min())
print(prob_pos)

# confusion_matrix puts true labels on rows and predictions on columns, in
# sorted label order, so for a binary problem the layout is
# [[TN, FP], [FN, TP]].  The original unpacked it as [[TP, FP], [FN, TN]],
# which swapped TP and TN and corrupted precision and recall.
# NOTE(review): assumes the positive class is the larger label (e.g. 0/1 or
# -1/1); confirm against the label encoding of y_test.
[[TN, FP], [FN, TP]] = metrics.confusion_matrix(y_test, clf.predict(X_test))
accuracy = float(TP + TN) / float(TP + FP + FN + TN)
precision = float(TP) / float(TP + FP)
recall = float(TP) / float(TP + FN)
示例7: DualSvm
# 需要导入模块: from sklearn.svm import LinearSVC [as 别名]
# 或者: from sklearn.svm.LinearSVC import set_params [as 别名]
class DualSvm(object):
"""
This is is the implementation of a combined Support Vector classifier.
The goal is to trade accuracy for speed by giving the 'hardest' points to the second classifier.
The combined classifier consists of a linearSVC classifier (less accurate) and a SVC classifier with RBF-Kernel (more accurate).
The user has to set a trade-off parameter k, which determines how many points percentage-wise are given to the second classifier.
The points are chosen according to their distance to the hyperplane of the linear classifier.
"""
def __init__(self, use_distance=True, c_lin=0.001, c_gauss=10, gamma=0.01, k=0, verbose=True):
    """
    Store the hyper-parameters and build the two wrapped estimators.

    :param use_distance: Stored as ``_use_distance``; how it is used is not visible in this excerpt.
    :param c_lin: Penalty parameter C of the error term of the linear support vector machine.
    :param c_gauss: Penalty parameter C of the error term of gaussian support vector machine
    :param gamma: Kernel coefficient for the gaussian svm
    :param k: k has to be in the range [0,1]. It determines which percentage of closest points should be given to the gaussian svm, sorted by their margins.
    :param verbose: Debug parameter for logging events into a file debug.txt.
    """
    # Flags
    self._use_distance, self._verbose = use_distance, verbose

    # Parameters
    self._c_lin, self._c_gauss = c_lin, c_gauss
    self._gamma, self._k = gamma, k

    # Point counters start at the placeholder value -1.
    self._n_gauss = self._n_lin = -1

    # Intern objects
    self._lin_svc = LinearSVC(C=c_lin)
    self._gauss_svc = SVC(C=c_gauss, kernel="rbf", gamma=gamma)
    self._gauss_distance = 0
# region Getters and Setters
@property
def c_lin(self):
    """
    Penalty parameter C currently configured for the linear SVM.
    """
    return self._c_lin
@c_lin.setter
def c_lin(self, value):
    # Keep the cached value and the wrapped LinearSVC in sync.
    self._c_lin = value
    self._lin_svc.set_params(C=value)
@property
def c_gauss(self):
    """
    Penalty parameter C currently configured for the gauss SVM.
    """
    return self._c_gauss
@c_gauss.setter
def c_gauss(self, value):
    # Keep the cached value and the wrapped SVC in sync.
    self._c_gauss = value
    self._gauss_svc.set_params(C=value)
@property
def gamma(self):
    """
    Kernel coefficient gamma currently configured for the gauss SVM.
    """
    return self._gamma
@gamma.setter
def gamma(self, value):
    # Keep the cached value and the wrapped SVC in sync.
    self._gamma = value
    self._gauss_svc.set_params(gamma=value)
@property
def k(self):
    """
    Share of points (as stored in ``_k``) that should be handed to the second classifier.
    """
    return self._k
@property
def time_fit_lin(self):
    """
    Value stored in ``_time_fit_lin``.

    The attribute is not assigned in ``__init__``; presumably it is set while fitting (not visible here).
    """
    return self._time_fit_lin
@property
def time_fit_gauss(self):
    """
    Value stored in ``_time_fit_gauss``.

    The attribute is not assigned in ``__init__``; presumably it is set while fitting (not visible here).
    """
    return self._time_fit_gauss
@property
def time_overhead(self):
    """
    Value stored in ``_time_overhead``.

    The attribute is not assigned in ``__init__``; presumably it is set elsewhere in the class (not visible here).
    """
    return self._time_overhead
@property
def time_predict(self):
    """
    Value stored in ``_time_predict``.

    The attribute is not assigned in ``__init__``; presumably it is set while predicting (not visible here).
    """
    return self._time_predict
@k.setter
#.........这里部分代码省略.........
示例8: AdjectiveClassifier
# 需要导入模块: from sklearn.svm import LinearSVC [as 别名]
# 或者: from sklearn.svm.LinearSVC import set_params [as 别名]
#.........这里部分代码省略.........
verbose=0,
n_jobs=1
)
grid.fit(self.features, self.labels)
self.svc = grid.best_estimator_
return self
def train_on_separate_dataset(self, test_X, test_Y,
                              train_X=None, train_Y=None,
                              all_Cs=None,
                              score_fun=None,
                              verbose=True):
    """
    Select ``C`` on a separate test set and keep the best ``LinearSVC``.

    One ``LinearSVC(dual=False)`` is fitted per candidate ``C`` on the
    training data and scored on the test data.  The candidate with the
    highest score is refitted and stored in ``self.svc``.

    :param test_X: Features used to score each candidate.
    :param test_Y: Labels used to score each candidate.
    :param train_X: Training features; defaults to ``self.features``.
    :param train_Y: Training labels; defaults to ``self.labels``.
    :param all_Cs: Candidate ``C`` values; defaults to ``np.linspace(1, 1e6, 1000)``.
    :param score_fun: Optional ``score_fun(y_true, y_pred)``; when ``None`` the estimator's own ``score`` is used.
    :param verbose: When true, print the training and testing score of the final model.
    :return: ``self``.
    :raises ValueError: If training data is neither passed nor stored on the instance, or if ``all_Cs`` is empty.
    """
    if train_X is None and not hasattr(self, "features"):
        raise ValueError("No features present, have you run create_features_set?")
    if train_Y is None and not hasattr(self, "labels"):
        raise ValueError("No labels present, have you run create_features_set?")

    if all_Cs is None:
        all_Cs = np.linspace(1, 1e6, 1000)
    if train_X is None:
        train_X = self.features
    if train_Y is None:
        train_Y = self.labels

    # Materialize once so the candidates can be indexed after scoring.
    candidates = list(all_Cs)
    if not candidates:
        raise ValueError("all_Cs must contain at least one candidate value")

    def _evaluate(clf, X, Y):
        # Score with the estimator's default metric unless score_fun is given.
        if score_fun is None:
            return clf.score(X, Y)
        return score_fun(Y, clf.predict(X))

    scores = []
    for C in candidates:
        clf = LinearSVC(C=C, dual=False)
        clf.fit(train_X, train_Y)
        scores.append(_evaluate(clf, test_X, test_Y))

    # Pick the candidate with the highest score.  The original used
    # np.argmax(all_Cs), i.e. the *index* of the largest candidate, as C and
    # never looked at the scores.
    best_C = candidates[int(np.argmax(scores))]
    self.svc = LinearSVC(C=best_C, dual=False).fit(train_X, train_Y)

    if verbose:
        score_training = _evaluate(self.svc, train_X, train_Y)
        score_testing = _evaluate(self.svc, test_X, test_Y)
        print("Training score: %f, testing score: %f" % (score_training,
                                                         score_testing))
    return self
def train_gridsearch(self, n_jobs=1,
                     verbose=0,
                     score_fun=None,
                     ):
    """
    Grid-search ``C`` and ``penalty`` using the stored test set as the only validation fold.

    Training and test data are stacked into one dataset and a single
    ``(train indexes, test indexes)`` split is passed as ``cv``.  A fresh
    ``LinearSVC`` with the best parameters is then fitted on the training
    data, stored in ``self.svc`` and scored on the test data.

    :param n_jobs: Forwarded to ``GridSearchCV``.
    :param verbose: Forwarded to ``GridSearchCV``.
    :param score_fun: Defaults to ``f1_score``.
    :return: ``self``.
    """
    train_X, train_Y = self.features, self.labels
    test_X, test_Y = self.test_features, self.test_labels

    # NOTE(review): score_fun is defaulted here but never used afterwards;
    # the search and the final score use the estimator's own scoring.
    if score_fun is None:
        score_fun = f1_score

    # One hand-made fold: train on the training rows, validate on the test rows.
    n_train = len(train_X)
    train_indexes = range(n_train)
    test_indexes = range(n_train, n_train + len(test_X))
    dataset = np.vstack((train_X, test_X))
    ys = np.hstack((train_Y, test_Y))
    cv = [(train_indexes, test_indexes)]

    params = {
        'C': np.linspace(1, 1e6, 1000),
        'penalty': ('l1', 'l2'),
    }
    grid = GridSearchCV(LinearSVC(dual=False), params, cv=cv,
                        verbose=verbose,
                        n_jobs=n_jobs,
                        refit=False)
    grid.fit(dataset, ys)

    print("Found the best parameters, training with them")
    self.svc = LinearSVC(dual=False)
    self.svc.set_params(**grid.best_params_)
    score = self.svc.fit(train_X, train_Y).score(test_X, test_Y)
    print("Adjective %s, Params: %s, Best score over SEPARATE TEST SET: %f" % (self.adjective,
                                                                               grid.best_params_,
                                                                               score,
                                                                               ))
    return self
def load_test_test(self, database):
    """
    Build the test features and labels from ``database`` unless they are already present.

    :param database: Passed to ``self.create_features_set`` with ``store=False``.
    :return: ``self``.
    """
    already_loaded = hasattr(self, 'test_features') and self.test_features is not None
    if not already_loaded:
        self.test_features, self.test_labels = self.create_features_set(database,
                                                                        store=False
                                                                        )
        return self

    print("Adjective %s already has test features!" % self.adjective)
    return self