本文整理汇总了Python中sklearn.linear_model.SGDClassifier类的典型用法代码示例。如果您正苦于以下问题：Python linear_model.SGDClassifier类的具体用法？Python linear_model.SGDClassifier怎么用？Python linear_model.SGDClassifier使用的例子？那么，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块sklearn.linear_model的用法示例。
在下文中一共展示了linear_model.SGDClassifier类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_validate_sklearn_sgd_with_text_cv
# 需要导入模块: from sklearn import linear_model [as 别名]
# 或者: from sklearn.linear_model import SGDClassifier [as 别名]
def test_validate_sklearn_sgd_with_text_cv(self):
    """Export a CountVectorizer + SGDClassifier pipeline to PMML and validate it.

    The pipeline is fitted on the first four training documents of two
    20-newsgroups categories, written out with ``skl_to_pmml`` and the
    resulting file is checked with ``self.schema.is_valid``.
    """
    newsgroups = fetch_20newsgroups(
        subset='train',
        categories=['alt.atheism', 'talk.religion.misc'],
    )
    # Only the first four documents (and their targets) are used.
    documents = newsgroups.data[:4]
    labels = newsgroups.target[:4]

    # NOTE(review): loss="log" was renamed to "log_loss" in newer
    # scikit-learn releases -- confirm the version this suite targets.
    classifier = SGDClassifier(loss="log")
    pmml_path = type(classifier).__name__ + '_CountVec_.pmml'

    text_pipeline = Pipeline([
        ('vect', CountVectorizer()),
        ('clf', classifier),
    ])
    text_pipeline.fit(documents, labels)

    skl_to_pmml(text_pipeline, ['input'], 'output', pmml_path)
    self.assertEqual(self.schema.is_valid(pmml_path), True)
示例2: test_corrupted_classif
# 需要导入模块: from sklearn import linear_model [as 别名]
# 或者: from sklearn.linear_model import SGDClassifier [as 别名]
def test_corrupted_classif(loss, weighting):
    """Check that RobustWeightedEstimator over SGDClassifier scores above 0.75.

    The estimator is fitted and scored on the same ``X_cc`` / ``y_cc`` data.

    Args:
        loss: Forwarded as the ``loss`` argument of RobustWeightedEstimator.
        weighting: Forwarded as the ``weighting`` argument.
    """
    robust_options = dict(
        loss=loss,
        max_iter=50,
        weighting=weighting,
        k=5,
        c=None,
        random_state=rng,
    )
    robust_clf = RobustWeightedEstimator(SGDClassifier(), **robust_options)
    robust_clf.fit(X_cc, y_cc)
    assert robust_clf.score(X_cc, y_cc) > 0.75
# Classification test without outliers
示例3: test_not_robust_classif
# 需要导入模块: from sklearn import linear_model [as 别名]
# 或者: from sklearn.linear_model import SGDClassifier [as 别名]
def test_not_robust_classif(loss, weighting):
    """Compare RobustWeightedEstimator (k=0, c=1e7, burn_in=0) with plain SGD.

    Both models are fitted on ``X_c`` / ``y_c`` and their decision-function
    outputs are compared: the relative distance between the two outputs,
    minus the relative distance of the robust output to ``y_c``, must be
    below 0.1.

    Args:
        loss: Loss name given to both the robust wrapper and the plain
            SGDClassifier.
        weighting: Forwarded as the ``weighting`` argument of the wrapper.
    """
    robust_clf = RobustWeightedEstimator(
        SGDClassifier(),
        loss=loss,
        max_iter=100,
        weighting=weighting,
        k=0,
        c=1e7,
        burn_in=0,
        random_state=rng,
    )
    plain_clf = SGDClassifier(loss=loss, random_state=rng)

    # Both estimators receive the same ``rng`` object, so the original fit
    # order (robust first, then plain) is kept.
    robust_clf.fit(X_c, y_c)
    plain_clf.fit(X_c, y_c)

    robust_scores = robust_clf.base_estimator_.decision_function(X_c)
    plain_scores = plain_clf.decision_function(X_c)

    norm = np.linalg.norm
    gap_to_plain = norm(robust_scores - plain_scores) / norm(plain_scores)
    gap_to_labels = norm(robust_scores - y_c) / norm(y_c)
    assert gap_to_plain - gap_to_labels < 0.1
# Case "log" loss, test predict_proba
示例4: test_predict_proba
# 需要导入模块: from sklearn import linear_model [as 别名]
# 或者: from sklearn.linear_model import SGDClassifier [as 别名]
def test_predict_proba(weighting):
    """Compare predict_proba of the robust wrapper with plain log-loss SGD.

    Both models are fitted on ``X_c`` / ``y_c``; the second column of their
    ``predict_proba`` outputs is compared with the same relative-distance
    criterion (difference of the two ratios below 0.1).

    Args:
        weighting: Forwarded as the ``weighting`` argument of
            RobustWeightedEstimator.
    """
    # NOTE(review): loss="log" was renamed to "log_loss" in newer
    # scikit-learn releases -- confirm the version this suite targets.
    robust_clf = RobustWeightedEstimator(
        SGDClassifier(loss="log"),
        loss="log",
        max_iter=100,
        weighting=weighting,
        k=0,
        c=1e7,
        burn_in=0,
        random_state=rng,
    )
    plain_clf = SGDClassifier(loss="log", random_state=rng)

    # Both estimators receive the same ``rng`` object, so the original fit
    # order (robust first, then plain) is kept.
    robust_clf.fit(X_c, y_c)
    plain_clf.fit(X_c, y_c)

    robust_proba = robust_clf.base_estimator_.predict_proba(X_c)[:, 1]
    plain_proba = plain_clf.predict_proba(X_c)[:, 1]

    norm = np.linalg.norm
    gap_to_plain = norm(robust_proba - plain_proba) / norm(plain_proba)
    gap_to_labels = norm(robust_proba - y_c) / norm(y_c)
    assert gap_to_plain - gap_to_labels < 0.1
# Regression test with outliers
示例5: demo
# 需要导入模块: from sklearn import linear_model [as 别名]
# 或者: from sklearn.linear_model import SGDClassifier [as 别名]
def demo():
    """Run ``demo_parameterized`` three times, each with its own model list.

    Every run gets separate instances of the same four classifiers
    (Hoeffding tree, SAM-kNN, leveraging bagging, SGD).
    """
    def _new_models():
        # One fresh instance of each classifier per call.
        return [
            HoeffdingTreeClassifier(),
            SAMKNNClassifier(),
            LeveragingBaggingClassifier(random_state=1),
            SGDClassifier(),
        ]

    labels = ['HT', 'SAMKNNClassifier', 'LBkNN', 'SGDC']

    # All three model sets are created before the first demo runs.
    plot_models, sea_models, covtype_models = [_new_models() for _ in range(3)]

    # Demo 1 -- plot should not fail
    demo_parameterized(plot_models, model_names=labels)

    # Demo 2 -- csv output should look nice
    demo_parameterized(sea_models, "sea_stream.csv", False, labels)

    # Demo 3 -- should not give "'NoneType' object is not iterable" error
    demo_parameterized(covtype_models, "covtype.csv", False, labels)
示例6: test_prefit
# 需要导入模块: from sklearn import linear_model [as 别名]
# 或者: from sklearn.linear_model import SGDClassifier [as 别名]
def test_prefit():
    """Exercise the ``prefit`` parameter of SelectFromModel.

    A selector given an already-fitted estimator with ``prefit=True`` and a
    selector fitted with ``prefit=False`` must transform ``data``
    identically, and calling ``fit`` on a ``prefit=True`` selector must
    raise ``ValueError``.
    """
    sgd_options = dict(alpha=0.1, max_iter=10, shuffle=True,
                       random_state=0, tol=None)
    estimator = SGDClassifier(**sgd_options)

    # Reference output: selector fitted on the not-yet-fitted estimator.
    reference_selector = SelectFromModel(estimator)
    reference_selector.fit(data, y)
    expected = reference_selector.transform(data)

    # Fit the estimator itself, then hand it over as a prefit model.
    estimator.fit(data, y)
    prefit_selector = SelectFromModel(estimator, prefit=True)
    assert_array_almost_equal(prefit_selector.transform(data), expected)

    # With prefit=False an already-fitted estimator is fitted again.
    refit_selector = SelectFromModel(estimator, prefit=False)
    refit_selector.fit(data, y)
    assert_array_almost_equal(refit_selector.transform(data), expected)

    # prefit=True combined with a call to fit must raise ValueError.
    locked_selector = SelectFromModel(estimator, prefit=True)
    assert_raises(ValueError, locked_selector.fit, data, y)
示例7: test_multi_output_classification_partial_fit_sample_weights
# 需要导入模块: from sklearn import linear_model [as 别名]
# 或者: from sklearn.linear_model import SGDClassifier [as 别名]
def test_multi_output_classification_partial_fit_sample_weights():
    """Check that a sample weight of 2 matches repeating that sample twice.

    One MultiOutputClassifier is fitted with weights ``[2, 1, 1]``; another
    is fitted without weights on data where the first sample appears twice.
    Both must predict the same outputs for one probe sample.

    NOTE(review): despite the name, the body calls ``fit`` rather than
    ``partial_fit`` -- confirm this is intended.
    """
    # Weighted classifier: the first sample carries weight 2.
    weighted_X = [[1, 2, 3], [4, 5, 6], [1.5, 2.5, 3.5]]
    weighted_y = [[3, 2], [2, 3], [3, 2]]
    weights = np.asarray([2., 1., 1.])
    weighted_clf = MultiOutputClassifier(
        SGDClassifier(random_state=1, max_iter=20))
    weighted_clf.fit(weighted_X, weighted_y, weights)

    # Unweighted classifier: the first sample is listed twice instead.
    repeated_X = [[1, 2, 3], [1, 2, 3], [4, 5, 6], [1.5, 2.5, 3.5]]
    repeated_y = [[3, 2], [3, 2], [2, 3], [3, 2]]
    repeated_clf = MultiOutputClassifier(
        SGDClassifier(random_state=1, max_iter=20))
    repeated_clf.fit(repeated_X, repeated_y)

    probe = [[1.5, 2.5, 3.5]]
    assert_array_almost_equal(repeated_clf.predict(probe),
                              weighted_clf.predict(probe))
示例8: test_sgd_predict_proba_method_access
# 需要导入模块: from sklearn import linear_model [as 别名]
# 或者: from sklearn.linear_model import SGDClassifier [as 别名]
def test_sgd_predict_proba_method_access(klass):
    """Check access to predict_proba / predict_log_proba for every loss.

    For the 'log' and 'modified_huber' losses both attributes must exist;
    for every other loss they must be absent and accessing them must raise
    ``AttributeError`` with a message naming the loss. See
    https://github.com/scikit-learn/scikit-learn/issues/10938 for details.
    """
    # NOTE(review): ``klass`` is not used in the body; every classifier is
    # built from SGDClassifier directly -- confirm whether that is intended.
    proba_attrs = ('predict_proba', 'predict_log_proba')
    losses_with_proba = ('log', 'modified_huber')

    for loss in linear_model.SGDClassifier.loss_functions:
        clf = SGDClassifier(loss=loss)

        if loss in losses_with_proba:
            for attr in proba_attrs:
                assert hasattr(clf, attr)
            continue

        message = ("probability estimates are not "
                   "available for loss={!r}".format(loss))
        for attr in proba_attrs:
            assert not hasattr(clf, attr)
        for attr in proba_attrs:
            with pytest.raises(AttributeError, match=message):
                getattr(clf, attr)
示例9: test_l1_ratio
# 需要导入模块: from sklearn import linear_model [as 别名]
# 或者: from sklearn.linear_model import SGDClassifier [as 别名]
def test_l1_ratio():
    """Check that elastic-net l1_ratio extremes match pure L1 / L2 penalties.

    With ``l1_ratio`` very close to 1 the elastic-net coefficients must
    equal those of ``penalty='l1'``; very close to 0 they must equal those
    of ``penalty='l2'``.
    """
    X, y = datasets.make_classification(
        n_samples=1000, n_features=100, n_informative=20, random_state=1234)

    # Settings shared by all four estimators.
    shared = dict(alpha=0.001, max_iter=6, random_state=42, tol=None)

    def _fitted_coef(**penalty_options):
        # Fit one SGDClassifier and return its coefficient matrix.
        return SGDClassifier(**dict(shared, **penalty_options)).fit(X, y).coef_

    # l1_ratio near 1 -> same result as pure l1
    elastic_coef = _fitted_coef(penalty='elasticnet', l1_ratio=0.9999999999)
    l1_coef = _fitted_coef(penalty='l1')
    assert_array_almost_equal(elastic_coef, l1_coef)

    # l1_ratio near 0 -> same result as pure l2
    elastic_coef = _fitted_coef(penalty='elasticnet', l1_ratio=0.0000000001)
    l2_coef = _fitted_coef(penalty='l2')
    assert_array_almost_equal(elastic_coef, l2_coef)
示例10: test_elastic_net_versus_sgd
# 需要导入模块: from sklearn import linear_model [as 别名]
# 或者: from sklearn.linear_model import SGDClassifier [as 别名]
def test_elastic_net_versus_sgd(C, l1_ratio):
    """Compare elastic-net LogisticRegression (saga) with SGD(loss='log').

    Both models are fitted without intercept on the same scaled synthetic
    data and their coefficients must agree to one decimal place.

    Args:
        C: Value of ``C`` for LogisticRegression; SGD uses
            ``alpha = 1 / C / n_samples``.
        l1_ratio: Elastic-net mixing parameter given to both models.
    """
    n_samples = 500
    X, y = make_classification(
        n_samples=n_samples, n_classes=2, n_features=5, n_informative=5,
        n_redundant=0, n_repeated=0, random_state=1)
    X = scale(X)

    # Options that are identical for the two estimators.
    common = dict(penalty='elasticnet', random_state=1, fit_intercept=False,
                  l1_ratio=l1_ratio)

    # NOTE(review): loss='log' was renamed to 'log_loss' in newer
    # scikit-learn releases -- confirm the version this suite targets.
    sgd = SGDClassifier(tol=-np.inf, max_iter=2000,
                        alpha=1. / C / n_samples, loss='log', **common)
    logreg = LogisticRegression(tol=1e-5, max_iter=1000, C=C, solver='saga',
                                **common)

    sgd.fit(X, y)
    logreg.fit(X, y)
    assert_array_almost_equal(sgd.coef_, logreg.coef_, decimal=1)
示例11: make_classifier
# 需要导入模块: from sklearn import linear_model [as 别名]
# 或者: from sklearn.linear_model import SGDClassifier [as 别名]
def make_classifier(self, name, ids, labels):
    """Train an SVM classifier on the loaded texts.

    A new ``SGDClassifier`` (default settings) is fitted on the rows of
    ``self.tfidf_mat`` selected through ``ids`` and is then stored on the
    object under the attribute ``name``.

    Args:
        name (str): Name under which the classifier is stored.
        ids (list): A list of N ids of texts already stored in the
            TextClassifier.
        labels (list): A list of N labels, one for each text id in ``ids``.

    Raises:
        ValueError: If any entry of ``ids`` is not present in ``self.ids``.

    Note:
        Uses the classifier from `Scikit-learn <http://scikit-learn.org/>`_
    """
    if not np.isin(ids, self.ids).all():
        # The message is written as one literal: a backslash continuation
        # inside the string would embed the next line's indentation in it.
        raise ValueError(
            "Hay ids de textos que no se encuentran almacenados.")

    # NOTE(review): searchsorted presumes self.ids is sorted -- confirm.
    indices = np.searchsorted(self.ids, ids)

    classifier = SGDClassifier()
    classifier.fit(self.tfidf_mat[indices, :], labels)

    # Attach only after a successful fit so a failure never leaves an
    # unfitted classifier on the object.
    setattr(self, name, classifier)
示例12: build_language_classifier
# 需要导入模块: from sklearn import linear_model [as 别名]
# 或者: from sklearn.linear_model import SGDClassifier [as 别名]
def build_language_classifier(texts, labels, verbose=False, random_state=None):
    """Fit and return a character n-gram text classification pipeline.

    The pipeline chains two steps:

    - a ``TfidfVectorizer`` over character 1- to 3-grams
      (``min_df=2``, ``max_df=0.9``, l2-normalised, float32 output);
    - an ``SGDClassifier`` with early stopping enabled
      (``validation_fraction=0.2``, ``n_iter_no_change=3``).

    Args:
        texts: Training documents passed to the pipeline's ``fit``.
        labels: Training labels passed to the pipeline's ``fit``.
        verbose: Forwarded to the ``SGDClassifier``.
        random_state: Forwarded to the ``SGDClassifier``.

    Returns:
        The value returned by the pipeline's ``fit(texts, labels)``.
    """
    char_ngram_tfidf = TfidfVectorizer(
        analyzer="char",
        ngram_range=(1, 3),
        min_df=2,
        max_df=0.9,
        norm="l2",
        dtype=np.float32,
    )
    sgd = SGDClassifier(
        early_stopping=True,
        validation_fraction=0.2,
        n_iter_no_change=3,
        max_iter=1000,
        tol=1e-3,
        alpha=1e-5,
        penalty="l2",
        verbose=verbose,
        random_state=random_state,
    )
    pipeline = make_pipeline(char_ngram_tfidf, sgd)
    return pipeline.fit(texts, labels)
示例13: test_09_sgd_classifier
# 需要导入模块: from sklearn import linear_model [as 别名]
# 或者: from sklearn.linear_model import SGDClassifier [as 别名]
def test_09_sgd_classifier(self):
    """Round-trip a scaled SGD pipeline through PMML (multi-class data).

    The pipeline is fitted, exported with ``skl_to_pmml``, uploaded and
    scored through ``self.adapa_utility``; the returned predictions and
    probabilities are compared with the pipeline's own outputs.
    """
    print("\ntest 09 (SGD Classifier with preprocessing) [multi-class]\n")
    (train_X, test_X, train_y,
     features, target, test_file) = (
        self.data_utility.get_data_for_multi_class_classification())

    # NOTE(review): loss="log" was renamed to "log_loss" in newer
    # scikit-learn releases -- confirm the version this suite targets.
    scaled_sgd = Pipeline([
        ("scaler", StandardScaler()),
        ("model", SGDClassifier(loss="log")),
    ])
    scaled_sgd.fit(train_X, train_y)

    pmml_path = 'test09sklearn.pmml'
    skl_to_pmml(scaled_sgd, features, target, pmml_path)

    deployed_name = self.adapa_utility.upload_to_zserver(pmml_path)
    predictions, probabilities = self.adapa_utility.score_in_zserver(
        deployed_name, test_file)

    expected_pred = scaled_sgd.predict(test_X)
    expected_prob = scaled_sgd.predict_proba(test_X)
    self.assertEqual(
        self.adapa_utility.compare_predictions(predictions, expected_pred),
        True)
    self.assertEqual(
        self.adapa_utility.compare_probability(probabilities, expected_prob),
        True)
示例14: test_10_sgd_classifier
# 需要导入模块: from sklearn import linear_model [as 别名]
# 或者: from sklearn.linear_model import SGDClassifier [as 别名]
def test_10_sgd_classifier(self):
    """Round-trip a scaled SGD pipeline through PMML (binary data).

    The pipeline is fitted, exported with ``skl_to_pmml``, uploaded and
    scored through ``self.adapa_utility``; the returned predictions and
    probabilities are compared with the pipeline's own outputs.
    """
    print("\ntest 10 (SGD Classifier with preprocessing) [binary-class]\n")
    (train_X, test_X, train_y,
     features, target, test_file) = (
        self.data_utility.get_data_for_binary_classification())

    # NOTE(review): loss="log" was renamed to "log_loss" in newer
    # scikit-learn releases -- confirm the version this suite targets.
    scaled_sgd = Pipeline([
        ("scaler", StandardScaler()),
        ("model", SGDClassifier(loss="log")),
    ])
    scaled_sgd.fit(train_X, train_y)

    pmml_path = 'test10sklearn.pmml'
    skl_to_pmml(scaled_sgd, features, target, pmml_path)

    deployed_name = self.adapa_utility.upload_to_zserver(pmml_path)
    predictions, probabilities = self.adapa_utility.score_in_zserver(
        deployed_name, test_file)

    expected_pred = scaled_sgd.predict(test_X)
    expected_prob = scaled_sgd.predict_proba(test_X)
    self.assertEqual(
        self.adapa_utility.compare_predictions(predictions, expected_pred),
        True)
    self.assertEqual(
        self.adapa_utility.compare_probability(probabilities, expected_prob),
        True)
示例15: test_validate_sklearn_sgd_with_text
# 需要导入模块: from sklearn import linear_model [as 别名]
# 或者: from sklearn.linear_model import SGDClassifier [as 别名]
def test_validate_sklearn_sgd_with_text(self):
    """Export a TfidfVectorizer + SGDClassifier pipeline to PMML and validate it.

    The pipeline is fitted on the first four training documents of two
    20-newsgroups categories, written out with ``skl_to_pmml`` and the
    resulting file is checked with ``self.schema.is_valid``.
    """
    newsgroups = fetch_20newsgroups(
        subset='train',
        categories=['alt.atheism', 'talk.religion.misc'],
    )
    # Only the first four documents (and their targets) are used.
    documents = newsgroups.data[:4]
    labels = newsgroups.target[:4]

    # NOTE(review): loss="log" was renamed to "log_loss" in newer
    # scikit-learn releases -- confirm the version this suite targets.
    classifier = SGDClassifier(loss="log")
    pmml_path = type(classifier).__name__ + '_TfIdfVec_.pmml'

    text_pipeline = Pipeline([
        ('vect', TfidfVectorizer()),
        ('clf', classifier),
    ])
    text_pipeline.fit(documents, labels)

    skl_to_pmml(text_pipeline, ['input'], 'output', pmml_path)
    self.assertEqual(self.schema.is_valid(pmml_path), True)