本文整理汇总了Python中sklearn.utils.compute_class_weight方法的典型用法代码示例。如果您正苦于以下问题：Python utils.compute_class_weight方法的具体用法？Python utils.compute_class_weight怎么用？Python utils.compute_class_weight使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块sklearn.utils的用法示例。
在下文中一共展示了utils.compute_class_weight方法的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: fit
# 需要导入模块: from sklearn import utils [as 别名]
# 或者: from sklearn.utils import compute_class_weight [as 别名]
def fit(self, X, y):
    """Compute class weights for ``y`` and store them in ``coef_``.

    ``X`` is accepted for API compatibility but is not used. When
    ``self.class_weight`` is ``"balanced"`` the computed weights are
    deliberately shifted by one to simulate a buggy estimator.

    Returns the instance itself.
    """
    from sklearn.preprocessing import LabelEncoder
    from sklearn.utils import compute_class_weight

    label_encoder = LabelEncoder().fit(y)
    classes = label_encoder.classes_
    # ``classes`` and ``y`` are passed by keyword: recent scikit-learn
    # releases reject them as positional arguments.
    class_weight = compute_class_weight(
        self.class_weight, classes=classes, y=y)

    # Intentionally modify the balanced class_weight
    # to simulate a bug and raise an exception
    if self.class_weight == "balanced":
        class_weight += 1.

    # Simply assigning coef_ to the class_weight
    self.coef_ = class_weight
    return self
示例2: initialize_labels
# 需要导入模块: from sklearn import utils [as 别名]
# 或者: from sklearn.utils import compute_class_weight [as 别名]
def initialize_labels(self, Y):
    """Fit the node/link label encoders and derive per-class weights.

    ``Y`` is an iterable of objects exposing ``nodes`` and ``links``
    label sequences. Node classes all receive weight 1.0; link class
    weights come from ``compute_class_weight`` with ``self.class_weight``
    and are rescaled so that the smallest weight equals 1.
    """
    y_nodes_flat = [y_val for y in Y for y_val in y.nodes]
    y_links_flat = [y_val for y in Y for y_val in y.links]
    self.prop_encoder_ = LabelEncoder().fit(y_nodes_flat)
    self.link_encoder_ = LabelEncoder().fit(y_links_flat)

    self.n_prop_states = len(self.prop_encoder_.classes_)
    self.n_link_states = len(self.link_encoder_.classes_)

    self.prop_cw_ = np.ones_like(self.prop_encoder_.classes_,
                                 dtype=np.double)
    # ``classes`` and ``y`` are passed by keyword: recent scikit-learn
    # releases reject them as positional arguments.
    self.link_cw_ = compute_class_weight(
        self.class_weight,
        classes=self.link_encoder_.classes_,
        y=y_links_flat)
    # Normalise so that the smallest link weight is exactly 1.
    self.link_cw_ /= self.link_cw_.min()

    logging.info('Setting node class weights {}'.format(", ".join(
        "{}: {}".format(lbl, cw) for lbl, cw in zip(
            self.prop_encoder_.classes_, self.prop_cw_))))

    logging.info('Setting link class weights {}'.format(", ".join(
        "{}: {}".format(lbl, cw) for lbl, cw in zip(
            self.link_encoder_.classes_, self.link_cw_))))
示例3: test_auto_weight
# 需要导入模块: from sklearn import utils [as 别名]
# 或者: from sklearn.utils import compute_class_weight [as 别名]
def test_auto_weight():
    """Check that ``class_weight='balanced'`` helps on imbalanced data."""
    # Test class weights for imbalanced data
    from sklearn.linear_model import LogisticRegression
    # We take as dataset the two-dimensional projection of iris so
    # that it is not separable, and drop every other sample whose
    # (shifted) label is greater than 2 to make the data imbalanced.
    # We add one to the targets as a non-regression test:
    # class_weight="balanced" used to work only when the labels
    # were a range [0..K).
    from sklearn.utils import compute_class_weight
    X, y = iris.data[:, :2], iris.target + 1
    unbalanced = np.delete(np.arange(y.size), np.where(y > 2)[0][::2])

    classes = np.unique(y[unbalanced])
    # ``classes`` and ``y`` are passed by keyword: recent scikit-learn
    # releases reject them as positional arguments.
    class_weights = compute_class_weight(
        'balanced', classes=classes, y=y[unbalanced])
    # The under-sampled class (last entry of ``classes``) must get the
    # largest weight.
    assert_true(np.argmax(class_weights) == 2)

    for clf in (svm.SVC(kernel='linear'), svm.LinearSVC(random_state=0),
                LogisticRegression()):
        # check that score is better when class='balanced' is set.
        y_pred = clf.fit(X[unbalanced], y[unbalanced]).predict(X)
        clf.set_params(class_weight='balanced')
        y_pred_balanced = clf.fit(X[unbalanced], y[unbalanced]).predict(X)
        assert_true(metrics.f1_score(y, y_pred, average='macro')
                    <= metrics.f1_score(y, y_pred_balanced,
                                        average='macro'))
示例4: test_auto_weight
# 需要导入模块: from sklearn import utils [as 别名]
# 或者: from sklearn.utils import compute_class_weight [as 别名]
def test_auto_weight():
    """Check that ``class_weight='balanced'`` helps on imbalanced data."""
    # Test class weights for imbalanced data
    from sklearn.linear_model import LogisticRegression
    # We take as dataset the two-dimensional projection of iris so
    # that it is not separable, and drop every other sample whose
    # (shifted) label is greater than 2 to make the data imbalanced.
    # We add one to the targets as a non-regression test:
    # class_weight="balanced" used to work only when the labels
    # were a range [0..K).
    from sklearn.utils import compute_class_weight
    X, y = iris.data[:, :2], iris.target + 1
    unbalanced = np.delete(np.arange(y.size), np.where(y > 2)[0][::2])

    classes = np.unique(y[unbalanced])
    # ``classes`` and ``y`` are passed by keyword: recent scikit-learn
    # releases reject them as positional arguments.
    class_weights = compute_class_weight(
        'balanced', classes=classes, y=y[unbalanced])
    # The under-sampled class (last entry of ``classes``) must get the
    # largest weight.
    assert np.argmax(class_weights) == 2

    for clf in (svm.SVC(kernel='linear'), svm.LinearSVC(random_state=0),
                LogisticRegression()):
        # check that score is better when class='balanced' is set.
        y_pred = clf.fit(X[unbalanced], y[unbalanced]).predict(X)
        clf.set_params(class_weight='balanced')
        y_pred_balanced = clf.fit(X[unbalanced], y[unbalanced]).predict(X)
        assert (metrics.f1_score(y, y_pred, average='macro')
                <= metrics.f1_score(y, y_pred_balanced,
                                    average='macro'))
示例5: _compute_class_weight_dictionary
# 需要导入模块: from sklearn import utils [as 别名]
# 或者: from sklearn.utils import compute_class_weight [as 别名]
def _compute_class_weight_dictionary(y):
    """Return the ``'balanced'`` class weights of ``y`` as a dictionary.

    Helper for returning a ``{class_label: weight}`` dictionary instead
    of the array produced by ``compute_class_weight``.
    """
    classes = np.unique(y)
    # ``classes`` and ``y`` are passed by keyword: recent scikit-learn
    # releases reject them as positional arguments.
    class_weight = compute_class_weight("balanced", classes=classes, y=y)
    return dict(zip(classes, class_weight))
示例6: get_class_weights
# 需要导入模块: from sklearn import utils [as 别名]
# 或者: from sklearn.utils import compute_class_weight [as 别名]
def get_class_weights(y):
    """
    Returns the ``'balanced'`` weight for each class based on the
    frequencies of the samples

    :param y: list of true labels (the labels must be hashable)
    :return: dictionary with the weight for each class
    """
    # Compute the distinct labels once and reuse them for both the
    # weight computation and the dictionary keys.
    classes = numpy.unique(y)
    # ``classes`` and ``y`` are passed by keyword: recent scikit-learn
    # releases reject them as positional arguments.
    weights = compute_class_weight('balanced', classes=classes, y=y)
    return dict(zip(classes, weights))
示例7: get_class_weights
# 需要导入模块: from sklearn import utils [as 别名]
# 或者: from sklearn.utils import compute_class_weight [as 别名]
def get_class_weights(y):
    """
    Returns the ``'balanced'`` weight for each class
    based on the frequencies of the samples

    :param y: list of true labels (the labels must be hashable)
    :return: dictionary with the weight for each class
    """
    # Compute the distinct labels once and reuse them for both the
    # weight computation and the dictionary keys.
    classes = numpy.unique(y)
    # ``classes`` and ``y`` are passed by keyword: recent scikit-learn
    # releases reject them as positional arguments.
    weights = compute_class_weight('balanced', classes=classes, y=y)
    return dict(zip(classes, weights))
示例8: test_binary_classifier_class_weight
# 需要导入模块: from sklearn import utils [as 别名]
# 或者: from sklearn.utils import compute_class_weight [as 别名]
def test_binary_classifier_class_weight():
    """tests binary classifier with classweights for each class"""
    alpha = .1
    n_samples = 50
    n_iter = 20
    tol = .00001
    fit_intercept = True
    X, y = make_blobs(n_samples=n_samples, centers=2, random_state=10,
                      cluster_std=0.1)
    step_size = get_step_size(X, alpha, fit_intercept, classification=True)

    # Re-encode the two blob labels as -1 / +1.
    classes = np.unique(y)
    y_tmp = np.ones(n_samples)
    y_tmp[y != classes[1]] = -1
    y = y_tmp

    class_weight = {1: .45, -1: .55}
    clf1 = LogisticRegression(solver='sag', C=1. / alpha / n_samples,
                              max_iter=n_iter, tol=tol, random_state=77,
                              fit_intercept=fit_intercept, multi_class='ovr',
                              class_weight=class_weight)
    clf2 = clone(clf1)

    # Fit once on dense and once on sparse input.
    clf1.fit(X, y)
    clf2.fit(sp.csr_matrix(X), y)

    le = LabelEncoder()
    # ``classes`` and ``y`` are passed by keyword: recent scikit-learn
    # releases reject them as positional arguments.
    class_weight_ = compute_class_weight(
        class_weight, classes=np.unique(y), y=y)
    # Expand the per-class weights into one weight per sample.
    sample_weight = class_weight_[le.fit_transform(y)]

    spweights, spintercept = sag_sparse(X, y, step_size, alpha, n_iter=n_iter,
                                        dloss=log_dloss,
                                        sample_weight=sample_weight,
                                        fit_intercept=fit_intercept)
    spweights2, spintercept2 = sag_sparse(X, y, step_size, alpha,
                                          n_iter=n_iter,
                                          dloss=log_dloss, sparse=True,
                                          sample_weight=sample_weight,
                                          fit_intercept=fit_intercept)

    assert_array_almost_equal(clf1.coef_.ravel(),
                              spweights.ravel(),
                              decimal=2)
    assert_almost_equal(clf1.intercept_, spintercept, decimal=1)

    assert_array_almost_equal(clf2.coef_.ravel(),
                              spweights2.ravel(),
                              decimal=2)
    assert_almost_equal(clf2.intercept_, spintercept2, decimal=1)
示例9: test_multiclass_classifier_class_weight
# 需要导入模块: from sklearn import utils [as 别名]
# 或者: from sklearn.utils import compute_class_weight [as 别名]
def test_multiclass_classifier_class_weight():
    """tests multiclass with classweights for each class"""
    alpha = .1
    n_samples = 20
    tol = .00001
    max_iter = 50
    class_weight = {0: .45, 1: .55, 2: .75}
    fit_intercept = True
    X, y = make_blobs(n_samples=n_samples, centers=3, random_state=0,
                      cluster_std=0.1)
    step_size = get_step_size(X, alpha, fit_intercept, classification=True)
    classes = np.unique(y)

    clf1 = LogisticRegression(solver='sag', C=1. / alpha / n_samples,
                              max_iter=max_iter, tol=tol, random_state=77,
                              fit_intercept=fit_intercept, multi_class='ovr',
                              class_weight=class_weight)
    clf2 = clone(clf1)
    # Fit once on dense and once on sparse input.
    clf1.fit(X, y)
    clf2.fit(sp.csr_matrix(X), y)

    le = LabelEncoder()
    # ``classes`` and ``y`` are passed by keyword: recent scikit-learn
    # releases reject them as positional arguments.
    class_weight_ = compute_class_weight(class_weight, classes=classes, y=y)
    # Expand the per-class weights into one weight per sample.
    sample_weight = class_weight_[le.fit_transform(y)]

    coef1 = []
    intercept1 = []
    coef2 = []
    intercept2 = []
    # One-vs-rest reference: solve one binary (-1 / +1) problem per class.
    for cl in classes:
        y_encoded = np.ones(n_samples)
        y_encoded[y != cl] = -1

        spweights1, spintercept1 = sag_sparse(X, y_encoded, step_size, alpha,
                                              n_iter=max_iter, dloss=log_dloss,
                                              sample_weight=sample_weight)
        spweights2, spintercept2 = sag_sparse(X, y_encoded, step_size, alpha,
                                              n_iter=max_iter, dloss=log_dloss,
                                              sample_weight=sample_weight,
                                              sparse=True)
        coef1.append(spweights1)
        intercept1.append(spintercept1)
        coef2.append(spweights2)
        intercept2.append(spintercept2)

    coef1 = np.vstack(coef1)
    intercept1 = np.array(intercept1)
    coef2 = np.vstack(coef2)
    intercept2 = np.array(intercept2)

    for i, cl in enumerate(classes):
        assert_array_almost_equal(clf1.coef_[i].ravel(),
                                  coef1[i].ravel(),
                                  decimal=2)
        assert_almost_equal(clf1.intercept_[i], intercept1[i], decimal=1)

        assert_array_almost_equal(clf2.coef_[i].ravel(),
                                  coef2[i].ravel(),
                                  decimal=2)
        assert_almost_equal(clf2.intercept_[i], intercept2[i], decimal=1)
示例10: fit
# 需要导入模块: from sklearn import utils [as 别名]
# 或者: from sklearn.utils import compute_class_weight [as 别名]
def fit(self, dataset):
    """Fits the intent classifier with a valid Snips dataset

    The instance is returned untrained (no classifier fitted) when the
    training data yields at most one intent or when the featurizer
    finds no non-empty utterances.

    Returns:
        :class:`LogRegIntentClassifier`: The same instance, trained
    """
    import numpy as np
    from sklearn.linear_model import SGDClassifier
    from sklearn.utils import compute_class_weight

    logger.info("Fitting LogRegIntentClassifier...")
    dataset = validate_and_format_dataset(dataset)
    self.load_resources_if_needed(dataset[LANGUAGE])
    self.fit_builtin_entity_parser_if_needed(dataset)
    self.fit_custom_entity_parser_if_needed(dataset)
    language = dataset[LANGUAGE]

    data_augmentation_config = self.config.data_augmentation_config
    utterances, classes, intent_list = build_training_data(
        dataset, language, data_augmentation_config, self.resources,
        self.random_state)

    self.intent_list = intent_list
    # Nothing to discriminate between with a single intent.
    if len(self.intent_list) <= 1:
        return self

    self.featurizer = Featurizer(
        config=self.config.featurizer_config,
        builtin_entity_parser=self.builtin_entity_parser,
        custom_entity_parser=self.custom_entity_parser,
        resources=self.resources,
        random_state=self.random_state,
    )
    self.featurizer.language = language

    # The highest class index is treated as the "none"/noise class.
    none_class = max(classes)
    try:
        x = self.featurizer.fit_transform(
            dataset, utterances, classes, none_class)
    except _EmptyDatasetUtterancesError:
        logger.warning("No (non-empty) utterances found in dataset")
        self.featurizer = None
        return self

    alpha = get_regularization_factor(dataset)

    # ``classes`` and ``y`` are passed by keyword because recent
    # scikit-learn releases reject them as positional arguments; the
    # class labels are given as an ndarray rather than a ``range``.
    class_weights_arr = compute_class_weight(
        "balanced", classes=np.arange(none_class + 1), y=classes)
    # Re-weight the noise class
    class_weights_arr[-1] *= self.config.noise_reweight_factor
    class_weight = dict(enumerate(class_weights_arr))

    self.classifier = SGDClassifier(
        random_state=self.random_state, alpha=alpha,
        class_weight=class_weight, **LOG_REG_ARGS)
    self.classifier.fit(x, classes)
    logger.debug("%s", DifferedLoggingMessage(self.log_best_features))
    return self
示例11: test_binary_classifier_class_weight
# 需要导入模块: from sklearn import utils [as 别名]
# 或者: from sklearn.utils import compute_class_weight [as 别名]
def test_binary_classifier_class_weight():
    """tests binary classifier with classweights for each class"""
    alpha = .1
    n_samples = 50
    n_iter = 20
    tol = .00001
    fit_intercept = True
    X, y = make_blobs(n_samples=n_samples, centers=2, random_state=10,
                      cluster_std=0.1)
    step_size = get_step_size(X, alpha, fit_intercept, classification=True)

    # Re-encode the two blob labels as -1 / +1.
    classes = np.unique(y)
    y_tmp = np.ones(n_samples)
    y_tmp[y != classes[1]] = -1
    y = y_tmp

    class_weight = {1: .45, -1: .55}
    clf1 = LogisticRegression(solver='sag', C=1. / alpha / n_samples,
                              max_iter=n_iter, tol=tol, random_state=77,
                              fit_intercept=fit_intercept,
                              class_weight=class_weight)
    clf2 = clone(clf1)

    # Fit once on dense and once on sparse input.
    clf1.fit(X, y)
    clf2.fit(sp.csr_matrix(X), y)

    le = LabelEncoder()
    # ``classes`` and ``y`` are passed by keyword: recent scikit-learn
    # releases reject them as positional arguments.
    class_weight_ = compute_class_weight(
        class_weight, classes=np.unique(y), y=y)
    # Expand the per-class weights into one weight per sample.
    sample_weight = class_weight_[le.fit_transform(y)]

    spweights, spintercept = sag_sparse(X, y, step_size, alpha, n_iter=n_iter,
                                        dloss=log_dloss,
                                        sample_weight=sample_weight,
                                        fit_intercept=fit_intercept)
    spweights2, spintercept2 = sag_sparse(X, y, step_size, alpha,
                                          n_iter=n_iter,
                                          dloss=log_dloss, sparse=True,
                                          sample_weight=sample_weight,
                                          fit_intercept=fit_intercept)

    assert_array_almost_equal(clf1.coef_.ravel(),
                              spweights.ravel(),
                              decimal=2)
    assert_almost_equal(clf1.intercept_, spintercept, decimal=1)

    assert_array_almost_equal(clf2.coef_.ravel(),
                              spweights2.ravel(),
                              decimal=2)
    assert_almost_equal(clf2.intercept_, spintercept2, decimal=1)
示例12: test_multiclass_classifier_class_weight
# 需要导入模块: from sklearn import utils [as 别名]
# 或者: from sklearn.utils import compute_class_weight [as 别名]
def test_multiclass_classifier_class_weight():
    """tests multiclass with classweights for each class"""
    alpha = .1
    n_samples = 20
    tol = .00001
    max_iter = 50
    class_weight = {0: .45, 1: .55, 2: .75}
    fit_intercept = True
    X, y = make_blobs(n_samples=n_samples, centers=3, random_state=0,
                      cluster_std=0.1)
    step_size = get_step_size(X, alpha, fit_intercept, classification=True)
    classes = np.unique(y)

    clf1 = LogisticRegression(solver='sag', C=1. / alpha / n_samples,
                              max_iter=max_iter, tol=tol, random_state=77,
                              fit_intercept=fit_intercept,
                              class_weight=class_weight)
    clf2 = clone(clf1)
    # Fit once on dense and once on sparse input.
    clf1.fit(X, y)
    clf2.fit(sp.csr_matrix(X), y)

    le = LabelEncoder()
    # ``classes`` and ``y`` are passed by keyword: recent scikit-learn
    # releases reject them as positional arguments.
    class_weight_ = compute_class_weight(class_weight, classes=classes, y=y)
    # Expand the per-class weights into one weight per sample.
    sample_weight = class_weight_[le.fit_transform(y)]

    coef1 = []
    intercept1 = []
    coef2 = []
    intercept2 = []
    # Reference solution: solve one binary (-1 / +1) problem per class.
    for cl in classes:
        y_encoded = np.ones(n_samples)
        y_encoded[y != cl] = -1

        spweights1, spintercept1 = sag_sparse(X, y_encoded, step_size, alpha,
                                              n_iter=max_iter, dloss=log_dloss,
                                              sample_weight=sample_weight)
        spweights2, spintercept2 = sag_sparse(X, y_encoded, step_size, alpha,
                                              n_iter=max_iter, dloss=log_dloss,
                                              sample_weight=sample_weight,
                                              sparse=True)
        coef1.append(spweights1)
        intercept1.append(spintercept1)
        coef2.append(spweights2)
        intercept2.append(spintercept2)

    coef1 = np.vstack(coef1)
    intercept1 = np.array(intercept1)
    coef2 = np.vstack(coef2)
    intercept2 = np.array(intercept2)

    for i, cl in enumerate(classes):
        assert_array_almost_equal(clf1.coef_[i].ravel(),
                                  coef1[i].ravel(),
                                  decimal=2)
        assert_almost_equal(clf1.intercept_[i], intercept1[i], decimal=1)

        assert_array_almost_equal(clf2.coef_[i].ravel(),
                                  coef2[i].ravel(),
                                  decimal=2)
        assert_almost_equal(clf2.intercept_[i], intercept2[i], decimal=1)