本文整理汇总了Python中sklearn.utils.class_weight.compute_class_weight方法的典型用法代码示例。如果您正苦于以下问题：Python class_weight.compute_class_weight方法的具体用法？Python class_weight.compute_class_weight怎么用？Python class_weight.compute_class_weight使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块sklearn.utils.class_weight的用法示例。
在下文中一共展示了class_weight.compute_class_weight方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: calculate_class_weights
# 需要导入模块: from sklearn.utils import class_weight [as 别名]
# 或者: from sklearn.utils.class_weight import compute_class_weight [as 别名]
def calculate_class_weights(params):
    """
    Compute balanced class weights for the training data and write them to a json file.

    The training structure is read from ``params.files['training_struct']``
    (a JSON sequence whose entries each carry a ``'category'`` label). The
    weights are written to ``params.files['class_weight']`` as a JSON list,
    one weight per distinct label, in the sorted order given by ``np.unique``.

    :param params: global parameters, used to find location of the dataset and json file
    :return: None
    """
    # Context manager so the input handle is closed instead of leaking.
    with open(params.files['training_struct']) as struct_file:
        trainingData = json.load(struct_file)

    ytrain = []
    for i, currData in enumerate(trainingData):
        ytrain.append(currData['category'])
        print(i)  # progress output, kept from the original

    ytrain = np.array(ytrain)
    # `classes` and `y` are keyword-only in scikit-learn >= 1.0; the keyword
    # form is also accepted by older releases.
    classWeights = class_weight.compute_class_weight(
        'balanced', classes=np.unique(ytrain), y=ytrain)

    with open(params.files['class_weight'], 'w') as json_file:
        json.dump(classWeights.tolist(), json_file)
示例2: test_compute_class_weight_dict
# 需要导入模块: from sklearn.utils import class_weight [as 别名]
# 或者: from sklearn.utils.class_weight import compute_class_weight [as 别名]
def test_compute_class_weight_dict():
    """Check compute_class_weight when the weights are given as a dict."""
    classes = np.arange(3)
    class_weights = {0: 1.0, 1: 2.0, 2: 3.0}
    y = np.asarray([0, 0, 1, 2])

    # `classes` and `y` are passed by keyword: they are keyword-only in
    # scikit-learn >= 1.0 and the keyword form works on older releases too.
    cw = compute_class_weight(class_weights, classes=classes, y=y)

    # When the user specifies class weights, compute_class_weights should just
    # return them.
    assert_array_almost_equal(np.asarray([1.0, 2.0, 3.0]), cw)

    # When a class weight is specified that isn't in classes, a ValueError
    # should get raised
    msg = 'Class label 4 not present.'
    class_weights = {0: 1.0, 1: 2.0, 2: 3.0, 4: 1.5}
    assert_raise_message(ValueError, msg, compute_class_weight, class_weights,
                         classes=classes, y=y)

    msg = 'Class label -1 not present.'
    class_weights = {-1: 5.0, 0: 1.0, 1: 2.0, 2: 3.0}
    assert_raise_message(ValueError, msg, compute_class_weight, class_weights,
                         classes=classes, y=y)
示例3: test_compute_class_weight_balanced_negative
# 需要导入模块: from sklearn.utils import class_weight [as 别名]
# 或者: from sklearn.utils.class_weight import compute_class_weight [as 别名]
def test_compute_class_weight_balanced_negative():
    """Check "balanced" class weights when the class labels are negative."""
    # Test compute_class_weight when labels are negative
    # Test with balanced class labels.
    classes = np.array([-2, -1, 0])
    y = np.asarray([-1, -1, 0, 0, -2, -2])

    # Keyword arguments: `classes`/`y` are keyword-only in scikit-learn >= 1.0.
    cw = compute_class_weight("balanced", classes=classes, y=y)
    assert_equal(len(cw), len(classes))
    assert_array_almost_equal(cw, np.array([1., 1., 1.]))

    # Test with unbalanced class labels.
    y = np.asarray([-1, 0, 0, -2, -2, -2])

    cw = compute_class_weight("balanced", classes=classes, y=y)
    assert_equal(len(cw), len(classes))
    # Shift labels by 2 so the smallest label (-2) lands in bin 0.
    class_counts = np.bincount(y + 2)
    assert_almost_equal(np.dot(cw, class_counts), y.shape[0])
    assert_array_almost_equal(cw, [2. / 3, 2., 1.])
示例4: split_data
# 需要导入模块: from sklearn.utils import class_weight [as 别名]
# 或者: from sklearn.utils.class_weight import compute_class_weight [as 别名]
def split_data(self, y_file_path, X, test_data_size=0.2):
    """
    Load features and labels, compute class weights and split into train/test sets.

    INPUT
        y_file_path: path to CSV containing labels; its 'level' column is used
        X: file (path or file object) handed to np.load to read the feature array
        test_data_size: size of test/train split. Value from 0 to 1

    OUTPUT
        None. Results are stored on the instance as X, y, weights,
        test_data_size, X_train, X_test, y_train and y_test.
    """
    labels = pd.read_csv(y_file_path)

    self.X = np.load(X)
    self.y = np.array(labels['level'])
    # `classes`/`y` are keyword-only in scikit-learn >= 1.0; keywords also
    # work on older releases.
    self.weights = class_weight.compute_class_weight(
        'balanced', classes=np.unique(self.y), y=self.y)
    self.test_data_size = test_data_size

    # Fixed random_state keeps the split reproducible between runs.
    self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
        self.X, self.y,
        test_size=self.test_data_size,
        random_state=42)
示例5: fit
# 需要导入模块: from sklearn.utils import class_weight [as 别名]
# 或者: from sklearn.utils.class_weight import compute_class_weight [as 别名]
def fit(self, batch_size, epochs, save_best_model_to_filepath=None):
    """
    Train ``self.model`` and, if a checkpoint path is given, reload the best model.

    :param batch_size: batch size forwarded to ``self.model.fit``
    :param epochs: number of epochs forwarded to ``self.model.fit``
    :param save_best_model_to_filepath: optional path; when given, a
        ModelCheckpoint monitoring 'val_acc' saves the best model there and
        that file is loaded back into ``self.model`` after training
    :return: ``self.model``
    """
    # Only build the checkpoint callback when there is somewhere to save to;
    # constructing ModelCheckpoint with filepath=None is rejected by newer Keras.
    callbacks = []
    if save_best_model_to_filepath is not None:
        callbacks.append(ModelCheckpoint(save_best_model_to_filepath,
                                         monitor='val_acc', verbose=1,
                                         save_best_only=True, mode='max'))

    class_weights = None
    if self.balance_classes:
        # `classes`/`y` are keyword-only in scikit-learn >= 1.0.
        weights = class_weight.compute_class_weight(
            'balanced',
            classes=np.unique(self.train_labels),
            y=self.train_labels)
        # Extra 5x boost for the second class (index 1), as in the original.
        weights[1] = weights[1] * 5
        # Keras documents class_weight as a dict {class index: weight};
        # enumerate keeps the same index -> weight mapping the array had.
        class_weights = dict(enumerate(weights))

    # Fit the model
    self.model.fit(self.x_train, self.y_train,
                   batch_size=batch_size,
                   epochs=epochs,
                   class_weight=class_weights,
                   callbacks=callbacks,
                   validation_data=(self.x_test, self.y_test))

    if save_best_model_to_filepath:
        self.model = load_model(save_best_model_to_filepath)

    return self.model
示例6: calculate_weigths_labels
# 需要导入模块: from sklearn.utils import class_weight [as 别名]
# 或者: from sklearn.utils.class_weight import compute_class_weight [as 别名]
def calculate_weigths_labels():
    """
    Derive per-class weights from label frequencies in the VOC training loader.

    Label values are counted over every batch of ``train_loader``. Each class
    gets the weight ``1 / log(1.02 + frequency / total_frequency)``. The
    weights are saved with ``np.save`` and printed.
    """
    class Config:
        mode = "train"
        num_classes = 21
        batch_size = 32
        max_epoch = 150

        validate_every = 2

        checkpoint_file = "checkpoint.pth.tar"

        data_loader = "VOCDataLoader"
        data_root = "../data/pascal_voc_seg/"
        data_loader_workers = 4
        pin_memory = True
        async_loading = True

    # Build the data loader and a progress bar over its training batches.
    from tqdm import tqdm
    loader = VOCDataLoader(Config)
    label_counts = np.zeros((Config.num_classes,))
    progress = tqdm(loader.train_loader, total=loader.train_iterations)

    # Accumulate a histogram of label values, one batch at a time.
    for _, y in progress:
        flat_labels = y.numpy().astype(np.uint8).ravel().tolist()
        label_counts += np.bincount(flat_labels, minlength=Config.num_classes)
    progress.close()

    total_frequency = np.sum(label_counts)
    print(label_counts)
    print(total_frequency)

    # The 1.02 offset keeps the logarithm positive even for a class that never occurs.
    ret = np.array([
        1 / (np.log(1.02 + (frequency / total_frequency)))
        for frequency in label_counts
    ])
    np.save('../pretrained_weights/voc2012_256_class_weights', ret)
    print(ret)
示例7: calculate_class_weights
# 需要导入模块: from sklearn.utils import class_weight [as 别名]
# 或者: from sklearn.utils.class_weight import compute_class_weight [as 别名]
def calculate_class_weights(self, task_name, source="train"):
    """ For imbalanced datasets, we can calculate class weights that can be used later in the
    loss function of the prediction head to upweight the loss of minorities.

    :param task_name: name of the task as used in the processor
    :type task_name: str
    :param source: which datasets the labels are counted in: "train" (only
        ``self.data["train"]``) or "all" (every dataset in ``self.data``)
    :type source: str
    :return: float32 array of "balanced" class weights, one per entry of the
        task's label list and in the same order
    :raises ValueError: if ``source`` is neither "train" nor "all"
    """
    tensor_name = self.processor.tasks[task_name]["label_tensor_name"]
    label_list = self.processor.tasks[task_name]["label_list"]
    tensor_idx = list(self.tensor_names).index(tensor_name)
    # we need at least ONE observation for each label to avoid division by zero in compute_class_weights.
    observed_labels = copy.deepcopy(label_list)
    if source == "all":
        datasets = self.data.values()
    elif source == "train":
        datasets = [self.data["train"]]
    else:
        # ValueError is the conventional type for a bad argument value and is
        # still caught by callers that catch Exception.
        raise ValueError("source argument expects one of [\"train\", \"all\"]")
    is_multilabel = "multilabel" in self.processor.tasks[task_name]["task_type"]
    for dataset in datasets:
        if is_multilabel:
            # every position set to 1 in the label tensor counts as one observed label
            for x in dataset:
                observed_labels += [label_list[label_id] for label_id in (x[tensor_idx] == 1).nonzero()]
        else:
            observed_labels += [label_list[x[tensor_idx].item()] for x in dataset]
    #TODO scale e.g. via logarithm to avoid crazy spikes for rare classes
    # `classes`/`y` are keyword-only in scikit-learn >= 1.0; keywords also work on older releases.
    class_weights = compute_class_weight("balanced", classes=np.asarray(label_list), y=observed_labels)
    # conversion necessary to have class weights of same type as model weights
    class_weights = class_weights.astype(np.float32)
    return class_weights
示例8: test_compute_class_weight
# 需要导入模块: from sklearn.utils import class_weight [as 别名]
# 或者: from sklearn.utils.class_weight import compute_class_weight [as 别名]
def test_compute_class_weight():
    """Test (and demo) compute_class_weight with the "balanced" heuristic."""
    y = np.asarray([2, 2, 2, 3, 3, 4])
    classes = np.unique(y)

    # `classes`/`y` are keyword-only in scikit-learn >= 1.0; keywords also
    # work on older releases.
    cw = compute_class_weight("balanced", classes=classes, y=y)
    # total effect of samples is preserved
    # (labels start at 2, so drop the empty bins for 0 and 1)
    class_counts = np.bincount(y)[2:]
    assert_almost_equal(np.dot(cw, class_counts), y.shape[0])
    # rarer classes must receive strictly larger weights
    assert cw[0] < cw[1] < cw[2]
示例9: test_compute_class_weight_not_present
# 需要导入模块: from sklearn.utils import class_weight [as 别名]
# 或者: from sklearn.utils.class_weight import compute_class_weight [as 别名]
def test_compute_class_weight_not_present():
    """Check that mismatches between ``classes`` and ``y`` raise ValueError."""
    # `classes`/`y` are forwarded by keyword throughout: passing them
    # positionally raises TypeError on scikit-learn >= 1.0, which would mask
    # the ValueError these assertions are about.

    # Raise error when y does not contain all class labels
    classes = np.arange(4)
    y = np.asarray([0, 0, 0, 1, 1, 2])
    assert_raises(ValueError, compute_class_weight, "balanced",
                  classes=classes, y=y)
    # Fix exception in error message formatting when missing label is a string
    # https://github.com/scikit-learn/scikit-learn/issues/8312
    assert_raise_message(ValueError,
                         'Class label label_not_present not present',
                         compute_class_weight,
                         {'label_not_present': 1.}, classes=classes, y=y)
    # Raise error when y has items not in classes
    classes = np.arange(2)
    assert_raises(ValueError, compute_class_weight, "balanced",
                  classes=classes, y=y)
    assert_raises(ValueError, compute_class_weight, {0: 1., 1: 2.},
                  classes=classes, y=y)
示例10: test_compute_class_weight_balanced_unordered
# 需要导入模块: from sklearn.utils import class_weight [as 别名]
# 或者: from sklearn.utils.class_weight import compute_class_weight [as 别名]
def test_compute_class_weight_balanced_unordered():
    """Check "balanced" class weights when ``classes`` is not sorted."""
    # Test compute_class_weight when classes are unordered
    classes = np.array([1, 0, 3])
    y = np.asarray([1, 0, 0, 3, 3, 3])

    # `classes`/`y` are keyword-only in scikit-learn >= 1.0; keywords also
    # work on older releases.
    cw = compute_class_weight("balanced", classes=classes, y=y)
    # Index the histogram by `classes` so counts line up with the weights.
    class_counts = np.bincount(y)[classes]
    assert_almost_equal(np.dot(cw, class_counts), y.shape[0])
    assert_array_almost_equal(cw, [2., 1., 2. / 3])
示例11: calculate_class_weight
# 需要导入模块: from sklearn.utils import class_weight [as 别名]
# 或者: from sklearn.utils.class_weight import compute_class_weight [as 别名]
def calculate_class_weight(y_train, no_classes=2):
    """
    Build a ``{class index: weight}`` dict of "balanced" class weights.

    :param y_train: training labels used to compute the weights
    :param no_classes: number of entries in the result; entry ``i`` gets the
        weight of the i-th distinct label in sorted (``np.unique``) order
    :return: dict mapping ``0 .. no_classes - 1`` to the corresponding weight
    :raises IndexError: if ``y_train`` has fewer than ``no_classes`` distinct labels
    """
    # https://datascience.stackexchange.com/questions/13490/how-to-set-class-weights-for-imbalanced-classes-in-keras
    from sklearn.utils import class_weight

    # `classes`/`y` are keyword-only in scikit-learn >= 1.0; keywords also
    # work on older releases.
    class_weight_list = class_weight.compute_class_weight(
        'balanced', classes=np.unique(y_train), y=y_train)
    class_weights = {i: class_weight_list[i] for i in range(no_classes)}
    print(class_weights)
    return class_weights
示例12: get_weights
# 需要导入模块: from sklearn.utils import class_weight [as 别名]
# 或者: from sklearn.utils.class_weight import compute_class_weight [as 别名]
def get_weights(target, num_classes=7):
    """
    Compute median-frequency class weights from a tensor of integer labels.

    Each class present in ``target`` gets ``median(counts) / count``; classes
    that do not occur keep a weight of 1.

    :param target: tensor of integer class ids (any shape); values must lie in
        ``[0, num_classes)``
    :param num_classes: length of the returned weight vector (default 7, the
        value that used to be hard-coded)
    :return: float32 tensor of shape ``(num_classes,)``; on the GPU when CUDA
        is available, otherwise on the CPU
    """
    # detach() replaces the legacy `.data`; reshape() also handles
    # non-contiguous tensors, on which view(-1) raises.
    t_np = target.detach().reshape(-1).cpu().numpy()
    classes, counts = np.unique(t_np, return_counts=True)

    weights = np.ones(num_classes)
    if counts.size:  # skip the median of an empty array (NaN plus a warning)
        weights[classes] = np.median(counts) / counts

    weights_t = torch.from_numpy(weights).float()
    # The original always called .cuda(), which raises on CPU-only hosts.
    return weights_t.cuda() if torch.cuda.is_available() else weights_t
示例13: get_class_weights
# 需要导入模块: from sklearn.utils import class_weight [as 别名]
# 或者: from sklearn.utils.class_weight import compute_class_weight [as 别名]
def get_class_weights(self, loader):
    """
    Compute "balanced" class weights from every target yielded by ``loader``.

    :param loader: iterable of dicts, each holding a tensor under ``"targets"``
    :return: float tensor on ``self._device`` with one weight per distinct
        target value, in sorted (``np.unique``) order
    """
    # Gather all batches first so the weights reflect the whole loader.
    batches = [input_dict["targets"] for input_dict in loader]
    targets = torch.cat(batches).cpu().numpy()
    unique_targets = np.unique(targets)
    # `classes`/`y` are keyword-only in scikit-learn >= 1.0; keywords also
    # work on older releases.
    class_weights = compute_class_weight('balanced', classes=unique_targets, y=targets)
    return torch.tensor(class_weights).to(self._device).float()
示例14: get_class_weights_raw
# 需要导入模块: from sklearn.utils import class_weight [as 别名]
# 或者: from sklearn.utils.class_weight import compute_class_weight [as 别名]
def get_class_weights_raw(self, targets):
    """
    Compute "balanced" class weights directly from an array of targets.

    :param targets: array-like of class labels
    :return: float tensor on ``self._device`` with one weight per distinct
        target value, in sorted (``np.unique``) order
    """
    unique_targets = np.unique(targets)
    # `classes`/`y` are keyword-only in scikit-learn >= 1.0; keywords also
    # work on older releases.
    class_weights = compute_class_weight('balanced', classes=unique_targets, y=targets)
    return torch.tensor(class_weights).to(self._device).float()
# -----------
# Constraints
# -----------
示例15: _select_classifier_from_list
# 需要导入模块: from sklearn.utils import class_weight [as 别名]
# 或者: from sklearn.utils.class_weight import compute_class_weight [as 别名]
def _select_classifier_from_list(candidates, X, A, n_splits=5, seed=None, loss_type='01'):
    """
    Return the candidate classifier with the best class-balanced score.

    Every sample is weighted by the "balanced" class weight of its own label.
    Each candidate's predictions come from K-fold cross-validation when
    ``n_splits >= 2``; otherwise the candidate is fitted on all the data and
    predicts on that same data. The candidate with the highest score wins.

    :param candidates: sequence of estimators whose ``fit`` accepts ``sample_weight``
    :param X: feature matrix
    :param A: label array; the log-likelihood branch scores only samples
        labelled -1 or 1
    :param n_splits: number of CV folds; values below 2 disable cross-validation
    :param seed: random_state for the shuffled KFold
    :param loss_type: '01' for weighted accuracy, anything else for weighted
        log-likelihood computed from ``predict_proba[:, 1]``
    :return: the element of ``candidates`` with the highest score
    """
    accuracies = np.zeros(len(candidates))

    # Per-sample weights: look up each sample's class weight via its encoded label.
    # (`classes`/`y` are keyword-only in scikit-learn >= 1.0.)
    balanced = compute_class_weight('balanced', classes=np.unique(A), y=A)
    sample_weight = balanced[LabelEncoder().fit_transform(A)]
    total_weight = np.sum(sample_weight)

    cv = None
    if n_splits >= 2:
        cv = KFold(n_splits=n_splits, shuffle=True, random_state=seed)

    # Predict AND score inside one loop so that every candidate gets a score.
    # The original scoring code sat after two separate candidate loops.
    for model_idx, m in enumerate(candidates):
        if cv is not None:
            # NOTE(review): newer scikit-learn appears to rename `fit_params`
            # to `params` - confirm against the pinned version.
            if loss_type == '01':
                pred = cross_val_predict(m, X=X, y=A, cv=cv,
                                         fit_params={'sample_weight': sample_weight}).reshape(-1)
            else:
                ps = cross_val_predict(m, X=X, y=A, cv=cv,
                                       fit_params={'sample_weight': sample_weight},
                                       method='predict_proba')
                pred = ps[:, 1]
        else:
            m.fit(X, A, sample_weight=sample_weight)
            if loss_type == '01':
                pred = m.predict(X=X)
            else:
                pred = m.predict_proba(X=X)[:, 1]

        if loss_type == '01':
            # weighted accuracy
            accuracies[model_idx] = np.sum(sample_weight[pred == A]) / total_weight
        else:
            # weighted log-likelihood; `pred` is the predict_proba column 1 value
            logl = np.zeros(A.shape)
            logl[A == -1] = np.log(1.0 - pred[A == -1])
            logl[A == 1] = np.log(pred[A == 1])
            accuracies[model_idx] = np.sum(sample_weight * logl) / total_weight

    i_best = np.argmax(accuracies)
    return candidates[i_best]