本文整理汇总了Python中sklearn.preprocessing.label_binarize方法的典型用法代码示例。如果您正苦于以下问题:Python preprocessing.label_binarize方法的具体用法?Python preprocessing.label_binarize怎么用?Python preprocessing.label_binarize使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块sklearn.preprocessing的用法示例。
在下文中一共展示了preprocessing.label_binarize方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_precision_recall_f_ignored_labels
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import label_binarize [as 别名]
def test_precision_recall_f_ignored_labels():
    """Check that recall can be restricted to a requested subset of labels.

    The same expectations are verified twice: once on plain label lists and
    once on their binarized (indicator matrix) counterparts.
    """
    raw_true = [1, 1, 2, 3]
    raw_pred = [1, 3, 3, 3]
    all_classes = np.arange(5)
    cases = [
        (raw_true, raw_pred),
        (label_binarize(raw_true, classes=all_classes),
         label_binarize(raw_pred, classes=all_classes)),
    ]

    for truth, guess in cases:
        recall_subset = partial(recall_score, truth, guess, labels=[1, 3])
        recall_full = partial(recall_score, truth, guess, labels=None)

        # Per-class recall for labels 1 and 3, then the three averages.
        assert_array_almost_equal([.5, 1.], recall_subset(average=None))
        assert_almost_equal((.5 + 1.) / 2, recall_subset(average='macro'))
        assert_almost_equal((.5 * 2 + 1. * 1) / 3,
                            recall_subset(average='weighted'))
        assert_almost_equal(2. / 3, recall_subset(average='micro'))

        # Make sure the checks above are not vacuous: restricting the labels
        # must actually change every averaged score.
        for mode in ('macro', 'weighted', 'micro'):
            assert_not_equal(recall_subset(average=mode),
                             recall_full(average=mode))
示例2: score
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import label_binarize [as 别名]
def score(self,
          actual: np.array,
          predicted: np.array,
          sample_weight: typing.Optional[np.array] = None,
          labels: typing.Optional[np.array] = None,
          **kwargs) -> float:
    """Return the protected metric evaluated at the optimal threshold.

    Confusion matrices are obtained from ``daicx.confusion_matrices``. The
    row(s) that maximise the column named by the class attribute
    ``_threshold_optimizer`` are kept, ``self.protected_metric`` is applied
    to their ``tp, fp, tn, fn`` counts, and the mean over those rows is
    returned so that ties are averaged.

    When the encoded predictions have more than one column (multiclass),
    the problem is flattened: predictions are ravelled, the encoded actuals
    are one-hot encoded against ``labels`` and ravelled, and each sample
    weight is repeated once per prediction column.

    :param actual: ground-truth values, forwarded to
        ``prep_actual_predicted``
    :param predicted: predictions, forwarded to ``prep_actual_predicted``;
        ``predicted.shape[1]`` is used as the weight repeat count
    :param sample_weight: optional per-sample weights; flattened before use
    :param labels: optional label set, forwarded to
        ``prep_actual_predicted``
    :param kwargs: accepted for interface compatibility; not used here
    :return: mean of the metric over the optimal-threshold row(s)
    """
    if sample_weight is not None:
        sample_weight = sample_weight.ravel()
    enc_actual, enc_predicted, labels = prep_actual_predicted(actual, predicted, labels)
    cm_weights = sample_weight

    # multiclass
    if enc_predicted.shape[1] > 1:
        enc_predicted = enc_predicted.ravel()
        # ``classes`` must be passed by keyword: it is keyword-only in
        # current scikit-learn releases.
        enc_actual = label_binarize(enc_actual, classes=labels).ravel()
        if cm_weights is not None:
            cm_weights = np.repeat(cm_weights, predicted.shape[1]).ravel()
        # Internal sanity checks on the flattened shapes.
        assert enc_predicted.shape == enc_actual.shape
        assert cm_weights is None or enc_predicted.shape == cm_weights.shape

    # NOTE(review): the result is used like a pandas DataFrame with one row
    # per threshold - confirm against daicx.
    cms = daicx.confusion_matrices(enc_actual.ravel(), enc_predicted.ravel(), sample_weight=cm_weights)
    # Keep the row(s) where the threshold-optimizer column is maximal.
    cms = cms.loc[cms[[self.__class__._threshold_optimizer]].idxmax()]
    cms['metric'] = cms[['tp', 'fp', 'tn', 'fn']].apply(lambda x: self.protected_metric(*x), axis=1, raw=True)
    return cms['metric'].mean()  # in case of ties
示例3: weight_dict_fc
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import label_binarize [as 别名]
def weight_dict_fc(trainLabel, para, n_classes=40):
    """Build a class-id -> weight mapping from inverse class frequencies.

    For class ``c`` with relative frequency ``p_c`` the weight is
    ``para.weight_scaler * (1 / p_c) / sum_k(1 / p_k) + 1``.

    :param trainLabel: iterable of label sequences; they are concatenated
        into one flat list of labels
    :param para: object providing ``weight_scaler``
    :param n_classes: number of classes; labels are binarized against
        ``range(n_classes)`` (defaults to the previously hard-coded 40)
    :return: dict mapping each class index to its weight
    """
    from sklearn.preprocessing import label_binarize

    train_labels = [label for labels in trainLabel for label in labels]

    one_hot = label_binarize(train_labels, classes=list(range(n_classes)))
    class_counts = np.sum(one_hot, axis=0).astype(float)
    class_distribution = class_counts / np.sum(class_counts)

    # NOTE(review): a class that never occurs has frequency 0, so the
    # division below yields inf and that class ends up with a NaN weight.
    inverse_dist = 1 / class_distribution
    norm_inv_dist = inverse_dist / np.sum(inverse_dist)
    weights = norm_inv_dist * para.weight_scaler + 1

    return dict(enumerate(weights))
示例4: weight_dict_fc
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import label_binarize [as 别名]
def weight_dict_fc(trainLabel, para):
    """Build a class-id -> weight mapping from inverse class frequencies.

    For class ``c`` with relative frequency ``p_c`` the weight is
    ``para.weight_scaler * (1 / p_c) / sum_k(1 / p_k) + 1``.

    :param trainLabel: iterable of label sequences; they are concatenated
        into one flat list of labels
    :param para: object providing ``outputClassN`` (number of classes used
        for binarization) and ``weight_scaler``
    :return: dict mapping each class index to its weight
    """
    from sklearn.preprocessing import label_binarize

    train_labels = [label for labels in trainLabel for label in labels]

    one_hot = label_binarize(train_labels, classes=list(range(para.outputClassN)))
    class_counts = np.sum(one_hot, axis=0).astype(float)
    class_distribution = class_counts / np.sum(class_counts)

    # NOTE(review): a class that never occurs has frequency 0, so the
    # division below yields inf and that class ends up with a NaN weight.
    inverse_dist = 1 / class_distribution
    norm_inv_dist = inverse_dist / np.sum(inverse_dist)
    weights = norm_inv_dist * para.weight_scaler + 1

    return dict(enumerate(weights))
示例5: __init__
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import label_binarize [as 别名]
def __init__(self, feats_path, class_nums, n_classes, n_frames_per_video, batch_size, n_feat_maps, feat_map_side_dim, n_threads=10):
    """Store the loader configuration, binarize the labels and start a pool.

    Both ``random`` and ``numpy.random`` are seeded with 101.

    :param feats_path: stored as-is (presumably the feature file paths -
        TODO confirm against callers)
    :param class_nums: class numbers; binarized against the classes
        ``1 .. n_classes``
    :param n_classes: number of classes (labels are assumed to be 1-based,
        as implied by the class range used for binarization)
    :param n_frames_per_video: stored as-is
    :param batch_size: stored as-is
    :param n_feat_maps: stored as-is
    :param feat_map_side_dim: stored as-is
    :param n_threads: size of the worker ``Pool`` that is created
    """
    random.seed(101)
    np.random.seed(101)

    self.__feats_pathes = feats_path
    self.__class_nums = class_nums
    self.__n_frames_per_video = n_frames_per_video
    self.__n_feat_maps = n_feat_maps
    self.__feat_map_side_dim = feat_map_side_dim
    self.__batch_size = batch_size

    # binarize the labels; ``classes`` must be passed by keyword because it
    # is keyword-only in current scikit-learn releases
    classes = list(range(1, n_classes + 1))
    self.__y = label_binarize(self.__class_nums, classes=classes)

    self.__is_busy = False
    self.__batch_features = None
    self.__batch_y = None

    self.__n_threads_in_pool = n_threads
    self.__pool = Pool(self.__n_threads_in_pool)
示例6: cross_val_roc_auc_score
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import label_binarize [as 别名]
def cross_val_roc_auc_score(self, cv=10, **kwargs):
    """
    Cross-validated ROC AUC scoring.

    According to the original documentation this method is wrapped by the
    decorator entry_wrapper(support=(EMLFitType.E_FIT_CLF,)), i.e. it
    supports supervised classification. The data is scored with
    cross_val_score using the roc_auc metric; if y has more than 2 distinct
    labels, the labels are binarized with label_binarize, roc_auc is
    computed for each binarized column in turn, and the best score is
    returned.

    :param cv: passed through to cross_val_score, defaults to 10
    :param kwargs: callers may supply x and y, which are resolved with
                x = kwargs.pop('x', self.x)
                y = kwargs.pop('y', self.y)
            and handed to self._do_cross_val_score, as well as the
            fiter_type consumed by the decorator,
            eg: ttn_abu.cross_val_roc_auc_score(fiter_type=ml.EMLFitType.E_FIT_REG)
    :return: the score sequence returned by cross_val_score,
            eg: array([ 1.  ,  0.9 ,  1.  ,  0.9 ,  1.  ,  0.9 ,  1.  ,  0.9 ,  0.95,  1.  ])
    """
    features = kwargs.pop('x', self.x)
    targets = kwargs.pop('y', self.y)
    scoring = _EMLScoreType.E_SCORE_ROC_AUC.value
    return self._do_cross_val_score(features, targets, cv, scoring)
示例7: test_matthews_corrcoef
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import label_binarize [as 别名]
def test_matthews_corrcoef():
    """Exercise matthews_corrcoef on identical, inverted, constant,
    uncorrelated and sample-weighted inputs."""
    rng = np.random.RandomState(0)
    y_true = ["a" if i == 0 else "b" for i in rng.randint(0, 2, size=20)]

    # corrcoef of same vectors must be 1
    assert_almost_equal(matthews_corrcoef(y_true, y_true), 1.0)

    # corrcoef, when the two vectors are opposites of each other, should be -1
    y_true_inv = ["b" if i == "a" else "a" for i in y_true]
    assert_almost_equal(matthews_corrcoef(y_true, y_true_inv), -1)

    # Same inversion built through label_binarize; ``classes`` must be passed
    # by keyword because it is keyword-only in current scikit-learn releases.
    y_true_inv2 = label_binarize(y_true, classes=["a", "b"])
    y_true_inv2 = np.where(y_true_inv2, 'a', 'b')
    assert_almost_equal(matthews_corrcoef(y_true, y_true_inv2), -1)

    # For the zero vector case, the corrcoef cannot be calculated and should
    # result in a RuntimeWarning
    mcc = assert_warns_div0(matthews_corrcoef, [0, 0, 0, 0], [0, 0, 0, 0])

    # But will output 0
    assert_almost_equal(mcc, 0.)

    # And also for any other vector with 0 variance
    mcc = assert_warns_div0(matthews_corrcoef, y_true, ['a'] * len(y_true))

    # But will output 0
    assert_almost_equal(mcc, 0.)

    # These two vectors have 0 correlation and hence mcc should be 0
    y_1 = [1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1]
    y_2 = [1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1]
    assert_almost_equal(matthews_corrcoef(y_1, y_2), 0.)

    # Check that sample weight is able to selectively exclude
    mask = [1] * 10 + [0] * 10
    # Now the first half of the vector elements are alone given a weight of 1
    # and hence the mcc will not be a perfect 0 as in the previous case
    assert_raises(AssertionError, assert_almost_equal,
                  matthews_corrcoef(y_1, y_2, sample_weight=mask), 0.)
示例8: evaluateOneEpoch
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import label_binarize [as 别名]
def evaluateOneEpoch(inputCoor, inputGraph, inputLabel, para, sess, trainOperaion):
    """Run one evaluation pass and return the mean loss, mean accuracy and
    the per-batch predictions.

    Each entry of the three input sequences is processed in full batches of
    ``para.testBatchSize`` samples; trailing samples that do not fill a
    complete batch are skipped. Both keep-probabilities are fed as 1.0.

    :param inputCoor: sequence of coordinate arrays, one per input chunk
    :param inputGraph: sequence of sparse graphs (each converted with
        ``tocsr()``), aligned with ``inputCoor``
    :param inputLabel: sequence of label arrays, aligned with ``inputCoor``
    :param para: object providing ``outputClassN`` and ``testBatchSize``
    :param sess: session object whose ``run`` evaluates the requested ops
    :param trainOperaion: dict with the placeholders and ops used here
        ('inputPC', 'inputGraph', 'outputLabel', 'weights', 'keep_prob_1',
        'keep_prob_2', 'predictLabels', 'loss', 'acc')
    :return: ``(mean loss, mean accuracy, list of per-batch predictions)``
    """
    test_loss = []
    test_acc = []
    test_predict = []
    class_ids = list(range(para.outputClassN))  # loop-invariant
    test_batch_size = para.testBatchSize

    for i in range(len(inputCoor)):
        xTest, graphTest, labelTest = inputCoor[i], inputGraph[i], inputLabel[i]
        graphTest = graphTest.tocsr()
        labelBinarize = label_binarize(labelTest, classes=class_ids)

        # Floor division: on Python 3 a plain "/" produces a float, which
        # range() rejects with a TypeError.
        for testBatchID in range(len(labelTest) // test_batch_size):
            start = testBatchID * test_batch_size
            end = start + test_batch_size
            batchCoor, batchGraph, batchLabel = get_mini_batch(xTest, graphTest, labelBinarize, start, end)
            batchWeight = uniform_weight(batchLabel)
            batchGraph = batchGraph.todense()

            feed_dict = {trainOperaion['inputPC']: batchCoor, trainOperaion['inputGraph']: batchGraph,
                         trainOperaion['outputLabel']: batchLabel, trainOperaion['weights']: batchWeight,
                         trainOperaion['keep_prob_1']: 1.0, trainOperaion['keep_prob_2']: 1.0}

            predict, loss_test, acc_test = sess.run(
                [trainOperaion['predictLabels'], trainOperaion['loss'], trainOperaion['acc']], feed_dict=feed_dict)
            test_loss.append(loss_test)
            test_acc.append(acc_test)
            test_predict.append(predict)

    test_average_loss = np.mean(test_loss)
    test_average_acc = np.mean(test_acc)
    return test_average_loss, test_average_acc, test_predict
示例9: _compute_roc_stats
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import label_binarize [as 别名]
def _compute_roc_stats(y_test, y_test_probas, num_class):
    """Compute per-class and micro-averaged ROC curves and ROC AUC scores.

    Arguments:
        y_test: [int]
            list of test class labels as integer indices
        y_test_probas: np.ndarray, float
            array of predicted probabilities with shape
            (num_sample, num_class)
        num_class: int
            number of classes

    Returns:
        roc_auc_dict: {int or "micro": float}
            dictionary mapping each class index, plus the key "micro",
            to a ROC AUC score
        fpr_dict: {int or "micro": np.ndarray}
            dictionary mapping each class index, plus the key "micro",
            to the false positive rates returned by roc_curve
        tpr_dict: {int or "micro": np.ndarray}
            dictionary mapping each class index, plus the key "micro",
            to the true positive rates returned by roc_curve
    """
    import numpy as np  # function-scope import, needed only for the binary case

    y_test = label_binarize(y_test, classes=list(range(num_class)))
    if num_class == 2 and y_test.shape[1] == 1:
        # label_binarize collapses a two-class problem into one column (the
        # indicator of class 1). Rebuild the two-column indicator matrix so
        # that it lines up with the (num_sample, 2) probability array.
        y_test = np.hstack((1 - y_test, y_test))

    fpr_dict, tpr_dict, roc_auc_dict = {}, {}, {}
    for i in range(num_class):
        fpr_dict[i], tpr_dict[i], _ = roc_curve(
            y_test[:, i], y_test_probas[:, i])
        roc_auc_dict[i] = auc(fpr_dict[i], tpr_dict[i])

    # Compute micro-average ROC curve and ROC area
    fpr_dict["micro"], tpr_dict["micro"], _ = roc_curve(
        y_test.ravel(), y_test_probas.ravel())
    roc_auc_dict["micro"] = auc(fpr_dict["micro"], tpr_dict["micro"])

    return roc_auc_dict, fpr_dict, tpr_dict
示例10: roc_graph_example
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import label_binarize [as 别名]
def roc_graph_example():
    """
    Plot an example ROC graph of an SVM model predictions over the Iris
    dataset.

    Based on sklearn examples (as was seen on April 2018):
    http://scikit-learn.org/stable/auto_examples/model_selection/plot_roc.html
    """
    # Iris features with one-hot encoded targets
    iris = datasets.load_iris()
    features = iris.data
    targets = label_binarize(iris.target, classes=[0, 1, 2])

    # Append 200x as many pure-noise columns as there are real features
    rng = np.random.RandomState(4)
    n_samples, n_features = features.shape
    noise = rng.randn(n_samples, 200 * n_features)
    features = np.c_[features, noise]

    # Half of the data is held out for scoring
    X_train, X_test, y_train, y_test = train_test_split(
        features, targets, test_size=.5, random_state=0)

    # One-vs-rest linear SVM with probability estimates
    base_svm = svm.SVC(kernel='linear', probability=True, random_state=0)
    model = OneVsRestClassifier(base_svm)
    fitted = model.fit(X_train, y_train)
    y_score = fitted.predict_proba(X_test)

    # Delegate the plotting
    return roc_graph(y_test, y_score, class_names=iris.target_names)
示例11: average_precision
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import label_binarize [as 别名]
def average_precision(prob_np, target_np):
    """Return the per-class average precision of the predicted scores.

    :param prob_np: score array whose second dimension is the number of
        classes (``prob_np.shape[1]`` is read)
    :param target_np: integer class labels; binarized against
        ``range(num_class)``
    :return: the result of ``average_precision_score`` with ``average=None``,
        i.e. one score per class rather than an aggregate
    """
    num_class = prob_np.shape[1]
    # NOTE(review): for num_class == 2 label_binarize returns a single column,
    # which will not match a two-column prob_np - confirm whether callers ever
    # pass binary problems.
    label = label_binarize(target_np, classes=list(range(num_class)))
    # Division warnings are silenced while scoring (divide/invalid ignored).
    with np.errstate(divide='ignore', invalid='ignore'):
        # ``average`` must be passed by keyword: it is keyword-only in
        # current scikit-learn releases.
        return average_precision_score(label, prob_np, average=None)
示例12: _marg_rounded
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import label_binarize [as 别名]
def _marg_rounded(self, x, y):
    """Build indicator matrices and derived terms from a hard labelling.

    :param x: input object providing ``link_to_prop`` (an array of
        (source, target) node index pairs, one per link), ``second_order``
        (triples of node indices) and, when ``self.compat_features`` is set,
        ``X_compat``
    :param y: labelling object providing ``nodes`` and ``links``
    :return: tuple ``(Y_node, Y_link, compat, second_order)`` holding the
        binarized node labels, the binarized link labels, the compatibility
        tensor and the array of second-order indicators
    """
    y_node = y.nodes
    y_link = y.links
    # ``classes`` must be passed by keyword: it is keyword-only in current
    # scikit-learn releases.
    Y_node = label_binarize(y_node, classes=self.prop_encoder_.classes_)
    Y_link = label_binarize(y_link, classes=self.link_encoder_.classes_)

    # XXX can this be avoided?
    Y_node, Y_link = map(_binary_2d, (Y_node, Y_link))

    # Indicator rows of the source / target node of every link.
    src_type = Y_node[x.link_to_prop[:, 0]]
    trg_type = Y_node[x.link_to_prop[:, 1]]

    if self.compat_features:
        pw = np.einsum('...j,...k,...l->...jkl',
                       src_type, trg_type, Y_link)
        compat = np.tensordot(x.X_compat.T, pw, axes=[1, 0])
    else:
        # equivalent to compat_features == np.ones(n_links)
        compat = np.einsum('ij,ik,il->jkl', src_type, trg_type, Y_link)

    second_order = []

    if self.coparents_ or self.grandparents_ or self.siblings_:
        # Map each (source, target) pair to the position of its link.
        link = {(a, b): k for k, (a, b) in enumerate(x.link_to_prop)}
        if self.coparents_:
            second_order.extend(y_link[link[a, b]] & y_link[link[c, b]]
                                for a, b, c in x.second_order)
        if self.grandparents_:
            second_order.extend(y_link[link[a, b]] & y_link[link[b, c]]
                                for a, b, c in x.second_order)
        if self.siblings_:
            second_order.extend(y_link[link[b, a]] & y_link[link[b, c]]
                                for a, b, c in x.second_order)
    second_order = np.array(second_order)

    return Y_node, Y_link, compat, second_order
示例13: __call__
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import label_binarize [as 别名]
def __call__(self, y_true, y_pred, **kwargs):
    """
    Compute auroc

    Parameters
    ----------
    y_true: np.ndarray
        ground truth data with shape (N)
    y_pred: np.ndarray
        predictions of network in numpy format with shape (N, nclasses)
    kwargs:
        variable number of keyword arguments passed to roc_auc_score

    Returns
    -------
    float
        computes auc score (``None`` is returned implicitly when fewer than
        two classes are configured)

    Raises
    ------
    ValueError
        if two classes are given and the predictions contain more than two
        classes
    """
    # binary classification
    if len(self.classes) == 2:
        # The class axis of an (N, nclasses) array is axis 1; indexing
        # shape[2] raised IndexError for every 2-D prediction array.
        # single output unit (e.g. sigmoid)
        if len(y_pred.shape) == 1 or y_pred.shape[1] == 1:
            return roc_auc_score(y_true, y_pred, **kwargs)
        # output of two units (e.g. softmax)
        elif y_pred.shape[1] == 2:
            return roc_auc_score(y_true, y_pred[:, 1], **kwargs)
        else:
            raise ValueError("Can not compute auroc metric for binary "
                             "classes with {} predicted "
                             "classes.".format(y_pred.shape[1]))

    # classification with multiple classes
    if len(self.classes) > 2:
        # ``classes`` must be passed by keyword: it is keyword-only in
        # current scikit-learn releases.
        y_true_bin = label_binarize(y_true, classes=self.classes)
        return roc_auc_score(y_true_bin, y_pred, **kwargs, **self.kwargs)
示例14: make_roc
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import label_binarize [as 别名]
def make_roc(gt,cpl,cl):
    """Print the one-vs-rest ROC AUC of each of the six states (0-5).

    :param gt: labels that are binarized and used as the true targets of
        ``roc_curve``
    :param cpl: 2-D score array; column ``i`` holds the scores for state ``i``
    :param cl: labels that are printed and binarized only to obtain the
        number of state columns
    :return: None; results are only printed
    """
    from sklearn.preprocessing import label_binarize
    from sklearn.metrics import roc_curve, auc

    state_ids = [0, 1, 2, 3, 4, 5]
    truth_onehot = label_binarize(gt, classes=state_ids)
    print('c=',cl)
    n_states = label_binarize(cl, classes=state_ids).shape[1]

    fpr, tpr, roc_auc = {}, {}, {}
    for state in range(n_states):
        curve = roc_curve(truth_onehot[:, state], cpl[:, state])
        fpr[state], tpr[state] = curve[0], curve[1]
        roc_auc[state] = auc(fpr[state], tpr[state])
        print('state=, {}, auc=,{}'.format(state, roc_auc[state]))
示例15: performance_report
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import label_binarize [as 别名]
def performance_report(labels, predictions):
    """Build a per-class table of classification metrics.

    :param labels: 2-D array of targets with one column per class
    :param predictions: 2-D array of scores with one column per class
    :return: array of shape ``(n_classes, 6)`` whose columns are precision,
        recall, F1, the auROC value, the auPR value and support
    """
    from sklearn.preprocessing import label_binarize
    from sklearn.metrics import precision_recall_fscore_support

    classes = list(range(labels.shape[1]))
    roc_aucs, pr_aucs = [], []
    if len(classes) == 2:
        # Two-column case: column 0 is scored once and the value is reported
        # for both classes (presumably because the columns are complementary
        # - TODO confirm).
        roc_aucs = [auROC(labels[:, 0], predictions[:, 0])[2]] * 2
        pr_aucs = [auPR(labels[:, 0], predictions[:, 0])[2]] * 2
        labels = label_binarize(np.argmax(labels, axis = 1), classes = classes)
    else:
        for x in classes:
            roc_aucs.append(auROC(labels[:, x], predictions[:, x])[2])
            pr_aucs.append(auPR(labels[:, x], predictions[:, x])[2])

    if not np.isclose(np.sum(predictions, axis=1), 1).all():
        # multi-label classification: rows do not sum to 1, so every class is
        # thresholded independently. astype() makes an explicit uint8 copy
        # instead of reinterpreting the boolean buffer by assigning to .dtype.
        y_pred = (predictions > 0.5).astype(np.uint8)
    else:
        y_pred = label_binarize(np.argmax(predictions, axis = 1), classes = classes)

    prec_recall_f1_support = precision_recall_fscore_support(labels, y_pred)
    report = np.empty((len(classes), 6))
    for x in classes:
        report[x,:] = [prec_recall_f1_support[0][x], prec_recall_f1_support[1][x],
                       prec_recall_f1_support[2][x], roc_aucs[x],
                       pr_aucs[x], prec_recall_f1_support[3][x]]
    return report