本文整理汇总了Python中sklearn.preprocessing.label.LabelBinarizer.fit_transform方法的典型用法代码示例。如果您正苦于以下问题:Python LabelBinarizer.fit_transform方法的具体用法?Python LabelBinarizer.fit_transform怎么用?Python LabelBinarizer.fit_transform使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.preprocessing.label.LabelBinarizer
的用法示例。
在下文中一共展示了LabelBinarizer.fit_transform方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_label_binarizer
# 需要导入模块: from sklearn.preprocessing.label import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.label.LabelBinarizer import fit_transform [as 别名]
def test_label_binarizer():
    """LabelBinarizer round-trips one-, two-, and multi-class string labels."""
    lb = LabelBinarizer()

    # One-class input: the single output column defaults to the negative label.
    labels = ["pos", "pos", "pos", "pos"]
    target = np.array([[0, 0, 0, 0]]).T
    binarized = lb.fit_transform(labels)
    assert_array_equal(lb.classes_, ["pos"])
    assert_array_equal(target, binarized)
    assert_array_equal(lb.inverse_transform(binarized), labels)

    # Two-class input collapses to a single 0/1 column.
    labels = ["neg", "pos", "pos", "neg"]
    target = np.array([[0, 1, 1, 0]]).T
    binarized = lb.fit_transform(labels)
    assert_array_equal(lb.classes_, ["neg", "pos"])
    assert_array_equal(target, binarized)
    two_column = np.array([[1, 0],
                           [0, 1],
                           [0, 1],
                           [1, 0]])
    assert_array_equal(lb.inverse_transform(two_column), labels)

    # Multi-class input yields a one-hot indicator matrix whose columns
    # follow the sorted class names.
    labels = ["spam", "ham", "eggs", "ham", "0"]
    target = np.array([[0, 0, 0, 1],
                       [0, 0, 1, 0],
                       [0, 1, 0, 0],
                       [0, 0, 1, 0],
                       [1, 0, 0, 0]])
    binarized = lb.fit_transform(labels)
    assert_array_equal(lb.classes_, ['0', 'eggs', 'ham', 'spam'])
    assert_array_equal(target, binarized)
    assert_array_equal(lb.inverse_transform(binarized), labels)
示例2: test_label_binarizer_column_y
# 需要导入模块: from sklearn.preprocessing.label import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.label.LabelBinarizer import fit_transform [as 别名]
def test_label_binarizer_column_y():
    """List-of-lists input is treated as multi-label while the equivalent
    2-D array is treated as multi-class; with more than two classes both
    produce the same indicator matrix."""
    # One possible class per sample: list input gives a multilabel
    # indicator, the array input a single binary column.
    y_list = [[1], [2], [1]]
    y_array = np.array(y_list)
    expected_indicator = np.array([[1, 0], [0, 1], [1, 0]])
    expected_binary = np.array([[0], [1], [0]])
    assert_array_equal(LabelBinarizer().fit_transform(y_list),
                       expected_indicator)
    assert_array_equal(LabelBinarizer().fit_transform(y_array),
                       expected_binary)

    # Three classes: multi-label and multi-class agree on the indicator.
    y_list = [[1], [2], [1], [3]]
    y_array = np.array(y_list)
    expected_indicator = np.array([[1, 0, 0], [0, 1, 0],
                                   [1, 0, 0], [0, 0, 1]])
    from_list = LabelBinarizer().fit_transform(y_list)
    from_array = LabelBinarizer().fit_transform(y_array)
    assert_array_equal(from_list, from_array)
    assert_array_equal(from_array, expected_indicator)
示例3: test_label_binarizer
# 需要导入模块: from sklearn.preprocessing.label import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.label.LabelBinarizer import fit_transform [as 别名]
def test_label_binarizer():
    """Round-trip checks for one-, two-, and multi-class labels; the
    deprecated ``multilabel_`` attribute must warn and be falsy here."""
    lb = LabelBinarizer()

    # One class: output defaults to the negative label.
    y = ["pos", "pos", "pos", "pos"]
    want = np.array([[0, 0, 0, 0]]).T
    out = lb.fit_transform(y)
    assert_false(assert_warns(DeprecationWarning, getattr, lb, "multilabel_"))
    assert_array_equal(lb.classes_, ["pos"])
    assert_array_equal(want, out)
    assert_array_equal(lb.inverse_transform(out), y)

    # Two classes: single 0/1 column.
    y = ["neg", "pos", "pos", "neg"]
    want = np.array([[0, 1, 1, 0]]).T
    out = lb.fit_transform(y)
    assert_false(assert_warns(DeprecationWarning, getattr, lb, "multilabel_"))
    assert_array_equal(lb.classes_, ["neg", "pos"])
    assert_array_equal(want, out)
    indicator = np.array([[1, 0], [0, 1], [0, 1], [1, 0]])
    assert_array_equal(lb.inverse_transform(indicator), y)

    # Multi-class: one-hot rows, columns sorted by class name.
    y = ["spam", "ham", "eggs", "ham", "0"]
    want = np.array([[0, 0, 0, 1],
                     [0, 0, 1, 0],
                     [0, 1, 0, 0],
                     [0, 0, 1, 0],
                     [1, 0, 0, 0]])
    out = lb.fit_transform(y)
    assert_array_equal(lb.classes_, ["0", "eggs", "ham", "spam"])
    assert_false(assert_warns(DeprecationWarning, getattr, lb, "multilabel_"))
    assert_array_equal(want, out)
    assert_array_equal(lb.inverse_transform(out), y)
示例4: test_label_binarizer_multilabel
# 需要导入模块: from sklearn.preprocessing.label import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.label.LabelBinarizer import fit_transform [as 别名]
def test_label_binarizer_multilabel():
    """Sequence-of-sequences input sets ``multilabel_`` and round-trips."""
    lb = LabelBinarizer()

    # Lists of tuples binarize to an indicator matrix.
    y_seq = [(2, 3), (1,), (1, 2)]
    indicator = np.array([[0, 1, 1],
                          [1, 0, 0],
                          [1, 1, 0]])
    out = lb.fit_transform(y_seq)
    assert_true(lb.multilabel_)
    assert_array_equal(indicator, out)
    assert_equal(lb.inverse_transform(out), y_seq)

    # Fitting directly on an indicator matrix: inverse is the identity.
    lb.fit(indicator)
    assert_array_equal(indicator, lb.inverse_transform(indicator))

    # Regression test for the two-class multilabel case.
    lb = LabelBinarizer()
    y_seq = [[1, 0], [0], [1], [0, 1]]
    indicator = np.array([[1, 1],
                          [1, 0],
                          [0, 1],
                          [1, 1]])
    out = lb.fit_transform(y_seq)
    assert_true(lb.multilabel_)
    assert_array_equal(indicator, out)
    # Compare as sets because label order within a sample is irrelevant.
    assert_equal([set(row) for row in lb.inverse_transform(out)],
                 [set(row) for row in y_seq])
示例5: test_label_binarizer_multilabel_unlabeled
# 需要导入模块: from sklearn.preprocessing.label import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.label.LabelBinarizer import fit_transform [as 别名]
def test_label_binarizer_multilabel_unlabeled():
    """Check that LabelBinarizer can handle an unlabeled sample"""
    # The empty label list must produce an all-zero indicator row.
    samples = [[1, 2], [1], []]
    indicator = np.array([[1, 1],
                          [1, 0],
                          [0, 0]])
    assert_array_equal(LabelBinarizer().fit_transform(samples), indicator)
示例6: test_label_binarizer_set_label_encoding
# 需要导入模块: from sklearn.preprocessing.label import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.label.LabelBinarizer import fit_transform [as 别名]
def test_label_binarizer_set_label_encoding():
    """Custom neg_label/pos_label values show up in the encoded output."""
    # Two-class case encoded with neg_label=-2 and pos_label=0.
    lb = LabelBinarizer(neg_label=-2, pos_label=0)
    y = np.array([0, 1, 1, 0])
    encoded = np.array([[-2, 0, 0, -2]]).T
    out = lb.fit_transform(y)
    assert_array_equal(encoded, out)
    assert_array_equal(lb.inverse_transform(out), y)

    # Multi-class case encoded with -2/+2.
    lb = LabelBinarizer(neg_label=-2, pos_label=2)
    y = np.array([3, 2, 1, 2, 0])
    encoded = np.array([[-2, -2, -2, 2],
                        [-2, -2, 2, -2],
                        [-2, 2, -2, -2],
                        [-2, -2, 2, -2],
                        [2, -2, -2, -2]])
    out = lb.fit_transform(y)
    assert_array_equal(encoded, out)
    assert_array_equal(lb.inverse_transform(out), y)
示例7: test_label_binarizer
# 需要导入模块: from sklearn.preprocessing.label import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.label.LabelBinarizer import fit_transform [as 别名]
def test_label_binarizer():
    """Dense and sparse outputs agree for one-, two-, and multi-class input."""
    # One class, dense output: a column of the negative label.
    y = ["pos", "pos", "pos", "pos"]
    lb = LabelBinarizer(sparse_output=False)
    want = np.array([[0, 0, 0, 0]]).T
    out = lb.fit_transform(y)
    assert_array_equal(lb.classes_, ["pos"])
    assert_array_equal(want, out)
    assert_array_equal(lb.inverse_transform(out), y)

    # One class, sparse output: same values in a sparse container.
    lb = LabelBinarizer(sparse_output=True)
    out = lb.fit_transform(y)
    assert issparse(out)
    assert_array_equal(lb.classes_, ["pos"])
    assert_array_equal(want, out.toarray())
    assert_array_equal(lb.inverse_transform(out.toarray()), y)

    # Two classes, dense output.
    lb = LabelBinarizer(sparse_output=False)
    y = ["neg", "pos", "pos", "neg"]
    want = np.array([[0, 1, 1, 0]]).T
    out = lb.fit_transform(y)
    assert_array_equal(lb.classes_, ["neg", "pos"])
    assert_array_equal(want, out)
    indicator = np.array([[1, 0],
                          [0, 1],
                          [0, 1],
                          [1, 0]])
    assert_array_equal(lb.inverse_transform(indicator), y)

    # Multi-class, dense output: one-hot columns sorted by class name.
    y = ["spam", "ham", "eggs", "ham", "0"]
    want = np.array([[0, 0, 0, 1],
                     [0, 0, 1, 0],
                     [0, 1, 0, 0],
                     [0, 0, 1, 0],
                     [1, 0, 0, 0]])
    out = lb.fit_transform(y)
    assert_array_equal(lb.classes_, ['0', 'eggs', 'ham', 'spam'])
    assert_array_equal(want, out)
    assert_array_equal(lb.inverse_transform(out), y)
示例8: test_label_binarizer_unseen_labels
# 需要导入模块: from sklearn.preprocessing.label import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.label.LabelBinarizer import fit_transform [as 别名]
def test_label_binarizer_unseen_labels():
    """transform() maps labels unseen during fit to all-zero rows."""
    lb = LabelBinarizer()
    seen = lb.fit_transform(["b", "d", "e"])
    assert_array_equal(np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]), seen)
    # "a", "c" and "f" were never fitted, so their rows are all zeros.
    mixed = lb.transform(["a", "b", "c", "d", "e", "f"])
    assert_array_equal(
        np.array([[0, 0, 0], [1, 0, 0], [0, 0, 0],
                  [0, 1, 0], [0, 0, 1], [0, 0, 0]]),
        mixed)
示例9: test_label_binarizer_iris
# 需要导入模块: from sklearn.preprocessing.label import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.label.LabelBinarizer import fit_transform [as 别名]
def test_label_binarizer_iris():
    """Manual one-vs-rest via LabelBinarizer matches SGDClassifier's
    built-in multiclass handling on iris accuracy."""
    lb = LabelBinarizer()
    Y = lb.fit_transform(iris.target)
    # Train one binary SGD classifier per indicator column.
    per_class = [SGDClassifier().fit(iris.data, Y[:, k])
                 for k in range(len(lb.classes_))]
    scores = np.array([clf.decision_function(iris.data)
                       for clf in per_class]).T
    manual_pred = lb.inverse_transform(scores)
    manual_acc = np.mean(iris.target == manual_pred)
    # The built-in multiclass path should reach the same accuracy.
    builtin_pred = SGDClassifier().fit(iris.data, iris.target).predict(iris.data)
    builtin_acc = np.mean(iris.target == builtin_pred)
    assert_almost_equal(manual_acc, builtin_acc)
示例10: dbpedia_convgemb
# 需要导入模块: from sklearn.preprocessing.label import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.label.LabelBinarizer import fit_transform [as 别名]
def dbpedia_convgemb(sample=None, n_procs=None):
    """Train a small word-embedding CNN on the DBpedia category data and
    print its test accuracy.

    sample: optional size passed through to get_dbpedia_data; None loads
        the full dataset.
    n_procs: worker count, defaulting to cpu_count().
        NOTE(review): n_procs is computed but never used below — confirm
        whether it was meant to be passed to a data-loading helper.
    """
    if not n_procs:
        n_procs = cpu_count()

    df = get_dbpedia_data(size=sample)

    # Hold out 5000 docs per each of the 14 categories; when only a sample
    # was loaded, shrink the test size proportionally (45000 presumably
    # being the full per-class size — TODO confirm).
    if sample:
        test_size = int(round(np.sum(5000 * df.category.value_counts().values / 45000)))
    else:
        test_size = 5000 * 14

    # Stratified split keeps the category distribution in both halves.
    split = StratifiedShuffleSplit(df.category, test_size=test_size)
    train_split, test_split = next(iter(split))
    train_df = df.iloc[train_split]
    test_df = df.iloc[test_split]

    # Vocabulary is built from the training sentences only and capped at
    # the 5000 most frequent tokens.
    train_docs = DataframeSentences(train_df, cols=['title', 'abstract'], flatten=True)
    vocab = Dictionary(train_docs)
    vocab.filter_extremes(keep_n=5000)
    bin = LabelBinarizer()

    # Token ids are shifted by +1 so id 0 stays free for padding; every
    # document is padded/truncated to 100 positions.
    x_train = np.array(pad_sentences([[vocab.token2id[tok] + 1 for tok in s if tok in vocab.token2id]
                                      for s in train_docs],
                                     max_length=100, padding_word=0))
    # One-hot encode categories; the same (already fitted) binarizer
    # transforms the test labels so column order matches.
    y_train = bin.fit_transform(train_df.category.values)

    test_docs = DataframeSentences(test_df, cols=['title', 'abstract'], flatten=True)
    x_test = np.array(pad_sentences([[vocab.token2id[tok] + 1 for tok in s if tok in vocab.token2id]
                                     for s in test_docs],
                                    max_length=100, padding_word=0))
    y_test = bin.transform(test_df.category.values)

    # Pre-trained word2vec weights; the embedding layer stays frozen.
    emb_weights = load_w2v_weights(vocab)

    model = Sequential()
    model.add(Embedding(5001, 300, input_length=100, dropout=.2, weights=[emb_weights], trainable=False))
    model.add(Convolution1D(nb_filter=50, filter_length=3, border_mode='valid',
                            activation='relu', subsample_length=1))
    # Max-pool over the whole remaining sequence: one feature per filter.
    model.add(MaxPooling1D(pool_length=model.output_shape[1]))
    model.add(Flatten())
    model.add(Dense(100, activation='relu'))
    model.add(Dropout(.2))
    model.add(Dense(14, activation='sigmoid'))
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    model.fit(x_train, y_train)
    # np.argwhere(y_test)[:, 1] recovers the integer class index from the
    # one-hot rows for comparison with predict_classes output.
    print(accuracy_score(np.argwhere(y_test)[:,1], model.predict_classes(x_test)))
示例11: test_label_binarizer_unseen_labels
# 需要导入模块: from sklearn.preprocessing.label import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.label.LabelBinarizer import fit_transform [as 别名]
def test_label_binarizer_unseen_labels():
    """Labels not seen during fit() must transform to all-zero rows."""
    lb = LabelBinarizer()
    identity = np.array([[1, 0, 0],
                         [0, 1, 0],
                         [0, 0, 1]])
    assert_array_equal(identity, lb.fit_transform(['b', 'd', 'e']))
    # Unseen 'a', 'c' and 'f' interleaved with the fitted labels.
    with_unseen = np.array([[0, 0, 0],
                            [1, 0, 0],
                            [0, 0, 0],
                            [0, 1, 0],
                            [0, 0, 1],
                            [0, 0, 0]])
    assert_array_equal(with_unseen,
                       lb.transform(['a', 'b', 'c', 'd', 'e', 'f']))
示例12: _log_loss
# 需要导入模块: from sklearn.preprocessing.label import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.label.LabelBinarizer import fit_transform [as 别名]
def _log_loss(y_true, y_pred, eps=1e-10, sample_weight=None):
    """Shorter, simpler variant of log_loss that supports sample_weight."""
    sample_weight = check_sample_weight(y_true, sample_weight=sample_weight)
    y_true, y_pred, sample_weight = check_arrays(y_true, y_pred, sample_weight)
    y_true = column_or_1d(y_true)

    # Binarize the true labels; in the binary case prepend the
    # complementary column so both classes get an indicator.
    indicators = LabelBinarizer().fit_transform(y_true)
    if indicators.shape[1] == 1:
        indicators = numpy.append(1 - indicators, indicators, axis=1)

    # Clip predictions away from 0 and 1 to keep the log finite.
    probs = numpy.clip(y_pred, eps, 1 - eps)
    # Check if dimensions are consistent.
    indicators, probs = check_arrays(indicators, probs)
    # Renormalize each row to a proper probability distribution.
    probs /= probs.sum(axis=1)[:, numpy.newaxis]

    weighted = indicators * numpy.log(probs) * sample_weight[:, numpy.newaxis]
    return -weighted.sum() / numpy.sum(sample_weight)
示例13: check_binarized_results
# 需要导入模块: from sklearn.preprocessing.label import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.label.LabelBinarizer import fit_transform [as 别名]
def check_binarized_results(y, classes, pos_label, neg_label, expected):
    """Exercise label_binarize and LabelBinarizer against `expected`.

    Runs both the functional and the transformer interface over dense and
    sparse output, and checks the inverse transform recovers `y`.
    """
    for sparse_output in [True, False]:
        # Sparse output only supports neg_label == 0 with pos_label != 0;
        # every other combination must raise ValueError.
        if ((pos_label == 0 or neg_label != 0) and sparse_output):
            assert_raises(ValueError, label_binarize, y, classes,
                          neg_label=neg_label, pos_label=pos_label,
                          sparse_output=sparse_output)
            continue

        # check label_binarize (the functional interface)
        binarized = label_binarize(y, classes, neg_label=neg_label,
                                   pos_label=pos_label,
                                   sparse_output=sparse_output)
        assert_array_equal(toarray(binarized), expected)
        assert_equal(issparse(binarized), sparse_output)

        # check inverse: multiclass has its own helper, everything else
        # goes through thresholding at the neg/pos midpoint.
        y_type = type_of_target(y)
        if y_type == "multiclass":
            inversed = _inverse_binarize_multiclass(binarized, classes=classes)
        else:
            inversed = _inverse_binarize_thresholding(binarized,
                                                      output_type=y_type,
                                                      classes=classes,
                                                      threshold=((neg_label +
                                                                  pos_label) /
                                                                 2.))
        assert_array_equal(toarray(inversed), toarray(y))

        # Check label binarizer (the transformer interface) agrees and
        # preserves sparseness on the way back.
        lb = LabelBinarizer(neg_label=neg_label, pos_label=pos_label,
                            sparse_output=sparse_output)
        binarized = lb.fit_transform(y)
        assert_array_equal(toarray(binarized), expected)
        assert_equal(issparse(binarized), sparse_output)
        inverse_output = lb.inverse_transform(binarized)
        assert_array_equal(toarray(inverse_output), toarray(y))
        assert_equal(issparse(inverse_output), issparse(y))
示例14: test_label_binarize_with_multilabel_indicator
# 需要导入模块: from sklearn.preprocessing.label import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.label.LabelBinarizer import fit_transform [as 别名]
def test_label_binarize_with_multilabel_indicator():
    """A binary indicator matrix must be re-encoded, not passed through."""
    label_range = np.arange(3)
    neg, pos = -1, 2
    indicator = np.array([[0, 1, 0], [1, 1, 1]])
    recoded = np.array([[-1, 2, -1], [2, 2, 2]])

    # Functional interface.
    assert_array_equal(
        label_binarize(indicator, label_range, multilabel=True,
                       neg_label=neg, pos_label=pos),
        recoded)

    # Transformer interface: fit_transform and fit().transform() agree.
    lb = LabelBinarizer(pos_label=pos, neg_label=neg)
    assert_array_equal(lb.fit_transform(indicator), recoded)
    assert_array_equal(lb.fit(indicator).transform(indicator), recoded)
示例15: dbpedia_smallcharconv
# 需要导入模块: from sklearn.preprocessing.label import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.label.LabelBinarizer import fit_transform [as 别名]
def dbpedia_smallcharconv(sample=None, n_procs=None):
    """Train a character-level CNN on the DBpedia category data and print
    its test accuracy.

    sample: optional size passed through to get_dbpedia_data; None loads
        the full dataset.
    n_procs: worker count, defaulting to cpu_count().
        NOTE(review): n_procs is computed but never used below — confirm
        whether it was meant to be passed to a data-loading helper.
    """
    if not n_procs:
        n_procs = cpu_count()
    df = get_dbpedia_data(size=sample)
    # Hold out 5000 docs per each of the 14 categories; when only a sample
    # was loaded, shrink the test size proportionally (45000 presumably
    # being the full per-class size — TODO confirm).
    if sample:
        test_size = int(round(np.sum(5000 * df.category.value_counts().values / 45000)))
    else:
        test_size = 5000 * 14
    logging.info('creating train test split ...')
    # Stratified split keeps the category distribution in both halves.
    split = StratifiedShuffleSplit(df.category, test_size=test_size)
    train_split, test_split = next(iter(split))
    train_df = df.iloc[train_split]
    test_df = df.iloc[test_split]
    logging.info('preprocessing, padding and binarizing data ...')
    # Encode title+abstract character by character: index into CHAR_MAP,
    # with one extra id (len(CHAR_MAP)) for out-of-alphabet characters.
    train_docs = [[CHAR_MAP.index(c) if c in CHAR_MAP else len(CHAR_MAP) for c in text] for text
                  in train_df[['title', 'abstract']].apply(lambda cols: u'\n'.join(cols), axis=1).values]
    bin = LabelBinarizer()
    x_train = np.array(pad_sentences(train_docs, max_length=1014, padding_word=CHAR_MAP.index(' ')))
    y_train = bin.fit_transform(train_df.category.values)
    test_docs = [[CHAR_MAP.index(c) if c in CHAR_MAP else len(CHAR_MAP) for c in text] for text
                 in test_df[['title', 'abstract']].apply(lambda cols: u'\n'.join(cols), axis=1).values]
    # NOTE(review): test docs are padded with 0 while train docs are padded
    # with CHAR_MAP.index(' ') — likely unintended; confirm which is right.
    x_test = np.array(pad_sentences(test_docs, max_length=1014, padding_word=0))
    y_test = bin.transform(test_df.category.values)
    logging.info('building model ...')
    # Frozen character embedding feeding a stack of 1-D convolutions with
    # interleaved max-pooling, then two dense layers with dropout.
    model = Sequential()
    model.add(Embedding(len(CHAR_MAP) + 1, len(CHAR_MAP) + 1, input_length=1014,
                        weights=[char_embedding()], trainable=False))
    model.add(Convolution1D(nb_filter=256, filter_length=7, border_mode='valid',
                            activation='relu'))
    model.add(MaxPooling1D(pool_length=3))
    model.add(Convolution1D(nb_filter=256, filter_length=7, border_mode='valid',
                            activation='relu', subsample_length=1))
    model.add(MaxPooling1D(pool_length=3))
    model.add(Convolution1D(nb_filter=256, filter_length=3, border_mode='valid',
                            activation='relu', subsample_length=1))
    model.add(Convolution1D(nb_filter=256, filter_length=3, border_mode='valid',
                            activation='relu', subsample_length=1))
    model.add(Convolution1D(nb_filter=256, filter_length=3, border_mode='valid',
                            activation='relu', subsample_length=1))
    model.add(Convolution1D(nb_filter=256, filter_length=3, border_mode='valid',
                            activation='relu', subsample_length=1))
    model.add(MaxPooling1D(pool_length=3))
    model.add(Flatten())
    model.add(Dense(1024, activation='relu'))
    model.add(Dropout(.5))
    model.add(Dense(1024, activation='relu'))
    model.add(Dropout(.5))
    model.add(Dense(14, activation='sigmoid'))
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['categorical_accuracy'])
    print(model.summary())
    model.fit(x_train, y_train, batch_size=64, nb_epoch=5, validation_data=[x_test, y_test])
    # np.argwhere(y_test)[:, 1] recovers the integer class index from the
    # one-hot rows for comparison with predict_classes output.
    print(accuracy_score(np.argwhere(y_test)[:,1], model.predict_classes(x_test)))