本文整理汇总了Python中sklearn.preprocessing.LabelBinarizer.transform方法的典型用法代码示例。如果您正苦于以下问题：Python LabelBinarizer.transform方法的具体用法？Python LabelBinarizer.transform怎么用？Python LabelBinarizer.transform使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.preprocessing.LabelBinarizer的用法示例。
在下文中一共展示了LabelBinarizer.transform方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: train
# 需要导入模块: from sklearn.preprocessing import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.LabelBinarizer import transform [as 别名]
def train():
    """Train the model on the MNIST pickle and print accuracy figures.

    Reads the train/validation/test splits from ``../mnist.pkl.gz``, builds
    the graph through the ``model`` helpers and runs 10000 mini-batch steps
    of size 50.  Every 100 steps the accuracy on the current batch and on
    the validation split is printed; the test accuracy is printed once the
    loop has finished.  ``keep_prob`` is fed as 0.5 for training steps and
    1.0 for the accuracy evaluation on the current batch.
    """
    tr, va, te = read_dataset('../mnist.pkl.gz')
    # Encoder fitted on the ten class ids 0..9.
    binarizer = LabelBinarizer().fit(range(10))

    x = tf.placeholder(tf.float32, [None, 784])
    y = tf.placeholder(tf.float32, [None, 10])
    keep_prob = tf.placeholder(tf.float32)

    preds = model.inference(x, keep_prob)
    loss, total_loss = model.loss(preds, y)
    acc = model.evaluation(preds, y)
    # learning rate: 0.1
    train_op = model.training(total_loss, 0.1)

    init = tf.initialize_all_variables()
    sess = tf.Session()
    try:
        sess.run(init)
        for i in range(10000):
            batch_xs, batch_ys = tr.next_batch(50)
            # Binarize once per step; both the logging branch and the
            # training step feed the same targets.
            batch_targets = binarizer.transform(batch_ys)
            if i % 100 == 0:
                train_acc = acc.eval(
                    feed_dict={x: batch_xs, y: batch_targets, keep_prob: 1.0},
                    session=sess)
                # Single-argument print calls behave the same whether
                # print is a statement or a function.
                print("step: {0}, training accuracy {1}".format(i, train_acc))
                validation_accuracy = getAccuracy(
                    x, y, keep_prob, binarizer, acc, va, sess)
                print("Validation accuracy : {0}".format(validation_accuracy))
            train_op.run(
                feed_dict={x: batch_xs, y: batch_targets, keep_prob: 0.5},
                session=sess)
        test_accuracy = getAccuracy(x, y, keep_prob, binarizer, acc, te, sess)
        # Formatting into one string avoids printing a tuple when print
        # is a statement.
        print("Test accuracy : {0}".format(test_accuracy))
    finally:
        # Release the session even if training fails midway.
        sess.close()
示例2: Encoding
# 需要导入模块: from sklearn.preprocessing import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.LabelBinarizer import transform [as 别名]
def Encoding(data, general_matrix=None):
    """Replace every string column of ``data`` with its binarized encoding.

    A column is treated as categorical when the cell in its first row is a
    ``str``.  Each such column is encoded with a ``LabelBinarizer`` fitted on
    the unique values of the same column of ``general_matrix`` when that is
    given, otherwise on the column of ``data`` itself.  When
    ``general_matrix`` is given it is expanded in parallel (on a local copy
    of the reference only) so that column positions stay aligned.

    Args:
        data: 2-D array whose string columns are to be encoded.
        general_matrix: optional 2-D array with the same column layout as
            ``data`` that supplies the full set of categories.

    Returns:
        A new array in which each string column has been replaced by the
        column(s) produced by ``LabelBinarizer.transform``.

    Raises:
        Whatever ``LabelBinarizer.fit`` / ``transform`` raise; a failed fit
        is no longer swallowed, because continuing would encode the column
        with the categories of a previously processed column.
    """
    count = 0
    col_idx = 0
    # The width of ``data`` grows as columns are expanded, so the bound is
    # re-evaluated on every step instead of being fixed up front.
    while col_idx < data.shape[1]:
        # Exact-type check kept from the original: isinstance() would also
        # match numpy.str_ cells, which the original did not encode.
        if type(data[0, col_idx]) != str:
            col_idx += 1
            continue

        count += 1
        column = data[:, col_idx]
        if general_matrix is None:
            reference = column
        else:
            reference = general_matrix[:, col_idx]

        # A fresh encoder per column keeps categories from leaking between
        # columns.
        encoder = LabelBinarizer()
        encoder.fit(np.unique(reference))
        encoded = encoder.transform(column)

        # Swap the original column for its encoded block.
        data = np.concatenate(
            (data[:, :col_idx], encoded, data[:, col_idx + 1:]), axis=1)
        if general_matrix is not None:
            general_matrix = np.concatenate(
                (general_matrix[:, :col_idx],
                 encoder.transform(reference),
                 general_matrix[:, col_idx + 1:]),
                axis=1)

        # Skip over the freshly inserted numeric columns.
        col_idx += encoded.shape[1]

    print("count : %d" % count)
    return data
示例3: one_hot_encoding
# 需要导入模块: from sklearn.preprocessing import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.LabelBinarizer import transform [as 别名]
def one_hot_encoding(y_train, y_test):
    """Binarize train and test labels with an encoder fitted on ``y_train``.

    Args:
        y_train: labels used both to fit the encoder and to be encoded.
        y_test: labels encoded with the encoder fitted on ``y_train``.

    Returns:
        Tuple ``(encoded y_train, encoded y_test)`` as returned by
        ``LabelBinarizer.transform``.
    """
    encoder = LabelBinarizer().fit(y_train)
    encoded_train = encoder.transform(y_train)
    encoded_test = encoder.transform(y_test)
    return encoded_train, encoded_test
示例4: NN_Classifier
# 需要导入模块: from sklearn.preprocessing import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.LabelBinarizer import transform [as 别名]
class NN_Classifier(NNBase):
    """Classifier front-end for ``NNBase`` that binarizes its targets.

    Targets are encoded with a ``LabelBinarizer`` before being handed to the
    base-class update/gradient routines, and predictions are mapped back to
    the original labels with ``inverse_transform``.
    """

    def __init__(self, layers=None, lr=0.01, epochs=None, noisy=None, verbose=False):
        """Forward the hyper-parameters to ``NNBase`` and set classifier state.

        ``layers`` defaults to a fresh empty list per instance; a shared
        ``[]`` default would be visible to every instance created without
        an explicit value.
        """
        if layers is None:
            layers = []
        super(NN_Classifier, self).__init__(
            layers=layers, lr=lr, epochs=epochs, noisy=noisy, verbose=verbose)
        self.type = 'C'
        self.error_func = CrossEntropyError
        self.accuracy_score = AccuracyScore
        self.label_binarizer = LabelBinarizer()

    def predict(self, X):
        """Return the predicted labels for every element of ``X``."""
        # NOTE(review): coalesce() is defined elsewhere; presumably it turns
        # the raw outputs into an indicator matrix -- confirm.
        current_results = coalesce(self.predict_proba(X))
        return self.label_binarizer.inverse_transform(current_results)

    def predict_proba(self, X):
        """Return the stacked raw network outputs, one row block per element of ``X``."""
        predictions = [NNBase._predict(self, row(el)) for el in X]
        return np.vstack(predictions)

    def fit(self, X, T):
        """Fit the label binarizer on ``T`` and run per-sample updates.

        Runs ``self.epochs`` passes over ``X``; a falsy ``self.epochs`` is
        replaced by 1 (and stored back on the instance, as before).
        """
        T_impl = self.label_binarizer.fit_transform(T)
        if not self.epochs:
            self.epochs = 1
        for num in range(self.epochs):
            if self.verbose:
                # Single-argument call: same output whether print is a
                # statement or a function.
                print("Epoch: %d" % num)
            for i in range(len(X)):
                NNBase._update(self, row(X[i]), row(T_impl[i]))

    def error(self, X, T):
        """Return ``error_func.func`` of the raw outputs against the binarized ``T``."""
        T_impl = self.label_binarizer.transform(T)
        Y = self.predict_proba(X)
        return self.error_func.func(Y, T_impl)

    def score(self, X, T):
        """Return ``accuracy_score.func`` of the predicted labels against ``T``."""
        Y = self.predict(X)
        return self.accuracy_score.func(Y, T)

    def analytical_gradient(self, X, T):
        """Delegate to ``NNBase._analytical_gradient`` with binarized targets."""
        T_impl = self.label_binarizer.transform(T)
        return NNBase._analytical_gradient(self, X, T_impl)

    def numerical_gradient(self, X, T):
        """Delegate to ``NNBase._numerical_gradient`` with binarized targets."""
        T_impl = self.label_binarizer.transform(T)
        return NNBase._numerical_gradient(self, X, T_impl)
示例5: partb
# 需要导入模块: from sklearn.preprocessing import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.LabelBinarizer import transform [as 别名]
def partb():
    """Train and evaluate a network on each of the 14 ``pofa{i}.npz`` splits.

    For every split the labels are binarized, a ``NeuralNetwork`` with one
    hidden layer of 30 units is fitted, and the train/test accuracies are
    printed.  After the last split the pooled train/test accuracy, the raw
    counters and the mean of the per-split test accuracies are printed.
    """
    # Imported once here rather than on every loop iteration.
    from sklearn.preprocessing import LabelBinarizer

    def load(file_name):
        """Read one split; feature matrices are returned transposed."""
        # The context manager closes the archive's file handle; the arrays
        # are fully read on access, so they stay valid afterwards.
        with np.load(file_name) as archive:
            X_train = archive['X_train'].T
            y_train = archive['y_train']
            X_test = archive['X_test'].T
            y_test = archive['y_test']
            X_cv = archive['X_cv'].T
            y_cv = archive['y_cv']
        return X_train, y_train, X_cv, y_cv, X_test, y_test

    train_ = [0, 0]   # [correct predictions, samples seen] over all splits
    test_ = [0, 0]
    overall = []      # per-split test accuracy

    for i in range(14):
        X_train, y_train, X_cv, y_cv, X_test, y_test = load('pofa{}.npz'.format(i))

        binarizer = LabelBinarizer()
        binarizer.fit(y_train)
        # Transposed to match the transposed feature matrices from load().
        Y_train = binarizer.transform(y_train).T
        Y_cv = binarizer.transform(y_cv).T

        print(X_train.shape[0], Y_train.shape[0])
        nn = NeuralNetwork(
            [X_train.shape[0], 30, Y_train.shape[0]],
            functions=[sigmoid, softmax],
            derivatives=[derivative_sigmoid])
        nn.fit(X_train, Y_train, eta=0.01, momentum=0.5, minibatch=16,
               regularizer=0.15, max_iter=200, gradient_check=False,
               cv=(X_cv, Y_cv), graphs=False, lbfgs=False)

        output = nn.forward(X_train)
        y_train_output = binarizer.inverse_transform(output.T)
        y_test_output = binarizer.inverse_transform(nn.forward(X_test).T)

        print("Iteration: ", i)
        print((y_train_output == y_train).mean())
        print((y_test_output == y_test).mean())

        overall.append((y_test == y_test_output).mean())
        train_[0] += (y_train_output == y_train).sum()
        train_[1] += y_train.shape[0]
        test_[0] += (y_test_output == y_test).sum()
        test_[1] += y_test.shape[0]

    print("Average train accuracy: ", train_[0]/train_[1],"Average test accuracy: ",test_[0]/test_[1])
    print(train_, test_)
    overall = np.array(overall)
    print(overall.mean())
示例6: load_dataset
# 需要导入模块: from sklearn.preprocessing import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.LabelBinarizer import transform [as 别名]
def load_dataset(self):
    """Load the snippet dataset and keep only the first binarized label column.

    The arrays come from ``snippet_reader.toNumpy()``.  Both label vectors
    are binarized with an encoder fitted on the training labels and replaced
    by the first column of the result.

    Returns:
        Tuple ``(X, y, X_test, y_test)``.
    """
    X, y, X_test, y_test = snippet_reader.toNumpy()

    encoder = LabelBinarizer()
    encoder.fit(y)

    # next(..., default) takes the first column of the binarized labels and
    # leaves the labels untouched if the result has no columns at all.
    y = next(iter(encoder.transform(y).T), y)
    y_test = next(iter(encoder.transform(y_test).T), y_test)

    return X, y, X_test, y_test
示例7: bio_classification_report
# 需要导入模块: from sklearn.preprocessing import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.LabelBinarizer import transform [as 别名]
def bio_classification_report(y_true, y_pred):
    """Build a token-level classification report for tagged sequences.

    The nested label sequences are flattened, binarized with a single
    ``LabelBinarizer`` and passed to ``classification_report``.  Every class
    seen in ``y_true`` is reported (no label is filtered out here), ordered
    by the part of the tag after the first ``-`` and then by the part
    before it.

    Note: This function was adapted from
    http://nbviewer.ipython.org/github/tpeng/python-crfsuite/blob/master/examples/CoNLL%202002.ipynb

    Args:
        y_true: True labels, an iterable of sequences of strings.
        y_pred: Predicted labels, an iterable of sequences of strings.

    Returns:
        The value returned by ``classification_report``.
    """
    def _suffix_then_prefix(tag):
        # 'B-PER' -> ['PER', 'B']: groups tags that share the same suffix.
        return tag.split('-', 1)[::-1]

    lbin = LabelBinarizer()
    flat_true = list(chain.from_iterable(y_true))
    y_true_combined = lbin.fit_transform(flat_true)
    flat_pred = list(chain.from_iterable(y_pred))
    y_pred_combined = lbin.transform(flat_pred)

    ordered_tags = sorted(set(lbin.classes_), key=_suffix_then_prefix)
    column_of = dict((cls, idx) for idx, cls in enumerate(lbin.classes_))
    label_columns = [column_of[tag] for tag in ordered_tags]

    return classification_report(
        y_true_combined,
        y_pred_combined,
        labels=label_columns,
        target_names=ordered_tags,
    )
示例8: test_normalize_option_multilabel_classification
# 需要导入模块: from sklearn.preprocessing import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.LabelBinarizer import transform [as 别名]
def test_normalize_option_multilabel_classification():
    """Check the ``normalize`` option of the metrics on multilabel input.

    For every metric in ``METRICS_WITH_NORMALIZE_OPTION`` the normalized
    score must be positive and equal to the un-normalized score divided by
    the number of samples, both for the list-of-labels format and for the
    binary indicator format.
    """
    n_classes = 4
    n_samples = 100
    shared_args = dict(n_features=1, n_classes=n_classes, n_samples=n_samples)
    _, y_true = make_multilabel_classification(random_state=0, **shared_args)
    _, y_pred = make_multilabel_classification(random_state=1, **shared_args)

    # Append one sample without any label to both sides.
    y_true += ([],)
    y_pred += ([],)
    n_samples += 1

    lb = LabelBinarizer().fit([range(n_classes)])
    y_true_binary_indicator = lb.transform(y_true)
    y_pred_binary_indicator = lb.transform(y_pred)

    def check_normalize(name, metrics, truth, guess):
        # Normalized score first, then compare with the scaled raw score.
        measure = metrics(truth, guess, normalize=True)
        assert_greater(measure, 0, msg="We failed to test correctly the normalize option")
        assert_almost_equal(
            metrics(truth, guess, normalize=False) / n_samples,
            measure,
            err_msg="Failed with %s" % name,
        )

    for name, metrics in METRICS_WITH_NORMALIZE_OPTION.items():
        # List of list of labels
        check_normalize(name, metrics, y_true, y_pred)
        # Indicator matrix format
        check_normalize(name, metrics, y_true_binary_indicator, y_pred_binary_indicator)
示例9: get_abalone19
# 需要导入模块: from sklearn.preprocessing import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.LabelBinarizer import transform [as 别名]
def get_abalone19():
    """Load the abalone CSV and build a binary-label data frame.

    The ``gender`` column is replaced by one indicator column per class
    (named ``gender_<class>``), a ``label`` column is added that is 1 when
    ``rings`` is greater than 10 and 0 otherwise, and ``rings`` is dropped.
    When ``STANDARDIZE`` is true, every column between the gender indicators
    and the trailing ``label`` column is standardized to zero mean and unit
    standard deviation.

    Returns:
        The assembled ``pandas.DataFrame``.
    """
    raw_data = pd.read_csv(ABALONE_FILE, sep=',')
    # .ix has been removed from pandas; .loc is the label-based equivalent.
    genders = list(raw_data.loc[:, 'gender'])
    cts_data = raw_data.drop(labels='gender', axis=1)

    # initialize & fit preprocessor
    lbz = LabelBinarizer()
    lbz.fit(genders)

    # encode categorical var
    encoded_genders = pd.DataFrame(lbz.transform(genders))
    encoded_genders.columns = ['gender_' + k for k in lbz.classes_]

    # recombine encoded data
    new_data = pd.concat(objs=[encoded_genders, cts_data], axis=1)
    new_data['label'] = raw_data['rings'].map(
        lambda k: 1 if k > 10 else 0)  # binary clf task
    new_data = new_data.drop('rings', axis=1)

    # standardize cts features
    if STANDARDIZE:
        # Skip however many indicator columns were produced instead of
        # assuming exactly three, and stop before the trailing label.
        first_cts = encoded_genders.shape[1]
        for col in new_data.columns[first_cts:-1]:
            mean = new_data[col].mean()
            std = new_data[col].std()
            new_data[col] = (new_data[col] - mean) / float(std)

    pos_recs = new_data['label'].sum()
    # Single-argument call: same output whether print is a statement or a
    # function.
    print('total pos class pct = {} %\n'.format(
        round(100 * pos_recs / float(len(new_data)), 3)))
    return new_data
示例10: our_classification_report
# 需要导入模块: from sklearn.preprocessing import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.LabelBinarizer import transform [as 别名]
def our_classification_report(y_true, y_pred):
    """Build a token-level classification report for tagged sequences.

    The nested label sequences are flattened, binarized with one
    ``LabelBinarizer`` and handed to ``classification_report``.  All classes
    seen in ``y_true`` are reported, in plain sorted order; no label is
    filtered out here.

    Args:
        y_true: True labels, an iterable of sequences.
        y_pred: Predicted labels, an iterable of sequences.

    Returns:
        The value returned by ``classification_report``.
    """
    lb = LabelBinarizer()
    flat_true = list(chain.from_iterable(y_true))
    y_true_combined = lb.fit_transform(flat_true)
    flat_pred = list(chain.from_iterable(y_pred))
    y_pred_combined = lb.transform(flat_pred)

    ordered_tags = sorted(set(lb.classes_))
    column_of = dict((cls, idx) for idx, cls in enumerate(lb.classes_))
    label_columns = [column_of[tag] for tag in ordered_tags]

    return classification_report(
        y_true_combined,
        y_pred_combined,
        labels=label_columns,
        target_names=ordered_tags
    )
示例11: CategoricalToNumerical
# 需要导入模块: from sklearn.preprocessing import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.LabelBinarizer import transform [as 别名]
class CategoricalToNumerical(object):
    """Turn a 1-D categorical feature into a 1-D numerical feature.

    The input is binarized with a ``LabelBinarizer`` and the resulting
    matrix is reduced by the configured dimensionality reducer; the reduced
    output is flattened and must have the same shape as the input.
    """

    def __init__(self, dimensionality_reducer=None, verify=True):
        """Takes in a dimensionality reducer in order to convert categorical
        features into numerical.

        Args:
            dimensionality_reducer: object with ``fit`` and ``transform``;
                defaults to ``RandomizedPCA(1)``.
            verify: when true, ``fit`` asserts ``is_categorical(X)``.
        """
        if dimensionality_reducer is None:
            dimensionality_reducer = RandomizedPCA(1)
        self.dimensionality_reducer = dimensionality_reducer
        self.verify = verify
        self.binarizer = LabelBinarizer()

    def fit(self, X, y=None):
        """Fit the binarizer on ``X`` and the reducer on the binarized data."""
        self._verify(X, self.verify)
        binarized = self.binarizer.fit_transform(X)
        self.dimensionality_reducer.fit(binarized)

    def transform(self, X):
        """Return the flattened reduced encoding of ``X`` (same shape as ``X``)."""
        self._verify(X, False)
        binarized = self.binarizer.transform(X)
        result = self.dimensionality_reducer.transform(binarized).flatten()
        # The reducer must yield exactly one value per input element.
        assert X.shape == result.shape
        return result

    def fit_transform(self, X, y=None):
        """Fit on ``X`` and return its transformed values."""
        self.fit(X)
        return self.transform(X)

    def _verify(self, X, verify):
        """Assert that ``X`` is acceptable input.

        With ``verify`` true the check is delegated to ``is_categorical``;
        otherwise ``X`` must be a 1-D ``numpy.ndarray``.
        """
        # NOTE(review): the nesting of the two ndarray asserts under the
        # else-branch is reconstructed -- confirm against the upstream file.
        if verify:
            assert is_categorical(X)
        else:
            assert isinstance(X, np.ndarray)
            assert len(X.shape) == 1
示例12: bio_classification_report
# 需要导入模块: from sklearn.preprocessing import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.LabelBinarizer import transform [as 别名]
def bio_classification_report(y_true, y_pred):
    """Evaluate entity extraction accuracy at token level.

    The nested label sequences are flattened, binarized with one
    ``LabelBinarizer`` and passed to ``classification_report``.  The ``'O'``
    class is left out of the report; the remaining tags are ordered by the
    part after the first ``-`` and then by the part before it.

    Taken from https://github.com/scrapinghub/python-crfsuite/blob/master/examples/CoNLL%202002.ipynb

    Args:
        y_true: True labels, an iterable of sequences of strings.
        y_pred: Predicted labels, an iterable of sequences of strings.

    Returns:
        The value returned by ``classification_report``.
    """
    from sklearn.preprocessing import LabelBinarizer
    from itertools import chain
    from sklearn.metrics import classification_report

    def _suffix_then_prefix(tag):
        # 'B-PER' -> ['PER', 'B']: groups tags that share the same suffix.
        return tag.split('-', 1)[::-1]

    lb = LabelBinarizer()
    flat_true = list(chain.from_iterable(y_true))
    y_true_combined = lb.fit_transform(flat_true)
    flat_pred = list(chain.from_iterable(y_pred))
    y_pred_combined = lb.transform(flat_pred)

    reported_tags = sorted(set(lb.classes_) - {'O'}, key=_suffix_then_prefix)
    column_of = dict((cls, idx) for idx, cls in enumerate(lb.classes_))
    label_columns = [column_of[tag] for tag in reported_tags]

    return classification_report(
        y_true_combined,
        y_pred_combined,
        labels=label_columns,
        target_names=reported_tags,
    )
示例13: report
# 需要导入模块: from sklearn.preprocessing import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.LabelBinarizer import transform [as 别名]
def report(test_y, pred_y):
    """Print a classification report for nested label sequences.

    Both inputs are flattened and binarized with an encoder fitted on the
    flattened ``test_y``; every class is reported in sorted order.
    """
    lb = LabelBinarizer()
    flat_truth = list(chain.from_iterable(test_y))
    test_y_combined = lb.fit_transform(flat_truth)
    flat_guess = list(chain.from_iterable(pred_y))
    pred_y_combined = lb.transform(flat_guess)

    tagset = sorted(set(lb.classes_))
    # Each tag is looked up at its own position in the sorted tag list.
    position_of = dict((tag, pos) for pos, tag in enumerate(tagset))
    label_ids = [position_of[tag] for tag in tagset]

    summary = classification_report(
        test_y_combined,
        pred_y_combined,
        labels=label_ids,
        target_names=tagset,
    )
    print(summary)
示例14: bio_classification_report
# 需要导入模块: from sklearn.preprocessing import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.LabelBinarizer import transform [as 别名]
def bio_classification_report(y_true, y_pred):
    """Compute token-level scores and a text report for tagged sequences.

    The nested label sequences are flattened and binarized with one
    ``LabelBinarizer``.  The ``'O'`` class is excluded; the remaining tags
    are ordered by the part after the first ``-`` and then by the part
    before it.

    Args:
        y_true: True labels, an iterable of sequences of strings.
        y_pred: Predicted labels, an iterable of sequences of strings.

    Returns:
        A 3-tuple: the result of ``precision_recall_fscore_support`` (with
        ``average=None``), the result of ``classification_report``, and the
        list of class indices that both were computed for.
    """
    def _suffix_then_prefix(tag):
        # 'B-PER' -> ['PER', 'B']: groups tags that share the same suffix.
        return tag.split('-', 1)[::-1]

    lb = LabelBinarizer()
    flat_true = list(chain.from_iterable(y_true))
    y_true_combined = lb.fit_transform(flat_true)
    flat_pred = list(chain.from_iterable(y_pred))
    y_pred_combined = lb.transform(flat_pred)

    reported_tags = sorted(set(lb.classes_) - {'O'}, key=_suffix_then_prefix)
    column_of = dict((cls, idx) for idx, cls in enumerate(lb.classes_))
    labs = [column_of[tag] for tag in reported_tags]

    scores = precision_recall_fscore_support(
        y_true_combined,
        y_pred_combined,
        labels=labs,
        average=None,
        sample_weight=None,
    )
    # The report gets its own copy of the index list, as in the original.
    text_report = classification_report(
        y_true_combined,
        y_pred_combined,
        labels=list(labs),
        target_names=reported_tags,
    )
    return (scores, text_report, labs)
示例15: BusinessCategoriesFeature
# 需要导入模块: from sklearn.preprocessing import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.LabelBinarizer import transform [as 别名]
class BusinessCategoriesFeature(BaseEstimator):
    """
    Binarized business categories of reviews.

    WARNING: works only with a modified version of LabelBinarizer.

    For each review the business is looked up through
    ``data.get_business_for_review`` and its ``'categories'`` entry is used
    as that review's label set.
    """

    def __init__(self, data=None):
        self.data = data

    def __create_labels_list(self, review_list):
        """Return the ``'categories'`` entry of each review's business, in order."""
        lookup = self.data.get_business_for_review
        return [lookup(review)['categories'] for review in review_list]

    def fit(self, X, y):
        """Fit a new binarizer on the categories of the reviews in ``X``; returns self."""
        self.binarizer = LabelBinarizer()
        self.binarizer.fit(self.__create_labels_list(X))
        return self

    def transform(self, X):
        """Return the binarized categories of the reviews in ``X`` cast to float."""
        category_labels = self.__create_labels_list(X)
        return self.binarizer.transform(category_labels).astype(float)