本文整理汇总了Python中sklearn.preprocessing.LabelBinarizer.fit方法的典型用法代码示例。如果您正苦于以下问题:Python LabelBinarizer.fit方法的具体用法?Python LabelBinarizer.fit怎么用?Python LabelBinarizer.fit使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.preprocessing.LabelBinarizer的用法示例。
在下文中一共展示了LabelBinarizer.fit方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_label_binarizer_multilabel
# 需要导入模块: from sklearn.preprocessing import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.LabelBinarizer import fit [as 别名]
def test_label_binarizer_multilabel():
    """LabelBinarizer round-trips multilabel input given either as lists of
    label tuples or as a label-indicator matrix."""
    binarizer = LabelBinarizer()

    # Lists of tuples binarize to an indicator matrix and invert back.
    tuple_input = [(2, 3), (1,), (1, 2)]
    expected_mat = np.array([[0, 1, 1],
                             [1, 0, 0],
                             [1, 1, 0]])
    transformed = binarizer.fit_transform(tuple_input)
    assert_array_equal(expected_mat, transformed)
    assert_equal(binarizer.inverse_transform(transformed), tuple_input)

    # An indicator matrix fed to fit() inverse-transforms to itself.
    binarizer.fit(expected_mat)
    assert_array_equal(expected_mat,
                       binarizer.inverse_transform(expected_mat))

    # Regression test for the two-class multilabel case.
    binarizer = LabelBinarizer()
    two_class_input = [[1, 0], [0], [1], [0, 1]]
    expected_two_class = np.array([[1, 1],
                                   [1, 0],
                                   [0, 1],
                                   [1, 1]])
    transformed = binarizer.fit_transform(two_class_input)
    assert_array_equal(expected_two_class, transformed)
    # Compare as sets: inverse_transform does not guarantee label order.
    assert_equal([set(row) for row in binarizer.inverse_transform(transformed)],
                 [set(row) for row in two_class_input])
示例2: BaseSGD
# 需要导入模块: from sklearn.preprocessing import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.LabelBinarizer import fit [as 别名]
class BaseSGD(object):
    """Shared plumbing for SGD-style estimators: loss-object lookup,
    learning-rate schedule codes, and label-transformer setup."""

    def _get_loss(self):
        """Return the loss object named by ``self.loss``.

        Raises KeyError for an unknown loss name.
        """
        return {
            "modified_huber": ModifiedHuber(),
            "hinge": Hinge(1.0),
            "perceptron": Hinge(0.0),
            "log": Log(),
            "sparse_log": SparseLog(),
            "squared": SquaredLoss(),
            "huber": Huber(self.epsilon),
            "epsilon_insensitive": EpsilonInsensitive(self.epsilon),
        }[self.loss]

    def _get_learning_rate(self):
        """Return the integer code for the ``self.learning_rate`` schedule."""
        return {"constant": 1, "pegasos": 2, "invscaling": 3}[self.learning_rate]

    def _set_label_transformers(self, y):
        """Fit label encoder/binarizer on ``y``; return (n_classes, n_vectors)."""
        if self.multiclass == "natural":
            # Re-encode labels as 0..K-1 floats before binarizing.
            self.label_encoder_ = LabelEncoder()
            y = self.label_encoder_.fit_transform(y).astype(np.float64)
        self.label_binarizer_ = LabelBinarizer(neg_label=-1, pos_label=1)
        self.label_binarizer_.fit(y)
        self.classes_ = self.label_binarizer_.classes_.astype(np.int32)
        n_classes = len(self.label_binarizer_.classes_)
        # A single weight vector suffices for binary problems.
        n_vectors = 1 if n_classes <= 2 else n_classes
        return n_classes, n_vectors
示例3: get_abalone19
# 需要导入模块: from sklearn.preprocessing import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.LabelBinarizer import fit [as 别名]
def get_abalone19():
    """Loads abalone dataset, maps gender feature to binary features, adds
    new label to create abalone19 imbalanced binary classification dataset.

    Returns a DataFrame with one-hot gender columns, the continuous
    features (optionally standardized), and a binary 'label' column
    (1 when rings > 10).
    """
    raw_data = pd.read_csv(ABALONE_FILE, sep=',')
    # .ix was removed from pandas; use label-based .loc here.
    genders = list(raw_data.loc[:, 'gender'])
    cts_data = raw_data.drop(labels='gender', axis=1)
    # initialize & fit preprocesser
    lbz = LabelBinarizer()
    lbz.fit(genders)
    # encode categorical var
    encoded_genders = pd.DataFrame(lbz.transform(genders))
    encoded_genders.columns = ['gender_' + k for k in lbz.classes_]
    # recombine encoded data & return
    new_data = pd.concat(objs=[encoded_genders, cts_data], axis=1)
    new_data['label'] = raw_data['rings'].map(
        lambda k: 1 if k > 10 else 0)  # binary clf task
    new_data = new_data.drop('rings', axis=1)
    # standardize cts features (positional slice: skip the 3 one-hot
    # gender columns and the trailing label column)
    if STANDARDIZE:
        for col in new_data.iloc[:, 3:-1]:
            mean = new_data[col].mean()
            std = new_data[col].std()
            new_data[col] = new_data[col].map(lambda k: (k - mean) / float(std))
    pos_recs = new_data['label'].sum()
    print('total pos class pct = {} %\n'.format(
        round(100 * pos_recs / float(len(new_data)), 3)))
    return new_data
示例4: fit
# 需要导入模块: from sklearn.preprocessing import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.LabelBinarizer import fit [as 别名]
def fit(self, X, y):
    """Fit class priors and per-class feature probabilities.

    X : 2-D array-like, shape (n_samples, n_features).
    y : 1-D array-like of discrete class labels, one per sample.
    Returns ``self`` (sklearn fit convention).

    Sets ``self.classes``, ``self.n_class``, ``self.class_prior`` and
    ``self.feature_proba``. Relies on the subclass-provided helpers
    ``_add_features_dens`` and ``_compute_proba`` (defined elsewhere in
    the class) for the actual accumulation/probability model.
    """
    X = np.array(X)
    y = np.array(y)
    samples, self.n_features = X.shape
    # because our space of targets are discrete, a LabelBinarizer fit on y
    # gives us the sorted set of distinct classes
    lb = LabelBinarizer()
    lb.fit(y)
    self.classes = lb.classes_
    self.n_class = self.classes.size
    self.class_prior = np.zeros(self.n_class, dtype=np.float64)
    self.feature_proba = []
    for i, y_i in enumerate(self.classes):
        # get Xs only for y_i class
        X_yi = X[y == y_i]
        # number of samples belonging to this class
        class_count = X_yi[:, 0].size
        self.class_prior[i] = np.float64(class_count) / samples
        count_all_features = 0
        all_features = np.zeros(self.n_features)
        for sample_features in X_yi:
            # accumulate feature counts/densities per the subclass's algorithm
            all_features, count_all_features = self._add_features_dens(
                sample_features, all_features, count_all_features)
        # turn the accumulated counts into per-feature probabilities
        self.feature_proba.append(
            self._compute_proba(all_features, count_all_features))
    return self
示例5: BusinessCategoriesFeature
# 需要导入模块: from sklearn.preprocessing import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.LabelBinarizer import fit [as 别名]
class BusinessCategoriesFeature(BaseEstimator):
    """
    WARNING!!!
    Works only with a modified version of LabelBinarizer.
    A binarization of the reviews' business categories.
    """

    def __init__(self, data=None):
        self.data = data

    def __create_labels_list(self, review_list):
        # One category list per review, looked up through the data source.
        return [self.data.get_business_for_review(review)['categories']
                for review in review_list]

    def fit(self, X, y):
        """Fit the binarizer on the category lists of the reviews in X."""
        self.binarizer = LabelBinarizer()
        self.binarizer.fit(self.__create_labels_list(X))
        return self

    def transform(self, X):
        """Binarize the category lists of the reviews in X as floats."""
        category_labels = self.__create_labels_list(X)
        return self.binarizer.transform(category_labels).astype(float)
示例6: train_logreg
# 需要导入模块: from sklearn.preprocessing import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.LabelBinarizer import fit [as 别名]
def train_logreg(X, y, test_X, test_y, load_vec=True):
    """
    Trains logistic regression on the feature set.

    Featurizes train and test together (so they share a feature space),
    fits on the train split, and prints classification reports for both
    the train and held-out splits.
    """
    full_y = y + test_y
    lb = LabelBinarizer()
    lb.fit(full_y)
    # Convert into 1-D array
    print(len(X), len(test_X))
    # Remember the train size before X is rebound below; the original
    # hard-coded split at 4500 broke for any other dataset size.
    n_train = len(X)
    big_X = X + test_X
    features = featurize(big_X)
    X, test_X = features[:n_train], features[n_train:]
    print(X.shape, X)
    model = LogisticRegression()
    model.fit(X, y)
    # Report on the training split.
    y_pred = model.predict(X)
    print(set(y_pred))
    print(metrics.classification_report(y, y_pred, digits=3))
    # Report on the held-out test split.
    y_pred = model.predict(test_X)
    print(set(y_pred))
    print(metrics.classification_report(test_y, y_pred, digits=3))
示例7: display_image_predictions
# 需要导入模块: from sklearn.preprocessing import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.LabelBinarizer import fit [as 别名]
def display_image_predictions(features, labels, predictions):
    """Show sample images next to horizontal bar charts of their top
    softmax predictions.

    features : iterable of images; pixel values are multiplied by 255 for
        display, so they are presumably scaled to [0, 1] — TODO confirm.
    labels : one-hot encoded true labels (decoded below via LabelBinarizer).
    predictions : object exposing ``indices`` (top-k class ids) and
        ``values`` (their softmax scores) per sample — presumably the
        result of a top-k op; verify against the caller.

    NOTE(review): the figure has 4x2 axes, so this assumes at most 4
    samples are passed in.
    """
    n_classes = 10
    label_names = _load_label_names()
    # Recover integer class ids from the one-hot label rows.
    label_binarizer = LabelBinarizer()
    label_binarizer.fit(range(n_classes))
    label_ids = label_binarizer.inverse_transform(np.array(labels))
    fig, axies = plt.subplots(nrows=4, ncols=2)
    fig.tight_layout()
    fig.suptitle('Softmax Predictions', fontsize=20, y=1.1)
    n_predictions = 3
    margin = 0.05
    ind = np.arange(n_predictions)
    width = (1. - 2. * margin) / n_predictions
    for image_i, (feature, label_id, pred_indicies, pred_values) in enumerate(zip(features, label_ids, predictions.indices, predictions.values)):
        pred_names = [label_names[pred_i] for pred_i in pred_indicies]
        correct_name = label_names[label_id]
        # Left column: the image with its true label as title.
        axies[image_i][0].imshow(feature*255)
        axies[image_i][0].set_title(correct_name)
        axies[image_i][0].set_axis_off()
        # Right column: top-k prediction scores, best at the top
        # (hence the [::-1] reversal for barh's bottom-up ordering).
        axies[image_i][1].barh(ind + margin, pred_values[::-1], width)
        axies[image_i][1].set_yticks(ind + margin)
        axies[image_i][1].set_yticklabels(pred_names[::-1])
        axies[image_i][1].set_xticks([0, 0.5, 1.0])
示例8: one_hot_encoding
# 需要导入模块: from sklearn.preprocessing import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.LabelBinarizer import fit [as 别名]
def one_hot_encoding(y_train, y_test):
    """One-hot encode train/test labels with a binarizer fit on the
    training labels only; returns (y_train_one_hot, y_test_one_hot)."""
    encoder = LabelBinarizer()
    encoder.fit(y_train)
    return encoder.transform(y_train), encoder.transform(y_test)
示例9: Encoding
# 需要导入模块: from sklearn.preprocessing import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.LabelBinarizer import fit [as 别名]
def Encoding(data, general_matrix=None):
    """One-hot encode every string-typed column of ``data``.

    When ``general_matrix`` is given, the binarizer is fit on its
    categories (so both matrices share one encoding) and it is expanded
    in lock-step with ``data``. Returns the encoded ``data``.

    NOTE(review): the loop range is taken from the original column count,
    but splicing in multi-column encodings widens ``data``, so later
    indices land on shifted columns — confirm the intended input shapes.
    """
    encoder = LabelBinarizer()
    count = 0
    # encoding
    for i in range(data.shape[1]):
        if type(data[0, i]) == str:
            count += 1
            col = data[:, i]
            unique = np.unique(col if general_matrix is None else general_matrix[:, i])
            try:
                encoder.fit(unique)
            except Exception:
                # best-effort: keep the previously fitted encoding if this
                # column cannot be fit (original swallowed all errors too)
                pass
            new_col = encoder.transform(col)
            # split at i and i + 1
            before, removed, after = np.hsplit(data, [i, i + 1])
            # concatenate the encoded columns in place of the original one
            data = np.concatenate((before, new_col, after), axis=1)
            # bug fix: only expand general_matrix when one was supplied;
            # np.hsplit(None, ...) crashed here before
            if general_matrix is not None:
                before, removed, after = np.hsplit(general_matrix, [i, i + 1])
                general_matrix = np.concatenate(
                    (before, encoder.transform(general_matrix[:, i]), after), axis=1)
    print("count : %d" % count)
    # return data
    return data
示例10: logloss
# 需要导入模块: from sklearn.preprocessing import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.LabelBinarizer import fit [as 别名]
def logloss(act, pred):
    """Mean multiclass log loss of probabilities ``pred`` against labels ``act``."""
    eps = 10 ** -15
    # Clip probabilities away from 0 and 1 so the log stays finite.
    clipped = np.clip(pred, eps, 1 - eps)
    binarizer = LabelBinarizer()
    binarizer.fit(act)
    act_binary = binarizer.transform(act)
    return -np.sum(act_binary * np.log(clipped)) / clipped.shape[0]
示例11: __init__
# 需要导入模块: from sklearn.preprocessing import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.LabelBinarizer import fit [as 别名]
def __init__(
    self,
    train_file,
    test_file,
    batch_size=32,
    embedding_size=20,
    max_norm=40,
    lr=0.01,
    num_hops=3,
    adj_weight_tying=True,
    linear_start=True,
    **kwargs
):
    """Load the train/test files, build the vocabulary and sentence
    matrix, and construct the network.

    Sets ``self.data[split][k]`` for k in {"C", "Q", "Y"} (context,
    question, answer arrays — presumably; verify against
    ``process_dataset``), plus the shared sentence matrix ``self.S`` and
    the hyperparameters passed in. ``linear_start=True`` disables the
    softmax nonlinearity initially (linear-start training).
    """
    train_lines, test_lines = self.get_lines(train_file), self.get_lines(test_file)
    lines = np.concatenate([train_lines, test_lines], axis=0)
    # Vocabulary and maximum sequence/sentence lengths over BOTH splits.
    vocab, word_to_idx, idx_to_word, max_seqlen, max_sentlen = self.get_vocab(lines)
    self.data = {"train": {}, "test": {}}
    S_train, self.data["train"]["C"], self.data["train"]["Q"], self.data["train"]["Y"] = self.process_dataset(
        train_lines, word_to_idx, max_sentlen, offset=0
    )
    # Test sentence indices are offset past the train sentences so both
    # index into the single concatenated matrix S below.
    S_test, self.data["test"]["C"], self.data["test"]["Q"], self.data["test"]["Y"] = self.process_dataset(
        test_lines, word_to_idx, max_sentlen, offset=len(S_train)
    )
    # Row 0 is an all-zero padding sentence.
    S = np.concatenate([np.zeros((1, max_sentlen), dtype=np.int32), S_train, S_test], axis=0)
    # Debug dump of the first few test examples (Python 2 print statements).
    for i in range(10):
        for k in ["C", "Q", "Y"]:
            print k, self.data["test"][k][i]
    print "batch_size:", batch_size, "max_seqlen:", max_seqlen, "max_sentlen:", max_sentlen
    print "sentences:", S.shape
    print "vocab:", len(vocab), vocab
    for d in ["train", "test"]:
        print d,
        for k in ["C", "Q", "Y"]:
            print k, self.data[d][k].shape,
        print ""
    # Binarizer over the vocabulary; its sorted classes_ become the
    # canonical vocab ordering.
    lb = LabelBinarizer()
    lb.fit(list(vocab))
    vocab = lb.classes_.tolist()
    self.batch_size = batch_size
    self.max_seqlen = max_seqlen
    self.max_sentlen = max_sentlen
    self.embedding_size = embedding_size
    # +1 reserves an index (presumably for padding — TODO confirm).
    self.num_classes = len(vocab) + 1
    self.vocab = vocab
    self.adj_weight_tying = adj_weight_tying
    self.num_hops = num_hops
    self.lb = lb
    self.init_lr = lr
    self.lr = self.init_lr
    self.max_norm = max_norm
    self.S = S
    self.idx_to_word = idx_to_word
    self.nonlinearity = None if linear_start else lasagne.nonlinearities.softmax
    self.build_network(self.nonlinearity)
示例12: fit
# 需要导入模块: from sklearn.preprocessing import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.LabelBinarizer import fit [as 别名]
def fit(self, Xt, yt, Xh, yh, callback=None):
    """Fit multinomial logistic regression with hyperparameter
    optimization via HOAG.

    Xt, yt : training data/labels (inner problem).
    Xh, yh : held-out data/labels used to tune the regularization
        (outer problem; note its gradient uses zero regularization).
    callback : optional per-iteration callback forwarded to the solver.

    Sets ``self.coef_`` and the log-regularization ``self.alpha_``;
    returns ``self``. Relies on module-level helpers
    ``_multinomial_loss_grad``, ``_multinomial_grad_hess`` and
    ``hoag_lbfgs`` defined elsewhere.
    """
    lbin = LabelBinarizer()
    lbin.fit(yt)
    Yt_multi = lbin.transform(yt)
    Yh_multi = lbin.transform(yh)
    sample_weight_train = np.ones(Xt.shape[0])
    sample_weight_test = np.ones(Xh.shape[0])
    # Binary problems yield a single indicator column; expand to two
    # complementary columns so the multinomial machinery applies.
    if Yt_multi.shape[1] == 1:
        Yt_multi = np.hstack([1 - Yt_multi, Yt_multi])
        Yh_multi = np.hstack([1 - Yh_multi, Yh_multi])
        print('warning: only two classes detected')
    n_classes = Yt_multi.shape[1]
    n_features = Xt.shape[1]
    # alpha is stored in log-space (np.exp(alpha) below).
    if self.alpha0 is None:
        self.alpha0 = np.zeros(n_classes * n_features)  # if not np.all(np.unique(yt) == np.array([-1, 1])):
        # raise ValueError
    x0 = np.zeros(n_features * n_classes)
    # assert x0.size == self.alpha0.size

    def h_func_grad(x, alpha):
        # Inner objective: regularized training loss and gradient.
        # x = x.reshape((-1,Yt_multi.shape[1]))
        return _multinomial_loss_grad(
            x, Xt, Yt_multi, np.exp(alpha), sample_weight_train)[:2]

    def h_hessian(x, alpha):
        # Hessian of the inner objective.
        # x = x.reshape((-1,Yt_multi.shape[1]))
        return _multinomial_grad_hess(
            x, Xt, Yt_multi, np.exp(alpha), sample_weight_train)[1]

    def g_func_grad(x, alpha):
        # Outer objective: held-out loss, evaluated without regularization.
        # x = x.reshape((-1,Yt_multi.shape[1]))
        return _multinomial_loss_grad(
            x, Xh, Yh_multi, np.zeros(alpha.size),
            sample_weight_test)[:2]

    def h_crossed(x, alpha):
        # Cross-derivative d^2 h / (dx d alpha) as a diagonal sparse matrix.
        # return x.reshape((n_classes, -1)) * alpha
        # x = x.reshape((-1,Yt_multi.shape[1]))
        tmp = np.exp(alpha) * x
        return sparse.dia_matrix(
            (tmp, 0),
            shape=(n_features * n_classes, n_features * n_classes))

    opt = hoag_lbfgs(
        h_func_grad, h_hessian, h_crossed, g_func_grad, x0,
        callback=callback,
        tolerance_decrease=self.tolerance_decrease,
        lambda0=self.alpha0, maxiter=self.max_iter,
        verbose=self.verbose)
    self.coef_ = opt[0]
    self.alpha_ = opt[1]
    return self
示例13: load_dataset2
# 需要导入模块: from sklearn.preprocessing import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.LabelBinarizer import fit [as 别名]
def load_dataset2(self):
    """Load and shuffle the snippet dataset; return (X, y_bin) where
    ``y_bin`` is the FIRST column of the label-indicator matrix.

    NOTE(review): the ``return`` inside the loop exits on the first
    transposed column, so only one class's indicator vector is ever
    returned; the chained assignment also keeps the full 4-tuple in the
    otherwise-unused local ``dataset``. Confirm this is intentional.
    """
    X, y, X_test, y_test = dataset = snippet_reader.toNumpy()
    X, y = shuffle(X, y)
    lb = LabelBinarizer()
    lb.fit(y)
    for y_bin in lb.transform(y).T:
        return X, y_bin
示例14: BaseClassifier
# 需要导入模块: from sklearn.preprocessing import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.LabelBinarizer import fit [as 别名]
class BaseClassifier(BaseEstimator):
    """Base class for linear classifiers: probability estimates for binary
    problems, label-transformer setup, and score-based prediction."""

    def predict_proba(self, X):
        """Return (n_samples, 2) class probabilities; binary problems only."""
        if len(self.classes_) != 2:
            raise NotImplementedError("predict_(log_)proba only supported"
                                      " for binary classification")
        if self.loss == "log":
            # Logistic sigmoid of the raw decision values.
            scores = self.decision_function(X).ravel()
            positive = 1.0 / (1.0 + np.exp(-scores))
        elif self.loss == "modified_huber":
            # Clip decision values to [-1, 1], then rescale to [0, 1].
            scores = self.decision_function(X).ravel()
            positive = np.minimum(1, np.maximum(-1, scores))
            positive = (positive + 1) / 2
        else:
            raise NotImplementedError("predict_(log_)proba only supported when"
                                      " loss='log' or loss='modified_huber' "
                                      "(%s given)" % self.loss)
        probabilities = np.zeros((X.shape[0], 2), dtype=np.float64)
        probabilities[:, 1] = positive
        probabilities[:, 0] = 1 - positive
        return probabilities

    def _set_label_transformers(self, y, reencode=False, neg_label=-1):
        """Fit encoder/binarizer on ``y``; return (y, n_classes, n_vectors)."""
        if reencode:
            # Map arbitrary labels to 0..K-1 first.
            self.label_encoder_ = LabelEncoder()
            y = self.label_encoder_.fit_transform(y).astype(np.int32)
        else:
            y = y.astype(np.int32)
        self.label_binarizer_ = LabelBinarizer(neg_label=neg_label,
                                               pos_label=1)
        self.label_binarizer_.fit(y)
        self.classes_ = self.label_binarizer_.classes_.astype(np.int32)
        n_classes = len(self.label_binarizer_.classes_)
        # One weight vector is enough for binary problems.
        n_vectors = 1 if n_classes <= 2 else n_classes
        return y, n_classes, n_vectors

    def decision_function(self, X):
        """Raw scores X @ coef_.T, plus the intercept when one is set."""
        scores = safe_sparse_dot(X, self.coef_.T)
        if hasattr(self, "intercept_"):
            scores += self.intercept_
        return scores

    def predict(self, X):
        """Predict labels, undoing the binarizer (and encoder, if fitted)."""
        labels = self.label_binarizer_.inverse_transform(self.decision_function(X))
        if hasattr(self, "label_encoder_"):
            labels = self.label_encoder_.inverse_transform(labels)
        return labels
示例15: fit
# 需要导入模块: from sklearn.preprocessing import LabelBinarizer [as 别名]
# 或者: from sklearn.preprocessing.LabelBinarizer import fit [as 别名]
def fit(self, X, y=None):
    """Fit one LabelBinarizer per DataFrame column.

    Stores (column, fitted binarizer) pairs in ``self.binarizers_``.
    Raises RuntimeError when ``X`` is not a pandas DataFrame.
    """
    if not isinstance(X, pd.DataFrame):
        raise RuntimeError("Only works with DataFrames. Got {}".format(X.__class__))
    # LabelBinarizer.fit returns the binarizer, so the pair can be built inline.
    self.binarizers_ = [
        (col, LabelBinarizer(self.neg_label, self.pos_label).fit(X[col].values))
        for col in X.columns
    ]
    return self