This article collects typical usage examples of the sklearn.preprocessing.LabelBinarizer method in Python. If you are wondering what preprocessing.LabelBinarizer does, how to use it, or where to find examples of it, the hand-picked code examples below may help. You can also explore further usage examples from its containing module, sklearn.preprocessing.
A total of 15 code examples of preprocessing.LabelBinarizer are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
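Before the collected examples, here is a minimal standalone sketch (my own illustration, not drawn from any of the projects below) of the basic LabelBinarizer workflow: fit on a list of labels, transform them into a one-hot indicator matrix, and map predictions back with inverse_transform.

from sklearn.preprocessing import LabelBinarizer

labels = ["cat", "dog", "bird", "dog"]   # hypothetical labels, purely for illustration
lb = LabelBinarizer()
Y = lb.fit_transform(labels)             # shape (4, 3): one 0/1 column per class
print(lb.classes_)                       # ['bird' 'cat' 'dog']
print(lb.inverse_transform(Y))           # recovers the original string labels

Note that with only two distinct classes, fit_transform returns a single 0/1 column rather than two, which is why Example 1 below flattens its result with number[0].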
Example 1: mmb_evaluate_model
# Required import: from sklearn import preprocessing [as alias]
# Or: from sklearn.preprocessing import LabelBinarizer [as alias]
def mmb_evaluate_model(self):
    """
    Returns scores from cross validation evaluation on the malicious / benign classifier
    """
    predictive_features = self.features['predictive_features']
    self.clf_X = self.modeldata[predictive_features].values
    self.clf_y = np.array(self.modeldata['label'])

    X_train, X_test, y_train, y_test = train_test_split(self.clf_X, self.clf_y, test_size=0.2, random_state=0)
    # Binarize the labels; with two classes fit_transform yields a single 0/1 column, so flatten it
    lb = LabelBinarizer()
    y_train = np.array([number[0] for number in lb.fit_transform(y_train)])

    eval_cls = RandomForestClassifier(n_estimators=100, max_features=.2)
    eval_cls.fit(X_train, y_train)

    recall = cross_val_score(eval_cls, X_train, y_train, cv=5, scoring='recall')
    precision = cross_val_score(eval_cls, X_train, y_train, cv=5, scoring='precision')
    accuracy = cross_val_score(eval_cls, X_train, y_train, cv=5, scoring='accuracy')
    f1_score = cross_val_score(eval_cls, X_train, y_train, cv=5, scoring='f1_macro')

    return {'accuracy': accuracy, 'f1': f1_score, 'precision': precision, 'recall': recall}
Example 2: test_sklearn_labelbin
# Required import: from sklearn import preprocessing [as alias]
# Or: from sklearn.preprocessing import LabelBinarizer [as alias]
def test_sklearn_labelbin(self):
    m = np.array([1.0, .81, .85, .81, .85, .81])
    u = np.array([1.0, .23, .50, .23, .30, 0.13])

    # Create the train dataset.
    X_train, true_links = binary_vectors(
        1000, 500, m=m, u=u, random_state=535, return_links=True)

    binarizer = LabelBinarizer()
    binarizer.fit(X_train.iloc[:, 0])
    assert len(binarizer.classes_) == 1

    # Manually widen the class set so transform produces a proper 0/1 column
    binarizer.classes_ = np.array([0, 1])
    assert len(binarizer.classes_) == 2

    binarizer.transform(X_train.iloc[:, 1])
    assert len(binarizer.classes_) == 2
Example 3: formatClass
# Required import: from sklearn import preprocessing [as alias]
# Or: from sklearn.preprocessing import LabelBinarizer [as alias]
def formatClass(rootFile, Cl):
    import sklearn.preprocessing as pp
    print('==========================================================================\n')
    print(' Running basic TensorFlow. Creating class data in binary form...')
    Cl2 = pp.LabelBinarizer().fit_transform(Cl)

    import matplotlib.pyplot as plt
    plt.hist([float(x) for x in Cl], bins=np.unique([float(x) for x in Cl]), edgecolor="black")
    plt.xlabel('Class')
    plt.ylabel('Occurrences')
    plt.title('Class distribution')
    plt.savefig(rootFile + '_ClassDistrib.png', dpi=160, format='png')  # Save plot
    if tfDef.plotClassDistribTF:
        print(' Plotting class distribution \n')
        plt.show()
    return Cl2

#********************************************************************************
Example 4: test_cross_val_predict
# Required import: from sklearn import preprocessing [as alias]
# Or: from sklearn.preprocessing import LabelBinarizer [as alias]
def test_cross_val_predict():
    # Make sure it works in cross_val_predict for multiclass.
    X, y = load_iris(return_X_y=True)
    y = LabelBinarizer().fit_transform(y)
    X = StandardScaler().fit_transform(X)

    mlp = MLPClassifier(n_epochs=10,
                        solver_kwargs={'learning_rate': 0.05},
                        random_state=4567).fit(X, y)

    cv = KFold(n_splits=4, random_state=457, shuffle=True)
    y_oos = cross_val_predict(mlp, X, y, cv=cv, method='predict_proba')
    auc = roc_auc_score(y, y_oos, average=None)

    assert np.all(auc >= 0.96)
Example 5: bio_classification_report
# Required import: from sklearn import preprocessing [as alias]
# Or: from sklearn.preprocessing import LabelBinarizer [as alias]
def bio_classification_report(y_true, y_pred):
    """
    Classification report for a list of BIOSE-encoded sequences.
    It computes token-level metrics and discards 'O' labels.

    :param y_true:
    :param y_pred:
    :return:
    """
    lb = LabelBinarizer()
    y_true_combined = lb.fit_transform(y_true)
    y_pred_combined = lb.transform(y_pred)

    # Drop the 'O' label and sort tags so B-/I-/S-/E- variants of the same entity type are grouped together
    tagset = set(lb.classes_) - {'O'}
    tagset = sorted(tagset, key=lambda tag: tag.split('-', 1)[::-1])
    class_indices = {
        cls: idx for idx, cls in enumerate(lb.classes_)
    }

    return classification_report(
        y_true_combined,
        y_pred_combined,
        labels=[class_indices[cls] for cls in tagset],
        target_names=tagset
    )
Example 6: eval
# Required import: from sklearn import preprocessing [as alias]
# Or: from sklearn.preprocessing import LabelBinarizer [as alias]
def eval(self, test_x, test_y, crf_model):
    tagger = pycrfsuite.Tagger()
    tagger.open(crf_model)

    y_pred = []
    for feat_list in test_x:
        preds = tagger.tag(feat_list)
        y_pred.append(preds)

    # Flatten the sequences and binarize the tag set for a token-level report
    lb = LabelBinarizer()
    y_true_all = lb.fit_transform(list(chain.from_iterable(test_y)))
    y_pred_all = lb.transform(list(chain.from_iterable(y_pred)))
    tagset = sorted(set(lb.classes_))
    class_indices = {cls: idx for idx, cls in enumerate(lb.classes_)}

    print(classification_report(
        y_true_all,
        y_pred_all,
        labels=[class_indices[cls] for cls in tagset],
        target_names=tagset,
        digits=5
    ))
Example 7: train
# Required import: from sklearn import preprocessing [as alias]
# Or: from sklearn.preprocessing import LabelBinarizer [as alias]
def train(self, x, y):
    """
    Training multiple estimators, each distinguishing one class from the rest.

    Args:
        x (numpy.ndarray): input points
        y (numpy.ndarray): input labels

    Raises:
        Exception: if all data points are assigned to the same class,
            the prediction would be boring
    """
    self.label_binarizer_ = LabelBinarizer(neg_label=0)
    Y = self.label_binarizer_.fit_transform(y)
    self.classes = self.label_binarizer_.classes_

    # Train one binary estimator per indicator column (one-vs-rest)
    columns = (np.ravel(col) for col in Y.T)
    self.estimators = []
    for _, column in enumerate(columns):
        unique_y = np.unique(column)
        if len(unique_y) == 1:
            raise Exception("given all data points are assigned to the same class, "
                            "the prediction would be boring.")
        estimator = self.estimator_cls(*self.params)
        estimator.fit(x, column)
        self.estimators.append(estimator)
Example 8: __init__
# Required import: from sklearn import preprocessing [as alias]
# Or: from sklearn.preprocessing import LabelBinarizer [as alias]
def __init__(self, feature_vector_size, label_words):
    self.ann = cv2.ml.ANN_MLP_create()

    # Number of centroids used to build the feature vectors
    input_size = feature_vector_size

    # Number of models to recognize
    output_size = len(label_words)

    # Applying the Heaton rule of thumb for the hidden layer size
    hidden_size = (input_size * (2 / 3)) + output_size
    nn_config = np.array([input_size, hidden_size, output_size], dtype=np.uint8)
    self.label_words = label_words
    self.ann.setLayerSizes(np.array(nn_config))

    # Symmetrical sigmoid as the activation function
    self.ann.setActivationFunction(cv2.ml.ANN_MLP_SIGMOID_SYM)

    # Map models as tuples of probabilities
    self.le = preprocessing.LabelBinarizer()
    self.le.fit(label_words)  # Label words are ['dress', 'footwear', 'backpack']
Example 9: test_conversion_with_sparse_y
# Required import: from sklearn import preprocessing [as alias]
# Or: from sklearn.preprocessing import LabelBinarizer [as alias]
def test_conversion_with_sparse_y(self):
    """Tests conversion of a model that is fitted with y values in a sparse format."""
    from sklearn.model_selection import train_test_split

    X_train, X_test, y_train, y_test = train_test_split(
        self.iris_X, self.iris_y, test_size=0.2, train_size=0.8
    )

    from sklearn import preprocessing

    lb = preprocessing.LabelBinarizer(sparse_output=True)
    binarized_y = lb.fit_transform(y_train)

    sklearn_model = KNeighborsClassifier(algorithm="brute")
    sklearn_model.fit(X_train, binarized_y)

    self.assertRaises(ValueError, sklearn.convert, sklearn_model)
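As a side note (my own illustration, not part of the original test), this is roughly what sparse_output=True produces: fit_transform returns a SciPy sparse matrix instead of a dense NumPy array, and it is a model fitted on that kind of target that the converter above rejects with a ValueError.

from sklearn.preprocessing import LabelBinarizer

lb = LabelBinarizer(sparse_output=True)
Y = lb.fit_transform([0, 1, 2, 1])   # hypothetical integer labels
print(type(Y))                       # a SciPy CSR sparse matrix, not a numpy.ndarray
print(Y.toarray())                   # dense view of the one-hot indicator matrix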
Example 10: fit
# Required import: from sklearn import preprocessing [as alias]
# Or: from sklearn.preprocessing import LabelBinarizer [as alias]
def fit(self, X, y):
    """
    :param X: shape = [n_samples, n_features]
    :param y: shape = [n_samples]
    :return: self
    """
    # One-hot encode the labels so per-class counts reduce to matrix products
    labelbin = LabelBinarizer()
    Y = labelbin.fit_transform(y)
    self.classes = labelbin.classes_

    self.class_count = np.zeros(Y.shape[1], dtype=np.float64)
    self.feature_count = np.zeros((Y.shape[1], X.shape[1]),
                                  dtype=np.float64)
    self.feature_count += Y.T @ X
    self.class_count += Y.sum(axis=0)

    # Laplace-smoothed log-probabilities of features given each class
    smoothed_fc = self.feature_count + self.alpha
    smoothed_cc = smoothed_fc.sum(axis=1)
    self.feature_log_prob = (np.log(smoothed_fc) -
                             np.log(smoothed_cc.reshape(-1, 1)))
    return self
Example 11: encode_labels
# Required import: from sklearn import preprocessing [as alias]
# Or: from sklearn.preprocessing import LabelBinarizer [as alias]
def encode_labels(self, label_dict, srcids):
    flat_labels = ['O']
    if self.use_brick_flag:
        with open('brick/tags.json', 'r') as fp:
            brick_tags = json.load(fp)
        flat_labels += ['B_' + tag for tag in brick_tags] + \
                       ['I_' + tag for tag in brick_tags]
    flat_labels += reduce(adder, [reduce(adder, label_dict[srcid].values()) for srcid in srcids])
    self.le = LabelBinarizer().fit(flat_labels)

    stack = []
    for srcid in srcids:
        labels = label_dict[srcid]
        sentences = self.sentence_dict[srcid]
        for metadata_type in self.sentence_dict[srcid].keys():
            labels = label_dict[srcid][metadata_type]
            if len(labels) == 0:
                # No labels: use an all-zero matrix whose width matches the binarized label set
                encoded = np.zeros((self.max_len, len(self.le.classes_)))
            else:
                encoded = self.le.transform(labels)
                # Pad each encoded sequence with zero rows up to max_len
                encoded = np.vstack([encoded, np.zeros(
                    (self.max_len - encoded.shape[0],
                     encoded.shape[1]))])
            stack.append(encoded)
    return np.stack(stack)
Example 12: get_mnist_data
# Required import: from sklearn import preprocessing [as alias]
# Or: from sklearn.preprocessing import LabelBinarizer [as alias]
def get_mnist_data():
    """Loads the scikit-learn digits data set (an MNIST-like set of 8x8 images) into memory.

    Returns
    -------
    X : array-like, shape=[n_samples, n_features]
        Training data for the digits data set.

    y : array-like, shape=[n_samples, n_classes]
        One-hot encoded labels for the digits data set.
    """
    digits = load_digits()
    X, y = digits.data, digits.target
    # Binarize the integer targets into a one-hot indicator matrix
    y = LabelBinarizer().fit_transform(y)
    return X, y
Example 13: __init__
# Required import: from sklearn import preprocessing [as alias]
# Or: from sklearn.preprocessing import LabelBinarizer [as alias]
def __init__(self, n_hidden=20, alpha=0.5, rbf_width=1.0,
             activation_func='tanh', activation_args=None,
             user_components=None, regressor=None,
             binarizer=LabelBinarizer(-1, 1),
             random_state=None):

    super(ELMClassifier, self).__init__(n_hidden=n_hidden,
                                        alpha=alpha,
                                        random_state=random_state,
                                        activation_func=activation_func,
                                        activation_args=activation_args,
                                        user_components=user_components,
                                        rbf_width=rbf_width,
                                        regressor=regressor)

    self.classes_ = None
    self.binarizer = binarizer
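For context (my own note, not part of the ELMClassifier code above): LabelBinarizer(-1, 1) passes neg_label=-1 and pos_label=1 positionally, so the indicator matrix is encoded with -1/+1 instead of the default 0/1. A quick sketch:

from sklearn.preprocessing import LabelBinarizer

lb = LabelBinarizer(neg_label=-1, pos_label=1)   # same as LabelBinarizer(-1, 1) above
print(lb.fit_transform([0, 1, 2]))
# [[ 1 -1 -1]
#  [-1  1 -1]
#  [-1 -1  1]]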
Example 14: make_xgboost_dataframe_mapper
# Required import: from sklearn import preprocessing [as alias]
# Or: from sklearn.preprocessing import LabelBinarizer [as alias]
def make_xgboost_dataframe_mapper(dtypes, missing_value_aware=True):
    """Construct a DataFrameMapper for feeding complex data into an XGBModel.

    Parameters
    ----------
    dtypes: mapping of column name to dtype (eg. a pandas Series as returned by DataFrame.dtypes)

    missing_value_aware: boolean
        If true, use missing value aware transformers.

    Returns
    -------
    DataFrameMapper
    """
    features = list()
    for column, dtype in dtypes.items():
        if _is_categorical(dtype):
            # Categorical columns are one-hot encoded (in sparse form) before being fed to XGBoost
            features.append(([column], PMMLLabelBinarizer(sparse_output=True) if missing_value_aware else LabelBinarizer(sparse_output=True)))
        else:
            features.append(([column], None))
    return DataFrameMapper(features)
Example 15: _check_X_y
# Required import: from sklearn import preprocessing [as alias]
# Or: from sklearn.preprocessing import LabelBinarizer [as alias]
def _check_X_y(self, X, y):
    # helpful error message for scikit-learn < 0.17
    is_2d = hasattr(y, 'shape') and len(y.shape) > 1 and y.shape[1] >= 2

    if is_2d or type_of_target(y) != 'binary':
        raise TypeError("Only binary targets supported. For training "
                        "multiclass or multilabel models, you may use the "
                        "OneVsRest or OneVsAll metaestimators in "
                        "scikit-learn.")

    X, Y = check_X_y(X, y, dtype=np.double, accept_sparse='csc',
                     multi_output=False)

    # Encode the two classes as -1/+1 for the binary learner
    self.label_binarizer_ = LabelBinarizer(pos_label=1, neg_label=-1)
    y = self.label_binarizer_.fit_transform(Y).ravel().astype(np.double)
    return X, y