本文整理汇总了Python中sklearn.preprocessing.LabelEncoder类的典型用法代码示例。如果您正苦于以下问题:Python preprocessing.LabelEncoder的具体用法?Python preprocessing.LabelEncoder怎么用?Python preprocessing.LabelEncoder使用的例子?那么恭喜您,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块sklearn.preprocessing的用法示例。
在下文中一共展示了preprocessing.LabelEncoder方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_explain_model_local_with_predicted_label
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import LabelEncoder [as 别名]
def test_explain_model_local_with_predicted_label(self):
    """
    Test explain_local of the classical explainer with a predicted label.

    Fits a ``ClassicalTextExplainer`` on label-encoded training data,
    predicts a label for ``DOCUMENT``, decodes it with the same encoder and
    checks that the local explanation carries exactly one importance value
    per feature.
    :return:
    """
    # Only the training part of the split is used by this test.
    train_docs, _, train_labels, _ = setup_mnli_test_train_split()

    encoder = LabelEncoder()
    encoded_labels = encoder.fit_transform(train_labels)

    explainer = ClassicalTextExplainer()
    # The second value returned by fit() is not needed here.
    model, _ = explainer.fit(train_docs, encoded_labels)
    # Give the explainer's preprocessor the encoder that produced the labels.
    explainer.preprocessor.labelEncoder = encoder

    # Map the predicted integer class back to its original label.
    predicted_label = encoder.inverse_transform(model.predict(DOCUMENT))
    explanation = explainer.explain_local(DOCUMENT, predicted_label)

    assert len(explanation.local_importance_values) == len(explanation.features)
示例2: __init__
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import LabelEncoder [as 别名]
def __init__(self, estimator, dtype=float, sparse=True):
    """
    Store the wrapped estimator and create its label encoder and vectorizer.

    :param estimator: scikit-learn classifier object.
    :param dtype: data type used when building the feature array.
        scikit-learn estimators work exclusively on numeric data, so the
        default value should be fine for almost all situations.
    :param sparse: whether to use sparse matrices internally.
        The estimator must support these; not all scikit-learn classifiers
        do (see their respective documentation and look for "sparse
        matrix"). Defaults to True because most NLP problems involve
        sparse feature sets; setting it to False may take a great amount
        of memory.
    :type sparse: boolean.
    """
    label_encoder = LabelEncoder()
    # dtype and sparse are forwarded unchanged to the vectorizer.
    feature_vectorizer = DictVectorizer(dtype=dtype, sparse=sparse)

    self._clf = estimator
    self._encoder = label_encoder
    self._vectorizer = feature_vectorizer
示例3: __init__
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import LabelEncoder [as 别名]
def __init__(self,
             corpus,
             sherlock_features: List[str] = None,
             topic_feature: str = None,
             label_enc: LabelEncoder = None,
             id_filter: List[str] = None,
             max_col_count:int = None,
             shuffle_group:str=None):
    """
    Initialise the parent class, then prepare a seeded shuffle order.

    All arguments except ``shuffle_group`` are forwarded positionally to the
    parent constructor. ``shuffle_group`` is only stored on the instance
    here.

    A ``numpy.random.RandomState`` seeded with ``SEED`` is stored as
    ``self.prng`` and used to draw a permutation of
    ``range(len(self.df_header))``, stored as ``self.shuffle_order``.
    """
    super().__init__(corpus, sherlock_features, topic_feature,
                     label_enc, id_filter, max_col_count)

    # self.df_header is presumably populated by the parent constructor -- TODO confirm.
    header_count = len(self.df_header)

    self.tempcorpus = corpus
    self.shuffle_group = shuffle_group

    # Fixed seed so the permutation is reproducible across runs.
    self.prng = np.random.RandomState(SEED)
    self.shuffle_order = self.prng.permutation(header_count)
示例4: cat_onehot_encoder
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import LabelEncoder [as 别名]
def cat_onehot_encoder(df, y, col, selection=True):
    """
    Label-encode and then one-hot encode a categorical feature.

    :param df: object exposing ``.values`` (e.g. a pandas Series). All of its
        values are flattened and treated as one categorical feature.
    :param y: target passed to ``train_lightgbm_for_feature_selection``.
    :param col: feature name; only used in the printed AUC message.
    :param selection: when exactly ``True``, train the feature-selection
        model on the encoded feature and print its AUC.
    :return: tuple ``(new_feature, mlbs, models, auc_score, le)`` where
        ``new_feature`` is a float CSR matrix of one-hot columns, ``mlbs`` is
        the fitted ``OneHotEncoder``, ``le`` is the fitted ``LabelEncoder``,
        and ``models`` / ``auc_score`` are ``None`` unless ``selection`` is
        ``True``.
    """
    from scipy.sparse import csr_matrix
    from sklearn.preprocessing import LabelEncoder

    # LabelEncoder expects a 1-D array; a column vector makes it emit a
    # DataConversionWarning before flattening it anyway.
    raw_values = df.values.reshape(-1)
    le = LabelEncoder()
    codes = le.fit_transform(raw_values).reshape(-1, 1)

    # Sparse output is OneHotEncoder's default. The explicit ``sparse=True``
    # keyword is avoided because it was renamed to ``sparse_output`` and the
    # old spelling is rejected by newer scikit-learn releases.
    mlbs = OneHotEncoder().fit(codes)
    features_tmp = csr_matrix(mlbs.transform(codes), dtype=float)

    models = None
    auc_score = None
    if selection is True:
        auc_score, models = train_lightgbm_for_feature_selection(features_tmp, y)
        print(col, "auc", auc_score)

    new_feature = features_tmp
    return new_feature, mlbs, models, auc_score, le
示例5: preprocessData
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import LabelEncoder [as 别名]
def preprocessData(dataset):
    """
    Add a next-day Up/Down target column to a price DataFrame.

    The frame must have ``Open`` and ``Close`` columns. Zero ``Open`` values
    are replaced by 1 in place to avoid division by zero. The relative
    intraday change ``(Close - Open) / Open`` is labelled ``'Up'`` when it is
    ``>= 0`` and ``'Down'`` otherwise (a NaN change therefore counts as
    ``'Down'``). The labels are integer-encoded with ``LabelEncoder`` and
    shifted by one row so that each row carries the following row's label.

    :param dataset: pandas DataFrame; its ``Open`` column is modified and an
        ``UpDown`` column is added in place.
    :return: a new DataFrame without the last row, which has no next-day label.
    """
    le = preprocessing.LabelEncoder()

    # in case of divide-by-zero; .loc writes to the frame itself instead of
    # relying on chained assignment, which may act on a temporary copy.
    dataset.loc[dataset['Open'] == 0, 'Open'] = 1

    # add prediction target: next day Up/Down
    threshold = 0.000
    change = (dataset['Close'] - dataset['Open']) / dataset['Open']

    # Build the string labels in one pass. Overwriting the float column with
    # 'Up' and then comparing it with `threshold` again would compare str
    # with float and raise TypeError on Python 3.
    direction = (change >= threshold).map({True: 'Up', False: 'Down'})

    dataset['UpDown'] = le.fit_transform(direction)
    # shift 1, so the y is actually next day's up/down
    dataset['UpDown'] = dataset['UpDown'].shift(-1)
    # drop last one because it has no up/down value
    dataset = dataset.drop(dataset.index[-1])
    return dataset
示例6: get_query_y
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import LabelEncoder [as 别名]
def get_query_y(self, Qy, Qyc, class_label):
    """
    Returns labeled representation of classes of Query set and a list of labels.

    :param Qy: sequence of class labels present in the query set.
    :param Qyc: sequence of repeat counts; ``Qyc[i]`` is how many times
        ``Qy[i]`` is repeated.
    :param class_label: not used by this method.
    :return: tuple ``(Query_y, Query_y_labels)``: ``Query_y`` is a long tensor
        of integer-encoded labels (moved to CUDA when ``self.gpu`` is truthy)
        and ``Query_y_labels`` is the sorted array of distinct labels.
    """
    repeated = []
    for i, label in enumerate(Qy):
        repeated.extend([label] * Qyc[i])

    # Keep the labels 1-D: LabelEncoder expects shape (n_samples,) and emits a
    # DataConversionWarning when handed an (n, 1) column vector.
    labels = np.array(repeated)

    label_encoder = LabelEncoder()
    encoded = label_encoder.fit_transform(labels).astype(int)
    Query_y = torch.Tensor(encoded).long()
    if self.gpu:
        Query_y = Query_y.cuda()

    Query_y_labels = np.unique(labels)
    return Query_y, Query_y_labels
示例7: get_cars_data
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import LabelEncoder [as 别名]
def get_cars_data():
    """
    Load the cars dataset and split it into features, integer target and an ordinal mapping.

    The ``class`` column is label-encoded into an integer array ``y``; every
    other column is kept in ``X``. ``mapping`` lists, for each feature
    column, its levels paired with consecutive integer ranks.
    :return: tuple ``(X, y, mapping)``
    """
    def ordinal(col, levels):
        # Pair each level with its position in the given order.
        return {'col': col, 'mapping': [(level, rank) for rank, level in enumerate(levels)]}

    frame = pd.read_csv('source_data/cars/car.data.txt')

    feature_columns = [name for name in frame.columns.values if name != 'class']
    X = frame.reindex(columns=feature_columns)

    target = frame.reindex(columns=['class']).values.reshape(-1)
    y = preprocessing.LabelEncoder().fit_transform(target)

    price_levels = ('vhigh', 'high', 'med', 'low')
    mapping = [
        ordinal('buying', price_levels),
        ordinal('maint', price_levels),
        ordinal('doors', ('2', '3', '4', '5more')),
        ordinal('persons', ('2', '4', 'more')),
        ordinal('lug_boot', ('small', 'med', 'big')),
        ordinal('safety', ('high', 'med', 'low')),
    ]
    return X, y, mapping
示例8: get_mushroom_data
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import LabelEncoder [as 别名]
def get_mushroom_data():
    """
    Load the mushroom dataset and split it into features and an integer target.

    The ``class`` column is label-encoded into an integer array ``y``; every
    other column is kept in ``X``.
    :return: tuple ``(X, y, mapping)`` where ``mapping`` is always ``None``
    """
    frame = pd.read_csv('source_data/mushrooms/agaricus-lepiota.csv')

    feature_columns = [name for name in frame.columns.values if name != 'class']
    X = frame.reindex(columns=feature_columns)

    target = frame.reindex(columns=['class']).values.reshape(-1)
    y = preprocessing.LabelEncoder().fit_transform(target)

    # this data is truly categorical, with no known concept of ordering,
    # so no ordinal mapping is provided
    return X, y, None
示例9: get_splice_data
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import LabelEncoder [as 别名]
def get_splice_data():
    """
    Load the splice dataset and split it into per-position features and an integer target.

    The ``dna`` column is stripped and split into single characters; the
    first 60 of them become the columns ``dna_0`` ... ``dna_59`` and the
    original ``dna`` column is removed. The ``class`` column is label-encoded
    into an integer array ``y``.
    :return: tuple ``(X, y, mapping)`` where ``mapping`` is always ``None``
    """
    frame = pd.read_csv('source_data/splice/splice.csv')

    feature_columns = [name for name in frame.columns.values if name != 'class']
    X = frame.reindex(columns=feature_columns)

    # One list of characters per row.
    bases = X['dna'].map(lambda sequence: list(str(sequence).strip()))
    for position in range(60):
        X['dna_%d' % (position, )] = bases.map(lambda chars: chars[position])
    del X['dna']

    target = frame.reindex(columns=['class']).values.reshape(-1)
    y = preprocessing.LabelEncoder().fit_transform(target)

    # this data is truly categorical, with no known concept of ordering,
    # so no ordinal mapping is provided
    return X, y, None
示例10: get_X_y
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import LabelEncoder [as 别名]
def get_X_y(**kwargs):
    """Read the tab-separated data file and return preprocessed features and labels.

    Thin wrapper around ``pd.read_csv``: the file at ``info['path']`` is read
    with ``sep='\\t'`` plus any extra keyword arguments, then handed to
    ``preprocess`` together with the module-level ``label_encoder`` so the
    same systematic preprocessing is not repeated throughout the code.
    """
    # label_encoder is only read here, so no `global` declaration is required.
    frame = pd.read_csv(info['path'], sep='\t', **kwargs)
    return preprocess(frame, label_encoder)
###############################################################################
# Classifier objects in |sklearn| often require :code:`y` to be integer labels.
# Additionally, |APS| requires a binary version of the labels. For these two
# purposes, we create:
#
# * a |LabelEncoder|, that we pre-fitted on the known :code:`y` classes
# * a |OneHotEncoder|, pre-fitted on the resulting integer labels.
#
# Their |transform| methods can then be called at appropriate times.
示例11: fit
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import LabelEncoder [as 别名]
def fit(self, X, y):
    """
    Fit by storing (deliberately distorted) class weights in ``coef_``.

    The classes are taken from a ``LabelEncoder`` fitted on ``y`` and the
    weights are computed with ``compute_class_weight`` according to
    ``self.class_weight``. ``X`` is not used.

    :param X: training data (ignored).
    :param y: target labels.
    :return: ``self``
    """
    from sklearn.preprocessing import LabelEncoder
    from sklearn.utils import compute_class_weight

    label_encoder = LabelEncoder().fit(y)
    classes = label_encoder.classes_
    # `classes` and `y` are keyword-only in current scikit-learn releases;
    # passing them by keyword works on both old and new versions.
    class_weight = compute_class_weight(self.class_weight, classes=classes, y=y)

    # Intentionally modify the balanced class_weight
    # to simulate a bug and raise an exception
    if self.class_weight == "balanced":
        class_weight += 1.

    # Simply assigning coef_ to the class_weight
    self.coef_ = class_weight
    return self
示例12: score
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import LabelEncoder [as 别名]
def score(self,
          actual: np.array,
          predicted: np.array,
          sample_weight: typing.Optional[np.array] = None,
          labels: typing.Optional[np.array] = None,
          **kwargs) -> float:
    """
    Compute the F-beta score with ``beta=4``.

    ``labels`` and ``actual`` are integer-encoded with one ``LabelEncoder``
    fitted on ``labels``. With more than two labels, ``predicted`` is reduced
    with ``argmax`` over axis 1 and the score is micro-averaged; otherwise
    ``predicted`` is thresholded with ``self._threshold`` and the binary
    score is used.

    :param actual: true labels.
    :param predicted: predictions; presumably per-class scores in the
        multiclass case -- TODO confirm against the caller.
    :param sample_weight: optional per-sample weights passed to ``fbeta_score``.
    :param labels: label values the encoder is fitted on.
    :return: the F4 score.
    """
    encoder = LabelEncoder()
    labels = encoder.fit_transform(labels)
    actual = encoder.transform(actual)

    is_multiclass = len(labels) > 2
    if is_multiclass:
        predicted = np.argmax(predicted, axis=1)
    else:
        predicted = predicted > self._threshold
    average = "micro" if is_multiclass else "binary"

    return fbeta_score(actual, predicted, labels=labels, average=average,
                       sample_weight=sample_weight, beta=4)
示例13: score
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import LabelEncoder [as 别名]
def score(self,
          actual: np.array,
          predicted: np.array,
          sample_weight: typing.Optional[np.array] = None,
          labels: typing.Optional[np.array] = None,
          **kwargs) -> float:
    """
    Compute the average misclassification cost from a 2x2 confusion matrix.

    ``labels`` and ``actual`` are integer-encoded with one ``LabelEncoder``
    fitted on ``labels``; ``predicted`` is thresholded with
    ``self._threshold``. False positives and false negatives are weighted by
    the class attributes ``_fp_cost`` and ``_fn_cost`` and the sum is divided
    by the total (weighted) count.

    :param actual: true labels.
    :param predicted: predicted scores compared against ``self._threshold``.
    :param sample_weight: optional per-sample weights passed to ``confusion_matrix``.
    :param labels: label values the encoder is fitted on; exactly two are
        needed for the four-way unpacking below to succeed.
    :return: cost per (weighted) sample.
    """
    # encode labels and actuals as integers
    encoder = LabelEncoder()
    labels = encoder.fit_transform(labels)
    actual = encoder.transform(actual)

    # turn predictions into booleans
    predicted = predicted >= self._threshold

    # use sklearn to get fp and fn
    matrix = confusion_matrix(actual, predicted, sample_weight=sample_weight, labels=labels)
    tn, fp, fn, tp = matrix.ravel()

    # total cost: fp_cost * FP + fn_cost * FN
    total_cost = (fp * self.__class__._fp_cost) + (fn * self.__class__._fn_cost)
    # divide by total weighted count to make loss invariant to data size
    total_count = tn + fp + fn + tp
    return total_cost / total_count
示例14: score
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import LabelEncoder [as 别名]
def score(self,
          actual: np.array,
          predicted: np.array,
          sample_weight: typing.Optional[np.array] = None,
          labels: typing.Optional[np.array] = None,
          **kwargs) -> float:
    """
    Compute the F-beta score with ``beta=3``.

    ``labels`` and ``actual`` are integer-encoded with one ``LabelEncoder``
    fitted on ``labels``. With more than two labels, ``predicted`` is reduced
    with ``argmax`` over axis 1 and the score is micro-averaged; otherwise
    ``predicted`` is thresholded with ``self._threshold`` and the binary
    score is used.

    :param actual: true labels.
    :param predicted: predictions; presumably per-class scores in the
        multiclass case -- TODO confirm against the caller.
    :param sample_weight: optional per-sample weights passed to ``fbeta_score``.
    :param labels: label values the encoder is fitted on.
    :return: the F3 score.
    """
    encoder = LabelEncoder()
    labels = encoder.fit_transform(labels)
    actual = encoder.transform(actual)

    is_multiclass = len(labels) > 2
    if is_multiclass:
        predicted = np.argmax(predicted, axis=1)
    else:
        predicted = predicted > self._threshold
    average = "micro" if is_multiclass else "binary"

    return fbeta_score(actual, predicted, labels=labels, average=average,
                       sample_weight=sample_weight, beta=3)
示例15: fit
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import LabelEncoder [as 别名]
def fit(self, X, y, sample_weight=None, eval_set=None, sample_weight_eval_set=None, **kwargs):
    """
    Train a CatBoostClassifier on ``X`` and register it via ``set_model_properties``.

    ``y`` is integer-encoded with a ``LabelEncoder`` fitted on
    ``self.labels``. ``eval_set``, ``sample_weight_eval_set`` and ``kwargs``
    are accepted but not used, so no early stopping takes place.

    :param X: frame exposing ``.names``, ``.to_pandas()`` and type-based
        column selection (presumably a datatable Frame -- TODO confirm).
    :param y: target labels.
    :param sample_weight: optional per-sample weights passed to CatBoost.
    """
    encoder = LabelEncoder()
    encoder.fit(self.labels)
    y = encoder.transform(y)

    feature_names = list(X.names)
    frame = X.to_pandas()

    # The loss function is left at CatBoost's default.
    catboost_params = dict(
        train_dir=user_dir(),
        allow_writing_files=False,
        thread_count=10,
    )

    from catboost import CatBoostClassifier
    model = CatBoostClassifier(**catboost_params)

    # Amazon specific: columns selected by the str/int type filter are
    # passed to CatBoost as categorical features.
    categorical = list(X[:, [str, int]].names)
    model.fit(frame, y=y, sample_weight=sample_weight, verbose=False,
              cat_features=categorical)

    # must always set best_iterations
    self.set_model_properties(
        model=model,
        features=feature_names,
        importances=model.feature_importances_,
        iterations=0,
    )