本文整理汇总了Python中sklearn.preprocessing.OneHotEncoder方法的典型用法代码示例。如果您正苦于以下问题:Python preprocessing.OneHotEncoder方法的具体用法?Python preprocessing.OneHotEncoder怎么用?Python preprocessing.OneHotEncoder使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.preprocessing的用法示例。
在下文中一共展示了preprocessing.OneHotEncoder方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_one_hot_encoder_categorical_features
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import OneHotEncoder [as 别名]
def test_one_hot_encoder_categorical_features():
    """Run ``_check_one_hot`` for several ``categorical_features`` masks.

    Also verifies that fitting raises ``ValueError`` when both ``categories``
    and ``categorical_features`` are given to ``OneHotEncoder``.
    """
    X = np.array([[3, 2, 1], [0, 1, 1]])
    X2 = np.array([[1, 1, 1]])

    # (mask, fourth argument forwarded to _check_one_hot) pairs.
    # NOTE(review): the number is presumably the expected count of output
    # features -- confirm against _check_one_hot.
    cases = (
        ([True, False, False], 4),
        ([False, False, False], 3),  # edge case: all non-categorical
        ([True, True, True], 5),  # edge case: all categorical
    )
    for mask, expected in cases:
        _check_one_hot(X, X2, mask, expected)

    # Specifying categories together with a mask must be rejected at fit time.
    conflicting = OneHotEncoder(
        categories=[range(3)],
        categorical_features=[True, False, False],
    )
    assert_raises(ValueError, conflicting.fit, X)
示例2: cat_onehot_encoder
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import OneHotEncoder [as 别名]
def cat_onehot_encoder(df, y, col, selection=True):
    """Label-encode the values of ``df`` and one-hot encode them sparsely.

    All values of ``df`` are flattened into a single categorical feature,
    mapped to integer codes with a ``LabelEncoder`` and then expanded with a
    ``OneHotEncoder`` into a CSR matrix of floats.

    :param df: object exposing ``.values`` as an array
        (NOTE(review): presumably a pandas Series/DataFrame -- confirm).
    :param y: target forwarded to ``train_lightgbm_for_feature_selection``.
    :param col: label used only in the printed AUC message.
    :param selection: the LightGBM scoring step runs only when this is
        exactly ``True``.
    :return: tuple ``(features, one_hot_encoder, models, auc_score,
        label_encoder)``; ``models`` and ``auc_score`` are ``None`` when the
        scoring step is skipped.
    """
    from scipy.sparse import csr_matrix
    from sklearn.preprocessing import LabelEncoder

    # LabelEncoder expects a 1-D array; handing it a column vector only works
    # through an implicit ravel that emits a DataConversionWarning.
    le = LabelEncoder()
    codes = le.fit_transform(df.values.reshape(-1)).reshape(-1, 1)

    # Sparse output is the OneHotEncoder default. Relying on the default
    # avoids the ``sparse`` keyword, which newer scikit-learn releases renamed
    # to ``sparse_output``.
    mlbs = OneHotEncoder().fit(codes)
    features_tmp = csr_matrix(mlbs.transform(codes), dtype=float)

    models = None
    auc_score = None
    # Identity check kept on purpose: truthy non-bool values skip selection,
    # exactly as before.
    if selection is True:
        auc_score, models = train_lightgbm_for_feature_selection(features_tmp, y)
        print(col, "auc", auc_score)

    return features_tmp, mlbs, models, auc_score, le
示例3: loadmodel
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import OneHotEncoder [as 别名]
def loadmodel(self, prefix):
    """ Load the model.

    Restores the dictionary from ``<prefix>_vocabs.gensimdict``, reads the
    hyper-parameters from ``<prefix>_config.json``, rebuilds the binarizer and
    encoders from them, and finally loads the network with
    ``kerasio.load_model(prefix)``. Sets ``self.trained`` to ``True``.

    :param prefix: prefix of the model path
    :return: None
    :type prefix: str
    """
    self.dictionary = Dictionary.load(prefix+'_vocabs.gensimdict')

    # Use a context manager so the config file handle is always closed.
    with open(prefix+'_config.json', 'r') as config_file:
        parameters = json.load(config_file)

    self.operation = parameters['operation']
    self.alph = parameters['alph']
    self.specialsignals = parameters['special_signals']
    self.binarizer = SCRNNBinarizer(self.alph, self.specialsignals)
    self.concatcharvec_encoder = SpellingToConcatCharVecEncoder(self.alph)
    self.batchsize = parameters['batchsize']
    self.nb_hiddenunits = parameters['nb_hiddenunits']

    # Fit the one-hot encoder on every index 0..len(dictionary)-1, one per row.
    vocab_size = len(self.dictionary)
    self.onehotencoder = OneHotEncoder()
    self.onehotencoder.fit(np.arange(vocab_size).reshape((vocab_size, 1)))

    self.model = kerasio.load_model(prefix)
    self.trained = True
示例4: get_X_y
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import OneHotEncoder [as 别名]
def get_X_y(**kwargs):
    """Read the tab-separated file at ``info['path']`` and preprocess it.

    All keyword arguments are forwarded to ``pd.read_csv``. The resulting
    frame and the module-level ``label_encoder`` are handed to ``preprocess``,
    whose return value is returned unchanged (presumably the features and
    labels -- see ``preprocess``). Centralising this avoids repeating the same
    transformation throughout the code.
    """
    # ``label_encoder`` is only read here, so no ``global`` statement is needed.
    frame = pd.read_csv(info['path'], sep='\t', **kwargs)
    return preprocess(frame, label_encoder)
###############################################################################
# Classifier objects in |sklearn| often require :code:`y` to be integer labels.
# Additionally, |APS| requires a binary version of the labels. For these two
# purposes, we create:
#
# * a |LabelEncoder|, that we pre-fitted on the known :code:`y` classes
# * a |OneHotEncoder|, pre-fitted on the resulting integer labels.
#
# Their |transform| methods can then be called at appropriate times.
示例5: __call__
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import OneHotEncoder [as 别名]
def __call__(self, data):
    """Attach a one-hot label matrix to ``data`` and return it.

    The categories come from ``data['metadata']['labels']``. The labels to
    encode are the keys of ``data[self.source_key]``, each truncated at the
    first ``'::'`` and then sorted. The dense result is stored under
    ``data['one_hot_labels']``.

    Raises:
        TransformException: if ``data`` has no ``'metadata'`` entry, or the
            metadata has no ``'labels'`` entry.
    """
    if 'metadata' not in data:
        raise TransformException(
            f"Expected metadata in data, got {list(data.keys())}")
    if 'labels' not in data['metadata']:
        raise TransformException(
            f"Expected labels in data['metadata'], got "
            f"{list(data['metadata'].keys())}")

    encoder = OneHotEncoder(categories=[data['metadata']['labels']])

    # Keep only the part of each source key before the first '::'.
    prefixes = [key.split('::')[0] for key in data[self.source_key].keys()]
    # One single-feature row per label, in sorted order.
    label_rows = [[prefix] for prefix in sorted(prefixes)]

    data['one_hot_labels'] = encoder.fit_transform(label_rows).toarray()
    return data
示例6: __init__
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import OneHotEncoder [as 别名]
def __init__(self,
             maptimes=10,
             enhencetimes=10,
             map_function='linear',
             enhence_function='linear',
             batchsize='auto',
             reg=0.001):
    """Store the hyper-parameters and create the helper objects.

    Each argument is saved unchanged on the attribute of the same name with a
    leading underscore (``maptimes`` -> ``self._maptimes`` and so on).
    ``W`` and ``pesuedoinverse`` start at ``0``. A scaler, a dense-output
    one-hot encoder and two node generators (the second one created with
    ``whiten=True``) are instantiated.
    """
    # Hyper-parameters, stored as given.
    self._maptimes = maptimes
    self._enhencetimes = enhencetimes
    self._map_function = map_function
    self._enhence_function = enhence_function
    self._batchsize = batchsize
    self._reg = reg

    # Placeholders, initialised to zero.
    self.W = 0
    self.pesuedoinverse = 0

    # Helper objects, created in the same order as before.
    self.normalscaler = scaler()
    self.onehotencoder = preprocessing.OneHotEncoder(sparse=False)
    self.mapping_generator = node_generator()
    self.enhence_generator = node_generator(whiten=True)
示例7: test_column_transformer_list
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import OneHotEncoder [as 别名]
def test_column_transformer_list():
    """ColumnTransformer must accept a plain list of lists as input.

    Columns 0 and 1 go through ``StandardScaler`` and column 2 through
    ``OneHotEncoder``; both ``fit_transform`` and ``fit`` followed by
    ``transform`` are compared against the same expected array.
    """
    nan = float('nan')
    X_list = [[1, nan, 'a'],
              [0, 0, 'b']]
    expected_result = np.array([[1, nan, 1, 0],
                                [-1, 0, 0, 1]])

    transformers = [
        ('numerical', StandardScaler(), [0, 1]),
        ('categorical', OneHotEncoder(), [2]),
    ]
    ct = ColumnTransformer(transformers)

    one_step = ct.fit_transform(X_list)
    assert_array_equal(one_step, expected_result)

    two_step = ct.fit(X_list).transform(X_list)
    assert_array_equal(two_step, expected_result)
示例8: test_encode_options
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import OneHotEncoder [as 别名]
def test_encode_options():
    """Compare KBinsDiscretizer one-hot encodings with OneHotEncoder output.

    The ordinal encoding of the module-level ``X`` is one-hot encoded with
    ``OneHotEncoder`` and must equal the discretizer's own ``'onehot-dense'``
    (dense) and ``'onehot'`` (sparse) outputs.
    """
    def discretize(encode):
        # Fit and apply a discretizer with the fixed per-feature bin counts.
        est = KBinsDiscretizer(n_bins=[2, 3, 3, 3], encode=encode).fit(X)
        return est.transform(X)

    def one_hot(ordinal, sparse):
        # One-hot encode ordinal bin indices, with categories 0..n_bins-1.
        encoder = OneHotEncoder(
            categories=[np.arange(i) for i in [2, 3, 3, 3]],
            sparse=sparse)
        return encoder.fit_transform(ordinal)

    Xt_1 = discretize('ordinal')

    Xt_2 = discretize('onehot-dense')
    assert not sp.issparse(Xt_2)
    assert_array_equal(one_hot(Xt_1, sparse=False), Xt_2)

    Xt_3 = discretize('onehot')
    assert sp.issparse(Xt_3)
    assert_array_equal(one_hot(Xt_1, sparse=True).toarray(), Xt_3.toarray())
示例9: test_one_hot_encoder_force_new_behaviour
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import OneHotEncoder [as 别名]
def test_one_hot_encoder_force_new_behaviour():
    """Contrast the default encoder with ``categories='auto'``.

    The data is the ambiguous integer case: the training categories (1, 2) do
    not form a consecutive range starting at zero.
    """
    X = np.array([1, 2]).reshape(-1, 1)
    X2 = np.array([0, 1]).reshape(-1, 1)

    # Without arguments the legacy behaviour is used by default, with warnings.
    legacy_enc = OneHotEncoder()
    with ignore_warnings(category=FutureWarning):
        legacy_enc.fit(X)
    legacy_result = legacy_enc.transform(X2)
    assert_array_equal(legacy_result.toarray(), np.array([[0, 0], [1, 0]]))

    # With an explicit 'auto' the legacy behaviour is not used, so an unseen
    # value within the range raises an error.
    auto_enc = OneHotEncoder(categories='auto')
    auto_enc.fit(X)
    assert_raises(ValueError, auto_enc.transform, X2)
示例10: test_one_hot_encoder_specified_categories
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import OneHotEncoder [as 别名]
def test_one_hot_encoder_specified_categories(X, X2, cats, cat_dtype):
    """Check OneHotEncoder with manually specified categories.

    The arguments are supplied by the caller (NOTE(review): presumably a
    pytest parametrization that is not part of this snippet). ``X`` contains
    only values from ``cats``, ``X2`` contains an unknown one, and
    ``cat_dtype`` is the dtype expected for the fitted categories.
    """
    encoder = OneHotEncoder(categories=cats)
    expected = np.array([[1., 0., 0.],
                         [0., 1., 0.]])
    assert_array_equal(encoder.fit_transform(X).toarray(), expected)

    wanted = list(cats[0])
    assert list(encoder.categories[0]) == wanted
    assert encoder.categories_[0].tolist() == wanted
    # Manually specified categories should have the same dtype as the data
    # when coerced from lists.
    assert encoder.categories_[0].dtype == cat_dtype

    # With manually specified categories, unknown categories should already
    # raise when fitting.
    strict = OneHotEncoder(categories=cats)
    with pytest.raises(ValueError, match="Found unknown categories"):
        strict.fit(X2)

    lenient = OneHotEncoder(categories=cats, handle_unknown='ignore')
    expected_ignored = np.array([[1., 0., 0.], [0., 0., 0.]])
    assert_array_equal(lenient.fit(X2).transform(X2).toarray(),
                       expected_ignored)
示例11: test_one_hot_encoder_unsorted_categories
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import OneHotEncoder [as 别名]
def test_one_hot_encoder_unsorted_categories():
    """Unsorted categories work for strings but are rejected for numbers."""
    # Object (string) data: the user-provided category order must be kept.
    string_column = np.array([['a', 'b']], dtype=object).T
    string_enc = OneHotEncoder(categories=[['b', 'a', 'c']])
    expected = np.array([[0., 1., 0.],
                         [1., 0., 0.]])

    assert_array_equal(
        string_enc.fit(string_column).transform(string_column).toarray(),
        expected)
    assert_array_equal(
        string_enc.fit_transform(string_column).toarray(), expected)

    fitted = string_enc.categories_[0]
    assert fitted.tolist() == ['b', 'a', 'c']
    assert np.issubdtype(fitted.dtype, np.object_)

    # Unsorted passed categories still raise for numerical values.
    numeric_column = np.array([[1, 2]]).T
    numeric_enc = OneHotEncoder(categories=[[2, 1, 3]])
    with pytest.raises(ValueError,
                       match='Unsorted categories are not supported'):
        numeric_enc.fit_transform(numeric_column)
示例12: test_one_hot_encoder_raise_missing
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import OneHotEncoder [as 别名]
def test_one_hot_encoder_raise_missing(X, as_data_frame, handle_unknown):
    """OneHotEncoder must raise "Input contains NaN" on missing values.

    The error is expected from ``fit`` and ``fit_transform`` on ``X``, and
    from ``transform`` on ``X`` after fitting on its first row only. The
    arguments are supplied by the caller (NOTE(review): presumably a pytest
    parametrization that is not part of this snippet).
    """
    nan_message = "Input contains NaN"

    if as_data_frame:
        pd = pytest.importorskip('pandas')
        X = pd.DataFrame(X)

    ohe = OneHotEncoder(categories='auto', handle_unknown=handle_unknown)

    # Both fitting entry points must reject the input.
    for fit_method in (ohe.fit, ohe.fit_transform):
        with pytest.raises(ValueError, match=nan_message):
            fit_method(X)

    # Fit on the first row only, then transform the full data.
    X_partial = X.iloc[:1, :] if as_data_frame else X[:1, :]
    ohe.fit(X_partial)

    with pytest.raises(ValueError, match=nan_message):
        ohe.transform(X)
示例13: test_encoder_dtypes
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import OneHotEncoder [as 别名]
def test_encoder_dtypes():
    """Check that dtypes are preserved when determining categories."""
    enc = OneHotEncoder(categories='auto')
    exp = np.array([[1., 0., 1., 0.], [0., 1., 0., 1.]], dtype='float64')

    # ndarray inputs: each fitted category array keeps the input dtype.
    array_inputs = (
        np.array([[1, 2], [3, 4]], dtype='int64'),
        np.array([[1, 2], [3, 4]], dtype='float64'),
        np.array([['a', 'b'], ['c', 'd']]),  # string dtype
        np.array([[1, 'a'], [3, 'b']], dtype='object'),
    )
    for X in array_inputs:
        enc.fit(X)
        assert all(enc.categories_[i].dtype == X.dtype for i in range(2))
        assert_array_equal(enc.transform(X).toarray(), exp)

    # List of ints: categories get some integer dtype.
    X = [[1, 2], [3, 4]]
    enc.fit(X)
    assert all(np.issubdtype(enc.categories_[i].dtype, np.integer)
               for i in range(2))
    assert_array_equal(enc.transform(X).toarray(), exp)

    # Mixed list: categories get object dtype.
    X = [[1, 'a'], [3, 'b']]
    enc.fit(X)
    assert all(enc.categories_[i].dtype == 'object' for i in range(2))
    assert_array_equal(enc.transform(X).toarray(), exp)
示例14: test_encoder_dtypes_pandas
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import OneHotEncoder [as 别名]
def test_encoder_dtypes_pandas():
    """Check category dtypes for DataFrame input.

    DataFrame counterpart of ``test_encoder_dtypes``: an all-int64 frame must
    give int64 categories for every column, and a mixed frame must give each
    column's own dtype.
    """
    pd = pytest.importorskip('pandas')

    enc = OneHotEncoder(categories='auto')
    exp = np.array([[1., 0., 1., 0., 1., 0.],
                    [0., 1., 0., 1., 0., 1.]], dtype='float64')

    X = pd.DataFrame({'A': [1, 2], 'B': [3, 4], 'C': [5, 6]}, dtype='int64')
    enc.fit(X)
    # Check every fitted column. The previous ``range(2)`` silently skipped
    # the third column ('C').
    assert len(enc.categories_) == X.shape[1]
    assert all(cats.dtype == 'int64' for cats in enc.categories_)
    assert_array_equal(enc.transform(X).toarray(), exp)

    X = pd.DataFrame({'A': [1, 2], 'B': ['a', 'b'], 'C': [3., 4.]})
    X_type = [int, object, float]
    enc.fit(X)
    assert len(enc.categories_) == len(X_type)
    assert all(cats.dtype == expected_type
               for cats, expected_type in zip(enc.categories_, X_type))
    assert_array_equal(enc.transform(X).toarray(), exp)
示例15: test_one_hot_encoder_drop_manual
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import OneHotEncoder [as 别名]
def test_one_hot_encoder_drop_manual():
    """Check OneHotEncoder with an explicit per-feature ``drop`` list.

    Verifies the transformed array, that ``drop_idx_`` points at the requested
    categories, and that ``inverse_transform`` restores the original data.
    """
    X = [['abc', 12, 2, 55],
         ['def', 12, 1, 55],
         ['def', 12, 3, 56]]
    cats_to_drop = ['def', 12, 3, 56]

    enc = OneHotEncoder(drop=cats_to_drop)
    trans = enc.fit_transform(X).toarray()

    expected = [[1, 0, 1, 1],
                [0, 1, 0, 1],
                [0, 0, 0, 0]]
    assert_array_equal(trans, expected)

    # Look up, for every feature, the category that drop_idx_ points at.
    dropped_cats = []
    for feature_cats, drop_index in zip(enc.categories_, enc.drop_idx_):
        dropped_cats.append(feature_cats[drop_index])
    assert_array_equal(dropped_cats, cats_to_drop)

    # Round trip back to the original values.
    original = np.array(X, dtype=object)
    assert_array_equal(original, enc.inverse_transform(trans))