This article collects typical usage examples of sklearn.preprocessing.OrdinalEncoder in Python. If you are wondering what preprocessing.OrdinalEncoder is, how exactly to use it, or what real code using it looks like, the curated examples below should help. You may also want to explore the sklearn.preprocessing module it belongs to.
The sections below show 15 code examples of preprocessing.OrdinalEncoder, sorted by popularity by default.
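Before the collected examples, here is a minimal, self-contained sketch of the core OrdinalEncoder workflow (the toy data below is made up for illustration): fit learns the per-column categories, transform maps them to integer codes, and inverse_transform maps the codes back.

import numpy as np
from sklearn.preprocessing import OrdinalEncoder

# Toy data: two string-valued features with a handful of categories each.
X = np.array([["red", "S"], ["green", "M"], ["blue", "L"], ["red", "M"]], dtype=object)

enc = OrdinalEncoder()
codes = enc.fit_transform(X)          # each category becomes a float code, per column
print(enc.categories_)                # the categories learned for each feature (sorted)
print(codes)
print(enc.inverse_transform(codes))   # recovers the original string labels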
Example 1: test_gp_regressor
# Required import: from sklearn import preprocessing [as alias]
# Or: from sklearn.preprocessing import OrdinalEncoder [as alias]
def test_gp_regressor():
    rng = np.random.RandomState(0)
    X = np.asarray([
        ["ham", "spam", "ted"],
        ["ham", "ted", "ted"],
        ["ham", "spam", "spam"]])
    y = rng.randn(3)
    hm = HammingKernel(length_scale=[1.0, 1.0, 1.0])
    if UseOrdinalEncoder:
        enc = OrdinalEncoder()
        enc.fit(X)

    gpr = GaussianProcessRegressor(hm)
    if UseOrdinalEncoder:
        gpr.fit(enc.transform(X), y)
        assert_array_almost_equal(gpr.predict(enc.transform(X)), y)
        assert_array_almost_equal(gpr.predict(enc.transform(X[:2])), y[:2])
    else:
        gpr.fit(X, y)
        assert_array_almost_equal(gpr.predict(X), y)
        assert_array_almost_equal(gpr.predict(X[:2]), y[:2])
Example 2: make_xgboost_column_transformer
# Required import: from sklearn import preprocessing [as alias]
# Or: from sklearn.preprocessing import OrdinalEncoder [as alias]
def make_xgboost_column_transformer(dtypes, missing_value_aware = True):
    """Construct a ColumnTransformer for feeding complex data into an XGBModel.

    Parameters
    ----------
    dtypes: dict-like of (column, dtype) pairs (for example, a pandas DataFrame.dtypes series)

    missing_value_aware: boolean
        If true, use missing value aware transformers.

    Returns
    -------
    ColumnTransformer

    """
    transformers = list()
    for column, dtype in dtypes.items():
        if _is_categorical(dtype):
            transformers.append((str(column), PMMLLabelBinarizer(sparse_output = True) if missing_value_aware else Pipeline([("ordinal_encoder", OrdinalEncoder()), ("one_hot_encoder", OneHotEncoder())]), [column]))
        else:
            transformers.append((str(column), "passthrough", [column]))
    return ColumnTransformer(transformers, remainder = "drop")
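A hedged usage sketch of the helper above, assuming a pandas DataFrame whose .dtypes series is passed in and that the surrounding module supplies the _is_categorical, Pipeline, OneHotEncoder, PMMLLabelBinarizer and ColumnTransformer names referenced inside the function (the frame and column names here are made up):

import pandas as pd

# Hypothetical input: one pandas-categorical column and one numeric column.
df = pd.DataFrame({"color": pd.Categorical(["red", "green", "red"]),
                   "amount": [1.0, 2.5, 0.3]})

# DataFrame.dtypes is a Series, so it supports the .items() call used above.
transformer = make_xgboost_column_transformer(df.dtypes, missing_value_aware=False)
Xt = transformer.fit_transform(df)    # ordinal-then-one-hot encoded "color", passthrough "amount"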
Example 3: test_model_ordinal_encoder
# Required import: from sklearn import preprocessing [as alias]
# Or: from sklearn.preprocessing import OrdinalEncoder [as alias]
def test_model_ordinal_encoder(self):
    model = OrdinalEncoder(dtype=np.int64)
    data = np.array([[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]],
                    dtype=np.int64)
    model.fit(data)
    model_onnx = convert_sklearn(
        model,
        "scikit-learn ordinal encoder",
        [("input", Int64TensorType([None, 3]))],
    )
    self.assertTrue(model_onnx is not None)
    dump_data_and_model(
        data,
        model,
        model_onnx,
        basename="SklearnOrdinalEncoderInt64-SkipDim1",
        allow_failure="StrictVersion("
                      "onnxruntime.__version__)"
                      "<= StrictVersion('0.5.0')",
    )
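For completeness, a minimal, self-contained sketch (assuming skl2onnx and onnxruntime are installed) of how such a converted encoder can be scored directly with onnxruntime, outside the dump_data_and_model test harness:

import numpy as np
import onnxruntime as rt
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import Int64TensorType
from sklearn.preprocessing import OrdinalEncoder

data = np.array([[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]], dtype=np.int64)
model = OrdinalEncoder(dtype=np.int64)
model.fit(data)
onnx_model = convert_sklearn(model, "ordinal encoder",
                             [("input", Int64TensorType([None, 3]))])

sess = rt.InferenceSession(onnx_model.SerializeToString(),
                           providers=["CPUExecutionProvider"])
codes = sess.run(None, {"input": data})[0]
print(codes)                  # should match model.transform(data)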
Example 4: test_ordinal_encoder_onecat
# Required import: from sklearn import preprocessing [as alias]
# Or: from sklearn.preprocessing import OrdinalEncoder [as alias]
def test_ordinal_encoder_onecat(self):
    data = [["cat"], ["cat"]]
    model = OrdinalEncoder(categories="auto")
    model.fit(data)
    inputs = [("input1", StringTensorType([None, 1]))]
    model_onnx = convert_sklearn(model, "ordinal encoder one string cat",
                                 inputs)
    self.assertTrue(model_onnx is not None)
    dump_data_and_model(
        data,
        model,
        model_onnx,
        basename="SklearnOrdinalEncoderOneStringCat",
        allow_failure="StrictVersion("
                      "onnxruntime.__version__)"
                      "<= StrictVersion('0.5.0')",
    )
Example 5: test_ordinal_encoder_twocats
# Required import: from sklearn import preprocessing [as alias]
# Or: from sklearn.preprocessing import OrdinalEncoder [as alias]
def test_ordinal_encoder_twocats(self):
    data = [["cat2"], ["cat1"]]
    model = OrdinalEncoder(categories="auto")
    model.fit(data)
    inputs = [("input1", StringTensorType([None, 1]))]
    model_onnx = convert_sklearn(model, "ordinal encoder two string cats",
                                 inputs)
    self.assertTrue(model_onnx is not None)
    dump_data_and_model(
        data,
        model,
        model_onnx,
        allow_failure="StrictVersion("
                      "onnxruntime.__version__)"
                      "<= StrictVersion('0.5.0')",
        basename="SklearnOrdinalEncoderTwoStringCat",
    )
Example 6: test_model_ordinal_encoder_cat_list
# Required import: from sklearn import preprocessing [as alias]
# Or: from sklearn.preprocessing import OrdinalEncoder [as alias]
def test_model_ordinal_encoder_cat_list(self):
    model = OrdinalEncoder(categories=[[0, 1, 4, 5],
                                       [1, 2, 3, 5],
                                       [0, 3, 4, 6]])
    data = np.array([[1, 2, 3], [4, 3, 0], [0, 1, 4], [0, 5, 6]],
                    dtype=np.int64)
    model.fit(data)
    model_onnx = convert_sklearn(
        model,
        "scikit-learn ordinal encoder",
        [("input", Int64TensorType([None, 3]))],
    )
    self.assertTrue(model_onnx is not None)
    dump_data_and_model(
        data,
        model,
        model_onnx,
        basename="SklearnOrdinalEncoderCatList",
        allow_failure="StrictVersion("
                      "onnxruntime.__version__)"
                      "<= StrictVersion('0.5.0')",
    )
Example 7: encode_variables
# Required import: from sklearn import preprocessing [as alias]
# Or: from sklearn.preprocessing import OrdinalEncoder [as alias]
def encode_variables(data):
    """Encodes variables using simple ordinal encoding."""
    data2 = np.copy(data)
    encoder = OrdinalEncoder()
    categorical_indices = kBytesIndices
    data2[:, categorical_indices] = encoder.fit_transform(
        data2[:, categorical_indices])
    return data2
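A hypothetical call to the function above; kBytesIndices is a module-level constant that is not shown here, so this sketch simply assumes it marks column 1 as categorical:

import numpy as np

# Assumes kBytesIndices = [1] in the surrounding module.
data = np.array([
    [0.5, "tcp", 12.0],
    [1.2, "udp", 3.0],
    [0.1, "tcp", 7.5],
], dtype=object)

encoded = encode_variables(data)
print(encoded)   # column 1 replaced by its ordinal codes; the other columns are untouched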
Example 8: test_ordinal_encoder
# Required import: from sklearn import preprocessing [as alias]
# Or: from sklearn.preprocessing import OrdinalEncoder [as alias]
def test_ordinal_encoder(X):
    enc = OrdinalEncoder()
    exp = np.array([[0, 1, 0],
                    [1, 0, 0]], dtype='int64')
    assert_array_equal(enc.fit_transform(X), exp.astype('float64'))
    enc = OrdinalEncoder(dtype='int64')
    assert_array_equal(enc.fit_transform(X), exp)
Example 9: test_ordinal_encoder_specified_categories
# Required import: from sklearn import preprocessing [as alias]
# Or: from sklearn.preprocessing import OrdinalEncoder [as alias]
def test_ordinal_encoder_specified_categories(X, X2, cats, cat_dtype):
    enc = OrdinalEncoder(categories=cats)
    exp = np.array([[0.], [1.]])
    assert_array_equal(enc.fit_transform(X), exp)
    assert list(enc.categories[0]) == list(cats[0])
    assert enc.categories_[0].tolist() == list(cats[0])
    # manually specified categories should have same dtype as
    # the data when coerced from lists
    assert enc.categories_[0].dtype == cat_dtype

    # when specifying categories manually, unknown categories should already
    # raise when fitting
    enc = OrdinalEncoder(categories=cats)
    with pytest.raises(ValueError, match="Found unknown categories"):
        enc.fit(X2)
Example 10: test_ordinal_encoder_inverse
# Required import: from sklearn import preprocessing [as alias]
# Or: from sklearn.preprocessing import OrdinalEncoder [as alias]
def test_ordinal_encoder_inverse():
    X = [['abc', 2, 55], ['def', 1, 55]]
    enc = OrdinalEncoder()
    X_tr = enc.fit_transform(X)
    exp = np.array(X, dtype=object)
    assert_array_equal(enc.inverse_transform(X_tr), exp)

    # incorrect shape raises
    X_tr = np.array([[0, 1, 1, 2], [1, 0, 1, 0]])
    msg = re.escape('Shape of the passed X data is not correct')
    assert_raises_regex(ValueError, msg, enc.inverse_transform, X_tr)
Example 11: test_ordinal_encoder_raise_missing
# Required import: from sklearn import preprocessing [as alias]
# Or: from sklearn.preprocessing import OrdinalEncoder [as alias]
def test_ordinal_encoder_raise_missing(X):
    ohe = OrdinalEncoder()

    with pytest.raises(ValueError, match="Input contains NaN"):
        ohe.fit(X)

    with pytest.raises(ValueError, match="Input contains NaN"):
        ohe.fit_transform(X)

    ohe.fit(X[:1, :])

    with pytest.raises(ValueError, match="Input contains NaN"):
        ohe.transform(X)
Example 12: test_ordinal_encoder_raise_categories_shape
# Required import: from sklearn import preprocessing [as alias]
# Or: from sklearn.preprocessing import OrdinalEncoder [as alias]
def test_ordinal_encoder_raise_categories_shape():
    X = np.array([['Low', 'Medium', 'High', 'Medium', 'Low']], dtype=object).T
    cats = ['Low', 'Medium', 'High']
    enc = OrdinalEncoder(categories=cats)
    msg = ("Shape mismatch: if categories is an array,")

    with pytest.raises(ValueError, match=msg):
        enc.fit(X)
Example 13: make_lightgbm_column_transformer
# Required import: from sklearn import preprocessing [as alias]
# Or: from sklearn.preprocessing import OrdinalEncoder [as alias]
def make_lightgbm_column_transformer(dtypes, missing_value_aware = True):
    """Construct a ColumnTransformer for feeding complex data into a LGBMModel.

    Parameters
    ----------
    dtypes: dict-like of (column, dtype) pairs (for example, a pandas DataFrame.dtypes series)

    missing_value_aware: boolean
        If true, use missing value aware transformers.

    Returns
    -------
    Tuple (ColumnTransformer, list of categorical column indices)

    """
    transformers = list()
    categorical_features = list()
    i = 0
    for column, dtype in dtypes.items():
        if _is_categorical(dtype):
            transformers.append((str(column), PMMLLabelEncoder(missing_values = -1) if missing_value_aware else OrdinalEncoder(), [column]))
            categorical_features.append(i)
        else:
            transformers.append((str(column), "passthrough", [column]))
        i += 1
    return (ColumnTransformer(transformers, remainder = "drop"), categorical_features)
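A hedged usage sketch, parallel to the XGBoost helper above: it assumes a pandas DataFrame, that _is_categorical and PMMLLabelEncoder are available in the surrounding module, and that LightGBM is installed (categorical_feature is a parameter of LGBMClassifier.fit); the data is made up:

import pandas as pd
from lightgbm import LGBMClassifier

# Hypothetical frame: one categorical column, one numeric column.
df = pd.DataFrame({"color": pd.Categorical(["red", "green", "red", "blue"]),
                   "amount": [1.0, 2.5, 0.3, 4.1]})
y = [0, 1, 0, 1]

transformer, categorical_features = make_lightgbm_column_transformer(
    df.dtypes, missing_value_aware=False)
Xt = transformer.fit_transform(df)

clf = LGBMClassifier(n_estimators=10)
clf.fit(Xt, y, categorical_feature=categorical_features)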
Example 14: test_ordinal_encoder_mixed_string_int_drop
# Required import: from sklearn import preprocessing [as alias]
# Or: from sklearn.preprocessing import OrdinalEncoder [as alias]
def test_ordinal_encoder_mixed_string_int_drop(self):
    data = [
        ["c0.4", "c0.2", 3],
        ["c1.4", "c1.2", 0],
        ["c0.2", "c2.2", 1],
        ["c0.2", "c2.2", 1],
        ["c0.2", "c2.2", 1],
        ["c0.2", "c2.2", 1],
    ]
    test = [["c0.2", "c2.2", 1]]
    model = OrdinalEncoder(categories="auto")
    model.fit(data)
    inputs = [
        ("input1", StringTensorType([None, 2])),
        ("input2", Int64TensorType([None, 1])),
    ]
    model_onnx = convert_sklearn(
        model, "ordinal encoder", inputs)
    self.assertTrue(model_onnx is not None)
    dump_data_and_model(
        test,
        model,
        model_onnx,
        basename="SklearnOrdinalEncoderMixedStringIntDrop",
        allow_failure="StrictVersion("
                      "onnxruntime.__version__)"
                      "<= StrictVersion('0.5.0')",
    )
Example 15: create_data
# Required import: from sklearn import preprocessing [as alias]
# Or: from sklearn.preprocessing import OrdinalEncoder [as alias]
def create_data(X: dt.Frame = None):
    if X is None:
        return []
    data = X.to_pandas().copy()

    # identify categorical columns and transform them
    cats = [x for x in data.select_dtypes(exclude=np.number).columns if x not in [target] + cols2ignore]
    for c in cats:
        data[c] = OrdinalEncoder().fit_transform(data[c].astype(str).values.reshape(-1, 1))

    # Get the actual importance, i.e. without shuffling
    actual_imp_df = get_feature_importances(data=data, cats=cats, shuffle=False, seed=42)

    # Seed the unexpected randomness of this world
    np.random.seed(123)
    seeds = np.random.randint(0, 2 ** 30, size=number_of_iterations)

    null_imp_df = pd.DataFrame()
    for i, s in enumerate(seeds):
        # Get current run importances
        imp_df = get_feature_importances(data=data, cats=cats, shuffle=True, seed=s)
        imp_df['run'] = i + 1
        # Concat the latest importances with the old ones
        null_imp_df = pd.concat([null_imp_df, imp_df], axis=0)

    feature_scores = []
    for _f in actual_imp_df['feature'].unique():
        f_null_imps_gain = null_imp_df.loc[null_imp_df['feature'] == _f, 'importance'].values
        f_act_imps_gain = actual_imp_df.loc[actual_imp_df['feature'] == _f, 'importance'].mean()
        _score = np.log(
            1e-10 + f_act_imps_gain / (1 + np.percentile(f_null_imps_gain, max(75, min(99, threshold)))))
        feature_scores.append((_f, _score))

    scores_df = pd.DataFrame(feature_scores, columns=['feature', 'score'])

    # final feature selection
    selected_features = scores_df[scores_df['score'] > 0]['feature'].values.tolist()
    selected_features = np.unique(selected_features).tolist()

    data = X.to_pandas().copy()
    return data[cols2ignore + selected_features + [target]]
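The per-feature score computed in the loop above compares the real importance against a high percentile of its null (shuffled-target) importances. A minimal, self-contained sketch with made-up importance values, assuming threshold is a percentile setting such as 95:

import numpy as np

threshold = 95   # hypothetical value; the recipe reads it from a module-level setting

f_act_imps_gain = 3.2                               # mean importance on the real target
f_null_imps_gain = np.array([0.8, 1.1, 0.9, 1.0])   # importances obtained with shuffled targets

score = np.log(
    1e-10 + f_act_imps_gain / (1 + np.percentile(f_null_imps_gain, max(75, min(99, threshold)))))
print(score)   # positive score: the feature beats its null distribution and would be selected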