本文整理汇总了Python中sklearn.compose.ColumnTransformer类的典型用法代码示例。如果您正苦于以下问题:Python compose.ColumnTransformer类的具体用法?Python compose.ColumnTransformer怎么用?Python compose.ColumnTransformer使用的例子?那么恭喜您,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块sklearn.compose的用法示例。
在下文中一共展示了compose.ColumnTransformer类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_column_transformer_list
# 需要导入模块: from sklearn import compose [as 别名]
# 或者: from sklearn.compose import ColumnTransformer [as 别名]
def test_column_transformer_list():
    """Check that ColumnTransformer accepts a plain list of lists as input.

    Columns 0 and 1 go through ``StandardScaler`` and column 2 through
    ``OneHotEncoder``. Both ``fit_transform`` and ``fit`` followed by
    ``transform`` must yield the same expected array.
    """
    rows = [
        [1, float('nan'), 'a'],
        [0, 0, 'b'],
    ]
    # First two output columns come from the scaler, the last two from the
    # one-hot encoding of the string column.
    wanted = np.array([
        [1, float('nan'), 1, 0],
        [-1, 0, 0, 1],
    ])

    transformer_specs = [
        ('numerical', StandardScaler(), [0, 1]),
        ('categorical', OneHotEncoder(), [2]),
    ]
    ct = ColumnTransformer(transformer_specs)

    assert_array_equal(ct.fit_transform(rows), wanted)
    assert_array_equal(ct.fit(rows).transform(rows), wanted)
示例2: test_column_transformer
# 需要导入模块: from sklearn import compose [as 别名]
# 或者: from sklearn.compose import ColumnTransformer [as 别名]
def test_column_transformer(self):
    """Check that each encoder behaves the same inside a ColumnTransformer.

    For every encoder exported by ``encoders`` (except ``HashingEncoder``),
    the output obtained through a ``ColumnTransformer`` is compared with the
    output of the encoder applied directly to the same columns.

    See issue #169.
    """
    # HashingEncoder does not accept handle_missing parameter
    candidate_names = set(encoders.__all__) - {'HashingEncoder'}

    for name in candidate_names:
        with self.subTest(encoder_name=name):
            encoder_cls = getattr(encoders, name)

            # we can only test one data type at once. Here, we test string
            # columns.
            string_columns = [
                'unique_str', 'invariant', 'underscore', 'none', 'extra',
            ]

            # ColumnTransformer instantiates the encoder twice -> we have to
            # make sure the encoder settings are correctly passed
            wrapped = ColumnTransformer([
                ("dummy_encoder_name",
                 encoder_cls(handle_missing="return_nan"),
                 string_columns),
            ])
            obtained = wrapped.fit_transform(X, y)

            # the old-school approach: apply the encoder directly
            direct = encoder_cls(handle_missing="return_nan", return_df=False)
            expected = direct.fit_transform(X[string_columns], y)

            np.testing.assert_array_equal(obtained, expected)
示例3: make_pipeline
# 需要导入模块: from sklearn import compose [as 别名]
# 或者: from sklearn.compose import ColumnTransformer [as 别名]
def make_pipeline(encoding_method):
    """Build a RidgeCV pipeline using *encoding_method* for the dirty column.

    Parameters
    ----------
    encoding_method : str
        Key into ``encoders_dict`` selecting the encoder applied to
        ``dirty_column``; it is also used as that transformer's name.

    Returns
    -------
    Pipeline
        A ``ColumnTransformer`` (unlisted columns dropped), then a
        ``StandardScaler(with_mean=False)``, then a ``RidgeCV`` estimator.
    """
    # static transformers from the other columns
    column_specs = []
    for col, enc in clean_columns.items():
        column_specs.append((enc + '_' + col, encoders_dict[enc], [col]))

    # adding the encoded column
    column_specs.append(
        (encoding_method, encoders_dict[encoding_method], [dirty_column]))

    # Use ColumnTransformer to combine the features
    combiner = ColumnTransformer(transformers=column_specs, remainder='drop')

    steps = [
        ('union', combiner),
        ('scaler', StandardScaler(with_mean=False)),
        ('clf', RidgeCV()),
    ]
    return Pipeline(steps)
#########################################################################
# Fitting each encoding method with a RidgeCV
# --------------------------------------------
# Eventually, we loop over the different encoding methods,
# instantiate each time a new pipeline, fit it
# and store the returned cross-validation score:
示例4: make_pipeline
# 需要导入模块: from sklearn import compose [as 别名]
# 或者: from sklearn.compose import ColumnTransformer [as 别名]
def make_pipeline(encoding_method):
    """Build a random-forest pipeline using *encoding_method* for the dirty column.

    Parameters
    ----------
    encoding_method : str
        Key into ``encoder_dict`` selecting the encoder applied to
        ``dirty_column``. The transformer is named
        ``encoding_method + '-dirty'``.

    Returns
    -------
    Pipeline
        A ``ColumnTransformer`` (unlisted columns dropped), then a
        ``StandardScaler(with_mean=False)``, then a
        ``RandomForestClassifier(random_state=5)``.
    """
    # static transformers from the other columns
    clean_spec = ('one-hot-clean', encoder_dict['one-hot'], clean_columns)
    # adding the encoded column
    dirty_spec = (encoding_method + '-dirty',
                  encoder_dict[encoding_method],
                  [dirty_column])

    # Use ColumnTransformer to combine the features
    combiner = ColumnTransformer(
        transformers=[clean_spec, dirty_spec],
        remainder='drop')

    steps = [
        ('union', combiner),
        ('scaler', StandardScaler(with_mean=False)),
        ('classifier', RandomForestClassifier(random_state=5)),
    ]
    return Pipeline(steps)
###############################################################################
# Evaluation of different encoding methods
# -----------------------------------------
# We then loop over encoding methods, scoring the different pipeline predictions
# using a cross validation score:
示例5: test_column_transformer_sparse_array
# 需要导入模块: from sklearn import compose [as 别名]
# 或者: from sklearn.compose import ColumnTransformer [as 别名]
def test_column_transformer_sparse_array():
    """Check ColumnTransformer on a sparse CSR input.

    Several column specifications (scalar, list, slice) are tried, with both
    ``remainder='drop'`` and ``remainder='passthrough'``. In every case the
    output must be sparse and match the expected slice of the input.
    """
    X_sparse = sparse.eye(3, 2).tocsr()

    def check(transformer, expected):
        # The same three assertions apply to every configuration below.
        assert sparse.issparse(transformer.fit_transform(X_sparse))
        assert_allclose_dense_sparse(
            transformer.fit_transform(X_sparse), expected)
        assert_allclose_dense_sparse(
            transformer.fit(X_sparse).transform(X_sparse), expected)

    # no distinction between 1D and 2D
    first_column = X_sparse[:, 0]
    every_column = X_sparse

    remainder_cases = [('drop', first_column),
                       ('passthrough', every_column)]
    for col in [0, [0], slice(0, 1)]:
        for remainder, expected in remainder_cases:
            check(ColumnTransformer([('trans', Trans(), col)],
                                    remainder=remainder,
                                    sparse_threshold=0.8),
                  expected)

    # Selecting both columns explicitly returns the full input.
    for col in [[0, 1], slice(0, 2)]:
        check(ColumnTransformer([('trans', Trans(), col)],
                                sparse_threshold=0.8),
              every_column)
示例6: test_column_transformer_sparse_stacking
# 需要导入模块: from sklearn import compose [as 别名]
# 或者: from sklearn.compose import ColumnTransformer [as 别名]
def test_column_transformer_sparse_stacking():
    """Check stacking of dense and sparse transformer outputs.

    A ``Trans`` on column 0 and a ``SparseMatrixTrans`` on column 1 are
    combined twice:

    * with ``sparse_threshold=0.8`` the stacked result must be sparse;
    * with ``sparse_threshold=0.1`` the stacked result must be dense.

    In both cases the columns after the first must equal an identity matrix,
    and no ``'remainder'`` entry may be appended to ``transformers_``.
    """
    X_array = np.array([[0, 1, 2], [2, 4, 6]]).T

    # High threshold: the stacked output is expected to stay sparse.
    col_trans = ColumnTransformer([('trans1', Trans(), [0]),
                                   ('trans2', SparseMatrixTrans(), 1)],
                                  sparse_threshold=0.8)
    col_trans.fit(X_array)
    X_trans = col_trans.transform(X_array)
    assert sparse.issparse(X_trans)
    # Plain assert, consistent with the identical check in the dense case.
    assert X_trans.shape == (X_trans.shape[0], X_trans.shape[0] + 1)
    assert_array_equal(X_trans.toarray()[:, 1:], np.eye(X_trans.shape[0]))
    assert len(col_trans.transformers_) == 2
    assert col_trans.transformers_[-1][0] != 'remainder'

    # Low threshold: the stacked output is expected to be dense.
    col_trans = ColumnTransformer([('trans1', Trans(), [0]),
                                   ('trans2', SparseMatrixTrans(), 1)],
                                  sparse_threshold=0.1)
    col_trans.fit(X_array)
    X_trans = col_trans.transform(X_array)
    assert not sparse.issparse(X_trans)
    assert X_trans.shape == (X_trans.shape[0], X_trans.shape[0] + 1)
    assert_array_equal(X_trans[:, 1:], np.eye(X_trans.shape[0]))
示例7: test_column_transformer_remainder_pandas
# 需要导入模块: from sklearn import compose [as 别名]
# 或者: from sklearn.compose import ColumnTransformer [as 别名]
def test_column_transformer_remainder_pandas(key):
    """Check ``remainder='passthrough'`` with a pandas DataFrame input.

    ``key`` is the column specification handed to the single transformer.
    The special string ``'pd-index'`` is replaced by
    ``pd.Index(['first'])``.
    NOTE(review): ``key`` is presumably supplied by a pytest parametrization
    outside this view -- confirm.
    """
    # test different ways that columns are specified with passthrough
    pd = pytest.importorskip('pandas')
    if isinstance(key, str) and key == 'pd-index':
        key = pd.Index(['first'])

    data = np.array([[0, 1, 2], [2, 4, 6]]).T
    frame = pd.DataFrame(data, columns=['first', 'second'])
    # With passthrough, the output is expected to equal the whole input.
    expected = data

    ct = ColumnTransformer([('trans1', Trans(), key)],
                           remainder='passthrough')
    assert_array_equal(ct.fit_transform(frame), expected)
    assert_array_equal(ct.fit(frame).transform(frame), expected)

    # The fitted transformer list gains a trailing 'remainder' entry.
    assert len(ct.transformers_) == 2
    remainder_entry = ct.transformers_[-1]
    assert remainder_entry[0] == 'remainder'
    assert remainder_entry[1] == 'passthrough'
    assert_array_equal(remainder_entry[2], [1])
示例8: test_column_transformer_remainder_transformer
# 需要导入模块: from sklearn import compose [as 别名]
# 或者: from sklearn.compose import ColumnTransformer [as 别名]
def test_column_transformer_remainder_transformer(key):
    """Check that an estimator passed as ``remainder`` is applied.

    ``key`` selects the column(s) given to ``Trans``; the remaining columns
    are handled by ``DoubleTrans``.
    NOTE(review): ``key`` is presumably supplied by a pytest parametrization
    outside this view -- confirm.
    """
    data = np.array([[0, 1, 2],
                     [2, 4, 6],
                     [8, 6, 4]]).T

    # second and third columns are doubled when remainder = DoubleTrans
    expected = data.copy()
    expected[:, 1:3] *= 2

    ct = ColumnTransformer([('trans1', Trans(), key)],
                           remainder=DoubleTrans())
    assert_array_equal(ct.fit_transform(data), expected)
    assert_array_equal(ct.fit(data).transform(data), expected)

    # The fitted transformer list gains a trailing 'remainder' entry.
    assert len(ct.transformers_) == 2
    remainder_entry = ct.transformers_[-1]
    assert remainder_entry[0] == 'remainder'
    assert isinstance(remainder_entry[1], DoubleTrans)
    assert_array_equal(remainder_entry[2], [1, 2])
示例9: test_column_transformer_drops_all_remainder_transformer
# 需要导入模块: from sklearn import compose [as 别名]
# 或者: from sklearn.compose import ColumnTransformer [as 别名]
def test_column_transformer_drops_all_remainder_transformer():
    """Check the remainder estimator when the only listed transformer drops.

    Column 0 is assigned to ``'drop'``, so the output must consist solely of
    the remaining two columns as transformed by ``DoubleTrans``.
    """
    data = np.array([[0, 1, 2],
                     [2, 4, 6],
                     [8, 6, 4]]).T

    # columns are doubled when remainder = DoubleTrans
    expected = data[:, 1:3] * 2

    ct = ColumnTransformer([('trans1', 'drop', [0])],
                           remainder=DoubleTrans())
    assert_array_equal(ct.fit_transform(data), expected)
    assert_array_equal(ct.fit(data).transform(data), expected)

    # The fitted transformer list gains a trailing 'remainder' entry.
    assert len(ct.transformers_) == 2
    remainder_entry = ct.transformers_[-1]
    assert remainder_entry[0] == 'remainder'
    assert isinstance(remainder_entry[1], DoubleTrans)
    assert_array_equal(remainder_entry[2], [1, 2])
示例10: test_column_transformer_sparse_remainder_transformer
# 需要导入模块: from sklearn import compose [as 别名]
# 或者: from sklearn.compose import ColumnTransformer [as 别名]
def test_column_transformer_sparse_remainder_transformer():
    """Check a sparse-producing estimator used as ``remainder``.

    ``Trans`` handles column 0 and ``SparseMatrixTrans`` handles the rest.
    With ``sparse_threshold=0.8`` the combined output must be sparse and
    equal column 0 followed by a 3x3 identity.
    """
    data = np.array([[0, 1, 2],
                     [2, 4, 6],
                     [8, 6, 4]]).T

    ct = ColumnTransformer([('trans1', Trans(), [0])],
                           remainder=SparseMatrixTrans(),
                           sparse_threshold=0.8)
    transformed = ct.fit_transform(data)
    assert sparse.issparse(transformed)

    # SparseMatrixTrans creates 3 features for each column. There is
    # one column in ``transformers``, thus:
    assert transformed.shape == (3, 3 + 1)

    first_column = data[:, [0]]  # kept 2-D so it can be stacked
    expected = np.hstack((first_column, np.eye(3)))
    assert_array_equal(transformed.toarray(), expected)

    # The fitted transformer list gains a trailing 'remainder' entry.
    assert len(ct.transformers_) == 2
    remainder_entry = ct.transformers_[-1]
    assert remainder_entry[0] == 'remainder'
    assert isinstance(remainder_entry[1], SparseMatrixTrans)
    assert_array_equal(remainder_entry[2], [1, 2])
示例11: fit
# 需要导入模块: from sklearn import compose [as 别名]
# 或者: from sklearn.compose import ColumnTransformer [as 别名]
def fit(self, X, y=None):
    """Build and fit the internal ColumnTransformer.

    A ``_RepeatingBasisFunction`` configured from ``self.n_periods`` and
    ``self.input_range`` is applied to ``self.column``. All other columns are
    handled according to ``self.remainder``. The fitted transformer is stored
    on ``self.pipeline_``.

    Parameters
    ----------
    X : input data forwarded to ``ColumnTransformer.fit``.
    y : optional target forwarded to ``ColumnTransformer.fit``.

    Returns
    -------
    self
    """
    basis = _RepeatingBasisFunction(
        n_periods=self.n_periods, input_range=self.input_range
    )
    basis_step = ("repeatingbasis", basis, [self.column])

    self.pipeline_ = ColumnTransformer([basis_step], remainder=self.remainder)
    self.pipeline_.fit(X, y)
    return self
示例12: _filter
# 需要导入模块: from sklearn import compose [as 别名]
# 或者: from sklearn.compose import ColumnTransformer [as 别名]
def _filter(obj):
    """Filter *obj*, descending into known composite estimators.

    Composite objects (``DataFrameMapper``, ``ColumnTransformer``,
    ``FeatureUnion``, ``Pipeline``) have their step collections passed
    through ``_filter_steps`` and written back in place; the same object is
    then returned. A ``SelectorMixin`` instance is wrapped in a
    ``SelectorProxy``. A list is filtered element-wise into a new list. Any
    other object is returned unchanged.
    """
    if isinstance(obj, DataFrameMapper):
        obj.features = _filter_steps(obj.features)
        # built_features may be absent or None
        if hasattr(obj, "built_features") and obj.built_features is not None:
            obj.built_features = _filter_steps(obj.built_features)
        return obj

    if isinstance(obj, ColumnTransformer):
        obj.transformers = _filter_steps(obj.transformers)
        obj.remainder = _filter(obj.remainder)
        # transformers_ may be absent, hence the guard.
        if hasattr(obj, "transformers_"):
            obj.transformers_ = _filter_steps(obj.transformers_)
        return obj

    if isinstance(obj, FeatureUnion):
        obj.transformer_list = _filter_steps(obj.transformer_list)
        return obj

    if isinstance(obj, Pipeline):
        obj.steps = _filter_steps(obj.steps)
        return obj

    if isinstance(obj, SelectorMixin):
        return SelectorProxy(obj)

    if isinstance(obj, list):
        return [_filter(element) for element in obj]

    return obj
示例13: make_xgboost_column_transformer
# 需要导入模块: from sklearn import compose [as 别名]
# 或者: from sklearn.compose import ColumnTransformer [as 别名]
def make_xgboost_column_transformer(dtypes, missing_value_aware=True):
    """Construct a ColumnTransformer for feeding complex data into an XGBModel.

    Parameters
    ----------
    dtypes: mapping of column to dtype, or iterable of tuples (column, dtype)
        An object exposing ``items()`` (for example a dict) is iterated
        through that method; any other iterable is expected to yield
        ``(column, dtype)`` pairs directly.

    missing_value_aware: boolean
        If true, use missing value aware transformers.

    Returns
    -------
    ColumnTransformer
        One transformer per column, named ``str(column)``. Categorical
        columns (as decided by ``_is_categorical``) are encoded; all others
        are passed through. Columns not listed in ``dtypes`` are dropped.
    """
    # Accept both documented input shapes: a mapping and an iterable of pairs.
    pairs = dtypes.items() if hasattr(dtypes, "items") else dtypes

    transformers = []
    for column, dtype in pairs:
        if _is_categorical(dtype):
            if missing_value_aware:
                encoder = PMMLLabelBinarizer(sparse_output=True)
            else:
                encoder = Pipeline([
                    ("ordinal_encoder", OrdinalEncoder()),
                    ("one_hot_encoder", OneHotEncoder()),
                ])
            transformers.append((str(column), encoder, [column]))
        else:
            transformers.append((str(column), "passthrough", [column]))
    return ColumnTransformer(transformers, remainder="drop")
示例14: test_column_transformer_weights
# 需要导入模块: from sklearn import compose [as 别名]
# 或者: from sklearn.compose import ColumnTransformer [as 别名]
def test_column_transformer_weights(self):
    """Convert a ColumnTransformer using ``transformer_weights`` to ONNX.

    The transformer applies PCA to columns 0-9 and TruncatedSVD to columns
    10-99, weighted 2 and 3 respectively. The converted model must not be
    ``None`` and is then handed to ``dump_data_and_model``.
    """
    weighted = ColumnTransformer(
        [('pca', PCA(n_components=5), slice(0, 10)),
         ('svd', TruncatedSVD(n_components=5), slice(10, 100))],
        transformer_weights={'pca': 2, 'svd': 3})
    # NOTE(review): the positional 3 is presumably the number of classes --
    # confirm against fit_classification_model.
    model, X = fit_classification_model(weighted, 3, n_features=100)

    input_spec = [("input", FloatTensorType([None, X.shape[1]]))]
    model_onnx = convert_sklearn(
        model,
        "column transformer weights",
        input_spec,
        dtype=numpy.float32,
    )
    self.assertIsNotNone(model_onnx)

    dump_data_and_model(
        X,
        model,
        model_onnx,
        basename="SklearnColumnTransformerWeights-Dec4",
        allow_failure="StrictVersion(onnxruntime.__version__)"
        "<= StrictVersion('0.2.1')",
    )
示例15: test_column_transformer_drop
# 需要导入模块: from sklearn import compose [as 别名]
# 或者: from sklearn.compose import ColumnTransformer [as 别名]
def test_column_transformer_drop(self):
    """Convert a ColumnTransformer using ``remainder='drop'`` to ONNX.

    The transformer applies PCA to columns 0-9 and TruncatedSVD to columns
    80-99; the columns in between are dropped. The converted model must not
    be ``None`` and is then handed to ``dump_data_and_model``.
    """
    dropping = ColumnTransformer(
        [('pca', PCA(n_components=5), slice(0, 10)),
         ('svd', TruncatedSVD(n_components=5), slice(80, 100))],
        remainder='drop')
    # NOTE(review): the positional 3 is presumably the number of classes --
    # confirm against fit_classification_model.
    model, X = fit_classification_model(dropping, 3, n_features=100)

    input_spec = [("input", FloatTensorType([None, X.shape[1]]))]
    model_onnx = convert_sklearn(
        model,
        "column transformer drop",
        input_spec,
        dtype=numpy.float32,
    )
    self.assertIsNotNone(model_onnx)

    dump_data_and_model(
        X,
        model,
        model_onnx,
        basename="SklearnColumnTransformerDrop",
        allow_failure="StrictVersion(onnxruntime.__version__)"
        "<= StrictVersion('0.2.1')",
    )