本文整理汇总了Python中sklearn.compose.make_column_transformer函数的典型用法代码示例。如果您正苦于以下问题：Python make_column_transformer函数的具体用法？Python make_column_transformer怎么用？Python make_column_transformer使用的例子？那么，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了make_column_transformer函数的10个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_column_transformer_mixed_cols_sparse
def test_column_transformer_mixed_cols_sparse():
    """Check sparse stacking of one-hot output with passed-through columns.

    The input is an object-dtype array holding a string, an int and a bool
    column. With ``sparse_threshold=1.0``:

    * passing through the int and bool columns must yield a CSR matrix with
      the expected dense content;
    * passing through the string column must raise ``ValueError``.
    """
    mixed = np.array([['a', 1, True],
                      ['b', 2, False]],
                     dtype='O')

    # Case 1: one-hot encode the string column, pass the other two through.
    # this shouldn't fail, since boolean can be coerced into a numeric
    # See: https://github.com/scikit-learn/scikit-learn/issues/11912
    numeric_passthrough = make_column_transformer(
        (OneHotEncoder(), [0]),
        ('passthrough', [1, 2]),
        sparse_threshold=1.0
    )
    stacked = numeric_passthrough.fit_transform(mixed)
    assert stacked.getformat() == 'csr'
    expected_dense = np.array([[1, 0, 1, 1],
                               [0, 1, 2, 0]])
    assert_array_equal(stacked.toarray(), expected_dense)

    # Case 2: the string column is both encoded and passed through.
    string_passthrough = make_column_transformer(
        (OneHotEncoder(), [0]),
        ('passthrough', [0]),
        sparse_threshold=1.0
    )
    # this fails since strings `a` and `b` cannot be
    # coerced into a numeric.
    with pytest.raises(ValueError,
                       match="For a sparse output, all columns should"):
        string_passthrough.fit_transform(mixed)
示例2: test_make_column_transformer_pandas
def test_make_column_transformer_pandas():
    """Compare legacy and current tuple orders on a pandas DataFrame.

    The legacy ``(columns, transformer)`` order must emit a
    ``DeprecationWarning``; both orders must produce (almost) equal output.
    The test is skipped when pandas cannot be imported.
    """
    pd = pytest.importorskip('pandas')
    frame = pd.DataFrame(np.array([[0, 1, 2], [2, 4, 6]]).T,
                         columns=['first', 'second'])
    normalizer = Normalizer()

    # XXX remove in v0.22
    with pytest.warns(DeprecationWarning,
                      match='`make_column_transformer` now expects'):
        legacy_ct = make_column_transformer((frame.columns, normalizer))

    current_ct = make_column_transformer((normalizer, frame.columns))

    legacy_out = legacy_ct.fit_transform(frame)
    current_out = current_ct.fit_transform(frame)
    assert_almost_equal(legacy_out, current_out)
示例3: test_make_column_transformer_remainder_transformer
def test_make_column_transformer_remainder_transformer():
    """Check that an estimator passed as ``remainder`` is stored on the result.

    NOTE(review): the specs use the ``(columns, transformer)`` tuple order;
    confirm this matches the scikit-learn version under test.
    """
    remainder = StandardScaler()
    specs = (
        ('first', StandardScaler()),
        (['second'], Normalizer()),
    )
    ct = make_column_transformer(*specs, remainder=remainder)
    assert ct.remainder == remainder
示例4: test_make_column_transformer_kwargs
def test_make_column_transformer_kwargs():
    """Check keyword handling of ``make_column_transformer``.

    ``n_jobs`` and ``remainder`` must be stored on the result without
    changing the ``transformers`` list, and ``transformer_weights`` must be
    rejected with a ``TypeError``.

    NOTE(review): the specs use the ``(columns, transformer)`` tuple order;
    confirm this matches the scikit-learn version under test.
    """
    scaler = StandardScaler()
    norm = Normalizer()
    specs = (('first', scaler), (['second'], norm))

    with_kwargs = make_column_transformer(*specs, n_jobs=3, remainder='drop')
    without_kwargs = make_column_transformer(*specs)

    assert_equal(with_kwargs.transformers, without_kwargs.transformers)
    assert_equal(with_kwargs.n_jobs, 3)
    assert_equal(with_kwargs.remainder, 'drop')

    # invalid keyword parameters should raise an error message
    assert_raise_message(
        TypeError,
        'Unknown keyword arguments: "transformer_weights"',
        make_column_transformer, *specs,
        transformer_weights={'pca': 10, 'Transf': 1}
    )
示例5: test_column_transformer_remainder
def test_column_transformer_remainder():
    """Exercise the ``remainder`` option of ``ColumnTransformer``.

    Covers the default (``'drop'``), explicit ``'passthrough'``, the column
    order when the passed-through column comes first in the input, the case
    where the only listed transformer is ``'drop'``, an invalid ``remainder``
    value, and the default used by ``make_column_transformer``.
    """
    X_array = np.array([[0, 1, 2], [2, 4, 6]]).T
    X_res_first = np.array([0, 1, 2]).reshape(-1, 1)
    X_res_second = np.array([2, 4, 6]).reshape(-1, 1)
    X_res_both = X_array

    def check(ct, expected_out, expected_remainder, expected_columns):
        # Both fitting paths must give the same output, and the fitted
        # transformer list must end with the ``'remainder'`` entry.
        assert_array_equal(ct.fit_transform(X_array), expected_out)
        assert_array_equal(ct.fit(X_array).transform(X_array), expected_out)
        assert len(ct.transformers_) == 2
        assert ct.transformers_[-1][0] == 'remainder'
        assert ct.transformers_[-1][1] == expected_remainder
        assert_array_equal(ct.transformers_[-1][2], expected_columns)

    # default drop
    check(ColumnTransformer([('trans1', Trans(), [0])]),
          X_res_first, 'drop', [1])

    # specify passthrough
    check(ColumnTransformer([('trans', Trans(), [0])],
                            remainder='passthrough'),
          X_res_both, 'passthrough', [1])

    # column order is not preserved (passed through added to end)
    check(ColumnTransformer([('trans1', Trans(), [1])],
                            remainder='passthrough'),
          X_res_both[:, ::-1], 'passthrough', [0])

    # passthrough when all actual transformers are skipped
    check(ColumnTransformer([('trans1', 'drop', [0])],
                            remainder='passthrough'),
          X_res_second, 'passthrough', [1])

    # error on invalid arg
    invalid_ct = ColumnTransformer([('trans1', Trans(), [0])], remainder=1)
    for fit_method in (invalid_ct.fit, invalid_ct.fit_transform):
        assert_raise_message(
            ValueError,
            "remainder keyword needs to be one of \'drop\', \'passthrough\', "
            "or estimator.", fit_method, X_array)

    # check default for make_column_transformer
    default_ct = make_column_transformer(([0], Trans()))
    assert default_ct.remainder == 'drop'
示例6: test_make_column_transformer
def test_make_column_transformer():
    """Check the names, transformers and columns that get recorded.

    Names are expected to be the lowercased transformer class names; the
    transformer objects and column selections must be kept as given.

    NOTE(review): the specs use the ``(columns, transformer)`` tuple order;
    confirm this matches the scikit-learn version under test.
    """
    scaler, norm = StandardScaler(), Normalizer()
    ct = make_column_transformer(('first', scaler), (['second'], norm))

    # ``ct.transformers`` holds (name, transformer, columns) triples.
    step_names, step_transformers, step_columns = zip(*ct.transformers)
    assert_equal(step_names, ("standardscaler", "normalizer"))
    assert_equal(step_transformers, (scaler, norm))
    assert_equal(step_columns, ('first', ['second']))
示例7: test_make_column_transformer
def test_make_column_transformer():
    """Check ``make_column_transformer`` and its tuple-order deprecation.

    First verifies the recorded names, transformers and columns for the
    ``(transformer, columns)`` order. Then verifies that the legacy
    ``(columns, transformer)`` order warns yet gives the same output, and
    that the warning is also emitted when the specs involve ``'drop'`` and
    ``'passthrough'`` strings.
    """
    deprecation_match = '`make_column_transformer` now expects'
    scaler, norm = StandardScaler(), Normalizer()

    ct = make_column_transformer((scaler, 'first'), (norm, ['second']))
    # ``ct.transformers`` holds (name, transformer, columns) triples.
    step_names, step_transformers, step_columns = zip(*ct.transformers)
    assert_equal(step_names, ("standardscaler", "normalizer"))
    assert_equal(step_transformers, (scaler, norm))
    assert_equal(step_columns, ('first', ['second']))

    # XXX remove in v0.22
    with pytest.warns(DeprecationWarning, match=deprecation_match):
        legacy_ct = make_column_transformer(([0], norm))
    current_ct = make_column_transformer((norm, [0]))
    X_array = np.array([[0, 1, 2], [2, 4, 6]]).T
    assert_almost_equal(legacy_ct.fit_transform(X_array),
                        current_ct.fit_transform(X_array))

    # Legacy order with string transformers must warn as well.
    legacy_spec_sets = (
        [('first', 'drop')],
        [('passthrough', 'passthrough'), ('first', 'drop')],
    )
    for legacy_specs in legacy_spec_sets:
        with pytest.warns(DeprecationWarning, match=deprecation_match):
            make_column_transformer(*legacy_specs)
示例8: test_make_column_transformer_pandas
def test_make_column_transformer_pandas():
    """Compare ``make_column_transformer`` with an explicit ``ColumnTransformer``.

    Both are given a ``Normalizer`` over all DataFrame columns and must
    produce (almost) equal output. Skipped when pandas cannot be imported.
    """
    pd = pytest.importorskip('pandas')
    frame = pd.DataFrame(np.array([[0, 1, 2], [2, 4, 6]]).T,
                         columns=['first', 'second'])

    explicit_ct = ColumnTransformer([('norm', Normalizer(), frame.columns)])
    helper_ct = make_column_transformer((Normalizer(), frame.columns))

    explicit_out = explicit_ct.fit_transform(frame)
    helper_out = helper_ct.fit_transform(frame)
    assert_almost_equal(explicit_out, helper_out)
示例9: fit
def fit(self, X, y=None):
    """Fit the internal column transformer on ``X``.

    Columns whose name contains ``'suit'`` are one-hot encoded; all other
    columns are standardized. The fitted transformer is stored on
    ``self.column_transformer``.

    Parameters
    ----------
    X : object exposing ``.columns`` with string labels
        Presumably a pandas DataFrame -- confirm against callers.
    y : ignored
        Never read. It defaults to ``None`` so that ``fit(X)`` works;
        existing ``fit(X, y)`` callers are unaffected.

    Returns
    -------
    self
    """
    # Partition the columns with the same predicate so each column lands in
    # exactly one group without rescanning the encode list per column.
    encode_columns = [item for item in X.columns if 'suit' in item]
    scale_columns = [item for item in X.columns if 'suit' not in item]
    self.column_transformer = make_column_transformer(
        (StandardScaler(), scale_columns),
        (OneHotEncoder(categories='auto'), encode_columns))
    self.column_transformer.fit(X)
    return self
示例10: make_pipeline
# - pclass: ordinal integers {1, 2, 3}.
#
# The statements below build a preprocessing + LogisticRegression pipeline,
# fit it on a train split and print the score on the held-out split.
# `data` is defined before this excerpt (presumably a pandas DataFrame --
# confirm).
# Columns sent through the numeric pipeline (imputation + scaling).
numeric_features = ['age', 'fare']
# Columns sent through the categorical encoder.
categorical_features = ['embarked', 'sex', 'pclass']
# Provisionally, use pd.fillna() to impute missing values for categorical
# features; SimpleImputer will eventually support strategy="constant".
# This assignment modifies `data` in place.
data[categorical_features] = data[categorical_features].fillna(value='missing')
# We create the preprocessing pipelines for both numeric and categorical data.
numeric_transformer = make_pipeline(SimpleImputer(), StandardScaler())
# NOTE(review): CategoricalEncoder and the (columns, transformer) tuple order
# used below look tied to a specific scikit-learn version -- confirm both
# against the version this script targets.
categorical_transformer = CategoricalEncoder('onehot-dense',
handle_unknown='ignore')
# Columns not listed in either feature group are dropped (remainder='drop').
preprocessing_pl = make_column_transformer(
(numeric_features, numeric_transformer),
(categorical_features, categorical_transformer),
remainder='drop'
)
# Append classifier to preprocessing pipeline.
# Now we have a full prediction pipeline.
clf = make_pipeline(preprocessing_pl, LogisticRegression())
# Features are every column except the 'survived' target.
X = data.drop('survived', axis=1)
y = data.survived.values
# Hold out 20% of the shuffled rows for scoring.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
shuffle=True)
clf.fit(X_train, y_train)
# Report the pipeline's score on the held-out split.
print("model score: %f" % clf.score(X_test, y_test))