当前位置: 首页>>代码示例>>Python>>正文


Python compose.ColumnTransformer方法代码示例

本文整理汇总了Python中sklearn.compose.ColumnTransformer方法的典型用法代码示例。如果您正苦于以下问题:Python compose.ColumnTransformer方法的具体用法?Python compose.ColumnTransformer怎么用?Python compose.ColumnTransformer使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sklearn.compose的用法示例。


在下文中一共展示了compose.ColumnTransformer方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_column_transformer_list

# 需要导入模块: from sklearn import compose [as 别名]
# 或者: from sklearn.compose import ColumnTransformer [as 别名]
def test_column_transformer_list():
    """Check that ColumnTransformer works on a plain list of lists.

    Columns 0 and 1 are routed through ``StandardScaler`` and column 2
    through ``OneHotEncoder``. Both ``fit_transform`` and ``fit`` followed
    by ``transform`` must produce the same expected array.
    """
    nan = float('nan')
    rows = [[1, nan, 'a'], [0, 0, 'b']]
    # Two scaled numeric columns followed by the two one-hot columns.
    wanted = np.array([[1, nan, 1, 0], [-1, 0, 0, 1]])

    transformer = ColumnTransformer([
        ('numerical', StandardScaler(), [0, 1]),
        ('categorical', OneHotEncoder(), [2]),
    ])

    one_step = transformer.fit_transform(rows)
    assert_array_equal(one_step, wanted)

    two_step = transformer.fit(rows).transform(rows)
    assert_array_equal(two_step, wanted)
开发者ID:PacktPublishing,项目名称:Mastering-Elasticsearch-7.0,代码行数:19,代码来源:test_column_transformer.py

示例2: test_column_transformer

# 需要导入模块: from sklearn import compose [as 别名]
# 或者: from sklearn.compose import ColumnTransformer [as 别名]
def test_column_transformer(self):
    """Check that each encoder behaves the same inside a ColumnTransformer.

    For every name exported by ``encoders`` (except ``HashingEncoder``) the
    output obtained through a ColumnTransformer is compared with the output
    of the encoder applied directly to the same columns. See issue #169.
    """
    # HashingEncoder does not accept handle_missing parameter
    names_under_test = set(encoders.__all__) - {'HashingEncoder'}
    for encoder_name in names_under_test:
        with self.subTest(encoder_name=encoder_name):
            # we can only test one data type at once. Here, we test string columns.
            tested_columns = ['unique_str', 'invariant', 'underscore', 'none', 'extra']
            encoder_cls = getattr(encoders, encoder_name)

            # ColumnTransformer instantiates the encoder twice -> we have to
            # make sure the encoder settings are correctly passed
            wrapped = ColumnTransformer([
                ("dummy_encoder_name",
                 encoder_cls(handle_missing="return_nan"),
                 tested_columns)
            ])
            obtained = wrapped.fit_transform(X, y)

            # the old-school approach: use the encoder on its own
            standalone = encoder_cls(handle_missing="return_nan", return_df=False)
            expected = standalone.fit_transform(X[tested_columns], y)

            np.testing.assert_array_equal(obtained, expected)
开发者ID:scikit-learn-contrib,项目名称:category_encoders,代码行数:21,代码来源:test_encoders.py

示例3: make_pipeline

# 需要导入模块: from sklearn import compose [as 别名]
# 或者: from sklearn.compose import ColumnTransformer [as 别名]
def make_pipeline(encoding_method):
    """Build an encode -> scale -> RidgeCV pipeline for one encoding method.

    Every column listed in ``clean_columns`` is encoded with the encoder its
    entry names in ``encoders_dict``; ``dirty_column`` is encoded with the
    encoder registered under ``encoding_method``. Columns not mentioned are
    dropped.
    """
    # static transformers from the other columns
    transformers = []
    for col, enc in clean_columns.items():
        transformers.append((enc + '_' + col, encoders_dict[enc], [col]))
    # adding the encoded column
    transformers.append(
        (encoding_method, encoders_dict[encoding_method], [dirty_column]))

    # Use ColumnTransformer to combine the features
    union = ColumnTransformer(transformers=transformers, remainder='drop')
    steps = [
        ('union', union),
        ('scaler', StandardScaler(with_mean=False)),
        ('clf', RidgeCV()),
    ]
    return Pipeline(steps)


#########################################################################
# Fitting each encoding method with a RidgeCV
# --------------------------------------------
# Finally, we loop over the different encoding methods,
# instantiate a new pipeline each time, fit it
# and store the returned cross-validation score:
开发者ID:dirty-cat,项目名称:dirty_cat,代码行数:26,代码来源:02_fit_predict_plot_employee_salaries.py

示例4: make_pipeline

# 需要导入模块: from sklearn import compose [as 别名]
# 或者: from sklearn.compose import ColumnTransformer [as 别名]
def make_pipeline(encoding_method):
    """Build an encode -> scale -> random-forest pipeline for one encoding method.

    All of ``clean_columns`` are encoded with the ``'one-hot'`` entry of
    ``encoder_dict``; ``dirty_column`` is encoded with the entry registered
    under ``encoding_method``. Columns not mentioned are dropped.
    """
    # static transformers from the other columns
    clean_step = ('one-hot-clean', encoder_dict['one-hot'], clean_columns)
    # adding the encoded column
    dirty_step = (encoding_method + '-dirty',
                  encoder_dict[encoding_method],
                  [dirty_column])
    transformers = [clean_step, dirty_step]

    # Use ColumnTransformer to combine the features
    union = ColumnTransformer(transformers=transformers, remainder='drop')
    steps = [
        ('union', union),
        ('scaler', StandardScaler(with_mean=False)),
        ('classifier', RandomForestClassifier(random_state=5)),
    ]
    return Pipeline(steps)


###############################################################################
# Evaluation of different encoding methods
# -----------------------------------------
# We then loop over encoding methods, scoring the different pipeline predictions
# using a cross validation score: 
开发者ID:dirty-cat,项目名称:dirty_cat,代码行数:25,代码来源:03_fit_predict_plot_midwest_survey.py

示例5: test_column_transformer_sparse_array

# 需要导入模块: from sklearn import compose [as 别名]
# 或者: from sklearn.compose import ColumnTransformer [as 别名]
def test_column_transformer_sparse_array():
    """Check ColumnTransformer on a sparse CSR input with several column specs.

    A single selected column is tested with ``remainder`` set to both
    ``'drop'`` and ``'passthrough'``; selecting both columns is tested with
    the default remainder. The output must be sparse in every case.
    """
    X_sparse = sparse.eye(3, 2).tocsr()

    # no distinction between 1D and 2D
    first_column_only = X_sparse[:, 0]
    both_columns = X_sparse

    def check(ct, expected):
        # Same three checks for every configuration under test.
        assert sparse.issparse(ct.fit_transform(X_sparse))
        assert_allclose_dense_sparse(ct.fit_transform(X_sparse), expected)
        assert_allclose_dense_sparse(ct.fit(X_sparse).transform(X_sparse),
                                     expected)

    remainder_cases = [('drop', first_column_only),
                       ('passthrough', both_columns)]
    for col in [0, [0], slice(0, 1)]:
        for remainder, res in remainder_cases:
            check(ColumnTransformer([('trans', Trans(), col)],
                                    remainder=remainder,
                                    sparse_threshold=0.8),
                  res)

    for col in [[0, 1], slice(0, 2)]:
        check(ColumnTransformer([('trans', Trans(), col)],
                                sparse_threshold=0.8),
              both_columns)
开发者ID:PacktPublishing,项目名称:Mastering-Elasticsearch-7.0,代码行数:27,代码来源:test_column_transformer.py

示例6: test_column_transformer_sparse_stacking

# 需要导入模块: from sklearn import compose [as 别名]
# 或者: from sklearn.compose import ColumnTransformer [as 别名]
def test_column_transformer_sparse_stacking():
    """Check how dense and sparse transformer outputs are stacked together.

    ``Trans`` handles column 0 and ``SparseMatrixTrans`` handles column 1.
    With ``sparse_threshold=0.8`` the combined result is expected to be
    sparse; with ``sparse_threshold=0.1`` it is expected to be dense. In both
    cases everything after the first output column must be an identity
    matrix, and no ``'remainder'`` entry may be appended to the fitted
    transformers.
    """
    X_array = np.array([[0, 1, 2], [2, 4, 6]]).T
    col_trans = ColumnTransformer([('trans1', Trans(), [0]),
                                   ('trans2', SparseMatrixTrans(), 1)],
                                  sparse_threshold=0.8)
    col_trans.fit(X_array)
    X_trans = col_trans.transform(X_array)
    assert sparse.issparse(X_trans)
    # Plain assert (as in the dense case below) rather than the nose-style
    # assert_equal helper, so both halves report failures the same way.
    assert X_trans.shape == (X_trans.shape[0], X_trans.shape[0] + 1)
    assert_array_equal(X_trans.toarray()[:, 1:], np.eye(X_trans.shape[0]))
    assert len(col_trans.transformers_) == 2
    assert col_trans.transformers_[-1][0] != 'remainder'

    col_trans = ColumnTransformer([('trans1', Trans(), [0]),
                                   ('trans2', SparseMatrixTrans(), 1)],
                                  sparse_threshold=0.1)
    col_trans.fit(X_array)
    X_trans = col_trans.transform(X_array)
    assert not sparse.issparse(X_trans)
    assert X_trans.shape == (X_trans.shape[0], X_trans.shape[0] + 1)
    assert_array_equal(X_trans[:, 1:], np.eye(X_trans.shape[0]))
开发者ID:PacktPublishing,项目名称:Mastering-Elasticsearch-7.0,代码行数:23,代码来源:test_column_transformer.py

示例7: test_column_transformer_remainder_pandas

# 需要导入模块: from sklearn import compose [as 别名]
# 或者: from sklearn.compose import ColumnTransformer [as 别名]
def test_column_transformer_remainder_pandas(key):
    """Check ``remainder='passthrough'`` on a DataFrame for one column spec.

    ``key`` is the column specification handed to the single transformer;
    the sentinel string ``'pd-index'`` is replaced by a pandas ``Index``
    holding ``'first'``. The test is skipped when pandas is unavailable.
    """
    # test different ways that columns are specified with passthrough
    pd = pytest.importorskip('pandas')
    if isinstance(key, str):
        if key == 'pd-index':
            key = pd.Index(['first'])

    values = np.array([[0, 1, 2], [2, 4, 6]]).T
    frame = pd.DataFrame(values, columns=['first', 'second'])
    expected = values

    ct = ColumnTransformer([('trans1', Trans(), key)],
                           remainder='passthrough')
    assert_array_equal(ct.fit_transform(frame), expected)
    assert_array_equal(ct.fit(frame).transform(frame), expected)

    # The fitted list gains a trailing 'remainder' entry for column 1.
    assert len(ct.transformers_) == 2
    last_entry = ct.transformers_[-1]
    assert last_entry[0] == 'remainder'
    assert last_entry[1] == 'passthrough'
    assert_array_equal(last_entry[2], [1])
开发者ID:PacktPublishing,项目名称:Mastering-Elasticsearch-7.0,代码行数:20,代码来源:test_column_transformer.py

示例8: test_column_transformer_remainder_transformer

# 需要导入模块: from sklearn import compose [as 别名]
# 或者: from sklearn.compose import ColumnTransformer [as 别名]
def test_column_transformer_remainder_transformer(key):
    """Check that an estimator passed as ``remainder`` is applied to the rest.

    ``key`` is the column specification handed to ``Trans``; the expected
    output leaves column 0 untouched and doubles columns 1 and 2, which are
    handled by the ``DoubleTrans`` remainder.
    """
    data = np.array([[0, 1, 2],
                     [2, 4, 6],
                     [8, 6, 4]]).T

    # second and third columns are doubled when remainder = DoubleTrans
    expected = data.copy()
    expected[:, 1:3] *= 2

    ct = ColumnTransformer([('trans1', Trans(), key)],
                           remainder=DoubleTrans())

    assert_array_equal(ct.fit_transform(data), expected)
    assert_array_equal(ct.fit(data).transform(data), expected)

    assert len(ct.transformers_) == 2
    last_entry = ct.transformers_[-1]
    assert last_entry[0] == 'remainder'
    assert isinstance(last_entry[1], DoubleTrans)
    assert_array_equal(last_entry[2], [1, 2])
开发者ID:PacktPublishing,项目名称:Mastering-Elasticsearch-7.0,代码行数:20,代码来源:test_column_transformer.py

示例9: test_column_transformer_drops_all_remainder_transformer

# 需要导入模块: from sklearn import compose [as 别名]
# 或者: from sklearn.compose import ColumnTransformer [as 别名]
def test_column_transformer_drops_all_remainder_transformer():
    """Check a remainder estimator when the only listed transformer drops.

    Column 0 is explicitly dropped, so the output is expected to contain
    only columns 1 and 2 after they pass through ``DoubleTrans``.
    """
    data = np.array([[0, 1, 2],
                     [2, 4, 6],
                     [8, 6, 4]]).T

    # columns are doubled when remainder = DoubleTrans
    expected = data[:, 1:3] * 2

    ct = ColumnTransformer([('trans1', 'drop', [0])],
                           remainder=DoubleTrans())

    assert_array_equal(ct.fit_transform(data), expected)
    assert_array_equal(ct.fit(data).transform(data), expected)

    assert len(ct.transformers_) == 2
    last_entry = ct.transformers_[-1]
    assert last_entry[0] == 'remainder'
    assert isinstance(last_entry[1], DoubleTrans)
    assert_array_equal(last_entry[2], [1, 2])
开发者ID:PacktPublishing,项目名称:Mastering-Elasticsearch-7.0,代码行数:19,代码来源:test_column_transformer.py

示例10: test_column_transformer_sparse_remainder_transformer

# 需要导入模块: from sklearn import compose [as 别名]
# 或者: from sklearn.compose import ColumnTransformer [as 别名]
def test_column_transformer_sparse_remainder_transformer():
    """Check a remainder estimator that returns a sparse matrix.

    ``Trans`` handles column 0 and ``SparseMatrixTrans`` is the remainder;
    with ``sparse_threshold=0.8`` the stacked output is expected to be
    sparse, made of column 0 followed by a 3x3 identity block.
    """
    data = np.array([[0, 1, 2],
                     [2, 4, 6],
                     [8, 6, 4]]).T

    ct = ColumnTransformer([('trans1', Trans(), [0])],
                           remainder=SparseMatrixTrans(),
                           sparse_threshold=0.8)

    stacked = ct.fit_transform(data)
    assert sparse.issparse(stacked)
    # SparseMatrixTrans creates 3 features for each column. There is
    # one column in ``transformers``, thus:
    assert stacked.shape == (3, 3 + 1)

    first_column = data[:, 0].reshape(-1, 1)
    expected = np.hstack((first_column, np.eye(3)))
    assert_array_equal(stacked.toarray(), expected)

    assert len(ct.transformers_) == 2
    last_entry = ct.transformers_[-1]
    assert last_entry[0] == 'remainder'
    assert isinstance(last_entry[1], SparseMatrixTrans)
    assert_array_equal(last_entry[2], [1, 2])
开发者ID:PacktPublishing,项目名称:Mastering-Elasticsearch-7.0,代码行数:24,代码来源:test_column_transformer.py

示例11: fit

# 需要导入模块: from sklearn import compose [as 别名]
# 或者: from sklearn.compose import ColumnTransformer [as 别名]
def fit(self, X, y=None):
    """Build the internal ColumnTransformer and fit it on ``X``.

    A ``_RepeatingBasisFunction`` configured from ``self.n_periods`` and
    ``self.input_range`` is applied to ``self.column``; all other columns
    are handled according to ``self.remainder``. The fitted transformer is
    stored on ``self.pipeline_``.

    Returns
    -------
    self
    """
    basis = _RepeatingBasisFunction(
        n_periods=self.n_periods, input_range=self.input_range
    )
    transformers = [("repeatingbasis", basis, [self.column])]

    self.pipeline_ = ColumnTransformer(transformers, remainder=self.remainder)
    self.pipeline_.fit(X, y)

    return self
开发者ID:koaning,项目名称:scikit-lego,代码行数:19,代码来源:repeatingbasis.py

示例12: _filter

# 需要导入模块: from sklearn import compose [as 别名]
# 或者: from sklearn.compose import ColumnTransformer [as 别名]
def _filter(obj):
	"""Apply the step filter to the containers nested inside ``obj``.

	``DataFrameMapper``, ``ColumnTransformer``, ``FeatureUnion`` and
	``Pipeline`` objects have their step lists replaced in place by the result
	of ``_filter_steps`` and are then returned. A ``SelectorMixin`` is wrapped
	in a ``SelectorProxy``, a list is rebuilt with every element filtered, and
	anything else is returned unchanged.
	"""
	if isinstance(obj, DataFrameMapper):
		obj.features = _filter_steps(obj.features)
		if hasattr(obj, "built_features") and obj.built_features is not None:
			obj.built_features = _filter_steps(obj.built_features)
		return obj
	if isinstance(obj, ColumnTransformer):
		obj.transformers = _filter_steps(obj.transformers)
		# The remainder is passed back through _filter as well.
		obj.remainder = _filter(obj.remainder)
		if hasattr(obj, "transformers_"):
			obj.transformers_ = _filter_steps(obj.transformers_)
		return obj
	if isinstance(obj, FeatureUnion):
		obj.transformer_list = _filter_steps(obj.transformer_list)
		return obj
	if isinstance(obj, Pipeline):
		obj.steps = _filter_steps(obj.steps)
		return obj
	if isinstance(obj, SelectorMixin):
		return SelectorProxy(obj)
	if isinstance(obj, list):
		return [_filter(item) for item in obj]
	return obj
开发者ID:jpmml,项目名称:sklearn2pmml,代码行数:22,代码来源:__init__.py

示例13: make_xgboost_column_transformer

# 需要导入模块: from sklearn import compose [as 别名]
# 或者: from sklearn.compose import ColumnTransformer [as 别名]
def make_xgboost_column_transformer(dtypes, missing_value_aware = True):
	"""Construct a ColumnTransformer for feeding complex data into an XGBModel.

	Parameters
	----------

	dtypes: mapping of column to dtype, or iterable of tuples (column, dtype)
		An object exposing ``items()`` (for example a dict) is iterated
		through ``items()``; any other iterable must yield
		``(column, dtype)`` pairs directly.

	missing_value_aware: boolean
		If true, categorical columns use ``PMMLLabelBinarizer``; otherwise
		they use an ``OrdinalEncoder`` followed by a ``OneHotEncoder``.

	Returns
	-------
	ColumnTransformer
		One transformer per column, named ``str(column)``. Non-categorical
		columns are passed through; columns not listed are dropped.

	"""
	# The documented "iterable of tuples" input used to fail on .items();
	# accept it alongside mappings.
	pairs = dtypes.items() if hasattr(dtypes, "items") else dtypes
	transformers = []
	for column, dtype in pairs:
		if _is_categorical(dtype):
			if missing_value_aware:
				transformer = PMMLLabelBinarizer(sparse_output = True)
			else:
				transformer = Pipeline([("ordinal_encoder", OrdinalEncoder()), ("one_hot_encoder", OneHotEncoder())])
		else:
			transformer = "passthrough"
		transformers.append((str(column), transformer, [column]))
	return ColumnTransformer(transformers, remainder = "drop")
开发者ID:jpmml,项目名称:sklearn2pmml,代码行数:25,代码来源:xgboost.py

示例14: test_column_transformer_weights

# 需要导入模块: from sklearn import compose [as 别名]
# 或者: from sklearn.compose import ColumnTransformer [as 别名]
def test_column_transformer_weights(self):
    """Convert a ColumnTransformer with ``transformer_weights`` to ONNX.

    PCA is applied to columns 0-9 and TruncatedSVD to columns 10-99, with
    weights 2 and 3 respectively. The converted model must not be None and
    is then handed to ``dump_data_and_model`` together with the data.
    """
    weighted = ColumnTransformer(
        [('pca', PCA(n_components=5), slice(0, 10)),
         ('svd', TruncatedSVD(n_components=5), slice(10, 100))],
        transformer_weights={'pca': 2, 'svd': 3})
    model, X = fit_classification_model(weighted, 3, n_features=100)

    initial_types = [("input", FloatTensorType([None, X.shape[1]]))]
    model_onnx = convert_sklearn(
        model,
        "column transformer weights",
        initial_types,
        dtype=numpy.float32,
    )
    self.assertIsNotNone(model_onnx)

    dump_data_and_model(
        X,
        model,
        model_onnx,
        basename="SklearnColumnTransformerWeights-Dec4",
        allow_failure="StrictVersion(onnxruntime.__version__)"
        "<= StrictVersion('0.2.1')",
    )
开发者ID:onnx,项目名称:sklearn-onnx,代码行数:23,代码来源:test_sklearn_pipeline.py

示例15: test_column_transformer_drop

# 需要导入模块: from sklearn import compose [as 别名]
# 或者: from sklearn.compose import ColumnTransformer [as 别名]
def test_column_transformer_drop(self):
    """Convert a ColumnTransformer with ``remainder='drop'`` to ONNX.

    PCA is applied to columns 0-9 and TruncatedSVD to columns 80-99; the
    columns in between are dropped. The converted model must not be None
    and is then handed to ``dump_data_and_model`` together with the data.
    """
    dropping = ColumnTransformer(
        [('pca', PCA(n_components=5), slice(0, 10)),
         ('svd', TruncatedSVD(n_components=5), slice(80, 100))],
        remainder='drop')
    model, X = fit_classification_model(dropping, 3, n_features=100)

    initial_types = [("input", FloatTensorType([None, X.shape[1]]))]
    model_onnx = convert_sklearn(
        model,
        "column transformer drop",
        initial_types,
        dtype=numpy.float32,
    )
    self.assertIsNotNone(model_onnx)

    dump_data_and_model(
        X,
        model,
        model_onnx,
        basename="SklearnColumnTransformerDrop",
        allow_failure="StrictVersion(onnxruntime.__version__)"
        "<= StrictVersion('0.2.1')",
    )
开发者ID:onnx,项目名称:sklearn-onnx,代码行数:23,代码来源:test_sklearn_pipeline.py


注:本文中的sklearn.compose.ColumnTransformer方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。