本文整理汇总了Python中sklearn.pipeline模块的典型用法代码示例。如果您正苦于以下问题:Python sklearn.pipeline模块的具体用法?Python sklearn.pipeline怎么用?Python sklearn.pipeline使用的例子?那么恭喜您,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该模块所在的包sklearn的用法示例。
在下文中一共展示了sklearn.pipeline模块的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: make_pmml_pipeline
# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import pipeline [as 别名]
def make_pmml_pipeline(obj, active_fields = None, target_fields = None):
    """Translate a regular Scikit-Learn estimator or pipeline to a PMML pipeline.

    The steps of ``obj`` are extracted, filtered, and wrapped in a new
    ``PMMLPipeline``. Field names, when supplied, are attached to the result
    as numpy arrays.

    Parameters
    ----------
    obj: BaseEstimator
        The estimator or pipeline to translate.
    active_fields: list of strings, optional
        Feature names. If missing, "x1", "x2", .., "xn" are assumed.
    target_fields: list of strings, optional
        Label name(s). If missing, "y" is assumed.

    Returns
    -------
    PMMLPipeline
        The newly built pipeline.
    """
    pmml_pipeline = PMMLPipeline(_filter_steps(_get_steps(obj)))
    optional_fields = (
        ("active_fields", active_fields),
        ("target_fields", target_fields),
    )
    for attribute_name, field_names in optional_fields:
        # Only override the attribute when the caller actually supplied names.
        if field_names is not None:
            setattr(pmml_pipeline, attribute_name, numpy.asarray(field_names))
    return pmml_pipeline
示例2: make_svm
# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import pipeline [as 别名]
def make_svm(gamma, C):
    """Build a standardise-then-SVC pipeline plus a name encoding its settings.

    Parameters
    ----------
    gamma:
        Forwarded to ``SVC`` as ``gamma``.
    C:
        Forwarded to ``SVC`` as ``C``.

    Returns
    -------
    tuple
        ``(pipeline, name)`` where ``name`` embeds ``gamma`` and ``C``.
    """
    scaler = StandardScaler()
    # probability=True so the fitted classifier exposes probability estimates;
    # random_state is pinned to 0.
    svc = SVC(gamma=gamma, C=C, probability=True, cache_size=500, random_state=0)
    scaled_svc = sklearn.pipeline.make_pipeline(scaler, svc)
    label = 'ss-svc-g%.4f-C%.1f' % (gamma, C)
    return scaled_svc, label
示例3: make_lr
# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import pipeline [as 别名]
def make_lr(C):
    """Build a standardise-then-logistic-regression pipeline and its name.

    Parameters
    ----------
    C:
        Forwarded to ``LogisticRegression`` as ``C``.

    Returns
    -------
    tuple
        ``(pipeline, name)`` where ``name`` embeds ``C``.
    """
    scaler = StandardScaler()
    logistic = LogisticRegression(C=C)
    scaled_lr = sklearn.pipeline.make_pipeline(scaler, logistic)
    label = 'ss-lr-C%.4f' % C
    return scaled_lr, label
示例4: make_simple_lr
# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import pipeline [as 别名]
def make_simple_lr():
    """Build a standardise-then-``SimpleLogisticRegression`` pipeline.

    Returns
    -------
    tuple
        ``(pipeline, 'ss-slr')``.
    """
    scaled_slr = sklearn.pipeline.make_pipeline(
        StandardScaler(),
        SimpleLogisticRegression(),
    )
    return scaled_slr, 'ss-slr'
示例5: test_set_params
# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import pipeline [as 别名]
def test_set_params():
    """Check that setting an unknown nested parameter on a Pipeline raises ValueError."""
    pipe = Pipeline([("svc", SVC())])
    bad_parameters = (
        "svc__stupid_param",  # step exists, parameter does not
        "svm__stupid_param",  # no step named "svm" in the pipeline
    )
    for bad_parameter in bad_parameters:
        assert_raises(ValueError, pipe.set_params, **{bad_parameter: True})
    # Whether the pipeline steps are estimators is not currently validated,
    # so the following check stays disabled:
    # bad_pipeline = Pipeline([("bad", NoEstimator())])
    # assert_raises(AttributeError, bad_pipeline.set_params,
    #               bad__stupid_param=True)
示例6: check_pipeline_consistency
# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import pipeline [as 别名]
def check_pipeline_consistency(name, estimator_orig):
    """Check that ``make_pipeline(est)`` gives the same results as ``est`` alone.

    A clone of ``estimator_orig`` is fitted directly and again through a
    one-step pipeline wrapping that same clone; the outputs of ``score`` and
    ``fit_transform`` (whichever the estimator provides) are then compared.

    Parameters
    ----------
    name: str
        Estimator name, used only in the skip message.
    estimator_orig:
        The estimator under test; it is cloned, not modified.

    Raises
    ------
    SkipTest
        If the estimator's tags mark it as non deterministic.
    """
    if estimator_orig._get_tags()['non_deterministic']:
        raise SkipTest(name + ' is non deterministic')

    X, y = make_blobs(n_samples=30, centers=[[0, 0, 0], [1, 1, 1]],
                      random_state=0, n_features=2, cluster_std=0.1)
    # Shift the data so its minimum is zero
    # (presumably for estimators that reject negative input - TODO confirm).
    X -= X.min()
    X = pairwise_estimator_convert_X(X, estimator_orig, kernel=rbf_kernel)

    estimator = clone(estimator_orig)
    y = multioutput_estimator_convert_y_2d(estimator, y)
    set_random_state(estimator)

    # The pipeline wraps the very same estimator object, not another clone.
    pipeline = make_pipeline(estimator)
    estimator.fit(X, y)
    pipeline.fit(X, y)

    for method_name in ("score", "fit_transform"):
        direct_method = getattr(estimator, method_name, None)
        if direct_method is None:
            # The estimator does not offer this method; nothing to compare.
            continue
        piped_method = getattr(pipeline, method_name)
        direct_result = direct_method(X, y)
        piped_result = piped_method(X, y)
        assert_allclose_dense_sparse(direct_result, piped_result)
示例7: __init__
# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import pipeline [as 别名]
def __init__(self, groupby, pipeline, errors='raise'):
    """Store the constructor arguments on the instance without modification.

    Parameters
    ----------
    groupby:
        Stored as ``self.groupby``.
    pipeline:
        Stored as ``self.pipeline``.
    errors: default 'raise'
        Stored as ``self.errors``.
    """
    self.groupby, self.pipeline, self.errors = groupby, pipeline, errors
示例8: _fit_subdf
# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import pipeline [as 别名]
def _fit_subdf(self, sub_df, y=None):
    """Fit a fresh clone of ``self.pipeline`` on ``sub_df`` and return the result of ``fit``."""
    # Clone first so that the template pipeline itself is never fitted.
    fresh_pipeline = clone(self.pipeline)
    return fresh_pipeline.fit(sub_df, y=y)
示例9: _call_pipeline
# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import pipeline [as 别名]
def _call_pipeline(self, df, y=None, attr=None):
    """Run the per-group call on every group of ``df`` and reassemble the output.

    Each group produced by ``self._iter_groups`` is passed to
    ``self._call_pipeline_subdf``; the pieces are concatenated and reindexed
    to ``df.index`` so that rows come back in their original order.

    Parameters
    ----------
    df:
        Input frame; its index defines the order of the output.
    y: optional
        Forwarded to ``self._iter_groups``.
    attr: optional
        Forwarded to ``self._call_pipeline_subdf``
        (presumably the pipeline method to invoke - TODO confirm).

    Returns
    -------
    The concatenated result, or its ``.values`` when both
    ``self.one_transformed`` and ``self.cast_to_numpy`` are truthy.

    Raises
    ------
    KeyError
        If ``self.errors == 'return_empty'`` and ``self.one_transformed`` is
        still falsy after all groups were processed.
    """
    check_is_fitted(self, 'pipelines_')
    # Reset the flag; presumably _call_pipeline_subdf sets it when a group is
    # actually handled by a fitted pipeline - TODO confirm.
    self.one_transformed = False

    pieces = []
    for key, sub_df, _sub_y in self._iter_groups(df, y=y):
        pieces.append(self._call_pipeline_subdf(key, sub_df, attr=attr))

    if not self.one_transformed:
        if self.errors == 'return_empty':
            raise KeyError('All keys missing in fitted pipelines')

    combined = pd.concat(pieces).reindex(df.index)
    # Convert back to np.array if the pipeline returns a np.array
    return combined.values if self.one_transformed and self.cast_to_numpy else combined
示例10: required_columns
# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import pipeline [as 别名]
def required_columns(self):
    """Return the columns required by the pipeline plus the group-by column(s).

    ``self.groupby`` may be a single column name or an iterable of names.
    Any ``str`` instance (including subclasses of ``str``) is treated as one
    column name; an exact-type check would let a ``str`` subclass fall through
    to ``set()`` and be split into individual characters.

    Returns
    -------
    set
        Union of ``self.pipeline.required_columns`` and the group-by columns.
    """
    groupby = [self.groupby] if isinstance(self.groupby, str) else self.groupby
    return self.pipeline.required_columns | set(groupby)
示例11: transformed_columns
# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import pipeline [as 别名]
def transformed_columns(self, input_columns):
    """Delegate to ``self.pipeline.transformed_columns`` and return its result unchanged."""
    wrapped_pipeline = self.pipeline
    return wrapped_pipeline.transformed_columns(input_columns)
示例12: _extract_metadata_from_model
# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import pipeline [as 别名]
def _extract_metadata_from_model(
    model: BaseEstimator, metadata: dict = dict()
) -> dict:
    """
    Recursively collect metadata from any
    :class:`gordo.machine.model.base.GordoBase` reachable from ``model``.

    If such a model is buried inside a :class:`sklearn.pipeline.Pipeline`
    which is itself an attribute of another
    :class:`sklearn.base.BaseEstimator`, its metadata is still returned.

    Parameters
    ----------
    model: BaseEstimator
        The model to inspect.
    metadata: dict
        Any initial starting metadata; mainly meant for the recursive calls,
        to accumulate the metadata of multiple
        :class:`gordo.machine.model.base.GordoBase` models found in ``model``.
        It is copied before use, so neither the caller's dict nor the shared
        default is ever mutated.

    Notes
    -----
    If there is a ``GordoBase`` model inside of a ``Pipeline`` which is not the
    final step, this function will not find it.

    Returns
    -------
    dict
        Dictionary representing accumulated calls to
        :meth:`gordo.machine.model.base.GordoBase.get_metadata`
    """
    collected = metadata.copy()

    # A Pipeline can only carry metadata in its last step.
    if isinstance(model, Pipeline):
        last_estimator = model.steps[-1][1]
        collected.update(ModelBuilder._extract_metadata_from_model(last_estimator))
        return collected

    # A GordoBase reports its own metadata directly.
    if isinstance(model, GordoBase):
        collected.update(model.get_metadata())

    # Also inspect the attribute values: a GordoBase may hold another
    # GordoBase, and any BaseEstimator may take a GordoBase (or a Pipeline
    # ending in one) as a parameter.
    for attribute in model.__dict__.values():
        if isinstance(attribute, Pipeline):
            nested = attribute.steps[-1][1]
        elif isinstance(attribute, (GordoBase, BaseEstimator)):
            nested = attribute
        else:
            continue
        collected.update(ModelBuilder._extract_metadata_from_model(nested))
    return collected
示例13: _get_estimator
# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import pipeline [as 别名]
def _get_estimator(pblm, clf_key):
    """
    Return a zero-argument factory that builds the sklearn classifier
    described by ``clf_key``.

    ``clf_key`` has the form ``"<est_type>"`` or ``"<est_type>-<wrap_type>"``:
    ``est_type`` is one of 'RF', 'SVC', 'Logit', 'MLP' and ``wrap_type`` is
    'OVR' or 'OVO'. Any other value raises ``KeyError`` from the lookup tables.
    Estimator keyword arguments come from ``pblm._estimator_params``.
    """
    key_parts = clf_key.split('-')
    est_type = key_parts[0]
    wrap_type = key_parts[1] if len(key_parts) > 1 else None

    multiclass_wrapper = {
        None: ut.identity,
        'OVR': sklearn.multiclass.OneVsRestClassifier,
        'OVO': sklearn.multiclass.OneVsOneClassifier,
    }[wrap_type]
    est_class = {
        'RF': sklearn.ensemble.RandomForestClassifier,
        'SVC': sklearn.svm.SVC,
        'Logit': sklearn.linear_model.LogisticRegression,
        'MLP': sklearn.neural_network.MLPClassifier,
    }[est_type]

    est_kw1, est_kw2 = pblm._estimator_params(est_type)
    est_params = ut.merge_dicts(est_kw1, est_kw2)

    if est_type in ('MLP', 'Logit'):
        # These two estimator types get mean imputation in front of them.
        # NOTE(review): sklearn.preprocessing.Imputer only exists in older
        # scikit-learn releases - confirm the pinned version before upgrading.
        def clf_partial():
            imputer = sklearn.preprocessing.Imputer(
                missing_values='NaN', strategy='mean', axis=0)
            pipe = sklearn.pipeline.Pipeline([
                ('inputer', imputer),
                ('est', est_class(**est_params)),
            ])
            return multiclass_wrapper(pipe)
    else:
        def clf_partial():
            return multiclass_wrapper(est_class(**est_params))
    return clf_partial
示例14: train_wdclassifier_user
# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import pipeline [as 别名]
def train_wdclassifier_user(training_set: Tuple[np.ndarray, np.ndarray],
                            svmType: str,
                            C: float,
                            gamma: Optional[float]) -> "sklearn.pipeline.Pipeline":
    """ Trains an SVM classifier for a user

    Parameters
    ----------
    training_set: Tuple (x, y)
        The training set (features and labels). y should have labels -1 and 1
    svmType: string ('linear' or 'rbf')
        The SVM type
    C: float
        Regularization for the SVM optimization
    gamma: float
        Hyperparameter for the RBF kernel (only used when svmType is 'rbf')

    Returns
    -------
    sklearn.pipeline.Pipeline:
        The fitted pipeline: a ``StandardScaler(with_mean=False)`` step named
        'scaler' followed by the SVM step named 'classifier'

    Raises
    ------
    ValueError
        If ``svmType`` is not 'linear' or 'rbf', or if the training labels
        contain no genuine (label 1) samples.
    """
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if svmType not in ('linear', 'rbf'):
        raise ValueError("svmType must be 'linear' or 'rbf', got %r" % (svmType,))

    train_x, train_y = training_set[0], training_set[1]

    # Adjust for the skew between positive and negative classes
    labels = np.asarray(train_y)
    n_genuine = int(np.count_nonzero(labels == 1))
    n_forg = int(np.count_nonzero(labels == -1))
    if n_genuine == 0:
        # Without this guard the skew computation fails with an unexplained
        # ZeroDivisionError.
        raise ValueError('training_set contains no genuine samples (label 1)')
    skew = n_forg / float(n_genuine)

    # Train the model; the genuine class is weighted by the skew.
    if svmType == 'rbf':
        model = sklearn.svm.SVC(C=C, gamma=gamma, class_weight={1: skew})
    else:
        model = sklearn.svm.SVC(kernel='linear', C=C, class_weight={1: skew})

    model_with_scaler = pipeline.Pipeline([
        ('scaler', preprocessing.StandardScaler(with_mean=False)),
        ('classifier', model),
    ])
    model_with_scaler.fit(train_x, train_y)
    return model_with_scaler