本文整理汇总了 Python 中 sklearn.base 模块的典型用法代码示例。如果您正苦于以下问题：Python sklearn.base 的具体用法？Python sklearn.base 怎么用？Python sklearn.base 使用的例子？那么，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该模块所在的包 sklearn 的用法示例。
在下文中一共展示了 sklearn.base 模块的 15 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: _determine_offset
# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import base [as 别名]
def _determine_offset(
    model: BaseEstimator, X: Union[np.ndarray, pd.DataFrame]
) -> int:
    """
    Compute how much shorter the model's output is than its input.

    Parameters
    ----------
    model: sklearn.base.BaseEstimator
        Trained model exposing ``predict`` and/or ``transform``; ``predict``
        is used whenever the model has it.
    X: Union[np.ndarray, pd.DataFrame]
        Data handed to the selected method.

    Returns
    -------
    int
        ``len(X)`` minus the length of the model's output.
    """
    if hasattr(model, "predict"):
        produced = model.predict(X)
    else:
        produced = model.transform(X)
    n_input = len(X)
    return n_input - len(produced)
示例2: test_repr
# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import base [as 别名]
def test_repr():
    """Smoke-test ``repr`` on the base estimator and pin its exact output."""
    # This call only has to succeed; its value is not inspected.
    repr(MyEstimator())

    nested = T(K(), K())
    expected = "T(a=K(c=None, d=None), b=K(c=None, d=None))"
    assert_equal(repr(nested), expected)

    # A very long parameter value: the test pins the length of the repr.
    long_est = T(a=["long_params"] * 1000)
    rendered = repr(long_est)
    assert_equal(len(rendered), 495)
示例3: test_str
# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import base [as 别名]
def test_str():
    """Smoke-test ``str`` on the base estimator; the call only has to succeed."""
    str(MyEstimator())
示例4: test_all_estimator_no_base_class
# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import base [as 别名]
def test_all_estimator_no_base_class():
    """Check that ``all_estimators`` yields no name starting with ``base``.

    The comparison is case-insensitive (names are lower-cased first).
    """
    template = ("Base estimators such as {0} should not be included"
                " in all_estimators")
    for name, _estimator in all_estimators():
        looks_like_base = name.lower().startswith('base')
        assert not looks_like_base, template.format(name)
示例5: test_root_import_all_completeness
# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import base [as 别名]
def test_root_import_all_completeness():
    """Check that public top-level sklearn modules are listed in ``__all__``.

    A module is exempt when its name contains a dot, starts with an
    underscore, or is one of the explicitly skipped names below.
    """
    skipped = ('utils', 'tests', 'base', 'setup', 'conftest')

    def _ignore_errors(_):
        # Errors raised while walking packages are deliberately ignored.
        return None

    walker = pkgutil.walk_packages(path=sklearn.__path__,
                                   onerror=_ignore_errors)
    for _finder, modname, _ispkg in walker:
        nested = '.' in modname
        private = modname.startswith('_')
        if not (nested or private or modname in skipped):
            assert_in(modname, sklearn.__all__)
示例6: test_wrap_from_instance
# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import base [as 别名]
def test_wrap_from_instance(self):
    """Wrap an ``UnknownOp`` instance and check it survives a sklearn clone."""
    from lale.operators import make_operator, TrainableIndividualOp
    from lale.sklearn_compat import make_sklearn_compat
    from sklearn.base import clone

    expected_hyperparams = {'n_neighbors': 3}

    # Neither the raw class nor a raw instance is a TrainableIndividualOp.
    self.assertFalse(isinstance(UnknownOp, TrainableIndividualOp))
    raw = UnknownOp(n_neighbors=3)
    self.assertFalse(isinstance(raw, TrainableIndividualOp))

    # After wrapping, the operator reports the same hyperparameters.
    op = make_operator(raw)
    self.assertTrue(isinstance(op, TrainableIndividualOp))
    self.assertEqual(op.hyperparams(), expected_hyperparams)

    # Round trip: sklearn-compatible wrapper -> sklearn clone -> back to lale.
    compat = make_sklearn_compat(op)
    round_tripped = clone(compat).to_lale()
    self.assertTrue(isinstance(round_tripped, TrainableIndividualOp))
    self.assertEqual(round_tripped.hyperparams(), expected_hyperparams)
示例7: _clone_impl
# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import base [as 别名]
def _clone_impl(self):
    """Return a fresh copy of the wrapped implementation instance.

    Strategies are tried in this order:

    1. If the instance has a ``get_params`` attribute, it is cloned with
       ``sklearn.base.clone``.
    2. Otherwise it is deep-copied with ``copy.deepcopy``.
    3. If deep-copying raises an ordinary exception, a new object is built
       by calling ``self._impl_class()`` with ``self._get_params_all()``
       as keyword arguments.

    Returns
    -------
    object
        The cloned, copied, or re-constructed implementation.
    """
    impl_instance = self._impl_instance()
    if hasattr(impl_instance, 'get_params'):
        return sklearn.base.clone(impl_instance)
    try:
        return copy.deepcopy(impl_instance)
    except Exception:
        # Only ordinary errors trigger the re-construction fallback. A bare
        # ``except:`` would also swallow KeyboardInterrupt and SystemExit,
        # which must be allowed to propagate.
        impl_class = self._impl_class()
        params_all = self._get_params_all()
        return impl_class(**params_all)
示例8: __constructor_for_cloning
# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import base [as 别名]
def __constructor_for_cloning(self, steps:List[OpType]):
    """Rebuild the pipeline graph from a linear list of steps.

    ``steps`` is stored on ``self._steps`` as the very same list object and
    is extended in place with the steps of every nested ``BasePipeline`` it
    contains. Each step is connected to its predecessor by edges running
    from the predecessor's sink nodes to the step's source nodes. The result
    is recorded in ``self._preds``, mapping each step to the list of its
    predecessors.

    Raises
    ------
    ValueError
        If a step is found more than once in the final step list.
    """
    # scikit-learn's clone needs to get the same list object back, so the
    # list is stored as-is instead of being copied.
    self._steps = steps
    collected_edges:List[Tuple[OpType, OpType]] = []
    previous:Optional[OpType] = None
    sinks:List[OpType]
    sources:List[OpType]
    # NOTE: the list may grow while it is being iterated, so steps appended
    # from nested pipelines are visited by this same loop.
    for current in self._steps:
        if isinstance(previous, BasePipeline):
            sinks = previous._find_sink_nodes()
        elif previous is None:
            sinks = []
        else:
            sinks = [previous]
        if isinstance(current, BasePipeline):
            sources = current._find_source_nodes()
            self._steps.extend(current.steps())
            collected_edges.extend(current.edges())
        else:
            sources = [current]
        for src in sinks:
            for tgt in sources:
                collected_edges.append((src, tgt))
        previous = current

    visited:List[OpType] = []
    for step in self._steps:
        if step in visited:
            raise ValueError('Same instance of {} already exists in the pipeline. '\
                             'This is not allowed.'.format(step.name()))
        visited.append(step)

    predecessors = {}
    for step in self._steps:
        predecessors[step] = []
    self._preds = predecessors
    for src, dst in collected_edges:
        self._preds[dst].append(src)
    # Only linear pipelines take this path, so the steps are expected to be
    # in topological order already.
    assert self.__is_in_topological_order()
示例9: test_repr
# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import base [as 别名]
def test_repr():
    """Smoke-test ``repr`` on the base estimator and pin its exact output."""
    # This call only has to succeed; its value is not inspected.
    repr(MyEstimator())

    nested = T(K(), K())
    expected = "T(a=K(c=None, d=None), b=K(c=None, d=None))"
    assert_equal(repr(nested), expected)

    # A very long parameter value: the test pins the length of the repr.
    long_est = T(a=["long_params"] * 1000)
    rendered = repr(long_est)
    assert_equal(len(rendered), 415)
示例10: _extract_metadata_from_model
# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import base [as 别名]
def _extract_metadata_from_model(
    model: BaseEstimator, metadata: dict = dict()
) -> dict:
    """
    Gather metadata from any :class:`gordo.machine.model.base.GordoBase`
    reachable from ``model``.

    The search is recursive: a ``GordoBase`` sitting at the end of a
    :class:`sklearn.pipeline.Pipeline`, which is itself an attribute of another
    :class:`sklearn.base.BaseEstimator`, is still found.

    Parameters
    ----------
    model: BaseEstimator
        Model to inspect.
    metadata: dict
        Initial metadata. It is copied, never modified in place.

    Notes
    -----
    Only the final step of a ``Pipeline`` is inspected; a ``GordoBase`` in any
    earlier step is not found.

    Returns
    -------
    dict
        The initial metadata updated with every
        :meth:`gordo.machine.model.base.GordoBase.get_metadata` result found.
    """
    # Work on a copy so neither the caller's dict nor the default is mutated.
    collected = metadata.copy()

    if isinstance(model, Pipeline):
        # For a Pipeline only the last step can contribute metadata.
        collected.update(
            ModelBuilder._extract_metadata_from_model(model.steps[-1][1])
        )
        return collected

    if isinstance(model, GordoBase):
        collected.update(model.get_metadata())

    # Also look through the instance attributes: an estimator may hold another
    # GordoBase, BaseEstimator or Pipeline as a parameter/attribute.
    for attribute in model.__dict__.values():
        if isinstance(attribute, Pipeline):
            nested = attribute.steps[-1][1]
        elif isinstance(attribute, (GordoBase, BaseEstimator)):
            nested = attribute
        else:
            continue
        collected.update(ModelBuilder._extract_metadata_from_model(nested))
    return collected
示例11: __init__
# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import base [as 别名]
def __init__(
    self,
    featurizer: Optional[MaxHistoryTrackerFeaturizer] = None,
    priority: int = 1,
    model: Optional['sklearn.base.BaseEstimator'] = None,
    param_grid: Optional[Dict[Text, List] or List[Dict]] = None,
    cv: Optional[int] = None,
    scoring: Optional[Text or List or Dict or Callable] = 'accuracy',
    label_encoder: Optional[LabelEncoder] = None,
    shuffle: bool = True,
    **kwargs: Any
) -> None:
    """Create a new sklearn policy.

    Args:
        featurizer: Featurizer used to convert the training data into
            vector format. Must be a ``MaxHistoryTrackerFeaturizer``
            when given.
        priority: Forwarded to the parent class constructor.
        model: The sklearn model or model pipeline. A falsy value selects
            ``self._default_model()``.
        param_grid: If *param_grid* is not None and *cv* is given,
            a grid search on the given *param_grid* is performed
            (e.g. *param_grid={'n_estimators': [50, 100]}*).
        cv: If *cv* is not None, perform a cross validation on
            the training data. *cv* should then conform to the
            sklearn standard (e.g. *cv=5* for a 5-fold cross-validation).
        scoring: Scoring strategy, using the sklearn standard.
        label_encoder: Encoder for the labels. Must implement an
            *inverse_transform* method. When ``None`` (the default) a
            fresh ``LabelEncoder`` is created for this instance.
        shuffle: Whether to shuffle training data.
        **kwargs: Stored unchanged in ``self._train_params``.

    Raises:
        TypeError: If ``featurizer`` is truthy but is not a
            ``MaxHistoryTrackerFeaturizer``.
    """
    # NOTE(review): ``A or B`` inside an annotation evaluates to ``A`` only;
    # ``typing.Union`` was presumably intended for param_grid and scoring.
    if featurizer and not isinstance(featurizer, MaxHistoryTrackerFeaturizer):
        raise TypeError("Passed featurizer of type {}, should be "
                        "MaxHistoryTrackerFeaturizer."
                        "".format(type(featurizer).__name__))

    super(SklearnPolicy, self).__init__(featurizer, priority)

    self.model = model or self._default_model()
    self.cv = cv
    self.param_grid = param_grid
    self.scoring = scoring
    # Build the encoder per instance. A ``LabelEncoder()`` default in the
    # signature is created once at definition time and would be shared by
    # every policy that relies on the default.
    self.label_encoder = (
        LabelEncoder() if label_encoder is None else label_encoder
    )
    self.shuffle = shuffle

    # attributes that need to be restored after loading
    self._pickle_params = [
        'model', 'cv', 'param_grid', 'scoring', 'label_encoder']
    self._train_params = kwargs
示例12: get_estimators
# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import base [as 别名]
def get_estimators(type_filter='all'):
    """Return a list of classes that inherit from `sklearn.BaseEstimator`.

    This code is based on `sklearn.utils.testing.all_estimators`.

    Parameters
    ----------
    type_filter : str (default: 'all')
        A value in ['all', 'classifier', 'transformer', 'cluster'] which
        defines which type of estimators to retrieve

    Returns
    -------
    list
        Entries returned by ``_get_all_classes()`` that pass every filter,
        de-duplicated and sorted by their first element. The second element
        of each entry is the class that is tested.

    Raises
    ------
    ValueError
        If `type_filter` is not one of the accepted values.
    """
    valid_filters = ('all', 'classifier', 'transformer', 'cluster')
    if type_filter not in valid_filters:
        # ValueError is a subclass of Exception, so callers that caught the
        # previous generic Exception keep working.
        raise ValueError("type_filter should be element of "
                         "['all', 'classifier', 'transformer', 'cluster']")

    # Mixins an estimator must inherit from (at least one) to be kept.
    mixins = {
        'all': (ClassifierMixin, RegressorMixin,
                TransformerMixin, ClusterMixin),
        'classifier': (ClassifierMixin,),
        'transformer': (TransformerMixin,),
        'cluster': (ClusterMixin,)
    }[type_filter]

    # A single pass applies the filters in order: BaseEstimator subclasses
    # only, no abstract base classes, nothing flagged by ``is_sklearn``,
    # and the requested type. The set removes duplicates.
    selected = {
        entry for entry in _get_all_classes()
        if issubclass(entry[1], BaseEstimator)
        and not is_abstract(entry[1])
        and not is_sklearn(entry[1])
        and issubclass(entry[1], mixins)
    }
    return sorted(selected, key=itemgetter(0))
示例13: check_estimator
# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import base [as 别名]
def check_estimator(Estimator):
    """Check if estimator adheres to scikit-learn conventions.

    Runs every check yielded by ``checks._yield_all_checks`` against the
    estimator. Both classes and instances are accepted: a class is
    instantiated with no arguments and gets two extra construction-related
    checks, while an instance is used as given.

    Before the checks run, some parameters are overridden when the estimator
    has them: ``max_iter`` (100 for ``LearningShapelets`` and
    ``SerializableShapeletModel``, 10 otherwise), ``total_lengths`` (1) and
    ``probability`` (True).

    Parameters
    ----------
    Estimator : estimator object or class
        Estimator to check. Estimator is a class object or instance.
    """
    if not isinstance(Estimator, type):
        # got an instance
        estimator = Estimator
        name = type(estimator).__name__
    else:
        # got a class
        name = Estimator.__name__
        estimator = Estimator()
        check_parameters_default_constructible(name, Estimator)
        check_no_attributes_set_in_init(name, estimator)

    if hasattr(estimator, 'max_iter'):
        shapelet_types = (LearningShapelets, SerializableShapeletModel)
        max_iter = 100 if isinstance(estimator, shapelet_types) else 10
        estimator.set_params(max_iter=max_iter)

    if hasattr(estimator, 'total_lengths'):
        estimator.set_params(total_lengths=1)

    if hasattr(estimator, 'probability'):
        estimator.set_params(probability=True)

    for check in checks._yield_all_checks(name, estimator):
        try:
            check(name, estimator)
        except SkipTest as skipped:
            # the only SkipTest thrown currently results from not
            # being able to import pandas.
            warnings.warn(str(skipped), SkipTestWarning)
示例14: __init__
# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import base [as 别名]
def __init__(
    self,
    featurizer: Optional[MaxHistoryTrackerFeaturizer] = None,
    priority: int = DEFAULT_POLICY_PRIORITY,
    model: Optional["sklearn.base.BaseEstimator"] = None,
    param_grid: Optional[Dict[Text, List] or List[Dict]] = None,
    cv: Optional[int] = None,
    scoring: Optional[Text or List or Dict or Callable] = "accuracy",
    label_encoder: Optional[LabelEncoder] = None,
    shuffle: bool = True,
    **kwargs: Any,
) -> None:
    """Create a new sklearn policy.

    Args:
        featurizer: Featurizer used to convert the training data into
            vector format. Must be a ``MaxHistoryTrackerFeaturizer``
            when given.
        priority: Forwarded to the parent class constructor.
        model: The sklearn model or model pipeline. A falsy value selects
            ``self._default_model()``.
        param_grid: If *param_grid* is not None and *cv* is given,
            a grid search on the given *param_grid* is performed
            (e.g. *param_grid={'n_estimators': [50, 100]}*).
        cv: If *cv* is not None, perform a cross validation on
            the training data. *cv* should then conform to the
            sklearn standard (e.g. *cv=5* for a 5-fold cross-validation).
        scoring: Scoring strategy, using the sklearn standard.
        label_encoder: Encoder for the labels. Must implement an
            *inverse_transform* method. When ``None`` (the default) a
            fresh ``LabelEncoder`` is created for this instance.
        shuffle: Whether to shuffle training data.
        **kwargs: Stored unchanged in ``self._train_params``.

    Raises:
        TypeError: If ``featurizer`` is truthy but is not a
            ``MaxHistoryTrackerFeaturizer``.
    """
    # NOTE(review): ``A or B`` inside an annotation evaluates to ``A`` only;
    # ``typing.Union`` was presumably intended for param_grid and scoring.
    if featurizer and not isinstance(featurizer, MaxHistoryTrackerFeaturizer):
        raise TypeError(
            "Passed featurizer of type {}, should be "
            "MaxHistoryTrackerFeaturizer."
            "".format(type(featurizer).__name__)
        )

    super().__init__(featurizer, priority)

    self.model = model or self._default_model()
    self.cv = cv
    self.param_grid = param_grid
    self.scoring = scoring
    # Build the encoder per instance. A ``LabelEncoder()`` default in the
    # signature is created once at definition time and would be shared by
    # every policy that relies on the default.
    self.label_encoder = (
        LabelEncoder() if label_encoder is None else label_encoder
    )
    self.shuffle = shuffle

    # attributes that need to be restored after loading
    self._pickle_params = ["model", "cv", "param_grid", "scoring", "label_encoder"]
    self._train_params = kwargs
示例15: __init__
# 需要导入模块: import sklearn [as 别名]
# 或者: from sklearn import base [as 别名]
def __init__(self, sklearnEstimator=None, keyCols=["key"], xCol="features",
             outputCol="output", yCol=None, estimatorType=None):
    """Validate the arguments and set up the estimator's Params.

    The ordered list ``keyCols`` determines the groups to which each
    ``sklearnEstimator`` is applied: for every unique ``keyCols`` value, the
    remaining columns are aggregated and used to train the scikit-learn
    estimator.

    ``estimatorType`` is inferred as follows: when ``yCol`` is specified the
    estimator is assumed to be a ``"predictor"``; otherwise it is a
    ``"transformer"`` or a ``"clusterer"``, depending on whether the estimator
    has the ``transform()`` or ``fit_predict()`` attribute, with
    ``"clusterer"`` winning when both are present.

    :param sklearnEstimator: An instance of a scikit-learn estimator, with parameters configured
                             as desired for each user.
    :param keyCols: Key column names list used to group data to which models are applied, where
                    order implies lexicographical importance.
    :param xCol: Name of column of input features used for training and
                 transformation/prediction.
    :param outputCol: Not read directly in this method.
    :param yCol: Specifies name of label column for regression or classification pipelines.
                 Required for predictors, must be unspecified or ``None`` for transformers.
    :param estimatorType: Identifies the type of scikit-learn estimator being used, which
                          changes the interface the ``sklearnEstimator`` is expected to have.
                          Inferred using reflection by default, but may be manually overridden.
    :raise ValueError: if ``sklearnEstimator`` is ``None``.
    :raise ValueError: if ``sklearnEstimator`` does not derive from
                       ``sklearn.base.BaseEstimator``.
    :raise ValueError: if ``keyCols`` is empty.
    :raise ValueError: if any column has the name ``"estimator"``
    :raise AttributeError: if reflection checks indicate that parameter estimator is not equipped
                           with a ``fit()`` method.
    """
    if sklearnEstimator is None:
        raise ValueError("sklearnEstimator should be specified")
    if not isinstance(sklearnEstimator, sklearn.base.BaseEstimator):
        raise ValueError("sklearnEstimator should be an sklearn.base.BaseEstimator")
    if len(keyCols) == 0:
        raise ValueError("keyCols should not be empty")
    column_names = keyCols + [xCol, yCol]
    if "estimator" in column_names:
        raise ValueError("keyCols should not contain a column named \"estimator\"")

    # The superclass expects the Param attributes to exist already, so they
    # are created before its constructor runs.
    specs = KeyedEstimator._paramSpecs
    for name, spec in specs.items():
        param = Param(Params._dummy(), name, spec["doc"])
        setattr(self, name, param)
    super(KeyedEstimator, self).__init__()

    defaults = {}
    for name, spec in specs.items():
        if "default" in spec:
            defaults[name] = spec["default"]
    self._setDefault(**defaults)

    # NOTE(review): ``self._input_kwargs`` is not assigned in this method;
    # presumably a decorator on ``__init__`` provides it. Confirm.
    inferred = KeyedEstimator._inferredParams(sklearnEstimator, self._input_kwargs)
    self._set(**inferred)

    self._verifyEstimatorType()