本文整理汇总了Python中sklearn.pipeline.Pipeline类的典型用法代码示例。如果您正苦于以下问题:Python pipeline.Pipeline类的具体用法?Python pipeline.Pipeline怎么用?Python pipeline.Pipeline使用的例子?那么,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块sklearn.pipeline的用法示例。
在下文中一共展示了pipeline.Pipeline类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: pca
# 需要导入模块: from sklearn import pipeline [as 别名]
# 或者: from sklearn.pipeline import Pipeline [as 别名]
def pca(self, **kwargs):
    """Fit a standard-scaling + PCA pipeline on this object's matrix.

    Keyword Args:
        n_components: forwarded to ``PCA``; defaults to 0.995 when omitted.
        dates: when present, forwarded to ``self.to_matrix``; otherwise
            ``self.to_matrix()`` is called without arguments.
        file: when present, the fitted pipeline is handed to ``tofile``
            together with this value.

    Returns:
        The fitted pipeline, which is also stored on ``self._pipeline``.
    """
    n_components = kwargs.get('n_components', 0.995)
    matrix = self.to_matrix(kwargs['dates']) if 'dates' in kwargs else self.to_matrix()

    steps = [
        ('scaler', StandardScaler()),
        ('pca', PCA(n_components=n_components)),
    ]
    self._pipeline = Pipeline(steps)
    self._pipeline.fit(matrix)

    if 'file' in kwargs:
        tofile(kwargs['file'], self._pipeline)
    return self._pipeline
示例2: run
# 需要导入模块: from sklearn import pipeline [as 别名]
# 或者: from sklearn.pipeline import Pipeline [as 别名]
def run(self):
    """Configure the classifier, fit it in a one-step pipeline and score the test set.

    Applies ``self.params`` to ``self.clf``, fits on ``self.X_train`` /
    ``self.y_train`` and stores column 1 of ``predict_proba(self.X_test)`` in
    ``self.y_pred_probs``. ``self.importances`` is set only when
    ``self.model_type`` is one of the codes listed below; for any other code
    it is left untouched.
    """
    self.clf.set_params(**self.params)

    # Feature generation step, currently disabled:
    # f = get_feature_transformer(self.parser)
    # self.X_train_fts = f.fit_transform(self.X_train)
    # self.X_test_fts = f.transform(self.X_test)
    steps = [
        # ('feature_gen', f),
        ('clf', self.clf),
    ]
    self.pipeline = Pipeline(steps)

    fitted = self.pipeline.fit(self.X_train, self.y_train)
    self.y_pred_probs = fitted.predict_proba(self.X_test)[:, 1]

    # Model codes whose importances are read from ``feature_importances_``
    # versus from the first row of ``coef_``.
    uses_feature_importances = ('RF', 'ET', 'AB', 'GB', 'DT')
    uses_coefficients = ('SVM', 'LR', 'SGD')
    if self.model_type in uses_feature_importances:
        self.importances = self.clf.feature_importances_
    elif self.model_type in uses_coefficients:
        self.importances = self.clf.coef_[0]
示例3: full_pipeline
# 需要导入模块: from sklearn import pipeline [as 别名]
# 或者: from sklearn.pipeline import Pipeline [as 别名]
def full_pipeline(
    model_type,
    predicted_column,
    grain_column,
    impute=True,
    verbose=True,
    imputeStrategy='MeanMode',
    tunedRandomForest=False,
    numeric_columns_as_categorical=None,
):
    """
    Assemble the data preparation pipeline.

    The returned ``Pipeline`` chains, in order: a column-suffix filter, removal
    of ``grain_column``, imputation, a null-value row filter, conversion of the
    target to binary, conversion of ``predicted_column`` to numeric, and dummy
    variable creation (excluding ``predicted_column``).

    Advanced users may prefer to build their own custom pipeline.
    """
    # Note: FeatureUnions would be a more elegant way to do this, but only if
    # the later pipelines do not need pandas dataframes as input, because
    # FeatureUnion intrinsically converts outputs to numpy arrays.
    suffix_filter = hcai_filters.DataframeColumnSuffixFilter()
    grain_remover = hcai_filters.DataframeColumnRemover(grain_column)
    # Perform one of two basic imputation methods
    # TODO we need to think about making this optional to solve the problem of rare and very predictive values
    imputer = hcai_transformers.DataFrameImputer(
        impute=impute,
        verbose=verbose,
        imputeStrategy=imputeStrategy,
        tunedRandomForest=tunedRandomForest,
        numeric_columns_as_categorical=numeric_columns_as_categorical,
    )
    null_filter = hcai_filters.DataframeNullValueFilter(excluded_columns=None)
    target_to_binary = hcai_transformers.DataFrameConvertTargetToBinary(model_type, predicted_column)
    target_to_numeric = hcai_transformers.DataFrameConvertColumnToNumeric(predicted_column)
    dummies = hcai_transformers.DataFrameCreateDummyVariables(excluded_columns=[predicted_column])

    steps = [
        ('remove_DTS_columns', suffix_filter),
        ('remove_grain_column', grain_remover),
        ('imputation', imputer),
        ('null_row_filter', null_filter),
        ('convert_target_to_binary', target_to_binary),
        ('prediction_to_numeric', target_to_numeric),
        ('create_dummy_variables', dummies),
    ]
    return Pipeline(steps)
示例4: test_stability_selection_regression
# 需要导入模块: from sklearn import pipeline [as 别名]
# 或者: from sklearn.pipeline import Pipeline [as 别名]
def test_stability_selection_regression():
    """Selected feature indices must match those reported by the data generator."""
    # NOTE(review): p is assigned but never forwarded to the generator — confirm
    # whether the helper's default is intended here.
    n, p, k = 500, 1000, 5
    X, y, important_betas = _generate_dummy_regression_data(n=n, k=k)

    steps = [('scaler', StandardScaler()), ('model', Lasso())]
    selector = StabilitySelection(
        base_estimator=Pipeline(steps),
        lambda_name='model__alpha',
        lambda_grid=np.logspace(-1, 1, num=10),
    )
    selector.fit(X, y)

    assert_almost_equal(important_betas, selector.get_support(indices=True))
示例5: test_with_complementary_pairs_bootstrap
# 需要导入模块: from sklearn import pipeline [as 别名]
# 或者: from sklearn.pipeline import Pipeline [as 别名]
def test_with_complementary_pairs_bootstrap():
    """Same recovery check, with ``bootstrap_func='complementary_pairs'``."""
    # NOTE(review): p is assigned but never forwarded to the generator — confirm
    # whether the helper's default is intended here.
    n, p, k = 500, 1000, 5
    X, y, important_betas = _generate_dummy_regression_data(n=n, k=k)

    scaled_lasso = Pipeline([('scaler', StandardScaler()), ('model', Lasso())])
    selector_kwargs = dict(
        base_estimator=scaled_lasso,
        lambda_name='model__alpha',
        lambda_grid=np.logspace(-1, 1, num=10),
        bootstrap_func='complementary_pairs',
    )
    selector = StabilitySelection(**selector_kwargs)
    selector.fit(X, y)

    assert_almost_equal(important_betas, selector.get_support(indices=True))
示例6: test_different_shape
# 需要导入模块: from sklearn import pipeline [as 别名]
# 或者: from sklearn.pipeline import Pipeline [as 别名]
def test_different_shape():
    """Fit on X, then call ``transform`` on X with its last two columns removed.

    NOTE(review): no assertion is visible in this body; presumably the expected
    outcome (e.g. an exception) is declared by a decorator outside this view —
    confirm against the original test module.
    """
    # NOTE(review): p is assigned but never forwarded to the generator.
    n, p, k = 100, 200, 5
    X, y, important_betas = _generate_dummy_regression_data(n=n, k=k)

    scaled_lasso = Pipeline([('scaler', StandardScaler()), ('model', Lasso())])
    selector = StabilitySelection(
        base_estimator=scaled_lasso,
        lambda_name='model__alpha',
        lambda_grid=np.logspace(-1, 1, num=10),
    )
    selector.fit(X, y)

    narrower_X = X[:, :-2]
    selector.transform(narrower_X)
示例7: test_no_features
# 需要导入模块: from sklearn import pipeline [as 别名]
# 或者: from sklearn.pipeline import Pipeline [as 别名]
def test_no_features():
    """With k=0 the transformed matrix must have X's row count and zero columns."""
    # NOTE(review): p is assigned but never forwarded to the generator.
    n, p, k = 100, 200, 0
    X, y, important_betas = _generate_dummy_regression_data(n=n, k=k)

    steps = [('scaler', StandardScaler()), ('model', Lasso())]
    selector = StabilitySelection(
        base_estimator=Pipeline(steps),
        lambda_name='model__alpha',
        lambda_grid=np.logspace(-1, 1, num=10),
    )
    selector.fit(X, y)

    transformed = selector.transform(X)
    expected = np.empty(0).reshape((X.shape[0], 0))
    assert_almost_equal(transformed, expected)
示例8: run_logreg
# 需要导入模块: from sklearn import pipeline [as 别名]
# 或者: from sklearn.pipeline import Pipeline [as 别名]
def run_logreg(X_train, y_train, selection_threshold=0.2, X_test=None, y_test=None):
    """Fit a feature-selection + logistic-regression pipeline and print its accuracy.

    Parameters
    ----------
    X_train, y_train
        Training data passed to ``Pipeline.fit`` and used for the training score.
    selection_threshold
        Forwarded to ``RandomizedLogisticRegression``.
    X_test, y_test
        Optional held-out data for the testing score. When omitted, the
        module-level ``X_test`` / ``y_test`` names are used if they exist
        (this is what the function previously relied on implicitly); when
        neither is available the testing score is skipped instead of raising
        ``NameError``.

    Returns
    -------
    The fitted pipeline.
    """
    print("\nrunning logistic regression...")
    print("using a selection threshold of {}".format(selection_threshold))

    pipe = Pipeline(
        [
            (
                "feature_selection",
                RandomizedLogisticRegression(selection_threshold=selection_threshold),
            ),
            ("classification", LogisticRegression()),
        ]
    )
    pipe.fit(X_train, y_train)
    print("training accuracy : {}".format(pipe.score(X_train, y_train)))

    # Backward compatibility: earlier callers defined X_test / y_test as
    # module globals rather than passing them in.
    module_scope = globals()
    if X_test is None:
        X_test = module_scope.get("X_test")
    if y_test is None:
        y_test = module_scope.get("y_test")

    if X_test is not None and y_test is not None:
        print("testing accuracy : {}".format(pipe.score(X_test, y_test)))
    return pipe
示例9: make_pipeline
# 需要导入模块: from sklearn import pipeline [as 别名]
# 或者: from sklearn.pipeline import Pipeline [as 别名]
def make_pipeline(encoding_method):
    """Build a ColumnTransformer -> StandardScaler -> RidgeCV pipeline.

    Each entry of the module-level ``clean_columns`` mapping gets its own
    transformer, named ``"<encoder>_<column>"``; ``dirty_column`` is encoded
    with the encoder registered under ``encoding_method`` in ``encoders_dict``.
    """
    # static transformers from the other columns
    transformers = []
    for col, enc in clean_columns.items():
        transformers.append((enc + '_' + col, encoders_dict[enc], [col]))

    # adding the encoded column
    dirty_encoder = encoders_dict[encoding_method]
    transformers.append((encoding_method, dirty_encoder, [dirty_column]))

    # Use ColumnTransformer to combine the features
    union = ColumnTransformer(transformers=transformers, remainder='drop')
    steps = [
        ('union', union),
        ('scaler', StandardScaler(with_mean=False)),
        ('clf', RidgeCV()),
    ]
    return Pipeline(steps)
#########################################################################
# Fitting each encoding methods with a RidgeCV
# --------------------------------------------
# Eventually, we loop over the different encoding methods,
# instantiate each time a new pipeline, fit it
# and store the returned cross-validation score:
示例10: make_pipeline
# 需要导入模块: from sklearn import pipeline [as 别名]
# 或者: from sklearn.pipeline import Pipeline [as 别名]
def make_pipeline(encoding_method):
    """Build a ColumnTransformer -> StandardScaler -> RandomForestClassifier pipeline.

    All of ``clean_columns`` share the ``'one-hot'`` encoder from
    ``encoder_dict``; ``dirty_column`` uses the encoder registered under
    ``encoding_method`` and its transformer is named ``"<encoding_method>-dirty"``.
    """
    # static transformers from the other columns
    clean_step = ('one-hot-clean', encoder_dict['one-hot'], clean_columns)
    # adding the encoded column
    dirty_step = (
        encoding_method + '-dirty',
        encoder_dict[encoding_method],
        [dirty_column],
    )
    transformers = [clean_step, dirty_step]

    # Use ColumnTransformer to combine the features
    union = ColumnTransformer(transformers=transformers, remainder='drop')
    steps = [
        ('union', union),
        ('scaler', StandardScaler(with_mean=False)),
        ('classifier', RandomForestClassifier(random_state=5)),
    ]
    return Pipeline(steps)
###############################################################################
# Evaluation of different encoding methods
# -----------------------------------------
# We then loop over encoding methods, scoring the different pipeline predictions
# using a cross validation score:
示例11: test_keras_autoencoder_scoring
# 需要导入模块: from sklearn import pipeline [as 别名]
# 或者: from sklearn.pipeline import Pipeline [as 别名]
def test_keras_autoencoder_scoring(model, kind, n_features_out):
    """
    Check that the located model class, wrapped in a Pipeline, raises
    NotFittedError from ``score`` before fitting and scores after fitting.
    """
    model_cls = pydoc.locate(f"gordo.machine.model.models.{model}")
    pipe = Pipeline([("model", model_cls(kind=kind))])

    n_samples = 8
    X = np.random.random((n_samples, 2))
    # The target may have a different number of features than the input X
    y = np.random.random((n_samples, n_features_out))

    with pytest.raises(NotFittedError):
        pipe.score(X, y)

    pipe.fit(X, y)
    score = pipe.score(X, y)
    logger.info(f"Score: {score:.4f}")
示例12: load
# 需要导入模块: from sklearn import pipeline [as 别名]
# 或者: from sklearn.pipeline import Pipeline [as 别名]
def load(source_dir: Union[os.PathLike, str]) -> Any:
    """
    Load an object from a directory, saved by
    ``gordo.serializer.pipeline_serializer.dump``

    The object is read from the ``model.pkl`` file located directly inside
    ``source_dir`` and returned unpickled.

    NOTE(review): earlier documentation described also accepting a top-level
    directory containing a "n_step=<int>-class=<path.to.Class>" sub directory;
    this function does not search sub directories, so ``source_dir`` must be
    the directory that holds ``model.pkl`` itself.

    Parameters
    ----------
    source_dir: Union[os.PathLike, str]
        Directory containing the ``model.pkl`` file

    Returns
    -------
    Union[GordoBase, Pipeline, BaseEstimator]
        The unpickled object

    Raises
    ------
    FileNotFoundError
        If ``model.pkl`` does not exist inside ``source_dir``
    """
    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # Only call this on directories that come from a trusted source.
    model_path = os.path.join(source_dir, "model.pkl")
    with open(model_path, "rb") as f:
        return pickle.load(f)
示例13: create_ngram_model
# 需要导入模块: from sklearn import pipeline [as 别名]
# 或者: from sklearn.pipeline import Pipeline [as 别名]
def create_ngram_model(params=None):
    """Build a TF-IDF + MultinomialNB pipeline with a custom tweet preprocessor.

    Args:
        params: optional dict forwarded to ``Pipeline.set_params`` (skipped
            when falsy).

    Returns:
        The unfitted ``Pipeline`` with steps ``'tfidf'`` and ``'clf'``.
    """
    def preprocessor(tweet):
        """Lower-case ``tweet`` and apply the ``emo_repl`` and ``re_repl`` substitutions."""
        tweet = tweet.lower()
        # Plain string replacements, applied in the order given by emo_repl_order.
        for k in emo_repl_order:
            tweet = tweet.replace(k, emo_repl[k])
        # items() exists on Python 2 and 3; iteritems() is Python 2 only and
        # raises AttributeError on Python 3.
        for r, repl in re_repl.items():
            tweet = re.sub(r, repl, tweet)
        return tweet

    tfidf_ngrams = TfidfVectorizer(preprocessor=preprocessor,
                                   analyzer="word")
    clf = MultinomialNB()
    pipeline = Pipeline([('tfidf', tfidf_ngrams), ('clf', clf)])
    if params:
        pipeline.set_params(**params)
    return pipeline
开发者ID:PacktPublishing,项目名称:Building-Machine-Learning-Systems-With-Python-Second-Edition,代码行数:23,代码来源:03_clean.py
示例14: create_union_model
# 需要导入模块: from sklearn import pipeline [as 别名]
# 或者: from sklearn.pipeline import Pipeline [as 别名]
def create_union_model(params=None):
    """Build a (LinguisticVectorizer + TF-IDF) -> MultinomialNB pipeline.

    Args:
        params: optional dict forwarded to ``Pipeline.set_params`` (skipped
            when falsy).

    Returns:
        The unfitted ``Pipeline`` with steps ``'all'`` (a ``FeatureUnion`` of
        ``'ling'`` and ``'tfidf'``) and ``'clf'``.
    """
    def preprocessor(tweet):
        """Lower-case ``tweet``, apply the substitutions, then turn '-' and '_' into spaces."""
        tweet = tweet.lower()
        # Plain string replacements, applied in the order given by emo_repl_order.
        for k in emo_repl_order:
            tweet = tweet.replace(k, emo_repl[k])
        # items() exists on Python 2 and 3; iteritems() is Python 2 only and
        # raises AttributeError on Python 3.
        for r, repl in re_repl.items():
            tweet = re.sub(r, repl, tweet)
        return tweet.replace("-", " ").replace("_", " ")

    tfidf_ngrams = TfidfVectorizer(preprocessor=preprocessor,
                                   analyzer="word")
    ling_stats = LinguisticVectorizer()
    all_features = FeatureUnion(
        [('ling', ling_stats), ('tfidf', tfidf_ngrams)])
    # Alternative single-extractor unions left by the author:
    #all_features = FeatureUnion([('tfidf', tfidf_ngrams)])
    #all_features = FeatureUnion([('ling', ling_stats)])
    clf = MultinomialNB()
    pipeline = Pipeline([('all', all_features), ('clf', clf)])
    if params:
        pipeline.set_params(**params)
    return pipeline
开发者ID:PacktPublishing,项目名称:Building-Machine-Learning-Systems-With-Python-Second-Edition,代码行数:27,代码来源:04_sent.py
示例15: test_imputation_pipeline_grid_search
# 需要导入模块: from sklearn import pipeline [as 别名]
# 或者: from sklearn.pipeline import Pipeline [as 别名]
def test_imputation_pipeline_grid_search():
    """Run a grid search over imputer strategies inside an imputer + tree pipeline."""
    X = sparse_random_matrix(100, 100, density=0.10)
    # The first stored value of X is the one the imputer treats as missing.
    missing_values = X.data[0]

    imputer = SimpleImputer(missing_values=missing_values)
    regressor = tree.DecisionTreeRegressor(random_state=0)
    pipeline = Pipeline([('imputer', imputer), ('tree', regressor)])

    parameters = {'imputer__strategy': ["mean", "median", "most_frequent"]}

    Y = sparse_random_matrix(100, 1, density=0.10).toarray()
    GridSearchCV(pipeline, parameters).fit(X, Y)