This article collects typical usage examples of the Python method sklearn.pipeline.Pipeline.set_params. If you are wondering what Pipeline.set_params does, how to use it, or want to see it in context, the curated code examples below may help. You can also read further about the class it belongs to, sklearn.pipeline.Pipeline.
The following shows 15 code examples of Pipeline.set_params, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
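As a quick orientation before the examples, here is a minimal sketch (not taken from the examples below) of the step__parameter naming convention that Pipeline.set_params uses; the step names 'scale' and 'clf' are illustrative:

# Minimal sketch of the Pipeline.set_params naming convention;
# the step names 'scale' and 'clf' are illustrative.
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

pipe = Pipeline([('scale', StandardScaler()), ('clf', LogisticRegression())])
pipe.set_params(clf__C=10.0)                            # set a parameter of the 'clf' step
pipe.set_params(scale=StandardScaler(with_mean=False))  # or replace a whole step by name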
Example 1: score_for_params
# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import set_params [as alias]
def score_for_params(kw):
    '''
    | Get the score for a single set of parameters.
    | Takes keyword arguments, including whether or not the median
    | filter is included in the pipeline.
    '''
    # Pack the arguments into a single parameter dictionary
    params = _get_params(logistic_regression__C=kw)
    # Build the pipeline: image-processing steps followed by a classifier.
    # Note: this could map to a dictionary of image processors instead of
    # special-casing the median filter.
    image_processors = [('hog', image_processing.HOG())]
    if params.pop('median'):
        image_processors.insert(0, ('median_smooth', image_processing.MedianSmooth()))
    else:
        # The median step is absent, so drop its parameter as well
        params.pop('median_smooth__radius')
    classifier = ('logistic_regression', LogisticRegression())
    estimators = image_processors + [classifier]
    pipeline = Pipeline(estimators)
    # Apply the chosen parameters, then fit
    pipeline.set_params(**params).fit(X_train, y_train)
    pipeline.predict(X_test)  # predictions unused; score() below recomputes them
    return pipeline.score(X_train, y_train), pipeline.score(X_test, y_test)
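Note the params.pop calls: set_params raises a ValueError for any key that does not correspond to a step in the pipeline, so the parameter for the absent median_smooth step must be removed before the call.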
Example 2: predictModelForModelSelection
# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import set_params [as alias]
def predictModelForModelSelection(city, config, grid, grid_crimes, grid_column, freq_short, n_feat, crime_column,
                                  cell_id, train_win, tspan, sspan, estimator, param_grid, model_name):
    preparator = dataprep.DataPreparator(city, config, grid, grid_crimes,
                                         grid_column, freq_short, n_feat)
    # get the data into df_base format
    df_base = preparator.getTimeSeriesData(crime_column, cell_id, train_win, tspan, 1,
                                           sspan, 1, 'ModelSel_R_in', 'ModelSel_R_out')
    # prepare df_base to fit the formal requirements of tsfresh
    input_columns_lag = ['Crimes']
    y_col = 'Crimes'
    ts_data, y = preparator.prepareTimeSeriesData(df_base, input_columns_lag, y_col)
    # split data (n_train, n_test and normalizer are assumed to be defined at module level)
    X_train, X_test, y_train, y_test = helper.splitData(y, n_train, n_test)
    pipeline = Pipeline([('lagCrime', lagExtractor.LagExtractor(n_lag=n_feat)),
                         ('normalizer', normalizer),
                         ('estimator', estimator)])
    pipeline.set_params(lagCrime__lagged_ts_container=preparator.ts_lag(df_base, 'Crimes'))
    rand_grid_search = model_selection.RandomizedSearchCV(pipeline, param_distributions=param_grid, n_iter=100,
                                                          cv=model_selection.TimeSeriesSplit(n_splits=3))
    rand_grid_search.fit(X_train, y_train)
    y_pred = rand_grid_search.predict(X_test)
    return reporting.getPredictionResults_cell_only_Model(cell_id, model_name, n_feat, n_train, n_test,
                                                          y_test, y_pred), y_test, y_pred
Example 3: main
# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import set_params [as alias]
def main(datafile, threshold):
    filename = "out{}{}.hrc".format(os.sep, os.path.basename(datafile.name))
    if not os.path.isfile(filename):
        header = datafile.readline()
        collist = [i for i, toggle in enumerate(header.split(",")) if toggle != "0"]
        datafile.seek(0)
        # .as_matrix() was removed in modern pandas; .to_numpy() is the replacement
        data = pd.read_csv(datafile, usecols=collist).to_numpy()
        pipeline = Pipeline([("clf", Hierarchical())])
        pipeline.set_params(**{})  # no parameter overrides in this run
        pipeline.fit(data)
        clf = pipeline.get_params()["clf"]
        hierarchy = clf.hierarchy_
        with open(filename, "wb") as fh:
            fh.write(ET.tostring(hierarchy.to_xml()))
    else:
        with open(filename, "rb") as fh:
            hierarchy = Cluster.from_xml(ET.parse(fh).getroot())
    print(ET.tostring(hierarchy.to_xml()).decode("utf-8"))
    if threshold is not None:
        clusters = hierarchy.cut(threshold)
        print("\n".join(c.to_str(i) for i, c in enumerate(clusters)))
        dump_graph(clusters)
Example 4: DecompositionPreprocessor
# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import set_params [as alias]
class DecompositionPreprocessor(Preprocessor):
    def __init__(self, training_data, input_variables, decomp_method='pca', n_components=2):
        assert training_data.shape[1] == len(input_variables)
        self.input_variables = input_variables
        self.n_components = n_components
        self.variables = ['V%d' % (i + 1) for i in range(self.n_components)]
        impute = Imputer()  # note: Imputer was replaced by SimpleImputer in sklearn >= 0.22
        nzv = VarianceThreshold()
        scale = StandardScaler()
        model_l = [('impute', impute), ('nzv', nzv), ('scale', scale)]
        if decomp_method == 'pca':
            pca = PCA()
            model_l.append(('pca', pca))
            self.model = Pipeline(model_l)
            self.model.set_params(pca__whiten=False, pca__n_components=n_components)
        elif decomp_method == 'mbspca':
            mbspca = MiniBatchSparsePCA()
            model_l.append(('mbspca', mbspca))
            self.model = Pipeline(model_l)
            self.model.set_params(mbspca__n_components=n_components, mbspca__verbose=True)
        self._train(training_data)

    def _train(self, training_data):
        self.model.fit(training_data)

    def __call__(self, data):
        return self.model.transform(data)
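A hedged usage sketch of this class; the variable names, column labels, and array shapes below are assumptions, not taken from the example:

# Hypothetical usage; training_data and new_data are assumed to be NumPy
# arrays whose columns correspond to input_variables.
prep = DecompositionPreprocessor(training_data, ['x1', 'x2', 'x3'],
                                 decomp_method='pca', n_components=2)
reduced = prep(new_data)  # __call__ applies the fitted pipeline's transform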
Example 5: PlotPerfPercentFeatures
# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import set_params [as alias]
def PlotPerfPercentFeatures(X, y, est=LinearSVC()):
    '''
    Performance of a classifier (default: linear SVM) as the percentile
    of features selected by an F-test (ANOVA) is varied.
    http://scikit-learn.org/stable/auto_examples/svm/plot_svm_anova.html#example-svm-plot-svm-anova-py
    '''
    transform = SelectPercentile(f_classif)
    clf = Pipeline([('anova', transform), ('est', est)])
    ###############################################################################
    # Plot the cross-validation score as a function of percentile of features
    score_means = list()
    score_stds = list()
    percentiles = (1, 2, 3, 5, 7, 10, 13, 15, 20, 25, 33, 50, 65, 75, 90, 100)
    # percentiles = (1, 5, 10, 25, 50, 75, 90)
    for percentile in percentiles:
        clf.set_params(anova__percentile=percentile)
        # StratifiedShuffleSplit is called here with the pre-0.18 cross-validation API
        this_scores = cross_val_score(clf, X, y, cv=StratifiedShuffleSplit(y, n_iter=7, test_size=0.3), n_jobs=-1)
        score_means.append(this_scores.mean())
        score_stds.append(this_scores.std())
    print("Outputting Graph:")
    plt.errorbar(percentiles, score_means, np.array(score_stds))
    plt.title('Predictor performance, varying the percent of features used')
    plt.xlabel('Percentile')
    plt.ylabel('Prediction Performance')
    plt.axis('tight')
    plt.show()
Example 6: create_ngram_model
# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import set_params [as alias]
def create_ngram_model(params=None):
    tfidf_ngrams = TfidfVectorizer(ngram_range=(1, 3),
                                   analyzer="word", binary=False)
    clf = MultinomialNB()
    # Results comparison:
    # 0.757 0.046 0.850 0.044
    # == Pos vs. rest ==
    # 0.595 0.064 0.668 0.071
    # == Neg vs. rest ==
    # 0.712 0.044 0.505 0.067
    # time spent: 51.199045181274414
    # 0.771 0.048 0.869 0.053
    # == Pos vs. rest ==
    # 0.614 0.044 0.692 0.048
    # == Neg vs. rest ==
    # 0.748 0.030 0.504 0.069
    # params1 = {"alpha": 0.05, "class_prior": None, "fit_prior": True}
    # clf.set_params(**params1)
    pipeline = Pipeline([('vect', tfidf_ngrams), ('clf', clf)])
    if params:
        pipeline.set_params(**params)
    return pipeline
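A possible way to call this factory, shown as a sketch: the parameter values below are illustrative assumptions, not taken from the example. Keys are prefixed with the step names 'vect' and 'clf' so set_params can route them to the right step.

# Hypothetical usage of create_ngram_model; the values are assumptions.
params = {
    'vect__ngram_range': (1, 2),  # forwarded to the TfidfVectorizer step
    'clf__alpha': 0.05,           # forwarded to the MultinomialNB step
}
pipeline = create_ngram_model(params)
pipeline.fit(X_train, y_train)    # X_train / y_train assumed to exist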
Example 7: classify
# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import set_params [as alias]
def classify(data_trn, lbl_trn, data_vld, lbl_vld, data_tst, lbl_tst):
    data_trn = normalize(data_trn, copy=False)
    data_vld = normalize(data_vld, copy=False)
    data_tst = normalize(data_tst, copy=False)
    # accuracy metric
    metric_obj = mean_squared_error
    '''
    Train our model to predict labels for the dataset #1
    '''
    # note: 'probability' was accepted by SVR in older scikit-learn releases,
    # but recent versions of SVR no longer have this parameter
    parameters = {'svr__gamma': 1.5, 'svr__probability': False, 'svr__epsilon': 0.4, 'svr__C': 1, 'svr__kernel': 'rbf'}
    cls = Pipeline([
        # ('feature_selection', LinearSVC()),
        ('svr', SVR())
    ])
    cls.set_params(**parameters)
    cls.fit(data_trn, lbl_trn)
    pred_vld = cls.predict(data_vld)
    pred_tst = cls.predict(data_tst)
    print("Score for vld: %.6f" % (metric_obj(lbl_vld, pred_vld),))
    print("Score for tst: %.6f" % (metric_obj(lbl_tst, pred_tst),))
    return pred_vld, pred_tst
Example 8: baart_criteria
# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import set_params [as alias]
def baart_criteria(self, X, y):
    """
    Returns the optimal Fourier series degree as determined by
    `Baart's Criteria <http://articles.adsabs.harvard.edu/cgi-bin/nph-iarticle_query?1986A%26A...170...59P&data_type=PDF_HIGH&whole_paper=YES&type=PRINTER&filetype=.pdf>`_ [JOP]_.

    **Citations**

    .. [JOP] J. O. Petersen, 1986,
        "Studies of Cepheid type variability. IV.
        The uncertainties of Fourier decomposition parameters.",
        A&A, Vol. 170, p. 59-69
    """
    try:
        min_degree, max_degree = self.degree_range
    except ValueError:
        raise ValueError("Degree range must be a length two sequence")
    cutoff = self.baart_tolerance(X)
    pipeline = Pipeline([('Fourier', Fourier()),
                         ('Regressor', self.regressor)])
    sorted_X = numpy.sort(X, axis=0)
    X_sorting = numpy.argsort(rowvec(X))
    for degree in range(min_degree, max_degree):
        pipeline.set_params(Fourier__degree=degree)
        pipeline.fit(X, y)
        lc = pipeline.predict(sorted_X)
        residuals = y[X_sorting] - lc
        p_c = autocorrelation(residuals)
        if abs(p_c) <= cutoff:
            return degree
    # reached max_degree without reaching cutoff
    return max_degree
Example 9: svm_anova
# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import set_params [as alias]
def svm_anova(train_data, labels):
    ###############################################################################
    # Create a feature-selection transform and an instance of SVM that we
    # combine together to have a full-blown estimator
    transform = SelectPercentile(f_classif)
    clf = Pipeline([('anova', transform), ('svc', SVC(C=1.0))])
    ###############################################################################
    # Plot the cross-validation score as a function of percentile of features
    score_means = list()
    score_stds = list()
    percentiles = (1, 3, 6, 10, 15, 20, 30, 40, 60, 80, 100)
    for percentile in percentiles:
        clf.set_params(anova__percentile=percentile)
        # Compute the cross-validation score (n_jobs=1; use -1 for all CPUs)
        this_scores = cross_val_score(clf, train_data, labels, n_jobs=1)
        score_means.append(this_scores.mean())
        score_stds.append(this_scores.std())
    plt.errorbar(percentiles, score_means, np.array(score_stds))
    plt.title('Performance of the SVM-Anova varying the percentile of features selected')
    plt.xlabel('Percentile')
    plt.ylabel('Prediction rate')
    plt.axis('tight')
    plt.show()
Example 10: create_union_model
# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import set_params [as alias]
def create_union_model(params=None):
    def preprocessor(tweet):
        tweet = tweet.lower()
        for k in emo_repl_order:
            tweet = tweet.replace(k, emo_repl[k])
        for r, repl in re_repl.items():
            tweet = re.sub(r, repl, tweet)
        return tweet.replace("-", " ").replace("_", " ")

    tfidf_ngrams = TfidfVectorizer(preprocessor=preprocessor,
                                   analyzer="word")
    ling_stats = LinguisticVectorizer()
    all_features = FeatureUnion(
        [('ling', ling_stats), ('tfidf', tfidf_ngrams)])
    # all_features = FeatureUnion([('tfidf', tfidf_ngrams)])
    # all_features = FeatureUnion([('ling', ling_stats)])
    clf = MultinomialNB()
    pipeline = Pipeline([('all', all_features), ('clf', clf)])
    if params:
        pipeline.set_params(**params)
    return pipeline
Example 11: create_ngram_model
# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import set_params [as alias]
def create_ngram_model(params=None):
    tfidf_ngrams = TfidfVectorizer(ngram_range=(1, 3), analyzer="word", binary=False)
    clf = MultinomialNB()
    pipeline = Pipeline([("vect", tfidf_ngrams), ("clf", clf)])
    if params:
        pipeline.set_params(**params)
    return pipeline
Example 12: create_model
# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import set_params [as alias]
def create_model(self, params=None):
    all_features = self._unify_features()
    pipeline = Pipeline([('all', all_features), ('clf', self.classifier)])
    if params:
        pipeline.set_params(**params)
    return pipeline
Example 13: test_set_params_nested_pipeline
# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import set_params [as alias]
def test_set_params_nested_pipeline():
    estimator = Pipeline([
        ('a', Pipeline([
            ('b', DummyRegressor())
        ]))
    ])
    estimator.set_params(a__b__alpha=0.001, a__b=Lasso())
    estimator.set_params(a__steps=[('b', LogisticRegression())], a__b__C=5)
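Both calls work regardless of keyword order: when a Pipeline receives set_params keywords, it applies whole-step replacements (a__b=Lasso(), a__steps=...) before forwarding the remaining nested parameters (a__b__alpha, a__b__C), so each parameter is set on the newly installed estimator.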
Example 14: test_pipeline_init
# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import set_params [as alias]
def test_pipeline_init():
    # Test the various init parameters of the pipeline.
    assert_raises(TypeError, Pipeline)
    # Check that we can't instantiate pipelines with objects without fit
    # method
    pipe = assert_raises(TypeError, Pipeline, [('svc', IncorrectT)])
    # Smoke test with only an estimator
    clf = T()
    pipe = Pipeline([('svc', clf)])
    assert_equal(pipe.get_params(deep=True),
                 dict(svc__a=None, svc__b=None, svc=clf,
                      **pipe.get_params(deep=False)))
    # Check that params are set
    pipe.set_params(svc__a=0.1)
    assert_equal(clf.a, 0.1)
    assert_equal(clf.b, None)
    # Smoke test the repr:
    repr(pipe)
    # Test with two objects
    clf = SVC()
    filter1 = SelectKBest(f_classif)
    pipe = Pipeline([('anova', filter1), ('svc', clf)])
    # Check that we can't use the same stage name twice
    assert_raises(ValueError, Pipeline, [('svc', SVC()), ('svc', SVC())])
    # Check that params are set
    pipe.set_params(svc__C=0.1)
    assert_equal(clf.C, 0.1)
    # Smoke test the repr:
    repr(pipe)
    # Check that params are not set when naming them wrong
    assert_raises(ValueError, pipe.set_params, anova__C=0.1)
    # Test clone
    pipe2 = clone(pipe)
    assert_false(pipe.named_steps['svc'] is pipe2.named_steps['svc'])
    # Check that apart from estimators, the parameters are the same
    params = pipe.get_params(deep=True)
    params2 = pipe2.get_params(deep=True)
    for x in pipe.get_params(deep=False):
        params.pop(x)
    for x in pipe2.get_params(deep=False):
        params2.pop(x)
    # Remove estimators that were copied
    params.pop('svc')
    params.pop('anova')
    params2.pop('svc')
    params2.pop('anova')
    assert_equal(params, params2)
Example 15: featureSelection_cK
# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import set_params [as alias]
def featureSelection_cK(title, clf, X, y, CV, n_jobs=1, kvalues=list(range(1, 43))):
    """
    Performs "choose K best" feature selection and returns a plot of
    performance vs. the number of features.

    Parameters
    ----------
    title : string
        Title for the chart.
    clf : object type that implements the "fit" and "predict" methods
        An object of that type which is cloned for each validation.
    X : array-like, shape (n_samples, n_features)
        Training vector, where n_samples is the number of samples and
        n_features is the number of features.
    y : array-like, shape (n_samples) or (n_samples, n_features), optional
        Target relative to X for classification or regression;
        None for unsupervised learning.
    CV : integer, cross-validation generator, optional
        If an integer is passed, it is the number of folds (defaults to 3).
        Specific cross-validation objects can be passed, see the
        sklearn.cross_validation module for the list of possible objects.
    n_jobs : integer, optional
        Number of jobs to run in parallel (default 1).
    kvalues : list
        The numbers of features to test, one per iteration.
    """
    # Combine a feature-selection transform and a classifier to create a full-blown estimator
    transform = feature_selection.SelectKBest(feature_selection.f_classif)
    clf_k = Pipeline([('anova', transform), ('svc', clf)])
    # Plot the cross-validation score as a function of number of features
    score_means = list()
    score_stds = list()
    for k in kvalues:
        clf_k.set_params(anova__k=k)
        # Compute the cross-validation score
        this_scores = cross_validation.cross_val_score(clf_k, X, y, cv=CV, n_jobs=n_jobs)
        score_means.append(this_scores.mean())
        score_stds.append(this_scores.std())
    transform.fit(X, y)
    feat_scores = transform.scores_
    plt.errorbar(kvalues, score_means, np.array(score_stds))
    plt.title(title)
    plt.xlabel('Number of Features')
    plt.ylabel('Prediction rate')
    plt.axis('tight')
    return plt, score_means, feat_scores
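A hedged call sketch for this function; the classifier choice and the data variables X and y below are illustrative assumptions:

# Hypothetical call; X and y are assumed to be a loaded feature matrix and labels.
plot, means, scores = featureSelection_cK('SVC, choose-K-best features',
                                          SVC(kernel='linear'), X, y, CV=5, n_jobs=-1)
plot.show()  # the function returns the plt module, so show() displays the chart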