This article collects typical usage examples of the Python method sklearn.pipeline.Pipeline.get_params. If you have been wondering what exactly Pipeline.get_params does, and how to use it, the curated examples below should help. You can also read more about its containing class, sklearn.pipeline.Pipeline.
Below are 15 code examples of Pipeline.get_params, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python examples.
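Before the examples, a minimal sketch of what get_params returns (the StandardScaler/LogisticRegression pipeline here is an illustrative assumption, not taken from the examples below):

from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

pipe = Pipeline([('scale', StandardScaler()), ('clf', LogisticRegression())])
# deep=False lists only the Pipeline's own constructor parameters ('steps', ...)
print(sorted(pipe.get_params(deep=False)))
# deep=True also expands every step's parameters under '<step>__<param>' keys
assert 'clf__C' in pipe.get_params(deep=True)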
Example 1: test_pipeline_init
# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import get_params [as alias]
def test_pipeline_init():
    # Test the various init parameters of the pipeline.
    assert_raises(TypeError, Pipeline)
    # Check that we can't instantiate pipelines with objects without fit
    # method
    pipe = assert_raises(TypeError, Pipeline, [('svc', IncorrectT)])
    # Smoke test with only an estimator
    clf = T()
    pipe = Pipeline([('svc', clf)])
    assert_equal(pipe.get_params(deep=True),
                 dict(svc__a=None, svc__b=None, svc=clf,
                      **pipe.get_params(deep=False)))
    # Check that params are set
    pipe.set_params(svc__a=0.1)
    assert_equal(clf.a, 0.1)
    assert_equal(clf.b, None)
    # Smoke test the repr:
    repr(pipe)
    # Test with two objects
    clf = SVC()
    filter1 = SelectKBest(f_classif)
    pipe = Pipeline([('anova', filter1), ('svc', clf)])
    # Check that we can't use the same stage name twice
    assert_raises(ValueError, Pipeline, [('svc', SVC()), ('svc', SVC())])
    # Check that params are set
    pipe.set_params(svc__C=0.1)
    assert_equal(clf.C, 0.1)
    # Smoke test the repr:
    repr(pipe)
    # Check that params are not set when naming them wrong
    assert_raises(ValueError, pipe.set_params, anova__C=0.1)
    # Test clone
    pipe2 = clone(pipe)
    assert_false(pipe.named_steps['svc'] is pipe2.named_steps['svc'])
    # Check that, apart from the estimators, the parameters are the same
    params = pipe.get_params(deep=True)
    params2 = pipe2.get_params(deep=True)
    for x in pipe.get_params(deep=False):
        params.pop(x)
    for x in pipe2.get_params(deep=False):
        params2.pop(x)
    # Remove estimators that were copied
    params.pop('svc')
    params.pop('anova')
    params2.pop('svc')
    params2.pop('anova')
    assert_equal(params, params2)
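A point the assertions above rely on: with deep=True, the step name itself ('svc') maps to the estimator object, alongside the expanded 'svc__*' hyperparameters. A small sketch of that behavior (using SVC in place of the test's stub estimator):

from sklearn.pipeline import Pipeline
from sklearn.svm import SVC

clf = SVC()
pipe = Pipeline([('svc', clf)])
params = pipe.get_params(deep=True)
assert params['svc'] is clf     # the step object is itself a parameter
assert 'svc__C' in params       # and so are its hyperparameters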
Example 2: train_SGD_SVM
# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import get_params [as alias]
def train_SGD_SVM(train_data, train_label):
    pipeline = Pipeline([('clf', SGDClassifier(loss="huber", penalty="l2", shuffle=True,
                                               alpha=0.0001, epsilon=0.1, verbose=1))])
    print(pipeline.get_params().keys())
    parameters = {'clf__alpha': (0.001, 0.01)}
    grid_search = GridSearchCV(pipeline, parameters, verbose=1, scoring='f1')
    grid_search.fit(train_data, train_label)
    print('Best score: %0.3f' % grid_search.best_score_)
    print('Best parameters set:')
    best_parameters = grid_search.best_estimator_.get_params()
    for param_name in sorted(parameters.keys()):
        print('\t%s: %r' % (param_name, best_parameters[param_name]))
    return grid_search
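Printing pipeline.get_params().keys(), as Example 2 does, is the standard way to discover the exact names a parameter grid may use: every grid key must be a key of get_params(). A quick sketch under the same setup (modern import paths assumed):

from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline

pipeline = Pipeline([('clf', SGDClassifier())])
parameters = {'clf__alpha': (0.001, 0.01)}   # each key must match a get_params() key
assert set(parameters) <= set(pipeline.get_params())
grid_search = GridSearchCV(pipeline, parameters, scoring='f1')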
Example 3: test_set_random_states
# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import get_params [as alias]
def test_set_random_states():
    # Linear Discriminant Analysis doesn't have random state: smoke test
    _set_random_states(LinearDiscriminantAnalysis(), random_state=17)
    clf1 = Perceptron(tol=1e-3, random_state=None)
    assert_equal(clf1.random_state, None)
    # check random_state is None still sets
    _set_random_states(clf1, None)
    assert_true(isinstance(clf1.random_state, int))
    # check random_state fixes results in consistent initialisation
    _set_random_states(clf1, 3)
    assert_true(isinstance(clf1.random_state, int))
    clf2 = Perceptron(tol=1e-3, random_state=None)
    _set_random_states(clf2, 3)
    assert_equal(clf1.random_state, clf2.random_state)

    # nested random_state
    def make_steps():
        return [('sel', SelectFromModel(Perceptron(tol=1e-3,
                                                   random_state=None))),
                ('clf', Perceptron(tol=1e-3, random_state=None))]

    est1 = Pipeline(make_steps())
    _set_random_states(est1, 3)
    assert_true(isinstance(est1.steps[0][1].estimator.random_state, int))
    assert_true(isinstance(est1.steps[1][1].random_state, int))
    assert_not_equal(est1.get_params()['sel__estimator__random_state'],
                     est1.get_params()['clf__random_state'])

    # ensure multiple random_state parameters are invariant to get_params()
    # iteration order
    class AlphaParamPipeline(Pipeline):
        def get_params(self, *args, **kwargs):
            params = Pipeline.get_params(self, *args, **kwargs).items()
            return OrderedDict(sorted(params))

    class RevParamPipeline(Pipeline):
        def get_params(self, *args, **kwargs):
            params = Pipeline.get_params(self, *args, **kwargs).items()
            return OrderedDict(sorted(params, reverse=True))

    for cls in [AlphaParamPipeline, RevParamPipeline]:
        est2 = cls(make_steps())
        _set_random_states(est2, 3)
        assert_equal(est1.get_params()['sel__estimator__random_state'],
                     est2.get_params()['sel__estimator__random_state'])
        assert_equal(est1.get_params()['clf__random_state'],
                     est2.get_params()['clf__random_state'])
Example 4: OddtRegressor
# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import get_params [as alias]
class OddtRegressor(RegressorMixin):
    _model = None

    def __init__(self, *args, **kwargs):
        """ Assemble Neural network or SVM using sklearn pipeline """
        # Cherry-pick arguments for the model. Exclude 'steps', which is a
        # pipeline argument, and any nested 'step__param' style keys.
        local_kwargs = {key: kwargs.pop(key) for key in list(kwargs.keys())
                        if key != 'steps' and '__' not in key}
        if self._model is None:
            raise ValueError('Model not specified!')
        model = self._model(*args, **local_kwargs)
        self.pipeline = Pipeline([('empty_dims_remover', VarianceThreshold()),
                                  ('scaler', StandardScaler()),
                                  ('model', model)]).set_params(**kwargs)

    def get_params(self, deep=True):
        return self.pipeline.get_params(deep=deep)

    def set_params(self, **kwargs):
        return self.pipeline.set_params(**kwargs)

    def fit(self, descs, target_values, **kwargs):
        self.pipeline.fit(descs, target_values, **kwargs)
        return self

    def predict(self, descs):
        return self.pipeline.predict(descs)

    def score(self, descs, target_values):
        return self.pipeline.score(descs, target_values)
Example 5: svm
# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import get_params [as alias]
class svm(RegressorMixin):
    def __init__(self, *args, **kwargs):
        """ Assemble a proper SVM regressor using sklearn tools """
        # Cherry-pick arguments for the model. Exclude 'steps', which is a
        # pipeline argument, and any nested 'step__param' style keys.
        local_kwargs = {key: kwargs.pop(key) for key in list(kwargs.keys())
                        if key != 'steps' and len(key.split('__', 1)) == 1}
        self.pipeline = Pipeline([('empty_dims_remover', VarianceThreshold()),
                                  ('scaler', MinMaxScaler()),
                                  ('svm', SVR(*args, **local_kwargs))
                                  ]).set_params(**kwargs)

    def get_params(self, deep=True):
        return self.pipeline.get_params(deep=deep)

    def set_params(self, **kwargs):
        return self.pipeline.set_params(**kwargs)

    def fit(self, descs, target_values, **kwargs):
        self.pipeline.fit(descs, target_values, **kwargs)
        return self

    def predict(self, descs):
        return self.pipeline.predict(descs)

    def score(self, descs, target_values):
        return self.pipeline.score(descs, target_values)
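Examples 4 and 5 follow the same wrapper pattern: forwarding get_params and set_params to an inner Pipeline is what lets scikit-learn tooling such as GridSearchCV address the wrapped steps through 'step__param' names. A minimal sketch of that delegation, with illustrative class and step names:

from sklearn.base import RegressorMixin
from sklearn.feature_selection import VarianceThreshold
from sklearn.pipeline import Pipeline
from sklearn.svm import SVR

class WrappedSVR(RegressorMixin):
    def __init__(self, **kwargs):
        self.pipeline = Pipeline([('empty_dims_remover', VarianceThreshold()),
                                  ('svm', SVR())]).set_params(**kwargs)

    def get_params(self, deep=True):
        # Forwarding exposes 'svm__C', 'svm__epsilon', ... on the wrapper
        return self.pipeline.get_params(deep=deep)

    def set_params(self, **kwargs):
        return self.pipeline.set_params(**kwargs)

reg = WrappedSVR(svm__C=10.0)
assert reg.get_params()['svm__C'] == 10.0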
Example 6: main
# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import get_params [as alias]
def main(datafile, threshold):
    filename = "out{}{}.hrc".format(os.sep, os.path.basename(datafile.name))
    if not os.path.isfile(filename):
        header = datafile.readline()
        collist = [i for i, toggle in enumerate(header.split(",")) if toggle != "0"]
        datafile.seek(0)
        data = pd.read_csv(datafile, usecols=collist).to_numpy()
        pipeline = Pipeline([("clf", Hierarchical())])
        pipeline.set_params(**{})
        pipeline.fit(data)
        clf = pipeline.get_params()["clf"]
        hierarchy = clf.hierarchy_
        with open(filename, "wb") as fh:
            fh.write(ET.tostring(hierarchy.to_xml()))
    else:
        with open(filename, "rb") as fh:
            hierarchy = Cluster.from_xml(ET.parse(fh).getroot())
    print(ET.tostring(hierarchy.to_xml()).decode("utf-8"))
    if threshold is not None:
        clusters = hierarchy.cut(threshold)
        print("\n".join(c.to_str(i) for i, c in enumerate(clusters)))
        dump_graph(clusters)
Example 7: test_csp_pipeline
# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import get_params [as alias]
def test_csp_pipeline():
    """Test if CSP works in a pipeline."""
    from sklearn.svm import SVC
    from sklearn.pipeline import Pipeline
    csp = CSP(reg=1, norm_trace=False)
    svc = SVC()
    pipe = Pipeline([("CSP", csp), ("SVC", svc)])
    pipe.set_params(CSP__reg=0.2)
    assert pipe.get_params()["CSP__reg"] == 0.2
Example 8: main
# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import get_params [as alias]
def main(datafile, eps, min_samples):
    data = pd.read_csv(datafile, sep=' ')
    matrix = data.to_numpy()
    pipeline = Pipeline([('scaler', RobustScaler()), ('clusterer', DBSCAN())])
    pipeline.set_params(**{
        'clusterer__eps': eps,
        'clusterer__min_samples': min_samples,
    })
    pipeline.fit(matrix)
    clusterer = pipeline.get_params()['clusterer']
    dump_graph(clusterer.labels_)
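Several examples here recover a fitted step via pipeline.get_params()['<name>']; the named_steps attribute is an equivalent accessor that reads a little more directly. A short sketch mirroring Example 8's step names:

from sklearn.cluster import DBSCAN
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import RobustScaler

pipeline = Pipeline([('scaler', RobustScaler()), ('clusterer', DBSCAN())])
# Both expressions return the very same DBSCAN instance
assert pipeline.named_steps['clusterer'] is pipeline.get_params()['clusterer']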
Example 9: load_feature_set
# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import get_params [as alias]
def load_feature_set(db, cache_path='../data', position='RB', load_cached=True,
                     nlag=6, to_yr_wk=(2015, 6), stat_override=None):
    if not load_cached:
        # make player data transformer
        yr_wk = [(j, i) for j in range(2009, to_yr_wk[0]) for i in range(1, 18)]
        yr_wk += [(to_yr_wk[0], i) for i in range(1, to_yr_wk[1] + 1)]
        if stat_override:
            stats = stat_override
        else:
            stats = position_stats(position)
        player_info = ['player_id', 'full_name', 'position']
        playerData = WeeklyPlayerData(db=db, yr_wk=yr_wk, stats=stats,
                                      player_info=player_info, fill_time=True,
                                      position=position)
        # creates lags of the data
        lag_cols = ['year', 'week', 'played'] + stats
        lagData = LagPlayerData(nlag=nlag, groupby_cols=['player_id'],
                                lag_cols=lag_cols, same_year_bool=True)
        # creates means of the data
        mean_cols = stats
        meanData = MeanPlayerData(groupby_cols=['player_id'], mean_cols=mean_cols)
        # pipeline for getting data
        pipe1 = Pipeline(steps=[('data', playerData), ('lag', lagData), ('mean', meanData)])
        # processed_data = pipe1.fit_transform(X=None)
        # print processed_data
        # pipeline for setting which columns we want and handling NaN
        pct_played_threshold = 0.0
        pipe2_steps = [('handle', HandleNaN(method='fill')),
                       ('filterplayed', FilterPlayedPercent(pct_played_threshold=pct_played_threshold))]
        pipe2 = Pipeline(steps=pipe2_steps)
        pipe = Pipeline([('pipe1', pipe1), ('pipe2', pipe2)])
        all_columns = pipe.fit_transform(X=None)
        all_columns.position = all_columns.position.astype(str)
        # pickle files (the live db handle is detached before serializing)
        pickle.dump(pipe.set_params(pipe1__data__db=None),
                    open(cache_path + '/pipe_' + position + '.p', 'wb'))
        pickle.dump(all_columns, open(cache_path + '/data_' + position + '.p', 'wb'))
    else:
        # Load from "cached" (pickled) transformer and data
        # data
        all_columns = pickle.load(open(cache_path + '/data_' + position + '.p', 'rb'))
        # pipeline
        pipe = pickle.load(open(cache_path + '/pipe_' + position + '.p', 'rb'))
    # retrieve the list of stats that was predicted
    pipe_params = pipe.get_params()
    stats = pipe_params['pipe1__data__stats']
    pipe.set_params(pipe1__data__db=db)
    return (all_columns, pipe, stats)
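A detail Example 9 leans on: set_params returns the estimator itself, which is what allows pickle.dump(pipe.set_params(pipe1__data__db=None), ...) to detach the unpicklable database handle inline, while get_params later reads a nested setting back out of the unpickled object. A minimal sketch of the same round trip on a plain sklearn pipeline (step names illustrative):

import io
import pickle
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

pipe = Pipeline([('scale', StandardScaler())])
buf = io.BytesIO()
pickle.dump(pipe.set_params(scale__with_mean=False), buf)  # set_params returns pipe
restored = pickle.loads(buf.getvalue())
assert restored.get_params()['scale__with_mean'] is False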
Example 10: main
# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import get_params [as alias]
def main(datafile, k):
    header = datafile.readline()
    collist = [i for i, toggle in enumerate(header.split(',')) if toggle != "0"]
    datafile.seek(0)
    data = pd.read_csv(datafile, usecols=collist).to_numpy()
    pipeline = Pipeline([('clf', KMeans())])
    pipeline.set_params(**{
        'clf__k': k,
        'clf__delta_sse': 4,
    })
    pipeline.fit(data)
    clf = pipeline.get_params()['clf']
    print('\n'.join(str(c) for c in clf.clusters_))
    dump_graph(data, clf.assignments_)
Example 11: BaseMethod
# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import get_params [as alias]
class BaseMethod(object):
    def __init__(self, train, feature_union=None, useCache=True, clf=None, defaults={}):
        self.clf = clf(**defaults)
        self.useCache = useCache
        self.feature_union = feature_union
        self.grid = Pipeline([
            ('features', self.feature_union),
            ('clf', self.clf)
        ])
        self.train(train[:, 0], train[:, 1])

    def train(self, docs_train, y_train):
        # Strip memory addresses so the cache key is stable across runs
        feature_union_hash = re.sub(r" at 0x[0-9a-f]+>", "", str(self.feature_union.transformer_list))
        cache_key = feature_union_hash + str(self.clf) + str(docs_train)
        cached = cache.load_pickle(cache_key)
        if cached and self.useCache:
            print("Loading from cache...")
            self.best_estimator = cached['est']
            self.best_score = cached['scr']
            self.best_params = cached['parm']
        else:
            self.grid.fit(docs_train, y_train)
            self.best_estimator = self.grid
            self.best_params = self.grid.get_params(False)
            self.best_score = 1
            cache.save_pickle(cache_key, {
                "est": self.best_estimator,
                "scr": self.best_score,
                "parm": self.best_params
            })
        return self.grid

    def predict(self, arg_input):
        orig = arg_input
        if isinstance(arg_input, str):
            orig = [orig]
        predictions = self.best_estimator.predict(orig)
        if isinstance(arg_input, str):
            return predictions[0]
        return predictions
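Note that BaseMethod stores self.grid.get_params(False), passing deep positionally: with deep=False a Pipeline reports only its own constructor parameters (notably 'steps'), not the nested 'clf__*' settings. A quick sketch of the difference:

from sklearn.pipeline import Pipeline
from sklearn.svm import SVC

pipe = Pipeline([('clf', SVC())])
shallow = pipe.get_params(deep=False)
assert 'steps' in shallow and 'clf__C' not in shallow
assert 'clf__C' in pipe.get_params(deep=True)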
Example 12: linear
# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import get_params [as alias]
def linear():
    # warnings.filterwarnings(action='ignore', category=ConvergenceWarning)
    np.random.seed(0)
    np.set_printoptions(linewidth=1000)
    N = 2
    x = np.linspace(0, 6, N) + np.random.randn(N)
    x = np.sort(x)
    y = x ** 2 - 4 * x - 3 + np.random.randn(N)
    x.shape = -1, 1
    y.shape = -1, 1
    p = Pipeline([
        ('poly', PolynomialFeatures()),
        ('linear', LinearRegression(fit_intercept=False))])
    mpl.rcParams['font.sans-serif'] = [u'simHei']
    mpl.rcParams['axes.unicode_minus'] = False
    np.set_printoptions(suppress=True)
    # plt.figure(figsize=(8, 6), facecolor='w')
    d_pool = np.arange(1, N, 1)  # polynomial degrees
    m = d_pool.size
    clrs = []  # line colors
    for c in np.linspace(16711680, 255, m):
        clrs.append('#%06x' % int(c))
    line_width = np.linspace(5, 2, m)
    # plt.plot(x, y, 'ro', ms=10, zorder=N)
    for i, d in enumerate(d_pool):
        p.set_params(poly__degree=d)
        p.fit(x, y.ravel())
        lin = p.get_params()['linear']
        output = u'%s: degree %d, coefficients:' % (u'linear regression', d)
        print(output, lin.coef_.ravel())
        x_hat = np.linspace(x.min(), x.max(), num=100)
        x_hat.shape = -1, 1
        y_hat = p.predict(x_hat)
        s = p.score(x, y)
        z = N - 1 if (d == 2) else 0
        label = u'degree %d, $R^2$=%.3f' % (d, s)
Example 13: print
# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import get_params [as alias]
if accuracy > best_fold:
    best_fold = accuracy
print("Classifier accuracy is: " + str(accuracy * 100) + "%")
print("done in %0.3fs" % (time() - t0))
if settings.SHOW_EVALUATION:
    print("Best accuracy:", best_fold)

# Get questions from the user
while True:
    print()
    user_question = get_question_features(input("Enter a question: "))
    tok_question = word_tokenize(user_question)
    atype = QA.get_answer_type(tok_question)
    predict_dist = classifier.predict_proba([user_question])
    predict_dist = [(classifier.get_params()['clf'].classes_[i], predict_dist[0][i])
                    for i in range(len(predict_dist[0]))]
    predict_dist.sort(key=operator.itemgetter(1), reverse=True)
    final_answer = "I don't know the answer to that question."
    # Very certain
    if predict_dist[0][1] >= 0.06:
        if settings.SHOW_PREDICTIONS:
            print("Your question belongs to:", predict_dist[0][0].replace('.txt', ''))
        answers = QA.get_candidate_answers(user_question, [predict_dist[0][0].replace('.txt', '')])
        final_answer = QA.extract_passage(user_question, atype, answers)
    # Partially certain
    elif predict_dist[0][1] >= 0.04:
        subdomains = []
        for i in range(3):
Example 14: GaussianNB
# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import get_params [as alias]
naivebayes = GaussianNB()
logistic = LogisticRegression(penalty='l2', max_iter=5, random_state=40)
# newsGroupClassifier(svd_transformer, naivebayes, twenty_train, twenty_test)
newsGroupClassifier(svd_transformer, svc, twenty_train, twenty_test)
# newsGroupClassifier(svd_transformer, svcl, twenty_train, twenty_test)
# newsGroupClassifier(svd_transformer, logistic, twenty_train, twenty_test)

import numpy as np
from sklearn import metrics

# Grid Search
# define a pipeline
pipeline = Pipeline([
    ('vect', svd_transformer),
    ('svmobj', svc),
])
print(pipeline.get_params().keys())
parameters = {
    # 'svmobj__alpha': (0.00001, 0.000001),
    # 'svmobj__penalty': ('l2', 'elasticnet'),
    'svmobj__gamma': [1e-3, 1e3]
}

if __name__ == "__main__":
    grid_search = GridSearchCV(pipeline, parameters, n_jobs=-1, verbose=1, cv=5)
    print("Performing grid search...")
    # print("pipeline:", [name for name, _ in pipeline.steps])
    # print("parameters:")
    # pprint(parameters)
    t0 = time()
    grid_search.fit(twenty_train.data, twenty_train.target)
Example 15: MKSHomogenizationModel
# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import get_params [as alias]
# ......... part of the code is omitted here .........
        X_reshape = self._reduce_shape(X)
        X_reduced = self._fit_transform(X_reshape, reduce_labels)
        self._linker.fit(X_reduced, y)

    def predict(self, X, confidence_index=None):
        """Predicts macroscopic property for the microstructures `X`.

        Args:
            X (ND array): The microstructure, an `(n_samples, n_x, ...)`
                shaped array where `n_samples` is the number of samples and
                `n_x` is the spatial discretization.
            confidence_index (ND array, optional): array with same shape as X
                used to assign a confidence value for each data point.

        Returns:
            The predicted macroscopic property for `X`.

        Example

        >>> import numpy as np
        >>> from sklearn.manifold import LocallyLinearEmbedding
        >>> from sklearn.linear_model import BayesianRidge
        >>> from pymks.bases import PrimitiveBasis
        >>> np.random.seed(1)
        >>> X = np.random.randint(2, size=(50, 100))
        >>> y = np.random.random(50)
        >>> reducer = LocallyLinearEmbedding()
        >>> linker = BayesianRidge()
        >>> prim_basis = PrimitiveBasis(2, domain=[0, 1])
        >>> model = MKSHomogenizationModel(prim_basis, n_components=2,
        ...                                dimension_reducer=reducer,
        ...                                property_linker=linker)
        >>> model.fit(X, y)
        >>> X_test = np.random.randint(2, size=(1, 100))

        Predict with microstructures

        >>> y_pred = model.predict(X_test)

        Predict with spatial correlations

        >>> from pymks.stats import correlate
        >>> model.compute_correlations = False
        >>> X_corr = correlate(X, prim_basis, correlations=[(0, 0)])
        >>> model.fit(X_corr, y)
        >>> X_corr_test = correlate(X_test, prim_basis,
        ...                         correlations=[(0, 0)])
        >>> y_pred_stats = model.predict(X_corr_test)
        >>> assert np.allclose(y_pred_stats, y_pred, atol=1e-3)
        """
        if not hasattr(self._linker.get_params()['connector'], "coef_"):
            raise RuntimeError('fit() method must be run before predict().')
        _size = self._size_axes(self.basis)
        X = self.basis._reshape_feature(X, tuple(_size))
        if self.compute_correlations is True:
            X = self._compute_stats(X, confidence_index)
        X_reduced = self._transform(X)
        self.reduced_predict_data = X_reduced
        return self._linker.predict(X_reduced)

    def score(self, X, y, confidence_index=None):
        """
        The score function for the MKSHomogenizationModel. It formats the
        data and uses the score method from the property_linker.

        Args:
            X (ND array): The microstructure, an `(n_samples, n_x, ...)`
                shaped array where `n_samples` is the number of samples and
                `n_x` is the spatial discretization.
            y (1D array): The material property associated with `X`.
            confidence_index (ND array, optional): array with same shape as X
                used to assign a confidence value for each data point.

        Returns:
            Score for MKSHomogenizationModel from the selected
            property_linker.
        """
        if not callable(getattr(self._linker, "score", None)):
            raise RuntimeError(
                "property_linker does not have score() method.")
        _size = self._size_axes(self.basis)
        X = self.basis._reshape_feature(X, _size)
        if self.compute_correlations:
            X = self._compute_stats(X, confidence_index)
        X_reduced = self._transform(X)
        return self._linker.score(X_reduced, y)

    def _size_axes(self, basis):
        """Helper function used to get the correct size of the axes when
        handling both periodic and non-periodic axes.
        """
        _size = self.basis._axes_shape
        if self.periodic_axes is None or len(self.periodic_axes) != len(_size):
            _axes = list(range(len(_size)))
            if self.periodic_axes is not None:
                [_axes.remove(a) for a in self.periodic_axes]
            _size = np.ones(len(_size), dtype=int) * _size
            _size[_axes] = _size[_axes] // 2
        return tuple(_size)
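Example 15's fitted-check, hasattr(self._linker.get_params()['connector'], "coef_"), illustrates using get_params to reach a nested estimator and probe for attributes that only exist after fitting. A sketch of the same probe on a plain pipeline (step name illustrative):

from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline

pipe = Pipeline([('reg', LinearRegression())])
assert not hasattr(pipe.get_params()['reg'], 'coef_')   # not fitted yet
pipe.fit([[0.], [1.]], [0., 1.])
assert hasattr(pipe.get_params()['reg'], 'coef_')       # fitted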