本文整理汇总了 Python 中 sklearn.pipeline.make_pipeline 函数的典型用法代码示例。如果您正苦于以下问题：Python make_pipeline 函数的具体用法？Python make_pipeline 怎么用？Python make_pipeline 使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了 make_pipeline 函数的 15 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: main
def main(met_fname, gday_outfname, var):
    """Fit a k-nearest-neighbours emulator of one GDAY output and plot it.

    The met file is read through ``remove_comments_from_header``, its first
    two data columns (year, doy) are dropped, and a
    ``StandardScaler`` + ``KNeighborsRegressor`` pipeline is tuned with a
    5-fold ``GridSearchCV`` on the first ``n_train`` rows.  The fitted
    emulator then predicts every row, and the rows after the training
    window are plotted next to the GDAY values.

    Parameters
    ----------
    met_fname :
        Met forcing file, handed to ``remove_comments_from_header``.
    gday_outfname :
        CSV file of GDAY outputs.
    var :
        Name of the GDAY output column to emulate.
    """
    n_train = 4000   # rows used to fit the emulator
    plot_end = 4383  # exclusive end of the plotted (held-out) window

    # Load met data
    s = remove_comments_from_header(met_fname)
    df_met = pd.read_csv(s, parse_dates=[[0, 1]], skiprows=4, index_col=0,
                         sep=",", keep_date_col=True,
                         date_parser=date_converter)

    # Need to build numpy array, so drop year, doy cols.
    # ``.iloc`` replaces the removed ``.ix`` indexer; both slices here are
    # positional (the index is the parsed date column, the columns are
    # labelled by name).
    met_data = df_met.iloc[:, 2:].values
    met_data_train = df_met.iloc[0:n_train, 2:].values

    # Load GDAY outputs
    df = pd.read_csv(gday_outfname, skiprows=3, sep=",",
                     skipinitialspace=True)
    df['date'] = make_data_index(df)
    df = df.set_index('date')
    target = df[var][0:n_train].values

    # BUILD MODELS
    param_KNR = {"n_neighbors": [20], "weights": ['distance']}
    regmod_p = make_pipeline(StandardScaler(), KNeighborsRegressor())

    # GridSearchCV addresses pipeline parameters as "<step name>__<param>",
    # so prefix every key with the name of the final (regressor) step.
    modlab = regmod_p.steps[-1][0]
    par_grid = {'{0}__{1}'.format(modlab, parkey): pardat
                for parkey, pardat in param_KNR.items()}

    emulator = GridSearchCV(regmod_p, param_grid=par_grid, cv=5)
    emulator.fit(met_data_train, target)
    predict = emulator.predict(met_data)

    df = pd.DataFrame({'DT': df.index, 'emu': predict, 'gday': df[var]})

    # Only the rows the emulator was not trained on are shown.
    plt.plot_date(df.index[n_train:plot_end], df['emu'][n_train:plot_end],
                  'o', label='Emulator')
    plt.plot_date(df.index[n_train:plot_end], df['gday'][n_train:plot_end],
                  'o', label='GDAY')
    plt.ylabel('GPP (g C m$^{-2}$ s$^{-1}$)')
    plt.legend()
    plt.show()
示例2: test_pipeline_ducktyping
def test_pipeline_ducktyping():
    """Check which methods a pipeline exposes depending on its steps.

    Attributes that are expected to exist are simply looked up (a missing
    one raises ``AttributeError``); attributes that are expected to be
    absent are checked with ``hasattr``.
    """
    # Single ``Mult`` step: all three methods must be reachable.
    pipe = make_pipeline(Mult(5))
    for attr in ('predict', 'transform', 'inverse_transform'):
        getattr(pipe, attr)

    # Single ``Transf`` step: no ``predict``, but both transform directions.
    pipe = make_pipeline(Transf())
    assert not hasattr(pipe, 'predict')
    for attr in ('transform', 'inverse_transform'):
        getattr(pipe, attr)

    # A 'passthrough' step is named after itself and behaves the same way.
    pipe = make_pipeline('passthrough')
    assert pipe.steps[0] == ('passthrough', 'passthrough')
    assert not hasattr(pipe, 'predict')
    for attr in ('transform', 'inverse_transform'):
        getattr(pipe, attr)

    # A ``NoInvTransf`` step in either position removes ``inverse_transform``.
    pipe = make_pipeline(Transf(), NoInvTransf())
    assert not hasattr(pipe, 'predict')
    getattr(pipe, 'transform')
    assert not hasattr(pipe, 'inverse_transform')

    pipe = make_pipeline(NoInvTransf(), Transf())
    assert not hasattr(pipe, 'predict')
    getattr(pipe, 'transform')
    assert not hasattr(pipe, 'inverse_transform')
示例3: preprocess
def preprocess(self, any_set, is_train):
    """Vectorize ``any_set`` with TF-IDF features of title and description.

    When ``is_train`` is true a new feature union is built (one TF-IDF
    pipeline on the 'title' column, one on the 'description' column),
    stored on ``self.pipeline`` and fitted on ``any_set``.  Otherwise the
    previously stored ``self.pipeline`` is applied unchanged.

    Parameters
    ----------
    any_set :
        Data passed to the pipeline; ``ColumnSelector`` is asked for the
        keys 'title' and 'description'.
    is_train : bool
        Fit a fresh pipeline (True) or reuse the stored one (False).

    Returns
    -------
    The output of ``fit_transform`` / ``transform`` of the feature union.
    """
    if not is_train:
        return self.pipeline.transform(any_set)

    # Raw strings keep the regexes readable and avoid the invalid "\w"
    # escape the non-raw spelling of 'match_word_punct' contained.
    # Only 'match_word1' is used below; the others are kept as alternatives.
    dico_pattern = {
        'match_lowercase_only': r'\b[a-z]+\b',
        'match_word': r'\w{2,}',
        'match_word1': r'(?u)\b\w+\b',
        'match_word_punct': r'\w+|[,.?!;]',
        'match_NNP': r'\b[A-Z][a-z]+\b|\b[A-Z]+\b',
        'match_punct': "[,.?!;'-]",
    }

    # Both text columns use exactly the same vectorizer settings.
    tfidf_options = dict(
        lowercase=True, stop_words='english',
        token_pattern=dico_pattern["match_word1"],
        ngram_range=(1, 2), max_df=1.0, min_df=2, max_features=None,
        vocabulary=None, binary=True, norm=u'l2',
        use_idf=True, smooth_idf=True, sublinear_tf=True,
    )
    tfv_title = TfidfVectorizer(**tfidf_options)
    tfv_desc = TfidfVectorizer(**tfidf_options)

    title_pipe = make_pipeline(ColumnSelector(key='title'), tfv_title)
    desc_pipe = make_pipeline(ColumnSelector(key='description'), tfv_desc)
    self.pipeline = make_union(title_pipe, desc_pipe)
    return self.pipeline.fit_transform(any_set)
示例4: __init__
def __init__(self, **config):
    """Build the feature pipeline and the classifier from ``config``.

    Every name in ``_configuration_options`` must be present in
    ``config``.  The keys read here are "sparse_features",
    "dense_features", "classifier" and "classifier_args".

    Raises
    ------
    ValueError
        If a required option is missing, or if ``config["classifier"]``
        is not a key of ``_valid_classifiers``.
    """
    # Validate options are present
    for required in _configuration_options:
        if required in config:
            continue
        raise ValueError(
            "Missing configuration option {!r}".format(required))

    # Feature extraction: the sparse features go through a vectorizer
    # followed by ClassifierAsFeature, and are then placed side by side
    # with the dense features.
    sparse_spec = parse_features(config["sparse_features"])
    sparse_branch = make_pipeline(Vectorizer(sparse_spec, sparse=True),
                                  ClassifierAsFeature())
    dense_spec = parse_features(config["dense_features"])
    dense_branch = Vectorizer(dense_spec, sparse=False)
    features = make_union(sparse_branch, dense_branch)

    # Classifier
    try:
        classifier_factory = _valid_classifiers[config["classifier"]]
    except KeyError:
        raise ValueError(
            "Unknown classification algorithm {!r}".format(
                config["classifier"]))
    model = classifier_factory(**config["classifier_args"])

    self.pipeline = make_pipeline(features, StandardScaler())
    self.classifier = model
示例5: get_pipeline
def get_pipeline(fsmethods, clfmethod):
    """Return a sklearn Pipeline made of feature selection and a final step.

    A list of feature-selection transformers is joined in a FeatureUnion,
    which is then chained with ``clfmethod`` in a Pipeline.  A single
    transformer is used as the first step directly.  An empty list means
    "no feature selection": the pipeline then only contains ``clfmethod``.

    Parameters
    ----------
    fsmethods: list of estimators, or a single transformer
        All estimators in a pipeline, must be transformers (i.e. must
        have a transform method).

    clfmethod: classifier
        The last estimator may be any type (transformer, classifier, etc.).

    Returns
    -------
    pipe

    Raises
    ------
    ValueError
        If ``fsmethods`` is neither a list nor an object with a
        ``transform`` attribute.
    """
    if isinstance(fsmethods, list):
        # make_union() needs at least one transformer, so an empty list is
        # mapped to "no union" instead of being passed through.
        feat_union = make_union(*fsmethods) if fsmethods else None
    elif hasattr(fsmethods, 'transform'):
        feat_union = fsmethods
    else:
        raise ValueError('fsmethods expected to be either a list or a transformer method')

    if feat_union is None:
        return make_pipeline(clfmethod)
    return make_pipeline(feat_union, clfmethod)
示例6: analysis
def analysis(name, typ, condition=None, query=None, title=None):
    """Wrapper to ensure that we attribute the same function for each type
    of analyses: e.g. categorical, regression, circular regression.

    Parameters
    ----------
    name :
        Name of the analysis; also used as ``condition`` when that is None.
    typ : {'categorize', 'regress', 'circ_regress'}
        Selects the estimator, the scorer and the chance level.
    condition, query, title :
        Stored unchanged in the returned dict (``condition`` defaults to
        ``name``).

    Returns
    -------
    dict
        Keys: name, condition, query, clf, scorer, chance, erf_function,
        cv (always 8), typ, title, single_trial (always True).

    Raises
    ------
    ValueError
        If ``typ`` is not one of the three supported values.
    """
    # Define univariate analysis
    erf_function = None  # Default is fast_mannwhitneyu

    # /!\ for categorical analyses, the contrast is min(y) - max(y)
    # e.g. target_present==False - target_present==True
    if typ == 'categorize':
        # estimator is normalization + l2 Logistic Regression
        clf = make_pipeline(
            StandardScaler(),
            force_predict(LogisticRegression(class_weight='balanced'),
                          axis=1))
        scorer = scorer_auc
        chance = .5
    elif typ == 'regress':
        # estimator is normalization + l2 Ridge
        clf = make_pipeline(StandardScaler(), Ridge())
        scorer = scorer_spearman
        chance = 0.
    elif typ == 'circ_regress':
        # estimator is normalization + Ridge wrapped in PolarRegression
        # (presumably fitted on cos and sin of the angle -- TODO confirm)
        clf = make_pipeline(StandardScaler(), PolarRegression(Ridge()))
        scorer = scorer_angle
        chance = 0.
        # The univariate analysis needs a different scorer
        erf_function = scorer_circlin
    else:
        # Without this branch clf/scorer/chance would be unbound below.
        raise ValueError(
            "Unknown analysis type {!r}; expected 'categorize', 'regress' "
            "or 'circ_regress'".format(typ))

    if condition is None:
        condition = name
    return dict(name=name, condition=condition, query=query, clf=clf,
                scorer=scorer, chance=chance, erf_function=erf_function,
                cv=8, typ=typ, title=title, single_trial=True)
示例7: test_pipeline_ducktyping
def test_pipeline_ducktyping():
    """Check which methods a pipeline exposes depending on its steps.

    Attributes that are expected to exist are simply looked up (a missing
    one raises ``AttributeError``); attributes that are expected to be
    absent are checked with ``assert_false(hasattr(...))``.
    """
    # Single ``Mult`` step: all three methods must be reachable.
    pipe = make_pipeline(Mult(5))
    for attr in ('predict', 'transform', 'inverse_transform'):
        getattr(pipe, attr)

    # Single ``Transf`` step: no ``predict``, but both transform directions.
    pipe = make_pipeline(Transf())
    assert_false(hasattr(pipe, 'predict'))
    for attr in ('transform', 'inverse_transform'):
        getattr(pipe, attr)

    # A ``None`` step is expected to behave like a plain transformer.
    pipe = make_pipeline(None)
    assert_false(hasattr(pipe, 'predict'))
    for attr in ('transform', 'inverse_transform'):
        getattr(pipe, attr)

    # A ``NoInvTransf`` step in either position removes ``inverse_transform``.
    pipe = make_pipeline(Transf(), NoInvTransf())
    assert_false(hasattr(pipe, 'predict'))
    getattr(pipe, 'transform')
    assert_false(hasattr(pipe, 'inverse_transform'))

    pipe = make_pipeline(NoInvTransf(), Transf())
    assert_false(hasattr(pipe, 'predict'))
    getattr(pipe, 'transform')
    assert_false(hasattr(pipe, 'inverse_transform'))
示例8: test_bagging_classifier_with_missing_inputs
def test_bagging_classifier_with_missing_inputs():
    """Check that BaggingClassifier can accept X with missing/infinite data.

    First a pipeline that passes X through ``replace`` before the tree is
    bagged and must fit and predict; then a pipeline without that step
    must raise ``ValueError`` both on its own and inside the bagging
    wrapper.
    """
    X = np.array([
        [1, 3, 5],
        [2, None, 6],
        [2, np.nan, 6],
        [2, np.inf, 6],
        # -np.inf is the same value as the np.NINF alias, which was
        # removed in NumPy 2.0.
        [2, -np.inf, 6],
    ])
    y = np.array([3, 6, 6, 6, 6])

    classifier = DecisionTreeClassifier()
    pipeline = make_pipeline(
        FunctionTransformer(replace, validate=False),
        classifier
    )
    pipeline.fit(X, y).predict(X)
    bagging_classifier = BaggingClassifier(pipeline)
    bagging_classifier.fit(X, y)
    y_hat = bagging_classifier.predict(X)
    assert_equal(y.shape, y_hat.shape)
    bagging_classifier.predict_log_proba(X)
    bagging_classifier.predict_proba(X)

    # Verify that exceptions can be raised by wrapper classifier
    classifier = DecisionTreeClassifier()
    pipeline = make_pipeline(classifier)
    assert_raises(ValueError, pipeline.fit, X, y)
    bagging_classifier = BaggingClassifier(pipeline)
    assert_raises(ValueError, bagging_classifier.fit, X, y)
示例9: cross_validation_LR
def cross_validation_LR(X, Y, n_folds, C_seq, K_seq, verbose=False):
    '''
    Nested cross-validated accuracy of "select K features + logistic
    regression".

    To classify Y using X, we first use a univariate filter to choose K
    dimensions in X, where the difference between different Ys are
    highest, then run a logistic regression classifier with
    regularization parameter C on the K dimensions.

    To quantify how well X can classify Y, without specifying training
    and testing partition, we do n_folds cross validation.
    In each fold, during training, we do an inner loop cross validation
    to select C and K that give the best classification accuracy from a
    given range; and then we use this to classify the held-out testing
    data.

    NOTE(review): the filter is scored with ``f_regression`` although the
    target is a class label; ``f_classif`` would be the ANOVA F-test --
    confirm which one is intended.

    Inputs:
        X, [n, p], n trials of p dimensional data, used for classification
        Y, [n], class labels
        n_folds, integer, split the data into n_folds for cross validation
        C_seq, a sequence of regularization parameters for logistic
            regression classifiers, smaller values specify stronger
            regularization.
            e.g. C_seq = 10.0 ** np.arange(-3, 1, 1)
        K_seq, a sequence of integers,
            e.g. K_seq = (np.floor(np.arange(0.2, 1, 0.2) * p)).astype(int)
        verbose: boolean, if true, print the best C and K chosen
    Output:
        averaged classification accuracy of the n_folds
    '''
    cv0 = StratifiedKFold(Y, n_folds=n_folds)
    cv_acc = np.zeros(n_folds)
    for i in range(n_folds):
        ind_test = cv0.test_folds == i
        ind_train = cv0.test_folds != i
        tmpX_train = X[ind_train, :]
        tmpY_train = Y[ind_train]
        tmpX_test = X[ind_test, :]
        tmpY_test = Y[ind_test]

        # Inner splitter only depends on this fold's training labels, so
        # build it once instead of once per grid cell.
        cv1 = StratifiedKFold(tmpY_train, n_folds=n_folds)

        # grid search over (C, K)
        tmp_cv_score = np.zeros([len(C_seq), len(K_seq)])
        for j, C in enumerate(C_seq):
            for k, K in enumerate(K_seq):
                anova_clf = make_pipeline(
                    SelectKBest(f_regression, k=K),
                    LogisticRegression(C=C, penalty="l2"))
                tmp_cv_score[j, k] = cross_val_score(
                    anova_clf, tmpX_train, tmpY_train,
                    scoring="accuracy", cv=cv1).mean()

        best_ind = np.argmax(tmp_cv_score.ravel())
        best_j, best_k = np.unravel_index(best_ind, tmp_cv_score.shape)

        # Refit with the *selected* hyper-parameters.  (Using the loop
        # variables j and k here would always pick the last grid cell.)
        anova_clf = make_pipeline(
            SelectKBest(f_regression, k=K_seq[best_k]),
            LogisticRegression(C=C_seq[best_j], penalty="l2"))
        tmpY_predict = anova_clf.fit(tmpX_train,
                                     tmpY_train).predict(tmpX_test)
        if verbose:
            # Single pre-formatted argument: prints "C K" under both
            # Python 2 and Python 3.
            print("{0} {1}".format(C_seq[best_j], K_seq[best_k]))
        cv_acc[i] = np.mean(tmpY_test == tmpY_predict)
    return np.mean(cv_acc)
示例10: test_generator_ok
def test_generator_ok(self):
    """Two FakeGenerator pipelines with the same seed give equal frames.

    The first result must have shape (20, 3): 20 samples for the three
    requested fakes.
    """
    def _generate():
        # Fresh generator and pipeline on every call, identical settings.
        generator = FakeGenerator(fakes=['job', 'name', 'address'],
                                  nb_sample=20, random_state=40)
        return make_pipeline(generator).fit_transform(None)

    result = _generate()
    self.assertEqual(result.shape, (20, 3))

    # Testing the seed
    result_2 = _generate()
    assert_frame_equal(result, result_2)
示例11: fit
def fit(self, X, y):
    """Fit per-subject models, then a final model on their combined output.

    The last column of ``X`` is treated as the subject id and is removed
    from the features.  One ``LogisticRegressionCV`` pipeline is fitted
    for every split produced by ``subject_splitter``; the fitted
    pipelines are wrapped in ``FeatureUnionWrapper`` and joined in a
    union that feeds a final ``LogisticRegressionCV``, stored as
    ``self.clf_``.

    Returns
    -------
    self
    """
    # Filthy hack: subject ids travel as the last feature column.
    sids = X[:, -1]
    features = X[:, :-1]

    all_pipelines = []
    for X_s, y_s in subject_splitter(features, y, sids):
        subject_model = make_pipeline(LogisticRegressionCV())
        all_pipelines.append(subject_model.fit(X_s, y_s))

    wrapped = [FeatureUnionWrapper(p) for p in all_pipelines]
    f_union = make_union(*wrapped)
    stacked = make_pipeline(f_union, LogisticRegressionCV())
    self.clf_ = stacked.fit(features, y)
    return self
示例12: test_make_pipeline_memory
def test_make_pipeline_memory():
    """``make_pipeline`` stores the ``memory`` argument and defaults to None."""
    cachedir = mkdtemp()
    try:
        memory = Memory(cachedir=cachedir)
        pipeline = make_pipeline(DummyTransf(), SVC(), memory=memory)
        assert_true(pipeline.memory is memory)
        pipeline = make_pipeline(DummyTransf(), SVC())
        assert_true(pipeline.memory is None)
    finally:
        # Remove the temporary cache directory even if an assertion fails.
        shutil.rmtree(cachedir)
示例13: __init__
def __init__(self):
    """Create four lists of five independent, unfitted regressors each.

    ``clf1``: mean-imputed gradient boosting, ``clf2``: median-imputed
    extra trees, ``clf3``: mean-imputed linear SVR, ``clf``: plain
    linear regression.
    """
    n_models = 5

    def _boosting():
        return make_pipeline(
            Imputer(),
            GradientBoostingRegressor(n_estimators=5000, max_depth=8))

    def _extra_trees():
        forest = ExtraTreesRegressor(
            n_estimators=5000, criterion='mse', max_depth=8,
            min_samples_split=10, min_samples_leaf=1,
            min_weight_fraction_leaf=0.0,
            max_features='auto', max_leaf_nodes=None, bootstrap=False,
            oob_score=False,
            n_jobs=1, random_state=42, verbose=0, warm_start=True)
        return make_pipeline(Imputer(strategy='median'), forest)

    def _linear_svr():
        return make_pipeline(Imputer(), svm.LinearSVR())

    # Each list entry is a separate estimator instance.
    self.clf1 = [_boosting() for _ in range(n_models)]
    self.clf2 = [_extra_trees() for _ in range(n_models)]
    self.clf3 = [_linear_svr() for _ in range(n_models)]
    self.clf = [linear_model.LinearRegression() for _ in range(n_models)]
示例14: test_classes_property
def test_classes_property():
    """``classes_`` is only available on a fitted classifier pipeline.

    A fitted regressor pipeline must raise ``AttributeError``; a
    classifier pipeline must raise it before ``fit`` and expose the
    unique labels afterwards.
    """
    iris = load_iris()
    X, y = iris.data, iris.target

    # Regressor: no classes_ even after fitting.
    regressor = make_pipeline(SelectKBest(k=1), LinearRegression())
    regressor.fit(X, y)
    assert_raises(AttributeError, getattr, regressor, "classes_")

    # Classifier: classes_ appears only once the pipeline is fitted.
    classifier = make_pipeline(SelectKBest(k=1),
                               LogisticRegression(random_state=0))
    assert_raises(AttributeError, getattr, classifier, "classes_")
    classifier.fit(X, y)
    assert_array_equal(classifier.classes_, np.unique(y))
示例15: test_make_pipeline_memory
def test_make_pipeline_memory():
    """``make_pipeline`` stores the ``memory`` argument and defaults to None."""
    cachedir = mkdtemp()
    try:
        if LooseVersion(joblib_version) < LooseVersion('0.12'):
            # Deal with change of API in joblib
            memory = Memory(cachedir=cachedir, verbose=10)
        else:
            memory = Memory(location=cachedir, verbose=10)
        pipeline = make_pipeline(DummyTransf(), SVC(), memory=memory)
        assert_true(pipeline.memory is memory)
        pipeline = make_pipeline(DummyTransf(), SVC())
        assert_true(pipeline.memory is None)
    finally:
        # Remove the temporary cache directory even if an assertion fails.
        shutil.rmtree(cachedir)