本文整理汇总了Python中sklearn.pipeline.Pipeline.transform方法的典型用法代码示例。如果您正苦于以下问题:Python Pipeline.transform方法的具体用法?Python Pipeline.transform怎么用?Python Pipeline.transform使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.pipeline.Pipeline
的用法示例。
在下文中一共展示了Pipeline.transform方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: fit_cnn_char
# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 或者: from sklearn.pipeline.Pipeline import transform [as 别名]
def fit_cnn_char(self):
    """Train a character-level CNN sentiment classifier.

    Pre-trains on up to 10**6 distantly-supervised documents, then
    fine-tunes on the labelled training docs with injected typos for
    robustness.  Returns a ``(name, fitted_estimator)`` pair where the
    estimator is a Pipeline of the plain feature transform + classifier.
    """
    # NOTE(review): distant_labels is sliced here and *called* below
    # (distant_labels()), so it is presumably a lazy, sliceable-and-callable
    # labels container — confirm against its class.
    distant_docs, distant_labels = self.distant_docs[:10**6], self.distant_labels[:10**6]
    # Lower-case, strip, and collapse whitespace runs before embedding.
    normalize = Map(compose(str.lower, str.strip, lambda s: re.sub(r'\s+', ' ', s), normalize_special))
    # NOTE(review): "[email protected]" looks like an email-obfuscation artifact from
    # scraping — the original alphabet probably contained "_@"; confirm.
    alphabet = ' abcdefghijklmnopqrstuvwxyz0123456789-,;.!?:\'"/\\|[email protected]#$%^&*~`+-=<>()[]{}'
    # One-hot character embedding over the fixed alphabet.
    emb = self._fit_embedding_char('onehot', alphabet)
    # Base feature pipeline: normalised text -> character embeddings.
    ft = Pipeline([
        ('normalize', normalize),
        ('embeddings', emb),
    ])
    # Synonym-replacement variant: POS-tag, map ARK tags to WordNet-style
    # classes, replace synonyms, then embed.
    # NOTE(review): ft_syn is constructed but never used below — dead code
    # or work in progress? Confirm before removing.
    ft_syn = Pipeline([
        ('pos', CachedFitTransform(ArkTweetPosTagger(), self.memory)),
        ('pos_map', MapTokens(lambda entry: (entry[0], {
            'N': 'n', 'V': 'v', 'A': 'a', 'R': 'r',
        }.get(entry[1], 'o'), entry[2]))),
        ('syn', ReplaceSynonyms()),
        ('normalize', MapTokens(normalize_special)),
        ('embeddings', emb),
    ])
    # Typo-injection variant used for the supervised fine-tuning pass.
    ft_typo = Pipeline([
        ('normalize', normalize),
        ('typos', IntroduceTypos(alphabet)),
        ('embeddings', emb),
    ])
    cf = CNNChar(batch_size=128, emb_X=emb.X, input_size=140, output_size=3, static_mode=0, f1_classes=[0, 2])
    # cf = CachedFitTransform(cf, self.memory)
    # Validation data shared by both fit phases.
    kw = dict(val_docs=ft.transform(self.val_docs), val_y=self.val_labels())
    # Phase 1: distant-supervision pre-training.
    cf.fit(ft.transform(distant_docs), distant_labels(), epoch_size=10**4, max_epochs=100, **kw)
    # cf = NNShallow(batch_size=128, model=classifier, num_train=5)
    # Phase 2: fine-tune on labelled training docs with injected typos.
    cf.fit(ft_typo.transform(self.train_docs), self.train_labels(), max_epochs=15, **kw)
    # The returned estimator uses the plain (no-typo) feature pipeline.
    estimator = Pipeline([('features', ft), ('classifier', cf)])
    return 'cnn_char', estimator
示例2: fit_nn_word
# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 或者: from sklearn.pipeline.Pipeline import transform [as 别名]
def fit_nn_word(self):
    """Train a word-level CNN sentiment classifier on pre-fitted embeddings.

    Pre-trains on up to 2*10**5 distantly-supervised documents, then
    fine-tunes on the labelled training set.  Returns a ``(name, estimator)``
    pair where the name records the classifier class and embedding type.
    """
    # NOTE(review): distant_labels is sliced and later *called* — presumably
    # a lazy, sliceable-and-callable labels container; confirm.
    distant_docs, distant_labels = self.distant_docs[:2 * 10**5], self.distant_labels[:2 * 10**5]
    # Tokenise + normalise elongations, cached through self.memory.
    tokenize_sense = CachedFitTransform(Pipeline([
        ('tokenize', Map(compose(tokenize, normalize_special))),
        ('normalize', MapTokens(normalize_elongations)),
    ]), self.memory)
    emb_type = 'google'
    # Word embeddings fitted over the validation, train and distant corpora.
    emb = self._fit_embedding_word(emb_type, [self.val_docs, self.train_docs, distant_docs], tokenize_sense, d=100)
    # Feature pipeline: tokens -> embedding indices/vectors.
    ft = Pipeline([
        ('tokenize', tokenize_sense),
        ('embeddings', emb),
    ])
    cf = CNNWord(
        batch_size=64, emb_X=emb.X, input_size=56, conv_param=(100, [3, 4, 5]), dense_params=[],
        output_size=3, static_mode=1, max_norm=3, f1_classes=[0, 2]
    )
    # Alternative classifiers kept for experimentation:
    # cf = CNNWordPredInteraction(
    #     batch_size=64, emb_X=emb.X, input_size=56, conv_param=(100, [3, 4, 5]), dense_params=[],
    #     output_size=3, max_norm=3, f1_classes=[0, 2]
    # )
    # cf = RNNWord(batch_size=64, emb_X=emb.X, lstm_param=300, output_size=3, f1_classes=[0, 2])
    # cf = RNNMultiWord(
    #     batch_size=64, input_size=56, emb_X=emb.X, conv_param=3, lstm_param=300, output_size=3, f1_classes=[0, 2]
    # )
    # Validation data shared by both fit phases.
    kw = dict(val_docs=ft.transform(self.val_docs), val_y=self.val_labels())
    # Phase 1: distant-supervision pre-training; phase 2: supervised fine-tune.
    cf.fit(ft.transform(distant_docs), distant_labels(), epoch_size=10**4, max_epochs=20, **kw)
    cf.fit(ft.transform(self.train_docs), self.train_labels(), epoch_size=1000, max_epochs=100, **kw)
    estimator = Pipeline([('features', ft), ('classifier', cf)])
    return '{}(embedding={})'.format(snake_case(type(cf).__name__), emb_type), estimator
示例3: main
# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 或者: from sklearn.pipeline.Pipeline import transform [as 别名]
def main():
    """One-vs-rest driver classification experiment.

    Reads the target driver (``sys.argv[1]``) and a starting negative-driver
    id (``sys.argv[2]``), builds feature matrices, labels the target driver's
    trips 1 and up to 20 other drivers' trips 0, then trains and scores
    several classifiers on a 90/10 split.

    Fixes vs. the original:
    * ``num_t_targets`` was referenced but never defined (NameError) — it is
      now computed as the number of positive test targets.
    * ``np.zeros(len(...)) * i`` multiplied zeros by ``i``, a no-op — the
      vestigial ``* i`` was dropped (labels are unchanged).
    * On ``IOError`` the loop previously retried the same driver id until the
      ``j`` guard expired; it now advances to the next id.
    """
    location = "/home/joe/drivers/"
    z = sys.argv[1]
    k = int(sys.argv[2])
    trips = read_trips(location + z)
    fmatrix = feature_matrix(trips)
    n_rows, n_comps = fmatrix.shape
    # 90/10 train/test split of the target driver's trips; all labelled 1.
    train_fmatrix = fmatrix[: int(len(fmatrix) * 0.9)]
    train_targets = np.ones(len(train_fmatrix))
    test_fmatrix = fmatrix[int(len(fmatrix) * 0.9) :]
    test_targets = np.ones(len(test_fmatrix))
    targets = np.ones(len(fmatrix))
    # Add negative examples (label 0) from up to 20 other drivers.
    i = k
    j = 0
    while i < (k + 20) and j < 10000:
        j += 1
        try:
            trips = read_trips(location + str(i))
            fm = feature_matrix(trips)
            train_fm = fm[:9]
            test_fm = fm[9:10]
            train_fmatrix = np.vstack((train_fmatrix, train_fm))
            test_fmatrix = np.vstack((test_fmatrix, test_fm))
            fmatrix = np.vstack((fmatrix, fm))
            targets = np.hstack((targets, np.zeros(len(fm))))
            train_targets = np.hstack((train_targets, np.zeros(len(train_fm))))
            test_targets = np.hstack((test_targets, np.zeros(len(test_fm))))
            print(i)
            i += 1
        except IOError:
            # Missing driver data: skip to the next id instead of retrying
            # the same path (the original left i unchanged here).
            i += 1
    # Scale + reduce to 50 components (note: step named "ICA" but uses PCA).
    pipeline = Pipeline([("scale", StandardScaler()), ("ICA", PCA(n_components=50))])
    pipeline.fit(fmatrix)
    # NOTE(review): these transformed matrices are computed but never fed to
    # the classifiers below — presumably an unfinished experiment; confirm.
    train_trans = pipeline.transform(train_fmatrix)
    test_trans = pipeline.transform(test_fmatrix)
    # Fraction of positive (target-driver) examples in the test set.
    num_t_targets = np.count_nonzero(test_targets)
    print("point teng", num_t_targets / len(test_targets))
    gb = RandomForestClassifier(n_estimators=50)
    gb.fit(train_fmatrix, train_targets)
    gb_score = gb.score(test_fmatrix, test_targets)
    print("gb", gb_score)
    dt = DecisionTreeClassifier()
    dt.fit(train_fmatrix, train_targets)
    dt_score = dt.score(test_fmatrix, test_targets)
    print("dt", dt_score)
    svc = SVC()
    svc.fit(train_fmatrix, train_targets)
    svc_score = svc.score(test_fmatrix, test_targets)
    print("svc", svc_score)
    # rfs = [RandomForestClassifier(n_estimators=40) for i in range(20)]
    # Ensemble of 20 gradient-boosting classifiers; report mean test score.
    rfs = [GradientBoostingClassifier(n_estimators=100) for i in range(20)]
    scores = []
    for i, rf in enumerate(rfs):
        rf.fit(train_fmatrix, train_targets)
        scores.append(rf.score(test_fmatrix, test_targets))
        print(i, scores[i])
    print("average", np.mean(scores))
示例4: run
# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 或者: from sklearn.pipeline.Pipeline import transform [as 别名]
def run():
    """End-to-end digit-classification experiment.

    Loads and augments training data, pretrains a stack of AE-ELM
    autoencoders on the combined labelled + unlabelled images, transforms
    both sets through the stack, evaluates a LogisticRegression baseline,
    then trains an RBF SVM and writes its test predictions to CSV.
    """
    X_train, Y_train = load_training_data()
    """
    X_train=crop_images (X_train)
    print("Train images cropped")
    print(X_train.shape())
    """
    # Augment training data with rotated copies.
    X_train, Y_train = rotate_dataset(X_train, Y_train, 1)
    print("Training Data Rotated")
    ## X_train, Y_train = nudge_dataset(X_train, Y_train, nudge_size = 2)
    ## print("Training Data augmented")
    # NOTE(review): n_features/n_classes are only used by the commented-out
    # DBN classifier below — dead unless that variant is re-enabled.
    n_features = X_train.shape[1]
    n_classes = 10
    "Unsupervised learning+pretraining"
    test_data = get_test_data_set()
    #test_data = rotate_dataset(test_data, None, 3)
    # test_data=crop_images (test_data)
    # Stack train + rotated test images for unsupervised pretraining.
    XX = np.vstack((X_train,rotate_dataset(test_data, None, 3)))
    print("Stacked, unlabelled XX.shape: ",XX.shape)
    ## aeelm = AEELMTransformer(n_components=700, activation='relu')
    ## X_transform = aeelm.fit(XX).transform(XX)
    ## print("AE-ELM 1/2")
    ##
    ## aeelm_2 = AEELMTransformer(n_components=640, activation='relu')
    ## X_train = aeelm_2.fit(X_transform).transform(aeelm.transform(X_train))
    ## print("AE-ELM 2/2 - First autoencoder trained")
    # Stacked autoencoder: successively narrower AE-ELM layers.
    p = Pipeline([
        ('aelm1', AEELMTransformer(n_components=710, activation='tanh')),
        # ('aelm2', AEELMTransformer(n_components=670, activation='relu')),
        ('aelm3', AEELMTransformer(n_components=500, activation='tanh')),
        ('aelm4', AEELMTransformer(n_components=400, activation='relu')),
        ('aelm5', AEELMTransformer(n_components=350, activation='relu')),
        ## ('aelm6', AEELMTransformer(n_components=480, activation='tanh'))
    ])
    # Pretrain on the combined (unlabelled) stack, then encode both sets.
    p.fit(XX)
    print("AE-ELM - autoencoder trained")
    X_train = p.transform(X_train)
    test_data = p.transform(test_data)
    # Quick baseline on the encoded features via cross-validation.
    classifier = LogisticRegression(C=100)
    print("AE-ELM Transformed training Perf:")
    eval_model(classifier, X_train, Y_train, cv=2, n_jobs=3)
    # Final model: RBF SVM on the encoded features.
    classifier = SVC(kernel="rbf", C=2.8, gamma=.0073,cache_size = 6500,verbose=False)
    # classifier = DBN([n_features, 8000, n_classes],
    # learn_rates=0.4, learn_rate_decays=0.9 ,epochs=25, verbose=1)
    classifier.fit(X_train, Y_train)
    predictions = classifier.predict(test_data)
    write_predictions_to_csv(predictions)
示例5: __init__
# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 或者: from sklearn.pipeline.Pipeline import transform [as 别名]
class DeepRbmMnistClassifier:
    """Three-layer deep belief network for MNIST digits built from
    greedily-trained :class:`BernoulliRBM` layers.

    The first two RBMs are stacked in a Pipeline and fitted on the raw
    pixels.  The top RBM is fitted on the second layer's output concatenated
    with a one-hot encoding of the label.  Classification appends ten zero
    label-units, passes the data up through the top RBM and back down through
    its transpose, and reads the reconstructed label units.
    """

    def __init__(self):
        # Layer sizes and per-layer training hyper-parameters.
        self.n_components_first = 500
        self.n_components_second = 500
        self.n_components_third = 2000
        self.n_iter_first = 20
        self.n_iter_second = 20
        self.n_iter_third = 20
        self.learning_rate_first = 0.06
        self.learning_rate_second = 0.06
        self.learning_rate_third = 0.06
        self.verbose = True

    def label_to_feature(self, y):
        """Return digit ``y`` (0-9) as a one-hot list of length 10."""
        feature = [0] * 10
        feature[y] = 1
        return feature

    def fit(self, X, y):
        """Greedy layer-wise fit: two stacked RBMs on X, then a top RBM on
        (hidden activations || one-hot labels)."""
        self.rbm_1 = BernoulliRBM(verbose=self.verbose,
                                  n_components=self.n_components_first,
                                  n_iter=self.n_iter_first,
                                  learning_rate=self.learning_rate_first)
        self.rbm_2 = BernoulliRBM(verbose=self.verbose,
                                  n_components=self.n_components_second,
                                  n_iter=self.n_iter_second,
                                  learning_rate=self.learning_rate_second)
        self.first_pipeline = Pipeline(steps=[('rbm_1', self.rbm_1), ('rbm_2', self.rbm_2)])
        self.first_pipeline.fit(X, y)
        # Transform the whole matrix at once and append the one-hot labels
        # (vectorised replacement for the original per-example loop, which
        # the in-code TODO asked for).
        transformed = self.first_pipeline.transform(X)
        label_features = np.array([self.label_to_feature(label) for label in y])
        new_features = np.hstack((transformed, label_features))
        self.rbm_3 = BernoulliRBM(verbose=self.verbose,
                                  n_components=self.n_components_third,
                                  n_iter=self.n_iter_third,
                                  learning_rate=self.learning_rate_third)
        self.rbm_3.fit(new_features, y)

    def classify(self, X):
        """Return predicted digits for the rows of X."""
        transformed = self.first_pipeline.transform(X)
        # Append ten zeroed label units to each example.
        transformed = np.concatenate((transformed, [[0] * 10] * len(transformed)), axis=1)
        # The inverse of rbm_3 to go from hidden layer back to visible layer.
        rbm_aux = BernoulliRBM()
        rbm_aux.intercept_hidden_ = self.rbm_3.intercept_visible_
        rbm_aux.intercept_visible_ = self.rbm_3.intercept_hidden_
        rbm_aux.components_ = np.transpose(self.rbm_3.components_)
        # Up through rbm_3, back down through its transpose, then read the
        # reconstructed label units (the last ten columns).
        results = rbm_aux.transform(self.rbm_3.transform(transformed))
        results = results[:, -10:]
        return np.argmax(results, axis=1)
示例6: MultinomialDEP
# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 或者: from sklearn.pipeline.Pipeline import transform [as 别名]
class MultinomialDEP(Step):
    """Multinomial density estimator over discretised vectors.

    Vectors are clipped to a percentile band, min-max scaled and discretised;
    the probability of a vector is its empirical frequency among the fitted
    vectors, and :meth:`transform` scores inputs by negative log-probability.
    """

    def __init__(self, percentile_threshold, bins):
        # Clip inputs to the [lower, upper] percentile band before scaling.
        self.lower = percentile_threshold
        self.upper = 100 - percentile_threshold
        scaler = MinMaxScaler()
        discretizer = FunctionTransformer(Discretizer(bins))
        self.pipeline = Pipeline(
            [('scaler', scaler), ('discretizer', discretizer)])
        # Fix: initialise so transform()'s guard raises the intended
        # AssertionError (not AttributeError) when fit() was never called.
        self.transformed_vectors = None

    def fit(self, vectors):
        """Learn clip bounds, fit the scaling pipeline, and store the
        discretised corpus used for frequency counting."""
        self.lower_clip = np.percentile(vectors, self.lower, axis=0)
        self.upper_clip = np.percentile(vectors, self.upper, axis=0)
        vectors = np.clip(vectors, self.lower_clip, self.upper_clip)
        self.transformed_vectors = self.pipeline.fit_transform(vectors)

    def transform(self, vectors):
        """Return -log empirical probability of each row, floored at 1e-10
        to avoid log(0) for unseen vectors."""
        assert self.transformed_vectors is not None
        vectors = np.clip(vectors, self.lower_clip, self.upper_clip)
        probabilities = []
        vectors = self.pipeline.transform(vectors)
        docs = self.transformed_vectors.shape[0]
        for x in vectors:
            # Count fitted rows whose discretised form matches x exactly.
            count = np.count_nonzero(
                (self.transformed_vectors == x).all(axis=1))
            pr = count / docs
            probabilities.append(pr)
        return -np.log(np.maximum(1e-10, np.array(probabilities)))
示例7: DecompositionPreprocessor
# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 或者: from sklearn.pipeline.Pipeline import transform [as 别名]
class DecompositionPreprocessor(Preprocessor):
    """Impute missing values, drop zero-variance columns, standardise, and
    project onto ``n_components`` components via PCA or mini-batch sparse PCA.

    Calling the fitted instance transforms new data through the pipeline.
    """

    def __init__(self, training_data, input_variables, decomp_method='pca', n_components=2):
        assert training_data.shape[1] == len(input_variables)
        self.input_variables = input_variables
        self.n_components = n_components
        # Output columns are named V1..Vn.
        self.variables = ['V%d' % (i + 1) for i in range(self.n_components)]
        # Common preprocessing steps shared by both decomposition methods.
        steps = [
            ('impute', Imputer()),
            ('nzv', VarianceThreshold()),
            ('scale', StandardScaler()),
        ]
        if decomp_method == 'pca':
            steps.append(('pca', PCA()))
            self.model = Pipeline(steps)
            self.model.set_params(pca__whiten=False, pca__n_components=n_components)
        elif decomp_method == 'mbspca':
            steps.append(('mbspca', MiniBatchSparsePCA()))
            self.model = Pipeline(steps)
            self.model.set_params(mbspca__n_components=n_components, mbspca__verbose=True)
        self._train(training_data)

    def _train(self, training_data):
        # Fit the full preprocessing + decomposition pipeline.
        self.model.fit(training_data)

    def __call__(self, data):
        return self.model.transform(data)
示例8: test_multiple_cols
# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 或者: from sklearn.pipeline.Pipeline import transform [as 别名]
def test_multiple_cols(self):
    """Split_transform over two text columns should emit all four tokens."""
    splitter = bt.Split_transform(input_features=["a", "b"], output_feature="res")
    frame = pd.DataFrame.from_dict([
        {"a": "a b", "b": "c d", "c": 3},
        {"a": "word1", "b": "word2"},
    ])
    pipeline = Pipeline([("split_transform", splitter)])
    result = pipeline.transform(frame)
    self.assertTrue(len(result["res"][0]) == 4)
示例9: main
# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 或者: from sklearn.pipeline.Pipeline import transform [as 别名]
def main(opt):
    """Fit an LDA topic model on the training corpus, dump topic vectors for
    the train (and optionally test) sentences, then write them to
    ``opt.lda_vec_path`` behind a "<count> <dims>" header line."""
    with codecs.open(opt.vocab, encoding='utf-8') as vocab_file:
        vocab = load_vocab(vocab_file)
    id2word = build_id2word(vocab)
    _, docs_train, _ = load_all_data(opt.train_jsons)
    # Bag-of-words features feeding an LDA topic model.
    topic_model = Pipeline([
        ('bow', BagOfWords(vocab=vocab)),
        ('lda', Lda(id2word=id2word, num_topics=opt.num_topics))])
    train_vecs = topic_model.fit_transform(docs_train)
    seen_sents = set()
    tmp_path = opt.lda_vec_path + '.tmp'
    # Write vectors to a temp file first; the header needs the final count.
    with codecs.open(tmp_path, encoding='utf-8', mode='w') as out:
        dump_lda_vec(docs_train, train_vecs, seen_sents, out)
    if opt.test_jsons:
        _, docs_test, _ = load_all_data(opt.test_jsons)
        test_vecs = topic_model.transform(docs_test)
        with codecs.open(tmp_path, encoding='utf-8', mode='a') as out:
            dump_lda_vec(docs_test, test_vecs, seen_sents, out)
    # Prepend the header, copy the body over, and drop the temp file.
    with codecs.open(tmp_path, encoding='utf-8') as fin, \
            codecs.open(opt.lda_vec_path, encoding='utf-8', mode='w') as fout:
        fout.write('{} {}\n'.format(len(seen_sents), opt.num_topics))
        for line in fin:
            fout.write(line)
    os.remove(tmp_path)
示例10: XY9
# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 或者: from sklearn.pipeline.Pipeline import transform [as 别名]
def XY9():
    """Feature set 9: impute, add returns, group on FinelineNumber dummies
    (keeping Weekday/Returns, no multiplier column), then densify."""
    X, y, X_test, X_test_index = load_xy()
    #### DON'T CHANGE BEFORE
    dfta = ft.DataFrameToArray()
    add_returns = ft.NGAddReturns()
    print("starting grouping")
    grouper = ft.GDummyKeepAndMultiplierTransform(
        ['FinelineNumber'], None, ['Weekday', 'Returns'])
    print("done grouping")
    steps = [
        ("imputer", ft.NGNAImputer()),
        ("add_returns", add_returns),
        ('grouper', grouper),
    ]
    ### DON'T CHANGE AFTER
    steps.append(("dfta", dfta))
    pipe = Pipeline(steps=steps)
    print("done with pipeline, now calculating")
    return {
        "X": pipe.fit_transform(X),
        "y": y,
        "X_test": pipe.transform(X_test),
        "X_test_index": X_test_index,
    }
示例11: XY8
# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 或者: from sklearn.pipeline.Pipeline import transform [as 别名]
def XY8():
    """Feature set 8: impute, add returns, group on DepartmentDescription
    dummies weighted by ScanCount (keeping Weekday/Returns), then densify."""
    X, y, X_test, X_test_index = load_xy()
    #### DON'T CHANGE BEFORE
    dfta = ft.DataFrameToArray()
    add_returns = ft.NGAddReturns()
    grouper = ft.GDummyKeepAndMultiplierTransform(
        ['DepartmentDescription'], 'ScanCount', ['Weekday', 'Returns'])
    steps = [
        ("imputer", ft.NGNAImputer()),
        ("add_returns", add_returns),
        ('grouper', grouper),
    ]
    ### DON'T CHANGE AFTER
    steps.append(("dfta", dfta))
    pipe = Pipeline(steps=steps)
    return {
        "X": pipe.fit_transform(X),
        "y": y,
        "X_test": pipe.transform(X_test),
        "X_test_index": X_test_index,
    }
示例12: XY7
# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 或者: from sklearn.pipeline.Pipeline import transform [as 别名]
def XY7():
    """Feature set 7: impute, then a (wrapped) DepartmentDescription grouping
    with ScanCount multiplier keeping Weekday; logs the pipeline via kh."""
    X, y, X_test, X_test_index = load_xy()
    #### DON'T CHANGE BEFORE
    dfta = ft.DataFrameToArray()
    grouper = ft.GDummyKeepAndMultiplierTransform(
        ['DepartmentDescription'], 'ScanCount', ['Weekday'])
    steps = [("imputer", ft.NGNAImputer())]
    steps.extend(ft.wrapStep(('grouper', grouper)))
    ### DON'T CHANGE AFTER
    steps.append(("dfta", dfta))
    pipe = Pipeline(steps=steps)
    kh.start_pipeline()
    kh.record_metric("validation", "start", "NA", "transform_pipeline",
                     str(pipe), "NA")
    return {
        "X": pipe.fit_transform(X),
        "y": y,
        "X_test": pipe.transform(X_test),
        "X_test_index": X_test_index,
    }
示例13: XY1
# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 或者: from sklearn.pipeline.Pipeline import transform [as 别名]
def XY1():
    """Feature set 1: impute, (wrapped) add-returns and dummy/keep grouping
    aggregated with sum and count, then densify; logs the pipeline via kh."""
    X, y, X_test, X_test_index = load_xy()
    ####### VARIABLES
    dfta = ft.DataFrameToArray()
    add_returns = ft.NGAddReturns()
    gdd = ft.GDummyAndKeepTransform(
        ['Weekday', 'DepartmentDescription'],
        ['ScanCount', 'Returns'],
        [np.sum, np.count_nonzero])  # Doesn't work!
    steps = [("imputer", ft.NGNAImputer())]
    steps.extend(ft.wrapStep(("add_returns", add_returns)))
    steps.extend(ft.wrapStep(('grouper', gdd)))
    steps.append(("dfta", dfta))
    pipe = Pipeline(steps=steps)
    kh.start_pipeline()
    kh.record_metric("validation", "start", "NA", "transform_pipeline",
                     str(pipe), "NA")
    return {
        "X": pipe.fit_transform(X),
        "y": y,
        "X_test": pipe.transform(X_test),
        "X_test_index": X_test_index,
    }
示例14: test_relevant_extraction
# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 或者: from sklearn.pipeline.Pipeline import transform [as 别名]
def test_relevant_extraction(self):
    """A Pipeline wrapping RelevantFeatureAugmenter should produce the
    expected relevant features for the F_x signal."""
    # Sanity-check the fixtures before exercising the pipeline.
    self.assertGreater(len(self.y), 0)
    self.assertGreater(len(self.df), 0)
    self.assertGreater(len(self.timeseries), 0)
    augmenter = RelevantFeatureAugmenter(column_id="id", column_sort="time")
    augmenter.set_timeseries_container(self.timeseries)
    pipeline = Pipeline([("relevant_augmenter", augmenter)])
    pipeline.fit(self.df, self.y)
    features = pipeline.transform(self.df)
    expected = {
        'F_x__abs_energy',
        'F_x__absolute_sum_of_changes',
        'F_x__ar_coefficient__k_10__coeff_0',
        'F_x__autocorrelation__lag_1',
        'F_x__binned_entropy__max_bins_10',
        'F_x__count_above_mean',
        'F_x__longest_strike_above_mean',
        'F_x__maximum',
        'F_x__mean_abs_change',
        'F_x__minimum',
        'F_x__quantile__q_0.1',
        'F_x__range_count__max_1__min_-1',
        'F_x__spkt_welch_density__coeff_2',
        'F_x__standard_deviation',
        'F_x__value_count__value_0',
        'F_x__variance',
        'F_x__variance_larger_than_standard_deviation',
    }
    # All expected features must be present (superset check).
    self.assertGreaterEqual(set(features.columns), expected)
    self.assertGreater(len(features), 0)
示例15: ExplainedVariance
# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 或者: from sklearn.pipeline.Pipeline import transform [as 别名]
class ExplainedVariance(FeatureVisualizer):
    """Visualize the explained variance of a PCA decomposition.

    The data is optionally centered and scaled, decomposed with PCA, and the
    per-component explained variance is plotted against the component index.

    Parameters
    ----------
    n_components : int or None
        Number of principal components to keep (all when None).
    ax : matplotlib Axes or None
        The axes to draw on.
    scale : bool
        Whether to scale features to unit variance before PCA.
    center : bool
        Whether to center features before PCA.
    colormap : sequence
        Color sequence used when drawing.

    Examples
    --------
    >>> visualizer = ExplainedVariance()
    >>> visualizer.fit(X)
    >>> visualizer.transform(X)
    >>> visualizer.poof()

    Notes
    -----
    """

    def __init__(self, n_components=None, ax=None, scale=True, center=True,
                 colormap=palettes.DEFAULT_SEQUENCE, **kwargs):
        super(ExplainedVariance, self).__init__(ax=ax, **kwargs)
        self.n_components = n_components
        self.center = center
        self.scale = scale
        self.colormap = colormap
        # Standardize per the center/scale flags, then decompose with PCA.
        self.pipeline = Pipeline([
            ('scale', StandardScaler(with_mean=self.center, with_std=self.scale)),
            ('pca', PCA(n_components=self.n_components)),
        ])
        self.pca_features = None

    @property
    def explained_variance_(self):
        # Explained variance of the fitted PCA step (last step in the pipeline).
        return self.pipeline.steps[-1][1].explained_variance_

    def fit(self, X, y=None):
        """Fit the pipeline on X and draw the explained-variance curve."""
        self.pipeline.fit(X)
        self.draw()
        return self

    def transform(self, X):
        """Project X through the fitted pipeline and cache the result."""
        self.pca_features = self.pipeline.transform(X)
        return self.pca_features

    def draw(self):
        curve = self.explained_variance_
        self.ax.plot(curve)
        return self.ax

    def finalize(self, **kwargs):
        # Set the title
        self.set_title('Explained Variance Plot')
        # Set the axes labels
        self.ax.set_ylabel('Explained Variance')
        self.ax.set_xlabel('Number of Components')