本文整理汇总了Python中sklearn.pipeline.FeatureUnion.transform方法的典型用法代码示例。如果您正苦于以下问题：Python FeatureUnion.transform方法的具体用法？Python FeatureUnion.transform怎么用？Python FeatureUnion.transform使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.pipeline.FeatureUnion的用法示例。
在下文中一共展示了FeatureUnion.transform方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_set_feature_union_steps
# 需要导入模块: from sklearn.pipeline import FeatureUnion [as 别名]
# 或者: from sklearn.pipeline.FeatureUnion import transform [as 别名]
def test_set_feature_union_steps():
    """Check that FeatureUnion picks up replaced transformers.

    The transformer list is swapped in three ways (plain attribute
    assignment, ``set_params`` with a whole list, ``set_params`` for one
    named step). After every swap both the transformed output for the input
    ``[[1]]`` and the prefixed feature names are verified.
    """
    times2, times3, times5 = Mult(2), Mult(3), Mult(5)
    # Attach a feature-name stub to each multiplier so that
    # FeatureUnion.get_feature_names() has something to prefix.
    times2.get_feature_names = lambda: ["x2"]
    times3.get_feature_names = lambda: ["x3"]
    times5.get_feature_names = lambda: ["x5"]

    def check(union, expected_rows, expected_names):
        # Every scenario asserts the same two things: output, then names.
        assert_array_equal(expected_rows, union.transform(np.asarray([[1]])))
        assert_equal(expected_names, union.get_feature_names())

    ft = FeatureUnion([("m2", times2), ("m3", times3)])
    check(ft, [[2, 3]], ["m2__x2", "m3__x3"])

    # Replace the list by assigning the attribute directly.
    ft.transformer_list = [("m5", times5)]
    check(ft, [[5]], ["m5__x5"])

    # Replace the whole list through set_params.
    ft.set_params(transformer_list=[("mock", times3)])
    check(ft, [[3]], ["mock__x3"])

    # Replace a single named step through set_params.
    ft.set_params(mock=times5)
    check(ft, [[5]], ["mock__x5"])
示例2: test_feature_union_parallel
# 需要导入模块: from sklearn.pipeline import FeatureUnion [as 别名]
# 或者: from sklearn.pipeline.FeatureUnion import transform [as 别名]
def test_feature_union_parallel():
    """FeatureUnion must give the same output with and without ``n_jobs=2``."""
    docs = JUNK_FOOD_DOCS

    def build_steps():
        # Fresh, unfitted vectorizers for each union so no state is shared.
        return [
            ("words", CountVectorizer(analyzer="word")),
            ("chars", CountVectorizer(analyzer="char")),
        ]

    serial = FeatureUnion(build_steps())
    parallel_fit = FeatureUnion(build_steps(), n_jobs=2)
    parallel_fit_transform = FeatureUnion(build_steps(), n_jobs=2)

    # Serial reference result.
    serial.fit(docs)
    expected = serial.transform(docs)
    assert_equal(expected.shape[0], len(docs))

    # fit() followed by transform() on the parallel union.
    parallel_fit.fit(docs)
    actual = parallel_fit.transform(docs)
    assert_equal(expected.shape, actual.shape)
    assert_array_equal(expected.toarray(), actual.toarray())

    # fit_transform() must produce the same matrix.
    actual = parallel_fit_transform.fit_transform(docs)
    assert_array_equal(expected.toarray(), actual.toarray())

    # The transformers must remain fitted after fit_transform().
    actual = parallel_fit_transform.transform(docs)
    assert_array_equal(expected.toarray(), actual.toarray())
示例3: test_feature_union
# 需要导入模块: from sklearn.pipeline import FeatureUnion [as 别名]
# 或者: from sklearn.pipeline.FeatureUnion import transform [as 别名]
def test_feature_union(self):
    """Smoke-test a FeatureUnion built from several featurizer modules.

    The test passes as long as fitting and transforming raise no exception;
    the transformed output itself is not inspected.
    """
    module_names = ["bag-of-words", "entities"]
    # Only the first element of the returned pair is needed here.
    transformers, _ = modules_to_dictionary(module_names)
    union = FeatureUnion(transformers)
    union.fit(texts_entities, outcomes)
    union.transform(["unknown"])
示例4: pca
# 需要导入模块: from sklearn.pipeline import FeatureUnion [as 别名]
# 或者: from sklearn.pipeline.FeatureUnion import transform [as 别名]
def pca(x, y, test_x, n_features=-1):
    """Fit a PCA + univariate-selection feature union and apply it.

    Args:
        x: Training feature matrix. Must expose ``shape[1]`` when
            ``n_features`` is left at its default.
        y: Training targets, forwarded to ``FeatureUnion.fit``.
        test_x: Held-out matrix, transformed with the union fitted on ``x``.
        n_features: Number of PCA components. The default ``-1`` means
            ``ceil(sqrt(number of columns of x))``.

    Returns:
        A ``(train, test)`` tuple holding ``x`` and ``test_x`` after
        transformation by the fitted union.
    """
    if n_features == -1:
        n_features = int(np.ceil(np.sqrt(x.shape[1])))
    reducer = PCA(n_components=n_features)
    # Floor division keeps ``k`` an integer on Python 3 as well; with ``/``
    # SelectKBest would be handed a float there, which it does not accept.
    selector = SelectKBest(k=n_features // 2)
    combined_features = FeatureUnion([("pca", reducer),
                                      ("univ_select", selector)])
    combined_features.fit(x, y)
    return combined_features.transform(x), combined_features.transform(test_x)
示例5: prediction
# 需要导入模块: from sklearn.pipeline import FeatureUnion [as 别名]
# 或者: from sklearn.pipeline.FeatureUnion import transform [as 别名]
def prediction(train_df, test_df, MODEL):
    """Grid-search the model named ``MODEL`` and write diagnostics and a submission.

    Args:
        train_df: Training frame; its ``"Sales"`` column is the target and
            the whole frame is fed to the feature union.
        test_df: Test frame; its ``"Id"`` column becomes the index of the
            submission file.
        MODEL: Key into ``clf_dict``; also embedded in the output file names.

    Side effects:
        Prints progress and the best grid-search score. Writes
        ``coef_<MODEL>.csv`` and/or ``importance_<MODEL>.csv`` when the best
        estimator exposes ``coef_`` / ``feature_importances_``, and always
        writes ``submission_<MODEL>.csv``, all under the ``SUBMISSION`` prefix.
    """
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("... start prediction")
    fu_obj = FeatureUnion(transformer_list=features.feature_list)
    train_X = fu_obj.fit_transform(train_df)
    # ``Series.as_matrix()`` was removed in pandas 1.0; ``.values`` returns
    # the same ndarray and is available in old releases too.
    train_y = train_df["Sales"].values
    clf = GridSearchCV(estimator=clf_dict[MODEL]["clf"],
                       # "paramteters" (sic) is the key this block reads from
                       # clf_dict; presumably it matches the definition there.
                       param_grid=clf_dict[MODEL]["paramteters"],
                       n_jobs=3, scoring=rmspe, verbose=1)
    clf.fit(train_X, train_y)
    print(clf.best_score_)

    index_sr = pd.Series(get_split_feature_list(fu_obj), name="Feature")
    best = clf.best_estimator_
    # Both per-feature reports share the same layout; only the estimator
    # attribute, the column name and the file-name template differ.
    reports = (
        ("coef_", "Coef", "coef_%s.csv"),
        ("feature_importances_", "Importance", "importance_%s.csv"),
    )
    for attr, column, template in reports:
        if hasattr(best, attr):
            values_sr = pd.Series(getattr(best, attr), name=column)
            report_df = pd.concat([index_sr, values_sr],
                                  axis=1).set_index("Feature")
            report_df.to_csv(SUBMISSION + template % MODEL)

    print("... start y_pred")
    test_X = fu_obj.transform(test_df)
    y_pred = clf.predict(test_X)
    pred_sr = pd.Series(y_pred, name="Sales", index=test_df["Id"])
    submissionfile = SUBMISSION + "submission_%s.csv" % MODEL
    pred_sr.to_csv(submissionfile, header=True, index_label="ID")
示例6: test_feature_stacker
# 需要导入模块: from sklearn.pipeline import FeatureUnion [as 别名]
# 或者: from sklearn.pipeline.FeatureUnion import transform [as 别名]
def test_feature_stacker():
    """Basic sanity check: RandomizedPCA and SelectKBest stacked by FeatureUnion."""
    iris = load_iris()
    X = iris.data
    X -= X.mean(axis=0)  # centre the columns in place
    y = iris.target
    n_samples = X.shape[0]

    pca = RandomizedPCA(n_components=2)
    select = SelectKBest(k=1)
    union = FeatureUnion([("pca", pca), ("select", select)])
    union.fit(X, y)
    dense_out = union.transform(X)
    # Two PCA components plus one selected column.
    assert_equal(dense_out.shape, (n_samples, 3))

    # The leading columns come from the PCA, the last one from the selector.
    assert_array_almost_equal(dense_out[:, :-1], pca.fit_transform(X))
    assert_array_equal(dense_out[:, -1],
                       select.fit_transform(X, y).ravel())

    # Sparse input must give the same numbers.
    sparse_out = union.fit_transform(sparse.csr_matrix(X), y)
    assert_array_almost_equal(dense_out, sparse_out.toarray())

    # Nested parameters are reachable through set_params.
    union.set_params(select__k=2)
    assert_equal(union.fit_transform(X, y).shape, (n_samples, 4))
示例7: test_feature_union
# 需要导入模块: from sklearn.pipeline import FeatureUnion [as 别名]
# 或者: from sklearn.pipeline.FeatureUnion import transform [as 别名]
def test_feature_union():
    """Basic sanity check: TruncatedSVD and SelectKBest combined by FeatureUnion."""
    iris = load_iris()
    X = iris.data
    X -= X.mean(axis=0)  # centre the columns in place
    y = iris.target
    n_samples = X.shape[0]

    svd = TruncatedSVD(n_components=2, random_state=0)
    select = SelectKBest(k=1)
    union = FeatureUnion([("svd", svd), ("select", select)])
    union.fit(X, y)
    dense_out = union.transform(X)
    # Two SVD components plus one selected column.
    assert_equal(dense_out.shape, (n_samples, 3))

    # The leading columns come from the SVD, the last one from the selector.
    assert_array_almost_equal(dense_out[:, :-1], svd.fit_transform(X))
    assert_array_equal(dense_out[:, -1],
                       select.fit_transform(X, y).ravel())

    # Rebuild the union (the same svd/select instances are reused) and
    # refit it on a CSR copy of X; the numbers must match the dense run.
    union = FeatureUnion([("svd", svd), ("select", select)])
    sparse_out = union.fit_transform(sparse.csr_matrix(X), y)
    assert_array_almost_equal(dense_out, sparse_out.toarray())

    # Nested parameters are reachable through set_params.
    union.set_params(select__k=2)
    assert_equal(union.fit_transform(X, y).shape, (n_samples, 4))

    # A transformer without its own fit_transform must work as well.
    union = FeatureUnion([("mock", TransfT()), ("svd", svd), ("select", select)])
    dense_out = union.fit_transform(X, y)
    assert_equal(dense_out.shape, (n_samples, 8))
示例8: test_feature_union_weights
# 需要导入模块: from sklearn.pipeline import FeatureUnion [as 别名]
# 或者: from sklearn.pipeline.FeatureUnion import transform [as 别名]
def test_feature_union_weights():
    """FeatureUnion must scale each transformer's output by its weight."""
    iris = load_iris()
    X = iris.data
    y = iris.target
    pca = RandomizedPCA(n_components=2, random_state=0)
    select = SelectKBest(k=1)
    pca_steps = [("pca", pca), ("select", select)]

    # Path 1: fit() followed by transform().
    union = FeatureUnion(list(pca_steps), transformer_weights={"pca": 10})
    union.fit(X, y)
    out_fit_then_transform = union.transform(X)

    # Path 2: fit_transform() in one call.
    union = FeatureUnion(list(pca_steps), transformer_weights={"pca": 10})
    out_fit_transform = union.fit_transform(X, y)

    # Path 3: a transformer that lacks fit_transform is part of the union.
    union = FeatureUnion([("mock", TransfT())] + pca_steps,
                         transformer_weights={"mock": 10})
    out_with_mock = union.fit_transform(X, y)

    # Compare both weighted paths against the stand-alone transformers:
    # PCA columns are multiplied by 10, the selected column is untouched.
    for produced in (out_fit_then_transform, out_fit_transform):
        assert_array_almost_equal(produced[:, :-1], 10 * pca.fit_transform(X))
        assert_array_equal(produced[:, -1],
                           select.fit_transform(X, y).ravel())
    assert_equal(out_with_mock.shape, (X.shape[0], 7))
示例9: test_same_result
# 需要导入模块: from sklearn.pipeline import FeatureUnion [as 别名]
# 或者: from sklearn.pipeline.FeatureUnion import transform [as 别名]
def test_same_result(self):
    """Local FeatureUnion and SparkFeatureUnion must agree on names and output."""
    X, Z = self.make_text_rdd(2)

    loc_char = CountVectorizer(analyzer="char_wb", ngram_range=(3, 3))
    dist_char = SparkCountVectorizer(analyzer="char_wb", ngram_range=(3, 3))
    loc_word = CountVectorizer(analyzer="word")
    dist_word = SparkCountVectorizer(analyzer="word")

    def local_steps():
        return [("chars", loc_char), ("words", loc_word)]

    def spark_steps():
        return [("chars", dist_char), ("words", dist_word)]

    def assert_same_output(local_result, spark_result):
        # The distributed result is collected and stacked into one sparse
        # matrix before it is compared with the local one.
        stacked = sp.vstack(spark_result.collect())
        assert_array_equal(local_result.toarray(), stacked.toarray())

    loc_union = FeatureUnion(local_steps())
    dist_union = SparkFeatureUnion(spark_steps())

    # Feature names must match after fitting.
    loc_union.fit(X)
    dist_union.fit(Z)
    assert_equal(
        loc_union.get_feature_names(),
        dist_union.get_feature_names()
    )

    # transform() results must match.
    assert_same_output(loc_union.transform(X), dist_union.transform(Z))

    # fit_transform() results must match.
    assert_same_output(loc_union.fit_transform(X),
                       dist_union.fit_transform(Z))

    # The same must hold with n_jobs=2.
    loc_union_par = FeatureUnion(local_steps(), n_jobs=2)
    dist_union_par = SparkFeatureUnion(spark_steps(), n_jobs=2)
    loc_union_par.fit(X)
    dist_union_par.fit(Z)
    assert_same_output(loc_union_par.transform(X),
                       dist_union_par.transform(Z))
示例10: q5_feature_UNION
# 需要导入模块: from sklearn.pipeline import FeatureUnion [as 别名]
# 或者: from sklearn.pipeline.FeatureUnion import transform [as 别名]
class q5_feature_UNION(BaseEstimator, RegressorMixin, TransformerMixin):
    """Average the outputs of three transformers joined by a FeatureUnion."""

    def __init__(self):
        # The union concatenates the outputs of the three wrapped models.
        self.q5_feature_UNION = FeatureUnion([
            ('q2_mlm_KNN', q2_mlm_KNN()),
            ('q3_mlm_RIDGE', q3_mlm_RIDGE()),
            ('q4_mlm_RIDGE', q4_mlm_RIDGE()),
        ])

    def transform(self, X):
        """Return the mean of the union's output for ``X`` as a Python scalar."""
        model_union = self.q5_feature_UNION.transform(X)
        # ``np.asscalar`` was deprecated in NumPy 1.16 and later removed;
        # ``.item()`` is the equivalent it was a thin wrapper around.
        prediction = np.average(model_union).item()
        return prediction
示例11: fit_logreg
# 需要导入模块: from sklearn.pipeline import FeatureUnion [as 别名]
# 或者: from sklearn.pipeline.FeatureUnion import transform [as 别名]
def fit_logreg(self):
    """Fit a logistic regression on normalised-average word2vec features.

    Returns:
        A ``(name, estimator)`` tuple. ``name`` is ``'logreg(<names>)'`` with
        the comma-joined names of the active feature extractors, and
        ``estimator`` is a Pipeline of the feature union followed by the
        classifier fitted here.
    """
    tokenizer = Pipeline([
        ('tokenize', Map(compose(tokenize, normalize_special, unescape))),
        ('normalize', MapTokens(normalize_elongations)),
    ])
    tokenize_sense = CachedFitTransform(tokenizer, self.memory)

    # Only the Google News "norm average" extractor is active. Alternatives
    # that were tried and are currently disabled:
    #   w2v_doc              - Doc2VecTransform over Doc2Vec(dm=0, dbow_words=1,
    #                          size=100, window=10, hs=0, negative=5,
    #                          sample=1e-3, min_count=1, iter=20, workers=16),
    #                          fitted on train / unsup[:10**6] / val / test
    #   w2v_word_avg         - Word2VecAverage, Word2Vec fitted on unsup[:10**6]
    #   w2v_word_avg_google  - Word2VecAverage on the Google News vectors
    #   w2v_word_norm_avg    - Word2VecNormAverage, Word2Vec fitted on unsup[:10**6]
    #   w2v_word_max         - Word2VecMax, Word2Vec fitted on unsup[:10**6]
    #   w2v_word_max_google  - Word2VecMax on the Google News vectors
    #   w2v_word_inv         - Word2VecInverse (sample=0), fitted on
    #                          train / unsup[:10**5] / val / test
    # The locally trained Word2Vec variants used sg=1, size=100, window=10,
    # hs=0, negative=5, sample=1e-3, min_count=1, iter=20, workers=16.
    google_norm_avg = Pipeline([
        ('tokenize', tokenize_sense),
        ('feature', Word2VecNormAverage(joblib.load('data/google/GoogleNews-vectors-negative300.pickle'))),
    ])
    features = FeatureUnion([
        ('w2v_word_norm_avg_google', google_norm_avg),
    ])

    classifier = LogisticRegression()
    with temp_log_level({'gensim.models.word2vec': logging.INFO}):
        # The union is used for transform() without a prior fit() call.
        train_matrix = features.transform(self.train_docs)
        classifier.fit(train_matrix, self.train_labels())

    estimator = Pipeline([('features', features), ('classifier', classifier)])
    feature_names = ','.join(name for name, _ in features.transformer_list)
    return 'logreg({})'.format(feature_names), estimator
示例12: MuscleClassifier
# 需要导入模块: from sklearn.pipeline import FeatureUnion [as 别名]
# 或者: from sklearn.pipeline.FeatureUnion import transform [as 别名]
class MuscleClassifier():
    """Classifies raw exercise / muscle-group text into muscle-group labels.

    Text is vectorized with a union of character and word n-gram counts and
    classified with a Bernoulli naive Bayes model.
    """

    def __init__(self, auto_load=True):
        """Initialize the classifier.

        Args:
            auto_load: When true, restore the model, label encoder and
                vectorizer from the pickles under ``modules/pickled/``.
                Otherwise start from a fresh, untrained ``BernoulliNB``;
                ``train`` must then be called before ``predict``.
        """
        if auto_load:
            # NOTE(security): pickle.load can execute arbitrary code; these
            # files must come from a trusted source.
            self.model = self._load_pickle('modules/pickled/muscle_classifier.p')
            self.le = self._load_pickle('modules/pickled/muscle_classifier_le.p')
            self.vectorizer = self._load_pickle('modules/pickled/muscle_classifier_vectorizer.p')
        else:
            self.model = BernoulliNB()

    @staticmethod
    def _load_pickle(path):
        """Unpickle ``path``, closing the file handle afterwards."""
        # Pickles are binary data: 'rb' is required on Python 3 and is also
        # the safe mode on Python 2.
        with open(path, 'rb') as handle:
            return pickle.load(handle)

    def train(self, muscle_groups, labels):
        """Vectorize the raw input, fit the model and print training accuracy.

        Args:
            muscle_groups: Iterable of raw text samples.
            labels: Text labels, one per sample.
        """
        # Label encoder turns the text labels into integers.
        self.le = preprocessing.LabelEncoder()

        # Character and word n-gram counts, merged through a FeatureUnion.
        char_vzr = feature_extraction.text.CountVectorizer(lowercase=True, ngram_range=(3,8), analyzer='char', encoding='utf-8')
        word_vzr = feature_extraction.text.CountVectorizer(lowercase=True, ngram_range=(1,5), analyzer='word', encoding='utf-8')
        self.vectorizer = FeatureUnion([('char',char_vzr),('word',word_vzr)])

        # Transform the input and the labels.
        X = self.vectorizer.fit_transform(muscle_groups).toarray()
        Y = self.le.fit_transform(labels)

        # Fit the model, then run inference on the training data.
        self.model.fit(X,Y)
        y = self.model.predict(X)

        # Accuracy is the fraction of predictions that MATCH the labels;
        # counting mismatches would report the error rate instead.
        print("Training Accuracy: %f " % (sum(y == Y)/float(len(Y))))

    def predict(self, exercises):
        """Vectorize raw input and return the predicted muscle-group labels."""
        X = self.vectorizer.transform(exercises).toarray()
        y = self.model.predict(X)
        # Map the predicted integer classes back to the original labels.
        return self.le.classes_[y]
示例13: test_feature_stacker_weights
# 需要导入模块: from sklearn.pipeline import FeatureUnion [as 别名]
# 或者: from sklearn.pipeline.FeatureUnion import transform [as 别名]
def test_feature_stacker_weights():
    """The feature stacker must scale the PCA output by its weight."""
    iris = load_iris()
    X, y = iris.data, iris.target
    pca = RandomizedPCA(n_components=2)
    select = SelectKBest(k=1)
    union = FeatureUnion(
        [("pca", pca), ("select", select)],
        transformer_weights={"pca": 10},
    )
    union.fit(X, y)
    weighted = union.transform(X)

    # PCA columns are multiplied by 10; the selected column is untouched.
    pca_part, select_part = weighted[:, :-1], weighted[:, -1]
    assert_array_almost_equal(pca_part, 10 * pca.fit_transform(X))
    assert_array_equal(select_part,
                       select.fit_transform(X, y).ravel())
示例14: test_feature_union
# 需要导入模块: from sklearn.pipeline import FeatureUnion [as 别名]
# 或者: from sklearn.pipeline.FeatureUnion import transform [as 别名]
def test_feature_union():
    """Basic sanity check for FeatureUnion, including clone and validation."""
    iris = load_iris()
    X = iris.data
    X -= X.mean(axis=0)  # centre the columns in place
    y = iris.target
    n_samples = X.shape[0]

    svd = TruncatedSVD(n_components=2, random_state=0)
    select = SelectKBest(k=1)
    union = FeatureUnion([("svd", svd), ("select", select)])
    union.fit(X, y)
    dense_out = union.transform(X)
    # Two SVD components plus one selected column.
    assert_equal(dense_out.shape, (n_samples, 3))

    # The leading columns come from the SVD, the last one from the selector.
    assert_array_almost_equal(dense_out[:, :-1], svd.fit_transform(X))
    assert_array_equal(dense_out[:, -1],
                       select.fit_transform(X, y).ravel())

    # Rebuild the union (the same svd/select instances are reused) and
    # refit it on a CSR copy of X; the numbers must match the dense run.
    union = FeatureUnion([("svd", svd), ("select", select)])
    sparse_out = union.fit_transform(sparse.csr_matrix(X), y)
    assert_array_almost_equal(dense_out, sparse_out.toarray())

    # Cloning must not warn and must not share the transformer instances.
    cloned = assert_no_warnings(clone, union)
    assert_false(union.transformer_list[0][1] is cloned.transformer_list[0][1])

    # Nested parameters are reachable through set_params.
    union.set_params(select__k=2)
    assert_equal(union.fit_transform(X, y).shape, (n_samples, 4))

    # A transformer without its own fit_transform must work as well.
    union = FeatureUnion([("mock", Transf()), ("svd", svd), ("select", select)])
    dense_out = union.fit_transform(X, y)
    assert_equal(dense_out.shape, (n_samples, 8))

    # Construction must fail when an element has no transform method.
    assert_raises_regex(TypeError,
                        'All estimators should implement fit and '
                        'transform.*\\bNoTrans\\b',
                        FeatureUnion,
                        [("transform", Transf()), ("no_transform", NoTrans())])

    # A tuple of steps is accepted in place of a list.
    union = FeatureUnion((("svd", svd), ("select", select)))
    union.fit(X, y)
示例15: test_reference_plusplus_legacy
# 需要导入模块: from sklearn.pipeline import FeatureUnion [as 别名]
# 或者: from sklearn.pipeline.FeatureUnion import transform [as 别名]
def test_reference_plusplus_legacy(self):
    """Compare the legacy two-scale V1Like output with the stored reference."""
    sample_paths = ['./v1like_ref/sample_{}.png'.format(i) for i in range(10)]
    expected = loadmat('./v1like_ref/reference_v1like_result_plusplus.mat')['feature_matrix']
    # Images are read eagerly into a list: FeatureUnion appears to need a
    # real sequence here rather than an iterator.
    X = [imread(path) for path in sample_paths]

    scale_1 = v1like.V1Like(pars_baseline='simple_plus', legacy=True, debug=debug)
    scale_2 = v1like.V1Like(pars_baseline='simple_plusplus_2nd_scale', legacy=True, debug=debug)
    two_scale_union = FeatureUnion([
        ('scale_1', scale_1),
        ('scale_2', scale_2),
    ])

    with Timer('simple_plusplus legacy version'):
        actual = two_scale_union.transform(X)

    self.assertEqual(expected.dtype, actual.dtype)
    self.assertEqual(expected.shape, actual.shape)
    if debug:
        # Largest absolute deviation from the reference.
        print(abs(expected[:, :] - actual[:, :]).max())
    self.assertTrue(np.allclose(expected, actual, atol=tol))