本文整理汇总了Python中sklearn.pipeline.FeatureUnion.fit_transform方法的典型用法代码示例。如果您正苦于以下问题:Python FeatureUnion.fit_transform方法的具体用法?Python FeatureUnion.fit_transform怎么用?Python FeatureUnion.fit_transform使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.pipeline.FeatureUnion
的用法示例。
在下文中一共展示了FeatureUnion.fit_transform方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_feature_stacker
# 需要导入模块: from sklearn.pipeline import FeatureUnion [as 别名]
# 或者: from sklearn.pipeline.FeatureUnion import fit_transform [as 别名]
def test_feature_stacker():
    """Sanity-check FeatureUnion stacking a PCA with a univariate filter."""
    iris = load_iris()
    features = iris.data
    features -= features.mean(axis=0)
    target = iris.target
    pca_tf = RandomizedPCA(n_components=2)
    kbest = SelectKBest(k=1)
    union = FeatureUnion([("pca", pca_tf), ("select", kbest)])
    union.fit(features, target)
    stacked = union.transform(features)
    # 2 PCA components + 1 selected column = 3 output features
    assert_equal(stacked.shape, (features.shape[0], 3))
    # the union's output must match each transformer run on its own
    assert_array_almost_equal(stacked[:, :-1], pca_tf.fit_transform(features))
    assert_array_equal(stacked[:, -1],
                       kbest.fit_transform(features, target).ravel())
    # sparse input should give the same (dense-equivalent) result
    sparse_features = sparse.csr_matrix(features)
    sparse_stacked = union.fit_transform(sparse_features, target)
    assert_array_almost_equal(stacked, sparse_stacked.toarray())
    # parameters of nested transformers are reachable via set_params
    union.set_params(select__k=2)
    assert_equal(union.fit_transform(features, target).shape,
                 (features.shape[0], 4))
示例2: test_set_feature_union_step_none
# 需要导入模块: from sklearn.pipeline import FeatureUnion [as 别名]
# 或者: from sklearn.pipeline.FeatureUnion import fit_transform [as 别名]
def test_set_feature_union_step_none():
    """Setting a FeatureUnion step to None must drop it from the output."""
    double = Mult(2)
    double.get_feature_names = lambda: ['x2']
    triple = Mult(3)
    triple.get_feature_names = lambda: ['x3']
    X = np.asarray([[1]])
    union = FeatureUnion([('m2', double), ('m3', triple)])
    # both steps active: two output columns
    assert_array_equal([[2, 3]], union.fit(X).transform(X))
    assert_array_equal([[2, 3]], union.fit_transform(X))
    assert_equal(['m2__x2', 'm3__x3'], union.get_feature_names())
    # disable the first step
    union.set_params(m2=None)
    assert_array_equal([[3]], union.fit(X).transform(X))
    assert_array_equal([[3]], union.fit_transform(X))
    assert_equal(['m3__x3'], union.get_feature_names())
    # disable the remaining step: output is empty
    union.set_params(m3=None)
    assert_array_equal([[]], union.fit(X).transform(X))
    assert_array_equal([[]], union.fit_transform(X))
    assert_equal([], union.get_feature_names())
    # a disabled step can be restored
    union.set_params(m3=triple)
    assert_array_equal([[3]], union.fit(X).transform(X))
示例3: test_feature_union
# 需要导入模块: from sklearn.pipeline import FeatureUnion [as 别名]
# 或者: from sklearn.pipeline.FeatureUnion import fit_transform [as 别名]
def test_feature_union():
    """Sanity-check FeatureUnion with a TruncatedSVD plus a univariate filter."""
    iris = load_iris()
    data = iris.data
    data -= data.mean(axis=0)
    target = iris.target
    svd_tf = TruncatedSVD(n_components=2, random_state=0)
    kbest = SelectKBest(k=1)
    union = FeatureUnion([("svd", svd_tf), ("select", kbest)])
    union.fit(data, target)
    combined = union.transform(data)
    # 2 SVD components + 1 selected column = 3 output features
    assert_equal(combined.shape, (data.shape[0], 3))
    # the union output must match each transformer applied independently
    assert_array_almost_equal(combined[:, :-1], svd_tf.fit_transform(data))
    assert_array_equal(combined[:, -1],
                       kbest.fit_transform(data, target).ravel())
    # sparse input; rebuild the union to control the random_state stream
    union = FeatureUnion([("svd", svd_tf), ("select", kbest)])
    sparse_data = sparse.csr_matrix(data)
    sparse_combined = union.fit_transform(sparse_data, target)
    assert_array_almost_equal(combined, sparse_combined.toarray())
    # nested parameters are settable via set_params
    union.set_params(select__k=2)
    assert_equal(union.fit_transform(data, target).shape, (data.shape[0], 4))
    # transformers lacking fit_transform still work (fit + transform fallback)
    union = FeatureUnion([("mock", TransfT()), ("svd", svd_tf),
                          ("select", kbest)])
    combined = union.fit_transform(data, target)
    assert_equal(combined.shape, (data.shape[0], 8))
示例4: test_feature_union_weights
# 需要导入模块: from sklearn.pipeline import FeatureUnion [as 别名]
# 或者: from sklearn.pipeline.FeatureUnion import fit_transform [as 别名]
def test_feature_union_weights():
    """FeatureUnion must scale each transformer's output by its weight."""
    iris = load_iris()
    data = iris.data
    target = iris.target
    pca_tf = RandomizedPCA(n_components=2, random_state=0)
    kbest = SelectKBest(k=1)
    # path 1: fit followed by transform
    union = FeatureUnion([("pca", pca_tf), ("select", kbest)],
                         transformer_weights={"pca": 10})
    union.fit(data, target)
    combined = union.transform(data)
    # path 2: single-shot fit_transform
    union = FeatureUnion([("pca", pca_tf), ("select", kbest)],
                         transformer_weights={"pca": 10})
    combined_fit_tf = union.fit_transform(data, target)
    # path 3: a transformer without a fit_transform method
    union = FeatureUnion([("mock", TransfT()), ("pca", pca_tf),
                          ("select", kbest)],
                         transformer_weights={"mock": 10})
    combined_wo_method = union.fit_transform(data, target)
    # compare against the transformers run standalone; a fresh pca call
    # controls the random_state stream
    assert_array_almost_equal(combined[:, :-1],
                              10 * pca_tf.fit_transform(data))
    assert_array_equal(combined[:, -1],
                       kbest.fit_transform(data, target).ravel())
    assert_array_almost_equal(combined_fit_tf[:, :-1],
                              10 * pca_tf.fit_transform(data))
    assert_array_equal(combined_fit_tf[:, -1],
                       kbest.fit_transform(data, target).ravel())
    assert_equal(combined_wo_method.shape, (data.shape[0], 7))
示例5: train_model
# 需要导入模块: from sklearn.pipeline import FeatureUnion [as 别名]
# 或者: from sklearn.pipeline.FeatureUnion import fit_transform [as 别名]
def train_model(trainset, testset):
word_vector = TfidfVectorizer(analyzer="word", ngram_range=(2,2), binary = False, max_features= 2000,min_df=1,decode_error="ignore")
# print word_vector
# print "works fine"
char_vector = TfidfVectorizer(ngram_range=(2,3), analyzer="char", binary = False, min_df = 1, max_features = 2000,decode_error= "ignore")
vectorizer =FeatureUnion([ ("chars", char_vector),("words", word_vector) ])
corpus = []
classes = []
testclasses = []
testcorpus = []
for item in trainset:
corpus.append(item['text'])
classes.append(item['label'])
for item in testset:
testcorpus.append(item['text'])
testclasses.append(item['label'])
# print "Training instances : ", len(classes)
# print "Testing instances : ", len(set(classes))
matrix = vectorizer.fit_transform(corpus)
testmatrix = vectorizer.fit_transform(testcorpus)
# print "feature count :. ", len(vectorizer.get_feature_names())
# print "training model"
X = matrix.toarray()
TX = testmatrix.toarray()
Ty= numpy.asarray(testclasses)
y = numpy.asarray(classes)
X_train, X_test, y_train, y_test= train_test_split(X,y,train_size=0.9999,test_size=.00001,random_state=0)
model = LinearSVC(dual=True, loss='l1')
# model = SVC()
# model = NuSVC()
# model = RandomForestClassifier()
#scores=cross_validation.cross_val_score(model,X,y)
#print "Accuracy "+ str(scores.mean())
# print y_pred
y_prob = model.fit(X_train, y_train).predict(TX)
# y_prob = OneVsRestClassifier(model).fit(X_train, y_train).predict(X_test)
# print(y_prob)
# cm = confusion_matrix(y_test, y_pred)
# cr = classification_report(y_test, y_pred)
# print cr
# print(cm)
# pl.matshow()
# pl.title('Confusion matrix#')
# pl.colorbar()
# pl.ylabel('True label')
# pl.xlabel('Predicted label')
# pl.show()
print accuracy_score(y_prob,Ty)
示例6: make_checkdata
# 需要导入模块: from sklearn.pipeline import FeatureUnion [as 别名]
# 或者: from sklearn.pipeline.FeatureUnion import fit_transform [as 别名]
def make_checkdata(mode="df"):
    """Build a tiny 2-image train/test sample for quick pipeline checks.

    mode="df": return (train_df, train_keys, test_df, test_keys) as
    reset-index dataframes with named columns.
    mode="feature": return (X_train, y_train, X_test) where the
    FeatureUnion and StandardScaler are fit on the training data only.
    """
    fu = FeatureUnion(transformer_list=f.feature_transformer_rule)
    Std = preprocessing.StandardScaler()
    _, _, _, train_gray_data, test_gray_data, _, labels = i_p.load_data()
    # keep only the first two images of each split for a fast check
    train_keys = train_gray_data.keys()[:2]
    train_inputs = {}
    train_labels = {}
    for i in xrange(len(train_keys)):
        input_ = train_gray_data[train_keys[i]]
        label = labels[train_keys[i]]
        train_inputs.update({train_keys[i]: input_})
        train_labels.update({train_keys[i]: label})
    test_keys = test_gray_data.keys()[:2]
    test_inputs = {}
    for i in xrange(len(test_keys)):
        input_ = test_gray_data[test_keys[i]]
        test_inputs.update({test_keys[i]: input_})
    train_df = f.make_data_df(train_inputs, train_labels)
    test_df = f.make_test_df(test_inputs)
    if mode == "df":
        train_df = train_df.reset_index()
        test_df = test_df.reset_index()
        train_df.columns = ["pngname", "input", "label"]
        test_df.columns = ["pngname", "input"]
        return train_df, train_keys, test_df, test_keys
    elif mode == "feature":
        X_train = fu.fit_transform(train_df)
        X_train = Std.fit_transform(X_train)
        y_train = np.concatenate(train_df["label"].apply(lambda x: x.flatten()))
        # BUGFIX: transform (not fit_transform) the test data so features
        # and scaling reuse the statistics learned on the training set
        X_test = fu.transform(test_df)
        X_test = Std.transform(X_test)
        return X_train, y_train, X_test
示例7: test_feature_union
# 需要导入模块: from sklearn.pipeline import FeatureUnion [as 别名]
# 或者: from sklearn.pipeline.FeatureUnion import fit_transform [as 别名]
def test_feature_union():
    """Sanity-check FeatureUnion: stacking, sparse input, cloning, errors."""
    iris = load_iris()
    data = iris.data
    data -= data.mean(axis=0)
    target = iris.target
    svd_tf = TruncatedSVD(n_components=2, random_state=0)
    kbest = SelectKBest(k=1)
    union = FeatureUnion([("svd", svd_tf), ("select", kbest)])
    union.fit(data, target)
    combined = union.transform(data)
    # 2 SVD components + 1 selected column = 3 output features
    assert_equal(combined.shape, (data.shape[0], 3))
    # output must match the individual transformers
    assert_array_almost_equal(combined[:, :-1], svd_tf.fit_transform(data))
    assert_array_equal(combined[:, -1],
                       kbest.fit_transform(data, target).ravel())
    # sparse input; a fresh union controls the random_state stream
    union = FeatureUnion([("svd", svd_tf), ("select", kbest)])
    sparse_data = sparse.csr_matrix(data)
    sparse_combined = union.fit_transform(sparse_data, target)
    assert_array_almost_equal(combined, sparse_combined.toarray())
    # cloning must deep-copy the transformer list without warnings
    union_clone = assert_no_warnings(clone, union)
    assert_false(union.transformer_list[0][1] is
                 union_clone.transformer_list[0][1])
    # nested parameters are settable via set_params
    union.set_params(select__k=2)
    assert_equal(union.fit_transform(data, target).shape, (data.shape[0], 4))
    # transformers lacking fit_transform still work
    union = FeatureUnion([("mock", Transf()), ("svd", svd_tf),
                          ("select", kbest)])
    combined = union.fit_transform(data, target)
    assert_equal(combined.shape, (data.shape[0], 8))
    # estimators without transform are rejected at construction time
    assert_raises_regex(TypeError,
                        'All estimators should implement fit and '
                        'transform.*\\bNoTrans\\b',
                        FeatureUnion,
                        [("transform", Transf()), ("no_transform", NoTrans())])
    # the constructor also accepts a tuple of tuples
    union = FeatureUnion((("svd", svd_tf), ("select", kbest)))
    union.fit(data, target)
示例8: train_model
# 需要导入模块: from sklearn.pipeline import FeatureUnion [as 别名]
# 或者: from sklearn.pipeline.FeatureUnion import fit_transform [as 别名]
def train_model(trainset):
    """Train a one-vs-rest LinearSVC on TF-IDF word/char n-grams.

    Splits trainset 80/20, writes the labels confused with the
    "anonEdited" class to the file named by sys.argv[2], shows the
    confusion matrix, and prints the accuracy.

    trainset: iterable of dicts with 'text' and 'label' keys.
    """
    word_vector = TfidfVectorizer(analyzer="word", ngram_range=(2,2), binary = False, max_features= 2000,min_df=1,decode_error="ignore")
    # print word_vector
    print "works fine"
    char_vector = TfidfVectorizer(ngram_range=(2,3), analyzer="char", binary = False, min_df = 1, max_features = 2000,decode_error= "ignore")
    # char- and word-level n-gram features are concatenated
    vectorizer =FeatureUnion([ ("chars", char_vector),("words", word_vector) ])
    corpus = []
    classes = []
    for item in trainset:
        corpus.append(item['text'])
        classes.append(item['label'])
    print "Training instances : ", 0.8*len(classes)
    print "Testing instances : ", 0.2*len(classes)
    matrix = vectorizer.fit_transform(corpus)
    print "feature count : ", len(vectorizer.get_feature_names())
    print "training model"
    X = matrix.toarray()
    y = numpy.asarray(classes)
    model =LinearSVC()
    X_train, X_test, y_train, y_test= train_test_split(X,y,train_size=0.8,test_size=.2,random_state=0)
    y_pred = OneVsRestClassifier(model).fit(X_train, y_train).predict(X_test)
    #y_prob = OneVsRestClassifier(model).fit(X_train, y_train).decision_function(X_test)
    #print y_prob
    #con_matrix = []
    #for row in range(len(y_prob)):
    # temp = [y_pred[row]]
    # for prob in y_prob[row]:
    # temp.append(prob)
    # con_matrix.append(temp)
    #for row in con_matrix:
    # output.write(str(row)+"\n")
    #print y_pred
    #print y_test
    # indices predicted / truly labelled as the "anonEdited" author
    res1=[i for i, j in enumerate(y_pred) if j == 'anonEdited']
    res2=[i for i, j in enumerate(y_test) if j == 'anonEdited']
    reset=[]
    # collect labels confused with "anonEdited" in either direction
    # (false positives first, then false negatives)
    for r in res1:
        if y_test[r] != "anonEdited":
            reset.append(y_test[r])
    for r in res2:
        if y_pred[r] != "anonEdited":
            reset.append(y_pred[r])
    # NOTE(review): file handle is never closed; rely on process exit
    output=open(sys.argv[2],"w")
    for suspect in reset:
        output.write(str(suspect)+"\n")
    cm = confusion_matrix(y_test, y_pred)
    print(cm)
    pl.matshow(cm)
    pl.title('Confusion matrix')
    pl.colorbar()
    pl.ylabel('True label')
    pl.xlabel('Predicted label')
    pl.show()
    print accuracy_score(y_pred,y_test)
示例9: test_feature_union_parallel
# 需要导入模块: from sklearn.pipeline import FeatureUnion [as 别名]
# 或者: from sklearn.pipeline.FeatureUnion import fit_transform [as 别名]
def test_feature_union_parallel():
    """FeatureUnion with n_jobs > 1 must match the serial result."""
    X = JUNK_FOOD_DOCS
    serial = FeatureUnion([("words", CountVectorizer(analyzer="word")),
                           ("chars", CountVectorizer(analyzer="char"))])
    parallel = FeatureUnion([("words", CountVectorizer(analyzer="word")),
                             ("chars", CountVectorizer(analyzer="char"))],
                            n_jobs=2)
    parallel2 = FeatureUnion([("words", CountVectorizer(analyzer="word")),
                              ("chars", CountVectorizer(analyzer="char"))],
                             n_jobs=2)
    serial.fit(X)
    expected = serial.transform(X)
    assert_equal(expected.shape[0], len(X))
    parallel.fit(X)
    got = parallel.transform(X)
    assert_equal(expected.shape, got.shape)
    assert_array_equal(expected.toarray(), got.toarray())
    # fit_transform should behave the same
    got2 = parallel2.fit_transform(X)
    assert_array_equal(expected.toarray(), got2.toarray())
    # transformers should stay fit after fit_transform
    got2 = parallel2.transform(X)
    assert_array_equal(expected.toarray(), got2.toarray())
示例10: prediction
# 需要导入模块: from sklearn.pipeline import FeatureUnion [as 别名]
# 或者: from sklearn.pipeline.FeatureUnion import fit_transform [as 别名]
def prediction(train_df, test_df, MODEL):
    """Fit a grid-searched model, dump coefficient/importance tables, and
    write a submission CSV of predicted Sales for test_df.

    train_df / test_df: pandas DataFrames; train_df must carry a "Sales"
    column and test_df an "Id" column.
    MODEL: key into the module-level clf_dict of estimators + param grids.
    """
    print "... start prediction"
    fu_obj = FeatureUnion(transformer_list=features.feature_list)
    train_X = fu_obj.fit_transform(train_df)
    train_y = train_df["Sales"].as_matrix()
    # NOTE(review): "paramteters" is misspelled but must match the key used
    # when clf_dict was built -- keep the two in sync
    clf = GridSearchCV(estimator=clf_dict[MODEL]["clf"],
                       param_grid=clf_dict[MODEL]["paramteters"],
                       n_jobs=3, scoring=rmspe, verbose=1)
    clf.fit(train_X, train_y)
    print clf.best_score_
    index_sr = pd.Series(get_split_feature_list(fu_obj), name="Feature")
    # linear models expose coef_: dump one coefficient per feature
    if hasattr(clf.best_estimator_, "coef_"):
        coef_sr = pd.Series(clf.best_estimator_.coef_, name="Coef")
        coef_df = pd.concat([index_sr, coef_sr], axis=1).set_index("Feature")
        coeffile = SUBMISSION + "coef_%s.csv" % MODEL
        coef_df.to_csv(coeffile)
    # tree ensembles expose feature_importances_ instead
    if hasattr(clf.best_estimator_, "feature_importances_"):
        coef_sr = pd.Series(clf.best_estimator_.feature_importances_,
                            name="Importance")
        coef_df = pd.concat([index_sr, coef_sr], axis=1).set_index("Feature")
        coeffile = SUBMISSION + "importance_%s.csv" % MODEL
        coef_df.to_csv(coeffile)
    print "... start y_pred"
    # transform (not fit_transform): reuse the feature space fit on train_df
    test_X = fu_obj.transform(test_df)
    y_pred = clf.predict(test_X)
    pred_sr = pd.Series(y_pred, name="Sales", index=test_df["Id"])
    submissionfile = SUBMISSION + "submission_%s.csv" % MODEL
    pred_sr.to_csv(submissionfile, header=True, index_label="ID")
示例11: convert_testdata
# 需要导入模块: from sklearn.pipeline import FeatureUnion [as 别名]
# 或者: from sklearn.pipeline.FeatureUnion import fit_transform [as 别名]
def convert_testdata(test_gray_data, feature_rule=f.feature_transformer_rule):
    """Turn grayscale test images into an (unscaled) feature matrix.

    test_gray_data: mapping of image key -> grayscale array, consumed by
    f.make_test_df. feature_rule: transformer list for the FeatureUnion.
    Returns X_test, the stacked feature matrix.
    """
    data_df = f.make_test_df(test_gray_data)
    fu = FeatureUnion(transformer_list=feature_rule)
    Std = preprocessing.StandardScaler()
    # NOTE(review): the union is fit on the *test* data here because no
    # pre-fit transformer is passed in; this is only safe if the
    # transformers in feature_rule are stateless -- confirm.
    X_test = fu.fit_transform(data_df)
    # scaling is disabled; Std is created but intentionally unused
    #X_test = Std.fit_transform(X_test)
    return X_test
示例12: set_traindata
# 需要导入模块: from sklearn.pipeline import FeatureUnion [as 别名]
# 或者: from sklearn.pipeline.FeatureUnion import fit_transform [as 别名]
def set_traindata(df, key):
    """Convert a labeled dataframe into a standardized (X, y) training pair.

    df: dataframe with a "label" column of arrays (flattened into y).
    key: unused here; kept for interface compatibility with callers.
    """
    extractor = FeatureUnion(transformer_list=f.feature_transformer_rule)
    scaler = preprocessing.StandardScaler()
    feature_matrix = extractor.fit_transform(df)
    targets = np.concatenate(df["label"].apply(lambda arr: arr.flatten()))
    standardized = scaler.fit_transform(feature_matrix)
    return (standardized, targets)
示例13: cv_score
# 需要导入模块: from sklearn.pipeline import FeatureUnion [as 别名]
# 或者: from sklearn.pipeline.FeatureUnion import fit_transform [as 别名]
def cv_score(train_df, MODEL):
    """Print 5-fold cross-validation RMSPE scores for a grid-searched model.

    train_df: pandas DataFrame with a "Sales" target column.
    MODEL: key into the module-level clf_dict of estimators + param grids.
    """
    print "... start cross validation"
    fu_obj = FeatureUnion(transformer_list=features.feature_list)
    train_X = fu_obj.fit_transform(train_df)
    train_y = train_df["Sales"].as_matrix()
    # nested CV: the grid search (inner CV) is itself evaluated by an
    # outer 5-fold cross_val_score
    clf = GridSearchCV(estimator=clf_dict[MODEL]["clf"],
                       param_grid=clf_dict[MODEL]["paramteters"],
                       n_jobs=-1, scoring=rmspe, cv=None)
    print cross_val_score(clf, train_X, train_y, scoring=rmspe, cv=5, n_jobs=3)
示例14: convert_traindata
# 需要导入模块: from sklearn.pipeline import FeatureUnion [as 别名]
# 或者: from sklearn.pipeline.FeatureUnion import fit_transform [as 别名]
def convert_traindata(train_gray_data, labels):
    """Build a standardized feature matrix and flattened label vector.

    train_gray_data: mapping of image key -> grayscale array.
    labels: mapping of image key -> label array.
    Returns (X_train, y_train).
    """
    frame = f.make_data_df(train_gray_data, labels)
    extractor = FeatureUnion(transformer_list=f.feature_transformer_rule)
    scaler = preprocessing.StandardScaler()
    raw_features = extractor.fit_transform(frame)
    y_train = np.concatenate(frame["label"].apply(lambda arr: arr.flatten()))
    X_train = scaler.fit_transform(raw_features)
    return X_train, y_train
示例15: get_data
# 需要导入模块: from sklearn.pipeline import FeatureUnion [as 别名]
# 或者: from sklearn.pipeline.FeatureUnion import fit_transform [as 别名]
def get_data():
    '''
    Load the training images and labels and return (X, y) arrays.
    X is the FeatureUnion output; y is the flattened per-pixel labels.
    :rtype: tuple
    '''
    _, _, _, train_gray_data, _, _, labels = i_p.load_data()
    frame = f.make_data_df(train_gray_data, labels)
    extractor = FeatureUnion(transformer_list=f.feature_transformer_rule)
    feature_matrix = extractor.fit_transform(frame)
    target = np.concatenate(frame["label"].apply(lambda arr: arr.flatten()))
    return (feature_matrix, target)