本文整理汇总了Python中sklearn.pipeline.FeatureUnion类的典型用法代码示例。如果您正苦于以下问题:Python FeatureUnion类的具体用法?Python FeatureUnion怎么用?Python FeatureUnion使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了FeatureUnion类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: pca_kpca
def pca_kpca(train_data, labels):
    """Fit a union of PCA, TruncatedSVD and KernelPCA on the training data.

    Parameters
    ----------
    train_data : array-like of shape (n_samples, n_features)
    labels : array-like of shape (n_samples,); passed to fit, though these
        unsupervised transformers ignore it.

    Returns
    -------
    The fitted FeatureUnion.
    """
    # BUG FIX: make_union already returns a FeatureUnion; the original then
    # wrapped it in FeatureUnion(estimators), which expects a list of
    # (name, transformer) tuples and would fail at fit time.
    combined = make_union(PCA(), TruncatedSVD(), KernelPCA())
    combined.fit(train_data, labels)
    return combined
示例2: test_feature_union
def test_feature_union(self):
    """Tests that combining multiple featurizers works as expected"""
    module_names = ["bag-of-words", "entities"]
    transformer_list, _ = modules_to_dictionary(module_names)
    union = FeatureUnion(transformer_list)
    union.fit(texts_entities, outcomes)
    union.transform(["unknown"])
示例3: testLogistic
def testLogistic(lbda=1.0, n_components=20, kbest=4):
    """Train a PCA+SelectKBest -> LogisticRegression pipeline on the Otto
    data and save test-set probability predictions.

    Parameters
    ----------
    lbda : float
        Regularization strength; the classifier uses C = 1/lbda.
    n_components : int
        Number of PCA components in the feature union.
    kbest : int
        Number of univariate-selected features in the feature union.
    """
    otto = load_otto()
    X = otto.data[:, :]
    y = otto.target[:]
    scaler = StandardScaler().fit(X)
    X = scaler.transform(X)
    pca = PCA(n_components=n_components)
    selection = SelectKBest(k=kbest)
    combined_features = FeatureUnion(
        [("pca", pca), ('univ_select', selection)]
    )
    logistic = LogisticRegression(C=1.0 / lbda)
    pipe = Pipeline(steps=[('features', combined_features),
                           ('logistic', logistic)])
    # The pipeline fits the feature union itself, so the original's
    # separate combined_features.fit(X, y).transform(X) result was dead
    # (and expensive) code.
    pipe.fit(X, y)
    test_otto = load_testotto()
    testData = scaler.transform(test_otto.data)
    # Save the prediction. predict_proba is expensive: compute it once and
    # reuse it (the original called it twice with identical arguments, so
    # both save_submission arguments held the same values).
    proba = pipe.predict_proba(testData)
    save_submission(lbda, proba, proba)
示例4: testSVC
def testSVC(lbda=1.0, n_components=20, kbest=4):
    """Train a PCA+SelectKBest -> RBF-SVC pipeline on the Otto data and
    save test-set probability predictions.

    Parameters
    ----------
    lbda : float
        Regularization strength; the SVC uses C = 1/lbda.
    n_components : int
        Number of PCA components in the feature union.
    kbest : int
        Number of univariate-selected features in the feature union.
    """
    otto = load_otto()
    X = otto.data
    y = otto.target
    scaler = StandardScaler().fit(X)
    X = scaler.transform(X)
    pca = PCA(n_components=n_components)
    selection = SelectKBest(k=kbest)
    combined_features = FeatureUnion(
        [("pca", pca), ("univ_select", selection)]
    )
    svc = SVC(C=1.0 / lbda, kernel='rbf', cache_size=400, probability=True)
    pipe = Pipeline(steps=[('features', combined_features), ('svc', svc)])
    # The pipeline fits the feature union itself; the original's separate
    # combined_features.fit(X, y).transform(X) result was never used.
    pipe.fit(X, y)
    test_otto = load_testotto()
    testData = scaler.transform(test_otto.data)
    # Save the prediction. predict_proba is expensive: compute once and
    # reuse (the original called it twice with identical arguments).
    proba = pipe.predict_proba(testData)
    save_submission(lbda, proba, proba)
示例5: best_estimator
def best_estimator(self, X, y):
    """Grid-search a PCA+SelectKBest -> LassoCV pipeline on (X, y).

    Returns the fitted GridSearchCV on success, or None if fitting raises
    ValueError. Side effect: sets self.modelled accordingly.
    """
    try:
        pca = PCA(n_components=2)
        selection = SelectKBest(k=2)
        combined_features = FeatureUnion([("pca", pca), ("univ_select", selection)])
        regr = linear_model.LassoCV()
        pipeline = Pipeline([("features", combined_features), ("regression", regr)])
        # The original branched on 'batter' in self.player but built the
        # identical grid in both branches, so the branch is dropped. The
        # original also fit/transformed combined_features separately into
        # an X_features variable that was never used.
        param_grid = dict(features__pca__n_components=[1, 2, 3],
                          features__univ_select__k=[1, 2])
        grid_search = GridSearchCV(pipeline, param_grid=param_grid, verbose=100)
        grid_search.fit(X, y)
        self.modelled = True
        regr = grid_search
        return regr
    except ValueError as e:  # fixed Python-2-only "except ValueError, e"
        print(e)
        self.modelled = False
        return None
示例6: prediction
def prediction(train_df, test_df, MODEL):
    """Grid-search the configured estimator and write submission files.

    Parameters
    ----------
    train_df : pandas DataFrame with a "Sales" column (the target).
    test_df : pandas DataFrame with an "Id" column.
    MODEL : key into the module-level clf_dict selecting the estimator and
        its parameter grid.

    Side effects: writes coefficient/importance CSVs (when the best
    estimator exposes them) and a submission CSV under SUBMISSION.
    """
    print "... start prediction"
    # Build the design matrix from the module-level feature list.
    fu_obj = FeatureUnion(transformer_list=features.feature_list)
    train_X = fu_obj.fit_transform(train_df)
    train_y = train_df["Sales"].as_matrix()
    # NOTE(review): "paramteters" looks like a typo, but it is a dict key
    # that presumably matches clf_dict's schema -- verify before renaming.
    clf = GridSearchCV(estimator=clf_dict[MODEL]["clf"],
                       param_grid=clf_dict[MODEL]["paramteters"],
                       n_jobs=3, scoring=rmspe, verbose=1)
    clf.fit(train_X, train_y)
    print clf.best_score_
    # One feature name per output column of the union.
    index_sr = pd.Series(get_split_feature_list(fu_obj), name="Feature")
    if hasattr(clf.best_estimator_, "coef_"):
        # Linear models: dump per-feature coefficients.
        coef_sr = pd.Series(clf.best_estimator_.coef_, name="Coef")
        coef_df = pd.concat([index_sr, coef_sr], axis=1).set_index("Feature")
        coeffile = SUBMISSION + "coef_%s.csv" % MODEL
        coef_df.to_csv(coeffile)
    if hasattr(clf.best_estimator_, "feature_importances_"):
        # Tree ensembles: dump per-feature importances.
        coef_sr = pd.Series(clf.best_estimator_.feature_importances_,
                            name="Importance")
        coef_df = pd.concat([index_sr, coef_sr], axis=1).set_index("Feature")
        coeffile = SUBMISSION + "importance_%s.csv" % MODEL
        coef_df.to_csv(coeffile)
    print "... start y_pred"
    # Transform (not fit_transform) the test frame with the trained union.
    test_X = fu_obj.transform(test_df)
    y_pred = clf.predict(test_X)
    pred_sr = pd.Series(y_pred, name="Sales", index=test_df["Id"])
    submissionfile = SUBMISSION + "submission_%s.csv" % MODEL
    pred_sr.to_csv(submissionfile, header=True, index_label="ID")
示例7: trainItalianSexClassifier
def trainItalianSexClassifier(self):
    """Train a LinearSVC sex classifier on the Italian training data.

    Builds a weighted union of word tf-idf, character 3-gram tf-idf,
    raw counts and a custom Latin feature, fits the full pipeline on
    self.italianTrainData, and returns the fitted pipeline.
    """
    trainX = self.italianTrainData[0]
    # Get the correct 'sex' labels from the label dictionaries.
    trainY = self.getYlabels(self.italianTrainData[1], 'sex')
    combined_features = FeatureUnion([("tfidf", TfidfVectorizer()),
                                      ("ngrams", TfidfVectorizer(ngram_range=(3, 3), analyzer="char")),
                                      ("counts", CountVectorizer()),
                                      ("latin", Latin()),
                                      ], transformer_weights={
                                          'latin': 1,
                                          'tfidf': 2,
                                          'ngrams': 2,
                                          'counts': 1,
                                      })
    # The pipeline fits the feature union itself, so the original's
    # separate combined_features.fit(trainX, trainY).transform(trainX)
    # pass was unused (and expensive) dead code.
    classifier = svm.LinearSVC()
    pipeline = Pipeline([("features", combined_features), ("classifier", classifier)])
    pipeline.fit(trainX, trainY)
    return pipeline
示例8: fit
def fit(self, X, y=None):
    """Fit a union of the Q2/Q3/Q4 transformers on X (y is ignored)."""
    Trans2 = Q2Transformer()
    Trans3 = Q3Transformer()
    Trans4 = Q4Transformer()
    combined_features = FeatureUnion([("Q2", Trans2), ("Q3", Trans3), ("Q4", Trans4)])
    # NOTE(review): this assignment clobbers the `fit` method itself on the
    # instance, replacing it with the fitted FeatureUnion -- a second call
    # to obj.fit(...) would fail. Presumably a later transform() reads
    # self.fit as the fitted union; verify against callers before renaming
    # the attribute (e.g. to self.fu_).
    self.fit = combined_features.fit(X)
    return self
示例9: best_estimator
def best_estimator(self, X, y):
    """Grid-search a PCA+SelectKBest -> LassoCV pipeline on (X, y).

    Returns the fitted GridSearchCV on success, or None if fitting raises
    ValueError. Side effects: sets self.modelled, and on success sets
    self.R2 from predictions on self.feature_matrix.
    """
    try:
        pca = PCA(n_components=2)
        selection = SelectKBest(k=2)
        combined_features = FeatureUnion([("pca", pca), ("univ_select", selection)])
        regr = linear_model.LassoCV()
        pipeline = Pipeline([("features", combined_features), ("regression", regr)])
        # Batters get a fixed single-component grid; everyone else gets a
        # wider search. (The original also computed an X_features variable
        # via combined_features.fit(...).transform(...) that was never
        # used; that dead code is removed.)
        if 'batter' in self.player:
            param_grid = dict(features__pca__n_components=[1],
                              features__univ_select__k=[1])
        else:
            param_grid = dict(features__pca__n_components=[1, 2, 3, 4],
                              features__univ_select__k=[1, 2, 3, 4])
        grid_search = GridSearchCV(pipeline, param_grid=param_grid, verbose=0)
        grid_search.fit(X, y)
        self.modelled = True
        regr = grid_search
        # Ian: should do R2 on predicted points vs. points on a given day
        self.R2 = r2_score(self.target_matrix, regr.predict(self.feature_matrix))
        return regr
    except ValueError as e:  # fixed Python-2-only "except ValueError, e"
        print(e)
        self.modelled = False
        return None
示例10: rbf_kernels
def rbf_kernels(env, n_samples=100000, gamma=(0.01, 0.1), n_components=100):
    """Represent observation samples using RBF-kernels.

    Parameters
    ----------
    env : environment exposing a samplable observation_space.
    n_samples : int
        Number of observations drawn to fit the scaler and the kernels.
    gamma : sequence of float
        One RBFSampler is fitted per value; n_components is split evenly
        among them. (Default changed from a mutable list to an equivalent
        tuple: mutable default arguments are shared across calls.)
    n_components : int
        Total number of random Fourier features.

    Returns
    -------
    A callable mapping an observation batch to its RBF feature matrix.
    (The original docstring example unpacked two return values; only the
    callable is returned.)

    EXAMPLE
    -------
    >>> env = gym.make('MountainCar-v0')
    >>> rbf = rbf_kernels(env, n_components=100)
    >>> sample = env.observation_space.sample().reshape((1, env.observation_space.shape[0]))
    >>> rbf(sample).shape
    (1, 100)
    """
    observation_examples = np.array([env.observation_space.sample() for _ in range(n_samples)])
    # Fit feature scaler
    scaler = sklearn.preprocessing.StandardScaler()
    scaler.fit(observation_examples)
    # Fit feature extractor: one RBF sampler per gamma value.
    features = []
    for g in gamma:
        features.append(('gamma={}'.format(g), RBFSampler(n_components=n_components // len(gamma), gamma=g)))
    features = FeatureUnion(features)
    features.fit(scaler.transform(observation_examples))

    def _rbf_kernels(observation):
        # Scale with the training-time scaler, then project onto the
        # fitted random Fourier features.
        return features.transform(scaler.transform(observation))

    return _rbf_kernels
示例11: concat_feature_extractors
def concat_feature_extractors(train_data, labels):
    """Union PCA with univariate selection, then grid-search a linear SVM.

    Fits the combined features once and trains a plain SVC on them, then
    wraps the union and classifier in a Pipeline and grid-searches over
    the PCA size, the k-best count and the SVM's C, printing the best
    estimator found.
    """
    # The raw data is too high-dimensional; reduce with PCA and also keep
    # the single best original feature.
    reducer = PCA(n_components=2)
    chooser = SelectKBest(k=1)
    union = FeatureUnion([("pca", reducer), ("univ_select", chooser)])
    # Transform the dataset with the combined features and classify.
    reduced = union.fit(train_data, labels).transform(train_data)
    clf = SVC(kernel="linear")
    clf.fit(reduced, labels)
    # Joint search over the union's knobs and the classifier's C.
    search = GridSearchCV(
        Pipeline([("features", union), ("svm", clf)]),
        param_grid={
            "features__pca__n_components": [1, 2, 3],
            "features__univ_select__k": [1, 2],
            "svm__C": [0.1, 1, 10],
        },
        verbose=10,
    )
    search.fit(train_data, labels)
    print(search.best_estimator_)
示例12: train_model
def train_model(trainset):
    """Train and evaluate a one-vs-rest LinearSVC authorship model.

    trainset: iterable of dicts with 'text' and 'label' keys. Builds a
    union of word-bigram and character-2/3-gram tf-idf features, does an
    80/20 train/test split, writes labels confused with 'anonEdited' to
    the file named by sys.argv[2], shows a confusion matrix, and prints
    the accuracy.
    """
    # Word bigrams, capped at 2000 features.
    word_vector = TfidfVectorizer(analyzer="word", ngram_range=(2,2), binary = False, max_features= 2000,min_df=1,decode_error="ignore")
    # print word_vector
    print "works fine"
    # Character 2- and 3-grams, also capped at 2000 features.
    char_vector = TfidfVectorizer(ngram_range=(2,3), analyzer="char", binary = False, min_df = 1, max_features = 2000,decode_error= "ignore")
    # Both vectorizers feed one combined feature space.
    vectorizer =FeatureUnion([ ("chars", char_vector),("words", word_vector) ])
    corpus = []
    classes = []
    for item in trainset:
        corpus.append(item['text'])
        classes.append(item['label'])
    print "Training instances : ", 0.8*len(classes)
    print "Testing instances : ", 0.2*len(classes)
    matrix = vectorizer.fit_transform(corpus)
    print "feature count : ", len(vectorizer.get_feature_names())
    print "training model"
    # Densify for the downstream estimator; memory-heavy for large corpora.
    X = matrix.toarray()
    y = numpy.asarray(classes)
    model =LinearSVC()
    X_train, X_test, y_train, y_test= train_test_split(X,y,train_size=0.8,test_size=.2,random_state=0)
    y_pred = OneVsRestClassifier(model).fit(X_train, y_train).predict(X_test)
    #y_prob = OneVsRestClassifier(model).fit(X_train, y_train).decision_function(X_test)
    #print y_prob
    #con_matrix = []
    #for row in range(len(y_prob)):
    #	temp = [y_pred[row]]
    #	for prob in y_prob[row]:
    #		temp.append(prob)
    #	con_matrix.append(temp)
    #for row in con_matrix:
    #	output.write(str(row)+"\n")
    #print y_pred
    #print y_test
    # Indices predicted as / actually labelled 'anonEdited'.
    res1=[i for i, j in enumerate(y_pred) if j == 'anonEdited']
    res2=[i for i, j in enumerate(y_test) if j == 'anonEdited']
    # Collect the labels confused with 'anonEdited' in either direction.
    reset=[]
    for r in res1:
        if y_test[r] != "anonEdited":
            reset.append(y_test[r])
    for r in res2:
        if y_pred[r] != "anonEdited":
            reset.append(y_pred[r])
    output=open(sys.argv[2],"w")
    for suspect in reset:
        output.write(str(suspect)+"\n")
    cm = confusion_matrix(y_test, y_pred)
    print(cm)
    pl.matshow(cm)
    pl.title('Confusion matrix')
    pl.colorbar()
    pl.ylabel('True label')
    pl.xlabel('Predicted label')
    pl.show()
    print accuracy_score(y_pred,y_test)
示例13: test_feature_union_feature_names
def test_feature_union_feature_names():
    """Union feature names are prefixed with their component name."""
    char_vect = CountVectorizer(analyzer="char_wb", ngram_range=(3, 3))
    word_vect = CountVectorizer(analyzer="word")
    union = FeatureUnion([("chars", char_vect), ("words", word_vect)])
    union.fit(JUNK_FOOD_DOCS)
    names = union.get_feature_names()
    for name in names:
        assert_true("chars__" in name or "words__" in name)
    assert_equal(len(names), 35)
示例14: convert_testdata
def convert_testdata(test_gray_data, feature_rule=f.feature_transformer_rule):
    """Build the test feature matrix from grayscale test data.

    Parameters
    ----------
    test_gray_data : raw grayscale test data accepted by f.make_test_df.
    feature_rule : transformer_list of (name, transformer) pairs for the
        FeatureUnion (defaults to the module's standard rule).

    Returns
    -------
    X_test : the transformed feature matrix.
    """
    data_df = f.make_test_df(test_gray_data)
    fu = FeatureUnion(transformer_list=feature_rule)
    # Removed an unused StandardScaler instance -- the scaling step was
    # already commented out in the original.
    X_test = fu.fit_transform(data_df)
    #X_test = Std.fit_transform(X_test)
    return X_test
示例15: get_pca_transformer
def get_pca_transformer(train_x, train_y, n_components=-1):
    """Fit a PCA + SelectKBest feature union on the training data.

    Parameters
    ----------
    train_x : array-like of shape (n_samples, n_features)
    train_y : array-like of shape (n_samples,); used by SelectKBest scoring.
    n_components : int
        Number of PCA components; -1 (the default) means
        ceil(sqrt(n_features)).

    Returns
    -------
    The fitted FeatureUnion of PCA(n_components) and
    SelectKBest(k=n_components // 2).
    """
    if n_components == -1:
        n_components = int(np.ceil(np.sqrt(train_x.shape[1])))
    pca = PCA(n_components=n_components)
    # BUG FIX: use floor division -- under Python 3, n_components / 2 is a
    # float, and SelectKBest requires an integer k.
    selection = SelectKBest(k=n_components // 2)
    combined_features = FeatureUnion([("pca", pca), ("univ_select", selection)])
    return combined_features.fit(train_x, train_y)