当前位置: 首页>>代码示例>>Python>>正文


Python pipeline.FeatureUnion类代码示例

本文整理汇总了Python中sklearn.pipeline.FeatureUnion的典型用法代码示例。如果您正苦于以下问题:Python FeatureUnion类的具体用法?Python FeatureUnion怎么用?Python FeatureUnion使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了FeatureUnion类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: pca_kpca

def pca_kpca(train_data, labels):
    estimators = make_union(PCA(), TruncatedSVD(), KernelPCA())
#    estimators = [('linear_pca', PCA()), ('kernel_pca', KernelPCA())]
    combined = FeatureUnion(estimators)
    combined.fit(train_data, labels) # combined.fit_tranform(tain_data, labels)

    return combined
开发者ID:kirk86,项目名称:Task-1,代码行数:7,代码来源:misc.py

示例2: test_feature_union

 def test_feature_union(self):
     """Tests that combining multiple featurizers works as expected"""
     modules = ["bag-of-words", "entities"]
     modules_list, _ = modules_to_dictionary(modules)
     feature_union = FeatureUnion(modules_list)
     feature_union.fit(texts_entities, outcomes)
     feature_union.transform(["unknown"])
开发者ID:cgoldammer,项目名称:simple_text_analysis,代码行数:7,代码来源:tests.py

示例3: testLogistic

def testLogistic(lbda=1.0, n_components=20, kbest=4):
	# X = otto.data[:1000, :20]
	# y = otto.target[:1000]
	otto = load_otto()
	X = otto.data[:, :]
	y = otto.target[:]
	# n_components = 20
	# kbest = 4
#	print 'y.shape =', y.shape

	scalar = StandardScaler().fit(X)
	X = scalar.transform(X)

	pca = PCA(n_components=n_components)
	selection = SelectKBest(k=kbest)

	combined_features = FeatureUnion(
		[("pca", pca), ('univ_select', selection)]
	)
	X_features = combined_features.fit(X,y).transform(X)

	logistic = LogisticRegression(C=1.0/lbda)
	pipe = Pipeline(steps=[('features', combined_features), ('logistic', logistic)])
	trainData = X
	trainTarget = y
	pipe.fit(trainData, trainTarget)
	# print trainTarget
	test_otto = load_testotto()
	testData = test_otto.data
	testData = scalar.transform(testData)
	# logging.debug('lambda=%.3f: score is %.3f' % (lbda, pipe.score()))
	'save the prediction'
	prediction = pipe.predict_proba(testData)
	proba = pipe.predict_proba(testData)
	save_submission(lbda, proba, prediction)
开发者ID:Turf1013,项目名称:Machine_Learning,代码行数:35,代码来源:logistic_submission.py

示例4: testSVC

def testSVC(lbda=1.0, n_components=20, kbest=4):
	otto = load_otto()
	X = otto.data
	y = otto.target
	# X = otto.data[:10000, :10]
	# y = otto.target[:10000]

	scaler = StandardScaler().fit(X)
	X = scaler.transform(X)

	pca = PCA(n_components=n_components)
	selection = SelectKBest(k=kbest)

	combined_features = FeatureUnion(
		[("pca", pca), ("univ_select", selection)]
	)
	X_features = combined_features.fit(X, y).transform(X)

	svc = SVC(C=1.0/lbda, kernel='rbf', cache_size=400, probability=True)
	pipe = Pipeline(steps=[('features', combined_features), ('svc', svc)])
	trainData = X
	trainTarget = y
	pipe.fit(trainData, trainTarget)
	test_otto = load_testotto()
	testData = test_otto.data
	testData = scaler.transform(testData)
	'save the prediction'
	prediction = pipe.predict_proba(testData)
	proba = pipe.predict_proba(testData)
	save_submission(lbda, proba, prediction)
开发者ID:Turf1013,项目名称:Machine_Learning,代码行数:30,代码来源:svc_submission.py

示例5: best_estimator

	def best_estimator(self, X, y):
		try:
			pca = PCA(n_components=2)
			selection = SelectKBest(k=2)
			combined_features = FeatureUnion([("pca", pca), ("univ_select", selection)])
			X_features = combined_features.fit(X, y).transform(X)
			regr = linear_model.LassoCV()
			pipeline = Pipeline([("features", combined_features), ("regression", regr)])

			if 'batter' in self.player:
				param_grid = dict(features__pca__n_components=[1, 2, 3],
				                  features__univ_select__k=[1, 2])
			else:
				param_grid = dict(features__pca__n_components=[1, 2,3],
				                  features__univ_select__k=[1,2])

			grid_search = GridSearchCV(pipeline, param_grid=param_grid, verbose=100)
			grid_search.fit(X, y)
			self.modelled = True
			regr = grid_search
			return regr
		except ValueError,e:
			print e
			self.modelled = False
			return None
开发者ID:emschorsch,项目名称:fanduel,代码行数:25,代码来源:Model.py

示例6: prediction

def prediction(train_df, test_df, MODEL):

    print "... start prediction"

    fu_obj = FeatureUnion(transformer_list=features.feature_list)

    train_X = fu_obj.fit_transform(train_df)
    train_y = train_df["Sales"].as_matrix()

    clf = GridSearchCV(estimator=clf_dict[MODEL]["clf"],
                       param_grid=clf_dict[MODEL]["paramteters"],
                       n_jobs=3, scoring=rmspe, verbose=1)
    clf.fit(train_X, train_y)
    print clf.best_score_
    index_sr = pd.Series(get_split_feature_list(fu_obj), name="Feature")
    if hasattr(clf.best_estimator_, "coef_"):
        coef_sr = pd.Series(clf.best_estimator_.coef_, name="Coef")
        coef_df = pd.concat([index_sr, coef_sr], axis=1).set_index("Feature")
        coeffile = SUBMISSION + "coef_%s.csv" % MODEL
        coef_df.to_csv(coeffile)
    if hasattr(clf.best_estimator_, "feature_importances_"):
        coef_sr = pd.Series(clf.best_estimator_.feature_importances_,
                            name="Importance")
        coef_df = pd.concat([index_sr, coef_sr], axis=1).set_index("Feature")
        coeffile = SUBMISSION + "importance_%s.csv" % MODEL
        coef_df.to_csv(coeffile)

    print "... start y_pred"
    test_X = fu_obj.transform(test_df)

    y_pred = clf.predict(test_X)
    pred_sr = pd.Series(y_pred, name="Sales", index=test_df["Id"])
    submissionfile = SUBMISSION + "submission_%s.csv" % MODEL
    pred_sr.to_csv(submissionfile, header=True, index_label="ID")
开发者ID:guruttosekai2011,项目名称:Rossmann_Store_Sales,代码行数:34,代码来源:prediction.py

示例7: trainItalianSexClassifier

	def trainItalianSexClassifier(self):
		#get correct labels from dictionary in trainY and testY
		trainX = self.italianTrainData[0]
		trainY = self.getYlabels(self.italianTrainData[1], 'sex')

		

		combined_features = FeatureUnion([("tfidf", TfidfVectorizer()),
										("ngrams", TfidfVectorizer(ngram_range=(3, 3), analyzer="char")), 
										("counts", CountVectorizer()),
										("latin", Latin()),	
										],transformer_weights={
											'latin': 1,
											'tfidf': 2,
											'ngrams': 2,
											'counts': 1,

        								})
		
		X_features = combined_features.fit(trainX, trainY).transform(trainX)
		classifier = svm.LinearSVC()
		pipeline = Pipeline([("features", combined_features), ("classifier", classifier)])
		pipeline.fit(trainX, trainY)
		
		return pipeline
开发者ID:chrispool,项目名称:lfd,代码行数:25,代码来源:classifiers.py

示例8: fit

 def fit(self, X, y=None):
     Trans2 = Q2Transformer()
     Trans3 = Q3Transformer()
     Trans4 = Q4Transformer()
     combined_features = FeatureUnion([("Q2", Trans2), ("Q3", Trans3), ("Q4", Trans4)])
     self.fit = combined_features.fit(X)
     return self
开发者ID:FangMath,项目名称:MachineLearning_Mini_Project,代码行数:7,代码来源:Models_ml.py

示例9: best_estimator

	def best_estimator(self, X, y):
		try:
			pca = PCA(n_components=2)
			selection = SelectKBest(k=2)
			combined_features = FeatureUnion([("pca", pca), ("univ_select", selection)])
			X_features = combined_features.fit(X, y).transform(X)
			regr = linear_model.LassoCV()
			pipeline = Pipeline([("features", combined_features), ("regression", regr)])

			if 'batter' in self.player:
				param_grid = dict(features__pca__n_components=[1],
				                  features__univ_select__k=[1])
			else:
				param_grid = dict(features__pca__n_components=[1,2,3,4],
				                  features__univ_select__k=[1,2,3,4])

			grid_search = GridSearchCV(pipeline, param_grid=param_grid, verbose=0)
			grid_search.fit(X, y)
			self.modelled = True
			regr = grid_search
			self.R2=r2_score(self.target_matrix,regr.predict(self.feature_matrix)) #Ian: should do R2 on predicted points vs. points on a given day
			return regr
		except ValueError,e:
			print e
			self.modelled = False
			return None
开发者ID:cole-maclean,项目名称:fanduel,代码行数:26,代码来源:Model.py

示例10: rbf_kernels

def rbf_kernels(env, n_samples=100000, gamma=[0.01, 0.1], n_components=100):
    """Represent observation samples using RBF-kernels.

    EXAMPLE
    -------
    >>> env = gym.make('MountainCar-v0')
    >>> n_params, rbf = rbf_kernels(env, n_components=100)
    >>> sample = env.observation_space.sample().reshape((1, env.observation_space.shape[0]))
    >>> rbf(sample).shape
    (1, 100)
    """
    observation_examples = np.array([env.observation_space.sample() for _ in range(n_samples)])

    # Fit feature scaler
    scaler = sklearn.preprocessing.StandardScaler()
    scaler.fit(observation_examples)

    # Fir feature extractor
    features = []
    for g in gamma:
        features.append(('gamma={}'.format(g), RBFSampler(n_components=n_components // len(gamma), gamma=g)))

    features = FeatureUnion(features)
    features.fit(scaler.transform(observation_examples))

    def _rbf_kernels(observation):
        return features.transform(scaler.transform(observation))

    return _rbf_kernels
开发者ID:sotetsuk,项目名称:pyRLbook,代码行数:29,代码来源:function_approximation.py

示例11: concat_feature_extractors

def concat_feature_extractors(train_data, labels):
    # This dataset is way to high-dimensional. Better do PCA:
    pca = PCA(n_components = 2)

    # Maybe some original features where good, too?
    selection = SelectKBest(k = 1)

    # Build estimator from PCA and Univariate selection:

    combined_features = FeatureUnion([("pca", pca), ("univ_select", selection)])

    # Use combined features to transform dataset:
    X_features = combined_features.fit(train_data, labels).transform(train_data)

    # Classify:
    svm = SVC(kernel = "linear")
    svm.fit(X_features, labels)

    # Do grid search over k, n_components and C:

    pipeline = Pipeline([("features", combined_features), ("svm", svm)])

    param_grid = dict(features__pca__n_components = [1, 2, 3],
                      features__univ_select__k = [1, 2],
                      svm__C = [0.1, 1, 10])

    grid_search = GridSearchCV(pipeline, param_grid=param_grid, verbose = 10)
    grid_search.fit(train_data, labels)
    print(grid_search.best_estimator_)
开发者ID:kirk86,项目名称:Task-1,代码行数:29,代码来源:misc.py

示例12: train_model

def train_model(trainset):
	word_vector = TfidfVectorizer(analyzer="word", ngram_range=(2,2), binary = False, max_features= 2000,min_df=1,decode_error="ignore")
#	print word_vector	
	print "works fine"
	char_vector = TfidfVectorizer(ngram_range=(2,3), analyzer="char", binary = False, min_df = 1, max_features = 2000,decode_error= "ignore")
	vectorizer =FeatureUnion([ ("chars", char_vector),("words", word_vector) ])
	corpus = []
	classes = []

	for item in trainset:
		corpus.append(item['text'])
		classes.append(item['label'])

	print "Training instances : ", 0.8*len(classes)
	print "Testing instances : ", 0.2*len(classes) 
	
	matrix = vectorizer.fit_transform(corpus)
	print "feature count : ", len(vectorizer.get_feature_names())
	print "training model"
	X = matrix.toarray()
	y = numpy.asarray(classes)
	model =LinearSVC()
	X_train, X_test, y_train, y_test= train_test_split(X,y,train_size=0.8,test_size=.2,random_state=0)
	y_pred = OneVsRestClassifier(model).fit(X_train, y_train).predict(X_test)
	#y_prob = OneVsRestClassifier(model).fit(X_train, y_train).decision_function(X_test)
	#print y_prob
	#con_matrix = []
	#for row in range(len(y_prob)):
	#	temp = [y_pred[row]]	
	#	for prob in y_prob[row]:
	#		temp.append(prob)
	#	con_matrix.append(temp)
	#for row in con_matrix:
	#	output.write(str(row)+"\n")
	#print y_pred		
	#print y_test
	
	res1=[i for i, j in enumerate(y_pred) if j == 'anonEdited']
	res2=[i for i, j in enumerate(y_test) if j == 'anonEdited']
	reset=[]
	for r in res1:
		if y_test[r] != "anonEdited":
			reset.append(y_test[r])
	for r in res2:
		if y_pred[r] != "anonEdited":
			reset.append(y_pred[r])
	
	
	output=open(sys.argv[2],"w")
	for suspect in reset:
		output.write(str(suspect)+"\n")	
	cm = confusion_matrix(y_test, y_pred)
	print(cm)
	pl.matshow(cm)
	pl.title('Confusion matrix')
	pl.colorbar()
	pl.ylabel('True label')
	pl.xlabel('Predicted label')
	pl.show()
	print accuracy_score(y_pred,y_test)
开发者ID:srini21,项目名称:Amazon-deceptive-reviews,代码行数:60,代码来源:anontesting.py

示例13: test_feature_union_feature_names

def test_feature_union_feature_names():
    word_vect = CountVectorizer(analyzer="word")
    char_vect = CountVectorizer(analyzer="char_wb", ngram_range=(3, 3))
    ft = FeatureUnion([("chars", char_vect), ("words", word_vect)])
    ft.fit(JUNK_FOOD_DOCS)
    feature_names = ft.get_feature_names()
    for feat in feature_names:
        assert_true("chars__" in feat or "words__" in feat)
    assert_equal(len(feature_names), 35)
开发者ID:Givonaldo,项目名称:scikit-learn,代码行数:9,代码来源:test_pipeline.py

示例14: convert_testdata

def convert_testdata(test_gray_data, feature_rule=f.feature_transformer_rule):

    data_df = f.make_test_df(test_gray_data)
    fu = FeatureUnion(transformer_list=feature_rule)
    Std = preprocessing.StandardScaler()

    X_test = fu.fit_transform(data_df)
    #X_test = Std.fit_transform(X_test)

    return X_test
开发者ID:haisland0909,项目名称:Denoising-Dirty-Documents,代码行数:10,代码来源:repredict.py

示例15: get_pca_transformer

def get_pca_transformer(train_x, train_y, n_components=-1):
    if n_components == -1:
        n_components = int(np.ceil(np.sqrt(train_x.shape[1])))

    pca = PCA(n_components=n_components)
    selection = SelectKBest(k=n_components/2)

    combined_features = FeatureUnion([("pca", pca), ("univ_select", selection)])

    return combined_features.fit(train_x, train_y)
开发者ID:challenging,项目名称:kaggle,代码行数:10,代码来源:pre_loader.py


注:本文中的sklearn.pipeline.FeatureUnion类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。