

Python PCA.fit_transform Method Code Examples

This article collects typical usage examples of the Python method sklearn.decomposition.PCA.fit_transform. If you are wondering what exactly PCA.fit_transform does, how to call it, or where to find working examples, the curated code samples below may help. You can also explore further usage examples of the class it belongs to, sklearn.decomposition.PCA.


The following presents 15 code examples of PCA.fit_transform, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code samples.
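
Before diving in, here is a minimal sketch of the canonical call pattern (synthetic data; all variable names are illustrative): fit_transform learns the principal axes from X and returns the projected coordinates in a single call.

import numpy as np
from sklearn.decomposition import PCA

X = np.random.RandomState(0).rand(100, 5)  # synthetic data: 100 samples, 5 features
pca = PCA(n_components=2)
X_reduced = pca.fit_transform(X)           # fit the model and project X in one step
print(X_reduced.shape)                     # (100, 2)
print(pca.explained_variance_ratio_)       # fraction of variance per component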

Example 1: __init__

# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import fit_transform [as alias]
import numpy as np
from sklearn.decomposition import PCA


class GMM:
    def __init__(self, reduction=1, do_pca=False):
        self.means = []
        self.vars = []
        self.user_ids = {}
        self.pca = PCA(2)
        self.do_pca = do_pca
        self.reduction = reduction

    def fit(self, xdata, ydata):
        if self.do_pca:
            # Project to 2-D and shrink the second component by `reduction`
            xdata = self.pca.fit_transform(xdata)
            xdata[:, 1] /= self.reduction

        for u in np.unique(ydata):
            curdata = xdata[ydata == u]
            self.user_ids[len(self.means)] = u
            self.means.append(np.mean(curdata, 0))
            self.vars.append(np.sqrt(np.var(curdata, 0)))

    def __compute_closest(self, xval):
        # The closest user minimizes the variance-normalized L1 distance
        return self.user_ids[np.argmin([np.sum(np.abs((xval - self.means[i]) / self.vars[i]))
                                        for i in range(len(self.means))])]

    def predict(self, xdata):
        if self.do_pca:
            # Reuse the projection learned in fit(); refitting here would
            # place the data on different axes
            xdata = self.pca.transform(xdata)
            xdata[:, 1] /= self.reduction
        return np.array([self.__compute_closest(x) for x in xdata])
Developer: sbalanovich, Project: APM115Proj1, Lines: 31, Source: util.py

Example 2: train_pca_linreg_model

# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import fit_transform [as alias]
        def train_pca_linreg_model(out_of_transit_mask, oot_no_validation_mask, n_comp):

            # OOT chunk first:
            pca = PCA(n_components=n_comp)
            reduced_regressors = pca.fit_transform(regressors[out_of_transit_mask],
                                                   target_fluxes[out_of_transit_mask])

            prepended_regressors_oot = np.hstack([ones[out_of_transit_mask],
                                                  reduced_regressors])
            c_oot = regression_coeffs(prepended_regressors_oot,
                                      target_fluxes[out_of_transit_mask],
                                      target_errors[out_of_transit_mask])

            lc_training = (target_fluxes[out_of_transit_mask] -
                           regression_model(c_oot, prepended_regressors_oot))

            median_oot = np.median(target_fluxes[out_of_transit_mask])
            std_lc_training = np.std((lc_training + median_oot) / median_oot)

            # Now on validation chunk:
            reduced_regressors_no_validation = pca.fit_transform(regressors[oot_no_validation_mask],
                                                                 target_fluxes[oot_no_validation_mask])

            prepended_regressors_no_validation = np.hstack([ones[oot_no_validation_mask],
                                                            reduced_regressors_no_validation])
            c_no_validation = regression_coeffs(prepended_regressors_no_validation,
                                                target_fluxes[oot_no_validation_mask],
                                                target_errors[oot_no_validation_mask])

            lc_validation = (target_fluxes[out_of_transit_mask] -
                             regression_model(c_no_validation, prepended_regressors_oot))

            std_lc_validation = np.std((lc_validation + median_oot) / median_oot)

            return lc_training, lc_validation, std_lc_training, std_lc_validation
Developer: bmorris3, Project: trappist1_arctic_2016, Lines: 37, Source: pca.py

Example 3: __init__

# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import fit_transform [as alias]
def __init__(self, pca_components=None, whiten=True, k_best=False):
    train = pd.read_csv('data/train.csv')
    test = pd.read_csv('data/test.csv')
    # Some columns have zero variance
    # train = train.loc[:, train.std() > 0]
    # test = test.loc[:, test.std() > 0]

    # Treat -999999 in var3 as missing; replace it with the most common value
    train['var3'] = train['var3'].replace(-999999, 2)
    test['var3'] = test['var3'].replace(-999999, 2)
    X_train = train.iloc[:, :-1].values
    y_train = train.iloc[:, -1].values
    X_test = test.values

    # Perform PCA
    pca = PCA(n_components=pca_components, whiten=whiten)
    X_train = pca.fit_transform(X_train)
    # Project the test set with the PCA fitted on the training set (no refit)
    X_test = pca.transform(X_test)

    if k_best:
        if pca_components is not None and k_best > pca_components:
            k_best = 'all'
        # Select the k best features by ANOVA F-score
        kb = SelectKBest(f_classif, k=k_best)
        X_train = kb.fit_transform(X_train, y_train)
        X_test = kb.transform(X_test)

    self.X_train = X_train
    self.y_train = y_train
    self.X_test = X_test
Developer: quasi-coherent, Project: Kaggle-Santander, Lines: 32, Source: santander_preprocess.py
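
A pattern worth noting in Examples 1 and 3 above: fit_transform belongs on the training split only, and held-out data should be projected with transform; refitting on the test set places it on different axes. A minimal sketch of that split (synthetic arrays; names are illustrative):

import numpy as np
from sklearn.decomposition import PCA

rng = np.random.RandomState(0)
X_train, X_test = rng.rand(80, 10), rng.rand(20, 10)  # synthetic train/test split

pca = PCA(n_components=3)
X_train_r = pca.fit_transform(X_train)  # learn the components on the training data
X_test_r = pca.transform(X_test)        # reuse those components; do not refit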

Example 4: reduce_dimensions

# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import fit_transform [as alias]
def reduce_dimensions(X):
    '''
    Reduce the dimensionality of X with different reducers.

    Return a sequence of tuples containing:
        (title, x coordinates, y coordinates)
    for each reducer.
    '''

    # Principal Component Analysis (PCA) is a linear reduction model
    # that identifies the components of the data with the largest
    # variance.
    from sklearn.decomposition import PCA
    reducer = PCA(n_components=2)
    X_r = reducer.fit_transform(X)
    yield 'PCA', X_r[:, 0], X_r[:, 1]

    # Independent Component Analysis (ICA) decomposes a signal by
    # identifying the independent contributing sources.
    from sklearn.decomposition import FastICA
    reducer = FastICA(n_components=2)
    X_r = reducer.fit_transform(X)
    yield 'ICA', X_r[:, 0], X_r[:, 1]

    # t-distributed Stochastic Neighbor Embedding (t-SNE) is a
    # non-linear reduction model. It operates best on data with a low
    # number of attributes (<50) and is often preceded by a linear
    # reduction model such as PCA.
    from sklearn.manifold import TSNE
    reducer = TSNE(n_components=2)
    X_r = reducer.fit_transform(X)
    yield 't-SNE', X_r[:, 0], X_r[:, 1]
Developer: AlexanderTekle, Project: PTVS, Lines: 34, Source: clustering.py

Example 5: feature_extraction_partialPCA

# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import fit_transform [as alias]
def feature_extraction_partialPCA(X_grad_train,X_grad_test,X_mag_train,X_mag_test):
    # Flatten the data, center it, and run PCA on each sensor type (grad & mag)
    # separately, then standardize the combined features (z-score)

    from sklearn.preprocessing import StandardScaler
    def flat_n_standartize(Xtrain,Xtest):
        # Flatten the time x channel arrays and center them on the training mean
        Xtrain = Xtrain.reshape(Xtrain.shape[0],-1) #flatten array n_samples x n_time x n_channels to n_samples x n_features
        mean = Xtrain.mean(axis=0)
        Xtrain = Xtrain - mean
        Xtest = Xtest.reshape(Xtest.shape[0],-1)
        Xtest = Xtest - mean
        return Xtrain, Xtest  # data from the same sensor type share the same scale
    X_grad_train,X_grad_test = flat_n_standartize(X_grad_train,X_grad_test)
    X_mag_train,X_mag_test = flat_n_standartize(X_mag_train,X_mag_test)

    effective_pca_num = 40 # PCA components

    # Whitening scales each component to unit variance; without it the SVM would not work
    pca = PCA(n_components=effective_pca_num, whiten=True)
    X_grad_train = pca.fit_transform(X_grad_train)
    X_grad_test = pca.transform(X_grad_test)

    # Refit the same PCA object on the magnetometer channels
    X_mag_train = pca.fit_transform(X_mag_train)
    X_mag_test = pca.transform(X_mag_test)
    Xtrain = np.hstack((X_grad_train, X_mag_train))
    Xtest = np.hstack((X_grad_test, X_mag_test))

    scaler = StandardScaler().fit(Xtrain)
    return scaler.transform(Xtrain),scaler.transform(Xtest)
Developer: LIKAN-BLK, Project: MEGcluster, Lines: 32, Source: main.py

Example 6: runPCA

# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import fit_transform [as alias]
def runPCA(hogDir):
    ### Directory stuff
    if not os.path.exists(hogDir):
        print("You must first create HOG features")
        sys.exit(1)

    trainData = np.load(hogDir + 'train/hog.npy')
    testData = np.load(hogDir + 'test/hog.npy')

    data = np.append(trainData, testData, axis=0)

    ### Fit the PCA model once on the combined data
    pca = PCA(n_components=min(testData.shape[0], trainData.shape[0],
                               testData.shape[1], trainData.shape[1]))
    pca.fit(data)
    # Project both splits with the already-fitted model
    # (fit_transform here would refit on each split separately)
    trainData = pca.transform(trainData)
    testData = pca.transform(testData)

    ### Store the labels and the array with transformed feature vectors
    np.save(hogDir + 'train/pca', trainData)
    np.save(hogDir + 'test/pca', testData)
Developer: XeryusTC, Project: handwritingrecog, Lines: 29, Source: pca.py

Example 7: test_feature_union_weights

# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import fit_transform [as alias]
def test_feature_union_weights():
    # test feature union with transformer weights
    iris = load_iris()
    X = iris.data
    y = iris.target
    pca = PCA(n_components=2, svd_solver='randomized', random_state=0)
    select = SelectKBest(k=1)
    # test using fit followed by transform
    fs = FeatureUnion([("pca", pca), ("select", select)],
                      transformer_weights={"pca": 10})
    fs.fit(X, y)
    X_transformed = fs.transform(X)
    # test using fit_transform
    fs = FeatureUnion([("pca", pca), ("select", select)],
                      transformer_weights={"pca": 10})
    X_fit_transformed = fs.fit_transform(X, y)
    # test it works with transformers missing fit_transform
    fs = FeatureUnion([("mock", Transf()), ("pca", pca), ("select", select)],
                      transformer_weights={"mock": 10})
    X_fit_transformed_wo_method = fs.fit_transform(X, y)
    # check against expected result

    # We use a different pca object to control the random_state stream
    assert_array_almost_equal(X_transformed[:, :-1], 10 * pca.fit_transform(X))
    assert_array_equal(X_transformed[:, -1],
                       select.fit_transform(X, y).ravel())
    assert_array_almost_equal(X_fit_transformed[:, :-1],
                              10 * pca.fit_transform(X))
    assert_array_equal(X_fit_transformed[:, -1],
                       select.fit_transform(X, y).ravel())
    assert_equal(X_fit_transformed_wo_method.shape, (X.shape[0], 7))
Developer: dsquareindia, Project: scikit-learn, Lines: 33, Source: test_pipeline.py

Example 8: examples

# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import fit_transform [as alias]
def examples():
	# example
	X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
	pca = PCA(n_components=2)
	pca.fit(X)
	#print(pca.explained_variance_ratio_) 
	#[ 0.99244...  0.00755...]


	indivs, genoArr = parseHapmap.runParse()
	genoArr_copy = copy.deepcopy(genoArr)

	# with 2 components
	pca2 = PCA(n_components=2)
	pca2.fit(genoArr_copy)
	print(pca2)
	#print(pca2.explained_variance_ratio_)
	print(genoArr_copy)
	print('\n\n\n')

	# with 2 components, fitting and transforming in a single call
	pca2_trans = PCA(n_components=2)
	genoArr_trans = pca2_trans.fit_transform(genoArr_copy)
	print(pca2_trans)
	#print(pca2_trans.explained_variance_ratio_) 
	print(genoArr_trans)

	# with 10 components, first 2 components are same as before
	pca10_trans = PCA(n_components = 10)
	genoArr_trans10 = pca10_trans.fit_transform(genoArr_copy)
	print(genoArr_trans10)
Developer: lsgai, Project: global_ancestry, Lines: 33, Source: PCA_nocluster.py

Example 9: test_pca

# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import fit_transform [as alias]
def test_pca():
    # PCA on dense arrays
    X = iris.data

    for n_comp in np.arange(X.shape[1]):
        pca = PCA(n_components=n_comp, svd_solver='full')

        X_r = pca.fit(X).transform(X)
        np.testing.assert_equal(X_r.shape[1], n_comp)

        X_r2 = pca.fit_transform(X)
        assert_array_almost_equal(X_r, X_r2)

        X_r = pca.transform(X)
        X_r2 = pca.fit_transform(X)
        assert_array_almost_equal(X_r, X_r2)

        # Test get_covariance and get_precision
        cov = pca.get_covariance()
        precision = pca.get_precision()
        assert_array_almost_equal(np.dot(cov, precision),
                                  np.eye(X.shape[1]), 12)

    # test explained_variance_ratio_ == 1 with all components
    pca = PCA(svd_solver='full')
    pca.fit(X)
    assert_almost_equal(pca.explained_variance_ratio_.sum(), 1.0, 3)
Developer: amueller, Project: scikit-learn, Lines: 29, Source: test_pca.py

Example 10: learn_and_classify

# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import fit_transform [as alias]
def learn_and_classify(training_matrix, training_targets, testing_matrix, options, clffile, cores=1):
    from sklearn import svm
    from sklearn.decomposition import PCA
    import pickle, datetime

    # Default values for the options
    params = {'g': 0.7, 'C': 1.0, 'n': 100}

    # If options are provided, evaluate them into the dict
    # (exec cannot rebind a function's local variables in Python 3)
    if options:
        for option in options:
            exec(option, params)
    g, C, n = params['g'], params['C'], params['n']

    # Dimension reduction: fit PCA on the training matrix and keep the
    # transformed matrices (PCA ignores the targets)
    pca = PCA(n_components=n)
    print("[%s] fit & transform the training matrix" % datetime.datetime.now())
    training_matrix = pca.fit_transform(training_matrix)
    print("[%s] transform the testing matrix" % datetime.datetime.now())
    testing_matrix = pca.transform(testing_matrix)

    # SVM fitting
    print("[%s] learning" % datetime.datetime.now())
    rbf_svc = svm.SVC(kernel='rbf', gamma=g, C=C).fit(training_matrix, training_targets)

    # Saving the model (binary mode for pickle)
    print("[%s] saving model" % datetime.datetime.now())
    with open(clffile, 'wb') as fh:
        pickle.dump((pca, rbf_svc), fh)

    print("[%s] classifying" % datetime.datetime.now())

    return split_predict(testing_matrix, rbf_svc, cores)
Developer: EdwardBetts, Project: metaviro, Lines: 37, Source: classif_svm_pca.py

Example 11: pcaProj

# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import fit_transform [as alias]
def pcaProj(self):
    pca = PCA(n_components=2)
    # Fit the projection on the training rows, then reuse it so that all
    # three data sets are plotted in the same 2-D coordinate system
    aa = pca.fit_transform(array([r.cells[:-2] for r in self.train]))
    bb = pca.transform(array([r for r in self.test]))
    cc = pca.transform(array([r.cells[:-2] for r in self.delta]))
    self.scatterplot(
        [aa, bb, cc], c=[[0.5, 0.5, 0.5], [0.85, 0.0, 0.0], [0, 0.85, 0]])
Developer: queenstina, Project: Transfer-Learning, Lines: 9, Source: scatterPlot.py

Example 12: feature_scaled_nn_acc

# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import fit_transform [as alias]
def feature_scaled_nn_acc(mds, type):
    train, validation = validation_split(mds)
    # Multiply by 1 to convert to bool
    y_train = train['Up'] * 1
    X_train = train.drop('Up', axis=1)
    y_validation = validation['Up'] * 1
    X_validation = validation.drop('Up', axis=1)
    pre = PCA(n_components=19, whiten=True)
    X_train_pca = pre.fit_transform(X_train)
    # Project the validation split with the PCA fitted on the training split
    X_validation_pca = pre.transform(X_validation)
    model = create_model(X_train_pca.shape[1], type)
    # Convert to Keras format
    y_train = to_categorical(y_train.values)
    y_validation = to_categorical(y_validation.values)
    model.fit(X_train_pca, y_train, nb_epoch=5, batch_size=16)
    time.sleep(0.1)
    # Fit and guess
    guess_train = model.predict_classes(X_train_pca)
    guess_train = to_categorical(guess_train)

    guess_validation = model.predict_classes(X_validation_pca)
    guess_validation = to_categorical(guess_validation)

    train_acc = accuracy_score(y_train, guess_train)
    validation_acc = accuracy_score(y_validation, guess_validation)
    print "\n neural net train accuracy is {}".format(train_acc)
    print "\n neural net validation accuracy is {}".format(validation_acc)
    return guess_validation
Developer: Dsinghbailey, Project: futures_predictor, Lines: 30, Source: nn.py

Example 13: plot_original_data

# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import fit_transform [as alias]
    def plot_original_data(self):
        """
        Plot the original feature data in two dimensions.
        :return: None
        """

        # Each feature set gets its own independent 2-D projection
        pca = PCA(n_components=2)
        x = pca.fit_transform(self.english_feature.feature)
        y = pca.fit_transform(self.image_feature.feature)
        z = pca.fit_transform(self.japanese_feature.feature)

        print(x[x != 0])
        print(y)
        print(z[z != 0])

        # plot
        plt.subplot(311)
        plt.plot(x[:, 0], x[:, 1], '.r')
        plt.title('X')

        plt.subplot(312)
        plt.plot(y[:, 0], y[:, 1], '.g')
        plt.title('Y')

        plt.subplot(313)
        plt.plot(z[:, 0], z[:, 1], '.b')
        plt.title('Z')
        plt.show()
Developer: rupy, Project: CrossLingualSentenceRetrieval, Lines: 30, Source: joint.py

Example 14: test_pca

# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import fit_transform [as alias]
def test_pca():
    """PCA on dense arrays"""
    pca = PCA(n_components=2)
    X = iris.data
    X_r = pca.fit(X).transform(X)
    np.testing.assert_equal(X_r.shape[1], 2)

    X_r2 = pca.fit_transform(X)
    assert_array_almost_equal(X_r, X_r2)

    pca = PCA()
    pca.fit(X)
    assert_almost_equal(pca.explained_variance_ratio_.sum(), 1.0, 3)

    X_r = pca.transform(X)
    X_r2 = pca.fit_transform(X)

    assert_array_almost_equal(X_r, X_r2)

    # Test get_covariance and get_precision with n_components == n_features
    # with n_components < n_features and with n_components == 0
    for n_components in [0, 2, X.shape[1]]:
        pca.n_components = n_components
        pca.fit(X)
        cov = pca.get_covariance()
        precision = pca.get_precision()
        assert_array_almost_equal(np.dot(cov, precision), np.eye(X.shape[1]), 12)
Developer: Garrett-R, Project: scikit-learn, Lines: 29, Source: test_pca.py

Example 15: plot_variance_graph

# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import fit_transform [as alias]
    def plot_variance_graph(self):

        # Get list of features
        count_vect = CountVectorizer(stop_words=stopwords, min_df=3, max_df=0.90, ngram_range=(1,1))
        X_CV = count_vect.fit_transform(docs_train)

        # print number of unique words (n_features)
        print ("Shape of train data is "+str(X_CV.shape))

        # tf-idf transformation
        tfidf_transformer = TfidfTransformer(use_idf=True)
        X_tfidf = tfidf_transformer.fit_transform(X_CV)

        X_dense = X_tfidf.toarray()

        pca = PCA()  # with no n_components given, all min(n_samples, n_features) components are kept

        ###############################################################################
        # Plot the PCA spectrum

        pca.fit_transform(X_dense)
        print ("#############")
        print ("Explained variance ratio is "+str(pca.explained_variance_ratio_))

        #plt.figure(1, figsize=(4, 3))
        plt.clf()
        #plt.axes([.2, .2, .7, .7])
        plt.plot(pca.explained_variance_, linewidth=2)
        plt.axis('tight')
        plt.xlabel('n_components')
        plt.ylabel('explained_variance_')
        plt.show()

        return
Developer: cirnelle, Project: FacebookML, Lines: 37, Source: pca.py
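
If a numeric cutoff is preferred over reading the variance plot in Example 15, the cumulative explained_variance_ratio_ can choose n_components directly; PCA also accepts a float in (0, 1) as n_components to do this automatically. A short sketch on synthetic data (names are illustrative):

import numpy as np
from sklearn.decomposition import PCA

X = np.random.RandomState(0).rand(200, 30)       # synthetic data

pca = PCA().fit(X)                               # keep all components
cumvar = np.cumsum(pca.explained_variance_ratio_)
n_keep = int(np.searchsorted(cumvar, 0.95)) + 1  # smallest count reaching 95% variance
print(n_keep)

# Equivalent shortcut: keep just enough components for 95% of the variance
X_r = PCA(n_components=0.95).fit_transform(X)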


Note: The sklearn.decomposition.PCA.fit_transform examples in this article were collected by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by many developers; copyright of the source code belongs to the original authors, and distribution and use must follow the license of the corresponding project. Do not reproduce without permission.