This article collects typical usage examples of the PCA.fit_transform method from Python's sklearn.decomposition module. Are you wondering what exactly PCA.fit_transform does and how to call it in practice? The curated code examples below should help. You can also read further about the class this method belongs to, sklearn.decomposition.PCA.
Below are 15 code examples of PCA.fit_transform, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
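Before the collected examples, here is a minimal, self-contained sketch of the basic usage pattern; the data shapes and variable names are purely illustrative. One pitfall worth keeping in mind while reading: fit_transform both learns the principal components and projects the data, so held-out data should normally be projected with transform on the already-fitted estimator rather than refit.

import numpy as np
from sklearn.decomposition import PCA

# Illustrative data: 100 training and 20 test samples with 10 features each
rng = np.random.RandomState(0)
X_train = rng.randn(100, 10)
X_test = rng.randn(20, 10)

pca = PCA(n_components=2)

# fit_transform() learns the components from X_train and projects
# X_train onto them in a single step
X_train_2d = pca.fit_transform(X_train)   # shape (100, 2)

# Held-out data is projected with transform(), not fit_transform(),
# so both sets share the same learned component space
X_test_2d = pca.transform(X_test)         # shape (20, 2)

print(pca.explained_variance_ratio_)      # variance captured per component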
Example 1: __init__
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import fit_transform [as alias]
class GMM:
    def __init__(self, reduction=1, do_pca=False):
        self.means = []
        self.vars = []
        self.user_ids = {}
        self.pca = PCA(2)
        self.do_pca = do_pca
        self.reduction = reduction

    def fit(self, xdata, ydata):
        if self.do_pca:
            xx, xy = zip(*self.pca.fit_transform(xdata))
            xdata = np.array(list(zip(xx, np.array(xy) / self.reduction)))
        users = np.unique(ydata)
        for u in users:
            curdata = xdata[ydata == u]
            self.user_ids[len(self.means)] = u
            self.means.append(np.mean(curdata, 0))
            self.vars.append(np.sqrt(np.var(curdata, 0)))

    def __compute_closest(self, xval):
        # Pick the user whose mean profile is nearest in summed absolute z-score
        return self.user_ids[np.argmin([np.sum(np.abs((xval - self.means[i]) / self.vars[i]))
                                        for i in range(len(self.means))])]

    def predict(self, xdata):
        if self.do_pca:
            # Reuse the projection learned in fit(); refitting here would place
            # the test data in a different component space
            xx, xy = zip(*self.pca.transform(xdata))
            xdata = np.array(list(zip(xx, np.array(xy) / self.reduction)))
        return np.array([self.__compute_closest(x) for x in xdata])
Example 2: train_pca_linreg_model
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import fit_transform [as alias]
def train_pca_linreg_model(out_of_transit_mask, oot_no_validation_mask, n_comp):
    # `regressors`, `target_fluxes`, `target_errors`, `ones`, `regression_coeffs`
    # and `regression_model` are assumed to be defined at module level.
    # OOT chunk first:
    pca = PCA(n_components=n_comp)
    reduced_regressors = pca.fit_transform(regressors[out_of_transit_mask],
                                           target_fluxes[out_of_transit_mask])
    prepended_regressors_oot = np.hstack([ones[out_of_transit_mask],
                                          reduced_regressors])
    c_oot = regression_coeffs(prepended_regressors_oot,
                              target_fluxes[out_of_transit_mask],
                              target_errors[out_of_transit_mask])
    lc_training = (target_fluxes[out_of_transit_mask] -
                   regression_model(c_oot, prepended_regressors_oot))
    median_oot = np.median(target_fluxes[out_of_transit_mask])
    std_lc_training = np.std((lc_training + median_oot) / median_oot)

    # Now on the validation chunk. Project with the already-fitted PCA so these
    # regressors share the component space fitted above:
    reduced_regressors_no_validation = pca.transform(regressors[oot_no_validation_mask])
    prepended_regressors_no_validation = np.hstack([ones[oot_no_validation_mask],
                                                    reduced_regressors_no_validation])
    c_no_validation = regression_coeffs(prepended_regressors_no_validation,
                                        target_fluxes[oot_no_validation_mask],
                                        target_errors[oot_no_validation_mask])
    lc_validation = (target_fluxes[out_of_transit_mask] -
                     regression_model(c_no_validation, prepended_regressors_oot))
    std_lc_validation = np.std((lc_validation + median_oot) / median_oot)
    return lc_training, lc_validation, std_lc_training, std_lc_validation
Example 3: __init__
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import fit_transform [as alias]
def __init__(self, pca_components=None, whiten=True, k_best=False):
    train = pd.read_csv('data/train.csv')
    test = pd.read_csv('data/test.csv')

    # Some columns have zero variance
    # train = train.loc[:, train.std() > 0]
    # test = test.loc[:, test.std() > 0]

    # Treat -999999 in var3 as missing and replace it with the most common value (2)
    train['var3'] = train['var3'].replace(-999999, 2)
    test['var3'] = test['var3'].replace(-999999, 2)

    X_train = train.iloc[:, :-1].values
    y_train = train.iloc[:, -1].values
    X_test = test.values

    # Perform PCA; project the test set with the PCA fitted on the training
    # set so both live in the same component space
    pca = PCA(n_components=pca_components, whiten=whiten)
    X_train = pca.fit_transform(X_train, y_train)
    X_test = pca.transform(X_test)

    if k_best:
        if k_best > pca_components:
            k_best = 'all'
        # Select the k best features by F-score
        kb = SelectKBest(f_classif, k=k_best)
        X_train = kb.fit_transform(X_train, y_train)
        X_test = kb.transform(X_test)

    self.X_train = X_train
    self.y_train = y_train
    self.X_test = X_test
Example 4: reduce_dimensions
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import fit_transform [as alias]
def reduce_dimensions(X):
    '''
    Reduce the dimensionality of X with different reducers.

    Return a sequence of tuples containing:
        (title, x coordinates, y coordinates)
    for each reducer.
    '''
    # Principal Component Analysis (PCA) is a linear reduction model
    # that identifies the components of the data with the largest
    # variance.
    from sklearn.decomposition import PCA
    reducer = PCA(n_components=2)
    X_r = reducer.fit_transform(X)
    yield 'PCA', X_r[:, 0], X_r[:, 1]

    # Independent Component Analysis (ICA) decomposes a signal by
    # identifying the independent contributing sources.
    from sklearn.decomposition import FastICA
    reducer = FastICA(n_components=2)
    X_r = reducer.fit_transform(X)
    yield 'ICA', X_r[:, 0], X_r[:, 1]

    # t-distributed Stochastic Neighbor Embedding (t-SNE) is a
    # non-linear reduction model. It operates best on data with a low
    # number of attributes (<50) and is often preceded by a linear
    # reduction model such as PCA.
    from sklearn.manifold import TSNE
    reducer = TSNE(n_components=2)
    X_r = reducer.fit_transform(X)
    yield 't-SNE', X_r[:, 0], X_r[:, 1]
Example 5: feature_extraction_partialPCA
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import fit_transform [as alias]
def feature_extraction_partialPCA(X_grad_train, X_grad_test, X_mag_train, X_mag_test):
    # Flattens the data, centers it, runs PCA on data from each sensor type
    # (gradiometers & magnetometers) separately, then standardizes the result (z-score)
    from sklearn.preprocessing import StandardScaler

    def flat_n_standartize(Xtrain, Xtest):
        # Flatten the time x channel arrays and center them on the training mean
        Xtrain = Xtrain.reshape(Xtrain.shape[0], -1)  # n_samples x n_time x n_channels -> n_samples x n_features
        mean = Xtrain.mean(axis=0)
        Xtrain = Xtrain - mean
        Xtest = Xtest.reshape(Xtest.shape[0], -1)
        Xtest = Xtest - mean
        return Xtrain, Xtest  # data from the same sensor type share the same scale

    X_grad_train, X_grad_test = flat_n_standartize(X_grad_train, X_grad_test)
    X_mag_train, X_mag_test = flat_n_standartize(X_mag_train, X_mag_test)

    effective_pca_num = 40  # number of PCA components
    # Whitening scales each component to unit variance; without it the
    # downstream SVM would not work well
    pca = PCA(n_components=effective_pca_num, whiten=True)
    X_grad_train = pca.fit_transform(X_grad_train)
    X_grad_test = pca.transform(X_grad_test)
    # Refit the same PCA object on the magnetometer data
    X_mag_train = pca.fit_transform(X_mag_train)
    X_mag_test = pca.transform(X_mag_test)

    Xtrain = np.hstack((X_grad_train, X_mag_train))
    Xtest = np.hstack((X_grad_test, X_mag_test))
    scaler = StandardScaler().fit(Xtrain)
    return scaler.transform(Xtrain), scaler.transform(Xtest)
Example 6: runPCA
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import fit_transform [as alias]
def runPCA(hogDir):
    ### Directory stuff
    if not os.path.exists(hogDir):
        print("You must first create HOG features")
        sys.exit(1)
    trainData = np.load(hogDir + 'train/hog.npy')
    testData = np.load(hogDir + 'test/hog.npy')
    # print("shape trainData:", trainData.shape)
    # print("shape testData:", testData.shape)
    data = np.append(trainData, testData, axis=0)
    # print("data shape:", data.shape)

    ### Fit the PCA model on the combined data, then project both sets with
    ### the same fit so the components are comparable
    pca = PCA(n_components=min(testData.shape[0], trainData.shape[0],
                               testData.shape[1], trainData.shape[1]))
    pca.fit(data)
    trainData = pca.transform(trainData)
    testData = pca.transform(testData)
    # print("shape trainData:", trainData.shape)
    # print("shape testData:", testData.shape)

    ### Store the arrays of transformed feature vectors
    np.save(hogDir + 'train/pca', trainData)
    np.save(hogDir + 'test/pca', testData)
Example 7: test_feature_union_weights
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import fit_transform [as alias]
def test_feature_union_weights():
    # test feature union with transformer weights
    iris = load_iris()
    X = iris.data
    y = iris.target
    pca = PCA(n_components=2, svd_solver='randomized', random_state=0)
    select = SelectKBest(k=1)
    # test using fit followed by transform
    fs = FeatureUnion([("pca", pca), ("select", select)],
                      transformer_weights={"pca": 10})
    fs.fit(X, y)
    X_transformed = fs.transform(X)
    # test using fit_transform
    fs = FeatureUnion([("pca", pca), ("select", select)],
                      transformer_weights={"pca": 10})
    X_fit_transformed = fs.fit_transform(X, y)
    # test it works with transformers missing fit_transform
    fs = FeatureUnion([("mock", Transf()), ("pca", pca), ("select", select)],
                      transformer_weights={"mock": 10})
    X_fit_transformed_wo_method = fs.fit_transform(X, y)
    # check against expected result
    # We use a different pca object to control the random_state stream
    assert_array_almost_equal(X_transformed[:, :-1], 10 * pca.fit_transform(X))
    assert_array_equal(X_transformed[:, -1],
                       select.fit_transform(X, y).ravel())
    assert_array_almost_equal(X_fit_transformed[:, :-1],
                              10 * pca.fit_transform(X))
    assert_array_equal(X_fit_transformed[:, -1],
                       select.fit_transform(X, y).ravel())
    assert_equal(X_fit_transformed_wo_method.shape, (X.shape[0], 7))
Example 8: examples
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import fit_transform [as alias]
def examples():
    # toy example
    X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
    pca = PCA(n_components=2)
    pca.fit(X)
    # print(pca.explained_variance_ratio_)
    # [ 0.99244...  0.00755...]

    indivs, genoArr = parseHapmap.runParse()
    genoArr_copy = copy.deepcopy(genoArr)

    # with 2 components
    pca2 = PCA(n_components=2)
    pca2.fit(genoArr_copy)
    print(pca2)
    # print(pca2.explained_variance_ratio_)
    print(genoArr_copy)  # fit() leaves the input array unchanged
    print('\n\n\n')

    # with 2 components, returning the transformed data
    pca2_trans = PCA(n_components=2)
    genoArr_trans = pca2_trans.fit_transform(genoArr_copy)
    print(pca2_trans)
    # print(pca2_trans.explained_variance_ratio_)
    print(genoArr_trans)

    # with 10 components; the first 2 components match the previous result (up to sign)
    pca10_trans = PCA(n_components=10)
    genoArr_trans10 = pca10_trans.fit_transform(genoArr_copy)
    print(genoArr_trans10)
Example 9: test_pca
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import fit_transform [as alias]
def test_pca():
    # PCA on dense arrays
    X = iris.data
    for n_comp in np.arange(X.shape[1]):
        pca = PCA(n_components=n_comp, svd_solver='full')

        X_r = pca.fit(X).transform(X)
        np.testing.assert_equal(X_r.shape[1], n_comp)

        X_r2 = pca.fit_transform(X)
        assert_array_almost_equal(X_r, X_r2)

        X_r = pca.transform(X)
        X_r2 = pca.fit_transform(X)
        assert_array_almost_equal(X_r, X_r2)

        # Test get_covariance and get_precision
        cov = pca.get_covariance()
        precision = pca.get_precision()
        assert_array_almost_equal(np.dot(cov, precision),
                                  np.eye(X.shape[1]), 12)

    # test explained_variance_ratio_ == 1 with all components
    pca = PCA(svd_solver='full')
    pca.fit(X)
    assert_almost_equal(pca.explained_variance_ratio_.sum(), 1.0, 3)
Example 10: learn_and_classify
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import fit_transform [as alias]
def learn_and_classify(training_matrix, training_targets, testing_matrix, options, clffile, cores=1):
    from sklearn import svm
    from sklearn.decomposition import PCA
    import pickle, datetime

    # Default values for the options
    params = {'g': 0.7, 'C': 1.0, 'n': 100}
    # If options are provided (strings such as "n = 50"), override the defaults.
    # exec() cannot rebind function locals in Python 3, so execute the
    # assignments against a dict instead.
    if options:
        for option in options:
            exec(option, {}, params)
    g, C, n = params['g'], params['C'], params['n']

    # Dimension reduction: keep the transformed matrices so the SVM trains
    # and predicts in PCA space
    pca = PCA(n_components=n)
    print("[%s] fit & transform the training matrix" % datetime.datetime.now())
    training_matrix = pca.fit_transform(training_matrix, training_targets)
    print("[%s] transform the testing matrix" % datetime.datetime.now())
    testing_matrix = pca.transform(testing_matrix)

    # SVM fitting
    print("[%s] learning" % datetime.datetime.now())
    rbf_svc = svm.SVC(kernel='rbf', gamma=g, C=C).fit(training_matrix, training_targets)

    # Saving model (binary mode for pickle)
    print("[%s] saving model" % datetime.datetime.now())
    with open(clffile, 'wb') as fh:
        pickle.dump((pca, rbf_svc), fh)

    print("[%s] classifying" % datetime.datetime.now())
    return split_predict(testing_matrix, rbf_svc, cores)
Example 11: pcaProj
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import fit_transform [as alias]
def pcaProj(self):
    # Fit PCA once on the training rows, then project the other two sets into
    # the same 2-D space so the three scatter clouds are comparable
    pca = PCA(n_components=2)
    aa = pca.fit_transform(array([r.cells[:-2] for r in self.train]))
    bb = pca.transform(array([r for r in self.test]))
    cc = pca.transform(array([r.cells[:-2] for r in self.delta]))
    self.scatterplot(
        [aa, bb, cc], c=[[0.5, 0.5, 0.5], [0.85, 0.0, 0.0], [0, 0.85, 0]])
Example 12: feature_scaled_nn_acc
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import fit_transform [as alias]
def feature_scaled_nn_acc(mds, type):
    train, validation = validation_split(mds)

    # Multiply by 1 to convert the boolean target to 0/1
    y_train = train['Up'] * 1
    X_train = train.drop('Up', axis=1)
    y_validation = validation['Up'] * 1
    X_validation = validation.drop('Up', axis=1)

    pre = PCA(n_components=19, whiten=True)
    X_train_pca = pre.fit_transform(X_train)
    # Project the validation set with the PCA fitted on the training set
    X_validation_pca = pre.transform(X_validation)

    model = create_model(X_train_pca.shape[1], type)

    # Convert to Keras format
    y_train = to_categorical(y_train.values)
    y_validation = to_categorical(y_validation.values)

    model.fit(X_train_pca, y_train, epochs=5, batch_size=16)
    time.sleep(0.1)

    # Predict
    guess_train = model.predict_classes(X_train_pca)
    guess_train = to_categorical(guess_train)
    guess_validation = model.predict_classes(X_validation_pca)
    guess_validation = to_categorical(guess_validation)

    train_acc = accuracy_score(y_train, guess_train)
    validation_acc = accuracy_score(y_validation, guess_validation)
    print("\n neural net train accuracy is {}".format(train_acc))
    print("\n neural net validation accuracy is {}".format(validation_acc))
    return guess_validation
Example 13: plot_original_data
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import fit_transform [as alias]
def plot_original_data(self):
    """
    Plot the original data of each modality in 2-D.

    :return: None
    """
    pca = PCA(n_components=2)
    # Each feature set gets its own independent 2-D PCA embedding
    x = pca.fit_transform(self.english_feature.feature)
    y = pca.fit_transform(self.image_feature.feature)
    z = pca.fit_transform(self.japanese_feature.feature)
    print(x[x != 0])
    print(y)
    print(z[z != 0])

    # plot
    plt.subplot(311)
    plt.plot(x[:, 0], x[:, 1], '.r')
    plt.title('X')
    plt.subplot(312)
    plt.plot(y[:, 0], y[:, 1], '.g')
    plt.title('Y')
    plt.subplot(313)
    plt.plot(z[:, 0], z[:, 1], '.b')
    plt.title('Z')
    plt.show()
Example 14: test_pca
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import fit_transform [as alias]
def test_pca():
    """PCA on dense arrays"""
    pca = PCA(n_components=2)
    X = iris.data
    X_r = pca.fit(X).transform(X)
    np.testing.assert_equal(X_r.shape[1], 2)

    X_r2 = pca.fit_transform(X)
    assert_array_almost_equal(X_r, X_r2)

    pca = PCA()
    pca.fit(X)
    assert_almost_equal(pca.explained_variance_ratio_.sum(), 1.0, 3)

    X_r = pca.transform(X)
    X_r2 = pca.fit_transform(X)
    assert_array_almost_equal(X_r, X_r2)

    # Test get_covariance and get_precision with n_components == n_features,
    # with n_components < n_features and with n_components == 0
    for n_components in [0, 2, X.shape[1]]:
        pca.n_components = n_components
        pca.fit(X)
        cov = pca.get_covariance()
        precision = pca.get_precision()
        assert_array_almost_equal(np.dot(cov, precision), np.eye(X.shape[1]), 12)
Example 15: plot_variance_graph
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import fit_transform [as alias]
def plot_variance_graph(self):
    # Build the document-term matrix
    count_vect = CountVectorizer(stop_words=stopwords, min_df=3, max_df=0.90, ngram_range=(1, 1))
    X_CV = count_vect.fit_transform(docs_train)
    # print the number of unique words (n_features)
    print("Shape of train data is " + str(X_CV.shape))

    # TF-IDF transformation
    tfidf_transformer = TfidfTransformer(use_idf=True)
    X_tfidf = tfidf_transformer.fit_transform(X_CV)
    X_dense = X_tfidf.toarray()

    # If no n_components is specified, then n_components = n_features
    pca = PCA()

    ###############################################################################
    # Plot the PCA spectrum
    pca.fit_transform(X_dense)
    print("#############")
    print("Explained variance ratio is " + str(pca.explained_variance_ratio_))

    # plt.figure(1, figsize=(4, 3))
    plt.clf()
    # plt.axes([.2, .2, .7, .7])
    plt.plot(pca.explained_variance_, linewidth=2)
    plt.axis('tight')
    plt.xlabel('n_components')
    plt.ylabel('explained_variance_')
    plt.show()
    return