This article collects typical usage examples of the Python method sklearn.decomposition.PCA.transform. If you have been wondering what PCA.transform does, how to call it, or what working usage looks like, the curated code samples below may help. You can also read further about its containing class, sklearn.decomposition.PCA.
The sections below show 15 code examples of the PCA.transform method, sorted by popularity by default.
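Before the curated examples, here is a minimal, self-contained sketch of the basic fit/transform pattern; the data shapes and parameter values below are illustrative only and are not taken from any of the examples that follow.

from sklearn.decomposition import PCA
import numpy as np

# Toy data: 100 samples with 10 features (illustrative values)
X = np.random.RandomState(0).randn(100, 10)

pca = PCA(n_components=3)     # keep the first 3 principal components
pca.fit(X)                    # learn the components from X
X_reduced = pca.transform(X)  # project X onto those components

print(X_reduced.shape)                # (100, 3)
print(pca.explained_variance_ratio_)  # variance explained by each component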
Example 1: buildpca
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import transform [as alias]
def buildpca(self):
totalset=[]
for path in self.filelist:
l=math.ceil(len(self.dic[path].framelistYin)/5)
for i in range(5):
frames=[]
for f in range(self.dic[path].framelistYin[int(l*i)],self.dic[path].framelistYin[min(len(self.dic[path].framelistYin)-1,int(l*(i+1)))]):
if f in self.dic[path].framelistYin:
frames.append(f)
if self.dic[path].traintest=='train':
hogset=self.dic[path].buildpca(frames)
for hog in hogset:
totalset.append(hog)
if self.dic[path].traintest=='test':
self.dic[path].buildpca(frames)
pca = PCA(n_components=51)
pca.fit(totalset)
#hogSet = pca.transform(totalset).tolist()
for path in self.filelist:
print path
for i in range(5):
print pca.transform(self.dic[path].hogset[i]).shape
self.dic[path].getHogYin(pca.transform(self.dic[path].hogset[i]))
Example 2: k_means_classify
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import transform [as alias]
def k_means_classify(data_list, n_clusters=15, n_components=30, pca=None):
"""
使用k-mean对patch进行分类
list 原始数据 (num,dim)
:n_clusters: 需要分类的数量
:n_components: 需要使用的维度
:return: 表示分类结果
"""
if len(data_list[1]) > n_components:
if pca is None:
            # Reduce the dimensionality of the raw data
            print "Fitting a new PCA for dimensionality reduction"
pca = PCA(n_components=n_components)
pca = pca.fit(data_list)
data_list = pca.transform(data_list)
else:
print "用已有的PCA进行降维"
data_list = pca.transform(data_list)
else:
print "已进行降维"
# 进行k-means聚类
k_means = KMeans(n_clusters=n_clusters)
k_means = k_means.fit(data_list)
y_predict = k_means.predict(data_list)
return y_predict, k_means, pca
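A brief usage sketch for k_means_classify above, on synthetic patches. The shapes, cluster counts, and variable names here are illustrative assumptions; the function also needs KMeans imported, and Python 2 is assumed because its body uses print statements.

import numpy as np
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans  # also used inside k_means_classify

# 200 synthetic 64-dimensional "patches" (illustrative shapes)
patches = np.random.RandomState(0).rand(200, 64)

# First call fits a new PCA; the returned pca can then be reused on new data
labels, km, pca = k_means_classify(patches, n_clusters=5, n_components=30)
more_labels, _, _ = k_means_classify(np.random.rand(50, 64), n_clusters=5,
                                     n_components=30, pca=pca)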
Example 3: svm
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import transform [as alias]
def svm(self):
img = self.training_set[0]
img.image_data = gaussian_filter(img.image_data, 15)
# img_average = numpy.average(img.image_data)
training_set = self.generate_feature(img)
img = self.training_set[1]
img.image_data = gaussian_filter(img.image_data, 15)
test_set = self.generate_feature(img)
pca = PCA(n_components = 20)
pca.fit([item[0] for item in training_set]+[item[0] for item in test_set])
pca_training = pca.transform([item[0] for item in training_set])
# for img in training_set:
# print_image(img[0].reshape(2*self.MAT_SIZE[0]+1,2*self.MAT_SIZE[1]+1), '{}_fig_{}_{}.png'.format(img[1], img[2][0], img[2][1]))
# training_set = training_set.map(lambda x: (x[0]-img_average, x[1]))
model = svm.SVC()
# model = tree.DecisionTreeClassifier()
model.fit(pca_training,numpy.array([item[1] for item in training_set]))
training_result = model.predict(pca_training)
hit = 0
for index, tag in enumerate(training_result):
if tag == training_set[index][1]:
hit += 1
print(float(hit) / float(len(training_set)))
pca_test = pca.transform([item[0] for item in test_set])
# test_set = test_set.map(lambda x: (x[0]-img_average, x[1]))
predicted = model.predict(pca_test)
hit = 0
for index, tag in enumerate(predicted):
if tag == test_set[index][1]:
hit += 1
print(float(hit) / float(len(test_set)))
Example 4: test_explained_variance
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import transform [as alias]
def test_explained_variance():
# Check that PCA output has unit-variance
rng = np.random.RandomState(0)
n_samples = 100
n_features = 80
X = rng.randn(n_samples, n_features)
pca = PCA(n_components=2, svd_solver="full").fit(X)
apca = PCA(n_components=2, svd_solver="arpack", random_state=0).fit(X)
assert_array_almost_equal(pca.explained_variance_, apca.explained_variance_, 1)
assert_array_almost_equal(pca.explained_variance_ratio_, apca.explained_variance_ratio_, 3)
rpca = PCA(n_components=2, svd_solver="randomized", random_state=42).fit(X)
assert_array_almost_equal(pca.explained_variance_, rpca.explained_variance_, 1)
assert_array_almost_equal(pca.explained_variance_ratio_, rpca.explained_variance_ratio_, 1)
# compare to empirical variances
X_pca = pca.transform(X)
assert_array_almost_equal(pca.explained_variance_, np.var(X_pca, axis=0))
X_pca = apca.transform(X)
assert_array_almost_equal(apca.explained_variance_, np.var(X_pca, axis=0))
X_rpca = rpca.transform(X)
assert_array_almost_equal(rpca.explained_variance_, np.var(X_rpca, axis=0), decimal=1)
# Same with correlated data
X = datasets.make_classification(n_samples, n_features, n_informative=n_features - 2, random_state=rng)[0]
pca = PCA(n_components=2).fit(X)
rpca = PCA(n_components=2, svd_solver="randomized", random_state=rng).fit(X)
assert_array_almost_equal(pca.explained_variance_ratio_, rpca.explained_variance_ratio_, 5)
Example 5: fit_pca
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import transform [as alias]
def fit_pca(self, train_pairs, test_pairs):
train_pairs_flat = [item for subtuple in train_pairs for item in subtuple]
test_pairs_flat = [item for subtuple in test_pairs for item in subtuple]
pca = PCA(n_components = self.pca_components)
pca.fit(train_pairs_flat)
train_pairs_pca_flat = pca.transform(train_pairs_flat)
test_pairs_pca_flat = pca.transform(test_pairs_flat)
train_pairs_pca = list()
test_pairs_pca = list()
for i in xrange(0, len(train_pairs_pca_flat), 2):
a = i
b = i + 1
train_pairs_pca.append((train_pairs_pca_flat[a],
train_pairs_pca_flat[b]))
for i in xrange(0, len(test_pairs_pca_flat), 2):
a = i
b = i + 1
test_pairs_pca.append((test_pairs_pca_flat[a],
test_pairs_pca_flat[b]))
return train_pairs_pca, test_pairs_pca
Example 6: a_b_classify_pca
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import transform [as alias]
def a_b_classify_pca((f_train, t_train, f_test, t_test, n_components)):
'''
Uses an SVM to classify A and B sections based on the feature vectors
built above, and returns some statistical results
'''
print '{}: Starting PCA with {} components (this could take a while...)'.format(time.ctime(), n_components)
pca = PCA(n_components = n_components)
pca.fit(f_train)
f_train_pca = list(pca.transform(f_train))
f_test_pca = list(pca.transform(f_test))
print '{0}: Training the SVM'.format(time.ctime())
clf = svm.SVC()
clf.fit(f_train_pca, t_train)
print '{0}: Classifying using SVM'.format(time.ctime())
t_predict = clf.predict(f_test_pca)
t_train_predict = clf.predict(f_train_pca)
print 'Confusion matrix is built so that C_ij is the number of observations known to be in group i but predicted to be in group j. In this case, group 0 corresponds to A sections and group 1 corresponds to B sections.'
print 'Confusion matrix on test data:'
test_confuse = confusion_matrix(t_test, t_predict)
print test_confuse
print 'Confusion matrix on training data:'
train_confuse = confusion_matrix(t_train, t_train_predict)
print train_confuse
return train_confuse, test_confuse
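A hedged usage sketch for a_b_classify_pca above. It assumes Python 2 (the tuple-unpacking signature and print statements are Python 2 only), that svm and confusion_matrix are in scope as the function expects, and the random feature matrices and labels are purely illustrative.

import time
import numpy as np
from sklearn.decomposition import PCA
from sklearn import svm                       # used inside a_b_classify_pca
from sklearn.metrics import confusion_matrix  # used inside a_b_classify_pca

rng = np.random.RandomState(0)
f_train, t_train = rng.rand(60, 30), rng.randint(0, 2, 60)
f_test, t_test = rng.rand(20, 30), rng.randint(0, 2, 20)

# The function takes a single tuple and unpacks it in its signature
train_cm, test_cm = a_b_classify_pca((f_train, t_train, f_test, t_test, 10))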
Example 7: perform_pca
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import transform [as alias]
def perform_pca(training_data, testing_data, component_number, verbose=False):
'''
Perform PCA to compress the number of features in the training and test matrices.
Input:
* training data matrix of tweets -> features
* testing data matrix of tweets -> features
* the number of components to compress to
* verbosity
Output:
* compressed training matrix
* compressed testing matrix
'''
if verbose: print "Performing PCA Compression to %s Components ..." % component_number
from sklearn.decomposition import PCA
pca = PCA(n_components=component_number, whiten=True)
pca.fit(training_data)
training_data = pca.transform(training_data)
testing_data = pca.transform(testing_data)
if verbose: print "Done"
return training_data, testing_data
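A small usage sketch for perform_pca above, using random stand-in matrices; the shapes and the component count are illustrative assumptions, and Python 2 is assumed because the function body uses print statements.

import numpy as np

# Stand-in tweet-feature matrices: 80 training rows and 20 test rows, 50 features each
train = np.random.RandomState(0).rand(80, 50)
test = np.random.RandomState(1).rand(20, 50)

train_small, test_small = perform_pca(train, test, component_number=10, verbose=True)
print(train_small.shape)  # (80, 10)
print(test_small.shape)   # (20, 10)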
Example 8: compress
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import transform [as alias]
def compress(self, d):
X_train, Y_train = self.separate_XY(self.XY_train)
X_test, Y_test = self.separate_XY(self.XY_test)
pca = PCA(n_components=d)
X_train_set = self.build_relation_set(X_train)
pca.fit(X_train_set)
# transform X_train
print 'Compressing training data . . .'
N, D = X_train.shape
X1_train = X_train[:,:D/2]
X2_train = X_train[:,D/2:]
new_XY_train = np.empty((N, 2*d + 2))
new_XY_train[:,:d] = pca.transform(X1_train)
new_XY_train[:,d:-2] = pca.transform(X2_train)
new_XY_train[:,-2:] = Y_train
print 'dimensions:', new_XY_train.shape
np.save(self.f_train, new_XY_train)
# transform X_test
print 'Compressing testing data . . .'
N, D = X_test.shape
X1_test = X_test[:,:D/2]
X2_test = X_test[:,D/2:]
new_XY_test = np.empty((N, 2*d + 2))
new_XY_test[:,:d] = pca.transform(X1_test)
new_XY_test[:,d:-2] = pca.transform(X2_test)
new_XY_test[:,-2:] = Y_test
print 'dimensions:', new_XY_test.shape
np.save(self.f_test, new_XY_test)
Example 9: plot3DimPCA
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import transform [as alias]
def plot3DimPCA(X, y, file_name=None):
fig = plt.figure(1, figsize=(4, 3))
plt.clf()
ax = Axes3D(fig, rect=[0, 0, .95, 1], elev=48, azim=50)
plt.cla()
pca = PCA(n_components=3)
pca.fit(X)
X = pca.transform(X)
print pca.explained_variance_ratio_
for name, label in [('Negative', 0), ('Positive', 1)]:
ax.text3D(X[y == label, 0].mean(),
X[y == label, 1].mean(),
X[y == label, 2].mean(),
name,
horizontalalignment='center',
bbox=dict(alpha=.5, edgecolor='w', facecolor='w'))
# Reorder the labels to have colors matching the cluster results
y = numpy.choose(y, [1, 0]).astype(numpy.float)
ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=y, cmap=plt.cm.spectral)
x_surf = [X[:, 0].min(), X[:, 0].max(),
X[:, 0].min(), X[:, 0].max()]
y_surf = [X[:, 0].max(), X[:, 0].max(),
X[:, 0].min(), X[:, 0].min()]
x_surf = numpy.array(x_surf)
y_surf = numpy.array(y_surf)
v0 = pca.transform(pca.components_[[0]])
v0 /= v0[-1]
v1 = pca.transform(pca.components_[[1]])
v1 /= v1[-1]
ax.w_xaxis.set_ticklabels([])
ax.w_yaxis.set_ticklabels([])
ax.w_zaxis.set_ticklabels([])
if file_name is not None:
        fig.savefig(file_name, bbox_inches='tight')
Example 10: prepare_dataset
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import transform [as alias]
def prepare_dataset(XX_train, y_train, XX_test, var_ratio_min=99.9, ratio_sd=100):
# Scale it
myScaler = StandardScaler()
XX_train_scaled = myScaler.fit_transform(XX_train)
# Select the most significant features
pca_scaled = PCA(svd_solver='full', whiten=True, n_components=var_ratio_min/100).fit(XX_train_scaled)
XX_pca_scaled = pca_scaled.transform(XX_train_scaled)
print("%d features selected out of %d (%d %%) for PCA which explains %d %% of variance" % (pca_scaled.n_components_, XX_train.shape[1], pca_scaled.n_components_/XX_train.shape[1]*100, pca_scaled.explained_variance_ratio_.sum()*100))
# print("\n explained variance ratio as a 'per thousand' ratio for each of the selected features")
# print((pca_scaled.explained_variance_ratio_*1000).round())
# Select a certain amount of observations
n_sd = XX_train.shape[0]*ratio_sd/100 # effective number of observations retained
print("%d observations selected out of %d (%d %%) for Shuffling and training" % (n_sd, XX_train.shape[0], ratio_sd))
    # Shuffle it
XX_train_scaled_shuffled, yy_train_scaled_shuffled = shuffle(XX_pca_scaled, y_train, n_samples=n_sd)
# Adapt the test set accordingly
XX_test_scaled = myScaler.transform(XX_test)
XX_test_scaled_pca = pca_scaled.transform(XX_test_scaled)
return XX_train_scaled_shuffled, yy_train_scaled_shuffled, XX_test_scaled_pca
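A hedged usage sketch for prepare_dataset above. It additionally assumes StandardScaler and shuffle are imported (the function uses both), Python 2 semantics for the integer division applied to ratio_sd, and synthetic data shapes.

import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler  # used inside prepare_dataset
from sklearn.utils import shuffle                 # used inside prepare_dataset

rng = np.random.RandomState(0)
XX_train = rng.rand(120, 40)
y_train = rng.randint(0, 2, 120)
XX_test = rng.rand(30, 40)

X_tr, y_tr, X_te = prepare_dataset(XX_train, y_train, XX_test,
                                    var_ratio_min=95.0, ratio_sd=80)
print(X_tr.shape)  # 96 shuffled rows, reduced to the selected components
print(X_te.shape)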
Example 11: Tracer
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import transform [as alias]
class Tracer(object):
def __init__(self, algo='pca'):
self.algo = algo
def fit(self, data):
"""Fit a dimensionality reduction model on data.
"""
print "Embedding"
print "Fitting PCA"
if self.algo == 'pca':
self.proj = PCA(n_components=2)
self.proj.fit(data)
else:
raise ValueError("Unknown algo {}".format(self.algo))
def project(self, data):
return self.proj.transform(data)
def traces(self, sents, reps, loc="best", eos=False):
"""Plots traces for given sents.
"""
last = None if eos else -1
for i in range(len(sents)):
xy = self.proj.transform(reps[i])
x = xy[0:last,0] ; y = xy[0:last,1]
pylab.plot(x, y, label=''.join(sents[i]), linewidth=3, alpha=0.5)
for j in range(0,xy.shape[0]-1):
pylab.text(xy[j,0], xy[j,1], sents[i][j], va='center', ha='center', alpha=0.5)
pylab.legend(loc=loc)
Example 12: withPCA
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import transform [as alias]
def withPCA (dimensions):
    '''
    It finds the principal components of fmri_train and keeps the
    number of components given in dimensions. It then runs lasso on every
    semantic feature for a list of lambdas from 80 to 120 and keeps the
    w with the least RMSE on the validation data. It returns the accuracy on
    the test data, the best weights and the PCA fit. It also saves w to a file.
    :param dimensions: number of dimensions for the principal components
    :return: accuracy, bestw, and the PCA fit
    '''
pca = PCA(n_components=dimensions)
pca.fit(fmri_train)
    xtrainpcaed = pca.transform(fmri_train)
    xtrainPCA = sparse.csc_matrix(xtrainpcaed)
    xtest = pca.transform(fmri_test)
num_features = ytrain.shape[1]
d = xtrainPCA.shape[1]
ntotdata = xtrainPCA.shape[0]
ntrain = 250 # number of data to be trained on, rest are used as cross validation
bestw = np.zeros([num_features,d])
accuracy = np.zeros(d)
lasso = lassoSolver.LassoClass()
lambda_list = list(range(80,120)) # list of lambdas to use
for i in range(num_features):
print ('looking at feature ', i)
bestw[i,:] = lasso.descendingLambda(ytrain[0:ntrain,i].reshape(ntrain,1), xtrainPCA[0:ntrain,:], ytrain[ntrain:,i].reshape(ntotdata-ntrain,1), xtrainPCA[ntrain:,:], lambda_list).reshape(d)
wfile = "allwallfeatures_pca300_lambda80_120.mtx" # name of w file to save as
io.mmwrite(wfile, bestw)
test_suite.main(wfile,wordid_train,wordid_test,wordfeature_std,xtest)
return [accuracy,bestw, pca]
Example 13: learn_and_classify
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import transform [as alias]
def learn_and_classify(training_matrix, training_targets, testing_matrix, options, clffile, cores=1):
from sklearn import svm
from sklearn.decomposition import PCA
import pickle, datetime, multiprocessing, Queue
# Default values for the options
g = 0.7
C = 1.0
n = 100
# If options provided, replace the default values
if options:
for option in options:
exec option
# Dimension reduction
pca = PCA(n_components=n)
print "[%s] fit & transform the training matrix" % datetime.datetime.now()
    # keep the PCA-reduced matrices so the SVM trains and predicts in PCA space
    training_matrix = pca.fit_transform(training_matrix, training_targets)
    print "[%s] transform the testing matrix" % datetime.datetime.now()
    testing_matrix = pca.transform(testing_matrix)
# SVM fitting
print "[%s] learning" % datetime.datetime.now()
rbf_svc = svm.SVC(kernel='rbf', gamma=g, C=C).fit(training_matrix, training_targets)
# Saving model
print "[%s] saving model" % datetime.datetime.now()
with open(clffile, 'w') as fh:
pickle.dump((pca, rbf_svc), fh)
#print "predicting"
print "[%s] classifying" % datetime.datetime.now()
return split_predict(testing_matrix, rbf_svc, cores)
Example 14: main
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import transform [as alias]
def main():
cmd = sys.argv[3]
input_dir = sys.argv[1]
num_samples_per_site = int(sys.argv[2])
target_sites = get_target_sites(input_dir)
labels = dict(map(lambda (i, s): (s, i), enumerate(target_sites)))
X, Y = load_feature_vectors(input_dir, num_samples_per_site, labels)
X, Y, testX, testY = select_test_set(X, Y,
(num_samples_per_site / 2) * len(target_sites))
Y = map(lambda v: v*1.0, Y)
testY = map(lambda v: v*1.0, testY)
pca = PCA(n_components = 50)
print "Fitting X"
pca.fit(X)
print "Transforming X and testX"
Xnew = pca.transform(X)
testXnew = pca.transform(testX)
del X
del testX
if cmd == "monb":
multiclass_on_binary_svms(Xnew, Y, testXnew, testY)
elif cmd == "multiclass":
multiclass_svm(Xnew, testXnew, Y, testY, labels)
elif cmd == "anomdet":
anomaly_detection(labels, Xnew, Y, testXnew, testY)
Example 15: fit
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import transform [as alias]
def fit(self, features, labels):
kf = KFold(len(features), n_folds = self.n_folds)
self.accuracy_list = []
self.pca_list = []
self.clf_list = []
for train_index, test_index in kf:
features_train, features_test = features[train_index], features[test_index]
labels_train, labels_test = labels[train_index], labels[test_index]
### machine learning goes here!
### please name your classifier clf for easy export below
# Using the pca to transform the training set
pca = PCA(n_components=self.n_components, whiten=True).fit(features_train)
features_train_pca = pca.transform(features_train)
features_test_pca = pca.transform(features_test)
#clf = None ### get rid of this line! just here to keep code from crashing out-of-box
#from sklearn.tree import DecisionTreeClassifier
#clf = DecisionTreeClassifier()
#clf.fit(features_train_pca, labels_train)
#print clf.score(features_test_pca, labels_test)
clf = SVC(C=1, gamma=0)
clf.fit(features_train_pca, labels_train)
accuracy = clf.score(features_test_pca, labels_test)
self.accuracy_list.append(accuracy)
self.pca_list.append(pca)
self.clf_list.append(clf)