本文整理汇总了Python中sklearn.decomposition.RandomizedPCA.transform方法的典型用法代码示例。如果您正苦于以下问题:Python RandomizedPCA.transform方法的具体用法?Python RandomizedPCA.transform怎么用?Python RandomizedPCA.transform使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.decomposition.RandomizedPCA
的用法示例。
在下文中一共展示了RandomizedPCA.transform方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: tryLinearDiscriminantAnalysis
# 需要导入模块: from sklearn.decomposition import RandomizedPCA [as 别名]
# 或者: from sklearn.decomposition.RandomizedPCA import transform [as 别名]
def tryLinearDiscriminantAnalysis(goFast):
    """Grid-search RandomizedPCA hyper-parameters and score an LDA classifier
    trained on the PCA-projected features.

    goFast -- when truthy, load the small 1500-sample svmlight datasets
    instead of the full-size ones.
    Prints the validation accuracy for every PCA parameter combination.
    """
    from sklearn.datasets import dump_svmlight_file, load_svmlight_file
    if goFast:
        training_data, training_labels = load_svmlight_file("dt1_1500.trn.svm", n_features=253659, zero_based=True)
        validation_data, validation_labels = load_svmlight_file("dt1_1500.vld.svm", n_features=253659, zero_based=True)
        testing_data, testing_labels = load_svmlight_file("dt1_1500.tst.svm", n_features=253659, zero_based=True)
    else:
        training_data, training_labels = load_svmlight_file("dt1.trn.svm", n_features=253659, zero_based=True)
        validation_data, validation_labels = load_svmlight_file("dt1.vld.svm", n_features=253659, zero_based=True)
        testing_data, testing_labels = load_svmlight_file("dt1.tst.svm", n_features=253659, zero_based=True)
    from sklearn.lda import LDA
    from sklearn.metrics import accuracy_score
    from sklearn.grid_search import ParameterGrid
    from sklearn.decomposition import RandomizedPCA
    rpcaDataGrid = [{"n_components": [10, 45, 70, 100],
                     "iterated_power": [2, 3, 4],
                     "whiten": [True]}]
    for rpca_parameter_set in ParameterGrid(rpcaDataGrid):
        rpcaOperator = RandomizedPCA(**rpca_parameter_set)
        rpcaOperator.fit(training_data, training_labels)
        # BUG FIX: transform() takes only the feature matrix; the labels were
        # previously passed as a spurious second positional argument.
        new_training_data = rpcaOperator.transform(training_data)
        new_validation_data = rpcaOperator.transform(validation_data)
        ldaOperator = LDA()
        ldaOperator.fit(new_training_data, training_labels)
        print("Score = " + str(accuracy_score(validation_labels, ldaOperator.predict(new_validation_data))))
示例2: pca_estimator
# 需要导入模块: from sklearn.decomposition import RandomizedPCA [as 别名]
# 或者: from sklearn.decomposition.RandomizedPCA import transform [as 别名]
def pca_estimator(data, targets, estimator, components_number=DEFAULT_COMPONENTS_NUMBER,
                  folds_number=DEFAULT_FOLDS_NUMBER):
    """K-fold cross-validate *estimator* on whitened RandomizedPCA features.

    Returns a tuple: (mean fold score, 2 * std of fold scores, elapsed seconds).
    """
    folds = KFold(len(targets), n_folds=folds_number)
    # One entry per fold: fraction of correctly predicted test samples.
    fold_scores = np.zeros(folds_number)
    started = time()
    for fold_index, (train_idx, test_idx) in enumerate(folds):
        x_train, y_train = data[train_idx], targets[train_idx]
        x_test, y_test = data[test_idx], targets[test_idx]
        projector = RandomizedPCA(n_components=components_number, whiten=True).fit(x_train)
        fitted = estimator.fit(projector.transform(x_train), y_train)
        fold_scores[fold_index] = fitted.score(projector.transform(x_test), y_test)
    elapsed = time() - started
    return fold_scores.mean(), fold_scores.std() * 2, elapsed
示例3: main
# 需要导入模块: from sklearn.decomposition import RandomizedPCA [as 别名]
# 或者: from sklearn.decomposition.RandomizedPCA import transform [as 别名]
def main():
#create the training & test sets, skipping the header row with [1:]
dataset = genfromtxt(open('data/train.csv','r'), delimiter=',', dtype='u1')[1:]
target = [x[0] for x in dataset]
train = [x[1:] for x in dataset]
test = genfromtxt(open('data/test.csv','r'), delimiter=',', dtype='u1')[1:]
#build crossvalidation training set
train_train, train_test, target_train, target_test = cross_validation.train_test_split(train, target, test_size=0.2, random_state=0)
print train_train.shape
print train_test.shape
#PCA
pca = RandomizedPCA(n_components=40)
pca.fit(train_train)
#create and train the random forest
rf = RandomForestClassifier(n_estimators=1000, n_jobs=4)
rf.fit(hstack((train_train, pca.transform(train_train))), target_train)
print "crossval score is: ", rf.score(hstack((train_test, pca.transform(train_test))), target_test)
labelid = np.array(range(1,28001))
output = rf.predict(hstack((test, pca.transform(test))))
savetxt('data/submission.csv', np.column_stack((labelid, output)), delimiter=',', header="ImageId,Label", fmt='%u', comments='')
示例4: SVM
# 需要导入模块: from sklearn.decomposition import RandomizedPCA [as 别名]
# 或者: from sklearn.decomposition.RandomizedPCA import transform [as 别名]
def SVM(X_data, y_data):
    """Eigenface-style pipeline: preprocess, project onto a whitened
    RandomizedPCA basis (120 components), then grid-search an RBF SVM.

    Prints train and test accuracy on an internal train/test split.
    """
    X_data = equalize_hist(X_data)
    # BUG FIX: normalize() and scale() return new arrays (they are not
    # in-place with the default copy=True); the previous code discarded
    # their results, making both calls no-ops.
    X_data = preprocessing.normalize(X_data, 'max')
    X_data = preprocessing.scale(X_data, axis=1)
    # divide our data set into a training set and a test set
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(X_data, y_data, test_size=TRAIN_TEST_SPLIT_RATIO)
    n_components = 120
    print("Extracting the top %d eigenfaces from %d faces"
          % (n_components, X_train.shape[0]))
    pca = RandomizedPCA(n_components=n_components, whiten=True).fit(X_train)
    print("Projecting the input data on the eigenfaces orthonormal basis")
    X_train_pca = pca.transform(X_train)
    X_test_pca = pca.transform(X_test)
    print("done ")
    param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
                  'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], }
    classifier = GridSearchCV(SVC(kernel='rbf', class_weight='balanced'), param_grid)
    classifier.fit(X_train_pca, y_train)
    # BUG FIX: the banner previously hard-coded "PCA 150" although 120
    # components are used; derive it from n_components instead.
    print("====== PCA %d ========" % n_components)
    print('TRAIN SCORE', classifier.score(X_train_pca, y_train))
    print('TEST SCORE', classifier.score(X_test_pca, y_test))
示例5: pca_data
# 需要导入模块: from sklearn.decomposition import RandomizedPCA [as 别名]
# 或者: from sklearn.decomposition.RandomizedPCA import transform [as 别名]
def pca_data(test_x, train_x, params):
print 'pcaing data ...'
components = int(params['components'])
pca = RandomizedPCA(components, whiten=True).fit(train_x)
pca_train_x = pca.transform(train_x)
pca_test_x = pca.transform(test_x)
return pca_test_x, pca_train_x
示例6: do_nbnn
# 需要导入模块: from sklearn.decomposition import RandomizedPCA [as 别名]
# 或者: from sklearn.decomposition.RandomizedPCA import transform [as 别名]
def do_nbnn(train_folder, test_folder):
    """Run NBNN classification after optional RELU clipping, standardization
    and PCA reduction of the patch descriptors (driven by global `options`)."""
    train = load_patches(args.train_folder)
    test = load_patches(args.test_folder)
    if options.relu:
        get_logger().info("Applying RELU")
        for cd in train:
            cd.patches = cd.patches.clip(min=0)
        for cd in test:
            cd.patches = cd.patches.clip(min=0)
    if options.scale:
        get_logger().info("Applying standardization")
        # Fit the scaler on all training patches stacked together.
        scaler = StandardScaler(copy=False)
        scaler.fit(np.vstack([t.patches for t in train]))
        for cd in train:
            cd.patches = scaler.transform(cd.patches)
        for cd in test:
            cd.patches = scaler.transform(cd.patches)
    if options.pca:
        get_logger().info("Calculating PCA")
        # PCA is likewise fit on the stacked training patches only.
        pca = RandomizedPCA(n_components=options.pca)
        pca.fit(np.vstack([t.patches for t in train]))
        get_logger().info("Keeping " + str(pca.explained_variance_ratio_.sum()) + " variance (" + str(options.pca) +
                          ") components\nApplying PCA")
        for cd in train:
            cd.patches = pca.transform(cd.patches)
        for cd in test:
            cd.patches = pca.transform(cd.patches)
    nbnn(train, test, NN_Engine())
示例7: test_sparse_randomized_pca_inverse
# 需要导入模块: from sklearn.decomposition import RandomizedPCA [as 别名]
# 或者: from sklearn.decomposition.RandomizedPCA import transform [as 别名]
def test_sparse_randomized_pca_inverse():
    """Test that RandomizedPCA is inversible on sparse data"""
    rng = np.random.RandomState(0)
    n_samples, n_features = 50, 3
    X = rng.randn(n_samples, n_features)  # spherical data
    X[:, 1] *= 0.00001  # shrink the middle component
    # No large means here: the sparse randomized PCA skips centering so the
    # sparsity pattern is preserved.
    X = csr_matrix(X)
    # The data is almost of rank n_components, so the original signal should
    # be recoverable from the transformed one.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always", DeprecationWarning)
        pca = RandomizedPCA(n_components=2, random_state=0).fit(X)
        assert_equal(len(caught), 1)
        assert_equal(caught[0].category, DeprecationWarning)
    projected = pca.transform(X)
    assert_almost_equal(X.todense(), pca.inverse_transform(projected), decimal=2)
    # Same check with whitening enabled (approximate reconstruction).
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always", DeprecationWarning)
        pca = RandomizedPCA(n_components=2, whiten=True, random_state=0).fit(X)
        assert_equal(len(caught), 1)
        assert_equal(caught[0].category, DeprecationWarning)
    projected = pca.transform(X)
    reconstructed = pca.inverse_transform(projected)
    relative_max_delta = (np.abs(X.todense() - reconstructed) / np.abs(X).mean()).max()
    # XXX: this does not seam to work as expected:
    assert_almost_equal(relative_max_delta, 0.91, decimal=2)
示例8: SVM
# 需要导入模块: from sklearn.decomposition import RandomizedPCA [as 别名]
# 或者: from sklearn.decomposition.RandomizedPCA import transform [as 别名]
def SVM(X_train, y_train, X_test):
    """Preprocess, project on a RandomizedPCA basis (147 components, no
    whitening) and classify with a grid-searched RBF SVM.

    Returns the predicted labels for X_test as a list.
    """
    # BUG FIX: the banner claimed whitening was on ("writening all on")
    # while whiten=False is used below.
    print("SVM with PCA of rbf, whitening off, no normalize")
    # BUG FIX: normalize() returns a new array (it is not in-place with the
    # default copy=True); the previous code discarded the result, making
    # both calls no-ops.
    X_train = preprocessing.normalize(X_train, 'max')
    X_test = preprocessing.normalize(X_test, 'max')
    X_train = equalize_hist(X_train)
    X_test = equalize_hist(X_test)
    n_components = 147
    print("Extracting the top %d eigenfaces from %d faces"
          % (n_components, X_train.shape[0]))
    pca = RandomizedPCA(n_components=n_components, whiten=False).fit(X_train)
    print("Projecting the input data on the eigenfaces orthonormal basis")
    X_train_pca = pca.transform(X_train)
    X_test_pca = pca.transform(X_test)
    print("done ")
    param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
                  'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], }
    classifier13 = GridSearchCV(SVC(kernel='rbf', class_weight='balanced'), param_grid)
    classifier13.fit(X_train_pca, y_train)
    return list(classifier13.predict(X_test_pca))
示例9: getPrincipleComponents
# 需要导入模块: from sklearn.decomposition import RandomizedPCA [as 别名]
# 或者: from sklearn.decomposition.RandomizedPCA import transform [as 别名]
def getPrincipleComponents(xtr, xte, n_components=50):
    """Fit RandomizedPCA on the training samples and project both the
    training and test samples onto the learned components.

    Returns (transformed train, transformed test).
    """
    train_arr = np.array(xtr)
    test_arr = np.array(xte)
    projector = RandomizedPCA(n_components=n_components).fit(train_arr)
    return projector.transform(train_arr), projector.transform(test_arr)
示例10: LogisticRegressionPCA
# 需要导入模块: from sklearn.decomposition import RandomizedPCA [as 别名]
# 或者: from sklearn.decomposition.RandomizedPCA import transform [as 别名]
def LogisticRegressionPCA(X, y):
    """Project the data onto a whitened RandomizedPCA basis (147 components)
    and evaluate a logistic-regression classifier on the reduced features.

    Prints train and test accuracy on an internal train/test split.
    """
    # divide our data set into a training set and a test set
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(
        X, y, test_size=TRAIN_TEST_SPLIT_RATIO)
    # get randomized PCA model
    num_components = 147
    print("Extracting the top %d eigenfaces from %d faces"
          % (num_components, X_train.shape[0]))
    pca = RandomizedPCA(n_components=num_components, whiten=True).fit(X_train)
    # use the PCA model on our training set and test set.
    print("Projecting the input data on the eigenfaces orthonormal basis")
    X_train_pca = pca.transform(X_train)
    X_test_pca = pca.transform(X_test)
    print("done ")
    logistic_regression = linear_model.LogisticRegression(C=1e5)
    # BUG FIX: the model was previously fit on the full, un-projected (X, y)
    # -- training on the test samples (data leakage) while ignoring the PCA
    # features entirely -- and then scored on the raw splits. Fit and score
    # on the PCA-projected splits instead. (Also removed the unused `h`.)
    logistic_regression.fit(X_train_pca, y_train)
    # print the performance of logistic regression
    print("====== Logistic Regression with PCA ========")
    print('TRAIN SCORE', logistic_regression.score(X_train_pca, y_train))
    print('TEST SCORE', logistic_regression.score(X_test_pca, y_test))
示例11: SVM
# 需要导入模块: from sklearn.decomposition import RandomizedPCA [as 别名]
# 或者: from sklearn.decomposition.RandomizedPCA import transform [as 别名]
def SVM(X, y):
    """Whitened RandomizedPCA (150 components) followed by a grid-searched
    RBF SVM.

    Prints train and test accuracy on an internal train/test split.
    """
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=TRAIN_TEST_SPLIT_RATIO)
    print(len(X_train))
    # Compute a PCA (eigenfaces) on the face dataset (treated as unlabeled
    # dataset): unsupervised feature extraction / dimensionality reduction
    n_components = 150
    pca = RandomizedPCA(n_components=n_components, whiten=True).fit(X_train)
    print("Projecting the input data on the eigenfaces orthonormal basis")
    X_train_pca = pca.transform(X_train)
    X_test_pca = pca.transform(X_test)
    print("done ")
    X_train_pca = equalize_hist(X_train_pca)
    # BUG FIX: scale() returns a new array rather than modifying its input
    # in place; the previous code discarded the result, so the scaling had
    # no effect at all.
    X_train_pca = preprocessing.scale(X_train_pca * 1.0, axis=1)
    X_test_pca = equalize_hist(X_test_pca)
    X_test_pca = preprocessing.scale(X_test_pca * 1.0, axis=1)
    param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
                  'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], }
    classifier2 = GridSearchCV(SVC(kernel='rbf', class_weight='balanced'), param_grid)
    classifier2.fit(X_train_pca, y_train)
    print('TRAIN SCORE', classifier2.score(X_train_pca, y_train))
    print('TEST SCORE', classifier2.score(X_test_pca, y_test))
示例12: rpca
# 需要导入模块: from sklearn.decomposition import RandomizedPCA [as 别名]
# 或者: from sklearn.decomposition.RandomizedPCA import transform [as 别名]
def rpca(train_X, test_X, n):
    """Densify the sparse inputs, fit an n-component RandomizedPCA on the
    training data and project both splits.

    Returns (fitted pca, transformed train, transformed test); prints the
    elapsed wall-clock time.
    """
    start_time = time.time()
    # PERF: densify each sparse matrix once instead of calling .toarray()
    # again for every fit()/transform() call.
    train_dense = train_X.toarray()
    test_dense = test_X.toarray()
    pca = RandomizedPCA(n_components=n)
    pca.fit(train_dense)
    train_X_pca = pca.transform(train_dense)
    test_X_pca = pca.transform(test_dense)
    print("--- %s seconds ---" % (time.time() - start_time))
    return pca, train_X_pca, test_X_pca
示例13: Cluster
# 需要导入模块: from sklearn.decomposition import RandomizedPCA [as 别名]
# 或者: from sklearn.decomposition.RandomizedPCA import transform [as 别名]
class Cluster(object):
    """Per-user novelty detector.

    Extracts DSP feature vectors from streams of EmoPackets, reduces them
    with RandomizedPCA and flags novel bins with a one-class SVM.
    """

    def __init__(self, name):
        self.name = name
        self.raw_dataset = []   # raw sessions (lists of EmoPackets)
        self.dataset = []       # extracted feature vectors
        self.dataset_red = []   # PCA-reduced feature vectors

    def get_featurevec(self, data):
        '''Takes in data in the form of an array of EmoPackets, and outputs
        a list of feature vectors.'''
        # Number of overlapping bins that fit into the recording.
        # BUG FIX: use // so the count stays an integer under Python 3
        # (keeps the original Python 2 integer-division semantics).
        num_bins = (len(data)//int(dsp.SAMPLE_RATE*dsp.STAGGER) -
                    int(dsp.BIN_SIZE / dsp.STAGGER) + 1)
        size = int(dsp.BIN_SIZE*dsp.SAMPLE_RATE)
        starts = int(dsp.SAMPLE_RATE*dsp.STAGGER)
        points = []
        for i in range(num_bins):
            points.append(dsp.get_features(data[i*starts:i*starts+size]))
        return points

    def add_data(self, raw):
        '''Allows the addition of new data. Will retrain upon addition.
        Expects a list of EmoPackets.'''
        self.dataset.extend(self.get_featurevec(raw))

    def extract_features(self):
        '''Does feature extraction for all of the datasets.'''
        self.dataset = []
        for sess in self.raw_dataset:
            self.dataset.extend(self.get_featurevec(sess))

    def reduce_dim(self, NDIM=5):
        '''Reduces the dimension of the extracted feature vectors.'''
        X = np.array(self.dataset)
        self.pca = RandomizedPCA(n_components=NDIM).fit(X)
        self.dataset_red = self.pca.transform(X)

    def train(self):
        '''Trains the classifier.'''
        self.svm = OneClassSVM()
        self.svm.fit(self.dataset_red)

    def is_novel(self, pt):
        '''Says whether or not the bin is novel. Expects an array of EmoPackets'''
        # BUG FIX: the body previously referenced an undefined name `data`
        # (the parameter is `pt`), raising NameError on every call.
        X = self.pca.transform(np.array(self.get_featurevec(pt)[0]))
        ans = self.svm.predict(X)
        # BUG FIX: after reduce_dim(), self.dataset_red is an ndarray, which
        # has no append(); stack the new sample onto it instead.
        self.dataset_red = np.vstack([self.dataset_red, X])
        self.train()
        return ans

    def save(self):
        '''Saves this classifier to a data directory.'''
        this_dir, this_filename = os.path.split(__file__)
        DATA_PATH = os.path.join(this_dir, "data", self.name+'.pkl')
        # `with` guarantees the file is closed even if pickling raises.
        with open(DATA_PATH, "wb") as dumpfile:
            pickle.dump(self, dumpfile, pickle.HIGHEST_PROTOCOL)
示例14: reduce_dim
# 需要导入模块: from sklearn.decomposition import RandomizedPCA [as 别名]
# 或者: from sklearn.decomposition.RandomizedPCA import transform [as 别名]
def reduce_dim(self, NDIM=5):
'''Reduces the dimension of the extracted feature vectors.'''
X = np.array(self.neutral)
pca = RandomizedPCA(n_components=NDIM).fit(X)
print pca.explained_variance_ratio_
self.pca = pca
self.neutral_red = pca.transform(X)
for label in self.labelled:
X = np.array(self.labelled[label])
self.labelled_red[label] = pca.transform(X)
示例15: compute_pca
# 需要导入模块: from sklearn.decomposition import RandomizedPCA [as 别名]
# 或者: from sklearn.decomposition.RandomizedPCA import transform [as 别名]
def compute_pca(self):
    """Reduce the feature matrices to 200 whitened principal components,
    fitting the projection on the training split only."""
    pca_count = 200
    reducer = RandomizedPCA(pca_count, copy=False, whiten=True)
    reducer.fit(self.x_train)
    self.x_train = reducer.transform(self.x_train)
    if self.do_submission:
        self.x_test = reducer.transform(self.x_test)
    if self.do_validation():
        self.x_validate = reducer.transform(self.x_validate)