This article collects typical usage examples of the RandomizedPCA class from Python's sklearn.decomposition module. If you have been wondering what RandomizedPCA is good for, how to use it, or want to see it in real code, the curated class examples below may help.
The 15 code examples of the RandomizedPCA class shown below are sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
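Note: RandomizedPCA was deprecated in scikit-learn 0.18 and removed in 0.20; the same algorithm is now exposed through sklearn.decomposition.PCA with svd_solver='randomized'. The examples below were written against the older API. A minimal sketch of the modern equivalent (assuming scikit-learn >= 0.18; the data here is random and only for illustration):

from sklearn.decomposition import PCA
import numpy as np

X = np.random.RandomState(0).randn(100, 50)
# PCA with the randomized solver replaces RandomizedPCA(n_components=10, whiten=True)
pca = PCA(n_components=10, svd_solver='randomized', whiten=True, random_state=0)
X_reduced = pca.fit_transform(X)
print(X_reduced.shape)  # (100, 10)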
Example 1: tryLinearDiscriminantAnalysis
def tryLinearDiscriminantAnalysis(goFast):
    from sklearn.datasets import dump_svmlight_file, load_svmlight_file
    if goFast:
        training_data, training_labels = load_svmlight_file("dt1_1500.trn.svm", n_features=253659, zero_based=True)
        validation_data, validation_labels = load_svmlight_file("dt1_1500.vld.svm", n_features=253659, zero_based=True)
        testing_data, testing_labels = load_svmlight_file("dt1_1500.tst.svm", n_features=253659, zero_based=True)
    else:
        training_data, training_labels = load_svmlight_file("dt1.trn.svm", n_features=253659, zero_based=True)
        validation_data, validation_labels = load_svmlight_file("dt1.vld.svm", n_features=253659, zero_based=True)
        testing_data, testing_labels = load_svmlight_file("dt1.tst.svm", n_features=253659, zero_based=True)

    from sklearn.lda import LDA
    from sklearn.metrics import accuracy_score
    from sklearn.grid_search import ParameterGrid
    from sklearn.decomposition import RandomizedPCA

    rpcaDataGrid = [{"n_components": [10, 45, 70, 100],
                     "iterated_power": [2, 3, 4],
                     "whiten": [True]}]

    for rpca_parameter_set in ParameterGrid(rpcaDataGrid):
        rpcaOperator = RandomizedPCA(**rpca_parameter_set)
        rpcaOperator.fit(training_data)
        # transform() only takes the feature matrix; labels are not needed here
        new_training_data = rpcaOperator.transform(training_data)
        new_validation_data = rpcaOperator.transform(validation_data)
        ldaOperator = LDA()
        ldaOperator.fit(new_training_data, training_labels)
        print "Score = " + str(accuracy_score(validation_labels, ldaOperator.predict(new_validation_data)))
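A note for newer scikit-learn, where RandomizedPCA, sklearn.lda and sklearn.grid_search no longer exist: load_svmlight_file returns scipy sparse matrices, and PCA's randomized solver generally does not accept sparse input, so the usual sparse-friendly replacement is TruncatedSVD (which, unlike PCA, does not center the data). A hedged sketch reusing the variables above:

from sklearn.decomposition import TruncatedSVD

# Roughly analogous dimensionality reduction on the sparse svmlight data
svd = TruncatedSVD(n_components=100, random_state=0)
new_training_data = svd.fit_transform(training_data)
new_validation_data = svd.transform(validation_data)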
Example 2: scatter
def scatter(data, labels=None, title=None, name=None):
    """2d PCA scatter plot with optional class info

    Return the pca model to be able to introspect the components or
    transform new data with the same model.
    """
    data = atleast2d_or_csr(data)
    if data.shape[1] == 2:
        # No need for a PCA:
        data_2d = data
        pca = None  # nothing was fitted in this case
    else:
        pca = RandomizedPCA(n_components=2)
        data_2d = pca.fit_transform(data)

    for i, c, m in zip(np.unique(labels), cycle(COLORS), cycle(MARKERS)):
        plt.scatter(data_2d[labels == i, 0], data_2d[labels == i, 1],
                    c=c, marker=m, label=i, alpha=0.5)

    plt.legend(loc='best')
    if title is None:
        title = "2D PCA scatter plot"
        if name is not None:
            title += " for " + name
    plt.xlabel('First Principal Component')
    plt.ylabel('Second Principal Component')
    plt.title(title)
    return pca
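A hedged usage sketch for the function above; it assumes matplotlib is available and that the module-level COLORS, MARKERS and atleast2d_or_csr helpers that scatter() relies on are defined, as in the original project:

from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt

X, y = make_blobs(n_samples=300, centers=3, n_features=20, random_state=0)
pca_model = scatter(X, labels=y, name="blobs demo")  # returns the fitted RandomizedPCA
plt.show()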
Example 3: LogisticRegressionPCA
def LogisticRegressionPCA(X, y):
    # divide our data set into a training set and a test set
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(
        X, y, test_size=TRAIN_TEST_SPLIT_RATIO)

    # get randomized PCA model
    num_components = 147
    print("Extracting the top %d eigenfaces from %d faces"
          % (num_components, X_train.shape[0]))
    pca = RandomizedPCA(n_components=num_components, whiten=True).fit(X_train)

    # use the PCA model on our training set and test set.
    print("Projecting the input data on the eigenfaces orthonormal basis")
    X_train_pca = pca.transform(X_train)
    X_test_pca = pca.transform(X_test)
    print("done")

    # fit logistic regression on the PCA-projected training data
    logistic_regression = linear_model.LogisticRegression(C=1e5)
    logistic_regression.fit(X_train_pca, y_train)

    # print the performance of logistic regression
    print("====== Logistic Regression with PCA ========")
    print('TRAIN SCORE', logistic_regression.score(X_train_pca, y_train))
    print('TEST SCORE', logistic_regression.score(X_test_pca, y_test))
Example 4: SVM
def SVM(X, y):
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(
        X, y, test_size=TRAIN_TEST_SPLIT_RATIO)
    print(len(X_train))

    # Compute a PCA (eigenfaces) on the face dataset (treated as unlabeled
    # dataset): unsupervised feature extraction / dimensionality reduction
    n_components = 150
    pca = RandomizedPCA(n_components=n_components, whiten=True).fit(X_train)

    print("Projecting the input data on the eigenfaces orthonormal basis")
    X_train_pca = pca.transform(X_train)
    X_test_pca = pca.transform(X_test)
    print("done")

    # preprocessing.scale() returns the scaled array, so keep its result
    X_train_pca = equalize_hist(X_train_pca)
    X_train_pca = preprocessing.scale(X_train_pca * 1.0, axis=1)
    X_test_pca = equalize_hist(X_test_pca)
    X_test_pca = preprocessing.scale(X_test_pca * 1.0, axis=1)

    # classifier = svm.SVC(kernel='poly', degree=3)
    # classifier.fit(X_train, y_train)
    # print('TRAIN SCORE', classifier.score(X_train, y_train))
    # print('TEST SCORE', classifier.score(X_test, y_test))

    param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
                  'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1]}
    classifier2 = GridSearchCV(SVC(kernel='rbf', class_weight='balanced'), param_grid)
    classifier2.fit(X_train_pca, y_train)
    print('TRAIN SCORE', classifier2.score(X_train_pca, y_train))
    print('TEST SCORE', classifier2.score(X_test_pca, y_test))
Example 5: pca_estimator
def pca_estimator(data, targets, estimator, components_number=DEFAULT_COMPONENTS_NUMBER,
                  folds_number=DEFAULT_FOLDS_NUMBER):
    kf = KFold(len(targets), n_folds=folds_number)

    # 'scores' is a numpy array: the index is the fold number, the value is the
    # fraction of correctly predicted samples in that fold's test split.
    scores = np.zeros(folds_number)

    start = time()
    index = 0
    for train, test in kf:
        x_train, x_test, y_train, y_test = data[train], data[test], targets[train], targets[test]
        pca = RandomizedPCA(n_components=components_number, whiten=True).fit(x_train)
        x_train_pca = pca.transform(x_train)
        x_test_pca = pca.transform(x_test)
        clf = estimator.fit(x_train_pca, y_train)
        scores[index] = clf.score(x_test_pca, y_test)
        index += 1
        # print("Iteration %d from %d has done! Score: %f" % (index, folds_number,
        #                                                     scores[index - 1]))
    finish = time()

    return scores.mean(), scores.std() * 2, (finish - start)
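A hedged usage sketch; load_digits and SVC stand in for the caller's data and estimator, and the module is assumed to define the DEFAULT_* constants and the pre-0.18 sklearn.cross_validation.KFold import used by the function:

from sklearn.datasets import load_digits
from sklearn.svm import SVC

digits = load_digits()
mean_score, spread, elapsed = pca_estimator(digits.data, digits.target,
                                            SVC(gamma=0.001),
                                            components_number=30, folds_number=5)
print("score %.3f +/- %.3f in %.1fs" % (mean_score, spread, elapsed))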
Example 6: test_explained_variance
def test_explained_variance():
    # Check that PCA output has unit-variance
    rng = np.random.RandomState(0)
    n_samples = 100
    n_features = 80

    X = rng.randn(n_samples, n_features)

    pca = PCA(n_components=2).fit(X)
    rpca = RandomizedPCA(n_components=2, random_state=rng).fit(X)
    assert_array_almost_equal(pca.explained_variance_ratio_,
                              rpca.explained_variance_ratio_, 1)

    # compare to empirical variances
    X_pca = pca.transform(X)
    assert_array_almost_equal(pca.explained_variance_,
                              np.var(X_pca, axis=0))

    X_rpca = rpca.transform(X)
    assert_array_almost_equal(rpca.explained_variance_, np.var(X_rpca, axis=0),
                              decimal=1)

    # Same with correlated data
    X = datasets.make_classification(n_samples, n_features,
                                     n_informative=n_features - 2,
                                     random_state=rng)[0]

    pca = PCA(n_components=2).fit(X)
    rpca = RandomizedPCA(n_components=2, random_state=rng).fit(X)
    assert_array_almost_equal(pca.explained_variance_ratio_,
                              rpca.explained_variance_ratio_, 5)
Example 7: dimentionality_reduction
def dimentionality_reduction(train_x, test_x):
    print "Dimensionality reduction to 10D on training and test data...."
    pca = RandomizedPCA(n_components=10)
    train_x = pca.fit_transform(train_x)
    test_x = pca.transform(test_x)
    print "Done."
    return train_x, test_x
Example 8: reduce_features
def reduce_features(features, var_explained=0.9, n_components=0, verbose=False):
    """
    Performs feature reduction using PCA. If n_components is 0, automatically
    selects the number of components needed to explain var_explained variance.
    :param features: Features.
    :param var_explained: Minimal variance explained.
    :param n_components: Nr. of components (0 = select automatically).
    :param verbose: Verbosity.
    :return: Reduced feature set.
    """
    if n_components == 0:
        # Run full PCA to estimate nr. components for explaining given
        # percentage of variance.
        estimator = RandomizedPCA()
        estimator.fit_transform(features)
        variance = 0.0
        for i in range(len(estimator.explained_variance_ratio_)):
            variance += estimator.explained_variance_ratio_[i]
            if variance > var_explained:
                n_components = i + 1
                if verbose:
                    print('{:.0%} of variance explained using {} components'.format(var_explained, n_components))
                break

    # Re-run PCA with only the estimated nr. of components
    estimator = RandomizedPCA(n_components=n_components)
    features = estimator.fit_transform(features)
    return features
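In scikit-learn 0.18 and later, PCA can perform this component selection itself when n_components is a float in (0, 1); a minimal sketch of that alternative, reusing the features array from above:

from sklearn.decomposition import PCA

# keeps the smallest number of components explaining at least 90 % of the variance
pca = PCA(n_components=0.9, svd_solver='full')
reduced_features = pca.fit_transform(features)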
Example 9: do_nbnn
def do_nbnn(train_folder, test_folder):
    train = load_patches(train_folder)
    test = load_patches(test_folder)

    if options.relu:
        get_logger().info("Applying RELU")
        for class_data in train:
            class_data.patches = class_data.patches.clip(min=0)
        for class_data in test:
            class_data.patches = class_data.patches.clip(min=0)

    if options.scale:
        get_logger().info("Applying standardization")
        scaler = StandardScaler(copy=False)
        scaler.fit(np.vstack([t.patches for t in train]))
        for class_data in train:
            class_data.patches = scaler.transform(class_data.patches)
        for class_data in test:
            class_data.patches = scaler.transform(class_data.patches)

    if options.pca:
        get_logger().info("Calculating PCA")
        pca = RandomizedPCA(n_components=options.pca)
        pca.fit(np.vstack([t.patches for t in train]))
        # for class_data in train:
        #     get_logger().info("Fitting class " + class_data.name)
        #     pca.partial_fit(class_data.patches)
        get_logger().info("Keeping " + str(pca.explained_variance_ratio_.sum()) +
                          " of the variance with " + str(options.pca) +
                          " components\nApplying PCA")
        for class_data in train:
            class_data.patches = pca.transform(class_data.patches)
        for class_data in test:
            class_data.patches = pca.transform(class_data.patches)

    nbnn(train, test, NN_Engine())
Example 10: build_classifier
def build_classifier(train_data_x_in, train_data_y, classifier_in="svc_basic"):
    print "Attempting to build classifier."
    train_data_x = train_data_x_in
    transformer = ""
    # classifier = grid_search.GridSearchCV(svm.SVC(), parameters).fit(train_data_x, train_data_y)
    if classifier_in == "svc_basic":
        classifier = svm.SVC()
        print "Selection was basic svm.SVC."
    elif classifier_in == "svc_extensive":
        classifier = svm.SVC(kernel="linear", C=0.025, gamma=0.01)
        print "Selection was extensive svm.SVC, with linear kernel, C==0.025 and gamma==0.01."
    elif classifier_in == "kneighbors_basic":
        transformer = RandomizedPCA(n_components=2000)
        train_data_x = transformer.fit_transform(train_data_x)
        classifier = KNeighborsClassifier()
        print "Selection was KNeighbors basic, using RandomizedPCA to transform data first. n_components==2000."
    elif classifier_in == "bagging_basic":
        classifier = BaggingClassifier(KNeighborsClassifier(), max_samples=0.5, max_features=0.5)
        print "Selection was Bagging basic, with max_samples==0.5 and max_features==0.5."
    elif classifier_in == "spectral_basic":
        transformer = SpectralEmbedding(n_components=2000)
        train_data_x = transformer.fit_transform(train_data_x)
        classifier = KNeighborsClassifier()
        print "Selection was Spectral basic, using KNeighborsClassifier on SpectralEmbedding-transformed data. n_components==2000."
    # default to SVC in case of any sort of parsing error.
    else:
        print "Error in selecting classifier class. Reverting to SVC."
        classifier = svm.SVC()

    classifier.fit(train_data_x, train_data_y)
    print "Doing classifier estimation."
    return classifier, train_data_x, transformer
Example 11: calc_hog
def calc_hog(fpaths, save=False):
    '''
    Compute histogram of gradients (HOG). Saves in batches to prevent memory issues.

    Input:
        fpaths : files on which HOG will be computed
        save : if true, output is saved to disk
    '''
    hogs = np.empty((len(fpaths), 15876))

    for i, fpath in enumerate(fpaths):
        img = imread(os.path.join(imgdir, fpath))
        if len(img.shape) == 3:
            img = rgb2gray(img)
        # rescale so all feature vectors are the same length
        img_resize = resize(img, (128, 128))
        img_hog = hog(img_resize)
        hogs[i, :] = img_hog

    hogs_sc = scale(hogs)
    n_components = 15
    pca = RandomizedPCA(n_components=n_components)
    hogs_decomp = pca.fit_transform(hogs_sc)

    df = pd.DataFrame(hogs_decomp, index=[os.path.split(i)[1] for i in fpaths])
    df.index.name = 'fpath'
    df.columns = ['feat_hog_%2.2u' % i for i in range(1, n_components + 1)]
    if save: df.to_csv('hog.csv')
    return df
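A hedged usage sketch; imgdir and the image helpers used above (imread, rgb2gray, resize, hog, scale, pandas, numpy) are assumed to be defined at module level as in the original project:

import os

fpaths = sorted(f for f in os.listdir(imgdir) if f.lower().endswith(('.jpg', '.png')))
feat_df = calc_hog(fpaths, save=True)  # also writes hog.csv because save=True
print(feat_df.shape)                   # (len(fpaths), 15)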
Example 12: fit
def fit(self):
    wordids_map = NameToIndex()
    labs_map = NameToIndex()

    wordscount = self._word_cluster.get_words_count()
    print "start compute_tfidf ..."

    # build the bag-of-words representation of the documents
    docs = self._word_cluster.get_samples()
    count = 0
    bow = []
    labs = []
    for k, v in docs.iteritems():
        vec = numpy.zeros(wordscount).tolist()
        for i in v:
            vec[wordids_map.map(i)] += 1
        bow.append(vec)
        labs.append(labs_map.map(k[0]))

    labs = numpy.array(labs)

    tfidf = TfidfTransformer(smooth_idf=True, sublinear_tf=True, use_idf=True)
    datas = numpy.array(tfidf.fit_transform(bow).toarray())

    print "compute_tfidf done"

    pca = RandomizedPCA(n_components=20, whiten=True).fit(datas)
    svc = train_svc(numpy.array(labs_map.names), labs, pca.transform(datas))

    self._tfidf = tfidf
    self._svc = svc
    self._labs_map = labs_map
    self._wordids_map = wordids_map
    self._pca = pca
Example 13: test_randomized_pca_check_list
def test_randomized_pca_check_list():
    """Test that the projection by RandomizedPCA on list data is correct"""
    X = [[1.0, 0.0], [0.0, 1.0]]
    X_transformed = RandomizedPCA(n_components=1, random_state=0).fit(X).transform(X)
    assert_equal(X_transformed.shape, (2, 1))
    assert_almost_equal(X_transformed.mean(), 0.00, 2)
    assert_almost_equal(X_transformed.std(), 0.71, 2)
Example 14: pca_data
def pca_data(test_x, train_x, params):
    print 'pcaing data ...'
    components = int(params['components'])
    pca = RandomizedPCA(components, whiten=True).fit(train_x)
    pca_train_x = pca.transform(train_x)
    pca_test_x = pca.transform(test_x)
    return pca_test_x, pca_train_x
Example 15: compute_pca
def compute_pca(reception_stats, n_components=5):
    reception_mean = reception_stats.mean(axis=0)
    pca = RandomizedPCA(n_components - 1)
    pca.fit(reception_stats)
    pca_components = np.vstack([reception_mean, pca.components_])
    return pca, pca_components