This article collects typical usage examples of the Python method sklearn.decomposition.RandomizedPCA.fit_transform. If you are wondering what exactly RandomizedPCA.fit_transform does, how to call it, or what real-world uses of it look like, the curated code examples below may help. You can also read further about the class the method belongs to, sklearn.decomposition.RandomizedPCA.
Fifteen code examples of RandomizedPCA.fit_transform are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
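Before the examples, here is a minimal sketch of the basic fit_transform call on made-up data (the array shapes below are purely illustrative). Note that RandomizedPCA was deprecated in scikit-learn 0.18 and removed in 0.20; on newer versions the equivalent is PCA(svd_solver='randomized').

import numpy as np
from sklearn.decomposition import RandomizedPCA  # on sklearn >= 0.18 use PCA(svd_solver='randomized')

X = np.random.rand(100, 50)           # 100 samples, 50 features (illustrative shapes)
pca = RandomizedPCA(n_components=5)   # keep the 5 strongest components
X_reduced = pca.fit_transform(X)      # fit the model and project X in one step
print(X_reduced.shape)                # (100, 5)
print(pca.explained_variance_ratio_)  # fraction of variance captured per component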
Example 1: reduce_features
# Required import: from sklearn.decomposition import RandomizedPCA [as alias]
# Or: from sklearn.decomposition.RandomizedPCA import fit_transform [as alias]
def reduce_features(features, var_explained=0.9, n_components=0, verbose=False):
    """
    Performs feature reduction using PCA. Automatically selects the number of
    components needed to explain at least var_explained variance.
    :param features: Features.
    :param var_explained: Minimal variance explained.
    :param n_components: Number of components.
    :param verbose: Verbosity.
    :return: Reduced feature set.
    """
    if n_components == 0:
        # Run full PCA to estimate the number of components needed to explain
        # the given percentage of variance.
        estimator = RandomizedPCA()
        estimator.fit_transform(features)
        variance = 0.0
        for i in range(len(estimator.explained_variance_ratio_)):
            variance += estimator.explained_variance_ratio_[i]
            if variance > var_explained:
                n_components = i + 1
                if verbose:
                    print('{} % of variance explained using {} components'.format(var_explained, n_components))
                break
    # Re-run PCA with only the estimated number of components.
    estimator = RandomizedPCA(n_components=n_components)
    features = estimator.fit_transform(features)
    return features
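A quick, hypothetical way to exercise reduce_features on random data (the shapes and threshold below are assumptions, not part of the original example):

import numpy as np
from sklearn.decomposition import RandomizedPCA

X = np.random.rand(200, 30)  # 200 samples, 30 features (illustrative)
X_small = reduce_features(X, var_explained=0.8, verbose=True)
print(X_small.shape)         # (200, k), with k chosen automatically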
Example 2: build_classifier
# Required import: from sklearn.decomposition import RandomizedPCA [as alias]
# Or: from sklearn.decomposition.RandomizedPCA import fit_transform [as alias]
def build_classifier(train_data_x_in, train_data_y, classifier_in="svc_basic"):
    print "Attempting to build classifier."
    train_data_x = train_data_x_in
    transformer = ""
    # classifier = grid_search.GridSearchCV(svm.SVC(), parameters).fit(train_data_x, train_data_y)
    if classifier_in == "svc_basic":
        classifier = svm.SVC()
        print "Selection was basic svm.SVC."
    elif classifier_in == "svc_extensive":
        classifier = svm.SVC(kernel="linear", C=0.025, gamma=0.01)
        print "Selection was extensive svm.SVC, with linear kernel, C==0.025 and gamma==0.01."
    elif classifier_in == "kneighbors_basic":
        transformer = RandomizedPCA(n_components=2000)
        train_data_x = transformer.fit_transform(train_data_x)
        classifier = KNeighborsClassifier()
        print "Selection was KNeighbors basic, using RandomizedPCA to transform data first. n_components==2000."
    elif classifier_in == "bagging_basic":
        classifier = BaggingClassifier(KNeighborsClassifier(), max_samples=0.5, max_features=0.5)
        print "Selection was Bagging basic, with max_samples==0.5 and max_features==0.5."
    elif classifier_in == "spectral_basic":
        transformer = SpectralEmbedding(n_components=2000)
        train_data_x = transformer.fit_transform(train_data_x)
        classifier = KNeighborsClassifier()
        print "Selection was Spectral basic, using KNeighbors on SpectralEmbedding-transformed data. n_components==2000."
    # default to SVC in case of any sort of parsing error.
    else:
        print "Error in selecting classifier class. Reverting to SVC."
        classifier = svm.SVC()
    classifier.fit(train_data_x, train_data_y)
    print "Doing classifier estimation."
    return classifier, train_data_x, transformer
Example 3: test_feature_union_weights
# Required import: from sklearn.decomposition import RandomizedPCA [as alias]
# Or: from sklearn.decomposition.RandomizedPCA import fit_transform [as alias]
def test_feature_union_weights():
    # test feature union with transformer weights
    iris = load_iris()
    X = iris.data
    y = iris.target
    pca = RandomizedPCA(n_components=2, random_state=0)
    select = SelectKBest(k=1)
    # test using fit followed by transform
    fs = FeatureUnion([("pca", pca), ("select", select)],
                      transformer_weights={"pca": 10})
    fs.fit(X, y)
    X_transformed = fs.transform(X)
    # test using fit_transform
    fs = FeatureUnion([("pca", pca), ("select", select)],
                      transformer_weights={"pca": 10})
    X_fit_transformed = fs.fit_transform(X, y)
    # test it works with transformers missing fit_transform
    fs = FeatureUnion([("mock", TransfT()), ("pca", pca), ("select", select)],
                      transformer_weights={"mock": 10})
    X_fit_transformed_wo_method = fs.fit_transform(X, y)
    # check against expected result
    # We use a different pca object to control the random_state stream
    assert_array_almost_equal(X_transformed[:, :-1], 10 * pca.fit_transform(X))
    assert_array_equal(X_transformed[:, -1],
                       select.fit_transform(X, y).ravel())
    assert_array_almost_equal(X_fit_transformed[:, :-1],
                              10 * pca.fit_transform(X))
    assert_array_equal(X_fit_transformed[:, -1],
                       select.fit_transform(X, y).ravel())
    assert_equal(X_fit_transformed_wo_method.shape, (X.shape[0], 7))
Example 4: pcaAndPlot
# Required import: from sklearn.decomposition import RandomizedPCA [as alias]
# Or: from sklearn.decomposition.RandomizedPCA import fit_transform [as alias]
def pcaAndPlot(X, x_to_centroids, centroids, no_dims=2):
    pca = RandomizedPCA(n_components=no_dims)
    x_trans = pca.fit_transform(X)
    x_sizes = np.full((x_trans.shape[0]), 30, dtype=np.int)
    plt.scatter(x_trans[:, 0], x_trans[:, 1], s=x_sizes, c=x_to_centroids)
    # Project the centroids with the PCA already fitted on X so that both sets
    # live in the same 2-D space (refitting on the centroids would give a
    # different projection).
    centroids_trans = pca.transform(centroids)
    centroids_col = np.arange(centroids.shape[0])
    centroids_sizes = np.full((centroids.shape[0]), 70, dtype=np.int)
    plt.scatter(centroids_trans[:, 0], centroids_trans[:, 1], s=centroids_sizes, c=centroids_col)
    plt.show()
Example 5: read_data_sets
# Required import: from sklearn.decomposition import RandomizedPCA [as alias]
# Or: from sklearn.decomposition.RandomizedPCA import fit_transform [as alias]
def read_data_sets():
    class DataSets(object):
        pass
    NUM_CLASSES = 7
    start = time.time()
    data_sets = DataSets()
    # Load the training data
    mat_contents = sio.loadmat('labeled_images.mat')
    train_labels = mat_contents['tr_labels']
    train_identities = mat_contents['tr_identity']
    train_images = mat_contents['tr_images']
    # Load the test data
    mat_contents = sio.loadmat('public_test_images.mat')
    test_images = mat_contents['public_test_images']
    test_set_length = len(test_images[0][0])
    # Flatten images
    test_images = flattenImages(test_images)
    train_images = flattenImages(train_images)
    # Split train into a validation set of size ~ test_set_length
    train_images, train_labels, validation_images, validation_labels = splitSet(
        train_images,
        train_labels,
        train_identities,
        test_set_length)
    # Convert labels to one-hot vectors
    train_labels = convertToOneHot(train_labels, NUM_CLASSES)
    validation_labels = convertToOneHot(validation_labels, NUM_CLASSES)
    # Normalize the images
    sd = np.sqrt(np.var(train_images) + 0.01)
    train_images = (train_images - np.mean(train_images)) / sd
    sd = np.sqrt(np.var(validation_images) + 0.01)
    validation_images = (validation_images - np.mean(validation_images)) / sd
    pca = RandomizedPCA(n_components=15)
    train_images = pca.fit_transform(train_images)
    # Project the validation set with the PCA fitted on the training set
    # instead of refitting it, so both sets share the same components.
    validation_images = pca.transform(validation_images)
    # Set up the matrices into an accessible data set class
    data_sets.train_set = DataSet(train_images, train_labels)
    data_sets.validation_set = DataSet(validation_images, validation_labels)
    data_sets.test_set = DataSet(test_images, np.zeros((len(test_images), NUM_CLASSES)))
    print('Finished setting up data! Took {} seconds'.format(time.time() - start))
    return data_sets
Example 6: get_features_from_images_PCA
# Required import: from sklearn.decomposition import RandomizedPCA [as alias]
# Or: from sklearn.decomposition.RandomizedPCA import fit_transform [as alias]
def get_features_from_images_PCA(img_dir, data_set):
    """
    Takes in a directory, gets all the images from it, extracts the pixel
    values, flattens each image matrix into an array, and performs principal
    component analysis to get a representative subset of features from the
    pixel values of the images.
    """
    print "\nExtracting features from given images..."
    img_names = [f for f in os.listdir(img_dir)]
    images = [img_dir + f for f in os.listdir(img_dir)]
    # print images
    print "\nConverting images to vectors"
    data = []
    for image in images:
        # print image
        img = img_to_matrix(image)
        img = flatten_image(img)
        data.append(img)
    print "Converting image data to numpy array"
    time.sleep(5)
    data = np.array(data)
    print "Finished Conversion"
    time.sleep(5)
    print "\nPerforming PCA to get reqd features"
    features = []
    pca = RandomizedPCA(n_components=14)
    # Note: the PCA is re-fitted independently on each batch of 100 images.
    for i in xrange(len(data) / 100):
        if i == 0:  # first batch (avoids comparing a numpy array to a list)
            split = data[0:100]
            features = pca.fit_transform(split)
        else:
            split = data[100 * i:100 * (i + 1)]
            features = np.concatenate((features, pca.fit_transform(split)), axis=0)
    print "Writing feature data to file"
    f = open(data_set + "_extracted_features.txt", "w")
    for i in xrange(len(img_names)):
        s = str(img_names[i])
        for value in features[i]:
            s += " " + str(value)
        s += "\n"
        f.write(s)
    f.close()
    print "Write completed"
Example 7: main
# Required import: from sklearn.decomposition import RandomizedPCA [as alias]
# Or: from sklearn.decomposition.RandomizedPCA import fit_transform [as alias]
def main():
    img_dir = 'images/'
    images = [img_dir + f for f in os.listdir(img_dir)]
    labels = [f.split('/')[-1].split('_')[0] for f in images]
    label2ids = {v: i for i, v in enumerate(sorted(set(labels),
                                                   key=labels.index))}
    y = np.array([label2ids[l] for l in labels])
    data = []
    for image_file in images:
        img = img_to_matrix(image_file)
        img = flatten_image(img)
        data.append(img)
    data = np.array(data)
    # training samples
    is_train = np.random.uniform(0, 1, len(data)) <= 0.7
    train_X, train_y = data[is_train], y[is_train]
    # training a classifier
    pca = RandomizedPCA(n_components=5)
    train_X = pca.fit_transform(train_X)
    multi_svm = OneVsRestClassifier(LinearSVC())
    multi_svm.fit(train_X, train_y)
    # evaluating the model
    test_X, test_y = data[is_train == False], y[is_train == False]
    test_X = pca.transform(test_X)
    print pd.crosstab(test_y, multi_svm.predict(test_X),
                      rownames=['Actual'], colnames=['Predicted'])
Example 8: _prepare_pca
# Required import: from sklearn.decomposition import RandomizedPCA [as alias]
# Or: from sklearn.decomposition.RandomizedPCA import fit_transform [as alias]
def _prepare_pca(self, data, max_n_components):
    """ Helper Function """
    from sklearn.decomposition import RandomizedPCA
    # sklearn < 0.11 does not support the random_state argument
    kwargs = {'n_components': max_n_components, 'whiten': False}
    aspec = inspect.getargspec(RandomizedPCA.__init__)
    if 'random_state' not in aspec.args:
        warnings.warn('RandomizedPCA does not support random_state '
                      'argument. Use scikit-learn version 0.11 '
                      'or newer to get reproducible results.')
    else:
        kwargs['random_state'] = 0
    pca = RandomizedPCA(**kwargs)
    pca_data = pca.fit_transform(data.T)
    if self._explained_var > 1.0:
        if self.n_components is not None:  # normal n case
            self._comp_idx = np.arange(self.n_components)
            to_ica = pca_data[:, self._comp_idx]
        else:  # None case
            to_ica = pca_data
            self.n_components = pca_data.shape[1]
            self._comp_idx = np.arange(self.n_components)
    else:  # float case
        expl_var = pca.explained_variance_ratio_
        self._comp_idx = (np.where(expl_var.cumsum() <
                                   self._explained_var)[0])
        to_ica = pca_data[:, self._comp_idx]
        self.n_components = len(self._comp_idx)
    return to_ica, pca
Example 9: do_pca
# Required import: from sklearn.decomposition import RandomizedPCA [as alias]
# Or: from sklearn.decomposition.RandomizedPCA import fit_transform [as alias]
def do_pca(corr_matrix: _nested_ndarray, num_dim: int,
           min_var_explanation: float = 0.7) -> _nested_ndarray:
    '''
    This method performs PCA on a self-correlation matrix, reducing the number of columns to `num_dim`.
    If such analysis does not sufficiently explain the underlying variance in the data, an exception is
    thrown.

    Args:
    * `corr_matrix` - a square matrix of correlations
    * `num_dim` - the number of dimensions to which the data should be reduced
    * `min_var_explanation` - the minimum fraction of the underlying data variance that should be explained

    Returns:
    > A matrix of the PCA result on `corr_matrix`.
    '''
    num_dim = int(num_dim)
    pca = PCA(n_components=num_dim, random_state=0)
    pca_result = pca.fit_transform(corr_matrix)
    var_ratio = pca.explained_variance_ratio_
    if sum(var_ratio) < min_var_explanation:
        raise PcaAccuracyException(
            'PCA doesn\'t explain enough of the variance in the data')
    return pca_result
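A small, assumed usage sketch for do_pca, not taken from the original project: PcaAccuracyException and the _nested_ndarray type alias come from the surrounding module, and the synthetic data below is purely illustrative (two groups of strongly correlated variables, so the correlation matrix has clear low-dimensional structure).

import numpy as np

a = np.random.rand(50, 1)
b = np.random.rand(50, 1)
obs = np.hstack([a + 0.05 * np.random.rand(50, 5),
                 b + 0.05 * np.random.rand(50, 5)])  # 50 observations, 10 correlated variables
corr = np.corrcoef(obs, rowvar=False)                # 10 x 10 self-correlation matrix
reduced = do_pca(corr, num_dim=2)                    # raises PcaAccuracyException if 2 components explain < 70%
print(reduced.shape)                                 # (10, 2)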
Example 10: rpca
# Required import: from sklearn.decomposition import RandomizedPCA [as alias]
# Or: from sklearn.decomposition.RandomizedPCA import fit_transform [as alias]
def rpca(numpy_file='../data/Paintings/two_class/Paintings_train.csv'):
    """ Performs randomized PCA on a given numpy file.

    Given a numpy file of n rows and n columns, where the last column is
    the label and the rest are features, the n rows are the samples.

    :type numpy_file: string
    :param numpy_file: The file name of the numpy file to be analyzed.
    """
    import numpy as np
    import matplotlib.pyplot as pl
    import pandas as pd
    from sklearn.decomposition import RandomizedPCA
    all_data = np.loadtxt(numpy_file, delimiter=',')
    data = all_data[:, :-1]
    y = all_data[:, -1]
    pca = RandomizedPCA(n_components=2)
    X = pca.fit_transform(data)
    df = pd.DataFrame({"x": X[:, 0], "y": X[:, 1],
                       "label": np.where(y == 1, "realism", "abstract")})
    colors = ["red", "yellow"]
    for label, color in zip(df['label'].unique(), colors):
        mask = df['label'] == label
        pl.scatter(df[mask]['x'], df[mask]['y'], c=color, label=label)
    pl.legend()
    pl.title('Randomized PCA analysis')
    pl.show()
Example 11: calc_hog
# Required import: from sklearn.decomposition import RandomizedPCA [as alias]
# Or: from sklearn.decomposition.RandomizedPCA import fit_transform [as alias]
def calc_hog(fpaths, save=False):
    '''
    Compute the histogram of oriented gradients (HOG). Saves in batches to prevent memory issues.

    Input:
        fpaths : files on which HOG will be computed
        save : if true, output is saved to disk
    '''
    hogs = np.empty((len(fpaths), 15876))
    for i, fpath in enumerate(fpaths):
        img = imread(os.path.join(imgdir, fpath))
        if len(img.shape) == 3:
            img = rgb2gray(img)
        # rescale so all feature vectors are the same length
        img_resize = resize(img, (128, 128))
        img_hog = hog(img_resize)
        hogs[i, :] = img_hog
    hogs_sc = scale(hogs)
    n_components = 15
    pca = RandomizedPCA(n_components=n_components)
    hogs_decomp = pca.fit_transform(hogs_sc)
    df = pd.DataFrame(hogs_decomp, index=[os.path.split(i)[1] for i in fpaths])
    df.index.name = 'fpath'
    df.columns = ['feat_hog_%2.2u' % i for i in range(1, n_components + 1)]
    if save: df.to_csv('hog.csv')
    return df
Example 12: scatter
# Required import: from sklearn.decomposition import RandomizedPCA [as alias]
# Or: from sklearn.decomposition.RandomizedPCA import fit_transform [as alias]
def scatter(data, labels=None, title=None, name=None):
    """2D PCA scatter plot with optional class info.

    Return the PCA model to be able to introspect the components or transform
    new data with the same model.
    """
    data = atleast2d_or_csr(data)
    if data.shape[1] == 2:
        # No need for a PCA:
        data_2d = data
        pca = None  # nothing was fitted, so there is no model to return
    else:
        pca = RandomizedPCA(n_components=2)
        data_2d = pca.fit_transform(data)
    for i, c, m in zip(np.unique(labels), cycle(COLORS), cycle(MARKERS)):
        plt.scatter(data_2d[labels == i, 0], data_2d[labels == i, 1],
                    c=c, marker=m, label=i, alpha=0.5)
    plt.legend(loc='best')
    if title is None:
        title = "2D PCA scatter plot"
    if name is not None:
        title += " for " + name
    plt.xlabel('First Principal Component')
    plt.ylabel('Second Principal Component')
    plt.title(title)
    return pca
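A hedged usage sketch for scatter on synthetic blobs; COLORS, MARKERS and atleast2d_or_csr are helpers from the surrounding module and are not reproduced here, and the dataset below is an assumption for illustration only.

import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs

X, y = make_blobs(n_samples=300, n_features=10, centers=3, random_state=0)
pca_model = scatter(X, labels=y, name="synthetic blobs")  # fits a 2-component RandomizedPCA internally
plt.show()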
Example 13: main
# Required import: from sklearn.decomposition import RandomizedPCA [as alias]
# Or: from sklearn.decomposition.RandomizedPCA import fit_transform [as alias]
def main():
    ap = argparse.ArgumentParser()
    ap.add_argument("-i", "--image", required=True, help="Path to the image")
    args = vars(ap.parse_args())
    image = cv2.imread(args["image"])
    rects, img = detect(image)
    cropped = []
    for idx, (x1, y1, x2, y2) in enumerate(rects):
        crop_img = image[y1:y1 + (y2 - y1), x1:x1 + (x2 - x1)]
        crop_img = cv2.resize(crop_img, (100, 100), interpolation=cv2.INTER_AREA)
        cv2.imshow("image" + str(idx), crop_img)
        new_img = crop_img.reshape(crop_img.shape[0] * crop_img.shape[1], 3)
        cropped.append(new_img.flatten())
    # reduce feature size
    cropped_pca = []
    pca = RandomizedPCA(n_components=100)
    cropped_pca = pca.fit_transform(cropped)
    # training (hardcoded for now)
    clf = SVC(probability=True)
    train = cropped_pca[:7]
    test = cropped_pca[7:13]
    # clf.fit([[0,0],[1,1]], [1, 2])
    clf.fit(train, [1, 2, 2, 1, 2, 1, 1])
    for item in test:
        print clf.predict_proba(item)
        print clf.predict(item)
    cv2.waitKey(0)
Example 14: dimentionality_reduction
# Required import: from sklearn.decomposition import RandomizedPCA [as alias]
# Or: from sklearn.decomposition.RandomizedPCA import fit_transform [as alias]
def dimentionality_reduction(train_x, test_x):
    print "Dimensionality reduction to 10D on training and test data...."
    pca = RandomizedPCA(n_components=10)
    train_x = pca.fit_transform(train_x)
    test_x = pca.transform(test_x)
    print "Done."
    return train_x, test_x
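A minimal sketch of calling this helper on random train/test arrays (the shapes are assumptions); the key point it illustrates is that the PCA is fitted on the training data only and merely applied to the test data.

import numpy as np
from sklearn.decomposition import RandomizedPCA

train = np.random.rand(80, 64)  # 80 training samples, 64 features (illustrative)
test = np.random.rand(20, 64)   # 20 test samples with the same feature layout
train_10d, test_10d = dimentionality_reduction(train, test)
print(train_10d.shape, test_10d.shape)  # (80, 10) (20, 10)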
Example 15: detect
# Required import: from sklearn.decomposition import RandomizedPCA [as alias]
# Or: from sklearn.decomposition.RandomizedPCA import fit_transform [as alias]
def detect(self, imageURLs, params):
    array = []
    for param in params:
        img = self.img_to_matrix(param['imageURL'])
        data = self.flatten_image(img)
        array.append(data)
    array = np.array(array)
    pca = RandomizedPCA(n_components=5)
    n_data = pca.fit_transform(array)
    clf = joblib.load('src/resource/models/model.pkl')
    result = clf.predict(n_data).tolist()
    for param, r in zip(params, result):
        raw_img = urllib2.urlopen(param['imageURL']).read()
        if r == 1:
            cntr = len([i for i in os.listdir("test/images/rain/") if 'rain' in i]) + 1
            path = "static/images/rain_" + str(cntr) + '.jpg'
            f = open(path, 'wb')
            f.write(raw_img)
            f.close()
            # build the event record
            when = {'type': 'timestamp', 'time': param['time']}
            where = {"type": "Point", "coordinates": [param['longitude'], param['latitude']]}
            what = {'topic': {'value': u'雨'}, 'tweet': param['value']}
            who = [{"type": "url", "value": param['imageURL']},
                   {"value": "evwh <[email protected]>", "type": "author"}]
            event = {'observation': {'what': what, 'when': when, 'where': where, 'who': who}}
            self.connection['event']['TwitterImageRainSensor'].insert(event)