This article collects typical usage examples of the Python method sklearn.decomposition.PCA.fit. If you have been wondering what PCA.fit does, how to use it, or where to find examples of it in practice, the curated examples below may help. You can also explore further usage examples of the containing class, sklearn.decomposition.PCA.
Fifteen code examples of PCA.fit are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
Example 1: perform_pca
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import fit [as alias]
def perform_pca(training_data, testing_data, component_number, verbose=False):
    '''
    Perform PCA to compress the number of features in the training and test matrices.
    Input:
        * training data matrix of tweets -> features
        * testing data matrix of tweets -> features
        * the number of components to compress to
        * verbosity
    Output:
        * compressed training matrix
        * compressed testing matrix
    '''
    if verbose: print("Performing PCA Compression to %s Components ..." % component_number)
    from sklearn.decomposition import PCA
    pca = PCA(n_components=component_number, whiten=True)
    # Fit on the training data only, then apply the same projection to both sets
    pca.fit(training_data)
    training_data = pca.transform(training_data)
    testing_data = pca.transform(testing_data)
    if verbose: print("Done")
    return training_data, testing_data
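For context, a minimal way to call this helper might look like the following sketch; the random matrices and the 50-component target are illustrative assumptions, not part of the original example.

import numpy as np

# Hypothetical data: 200 training and 80 test tweets with 300 features each
train = np.random.rand(200, 300)
test = np.random.rand(80, 300)
train_c, test_c = perform_pca(train, test, component_number=50, verbose=True)
print(train_c.shape, test_c.shape)  # (200, 50) (80, 50)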
Example 2: fit_pca
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import fit [as alias]
def fit_pca(trajs):
    print('fitting PCA...')
    pca = PCA(2, copy=True, whiten=False)
    # Stack every trajectory into one (n_samples, n_features) array
    X = np.vstack(list(trajs.values()))
    pca.fit(X)
    print('done')
    return pca
Example 3: ensemble_pca
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import fit [as alias]
def ensemble_pca(self, ref_ensemble=None, ref_first=True):
    data = prepare_pca_input(self._cgs)
    pca = PCA(n_components=2)
    if ref_ensemble:
        ref_data = prepare_pca_input(ref_ensemble)
        if ref_first:
            pca.fit(ref_data)
    if not ref_ensemble or not ref_first:
        pca.fit(data)
    reduced_data = pca.transform(data)
    if ref_ensemble:
        reduced_ref = pca.transform(ref_data)
        plt.scatter(reduced_ref[:, 0], reduced_ref[:, 1],
                    color="green", label="background")
    plt.scatter(reduced_data[:, 0], reduced_data[:, 1],
                color="blue", label="sampling")
    if self._reference_cg:
        data_true = prepare_pca_input([self._reference_cg])
        reduced_true = pca.transform(data_true)
        plt.scatter(reduced_true[:, 0], reduced_true[:, 1],
                    color="red", label="reference")
    plt.xlabel("First principal component")
    plt.ylabel("Second principal component")
    figname = "pca_{}_rf{}.svg".format(self._cgs[0].name, ref_first)
    plt.savefig(figname)
    log.info("Figure {} created".format(figname))
    plt.clf()
    plt.close()
Example 4: pca
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import fit [as alias]
def pca(df, n_components=2, mean_center=False, *args, **kwargs):
    if not sklearn:
        # An assert on a non-empty string always passes, so raise instead
        raise ImportError('This library depends on scikit-learn (sklearn) to perform PCA analysis')
    from sklearn.decomposition import PCA
    df = df.copy()
    # Zero-fill: NaNs raise errors in PCA
    df[np.isnan(df)] = 0
    if mean_center:
        mean = np.mean(df.values, axis=0)
        df = df - mean
    pca = PCA(n_components=n_components, *args, **kwargs)
    pca.fit(df.values.T)
    scores = pd.DataFrame(pca.transform(df.values.T)).T
    scores.index = ['Principal Component %d' % (n + 1) for n in range(scores.shape[0])]
    scores.columns = df.columns
    weights = pd.DataFrame(pca.components_).T
    weights.index = df.index
    weights.columns = ['Weights on Principal Component %d' % (n + 1) for n in range(weights.shape[1])]
    return scores, weights
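Since this helper treats DataFrame columns as samples (note the transposes), a small illustrative call may help; it assumes the module's sklearn availability flag is set and numpy/pandas are imported, and the DataFrame below is an invented stand-in, not data from the original project.

import numpy as np
import pandas as pd

# Hypothetical matrix: 5 features (rows) x 4 samples (columns)
df = pd.DataFrame(np.random.rand(5, 4),
                  index=['f%d' % i for i in range(5)],
                  columns=['s%d' % i for i in range(4)])
scores, weights = pca(df, n_components=2, mean_center=True)
print(scores.shape)   # (2, 4): one row per component, one column per sample
print(weights.shape)  # (5, 2): one row per feature, one column per component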
Example 5: test_pca
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import fit [as alias]
def test_pca():
    # PCA on dense arrays
    X = iris.data
    for n_comp in np.arange(X.shape[1]):
        pca = PCA(n_components=n_comp, svd_solver='full')
        X_r = pca.fit(X).transform(X)
        np.testing.assert_equal(X_r.shape[1], n_comp)
        X_r2 = pca.fit_transform(X)
        assert_array_almost_equal(X_r, X_r2)
        X_r = pca.transform(X)
        X_r2 = pca.fit_transform(X)
        assert_array_almost_equal(X_r, X_r2)
        # Test get_covariance and get_precision
        cov = pca.get_covariance()
        precision = pca.get_precision()
        assert_array_almost_equal(np.dot(cov, precision),
                                  np.eye(X.shape[1]), 12)
    # test explained_variance_ratio_ == 1 with all components
    pca = PCA(svd_solver='full')
    pca.fit(X)
    assert_almost_equal(pca.explained_variance_ratio_.sum(), 1.0, 3)
Example 6: dim_red
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import fit [as alias]
def dim_red(df, col, method, params, kws, load_fit=None):
    if method == 'PCA':
        do_dim_red = PCA(*params, **kws)
    elif method == 'FastICA':
        do_dim_red = FastICA(*params, **kws)
    elif method == 't-SNE':
        do_dim_red = TSNE(*params, **kws)
    elif method == 'LLE':
        do_dim_red = LocallyLinearEmbedding(*params, **kws)
    elif method == 'JADE-ICA':
        do_dim_red = JADE(*params, **kws)
    if load_fit:
        # Reuse a previously fitted reducer rather than refitting
        do_dim_red = load_fit
        dim_red_result = do_dim_red.transform(df[col])
    elif method != 't-SNE':
        do_dim_red.fit(df[col])
        dim_red_result = do_dim_red.transform(df[col])
    else:
        # t-SNE has no separate transform step
        dim_red_result = do_dim_red.fit_transform(df[col])
    # Will need to revisit this for other methods that don't use n_components,
    # to make sure the column names still make sense.
    for i in range(1, dim_red_result.shape[1] + 1):
        df[(method, str(i))] = dim_red_result[:, i - 1]
    return df, do_dim_red
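A hedged usage sketch follows; the MultiIndexed DataFrame and the ('feat', …) column naming are invented for illustration, since the excerpt does not show the caller's data layout.

import numpy as np
import pandas as pd
from sklearn.decomposition import PCA

# Hypothetical input: 10 rows, feature columns keyed ('feat', '1')..('feat', '5')
cols = [('feat', str(i)) for i in range(1, 6)]
df = pd.DataFrame(np.random.rand(10, 5), columns=pd.MultiIndex.from_tuples(cols))
df_out, fitted = dim_red(df, cols, 'PCA', params=[], kws={'n_components': 2})
print(df_out[('PCA', '1')].shape)  # (10,)
# The fitted reducer can be passed back in via load_fit to reuse the projection
df_again, _ = dim_red(df, cols, 'PCA', params=[], kws={}, load_fit=fitted)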
Example 7: load_bipolar_cells
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import fit [as alias]
def load_bipolar_cells(micronsPerDeg=50.):
    ''' Returns list of tuples (space, spatial receptive field)
    '''
    data_path, this_filename = os.path.split(__file__)
    file_name1 = data_path + '/data/B1.txt'
    file_name2 = data_path + '/data/B2.txt'
    data_b1 = np.loadtxt(file_name1, delimiter="\t")  # 50 time x 100 space
    data_b2 = np.loadtxt(file_name2, delimiter="\t")  # 50 time x 100 space
    data_b = [data_b1, data_b2]
    # get spacing for all bipolar spatial receptive fields
    spatialDelta = 0.022  # mm
    # since receptive fields are noisy, use PCA
    spatial_rfs = []
    for b in data_b:
        pca = PCA(n_components=2)
        pca.fit(b)
        b_pca = pca.components_[0]
        # Fix the arbitrary sign of the PC so the dominant lobe has a consistent sign
        sign_of_pc = -1 * np.sign(b_pca[abs(b_pca) == np.max(abs(b_pca))])
        space = get_space(b_pca, spatialDelta, micronsPerDeg)
        spatial_rfs.append((space, sign_of_pc * b_pca))
    return spatial_rfs
Example 8: PCAReduction_pair
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import fit [as alias]
def PCAReduction_pair(xList, xTestList, componentNum):
    #kpca = KernelPCA(kernel="linear", n_components=componentNum)
    pca = PCA(n_components=componentNum)
    X = np.array(xList)
    XTest = np.array(xTestList)
    #newX = pca.fit_transform(X)
    pca.fit(X)
    newX = pca.transform(X)
    newXTest = pca.transform(XTest)
    newXList = []
    for x in newX:
        tmpList = [i.real for i in x]
        newXList.append(tmpList)
    newXTestList = []
    for x in newXTest:
        tmpList = [i.real for i in x]
        newXTestList.append(tmpList)
    return newXList, newXTestList
Example 9: fit_pca
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import fit [as alias]
def fit_pca(self, train_pairs, test_pairs):
    # Flatten the pair tuples so each element becomes one PCA sample
    train_pairs_flat = [item for subtuple in train_pairs for item in subtuple]
    test_pairs_flat = [item for subtuple in test_pairs for item in subtuple]
    pca = PCA(n_components=self.pca_components)
    pca.fit(train_pairs_flat)
    train_pairs_pca_flat = pca.transform(train_pairs_flat)
    test_pairs_pca_flat = pca.transform(test_pairs_flat)
    train_pairs_pca = list()
    test_pairs_pca = list()
    # Re-pair consecutive rows: each original pair became two adjacent samples
    for i in range(0, len(train_pairs_pca_flat), 2):
        train_pairs_pca.append((train_pairs_pca_flat[i],
                                train_pairs_pca_flat[i + 1]))
    for i in range(0, len(test_pairs_pca_flat), 2):
        test_pairs_pca.append((test_pairs_pca_flat[i],
                               test_pairs_pca_flat[i + 1]))
    return train_pairs_pca, test_pairs_pca
Example 10: t_sne_view
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import fit [as alias]
def t_sne_view(norm_table, subj_cond, cohorts, image_type):
    # t-SNE analysis: Use stochastic neighbor embedding to reduce dimensionality of
    # the data set to two dimensions in a non-linear, distance-dependent fashion
    # Perform PCA data reduction if dimensionality of feature space is large:
    if len(norm_table.columns) > 12:
        pca = PCA(n_components=12)
        # .values replaces the deprecated DataFrame.as_matrix()
        pca.fit(norm_table.values)
        raw_data = pca.transform(norm_table.values)
    else:
        raw_data = norm_table.values
    # Transform data into a two-dimensional embedded space:
    tsne = TSNE(n_components=2, perplexity=40.0, early_exaggeration=2.0,
                learning_rate=100.0, init='pca')
    tsne_data = tsne.fit_transform(raw_data)
    # Prepare for normalization and view:
    cols = ['t-SNE', 'Cluster Visualization']
    tsne_table = pd.DataFrame(tsne_data, index=norm_table.index, columns=cols)
    # The output is no longer centered or normalized, so shift & scale it before display:
    tsne_avg = ppmi.data_stats(tsne_table, subj_cond, cohorts)
    tsne_norm_table = ppmi.normalize_table(tsne_table, tsne_avg)
    # Send out to graphics rendering engine:
    if image_type == 'Gauss':
        return scg.scatter_gauss(tsne_norm_table[cols[0]], tsne_norm_table[cols[1]], subj_cond)
    elif image_type == 'Scatter':
        return scg.scatter_plain(tsne_norm_table[cols[0]], tsne_norm_table[cols[1]], subj_cond)
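The PCA-then-t-SNE pattern above is common enough to be worth isolating; here is a minimal self-contained sketch, where the synthetic data and the 12-component cap are assumptions for illustration.

import numpy as np
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

X = np.random.rand(100, 40)  # hypothetical 100 samples x 40 features
X_pca = PCA(n_components=12).fit_transform(X)  # cap dimensionality first
X_2d = TSNE(n_components=2, init='pca', perplexity=30.0).fit_transform(X_pca)
print(X_2d.shape)  # (100, 2)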
Example 11: kmeans
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import fit [as alias]
def kmeans(path, n_clusters):
    """
    k-means clustering algorithm; apply PCA to visualize
    """
    list_of_feature = ['ZIP','BED','HALF_BATH','BATH','YR_BUILT','FLOORS','LAND_VAL1','BLDG_VAL1','BLDG_SQFT','LOT_SIZE','ASSMTVAL1']
    df = pd.read_csv(path)
    df = df[list_of_feature]
    data = df.values
    data = preprocessing.scale(data)
    pca = PCA(n_components=2)
    pca.fit(data)
    reduced_data = pca.transform(data)
    print(pca.explained_variance_ratio_)
    print(pca.components_)
    k_means = KMeans(init='k-means++', n_clusters=n_clusters, n_init=10)
    k_means.fit(reduced_data)
    k_means_labels = k_means.labels_
    k_means_cluster_centers = k_means.cluster_centers_
    k_means_labels_unique = np.unique(k_means_labels)
    print(k_means_labels, k_means_cluster_centers, k_means.inertia_)
    plot_cluster(reduced_data, k_means_labels, k_means_cluster_centers, n_clusters)
Example 12: pca_view
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import fit [as alias]
def pca_view(norm_table, subj_cond, cohorts, image_type):
    # SVG-PCA analysis: Plot projections onto the plane spanned by the two most
    # significant principal axes (PCA components)
    # Keep only two principal components
    pca = PCA(n_components=2)
    # Use normalized data (.values replaces the deprecated DataFrame.as_matrix()):
    norm_data = norm_table.values
    # Find principal axes:
    pca.fit(norm_data)
    # Project on principal components:
    pca_data = pca.transform(norm_data)
    # 'Captured' variance in percent:
    pca_var = 100.0 * pca.explained_variance_ratio_.sum()
    pca_note = '(Capture ratio: %.1f%%)' % pca_var
    # Prepare for view:
    cols = ['PCA View', pca_note]
    pca_table = pd.DataFrame(pca_data, index=norm_table.index, columns=cols)
    # Send out to graphics rendering engine:
    if image_type == 'Gauss':
        return scg.scatter_gauss(pca_table[cols[0]], pca_table[cols[1]], subj_cond)
    elif image_type == 'Scatter':
        return scg.scatter_plain(pca_table[cols[0]], pca_table[cols[1]], subj_cond)
Example 13: _fit
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import fit [as alias]
def _fit(self, xtr, ytr):
    ## dimensionality reduction
    if cfg['dimred'] == 'pca':
        dimred = PCA(n_components=cfg['dimredNComponents'], svd_solver=cfg['dimredSolver'])
    elif cfg['dimred'] == 'kpca':
        dimred = KernelPCA(n_components=cfg['dimredNComponents'], kernel=cfg['kernel'], n_jobs=-1)
    elif cfg['dimred'] == 'none':
        dimred = None
    else:
        assert False, 'FATAL: unknown dimred'
    if dimred is not None:
        dimred.fit(xtr)
        xtr = dimred.transform(np.asarray(xtr))
    ## tuning
    clf = svm.SVC(kernel=self._kernel, probability=True)
    ## train
    if self._kernel == 'precomputed':
        assert self._simMat is not None
        simMatTr = cutil.makeComProKernelMatFromSimMat(xtr, xtr, self._simMat)
        clf.fit(simMatTr, ytr)
    else:
        clf.fit(xtr, ytr)
    return (clf, xtr, dimred)
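This method reads a module-level cfg dict that the excerpt does not show. The keys below are inferred from the code; the values are illustrative assumptions only.

# Hypothetical configuration matching the keys referenced above
cfg = {
    'dimred': 'pca',          # 'pca', 'kpca', or 'none'
    'dimredNComponents': 10,  # n_components passed to PCA/KernelPCA
    'dimredSolver': 'auto',   # svd_solver passed to PCA
    'kernel': 'rbf',          # kernel passed to KernelPCA
}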
Example 14: princomp2
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import fit [as alias]
def princomp2(galax):
    clustermeans = mykmeans(galax)  # getting 10D clustermeans from the normalized dataset
    print("*" * 45)
    print("K-means clustering")
    print("*" * 45)
    print("k = 2")
    print("Mean of clusters:", clustermeans)
    pca2 = PCA(n_components=2)
    pca2.fit(galax)
    transformed = pca2.transform(galax)
    plt.title("Data projected on the first two principal components")
    plt.xlabel("first principal component")
    plt.ylabel("second principal component")
    plt.plot([x[0] for x in transformed], [x[1] for x in transformed], 'bx', label="Galaxies")
    plt.legend(loc='upper right')
    plt.show()
    # transform expects 2-D input, so reshape each 1-D cluster mean to one row
    transformed_mean1 = pca2.transform(np.reshape(clustermeans[0], (1, -1)))
    transformed_mean2 = pca2.transform(np.reshape(clustermeans[1], (1, -1)))
    meanx = [transformed_mean1[0][0], transformed_mean2[0][0]]
    meany = [transformed_mean1[0][1], transformed_mean2[0][1]]
    plt.title("Data projected on the first two principal components with transformed clustermeans")
    plt.xlabel("first principal component")
    plt.ylabel("second principal component")
    plt.plot([x[0] for x in transformed], [x[1] for x in transformed], 'bx', label="Galaxies")
    plt.plot(meanx, meany, 'ro', label="Cluster means")
    plt.legend(loc='upper right')
    plt.show()
Example 15: pca_variance
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import fit [as alias]
def pca_variance(df):  # input is the original data frame
    df_pca = PCA()
    df_pca.fit(df)
    ratio = df_pca.explained_variance_ratio_
    components = [('component' + str(x)) for x in range(1, df.shape[1] + 1)]
    df2 = pd.Series(ratio, index=components)
    return df2
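A short usage sketch for picking a component count from this output; the 95% threshold and the random data are assumptions for illustration.

import numpy as np
import pandas as pd
from sklearn.decomposition import PCA

df = pd.DataFrame(np.random.rand(100, 6))  # hypothetical 100 x 6 data
ratios = pca_variance(df)
cumulative = ratios.cumsum()
# Smallest number of components explaining at least 95% of the variance
n_keep = int((cumulative >= 0.95).values.argmax()) + 1
print(cumulative)
print('keep %d components' % n_keep)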