本文整理汇总了Python中sklearn.decomposition.FactorAnalysis类的典型用法代码示例。如果您正苦于以下问题:Python FactorAnalysis类的具体用法?Python FactorAnalysis怎么用?Python FactorAnalysis使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了FactorAnalysis类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: reduceDataset
def reduceDataset(self, nr=3, method='PCA'):
    '''Reduce the dimensionality of ``self.ModelInputs['Dataset']``.

    The reduced data is stored in ``self.ModelInputs`` under the key
    ``method`` and ``method`` is appended to ``self.datasetsAvailable``.

    Parameters:
        nr: number of components handed to the selected estimator.
        method: name of the technique to apply.  Supported values:
            'PCA'
            'FactorAnalysis'
            'KPCArbf', 'KPCApoly'
            'KPCAcosine', 'KPCAsigmoid'
            'IPCA'
            'FastICADeflation'
            'FastICAParallel'
            'All' (delegates to ``self.dimensionalityReduction(nr=nr)``)
            The manifold methods ('Isomap', 'LLE', 'LLEmodified', 'LLEltsa')
            are not implemented by this method.

    Returns:
        ``self``.

    Raises:
        ValueError: if ``method`` is not one of the supported names.
    '''
    dataset = self.ModelInputs['Dataset']

    if method == 'All':
        self.dimensionalityReduction(nr=nr)
        return self

    # Factories are wrapped in lambdas so that only the requested estimator
    # is ever constructed.
    estimator_factories = {
        'PCA': lambda: sklearnPCA(n_components=nr),
        'FactorAnalysis': lambda: FactorAnalysis(n_components=nr),
        # kernel PCA variants
        'KPCArbf': lambda: KernelPCA(nr, kernel='rbf'),
        'KPCApoly': lambda: KernelPCA(nr, kernel='poly'),
        'KPCAcosine': lambda: KernelPCA(nr, kernel='cosine'),
        'KPCAsigmoid': lambda: KernelPCA(nr, kernel='sigmoid'),
        # incremental PCA
        'IPCA': lambda: IncrementalPCA(nr),
        # FastICA variants
        'FastICAParallel': lambda: FastICA(nr, algorithm='parallel'),
        'FastICADeflation': lambda: FastICA(nr, algorithm='deflation'),
    }
    if method not in estimator_factories:
        # Previously an unknown name fell through every branch and crashed
        # with an unbound local variable.
        raise ValueError(
            'Unknown dimensionality reduction method %r; expected one of %s '
            "or 'All'" % (method, sorted(estimator_factories)))

    reduced = estimator_factories[method]().fit_transform(dataset)
    self.ModelInputs.update({method: reduced})
    self.datasetsAvailable.append(method)
    return self
示例2: factor_analysis
def factor_analysis(results_dir):
    """Fit a two-factor model to ``summary.csv`` and save the factor scores.

    Reads ``<results_dir>/summary.csv`` as comma-separated values, transposes
    the array, fits ``FactorAnalysis(n_components=2)`` on it, prints the shape
    of the model covariance and the transformed array, and writes the
    transformed array to ``<results_dir>/FA-datasets-2.csv``.

    Parameters:
        results_dir: directory containing ``summary.csv``; the output file is
            written to the same directory.
    """
    summary_path = os.path.join(results_dir, 'summary.csv')
    data_array = np.transpose(np.genfromtxt(summary_path, delimiter=','))
    fa = FactorAnalysis(n_components=2)
    new_array = fa.fit_transform(data_array)
    # Single-argument print() produces the same output on Python 2 and 3.
    print(fa.get_covariance().shape)
    print(new_array)
    output_path = os.path.join(results_dir, 'FA-datasets-2.csv')
    np.savetxt(output_path, new_array, delimiter=',')
示例3: initialize
def initialize(self):
    """
    Initialize the model.

    Reads ``self.data``, ``self.obsvar``, ``self.M``, ``self.jtype`` and
    ``self.D``; sets ``self.mean``, ``self.lam`` and ``self.jitter`` and
    stores copies of them as ``self.initial_mean``, ``self.initial_lambda``
    and ``self.initial_jitter``.
    """
    # inverse variance weighted mean
    if np.sum(self.obsvar) != 0.0:
        self.mean = np.sum(self.data / self.obsvar, axis=0) / \
            np.sum(1.0 / self.obsvar, axis=0)
    else:
        self.mean = np.mean(self.data, axis=0)

    # use Factor Analysis to initialize factor loadings
    if self.M == 0:
        self.lam = np.zeros(1)
    else:
        fa = FactorAnalysis(n_components=self.M)
        fa.fit(self.data)
        self.lam = fa.components_.T

    # initialize jitter
    if self.jtype is None:
        self.jitter = np.array([])
    elif self.jtype == 'one':
        # compare by value: identity with a string literal is not reliable
        self.jitter = 0.0
    else:
        self.jitter = np.zeros(self.D)

    # save a copy
    self.initial_mean = self.mean.copy()
    # np.copy also accepts the plain float used for jtype 'one', which has
    # no .copy() method of its own
    self.initial_jitter = np.copy(self.jitter)
    self.initial_lambda = self.lam.copy()
示例4: dimensionalityReduction
def dimensionalityReduction(self, nr=5):
    '''Apply every dimensionality reduction technique enabled in this class.

    Each technique is fitted on ``self.ModelInputs['Dataset']`` with ``nr``
    components.  The results are stored in ``self.ModelInputs`` under these
    keys, which are also appended to ``self.datasetsAvailable``:
        'PCA'
        'FactorAnalysis'
        'KPCArbf', 'KPCApoly'
        'KPCAcosine', 'KPCAsigmoid'
        'IPCA'
        'FastICADeflation'
        'FastICAParallel'

    Parameters:
        nr: number of components handed to every estimator.

    Returns:
        ``self``.
    '''
    # NOTE: the manifold techniques (Isomap, LLE, modified LLE, LTSA) are
    # currently disabled and produce no entries.
    dataset = self.ModelInputs['Dataset']

    # (key, estimator) pairs in the order in which they are fitted.
    estimators = [
        ('PCA', sklearnPCA(n_components=nr)),
        ('FactorAnalysis', FactorAnalysis(n_components=nr)),
        ('KPCArbf', KernelPCA(nr, kernel='rbf')),
        ('KPCApoly', KernelPCA(nr, kernel='poly')),
        ('KPCAcosine', KernelPCA(nr, kernel='cosine')),
        ('KPCAsigmoid', KernelPCA(nr, kernel='sigmoid')),
        ('IPCA', IncrementalPCA(nr)),
        # Each FastICA result is stored under the name of the algorithm that
        # produced it (the two used to be swapped).
        ('FastICAParallel', FastICA(nr, algorithm='parallel')),
        ('FastICADeflation', FastICA(nr, algorithm='deflation')),
    ]
    # Fit everything before touching self, so a failing estimator leaves
    # ModelInputs and datasetsAvailable unchanged.
    reduced = dict((key, estimator.fit_transform(dataset))
                   for key, estimator in estimators)

    keys = ['PCA', 'FactorAnalysis', 'KPCArbf', 'KPCApoly', 'KPCAcosine',
            'KPCAsigmoid', 'IPCA', 'FastICADeflation', 'FastICAParallel']
    self.ModelInputs.update(reduced)
    self.datasetsAvailable.extend(keys)
    return self
示例5: factor_analysis
def factor_analysis(x, dims=3):
    """Standardize ``x`` column-wise and project it onto ``dims`` factors.

    ``x`` is converted with ``to_ndarray`` and scaled along axis 0 (mean and
    standard deviation), then a ``FactorAnalysis`` model using the "lapack"
    SVD method is fitted on the scaled data.

    Returns:
        A pair ``(y, fitted)``: the transformed data and the object returned
        by ``fit``.
    """
    x = to_ndarray(x)
    standardized = scale(x, axis=0, with_mean=True, with_std=True, copy=True)
    fitted = FactorAnalysis(n_components=dims, svd_method="lapack").fit(standardized)
    y = fitted.transform(standardized)
    summary = "Factor Analysis - Reduced dims from {} to {}".format(x.shape, y.shape)
    print(summary)
    return y, fitted
示例6: test_factor_analysis
def test_factor_analysis():
    """Check that FactorAnalysis recovers the covariance structure of the data."""
    random_state = np.random.RandomState(0)
    n_samples, n_features, n_components = 20, 5, 3

    # Random parameters of the generative model
    loadings = random_state.randn(n_components, n_features)
    # 20 latent vectors of dimension 3
    latent = random_state.randn(n_samples, n_components)
    # a gamma draw per feature gives every feature its own noise level
    noise_scale = random_state.gamma(1, size=n_features)
    noise = noise_scale * random_state.randn(n_samples, n_features)
    # observations; without loss of generality the mean is 0
    X = np.dot(latent, loadings) + noise

    # an invalid svd_method must be rejected
    assert_raises(ValueError, FactorAnalysis, svd_method='foo')
    broken = FactorAnalysis()
    broken.svd_method = 'foo'
    assert_raises(ValueError, broken.fit, X)

    fitted_models = []
    for svd_method in ['randomized', 'lapack']:
        model = FactorAnalysis(n_components=n_components, svd_method=svd_method)
        model.fit(X)
        fitted_models.append(model)

        transformed = model.transform(X)
        assert_equal(transformed.shape, (n_samples, n_components))

        assert_almost_equal(model.loglike_[-1], model.score(X).sum())

        steps_nonzero = np.all(np.diff(model.loglike_))
        assert_greater(steps_nonzero, 0., 'Log likelihood dif not increase')

        # sample covariance against model covariance
        sample_cov = np.cov(X, rowvar=0., bias=1.)
        model_cov = model.get_covariance()
        mean_abs_diff = np.sum(np.abs(sample_cov - model_cov)) / loadings.size
        assert_less(mean_abs_diff, 0.1,
                    "Mean absolute difference is %f" % mean_abs_diff)

        # noise_variance_init sized for n_features must fail on 2 features
        mismatched = FactorAnalysis(n_components=n_components,
                                    noise_variance_init=np.ones(n_features))
        assert_raises(ValueError, mismatched.fit, X[:, :2])

    def abs_attr(estimator, name):
        # sign will not be equal
        return np.abs(getattr(estimator, name))

    first, second = fitted_models
    for name in ['loglike_', 'components_', 'noise_variance_']:
        assert_almost_equal(abs_attr(first, name), abs_attr(second, name))

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter('always', ConvergenceWarning)
        first.max_iter = 1
        first.verbose = True
        first.fit(X)
        assert_true(caught[-1].category == ConvergenceWarning)

        warnings.simplefilter('always', DeprecationWarning)
        FactorAnalysis(verbose=1)
        assert_true(caught[-1].category == DeprecationWarning)
示例7: get_inv_diag_plus_low_rank_cov_op
def get_inv_diag_plus_low_rank_cov_op(X, rank=2):
    """Build an operator from a ``rank``-component factor model of ``X``.

    A ``FactorAnalysis`` model is fitted on ``X``.  The reciprocal noise
    variances (wrapped by ``_diagonal_operator``), the inverse of the
    activations' second-moment matrix (wrapped by ``aslinearoperator``) and
    the components, transposed and as-is, are handed to
    ``_woodbury_inverse``, whose result is returned.
    """
    model = FactorAnalysis(n_components=rank)
    model.fit(X)
    loadings = model.components_
    noise_variances = model.noise_variance_
    scores = model.transform(X)

    inverse_noise_op = _diagonal_operator(1. / noise_variances)
    score_second_moment = 1. / len(scores) * scores.T.dot(scores)
    inverse_moment_op = aslinearoperator(np.linalg.inv(score_second_moment))
    return _woodbury_inverse(inverse_noise_op, inverse_moment_op,
                             loadings.T, loadings)
示例8: compute_scores
def compute_scores(X):
    """Return mean cross-validation scores of PCA and FactorAnalysis on ``X``.

    One score per entry of the module-level ``n_components`` iterable is
    computed for each model.

    Returns:
        ``(pca_scores, fa_scores)``, two lists ordered like ``n_components``.
    """
    def mean_cv_score(estimator):
        return np.mean(cross_val_score(estimator, X))

    estimators = (PCA(), FactorAnalysis())
    score_pairs = []
    for n in n_components:
        for estimator in estimators:
            estimator.n_components = n
        score_pairs.append([mean_cv_score(estimator) for estimator in estimators])

    pca_scores = [pair[0] for pair in score_pairs]
    fa_scores = [pair[1] for pair in score_pairs]
    return pca_scores, fa_scores
示例9: initializeParams
def initializeParams(Y, K, singleSigma=False, makePlot=False):
    """
    Initializes parameters using a standard factor analysis model plus
    exponential curve fitting.

    Input:
    Y: data matrix, n_samples x n_genes
    K: number of latent components
    singleSigma: uses only a single sigma as opposed to a different sigma for every gene
    makePlot: makes a mu - p_0 plot and shows the decaying exponential fit.
    Returns:
    A, mus, sigmas, decay_coef: initialized model parameters.
    """
    _, D = Y.shape
    model = FactorAnalysis(n_components=K)

    # Center every column of a copy of Y.  The column mean is taken over all
    # entries, zeros included.
    zeroedY = deepcopy(Y)
    mus = np.zeros([D, 1])
    for j in range(D):
        mus[j] = zeroedY[:, j].mean()
        zeroedY[:, j] = zeroedY[:, j] - mus[j]

    model.fit(zeroedY)
    A = model.components_.transpose()
    sigmas = np.atleast_2d(np.sqrt(model.noise_variance_)).transpose()
    if singleSigma:
        sigmas = np.mean(sigmas) * np.ones(sigmas.shape)

    # Now fit decay coefficient: per column, the mean of the non-zero entries
    # against the fraction of zero entries.
    means = []
    ps = []
    for j in range(D):
        non_zero_idxs = np.abs(Y[:, j]) > 1e-6
        means.append(Y[non_zero_idxs, j].mean())
        ps.append(1 - non_zero_idxs.mean())
    fitted_params, _ = curve_fit(exp_decay, means, ps, p0=.05)
    decay_coef = fitted_params[0]

    # Despite its name this is the mean absolute deviation of the fit.
    mse = np.mean(np.abs(ps - np.exp(-decay_coef * (np.array(means) ** 2))))
    if (mse > 0) and makePlot:
        from matplotlib.pyplot import figure, scatter, plot, title, show
        figure()
        scatter(means, ps)
        # build the x grid once and reuse it for the fitted curve
        grid = np.arange(min(means), max(means), .1)
        plot(grid, np.exp(-decay_coef * (grid ** 2)))
        title('Decay Coef is %2.3f; MSE is %2.3f' % (decay_coef, mse))
        show()
    return A, mus, sigmas, decay_coef
示例10: compute_scores
def compute_scores(X, n_components):
    """Return mean cross-validation scores of PCA and FactorAnalysis on ``X``.

    Parameters:
        X: data handed to ``cross_val_score``.
        n_components: iterable of component counts to evaluate.

    Returns:
        ``(pca_scores, fa_scores)``, two lists ordered like ``n_components``.
    """
    pca = PCA()
    fa = FactorAnalysis()
    pca_scores, fa_scores = [], []
    for n in n_components:
        # Single-argument print() produces the same output on Python 2 and 3.
        print('Processing dimension {}'.format(n))
        pca.n_components = n
        fa.n_components = n
        pca_scores.append(np.mean(cross_val_score(pca, X)))
        fa_scores.append(np.mean(cross_val_score(fa, X)))
    return pca_scores, fa_scores
示例11: factor_analysis
def factor_analysis(data):
    """Fit a factor analysis on the feature columns and plot factor pairs.

    The columns named by the module-level ``numerical_features`` and
    ``categorical_features`` lists are transformed with a default
    ``FactorAnalysis``.  The first eight factors are drawn as four scatter
    plots (factor 0 vs 1, 2 vs 3, 4 vs 5, 6 vs 7) on a 2x2 grid, coloured by
    ``data[target]``.

    Parameters:
        data: table indexable by a list of column names and by ``target``.

    Returns:
        The transformed data returned by ``fit_transform``.
    """
    fa = FactorAnalysis()
    features = numerical_features + categorical_features
    fa_data = fa.fit_transform(data[features])
    plt.figure()
    # pyplot subplot positions are 1-based, so the panels are numbered 1..4.
    for panel in range(1, 5):
        x_col = 2 * (panel - 1)
        plt.subplot(2, 2, panel)
        plt.scatter(fa_data[:, x_col], fa_data[:, x_col + 1], c=data[target])
    return fa_data
示例12: sd_fa
def sd_fa(fname, components, result_name):
    '''
    Fit a factor analysis model on the standardized data loaded from fname.

    The data is loaded with ``data_set(fname)``, its ``.values`` are
    standardized with ``preprocessing.scale`` and a ``FactorAnalysis`` with
    ``components`` components is fitted on them.  If fitting fails, the error
    is logged with its traceback and the process exits with status 1.

    Parameters:
        fname: argument forwarded to ``data_set``.
        components: number of factors.
        result_name: not used by the code shown here.
    '''
    cl_data, _ = data_set(fname)
    values = cl_data.values
    fa = FactorAnalysis(n_components=components)
    # standardize the data
    values = preprocessing.scale(values)
    try:
        fa.fit(values)
    except Exception:
        # keep the traceback in the log and report failure to the caller of
        # the process (a bare sys.exit() would exit with status 0)
        logging.exception("factor analysis fit error")
        sys.exit(1)
示例13: compute_scores
def compute_scores(X, n_components):
    """
    Compute the "y" data of the plots -- the CV scores.

    For every entry of ``n_components`` the mean ``cross_val_score`` of a PCA
    and of a FactorAnalysis model on ``X`` is recorded.

    Returns:
        ``(pca_scores, fa_scores)``, two lists ordered like ``n_components``.
    """
    pca_model, fa_model = PCA(), FactorAnalysis()
    pca_scores = []
    fa_scores = []
    for n_comp in n_components:
        pca_model.n_components = fa_model.n_components = n_comp
        pca_cv = cross_val_score(pca_model, X)
        pca_scores.append(np.mean(pca_cv))
        fa_cv = cross_val_score(fa_model, X)
        fa_scores.append(np.mean(fa_cv))
    return pca_scores, fa_scores
示例14: testAlgorithm
def testAlgorithm():
    """Plot true, ZIFA-estimated and factor-analysis latent positions.

    Simulated data is generated with fixed random seeds, fitted with
    ``block_ZIFA.fitModel`` and with ``FactorAnalysis``, and the three sets
    of 2-D positions are shown side by side, coloured by cluster id.
    """
    import matplotlib.pyplot as plt

    random.seed(35)
    np.random.seed(32)

    n, d, k = 200, 20, 2
    sigma = .3
    n_clusters = 3
    decay_coef = .1
    X, Y, Z, ids = generateSimulatedDimensionalityReductionData(n_clusters, n, d, k, sigma, decay_coef)
    Zhat, params = block_ZIFA.fitModel(Y, k)
    colors = ['red', 'blue', 'green']
    cluster_ids = sorted(set(ids))
    factor_analysis_Zhat = FactorAnalysis(n_components=k).fit_transform(Y)

    def scatter_clusters(positions):
        # one scatter call per cluster; colours are indexed by cluster id - 1
        for cluster_id in cluster_ids:
            members = ids == cluster_id
            plt.scatter(positions[members, 0], positions[members, 1], color=colors[cluster_id - 1], s=4)

    plt.figure(figsize=[15, 5])

    plt.subplot(131)
    scatter_clusters(Z)
    plt.title('True Latent Positions\nFraction of Zeros %2.3f' % (Y == 0).mean())
    plt.xlim([-4, 4])
    plt.ylim([-4, 4])

    plt.subplot(132)
    scatter_clusters(Zhat)
    plt.xlim([-4, 4])
    plt.ylim([-4, 4])
    plt.title('ZIFA Estimated Latent Positions')

    plt.subplot(133)
    scatter_clusters(factor_analysis_Zhat)
    plt.xlim([-4, 4])
    plt.ylim([-4, 4])
    plt.title('Factor Analysis Estimated Latent Positions')

    plt.show()
示例15: compute_scores
def compute_scores(X, n_components):
    """Return mean cross-validation scores of PCA and FactorAnalysis on ``X``.

    After every component count, the scores collected so far and the
    wall-clock time spent on that count are printed.

    Parameters:
        X: data handed to ``cross_val_score``.
        n_components: iterable of component counts to evaluate.

    Returns:
        ``(pca_scores, fa_scores)``, two lists ordered like ``n_components``.
    """
    pca = PCA()
    fa = FactorAnalysis()
    pca_scores, fa_scores = [], []
    for n in n_components:
        start = time.time()
        pca.n_components = n
        fa.n_components = n
        pca_scores.append(np.mean(cross_val_score(pca, X)))
        fa_scores.append(np.mean(cross_val_score(fa, X)))
        elapsed = time.time() - start
        # Each line is formatted into one string so that print() emits the
        # same text on Python 2 and Python 3.
        print('PCA scores (%3d) %s' % (n, pca_scores))
        print('FA scores (%3d) %s' % (n, fa_scores))
        print('TIME:  %s' % elapsed)
    return pca_scores, fa_scores