本文整理匯總了Python中sklearn.decomposition.FactorAnalysis.fit方法的典型用法代碼示例。如果您正苦於以下問題:Python FactorAnalysis.fit方法的具體用法?Python FactorAnalysis.fit怎麼用?Python FactorAnalysis.fit使用的例子?那麼，這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類sklearn.decomposition.FactorAnalysis的用法示例。
在下文中一共展示了FactorAnalysis.fit方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: fit_factor_analysis
# 需要導入模塊: from sklearn.decomposition import FactorAnalysis [as 別名]
# 或者: from sklearn.decomposition.FactorAnalysis import fit [as 別名]
def fit_factor_analysis(percentage=0.8):
    """
    Fit a factor analysis on the module-level ``data`` and project it.

    A first fit with default settings provides the model covariance; the
    eigenvalue spectrum of that covariance decides how many factors the
    second fit uses.

    Parameters:
        percentage: float, default:0.8
            Threshold applied to the normalised cumulative sum of the
            covariance eigenvalues (largest first). The number of factors is
            the count of cumulative values strictly below this threshold.
    Returns:
        X: array of floats
            ``data`` as returned by ``fit_transform`` of the refitted model.
        components: array of floats
            The ``components_`` attribute of the refitted model.
    """
    model = FactorAnalysis()
    model.fit(data)

    # eigh yields eigenvalues in ascending order; reverse for largest-first.
    eigenvalues, _ = np.linalg.eigh(model.get_covariance())
    explained = np.cumsum(eigenvalues[::-1]) / np.sum(eigenvalues)

    # NOTE(review): only entries strictly below the threshold are counted, so
    # the kept factors may explain less than `percentage` (the count can even
    # be 0) -- confirm this is intended.
    model.n_components = np.sum(explained < percentage)

    projected = model.fit_transform(data)
    return projected, model.components_
示例2: test_factor_analysis
# 需要導入模塊: from sklearn.decomposition import FactorAnalysis [as 別名]
# 或者: from sklearn.decomposition.FactorAnalysis import fit [as 別名]
def test_factor_analysis():
    """Test FactorAnalysis ability to recover the data covariance structure.

    Synthetic observations are built as ``h . W + noise``. The test then
    checks rejection of an unknown ``svd_method``, the shape of the
    transform, the log-likelihood bookkeeping, the fitted covariance,
    agreement between the 'randomized' and 'lapack' methods, and the
    warnings recorded for a one-iteration fit and for ``verbose=1``.
    """
    rng = np.random.RandomState(0)
    n_samples, n_features, n_components = 20, 5, 3
    # Some random settings for the generative model
    W = rng.randn(n_components, n_features)
    # latent variable of dim 3, 20 of it
    h = rng.randn(n_samples, n_components)
    # using gamma to model different noise variance
    # per component
    noise = rng.gamma(1, size=n_features) * rng.randn(n_samples, n_features)
    # generate observations
    # wlog, mean is 0
    X = np.dot(h, W) + noise
    # An unknown svd_method must raise ValueError at construction time...
    assert_raises(ValueError, FactorAnalysis, svd_method='foo')
    # ...and also at fit time when it is set on an existing instance.
    fa_fail = FactorAnalysis()
    fa_fail.svd_method = 'foo'
    assert_raises(ValueError, fa_fail.fit, X)
    # One fitted model per SVD method, kept for the comparison further down.
    fas = []
    for method in ['randomized', 'lapack']:
        fa = FactorAnalysis(n_components=n_components, svd_method=method)
        fa.fit(X)
        fas.append(fa)
        X_t = fa.transform(X)
        assert_equal(X_t.shape, (n_samples, n_components))
        # The last recorded log-likelihood must match the summed score of X.
        assert_almost_equal(fa.loglike_[-1], fa.score(X).sum())
        # NOTE(review): np.all(np.diff(...)) is True when every step is
        # non-zero, so this asserts that the log-likelihood changes at each
        # iteration, not that it increases -- confirm intent.
        diff = np.all(np.diff(fa.loglike_))
        assert_greater(diff, 0., 'Log likelihood dif not increase')
        # Sample Covariance
        scov = np.cov(X, rowvar=0., bias=1.)
        # Model Covariance
        mcov = fa.get_covariance()
        diff = np.sum(np.abs(scov - mcov)) / W.size
        assert_less(diff, 0.1, "Mean absolute difference is %f" % diff)
        # noise_variance_init has n_features entries while X[:, :2] has only
        # two columns; fit is expected to raise ValueError.
        fa = FactorAnalysis(n_components=n_components,
                            noise_variance_init=np.ones(n_features))
        assert_raises(ValueError, fa.fit, X[:, :2])
    f = lambda x, y: np.abs(getattr(x, y))  # sign will not be equal
    # Both SVD methods must agree on these attributes up to sign.
    fa1, fa2 = fas
    for attr in ['loglike_', 'components_', 'noise_variance_']:
        assert_almost_equal(f(fa1, attr), f(fa2, attr))
    with warnings.catch_warnings(record=True) as w:
        # Refit with a single iteration and check the last recorded warning.
        warnings.simplefilter('always', ConvergenceWarning)
        fa1.max_iter = 1
        fa1.verbose = True
        fa1.fit(X)
        assert_true(w[-1].category == ConvergenceWarning)
        # Constructing with verbose=1 is expected to record a
        # DeprecationWarning as the most recent warning.
        warnings.simplefilter('always', DeprecationWarning)
        FactorAnalysis(verbose=1)
        assert_true(w[-1].category == DeprecationWarning)
示例3: initialize
# 需要導入模塊: from sklearn.decomposition import FactorAnalysis [as 別名]
# 或者: from sklearn.decomposition.FactorAnalysis import fit [as 別名]
def initialize(self):
    """
    Initialize the model.

    Reads ``self.data``, ``self.obsvar``, ``self.M``, ``self.jtype`` and
    ``self.D``; sets ``self.mean``, ``self.lam`` and ``self.jitter``; and
    stores snapshots in ``self.initial_mean``, ``self.initial_lambda`` and
    ``self.initial_jitter``.
    """
    # inverse variance weighted mean (plain mean when obsvar sums to zero)
    if np.sum(self.obsvar) != 0.0:
        self.mean = np.sum(self.data / self.obsvar, axis=0) / \
            np.sum(1.0 / self.obsvar, axis=0)
    else:
        self.mean = np.mean(self.data, axis=0)

    # use Factor Analysis to initialize factor loadings
    if self.M == 0:
        self.lam = np.zeros(1)
    else:
        fa = FactorAnalysis(n_components=self.M)
        fa.fit(self.data)
        self.lam = fa.components_.T

    # initialize jitter
    if self.jtype is None:
        self.jitter = np.array([])
    elif self.jtype == 'one':
        # Compare by value: `is` on a str literal tests object identity and
        # is only accidentally true for interned strings.
        self.jitter = 0.0
    else:
        self.jitter = np.zeros(self.D)

    # save a copy
    self.initial_mean = self.mean.copy()
    # A plain float has no .copy() and is immutable, so only arrays are
    # copied; the scalar jitter is stored as-is.
    if isinstance(self.jitter, np.ndarray):
        self.initial_jitter = self.jitter.copy()
    else:
        self.initial_jitter = self.jitter
    self.initial_lambda = self.lam.copy()
示例4: get_inv_diag_plus_low_rank_cov_op
# 需要導入模塊: from sklearn.decomposition import FactorAnalysis [as 別名]
# 或者: from sklearn.decomposition.FactorAnalysis import fit [as 別名]
def get_inv_diag_plus_low_rank_cov_op(X, rank=2):
    """Combine a factor analysis of ``X`` into one operator via ``_woodbury_inverse``.

    A ``FactorAnalysis`` model with ``rank`` components is fitted on ``X``.
    The reciprocal noise variances (wrapped by ``_diagonal_operator``), the
    inverse of the averaged outer product of the transformed data, and the
    component matrix (transposed and as-is) are passed to
    ``_woodbury_inverse``, whose result is returned.
    """
    model = FactorAnalysis(n_components=rank)
    model.fit(X)
    loadings = model.components_
    noise_vars = model.noise_variance_
    latent = model.transform(X)

    # Average of latent^T latent over the rows of the transformed data.
    latent_second_moment = 1. / len(latent) * latent.T.dot(latent)

    inv_noise_op = _diagonal_operator(1. / noise_vars)
    inv_latent_op = aslinearoperator(np.linalg.inv(latent_second_moment))
    return _woodbury_inverse(inv_noise_op, inv_latent_op,
                             loadings.T, loadings)
示例5: initializeParams
# 需要導入模塊: from sklearn.decomposition import FactorAnalysis [as 別名]
# 或者: from sklearn.decomposition.FactorAnalysis import fit [as 別名]
def initializeParams(Y, K, singleSigma=False, makePlot=False):
    """
    Initialize parameters from a standard factor analysis fit plus a decay-curve fit.

    Input:
        Y: data matrix, n_samples x n_genes
        K: number of latent components
        singleSigma: if true, every entry of sigmas is replaced by their mean
        makePlot: if true (and the fit error is positive), shows a scatter of
            the per-column points together with the fitted curve.
    Returns:
        A, mus, sigmas, decay_coef: the transposed factor-analysis components,
        the per-column means (D x 1), the square roots of the fitted noise
        variances (as a column), and the first fitted parameter of
        ``exp_decay``.
    """
    N, D = Y.shape
    fa_model = FactorAnalysis(n_components=K)

    # Centre a private copy of Y one column at a time, keeping the means.
    centeredY = deepcopy(Y)
    mus = np.zeros([D, 1])
    for col in range(D):
        mus[col] = centeredY[:, col].mean()
        centeredY[:, col] = centeredY[:, col] - mus[col]

    fa_model.fit(centeredY)
    A = fa_model.components_.transpose()
    sigmas = np.atleast_2d(np.sqrt(fa_model.noise_variance_)).transpose()
    if singleSigma:
        sigmas = np.mean(sigmas) * np.ones(sigmas.shape)

    # Now fit decay coefficient: for every column, the mean of its entries
    # with |value| > 1e-6 and the fraction of entries that are not.
    observed = [np.abs(Y[:, col]) > 1e-6 for col in range(D)]
    means = [Y[mask, col].mean() for col, mask in enumerate(observed)]
    ps = [1 - mask.mean() for mask in observed]

    fitted_params, _ = curve_fit(exp_decay, means, ps, p0=.05)
    decay_coef = fitted_params[0]

    # Mean absolute deviation between the observed and fitted fractions.
    mse = np.mean(np.abs(ps - np.exp(-decay_coef * (np.array(means) ** 2))))
    if (mse > 0) and makePlot:
        from matplotlib.pyplot import figure, scatter, plot, title, show
        grid = np.arange(min(means), max(means), .1)
        figure()
        scatter(means, ps)
        plot(grid, np.exp(-decay_coef * (grid ** 2)))
        title('Decay Coef is %2.3f; MSE is %2.3f' % (decay_coef, mse))
        show()
    return A, mus, sigmas, decay_coef
示例6: sd_fa
# 需要導入模塊: from sklearn.decomposition import FactorAnalysis [as 別名]
# 或者: from sklearn.decomposition.FactorAnalysis import fit [as 別名]
def sd_fa(fname, components, result_name):
    '''
    Fit a factor analysis on the standardized data loaded from ``fname``.

    The data frame returned by ``data_set(fname)`` is converted to its
    ``.values``, scaled with ``preprocessing.scale`` and fitted with a
    ``FactorAnalysis`` of ``components`` components. If the fit raises, the
    error is logged with its traceback and the process exits with status 1.

    ``result_name`` is not used in the visible part of this function.
    '''
    cl_data, area_list = data_set(fname)
    values = cl_data.values
    fa = FactorAnalysis(n_components=components)
    # standardize the data
    values = preprocessing.scale(values)
    try:
        fa.fit(values)
    except Exception:
        # `except Exception, e` is a SyntaxError on Python 3. This form works
        # on both major versions. logging.exception logs at ERROR level and
        # appends the traceback, so the cause is no longer discarded.
        logging.exception("factor analysis fit error")
        # Non-zero status: a bare sys.exit() would report success.
        sys.exit(1)
示例7: factor_analysis
# 需要導入模塊: from sklearn.decomposition import FactorAnalysis [as 別名]
# 或者: from sklearn.decomposition.FactorAnalysis import fit [as 別名]
def factor_analysis(x, dims=3):
    """Standardize ``x`` and reduce it with a LAPACK-based factor analysis.

    ``x`` is converted with ``to_ndarray`` and scaled along axis 0 (mean and
    std). A ``FactorAnalysis`` with ``dims`` components is then fitted and
    applied to the scaled data. A one-line summary of the shapes is printed.

    Returns the transformed data and the fitted estimator.
    """
    x = to_ndarray(x)
    standardized = scale(x, axis=0, with_mean=True, with_std=True, copy=True)

    fitted = FactorAnalysis(n_components=dims, svd_method="lapack").fit(standardized)
    y = fitted.transform(standardized)

    summary = "Factor Analysis - Reduced dims from {} to {}".format(x.shape, y.shape)
    print(summary)
    return y, fitted
示例8: dataTransformations
# 需要導入模塊: from sklearn.decomposition import FactorAnalysis [as 別名]
# 或者: from sklearn.decomposition.FactorAnalysis import fit [as 別名]
def dataTransformations(x):
    """Add derived indicator columns to ``x`` in place and return it.

    Renames ``OCUPVIVPAR`` to ``Dwellers`` and adds the columns ``Water``,
    ``Sanitation``, ``Density`` and ``Structure``. From the column-normalised
    indicators it then adds ``Factor`` (a one-component factor analysis
    score), ``K_Means`` (labels of a 4-cluster k-means) and ``LC`` (a plain
    linear combination).
    """
    indicator_cols = ['Water', 'Structure', 'Density', 'Sanitation']

    x.rename(columns={'OCUPVIVPAR': 'Dwellers'}, inplace=True)
    houses = x['Houses']

    # Water
    x['Water'] = x['VPH_AGUAFV'] / houses

    # Sanitation: uses VPH_EXCSA and VPH_NODREN
    x['Sanitation'] = (houses - x['VPH_EXCSA'] + x['VPH_NODREN']) / (2. * houses)

    # Overcrowding: PRO_OCUP_C shifted by 2 and floored at 0, squashed with
    # 1 - 1/(1 + d), then rescaled by the column maximum.
    crowding = (x['PRO_OCUP_C'] - 2.).clip(lower=0.)
    crowding = 1. - 1. / (1. + crowding)
    x['Density'] = crowding / crowding.max()

    # Structure: VPH_1CUART and VPH_PISOTI
    x['Structure'] = (x['VPH_PISOTI'] + x['VPH_1CUART']) / (2 * houses)

    ssiData = pd.DataFrame(normalize(x[indicator_cols], axis=0),
                           columns=indicator_cols)

    # Single-factor score: squared loadings applied to the indicators.
    facAn = FactorAnalysis(n_components = 1)
    facAn.fit(ssiData)
    squared_loadings = facAn.components_**2
    x.loc[:,'Factor'] = dot(squared_loadings, transpose(ssiData.values))[0]

    # K-Means clustering on the same indicators
    k_means = KMeans(n_clusters=4)
    k_means.fit(ssiData)
    x.loc[:,'K_Means'] = k_means.labels_

    # linear combination
    occupancy_ratio = x['PRO_OCUP_C'] / x['PRO_OCUP_C'].max()
    x.loc[:,'LC'] = x[['Water','Structure','Sanitation']].sum(axis=1) + occupancy_ratio
    return x
示例9: test_factor_analysis
# 需要導入模塊: from sklearn.decomposition import FactorAnalysis [as 別名]
# 或者: from sklearn.decomposition.FactorAnalysis import fit [as 別名]
def test_factor_analysis():
    """Test FactorAnalysis ability to recover the data covariance structure.

    Synthetic observations are built as ``h . W + noise``. A model with the
    generating number of components is fitted, and the transform shape, the
    log-likelihood trace and the fitted covariance are checked. A mismatched
    ``noise_variance_init`` must make ``fit`` raise ``ValueError``.
    """
    rng = np.random.RandomState(0)
    n_samples, n_features, n_components = 20, 5, 3
    # Some random settings for the generative model
    W = rng.randn(n_components, n_features)
    # latent variable of dim 3, 20 of it
    h = rng.randn(n_samples, n_components)
    # using gamma to model different noise variance
    # per component
    noise = rng.gamma(1, size=n_features) \
        * rng.randn(n_samples, n_features)
    # generate observations
    # wlog, mean is 0
    X = np.dot(h, W) + noise
    fa = FactorAnalysis(n_components=n_components)
    fa.fit(X)
    X_t = fa.transform(X)
    assert_true(X_t.shape == (n_samples, n_components))
    # The last recorded log-likelihood must match the summed score of X.
    assert_almost_equal(fa.loglike_[-1], fa.score(X).sum())
    # Make sure the log likelihood increases at each iteration
    assert_true(np.all(np.diff(fa.loglike_) > 0.))
    # Sample Covariance
    scov = np.cov(X, rowvar=0., bias=1.)
    # Model Covariance
    mcov = fa.get_covariance()
    diff = np.sum(np.abs(scov - mcov)) / W.size
    assert_true(diff < 0.1, "Mean absolute difference is %f" % diff)
    # noise_variance_init has n_features entries while X[:, :2] has only two
    # columns; fit is expected to raise ValueError.
    fa = FactorAnalysis(n_components=n_components,
                        noise_variance_init=np.ones(n_features))
    assert_raises(ValueError, fa.fit, X[:, :2])
示例10: initalizeParams
# 需要導入模塊: from sklearn.decomposition import FactorAnalysis [as 別名]
# 或者: from sklearn.decomposition.FactorAnalysis import fit [as 別名]
def initalizeParams(Y, k, method = 'standard'):
    """
    Initialize cluster means, standard deviations, weights and the decay coefficient.

    By default (method set to "standard") a mixture model is fitted on a copy
    of Y in which every entry equal to 0 is replaced by a random draw from
    that column's entries with |value| > 1e-6.
    If method is set to "high_dimensional", Y is first reduced to 5
    dimensions with factor analysis, the low-dimensional data is clustered
    with k-means, and per-cluster statistics are computed from the entries
    with |value| > 1e-6.
    Checked.

    Returns:
        cluster_mus, cluster_sigmas, cluster_weights, decay_coef
    """
    assert(method in ['high_dimensional', 'standard'])
    if method == 'high_dimensional':
        N, D = Y.shape
        # initialize using factor analysis.
        model = FactorAnalysis(n_components = 5)
        low_dim_Y = model.fit_transform(Y)
        kmeans_model = KMeans(n_clusters = k)
        z = kmeans_model.fit_predict(low_dim_Y)
        cluster_mus = np.zeros([D, k])
        cluster_weights = np.zeros([k,])
        cluster_sigmas = np.zeros([D, k])
        for z_i in sorted(set(z)):
            idxs = (z == z_i)
            # weight = fraction of samples assigned to this cluster
            cluster_weights[z_i] = np.mean(idxs)
            cluster_Y = Y[idxs, :]
            cluster_Y_is_nonzero = np.abs(cluster_Y) > 1e-6
            # mean and standard deviation over the nonzero entries only
            cluster_mus[:, z_i] = cluster_Y.sum(axis = 0) / cluster_Y_is_nonzero.sum(axis = 0)
            cluster_sigmas[:, z_i] = np.sqrt(((cluster_Y ** 2).sum(axis = 0) - 2 * cluster_mus[:, z_i] * (cluster_Y.sum(axis = 0)) + cluster_mus[:, z_i]**2 * cluster_Y_is_nonzero.sum(axis = 0)) / cluster_Y_is_nonzero.sum(axis = 0))
            # spot-check the closed-form sigma against np.std on a few columns
            for j in range(1, 5):
                assert(np.abs(cluster_sigmas[j, z_i] - np.std(cluster_Y[cluster_Y_is_nonzero[:, j], j])) < 1e-4)
    if method == 'standard':
        N, D = Y.shape
        model = GMM(n_components = k)
        imputedY = deepcopy(Y)
        for j in range(D):
            non_zero_idxs = np.abs(Y[:, j]) > 1e-6
            for i in range(N):
                if Y[i][j] == 0:
                    imputedY[i][j] = np.random.choice(Y[non_zero_idxs, j])
        model.fit(imputedY)
        cluster_mus = model.means_.transpose()
        cluster_weights = model.weights_
        cluster_sigmas = np.sqrt(model.covars_.transpose())
    # now fit decay coefficient
    means = []
    ps = []
    for j in range(D):
        non_zero_idxs = np.abs(Y[:, j]) > 1e-6
        means.append(Y[non_zero_idxs, j].mean())
        ps.append(1 - non_zero_idxs.mean())
    decay_coef, pcov = curve_fit(exp_decay, means, ps)
    # Take the scalar before it is used: the fitted parameters come back as
    # an array, and formatting an array with %f relies on implicit
    # array-to-scalar conversion.
    decay_coef = decay_coef[0]
    mse = np.mean(np.abs(ps - np.exp(-decay_coef * (np.array(means) ** 2))))
    # Parenthesised single-argument print works on Python 2 and Python 3.
    print('Decay Coef is %2.3f; MSE is %2.3f' % (decay_coef, mse))
    assert(np.all(cluster_sigmas > 0))
    return cluster_mus, cluster_sigmas, cluster_weights, decay_coef
示例11: load_data
# 需要導入模塊: from sklearn.decomposition import FactorAnalysis [as 別名]
# 或者: from sklearn.decomposition.FactorAnalysis import fit [as 別名]
import os

from data import load_data
from sklearn.decomposition import FactorAnalysis
try:
    import cPickle as pickle
except ImportError:
    # Only a missing cPickle (Python 3) should trigger the fallback.
    import pickle

# Factor Analysis
# ================================================================
# Apply factor analysis on the tf-idf matrix and transform raw documents into
# intermediate representation.
docs_tfidf, vocab_tfidf, vocabulary = load_data(subset='all')
n_components = 40
fa = FactorAnalysis(n_components=n_components)
fa.fit(docs_tfidf.toarray())
fa_words = fa.transform(vocab_tfidf.toarray())

# Create a dict mapping each vocabulary entry to its transformed vector.
fa_dict = dict(zip(vocabulary, fa_words))

# Store the intermediate representation on disk. Pickle data is bytes, so the
# file is opened in binary mode; the with-block closes (and flushes) it.
fa_dict_filename = 'fa_dict.pk'
if not os.path.exists(fa_dict_filename):
    with open(fa_dict_filename, 'wb') as fa_dict_file:
        pickle.dump(fa_dict, fa_dict_file)

# Store the estimator on disk for further usage. The estimator is dumped here
# because creating the file without writing to it would leave an empty file
# that the exists() guard then never replaces.
fa_estimator_filename = 'fa_estimator.pk'
if not os.path.exists(fa_estimator_filename):
    with open(fa_estimator_filename, 'wb') as fa_estimator_file:
        pickle.dump(fa, fa_estimator_file)
示例12: test_factor_analysis
# 需要導入模塊: from sklearn.decomposition import FactorAnalysis [as 別名]
# 或者: from sklearn.decomposition.FactorAnalysis import fit [as 別名]
def test_factor_analysis():
    # Test FactorAnalysis ability to recover the data covariance structure
    #
    # Synthetic observations are built as h . W + noise. The test then checks
    # rejection of an unknown svd_method, the transform shape, the
    # log-likelihood bookkeeping, the fitted covariance, agreement between
    # the two SVD methods, the convergence warning, and that covariance and
    # precision are inverses of each other.
    rng = np.random.RandomState(0)
    n_samples, n_features, n_components = 20, 5, 3
    # Some random settings for the generative model
    W = rng.randn(n_components, n_features)
    # latent variable of dim 3, 20 of it
    h = rng.randn(n_samples, n_components)
    # using gamma to model different noise variance
    # per component
    noise = rng.gamma(1, size=n_features) * rng.randn(n_samples, n_features)
    # generate observations
    # wlog, mean is 0
    X = np.dot(h, W) + noise
    # An unknown svd_method must raise ValueError at construction time...
    assert_raises(ValueError, FactorAnalysis, svd_method='foo')
    # ...and also at fit time when it is set on an existing instance.
    fa_fail = FactorAnalysis()
    fa_fail.svd_method = 'foo'
    assert_raises(ValueError, fa_fail.fit, X)
    # One fitted model per SVD method, kept for the comparison further down.
    fas = []
    for method in ['randomized', 'lapack']:
        fa = FactorAnalysis(n_components=n_components, svd_method=method)
        fa.fit(X)
        fas.append(fa)
        X_t = fa.transform(X)
        assert_equal(X_t.shape, (n_samples, n_components))
        # Last log-likelihood equals the summed per-sample scores, and
        # score() equals the mean of the per-sample scores.
        assert_almost_equal(fa.loglike_[-1], fa.score_samples(X).sum())
        assert_almost_equal(fa.score_samples(X).mean(), fa.score(X))
        # NOTE(review): np.all(np.diff(...)) is True when every step is
        # non-zero, so this asserts that the log-likelihood changes at each
        # iteration, not that it increases -- confirm intent.
        diff = np.all(np.diff(fa.loglike_))
        assert_greater(diff, 0., 'Log likelihood dif not increase')
        # Sample Covariance
        scov = np.cov(X, rowvar=0., bias=1.)
        # Model Covariance
        mcov = fa.get_covariance()
        diff = np.sum(np.abs(scov - mcov)) / W.size
        assert_less(diff, 0.1, "Mean absolute difference is %f" % diff)
        # noise_variance_init has n_features entries while X[:, :2] has only
        # two columns; fit is expected to raise ValueError.
        fa = FactorAnalysis(n_components=n_components,
                            noise_variance_init=np.ones(n_features))
        assert_raises(ValueError, fa.fit, X[:, :2])
    f = lambda x, y: np.abs(getattr(x, y))  # sign will not be equal
    # Both SVD methods must agree on these attributes up to sign.
    fa1, fa2 = fas
    for attr in ['loglike_', 'components_', 'noise_variance_']:
        assert_almost_equal(f(fa1, attr), f(fa2, attr))
    # A single iteration is expected to trigger a ConvergenceWarning.
    fa1.max_iter = 1
    fa1.verbose = True
    assert_warns(ConvergenceWarning, fa1.fit, X)
    # Test get_covariance and get_precision with n_components == n_features
    # with n_components < n_features and with n_components == 0
    for n_components in [0, 2, X.shape[1]]:
        fa.n_components = n_components
        fa.fit(X)
        cov = fa.get_covariance()
        precision = fa.get_precision()
        # cov . precision must be the identity to 12 decimals
        assert_array_almost_equal(np.dot(cov, precision),
                                  np.eye(X.shape[1]), 12)
示例13: factorAna
# 需要導入模塊: from sklearn.decomposition import FactorAnalysis [as 別名]
# 或者: from sklearn.decomposition.FactorAnalysis import fit [as 別名]
def factorAna(x,testData,n_components):
    """Fit a factor analysis on ``x`` and return ``testData`` transformed by it.

    ``n_components`` is passed as the first positional argument of
    ``FactorAnalysis``.
    """
    model = FactorAnalysis(n_components)
    model.fit(x)
    return model.transform(testData)
示例14: range
# 需要導入模塊: from sklearn.decomposition import FactorAnalysis [as 別名]
# 或者: from sklearn.decomposition.FactorAnalysis import fit [as 別名]
# Pick the number of factors by 4-fold cross-validation over the rows of cdata.
kf = cross_validation.KFold(cdata.shape[0], n_folds=4)
max_components=30
# sc[n_components, fold] holds the held-out score for that fold. Row 0 is
# never filled because the loop below starts at 1.
sc=numpy.zeros((max_components,4))
for n_components in range(1,max_components):
    fa=FactorAnalysis(n_components=n_components)
    fold=0
    for train,test in kf:
        train_data=cdata[train,:]
        test_data=cdata[test,:]
        # fit on the training rows, score on the held-out rows
        fa.fit(train_data)
        sc[n_components,fold]=fa.score(test_data)
        fold+=1
# Average over folds; row 0 is forced to -inf so argmax cannot select it.
meanscore=numpy.mean(sc,1)
meanscore[0]=-numpy.inf
# The row index of the best mean score is the chosen number of components.
maxscore=numpy.argmax(meanscore)
print ('crossvalidation suggests %d components'%maxscore)
# now run it on full dataset to get components
fa=FactorAnalysis(n_components=maxscore)
fa.fit(cdata)
# NOTE(review): the original indentation was lost and the snippet looks
# truncated; print('') is assumed to belong to the loop body, and `s` (the
# argsort of each component's loadings) is not used in the visible code.
for c in range(maxscore):
    s=numpy.argsort(fa.components_[c,:])
    print('')
示例15: learn
# 需要導入模塊: from sklearn.decomposition import FactorAnalysis [as 別名]
# 或者: from sklearn.decomposition.FactorAnalysis import fit [as 別名]
def learn(data):
    """Fit a two-component ``FA`` model on ``data`` and wrap its components.

    Returns a ``PreferenceGenerator`` built from the fitted ``components_``.
    """
    fa_model = FA(n_components =2)
    fa_model.fit(data)
    loadings = fa_model.components_
    return PreferenceGenerator(loadings)