当前位置: 首页>>代码示例>>Python>>正文


Python FactorAnalysis.fit方法代码示例

本文整理汇总了Python中sklearn.decomposition.FactorAnalysis.fit方法的典型用法代码示例。如果您正苦于以下问题:Python FactorAnalysis.fit方法的具体用法?Python FactorAnalysis.fit怎么用?Python FactorAnalysis.fit使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sklearn.decomposition.FactorAnalysis的用法示例。


在下文中一共展示了FactorAnalysis.fit方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: fit_factor_analysis

# 需要导入模块: from sklearn.decomposition import FactorAnalysis [as 别名]
# 或者: from sklearn.decomposition.FactorAnalysis import fit [as 别名]
def fit_factor_analysis(percentage=0.8):
    """
    Runs the factor analysis.

    Parameters:

        percentage: float, default:0.8

        The percentage of the cumulative sum of the eigenvalues to be held. This number defines the number of loading factors in the analysis.

    Returns:
        
        X: array of floats [n_samples,n_factors]

            The transformed data after the factor analysis.

        components: array of floats [n_factors,n_samples]

            The components of the factor analysis
    """
    fa = FactorAnalysis()
    fa.fit(data)
    C = fa.get_covariance()
    l,e = np.linalg.eigh(C)
    cs = np.cumsum(l[::-1])/np.sum(l)
    n = np.sum(cs<percentage)

    fa.n_components = n
    X_ = fa.fit_transform(data)
    components = fa.components_
    return X_,components
开发者ID:pedropazzini,项目名称:factor_analysis,代码行数:33,代码来源:factor_analysis_script.py

示例2: test_factor_analysis

# 需要导入模块: from sklearn.decomposition import FactorAnalysis [as 别名]
# 或者: from sklearn.decomposition.FactorAnalysis import fit [as 别名]
def test_factor_analysis():
    """Test FactorAnalysis ability to recover the data covariance structure
    """
    rng = np.random.RandomState(0)
    n_samples, n_features, n_components = 20, 5, 3

    # Some random settings for the generative model
    W = rng.randn(n_components, n_features)
    # latent variable of dim 3, 20 of it
    h = rng.randn(n_samples, n_components)
    # using gamma to model different noise variance
    # per component
    noise = rng.gamma(1, size=n_features) * rng.randn(n_samples, n_features)

    # generate observations
    # wlog, mean is 0
    X = np.dot(h, W) + noise
    assert_raises(ValueError, FactorAnalysis, svd_method='foo')
    fa_fail = FactorAnalysis()
    fa_fail.svd_method = 'foo'
    assert_raises(ValueError, fa_fail.fit, X)
    fas = []
    for method in ['randomized', 'lapack']:
        fa = FactorAnalysis(n_components=n_components, svd_method=method)
        fa.fit(X)
        fas.append(fa)

        X_t = fa.transform(X)
        assert_equal(X_t.shape, (n_samples, n_components))

        assert_almost_equal(fa.loglike_[-1], fa.score(X).sum())

        diff = np.all(np.diff(fa.loglike_))
        assert_greater(diff, 0., 'Log likelihood dif not increase')

        # Sample Covariance
        scov = np.cov(X, rowvar=0., bias=1.)

        # Model Covariance
        mcov = fa.get_covariance()
        diff = np.sum(np.abs(scov - mcov)) / W.size
        assert_less(diff, 0.1, "Mean absolute difference is %f" % diff)
        fa = FactorAnalysis(n_components=n_components,
                            noise_variance_init=np.ones(n_features))
        assert_raises(ValueError, fa.fit, X[:, :2])

    f = lambda x, y: np.abs(getattr(x, y))  # sign will not be equal
    fa1, fa2 = fas
    for attr in ['loglike_', 'components_', 'noise_variance_']:
        assert_almost_equal(f(fa1, attr), f(fa2, attr))
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter('always', ConvergenceWarning)
        fa1.max_iter = 1
        fa1.verbose = True
        fa1.fit(X)
        assert_true(w[-1].category == ConvergenceWarning)

        warnings.simplefilter('always', DeprecationWarning)
        FactorAnalysis(verbose=1)
        assert_true(w[-1].category == DeprecationWarning)
开发者ID:ChicoQ,项目名称:scikit-learn,代码行数:62,代码来源:test_factor_analysis.py

示例3: initialize

# 需要导入模块: from sklearn.decomposition import FactorAnalysis [as 别名]
# 或者: from sklearn.decomposition.FactorAnalysis import fit [as 别名]
    def initialize(self):
        """
        Initialize the model.
        """
        # inverse variance weighted mean
        if np.sum(self.obsvar) != 0.0:
            self.mean = np.sum(self.data / self.obsvar, axis=0) / \
                np.sum(1.0 / self.obsvar, axis=0)
        else:
            self.mean = np.mean(self.data, axis=0)

        # use Factor Analysis to initialize factor loadings
        if self.M == 0:
            self.lam = np.zeros(1)
        else:
            fa = FactorAnalysis(n_components=self.M)
            fa.fit(self.data)
            self.lam = fa.components_.T

        # initialize jitter
        if self.jtype is None:
            self.jitter = np.array([])
        elif self.jtype is 'one':
            self.jitter = 0.0
        else:
            self.jitter = np.zeros(self.D)

        # save a copy
        self.initial_mean = self.mean.copy()
        self.initial_jitter = self.jitter.copy()
        self.initial_lambda = self.lam.copy()
开发者ID:Robabrown,项目名称:consomme,代码行数:33,代码来源:hfa_batch.py

示例4: get_inv_diag_plus_low_rank_cov_op

# 需要导入模块: from sklearn.decomposition import FactorAnalysis [as 别名]
# 或者: from sklearn.decomposition.FactorAnalysis import fit [as 别名]
def get_inv_diag_plus_low_rank_cov_op(X, rank=2):
    fa = FactorAnalysis(n_components=rank)
    fa.fit(X)
    components = fa.components_
    noise_vars = fa.noise_variance_
    activations = fa.transform(X)

    return _woodbury_inverse(_diagonal_operator(1. / noise_vars),
                 aslinearoperator(np.linalg.inv(1. / len(activations) * 
                                  activations.T.dot(activations))),
                 components.T, components)
开发者ID:eickenberg,项目名称:fbg_code,代码行数:13,代码来源:multi_target_ridge_with_noise_covariance.py

示例5: initializeParams

# 需要导入模块: from sklearn.decomposition import FactorAnalysis [as 别名]
# 或者: from sklearn.decomposition.FactorAnalysis import fit [as 别名]
def initializeParams(Y, K, singleSigma=False, makePlot=False):
	"""
	initializes parameters using a standard factor analysis model (on imputed data) + exponential curve fitting.
	Checked.
	Input:
	Y: data matrix, n_samples x n_genes
	K: number of latent components
	singleSigma: uses only a single sigma as opposed to a different sigma for every gene
	makePlot: makes a mu - p_0 plot and shows the decaying exponential fit.
	Returns:
	A, mus, sigmas, decay_coef: initialized model parameters.
	"""

	N, D = Y.shape
	model = FactorAnalysis(n_components=K)
	zeroedY = deepcopy(Y)
	mus = np.zeros([D, 1])

	for j in range(D):
		non_zero_idxs = np.abs(Y[:, j]) > 1e-6
		mus[j] = zeroedY[:, j].mean()
		zeroedY[:, j] = zeroedY[:, j] - mus[j]

	model.fit(zeroedY)

	A = model.components_.transpose()
	sigmas = np.atleast_2d(np.sqrt(model.noise_variance_)).transpose()
	if singleSigma:
		sigmas = np.mean(sigmas) * np.ones(sigmas.shape)

	# Now fit decay coefficient
	means = []
	ps = []
	for j in range(D):
		non_zero_idxs = np.abs(Y[:, j]) > 1e-6
		means.append(Y[non_zero_idxs, j].mean())
		ps.append(1 - non_zero_idxs.mean())

	decay_coef, pcov = curve_fit(exp_decay, means, ps, p0=.05)
	decay_coef = decay_coef[0]

	mse = np.mean(np.abs(ps - np.exp(-decay_coef * (np.array(means) ** 2))))

	if (mse > 0) and makePlot:
		from matplotlib.pyplot import figure, scatter, plot, title, show
		figure()
		scatter(means, ps)
		plot(np.arange(min(means), max(means), .1), np.exp(-decay_coef * (np.arange(min(means), max(means), .1) ** 2)))
		title('Decay Coef is %2.3f; MSE is %2.3f' % (decay_coef, mse))
		show()

	return A, mus, sigmas, decay_coef
开发者ID:epierson9,项目名称:ZIFA,代码行数:54,代码来源:ZIFA.py

示例6: sd_fa

# 需要导入模块: from sklearn.decomposition import FactorAnalysis [as 别名]
# 或者: from sklearn.decomposition.FactorAnalysis import fit [as 别名]
def sd_fa(fname,components,result_name):
    '''
    pca 计算
    '''
    cl_data,area_list = data_set(fname)
    values = cl_data.values
    fa = FactorAnalysis(n_components=components)
    #数据标准化
    values = preprocessing.scale(values)
    try:
        fa.fit(values)
    except Exception,e:
            logging.error("factor analysis fit error")
            sys.exit()
开发者ID:xiancode,项目名称:sd_fa,代码行数:16,代码来源:sd_fa.py

示例7: factor_analysis

# 需要导入模块: from sklearn.decomposition import FactorAnalysis [as 别名]
# 或者: from sklearn.decomposition.FactorAnalysis import fit [as 别名]
def factor_analysis(x, dims=3):
  x = to_ndarray(x)
  s = scale(x, axis=0, with_mean=True, with_std=True, copy=True)
  fa_model = FactorAnalysis(n_components=dims, svd_method="lapack")
  fitted = fa_model.fit(s)
  y = fitted.transform(s)
  print("Factor Analysis - Reduced dims from {} to {}".format( x.shape, y.shape ))
  return y, fitted
开发者ID:jakobjoachim,项目名称:text-mining-haw-bachelor,代码行数:10,代码来源:decomposition.py

示例8: dataTransformations

# 需要导入模块: from sklearn.decomposition import FactorAnalysis [as 别名]
# 或者: from sklearn.decomposition.FactorAnalysis import fit [as 别名]
def dataTransformations(x):

	x.rename(columns={'OCUPVIVPAR': 'Dwellers'}, inplace=True)
	#water
	x['Water'] = x['VPH_AGUAFV']/x['Houses']

	#Sanitation use VPH_EXCSA and VPH_NODREN
	x['Sanitation'] = (x['Houses'] - x['VPH_EXCSA'] + x['VPH_NODREN']) / (2.*x['Houses'])

	#Overcrowding use VPH_1CUART and PRO_OCUP_C
	# x['Density'] = 1. - 1./(1. +x['PRO_OCUP_C'])
	x['Density'] = x['PRO_OCUP_C']-2.
	x.loc[x.Density<0,'Density'] = 0.
	x['Density'] = 1. - 1./(1. + x.Density)
	x['Density'] = x['Density']/x['Density'].max()
	
	#Structure VPH_1CUART and VPH_PISOTI
	x['Structure'] = (x['VPH_PISOTI'] + x['VPH_1CUART']) / (2*x['Houses'])

	ssiData = pd.DataFrame(normalize(x[['Water','Structure','Density','Sanitation']],axis=0), columns=['Water','Structure','Density','Sanitation'])

	# x.loc[:,'Factor'] = zeros(len(x)	
	facAn = FactorAnalysis(n_components = 1)
	facAn.fit(ssiData)
	x.loc[:,'Factor'] = dot(facAn.components_**2,transpose(ssiData.values))[0]

	#K-Means
	k_meansX = ssiData

	# do the clustering
	k_means = KMeans(n_clusters=4)
	k_means.fit(k_meansX) 
	x.loc[:,'K_Means'] = k_means.labels_

	#linear combination

	x.loc[:,'LC'] = x[['Water','Structure','Sanitation']].sum(axis=1) + (x['PRO_OCUP_C']/ x['PRO_OCUP_C'].max())

	


	#save x to csv
	# x.to_csv(folderPath+'dataTrans.csv')
	return x
开发者ID:dabernal,项目名称:Slums,代码行数:46,代码来源:AGEB_New.py

示例9: test_factor_analysis

# 需要导入模块: from sklearn.decomposition import FactorAnalysis [as 别名]
# 或者: from sklearn.decomposition.FactorAnalysis import fit [as 别名]
def test_factor_analysis():
    """Test FactorAnalysis ability to recover the data covariance structure
    """
    rng = np.random.RandomState(0)
    n_samples, n_features, n_components = 20, 5, 3

    # Some random settings for the generative model
    W = rng.randn(n_components, n_features)
    # latent variable of dim 3, 20 of it
    h = rng.randn(n_samples, n_components)
    # using gamma to model different noise variance
    # per component
    noise = rng.gamma(1, size=n_features) \
                * rng.randn(n_samples, n_features)

    # generate observations
    # wlog, mean is 0
    X = np.dot(h, W) + noise

    fa = FactorAnalysis(n_components=n_components)
    fa.fit(X)
    X_t = fa.transform(X)
    assert_true(X_t.shape == (n_samples, n_components))

    assert_almost_equal(fa.loglike_[-1], fa.score(X).sum())

    # Make log likelihood increases at each iteration
    assert_true(np.all(np.diff(fa.loglike_) > 0.))

    # Sample Covariance
    scov = np.cov(X, rowvar=0., bias=1.)

    # Model Covariance
    mcov = fa.get_covariance()
    diff = np.sum(np.abs(scov - mcov)) / W.size
    assert_true(diff < 0.1, "Mean absolute difference is %f" % diff)

    fa = FactorAnalysis(n_components=n_components,
                        noise_variance_init=np.ones(n_features))
    assert_raises(ValueError, fa.fit, X[:, :2])
开发者ID:cdeil,项目名称:scikit-learn,代码行数:42,代码来源:test_factor_analysis.py

示例10: initalizeParams

# 需要导入模块: from sklearn.decomposition import FactorAnalysis [as 别名]
# 或者: from sklearn.decomposition.FactorAnalysis import fit [as 别名]
def initalizeParams(Y, k, method = 'standard'):
	"""
	initializes parameters. 
	By default, (method set to "standard") initializes using a mixture model. 
	If method is set to "high_dimensional", first does dimensionality reduction using factor analysis 
	and then clusters the low-dimensional data. 
	Checked.
	"""
	assert(method in ['high_dimensional', 'standard'])
	if method == 'high_dimensional':
		N, D = Y.shape
		#initialize using factor analysis. 
		model = FactorAnalysis(n_components = 5)
		low_dim_Y = model.fit_transform(Y)
		kmeans_model = KMeans(n_clusters = k)
		z = kmeans_model.fit_predict(low_dim_Y)
		cluster_mus = np.zeros([D, k])
		cluster_weights = np.zeros([k,])
		cluster_sigmas = np.zeros([D, k])
		
		for z_i in sorted(set(z)):
			idxs = (z == z_i)
			cluster_weights[z_i] = np.mean(idxs)
			cluster_Y = Y[idxs, :]
			cluster_Y_is_nonzero = np.abs(cluster_Y) > 1e-6
			cluster_mus[:, z_i] = cluster_Y.sum(axis = 0) / cluster_Y_is_nonzero.sum(axis = 0)
			
			cluster_sigmas[:, z_i] = np.sqrt(((cluster_Y ** 2).sum(axis = 0) - 2 * cluster_mus[:, z_i] * (cluster_Y.sum(axis = 0)) + cluster_mus[:, z_i]**2 * cluster_Y_is_nonzero.sum(axis = 0)) / cluster_Y_is_nonzero.sum(axis = 0))
			for j in range(1, 5):
				assert(np.abs(cluster_sigmas[j, z_i] - np.std(cluster_Y[cluster_Y_is_nonzero[:, j], j])) < 1e-4)		
		
		
	if method == 'standard':
		N, D = Y.shape
		model = GMM(n_components = k)
		imputedY = deepcopy(Y)
		for j in range(D):
			non_zero_idxs = np.abs(Y[:, j]) > 1e-6
			for i in range(N):
				if Y[i][j] == 0:
					imputedY[i][j] = np.random.choice(Y[non_zero_idxs, j])
		model.fit(imputedY)
		cluster_mus = model.means_.transpose()
		cluster_weights = model.weights_
		cluster_sigmas = np.sqrt(model.covars_.transpose())
		
	#now fit decay coefficient
	means = []
	ps = []
	for j in range(D):
		non_zero_idxs = np.abs(Y[:, j]) > 1e-6
		means.append(Y[non_zero_idxs, j].mean())
		ps.append(1 - non_zero_idxs.mean())
	
	
	decay_coef, pcov = curve_fit(exp_decay, means, ps)
	mse = np.mean(np.abs(ps - np.exp(-decay_coef * (np.array(means) ** 2))))
	print 'Decay Coef is %2.3f; MSE is %2.3f' % (decay_coef, mse)
	
	decay_coef = decay_coef[0]

	
	assert(np.all(cluster_sigmas > 0))
	return cluster_mus, cluster_sigmas, cluster_weights, decay_coef
开发者ID:ahwillia,项目名称:ZIMM,代码行数:66,代码来源:ZIMM.py

示例11: load_data

# 需要导入模块: from sklearn.decomposition import FactorAnalysis [as 别名]
# 或者: from sklearn.decomposition.FactorAnalysis import fit [as 别名]
from data import load_data
from sklearn.decomposition import FactorAnalysis
try:
    import cPickle as pickle
except:
    import pickle

# Factor Analysis

# ================================================================
# Apply factor analysis on the tf-idf matrix and transform raw documents into
# intermediate representation.
docs_tfidf, vocab_tfidf, vocabulary = load_data(subset='all')
n_components = 40
fa = FactorAnalysis(n_components=n_components)
fa.fit(docs_tfidf.toarray())
fa_words = fa.transform(vocab_tfidf.toarray())

# Create a dict to hold the new pca words.
fa_dict = dict(zip(vocabulary, fa_words))

# Store the intermediate representation pca words on disk.
fa_dict_filename = 'fa_dict.pk'
if not os.path.exists(fa_dict_filename):
    fa_dict_file = open(fa_dict_filename, 'w')
    pickle.dump(fa_dict, fa_dict_file)

# Store estimator on dist for further usage.
fa_estimator_filename = 'fa_estimator.pk'
if not os.path.exists(fa_estimator_filename):
    fa_estimator_file = open(fa_estimator_filename, 'w')
开发者ID:shawnLeeZX,项目名称:learning_intermediate_representation,代码行数:33,代码来源:fa.py

示例12: test_factor_analysis

# 需要导入模块: from sklearn.decomposition import FactorAnalysis [as 别名]
# 或者: from sklearn.decomposition.FactorAnalysis import fit [as 别名]
def test_factor_analysis():
    # Test FactorAnalysis ability to recover the data covariance structure
    rng = np.random.RandomState(0)
    n_samples, n_features, n_components = 20, 5, 3

    # Some random settings for the generative model
    W = rng.randn(n_components, n_features)
    # latent variable of dim 3, 20 of it
    h = rng.randn(n_samples, n_components)
    # using gamma to model different noise variance
    # per component
    noise = rng.gamma(1, size=n_features) * rng.randn(n_samples, n_features)

    # generate observations
    # wlog, mean is 0
    X = np.dot(h, W) + noise

    assert_raises(ValueError, FactorAnalysis, svd_method='foo')
    fa_fail = FactorAnalysis()
    fa_fail.svd_method = 'foo'
    assert_raises(ValueError, fa_fail.fit, X)
    fas = []
    for method in ['randomized', 'lapack']:
        fa = FactorAnalysis(n_components=n_components, svd_method=method)
        fa.fit(X)
        fas.append(fa)

        X_t = fa.transform(X)
        assert_equal(X_t.shape, (n_samples, n_components))

        assert_almost_equal(fa.loglike_[-1], fa.score_samples(X).sum())
        assert_almost_equal(fa.score_samples(X).mean(), fa.score(X))

        diff = np.all(np.diff(fa.loglike_))
        assert_greater(diff, 0., 'Log likelihood dif not increase')

        # Sample Covariance
        scov = np.cov(X, rowvar=0., bias=1.)

        # Model Covariance
        mcov = fa.get_covariance()
        diff = np.sum(np.abs(scov - mcov)) / W.size
        assert_less(diff, 0.1, "Mean absolute difference is %f" % diff)
        fa = FactorAnalysis(n_components=n_components,
                            noise_variance_init=np.ones(n_features))
        assert_raises(ValueError, fa.fit, X[:, :2])

    f = lambda x, y: np.abs(getattr(x, y))  # sign will not be equal
    fa1, fa2 = fas
    for attr in ['loglike_', 'components_', 'noise_variance_']:
        assert_almost_equal(f(fa1, attr), f(fa2, attr))

    fa1.max_iter = 1
    fa1.verbose = True
    assert_warns(ConvergenceWarning, fa1.fit, X)

    # Test get_covariance and get_precision with n_components == n_features
    # with n_components < n_features and with n_components == 0
    for n_components in [0, 2, X.shape[1]]:
        fa.n_components = n_components
        fa.fit(X)
        cov = fa.get_covariance()
        precision = fa.get_precision()
        assert_array_almost_equal(np.dot(cov, precision),
                                  np.eye(X.shape[1]), 12)
开发者ID:1992huanghai,项目名称:scikit-learn,代码行数:67,代码来源:test_factor_analysis.py

示例13: factorAna

# 需要导入模块: from sklearn.decomposition import FactorAnalysis [as 别名]
# 或者: from sklearn.decomposition.FactorAnalysis import fit [as 别名]
def factorAna(x,testData,n_components):
    fa = FactorAnalysis(n_components) 
    fa.fit(x)   
    newData = fa.transform(testData)  
    return newData    
开发者ID:ruige123456,项目名称:dataMining,代码行数:7,代码来源:dfs.py

示例14: range

# 需要导入模块: from sklearn.decomposition import FactorAnalysis [as 别名]
# 或者: from sklearn.decomposition.FactorAnalysis import fit [as 别名]

kf = cross_validation.KFold(cdata.shape[0], n_folds=4)

max_components=30

sc=numpy.zeros((max_components,4))

for n_components in range(1,max_components):
    fa=FactorAnalysis(n_components=n_components)
    fold=0
    for train,test in kf:
        train_data=cdata[train,:]
        test_data=cdata[test,:]

        fa.fit(train_data)
        sc[n_components,fold]=fa.score(test_data)
        fold+=1

meanscore=numpy.mean(sc,1)
meanscore[0]=-numpy.inf
maxscore=numpy.argmax(meanscore)
print ('crossvalidation suggests %d components'%maxscore)

# now run it on full dataset to get components
fa=FactorAnalysis(n_components=maxscore)
fa.fit(cdata)

for c in range(maxscore):
    s=numpy.argsort(fa.components_[c,:])
    print('')
开发者ID:IanEisenberg,项目名称:Self_Regulation_Ontology,代码行数:32,代码来源:survey_sklearn_factoranalysis.py

示例15: learn

# 需要导入模块: from sklearn.decomposition import FactorAnalysis [as 别名]
# 或者: from sklearn.decomposition.FactorAnalysis import fit [as 别名]
def learn(data):
    model=FA(n_components =2)
    model.fit(data)
    return PreferenceGenerator(model.components_)
开发者ID:tjacek,项目名称:LearningPreferences,代码行数:6,代码来源:generative.py


注:本文中的sklearn.decomposition.FactorAnalysis.fit方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。