This article collects typical usage examples of the Python method sklearn.decomposition.FactorAnalysis.transform. If you have been wondering what exactly FactorAnalysis.transform does, or how to use it, the curated code examples below may help. You can also explore further usage examples of the class this method belongs to, sklearn.decomposition.FactorAnalysis.
Below are 10 code examples of FactorAnalysis.transform, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
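Before the examples, a quick orientation: FactorAnalysis follows the standard scikit-learn estimator API, so transform(X) projects the observed data onto the latent factors learned by fit(X). A minimal sketch with synthetic data (the shapes and values here are illustrative assumptions, not taken from any example below):

import numpy as np
from sklearn.decomposition import FactorAnalysis

rng = np.random.RandomState(0)
X = rng.randn(100, 8)    # 100 samples, 8 observed features

fa = FactorAnalysis(n_components=3)
fa.fit(X)                # learns components_ (loadings) and noise_variance_
Z = fa.transform(X)      # posterior means of the latent factors
print(Z.shape)           # (100, 3)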
Example 1: test_factor_analysis
# Required import: from sklearn.decomposition import FactorAnalysis [as alias]
# Or: from sklearn.decomposition.FactorAnalysis import transform [as alias]
def test_factor_analysis():
    """Test FactorAnalysis ability to recover the data covariance structure
    """
    rng = np.random.RandomState(0)
    n_samples, n_features, n_components = 20, 5, 3

    # Some random settings for the generative model
    W = rng.randn(n_components, n_features)
    # latent variables of dim 3, 20 of them
    h = rng.randn(n_samples, n_components)
    # using gamma to model different noise variance
    # per component
    noise = rng.gamma(1, size=n_features) * rng.randn(n_samples, n_features)

    # generate observations
    # wlog, mean is 0
    X = np.dot(h, W) + noise

    assert_raises(ValueError, FactorAnalysis, svd_method='foo')
    fa_fail = FactorAnalysis()
    fa_fail.svd_method = 'foo'
    assert_raises(ValueError, fa_fail.fit, X)

    fas = []
    for method in ['randomized', 'lapack']:
        fa = FactorAnalysis(n_components=n_components, svd_method=method)
        fa.fit(X)
        fas.append(fa)

        X_t = fa.transform(X)
        assert_equal(X_t.shape, (n_samples, n_components))

        assert_almost_equal(fa.loglike_[-1], fa.score(X).sum())

        # the log likelihood should increase at each EM iteration
        diff = np.diff(fa.loglike_).min()
        assert_greater(diff, 0., 'Log likelihood did not increase')

        # Sample Covariance
        scov = np.cov(X, rowvar=0., bias=1.)

        # Model Covariance
        mcov = fa.get_covariance()
        diff = np.sum(np.abs(scov - mcov)) / W.size
        assert_less(diff, 0.1, "Mean absolute difference is %f" % diff)

        fa = FactorAnalysis(n_components=n_components,
                            noise_variance_init=np.ones(n_features))
        assert_raises(ValueError, fa.fit, X[:, :2])

    f = lambda x, y: np.abs(getattr(x, y))  # sign will not be equal
    fa1, fa2 = fas
    for attr in ['loglike_', 'components_', 'noise_variance_']:
        assert_almost_equal(f(fa1, attr), f(fa2, attr))

    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter('always', ConvergenceWarning)
        fa1.max_iter = 1
        fa1.verbose = True
        fa1.fit(X)
        assert_true(w[-1].category == ConvergenceWarning)

        warnings.simplefilter('always', DeprecationWarning)
        FactorAnalysis(verbose=1)
        assert_true(w[-1].category == DeprecationWarning)
Example 2: get_inv_diag_plus_low_rank_cov_op
# Required import: from sklearn.decomposition import FactorAnalysis [as alias]
# Or: from sklearn.decomposition.FactorAnalysis import transform [as alias]
def get_inv_diag_plus_low_rank_cov_op(X, rank=2):
    fa = FactorAnalysis(n_components=rank)
    fa.fit(X)
    components = fa.components_
    noise_vars = fa.noise_variance_
    activations = fa.transform(X)
    return _woodbury_inverse(_diagonal_operator(1. / noise_vars),
                             aslinearoperator(np.linalg.inv(
                                 1. / len(activations)
                                 * activations.T.dot(activations))),
                             components.T, components)
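The helpers _diagonal_operator and _woodbury_inverse are defined elsewhere in the source project and are not shown here. For orientation, the identity being exploited is the Woodbury matrix identity, (A + U C V)^-1 = A^-1 - A^-1 U (C^-1 + V A^-1 U)^-1 V A^-1. A dense NumPy sketch of the same computation (a hypothetical helper, not the project's implementation; A is assumed diagonal as in the example above):

import numpy as np

def woodbury_inverse_dense(a_diag, c_inv, u, v):
    # Invert diag(a_diag) + u @ inv(c_inv) @ v via the Woodbury identity.
    # a_diag: (n,) diagonal of A; c_inv: (k, k); u: (n, k); v: (k, n)
    a_inv = 1. / a_diag                             # A is diagonal, so A^-1 is elementwise
    inner = np.linalg.inv(c_inv + (v * a_inv) @ u)  # only a (k, k) inverse is needed
    return np.diag(a_inv) - (a_inv[:, None] * u) @ inner @ (v * a_inv)

The payoff is that the expensive (n, n) inverse is replaced by a (k, k) one, which is what makes a diagonal-plus-low-rank covariance operator cheap to invert.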
Example 3: test_factor_analysis
# Required import: from sklearn.decomposition import FactorAnalysis [as alias]
# Or: from sklearn.decomposition.FactorAnalysis import transform [as alias]
def test_factor_analysis():
    """Test FactorAnalysis ability to recover the data covariance structure
    """
    rng = np.random.RandomState(0)
    n_samples, n_features, n_components = 20, 5, 3

    # Some random settings for the generative model
    W = rng.randn(n_components, n_features)
    # latent variables of dim 3, 20 of them
    h = rng.randn(n_samples, n_components)
    # using gamma to model different noise variance
    # per component
    noise = rng.gamma(1, size=n_features) \
        * rng.randn(n_samples, n_features)

    # generate observations
    # wlog, mean is 0
    X = np.dot(h, W) + noise

    fa = FactorAnalysis(n_components=n_components)
    fa.fit(X)
    X_t = fa.transform(X)
    assert_true(X_t.shape == (n_samples, n_components))

    assert_almost_equal(fa.loglike_[-1], fa.score(X).sum())

    # Make sure the log likelihood increases at each iteration
    assert_true(np.all(np.diff(fa.loglike_) > 0.))

    # Sample Covariance
    scov = np.cov(X, rowvar=0., bias=1.)

    # Model Covariance
    mcov = fa.get_covariance()
    diff = np.sum(np.abs(scov - mcov)) / W.size
    assert_true(diff < 0.1, "Mean absolute difference is %f" % diff)

    fa = FactorAnalysis(n_components=n_components,
                        noise_variance_init=np.ones(n_features))
    assert_raises(ValueError, fa.fit, X[:, :2])
Example 4: initialize
# Required import: from sklearn.decomposition import FactorAnalysis [as alias]
# Or: from sklearn.decomposition.FactorAnalysis import transform [as alias]
def initialize(trials, params, config):
    """Make skeleton"""
    # TODO: fast initialization for large dataset
    from sklearn.decomposition import FactorAnalysis

    zdim = params["zdim"]
    xdim = params["xdim"]

    # TODO: use only a subsample of trials?
    y = np.concatenate([trial["y"] for trial in trials], axis=0)
    subsample = np.random.choice(y.shape[0], max(y.shape[0] // 10, 50))
    ydim = y.shape[-1]
    fa = FactorAnalysis(n_components=zdim, random_state=0)
    z = fa.fit_transform(y[subsample, :])
    a = fa.components_
    b = np.log(np.maximum(np.mean(y, axis=0, keepdims=True), config["eps"]))
    noise = np.var(y[subsample, :] - z @ a, ddof=0, axis=0)

    # fill in params only where they are missing; two cases:
    # 1) the key is absent
    # 2) the value is empty (None)
    if params.get("a") is None:
        params.update(a=a)
    if params.get("b") is None:
        params.update(b=b)
    if params.get("noise") is None:
        params.update(noise=noise)

    for trial in trials:
        length = trial["y"].shape[0]

        if trial.get("mu") is None:
            trial.update(mu=fa.transform(trial["y"]))

        if trial.get("x") is None:
            trial.update(x=np.ones((length, xdim, ydim)))

        trial.update({"w": np.zeros((length, zdim)), "v": np.zeros((length, zdim))})
Example 5: test_factor_analysis
# Required import: from sklearn.decomposition import FactorAnalysis [as alias]
# Or: from sklearn.decomposition.FactorAnalysis import transform [as alias]
def test_factor_analysis():
    # Test FactorAnalysis ability to recover the data covariance structure
    rng = np.random.RandomState(0)
    n_samples, n_features, n_components = 20, 5, 3

    # Some random settings for the generative model
    W = rng.randn(n_components, n_features)
    # latent variables of dim 3, 20 of them
    h = rng.randn(n_samples, n_components)
    # using gamma to model different noise variance
    # per component
    noise = rng.gamma(1, size=n_features) * rng.randn(n_samples, n_features)

    # generate observations
    # wlog, mean is 0
    X = np.dot(h, W) + noise

    assert_raises(ValueError, FactorAnalysis, svd_method='foo')
    fa_fail = FactorAnalysis()
    fa_fail.svd_method = 'foo'
    assert_raises(ValueError, fa_fail.fit, X)

    fas = []
    for method in ['randomized', 'lapack']:
        fa = FactorAnalysis(n_components=n_components, svd_method=method)
        fa.fit(X)
        fas.append(fa)

        X_t = fa.transform(X)
        assert_equal(X_t.shape, (n_samples, n_components))

        assert_almost_equal(fa.loglike_[-1], fa.score_samples(X).sum())
        assert_almost_equal(fa.score_samples(X).mean(), fa.score(X))

        # the log likelihood should increase at each EM iteration
        diff = np.diff(fa.loglike_).min()
        assert_greater(diff, 0., 'Log likelihood did not increase')

        # Sample Covariance
        scov = np.cov(X, rowvar=0., bias=1.)

        # Model Covariance
        mcov = fa.get_covariance()
        diff = np.sum(np.abs(scov - mcov)) / W.size
        assert_less(diff, 0.1, "Mean absolute difference is %f" % diff)

        fa = FactorAnalysis(n_components=n_components,
                            noise_variance_init=np.ones(n_features))
        assert_raises(ValueError, fa.fit, X[:, :2])

    f = lambda x, y: np.abs(getattr(x, y))  # sign will not be equal
    fa1, fa2 = fas
    for attr in ['loglike_', 'components_', 'noise_variance_']:
        assert_almost_equal(f(fa1, attr), f(fa2, attr))

    fa1.max_iter = 1
    fa1.verbose = True
    assert_warns(ConvergenceWarning, fa1.fit, X)

    # Test get_covariance and get_precision with n_components == n_features,
    # with n_components < n_features and with n_components == 0
    for n_components in [0, 2, X.shape[1]]:
        fa.n_components = n_components
        fa.fit(X)
        cov = fa.get_covariance()
        precision = fa.get_precision()
        assert_array_almost_equal(np.dot(cov, precision),
                                  np.eye(X.shape[1]), 12)
Example 6: load_data
# Required import: from sklearn.decomposition import FactorAnalysis [as alias]
# Or: from sklearn.decomposition.FactorAnalysis import transform [as alias]
import os
from sklearn.decomposition import FactorAnalysis
try:
    import cPickle as pickle
except ImportError:
    import pickle

# Factor Analysis
# ================================================================
# Apply factor analysis on the tf-idf matrix and transform raw documents into
# an intermediate representation.
docs_tfidf, vocab_tfidf, vocabulary = load_data(subset='all')  # load_data is defined elsewhere in the source project

n_components = 40
fa = FactorAnalysis(n_components=n_components)
fa.fit(docs_tfidf.toarray())
fa_words = fa.transform(vocab_tfidf.toarray())

# Create a dict mapping each vocabulary word to its factor representation.
fa_dict = dict(zip(vocabulary, fa_words))

# Store the intermediate representation on disk.
fa_dict_filename = 'fa_dict.pk'
if not os.path.exists(fa_dict_filename):
    with open(fa_dict_filename, 'wb') as fa_dict_file:  # pickle requires binary mode
        pickle.dump(fa_dict, fa_dict_file)

# Store the estimator on disk for further usage.
fa_estimator_filename = 'fa_estimator.pk'
if not os.path.exists(fa_estimator_filename):
    with open(fa_estimator_filename, 'wb') as fa_estimator_file:
        pickle.dump(fa, fa_estimator_file)
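To reuse the stored estimator later, the pickle can be read back in binary mode and applied to new documents (new_docs_tfidf here is a hypothetical tf-idf matrix of unseen documents, not part of the original snippet):

with open('fa_estimator.pk', 'rb') as fa_estimator_file:
    fa = pickle.load(fa_estimator_file)
new_docs_fa = fa.transform(new_docs_tfidf.toarray())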
Example 7: factorAna
# Required import: from sklearn.decomposition import FactorAnalysis [as alias]
# Or: from sklearn.decomposition.FactorAnalysis import transform [as alias]
def factorAna(x, testData, n_components):
    fa = FactorAnalysis(n_components)
    fa.fit(x)
    newData = fa.transform(testData)
    return newData
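A quick usage sketch with synthetic data (shapes are illustrative assumptions): fit on training data, then project held-out data into the same factor space:

import numpy as np
rng = np.random.RandomState(0)
X_train = rng.randn(80, 10)
X_test = rng.randn(20, 10)
X_test_factors = factorAna(X_train, X_test, 4)  # -> shape (20, 4)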
Example 8: range
# Required import: from sklearn.decomposition import FactorAnalysis [as alias]
# Or: from sklearn.decomposition.FactorAnalysis import transform [as alias]
certainty.append([])
# each network has a vote in that cross-validation fold
for s in range(len(seeds)):
    X = np.vstack([np.array(g1_fmri[s]), np.array(g2_fmri[s])])
    y = np.array(labels)
    X = preprocessing.scale(X)
    print('seed %d: cv %d/%d' % (s + 1, oidx + 1, nobs))
    X_train = X[train]
    X_test = X[test]
    y_train = y[train]
    y_test = y[test]

    c_val_scores = []
    dimred = FactorAnalysis(n_components=20)
    X_train = dimred.fit_transform(X_train)
    X_test = dimred.transform(X_test)
    for c in cs:
        inner_preds = []
        inner_true = []
        clf = LogisticRegression(C=c, penalty="l1", dual=False, class_weight='auto')
        for iidx, (itrain, itest) in enumerate(inner_cv):
            X_inner_train = X_train[itrain]
            X_val = X_train[itest]
            y_inner_train = y_train[itrain]
            y_val = y_train[itest]
            scaler = preprocessing.StandardScaler().fit(X_inner_train)
            X_inner_train = scaler.transform(X_inner_train)
            X_val = scaler.transform(X_val)
            clf.fit(X_inner_train, y_inner_train)
            inner_preds.append(clf.predict(X_val))
            inner_true.append(y_val)
        # score predictions against the matching validation labels,
        # fold by fold, rather than against y_train in its original order
        c_val_scores.append(f1_score(np.concatenate(inner_true),
                                     np.concatenate(inner_preds), pos_label=1))
    best_c = cs[np.argmax(c_val_scores)]
Example 9:
# Required import: from sklearn.decomposition import FactorAnalysis [as alias]
# Or: from sklearn.decomposition.FactorAnalysis import transform [as alias]
# <codecell>
comps = pd.DataFrame(componentMatrix).T
# <codecell>
comps.index = pct_df.columns
comps
# <codecell>
comps.to_csv('factors.csv',sep = ',')
# <codecell>
X_transformed = fa.transform(X)
# <codecell>
factored = pd.DataFrame(X_transformed)
# <codecell>
factored.to_csv('factoredX.csv', sep = ',')
# <codecell>
from sklearn.cluster import KMeans
# <codecell>
Example 10: read_csv
# Required import: from sklearn.decomposition import FactorAnalysis [as alias]
# Or: from sklearn.decomposition.FactorAnalysis import transform [as alias]
# (assumed context from the original script: from pandas import read_csv, DataFrame;
#  wd and metabolomics_cinfo are defined earlier)
metabolomics = read_csv('%s/data/kirc_metabolomics_abs_imputed_normalised.txt' % wd, sep='\t', index_col=0)

# Transcriptomics
transcriptomics = read_csv('%s/data/kirc_transcriptomics_filtered_voom_normalised.txt' % wd, sep='\t', index_col=0)

# -- Factor analysis
n_components = 5

# Metabolomics
metabolomics_fa = FactorAnalysis(n_components=n_components).fit(metabolomics)
metabolomics_hfac = DataFrame(metabolomics_fa.components_, index=['Factor%d' % (i + 1) for i in range(n_components)], columns=metabolomics.columns).T
metabolomics_hfac['type'] = [metabolomics_cinfo.loc[i, 'TISSUE TYPE'] for i in metabolomics_hfac.index]
metabolomics_feat = DataFrame(metabolomics_fa.transform(metabolomics), index=metabolomics.index, columns=['Factor%d' % (i + 1) for i in range(n_components)])
print(metabolomics_feat.head())

# Transcriptomics
transcriptomics_fa = FactorAnalysis(n_components=n_components).fit(transcriptomics)
transcriptomics_hfac = DataFrame(transcriptomics_fa.components_, index=['Factor%d' % (i + 1) for i in range(n_components)], columns=transcriptomics.columns).T
transcriptomics_hfac['type'] = ['T' if i.split('-')[3].startswith('01') else 'N' for i in transcriptomics_hfac.index]
transcriptomics_feat = DataFrame(transcriptomics_fa.transform(transcriptomics), index=transcriptomics.index, columns=['Factor%d' % (i + 1) for i in range(n_components)])
print(transcriptomics_feat.head())

# -- Plot
pal = {'T': '#e74c3c', 'N': '#34495e'}

# Metabolomics