本文整理匯總了Python中sklearn.covariance.LedoitWolf類的典型用法代碼示例。如果您正苦於以下問題:Python LedoitWolf類的具體用法?Python LedoitWolf怎麽用?Python LedoitWolf使用的例子?那麽, 這裏精選的類代碼示例或許可以為您提供幫助。
在下文中一共展示了LedoitWolf類的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。
示例1: maximization
def maximization(self):
# mean maximization
for i in range(self._K):
mu[i] = mu_ss[i] / ndata_ss
# covariance maximization
for i in range(self._K):
for j in range(self._K):
cov[i,j] = (1.0/ ndata_ss) * cov_ss[i,j] + ndata_ss * mu[i] * mu[j] - mu_ss[i] * mu[j] - mu_ss[j] * mu[i]
# covariance shrinkage
lw = LedoitWolf()
cov_result = lw.fit(cov,assume_centered=True).covariance_
inv_cov = np.linalg.inv(cov_result)
log_det_inv_cov = np.log(np.linalg.det(inv_cov))
# topic maximization
for i in range(self._K):
sum_m = 0
for j in range(self._W):
sum_m += beta_ss[i,j]
if sum_m == 0:
sum_m = -1000 * self._W
else:
sum_m = np.log(sum_m)
for j in range(self._W):
log_beta[i,j] = np.log(beta_ss[i,j] - sum_m)
示例2: test_ledoit_wolf_small
def test_ledoit_wolf_small():
# Compare our blocked implementation to the naive implementation
X_small = X[:, :4]
lw = LedoitWolf()
lw.fit(X_small)
shrinkage_ = lw.shrinkage_
assert_almost_equal(shrinkage_, _naive_ledoit_wolf_shrinkage(X_small))
示例3: __init__
def __init__(self, k=2, gamma=1.0, covariance_estimator='ledoit-wolf'):
self.k = float(k)
self.gamma = gamma
self.covariance_estimator = covariance_estimator
if covariance_estimator == 'empirical':
self.cov = EmpiricalCovariance(store_precision=False)
elif covariance_estimator == 'ledoit-wolf':
self.cov = LedoitWolf(store_precision=False)
else:
raise NotImplementedError('%s is not implemented' % covariance_estimator)
self.x0 = None
self.x1 = None
示例4: maximization
def maximization(self):
'''
M-step of EM algorithm, use scikit.learn's LedoitWolf method to perfom
covariance matrix shrinkage.
Arguments:
sufficient statistics, i.e. model parameters
Returns:
the updated sufficient statistics which all in self definition, so no return values
'''
logger.info("running maximization function")
logger.info("mean maximization")
mu = np.divide(self.mu, self.ndata)
logger.info("covariance maximization")
for i in range(self._K):
for j in range(self._K):
self.cov[i, j] = (1.0 / self.ndata) * self.cov[i, j] + self.ndata * mu[i] * mu[j] - self.mu[i] * mu[j] - self.mu[j] * mu[i]
logger.info(" performing covariance shrinkage using sklearn module")
lw = LedoitWolf()
cov_result = lw.fit(self.cov, assume_centered=True).covariance_
self.inv_cov = np.linalg.inv(cov_result)
self.log_det_inv_cov = math_utli.safe_log(np.linalg.det(self.inv_cov))
logger.info("topic maximization")
for i in range(self._K):
sum_m = 0
sum_m += np.sum(self.beta, axis=0)[i]
if sum_m == 0:
sum_m = -1000 * self._W
else:
sum_m = np.log(sum_m)
for j in range(self._W):
self.log_beta[i, j] = math_utli.safe_log(self.beta[i, j] - sum_m)
logger.info("write model parameters to file")
logger.info("write gaussian")
with open('ctm_nu', 'w') as ctm_nu_dump:
cPickle.dump(self.nu, ctm_nu_dump)
with open('ctm_cov', 'w') as ctm_cov_dump:
cPickle.dump(self.cov, ctm_cov_dump)
with open('ctm_inv_cov', 'w') as ctm_inv_cov_dump:
cPickle.dump(self.inv_cov, ctm_inv_cov_dump)
with open('ctm_log_det_inv_cov', 'w') as ctm_log_det_inv_cov_dump:
cPickle.dump(self.log_det_inv_cov, ctm_log_det_inv_cov_dump)
logger.info("write topic matrix")
with open('ctm_log_beta', 'w') as ctm_log_beta_dump:
cPickle.dump(self.log_beta, ctm_log_beta_dump)
示例5: prepareProblem
def prepareProblem(filePath, shrinkage=False, subset=False, subsetSize=0):
# Import data from .csv
df = pd.read_csv(filePath, sep=';')
df.index = df.date
df = df.drop('date', axis=1)
# Subset, if called via subset == True
if subset == True:
df = df.tail(subsetSize)
# Estimate covariance using Empirical/MLE
# Expected input is returns, hence set: assume_centered = True
mleFitted = empirical_covariance(X=df, assume_centered=True)
sigma = mleFitted
if shrinkage == True:
# Estimate covariance using LedoitWolf, first create instance of object
lw = LedoitWolf(assume_centered=True)
lwFitted = lw.fit(X=df).covariance_
sigma = lwFitted
return sigma
示例6: test_ledoit_wolf
def test_ledoit_wolf():
"""Tests LedoitWolf module on a simple dataset.
"""
# test shrinkage coeff on a simple data set
lw = LedoitWolf()
lw.fit(X, assume_centered=True)
assert_almost_equal(lw.shrinkage_, 0.00192, 4)
assert_almost_equal(lw.score(X, assume_centered=True), -2.89795, 4)
# compare shrunk covariance obtained from data and from MLE estimate
lw_cov_from_mle, lw_shinkrage_from_mle = ledoit_wolf(X,
assume_centered=True)
assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
assert_almost_equal(lw_shinkrage_from_mle, lw.shrinkage_)
# compare estimates given by LW and ShrunkCovariance
scov = ShrunkCovariance(shrinkage=lw.shrinkage_)
scov.fit(X, assume_centered=True)
assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)
# test with n_features = 1
X_1d = X[:, 0].reshape((-1, 1))
lw = LedoitWolf()
lw.fit(X_1d, assume_centered=True)
lw_cov_from_mle, lw_shinkrage_from_mle = ledoit_wolf(X_1d,
assume_centered=True)
assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
assert_almost_equal(lw_shinkrage_from_mle, lw.shrinkage_)
assert_array_almost_equal((X_1d ** 2).sum() / n_samples, lw.covariance_, 4)
# test shrinkage coeff on a simple data set (without saving precision)
lw = LedoitWolf(store_precision=False)
lw.fit(X, assume_centered=True)
assert_almost_equal(lw.score(X, assume_centered=True), -2.89795, 4)
assert(lw.precision_ is None)
# Same tests without assuming centered data
# test shrinkage coeff on a simple data set
lw = LedoitWolf()
lw.fit(X)
assert_almost_equal(lw.shrinkage_, 0.007582, 4)
assert_almost_equal(lw.score(X), 2.243483, 4)
# compare shrunk covariance obtained from data and from MLE estimate
lw_cov_from_mle, lw_shinkrage_from_mle = ledoit_wolf(X)
assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
assert_almost_equal(lw_shinkrage_from_mle, lw.shrinkage_)
# compare estimates given by LW and ShrunkCovariance
scov = ShrunkCovariance(shrinkage=lw.shrinkage_)
scov.fit(X)
assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)
# test with n_features = 1
X_1d = X[:, 0].reshape((-1, 1))
lw = LedoitWolf()
lw.fit(X_1d)
lw_cov_from_mle, lw_shinkrage_from_mle = ledoit_wolf(X_1d)
assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
assert_almost_equal(lw_shinkrage_from_mle, lw.shrinkage_)
assert_array_almost_equal(empirical_covariance(X_1d), lw.covariance_, 4)
# test shrinkage coeff on a simple data set (without saving precision)
lw = LedoitWolf(store_precision=False)
lw.fit(X)
assert_almost_equal(lw.score(X), 2.2434839, 4)
assert(lw.precision_ is None)
示例7: test_ledoit_wolf
def test_ledoit_wolf():
"""Tests LedoitWolf module on a simple dataset.
"""
# test shrinkage coeff on a simple data set
X_centered = X - X.mean(axis=0)
lw = LedoitWolf(assume_centered=True)
lw.fit(X_centered)
shrinkage_ = lw.shrinkage_
score_ = lw.score(X_centered)
assert_almost_equal(ledoit_wolf_shrinkage(X_centered,
assume_centered=True),
shrinkage_)
assert_almost_equal(ledoit_wolf_shrinkage(X_centered,
assume_centered=True, block_size=6),
shrinkage_)
# compare shrunk covariance obtained from data and from MLE estimate
lw_cov_from_mle, lw_shinkrage_from_mle = ledoit_wolf(X_centered,
assume_centered=True)
assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
assert_almost_equal(lw_shinkrage_from_mle, lw.shrinkage_)
# compare estimates given by LW and ShrunkCovariance
scov = ShrunkCovariance(shrinkage=lw.shrinkage_, assume_centered=True)
scov.fit(X_centered)
assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)
# test with n_features = 1
X_1d = X[:, 0].reshape((-1, 1))
lw = LedoitWolf(assume_centered=True)
lw.fit(X_1d)
lw_cov_from_mle, lw_shinkrage_from_mle = ledoit_wolf(X_1d,
assume_centered=True)
assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
assert_almost_equal(lw_shinkrage_from_mle, lw.shrinkage_)
assert_array_almost_equal((X_1d ** 2).sum() / n_samples, lw.covariance_, 4)
# test shrinkage coeff on a simple data set (without saving precision)
lw = LedoitWolf(store_precision=False, assume_centered=True)
lw.fit(X_centered)
assert_almost_equal(lw.score(X_centered), score_, 4)
assert(lw.precision_ is None)
# (too) large data set
X_large = np.ones((20, 200))
assert_raises(MemoryError, ledoit_wolf, X_large, block_size=100)
# Same tests without assuming centered data
# test shrinkage coeff on a simple data set
lw = LedoitWolf()
lw.fit(X)
assert_almost_equal(lw.shrinkage_, shrinkage_, 4)
assert_almost_equal(lw.shrinkage_, ledoit_wolf_shrinkage(X))
assert_almost_equal(lw.shrinkage_, ledoit_wolf(X)[1])
assert_almost_equal(lw.score(X), score_, 4)
# compare shrunk covariance obtained from data and from MLE estimate
lw_cov_from_mle, lw_shinkrage_from_mle = ledoit_wolf(X)
assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
assert_almost_equal(lw_shinkrage_from_mle, lw.shrinkage_)
# compare estimates given by LW and ShrunkCovariance
scov = ShrunkCovariance(shrinkage=lw.shrinkage_)
scov.fit(X)
assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)
# test with n_features = 1
X_1d = X[:, 0].reshape((-1, 1))
lw = LedoitWolf()
lw.fit(X_1d)
lw_cov_from_mle, lw_shinkrage_from_mle = ledoit_wolf(X_1d)
assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
assert_almost_equal(lw_shinkrage_from_mle, lw.shrinkage_)
assert_array_almost_equal(empirical_covariance(X_1d), lw.covariance_, 4)
# test with one sample
X_1sample = np.arange(5)
lw = LedoitWolf()
with warnings.catch_warnings(record=True):
lw.fit(X_1sample)
# test shrinkage coeff on a simple data set (without saving precision)
lw = LedoitWolf(store_precision=False)
lw.fit(X)
assert_almost_equal(lw.score(X), score_, 4)
assert(lw.precision_ is None)
示例8: empirical_covariance
# under the ground-truth model, which we would not have access to in real
# settings
real_cov = np.dot(coloring_matrix.T, coloring_matrix)
emp_cov = empirical_covariance(X_train)
loglik_real = -log_likelihood(emp_cov, linalg.inv(real_cov))
# #############################################################################
# Compare different approaches to setting the parameter
# GridSearch for an optimal shrinkage coefficient
tuned_parameters = [{'shrinkage': shrinkages}]
cv = GridSearchCV(ShrunkCovariance(), tuned_parameters, cv=5)
cv.fit(X_train)
# Ledoit-Wolf optimal shrinkage coefficient estimate
lw = LedoitWolf()
loglik_lw = lw.fit(X_train).score(X_test)
# OAS coefficient estimate
oa = OAS()
loglik_oa = oa.fit(X_train).score(X_test)
# #############################################################################
# Plot results
fig = plt.figure()
plt.title("Regularized covariance: likelihood and shrinkage coefficient")
plt.xlabel('Regularization parameter: shrinkage coefficient')
plt.ylabel('Error: negative log-likelihood on test data')
# range shrinkage curve
plt.loglog(shrinkages, negative_logliks, label="Negative log-likelihood")
示例9: test_connectivity_measure_outputs
def test_connectivity_measure_outputs():
n_subjects = 10
n_features = 49
# Generate signals and compute covariances
emp_covs = []
ledoit_covs = []
signals = []
ledoit_estimator = LedoitWolf()
for k in range(n_subjects):
n_samples = 200 + k
signal, _, _ = generate_signals(n_features=n_features, n_confounds=5,
length=n_samples, same_variance=False)
signals.append(signal)
signal -= signal.mean(axis=0)
emp_covs.append((signal.T).dot(signal) / n_samples)
ledoit_covs.append(ledoit_estimator.fit(signal).covariance_)
kinds = ["covariance", "correlation", "tangent", "precision",
"partial correlation"]
# Check outputs properties
for cov_estimator, covs in zip([EmpiricalCovariance(), LedoitWolf()],
[emp_covs, ledoit_covs]):
input_covs = copy.copy(covs)
for kind in kinds:
conn_measure = ConnectivityMeasure(kind=kind,
cov_estimator=cov_estimator)
connectivities = conn_measure.fit_transform(signals)
# Generic
assert_true(isinstance(connectivities, np.ndarray))
assert_equal(len(connectivities), len(covs))
for k, cov_new in enumerate(connectivities):
assert_array_equal(input_covs[k], covs[k])
assert(is_spd(covs[k], decimal=7))
# Positive definiteness if expected and output value checks
if kind == "tangent":
assert_array_almost_equal(cov_new, cov_new.T)
gmean_sqrt = _map_eigenvalues(np.sqrt,
conn_measure.mean_)
assert(is_spd(gmean_sqrt, decimal=7))
assert(is_spd(conn_measure.whitening_, decimal=7))
assert_array_almost_equal(conn_measure.whitening_.dot(
gmean_sqrt), np.eye(n_features))
assert_array_almost_equal(gmean_sqrt.dot(
_map_eigenvalues(np.exp, cov_new)).dot(gmean_sqrt),
covs[k])
elif kind == "precision":
assert(is_spd(cov_new, decimal=7))
assert_array_almost_equal(cov_new.dot(covs[k]),
np.eye(n_features))
elif kind == "correlation":
assert(is_spd(cov_new, decimal=7))
d = np.sqrt(np.diag(np.diag(covs[k])))
if cov_estimator == EmpiricalCovariance():
assert_array_almost_equal(d.dot(cov_new).dot(d),
covs[k])
assert_array_almost_equal(np.diag(cov_new),
np.ones((n_features)))
elif kind == "partial correlation":
prec = linalg.inv(covs[k])
d = np.sqrt(np.diag(np.diag(prec)))
assert_array_almost_equal(d.dot(cov_new).dot(d), -prec +
2 * np.diag(np.diag(prec)))
# Check the mean_
for kind in kinds:
conn_measure = ConnectivityMeasure(kind=kind)
conn_measure.fit_transform(signals)
assert_equal((conn_measure.mean_).shape, (n_features, n_features))
if kind != 'tangent':
assert_array_almost_equal(
conn_measure.mean_,
np.mean(conn_measure.transform(signals), axis=0))
# Check that the mean isn't modified in transform
conn_measure = ConnectivityMeasure(kind='covariance')
conn_measure.fit(signals[:1])
mean = conn_measure.mean_
conn_measure.transform(signals[1:])
assert_array_equal(mean, conn_measure.mean_)
# Check vectorization option
for kind in kinds:
conn_measure = ConnectivityMeasure(kind=kind)
connectivities = conn_measure.fit_transform(signals)
conn_measure = ConnectivityMeasure(vectorize=True, kind=kind)
vectorized_connectivities = conn_measure.fit_transform(signals)
assert_array_almost_equal(vectorized_connectivities,
sym_matrix_to_vec(connectivities))
# Check not fitted error
assert_raises_regex(
ValueError, 'has not been fitted. ',
ConnectivityMeasure().inverse_transform,
vectorized_connectivities)
#.........這裏部分代碼省略.........
示例10: main
def main():
'''
Constructs a co-occurence network from gene expression data.
Main entry point to code.
'''
# Read in the data
if os.path.isfile(DATA_PICKLE):
print("reading previously saved data from pickle %s" % (DATA_PICKLE))
with open(DATA_PICKLE, 'rb') as file:
df = pickle.load(file)
lwe = pickle.load(file)
pmat = pickle.load(file)
pcore_indices = pickle.load(file)
pcor = pickle.load(file)
lfdr_pcor = pickle.load(file)
#prob = pickle.load(file)
else:
print("reading in data from %s" % (FILENAME))
df = pd.read_csv(FILENAME, sep='\t')
print("found %d rows and %d columns" % (df.shape[0], df.shape[1]))
# compute the row means and sort the data frame by descinding means
df['row_means'] = df.mean(axis=1)
df.sort_values('row_means', axis=0, ascending=False, inplace=True)
df.drop('row_means', axis=1, inplace=True)
# take the most abundant genes
df = df.head(PRUNE_GENES)
# Ledoit-Wolf optimal shrinkage coefficient estimate
print("computing Ledoit-Wolf optimal shrinkage coeffecient estimate")
lwe = LedoitWolf().fit(df.transpose())
pmat = lwe.get_precision()
# Convert symmetric matrix to array, first by getting indices
# of the off diagonal elements, second by pulling them into
# separate array (pcor).
print("extracting off diagnol elements of precision matrix")
pcor_indices = np.triu_indices(pmat.shape[0], 1)
pcor = pmat[pcor_indices]
# Determine edges by computing lfdr of pcor.
print("computing lfdr of partial correlations")
fdrtool = importr('fdrtool')
lfdr_pcor = fdrtool.fdrtool(FloatVector(pcor), statistic="correlation", plot=False)
#prob = 1-lfdr_pcor['lfdr']
with open(DATA_PICKLE, 'wb') as file:
pickle.dump(df, file, pickle.HIGHEST_PROTOCOL)
pickle.dump(lwe, file, pickle.HIGHEST_PROTOCOL)
pickle.dump(pmat, file, pickle.HIGHEST_PROTOCOL)
pickle.dump(pcor_indices, file, pickle.HIGHEST_PROTOCOL)
pickle.dump(pcor, file, pickle.HIGHEST_PROTOCOL)
pickle.dump(lfdr_pcor, file, pickle.HIGHEST_PROTOCOL)
#pickle.dump(prob, file, pickle.HIGHEST_PROTOCOL)
print("making 1-lfdr vs. pcor plot")
prob = 1-np.array(lfdr_pcor.rx2('lfdr'))
with PdfPages(PDF_FILENAME) as pdf:
plt.figure(figsize=(3, 3))
plt.plot(range(7), [3, 1, 4, 1, 5, 9, 2], 'r-o')
plt.title('Page One')
pdf.savefig() # saves the current figure into a pdf page
plt.close()
plt.plot(pcor[0:10000:10], prob[0:10000:10], 'o', markeredgecolor='k', markersize=3)
plt.title("THIS IS A PLOT TITLE, YOU BET")
plt.xlabel('partial correlation')
plt.ylabel('lfdr')
pdf.savefig
plt.close()
示例11: test_ledoit_wolf
def test_ledoit_wolf():
# Tests LedoitWolf module on a simple dataset.
# test shrinkage coeff on a simple data set
X_centered = X - X.mean(axis=0)
lw = LedoitWolf(assume_centered=True)
lw.fit(X_centered)
shrinkage_ = lw.shrinkage_
score_ = lw.score(X_centered)
assert_almost_equal(ledoit_wolf_shrinkage(X_centered,
assume_centered=True),
shrinkage_)
assert_almost_equal(ledoit_wolf_shrinkage(X_centered, assume_centered=True,
block_size=6),
shrinkage_)
# compare shrunk covariance obtained from data and from MLE estimate
lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X_centered,
assume_centered=True)
assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
# compare estimates given by LW and ShrunkCovariance
scov = ShrunkCovariance(shrinkage=lw.shrinkage_, assume_centered=True)
scov.fit(X_centered)
assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)
# test with n_features = 1
X_1d = X[:, 0].reshape((-1, 1))
lw = LedoitWolf(assume_centered=True)
lw.fit(X_1d)
lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X_1d,
assume_centered=True)
assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
assert_array_almost_equal((X_1d ** 2).sum() / n_samples, lw.covariance_, 4)
# test shrinkage coeff on a simple data set (without saving precision)
lw = LedoitWolf(store_precision=False, assume_centered=True)
lw.fit(X_centered)
assert_almost_equal(lw.score(X_centered), score_, 4)
assert(lw.precision_ is None)
# Same tests without assuming centered data
# test shrinkage coeff on a simple data set
lw = LedoitWolf()
lw.fit(X)
assert_almost_equal(lw.shrinkage_, shrinkage_, 4)
assert_almost_equal(lw.shrinkage_, ledoit_wolf_shrinkage(X))
assert_almost_equal(lw.shrinkage_, ledoit_wolf(X)[1])
assert_almost_equal(lw.score(X), score_, 4)
# compare shrunk covariance obtained from data and from MLE estimate
lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X)
assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
# compare estimates given by LW and ShrunkCovariance
scov = ShrunkCovariance(shrinkage=lw.shrinkage_)
scov.fit(X)
assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)
# test with n_features = 1
X_1d = X[:, 0].reshape((-1, 1))
lw = LedoitWolf()
lw.fit(X_1d)
lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X_1d)
assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
assert_array_almost_equal(empirical_covariance(X_1d), lw.covariance_, 4)
# test with one sample
# warning should be raised when using only 1 sample
X_1sample = np.arange(5).reshape(1, 5)
lw = LedoitWolf()
assert_warns(UserWarning, lw.fit, X_1sample)
assert_array_almost_equal(lw.covariance_,
np.zeros(shape=(5, 5), dtype=np.float64))
# test shrinkage coeff on a simple data set (without saving precision)
lw = LedoitWolf(store_precision=False)
lw.fit(X)
assert_almost_equal(lw.score(X), score_, 4)
assert(lw.precision_ is None)
示例12: threshold_from_simulations
def threshold_from_simulations(self, X, precision=2000, verbose=False,
n_jobs=-1):
"""
"""
import multiprocessing as mp
if n_jobs < 1:
n_jobs = mp.cpu_count()
n_samples, n_features = X.shape
n = n_samples
p = n_features
h = self.support_.sum()
lw = LedoitWolf()
ref_covariance = lw.fit(X[self.support_]).covariance_
c = sp.stats.chi2(p + 2).cdf(
sp.stats.chi2(p).ppf(float(h) / n)) / (float(h) / n)
sigma_root = np.linalg.cholesky(ref_covariance / c)
all_h = []
# inliers distribution
dist_in = np.array([], ndmin=1)
max_i = max(1, int(precision / float(self.support_.sum())))
for i in range(max_i):
if verbose and max_i > 4 and (i % (max_i / 4) == 0):
print "\t", 50 * i / float(max_i), "%"
#sigma_root = np.diag(np.sqrt(eigenvalues))
#sigma_root = np.eye(n_features)
X1, _ = dg.generate_gaussian(
n_samples, n_features, np.zeros(n_features),
cov_root=sigma_root)
# learn location and shape
clf = EllipticEnvelopeRMCDl1(
correction=self.correction, shrinkage=self.shrinkage,
h=self.support_.sum() / float(n_samples), no_fit=True).fit(
X1)
X2 = X1 - clf.location_
dist_in = np.concatenate(
(dist_in, clf.decision_function(
X2[clf.support_], raw_values=True)))
all_h.append(clf.h)
# outliers distribution
dist_out = np.array([], ndmin=1)
max_i = max(1, int(precision / float(n_samples - self.support_.sum())))
for i in range(max_i):
if verbose and max_i > 4 and (i % (max_i / 4) == 0):
print "\t", 50 * (1. + i / float(max_i)), "%"
X1, _ = dg.generate_gaussian(
n_samples, n_features, np.zeros(n_features),
cov_root=sigma_root)
# learn location and shape
clf = EllipticEnvelopeRMCDl1(
correction=self.correction, shrinkage=self.shrinkage,
h=self.support_.sum() / float(n_samples), no_fit=True).fit(X1)
X2 = X1 - clf.location_
dist_out = np.concatenate(
(dist_out, clf.decision_function(
X2[~clf.support_], raw_values=True)))
all_h.append(clf.h)
self.dist_in = np.sort(dist_in)
self.dist_out = np.sort(dist_out)
self.h_mean = np.mean(all_h)
return self.dist_out
示例13: toeplitz
r = 0.1
real_cov = toeplitz(r**np.arange(n_features))
coloring_matrix = cholesky(real_cov)
n_samples_range = np.arange(6, 31, 1)
repeat = 100
lw_mse = np.zeros((n_samples_range.size, repeat))
oa_mse = np.zeros((n_samples_range.size, repeat))
lw_shrinkage = np.zeros((n_samples_range.size, repeat))
oa_shrinkage = np.zeros((n_samples_range.size, repeat))
for i, n_samples in enumerate(n_samples_range):
for j in range(repeat):
X = np.dot(
np.random.normal(size=(n_samples, n_features)), coloring_matrix.T)
lw = LedoitWolf(store_precision=False)
lw.fit(X, assume_centered=True)
lw_mse[i,j] = lw.error_norm(real_cov, scaling=False)
lw_shrinkage[i,j] = lw.shrinkage_
oa = OAS(store_precision=False)
oa.fit(X, assume_centered=True)
oa_mse[i,j] = oa.error_norm(real_cov, scaling=False)
oa_shrinkage[i,j] = oa.shrinkage_
# plot MSE
pl.subplot(2,1,1)
pl.errorbar(n_samples_range, lw_mse.mean(1), yerr=lw_mse.std(1),
label='Ledoit-Wolf', color='g')
pl.errorbar(n_samples_range, oa_mse.mean(1), yerr=oa_mse.std(1),
label='OAS', color='r')
示例14: plot_psds
def plot_psds(psd_file, data_dir='/auto/tdrive/mschachter/data'):
# read PairwiseCF file
pcf_file = os.path.join(data_dir, 'aggregate', 'pairwise_cf.h5')
pcf = AggregatePairwiseCF.load(pcf_file)
# pcf.zscore_within_site()
g = pcf.df.groupby(['bird', 'block', 'segment', 'electrode'])
nsamps_electrodes = len(g)
i = pcf.df.cell_index != -1
g = pcf.df[i].groupby(['bird', 'block', 'segment', 'electrode', 'cell_index'])
nsamps_cells = len(g)
print '# of electrodes: %d' % nsamps_electrodes
print '# of cells: %d' % nsamps_cells
print '# of lfp samples: %d' % (pcf.lfp_psds.shape[0])
print '# of spike psd samples: %d' % (pcf.spike_psds.shape[0])
# compute the LFP mean and std
lfp_psds = deepcopy(pcf.lfp_psds)
print 'lfp_psds_ind: max=%f, q99=%f' % (lfp_psds.max(), np.percentile(lfp_psds.ravel(), 99))
log_transform(lfp_psds)
print 'lfp_psds_ind: max=%f, q99=%f' % (lfp_psds.max(), np.percentile(lfp_psds.ravel(), 99))
nz = lfp_psds.sum(axis=1) > 0
lfp_psds = lfp_psds[nz, :]
lfp_psd_mean = lfp_psds.mean(axis=0)
lfp_psd_std = lfp_psds.std(axis=0, ddof=1)
nsamps_lfp = lfp_psds.shape[0]
# get the spike rate
spike_rate = pcf.df.spike_rate.values
# plt.figure()
# plt.hist(spike_rate, bins=20, color='g', alpha=0.7)
# plt.title('Spike Rate Histogram, q1=%0.3f, q5=%0.3f, q10=%0.3f, q50=%0.3f, q99=%0.3f' %
# (np.percentile(spike_rate, 1), np.percentile(spike_rate, 5), np.percentile(spike_rate, 10),
# np.percentile(spike_rate, 50), np.percentile(spike_rate, 99)))
# plt.show()
# compute the covariance
lfp_psd_z = deepcopy(lfp_psds)
lfp_psd_z -= lfp_psd_mean
lfp_psd_z /= lfp_psd_std
lfp_and_spike_cov_est = LedoitWolf()
lfp_and_spike_cov_est.fit(lfp_psd_z)
lfp_and_spike_cov = lfp_and_spike_cov_est.covariance_
"""
# read CRCNS file
cell_data = dict()
hf = h5py.File(psd_file, 'r')
cnames = hf.attrs['col_names']
for c in cnames:
cell_data[c] = np.array(hf[c])
crcns_psds = np.array(hf['psds'])
freqs = hf.attrs['freqs']
hf.close()
cell_df = pd.DataFrame(cell_data)
print 'regions=',cell_df.superregion.unique()
name_map = {'brainstem':'MLd', 'thalamus':'OV', 'cortex':'Field L+CM'}
"""
# resample the lfp mean and std
freq_rs = np.linspace(pcf.freqs.min(), pcf.freqs.max(), 1000)
lfp_mean_cs = interp1d(pcf.freqs, lfp_psd_mean, kind='cubic')
lfp_mean_rs = lfp_mean_cs(freq_rs)
lfp_std_cs = interp1d(pcf.freqs, lfp_psd_std, kind='cubic')
lfp_std_rs = lfp_std_cs(freq_rs)
# concatenate the lfp psd and log spike rate
lfp_psd_and_spike_rate = list()
for k,(li,si) in enumerate(zip(pcf.df['lfp_index'], pcf.df['spike_index'])):
lpsd = pcf.lfp_psds[li, :]
srate,sstd = pcf.spike_rates[si, :]
if srate > 0:
lfp_psd_and_spike_rate.append(np.hstack([lpsd, np.log(srate)]))
lfp_psd_and_spike_rate = np.array(lfp_psd_and_spike_rate)
nfreqs = len(pcf.freqs)
lfp_rate_cc = np.zeros([nfreqs])
for k in range(nfreqs):
lfp_rate_cc[k] = np.corrcoef(lfp_psd_and_spike_rate[:, k], lfp_psd_and_spike_rate[:, -1])[0, 1]
fig = plt.figure(figsize=(24, 12))
fig.subplots_adjust(left=0.05, right=0.95, wspace=0.30, hspace=0.30)
nrows = 2
ncols = 100
gs = plt.GridSpec(nrows, ncols)
ax = plt.subplot(gs[0, :35])
plt.errorbar(freq_rs, lfp_mean_rs, yerr=lfp_std_rs, c='k', linewidth=9.0, elinewidth=3.0,
ecolor='#D8D8D8', alpha=0.5, capthick=0.)
plt.axis('tight')
plt.xlabel('Frequency (Hz)')
plt.ylabel('Power (dB)')
#.........這裏部分代碼省略.........
示例15: test_connectivity_measure_outputs
def test_connectivity_measure_outputs():
n_subjects = 10
n_features = 49
n_samples = 200
# Generate signals and compute covariances
emp_covs = []
ledoit_covs = []
signals = []
random_state = check_random_state(0)
ledoit_estimator = LedoitWolf()
for k in range(n_subjects):
signal = random_state.randn(n_samples, n_features)
signals.append(signal)
signal -= signal.mean(axis=0)
emp_covs.append((signal.T).dot(signal) / n_samples)
ledoit_covs.append(ledoit_estimator.fit(signal).covariance_)
kinds = ["correlation", "tangent", "precision",
"partial correlation"]
# Check outputs properties
for cov_estimator, covs in zip([EmpiricalCovariance(), LedoitWolf()],
[emp_covs, ledoit_covs]):
input_covs = copy.copy(covs)
for kind in kinds:
conn_measure = ConnectivityMeasure(kind=kind,
cov_estimator=cov_estimator)
connectivities = conn_measure.fit_transform(signals)
# Generic
assert_true(isinstance(connectivities, np.ndarray))
assert_equal(len(connectivities), len(covs))
for k, cov_new in enumerate(connectivities):
assert_array_equal(input_covs[k], covs[k])
assert(is_spd(covs[k], decimal=7))
# Positive definiteness if expected and output value checks
if kind == "tangent":
assert_array_almost_equal(cov_new, cov_new.T)
gmean_sqrt = _map_eigenvalues(np.sqrt,
conn_measure.mean_)
assert(is_spd(gmean_sqrt, decimal=7))
assert(is_spd(conn_measure.whitening_, decimal=7))
assert_array_almost_equal(conn_measure.whitening_.dot(
gmean_sqrt), np.eye(n_features))
assert_array_almost_equal(gmean_sqrt.dot(
_map_eigenvalues(np.exp, cov_new)).dot(gmean_sqrt),
covs[k])
elif kind == "precision":
assert(is_spd(cov_new, decimal=7))
assert_array_almost_equal(cov_new.dot(covs[k]),
np.eye(n_features))
elif kind == "correlation":
assert(is_spd(cov_new, decimal=7))
d = np.sqrt(np.diag(np.diag(covs[k])))
if cov_estimator == EmpiricalCovariance():
assert_array_almost_equal(d.dot(cov_new).dot(d),
covs[k])
assert_array_almost_equal(np.diag(cov_new),
np.ones((n_features)))
elif kind == "partial correlation":
prec = linalg.inv(covs[k])
d = np.sqrt(np.diag(np.diag(prec)))
assert_array_almost_equal(d.dot(cov_new).dot(d), -prec +
2 * np.diag(np.diag(prec)))