本文整理匯總了Python中sklearn.covariance.LedoitWolf.fit方法的典型用法代碼示例。如果您正苦於以下問題:Python LedoitWolf.fit方法的具體用法?Python LedoitWolf.fit怎麽用?Python LedoitWolf.fit使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類sklearn.covariance.LedoitWolf
的用法示例。
在下文中一共展示了LedoitWolf.fit方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。
示例1: test_ledoit_wolf_small
# 需要導入模塊: from sklearn.covariance import LedoitWolf [as 別名]
# 或者: from sklearn.covariance.LedoitWolf import fit [as 別名]
def test_ledoit_wolf_small():
# Compare our blocked implementation to the naive implementation
X_small = X[:, :4]
lw = LedoitWolf()
lw.fit(X_small)
shrinkage_ = lw.shrinkage_
assert_almost_equal(shrinkage_, _naive_ledoit_wolf_shrinkage(X_small))
示例2: maximization
# 需要導入模塊: from sklearn.covariance import LedoitWolf [as 別名]
# 或者: from sklearn.covariance.LedoitWolf import fit [as 別名]
def maximization(self):
# mean maximization
for i in range(self._K):
mu[i] = mu_ss[i] / ndata_ss
# covariance maximization
for i in range(self._K):
for j in range(self._K):
cov[i,j] = (1.0/ ndata_ss) * cov_ss[i,j] + ndata_ss * mu[i] * mu[j] - mu_ss[i] * mu[j] - mu_ss[j] * mu[i]
# covariance shrinkage
lw = LedoitWolf()
cov_result = lw.fit(cov,assume_centered=True).covariance_
inv_cov = np.linalg.inv(cov_result)
log_det_inv_cov = np.log(np.linalg.det(inv_cov))
# topic maximization
for i in range(self._K):
sum_m = 0
for j in range(self._W):
sum_m += beta_ss[i,j]
if sum_m == 0:
sum_m = -1000 * self._W
else:
sum_m = np.log(sum_m)
for j in range(self._W):
log_beta[i,j] = np.log(beta_ss[i,j] - sum_m)
示例3: maximization
# 需要導入模塊: from sklearn.covariance import LedoitWolf [as 別名]
# 或者: from sklearn.covariance.LedoitWolf import fit [as 別名]
def maximization(self):
'''
M-step of EM algorithm, use scikit.learn's LedoitWolf method to perfom
covariance matrix shrinkage.
Arguments:
sufficient statistics, i.e. model parameters
Returns:
the updated sufficient statistics which all in self definition, so no return values
'''
logger.info("running maximization function")
logger.info("mean maximization")
mu = np.divide(self.mu, self.ndata)
logger.info("covariance maximization")
for i in range(self._K):
for j in range(self._K):
self.cov[i, j] = (1.0 / self.ndata) * self.cov[i, j] + self.ndata * mu[i] * mu[j] - self.mu[i] * mu[j] - self.mu[j] * mu[i]
logger.info(" performing covariance shrinkage using sklearn module")
lw = LedoitWolf()
cov_result = lw.fit(self.cov, assume_centered=True).covariance_
self.inv_cov = np.linalg.inv(cov_result)
self.log_det_inv_cov = math_utli.safe_log(np.linalg.det(self.inv_cov))
logger.info("topic maximization")
for i in range(self._K):
sum_m = 0
sum_m += np.sum(self.beta, axis=0)[i]
if sum_m == 0:
sum_m = -1000 * self._W
else:
sum_m = np.log(sum_m)
for j in range(self._W):
self.log_beta[i, j] = math_utli.safe_log(self.beta[i, j] - sum_m)
logger.info("write model parameters to file")
logger.info("write gaussian")
with open('ctm_nu', 'w') as ctm_nu_dump:
cPickle.dump(self.nu, ctm_nu_dump)
with open('ctm_cov', 'w') as ctm_cov_dump:
cPickle.dump(self.cov, ctm_cov_dump)
with open('ctm_inv_cov', 'w') as ctm_inv_cov_dump:
cPickle.dump(self.inv_cov, ctm_inv_cov_dump)
with open('ctm_log_det_inv_cov', 'w') as ctm_log_det_inv_cov_dump:
cPickle.dump(self.log_det_inv_cov, ctm_log_det_inv_cov_dump)
logger.info("write topic matrix")
with open('ctm_log_beta', 'w') as ctm_log_beta_dump:
cPickle.dump(self.log_beta, ctm_log_beta_dump)
示例4: test_connectivity_measure_outputs
# 需要導入模塊: from sklearn.covariance import LedoitWolf [as 別名]
# 或者: from sklearn.covariance.LedoitWolf import fit [as 別名]
def test_connectivity_measure_outputs():
n_subjects = 10
n_features = 49
n_samples = 200
# Generate signals and compute covariances
emp_covs = []
ledoit_covs = []
signals = []
random_state = check_random_state(0)
ledoit_estimator = LedoitWolf()
for k in range(n_subjects):
signal = random_state.randn(n_samples, n_features)
signals.append(signal)
signal -= signal.mean(axis=0)
emp_covs.append((signal.T).dot(signal) / n_samples)
ledoit_covs.append(ledoit_estimator.fit(signal).covariance_)
kinds = ["correlation", "tangent", "precision",
"partial correlation"]
# Check outputs properties
for cov_estimator, covs in zip([EmpiricalCovariance(), LedoitWolf()],
[emp_covs, ledoit_covs]):
input_covs = copy.copy(covs)
for kind in kinds:
conn_measure = ConnectivityMeasure(kind=kind,
cov_estimator=cov_estimator)
connectivities = conn_measure.fit_transform(signals)
# Generic
assert_true(isinstance(connectivities, np.ndarray))
assert_equal(len(connectivities), len(covs))
for k, cov_new in enumerate(connectivities):
assert_array_equal(input_covs[k], covs[k])
assert(is_spd(covs[k], decimal=7))
# Positive definiteness if expected and output value checks
if kind == "tangent":
assert_array_almost_equal(cov_new, cov_new.T)
gmean_sqrt = _map_eigenvalues(np.sqrt,
conn_measure.mean_)
assert(is_spd(gmean_sqrt, decimal=7))
assert(is_spd(conn_measure.whitening_, decimal=7))
assert_array_almost_equal(conn_measure.whitening_.dot(
gmean_sqrt), np.eye(n_features))
assert_array_almost_equal(gmean_sqrt.dot(
_map_eigenvalues(np.exp, cov_new)).dot(gmean_sqrt),
covs[k])
elif kind == "precision":
assert(is_spd(cov_new, decimal=7))
assert_array_almost_equal(cov_new.dot(covs[k]),
np.eye(n_features))
elif kind == "correlation":
assert(is_spd(cov_new, decimal=7))
d = np.sqrt(np.diag(np.diag(covs[k])))
if cov_estimator == EmpiricalCovariance():
assert_array_almost_equal(d.dot(cov_new).dot(d),
covs[k])
assert_array_almost_equal(np.diag(cov_new),
np.ones((n_features)))
elif kind == "partial correlation":
prec = linalg.inv(covs[k])
d = np.sqrt(np.diag(np.diag(prec)))
assert_array_almost_equal(d.dot(cov_new).dot(d), -prec +
2 * np.diag(np.diag(prec)))
示例5: prepareProblem
# 需要導入模塊: from sklearn.covariance import LedoitWolf [as 別名]
# 或者: from sklearn.covariance.LedoitWolf import fit [as 別名]
def prepareProblem(filePath, shrinkage=False, subset=False, subsetSize=0):
# Import data from .csv
df = pd.read_csv(filePath, sep=';')
df.index = df.date
df = df.drop('date', axis=1)
# Subset, if called via subset == True
if subset == True:
df = df.tail(subsetSize)
# Estimate covariance using Empirical/MLE
# Expected input is returns, hence set: assume_centered = True
mleFitted = empirical_covariance(X=df, assume_centered=True)
sigma = mleFitted
if shrinkage == True:
# Estimate covariance using LedoitWolf, first create instance of object
lw = LedoitWolf(assume_centered=True)
lwFitted = lw.fit(X=df).covariance_
sigma = lwFitted
return sigma
示例6: test_ledoit_wolf
# 需要導入模塊: from sklearn.covariance import LedoitWolf [as 別名]
# 或者: from sklearn.covariance.LedoitWolf import fit [as 別名]
def test_ledoit_wolf():
"""Tests LedoitWolf module on a simple dataset.
"""
# test shrinkage coeff on a simple data set
lw = LedoitWolf()
lw.fit(X, assume_centered=True)
assert_almost_equal(lw.shrinkage_, 0.00192, 4)
assert_almost_equal(lw.score(X, assume_centered=True), -2.89795, 4)
# compare shrunk covariance obtained from data and from MLE estimate
lw_cov_from_mle, lw_shinkrage_from_mle = ledoit_wolf(X,
assume_centered=True)
assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
assert_almost_equal(lw_shinkrage_from_mle, lw.shrinkage_)
# compare estimates given by LW and ShrunkCovariance
scov = ShrunkCovariance(shrinkage=lw.shrinkage_)
scov.fit(X, assume_centered=True)
assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)
# test with n_features = 1
X_1d = X[:, 0].reshape((-1, 1))
lw = LedoitWolf()
lw.fit(X_1d, assume_centered=True)
lw_cov_from_mle, lw_shinkrage_from_mle = ledoit_wolf(X_1d,
assume_centered=True)
assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
assert_almost_equal(lw_shinkrage_from_mle, lw.shrinkage_)
assert_array_almost_equal((X_1d ** 2).sum() / n_samples, lw.covariance_, 4)
# test shrinkage coeff on a simple data set (without saving precision)
lw = LedoitWolf(store_precision=False)
lw.fit(X, assume_centered=True)
assert_almost_equal(lw.score(X, assume_centered=True), -2.89795, 4)
assert(lw.precision_ is None)
# Same tests without assuming centered data
# test shrinkage coeff on a simple data set
lw = LedoitWolf()
lw.fit(X)
assert_almost_equal(lw.shrinkage_, 0.007582, 4)
assert_almost_equal(lw.score(X), 2.243483, 4)
# compare shrunk covariance obtained from data and from MLE estimate
lw_cov_from_mle, lw_shinkrage_from_mle = ledoit_wolf(X)
assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
assert_almost_equal(lw_shinkrage_from_mle, lw.shrinkage_)
# compare estimates given by LW and ShrunkCovariance
scov = ShrunkCovariance(shrinkage=lw.shrinkage_)
scov.fit(X)
assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)
# test with n_features = 1
X_1d = X[:, 0].reshape((-1, 1))
lw = LedoitWolf()
lw.fit(X_1d)
lw_cov_from_mle, lw_shinkrage_from_mle = ledoit_wolf(X_1d)
assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
assert_almost_equal(lw_shinkrage_from_mle, lw.shrinkage_)
assert_array_almost_equal(empirical_covariance(X_1d), lw.covariance_, 4)
# test shrinkage coeff on a simple data set (without saving precision)
lw = LedoitWolf(store_precision=False)
lw.fit(X)
assert_almost_equal(lw.score(X), 2.2434839, 4)
assert(lw.precision_ is None)
示例7: test_ledoit_wolf
# 需要導入模塊: from sklearn.covariance import LedoitWolf [as 別名]
# 或者: from sklearn.covariance.LedoitWolf import fit [as 別名]
def test_ledoit_wolf():
"""Tests LedoitWolf module on a simple dataset.
"""
# test shrinkage coeff on a simple data set
X_centered = X - X.mean(axis=0)
lw = LedoitWolf(assume_centered=True)
lw.fit(X_centered)
shrinkage_ = lw.shrinkage_
score_ = lw.score(X_centered)
assert_almost_equal(ledoit_wolf_shrinkage(X_centered,
assume_centered=True),
shrinkage_)
assert_almost_equal(ledoit_wolf_shrinkage(X_centered,
assume_centered=True, block_size=6),
shrinkage_)
# compare shrunk covariance obtained from data and from MLE estimate
lw_cov_from_mle, lw_shinkrage_from_mle = ledoit_wolf(X_centered,
assume_centered=True)
assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
assert_almost_equal(lw_shinkrage_from_mle, lw.shrinkage_)
# compare estimates given by LW and ShrunkCovariance
scov = ShrunkCovariance(shrinkage=lw.shrinkage_, assume_centered=True)
scov.fit(X_centered)
assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)
# test with n_features = 1
X_1d = X[:, 0].reshape((-1, 1))
lw = LedoitWolf(assume_centered=True)
lw.fit(X_1d)
lw_cov_from_mle, lw_shinkrage_from_mle = ledoit_wolf(X_1d,
assume_centered=True)
assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
assert_almost_equal(lw_shinkrage_from_mle, lw.shrinkage_)
assert_array_almost_equal((X_1d ** 2).sum() / n_samples, lw.covariance_, 4)
# test shrinkage coeff on a simple data set (without saving precision)
lw = LedoitWolf(store_precision=False, assume_centered=True)
lw.fit(X_centered)
assert_almost_equal(lw.score(X_centered), score_, 4)
assert(lw.precision_ is None)
# (too) large data set
X_large = np.ones((20, 200))
assert_raises(MemoryError, ledoit_wolf, X_large, block_size=100)
# Same tests without assuming centered data
# test shrinkage coeff on a simple data set
lw = LedoitWolf()
lw.fit(X)
assert_almost_equal(lw.shrinkage_, shrinkage_, 4)
assert_almost_equal(lw.shrinkage_, ledoit_wolf_shrinkage(X))
assert_almost_equal(lw.shrinkage_, ledoit_wolf(X)[1])
assert_almost_equal(lw.score(X), score_, 4)
# compare shrunk covariance obtained from data and from MLE estimate
lw_cov_from_mle, lw_shinkrage_from_mle = ledoit_wolf(X)
assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
assert_almost_equal(lw_shinkrage_from_mle, lw.shrinkage_)
# compare estimates given by LW and ShrunkCovariance
scov = ShrunkCovariance(shrinkage=lw.shrinkage_)
scov.fit(X)
assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)
# test with n_features = 1
X_1d = X[:, 0].reshape((-1, 1))
lw = LedoitWolf()
lw.fit(X_1d)
lw_cov_from_mle, lw_shinkrage_from_mle = ledoit_wolf(X_1d)
assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
assert_almost_equal(lw_shinkrage_from_mle, lw.shrinkage_)
assert_array_almost_equal(empirical_covariance(X_1d), lw.covariance_, 4)
# test with one sample
X_1sample = np.arange(5)
lw = LedoitWolf()
with warnings.catch_warnings(record=True):
lw.fit(X_1sample)
# test shrinkage coeff on a simple data set (without saving precision)
lw = LedoitWolf(store_precision=False)
lw.fit(X)
assert_almost_equal(lw.score(X), score_, 4)
assert(lw.precision_ is None)
示例8: empirical_covariance
# 需要導入模塊: from sklearn.covariance import LedoitWolf [as 別名]
# 或者: from sklearn.covariance.LedoitWolf import fit [as 別名]
# settings
real_cov = np.dot(coloring_matrix.T, coloring_matrix)
emp_cov = empirical_covariance(X_train)
loglik_real = -log_likelihood(emp_cov, linalg.inv(real_cov))
# #############################################################################
# Compare different approaches to setting the parameter
# GridSearch for an optimal shrinkage coefficient
tuned_parameters = [{'shrinkage': shrinkages}]
cv = GridSearchCV(ShrunkCovariance(), tuned_parameters, cv=5)
cv.fit(X_train)
# Ledoit-Wolf optimal shrinkage coefficient estimate
lw = LedoitWolf()
loglik_lw = lw.fit(X_train).score(X_test)
# OAS coefficient estimate
oa = OAS()
loglik_oa = oa.fit(X_train).score(X_test)
# #############################################################################
# Plot results
fig = plt.figure()
plt.title("Regularized covariance: likelihood and shrinkage coefficient")
plt.xlabel('Regularization parameter: shrinkage coefficient')
plt.ylabel('Error: negative log-likelihood on test data')
# range shrinkage curve
plt.loglog(shrinkages, negative_logliks, label="Negative log-likelihood")
plt.plot(plt.xlim(), 2 * [loglik_real], '--r',
示例9: test_connectivity_measure_outputs
# 需要導入模塊: from sklearn.covariance import LedoitWolf [as 別名]
# 或者: from sklearn.covariance.LedoitWolf import fit [as 別名]
def test_connectivity_measure_outputs():
n_subjects = 10
n_features = 49
# Generate signals and compute covariances
emp_covs = []
ledoit_covs = []
signals = []
ledoit_estimator = LedoitWolf()
for k in range(n_subjects):
n_samples = 200 + k
signal, _, _ = generate_signals(n_features=n_features, n_confounds=5,
length=n_samples, same_variance=False)
signals.append(signal)
signal -= signal.mean(axis=0)
emp_covs.append((signal.T).dot(signal) / n_samples)
ledoit_covs.append(ledoit_estimator.fit(signal).covariance_)
kinds = ["covariance", "correlation", "tangent", "precision",
"partial correlation"]
# Check outputs properties
for cov_estimator, covs in zip([EmpiricalCovariance(), LedoitWolf()],
[emp_covs, ledoit_covs]):
input_covs = copy.copy(covs)
for kind in kinds:
conn_measure = ConnectivityMeasure(kind=kind,
cov_estimator=cov_estimator)
connectivities = conn_measure.fit_transform(signals)
# Generic
assert_true(isinstance(connectivities, np.ndarray))
assert_equal(len(connectivities), len(covs))
for k, cov_new in enumerate(connectivities):
assert_array_equal(input_covs[k], covs[k])
assert(is_spd(covs[k], decimal=7))
# Positive definiteness if expected and output value checks
if kind == "tangent":
assert_array_almost_equal(cov_new, cov_new.T)
gmean_sqrt = _map_eigenvalues(np.sqrt,
conn_measure.mean_)
assert(is_spd(gmean_sqrt, decimal=7))
assert(is_spd(conn_measure.whitening_, decimal=7))
assert_array_almost_equal(conn_measure.whitening_.dot(
gmean_sqrt), np.eye(n_features))
assert_array_almost_equal(gmean_sqrt.dot(
_map_eigenvalues(np.exp, cov_new)).dot(gmean_sqrt),
covs[k])
elif kind == "precision":
assert(is_spd(cov_new, decimal=7))
assert_array_almost_equal(cov_new.dot(covs[k]),
np.eye(n_features))
elif kind == "correlation":
assert(is_spd(cov_new, decimal=7))
d = np.sqrt(np.diag(np.diag(covs[k])))
if cov_estimator == EmpiricalCovariance():
assert_array_almost_equal(d.dot(cov_new).dot(d),
covs[k])
assert_array_almost_equal(np.diag(cov_new),
np.ones((n_features)))
elif kind == "partial correlation":
prec = linalg.inv(covs[k])
d = np.sqrt(np.diag(np.diag(prec)))
assert_array_almost_equal(d.dot(cov_new).dot(d), -prec +
2 * np.diag(np.diag(prec)))
# Check the mean_
for kind in kinds:
conn_measure = ConnectivityMeasure(kind=kind)
conn_measure.fit_transform(signals)
assert_equal((conn_measure.mean_).shape, (n_features, n_features))
if kind != 'tangent':
assert_array_almost_equal(
conn_measure.mean_,
np.mean(conn_measure.transform(signals), axis=0))
# Check that the mean isn't modified in transform
conn_measure = ConnectivityMeasure(kind='covariance')
conn_measure.fit(signals[:1])
mean = conn_measure.mean_
conn_measure.transform(signals[1:])
assert_array_equal(mean, conn_measure.mean_)
# Check vectorization option
for kind in kinds:
conn_measure = ConnectivityMeasure(kind=kind)
connectivities = conn_measure.fit_transform(signals)
conn_measure = ConnectivityMeasure(vectorize=True, kind=kind)
vectorized_connectivities = conn_measure.fit_transform(signals)
assert_array_almost_equal(vectorized_connectivities,
sym_matrix_to_vec(connectivities))
# Check not fitted error
assert_raises_regex(
ValueError, 'has not been fitted. ',
ConnectivityMeasure().inverse_transform,
vectorized_connectivities)
#.........這裏部分代碼省略.........
示例10: view_as_windows
# 需要導入模塊: from sklearn.covariance import LedoitWolf [as 別名]
# 或者: from sklearn.covariance.LedoitWolf import fit [as 別名]
X_unlab_patches = []
random.seed(42)
print "Gathering examples..."
# Use subsample of 200K for k-means and covariance estimates
for i in random.sample(range(0, unlab_X.shape[2]), 200000):
patches = view_as_windows(unlab_X[:, :, i], (w, w), step=s)
re_shaped = numpy.reshape(patches, (patches.shape[0]*patches.shape[0], w * w))
# normalize the patches, per sample
re_shaped = preprocessing.scale(re_shaped, axis=1)
X_unlab_patches.append(re_shaped)
X_unlab_patches = numpy.vstack(X_unlab_patches)
# build whitening transform matrix
print "Fitting ZCA Whitening Transform..."
cov = LedoitWolf()
cov.fit(X_unlab_patches) # fit covariance estimate
D, U = numpy.linalg.eigh(cov.covariance_)
V = numpy.sqrt(numpy.linalg.inv(numpy.diag(D + zca_eps)))
Wh = numpy.dot(numpy.dot(U, V), U.T)
mu = numpy.mean(X_unlab_patches, axis=0)
X_unlab_patches = numpy.dot(X_unlab_patches-mu, Wh)
# run k-means on unlabelled data
print "Starting k-means..."
clustr = sklearn.cluster.MiniBatchKMeans(n_clusters=n_clust,
compute_labels=False,
batch_size=300)
k_means = clustr.fit(X_unlab_patches)
def f_unsup(img):
示例11: LedoitWolf
# 需要導入模塊: from sklearn.covariance import LedoitWolf [as 別名]
# 或者: from sklearn.covariance.LedoitWolf import fit [as 別名]
c_des_out=np.logical_not(label[:,2]== b'des')
tmp_out= np.logical_and(c_des_out,mask_block)
c_rest_out=np.logical_not(label[:,0]== b'rest')
cond_out= np.logical_and(tmp_out,c_rest_out)
y=label[cond_out,2]
labels=np.unique(y)
# Prepare correlation
estimator = LedoitWolf()
scaler=StandardScaler()
# Create np array
result_matrix = np.empty([len(names),motor_region.shape[0],labels.shape[0],labels.shape[0]])
#Analysis for each subject
for i,n in enumerate(sorted(names)):
roi_name=fold_g+'mni4060/asymroi_'+smt+'_'+n+'.npz'
roi=np.load(roi_name)['roi'][cond_out]
roi=roi[:,motor_region-1]
for j in range(motor_region.shape[0]):
roi_j=roi[:,j]
roi_mat=np.zeros(((y==b'imp').sum(),len(labels)))
for z,lab in enumerate(sorted(labels)):
roi_mat[:,z]=roi_j[y==lab]
roi_sc=scaler.fit_transform(roi_mat)
estimator.fit(roi_sc)
matrix=estimator.covariance_
result_matrix[i,j]=1-matrix
np.savez_compressed('F:/IRM_Marche/dismatrix.npz',result_matrix)
示例12: test_ledoit_wolf
# 需要導入模塊: from sklearn.covariance import LedoitWolf [as 別名]
# 或者: from sklearn.covariance.LedoitWolf import fit [as 別名]
def test_ledoit_wolf():
# Tests LedoitWolf module on a simple dataset.
# test shrinkage coeff on a simple data set
X_centered = X - X.mean(axis=0)
lw = LedoitWolf(assume_centered=True)
lw.fit(X_centered)
shrinkage_ = lw.shrinkage_
score_ = lw.score(X_centered)
assert_almost_equal(ledoit_wolf_shrinkage(X_centered,
assume_centered=True),
shrinkage_)
assert_almost_equal(ledoit_wolf_shrinkage(X_centered, assume_centered=True,
block_size=6),
shrinkage_)
# compare shrunk covariance obtained from data and from MLE estimate
lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X_centered,
assume_centered=True)
assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
# compare estimates given by LW and ShrunkCovariance
scov = ShrunkCovariance(shrinkage=lw.shrinkage_, assume_centered=True)
scov.fit(X_centered)
assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)
# test with n_features = 1
X_1d = X[:, 0].reshape((-1, 1))
lw = LedoitWolf(assume_centered=True)
lw.fit(X_1d)
lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X_1d,
assume_centered=True)
assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
assert_array_almost_equal((X_1d ** 2).sum() / n_samples, lw.covariance_, 4)
# test shrinkage coeff on a simple data set (without saving precision)
lw = LedoitWolf(store_precision=False, assume_centered=True)
lw.fit(X_centered)
assert_almost_equal(lw.score(X_centered), score_, 4)
assert(lw.precision_ is None)
# Same tests without assuming centered data
# test shrinkage coeff on a simple data set
lw = LedoitWolf()
lw.fit(X)
assert_almost_equal(lw.shrinkage_, shrinkage_, 4)
assert_almost_equal(lw.shrinkage_, ledoit_wolf_shrinkage(X))
assert_almost_equal(lw.shrinkage_, ledoit_wolf(X)[1])
assert_almost_equal(lw.score(X), score_, 4)
# compare shrunk covariance obtained from data and from MLE estimate
lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X)
assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
# compare estimates given by LW and ShrunkCovariance
scov = ShrunkCovariance(shrinkage=lw.shrinkage_)
scov.fit(X)
assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)
# test with n_features = 1
X_1d = X[:, 0].reshape((-1, 1))
lw = LedoitWolf()
lw.fit(X_1d)
lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X_1d)
assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
assert_array_almost_equal(empirical_covariance(X_1d), lw.covariance_, 4)
# test with one sample
# warning should be raised when using only 1 sample
X_1sample = np.arange(5).reshape(1, 5)
lw = LedoitWolf()
assert_warns(UserWarning, lw.fit, X_1sample)
assert_array_almost_equal(lw.covariance_,
np.zeros(shape=(5, 5), dtype=np.float64))
# test shrinkage coeff on a simple data set (without saving precision)
lw = LedoitWolf(store_precision=False)
lw.fit(X)
assert_almost_equal(lw.score(X), score_, 4)
assert(lw.precision_ is None)
示例13: threshold_from_simulations
# 需要導入模塊: from sklearn.covariance import LedoitWolf [as 別名]
# 或者: from sklearn.covariance.LedoitWolf import fit [as 別名]
def threshold_from_simulations(self, X, precision=2000, verbose=False,
n_jobs=-1):
"""
"""
import multiprocessing as mp
if n_jobs < 1:
n_jobs = mp.cpu_count()
n_samples, n_features = X.shape
n = n_samples
p = n_features
h = self.support_.sum()
lw = LedoitWolf()
ref_covariance = lw.fit(X[self.support_]).covariance_
c = sp.stats.chi2(p + 2).cdf(
sp.stats.chi2(p).ppf(float(h) / n)) / (float(h) / n)
sigma_root = np.linalg.cholesky(ref_covariance / c)
all_h = []
# inliers distribution
dist_in = np.array([], ndmin=1)
max_i = max(1, int(precision / float(self.support_.sum())))
for i in range(max_i):
if verbose and max_i > 4 and (i % (max_i / 4) == 0):
print "\t", 50 * i / float(max_i), "%"
#sigma_root = np.diag(np.sqrt(eigenvalues))
#sigma_root = np.eye(n_features)
X1, _ = dg.generate_gaussian(
n_samples, n_features, np.zeros(n_features),
cov_root=sigma_root)
# learn location and shape
clf = EllipticEnvelopeRMCDl1(
correction=self.correction, shrinkage=self.shrinkage,
h=self.support_.sum() / float(n_samples), no_fit=True).fit(
X1)
X2 = X1 - clf.location_
dist_in = np.concatenate(
(dist_in, clf.decision_function(
X2[clf.support_], raw_values=True)))
all_h.append(clf.h)
# outliers distribution
dist_out = np.array([], ndmin=1)
max_i = max(1, int(precision / float(n_samples - self.support_.sum())))
for i in range(max_i):
if verbose and max_i > 4 and (i % (max_i / 4) == 0):
print "\t", 50 * (1. + i / float(max_i)), "%"
X1, _ = dg.generate_gaussian(
n_samples, n_features, np.zeros(n_features),
cov_root=sigma_root)
# learn location and shape
clf = EllipticEnvelopeRMCDl1(
correction=self.correction, shrinkage=self.shrinkage,
h=self.support_.sum() / float(n_samples), no_fit=True).fit(X1)
X2 = X1 - clf.location_
dist_out = np.concatenate(
(dist_out, clf.decision_function(
X2[~clf.support_], raw_values=True)))
all_h.append(clf.h)
self.dist_in = np.sort(dist_in)
self.dist_out = np.sort(dist_out)
self.h_mean = np.mean(all_h)
return self.dist_out
示例14: toeplitz
# 需要導入模塊: from sklearn.covariance import LedoitWolf [as 別名]
# 或者: from sklearn.covariance.LedoitWolf import fit [as 別名]
real_cov = toeplitz(r**np.arange(n_features))
coloring_matrix = cholesky(real_cov)
n_samples_range = np.arange(6, 31, 1)
repeat = 100
lw_mse = np.zeros((n_samples_range.size, repeat))
oa_mse = np.zeros((n_samples_range.size, repeat))
lw_shrinkage = np.zeros((n_samples_range.size, repeat))
oa_shrinkage = np.zeros((n_samples_range.size, repeat))
for i, n_samples in enumerate(n_samples_range):
for j in range(repeat):
X = np.dot(
np.random.normal(size=(n_samples, n_features)), coloring_matrix.T)
lw = LedoitWolf(store_precision=False)
lw.fit(X, assume_centered=True)
lw_mse[i,j] = lw.error_norm(real_cov, scaling=False)
lw_shrinkage[i,j] = lw.shrinkage_
oa = OAS(store_precision=False)
oa.fit(X, assume_centered=True)
oa_mse[i,j] = oa.error_norm(real_cov, scaling=False)
oa_shrinkage[i,j] = oa.shrinkage_
# plot MSE
pl.subplot(2,1,1)
pl.errorbar(n_samples_range, lw_mse.mean(1), yerr=lw_mse.std(1),
label='Ledoit-Wolf', color='g')
pl.errorbar(n_samples_range, oa_mse.mean(1), yerr=oa_mse.std(1),
label='OAS', color='r')
pl.ylabel("Squared error")
示例15: plot_psds
# 需要導入模塊: from sklearn.covariance import LedoitWolf [as 別名]
# 或者: from sklearn.covariance.LedoitWolf import fit [as 別名]
def plot_psds(psd_file, data_dir='/auto/tdrive/mschachter/data'):
# read PairwiseCF file
pcf_file = os.path.join(data_dir, 'aggregate', 'pairwise_cf.h5')
pcf = AggregatePairwiseCF.load(pcf_file)
# pcf.zscore_within_site()
g = pcf.df.groupby(['bird', 'block', 'segment', 'electrode'])
nsamps_electrodes = len(g)
i = pcf.df.cell_index != -1
g = pcf.df[i].groupby(['bird', 'block', 'segment', 'electrode', 'cell_index'])
nsamps_cells = len(g)
print '# of electrodes: %d' % nsamps_electrodes
print '# of cells: %d' % nsamps_cells
print '# of lfp samples: %d' % (pcf.lfp_psds.shape[0])
print '# of spike psd samples: %d' % (pcf.spike_psds.shape[0])
# compute the LFP mean and std
lfp_psds = deepcopy(pcf.lfp_psds)
print 'lfp_psds_ind: max=%f, q99=%f' % (lfp_psds.max(), np.percentile(lfp_psds.ravel(), 99))
log_transform(lfp_psds)
print 'lfp_psds_ind: max=%f, q99=%f' % (lfp_psds.max(), np.percentile(lfp_psds.ravel(), 99))
nz = lfp_psds.sum(axis=1) > 0
lfp_psds = lfp_psds[nz, :]
lfp_psd_mean = lfp_psds.mean(axis=0)
lfp_psd_std = lfp_psds.std(axis=0, ddof=1)
nsamps_lfp = lfp_psds.shape[0]
# get the spike rate
spike_rate = pcf.df.spike_rate.values
# plt.figure()
# plt.hist(spike_rate, bins=20, color='g', alpha=0.7)
# plt.title('Spike Rate Histogram, q1=%0.3f, q5=%0.3f, q10=%0.3f, q50=%0.3f, q99=%0.3f' %
# (np.percentile(spike_rate, 1), np.percentile(spike_rate, 5), np.percentile(spike_rate, 10),
# np.percentile(spike_rate, 50), np.percentile(spike_rate, 99)))
# plt.show()
# compute the covariance
lfp_psd_z = deepcopy(lfp_psds)
lfp_psd_z -= lfp_psd_mean
lfp_psd_z /= lfp_psd_std
lfp_and_spike_cov_est = LedoitWolf()
lfp_and_spike_cov_est.fit(lfp_psd_z)
lfp_and_spike_cov = lfp_and_spike_cov_est.covariance_
"""
# read CRCNS file
cell_data = dict()
hf = h5py.File(psd_file, 'r')
cnames = hf.attrs['col_names']
for c in cnames:
cell_data[c] = np.array(hf[c])
crcns_psds = np.array(hf['psds'])
freqs = hf.attrs['freqs']
hf.close()
cell_df = pd.DataFrame(cell_data)
print 'regions=',cell_df.superregion.unique()
name_map = {'brainstem':'MLd', 'thalamus':'OV', 'cortex':'Field L+CM'}
"""
# resample the lfp mean and std
freq_rs = np.linspace(pcf.freqs.min(), pcf.freqs.max(), 1000)
lfp_mean_cs = interp1d(pcf.freqs, lfp_psd_mean, kind='cubic')
lfp_mean_rs = lfp_mean_cs(freq_rs)
lfp_std_cs = interp1d(pcf.freqs, lfp_psd_std, kind='cubic')
lfp_std_rs = lfp_std_cs(freq_rs)
# concatenate the lfp psd and log spike rate
lfp_psd_and_spike_rate = list()
for k,(li,si) in enumerate(zip(pcf.df['lfp_index'], pcf.df['spike_index'])):
lpsd = pcf.lfp_psds[li, :]
srate,sstd = pcf.spike_rates[si, :]
if srate > 0:
lfp_psd_and_spike_rate.append(np.hstack([lpsd, np.log(srate)]))
lfp_psd_and_spike_rate = np.array(lfp_psd_and_spike_rate)
nfreqs = len(pcf.freqs)
lfp_rate_cc = np.zeros([nfreqs])
for k in range(nfreqs):
lfp_rate_cc[k] = np.corrcoef(lfp_psd_and_spike_rate[:, k], lfp_psd_and_spike_rate[:, -1])[0, 1]
fig = plt.figure(figsize=(24, 12))
fig.subplots_adjust(left=0.05, right=0.95, wspace=0.30, hspace=0.30)
nrows = 2
ncols = 100
gs = plt.GridSpec(nrows, ncols)
ax = plt.subplot(gs[0, :35])
plt.errorbar(freq_rs, lfp_mean_rs, yerr=lfp_std_rs, c='k', linewidth=9.0, elinewidth=3.0,
ecolor='#D8D8D8', alpha=0.5, capthick=0.)
plt.axis('tight')
plt.xlabel('Frequency (Hz)')
plt.ylabel('Power (dB)')
#.........這裏部分代碼省略.........