

Python covariance.LedoitWolf Class Code Examples

This article collects typical usage examples of the Python class sklearn.covariance.LedoitWolf. If you have been wondering what the LedoitWolf class does, how to use it, or where to find usage examples, the curated class code examples below should help.


Fifteen LedoitWolf class code examples are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
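Before the examples, here is a minimal sketch of the typical fit-and-inspect pattern (synthetic data; the names are illustrative):

import numpy as np
from sklearn.covariance import LedoitWolf

rng = np.random.RandomState(0)
X = rng.randn(60, 5)         # 60 samples, 5 features (synthetic)

lw = LedoitWolf().fit(X)
print(lw.covariance_.shape)  # (5, 5) shrunk covariance estimate
print(lw.shrinkage_)         # data-driven shrinkage coefficient in [0, 1]
print(lw.precision_.shape)   # (5, 5) inverse, kept unless store_precision=False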

Example 1: maximization

def maximization(self):
    # (mu, mu_ss, ndata_ss, cov, cov_ss, beta_ss and log_beta are attributes
    # of the surrounding class; Example 4 shows the self.-qualified version)
    # mean maximization
    for i in range(self._K):
        mu[i] = mu_ss[i] / ndata_ss
    # covariance maximization
    for i in range(self._K):
        for j in range(self._K):
            cov[i, j] = (1.0 / ndata_ss) * cov_ss[i, j] + ndata_ss * mu[i] * mu[j] - mu_ss[i] * mu[j] - mu_ss[j] * mu[i]
    # covariance shrinkage; note that assume_centered is a constructor
    # argument, not a fit() argument
    lw = LedoitWolf(assume_centered=True)
    cov_result = lw.fit(cov).covariance_
    inv_cov = np.linalg.inv(cov_result)
    log_det_inv_cov = np.log(np.linalg.det(inv_cov))

    # topic maximization
    for i in range(self._K):
        sum_m = 0
        for j in range(self._W):
            sum_m += beta_ss[i, j]

        if sum_m == 0:
            sum_m = -1000 * self._W
        else:
            sum_m = np.log(sum_m)
        for j in range(self._W):
            # subtract the log-normalizer outside the log
            log_beta[i, j] = np.log(beta_ss[i, j]) - sum_m
Author: happyche, Project: code, Lines: 26, File: ctm.py
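Note that this example passes a K-by-K covariance matrix to fit(), which actually expects an (n_samples, n_features) data matrix. If the goal is simply to shrink an existing covariance estimate toward a scaled identity, sklearn.covariance.shrunk_covariance does that directly; a minimal sketch (the matrix and coefficient here are arbitrary):

import numpy as np
from sklearn.covariance import shrunk_covariance

emp_cov = np.array([[2.0, 0.3],      # a pre-computed covariance estimate
                    [0.3, 1.0]])
# computes (1 - 0.1) * emp_cov + 0.1 * (trace(emp_cov) / 2) * I
shrunk = shrunk_covariance(emp_cov, shrinkage=0.1)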

Example 2: test_ledoit_wolf_small

def test_ledoit_wolf_small():
    # Compare our blocked implementation to the naive implementation
    X_small = X[:, :4]
    lw = LedoitWolf()
    lw.fit(X_small)
    shrinkage_ = lw.shrinkage_

    assert_almost_equal(shrinkage_, _naive_ledoit_wolf_shrinkage(X_small))
Author: AlexisMignon, Project: scikit-learn, Lines: 8, File: test_covariance.py
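_naive_ledoit_wolf_shrinkage is a private helper from scikit-learn's own test suite. For reference, here is a sketch of the naive O(n·p²) computation it performs, following the Ledoit & Wolf (2004) formulas and assuming X is already centered:

import numpy as np

def naive_ledoit_wolf_shrinkage(X):
    # Naive O(n * p^2) Ledoit-Wolf shrinkage; X is assumed centered.
    n, p = X.shape
    S = X.T @ X / n                                  # empirical covariance
    mu = np.trace(S) / p                             # scale of the identity target
    delta = np.sum((S - mu * np.eye(p)) ** 2) / p    # dispersion of S around mu*I
    # average squared distance between the rank-one terms x x^T and S
    beta = sum(np.sum((np.outer(x, x) - S) ** 2) for x in X) / (n ** 2 * p)
    beta = min(beta, delta)                          # shrinkage never exceeds 1
    return beta / delta if delta > 0 else 0.0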

Example 3: __init__

    def __init__(self, k=2, gamma=1.0, covariance_estimator='ledoit-wolf'):
        self.k = float(k)
        self.gamma = gamma
        self.covariance_estimator = covariance_estimator

        if covariance_estimator == 'empirical':
            self.cov = EmpiricalCovariance(store_precision=False)
        elif covariance_estimator == 'ledoit-wolf':
            self.cov = LedoitWolf(store_precision=False)
        else:
            raise NotImplementedError('%s is not implemented' % covariance_estimator)

        self.x0 = None
        self.x1 = None
Author: codeaudit, Project: melm, Lines: 14, File: dcsk.py

Example 4: maximization

	def maximization(self):
		'''
		M-step of the EM algorithm; uses scikit-learn's LedoitWolf estimator
		to perform covariance matrix shrinkage.

		Reads the sufficient statistics stored on self and updates the model
		parameters in place (no return value).
		'''
		logger.info("running maximization function")
		logger.info("mean maximization")
		mu = np.divide(self.mu, self.ndata)
		logger.info("covariance maximization")
		for i in range(self._K):
			for j in range(self._K):
				self.cov[i, j] = (1.0 / self.ndata) * self.cov[i, j] + self.ndata * mu[i] * mu[j] - self.mu[i] * mu[j] - self.mu[j] * mu[i]
		logger.info("performing covariance shrinkage using sklearn module")
		# assume_centered is a constructor argument, not a fit() argument
		lw = LedoitWolf(assume_centered=True)
		cov_result = lw.fit(self.cov).covariance_
		self.inv_cov = np.linalg.inv(cov_result)
		self.log_det_inv_cov = math_utli.safe_log(np.linalg.det(self.inv_cov))

		logger.info("topic maximization")
		for i in range(self._K):
			# total mass of topic i across all words
			sum_m = np.sum(self.beta[i, :])

			if sum_m == 0:
				sum_m = -1000 * self._W
			else:
				sum_m = np.log(sum_m)

			for j in range(self._W):
				# subtract the log-normalizer outside the log
				self.log_beta[i, j] = math_utli.safe_log(self.beta[i, j]) - sum_m

		logger.info("write model parameters to file")
		logger.info("write gaussian")
		with open('ctm_nu', 'w') as ctm_nu_dump:
			cPickle.dump(self.nu, ctm_nu_dump)
		with open('ctm_cov', 'w') as ctm_cov_dump:
			cPickle.dump(self.cov, ctm_cov_dump)
		with open('ctm_inv_cov', 'w') as ctm_inv_cov_dump:
			cPickle.dump(self.inv_cov, ctm_inv_cov_dump)
		with open('ctm_log_det_inv_cov', 'w') as ctm_log_det_inv_cov_dump:
			cPickle.dump(self.log_det_inv_cov, ctm_log_det_inv_cov_dump)
		logger.info("write topic matrix")
		with open('ctm_log_beta', 'w') as ctm_log_beta_dump:
			cPickle.dump(self.log_beta, ctm_log_beta_dump)
Author: zhibo-work, Project: code, Lines: 48, File: ctm.py
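math_utli.safe_log above is internal to that project and not shown here; a plausible stand-in (an assumption, not the project's actual code) simply guards the logarithm against zero input:

import numpy as np

def safe_log(x, tiny=1e-300):
    # Hypothetical stand-in for math_utli.safe_log: clamp before taking the
    # log so that zero maps to a very negative number instead of -inf.
    return np.log(np.maximum(x, tiny))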

Example 5: prepareProblem

def prepareProblem(filePath, shrinkage=False, subset=False, subsetSize=0):
    # Import data from .csv
    df = pd.read_csv(filePath, sep=';')
    df.index = df.date
    df = df.drop('date', axis=1)

    # Optionally restrict to the most recent subsetSize observations
    if subset:
        df = df.tail(subsetSize)

    # Estimate covariance using the empirical/MLE estimator.
    # Input is expected to be returns, hence assume_centered=True.
    mleFitted = empirical_covariance(X=df, assume_centered=True)
    sigma = mleFitted

    if shrinkage:
        # Estimate covariance using Ledoit-Wolf shrinkage instead
        lw = LedoitWolf(assume_centered=True)
        lwFitted = lw.fit(X=df).covariance_
        sigma = lwFitted

    return sigma
Author: dimitar-popov-fintegral, Project: thesisCodeRepo, Lines: 22, File: quadraticPortfolioOptimization.py
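A hypothetical call, assuming a semicolon-separated returns file (the file name and window size are made up for illustration):

# shrunk covariance of the last 250 observations of a returns file
sigma = prepareProblem('returns.csv', shrinkage=True, subset=True, subsetSize=250)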

Example 6: test_ledoit_wolf

def test_ledoit_wolf():
    """Tests LedoitWolf module on a simple dataset.

    """
    # test shrinkage coeff on a simple data set
    # (passing assume_centered to fit()/score() is the old scikit-learn API;
    # newer versions take assume_centered in the constructor instead)
    lw = LedoitWolf()
    lw.fit(X, assume_centered=True)
    assert_almost_equal(lw.shrinkage_, 0.00192, 4)
    assert_almost_equal(lw.score(X, assume_centered=True), -2.89795, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X,
                                                         assume_centered=True)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    # compare estimates given by LW and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=lw.shrinkage_)
    scov.fit(X, assume_centered=True)
    assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    lw = LedoitWolf()
    lw.fit(X_1d, assume_centered=True)
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X_1d,
                                                         assume_centered=True)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    assert_array_almost_equal((X_1d ** 2).sum() / n_samples, lw.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    lw = LedoitWolf(store_precision=False)
    lw.fit(X, assume_centered=True)
    assert_almost_equal(lw.score(X, assume_centered=True), -2.89795, 4)
    assert(lw.precision_ is None)

    # Same tests without assuming centered data
    # test shrinkage coeff on a simple data set
    lw = LedoitWolf()
    lw.fit(X)
    assert_almost_equal(lw.shrinkage_, 0.007582, 4)
    assert_almost_equal(lw.score(X), 2.243483, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    # compare estimates given by LW and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=lw.shrinkage_)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    lw = LedoitWolf()
    lw.fit(X_1d)
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X_1d)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    assert_array_almost_equal(empirical_covariance(X_1d), lw.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    lw = LedoitWolf(store_precision=False)
    lw.fit(X)
    assert_almost_equal(lw.score(X), 2.2434839, 4)
    assert(lw.precision_ is None)
Author: forkloop, Project: scikit-learn, Lines: 64, File: test_covariance.py
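The ShrunkCovariance comparison in this test reflects a general identity: LedoitWolf is ShrunkCovariance with a data-driven coefficient, so the fitted covariance equals the empirical covariance shrunk by shrinkage_ toward a scaled identity. A quick self-contained check (synthetic data):

import numpy as np
from sklearn.covariance import LedoitWolf, empirical_covariance, shrunk_covariance

rng = np.random.RandomState(0)
X = rng.randn(50, 4)

lw = LedoitWolf().fit(X)
manual = shrunk_covariance(empirical_covariance(X), shrinkage=lw.shrinkage_)
assert np.allclose(manual, lw.covariance_)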

Example 7: test_ledoit_wolf

def test_ledoit_wolf():
    """Tests LedoitWolf module on a simple dataset.

    """
    # test shrinkage coeff on a simple data set
    X_centered = X - X.mean(axis=0)
    lw = LedoitWolf(assume_centered=True)
    lw.fit(X_centered)
    shrinkage_ = lw.shrinkage_
    score_ = lw.score(X_centered)
    assert_almost_equal(ledoit_wolf_shrinkage(X_centered,
                                              assume_centered=True),
                        shrinkage_)
    assert_almost_equal(ledoit_wolf_shrinkage(X_centered,
                                              assume_centered=True,
                                              block_size=6),
                        shrinkage_)
    # compare shrunk covariance obtained from data and from MLE estimate
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X_centered,
                                                         assume_centered=True)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    # compare estimates given by LW and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=lw.shrinkage_, assume_centered=True)
    scov.fit(X_centered)
    assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    lw = LedoitWolf(assume_centered=True)
    lw.fit(X_1d)
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X_1d,
                                                         assume_centered=True)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    assert_array_almost_equal((X_1d ** 2).sum() / n_samples, lw.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    lw = LedoitWolf(store_precision=False, assume_centered=True)
    lw.fit(X_centered)
    assert_almost_equal(lw.score(X_centered), score_, 4)
    assert(lw.precision_ is None)

    # (too) large data set
    X_large = np.ones((20, 200))
    assert_raises(MemoryError, ledoit_wolf, X_large, block_size=100)

    # Same tests without assuming centered data
    # test shrinkage coeff on a simple data set
    lw = LedoitWolf()
    lw.fit(X)
    assert_almost_equal(lw.shrinkage_, shrinkage_, 4)
    assert_almost_equal(lw.shrinkage_, ledoit_wolf_shrinkage(X))
    assert_almost_equal(lw.shrinkage_, ledoit_wolf(X)[1])
    assert_almost_equal(lw.score(X), score_, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    # compare estimates given by LW and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=lw.shrinkage_)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    lw = LedoitWolf()
    lw.fit(X_1d)
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X_1d)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    assert_array_almost_equal(empirical_covariance(X_1d), lw.covariance_, 4)

    # test with one sample
    X_1sample = np.arange(5)
    lw = LedoitWolf()
    with warnings.catch_warnings(record=True):
        lw.fit(X_1sample)

    # test shrinkage coeff on a simple data set (without saving precision)
    lw = LedoitWolf(store_precision=False)
    lw.fit(X)
    assert_almost_equal(lw.score(X), score_, 4)
    assert(lw.precision_ is None)
Author: GbalsaC, Project: bitnamiP, Lines: 83, File: test_covariance.py

Example 8: empirical_covariance

# Negative log-likelihood of the training data under the ground-truth model,
# which we would not have access to in real settings
real_cov = np.dot(coloring_matrix.T, coloring_matrix)
emp_cov = empirical_covariance(X_train)
loglik_real = -log_likelihood(emp_cov, linalg.inv(real_cov))

# #############################################################################
# Compare different approaches to setting the parameter

# GridSearch for an optimal shrinkage coefficient
tuned_parameters = [{'shrinkage': shrinkages}]
cv = GridSearchCV(ShrunkCovariance(), tuned_parameters, cv=5)
cv.fit(X_train)

# Ledoit-Wolf optimal shrinkage coefficient estimate
lw = LedoitWolf()
loglik_lw = lw.fit(X_train).score(X_test)

# OAS coefficient estimate
oa = OAS()
loglik_oa = oa.fit(X_train).score(X_test)

# #############################################################################
# Plot results
fig = plt.figure()
plt.title("Regularized covariance: likelihood and shrinkage coefficient")
plt.xlabel('Regularization parameter: shrinkage coefficient')
plt.ylabel('Error: negative log-likelihood on test data')
# negative log-likelihood curve over the range of shrinkage values
plt.loglog(shrinkages, negative_logliks, label="Negative log-likelihood")
Author: MartinThoma, Project: scikit-learn, Lines: 30, File: plot_covariance_estimation.py
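This excerpt depends on names defined in the elided top of the script. A sketch of that setup, modeled on scikit-learn's plot_covariance_estimation.py example (treat the exact sizes and seed as assumptions):

import numpy as np
import matplotlib.pyplot as plt
from scipy import linalg
from sklearn.covariance import (LedoitWolf, OAS, ShrunkCovariance,
                                empirical_covariance, log_likelihood)
from sklearn.model_selection import GridSearchCV

n_features, n_samples = 40, 20
np.random.seed(42)
base_X_train = np.random.normal(size=(n_samples, n_features))
base_X_test = np.random.normal(size=(n_samples, n_features))

# color the samples to induce correlations
coloring_matrix = np.random.normal(size=(n_features, n_features))
X_train = np.dot(base_X_train, coloring_matrix)
X_test = np.dot(base_X_test, coloring_matrix)

# candidate shrinkage values and their test-set negative log-likelihoods
shrinkages = np.logspace(-2, 0, 30)
negative_logliks = [-ShrunkCovariance(shrinkage=s).fit(X_train).score(X_test)
                    for s in shrinkages]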

Example 9: test_connectivity_measure_outputs

def test_connectivity_measure_outputs():
    n_subjects = 10
    n_features = 49

    # Generate signals and compute covariances
    emp_covs = []
    ledoit_covs = []
    signals = []
    ledoit_estimator = LedoitWolf()
    for k in range(n_subjects):
        n_samples = 200 + k
        signal, _, _ = generate_signals(n_features=n_features, n_confounds=5,
                                        length=n_samples, same_variance=False)
        signals.append(signal)
        signal -= signal.mean(axis=0)
        emp_covs.append((signal.T).dot(signal) / n_samples)
        ledoit_covs.append(ledoit_estimator.fit(signal).covariance_)

    kinds = ["covariance", "correlation", "tangent", "precision",
             "partial correlation"]

    # Check outputs properties
    for cov_estimator, covs in zip([EmpiricalCovariance(), LedoitWolf()],
                                   [emp_covs, ledoit_covs]):
        input_covs = copy.copy(covs)
        for kind in kinds:
            conn_measure = ConnectivityMeasure(kind=kind,
                                               cov_estimator=cov_estimator)
            connectivities = conn_measure.fit_transform(signals)

            # Generic
            assert_true(isinstance(connectivities, np.ndarray))
            assert_equal(len(connectivities), len(covs))

            for k, cov_new in enumerate(connectivities):
                assert_array_equal(input_covs[k], covs[k])
                assert(is_spd(covs[k], decimal=7))

                # Positive definiteness if expected and output value checks
                if kind == "tangent":
                    assert_array_almost_equal(cov_new, cov_new.T)
                    gmean_sqrt = _map_eigenvalues(np.sqrt,
                                                  conn_measure.mean_)
                    assert(is_spd(gmean_sqrt, decimal=7))
                    assert(is_spd(conn_measure.whitening_, decimal=7))
                    assert_array_almost_equal(conn_measure.whitening_.dot(
                        gmean_sqrt), np.eye(n_features))
                    assert_array_almost_equal(gmean_sqrt.dot(
                        _map_eigenvalues(np.exp, cov_new)).dot(gmean_sqrt),
                        covs[k])
                elif kind == "precision":
                    assert(is_spd(cov_new, decimal=7))
                    assert_array_almost_equal(cov_new.dot(covs[k]),
                                              np.eye(n_features))
                elif kind == "correlation":
                    assert(is_spd(cov_new, decimal=7))
                    d = np.sqrt(np.diag(np.diag(covs[k])))
                    # comparing estimator instances with == is always False;
                    # compare the type instead
                    if type(cov_estimator) is EmpiricalCovariance:
                        assert_array_almost_equal(d.dot(cov_new).dot(d),
                                                  covs[k])
                    assert_array_almost_equal(np.diag(cov_new),
                                              np.ones((n_features)))
                elif kind == "partial correlation":
                    prec = linalg.inv(covs[k])
                    d = np.sqrt(np.diag(np.diag(prec)))
                    assert_array_almost_equal(d.dot(cov_new).dot(d), -prec +
                                              2 * np.diag(np.diag(prec)))

    # Check the mean_
    for kind in kinds:
        conn_measure = ConnectivityMeasure(kind=kind)
        conn_measure.fit_transform(signals)
        assert_equal((conn_measure.mean_).shape, (n_features, n_features))
        if kind != 'tangent':
            assert_array_almost_equal(
                conn_measure.mean_,
                np.mean(conn_measure.transform(signals), axis=0))

    # Check that the mean isn't modified in transform
    conn_measure = ConnectivityMeasure(kind='covariance')
    conn_measure.fit(signals[:1])
    mean = conn_measure.mean_
    conn_measure.transform(signals[1:])
    assert_array_equal(mean, conn_measure.mean_)

    # Check vectorization option
    for kind in kinds:
        conn_measure = ConnectivityMeasure(kind=kind)
        connectivities = conn_measure.fit_transform(signals)
        conn_measure = ConnectivityMeasure(vectorize=True, kind=kind)
        vectorized_connectivities = conn_measure.fit_transform(signals)
        assert_array_almost_equal(vectorized_connectivities,
                                  sym_matrix_to_vec(connectivities))

    # Check not fitted error
    assert_raises_regex(
        ValueError, 'has not been fitted. ',
        ConnectivityMeasure().inverse_transform,
        vectorized_connectivities)

#......... (part of the code omitted here) .........
Author: bthirion, Project: nilearn, Lines: 101, File: test_connectivity_matrices.py
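The "partial correlation" branch above encodes a standard identity: partial correlations are the negated entries of the precision matrix, normalized by its diagonal. As a standalone sketch:

import numpy as np
from scipy import linalg

def partial_correlation(cov):
    # Partial correlation matrix from a covariance matrix:
    # pcor_ij = -prec_ij / sqrt(prec_ii * prec_jj), with 1s on the diagonal.
    prec = linalg.inv(cov)
    d = np.sqrt(np.diag(prec))
    pcor = -prec / np.outer(d, d)
    np.fill_diagonal(pcor, 1.0)
    return pcor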

Example 10: main

def main():
    '''
    Construct a co-occurrence network from gene expression data.

    Main entry point.
    '''

    # Read in the data
    if os.path.isfile(DATA_PICKLE):
        print("reading previously saved data from pickle %s" % (DATA_PICKLE))
        with open(DATA_PICKLE, 'rb') as file:
            df = pickle.load(file)
            lwe = pickle.load(file)
            pmat = pickle.load(file)
            pcor_indices = pickle.load(file)
            pcor = pickle.load(file)
            lfdr_pcor = pickle.load(file)
            #prob = pickle.load(file)
    else:
        print("reading in data from %s" % (FILENAME))
        df = pd.read_csv(FILENAME, sep='\t')
        print("found %d rows and %d columns" % (df.shape[0], df.shape[1]))
        # compute the row means and sort the data frame by descending means
        df['row_means'] = df.mean(axis=1)
        df.sort_values('row_means', axis=0, ascending=False, inplace=True)
        df.drop('row_means', axis=1, inplace=True)
        # take the most abundant genes
        df = df.head(PRUNE_GENES)

        # Ledoit-Wolf optimal shrinkage coefficient estimate
        print("computing Ledoit-Wolf optimal shrinkage coeffecient estimate")
        lwe = LedoitWolf().fit(df.transpose())
        pmat = lwe.get_precision()
        # Convert symmetric matrix to array, first by getting indices
        # of the off diagonal elements, second by pulling them into
        # separate array (pcor).
        print("extracting off diagnol elements of precision matrix")
        pcor_indices = np.triu_indices(pmat.shape[0], 1)
        pcor = pmat[pcor_indices]

        # Determine edges by computing lfdr of pcor.
        print("computing lfdr of partial correlations")
        fdrtool = importr('fdrtool')
        lfdr_pcor = fdrtool.fdrtool(FloatVector(pcor), statistic="correlation", plot=False)
        #prob = 1-lfdr_pcor['lfdr']

        with open(DATA_PICKLE, 'wb') as file:
            pickle.dump(df, file, pickle.HIGHEST_PROTOCOL)
            pickle.dump(lwe, file, pickle.HIGHEST_PROTOCOL)
            pickle.dump(pmat, file, pickle.HIGHEST_PROTOCOL)
            pickle.dump(pcor_indices, file, pickle.HIGHEST_PROTOCOL)
            pickle.dump(pcor, file, pickle.HIGHEST_PROTOCOL)
            pickle.dump(lfdr_pcor, file, pickle.HIGHEST_PROTOCOL)
            #pickle.dump(prob, file, pickle.HIGHEST_PROTOCOL)

    print("making 1-lfdr vs. pcor plot")
    prob = 1-np.array(lfdr_pcor.rx2('lfdr'))
    with PdfPages(PDF_FILENAME) as pdf:
        plt.figure(figsize=(3, 3))
        plt.plot(range(7), [3, 1, 4, 1, 5, 9, 2], 'r-o')
        plt.title('Page One')
        pdf.savefig()  # saves the current figure into a pdf page
        plt.close()

        plt.plot(pcor[0:10000:10], prob[0:10000:10], 'o', markeredgecolor='k', markersize=3)
        plt.title("THIS IS A PLOT TITLE, YOU BET")
        plt.xlabel('partial correlation')
        plt.ylabel('lfdr')
        pdf.savefig()
        plt.close()
Author: JanetMatsen, Project: meta4_bins_janalysis, Lines: 70, File: network.py
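To finish the network construction sketched in this example, the pair probabilities still have to be turned into edges. A hypothetical helper (the names mirror this example's variables; the cutoff is arbitrary):

import numpy as np

def edges_from_pcor(pcor_indices, pcor, prob, cutoff=0.9):
    # pcor_indices: (rows, cols) as returned by np.triu_indices;
    # prob: per-pair posterior probability 1 - lfdr, aligned with pcor.
    rows, cols = pcor_indices
    keep = prob > cutoff
    return list(zip(rows[keep], cols[keep], pcor[keep]))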

Example 11: test_ledoit_wolf

def test_ledoit_wolf():
    # Tests LedoitWolf module on a simple dataset.
    # test shrinkage coeff on a simple data set
    X_centered = X - X.mean(axis=0)
    lw = LedoitWolf(assume_centered=True)
    lw.fit(X_centered)
    shrinkage_ = lw.shrinkage_

    score_ = lw.score(X_centered)
    assert_almost_equal(ledoit_wolf_shrinkage(X_centered,
                                              assume_centered=True),
                        shrinkage_)
    assert_almost_equal(ledoit_wolf_shrinkage(X_centered, assume_centered=True,
                                              block_size=6),
                        shrinkage_)
    # compare shrunk covariance obtained from data and from MLE estimate
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X_centered,
                                                         assume_centered=True)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    # compare estimates given by LW and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=lw.shrinkage_, assume_centered=True)
    scov.fit(X_centered)
    assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    lw = LedoitWolf(assume_centered=True)
    lw.fit(X_1d)
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X_1d,
                                                         assume_centered=True)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    assert_array_almost_equal((X_1d ** 2).sum() / n_samples, lw.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    lw = LedoitWolf(store_precision=False, assume_centered=True)
    lw.fit(X_centered)
    assert_almost_equal(lw.score(X_centered), score_, 4)
    assert(lw.precision_ is None)

    # Same tests without assuming centered data
    # test shrinkage coeff on a simple data set
    lw = LedoitWolf()
    lw.fit(X)
    assert_almost_equal(lw.shrinkage_, shrinkage_, 4)
    assert_almost_equal(lw.shrinkage_, ledoit_wolf_shrinkage(X))
    assert_almost_equal(lw.shrinkage_, ledoit_wolf(X)[1])
    assert_almost_equal(lw.score(X), score_, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    # compare estimates given by LW and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=lw.shrinkage_)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    lw = LedoitWolf()
    lw.fit(X_1d)
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X_1d)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    assert_array_almost_equal(empirical_covariance(X_1d), lw.covariance_, 4)

    # test with one sample
    # warning should be raised when using only 1 sample
    X_1sample = np.arange(5).reshape(1, 5)
    lw = LedoitWolf()
    assert_warns(UserWarning, lw.fit, X_1sample)
    assert_array_almost_equal(lw.covariance_,
                              np.zeros(shape=(5, 5), dtype=np.float64))

    # test shrinkage coeff on a simple data set (without saving precision)
    lw = LedoitWolf(store_precision=False)
    lw.fit(X)
    assert_almost_equal(lw.score(X), score_, 4)
    assert(lw.precision_ is None)
Author: AlexisMignon, Project: scikit-learn, Lines: 80, File: test_covariance.py

Example 12: threshold_from_simulations

    def threshold_from_simulations(self, X, precision=2000, verbose=False,
                                   n_jobs=-1):
        """
        """
        import multiprocessing as mp
        if n_jobs < 1:
            n_jobs = mp.cpu_count()
        n_samples, n_features = X.shape
        n = n_samples
        p = n_features
        h = self.support_.sum()
        lw = LedoitWolf()
        ref_covariance = lw.fit(X[self.support_]).covariance_
        c = sp.stats.chi2(p + 2).cdf(
            sp.stats.chi2(p).ppf(float(h) / n)) / (float(h) / n)
        sigma_root = np.linalg.cholesky(ref_covariance / c)
        all_h = []

        # inliers distribution
        dist_in = np.array([], ndmin=1)
        max_i = max(1, int(precision / float(self.support_.sum())))
        for i in range(max_i):
            if verbose and max_i > 4 and (i % (max_i / 4) == 0):
                print "\t", 50 * i / float(max_i), "%"
            #sigma_root = np.diag(np.sqrt(eigenvalues))
            #sigma_root = np.eye(n_features)
            X1, _ = dg.generate_gaussian(
                n_samples, n_features, np.zeros(n_features),
                cov_root=sigma_root)
            # learn location and shape
            clf = EllipticEnvelopeRMCDl1(
                correction=self.correction, shrinkage=self.shrinkage,
                h=self.support_.sum() / float(n_samples), no_fit=True).fit(
                X1)
            X2 = X1 - clf.location_
            dist_in = np.concatenate(
                (dist_in, clf.decision_function(
                        X2[clf.support_], raw_values=True)))
            all_h.append(clf.h)

        # outliers distribution
        dist_out = np.array([], ndmin=1)
        max_i = max(1, int(precision / float(n_samples - self.support_.sum())))
        for i in range(max_i):
            if verbose and max_i > 4 and (i % (max_i / 4) == 0):
                print "\t", 50 * (1. + i / float(max_i)), "%"
            X1, _ = dg.generate_gaussian(
                n_samples, n_features, np.zeros(n_features),
                cov_root=sigma_root)
            # learn location and shape
            clf = EllipticEnvelopeRMCDl1(
                correction=self.correction, shrinkage=self.shrinkage,
                h=self.support_.sum() / float(n_samples), no_fit=True).fit(X1)
            X2 = X1 - clf.location_
            dist_out = np.concatenate(
                (dist_out, clf.decision_function(
                        X2[~clf.support_], raw_values=True)))
            all_h.append(clf.h)
        self.dist_in = np.sort(dist_in)
        self.dist_out = np.sort(dist_out)
        self.h_mean = np.mean(all_h)

        return self.dist_out
Author: VirgileFritsch, Project: outliers, Lines: 63, File: elliptic_envelope.py
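The chi-squared expression near the top of the method is the usual small-sample consistency correction for covariances fitted on the h most central of n points (an MCD-style factor); isolated here as a sketch:

import scipy.stats as st

def mcd_consistency_factor(h, n, p):
    # Rescales a covariance fitted on the h most central of n Gaussian points
    # so that it is consistent for the full distribution in dimension p.
    alpha = float(h) / n
    return st.chi2(p + 2).cdf(st.chi2(p).ppf(alpha)) / alpha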

Example 13: toeplitz

# (setup assumed from the original scikit-learn example, plot_lw_vs_oas.py)
import numpy as np
import pylab as pl
from scipy.linalg import cholesky, toeplitz
from sklearn.covariance import LedoitWolf, OAS

n_features = 100
r = 0.1
real_cov = toeplitz(r ** np.arange(n_features))
coloring_matrix = cholesky(real_cov)

n_samples_range = np.arange(6, 31, 1)
repeat = 100
lw_mse = np.zeros((n_samples_range.size, repeat))
oa_mse = np.zeros((n_samples_range.size, repeat))
lw_shrinkage = np.zeros((n_samples_range.size, repeat))
oa_shrinkage = np.zeros((n_samples_range.size, repeat))
for i, n_samples in enumerate(n_samples_range):
    for j in range(repeat):
        X = np.dot(
            np.random.normal(size=(n_samples, n_features)), coloring_matrix.T)

        lw = LedoitWolf(store_precision=False)
        # old API: newer scikit-learn takes assume_centered in the constructor
        lw.fit(X, assume_centered=True)
        lw_mse[i, j] = lw.error_norm(real_cov, scaling=False)
        lw_shrinkage[i, j] = lw.shrinkage_

        oa = OAS(store_precision=False)
        oa.fit(X, assume_centered=True)
        oa_mse[i, j] = oa.error_norm(real_cov, scaling=False)
        oa_shrinkage[i, j] = oa.shrinkage_

# plot MSE
pl.subplot(2, 1, 1)
pl.errorbar(n_samples_range, lw_mse.mean(1), yerr=lw_mse.std(1),
            label='Ledoit-Wolf', color='g')
pl.errorbar(n_samples_range, oa_mse.mean(1), yerr=oa_mse.std(1),
            label='OAS', color='r')
Author: Yangqing, Project: scikit-learn, Lines: 31, File: plot_lw_vs_oas.py

Example 14: plot_psds

def plot_psds(psd_file, data_dir='/auto/tdrive/mschachter/data'):

    # read PairwiseCF file
    pcf_file = os.path.join(data_dir, 'aggregate', 'pairwise_cf.h5')
    pcf = AggregatePairwiseCF.load(pcf_file)
    # pcf.zscore_within_site()

    g = pcf.df.groupby(['bird', 'block', 'segment', 'electrode'])
    nsamps_electrodes = len(g)

    i = pcf.df.cell_index != -1
    g = pcf.df[i].groupby(['bird', 'block', 'segment', 'electrode', 'cell_index'])
    nsamps_cells = len(g)

    print '# of electrodes: %d' % nsamps_electrodes
    print '# of cells: %d' % nsamps_cells
    print '# of lfp samples: %d' % (pcf.lfp_psds.shape[0])
    print '# of spike psd samples: %d' % (pcf.spike_psds.shape[0])

    # compute the LFP mean and std
    lfp_psds = deepcopy(pcf.lfp_psds)
    print 'lfp_psds_ind: max=%f, q99=%f' % (lfp_psds.max(), np.percentile(lfp_psds.ravel(), 99))
    log_transform(lfp_psds)
    print 'lfp_psds_ind: max=%f, q99=%f' % (lfp_psds.max(), np.percentile(lfp_psds.ravel(), 99))
    nz = lfp_psds.sum(axis=1) > 0
    lfp_psds = lfp_psds[nz, :]
    lfp_psd_mean = lfp_psds.mean(axis=0)
    lfp_psd_std = lfp_psds.std(axis=0, ddof=1)
    nsamps_lfp = lfp_psds.shape[0]

    # get the spike rate
    spike_rate = pcf.df.spike_rate.values
    # plt.figure()
    # plt.hist(spike_rate, bins=20, color='g', alpha=0.7)
    # plt.title('Spike Rate Histogram, q1=%0.3f, q5=%0.3f, q10=%0.3f, q50=%0.3f, q99=%0.3f' %
    #           (np.percentile(spike_rate, 1), np.percentile(spike_rate, 5), np.percentile(spike_rate, 10),
    #           np.percentile(spike_rate, 50), np.percentile(spike_rate, 99)))
    # plt.show()

    # compute the covariance
    lfp_psd_z = deepcopy(lfp_psds)
    lfp_psd_z -= lfp_psd_mean
    lfp_psd_z /= lfp_psd_std
    lfp_and_spike_cov_est = LedoitWolf()
    lfp_and_spike_cov_est.fit(lfp_psd_z)
    lfp_and_spike_cov = lfp_and_spike_cov_est.covariance_

    """
    # read CRCNS file
    cell_data = dict()
    hf = h5py.File(psd_file, 'r')
    cnames = hf.attrs['col_names']
    for c in cnames:
        cell_data[c] = np.array(hf[c])
    crcns_psds = np.array(hf['psds'])
    freqs = hf.attrs['freqs']
    hf.close()

    cell_df = pd.DataFrame(cell_data)
    print 'regions=',cell_df.superregion.unique()

    name_map = {'brainstem':'MLd', 'thalamus':'OV', 'cortex':'Field L+CM'}
    """

    # resample the lfp mean and std
    freq_rs = np.linspace(pcf.freqs.min(), pcf.freqs.max(), 1000)
    
    lfp_mean_cs = interp1d(pcf.freqs, lfp_psd_mean, kind='cubic')
    lfp_mean_rs = lfp_mean_cs(freq_rs)
    
    lfp_std_cs = interp1d(pcf.freqs, lfp_psd_std, kind='cubic')
    lfp_std_rs = lfp_std_cs(freq_rs)

    # concatenate the lfp psd and log spike rate
    lfp_psd_and_spike_rate = list()
    for k, (li, si) in enumerate(zip(pcf.df['lfp_index'], pcf.df['spike_index'])):
        lpsd = pcf.lfp_psds[li, :]
        srate, sstd = pcf.spike_rates[si, :]
        if srate > 0:
            lfp_psd_and_spike_rate.append(np.hstack([lpsd, np.log(srate)]))
    lfp_psd_and_spike_rate = np.array(lfp_psd_and_spike_rate)

    nfreqs = len(pcf.freqs)
    lfp_rate_cc = np.zeros([nfreqs])
    for k in range(nfreqs):
        lfp_rate_cc[k] = np.corrcoef(lfp_psd_and_spike_rate[:, k], lfp_psd_and_spike_rate[:, -1])[0, 1]

    fig = plt.figure(figsize=(24, 12))
    fig.subplots_adjust(left=0.05, right=0.95, wspace=0.30, hspace=0.30)

    nrows = 2
    ncols = 100
    gs = plt.GridSpec(nrows, ncols)

    ax = plt.subplot(gs[0, :35])
    plt.errorbar(freq_rs, lfp_mean_rs, yerr=lfp_std_rs, c='k', linewidth=9.0, elinewidth=3.0,
                 ecolor='#D8D8D8', alpha=0.5, capthick=0.)
    plt.axis('tight')
    plt.xlabel('Frequency (Hz)')
    plt.ylabel('Power (dB)')
#......... (part of the code omitted here) .........
Author: mschachter, Project: writings, Lines: 101, File: figure.py

Example 15: test_connectivity_measure_outputs

def test_connectivity_measure_outputs():
    n_subjects = 10
    n_features = 49
    n_samples = 200

    # Generate signals and compute covariances
    emp_covs = []
    ledoit_covs = []
    signals = []
    random_state = check_random_state(0)
    ledoit_estimator = LedoitWolf()
    for k in range(n_subjects):
        signal = random_state.randn(n_samples, n_features)
        signals.append(signal)
        signal -= signal.mean(axis=0)
        emp_covs.append((signal.T).dot(signal) / n_samples)
        ledoit_covs.append(ledoit_estimator.fit(signal).covariance_)

    kinds = ["correlation", "tangent", "precision",
             "partial correlation"]

    # Check outputs properties
    for cov_estimator, covs in zip([EmpiricalCovariance(), LedoitWolf()],
                                   [emp_covs, ledoit_covs]):
        input_covs = copy.copy(covs)
        for kind in kinds:
            conn_measure = ConnectivityMeasure(kind=kind,
                                               cov_estimator=cov_estimator)
            connectivities = conn_measure.fit_transform(signals)

            # Generic
            assert_true(isinstance(connectivities, np.ndarray))
            assert_equal(len(connectivities), len(covs))

            for k, cov_new in enumerate(connectivities):
                assert_array_equal(input_covs[k], covs[k])
                assert(is_spd(covs[k], decimal=7))

                # Positive definiteness if expected and output value checks
                if kind == "tangent":
                    assert_array_almost_equal(cov_new, cov_new.T)
                    gmean_sqrt = _map_eigenvalues(np.sqrt,
                                                  conn_measure.mean_)
                    assert(is_spd(gmean_sqrt, decimal=7))
                    assert(is_spd(conn_measure.whitening_, decimal=7))
                    assert_array_almost_equal(conn_measure.whitening_.dot(
                        gmean_sqrt), np.eye(n_features))
                    assert_array_almost_equal(gmean_sqrt.dot(
                        _map_eigenvalues(np.exp, cov_new)).dot(gmean_sqrt),
                        covs[k])
                elif kind == "precision":
                    assert(is_spd(cov_new, decimal=7))
                    assert_array_almost_equal(cov_new.dot(covs[k]),
                                              np.eye(n_features))
                elif kind == "correlation":
                    assert(is_spd(cov_new, decimal=7))
                    d = np.sqrt(np.diag(np.diag(covs[k])))
                    # comparing estimator instances with == is always False;
                    # compare the type instead
                    if type(cov_estimator) is EmpiricalCovariance:
                        assert_array_almost_equal(d.dot(cov_new).dot(d),
                                                  covs[k])
                    assert_array_almost_equal(np.diag(cov_new),
                                              np.ones((n_features)))
                elif kind == "partial correlation":
                    prec = linalg.inv(covs[k])
                    d = np.sqrt(np.diag(np.diag(prec)))
                    assert_array_almost_equal(d.dot(cov_new).dot(d), -prec +
                                              2 * np.diag(np.diag(prec)))
Author: AlexandreAbraham, Project: nilearn, Lines: 67, File: test_connectivity_matrices.py


Note: the sklearn.covariance.LedoitWolf class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by various developers; copyright of the source code remains with the original authors, and distribution and use are subject to each project's license. Do not reproduce without permission.