本文整理汇总了Python中sklearn.mixture.GaussianMixture.bic方法的典型用法代码示例。如果您正苦于以下问题:Python GaussianMixture.bic方法的具体用法?Python GaussianMixture.bic怎么用?Python GaussianMixture.bic使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.mixture.GaussianMixture的用法示例。
在下文中一共展示了GaussianMixture.bic方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: fit_mixtures
# 需要导入模块: from sklearn.mixture import GaussianMixture [as 别名]
# 或者: from sklearn.mixture.GaussianMixture import bic [as 别名]
def fit_mixtures(X, mag, mbins, binwidth=0.2, seed=None,
                 keepscore=False, keepbic=False, **kwargs):
    """Fit one Gaussian mixture model per magnitude bin.

    For every bin centre in ``mbins``, the rows of ``X`` whose ``mag`` value
    lies strictly within ``binwidth`` of the centre are selected and a new
    ``GaussianMixture(**kwargs)`` is fitted to them. Bins may overlap, in
    which case a row contributes to more than one fit.

    Parameters
    ----------
    X
        Data to fit; indexed along its first axis with the selected rows.
    mag
        Per-row value compared against each bin centre.
    mbins
        Iterable of bin centres. An empty iterable yields empty result lists.
    binwidth
        Half-width of each bin (``abs(mag - centre) < binwidth``).
    seed
        If not ``None``, passed to ``np.random.seed`` before fitting.
        ``0`` is honoured as a valid seed.
    keepscore
        If true, also return ``gmm.score`` of each fit on its own bin data.
    keepbic
        If true, also return ``gmm.bic`` of each fit on its own bin data.
    **kwargs
        Forwarded to ``GaussianMixture``; ``n_components`` defaults to 25 and
        ``covariance_type`` to ``'full'`` when not supplied.

    Returns
    -------
    tuple
        ``(fits,)``, followed by ``scores`` if ``keepscore`` and then
        ``bics`` if ``keepbic``. Each element is a list with one entry per
        bin centre, in ``mbins`` order.
    """
    kwargs.setdefault('n_components', 25)
    kwargs.setdefault('covariance_type', 'full')
    # Compare against None rather than truthiness so that seed=0 is applied.
    if seed is not None:
        np.random.seed(seed)
    fits = []
    scores = []
    bics = []
    for bincenter in mbins:
        # this is not an efficient way to assign bins, but the time
        # is negligible compared to the GMM fitting anyway
        ii = np.where(np.abs(mag - bincenter) < binwidth)[0]
        members = X[ii]
        gmm = GaussianMixture(**kwargs)
        gmm.fit(members)
        fits.append(gmm)
        if keepscore:
            scores.append(gmm.score(members))
        if keepbic:
            bics.append(gmm.bic(members))
    rv = (fits,)
    if keepscore:
        rv += (scores,)
    if keepbic:
        rv += (bics,)
    return rv
示例2: fit_gmm
# 需要导入模块: from sklearn.mixture import GaussianMixture [as 别名]
# 或者: from sklearn.mixture.GaussianMixture import bic [as 别名]
def fit_gmm(
max_components,
n_distances,
atoms,
distances,
regularization_type="bic",
covariance_type="diag",
):
"""
Fit a GMM to a set of distances.

This routine will fit a Gaussian mixture model from a set
of input distances using sklearn_. The resulting set of parameters can
be used to initialize a `GMMDistanceRestraint` in a MELD simulation.

.. _sklearn: http://scikit-learn.org/stable/modules/mixture.html

Parameters
----------
max_components: int
Maximum number of components to use in fitting GMM.
n_distances: int
Number of distances involved in GMM.
atoms: list of (int, str, int, str) tuples.
The atoms that are involved in each distance are specified
as a list of `n_distances` tuples, each of the form
(r1, n1, r2, n2), where r1, r2 are the integer residue
indices starting from one, and n1, n2 are the atom names.
distances: array_like(n_dim=2)
An (n_samples, n_distances) array of distances (in nm) to fit.
regularization_type: str
The type of regularization to use, options are "bic"
and "dirichlet".
covariance_type: str
The form of the covariance matrix, options are "diag"
and "full".

Returns
-------
GMMParams
The fit parameters, which can be used to initialize
a `meld.system.restraints.GMMDistanceRestraint` using
``GMMDistanceRestraint.from_params``.

Raises
------
ValueError
If `distances` does not have `n_distances` columns, if `atoms` does
not have `n_distances` entries, or if `regularization_type` or
`covariance_type` is not one of the listed options.

Notes
-----
There are two ways to regularize in order to prevent over fitting.

``regularization_type="bic"`` will use the Bayesian information
criterion to penalize models that have more parameters. When
using ``bic``, the final number of components in the model
will be less than or equal to `max_components`.

``regularization_type="dirichlet"`` will use a Dirichlet process
prior on the weight distributions. The final number of components
in the model will always be equal to `max_components`, but most
of the weights will be small.

There are two forms for the covariance matrix, which differ in
the number of parameters and expressiveness.

``covariance_type="diag"`` will fit using a diagonal covariance
matrix. This has few parameters, but does not capture correlations
between input distances. Typically, choosing ``"diag"`` will
result in a model with more components.

``covariance_type="full"`` will fit using a full representation
of the covariance matrix. This captures correlations between
input distances, but has far more parameters and is potentially
prone to over fitting.
"""
#
# Constants
#
# NOTE(review): none of these are used in the visible part of the function;
# presumably they configure the estimators in the elided section — confirm.
N_INIT = 25
MAX_ITER = 1000
KFOLD_SPLITS = 5
REG_COVAR = 1e-4
RANDOMSEARCH_TRIALS = 32
#
# Check the inputs
#
# The second axis of `distances` must hold one column per distance.
if distances.shape[1] != n_distances:
raise ValueError("distances must have shape (n_samples, n_distances)")
# NOTE(review): the message below says "n_components", but the check compares
# against n_distances — the message looks wrong; confirm and correct it.
if len(atoms) != n_distances:
raise ValueError(
"atoms must be a list of (ind1, name1, ind2, name2) of "
"length n_components"
)
if regularization_type not in ["bic", "dirichlet"]:
raise ValueError('regularization_type must be one of ["bic", "dirichlet"]')
if covariance_type not in ["diag", "full"]:
raise ValueError('covariance_type must be one of ["diag", "full"]')
if max_components < 1:
#......... part of the code is omitted here .........
示例3: gmm_analysis
# 需要导入模块: from sklearn.mixture import GaussianMixture [as 别名]
# 或者: from sklearn.mixture.GaussianMixture import bic [as 别名]
def gmm_analysis(self, X_train, X_test, y_train, y_test, data_set_name, max_clusters, analysis_name='GMM'):
    """Fit GMMs for 2..max_clusters components and plot selection metrics.

    The training data is scaled with ``RobustScaler``; for every component
    count ``k`` in ``2..max_clusters`` a full-covariance ``GaussianMixture``
    is fitted to it. BIC/AIC and five label-agreement scores (computed from
    ``y_train`` against the predicted component of each training row) are
    collected and handed to ``plot_helper().plot_series`` as two figures:

    * ``./<out_dir>/<data_set>_<analysis>_ic.png``    -- BIC and AIC
    * ``./<out_dir>/<data_set>_<analysis>_score.png`` -- agreement scores

    where ``<out_dir>`` is ``self.out_dir`` and the two name parts are the
    lower-cased ``data_set_name`` and ``analysis_name``.

    ``X_test`` is only passed through the fitted scaler and ``y_test`` is not
    read at all; both are kept in the signature for callers.

    Returns ``None``.
    """
    scl = RobustScaler()
    X_train_scl = scl.fit_transform(X_train)
    # Not used below; the call is retained so the scaler sees the same
    # sequence of calls as before.
    X_test_scl = scl.transform(X_test)

    # (legend label, scoring function) in the order the series are plotted.
    metrics = (
        ('homogeneity', homogeneity_score),
        ('completeness', completeness_score),
        ('measure', v_measure_score),
        ('adjusted_rand', adjusted_rand_score),
        ('adjusted_mutual_info', adjusted_mutual_info_score),
    )
    em_scores = {label: [] for label, _ in metrics}
    em_bic = []
    em_aic = []

    # The flattened labels do not depend on k, so build them once up front
    # instead of on every iteration.
    y_train_score = y_train.reshape(y_train.shape[0],)

    cluster_range = np.arange(2, max_clusters+1, 1)
    for k in cluster_range:
        print('K Clusters: ', k)

        ##
        ## Expectation Maximization
        ##
        em = GaussianMixture(n_components=k, covariance_type='full')
        em.fit(X_train_scl)
        em_pred = em.predict(X_train_scl)

        em_bic.append(em.bic(X_train_scl))
        em_aic.append(em.aic(X_train_scl))

        # metrics
        for label, scorer in metrics:
            em_scores[label].append(scorer(y_train_score, em_pred))

    ##
    ## Plots
    ##
    ph = plot_helper()
    stem = data_set_name.lower() + '_' + analysis_name.lower()

    ##
    ## BIC/AIC Plot
    ##
    title = 'Information Criterion Plot (' + analysis_name + ') for ' + data_set_name
    name = stem + '_ic'
    filename = './' + self.out_dir + '/' + name + '.png'
    ph.plot_series(cluster_range,
                   [em_bic, em_aic],
                   [None, None],
                   ['bic', 'aic'],
                   cm.viridis(np.linspace(0, 1, 2)),
                   ['o', '*'],
                   title,
                   'Number of Clusters',
                   'Information Criterion',
                   filename)

    ##
    ## Score Plot
    ##
    labels = [label for label, _ in metrics]
    title = 'Score Summary Plot (' + analysis_name + ') for ' + data_set_name
    name = stem + '_score'
    filename = './' + self.out_dir + '/' + name + '.png'
    # One placeholder and one marker per series, matching the convention of
    # the BIC/AIC call above (the lists previously carried a sixth, unmatched
    # entry).
    ph.plot_series(cluster_range,
                   [em_scores[label] for label in labels],
                   [None] * len(labels),
                   labels,
                   cm.viridis(np.linspace(0, 1, len(labels))),
                   ['o', '^', 'v', '>', '<'],
                   title,
                   'Number of Clusters',
                   'Score',
                   filename)
示例4: enumerate
# 需要导入模块: from sklearn.mixture import GaussianMixture [as 别名]
# 或者: from sklearn.mixture.GaussianMixture import bic [as 别名]
# Compare the four GaussianMixture covariance types on a two-cluster sample:
# draw N1 + N2 points, fit a 2-component GMM per covariance type, and plot the
# misclassification rate next to the BIC of each fit.
N = N1 + N2
x1 = np.random.multivariate_normal(mean=(1, 2), cov=cov1, size=N1)
# Apply a linear map to the first cluster only.
m = np.array(((1, 1), (1, 3)))
x1 = x1.dot(m)
x2 = np.random.multivariate_normal(mean=(-1, 10), cov=cov1, size=N2)
x = np.vstack((x1, x2))
y = np.array([0]*N1 + [1]*N2)

types = ('spherical', 'diag', 'tied', 'full')
err = np.empty(len(types))
bic = np.empty(len(types))
# `cov_type` rather than `type`, so the builtin is not shadowed at module level.
for i, cov_type in enumerate(types):
    gmm = GaussianMixture(n_components=2, covariance_type=cov_type, random_state=0)
    gmm.fit(x)
    err[i] = 1 - accuracy_rate(gmm.predict(x), y)
    bic[i] = gmm.bic(x)
print('错误率:', err.ravel())
print('BIC:', bic.ravel())

# One bar position per covariance type.
xpos = np.arange(len(types))
plt.figure(facecolor='w')
ax = plt.axes()
# Error rate on the left axis, BIC on a twin right axis, bars side by side.
b1 = ax.bar(xpos-0.3, err, width=0.3, color='#77E0A0', edgecolor='k')
b2 = ax.twinx().bar(xpos, bic, width=0.3, color='#FF8080', edgecolor='k')
# Pass the on/off flag positionally: the keyword was renamed from `b` to
# `visible` in newer Matplotlib, and the positional form works with both.
plt.grid(True, ls=':', color='#606060')
bic_min, bic_max = expand(bic.min(), bic.max())
plt.ylim((bic_min, bic_max))
plt.xticks(xpos, types)
plt.legend([b1[0], b2[0]], ('错误率', 'BIC'))
plt.title('不同方差类型的误差率和BIC', fontsize=15)
plt.show()
示例5: range
# 需要导入模块: from sklearn.mixture import GaussianMixture [as 别名]
# 或者: from sklearn.mixture.GaussianMixture import bic [as 别名]
w = np.exp(-np.exp(3 * w.mean(axis=1)))

# GMM model selection with BIC: fit every (covariance type, component count)
# combination and keep the model with the lowest BIC.
# np.inf instead of the np.infty alias, which NumPy 2.0 removed.
lowest_bic = np.inf
bic = []
n_components_range = range(1, 7)
cv_types = ['spherical', 'tied', 'diag', 'full']
for cv_type in cv_types:
    for n_components in n_components_range:
        # Fit a mixture of Gaussians with EM
        gmm = GaussianMixture(n_components=n_components,
                              covariance_type=cv_type, n_init=5)
        gmm.fit(X)
        bic.append(gmm.bic(X))
        if bic[-1] < lowest_bic:
            lowest_bic = bic[-1]
            best_gmm = gmm

preds = best_gmm.predict(X)
probs = best_gmm.predict_proba(X)

# `bic` was filled with the covariance type as the outer loop, so consecutive
# runs of len(n_components_range) values belong to one covariance type. Reshape
# to one row per type; reshaping to (-1, len(cv_types)) and transposing would
# interleave values from different types under the wrong legend label.
bic_by_type = np.array(bic).reshape(len(cv_types), -1)
for name, col in zip(cv_types, bic_by_type):
    plt.plot(n_components_range, col, label=name)
plt.legend()
plt.savefig('gmm_sklearn_bic/bic.pdf')
data_thr['preds'] = pd.Series(preds).astype("category")