当前位置: 首页>>代码示例>>Python>>正文


Python GaussianMixture.bic方法代码示例

本文整理汇总了Python中sklearn.mixture.GaussianMixture.bic方法的典型用法代码示例。如果您正苦于以下问题:Python GaussianMixture.bic方法的具体用法?Python GaussianMixture.bic怎么用?Python GaussianMixture.bic使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.mixture.GaussianMixture的用法示例。


在下文中一共展示了GaussianMixture.bic方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: fit_mixtures

# 需要导入模块: from sklearn.mixture import GaussianMixture [as 别名]
# 或者: from sklearn.mixture.GaussianMixture import bic [as 别名]
def fit_mixtures(X,mag,mbins,binwidth=0.2,seed=None,
                 keepscore=False,keepbic=False,**kwargs):
    """Fit one Gaussian mixture model per bin of ``mag``.

    For every bin center in ``mbins``, the rows of ``X`` whose ``mag`` value
    lies strictly within ``binwidth`` of the center are selected and a fresh
    ``GaussianMixture`` is fit to them.  Bins are selected independently, so
    a row can contribute to more than one bin when centers are closer than
    ``2 * binwidth``.

    Parameters
    ----------
    X : array
        Data to fit; indexed along its first axis with the row indices
        selected from ``mag``.
    mag : array
        One value per row of ``X``, used to assign rows to bins.
    mbins : iterable
        Bin centers.
    binwidth : float
        Half-width of each bin (``abs(mag - center) < binwidth``).
    seed : int or None
        When not ``None``, passed to ``np.random.seed`` before any fitting.
        ``0`` is a valid seed.
    keepscore : bool
        Also return ``gmm.score`` of each fit on its own bin data.
    keepbic : bool
        Also return ``gmm.bic`` of each fit on its own bin data.
    **kwargs
        Forwarded to ``GaussianMixture``.  ``n_components`` defaults to 25
        and ``covariance_type`` to ``'full'`` when not supplied.

    Returns
    -------
    tuple
        ``(fits,)``, followed by ``scores`` if ``keepscore`` and then
        ``bics`` if ``keepbic``; each is a list with one entry per bin.
    """
    kwargs.setdefault('n_components',25)
    kwargs.setdefault('covariance_type','full')
    # Compare against None explicitly: a plain truthiness test would
    # silently skip seeding for the perfectly valid seed 0.
    if seed is not None:
        np.random.seed(seed)
    fits = []
    scores = []
    bics = []
    for bincenter in mbins:
        # this is not an efficient way to assign bins, but the time
        # is negligible compared to the GMM fitting anyway
        ii = np.where( np.abs(mag-bincenter) < binwidth )[0]
        gmm = GaussianMixture(**kwargs)
        gmm.fit(X[ii])
        fits.append(gmm)
        if keepscore:
            scores.append(gmm.score(X[ii]))
        if keepbic:
            bics.append(gmm.bic(X[ii]))
    # The optional lists are appended in a fixed order: scores, then bics.
    rv = (fits,)
    if keepscore:
        rv += (scores,)
    if keepbic:
        rv += (bics,)
    return rv
开发者ID:imcgreer,项目名称:simqso,代码行数:32,代码来源:ebossfit.py

示例2: fit_gmm

# 需要导入模块: from sklearn.mixture import GaussianMixture [as 别名]
# 或者: from sklearn.mixture.GaussianMixture import bic [as 别名]
def fit_gmm(
    max_components,
    n_distances,
    atoms,
    distances,
    regularization_type="bic",
    covariance_type="diag",
):
    """
    Fit a GMM to a set of distances.

    This routine will fit a Gaussian mixture model from a set
    of input distances using sklearn_. The resulting set of parameters can
    be used to initialize a `GMMDistanceRestraint` in a MELD simulation.

    .. _sklearn: http://scikit-learn.org/stable/modules/mixture.html

    Parameters
    ----------
    max_components: int
        Maximum number of components to use in fitting GMM.
    n_distances: int
        Number of distances involved in GMM
    atoms: list of (int, str, int, str) tuples.
        The atoms that are involved in each distance are specified
        as a list of `n_distances` tuples, each of the form
        (r1, n1, r2, n2), where r1, r2 are the integer residue
        indices starting from one, and n1, n2 are the atom names.
    distances: array_like(n_dim=2)
        An (n_samples, n_distances) array of distances (in nm) to fit.
    regularization_type: str
        The type of regularization to use, options are "bic"
        and "dirichlet".
    covariance_type: str
        The form of the covariance matrix, options are "diag"
        and "full".

    Returns
    -------
    GMMParams
        The fit parameters, which can be used to initialize
        a `meld.system.restraints.GMMDistanceRestraint` using
        ``GMMDistanceRestraint.from_params``.

    Notes
    -----
    There are two ways to regularize in order to prevent over fitting.

    ``regularization_type="bic"`` will use the Bayesian information
    criterion to penalize models that have more parameters. When
    using ``bic``, The final number of components in the model
    will be less than or equal to `max_components`.

    ``regularization_type=dirichlet`` will use a Dirichlet process
    prior on the weight distributions. The final number of components
    in the model will always be equal to `max_components`, but most
    of the weights will be small.

    There are two forms for the covariance matrix, which differ in
    the number of parameters and expressiveness.

    ``covariance_type="diag"`` will fit using a diagonal covariance
    matrix. This has few parameters, but does not capture correlations
    between input distances. Typically, choosing ``"diag"`` will
    result in a model with more components.

    ``covariance_type="full"`` will fit using a full representation
    of the covariance matrix. This captures correlations between
    input distances, but has far more parameters and is potentially
    prone to over fitting.
    """

    #
    # Constants
    #
    N_INIT = 25
    MAX_ITER = 1000
    KFOLD_SPLITS = 5
    REG_COVAR = 1e-4
    RANDOMSEARCH_TRIALS = 32

    #
    # Check the inputs
    #
    if distances.shape[1] != n_distances:
        raise ValueError("distances must have shape (n_samples, n_distances)")

    if len(atoms) != n_distances:
        raise ValueError(
            "atoms must be a list of (ind1, name1, ind2, name2) of "
            "length n_components"
        )

    if regularization_type not in ["bic", "dirichlet"]:
        raise ValueError('regularization_type must be one of ["bic", "dirichlet"]')

    if covariance_type not in ["diag", "full"]:
        raise ValueError('covariance_type must be one of ["diag", "full"]')

    if max_components < 1:
#.........这里部分代码省略.........
开发者ID:maccallumlab,项目名称:meld,代码行数:103,代码来源:gmmfit.py

示例3: gmm_analysis

# 需要导入模块: from sklearn.mixture import GaussianMixture [as 别名]
# 或者: from sklearn.mixture.GaussianMixture import bic [as 别名]
 def gmm_analysis(self, X_train, X_test, y_train, y_test, data_set_name, max_clusters, analysis_name='GMM'):
     """Fit GMMs over a range of component counts and plot selection metrics.

     The training data is scaled with ``RobustScaler``; for every ``k`` from
     2 to ``max_clusters`` inclusive a full-covariance ``GaussianMixture``
     is fit to it.  BIC/AIC and five label-agreement scores (computed
     against ``y_train``) are collected per ``k`` and written as two PNG
     plots under ``self.out_dir`` via ``plot_helper().plot_series``.

     ``y_test`` is not used, and the scaled test data is computed but not
     used afterwards.  Nothing is returned.
     """
     scaler = RobustScaler()
     X_train_scl = scaler.fit_transform(X_train)
     # Result is not used below; the transform call itself is preserved.
     X_test_scl = scaler.transform(X_test)

     bic_values, aic_values = [], []
     homogeneity, completeness, v_measure = [], [], []
     adj_rand, adj_mutual_info = [], []

     # (destination list, metric function) pairs, evaluated in this order
     # for every fitted model.
     label_metrics = (
         (homogeneity, homogeneity_score),
         (completeness, completeness_score),
         (v_measure, v_measure_score),
         (adj_rand, adjusted_rand_score),
         (adj_mutual_info, adjusted_mutual_info_score),
     )

     cluster_range = np.arange(2, max_clusters + 1)
     for k in cluster_range:
         print('K Clusters: ', k)

         # Expectation Maximization: fit and predict on the scaled training set.
         em = GaussianMixture(n_components=k, covariance_type='full')
         em.fit(X_train_scl)
         em_pred = em.predict(X_train_scl)

         # Information criteria on the same data the model was fit to.
         bic_values.append(em.bic(X_train_scl))
         aic_values.append(em.aic(X_train_scl))

         # Flatten the labels to 1-D before comparing with the predictions.
         y_flat = y_train.reshape(y_train.shape[0],)
         for store, metric in label_metrics:
             store.append(metric(y_flat, em_pred))

     plotter = plot_helper()

     # BIC/AIC plot
     ic_title = 'Information Criterion Plot (' + analysis_name + ') for ' + data_set_name
     file_stem = data_set_name.lower() + '_' + analysis_name.lower()
     ic_path = './' + self.out_dir + '/' + file_stem + '_ic' + '.png'
     plotter.plot_series(
         cluster_range,
         [bic_values, aic_values],
         [None, None],
         ['bic', 'aic'],
         cm.viridis(np.linspace(0, 1, 2)),
         ['o', '*'],
         ic_title,
         'Number of Clusters',
         'Information Criterion',
         ic_path,
     )

     # Score summary plot
     score_title = 'Score Summary Plot (' + analysis_name + ') for ' + data_set_name
     score_path = './' + self.out_dir + '/' + file_stem + '_score' + '.png'
     # NOTE(review): the third and sixth arguments carry six entries while
     # only five series are plotted -- confirm plot_series tolerates the
     # extra element; values are kept exactly as they were.
     plotter.plot_series(
         cluster_range,
         [homogeneity, completeness, v_measure, adj_rand, adj_mutual_info],
         [None, None, None, None, None, None],
         ['homogeneity', 'completeness', 'measure', 'adjusted_rand', 'adjusted_mutual_info'],
         cm.viridis(np.linspace(0, 1, 5)),
         ['o', '^', 'v', '>', '<', '1'],
         score_title,
         'Number of Clusters',
         'Score',
         score_path,
     )
开发者ID:rbaxter1,项目名称:CS7641,代码行数:79,代码来源:part1.py

示例4: enumerate

# 需要导入模块: from sklearn.mixture import GaussianMixture [as 别名]
# 或者: from sklearn.mixture.GaussianMixture import bic [as 别名]
    N = N1 + N2
    x1 = np.random.multivariate_normal(mean=(1, 2), cov=cov1, size=N1)
    m = np.array(((1, 1), (1, 3)))
    x1 = x1.dot(m)
    x2 = np.random.multivariate_normal(mean=(-1, 10), cov=cov1, size=N2)
    x = np.vstack((x1, x2))
    y = np.array([0]*N1 + [1]*N2)

    types = ('spherical', 'diag', 'tied', 'full')
    err = np.empty(len(types))
    bic = np.empty(len(types))
    for i, type in enumerate(types):
        gmm = GaussianMixture(n_components=2, covariance_type=type, random_state=0)
        gmm.fit(x)
        err[i] = 1 - accuracy_rate(gmm.predict(x), y)
        bic[i] = gmm.bic(x)
    print('错误率:', err.ravel())
    print('BIC:', bic.ravel())
    xpos = np.arange(4)
    plt.figure(facecolor='w')
    ax = plt.axes()
    b1 = ax.bar(xpos-0.3, err, width=0.3, color='#77E0A0', edgecolor='k')
    b2 = ax.twinx().bar(xpos, bic, width=0.3, color='#FF8080', edgecolor='k')
    plt.grid(b=True, ls=':', color='#606060')
    bic_min, bic_max = expand(bic.min(), bic.max())
    plt.ylim((bic_min, bic_max))
    plt.xticks(xpos, types)
    plt.legend([b1[0], b2[0]], ('错误率', 'BIC'))
    plt.title('不同方差类型的误差率和BIC', fontsize=15)
    plt.show()
开发者ID:wEEang763162,项目名称:machine_learning_zoubo,代码行数:32,代码来源:20.3.GMM_Parameter.py

示例5: range

# 需要导入模块: from sklearn.mixture import GaussianMixture [as 别名]
# 或者: from sklearn.mixture.GaussianMixture import bic [as 别名]
# Average ``w`` along axis 1, then map it through exp(-exp(3 * mean)).
w = np.exp(-np.exp(3 * w.mean(axis=1)))


# GMM model selection with BIC: fit every (covariance type, component count)
# combination and keep the fit with the lowest BIC.
lowest_bic = np.inf  # ``np.infty`` was removed in NumPy 2.0
bic = []
n_components_range = range(1, 7)
cv_types = ['spherical', 'tied', 'diag', 'full']
for cv_type in cv_types:
    for n_components in n_components_range:
        # Fit a mixture of Gaussians with EM
        gmm = GaussianMixture(n_components=n_components,
                              covariance_type=cv_type, n_init=5)
        gmm.fit(X)
        bic.append(gmm.bic(X))
        if bic[-1] < lowest_bic:
            lowest_bic = bic[-1]
            best_gmm = gmm

preds = best_gmm.predict(X)
probs = best_gmm.predict_proba(X)

# ``bic`` was filled covariance-type-major (outer loop over cv_types, inner
# loop over n_components), so each *row* of the reshaped array must hold one
# covariance type.  Reshaping to (-1, len(cv_types)) and transposing would
# mix values from different covariance types into every plotted curve.
bic_by_type = np.array(bic).reshape(len(cv_types), -1)
for name, col in zip(cv_types, bic_by_type):
    plt.plot(n_components_range, col, label=name)
plt.legend()
plt.savefig('gmm_sklearn_bic/bic.pdf')


# Store the predicted component of the best model as a categorical column.
data_thr['preds'] = pd.Series(preds).astype("category")
开发者ID:ngoix,项目名称:cyg-x1,代码行数:32,代码来源:gmm_sklearn_bic.py


注:本文中的sklearn.mixture.GaussianMixture.bic方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。