当前位置: 首页>>代码示例>>Python>>正文


Python metrics.silhouette_score方法代码示例

本文整理汇总了Python中sklearn.metrics.silhouette_score方法的典型用法代码示例。如果您正苦于以下问题：Python metrics.silhouette_score方法的具体用法？Python metrics.silhouette_score怎么用？Python metrics.silhouette_score使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块sklearn.metrics的用法示例。


在下文中一共展示了metrics.silhouette_score方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: silhouette_score

# 需要导入模块: from sklearn import metrics [as 别名]
# 或者: from sklearn.metrics import silhouette_score [as 别名]
def silhouette_score(phate_op, n_clusters, random_state=None, **kwargs):
    """Score a k-means clustering of the PHATE potential with the Silhouette coefficient.

    Parameters
    ----------
    phate_op : phate.PHATE
        Fitted PHATE operator
    n_clusters : int
        Number of clusters.
    random_state : int or None, optional (default: None)
        Random seed for k-means
    **kwargs
        Extra keyword arguments forwarded unchanged to ``kmeans``.

    Returns
    -------
    score : float
        Value of ``metrics.silhouette_score`` computed on
        ``phate_op.diff_potential`` with the k-means labels.
    """
    # Cluster first, then score those labels on the operator's potential.
    labels = kmeans(
        phate_op,
        n_clusters=n_clusters,
        random_state=random_state,
        **kwargs
    )
    potential = phate_op.diff_potential
    return metrics.silhouette_score(potential, labels)
开发者ID:KrishnaswamyLab,项目名称:PHATE,代码行数:20,代码来源:cluster.py

示例2: calc_scores

# 需要导入模块: from sklearn import metrics [as 别名]
# 或者: from sklearn.metrics import silhouette_score [as 别名]
def calc_scores(cls, model, data, min_clusters, max_clusters, random_state=0):
    """Evaluate a clustering model over a range of cluster counts.

    For every count from ``min_clusters`` to ``max_clusters`` (inclusive) the
    model's ``n_clusters`` and ``random_state`` attributes are overwritten, the
    model is refit with ``fit_predict`` and three validity indices are recorded.

    Parameters
    ----------
    cls
        Unused here (presumably the owning class of a classmethod; the
        decorator is not visible in this snippet).
    model
        Object exposing ``fit`` and ``fit_predict``; it is mutated in place.
    data
        Samples passed to the model and to the scoring functions.
    min_clusters, max_clusters : int
        Inclusive bounds of the cluster counts to try.
    random_state : int, optional (default: 0)
        Assigned to the model and passed to ``silhouette_score``.

    Returns
    -------
    tuple
        ``(silhouettes, sil_nc, davieses, dav_nc, calinskies, cal_nc,
        linkage_matrix)`` where each ``*_nc`` is the cluster count with the
        best score (highest silhouette, lowest Davies-Bouldin, highest
        Calinski-Harabasz) and ``linkage_matrix`` is the result of
        ``model.fit(data)`` for a ``HierarchicalClustering`` model, else None.
    """
    # Only the hierarchical model gets a preliminary fit whose result is kept.
    is_hierarchical = model.__class__.__name__ == 'HierarchicalClustering'
    linkage_matrix = model.fit(data) if is_hierarchical else None

    silhouettes, davieses, calinskies = [], [], []
    for n_clusters in range(min_clusters, max_clusters + 1):
        model.n_clusters = n_clusters
        model.random_state = random_state
        labels = model.fit_predict(data)
        silhouettes.append(
            silhouette_score(data, labels, random_state=random_state)
        )
        davieses.append(davies_bouldin_score(data, labels))
        calinskies.append(calinski_harabasz_score(data, labels))

    # List positions are offsets from min_clusters, so shift them back.
    sil_nc = min_clusters + np.argmax(silhouettes)
    dav_nc = min_clusters + np.argmin(davieses)
    cal_nc = min_clusters + np.argmax(calinskies)

    return (
        silhouettes, sil_nc,
        davieses, dav_nc,
        calinskies, cal_nc,
        linkage_matrix,
    )
开发者ID:canard0328,项目名称:malss,代码行数:23,代码来源:clustering.py

示例3: clustering_scores

# 需要导入模块: from sklearn import metrics [as 别名]
# 或者: from sklearn.metrics import silhouette_score [as 别名]
def clustering_scores(self, prediction_algorithm: str = "knn") -> Tuple:
    """Cluster the latent space and score the result against the true labels.

    Parameters
    ----------
    prediction_algorithm : str, optional (default: "knn")
        ``"knn"`` clusters with ``KMeans`` (despite the name) using
        ``n_init=200``; ``"gmm"`` fits a ``GMM``. Both use
        ``self.gene_dataset.n_labels`` as the number of clusters/components.

    Returns
    -------
    tuple or None
        ``(asw_score, nmi_score, ari_score, uca_score)``. Note that the
        silhouette score (``asw_score``) is computed on the labels returned by
        ``self.get_latent()``, not on the predicted clusters. Returns None
        when the dataset has at most one label.

    Raises
    ------
    ValueError
        If ``prediction_algorithm`` is not ``"knn"`` or ``"gmm"``.
    """
    if self.gene_dataset.n_labels <= 1:
        # Nothing to score against with a single label.
        return None

    # Reject unknown algorithms before any expensive work; previously this
    # surfaced later as an unbound ``labels_pred`` variable.
    if prediction_algorithm not in ("knn", "gmm"):
        raise ValueError(
            "prediction_algorithm must be 'knn' or 'gmm', got %r"
            % (prediction_algorithm,)
        )

    latent, _, labels = self.get_latent()
    n_labels = self.gene_dataset.n_labels
    if prediction_algorithm == "knn":
        labels_pred = KMeans(n_labels, n_init=200).fit_predict(latent)
    else:
        gmm = GMM(n_labels)
        gmm.fit(latent)
        labels_pred = gmm.predict(latent)

    asw_score = silhouette_score(latent, labels)
    nmi_score = NMI(labels, labels_pred)
    ari_score = ARI(labels, labels_pred)
    uca_score = unsupervised_clustering_accuracy(labels, labels_pred)[0]
    # Lazy %-args: the message is only formatted when DEBUG is enabled.
    logger.debug(
        "Clustering Scores:\nSilhouette: %.4f\nNMI: %.4f\nARI: %.4f\nUCA: %.4f",
        asw_score,
        nmi_score,
        ari_score,
        uca_score,
    )
    return asw_score, nmi_score, ari_score, uca_score
开发者ID:YosefLab,项目名称:scVI,代码行数:25,代码来源:posterior.py

示例4: _cluster_plot

# 需要导入模块: from sklearn import metrics [as 别名]
# 或者: from sklearn.metrics import silhouette_score [as 别名]
def _cluster_plot(self, embedding, labels):
    """Log clustering-quality scalars and a PCA scatter plot to ``self.writer``.

    Three scores (silhouette, Calinski-Harabasz, Davies-Bouldin) are computed on
    the squeezed embedding and written as scalars tagged with the number of
    distinct labels. A random subset of at most 1000 samples is then projected
    to two PCA components and logged as a figure.

    Args:
        embedding: array-like supporting ``squeeze()`` and index-list selection.
        labels: one label per sample.
    """
    # NOTE(review): `calinski_harabaz_score` is the older scikit-learn spelling
    # (later renamed `calinski_harabasz_score`) - confirm against the imports.
    flat = embedding.squeeze()
    silhouette = silhouette_score(flat, labels)
    chs = calinski_harabaz_score(flat, labels)
    dbs = davies_bouldin_score(flat, labels)

    n_labels = len(set(labels))

    scalars = (
        (f"silhouette {n_labels}", silhouette),
        (f"chs {n_labels}", chs),
        (f"dbs {n_labels}", dbs),
    )
    for tag, value in scalars:
        self.writer.add_scalar(tag, value, self.step_id)

    # Shuffle all positions and keep the first 1000 as the plotting sample.
    order = list(range(len(labels)))
    random.shuffle(order)
    chosen = order[:1000]
    chosen_labels = [labels[position] for position in chosen]
    chosen_embedding = embedding[chosen]

    projected = PCA(2).fit_transform(chosen_embedding.squeeze())
    fig, ax = plt.subplots()
    ax.scatter(projected[:, 0], projected[:, 1], c=chosen_labels, cmap="tab20")
    self.writer.add_figure(f"clustering {n_labels}", fig, self.step_id)
开发者ID:mjendrusch,项目名称:torchsupport,代码行数:22,代码来源:clustering.py

示例5: test_silhouette

# 需要导入模块: from sklearn import metrics [as 别名]
# 或者: from sklearn.metrics import silhouette_score [as 别名]
def test_silhouette():
    """Check whether ComBat can align data drawn from several Gaussians.

    After correction, the silhouette coefficient of the 'blobs' labels in the
    first two PCA components is asserted to be below 0.1.
    """
    # Load the blobs dataset and run ComBat with 'blobs' as the key.
    adata = sc.datasets.blobs()
    sc.pp.combat(adata, 'blobs')

    # Embed with PCA and keep only the first two components.
    sc.tl.pca(adata)
    first_two_pcs = adata.obsm['X_pca'][:, :2]
    blob_ids = adata.obs['blobs'].values

    # Silhouette coefficient of the blob labels in the reduced space.
    coefficient = silhouette_score(first_two_pcs, blob_ids)

    assert coefficient < 0.1
开发者ID:theislab,项目名称:scanpy,代码行数:20,代码来源:test_combat.py

示例6: _find_optimal_clustering

# 需要导入模块: from sklearn import metrics [as 别名]
# 或者: from sklearn.metrics import silhouette_score [as 别名]
def _find_optimal_clustering(self,clusterings):
    """Return the clustering with the highest cosine silhouette score.

    Each clustering is a sequence of items whose second element is passed to
    ``_get_cluster_nodes``; every returned node contributes its ``vector``,
    labelled with the index of its cluster.

    Clusterings that yield no nodes, or fewer than two distinct labels, are
    skipped because the silhouette score is undefined for them. If no
    clustering can be scored, the first clustering is used as a fallback
    (an empty ``clusterings`` therefore raises IndexError).

    Returns
    -------
    The second elements of the chosen clustering's items, as produced by
    ``list(zip(*clustering))[1]``.
    """
    best_score = float('-inf')
    best_clustering = None

    for clustering in clusterings:
        labeled_vectors = [
            (node.vector, cluster_idx)
            for cluster_idx, cluster in enumerate(clustering)
            for node in _get_cluster_nodes(cluster[1])
        ]
        if not labeled_vectors:
            # No nodes at all: unpacking below would fail, nothing to score.
            continue

        vectors, labels = [np.array(column) for column in zip(*labeled_vectors)]
        if len(set(labels)) < 2:
            # Silhouette doesn't work with just one cluster.
            continue

        score = silhouette_score(vectors, labels, metric='cosine')
        if score > best_score:
            best_score = score
            best_clustering = clustering

    chosen = best_clustering if best_clustering else clusterings[0]
    return list(zip(*chosen))[1]
开发者ID:texta-tk,项目名称:texta,代码行数:19,代码来源:precluster.py

示例7: evaluate_performance

# 需要导入模块: from sklearn import metrics [as 别名]
# 或者: from sklearn.metrics import silhouette_score [as 别名]
def evaluate_performance(data, labels, metric='euclidean'):
    """Compute, print and return the silhouette score of a labelling.

    Parameters
    ----------
    data
        Samples passed to ``skmetrics.silhouette_score``.
    labels
        Cluster label of each sample; also echoed to stdout.
    metric : str, optional (default: 'euclidean')
        Distance metric forwarded to ``skmetrics.silhouette_score``.

    Returns
    -------
    The silhouette score.
    """
    silhouette = skmetrics.silhouette_score(data, labels, metric=metric)
    for caption, value in (('Labels:', labels), ('Score:', silhouette)):
        print(caption, value)

    return silhouette
开发者ID:melqkiades,项目名称:yelp,代码行数:8,代码来源:clusterer.py

示例8: bench_k_means

# 需要导入模块: from sklearn import metrics [as 别名]
# 或者: from sklearn.metrics import silhouette_score [as 别名]
def bench_k_means(estimator, name, data):
    """Fit ``estimator`` on ``data`` and print a one-line quality report.

    The supervised scores compare ``estimator.labels_`` with ``y``, which is
    not a parameter: it is read from the enclosing (module) scope.

    A short explanation for every score:

    homogeneity
        each cluster contains only members of a single class (range 0 - 1)
    completeness
        all members of a given class are assigned to the same cluster
        (range 0 - 1)
    v_measure
        harmonic mean of homogeneity and completeness
    adjusted_rand
        similarity of the actual values and their predictions, ignoring
        permutations and with chance normalization (range -1 to 1, -1 being
        bad, 1 being perfect and 0 being random)
    adjusted_mutual_info
        agreement of the actual values and predictions, ignoring permutations
        (range 0 - 1, with 0 being random agreement and 1 being perfect
        agreement)
    silhouette
        uses the mean distance between a sample and all other points in the
        same class, as well as the mean distance between a sample and all
        other points in the nearest cluster, to calculate a score (range -1
        to 1, with the former being incorrect and the latter standing for
        highly dense clustering; 0 indicates overlapping clusters)
    """
    estimator.fit(data)

    report_format = (
        '%-9s \t%i \thomogeneity: %.3f \tcompleteness: %.3f \tv-measure: %.3f \tadjusted-rand: %.3f \t'
        'adjusted-mutual-info: %.3f \tsilhouette: %.3f'
    )

    fields = [name, estimator.inertia_]
    predicted = estimator.labels_
    # Supervised scores, in the order the report prints them.
    fields.append(metrics.homogeneity_score(y, predicted))
    fields.append(metrics.completeness_score(y, predicted))
    fields.append(metrics.v_measure_score(y, predicted))
    fields.append(metrics.adjusted_rand_score(y, predicted))
    fields.append(metrics.adjusted_mutual_info_score(y, predicted))
    # The only unsupervised score: it needs the data, not the true labels.
    fields.append(metrics.silhouette_score(data, predicted, metric='euclidean'))

    print(report_format % tuple(fields))
开发者ID:HoussemCharf,项目名称:FunUtils,代码行数:28,代码来源:k_means_clustering.py

示例9: printClustersSummary

# 需要导入模块: from sklearn import metrics [as 别名]
# 或者: from sklearn.metrics import silhouette_score [as 别名]
def printClustersSummary(data, labels, centroids):
    '''
        Print three clustering-quality figures for a fitted model:
        the pseudo-F statistic, the Davis-Bouldin measure (both from
        helpers that take the data, labels and centroids) and the
        Euclidean silhouette score.
    '''
    pseudo_f_value = pseudo_F(data, labels, centroids)
    print('Pseudo_F: ', pseudo_f_value)

    davis_bouldin_value = davis_bouldin(data, labels, centroids)
    print('Davis-Bouldin: ', davis_bouldin_value)

    # The labels are converted to an array before being scored.
    label_array = np.array(labels)
    silhouette_value = mt.silhouette_score(
        data, label_array, metric='euclidean')
    print('Silhouette score: ', silhouette_value)
开发者ID:drabastomek,项目名称:practicalDataAnalysisCookbook,代码行数:12,代码来源:helper.py

示例10: find_best_n_clusters

# 需要导入模块: from sklearn import metrics [as 别名]
# 或者: from sklearn.metrics import silhouette_score [as 别名]
def find_best_n_clusters(data, clusterer, max_n_clusters, random_state, **kwargs):
    """
    Finds best number of clusters for KMeans and Gaussian Mixture.

    Every cluster count from 2 to ``max_n_clusters`` (inclusive) is tried and
    the one with the highest cosine silhouette score is selected.

    Parameters
    ----------
    data: pd.DataFrame
        Dataframe with features for clustering with index as in ``retention_config.index_col``
    clusterer: sklearn clusterer class
        For instance, ``sklearn.cluster.KMeans`` or ``sklearn.mixture.GaussianMixture``.
        The class itself (not an instance); it must accept either
        ``n_clusters`` or ``n_components`` in its constructor.
    max_n_clusters: int
        Maximal number of clusters for searching. Must be at least 2.
    random_state: int
        Random state for clusterer.
    **kwargs
        Candidate constructor arguments; only those the clusterer's
        constructor accepts are used, the rest are ignored.

    Returns
    -------
    Dict
        Optimal keyword arguments for clustering method, including the chosen
        ``n_clusters``/``n_components`` and ``random_state``.

    Raises
    ------
    ValueError
        If ``max_n_clusters`` is smaller than 2, so no candidate can be scored.
    """
    import inspect

    if max_n_clusters < 2:
        raise ValueError(
            f'max_n_clusters must be at least 2, got {max_n_clusters}'
        )

    # Read the accepted parameter names from the constructor signature.
    # Calling ``get_params`` on the class (not an instance) raises
    # AttributeError on scikit-learn >= 0.24.
    accepted = set(inspect.signature(clusterer).parameters)
    size_param = 'n_clusters' if 'n_clusters' in accepted else 'n_components'

    args = {key: value for key, value in kwargs.items() if key in accepted}
    # The searched-over size must not leak in from the caller's kwargs.
    args.pop(size_param, None)
    args['random_state'] = random_state

    score = {}
    for n in range(2, max_n_clusters + 1):
        args[size_param] = n
        predicted = clusterer(**args).fit_predict(data)
        score[n] = silhouette_score(data, predicted, metric='cosine')

    best = pd.Series(score).idxmax()
    args[size_param] = best
    print(f'Best number of clusters is {best}')
    return args
开发者ID:retentioneering,项目名称:retentioneering-tools,代码行数:41,代码来源:clustering.py

示例11: calc_all_metrics

# 需要导入模块: from sklearn import metrics [as 别名]
# 或者: from sklearn.metrics import silhouette_score [as 别名]
def calc_all_metrics(data, km):
    """
    Collects clustering quality metrics for an already fitted clusterer.

    Parameters
    --------
    data: pd.DataFrame
        Dataframe with features for clustering indexed as in ``retention_config.index_col``
    km:
        Already fitted clusterer exposing ``labels_``.

    Returns
    --------
    Metrics scores: ``mean_pd`` always, ``mean_fc`` only when the clusterer
    has ``cluster_centers_``, and ``silhouette`` (cosine) only when more than
    one distinct label is present.

    Return type
    --------
    Dict
    """
    labels = km.labels_
    metrics_by_name = {'mean_pd': calc_mean_pd(data, labels)}

    # Distance-from-centre only makes sense for centroid-based clusterers.
    if hasattr(km, 'cluster_centers_'):
        metrics_by_name['mean_fc'] = calc_mean_dist_from_center(data, km)

    # Silhouette is undefined for a single cluster, so it is skipped then.
    n_distinct = len(set(labels))
    if n_distinct > 1:
        metrics_by_name['silhouette'] = silhouette_score(
            data, labels, metric='cosine'
        )

    return metrics_by_name
开发者ID:retentioneering,项目名称:retentioneering-tools,代码行数:29,代码来源:clustering.py

示例12: test

# 需要导入模块: from sklearn import metrics [as 别名]
# 或者: from sklearn.metrics import silhouette_score [as 别名]
def test():
    """Plot silhouette scores of k-means clusterings for a file's functions.

    Reads one positional command-line argument (``File``), extracts and trims
    its function information, vectorises and normalises it, reduces it to
    5 components with truncated SVD, and then fits KMeans for every cluster
    count from 2 to 19. The cosine silhouette score of each fit is printed
    and finally plotted against the cluster count.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("File")
    args = parser.parse_args()

    info = fh.get_function_information(args.File)
    info = trim_funcs(info, args.File)

    _, func_sparse = funcs_to_sparse(info)

    transformer = Normalizer().fit(func_sparse)
    func_sparse = transformer.transform(func_sparse)

    svd = TruncatedSVD(n_components=5, n_iter=7, random_state=42)
    func_sparse = svd.fit_transform(func_sparse)

    scores = []
    clust_count = []
    for x in range(2, 20):
        result = KMeans(n_clusters=x, random_state=2).fit(func_sparse)

        score = silhouette_score(func_sparse, result.labels_, metric="cosine")
        scores.append(score)
        clust_count.append(x)

        print("Clusters {:<3} | Silhoette Score : {}".format(x, score))

    plt.plot(clust_count, scores)
    plt.xlabel("Cluster Centroid Count")
    plt.ylabel("Silhoette Score")
    # Call grid(); assigning `plt.grid = True` would replace the pyplot
    # function with a bool and leave the grid switched off.
    plt.grid(True)
    plt.show()
开发者ID:ChrisTheCoolHut,项目名称:Firmware_Slap,代码行数:43,代码来源:function_clustering.py

示例13: single_cluster

# 需要导入模块: from sklearn import metrics [as 别名]
# 或者: from sklearn.metrics import silhouette_score [as 别名]
def single_cluster(all_functions, centroid_count=2):
    """Cluster functions with k-means and report the silhouette score.

    The functions are vectorised with ``funcs_to_sparse``, normalised, and
    clustered into ``centroid_count`` groups with a fixed random state. The
    cosine silhouette score (on a sample of up to 5000 points) is printed.

    Parameters
    ----------
    all_functions
        Input passed to ``funcs_to_sparse``.
    centroid_count : int, optional (default: 2)
        Number of k-means clusters.

    Returns
    -------
    The ``labels_`` of the fitted KMeans model.
    """
    _, features = funcs_to_sparse(all_functions)
    features = Normalizer().fit(features).transform(features)

    model = KMeans(n_clusters=centroid_count, random_state=2).fit(features)
    cluster_labels = model.labels_

    # Sampling keeps the pairwise-distance computation bounded.
    silhouette = silhouette_score(
        features,
        cluster_labels,
        metric="cosine",
        random_state=2,
        sample_size=5000,
    )

    print("Clusters {:<3} | Silhoette Score : {}".format(
        centroid_count, silhouette))

    return cluster_labels
开发者ID:ChrisTheCoolHut,项目名称:Firmware_Slap,代码行数:29,代码来源:firmware_clustering.py

示例14: get_single_cluster

# 需要导入模块: from sklearn import metrics [as 别名]
# 或者: from sklearn.metrics import silhouette_score [as 别名]
def get_single_cluster(all_functions, centroid_count=2):
    """Cluster functions with k-means and return labels with their score.

    The functions are vectorised with ``funcs_to_sparse``, normalised, and
    clustered into ``centroid_count`` groups with a fixed random state. The
    clustering is scored with the cosine silhouette coefficient on a sample
    of up to 5000 points.

    Parameters
    ----------
    all_functions
        Input passed to ``funcs_to_sparse``.
    centroid_count : int, optional (default: 2)
        Number of k-means clusters.

    Returns
    -------
    dict
        ``{'count': centroid_count, 'score': <silhouette>, 'labels': <labels_>}``
    """
    _, features = funcs_to_sparse(all_functions)
    features = Normalizer().fit(features).transform(features)

    model = KMeans(n_clusters=centroid_count, random_state=2).fit(features)
    cluster_labels = model.labels_

    # Sampling keeps the pairwise-distance computation bounded.
    silhouette = silhouette_score(
        features,
        cluster_labels,
        metric="cosine",
        random_state=2,
        sample_size=5000,
    )

    return {
        'count': centroid_count,
        'score': silhouette,
        'labels': cluster_labels,
    }
开发者ID:ChrisTheCoolHut,项目名称:Firmware_Slap,代码行数:32,代码来源:firmware_clustering.py

示例15: n_cluster_embeddings

# 需要导入模块: from sklearn import metrics [as 别名]
# 或者: from sklearn.metrics import silhouette_score [as 别名]
def n_cluster_embeddings(self, features=None, n_clusters=3, method='ac'):
    '''
    Clusters the nodes based on embedding features and scores the result.

    features: data to cluster; None means use self.embeddings
              (the DGI generated embeddings)
    n_clusters: number of clusters to form
    method: 'ac' runs agglomerative clustering with ward linkage and stores
            the result in self.labels / self.score; any other value leaves
            those attributes untouched and returns their current values

    Returns a dict {'labels': self.labels, 'score': self.score}, where score
    is the silhouette score of the labels on the clustered data.
    '''
    if method == 'ac':
        data = self.embeddings if features is None else features
        # Ward linkage works on Euclidean distances, which is the default.
        # The explicit `affinity='euclidean'` keyword is not accepted by
        # scikit-learn >= 1.4 (renamed to `metric`), so it is omitted.
        clustering = AgglomerativeClustering(n_clusters=n_clusters,
                                             linkage='ward')
        clustering.fit(data)
        self.labels = clustering.labels_
        self.score = silhouette_score(data, self.labels)
    return {'labels': self.labels, 'score': self.score}
开发者ID:plkmo,项目名称:NLP_Toolkit,代码行数:15,代码来源:infer.py


注:本文中的sklearn.metrics.silhouette_score方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。