This article collects typical usage examples of the Python method sklearn.metrics.silhouette_score. If you have been wondering exactly how to use metrics.silhouette_score, or are looking for working examples of it, the curated code samples here may help. You can also explore further usage examples of its parent module, sklearn.metrics.
The following shows 15 code examples of metrics.silhouette_score, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
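Before the collected examples, here is a minimal, self-contained sketch of the basic call; the toy data and names are illustrative and not taken from any of the projects below:

from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
from sklearn.metrics import silhouette_score

# three well-separated blobs make an easy clustering target
X, _ = make_blobs(n_samples=300, centers=3, random_state=42)
labels = KMeans(n_clusters=3, n_init=10, random_state=42).fit_predict(X)

# ranges from -1 to 1; higher means denser, better-separated clusters
print(silhouette_score(X, labels))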
Example 1: silhouette_score
# Required import: from sklearn import metrics
# or: from sklearn.metrics import silhouette_score
def silhouette_score(phate_op, n_clusters, random_state=None, **kwargs):
    """Compute the silhouette score of k-means clustering on the PHATE potential

    Parameters
    ----------
    phate_op : phate.PHATE
        Fitted PHATE operator
    n_clusters : int
        Number of clusters.
    random_state : int or None, optional (default: None)
        Random seed for k-means

    Returns
    -------
    score : float
    """
    # kmeans is a helper from the same module that clusters the PHATE potential
    cluster_labels = kmeans(phate_op, n_clusters=n_clusters, random_state=random_state, **kwargs)
    return metrics.silhouette_score(phate_op.diff_potential, cluster_labels)
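A hedged usage sketch for this helper, assuming the phate package is installed and that the function and its kmeans dependency are importable; the data here is random noise purely for illustration:

import numpy as np
import phate

data = np.random.normal(size=(200, 50))
phate_op = phate.PHATE().fit(data)

# scan candidate cluster counts and keep the best-scoring one
scores = {k: silhouette_score(phate_op, n_clusters=k, random_state=42)
          for k in range(2, 6)}
print(max(scores, key=scores.get))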
Example 2: calc_scores
def calc_scores(cls, model, data, min_clusters, max_clusters, random_state=0):
    silhouettes = []
    davieses = []
    calinskies = []
    if model.__class__.__name__ == 'HierarchicalClustering':
        linkage_matrix = model.fit(data)
    else:
        linkage_matrix = None
    for nc in range(min_clusters, max_clusters + 1):
        model.n_clusters = nc
        model.random_state = random_state
        pred_labels = model.fit_predict(data)
        silhouettes.append(silhouette_score(data, pred_labels, random_state=random_state))
        davieses.append(davies_bouldin_score(data, pred_labels))
        calinskies.append(calinski_harabasz_score(data, pred_labels))
    sil_nc = np.argmax(silhouettes) + min_clusters
    dav_nc = np.argmin(davieses) + min_clusters
    cal_nc = np.argmax(calinskies) + min_clusters
    return silhouettes, sil_nc, davieses, dav_nc, calinskies, cal_nc, linkage_matrix
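The three indices do not always agree, which is why the code keeps all of them: silhouette and Calinski-Harabasz are maximized (argmax) while Davies-Bouldin is minimized (argmin). A standalone version of the same scan on toy data, using plain KMeans:

from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
from sklearn.metrics import (silhouette_score, davies_bouldin_score,
                             calinski_harabasz_score)

X, _ = make_blobs(n_samples=500, centers=4, random_state=0)
for nc in range(2, 7):
    labels = KMeans(n_clusters=nc, n_init=10, random_state=0).fit_predict(X)
    print(nc,
          silhouette_score(X, labels),         # higher is better
          davies_bouldin_score(X, labels),     # lower is better
          calinski_harabasz_score(X, labels))  # higher is better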
Example 3: clustering_scores
def clustering_scores(self, prediction_algorithm: str = "knn") -> Tuple:
    if self.gene_dataset.n_labels > 1:
        latent, _, labels = self.get_latent()
        if prediction_algorithm == "knn":
            # despite the name, "knn" runs KMeans in this codebase
            labels_pred = KMeans(
                self.gene_dataset.n_labels, n_init=200
            ).fit_predict(
                latent
            )  # n_jobs>1 ?
        elif prediction_algorithm == "gmm":
            gmm = GMM(self.gene_dataset.n_labels)
            gmm.fit(latent)
            labels_pred = gmm.predict(latent)
        asw_score = silhouette_score(latent, labels)
        nmi_score = NMI(labels, labels_pred)
        ari_score = ARI(labels, labels_pred)
        uca_score = unsupervised_clustering_accuracy(labels, labels_pred)[0]
        logger.debug(
            "Clustering Scores:\nSilhouette: %.4f\nNMI: %.4f\nARI: %.4f\nUCA: %.4f"
            % (asw_score, nmi_score, ari_score, uca_score)
        )
        return asw_score, nmi_score, ari_score, uca_score
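Apart from unsupervised_clustering_accuracy (a helper from the surrounding codebase), everything maps to standard sklearn metrics; GMM, NMI, and ARI above appear to be aliases for GaussianMixture, normalized_mutual_info_score, and adjusted_rand_score. A sketch of the sklearn part on toy data; note that, as above, the silhouette scores the true labels against the geometry, while NMI and ARI compare predicted labels with true ones:

from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
from sklearn.metrics import (silhouette_score, normalized_mutual_info_score,
                             adjusted_rand_score)

X, y_true = make_blobs(n_samples=300, centers=3, random_state=1)
y_pred = KMeans(n_clusters=3, n_init=10, random_state=1).fit_predict(X)

print(silhouette_score(X, y_true))                   # ASW of the true labels
print(normalized_mutual_info_score(y_true, y_pred))  # label agreement, 0 to 1
print(adjusted_rand_score(y_true, y_pred))           # chance-corrected agreement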
Example 4: _cluster_plot
def _cluster_plot(self, embedding, labels):
    silhouette = silhouette_score(embedding.squeeze(), labels)
    # calinski_harabaz_score was renamed calinski_harabasz_score in scikit-learn 0.23
    chs = calinski_harabasz_score(embedding.squeeze(), labels)
    dbs = davies_bouldin_score(embedding.squeeze(), labels)
    n_labels = len(set(labels))
    self.writer.add_scalar(f"silhouette {n_labels}", silhouette, self.step_id)
    self.writer.add_scalar(f"chs {n_labels}", chs, self.step_id)
    self.writer.add_scalar(f"dbs {n_labels}", dbs, self.step_id)
    indices = list(range(len(labels)))
    random.shuffle(indices)
    samples_to_plot = indices[:1000]  # subsample to keep the scatter plot readable
    sample_labels = [labels[idx] for idx in samples_to_plot]
    sample_embedding = embedding[samples_to_plot]
    pca = PCA(2).fit_transform(sample_embedding.squeeze())
    fig, ax = plt.subplots()
    ax.scatter(pca[:, 0], pca[:, 1], c=sample_labels, cmap="tab20")
    self.writer.add_figure(f"clustering {n_labels}", fig, self.step_id)
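A standalone sketch of the plotting step, with the TensorBoard writer replaced by a plain matplotlib figure; all names here are illustrative stand-ins:

import matplotlib.pyplot as plt
import numpy as np
from sklearn.decomposition import PCA

rng = np.random.default_rng(0)
embedding = rng.normal(size=(5000, 64))  # stand-in for a learned embedding
labels = rng.integers(0, 10, size=5000)  # stand-in for cluster labels

idx = rng.permutation(len(labels))[:1000]  # subsample for plotting
pca = PCA(2).fit_transform(embedding[idx])
plt.scatter(pca[:, 0], pca[:, 1], c=labels[idx], cmap="tab20")
plt.show()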
Example 5: test_silhouette
def test_silhouette():
    # this test checks whether combat can align data from several gaussians
    # by computing the silhouette coefficient of the batch labels in a pca
    # embedding: a low score means the batches are well mixed after correction
    # load in data
    adata = sc.datasets.blobs()
    # apply combat
    sc.pp.combat(adata, 'blobs')
    # compute pca
    sc.tl.pca(adata)
    X_pca = adata.obsm['X_pca']
    # compute silhouette coefficient in pca
    sh = silhouette_score(X_pca[:, :2], adata.obs['blobs'].values)
    assert sh < 0.1
Example 6: _find_optimal_clustering
def _find_optimal_clustering(self, clusterings):
    max_score = float('-inf')
    max_clustering = None
    for clustering in clusterings:
        labeled_vectors = [(node.vector, cluster_idx)
                           for cluster_idx in range(len(clustering))
                           for node in _get_cluster_nodes(clustering[cluster_idx][1])]
        vectors, labels = [np.array(x) for x in zip(*labeled_vectors)]
        if 1 not in labels:
            continue  # the silhouette score is undefined for a single cluster
        score = silhouette_score(vectors, labels, metric='cosine')
        if score > max_score:
            max_score = score
            max_clustering = clustering
    return list(zip(*max_clustering))[1] if max_clustering else list(zip(*clusterings[0]))[1]
Example 7: evaluate_performance
def evaluate_performance(data, labels, metric='euclidean'):
    score = skmetrics.silhouette_score(data, labels, metric=metric)
    print('Labels:', labels)
    print('Score:', score)
    return score
Example 8: bench_k_means
def bench_k_means(estimator, name, data):
    estimator.fit(data)
    # A short explanation for every score:
    # homogeneity: each cluster contains only members of a single class (range 0 - 1)
    # completeness: all members of a given class are assigned to the same cluster (range 0 - 1)
    # v_measure: harmonic mean of homogeneity and completeness
    # adjusted_rand: similarity of the actual values and their predictions,
    #   ignoring permutations and with chance normalization
    #   (range -1 to 1, -1 being bad, 1 being perfect and 0 being random)
    # adjusted_mutual_info: agreement of the actual values and predictions, ignoring permutations
    #   (range 0 - 1, with 0 being random agreement and 1 being perfect agreement)
    # silhouette: compares the mean distance between a sample and all other points
    #   in its own cluster with the mean distance to points in the nearest other
    #   cluster (range -1 to 1: -1 indicates incorrect clustering, 1 highly dense,
    #   well-separated clusters, and 0 overlapping clusters)
    # note: y (the ground-truth labels) is assumed to be defined in the enclosing scope
    print('%-9s \t%i \thomogeneity: %.3f \tcompleteness: %.3f \tv-measure: %.3f \tadjusted-rand: %.3f \t'
          'adjusted-mutual-info: %.3f \tsilhouette: %.3f'
          % (name, estimator.inertia_,
             metrics.homogeneity_score(y, estimator.labels_),
             metrics.completeness_score(y, estimator.labels_),
             metrics.v_measure_score(y, estimator.labels_),
             metrics.adjusted_rand_score(y, estimator.labels_),
             metrics.adjusted_mutual_info_score(y, estimator.labels_),
             metrics.silhouette_score(data, estimator.labels_,
                                      metric='euclidean')))
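bench_k_means reads the ground-truth labels y from the enclosing scope, a pattern familiar from the scikit-learn digits clustering tutorial that this resembles. A hedged sketch of how it might be driven:

from sklearn import metrics  # needed by bench_k_means itself
from sklearn.cluster import KMeans
from sklearn.datasets import load_digits
from sklearn.preprocessing import scale

digits = load_digits()
data = scale(digits.data)
y = digits.target  # the global the function above relies on

bench_k_means(KMeans(init='k-means++', n_clusters=10, n_init=10),
              name='k-means++', data=data)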
Example 9: printClustersSummary
def printClustersSummary(data, labels, centroids):
    '''
    Helper function to automate model assessment
    '''
    # pseudo_F and davis_bouldin are custom helpers defined elsewhere in the module
    print('Pseudo_F: ', pseudo_F(data, labels, centroids))
    print('Davies-Bouldin: ',
          davis_bouldin(data, labels, centroids))
    print('Silhouette score: ',
          mt.silhouette_score(data, np.array(labels),
                              metric='euclidean'))
Example 10: find_best_n_clusters
def find_best_n_clusters(data, clusterer, max_n_clusters, random_state, **kwargs):
    """
    Finds the best number of clusters for KMeans or Gaussian Mixture.

    Parameters
    ----------
    data: pd.DataFrame
        Dataframe with features for clustering with index as in ``retention_config.index_col``
    clusterer: sklearn clusterer class
        For instance, ``sklearn.cluster.KMeans`` or ``sklearn.mixture.GaussianMixture``.
    max_n_clusters: int
        Maximal number of clusters for searching.
    random_state: int
        Random state for the clusterer.

    Returns
    -------
    Optimal keyword arguments for the clustering method.

    Return type
    -----------
    Dict
    """
    params = clusterer().get_params()  # instantiate with defaults to read the parameter names
    args = {i: j for i, j in kwargs.items() if i in params}
    # KMeans-style estimators take n_clusters; mixture models take n_components
    kms = 'n_clusters' in params
    args.pop('n_clusters' if kms else 'n_components', None)
    args.update({'random_state': random_state})
    score = {}
    for i in range(2, max_n_clusters + 1):
        args.update({'n_clusters' if kms else 'n_components': i})
        km = clusterer(**args)
        score[i] = silhouette_score(data, km.fit_predict(data), metric='cosine')
    best = pd.Series(score).idxmax()
    args.update({'n_clusters' if kms else 'n_components': best})
    print(f'Best number of clusters is {best}')
    return args
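A hedged usage sketch with a hypothetical feature table:

import pandas as pd
from sklearn.cluster import KMeans

# hypothetical feature table; any numeric DataFrame works
df = pd.DataFrame({'f1': [0.0, 0.1, 0.2, 5.0, 5.1, 5.2],
                   'f2': [1.0, 1.1, 0.9, 9.0, 9.1, 8.9]})
best_args = find_best_n_clusters(df, KMeans, max_n_clusters=4, random_state=0)
model = KMeans(**best_args).fit(df)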
Example 11: calc_all_metrics
def calc_all_metrics(data, km):
    """
    Calculates all quality metrics: Cluster Stability Index, Silhouette score, Homogeneity, and distances for the clustering.

    Parameters
    ----------
    data: pd.DataFrame
        Dataframe with features for clustering indexed as in ``retention_config.index_col``
    km:
        Already fitted clusterer.

    Returns
    -------
    Metrics scores

    Return type
    -----------
    Dict
    """
    res = {}
    cl = km.labels_
    res['mean_pd'] = calc_mean_pd(data, cl)
    if hasattr(km, 'cluster_centers_'):
        res['mean_fc'] = calc_mean_dist_from_center(data, km)
    if len(set(cl)) > 1:  # the silhouette score is undefined for a single cluster
        res['silhouette'] = silhouette_score(data, cl, metric='cosine')
    return res
Example 12: test
def test():
    parser = argparse.ArgumentParser()
    parser.add_argument("File")
    args = parser.parse_args()
    info = fh.get_function_information(args.File)
    #info = fh.get_arg_funcs(args.File)
    info = trim_funcs(info, args.File)
    vect, func_sparse = funcs_to_sparse(info)
    transformer = Normalizer().fit(func_sparse)
    func_sparse = transformer.transform(func_sparse)
    #svd = TruncatedSVD(random_state=2)
    svd = TruncatedSVD(n_components=5, n_iter=7, random_state=42)
    func_sparse = svd.fit_transform(func_sparse)
    scores = []
    clust_count = []
    for x in range(2, 20):
        result = KMeans(n_clusters=x, random_state=2).fit(func_sparse)
        score = silhouette_score(func_sparse, result.labels_, metric="cosine")
        scores.append(score)
        clust_count.append(x)
        print("Clusters {:<3} | Silhouette Score : {}".format(x, score))
    plt.plot(clust_count, scores)
    plt.xlabel("Cluster Centroid Count")
    plt.ylabel("Silhouette Score")
    plt.grid(True)  # plt.grid = True would replace the function instead of enabling the grid
    plt.show()
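The core of the scan (normalize, reduce with TruncatedSVD, score k-means with a cosine silhouette) works on any sparse feature matrix, not just the binary-analysis features used here. A self-contained sketch on synthetic data:

from scipy.sparse import random as sparse_random
from sklearn.cluster import KMeans
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import Normalizer

X = sparse_random(200, 50, density=0.3, random_state=42)  # stand-in feature matrix
X = Normalizer().fit_transform(X)
X = TruncatedSVD(n_components=5, n_iter=7, random_state=42).fit_transform(X)

for k in range(2, 8):
    labels = KMeans(n_clusters=k, n_init=10, random_state=2).fit_predict(X)
    print(k, silhouette_score(X, labels, metric="cosine"))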
Example 13: single_cluster
def single_cluster(all_functions, centroid_count=2):
    vect, func_sparse = funcs_to_sparse(all_functions)
    transformer = Normalizer().fit(func_sparse)
    func_sparse = transformer.transform(func_sparse)
    # svd = TruncatedSVD(random_state=2)
    # svd = TruncatedSVD(n_components=5, n_iter=7, random_state=42)
    # func_sparse = svd.fit_transform(func_sparse)
    result = KMeans(n_clusters=centroid_count, random_state=2).fit(func_sparse)
    # sample_size caps the pairwise-distance computation; random_state makes the subsample reproducible
    score = silhouette_score(func_sparse,
                             result.labels_,
                             metric="cosine",
                             random_state=2,
                             sample_size=5000)
    print("Clusters {:<3} | Silhouette Score : {}".format(
        centroid_count, score))
    return result.labels_
Example 14: get_single_cluster
def get_single_cluster(all_functions, centroid_count=2):
    return_dict = {}
    vect, func_sparse = funcs_to_sparse(all_functions)
    transformer = Normalizer().fit(func_sparse)
    func_sparse = transformer.transform(func_sparse)
    # svd = TruncatedSVD(random_state=2)
    # svd = TruncatedSVD(n_components=5, n_iter=7, random_state=42)
    # func_sparse = svd.fit_transform(func_sparse)
    result = KMeans(n_clusters=centroid_count, random_state=2).fit(func_sparse)
    score = silhouette_score(func_sparse,
                             result.labels_,
                             metric="cosine",
                             random_state=2,
                             sample_size=5000)
    #print("Clusters {:<3} | Silhouette Score : {}".format(centroid_count, score))
    return_dict['count'] = centroid_count
    return_dict['score'] = score
    return_dict['labels'] = result.labels_
    return return_dict
Example 15: n_cluster_embeddings
def n_cluster_embeddings(self, features=None, n_clusters=3, method='ac'):
    '''
    Clusters the nodes based on embedding features.
    features = None (use DGI-generated embeddings)
    '''
    if method == 'ac':
        # note: 'affinity' was renamed 'metric' in scikit-learn 1.2
        clustering = AgglomerativeClustering(n_clusters=n_clusters,
                                             affinity='euclidean',
                                             linkage='ward')
        clustering.fit(self.embeddings if features is None else features)
        self.labels = clustering.labels_
        self.score = silhouette_score(self.embeddings if features is None else features,
                                      self.labels)
    return {'labels': self.labels, 'score': self.score}
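A standalone sketch of the same scoring step; with linkage='ward' the affinity must be euclidean anyway, so it can simply be omitted:

import numpy as np
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics import silhouette_score

embeddings = np.random.normal(size=(100, 16))  # stand-in for DGI embeddings
clustering = AgglomerativeClustering(n_clusters=3, linkage='ward').fit(embeddings)
print(silhouette_score(embeddings, clustering.labels_))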