本文整理匯總了Python中sklearn.cluster.KMeans方法的典型用法代碼示例。如果您正苦於以下問題:Python cluster.KMeans方法的具體用法?Python cluster.KMeans怎麽用?Python cluster.KMeans使用的例子?那麽,這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類sklearn.cluster的用法示例。
在下文中一共展示了cluster.KMeans方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: test_k_means_new_centers
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import KMeans [as 別名]
def test_k_means_new_centers():
    """Exercise the code path where a cluster center gets reassigned.

    KMeans is started from deliberately poor initial centers and fitted on
    both a dense array and its ``sp.coo_matrix`` counterpart; after the
    labels are renumbered by order of first appearance they must match the
    expected assignment in both cases.
    """
    samples = np.array([[0, 0, 1, 1],
                        [0, 0, 0, 0],
                        [0, 1, 0, 0],
                        [0, 0, 0, 0],
                        [0, 0, 0, 0],
                        [0, 1, 0, 0]])
    expected = [0, 1, 2, 1, 1, 2]
    poor_init = np.array([[+0, 1, 0, 0],
                          [.2, 0, .2, .2],
                          [+0, 0, 0, 0]])

    km = KMeans(n_clusters=3, init=poor_init, n_init=1, max_iter=10,
                random_state=1)

    for variant in (samples, sp.coo_matrix(samples)):
        km.fit(variant)
        found = km.labels_
        # Renumber the labels so that the first sample is in cluster 0,
        # the second distinct cluster seen is 1, and so on.
        _, first_seen = np.unique(found, return_index=True)
        renumbered = first_seen[found]
        np.testing.assert_array_equal(renumbered, expected)
示例2: SpectralClustering
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import KMeans [as 別名]
def SpectralClustering(CKSym, n):
    """Partition the rows of an affinity matrix into ``n`` clusters.

    This is a direct port of the JHU vision lab code (sklearn's own
    SpectralClustering could probably be used instead).

    Parameters
    ----------
    CKSym : array with ``astype`` and a 2-D ``shape``
        Affinity matrix; it is cast to float before use.
        NOTE(review): presumably square and symmetric -- confirm with callers.
    n : int
        Number of clusters; also the number of trailing singular vectors
        that are fed to k-means.

    Returns
    -------
    The ``labels_`` attribute of the fitted ``KMeans`` model.
    """
    CKSym = CKSym.astype(float)
    N, _ = CKSym.shape
    MAXiter = 1000  # Maximum number of iterations for KMeans
    REPlic = 20  # Number of replications for KMeans
    eps = np.finfo(float).eps

    # Diagonal scaling built from the column sums; eps keeps the division
    # finite when a column sums to zero.
    DN = np.diag(np.divide(1, np.sqrt(np.sum(CKSym, axis=0) + eps)))
    LapN = identity(N).toarray().astype(float) - np.matmul(np.matmul(DN, CKSym), DN)

    # Keep the last n right-singular vectors as columns.
    _, _, vN = np.linalg.svd(LapN)
    vN = vN.T
    kerN = vN[:, N - n:N]

    # Scale every row to (approximately) unit length before clustering.
    normN = np.sqrt(np.sum(np.square(kerN), axis=1))
    kerNS = np.divide(kerN, normN.reshape(len(normN), 1) + eps)

    # `n_jobs` is intentionally not passed: scikit-learn deprecated it for
    # KMeans in 0.23 and removed it in 1.0, where it raises TypeError.
    km = KMeans(n_clusters=n, n_init=REPlic, max_iter=MAXiter).fit(kerNS)
    return km.labels_
示例3: silhouette_score
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import KMeans [as 別名]
def silhouette_score(phate_op, n_clusters, random_state=None, **kwargs):
    """Compute the Silhouette score of a k-means clustering of the PHATE potential.

    Parameters
    ----------
    phate_op : phate.PHATE
        Fitted PHATE operator
    n_clusters : int
        Number of clusters.
    random_state : int or None, optional (default: None)
        Random seed for k-means
    **kwargs
        Forwarded unchanged to ``kmeans``.

    Returns
    -------
    score : float
    """
    labels = kmeans(
        phate_op,
        n_clusters=n_clusters,
        random_state=random_state,
        **kwargs
    )
    potential = phate_op.diff_potential
    return metrics.silhouette_score(potential, labels)
示例4: entropy_test
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import KMeans [as 別名]
def entropy_test(datasets_dimred, ds_labels):
    """Print the average normalized entropy of k-means clusterings, k = 10..20.

    Parameters
    ----------
    datasets_dimred : sequence of arrays
        Arrays that are joined with ``np.concatenate`` before clustering.
    ds_labels : array-like
        Labels converted with ``np.array`` and passed to
        ``avg_norm_entropy`` together with the cluster assignment.
        NOTE(review): presumably one dataset label per concatenated row --
        confirm against ``avg_norm_entropy``.

    Returns
    -------
    None. One line per ``k`` is written with ``print``.
    """
    ds_labels = np.array(ds_labels)
    X_dimred = np.concatenate(datasets_dimred)
    embedding = None

    for k in range(10, 21):
        # `n_jobs` is intentionally not passed: scikit-learn deprecated it
        # for KMeans in 0.23 and removed it in 1.0 (TypeError).
        km = KMeans(n_clusters=k, verbose=0)
        km.fit(X_dimred)

        # Visualisation is switched off by the hard-coded `False`; flip it to
        # plot every fifth k, reusing the embedding between plots.
        if False and k % 5 == 0:
            embedding = visualize(
                datasets_dimred,
                km.labels_, NAMESPACE + '_km{}'.format(k),
                [str(x) for x in range(k)],
                embedding=embedding
            )

        print('k = {}, average normalized entropy = {}'
              .format(k, avg_norm_entropy(ds_labels, km.labels_)))
示例5: spectral_clustering
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import KMeans [as 別名]
def spectral_clustering(L, K, seed=1234):
    """
    Implement paper "Shi, J. and Malik, J., 2000. Normalized cuts and image
    segmentation. IEEE Transactions on pattern analysis and machine intelligence,
    22(8), pp.888-905."

    Args:
      L: graph Laplacian, numpy or scipy matrix
      K: int, number of clusters
      seed: random_state handed to KMeans

    Returns:
      node_label: the ``labels_`` attribute of the fitted KMeans model

    N.B.: for simplicity, we only consider simple and undirected graph
    """
    node_count = L.shape[0]
    assert (K < node_count - 1)

    # Only the eigenvectors are used; the eigenvalues are discarded.
    _, vectors = scipy.sparse.linalg.eigsh(
        L,
        k=K,
        which='LM',
        maxiter=node_count * 10000,
        tol=0,
        mode='normal')

    model = KMeans(n_clusters=K, random_state=seed)
    model.fit(vectors.real)
    return model.labels_
示例6: cluster
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import KMeans [as 別名]
def cluster(points, means = 8):
    """Cluster ``points`` with k-means and save a sample mosaic per cluster.

    For every cluster, eight entries of ``data.files`` whose label matches
    the cluster are drawn at random (repeats are possible) and joined along
    axis 1; the per-cluster strips are then stacked along axis 0 and written
    to 'Results/clusters.png'.

    NOTE(review): assumes ``points[i]`` corresponds to ``data.files[i]`` --
    confirm with the caller.
    """
    model = KMeans(n_clusters = means)
    model.fit(points)
    labels = model.predict(points)

    strips = []
    for cluster_id in range(means):
        picked = []
        # Rejection sampling: keep drawing until eight members are collected.
        while len(picked) < 8:
            idx = random.randint(0, data.files.shape[0] - 1)
            if labels[idx] != cluster_id:
                continue
            picked.append(data.files[idx])
        strips.append(np.concatenate(picked, axis=1))

    mosaic = np.concatenate(strips, axis=0)
    Image.fromarray(mosaic).save('Results/clusters.png')
示例7: test_greedy0_n2
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import KMeans [as 別名]
def test_greedy0_n2(self):
    """Epsilon-greedy (epsilon=0) with a two-cluster neighborhood policy.

    Checks the arms predicted for the two query contexts and that the
    underlying implementation holds a ``KMeans`` instance.
    """
    history = [[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
               [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
               [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
               [0, 2, 1, 0, 0]]
    queries = [[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]]

    arms, mab = self.predict(
        arms=[1, 2, 3, 4],
        decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
        rewards=[0, 1, 1, 0, 0, 0, 0, 1, 1, 1],
        learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0),
        neighborhood_policy=NeighborhoodPolicy.Clusters(2),
        context_history=history,
        contexts=queries,
        seed=123456,
        num_run=1,
        is_predict=True,
    )

    self.assertListEqual(arms, [3, 1])
    self.assertTrue(isinstance(mab._imp.kmeans, KMeans))
示例8: test_copy
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import KMeans [as 別名]
def test_copy(self):
    """A deep copy of the clusters implementation shares no objects with it.

    The copy must be a distinct object with distinct ``kmeans`` and
    per-cluster learning policies, while the policies keep their type and
    epsilon value.
    """
    history = [[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
               [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
               [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
               [0, 2, 1, 0, 0]]
    queries = [[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]]

    arms, mab = self.predict(
        arms=[1, 2, 3, 4],
        decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
        rewards=[0, 1, 1, 0, 0, 0, 0, 1, 1, 1],
        learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0),
        neighborhood_policy=NeighborhoodPolicy.Clusters(2),
        context_history=history,
        contexts=queries,
        seed=123456,
        num_run=1,
        is_predict=True,
    )

    source = mab._imp
    clusters = deepcopy(source)
    self.assertIsNot(clusters, source)

    # Both per-cluster learning policies survive the copy with their type.
    for idx in (0, 1):
        self.assertIsInstance(clusters.lp_list[idx], _EpsilonGreedy)

    self.assertIsInstance(clusters.kmeans, KMeans)
    self.assertIsNot(clusters.kmeans, source.kmeans)

    # ... but they are new objects ...
    for idx in (0, 1):
        self.assertIsNot(clusters.lp_list[idx], source.lp_list[idx])

    # ... carrying the same epsilon.
    for idx in (0, 1):
        self.assertEqual(clusters.lp_list[idx].epsilon,
                         source.lp_list[idx].epsilon)
示例9: test
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import KMeans [as 別名]
def test():
    """Evaluate the model on link prediction and on clustering quality.

    The encoded nodes are clustered into 7 groups with k-means and compared
    with ``data.y``; link prediction is scored through ``model.test``.

    Returns
    -------
    tuple
        ``(auc, ap, completeness, homogeneity, v_measure)``.
    """
    model.eval()
    z = model.encode(data.x, data.train_pos_edge_index)

    # Cluster embedded values using k-means.
    embedding = z.cpu().numpy()
    clusterer = KMeans(n_clusters=7, random_state=0)
    clusterer.fit(embedding)
    pred = clusterer.predict(embedding)

    truth = data.y.cpu().numpy()
    completeness = completeness_score(truth, pred)
    hm = homogeneity_score(truth, pred)
    nmi = v_measure_score(truth, pred)

    auc, ap = model.test(
        z, data.test_pos_edge_index, data.test_neg_edge_index
    )
    return auc, ap, completeness, hm, nmi
示例10: clustering_scores
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import KMeans [as 別名]
def clustering_scores(self, prediction_algorithm: str = "knn") -> Tuple:
    """Score a clustering of the latent space against the dataset labels.

    Parameters
    ----------
    prediction_algorithm : str
        ``"knn"`` clusters the latent space with ``KMeans`` (despite the
        name, no nearest-neighbour model is involved); ``"gmm"`` fits a
        ``GMM`` and uses its predictions.

    Returns
    -------
    tuple or None
        ``(silhouette, NMI, ARI, UCA)`` when the dataset has more than one
        label, otherwise ``None``. The silhouette is computed on the true
        labels; the other three compare true and predicted labels.

    Raises
    ------
    ValueError
        If ``prediction_algorithm`` is neither ``"knn"`` nor ``"gmm"``
        (only checked when the dataset has more than one label).
    """
    n_labels = self.gene_dataset.n_labels
    if not (n_labels > 1):
        # Nothing to score with a single label; callers receive None.
        return None

    # Fail fast: previously an unknown name fell through both branches and
    # crashed later with an UnboundLocalError on `labels_pred`.
    if prediction_algorithm not in ("knn", "gmm"):
        raise ValueError(
            "prediction_algorithm must be 'knn' or 'gmm', got %r"
            % (prediction_algorithm,)
        )

    latent, _, labels = self.get_latent()
    if prediction_algorithm == "knn":
        labels_pred = KMeans(n_labels, n_init=200).fit_predict(latent)
    else:
        gmm = GMM(n_labels)
        gmm.fit(latent)
        labels_pred = gmm.predict(latent)

    asw_score = silhouette_score(latent, labels)
    nmi_score = NMI(labels, labels_pred)
    ari_score = ARI(labels, labels_pred)
    uca_score = unsupervised_clustering_accuracy(labels, labels_pred)[0]
    logger.debug(
        "Clustering Scores:\nSilhouette: %.4f\nNMI: %.4f\nARI: %.4f\nUCA: %.4f",
        asw_score, nmi_score, ari_score, uca_score,
    )
    return asw_score, nmi_score, ari_score, uca_score
示例11: __init__
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import KMeans [as 別名]
def __init__(self, dataset, base_query_strategy, similarity_metric=None,
             clustering_method=None, beta=1.0, random_state=None):
    """Set up the density-weighted wrapper around a base query strategy.

    Parameters
    ----------
    dataset
        Dataset forwarded to the parent initialiser.
    base_query_strategy : QueryStrategy
        Strategy being wrapped; it must use the same dataset as this object.
    similarity_metric : callable, optional
        Stored as ``self.similarity_metric``; defaults to
        ``cosine_similarity``.
    clustering_method : optional
        Stored as ``self.clustering_method``; defaults to
        ``KMeans(n_clusters=5)`` seeded with this object's random state.
    beta : float, optional
        Stored as ``self.beta``.
    random_state : optional
        Passed through ``seed_random_state`` and stored as
        ``self.random_state_``.

    Raises
    ------
    TypeError
        If ``base_query_strategy`` is not a ``QueryStrategy``.
    ValueError
        If ``base_query_strategy.dataset`` differs from ``self.dataset``.
    """
    super(DensityWeightedMeta, self).__init__(dataset=dataset)

    if not isinstance(base_query_strategy, QueryStrategy):
        raise TypeError(
            "'base_query_strategy' has to be an instance of 'QueryStrategy'"
        )
    if base_query_strategy.dataset != self.dataset:
        # The two literals are concatenated, so the separating space must
        # live inside one of them (it used to read "the samedataset").
        raise ValueError("base_query_strategy should share the same "
                         "dataset instance with DensityWeightedMeta")

    self.base_query_strategy = base_query_strategy
    self.beta = beta
    self.random_state_ = seed_random_state(random_state)

    if clustering_method is not None:
        self.clustering_method = clustering_method
    else:
        self.clustering_method = KMeans(
            n_clusters=5, random_state=self.random_state_)

    if similarity_metric is not None:
        self.similarity_metric = similarity_metric
    else:
        self.similarity_metric = cosine_similarity
示例12: SAA
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import KMeans [as 別名]
def SAA(self):
    """Discretize the Markovian process with k-means, stage by stage.

    For every stage ``t`` from 1 to ``T - 1`` the samples of that stage are
    clustered and the cluster centers become ``Markov_states[t]``. When
    ``int_flag`` is 0 the transition matrix of the stage is filled by
    counting moves from the previous labels to the new ones and dividing by
    the per-origin counts; when ``int_flag`` is 1 the matrix is produced by
    ``train_transition_matrix`` after the loop.

    Returns
    -------
    tuple
        ``(self.Markov_states, self.transition_matrix)``.
    """
    from sklearn.cluster import KMeans

    if self.int_flag == 0:
        # Every sample starts in state 0.
        labels = numpy.zeros(self.n_samples, dtype=int)
    self._initialize_matrix()

    for t in range(1, self.T):
        stage_samples = self.samples[:, t, :]
        model = KMeans(
            n_clusters=self.n_Markov_states[t],
            random_state=0,
        )
        kmeans = model.fit(stage_samples)
        self.Markov_states[t] = kmeans.cluster_centers_

        if self.int_flag == 0:
            labels_new = kmeans.labels_
            # Column vector so the division normalises each origin row.
            counts = numpy.zeros([self.n_Markov_states[t - 1], 1])
            for i in range(self.n_samples):
                origin = labels[i]
                counts[origin] += 1
                self.transition_matrix[t][origin][labels_new[i]] += 1
            self.transition_matrix[t] /= counts
            labels = labels_new

    if self.int_flag == 1:
        self.train_transition_matrix()

    return (self.Markov_states, self.transition_matrix)
示例13: analysis_KMeans
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import KMeans [as 別名]
def analysis_KMeans():
    """Append the mean k-means distortion for k = 320..999 to a CSV file.

    For every ``k`` a ``KMeans`` model is fitted on the module-level
    ``np_features`` and the mean Euclidean distance of each row to its
    nearest cluster center is recorded. Once all fits are done, one
    ``k,distortion`` line per ``k`` is appended to './kmeans_cluster.csv'.
    """
    mean_distortions = []
    K_range = range(320, 1000)
    n_rows = np_features.shape[0]

    for k in K_range:
        print("Cluster k is {}".format(k))
        # `n_jobs` is intentionally not passed: scikit-learn deprecated it
        # for KMeans in 0.23 and removed it in 1.0 (TypeError).
        kmeans_model = KMeans(n_clusters=k, init="k-means++")
        kmeans_model.fit(np_features)
        nearest = np.min(
            cdist(np_features, kmeans_model.cluster_centers_, 'euclidean'),
            axis=1)
        mean_distortions.append(sum(nearest) / n_rows)

    # "a+" appends, so repeated runs accumulate rows in the same file.
    with open("./kmeans_cluster.csv", "a+") as wh:
        for k, distortion in zip(K_range, mean_distortions):
            wh.write("{},{}\n".format(k, distortion))

    # Disabled elbow plot, kept for reference:
    # plt.plot(K_range, mean_distortions, 'bx-')
    # plt.xlabel('k')
    # plt.ylabel(u'Avgerage distortion degree')
    # plt.title(u'Elbows rule to select the best K value')
    # plt.savefig("kmeans_cluster.png")
示例14: findClusters_kmeans
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import KMeans [as 別名]
def findClusters_kmeans(data):
    '''
    Cluster data using k-means

    Fits a 4-cluster KMeans model (30 initialisations, silent) on ``data``
    and returns the value of ``fit``, i.e. the fitted estimator.
    '''
    # create the classifier object
    # `n_jobs` is intentionally not passed: scikit-learn deprecated it for
    # KMeans in 0.23 and removed it in 1.0, where it raises TypeError.
    kmeans = cl.KMeans(
        n_clusters=4,
        verbose=0,
        n_init=30
    )

    # fit the data
    return kmeans.fit(data)
# the file name of the dataset
示例15: calc_mean_dist_from_center
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import KMeans [as 別名]
def calc_mean_dist_from_center(data, km):
    """
    Calculates mean distance from cluster centers. Note that it will be calculated only for KMeans and GMM, because DBSCAN may have ambiguous form of clusters.

    Parameters
    --------
    data: pd.DataFrame
        Dataframe with features for clustering indexed as in ``retention_config.index_col``
    km:
        Already fitted clusterer exposing ``labels_`` and ``cluster_centers_``.

    Returns
    -------
    Dict
        Mapping of cluster label to the mean of ``_cosine_dist`` between the
        rows assigned to that cluster and its center.
    """
    assignments = km.labels_
    centers = km.cluster_centers_
    return {
        label: _cosine_dist(data[assignments == label], centers[label]).mean()
        for label in set(assignments)
    }