當前位置: 首頁>>代碼示例>>Python>>正文


Python cluster.KMeans方法代碼示例

本文整理匯總了Python中sklearn.cluster.KMeans方法的典型用法代碼示例。如果您正苦於以下問題:Python cluster.KMeans方法的具體用法?Python cluster.KMeans怎麼用?Python cluster.KMeans使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在sklearn.cluster的用法示例。


在下文中一共展示了cluster.KMeans方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: test_k_means_new_centers

# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import KMeans [as 別名]
def test_k_means_new_centers():
    """Check the labels KMeans produces when started from poor centers.

    The same estimator is fitted on the dense array and on
    ``sp.coo_matrix`` of it; after relabelling, both fits must match the
    expected assignment.
    """
    # Explore the part of the code where a new center is reassigned
    X = np.array([
        [0, 0, 1, 1],
        [0, 0, 0, 0],
        [0, 1, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 1, 0, 0],
    ])
    expected_labels = [0, 1, 2, 1, 1, 2]
    bad_centers = np.array([
        [+0, 1, 0, 0],
        [.2, 0, .2, .2],
        [+0, 0, 0, 0],
    ])

    km = KMeans(
        n_clusters=3,
        init=bad_centers,
        n_init=1,
        max_iter=10,
        random_state=1,
    )
    for this_X in (X, sp.coo_matrix(X)):
        km.fit(this_X)
        raw_labels = km.labels_
        # Replace every label by the row index at which that label first
        # appears, so the comparison does not depend on cluster numbering.
        _, first_seen = np.unique(raw_labels, return_index=True)
        relabelled = first_seen[raw_labels]
        np.testing.assert_array_equal(relabelled, expected_labels)
開發者ID:PacktPublishing,項目名稱:Mastering-Elasticsearch-7.0,代碼行數:24,代碼來源:test_k_means.py

示例2: SpectralClustering

# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import KMeans [as 別名]
def SpectralClustering(CKSym, n):
    """Cluster the rows of a square affinity matrix into ``n`` groups.

    Builds ``I - D^-1/2 * CKSym * D^-1/2`` (``D`` holding the column sums
    of ``CKSym``), takes the last ``n`` right singular vectors returned by
    ``np.linalg.svd``, normalises each row to unit length and runs KMeans
    on the result.

    Parameters
    ----------
    CKSym : array with ``astype`` and a two-dimensional ``shape``
        Affinity matrix; presumably symmetric and non-negative (not
        checked here).
    n : int
        Number of clusters, and number of singular vectors kept.

    Returns
    -------
    The ``labels_`` attribute of the fitted KMeans model.
    """
    # This is direct port of JHU vision lab code. Could probably use sklearn SpectralClustering.
    CKSym = CKSym.astype(float)
    N, _ = CKSym.shape
    MAXiter = 1000  # Maximum number of iterations for KMeans
    REPlic = 20  # Number of replications for KMeans
    eps = np.finfo(float).eps  # guards the divisions below against zero

    DN = np.diag(np.divide(1, np.sqrt(np.sum(CKSym, axis=0) + eps)))
    LapN = identity(N).toarray().astype(float) - np.matmul(np.matmul(DN, CKSym), DN)
    _, _, vN = np.linalg.svd(LapN)
    vN = vN.T
    kerN = vN[:, N - n:N]
    normN = np.sqrt(np.sum(np.square(kerN), axis=1))
    kerNS = np.divide(kerN, normN.reshape(len(normN), 1) + eps)
    # ``n_jobs`` is intentionally not passed: KMeans deprecated it in
    # scikit-learn 0.23 and removed it in 1.0, where it raises TypeError.
    km = KMeans(n_clusters=n, n_init=REPlic, max_iter=MAXiter).fit(kerNS)
    return km.labels_
開發者ID:abhinav4192,項目名稱:sparse-subspace-clustering-python,代碼行數:18,代碼來源:SpectralClustering.py

示例3: silhouette_score

# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import KMeans [as 別名]
def silhouette_score(phate_op, n_clusters, random_state=None, **kwargs):
    """Silhouette score of a k-means clustering of the PHATE potential.

    Parameters
    ----------
    phate_op : phate.PHATE
        Fitted PHATE operator
    n_clusters : int
        Number of clusters.
    random_state : int or None, optional (default: None)
        Random seed for k-means
    **kwargs
        Forwarded unchanged to ``kmeans``.

    Returns
    -------
    score : float
    """
    labels = kmeans(
        phate_op,
        n_clusters=n_clusters,
        random_state=random_state,
        **kwargs
    )
    score = metrics.silhouette_score(phate_op.diff_potential, labels)
    return score
開發者ID:KrishnaswamyLab,項目名稱:PHATE,代碼行數:20,代碼來源:cluster.py

示例4: entropy_test

# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import KMeans [as 別名]
def entropy_test(datasets_dimred, ds_labels):
    """Print the average normalized entropy of KMeans clusterings, k = 10..20.

    Parameters
    ----------
    datasets_dimred : sequence of arrays
        Per-dataset arrays; they are joined with ``np.concatenate``.
    ds_labels : sequence
        Labels passed, together with the cluster labels, to
        ``avg_norm_entropy``; presumably one dataset label per
        concatenated row (TODO confirm against caller).

    Returns
    -------
    None. One line per ``k`` is printed.
    """
    ds_labels = np.array(ds_labels)
    X_dimred = np.concatenate(datasets_dimred)
    embedding = None
    # Plotting is switched off; flip this flag to draw the clustering for
    # every k that is a multiple of 5.
    plot_clusters = False

    for k in range(10, 21):
        # ``n_jobs`` is not passed: KMeans deprecated it in scikit-learn
        # 0.23 and removed it in 1.0.
        km = KMeans(n_clusters=k, verbose=0)
        km.fit(X_dimred)

        if plot_clusters and k % 5 == 0:
            embedding = visualize(
                datasets_dimred,
                km.labels_, NAMESPACE + '_km{}'.format(k),
                [str(x) for x in range(k)],
                embedding=embedding
            )

        print('k = {}, average normalized entropy = {}'
              .format(k, avg_norm_entropy(ds_labels, km.labels_)))
開發者ID:brianhie,項目名稱:scanorama,代碼行數:22,代碼來源:pancreas_tests.py

示例5: spectral_clustering

# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import KMeans [as 別名]
def spectral_clustering(L, K, seed=1234):
  """
  Spectral partitioning after "Shi, J. and Malik, J., 2000. Normalized cuts
  and image segmentation. IEEE Transactions on pattern analysis and machine
  intelligence, 22(8), pp.888-905."

  K eigenvectors of L are computed with scipy's eigsh (which='LM') and
  their rows are clustered with KMeans.

  Args:
    L: graph Laplacian, numpy or scipy matrix
    K: int, number of clusters; must satisfy K < L.shape[0] - 1
    seed: random_state handed to KMeans

  Returns:
    the labels_ attribute of the fitted KMeans model

  N.B.: for simplicity, we only consider simple and undirected graph
  """
  n_nodes = L.shape[0]
  assert (K < n_nodes - 1)

  # Only the eigenvectors are used; the eigenvalues are discarded.
  _, vectors = scipy.sparse.linalg.eigsh(
      L,
      k=K,
      which='LM',
      maxiter=n_nodes * 10000,
      tol=0,
      mode='normal')
  model = KMeans(n_clusters=K, random_state=seed)
  model.fit(vectors.real)

  return model.labels_
開發者ID:lrjconan,項目名稱:LanczosNetwork,代碼行數:25,代碼來源:spectral_graph_partition.py

示例6: cluster

# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import KMeans [as 別名]
def cluster(points, means=8):
    """Cluster ``points`` with KMeans and save a mosaic of sample images.

    For every cluster, eight entries of ``data.files`` whose label matches
    the cluster are drawn at random (repeats possible) and joined along
    axis 1; the per-cluster strips are then stacked along axis 0 and
    written to ``Results/clusters.png``.

    Assumes row ``i`` of ``points`` corresponds to ``data.files[i]``
    (TODO confirm against caller).
    """
    model = KMeans(n_clusters=means)
    model.fit(points)
    assignments = model.predict(points)

    strips = []
    for cluster_id in range(means):
        picked = []
        # Rejection sampling: keep drawing random indices until eight of
        # them land in the current cluster.
        while len(picked) < 8:
            idx = random.randint(0, data.files.shape[0] - 1)
            if assignments[idx] != cluster_id:
                continue
            picked.append(data.files[idx])
        strips.append(np.concatenate(picked, axis=1))

    mosaic = np.concatenate(strips, axis=0)
    Image.fromarray(mosaic).save('Results/clusters.png')
開發者ID:manicman1999,項目名稱:Keras-BiGAN,代碼行數:23,代碼來源:guess.py

示例7: test_greedy0_n2

# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import KMeans [as 別名]
def test_greedy0_n2(self):
        """Greedy policy (epsilon=0) with a two-cluster neighborhood policy.

        Runs ``self.predict`` on a fixed history and two contexts, then
        checks the predicted arms and that the fitted implementation holds
        a ``KMeans`` instance.
        """
        arms, mab = self.predict(arms=[1, 2, 3, 4],
                                 decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
                                 rewards=[0, 1, 1, 0, 0, 0, 0, 1, 1, 1],
                                 learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0),
                                 neighborhood_policy=NeighborhoodPolicy.Clusters(2),
                                 context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
                                                  [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                                                  [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
                                                  [0, 2, 1, 0, 0]],
                                 contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
                                 seed=123456,
                                 num_run=1,
                                 is_predict=True)

        self.assertListEqual(arms, [3, 1])
        # assertIsInstance reports the actual type on failure, unlike
        # assertTrue(isinstance(...)), which only says "False is not true".
        self.assertIsInstance(mab._imp.kmeans, KMeans)
開發者ID:fidelity,項目名稱:mabwiser,代碼行數:20,代碼來源:test_clusters.py

示例8: test_copy

# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import KMeans [as 別名]
def test_copy(self):
        """A deep copy of the fitted implementation shares no objects with it.

        The copy, its ``kmeans`` and both entries of ``lp_list`` must be new
        objects of the expected types, with equal ``epsilon`` values.
        """
        history = [
            [0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
            [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
            [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
            [0, 2, 1, 0, 0],
        ]
        _, mab = self.predict(
            arms=[1, 2, 3, 4],
            decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
            rewards=[0, 1, 1, 0, 0, 0, 0, 1, 1, 1],
            learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0),
            neighborhood_policy=NeighborhoodPolicy.Clusters(2),
            context_history=history,
            contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
            seed=123456,
            num_run=1,
            is_predict=True,
        )

        clusters = deepcopy(mab._imp)
        self.assertIsNot(clusters, mab._imp)
        for idx in (0, 1):
            self.assertIsInstance(clusters.lp_list[idx], _EpsilonGreedy)
        self.assertIsInstance(clusters.kmeans, KMeans)
        self.assertIsNot(clusters.kmeans, mab._imp.kmeans)
        for idx in (0, 1):
            self.assertIsNot(clusters.lp_list[idx], mab._imp.lp_list[idx])
        for idx in (0, 1):
            self.assertEqual(clusters.lp_list[idx].epsilon,
                             mab._imp.lp_list[idx].epsilon)
開發者ID:fidelity,項目名稱:mabwiser,代碼行數:27,代碼來源:test_clusters.py

示例9: test

# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import KMeans [as 別名]
def test():
    """Evaluate the module-level ``model`` on the module-level ``data``.

    The node embeddings are clustered with KMeans (7 clusters, fixed seed)
    and compared with ``data.y``; link-prediction metrics come from
    ``model.test``.

    Returns
    -------
    tuple
        ``(auc, ap, completeness, hm, nmi)`` where ``auc`` and ``ap`` are
        whatever ``model.test`` returns, and the last three are the
        completeness, homogeneity and v-measure scores of the clustering.
    """
    model.eval()
    z = model.encode(data.x, data.train_pos_edge_index)

    # Cluster embedded values using k-means.
    # detach() first: Tensor.numpy() raises RuntimeError on a tensor that
    # requires grad, and nothing visible here disables autograd.
    kmeans_input = z.detach().cpu().numpy()
    kmeans = KMeans(n_clusters=7, random_state=0).fit(kmeans_input)
    pred = kmeans.predict(kmeans_input)

    labels = data.y.cpu().numpy()
    completeness = completeness_score(labels, pred)
    hm = homogeneity_score(labels, pred)
    nmi = v_measure_score(labels, pred)

    auc, ap = model.test(z, data.test_pos_edge_index, data.test_neg_edge_index)

    return auc, ap, completeness, hm, nmi
開發者ID:rusty1s,項目名稱:pytorch_geometric,代碼行數:19,代碼來源:argva_node_clustering.py

示例10: clustering_scores

# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import KMeans [as 別名]
def clustering_scores(self, prediction_algorithm: str = "knn") -> Tuple:
        """Score a clustering of the latent space against the dataset labels.

        Parameters
        ----------
        prediction_algorithm
            ``"knn"`` clusters the latent space with ``KMeans``
            (``n_init=200``); ``"gmm"`` fits ``GMM`` and uses its
            predictions. Both use ``self.gene_dataset.n_labels`` components.

        Returns
        -------
        ``(asw_score, nmi_score, ari_score, uca_score)``, or ``None`` when
        the dataset does not have more than one label. The silhouette
        score is computed from the true labels, the other three compare
        true and predicted labels.

        Raises
        ------
        ValueError
            If ``prediction_algorithm`` is neither ``"knn"`` nor ``"gmm"``
            (only checked when the dataset has more than one label).
        """
        if not self.gene_dataset.n_labels > 1:
            # Nothing to cluster; callers get None, as before.
            return None
        if prediction_algorithm not in ("knn", "gmm"):
            # Fail early with a clear message instead of hitting an unbound
            # ``labels_pred`` further down.
            raise ValueError(
                "prediction_algorithm must be 'knn' or 'gmm', got %r"
                % (prediction_algorithm,)
            )

        latent, _, labels = self.get_latent()
        if prediction_algorithm == "knn":
            labels_pred = KMeans(
                self.gene_dataset.n_labels, n_init=200
            ).fit_predict(latent)
        else:
            gmm = GMM(self.gene_dataset.n_labels)
            gmm.fit(latent)
            labels_pred = gmm.predict(latent)

        asw_score = silhouette_score(latent, labels)
        nmi_score = NMI(labels, labels_pred)
        ari_score = ARI(labels, labels_pred)
        uca_score = unsupervised_clustering_accuracy(labels, labels_pred)[0]
        # Arguments are passed separately so the message is only formatted
        # when debug logging is enabled.
        logger.debug(
            "Clustering Scores:\nSilhouette: %.4f\nNMI: %.4f\nARI: %.4f\nUCA: %.4f",
            asw_score, nmi_score, ari_score, uca_score,
        )
        return asw_score, nmi_score, ari_score, uca_score
開發者ID:YosefLab,項目名稱:scVI,代碼行數:25,代碼來源:posterior.py

示例11: __init__

# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import KMeans [as 別名]
def __init__(self, dataset, base_query_strategy, similarity_metric=None,
                 clustering_method=None, beta=1.0, random_state=None):
        """Set up the density-weighted wrapper around a base query strategy.

        Parameters
        ----------
        dataset
            Passed to the parent constructor.
        base_query_strategy : QueryStrategy
            Wrapped strategy; its ``dataset`` must equal ``self.dataset``.
        similarity_metric : optional
            Stored as ``self.similarity_metric``; defaults to
            ``cosine_similarity``.
        clustering_method : optional
            Stored as ``self.clustering_method``; defaults to
            ``KMeans(n_clusters=5)`` seeded with ``self.random_state_``.
        beta : float, optional (default: 1.0)
            Stored as ``self.beta``.
        random_state : optional
            Converted with ``seed_random_state`` and stored as
            ``self.random_state_``.

        Raises
        ------
        TypeError
            If ``base_query_strategy`` is not a ``QueryStrategy``.
        ValueError
            If ``base_query_strategy.dataset`` differs from ``self.dataset``.
        """
        super(DensityWeightedMeta, self).__init__(dataset=dataset)
        if not isinstance(base_query_strategy, QueryStrategy):
            raise TypeError(
                "'base_query_strategy' has to be an instance of 'QueryStrategy'"
            )
        if base_query_strategy.dataset != self.dataset:
            # The trailing space matters: adjacent literals are joined as-is.
            raise ValueError("base_query_strategy should share the same "
                             "dataset instance with DensityWeightedMeta")

        self.base_query_strategy = base_query_strategy
        self.beta = beta
        self.random_state_ = seed_random_state(random_state)

        if clustering_method is not None:
            self.clustering_method = clustering_method
        else:
            self.clustering_method = KMeans(
                n_clusters=5, random_state=self.random_state_)

        if similarity_metric is not None:
            self.similarity_metric = similarity_metric
        else:
            self.similarity_metric = cosine_similarity
開發者ID:ntucllab,項目名稱:libact,代碼行數:27,代碼來源:density_weighted_meta.py

示例12: SAA

# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import KMeans [as 別名]
def SAA(self):
        """Discretize the Markovian process with K-means, one fit per stage.

        For every stage ``t`` from 1 to ``T - 1`` the samples of that stage
        are clustered and the centers stored in ``self.Markov_states[t]``.
        When ``self.int_flag`` is 0 the transition matrix is estimated by
        counting moves between consecutive stages' labels; when it is 1
        ``self.train_transition_matrix()`` is called after the loop.

        Returns ``(self.Markov_states, self.transition_matrix)``.
        """
        from sklearn.cluster import KMeans

        if self.int_flag == 0:
            # Labels of the previous stage; all zeros before the first fit.
            prev_labels = numpy.zeros(self.n_samples, dtype=int)
        self._initialize_matrix()

        for t in range(1, self.T):
            model = KMeans(n_clusters=self.n_Markov_states[t], random_state=0)
            model = model.fit(self.samples[:, t, :])
            self.Markov_states[t] = model.cluster_centers_
            if self.int_flag == 0:
                cur_labels = model.labels_
                # Column vector so the division below scales each row.
                visits = numpy.zeros([self.n_Markov_states[t - 1], 1])
                for sample in range(self.n_samples):
                    src = prev_labels[sample]
                    dst = cur_labels[sample]
                    visits[src] += 1
                    self.transition_matrix[t][src][dst] += 1
                self.transition_matrix[t] /= visits
                prev_labels = cur_labels

        if self.int_flag == 1:
            self.train_transition_matrix()

        return self.Markov_states, self.transition_matrix
開發者ID:lingquant,項目名稱:msppy,代碼行數:26,代碼來源:discretize.py

示例13: analysis_KMeans

# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import KMeans [as 別名]
def analysis_KMeans():
    """Record the mean distortion of KMeans for k = 320..999.

    For each ``k`` a KMeans model is fitted on the module-level
    ``np_features`` and the mean Euclidean distance from every sample to
    its nearest center is computed. The ``k,distortion`` pairs are
    appended to ``./kmeans_cluster.csv``.
    """
    K_range = range(320, 1000)
    mean_distortions = []
    for k in K_range:
        print("Cluster k is {}".format(k))
        # ``n_jobs`` is not passed: KMeans deprecated it in scikit-learn
        # 0.23 and removed it in 1.0.
        kmeans_model = KMeans(n_clusters=k, init="k-means++")
        kmeans_model.fit(np_features)
        nearest_center_dist = np.min(
            cdist(np_features, kmeans_model.cluster_centers_, 'euclidean'), axis=1)
        mean_distortions.append(np.mean(nearest_center_dist))

    with open("./kmeans_cluster.csv", "a+") as wh:
        for k, distortion in zip(K_range, mean_distortions):
            wh.write("{},{}\n".format(k, distortion))

    # plt.plot(K_range, mean_distortions, 'bx-')
    # plt.xlabel('k')
    # plt.ylabel(u'Avgerage distortion degree')
    # plt.title(u'Elbows rule to select the best K value')
    # plt.savefig("kmeans_cluster.png")
開發者ID:liuguiyangnwpu,項目名稱:MassImageRetrieval,代碼行數:23,代碼來源:feature_preprocess.py

示例14: findClusters_kmeans

# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import KMeans [as 別名]
def findClusters_kmeans(data):
    '''
        Cluster data using k-means

        Fits a 4-cluster KMeans model (30 initialisations) on ``data`` and
        returns the value of ``fit``, i.e. the fitted estimator.
    '''
    # create the estimator object; ``n_jobs`` is not passed because KMeans
    # deprecated it in scikit-learn 0.23 and removed it in 1.0
    kmeans = cl.KMeans(
        n_clusters=4,
        verbose=0,
        n_init=30
    )

    # fit the data
    return kmeans.fit(data)

# the file name of the dataset 
開發者ID:drabastomek,項目名稱:practicalDataAnalysisCookbook,代碼行數:18,代碼來源:clustering_kmeans.py

示例15: calc_mean_dist_from_center

# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import KMeans [as 別名]
def calc_mean_dist_from_center(data, km):
    """
    Mean distance of each cluster's members from that cluster's center.

    Distances come from ``_cosine_dist``. Intended for KMeans and GMM only,
    because DBSCAN may have ambiguous form of clusters.

    Parameters
    ----------
    data: pd.DataFrame
        Dataframe with features for clustering indexed as in ``retention_config.index_col``
    km:
        Already fitted clusterer exposing ``labels_`` and ``cluster_centers_``.

    Returns
    -------
    Mapping of clusters names to mean distance from cluster centers.

    Return type
    -----------
    Dict
    """
    assigned = km.labels_
    centers = km.cluster_centers_
    return {
        label: _cosine_dist(data[assigned == label], centers[label]).mean()
        for label in set(assigned)
    }
開發者ID:retentioneering,項目名稱:retentioneering-tools,代碼行數:27,代碼來源:clustering.py


注:本文中的sklearn.cluster.KMeans方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。