當前位置: 首頁>>代碼示例>>Python>>正文


Python cluster.MiniBatchKMeans方法代碼示例

本文整理匯總了Python中sklearn.cluster.MiniBatchKMeans方法的典型用法代碼示例。如果您正苦於以下問題:Python cluster.MiniBatchKMeans方法的具體用法?Python cluster.MiniBatchKMeans怎麼用?Python cluster.MiniBatchKMeans使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在模塊sklearn.cluster的用法示例。


在下文中一共展示了cluster.MiniBatchKMeans方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: process_vec_info

# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import MiniBatchKMeans [as 別名]
def process_vec_info(g, n_clusters=8):
    """Relabel graph nodes from a k-means clustering of their 'vec' attribute.

    The 'vec' attribute of every node of ``g`` is collected into a matrix
    and clustered with MiniBatchKMeans. In a copy of the graph each node's
    'label' is set to the string form of its cluster id and its 'vec' is
    replaced by the list of ``1 / (1 + t)`` values, where ``t`` is the
    node's row of ``clu.transform`` (one value per cluster).

    Args:
        g: graph exposing ``nodes()``, ``node[u]`` and ``copy()``; every
            node must carry a 'vec' attribute.
        n_clusters: number of clusters to fit.

    Returns:
        The object returned by ``g.copy()`` with the updated 'label' and
        'vec' node attributes.
    """
    # Fix the node order once so that row i of every array below refers to
    # nodes[i], whatever the node identifiers are.
    nodes = list(g.nodes())
    data_matrix = np.array([g.node[u]['vec'] for u in nodes])
    # cluster with kmeans
    clu = MiniBatchKMeans(n_clusters=n_clusters, n_init=10)
    clu.fit(data_matrix)
    preds = clu.predict(data_matrix)
    vecs = 1 / (1 + clu.transform(data_matrix))
    # replace node information
    graph = g.copy()
    # Pair each node with its own row rather than indexing the arrays by
    # node id: indexing by id is only correct when the ids are exactly
    # 0..n-1 in iteration order.
    for u, pred, vec in zip(nodes, preds, vecs):
        graph.node[u]['label'] = str(pred)
        graph.node[u]['vec'] = list(vec)
    return graph
開發者ID:fabriziocosta,項目名稱:EDeN,代碼行數:18,代碼來源:estimator_utils.py

示例2: auto_label

# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import MiniBatchKMeans [as 別名]
def auto_label(graphs, n_clusters=16, **opts):
    """Cluster the nodes of all graphs and report the result per graph.

    The matrices returned by ``Vectorizer(**opts).vertex_transform(graphs)``
    are stacked, clustered with MiniBatchKMeans, and the predictions are cut
    back into consecutive slices whose lengths are the row counts of the
    individual matrices.

    Returns a pair ``(label_list, vecs_list)``: for each matrix, the slice
    of ``predict`` output and the slice of ``transform`` output.
    """
    per_graph = Vectorizer(**opts).vertex_transform(graphs)
    stacked = vstack(per_graph)

    kmeans = MiniBatchKMeans(n_clusters=n_clusters, n_init=10)
    kmeans.fit(stacked)
    all_labels = kmeans.predict(stacked)
    all_vecs = kmeans.transform(stacked)

    label_list, vecs_list = [], []
    start = 0
    for block in per_graph:
        # rows [start, stop) of the stacked matrix belong to this block
        stop = start + block.shape[0]
        label_list.append(all_labels[start:stop])
        vecs_list.append(all_vecs[start:stop])
        start = stop
    return label_list, vecs_list
開發者ID:fabriziocosta,項目名稱:EDeN,代碼行數:22,代碼來源:graph.py

示例3: neighborhood_policy

# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import MiniBatchKMeans [as 別名]
def neighborhood_policy(self):
        """
        Report the neighborhood policy that corresponds to the implementor.

        Returns
        -------
        A NeighborhoodPolicy value (KNearest, Radius or Clusters) rebuilt
        from the implementor's attributes, or None when the implementor is
        none of _KNearest, _Radius or _Clusters.
        """
        imp = self._imp

        if isinstance(imp, _KNearest):
            return NeighborhoodPolicy.KNearest(imp.k, imp.metric)

        if isinstance(imp, _Radius):
            return NeighborhoodPolicy.Radius(imp.radius, imp.metric, imp.no_nhood_prob_of_arm)

        if isinstance(imp, _Clusters):
            # second field records whether the fitted model is the mini-batch variant
            uses_minibatch = isinstance(imp.kmeans, MiniBatchKMeans)
            return NeighborhoodPolicy.Clusters(imp.n_clusters, uses_minibatch)

        return None
開發者ID:fidelity,項目名稱:mabwiser,代碼行數:18,代碼來源:mab.py

示例4: test_greedy0_n2_mini

# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import MiniBatchKMeans [as 別名]
def test_greedy0_n2_mini(self):
        # Epsilon-greedy (epsilon=0) with NeighborhoodPolicy.Clusters(2, True);
        # the final assertion expects the implementor to hold a MiniBatchKMeans.
        history = [[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
                   [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                   [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
                   [0, 2, 1, 0, 0]]
        queries = [[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]]

        arms, mab = self.predict(
            arms=[1, 2, 3, 4],
            decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
            rewards=[0, 1, 1, 0, 0, 0, 0, 1, 1, 1],
            learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0),
            neighborhood_policy=NeighborhoodPolicy.Clusters(2, True),
            context_history=history,
            contexts=queries,
            seed=123456,
            num_run=1,
            is_predict=True,
        )

        # one predicted arm per query context
        self.assertListEqual(arms, [3, 1])
        self.assertTrue(isinstance(mab._imp.kmeans, MiniBatchKMeans))
開發者ID:fidelity,項目名稱:mabwiser,代碼行數:20,代碼來源:test_clusters.py

示例5: test_find_default_param_grid

# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import MiniBatchKMeans [as 別名]
def test_find_default_param_grid(self):
        """
        Test the `find_default_param_grid` function.
        """

        # (abbreviation, learner class) pairs handed to the check function
        abbrev_learner_pairs = (
            ('mbkm', MiniBatchKMeans),
            ('bnb', BernoulliNB),
            ('mnb', MultinomialNB),
            ('perc', Perceptron),
            ('pagr', PassiveAggressiveRegressor),
        )

        # Hand-written grids, checked in addition to DEFAULT_PARAM_GRIDS
        custom_param_grids = {
            MiniBatchKMeans: {
                'n_clusters': [4, 5, 6, 7, 9],
                'init': ['k-means++', 'random'],
            },
            BernoulliNB: {'alpha': [0.1, 0.5, 1.0]},
            MultinomialNB: {'alpha': [0.5, 0.75, 1.0]},
            Perceptron: {
                'penalty': ['l2', 'l1', 'elasticnet'],
                'alpha': [0.0001, 0.001, 0.01],
                'n_iter': [5],
            },
            PassiveAggressiveRegressor: {
                'C': [0.01, 0.1, 1.0],
                'n_iter': [10],
                'loss': ['epsilon_insensitive'],
            },
        }

        # Generator-style test: one (check, pairs, grids) case per grid set.
        for param_grids in (DEFAULT_PARAM_GRIDS, custom_param_grids):
            # a fresh list per case, so cases never share the same object
            yield (self.check_find_default_param_grid_defaults,
                   list(abbrev_learner_pairs),
                   param_grids)
開發者ID:mulhod,項目名稱:reviewer_experience_prediction,代碼行數:26,代碼來源:test_src.py

示例6: test_minibatch_sensible_reassign_fit

# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import MiniBatchKMeans [as 別名]
def test_minibatch_sensible_reassign_fit():
    # Check that identical initial clusters get reassigned; also a regression
    # test for the case of more desired reassignments than samples.
    zeroed_X, _ = make_blobs(n_samples=100, centers=5,
                             cluster_std=1., random_state=42)
    # every second sample becomes the all-zero vector
    zeroed_X[::2, :] = 0

    # batch_size=10 is the regular case; batch_size=201 exceeds X.shape[0]
    # (regression test).
    for batch_size in (10, 201):
        mb_k_means = MiniBatchKMeans(n_clusters=20, batch_size=batch_size,
                                     random_state=42, init="random")
        mb_k_means.fit(zeroed_X)
        # there should not be too many exact zero cluster centers
        n_nonzero_centers = mb_k_means.cluster_centers_.any(axis=1).sum()
        assert_greater(n_nonzero_centers, 10)
開發者ID:PacktPublishing,項目名稱:Mastering-Elasticsearch-7.0,代碼行數:21,代碼來源:test_k_means.py

示例7: test_sparse_mb_k_means_callable_init

# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import MiniBatchKMeans [as 別名]
def test_sparse_mb_k_means_callable_init():

    def test_init(X, k, random_state):
        # callable init that ignores its arguments and always returns the
        # `centers` array defined outside this function
        return centers

    # Without an explicit n_clusters the estimator's default is used; the
    # test expects that to disagree with the number of centers returned by
    # the callable and to raise a meaningful error.
    mismatched = MiniBatchKMeans(init=test_init, random_state=42)
    expected_msg = "does not match the number of clusters"
    assert_raises_regex(ValueError, expected_msg, mismatched.fit, X_csr)

    # With n_clusters=3 the fit is expected to succeed.
    estimator = MiniBatchKMeans(n_clusters=3, init=test_init,
                                random_state=42)
    mb_k_means = estimator.fit(X_csr)
    _check_fitted_model(mb_k_means)
開發者ID:PacktPublishing,項目名稱:Mastering-Elasticsearch-7.0,代碼行數:18,代碼來源:test_k_means.py

示例8: test_weighted_vs_repeated

# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import MiniBatchKMeans [as 別名]
def test_weighted_vs_repeated():
    # Fitting with an integer sample weight N must give the same clustering
    # as fitting on data in which that sample is repeated N times.
    rng = np.random.RandomState(0)
    weights = rng.randint(1, 5, size=n_samples)
    X_repeat = np.repeat(X, weights, axis=0)

    # three KMeans variants differing only in their init, plus MiniBatchKMeans
    estimators = [
        KMeans(init=init, n_clusters=n_clusters, random_state=42)
        for init in ("k-means++", "random", centers.copy())
    ]
    estimators.append(MiniBatchKMeans(n_clusters=n_clusters, batch_size=10,
                                      random_state=42))

    for estimator in estimators:
        est_weighted = clone(estimator).fit(X, sample_weight=weights)
        est_repeated = clone(estimator).fit(X_repeat)

        # expand the weighted labels so they line up with the repeated rows
        expanded_labels = np.repeat(est_weighted.labels_, weights)
        score = v_measure_score(est_repeated.labels_, expanded_labels)
        assert_almost_equal(score, 1.0)

        if isinstance(estimator, MiniBatchKMeans):
            # cluster centers are only compared for the KMeans estimators
            continue
        assert_almost_equal(_sort_centers(est_weighted.cluster_centers_),
                            _sort_centers(est_repeated.cluster_centers_))
開發者ID:PacktPublishing,項目名稱:Mastering-Elasticsearch-7.0,代碼行數:25,代碼來源:test_k_means.py

示例9: k_means

# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import MiniBatchKMeans [as 別名]
def k_means(n_clusters, samples):

    """
    Cluster vertex coordinates with Mini-Batch K-Means.

    Parameters:
    - - - - -
    n_clusters : int
        number of clusters to generate
    samples : array
        Euclidean-space coordinates of vertices

    Returns:
    - - - - -
    labels : array
        the fitted model's labels_ cast to int32 and incremented by one
    """

    model = cluster.MiniBatchKMeans(
        n_clusters=n_clusters,
        init='k-means++',
        max_iter=1000,
        batch_size=10000,
        verbose=False,
        compute_labels=True,
        max_no_improvement=100,
        n_init=5,
        reassignment_ratio=0.1)
    model.fit(samples)

    # astype and the addition each produce a new array, so the fitted
    # model's labels_ attribute is left untouched
    return model.labels_.astype(np.int32) + 1
開發者ID:miykael,項目名稱:parcellation_fragmenter,代碼行數:26,代碼來源:clusterings.py

示例10: train_subquantizers

# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import MiniBatchKMeans [as 別名]
def train_subquantizers(data, num_buckets, subquantizer_clusters=256, kmeans_local_iters=20, n_init=10, random_state=None):
    """
    Fit a set of num_buckets subquantizers for corresponding subvectors.

    ``data`` is split along axis 1 into ``num_buckets`` parts with
    ``np.split`` and one MiniBatchKMeans model is fitted per part.

    :param data: array to split column-wise; np.split with an integer
        requires that axis 1 divides evenly into ``num_buckets`` parts
    :param num_buckets: number of column groups, i.e. of subquantizers
    :param subquantizer_clusters: ``n_clusters`` of each model
    :param kmeans_local_iters: ``max_iter`` of each model
    :param n_init: ``n_init`` of each model
    :param random_state: ``random_state`` of each model
    :return: list with the ``cluster_centers_`` of each fitted model, in
        split order
    """

    subquantizers = []
    for i, d in enumerate(np.split(data, num_buckets, axis=1), start=1):
        model = MiniBatchKMeans(n_clusters=subquantizer_clusters, init='k-means++', max_iter=kmeans_local_iters,
                                n_init=n_init, batch_size=10000, verbose=False, random_state=random_state)
        model.fit(d)
        subquantizers.append(model.cluster_centers_)
        # pass the values as logging args so the message is only formatted
        # when the record is actually emitted
        logger.info('Fit subquantizer %d of %d.', i, num_buckets)

    return subquantizers
開發者ID:ColumbiaDVMM,項目名稱:ColumbiaImageSearch,代碼行數:18,代碼來源:model.py

示例11: cluster

# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import MiniBatchKMeans [as 別名]
def cluster(self, embedding, n_clusters=None):
        """
        Group the embedding vocabulary into clusters named after an etalon word.

        The vocabulary vectors are clustered with MiniBatchKMeans. For each
        cluster the etalon is the first result of
        ``embedding.wv.most_similar`` for the cluster center. Every word is
        appended to ``self.cluster_dict[etalon]`` and mapped to its etalon
        in ``self.word_to_cluster_dict``.

        :param embedding: model exposing ``wv.vocab``, ``wv.most_similar``
            and item access by word
        :param n_clusters: number of clusters; when falsy, 10% of the
            vocabulary size is used, limited to the range 1..1000
        :return: True
        """
        vocab = list(embedding.wv.vocab.keys())
        vocab_vectors = np.array([embedding[word] for word in vocab])

        if not n_clusters:
            # number of clusters = 10% of embedding vocabulary, at most 1000;
            # at least 1 so that vocabularies under 10 words do not request
            # zero clusters
            n_clusters = max(1, min(1000, int(len(vocab) * 0.1)))

        clustering = MiniBatchKMeans(n_clusters=n_clusters).fit(vocab_vectors)

        # The etalon depends only on the cluster, so it is looked up once
        # per cluster instead of once per word.
        etalon_by_label = {}
        for word, cluster_label in zip(vocab, clustering.labels_):
            if cluster_label not in etalon_by_label:
                center = clustering.cluster_centers_[cluster_label]
                etalon_by_label[cluster_label] = embedding.wv.most_similar(positive=[center])[0][0]
            etalon = etalon_by_label[cluster_label]

            self.cluster_dict.setdefault(etalon, []).append(word)
            self.word_to_cluster_dict[word] = etalon

        return True
開發者ID:texta-tk,項目名稱:texta,代碼行數:27,代碼來源:word_cluster.py

示例12: test_batchkmeans_clustering

# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import MiniBatchKMeans [as 別名]
def test_batchkmeans_clustering(self):
        # Fit MiniBatchKMeans on the iris data, convert it with
        # target_opset=TARGET_OPSET and pass rows 40-59 (as float32) to
        # dump_data_and_model.
        X = load_iris().data
        model = MiniBatchKMeans(n_clusters=3)
        model.fit(X)

        input_spec = [("input", FloatTensorType([None, 4]))]
        model_onnx = convert_sklearn(model, "kmeans", input_spec,
                                     target_opset=TARGET_OPSET)
        self.assertIsNotNone(model_onnx)

        sample = X.astype(numpy.float32)[40:60]
        failure_condition = ("StrictVersion(onnx.__version__)"
                             " < StrictVersion('1.2')")
        dump_data_and_model(
            sample,
            model,
            model_onnx,
            basename="SklearnKMeans-Dec4",
            allow_failure=failure_condition,
        )
開發者ID:onnx,項目名稱:sklearn-onnx,代碼行數:19,代碼來源:test_sklearn_k_means_converter.py

示例13: test_batchkmeans_clustering_opset9

# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import MiniBatchKMeans [as 別名]
def test_batchkmeans_clustering_opset9(self):
        # Same scenario as the default-opset test, but converting with
        # target_opset=9.
        features = load_iris().data
        model = MiniBatchKMeans(n_clusters=3)
        model.fit(features)

        initial_types = [("input", FloatTensorType([None, 4]))]
        model_onnx = convert_sklearn(
            model, "kmeans", initial_types, target_opset=9)
        self.assertIsNotNone(model_onnx)

        # rows 40-59 of the data, cast to float32
        dump_data_and_model(
            features.astype(numpy.float32)[40:60],
            model,
            model_onnx,
            basename="SklearnKMeansOp9-Dec4",
            allow_failure="StrictVersion(onnx.__version__) < StrictVersion('1.2')",
        )
開發者ID:onnx,項目名稱:sklearn-onnx,代碼行數:19,代碼來源:test_sklearn_k_means_converter.py

示例14: test_batchkmeans_clustering_opset11

# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import MiniBatchKMeans [as 別名]
def test_batchkmeans_clustering_opset11(self):
        # Fit MiniBatchKMeans on the iris data, convert it with
        # target_opset=11 and pass rows 40-59 (as float32) to
        # dump_data_and_model.
        data = load_iris()
        X = data.data
        model = MiniBatchKMeans(n_clusters=3)
        model.fit(X)
        model_onnx = convert_sklearn(model, "kmeans",
                                     [("input", FloatTensorType([None, 4]))],
                                     target_opset=11)
        self.assertIsNotNone(model_onnx)
        dump_data_and_model(
            X.astype(numpy.float32)[40:60],
            model,
            model_onnx,
            # The basename names the opset under test. It used to read
            # "Op9", a copy-paste leftover that made this test share its
            # basename with the opset-9 test.
            basename="SklearnKMeansOp11-Dec4",
            allow_failure="StrictVersion(onnx.__version__)"
                          " < StrictVersion('1.2')")
開發者ID:onnx,項目名稱:sklearn-onnx,代碼行數:18,代碼來源:test_sklearn_k_means_converter.py

示例15: test_batchkmeans_clustering_int

# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import MiniBatchKMeans [as 別名]
def test_batchkmeans_clustering_int(self):
        # Fit MiniBatchKMeans on the digits data, convert it with an int64
        # input of X.shape[1] columns and pass rows 40-59 (as int64) to
        # dump_data_and_model.
        X = load_digits().data
        model = MiniBatchKMeans(n_clusters=4)
        model.fit(X)

        input_type = Int64TensorType([None, X.shape[1]])
        model_onnx = convert_sklearn(model, "kmeans",
                                     [("input", input_type)],
                                     target_opset=TARGET_OPSET)
        self.assertIsNotNone(model_onnx)

        int_sample = X.astype(numpy.int64)[40:60]
        # condition string handed to dump_data_and_model as allow_failure
        failure_condition = (
            "StrictVersion(onnx.__version__) < StrictVersion('1.2') or "
            "StrictVersion(onnxruntime.__version__) <= StrictVersion('0.2.1')"
        )
        dump_data_and_model(
            int_sample,
            model,
            model_onnx,
            basename="SklearnBatchKMeansInt-Dec4",
            allow_failure=failure_condition,
        )
開發者ID:onnx,項目名稱:sklearn-onnx,代碼行數:22,代碼來源:test_sklearn_k_means_converter.py


注:本文中的sklearn.cluster.MiniBatchKMeans方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。