本文整理匯總了Python中sklearn.cluster.MiniBatchKMeans方法的典型用法代碼示例。如果您正苦於以下問題：Python cluster.MiniBatchKMeans方法的具體用法？Python cluster.MiniBatchKMeans怎麽用？Python cluster.MiniBatchKMeans使用的例子？那麽，這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類sklearn.cluster的用法示例。
在下文中一共展示了cluster.MiniBatchKMeans方法的15個代碼示例，這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚，您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: process_vec_info
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import MiniBatchKMeans [as 別名]
def process_vec_info(g, n_clusters=8):
    """Relabel the nodes of ``g`` by clustering their ``'vec'`` attributes.

    The ``'vec'`` attribute of every node is stacked into one matrix and
    clustered with ``MiniBatchKMeans``.  A copy of ``g`` is returned in which
    each node's ``'label'`` is the string form of its predicted cluster id
    and its ``'vec'`` is the list ``1 / (1 + t)``, where ``t`` is that node's
    row of ``clu.transform(data_matrix)``.

    Parameters
    ----------
    g : graph
        Graph whose nodes all carry a ``'vec'`` attribute.
    n_clusters : int, default 8
        Number of clusters requested from ``MiniBatchKMeans``.

    Returns
    -------
    graph
        The copy of ``g`` carrying the new ``'label'`` and ``'vec'`` node
        attributes.
    """
    # Freeze the node order once so that row i of the matrix always
    # corresponds to node_ids[i].
    # NOTE(review): ``Graph.node`` was removed in networkx 2.4 in favour of
    # ``Graph.nodes``; kept as-is to match the networkx version this file
    # targets -- confirm before upgrading.
    node_ids = list(g.nodes())
    data_matrix = np.array([g.node[u]['vec'] for u in node_ids])

    # cluster with kmeans
    clu = MiniBatchKMeans(n_clusters=n_clusters, n_init=10)
    clu.fit(data_matrix)
    preds = clu.predict(data_matrix)
    vecs = 1 / (1 + clu.transform(data_matrix))

    # replace node information
    graph = g.copy()
    # Index by row position, not by node id: the previous ``preds[u]`` was
    # only correct when node ids happened to be 0..n-1 in iteration order
    # and failed (or mislabelled) for any other node naming.
    for row, u in enumerate(node_ids):
        graph.node[u]['label'] = str(preds[row])
        graph.node[u]['vec'] = list(vecs[row])
    return graph
示例2: auto_label
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import MiniBatchKMeans [as 別名]
def auto_label(graphs, n_clusters=16, **opts):
    """Label nodes with cluster id.

    The features are the per-graph matrices returned by
    ``Vectorizer(**opts).vertex_transform(graphs)``.  They are stacked,
    clustered together with ``MiniBatchKMeans`` and the results are cut back
    into one slice per graph.

    Returns
    -------
    label_list : list
        One slice of the predicted cluster ids per input matrix.
    vecs_list : list
        One slice of ``clu.transform`` output per input matrix.
    """
    per_graph = Vectorizer(**opts).vertex_transform(graphs)
    stacked = vstack(per_graph)

    model = MiniBatchKMeans(n_clusters=n_clusters, n_init=10)
    model.fit(stacked)
    all_labels = model.predict(stacked)
    all_vecs = model.transform(stacked)

    # Walk the stacked rows with a moving [start, stop) window whose width
    # is the row count of each per-graph matrix.
    label_list, vecs_list = [], []
    start = 0
    for block in per_graph:
        stop = start + block.shape[0]
        label_list.append(all_labels[start:stop])
        vecs_list.append(all_vecs[start:stop])
        start = stop
    return label_list, vecs_list
示例3: neighborhood_policy
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import MiniBatchKMeans [as 別名]
def neighborhood_policy(self):
    """
    Build the neighborhood policy named tuple that matches the implementor.

    Returns
    -------
    ``NeighborhoodPolicy.KNearest``, ``NeighborhoodPolicy.Radius`` or
    ``NeighborhoodPolicy.Clusters`` according to the type of ``self._imp``,
    or ``None`` when the implementor is none of those neighborhood types.
    """
    imp = self._imp

    if isinstance(imp, _KNearest):
        return NeighborhoodPolicy.KNearest(imp.k, imp.metric)

    if isinstance(imp, _Radius):
        return NeighborhoodPolicy.Radius(imp.radius, imp.metric,
                                         imp.no_nhood_prob_of_arm)

    if isinstance(imp, _Clusters):
        n_clusters = imp.n_clusters
        # Second field records whether the fitted estimator is the
        # mini-batch variant.
        uses_minibatch = isinstance(imp.kmeans, MiniBatchKMeans)
        return NeighborhoodPolicy.Clusters(n_clusters, uses_minibatch)

    return None
示例4: test_greedy0_n2_mini
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import MiniBatchKMeans [as 別名]
def test_greedy0_n2_mini(self):
    # Epsilon-greedy (epsilon=0) with a Clusters(2, True) neighborhood
    # policy; the final assertion checks that this configuration is backed
    # by a MiniBatchKMeans estimator.
    past_decisions = [1, 1, 1, 2, 2, 3, 3, 3, 3, 3]
    past_rewards = [0, 1, 1, 0, 0, 0, 0, 1, 1, 1]
    past_contexts = [
        [0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
        [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
        [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
        [0, 2, 1, 0, 0],
    ]
    new_contexts = [[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]]

    arms, mab = self.predict(
        arms=[1, 2, 3, 4],
        decisions=past_decisions,
        rewards=past_rewards,
        learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0),
        neighborhood_policy=NeighborhoodPolicy.Clusters(2, True),
        context_history=past_contexts,
        contexts=new_contexts,
        seed=123456,
        num_run=1,
        is_predict=True,
    )

    self.assertListEqual(arms, [3, 1])
    self.assertTrue(isinstance(mab._imp.kmeans, MiniBatchKMeans))
示例5: test_find_default_param_grid
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import MiniBatchKMeans [as 別名]
def test_find_default_param_grid(self):
    """
    Test the `find_default_param_grid` function.

    Yields one ``check_find_default_param_grid_defaults`` case for
    ``DEFAULT_PARAM_GRIDS`` and one for a hand-written grid mapping, each
    paired with the same (abbreviation, learner class) list.
    """
    mbkm_grid = {'n_clusters': [4, 5, 6, 7, 9],
                 'init': ['k-means++', 'random']}
    bnb_grid = {'alpha': [0.1, 0.5, 1.0]}
    mnb_grid = {'alpha': [0.5, 0.75, 1.0]}
    perc_grid = {'penalty': ['l2', 'l1', 'elasticnet'],
                 'alpha': [0.0001, 0.001, 0.01],
                 'n_iter': [5]}
    pagr_grid = {'C': [0.01, 0.1, 1.0],
                 'n_iter': [10],
                 'loss': ['epsilon_insensitive']}

    custom_param_grids = {
        MiniBatchKMeans: mbkm_grid,
        BernoulliNB: bnb_grid,
        MultinomialNB: mnb_grid,
        Perceptron: perc_grid,
        PassiveAggressiveRegressor: pagr_grid,
    }

    labelled_learners = [
        ('mbkm', MiniBatchKMeans),
        ('bnb', BernoulliNB),
        ('mnb', MultinomialNB),
        ('perc', Perceptron),
        ('pagr', PassiveAggressiveRegressor),
    ]

    for param_grids in (DEFAULT_PARAM_GRIDS, custom_param_grids):
        # Hand every case its own fresh list of pairs.
        yield (self.check_find_default_param_grid_defaults,
               list(labelled_learners),
               param_grids)
示例6: test_minibatch_sensible_reassign_fit
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import MiniBatchKMeans [as 別名]
def test_minibatch_sensible_reassign_fit():
    # Check that identical initial clusters are reassigned.  Also a
    # regression test for the case where more reassignments are wanted than
    # there are samples.
    zeroed_X, true_labels = make_blobs(n_samples=100, centers=5,
                                       cluster_std=1., random_state=42)
    # Zero out every other sample.
    zeroed_X[::2, :] = 0

    # batch_size=10 is the ordinary case; batch_size=201 exercises
    # batch-size > X.shape[0] (regression test).
    for batch_size in (10, 201):
        mb_k_means = MiniBatchKMeans(n_clusters=20, batch_size=batch_size,
                                     random_state=42, init="random")
        mb_k_means.fit(zeroed_X)
        # there should not be too many exact zero cluster centers
        nonzero_centers = mb_k_means.cluster_centers_.any(axis=1).sum()
        assert_greater(nonzero_centers, 10)
示例7: test_sparse_mb_k_means_callable_init
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import MiniBatchKMeans [as 別名]
def test_sparse_mb_k_means_callable_init():
    # Callable init that ignores its arguments and always hands back the
    # module-level ``centers``.
    def test_init(X, k, random_state):
        return centers

    # Small test to check that giving the wrong number of centers
    # raises a meaningful error (n_clusters is left at its default here).
    expected_msg = "does not match the number of clusters"
    mismatched = MiniBatchKMeans(init=test_init, random_state=42)
    assert_raises_regex(ValueError, expected_msg, mismatched.fit, X_csr)

    # Now check that the fit actually works
    matching = MiniBatchKMeans(n_clusters=3, init=test_init,
                               random_state=42)
    mb_k_means = matching.fit(X_csr)
    _check_fitted_model(mb_k_means)
示例8: test_weighted_vs_repeated
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import MiniBatchKMeans [as 別名]
def test_weighted_vs_repeated():
    # A sample weight of N should give the same result as repeating that
    # sample N times.
    rng = np.random.RandomState(0)
    sample_weight = rng.randint(1, 5, size=n_samples)
    X_repeat = np.repeat(X, sample_weight, axis=0)

    # Settings shared by every estimator under test.
    shared = dict(n_clusters=n_clusters, random_state=42)
    estimators = [
        KMeans(init="k-means++", **shared),
        KMeans(init="random", **shared),
        KMeans(init=centers.copy(), **shared),
        MiniBatchKMeans(batch_size=10, **shared),
    ]

    for estimator in estimators:
        est_weighted = clone(estimator).fit(X, sample_weight=sample_weight)
        est_repeated = clone(estimator).fit(X_repeat)

        # Expand the weighted labels so they line up with X_repeat's rows.
        repeated_labels = np.repeat(est_weighted.labels_, sample_weight)
        score = v_measure_score(est_repeated.labels_, repeated_labels)
        assert_almost_equal(score, 1.0)

        # The cluster-center comparison is skipped for MiniBatchKMeans.
        if isinstance(estimator, MiniBatchKMeans):
            continue
        assert_almost_equal(_sort_centers(est_weighted.cluster_centers_),
                            _sort_centers(est_repeated.cluster_centers_))
示例9: k_means
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import MiniBatchKMeans [as 別名]
def k_means(n_clusters, samples):
    """
    Run k-means clustering on vertex coordinates.

    Parameters:
    - - - - -
    n_clusters : int
        number of clusters to generate
    samples : array
        Euclidean-space coordinates of vertices

    Returns:
    - - - - -
    labels : array
        fitted cluster labels cast to int32 and shifted up by one
    """
    # Run Mini-Batch K-Means
    model = cluster.MiniBatchKMeans(
        n_clusters=n_clusters,
        init='k-means++',
        max_iter=1000,
        batch_size=10000,
        verbose=False,
        compute_labels=True,
        max_no_improvement=100,
        n_init=5,
        reassignment_ratio=0.1,
    )
    model.fit(samples)

    # astype() hands back a new array, so the estimator's own labels_ are
    # left untouched by the +1 shift.
    return model.labels_.astype(np.int32) + 1
示例10: train_subquantizers
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import MiniBatchKMeans [as 別名]
def train_subquantizers(data, num_buckets, subquantizer_clusters=256,
                        kmeans_local_iters=20, n_init=10, random_state=None):
    """
    Fit a set of num_buckets subquantizers for corresponding subvectors.

    ``data`` is split along axis 1 into ``num_buckets`` pieces with
    ``np.split`` and a ``MiniBatchKMeans`` model is fitted on each piece.

    :returns:
        a list holding the ``cluster_centers_`` of each fitted model, in
        bucket order
    """
    # MiniBatchKMeans is used here in place of a full KMeans fit.
    subvector_blocks = np.split(data, num_buckets, axis=1)

    subquantizers = []
    for bucket_number, block in enumerate(subvector_blocks, start=1):
        model = MiniBatchKMeans(
            n_clusters=subquantizer_clusters,
            init='k-means++',
            max_iter=kmeans_local_iters,
            n_init=n_init,
            batch_size=10000,
            verbose=False,
            random_state=random_state,
        )
        model.fit(block)
        subquantizers.append(model.cluster_centers_)
        logger.info('Fit subquantizer %d of %d.' % (bucket_number, num_buckets))

    return subquantizers
示例11: cluster
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import MiniBatchKMeans [as 別名]
def cluster(self, embedding, n_clusters=None):
    """Group the embedding vocabulary into word clusters.

    The vectors of all vocabulary words are clustered with
    ``MiniBatchKMeans``.  Each cluster is named after its "etalon": the
    first word returned by ``embedding.wv.most_similar`` for the cluster
    center.  ``self.cluster_dict`` maps etalon -> list of member words and
    ``self.word_to_cluster_dict`` maps word -> etalon; both are updated in
    place.

    Parameters
    ----------
    embedding
        Model exposing ``wv.vocab``, ``wv.most_similar`` and item access by
        word.
    n_clusters : int, optional
        Number of clusters.  When falsy, 10% of the vocabulary size is
        used, limited to the range 1..1000.

    Returns
    -------
    bool
        Always ``True``.
    """
    vocab = list(embedding.wv.vocab.keys())
    vocab_vectors = np.array([embedding[word] for word in vocab])

    if not n_clusters:
        # number of clusters = 10% of embedding vocabulary, capped at 1000.
        # The lower bound of 1 keeps vocabularies of fewer than ten words
        # from requesting zero clusters.
        n_clusters = min(max(int(len(vocab) * 0.1), 1), 1000)

    clustering = MiniBatchKMeans(n_clusters=n_clusters).fit(vocab_vectors)

    # Resolve each cluster's etalon once and reuse it for every member
    # word, instead of repeating the same similarity query per word.
    etalon_by_label = {}
    for word, cluster_label in zip(vocab, clustering.labels_):
        etalon = etalon_by_label.get(cluster_label)
        if etalon is None:
            center = clustering.cluster_centers_[cluster_label]
            etalon = embedding.wv.most_similar(positive=[center])[0][0]
            etalon_by_label[cluster_label] = etalon
        self.cluster_dict.setdefault(etalon, []).append(word)
        self.word_to_cluster_dict[word] = etalon
    return True
示例12: test_batchkmeans_clustering
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import MiniBatchKMeans [as 別名]
def test_batchkmeans_clustering(self):
    # Fit MiniBatchKMeans on the iris data, convert it with TARGET_OPSET
    # and hand rows 40..59 (as float32) to dump_data_and_model.
    X = load_iris().data
    model = MiniBatchKMeans(n_clusters=3)
    model.fit(X)

    input_types = [("input", FloatTensorType([None, 4]))]
    model_onnx = convert_sklearn(model, "kmeans", input_types,
                                 target_opset=TARGET_OPSET)
    self.assertIsNotNone(model_onnx)

    sample = X.astype(numpy.float32)[40:60]
    failure_condition = (
        "StrictVersion(onnx.__version__)"
        " < StrictVersion('1.2')"
    )
    dump_data_and_model(
        sample,
        model,
        model_onnx,
        basename="SklearnKMeans-Dec4",
        allow_failure=failure_condition,
    )
示例13: test_batchkmeans_clustering_opset9
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import MiniBatchKMeans [as 別名]
def test_batchkmeans_clustering_opset9(self):
    # Same scenario as the TARGET_OPSET test, but the conversion is pinned
    # to target_opset=9.
    X = load_iris().data
    model = MiniBatchKMeans(n_clusters=3)
    model.fit(X)

    input_types = [("input", FloatTensorType([None, 4]))]
    model_onnx = convert_sklearn(model, "kmeans", input_types,
                                 target_opset=9)
    self.assertIsNotNone(model_onnx)

    sample = X.astype(numpy.float32)[40:60]
    failure_condition = (
        "StrictVersion(onnx.__version__)"
        " < StrictVersion('1.2')"
    )
    dump_data_and_model(
        sample,
        model,
        model_onnx,
        basename="SklearnKMeansOp9-Dec4",
        allow_failure=failure_condition,
    )
示例14: test_batchkmeans_clustering_opset11
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import MiniBatchKMeans [as 別名]
def test_batchkmeans_clustering_opset11(self):
    # Fit MiniBatchKMeans on the iris data, convert it with
    # target_opset=11 and hand rows 40..59 (as float32) to
    # dump_data_and_model.
    data = load_iris()
    X = data.data
    model = MiniBatchKMeans(n_clusters=3)
    model.fit(X)
    model_onnx = convert_sklearn(model, "kmeans",
                                 [("input", FloatTensorType([None, 4]))],
                                 target_opset=11)
    self.assertIsNotNone(model_onnx)
    dump_data_and_model(
        X.astype(numpy.float32)[40:60],
        model,
        model_onnx,
        # The basename used to be "SklearnKMeansOp9-Dec4", copy-pasted from
        # the opset9 test, so both tests dumped under the same name.  Give
        # the opset11 dump its own name.
        basename="SklearnKMeansOp11-Dec4",
        allow_failure="StrictVersion(onnx.__version__)"
                      " < StrictVersion('1.2')",
    )
示例15: test_batchkmeans_clustering_int
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import MiniBatchKMeans [as 別名]
def test_batchkmeans_clustering_int(self):
    # Fit MiniBatchKMeans on the digits data, convert it with an int64
    # input tensor and hand rows 40..59 (as int64) to dump_data_and_model.
    X = load_digits().data
    model = MiniBatchKMeans(n_clusters=4)
    model.fit(X)

    # Input width follows the data instead of being hard-coded.
    input_types = [("input", Int64TensorType([None, X.shape[1]]))]
    model_onnx = convert_sklearn(model, "kmeans", input_types,
                                 target_opset=TARGET_OPSET)
    self.assertIsNotNone(model_onnx)

    sample = X.astype(numpy.int64)[40:60]
    failure_condition = (
        "StrictVersion(onnx.__version__)"
        " < StrictVersion('1.2') or "
        "StrictVersion(onnxruntime.__version__) "
        "<= StrictVersion('0.2.1')"
    )
    dump_data_and_model(
        sample,
        model,
        model_onnx,
        basename="SklearnBatchKMeansInt-Dec4",
        allow_failure=failure_condition,
    )