本文整理汇总了Python中sklearn.cluster.MiniBatchKMeans.n_clusters方法的典型用法代码示例。如果您正苦于以下问题:Python MiniBatchKMeans.n_clusters方法的具体用法?Python MiniBatchKMeans.n_clusters怎么用?Python MiniBatchKMeans.n_clusters使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.cluster.MiniBatchKMeans
的用法示例。
在下文中一共展示了MiniBatchKMeans.n_clusters方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: cluster_to_words
# 需要导入模块: from sklearn.cluster import MiniBatchKMeans [as 别名]
# 或者: from sklearn.cluster.MiniBatchKMeans import n_clusters [as 别名]
def cluster_to_words(features, config):
    """Cluster SIFT descriptors into a bag-of-words visual vocabulary.

    Fits a mini-batch K-means model on the descriptors, drops duplicate
    cluster centers (mini-batch K-means can leave empty clusters whose
    centers collapse onto each other), and records the resulting number
    of clusters back into the configuration.

    :param features: array-like of shape (n_descriptors, descriptor_dim)
        SIFT feature descriptors to cluster.
    :param config: configuration object exposing ``SIFT.BoW.num_clusters``
        and ``SIFT.BoW.requested_num_clusters``.
    :return: the fitted MiniBatchKMeans estimator, with
        ``cluster_centers_`` and ``n_clusters`` adjusted to the
        deduplicated vocabulary size.
    """
    # Mini-batch K-means is used instead of plain K-means because it scales
    # to large descriptor collections.
    batch_size = config.SIFT.BoW.num_clusters * 10
    estimator = MiniBatchKMeans(init='k-means++',
                                n_clusters=config.SIFT.BoW.requested_num_clusters,
                                batch_size=batch_size,
                                tol=0.001,
                                init_size=10 * config.SIFT.BoW.requested_num_clusters,
                                n_init=10,
                                verbose=True)
    # Single-argument print() is valid under both Python 2 and Python 3,
    # unlike the original `print "..."` statement.
    print("Clustering features into {} clusters".format(estimator.n_clusters))
    estimator.fit(features)
    # Drop duplicate clusters (usually the remnants of empty clusters) so the
    # vocabulary contains only distinct visual words.
    clusters = pd.DataFrame(data=estimator.cluster_centers_)
    clusters.drop_duplicates(inplace=True)
    estimator.cluster_centers_ = np.array(clusters)
    estimator.n_clusters = clusters.shape[0]
    # Propagate the (possibly reduced) cluster count back into the config so
    # downstream stages see the actual vocabulary size.
    configuration.update_config(config,
                                'SIFT.BoW.num_clusters',
                                estimator.n_clusters)
    return estimator
示例2: gap
# 需要导入模块: from sklearn.cluster import MiniBatchKMeans [as 别名]
# 或者: from sklearn.cluster.MiniBatchKMeans import n_clusters [as 别名]
def gap(X, Ks, Wstar=None, B=0):
    """
    Gap statistic for estimating the "optimal" number of clusters.
    For details see [1]_.
    :param X: numpy.ndarray
        Input data, as a matrix with samples by rows.
    :param Ks: enumerable
        A list or numpy.vector of different numbers of clusters
        to be tried.
    :param Wstar: numpy.array
        Estimates of the distribution of W* under a uniform random data
        distribution. Can be reused in case of repeated estimations of K
        for different (but similar) data sets X. If None, a new W* will
        be generated.
    :param B: int
        Number of random data sets to be used in estimating
        the null distribution. If Wstar is not None, B is obtained as
        the number of rows of Wstar.
    :return: a tuple
        Two elements are returned:
        -the estimated "optimal" K
        -the matrix Wstar (see above)
    References:
    -----------
    .. [1] Tibshirani, Walther, Hastie. Estimating the number of clusters
    in a data set via the gap statistic. J. R. Statist. Soc. B (2001) 63,
    Part 2, pp 411-423. See: http://web.stanford.edu/~hastie/Papers/gap.pdf
    """
    # Bounding box of the population: reference data sets are drawn
    # uniformly inside it, as prescribed by the gap statistic.
    mins, maxes = np.min(X, axis=0), np.max(X, axis=0)
    Ks = np.array(Ks)
    Ks.sort()  # make sure they are sorted increasingly
    nk = Ks.size
    mbk = MiniBatchKMeans(compute_labels=True)
    if Wstar is None:
        if B <= 0:
            # Without a precomputed Wstar at least one reference data set is
            # required; the original code silently produced NaNs here and
            # then divided by zero below.
            raise ValueError("B must be a positive integer when Wstar is None")
        # Generate the null distribution: dispersion of random data set b
        # clustered with k+1 clusters, for each candidate k.
        Wstar = np.zeros([B, nk])
        for b in range(B):
            dataset = np.random.rand(*X.shape) * (maxes - mins) + mins
            for i, k in enumerate(Ks):
                mbk.n_clusters = k + 1
                mbk.fit(dataset)
                Wstar[b, i] = np.log(mbk.inertia_)
    else:
        # BUG FIX: as documented above, B must be taken from Wstar when it is
        # supplied. The original code left B at its default (possibly 0),
        # which made the 1.0/B term below raise ZeroDivisionError.
        B = Wstar.shape[0]
    # Dispersion of the actual data for each candidate k.
    W = np.zeros(nk)
    for i, k in enumerate(Ks):
        mbk.n_clusters = k + 1
        mbk.fit(X)
        W[i] = np.log(mbk.inertia_)
    Gap = Wstar.mean(axis=0) - W
    # Standard error of the simulated dispersions, corrected for simulation
    # variance (the sqrt(1 + 1/B) factor from the paper).
    s = np.std(Wstar, axis=0) / np.sqrt(1.0 + 1.0 / B)
    # Smallest k such that Gap(k) >= Gap(k+1) - s(k+1).
    kl = [k for i, k in enumerate(Ks[:-1]) if Gap[i] >= Gap[i + 1] - s[i + 1]]
    if len(kl) > 0:
        # at least one reasonable k:
        khat = min(kl)
    else:
        # just return the max:
        khat = Ks[-1]
    return khat, Wstar