本文整理汇总了 Python 中 scipy.cluster.vq.kmeans 函数的典型用法代码示例。如果您正苦于以下问题：Python kmeans 函数的具体用法是什么？Python kmeans 怎么用？有哪些 Python kmeans 的使用例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
下文共展示了 kmeans 函数的 15 个代码示例，这些示例默认按受欢迎程度排序。您可以为喜欢或者觉得有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: clustering_scipy_kmeans
def clustering_scipy_kmeans(features, n_clust=8):
    """Explore k-means clusterings of ``features`` and plot the centroids.

    The observations are whitened, the distortion for k = 1..11 is plotted
    as an elbow curve, and ``n_clust`` centroids are then fitted twice:
    once with ``kmeans`` and once with ``kmeans2(minit='points')``.  Every
    centroid of each fit is drawn in its own subplot.

    Args:
        features: observation matrix handed to ``whiten``.
        n_clust: number of clusters requested for the two final fits.
    """
    whitened = whiten(features)
    print(whitened.shape)

    # Elbow curve: distortion reported by kmeans for k = 1..11.
    initial = [kmeans(whitened, k) for k in range(1, 12)]
    plt.plot([var for (cent, var) in initial])
    plt.show()

    codebook, distortion = kmeans(whitened, n_clust)
    print("%s %s" % (codebook, distortion))
    _plot_centroids(codebook)

    centroid, label = kmeans2(whitened, n_clust, minit='points')
    print("%s %s" % (centroid, label))
    _plot_centroids(centroid)


def _plot_centroids(centroids):
    """Draw each centroid in its own cell of a two-column subplot grid."""
    # Size the grid from the centroids actually returned instead of assuming
    # eight of them; kmeans may hand back fewer rows than were requested.
    n_rows = (len(centroids) + 1) // 2
    for idx, row in enumerate(centroids):
        # subplot positions are 1-based, so index 0 is not a valid cell.
        plt.subplot(n_rows, 2, idx + 1)
        plt.plot(row)
    plt.show()
示例2: custom
def custom():
    """Cluster per-item vote vectors read from ``my_items_likehood.txt``.

    Every line of the file is parsed with ``keys`` into a ``(user, item)``
    pair.  For at most 101 items a feature row is built with
    ``user_matrix``; the rows are whitened and passed to ``kmeans``, whose
    result is printed together with a short summary.
    """
    _items = {}
    users = []
    # The context manager closes the file even if parsing a line raises.
    with open('my_items_likehood.txt') as votes:
        for line in votes:
            user, item = keys(line)
            users.append(user)
            _items.setdefault(item, []).append(user)

    sorted_users = sorted(users)
    l = len(sorted_users)
    features = []
    for count, item in enumerate(_items):
        features.append(user_matrix(l, _items[item], sorted_users))
        # Stop once the item with index 100 has been added (101 rows max).
        if count == 100:
            break

    print('whiten')
    whitened = whiten(array(features))
    print('kmeans')
    # NOTE(review): if ``kmeans`` is scipy.cluster.vq.kmeans this call is
    # missing its required ``k_or_guess`` argument and raises TypeError.
    # The intended number of clusters is not visible here -- confirm and
    # pass it explicitly.
    print(kmeans(whitened))
    # Report the number of distinct items that were read; the counter used
    # before was an always-empty dict, so the message always said 0 items.
    print("%d items voted by %d users" % (len(_items), len(users)))
示例3: kmeans_net
def kmeans_net(net, layers, num_c=16, initials=None):
    """Build a k-means codebook of the non-zero weights of each layer.

    Args:
        net: network object; the weights of a layer are read from
            ``net.params[layer][0].data``.
        layers: names of the layers to quantize.
        num_c: number of quantization levels per layer, counting the zero
            level that is prepended to every codebook.  Either one integer
            applied to all layers or a sequence with one entry per layer.
        initials: how the initial centroids are chosen.  ``None`` (default)
            spaces them evenly between the smallest and largest weight, an
            ``ndarray`` is used as the initial centroids for every layer,
            and ``'random'`` passes only the centroid count to ``kmeans``.

    Returns:
        dict mapping each layer name to a 1-D array holding ``0.0`` followed
        by the centroids found for that layer.

    Raises:
        ValueError: if ``initials`` is none of the supported forms.
        AssertionError: if ``num_c`` is a sequence whose length differs
            from ``len(layers)``.
    """
    codebook = {}  # layer name -> quantization codebook
    if isinstance(num_c, (int, np.integer)):
        num_c = [num_c] * len(layers)
    else:
        assert len(num_c) == len(layers)

    # Cluster the weights of every requested layer.
    print("==============Perform K-means=============")
    for idx, layer in enumerate(layers):
        print("Eval layer: %s" % (layer,))
        W = net.params[layer][0].data.flatten()
        W = W[np.where(W != 0)]  # keep only the non-zero weights

        # One level is reserved for the zero weight, hence ``num_c - 1``.
        if initials is None:
            # Default: centroids evenly spaced over the weight range.
            min_W = np.min(W)
            max_W = np.max(W)
            initial_uni = np.linspace(min_W, max_W, num_c[idx] - 1)
            codebook[layer], _ = scv.kmeans(W, initial_uni)
        elif isinstance(initials, np.ndarray):
            codebook[layer], _ = scv.kmeans(W, initials)
        elif initials == 'random':
            codebook[layer], _ = scv.kmeans(W, num_c[idx] - 1)
        else:
            raise ValueError(
                "initials must be None, a numpy array or 'random', got %r"
                % (initials,))

        # Put the zero weight back as the first codebook entry.
        codebook[layer] = np.append(0.0, codebook[layer])
        print("codebook size: %d" % len(codebook[layer]))
    return codebook
示例4: kmeans2
def kmeans2():
    """Run k-means on the whitened ``locations()`` data with two seeds.

    The first and third whitened observations are used as the initial
    codebook.

    Returns:
        The ``(codebook, distortion)`` tuple produced by ``kmeans``.
    """
    features = locations()
    whitened = whiten(features)
    book = array((whitened[0], whitened[2]))
    # Return the fit so callers can use it; a bare call would throw the
    # result away and leave the function returning None.
    return kmeans(whitened, book)
示例5: kmeans1
def kmeans1():
    """Run k-means on a fixed nine-point 2-D sample with two seeds.

    The sample is whitened and its first and third observations are used as
    the initial codebook.

    Returns:
        The ``(codebook, distortion)`` tuple produced by ``kmeans``.
    """
    features = array([[1.9, 2.3],
                      [1.5, 2.5],
                      [0.8, 0.6],
                      [0.4, 1.8],
                      [0.1, 0.1],
                      [0.2, 1.8],
                      [2.0, 0.5],
                      [0.3, 1.5],
                      [1.0, 1.0]])
    whitened = whiten(features)
    book = array((whitened[0], whitened[2]))
    # Return the fit so callers can use it; a bare call would throw the
    # result away and leave the function returning None.
    return kmeans(whitened, book)
示例6: _get_larger_chroms
def _get_larger_chroms(ref_file):
    """Return the names of the larger contigs of ``ref_file`` for plotting.

    Contig sizes are sorted and split in two with k-means; the leading group
    of that split is split once more, and every contig strictly larger than
    the biggest size in the resulting leading group is reported.
    """
    from scipy.cluster.vq import kmeans, vq

    def _leading_group(sizes):
        # Cluster ``sizes`` into two groups and return the first run of
        # values that share a cluster label.
        centers, _ = kmeans(np.array(sizes), 2)
        labels, _ = vq(np.array(sizes), centers)
        run = tz.first(tz.partitionby(lambda pair: pair[0], zip(labels, sizes)))
        return [pair[1] for pair in run]

    all_sizes = sorted(float(c.size) for c in ref.file_contigs(ref_file))
    # First pass: separate out smaller chromosomes and haplotypes.
    little_sizes = _leading_group(all_sizes)
    # Second pass on the small ones, removing the haplotypes.
    little_sizes2 = _leading_group(little_sizes)
    # Anything above this size is outside the haplotype/random bin.
    thresh = max(little_sizes2)
    return [c.name for c in ref.file_contigs(ref_file) if c.size > thresh]
示例7: cluster
def cluster(df, means, csv_min, csv_max):
    """Return the sorted k-means centroids of a range of columns of ``df``.

    Args:
        df: pandas DataFrame; cells equal to ``"--"`` are dropped before the
            remaining values are converted to float.
        means: forwarded to ``kmeans`` as its second argument.
        csv_min: first column to process.
        csv_max: column at which processing stops (exclusive).

    Returns:
        list with one sorted centroid array per processed column.
    """
    data = []
    for i in range(csv_min, csv_max):
        # ``DataFrame.ix`` no longer exists in current pandas; the bounds are
        # treated as column positions.
        # NOTE(review): confirm callers do not rely on integer column labels.
        column = array(df.iloc[:, i].values)
        valid = column[column != "--"]
        # Fit once and reuse the result: kmeans may start from random
        # points, so a second call could print centroids that differ from
        # the ones stored.  ``float`` replaces the removed ``np.float`` alias.
        centers = np.sort(kmeans(valid.astype(float), means)[0])
        print(centers)
        data.append(centers)
    return data
示例8: test_kmeans_lost_cluster
def test_kmeans_lost_cluster(self):
    """Run kmeans/kmeans2 with seeds that leave one cluster without points."""
    # Load the flat sample file and view it as 200 two-dimensional points.
    points = np.fromfile(DATAFILE1, sep=", ").reshape((200, 2))
    # These initial centroids cause one cluster to end up empty.
    seeds = np.array([
        [-1.8127404, -0.67128041],
        [2.04621601, 0.07401111],
        [-2.31149087, -0.05160469],
    ])

    kmeans(points, seeds)

    # missing="warn" is expected to warn only, so silence UserWarning here.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", UserWarning)
        kmeans2(points, seeds, missing="warn")

    # missing="raise" has to surface the empty cluster as ClusterError.
    assert_raises(ClusterError, kmeans2, points, seeds, missing="raise")
示例9: test_kmeans_lost_cluster
def test_kmeans_lost_cluster(self):
    """Run kmeans/kmeans2 with seeds that leave one cluster without points."""
    points = TESTDATA_2D
    # These initial centroids cause one cluster to end up empty.
    seeds = np.array([
        [-1.8127404, -0.67128041],
        [2.04621601, 0.07401111],
        [-2.31149087, -0.05160469],
    ])

    kmeans(points, seeds)

    # missing='warn' is expected to warn only, so silence UserWarning here.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', UserWarning)
        kmeans2(points, seeds, missing='warn')

    # missing='raise' has to surface the empty cluster as ClusterError.
    assert_raises(ClusterError, kmeans2, points, seeds, missing='raise')
示例10: clusterkmeans
def clusterkmeans(self):
    """Derive a threshold from a one- versus two-cluster fit of the counts.

    ``self.counts`` is whitened and fitted with k = 1 and k = 2.  When the
    single-cluster distortion is the smaller one, ``None`` is returned;
    otherwise the mean of the two centroids, scaled back to the units of
    the counts, is returned.
    """
    whitened = whiten(self.counts)  # normalized counts, easier to cluster
    # Factor that maps whitened values back to the original units.
    scale = self.counts[0] / whitened[0]

    # Fit both models so that their distortions can be compared.
    _, distortion_k1 = kmeans(whitened, 1)
    centers_k2, distortion_k2 = kmeans(whitened, 2)

    if not distortion_k1 < distortion_k2:
        # Threshold is the average of the two centers.
        return scale * centers_k2.mean()

    print('found only one cluser')
    return None
示例11: test_kmeans_lost_cluster
def test_kmeans_lost_cluster(self):
    """Run kmeans/kmeans2 with seeds that leave one cluster without points."""
    points = TESTDATA_2D
    # These initial centroids cause one cluster to end up empty.
    seeds = np.array([
        [-1.8127404, -0.67128041],
        [2.04621601, 0.07401111],
        [-2.31149087, -0.05160469],
    ])

    # Both calls run with the empty-cluster UserWarning filtered out.
    with suppress_warnings() as quiet:
        quiet.filter(
            UserWarning,
            "One of the clusters is empty. Re-run kmean with a different initialization")
        kmeans(points, seeds)
        kmeans2(points, seeds, missing='warn')

    # missing='raise' has to surface the empty cluster as ClusterError.
    assert_raises(ClusterError, kmeans2, points, seeds, missing='raise')
示例12: createdatabase
def createdatabase():
    """Build a bag-of-words pseudo database and dump it to ``table.htm``.

    Descriptors returned by ``detectcompute(train1)`` are whitened and
    clustered into ``k`` centroids.  ``bow`` turns the result of
    ``singledetect(test1)`` into the pseudo database, which is written out
    as an HTML table; the codebook is finally handed to
    ``codebook_to_file``.  ``train1``, ``test1`` and ``k`` are read from
    the enclosing module.
    """
    X_train = detectcompute(train1)
    print("Clustering the data with K-means")
    codebook, _ = kmeans(whiten(X_train), k)
    print("Done.\n")

    imtrain = singledetect(test1)
    Pdatabase = bow(imtrain, codebook, k)  # pseudo database with list structure

    # Write the database as an HTML table.
    print("Converting the database into a HTML file")
    # "w" creates the file when it is missing and truncates stale content
    # ("r+" did neither); the context manager closes it even on errors.
    with open("table.htm", "w") as htmltable:
        # The opening tag is spelled <html> so it matches the closing tag.
        htmltable.write("<html><body><table cellpadding=5><tr><th>Filename</th><th>Histogram</th></tr>")
        for entry in Pdatabase:
            # First element is shown as the file name, last as the histogram.
            htmltable.write(
                "<tr><td>%(filename)s</td><td>%(histogram)s</td></tr>"
                % {"filename": entry[0], "histogram": entry[-1]})
        htmltable.write("</table></body></html>")
    print("Done.\n")
    codebook_to_file(codebook)
示例13: worldplot
def worldplot(self,kmeans=None,proj='merc'):
    """
    plots customer GPS location on a map with state and national boundaries.
    IN
    kmeans (int) number of means for k-means clustering, default=None
    proj (string) the map projection to use, use 'robin' to plot the whole earth, default='merc'
    OUT
    the centroids returned by vq.kmeans, or None when kmeans is not given
    """
    # create a matplotlib Basemap object
    if proj == 'robin':
        my_map = Basemap(projection=proj,lat_0=0,lon_0=0,resolution='l',area_thresh=1000)
    else:
        my_map = Basemap(projection=proj,lat_0=33.,lon_0=-125.,resolution='l',area_thresh=1000.,
                         llcrnrlon=-130.,llcrnrlat=25,urcrnrlon=-65., urcrnrlat=50)
    my_map.drawcoastlines(color='grey')
    my_map.drawcountries(color='grey')
    my_map.drawstates(color='grey')
    my_map.drawlsmask(land_color='white',ocean_color='white')
    my_map.drawmapboundary()
    x,y = my_map(np.array(self.data['lon']),np.array(self.data['lat']))
    my_map.plot(x,y,'ro',markersize=3,alpha=.4,linewidth=0)

    # Bound up front so the final return also works on the default path,
    # where no clustering is requested.
    output = None
    if kmeans:
        # k-means clustering on everything except the id/clv/level columns
        data_in = self.data.drop(['id','clv','level'],axis=1)
        # vq is scipy's vector quantization module
        output,distortion = vq.kmeans(data_in,kmeans)
        # NOTE(review): assumes the remaining columns are (lat, lon) in that
        # order, so column 1 is the longitude -- confirm against self.data.
        x1,y1 = my_map(output[:,1],output[:,0])
        my_map.plot(x1,y1,'ko',markersize=20,alpha=.4,linewidth=0)
    plt.show()
    return output
示例14: test_large_features
def test_large_features(self):
# Generate a data set with large values, and run kmeans on it to
# (regression for 1077).
d = 300
n = 100
m1 = np.random.randn(d)
m2 = np.random.randn(d)
x = 10000 * np.random.randn(n, d) - 20000 * m1
y = 10000 * np.random.randn(n, d) + 20000 * m2
data = np.empty((x.shape[0] + y.shape[0], d), np.double)
data[:x.shape[0]] = x
data[x.shape[0]:] = y
kmeans(data, 2)
示例15: classify_kmeans
def classify_kmeans(infile, clusternumber):
    '''
    Cluster the pixel values of a raster with k-means, in place.

    The file is opened for update, its pixels are clustered into
    clusternumber groups, every pixel is replaced by the centroid of its
    group and the result is written to raster band 1.
    '''
    # Open the input raster for update
    gtiff = gdal.GetDriverByName('GTiff')
    gtiff.Register()
    dataset = gdal.Open(infile, gdal.GA_Update)
    band = dataset.GetRasterBand(1)

    # Read the raster into an array; replacing the -999.0 no-data value
    # with NaN is left disabled.
    raster = dataset.ReadAsArray()
    n_rows, n_cols = raster.shape[0], raster.shape[1]

    # Flatten to one value per pixel, then cluster and label every pixel
    flat = numpy.reshape(raster, (n_rows * n_cols))
    centroids, _ = kmeans(flat, clusternumber)
    labels, _ = vq(flat, centroids)

    # Back to image shape, with each label swapped for its centroid value
    label_grid = numpy.reshape(labels, (n_rows, n_cols))
    clustered = centroids[label_grid]

    # Write the clustered raster back to the band
    band.WriteArray(clustered)
    band.FlushCache()

    # Drop the references to close the file
    band = None
    clustered = None
    dataset = None