本文整理汇总了 Python 中 Pycluster.distancematrix 方法的典型用法代码示例。如果您正苦于以下问题：Python Pycluster.distancematrix 方法的具体用法？Python Pycluster.distancematrix 怎么用？Python Pycluster.distancematrix 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块 Pycluster 的用法示例。
在下文中一共展示了Pycluster.distancematrix方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: silhouette
# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import distancematrix [as 别名]
def silhouette(data, k=5, shuffle = True, shufflecount = 100):
    """Silhouette coefficients of the data and of row-shuffled copies of it.

    `data` is assumed to be a matrix with variables in rows and dimensions
    in columns; it is transposed before clustering.  The caller's array is
    never modified: every shuffled run works on a fresh copy.

    For each cluster count `nclus` in ``range(2, k)`` (so `k` itself is
    excluded) the data is clustered with ``pc.kcluster`` and scored with
    ``silhouette_coefficient`` (defined elsewhere in this file).  When
    `shuffle` is true, the same is repeated `shufflecount` times on copies
    whose rows have each been shuffled in place.

    :param data: 2D array (must support ``transpose``, ``copy`` and ``shape``)
    :param k: exclusive upper bound on the number of clusters tried
    :param shuffle: whether to build the shuffled reference distribution
    :param shufflecount: number of shuffled runs per cluster count
    :return: dict mapping `nclus` to ``{'data': coefficient,
        'distribution': [[coefficient], ...]}``; 'distribution' is empty
        when `shuffle` is false.
    """
    coefficients = {}
    data = data.transpose()
    # The unshuffled data never changes, so its distance matrix is computed once.
    m = pc.distancematrix(data)
    for nclus in range(2, k):
        clustermap = pc.kcluster(data, nclusters=nclus, npass=50)[0]
        observed = silhouette_coefficient(m, clustermap, nclus, data.shape)
        distribution = []
        if shuffle:
            for _ in range(shufflecount):
                # Copy first: shuffling is in place, and `data` is a view of
                # the caller's array that is still needed for later runs.
                dat = data.copy()
                # Explicit loop: a bare map() is lazy on Python 3 and would
                # never perform the shuffle.
                for row in dat:
                    np.random.shuffle(row)
                shuffled_map = pc.kcluster(dat, nclusters=nclus, npass=50)[0]
                #distance matrix-- well it's a list actually
                shuffled_m = pc.distancematrix(dat)
                # NOTE(review): each entry is wrapped in a one-element list,
                # unlike 'data'; kept so the returned shape does not change.
                distribution.append(
                    [silhouette_coefficient(shuffled_m, shuffled_map, nclus, dat.shape)])
        coefficients[nclus] = {'data': observed, 'distribution': distribution}
    return coefficients
示例2: Kmedoids
# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import distancematrix [as 别名]
def Kmedoids(num_patches, samples, progress=None):
    """Choose prototype patches from `samples` by k-Medoids clustering.

    The `Pycluster` library must be installed; it is imported on call.

    :param int num_patches: number of patches to create
    :type samples: 2D array
    :param samples: example patches
    :param progress: ignored
    :rtype: 2D array with `num_patches` rows and N columns, where N is the
       number of columns in `samples`.
    :return: created patches, cast to `ACTIVATION_DTYPE`
    """
    message = ("Learning %d prototypes per size by k-Medoids clustering" %
               num_patches)
    logging.info(message)
    import Pycluster
    pairwise = Pycluster.distancematrix(samples)
    assignment, _, _ = Pycluster.kmedoids(pairwise, nclusters=num_patches)
    # Each value in `assignment` is the index of the medoid of that sample's
    # cluster, so the distinct values are the rows to keep.
    medoid_rows = np.unique(assignment)
    prototypes = samples[medoid_rows]
    return prototypes.astype(ACTIVATION_DTYPE)
示例3: int
# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import distancematrix [as 别名]
import Pycluster as pc
import numpy as np
import sys
# Command-line arguments: path of the data file, then the number of clusters
filename = sys.argv[1]
n = int(sys.argv[2])
data = np.loadtxt(filename)
npoints = len(data)

# Cluster the data with pc.kcluster (npass=50), then get the cluster centroids
clustermap, _, _ = pc.kcluster(data, nclusters=n, npass=50)
centroids, _ = pc.clustercentroids(data, clusterid=clustermap)

# Pairwise distances; read below as m[j][i] with i < j
m = pc.distancematrix(data)

# Count how many points were assigned to each cluster
mass = np.zeros(n)
for c in clustermap:
    mass[c] += 1

# One row per point and one column per cluster, all zeros to start with
sil = np.zeros((npoints, n))

# Visit every unordered pair of points (i < j) and look up its distance.
# NOTE: the excerpt ends here; nothing is done with `d` in the visible code.
for i in range(npoints):
    for j in range(i + 1, npoints):
        d = m[j][i]
示例4: cluster
# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import distancematrix [as 别名]
def _pyclus_silhouette(m, clustermap, nclus, length):
    """Return (mass, mean silhouette coefficient) for one cluster assignment.

    `m` is indexed as ``m[j][i]`` with ``i < j``; `clustermap[i]` is the
    cluster index of point `i`; `length` is the number of points.
    """
    print('Finding mass')
    #Find the masses of all clusters
    mass = zeros(nclus)
    for c in clustermap:
        mass[c] += 1
    #Create a matrix for individual silhouette coefficients
    sil = zeros((length, nclus))
    print('Evaluating pairwise distance')
    #Evaluate the distance for all pairs of points
    for i in range(0, length):
        for j in range(i + 1, length):
            d = m[j][i]
            sil[i, clustermap[j]] += d
            sil[j, clustermap[i]] += d
    #Average over cluster (one broadcast instead of a row-by-row loop)
    sil /= mass
    print('Sil co')
    #Evaluate the silhouette coefficient
    s = 0
    for i in range(0, length):
        c = clustermap[i]
        a = sil[i, c]
        # Columns of every cluster except the point's own; list() keeps the
        # concatenation valid on Python 3, where range is not a list.
        others = list(range(0, c)) + list(range(c + 1, nclus))
        b = min(sil[i, others])
        denom = max(b, a)
        # Both averages zero (e.g. duplicate points) would give 0/0; score 0.
        si = (b - a) / denom if denom else 0.0  #silhouette coefficient of point i
        s += si
    return mass, s / length


def cluster(data, threshold = 0.5,method='sk', preprocess=True):
    """Cluster `data` for a growing number of clusters and score each result.

    Cluster counts start at 2 and stay below 15.  Progress messages are
    printed throughout.

    NOTE: when `preprocess` is true, `data` is rescaled IN PLACE (``/=``), so
    the caller's array is modified.  Each column is divided by its range,
    assuming `amax`/`amin`/`newaxis` are NumPy's -- confirm against the
    file's imports.

    :param data: 2D array of observations (needs ``shape`` and ``len``)
    :param threshold: 'pyclus' only -- iteration stops once the mean
        silhouette coefficient is no longer above this value.  Unused by 'sk'.
    :param method: 'sk' (uses `KMeans` and `silhouette_score`, presumably
        scikit-learn's) or 'pyclus' (uses the Pycluster library)
    :param preprocess: whether to rescale `data` before clustering
    :return: for 'sk', a tuple ``(models, sil)`` where `models` holds one
        fitted model per cluster count and `sil` starts with a ``-1``
        placeholder followed by one score per model.  For 'pyclus', a list
        with one dict per iteration with keys 'clustermap', 'centroids',
        'distances', 'mass' and 'silhouettes'; 'silhouettes' is the same
        shared list in every dict (it starts with a ``1`` placeholder), and
        'distances' is the same distance matrix object in every dict.
    :raises ValueError: if `method` is neither 'sk' nor 'pyclus'
    """
    # Fail fast, before anything is printed or `data` is rescaled in place.
    if method not in ('sk', 'pyclus'):
        raise ValueError(
            "unknown clustering method %r; expected 'sk' or 'pyclus'" % (method,))
    length = len(data)
    print(data.shape)
    nclusmax = 15
    if preprocess:
        print('Preprocessing by scaling each row by its range')
        data /= (amax(data, axis=0) - amin(data, axis=0))[newaxis, :]
    print('Now to cluster')
    if method == 'sk':
        print('Clustering using Scikits K-means implementation')
        print("This option returns a tuple of")
        print("\t\t (kmeans object, silhouette coefficients)")
        sil = [-1]
        models = []
        for nclus in range(2, nclusmax):
            model = KMeans(init='k-means++', n_clusters=nclus)
            #Assume data is propery preprocessed
            model.fit(data)
            labels = model.labels_
            #<-- can only sample this in chunks of 100
            print(data.shape)
            print('Calculating silhouette_score ')
            sil.append(silhouette_score(data, labels, metric='euclidean'))
            models.append(model)
            print('For %d clusters, the silhouette coefficient is %.03f' % (nclus, sil[-1]))
        return (models, sil)

    import Pycluster as pc
    print('Clustering using the C Clustering library')
    print('This option returns a dictionary with the distance matrix, silhouettes, and clusterids for each iteration.')
    res = []
    nclus = 2
    sil_co_one = 1
    sil_co = [1]
    # `data` does not change inside the loop, so the pairwise distances are
    # computed once instead of once per cluster count.
    m = pc.distancematrix(data)
    while sil_co_one > threshold and nclus < nclusmax:
        print('No. of clus: %d' % nclus)
        print('Before kcluster')
        clustermap, _, _ = pc.kcluster(data, nclusters=nclus, npass=50)
        print('After kcluster')
        centroids, _ = pc.clustercentroids(data, clusterid=clustermap)
        print('After centroids')
        mass, sil_co_one = _pyclus_silhouette(m, clustermap, nclus, length)
        nclus += 1
        sil_co.append(sil_co_one)
        print('Sil co %.02f' % sil_co_one)
        res.append({'clustermap': clustermap,
                    'centroids': centroids,
                    'distances': m,
                    'mass': mass,
                    'silhouettes': sil_co})
    return res