当前位置: 首页>>代码示例>>Python>>正文


Python Pycluster.distancematrix方法代码示例

本文整理汇总了Python中Pycluster.distancematrix方法的典型用法代码示例。如果您正苦于以下问题:Python Pycluster.distancematrix方法的具体用法?Python Pycluster.distancematrix怎么用?Python Pycluster.distancematrix使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在Pycluster的用法示例。


在下文中一共展示了Pycluster.distancematrix方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: silhouette

# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import distancematrix [as 别名]
def silhouette(data, k=5, shuffle = True, shufflecount = 100):
    """Silhouette coefficients of k-means clusterings plus a shuffled baseline.

    `data` is transposed first, so the rows handed to Pycluster are the
    columns of the array passed in.  For every cluster count in
    ``range(2, k)`` (``k`` itself is excluded) the transposed data is
    clustered with ``pc.kcluster`` and scored with ``silhouette_coefficient``.
    The same is then repeated on `shufflecount` independently shuffled copies
    of the data to build a reference distribution.

    :param data: 2-D numpy array (must support ``transpose``/``copy``).
    :param k: exclusive upper bound on the number of clusters tried.
    :param shuffle: when false, no shuffled baseline is computed and every
        ``'distribution'`` list is empty.
    :param shufflecount: number of shuffled replicates per cluster count.
    :return: dict mapping each cluster count to
        ``{'data': coefficient, 'distribution': [[coefficient], ...]}``.
        Each distribution entry is a one-element list, as before.
    """
    coefficients = {}
    data = data.transpose()
    cluster_counts = range(2, k)
    if not cluster_counts:
        return coefficients

    # The distance matrix of the real data does not depend on the cluster
    # count, so compute it once instead of once per iteration.
    m = pc.distancematrix(data)
    n_shuffles = shufflecount if shuffle else 0

    for nclus in cluster_counts:
        clustermap = pc.kcluster(data, nclusters=nclus, npass=50)[0]
        observed = silhouette_coefficient(m, clustermap, nclus, data.shape)

        distribution = []
        for _ in range(n_shuffles):
            # Shuffle a private copy: shuffling `data` itself would corrupt
            # the real data for later cluster counts and, because transpose()
            # returns a view, the caller's array as well.
            dat = data.copy()
            # Explicit loop rather than map(): map() is lazy on Python 3 and
            # would silently skip the shuffle.
            for row in dat:
                np.random.shuffle(row)

            shuffled_map = pc.kcluster(dat, nclusters=nclus, npass=50)[0]
            # pc.distancematrix returns a list of rows, not a 2-D array.
            shuffled_m = pc.distancematrix(dat)
            distribution.append(
                [silhouette_coefficient(shuffled_m, shuffled_map, nclus, dat.shape)])

        coefficients[nclus] = {'data': observed, 'distribution': distribution}
    return coefficients
开发者ID:mac389,项目名称:clinic,代码行数:26,代码来源:utils.py

示例2: Kmedoids

# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import distancematrix [as 别名]
def Kmedoids(num_patches, samples, progress=None):
  """Choose prototype patches as the medoids of a k-Medoids clustering.

  The `Pycluster` library must be installed; it is imported on first use.

  :param int num_patches: number of clusters (and thus patches) requested
  :type samples: 2D array
  :param samples: candidate patches, one per row
  :param progress: accepted for interface compatibility, never used
  :rtype: 2D array
  :return: the rows of `samples` selected as medoids, cast to
     `ACTIVATION_DTYPE`; the column count equals that of `samples`.

  """
  message = "Learning %d prototypes per size by k-Medoids clustering" % (
      num_patches)
  logging.info(message)
  import Pycluster
  pairwise = Pycluster.distancematrix(samples)
  medoid_of_sample, _, _ = Pycluster.kmedoids(pairwise, nclusters=num_patches)
  # Every sample is labelled with the row index of its cluster's medoid, so
  # the distinct labels are exactly the medoid rows (in ascending order).
  medoid_rows = np.unique(medoid_of_sample)
  prototypes = samples[medoid_rows]
  return prototypes.astype(ACTIVATION_DTYPE)
开发者ID:mthomure,项目名称:glimpse-project,代码行数:24,代码来源:prototypes.py

示例3: int

# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import distancematrix [as 别名]
import Pycluster as pc
import numpy as np
import sys

# Script: cluster the rows of a data file with k-means and accumulate the
# quantities needed for per-point silhouette coefficients.
# Usage (from the argv reads below): <script> DATAFILE NCLUSTERS

# Read data filename and desired number of clusters from command line
filename, n = sys.argv[1], int( sys.argv[2] )

# Load the file as a numeric array using np.loadtxt's defaults.
# Presumably one observation per row -- confirm against the input file.
data = np.loadtxt( filename )

# Perform clustering and find centroids
# (kcluster is run with 50 passes; only the cluster assignment is kept.)
clustermap, _, _ = pc.kcluster( data, nclusters=n, npass=50 )
# NOTE(review): centroids is not used anywhere in the visible part of the listing.
centroids, _ = pc.clustercentroids( data, clusterid=clustermap )

# Obtain distance matrix
m = pc.distancematrix( data )

# Find the masses of all clusters
# (mass[c] ends up as the number of points assigned to cluster c.)
mass = np.zeros( n )
for c in clustermap:
    mass[c] += 1

# Create a matrix for individual silhouette coefficients
# (allocated flat, then reshaped in place to len(data) rows by n columns.)
sil = np.zeros( n*len(data) )
sil.shape = ( len(data), n )

# Evaluate the distance for all pairs of points
# Each unordered pair is visited once (j > i) and read as m[j][i], i.e. row
# index larger than column index -- presumably because Pycluster returns only
# the lower triangle; confirm against the Pycluster documentation.
for i in range( 0, len(data) ):
    for j in range( i+1, len(data) ):
        # NOTE(review): the listing is cut off here; d is read but not yet
        # used in the visible part.
        d = m[j][i]
开发者ID:CeasarSS,项目名称:books,代码行数:31,代码来源:ch13_lst1.py

示例4: cluster

# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import distancematrix [as 别名]
def cluster(data, threshold = 0.5,method='sk', preprocess=True):
    """Cluster the rows of `data` for an increasing number of clusters.

    Starting at 2 clusters and stopping before 15, each clustering is scored
    with a silhouette coefficient.  The function runs unchanged on Python 2
    and Python 3 (single-argument ``print(...)`` calls, no ``xrange``).

    :param data: 2-D float numpy array.  When `preprocess` is true it is
        rescaled IN PLACE, so the caller's array is modified.
    :param threshold: used by ``method='pyclus'`` only; the search stops as
        soon as the mean silhouette coefficient is no longer above it.
    :param method: ``'sk'`` uses ``KMeans``/``silhouette_score``; ``'pyclus'``
        uses the Pycluster bindings, imported lazily.
    :param preprocess: divide every column by its range (max - min) first.
    :return:
        * ``'sk'``: ``(models, sil)`` where ``sil[0]`` is a ``-1`` placeholder
          and ``sil[i + 1]`` belongs to ``models[i]``;
        * ``'pyclus'``: a list with one dict per cluster count (keys
          ``clustermap``, ``centroids``, ``distances``, ``mass``,
          ``silhouettes``);
        * any other `method`: ``None``.
    """
    length = len(data)
    print(data.shape)
    nclus = 2
    nclusmax = 15

    if preprocess:
        print('Preprocessing by scaling each row by its range')
        # NOTE: axis=0 takes the range over the rows, i.e. one range per
        # column, so it is each column that is scaled (despite the message).
        span = amax(data, axis=0) - amin(data, axis=0)
        # A constant column has zero range; leave it unscaled rather than
        # filling it with nan/inf through a division by zero.
        span[span == 0] = 1
        data /= span[newaxis, :]
        print('Now to cluster')

    if method == 'sk':
        print('Clustering using Scikits K-means implementation')
        print("This option returns a tuple of")
        print("\t\t (kmeans object, silhouette coefficients)")
        sil = [-1]
        models = []
        while nclus < nclusmax:
            model = KMeans(init='k-means++', n_clusters=nclus)
            # Assumes data is properly preprocessed.
            model.fit(data)
            labels = model.labels_
            print(data.shape)
            print('Calculating silhouette_score ')
            sil.append(silhouette_score(data, labels, metric='euclidean'))
            models.append(model)
            print('For %d clusters, the silhouette coefficient is %.03f' % (nclus, sil[-1]))
            nclus += 1
        return (models, sil)

    elif method == 'pyclus':
        import Pycluster as pc
        print('Clustering using the C Clustering library')
        print('This option returns a dictionary with the distance matrix, silhouettes, and clusterids for each iteration.')
        res = []
        sil_co_one = 1
        # Seeded with 1; this single list is stored in every result dict, so
        # all entries see the full history once the loop has finished.
        sil_co = [1]
        # Pairwise distances do not depend on the cluster count: compute once.
        m = pc.distancematrix(data)

        while sil_co_one > threshold and nclus < nclusmax:
            print('No. of clus: %d' % nclus)
            print('Before kcluster')
            clustermap, _, _ = pc.kcluster(data, nclusters=nclus, npass=50)
            print('After kcluster')
            centroids, _ = pc.clustercentroids(data, clusterid=clustermap)
            print('After centroids')

            print('Finding mass')
            # mass[c] = number of points assigned to cluster c
            mass = zeros(nclus)
            for c in clustermap:
                mass[c] += 1

            # mean_dist[i, c] accumulates, then averages, the distance from
            # point i to the members of cluster c.
            mean_dist = zeros((length, nclus))

            print('Evaluating pairwise distance')
            # Visit every unordered pair once; m is indexed [larger][smaller].
            for i in range(length):
                for j in range(i + 1, length):
                    d = m[j][i]
                    mean_dist[i, clustermap[j]] += d
                    mean_dist[j, clustermap[i]] += d

            # Average over cluster size (broadcast across all rows).
            mean_dist /= mass

            print('Sil co')
            s = 0
            for i in range(length):
                c = clustermap[i]
                a = mean_dist[i, c]
                others = [q for q in range(nclus) if q != c]
                b = min(mean_dist[i, others])
                denom = max(b, a)
                # a == b == 0 (coincident points) would be 0/0; score it 0.
                si = (b - a) / denom if denom != 0 else 0.0
                s += si

            nclus += 1
            sil_co_one = s / length
            sil_co.append(sil_co_one)
            print('Sil co %.02f' % sil_co_one)
            res.append({'clustermap': clustermap,
                        'centroids': centroids,
                        'distances': m,
                        'mass': mass,
                        'silhouettes': sil_co})
        return res

    # Unknown method: nothing is clustered (kept for backward compatibility).
    return None
开发者ID:mac389,项目名称:brainpy,代码行数:90,代码来源:sortUtils.py


注:本文中的Pycluster.distancematrix方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。