当前位置: 首页>>代码示例>>Python>>正文


Python Pycluster.kmedoids方法代码示例

本文整理汇总了Python中Pycluster.kmedoids方法的典型用法代码示例。如果您正苦于以下问题:Python Pycluster.kmedoids方法的具体用法?Python Pycluster.kmedoids怎么用?Python Pycluster.kmedoids使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块Pycluster的用法示例。


下文共展示了Pycluster.kmedoids方法的15个代码示例,这些示例默认按受欢迎程度排序。您可以为喜欢或觉得有用的代码点赞,您的评价将有助于系统推荐出更好的Python代码示例。

示例1: clusterSessionsKmed

# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import kmedoids [as 别名]
def clusterSessionsKmed(featMan, weightFile):
  """Cluster the queries known to featMan with k-medoids.

  The weight matrix comes from getWeightMatrixForKMedFromFile. The number
  of clusters requested is (len(weightList) + 1) / 4, raised to 1 when the
  division yields 0. A query q is only kept when len(q) > 1.

  Returns the clusters (one set of queries per cluster id) built for k = 4.
  """
  data = featMan.returnKeys()
  lastId = featMan.returnLastId()
  weightList = getWeightMatrixForKMedFromFile(lastId, weightFile, data)
  cnt = 0
  kclusters = {}
  for k in range(4, 5, 2):
    # A result of 0 is replaced by 1 so that at least one cluster is requested.
    nClusters = (len(weightList) + 1) / k or 1
    clusArray, error, opt = clust.kmedoids(weightList, nClusters, 10, None)
    print error, len(clusArray)
    clusters = {}
    for index, clusId in enumerate(clusArray):
      q = featMan.returnQuery(index)
      if len(q) <= 1:
        continue
      clusters.setdefault(clusId, set()).add(q)
      cnt += 1

    kclusters[k] = clusters.values()

    print 'Cluster with kmed ', len(clusters), cnt, ' queries'
  return kclusters[4]
开发者ID:vmanisha,项目名称:QueryExpansion,代码行数:29,代码来源:kFoldTermPrediction.py

示例2: cluster_kmedoids

# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import kmedoids [as 别名]
def cluster_kmedoids(sessions, clusters, distance_fn=string_similarity.jaccard_distance):
    """
    Partition sessions into `clusters` groups with k-medoids.

    The distance matrix is computed up front by compute_distances using
    distance_fn, which is why this method is slow.

    Returns the (clusterids, error, nfound) values produced by
    Pycluster.kmedoids.
    """
    pairwise = compute_distances(sessions, distance_fn)
    labels, err, found = Pycluster.kmedoids(pairwise, nclusters=clusters)
    return labels, err, found
开发者ID:1098896743,项目名称:pwum,代码行数:9,代码来源:clustering.py

示例3: cluster_kmedoids

# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import kmedoids [as 别名]
	def cluster_kmedoids(self, k=2, npass=50):
		"""Use the distances in self.zd to produce a partition into k classes.

		npass is forwarded to pc.kmedoids; the original note describes it as
		the number of iterations.
		Returns partition(labels, self.mat), where labels is the first value
		returned by pc.kmedoids.
		"""
		labels, _err, _nfound = pc.kmedoids(self.zd, k, npass=npass)
		return partition(labels, self.mat)
开发者ID:LANCI,项目名称:kimono,代码行数:9,代码来源:kimono.py

示例4: clusterSessionsPre

# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import kmedoids [as 别名]
def clusterSessionsPre(catQueryDist, featMan, weightMatrix):

  tclusters = {}
  print len(catQueryDist)
  for termCount in range(4, 5):
    tclusters[termCount] = []
    for cat, qSet in catQueryDist.items():
      if len(qSet) > 1:  # and cat in pairs:
        k = len(qSet) / termCount
        if k == 0:
          k = 1
        #print cat, len(qSet), k
        qList = list(qSet)
        catDist = getWeightMatrixForKMed(qList, weightMatrix)

        clusArray, error, opt = clust.kmedoids(catDist, k, 5, None)
        #print 'Queries', qList
        clusters = {}
        for c in range(len(clusArray)):
          clusId = clusArray[c]
          if clusId not in clusters:
            clusters[clusId] = []
          qc = featMan.returnQuery(qList[c])
          if len(qc) > 1:
            clusters[clusId].append(qc)
        #print cat, len(clusters)
        for entry in clusters.values():
          tclusters[termCount].append(entry)

  print len(tclusters[4])
  return tclusters[4]
开发者ID:vmanisha,项目名称:QueryExpansion,代码行数:33,代码来源:kFoldTermPrediction.py

示例5: clusterCatWithMediods

# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import kmedoids [as 别名]
def clusterCatWithMediods(lowerLimit, upperLimit,featMan, weightMatrix, \
						 samePairsSet, differentPairsSet, catQueryDist, \
						outFile = 'cat-clusters-with-med.txt'):
	
	oFile = open(outFile,'w')
	metrics = {}
	for noTerms in range(lowerLimit, upperLimit):
		#fclusters = []
		cluster_list = []
		i = 0
		oFile = open(outFile+str(noTerms)+'.txt','w')
		for cat, qSet in catQueryDist.items():
			if len(qSet) > 1: # and cat in pairs:
				k = len(qSet)/noTerms
				if k == 0:
					k = 1
			
				qList = sorted(list(qSet),reverse=True)
				catDist = getWeightMatrixForKMed(qList, weightMatrix,'cat_kmediods')
							
				clusArray, error, opt = clust.kmedoids(catDist,k, 5, None)
				clusters = {}
				for c in range(1, len(clusArray)):
					clusId = clusArray[c]
					if clusId not in clusters:
						clusters[clusId] = set()
					clusters[clusId].add(qList[c-1])

				
				for entry in clusters.values():
					cluster_list.append(list(entry))
					qStr = toString(entry,featMan)
					#fclusters.append(qStr)
					oFile.write(cat+'\t'+qStr+'\n');
				print 'Clust category',cat, 'length', len(clusters),\
                                        'Queries' , len(qSet),'k', k,  'error', error, opt
				if i % 5 == 0:
					print i
				i+=1	
		predictedSamePairsSet, predictedDifferentPairsSet = \
						getPairLabelsFromClusters(cluster_list,featMan)
		#metrics[noTerms] = getRecallPrecision(samePairsSet, \
		#			differentPairsSet,\
		#			predictedSamePairsSet,\
		#			predictedDifferentPairsSet)	
                metrics[noTerms] = getSamePairPrecisionRecallF1Calculator(samePairsSet,\
                                predictedSamePairsSet)

		oFile.close()
	for tcount, met in metrics.items():
		print tcount, met
	return metrics
开发者ID:vmanisha,项目名称:QueryExpansion,代码行数:54,代码来源:findCategoryClusters.py

示例6: clusterCatWithMediodsAndNetwork

# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import kmedoids [as 别名]
def clusterCatWithMediodsAndNetwork(threshold,
				    lowerLimit, upperLimit, featMan,
				    weightMatrix, samePairsSet,
				    differentPairsSet, catQueryDist,
				    catNetwork,
				    outFile = 'cat-clusters-with-med.txt'):
	"""Cluster the queries of each category with k-medoids and score the result.

	For every noTerms in range(lowerLimit, upperLimit, 2), each category that
	holds more than one query is split into len(qSet)/noTerms clusters (1
	when that is 0). The clusters are written to the file
	outFile+str(noTerms)+'.txt', one line per cluster: the category, a tab,
	then toString(cluster, featMan). The pair labels predicted from the
	clusters are scored with getRecallPrecision against samePairsSet and
	differentPairsSet.

	catNetwork is accepted but not used by this function; threshold only
	appears in the keys of the returned dict.

	Returns a dict mapping str(threshold)+'_'+str(noTerms) to the value
	returned by getRecallPrecision.
	"""
	metrics = {}
	for noTerms in range(lowerLimit, upperLimit, 2):
		cluster_list = []
		i = 0
		# The with-block closes the output file even when clustering or
		# writing raises, which the explicit close() at the end did not.
		with open(outFile+str(noTerms)+'.txt','w') as oFile:
			for cat, qSet in catQueryDist.items():
				if len(qSet) <= 1:
					continue
				k = len(qSet)/noTerms
				if k == 0:
					k = 1
				qList = list(qSet)
				catDist = getWeightMatrixForKMed(qList, weightMatrix)
				clusArray, error, opt = clust.kmedoids(catDist,k, 5, None)
				clusters = {}
				for c, clusId in enumerate(clusArray):
					clusters.setdefault(clusId, set()).add(qList[c])
				for entry in clusters.values():
					cluster_list.append(list(entry))
					qStr = toString(entry,featMan)
					oFile.write(cat+'\t'+qStr+'\n')
				print 'Clust ',cat, len(clusters), error, opt
				# Progress indicator: every fiftieth clustered category.
				if i % 50 == 0:
					print i
				i+=1
		predictedSamePairsSet, predictedDifferentPairsSet = \
			getPairLabelsFromClusters(cluster_list,featMan)
		key = str(threshold)+'_'+str(noTerms)
		metrics[key] = getRecallPrecision(samePairsSet, differentPairsSet,\
			predictedSamePairsSet,\
			predictedDifferentPairsSet)
	for tcount, met in metrics.items():
		print tcount, met
	return metrics
开发者ID:vmanisha,项目名称:QueryExpansion,代码行数:52,代码来源:findCategoryClusters.py

示例7: cluster

# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import kmedoids [as 别名]
def cluster(D, k):
    """Cluster the items of the distance matrix D into k groups with k-medoids.

    Returns (labels, info). labels holds, for every item, the rank of its
    medoid id among the sorted unique medoid ids; the values are floats
    because the lookup table is created with np.zeros. info is a dict
    describing the run: method, init, k, the unique medoid ids, the value
    D[medoid, item] for every item, and the sum of those values.
    """
    import Pycluster as pcl
    medoid_of, _, _ = pcl.kmedoids(D, nclusters=k, npass=10, initialid=None)
    # Entry of D linking every item to the medoid it was assigned to.
    errors = np.array([ D[medoid, item] for item, medoid in enumerate(medoid_of) ])
    centroidids = np.unique(medoid_of)
    # Lookup table: medoid id -> position of that id inside centroidids.
    cmap = np.zeros(medoid_of.max()+1)
    cmap[centroidids] = np.arange(len(centroidids))
    labels = cmap[medoid_of]
    total_error = errors.sum()
    logger.debug('k-medoids (k=%i): %.2f.' % (k, total_error))
    info = { 'method': 'kmedoids',
             'init': 'random',
             'k': k,
             'centroidids': centroidids,
             'errors': errors,
             'error': total_error,
             'error-label': 'sum of distances' }
    return labels, info
开发者ID:gatagat,项目名称:stage-propagation,代码行数:19,代码来源:features_chaincode.py

示例8: kmedoids

# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import kmedoids [as 别名]
def kmedoids(m):
    """Run k-medoids (16 clusters, 5 passes) on m and renumber the labels.

    Pycluster identifies a cluster by an item index, so the raw labels are
    arbitrary values. They are renumbered 0, 1, 2, ... in order of first
    appearance so they follow the same naming convention as affinity
    propagation.

    Returns (labels, clusters): labels is the array returned by Pycluster,
    renumbered in place; clusters lists the original cluster ids, so
    clusters[new_label] is the original id.
    """
    labels, error, nfound = Pycluster.kmedoids(m, 16, 5)

    # Build the complete old-id -> new-id mapping from the untouched labels
    # first. Renumbering while scanning would let a freshly written small
    # number be mistaken for an original id that is processed later, which
    # merges distinct clusters.
    clusters = []
    new_id = {}
    for label in labels:
        if label not in new_id:
            new_id[label] = len(clusters)
            clusters.append(label)

    labels[:] = [new_id[label] for label in labels]
    return labels, clusters
开发者ID:anna-saplitski,项目名称:Alice-Parse-Trees,代码行数:21,代码来源:cluster.py

示例9: cluster

# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import kmedoids [as 别名]
    def cluster(self, num_cluster):
        """Group the categories of self.category_tfidf into num_cluster clusters.

        The categories are shuffled, a lower-triangular distance structure is
        built (row i holds the distances from category i to the i categories
        before it, computed by self.compute_distance) and handed to
        Pycluster.kmedoids.

        Raises Exception when the sum of squared tf-idf values of a category
        is zero.
        Returns a list of num_cluster lists of categories; cluster ids are
        numbered in order of first appearance.
        """
        category_tfidf = self.category_tfidf
        categories = list(category_tfidf)
        random.shuffle(categories)

        # Sum of squared tf-idf values per category, passed to compute_distance.
        tfidf_norms = {}
        for category, tfidf in category_tfidf.items():
            tfidf_norms[category] = sum(value**2 for value in tfidf.values())

        for category, norm in tfidf_norms.items():
            if not norm:
                raise Exception((category, category_tfidf[category]))

        distances = []
        for i, category1 in enumerate(categories):
            cat1_tfidf = category_tfidf[category1]
            row = array([0.0] * i)
            for j in range(i):
                category2 = categories[j]
                row[j] = self.compute_distance(cat1_tfidf, category_tfidf[category2], tfidf_norms[category1], tfidf_norms[category2])
            distances.append(row)

        clusterids, error, nfound = Pycluster.kmedoids(distances, num_cluster)
        print error

        category_clusters = [[] for _ in range(num_cluster)]

        print len(clusterids)
        print len(categories)
        print clusterids

        # Raw cluster id -> consecutive index, assigned on first sight.
        clusterid_map = {}
        for position, raw_id in enumerate(clusterids):
            if raw_id not in clusterid_map:
                clusterid_map[raw_id] = len(clusterid_map)
            category_clusters[clusterid_map[raw_id]].append(categories[position])

        return category_clusters
开发者ID:axiak,项目名称:trivia-helper,代码行数:43,代码来源:createindexes.py

示例10: Kmedoids

# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import kmedoids [as 别名]
def Kmedoids(num_patches, samples, progress=None):
  """Learn patches by selecting the k-Medoids cluster centers of the samples.

  The `Pycluster` library must be installed for this to work.

  :param int num_patches: number of clusters requested from k-Medoids
  :type samples: 2D array
  :param samples: example patches
  :param progress: unused
  :rtype: 2D array whose rows are taken from `samples`, so it has the same
     number of columns as `samples`.
  :return: the selected medoid rows, cast to `ACTIVATION_DTYPE`

  """
  logging.info("Learning %d prototypes per size by k-Medoids clustering" %
      num_patches)
  import Pycluster
  pairwise = Pycluster.distancematrix(samples)
  assignment, _, _ = Pycluster.kmedoids(pairwise, nclusters=num_patches)
  # Every entry of `assignment` is the index of the medoid of that sample's
  # cluster, so the distinct values are the row indices of the medoids.
  medoid_rows = np.unique(assignment)
  return samples[medoid_rows].astype(ACTIVATION_DTYPE)
开发者ID:mthomure,项目名称:glimpse-project,代码行数:24,代码来源:prototypes.py

示例11: range

# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import kmedoids [as 别名]
	
words=readwordlist.read("rt_words.csv")
vectWords=[]
for i in range(50):
	sysnet.setdefault(i,{'word':words[i],'sysnet':wn.synsets(words[i])})
	vectWords.append(i)


totalElement=len(vectWords)
totalClusters=10
distMatrix=numpy.ones((totalElement,totalElement),dtype=float)
for i in range(totalElement):
	for j in range(totalElement):
		distMatrix[i,j]=getDistance(i,j)

clusters=Pycluster.kmedoids(distMatrix,nclusters=totalClusters,npass=100)
print distMatrix
print clusters
groups={}
for i in range(len(clusters[0])):
	if clusters[0][i]<totalClusters :
		groups.setdefault(clusters[0][i],[]).append(sysnet[i]['word'])

for key,value in groups.items():
	print "\n***********************************\n"
	for v in value:
		print v
		
#print findclusters(vectWords,100)
#plt.show()
开发者ID:RafaelAlfaro,项目名称:SampleCode,代码行数:32,代码来源:clustering_exp2.py

示例12: generate_kmedoid

# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import kmedoids [as 别名]
	def generate_kmedoid(self,locationid):
		"""Run k-medoids (4 clusters, 100 passes) for the given location id.

		The matrix to cluster and the trend list both come from
		self.get_matrix(locationid).
		Returns (clusterid, trends_list), clusterid being the first value
		returned by Pycluster.kmedoids.
		"""
		matrix, trends = self.get_matrix(locationid)
		labels, _error, _nfound = Pycluster.kmedoids(matrix, nclusters=4, npass=100)
		return labels, trends
开发者ID:cchaplin,项目名称:TweetLyze,代码行数:7,代码来源:KMedoid.py

示例13: range

# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import kmedoids [as 别名]
        temp = []
        for i in range(0,len(points)):
            p2 = points[i] 
            temp.append ( distance_function(p1,p2) )
        distances.append (temp)
    return distances



# def timespan(list):



# Number of clusters the dataset is supposed to be partitioned into.
nb_clusters = 15
distances = get_distance_matrix(vectors, euclidean)
clusterid, error, nfound = Pycluster.kmedoids(distances, nclusters=nb_clusters, npass=100)

# Renumber the cluster ids: every id is replaced by its position in uniq_ids.
uniq_ids = list(set(clusterid))
_position_of = dict((val, pos) for pos, val in enumerate(uniq_ids))
new_ids = [_position_of[val] for val in clusterid]

# print uniq_ids
# print new_ids



#############################################
# new_ids  ->  index:clusterid 				#
# vectors  ->  index:location				#
开发者ID:lucaschenex,项目名称:Non-Volcanic-Tremor-Analysis,代码行数:33,代码来源:sensitivity.py

示例14: int

# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import kmedoids [as 别名]
filename, n = sys.argv[1], int( sys.argv[2] )

data = np.loadtxt( filename )
k = len(data)

# Calculate the distance matrix
m = np.zeros( k*k )
m.shape = ( k, k )

for i in range( 0, k ):
    for j in range( i, k ):
        d = dist( data[i], data[j] )
        m[i][j] = d
        m[j][i] = d

# Perform the actual clustering
clustermap, _, _ = pc.kmedoids( m, n, npass=20 )

# Find the indices of the points used as medoids, and the cluster masses
medoids = {}
for i in clustermap:
    medoids[i] = medoids.get(i,0) + 1

# Print points, grouped by cluster
for i in medoids.keys():
    print "Cluster=", i, " Mass=", medoids[i], " Centroid: ", data[i]

    for j in range( 0, len(data) ):
        if clustermap[j] == i:
            print "\t", data[j]
开发者ID:CeasarSS,项目名称:books,代码行数:32,代码来源:ch13_lst2.py

示例15: cluster_domains

# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import kmedoids [as 别名]
def cluster_domains( GDA, missing_da,  p, starting_time) :
    """Cluster the nodes of the domain graph.

    Every element of missing_da becomes a single-element cluster. When
    p.daonly is set, each connected component of GDA with more than p.minpts
    members is clustered with Optics on a distance matrix (1 - adjacency
    matrix, diagonal forced to 0): members labelled -1 become single-element
    clusters, every other label becomes one cluster. Components with at most
    p.minpts members are kept whole. When p.daonly is not set, the whole
    graph is clustered with Pycluster.kmedoids into p.kcluster clusters.

    Parameters
    ----------
        GDA : Graph
            undirected graph of domain similarities
        missing_da : list
            DA that are not in the graph (no edge, hence no similarity, with
            any other DA)
        p : argument parser object
            parameter object; daonly, minpts, epsilon, epsilon_p and kcluster
            are read here
        starting_time : int
            program starting time (not used in this function)
    Returns
    -------
        clusters : list
            a list of lists containing the clustered DA
    """
    # each missing DA forms its own cluster
    clusters = [ [da] for da in missing_da ]

    if not p.daonly :
        nodes = GDA.nodes( )
        # networkx returns a numpy matrix; convert it to a plain ndarray
        bigmat = 1.0 - np.array( nx.to_numpy_matrix( GDA, nodelist=nodes ) )
        bigmat.flat[ :: bigmat.shape[0] +1 ] = 0 # diagonal to 0
        clusterid, error, nfound = Pycluster.kmedoids (bigmat, nclusters=p.kcluster, npass=10 )
        for l in np.unique( clusterid ) :
            members = [ nodes[i] for i, cid in enumerate( clusterid ) if cid == l ]
            clusters.append( members )
        return clusters

    # to save memory, Optics is only run on one connected component at a time
    for comp in nx.connected_components( GDA ) :
        if len( comp ) <= p.minpts :
            # a component with no more members than the minpts cutoff is
            # kept as a single cluster
            clusters.append( comp )
            continue
        H = GDA.subgraph( comp )
        # networkx returns a numpy matrix; convert it to a plain ndarray
        mat = 1.0 - np.array( nx.to_numpy_matrix( H, nodelist=comp ) )
        mat.flat[ :: mat.shape[0] + 1 ] = 0  # diag to 0
        # run OPTICS on the distance matrix
        optics = Optics(  p.minpts, epsilon=p.epsilon )
        ordered, reachability, core_dist = optics.run( mat )
        labels = optics.cluster( p.epsilon_p )
        for k in set( labels ) :
            ind = np.where( labels == k )[0]
            if k == -1 :
                # members labelled -1 each get a cluster of their own
                clusters.extend( [comp[i]] for i in ind )
            else :
                clusters.append(  [ comp[i] for i in ind ]  )

    return clusters
开发者ID:T-B-F,项目名称:porthoda,代码行数:64,代码来源:proteinorthoDom_algo.py


注:本文中的Pycluster.kmedoids方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。