本文整理汇总了Python中Pycluster.kmedoids方法的典型用法代码示例。如果您正苦于以下问题:Python Pycluster.kmedoids方法的具体用法?Python Pycluster.kmedoids怎么用?Python Pycluster.kmedoids使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块Pycluster的用法示例。
在下文中一共展示了Pycluster.kmedoids方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: clusterSessionsKmed
# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import kmedoids [as 别名]
def clusterSessionsKmed(featMan, weightFile):
    """Cluster queries with k-medoids and return them grouped by cluster.

    The distance data is loaded with ``getWeightMatrixForKMedFromFile`` and
    passed to ``clust.kmedoids`` (10 passes).  The number of clusters asked
    for is ``(len(weightList) + 1) // k`` with ``k == 4``, but never less
    than 1.

    Args:
        featMan: object providing ``returnKeys()``, ``returnLastId()`` and
            ``returnQuery(index)``.
        weightFile: forwarded to ``getWeightMatrixForKMedFromFile``.

    Returns:
        A list of sets.  Each set holds the queries (only those with
        ``len(query) > 1``) whose items received the same cluster id.
    """
    data = featMan.returnKeys()
    weightList = getWeightMatrixForKMedFromFile(featMan.returnLastId(),
                                                weightFile, data)
    cnt = 0
    kclusters = {}
    for k in range(4, 5, 2):  # this range only ever yields k == 4
        # Floor division keeps the cluster count an int on Python 2 and 3.
        nclusters = (len(weightList) + 1) // k or 1
        clusArray, error, opt = clust.kmedoids(weightList, nclusters, 10, None)
        print('%s %s' % (error, len(clusArray)))
        clusters = {}
        for idx, clusId in enumerate(clusArray):
            q = featMan.returnQuery(idx)
            if len(q) > 1:
                clusters.setdefault(clusId, set()).add(q)
                cnt += 1
        # list(...) so a real list is stored on Python 3 as well.
        kclusters[k] = list(clusters.values())
        print('Cluster with kmed  %s %s  queries' % (len(clusters), cnt))
    return kclusters[4]
示例2: cluster_kmedoids
# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import kmedoids [as 别名]
def cluster_kmedoids(sessions, clusters, distance_fn=string_similarity.jaccard_distance):
    """
    Partition ``sessions`` into ``clusters`` groups with k-medoids.

    The distances are produced by ``compute_distances(sessions, distance_fn)``
    before ``Pycluster.kmedoids`` is called; needing that full set of
    distances is what makes this approach slow.

    Returns the ``(clusterids, error, nfound)`` triple from
    ``Pycluster.kmedoids``.
    """
    pairwise = compute_distances(sessions, distance_fn)
    result = Pycluster.kmedoids(pairwise, nclusters=clusters)
    assignment, total_error, times_found = result
    return assignment, total_error, times_found
示例3: cluster_kmedoids
# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import kmedoids [as 别名]
def cluster_kmedoids(self, k=2, npass=50):
    """Use the distances in ``self.zd`` to build a partition into ``k`` classes.

    Args:
        k: number of classes requested from ``pc.kmedoids``.
        npass: number of iterations, forwarded to ``pc.kmedoids`` as ``npass``.

    Returns:
        The result of ``partition(assignment, self.mat)``.
    """
    # Only the cluster assignment is needed; the error and the
    # found-count returned alongside it are ignored.
    assignment, _error, _nfound = pc.kmedoids(self.zd, k, npass=npass)
    return partition(assignment, self.mat)
示例4: clusterSessionsPre
# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import kmedoids [as 别名]
def clusterSessionsPre(catQueryDist, featMan, weightMatrix):
    """Cluster the queries of every category separately with k-medoids.

    For each category holding more than one query, a distance matrix is
    built with ``getWeightMatrixForKMed`` and clustered with
    ``clust.kmedoids`` (5 passes) into ``len(qSet) // 4`` clusters (at
    least 1).

    Args:
        catQueryDist: mapping of category -> collection of query ids.
        featMan: object providing ``returnQuery(query_id)``.
        weightMatrix: forwarded to ``getWeightMatrixForKMed``.

    Returns:
        A list of clusters over all categories.  Each cluster is a list of
        the values returned by ``featMan.returnQuery`` whose length is > 1;
        a cluster may be empty if none of its queries pass that filter.
    """
    tclusters = {}
    print(len(catQueryDist))
    for termCount in range(4, 5):  # this range only ever yields 4
        tclusters[termCount] = []
        for cat, qSet in catQueryDist.items():
            if len(qSet) <= 1:
                continue
            # Floor division keeps k an int on Python 2 and 3.
            k = len(qSet) // termCount or 1
            qList = list(qSet)
            catDist = getWeightMatrixForKMed(qList, weightMatrix)
            clusArray, error, opt = clust.kmedoids(catDist, k, 5, None)
            clusters = {}
            for idx, clusId in enumerate(clusArray):
                # The cluster entry is created even when the query is
                # filtered out below.
                members = clusters.setdefault(clusId, [])
                qc = featMan.returnQuery(qList[idx])
                if len(qc) > 1:
                    members.append(qc)
            tclusters[termCount].extend(clusters.values())
    print(len(tclusters[4]))
    return tclusters[4]
示例5: clusterCatWithMediods
# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import kmedoids [as 别名]
def clusterCatWithMediods(lowerLimit, upperLimit, featMan, weightMatrix,
                          samePairsSet, differentPairsSet, catQueryDist,
                          outFile='cat-clusters-with-med.txt'):
    """Cluster each category with k-medoids and score the result.

    For every ``noTerms`` in ``range(lowerLimit, upperLimit)`` each category
    with more than one query is clustered into ``len(qSet) // noTerms``
    clusters (at least 1).  Every cluster is written as
    ``cat<TAB>toString(cluster, featMan)`` to the file
    ``outFile + str(noTerms) + '.txt'``, and the clusters of all categories
    are scored with ``getSamePairPrecisionRecallF1Calculator``.

    Args:
        lowerLimit, upperLimit: bounds of the ``noTerms`` range.
        featMan: forwarded to ``toString`` and ``getPairLabelsFromClusters``.
        weightMatrix: forwarded to ``getWeightMatrixForKMed``.
        samePairsSet: reference pairs used for scoring.
        differentPairsSet: not used by this function.
        catQueryDist: mapping of category -> collection of queries.
        outFile: prefix of the per-``noTerms`` output files.

    Returns:
        dict mapping ``noTerms`` to the value returned by
        ``getSamePairPrecisionRecallF1Calculator``.
    """
    # ``outFile`` itself was opened for writing but never used or closed;
    # keep the create/truncate side effect without leaking the handle.
    open(outFile, 'w').close()
    metrics = {}
    for noTerms in range(lowerLimit, upperLimit):
        cluster_list = []
        i = 0
        with open(outFile + str(noTerms) + '.txt', 'w') as oFile:
            for cat, qSet in catQueryDist.items():
                if len(qSet) > 1:
                    # Floor division keeps k an int on Python 2 and 3.
                    k = len(qSet) // noTerms or 1
                    qList = sorted(qSet, reverse=True)
                    catDist = getWeightMatrixForKMed(qList, weightMatrix,
                                                     'cat_kmediods')
                    clusArray, error, opt = clust.kmedoids(catDist, k, 5, None)
                    clusters = {}
                    # NOTE(review): labels are read from index 1 onwards and
                    # paired with qList[c - 1], so clusArray[0] is ignored.
                    # Presumably the 'cat_kmediods' matrix has a leading
                    # extra row -- confirm against getWeightMatrixForKMed.
                    for c in range(1, len(clusArray)):
                        clusters.setdefault(clusArray[c], set()).add(qList[c - 1])
                    for entry in clusters.values():
                        cluster_list.append(list(entry))
                        qStr = toString(entry, featMan)
                        oFile.write(cat + '\t' + qStr + '\n')
                    print('Clust category %s length %s Queries %s k %s error %s %s'
                          % (cat, len(clusters), len(qSet), k, error, opt))
                # Progress output every fifth category.
                if i % 5 == 0:
                    print(i)
                i += 1
        predictedSamePairsSet, predictedDifferentPairsSet = \
            getPairLabelsFromClusters(cluster_list, featMan)
        metrics[noTerms] = getSamePairPrecisionRecallF1Calculator(
            samePairsSet, predictedSamePairsSet)
    for tcount, met in metrics.items():
        print('%s %s' % (tcount, met))
    return metrics
示例6: clusterCatWithMediodsAndNetwork
# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import kmedoids [as 别名]
def clusterCatWithMediodsAndNetwork(threshold,
                                    lowerLimit, upperLimit, featMan,
                                    weightMatrix, samePairsSet,
                                    differentPairsSet, catQueryDist,
                                    catNetwork,
                                    outFile='cat-clusters-with-med.txt'):
    """Cluster each category with k-medoids and compute recall/precision.

    For every ``noTerms`` in ``range(lowerLimit, upperLimit, 2)`` each
    category with more than one query is clustered into
    ``len(qSet) // noTerms`` clusters (at least 1).  Every cluster is written
    as ``cat<TAB>toString(cluster, featMan)`` to the file
    ``outFile + str(noTerms) + '.txt'`` and the clusters of all categories
    are scored with ``getRecallPrecision``.

    Args:
        threshold: only used to build the keys of the returned dict.
        lowerLimit, upperLimit: bounds of the ``noTerms`` range (step 2).
        featMan: forwarded to ``toString`` and ``getPairLabelsFromClusters``.
        weightMatrix: forwarded to ``getWeightMatrixForKMed``.
        samePairsSet, differentPairsSet: reference pairs used for scoring.
        catQueryDist: mapping of category -> collection of queries.
        catNetwork: not used by this function.
        outFile: prefix of the per-``noTerms`` output files.

    Returns:
        dict mapping ``'<threshold>_<noTerms>'`` to the value returned by
        ``getRecallPrecision``.
    """
    metrics = {}
    for noTerms in range(lowerLimit, upperLimit, 2):
        cluster_list = []
        i = 0
        # ``with`` closes the output file even if clustering raises.
        with open(outFile + str(noTerms) + '.txt', 'w') as oFile:
            for cat, qSet in catQueryDist.items():
                if len(qSet) > 1:
                    # Floor division keeps k an int on Python 2 and 3.
                    k = len(qSet) // noTerms or 1
                    qList = list(qSet)
                    catDist = getWeightMatrixForKMed(qList, weightMatrix)
                    clusArray, error, opt = clust.kmedoids(catDist, k, 5, None)
                    clusters = {}
                    for idx, clusId in enumerate(clusArray):
                        clusters.setdefault(clusId, set()).add(qList[idx])
                    for entry in clusters.values():
                        cluster_list.append(list(entry))
                        qStr = toString(entry, featMan)
                        oFile.write(cat + '\t' + qStr + '\n')
                    print('Clust  %s %s %s %s' % (cat, len(clusters), error, opt))
                # Progress output every fiftieth category.
                if i % 50 == 0:
                    print(i)
                i += 1
        predictedSamePairsSet, predictedDifferentPairsSet = \
            getPairLabelsFromClusters(cluster_list, featMan)
        key = str(threshold) + '_' + str(noTerms)
        metrics[key] = getRecallPrecision(samePairsSet, differentPairsSet,
                                          predictedSamePairsSet,
                                          predictedDifferentPairsSet)
    for tcount, met in metrics.items():
        print('%s %s' % (tcount, met))
    return metrics
示例7: cluster
# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import kmedoids [as 别名]
def cluster(D, k):
    """Run k-medoids on the distance matrix ``D`` and renumber the labels.

    ``pcl.kmedoids`` is called with 10 passes and no initial assignment.
    Its labels are used as row indices into ``D`` (``D[label, item]``) to
    get each item's error, and are then mapped onto ``0 .. n_clusters - 1``
    following the sorted order of the distinct original labels.

    Returns:
        (labels, info): ``labels`` is the renumbered assignment; it is a
        float array because the lookup table is created with ``np.zeros``'
        default dtype.  ``info`` is a dict describing the run, including the
        original distinct labels (``'centroidids'``), the per-item errors
        and their sum.
    """
    import Pycluster as pcl

    labels, _, _ = pcl.kmedoids(D, nclusters=k, npass=10, initialid=None)
    errors = np.array([D[medoid, item] for item, medoid in enumerate(labels)])
    centroidids = np.unique(labels)
    # Lookup table: original label -> position among the sorted labels.
    cmap = np.zeros(labels.max() + 1)
    cmap[centroidids] = np.arange(len(centroidids))
    labels = cmap[labels]
    total_error = errors.sum()
    logger.debug('k-medoids (k=%i): %.2f.' % (k, total_error))
    info = {'method': 'kmedoids',
            'init': 'random',
            'k': k,
            'centroidids': centroidids,
            'errors': errors,
            'error': total_error,
            'error-label': 'sum of distances'}
    return labels, info
示例8: kmedoids
# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import kmedoids [as 别名]
def kmedoids(m):
    """Run k-medoids (16 clusters, 5 passes) on ``m`` and renumber the labels.

    The labels returned by ``Pycluster.kmedoids`` are replaced, in place, by
    consecutive ids ``0, 1, 2, ...`` assigned in order of first appearance,
    so the naming matches the convention used for affinity propagation.

    Returns:
        (labels, clusters): ``labels`` is the renumbered assignment and
        ``clusters`` lists the original labels, where ``clusters[new_id]``
        is the original label that was renamed to ``new_id``.
    """
    labels, error, nfound = Pycluster.kmedoids(m, 16, 5)

    # Decide every new id up front, in order of first appearance.
    new_id = {}
    clusters = []
    for label in labels:
        if label not in new_id:
            new_id[label] = len(clusters)
            clusters.append(label)

    # Relabel in a single pass through the mapping.  Rewriting the array one
    # cluster at a time would mix up already-assigned new ids with original
    # labels that are still waiting to be processed (e.g. [5, 0, 5, 0] would
    # collapse into a single cluster).
    for position, label in enumerate(labels):
        labels[position] = new_id[label]
    return labels, clusters
示例9: cluster
# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import kmedoids [as 别名]
def cluster(self, num_cluster):
    """Group the categories of ``self.category_tfidf`` into k-medoids clusters.

    The categories are shuffled, pairwise distances are computed with
    ``self.compute_distance`` and stored as a ragged lower triangle (row
    ``i`` holds the ``i`` distances to the categories before it), and the
    result is clustered with ``Pycluster.kmedoids``.

    Args:
        num_cluster: number of clusters requested.

    Returns:
        A list of ``num_cluster`` lists of categories; clusters are numbered
        in order of first appearance in the assignment.

    Raises:
        Exception: if the sum of squared tf-idf values of a category is
            zero; the argument is ``(category, tfidf)``.
    """
    category_tfidf = self.category_tfidf
    categories = list(category_tfidf)
    random.shuffle(categories)
    # Sum of squared tf-idf values per category.
    tfidf_norms = {category: sum(value ** 2 for value in tfidf.values())
                   for category, tfidf in category_tfidf.items()}
    for category, norm in tfidf_norms.items():
        if not norm:
            raise Exception((category, category_tfidf[category]))

    distances = []
    for i, category1 in enumerate(categories):
        cat1_tfidf = category_tfidf[category1]
        row_array = array([0.0] * i)
        # Only the entries below the diagonal are filled in.
        for j in range(i):
            category2 = categories[j]
            row_array[j] = self.compute_distance(cat1_tfidf,
                                                 category_tfidf[category2],
                                                 tfidf_norms[category1],
                                                 tfidf_norms[category2])
        distances.append(row_array)

    clusterids, error, nfound = Pycluster.kmedoids(distances, num_cluster)
    print(error)
    category_clusters = [[] for _ in range(num_cluster)]
    print(len(clusterids))
    print(len(categories))
    print(clusterids)
    # Map each raw cluster id to a dense index in order of first appearance.
    clusterid_map = {}
    for i, raw_id in enumerate(clusterids):
        category_id = clusterid_map.setdefault(raw_id, len(clusterid_map))
        category_clusters[category_id].append(categories[i])
    return category_clusters
示例10: Kmedoids
# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import kmedoids [as 别名]
def Kmedoids(num_patches, samples, progress=None):
    """Choose patches among the samples by k-Medoids clustering.

    The `Pycluster` library must be installed.

    :param int num_patches: number of patches to create
    :type samples: 2D array
    :param samples: example patches
    :param progress: ignored
    :rtype: 2D array with `num_patches` rows and N columns, where N is the number
       of columns in `samples`.
    :return: the rows of `samples` selected as medoids, cast to
       `ACTIVATION_DTYPE`
    """
    logging.info("Learning %d prototypes per size by k-Medoids clustering" %
                 num_patches)
    import Pycluster
    pairwise = Pycluster.distancematrix(samples)
    assignment, _, _ = Pycluster.kmedoids(pairwise, nclusters=num_patches)
    # Each distinct value in the assignment is used as the row index of one
    # cluster's medoid; `num_patches` distinct values are expected.
    medoid_rows = np.unique(assignment)
    chosen = samples[medoid_rows]
    return chosen.astype(ACTIVATION_DTYPE)
示例11: range
# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import kmedoids [as 别名]
# Cluster the first 50 words of rt_words.csv with k-medoids, using
# getDistance(i, j) as the pairwise distance, and print each group of words.
words = readwordlist.read("rt_words.csv")
vectWords = []
for i in range(50):
    sysnet.setdefault(i, {'word': words[i], 'sysnet': wn.synsets(words[i])})
    vectWords.append(i)
totalElement = len(vectWords)
totalClusters = 10
distMatrix = numpy.ones((totalElement, totalElement), dtype=float)
for i in range(totalElement):
    for j in range(totalElement):
        distMatrix[i, j] = getDistance(i, j)
clusters = Pycluster.kmedoids(distMatrix, nclusters=totalClusters, npass=100)
print(distMatrix)
print(clusters)
groups = {}
# clusters[0] is the per-item assignment.  Pycluster numbers each cluster by
# the item index of its medoid (0 .. totalElement - 1), not 0 .. k - 1, so
# the ids must not be filtered against totalClusters: doing so drops every
# cluster whose medoid index is >= totalClusters.
for i, cluster_id in enumerate(clusters[0]):
    groups.setdefault(cluster_id, []).append(sysnet[i]['word'])
for key, value in groups.items():
    print("\n***********************************\n")
    for v in value:
        print(v)
#print findclusters(vectWords,100)
#plt.show()
示例12: generate_kmedoid
# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import kmedoids [as 别名]
def generate_kmedoid(self, locationid):
    """Cluster the matrix for ``locationid`` into 4 k-medoids clusters.

    The matrix and the accompanying list come from
    ``self.get_matrix(locationid)``; ``Pycluster.kmedoids`` is run with 100
    passes.

    Returns:
        (clusterid, trends_list): the cluster assignment and the list
        returned by ``self.get_matrix``.
    """
    matrix, trends_list = self.get_matrix(locationid)
    # The error and found-count of the clustering are not needed here.
    assignment, _error, _nfound = Pycluster.kmedoids(matrix, nclusters=4,
                                                     npass=100)
    return assignment, trends_list
示例13: range
# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import kmedoids [as 别名]
temp = []
for i in range(0,len(points)):
p2 = points[i]
temp.append ( distance_function(p1,p2) )
distances.append (temp)
return distances
# def timespan(list):
# Number of clusters the dataset is supposed to be partitioned into.
nb_clusters = 15
distances = get_distance_matrix(vectors, euclidean)
clusterid, error, nfound = Pycluster.kmedoids(
    distances,
    npass=100,
    nclusters=nb_clusters,
)
# Renumber the cluster ids: every id is replaced by its position in the
# list of distinct ids.
uniq_ids = list(set(clusterid))
new_ids = list(map(uniq_ids.index, clusterid))
# print uniq_ids
# print new_ids
#############################################
# new_ids -> index:clusterid #
# vectors -> index:location #
示例14: int
# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import kmedoids [as 别名]
# Command line: <data file> <number of clusters>
filename, n = sys.argv[1], int(sys.argv[2])
data = np.loadtxt(filename)
k = len(data)  # number of data points

# Calculate the (symmetric) distance matrix
m = np.zeros((k, k))
for i in range(k):
    for j in range(i, k):
        d = dist(data[i], data[j])
        m[i][j] = d
        m[j][i] = d

# Perform the actual clustering
clustermap, _, _ = pc.kmedoids(m, n, npass=20)

# Find the indices of the points used as medoids, and the cluster masses
medoids = {}
for i in clustermap:
    medoids[i] = medoids.get(i, 0) + 1

# Print points, grouped by cluster.  The format strings reproduce the exact
# spacing of the Python 2 print statement (which adds no space after a tab).
for i in medoids.keys():
    print("Cluster= %s  Mass= %s  Centroid:  %s" % (i, medoids[i], data[i]))
    for j in range(len(data)):
        if clustermap[j] == i:
            print("\t%s" % (data[j],))
示例15: cluster_domains
# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import kmedoids [as 别名]
def cluster_domains(GDA, missing_da, p, starting_time):
    """Cluster the domain-architecture (DA) similarity graph.

    Distances are ``1 - adjacency matrix`` with the diagonal forced to 0.
    With ``p.daonly`` set, each connected component with more than
    ``p.minpts`` members is clustered with OPTICS; members labelled ``-1``
    become singleton clusters, and smaller components are kept whole as one
    cluster.  Otherwise the whole graph is clustered with k-medoids
    (``p.kcluster`` clusters, 10 passes).

    Parameters
    ----------
    GDA : Graph
        undirected graph of domain similarities
    missing_da : list
        DA not in the graph (no edge, i.e. no similarity to any other DA);
        each one becomes its own cluster
    p : argument parser object
        parameter object; ``daonly``, ``minpts``, ``epsilon``, ``epsilon_p``
        and ``kcluster`` are read here
    starting_time : int
        program starting time (not used by this function)

    Returns
    -------
    clusters : list
        a list of lists containing the clustered DA
    """
    # add missing da as a self cluster
    clusters = [[da] for da in missing_da]
    if p.daonly:
        # to save memory OPTICS is only run on connected components
        for comp in nx.connected_components(GDA):
            # Components may come back as sets; a list gives a fixed order
            # that is shared by the matrix rows and the index lookups below.
            comp = list(comp)
            if len(comp) > p.minpts:
                subgraph = GDA.subgraph(comp)
                # networkx returns a numpy matrix; convert to a plain array
                mat = 1.0 - np.array(nx.to_numpy_matrix(subgraph, nodelist=comp))
                mat.flat[::mat.shape[0] + 1] = 0  # diag to 0
                # run OPTICS on the distance matrix; the values returned by
                # run() are not needed here
                optics = Optics(p.minpts, epsilon=p.epsilon)
                optics.run(mat)
                labels = optics.cluster(p.epsilon_p)
                for k in set(labels):
                    ind = np.where(labels == k)[0]
                    if k == -1:
                        # label -1: every member becomes its own cluster
                        clusters.extend([comp[i]] for i in ind)
                    else:
                        clusters.append([comp[i] for i in ind])
            else:
                # components with no more than minpts members are kept
                # together as a single cluster
                clusters.append(comp)
    else:
        # A list so that nodes can be indexed by matrix row below.
        nodes = list(GDA.nodes())
        # networkx returns a numpy matrix instead of a numpy.ndarray
        bigmat = 1.0 - np.array(nx.to_numpy_matrix(GDA, nodelist=nodes))
        bigmat.flat[::bigmat.shape[0] + 1] = 0  # diagonal to 0
        clusterid, error, nfound = Pycluster.kmedoids(bigmat,
                                                      nclusters=p.kcluster,
                                                      npass=10)
        for l in np.unique(clusterid):
            tmp_clust = [nodes[i] for i in range(clusterid.shape[0])
                         if clusterid[i] == l]
            clusters.append(tmp_clust)
    return clusters