本文整理汇总了 Python 中 Pycluster 模块的典型用法代码示例。如果您正苦于以下问题:Python Pycluster 模块的具体用法?Python Pycluster 怎么用?Python Pycluster 使用的例子?那么恭喜您,这里精选的代码示例或许可以为您提供帮助。
在下文中一共展示了 Pycluster 模块的 15 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: findcenters
def findcenters(x, n=1000, k=6):
    """Average the k-means centroids of ``x`` over ``n`` independent runs.

    Every run performs one single-pass clustering (``method="a"``,
    ``dist="e"``, ``npass=1``) and orders the resulting centroids by
    decreasing distance from the origin, so that centroids from different
    runs are paired up by rank before they are averaged.

    Parameters:
        x: 2-D data set exposing ``shape``; rows are clustered
            (``transpose=0``) and ``x.shape[1]`` is the dimensionality.
        n: number of independent clustering runs to average over.
        k: number of clusters requested in every run.

    Returns:
        DataFrame of shape ``(k, x.shape[1])``; row ``j`` is the mean, over
        all runs, of the centroid ranked ``j``-th farthest from the origin.
    """
    m = x.shape[1]
    # Accumulate in a plain array: the former ``DataFrame.ix`` indexer no
    # longer exists in current pandas, and element-wise DataFrame updates
    # inside the loop were needlessly slow.
    totals = np.zeros(shape=(k, m))
    for _run in range(n):
        labels, _, _ = Pycluster.kcluster(x, nclusters=k, transpose=0, method="a", dist="e", npass=1)
        center, _ = Pycluster.clustercentroids(x, clusterid=labels)
        # Rank centroids by their Euclidean norm (distance to the origin),
        # farthest first, so rows are comparable across runs.
        ranked = sorted(center, key=np.linalg.norm, reverse=True)
        for j in range(k):
            totals[j, :] += ranked[j]
    return DataFrame(totals) / n
示例2: _G
def _G(self, data, K):
    """Return the negated Euclidean distance from every column of ``data``
    to each of ``K`` k-means centroids.

    The columns of ``data`` are the items being clustered (the clustering
    is run on ``data.T``). The result has shape ``(K, data.shape[1])``;
    entry ``[k, i]`` is minus the distance between column ``i`` and
    centroid ``k``.
    """
    samples = data.T
    assignment, _, _ = Pycluster.kcluster(samples, K)
    centroids, _ = Pycluster.clustercentroids(samples, clusterid=assignment)
    # One centroid per column, matching the column-wise layout of ``data``.
    centroid_columns = centroids.T
    G = zeros((K, data.shape[1]))
    for k in range(K):
        offset = data - expand_dims(centroid_columns[:, k], axis=1)
        squared = multiply(offset, offset)
        G[k, :] = -sqrt(sum(squared, axis=0))
    return G
示例3: clustering
def clustering(file_path, k, dist_measure, PLOT):
    """
    Run K-means on the rows of a comma-delimited data file.
    @param file_path: Path of the input data file (comma separated).
    @param k: Requested number of centers; it is passed through set_k before use.
    @param dist_measure: Distance code handed to Pycluster.kcluster as ``dist``.
    @param PLOT: Plot switch; when it is exactly False the function returns without drawing.
    @return: Cluster ids of all data points, or [-1] when the loaded data is one-dimensional.
    """
    data = numpy.genfromtxt(file_path, delimiter=',')
    if data.ndim == 1:
        return [-1]

    n_points = len(data)
    print("-- Processing file: " + file_path + " -- Data points: " + str(n_points))
    print("-- Start clustering")

    k = set_k(n_points, k)
    ite_num = method_name(n_points)

    # The actual K-means run.
    cluster_id, _, _ = Pycluster.kcluster(
        data, nclusters=k, mask=None, weight=None, transpose=0,
        npass=ite_num, method='a', dist=dist_measure, initialid=None)
    if PLOT is False:
        return cluster_id

    # Everything below only draws the clustering result.
    centroids, _ = Pycluster.clustercentroids(data, clusterid=cluster_id)
    if PLOT:
        # Project points and centroids with the same PCA before plotting.
        data_pca = mlab.PCA(data)
        cutoff = data_pca.fracs[1]
        data_2d = data_pca.project(data, minfrac=cutoff)
        centroids_2d = data_pca.project(centroids, minfrac=cutoff)
    else:
        # Falsy but not-False switch: plot the first two raw columns.
        data_2d = data
        centroids_2d = centroids

    color = ['#2200CC', '#D9007E', '#FF6600', '#FFCC00', '#ACE600', '#0099CC',
             '#8900CC', '#FF0000', '#FF9900', '#FFFF00', '#00CC01', '#0055CC']
    for i in range(k):
        members = cluster_id == i
        scatter(data_2d[members, 0], data_2d[members, 1], color=color[i % 12])
    plot(centroids_2d[:, 0], centroids_2d[:, 1], 'sg', markersize=8)
    show()
    return cluster_id
示例4: cluster
def cluster(parser, k):
    """
    Cluster the parser's sessions into k groups.

    Sessions accepted by config.session_filter_fn are converted to
    Markov-chain models and clustered with Pycluster.kcluster.

    Returns a tuple (clusters, sse): clusters maps every cluster id to the
    list of sessions assigned to it, and sse is the second value returned
    by Pycluster.kcluster.
    """
    # index number for every page
    code_book = parser.get_data_encoding(page_min_occurance=5)

    # keep only the page-visit sequences that pass the configured filter
    kept_sessions = []
    for session in parser.get_simple_sessions():
        if config.session_filter_fn(session):
            kept_sessions.append(session)

    # Markov chains with estimated transition probabilities; a binary
    # visited-page vector model or k-medoids on Jaccard distance are the
    # alternatives that were tried here before.
    models = session_modeling.convert_sessions_to_markov(kept_sessions, code_book, bayes=False)
    assignments, sse, _ = Pycluster.kcluster(models, k, method='a', dist='e')

    clusters = {}
    for session, cluster_id in zip(kept_sessions, assignments):
        if cluster_id not in clusters:
            clusters[cluster_id] = []
        clusters[cluster_id].append(session)
    return clusters, sse
示例5: cluster_kmedoids
def cluster_kmedoids(sessions, clusters, distance_fn=string_similarity.jaccard_distance):
    """
    Cluster sessions with k-medoids.

    A full distance matrix is computed first via compute_distances, which
    makes this approach slow. Returns the three values produced by
    Pycluster.kmedoids unchanged: (clusterids, error, nfound).
    """
    distance_matrix = compute_distances(sessions, distance_fn)
    medoid_ids, total_error, times_found = Pycluster.kmedoids(distance_matrix, nclusters=clusters)
    return medoid_ids, total_error, times_found
示例6: testPricesDiffsVecsKmeansClustering
def testPricesDiffsVecsKmeansClustering(self):
    """Check that k-means on price-difference vectors yields the expected
    three groups."""
    prices_diffs_vecs = utils.make_prices_diffs_vecs(self.data1)
    labels, _, _ = Pycluster.kcluster(prices_diffs_vecs, 3, npass=100)
    clusters = utils.make_groups_from_labels(labels, self.data1)

    # Expected grouping, up to the numbering of the groups. With npass=100
    # a different outcome is considered very unlikely, but k-means is
    # randomised, so it is not strictly impossible.
    expected_groups = [sorted(group) for group in (['E'], ['A', 'D'], ['B', 'C'])]

    num_matches = 0
    for members in clusters.values():
        members.sort()
        num_matches += sum(1 for group in expected_groups if members == group)

    # Three matches together with exactly three produced clusters mean the
    # two groupings are identical.
    self.assertEqual(num_matches, 3)
    self.assertEqual(len(clusters), 3)
示例7: suggest
def suggest(self, word):
    """Return a diverse list of indexed words similar to ``word``.

    Every entry of ``self.ndx`` is scored against the analysed query with
    ``self.compute_similarity``. The candidates are ordered from highest
    to lowest score, their vectors are k-means clustered, and the first
    (best scoring) word of each cluster is returned, in that order.

    Parameters:
        word: the query word, passed to ``self.analyze``.

    Returns:
        List of suggested words, at most one per cluster (at most 40).
        An empty index yields an empty list.
    """
    query = self.analyze(word)

    ranked = []
    for nword, nv in self.ndx.items():
        wsim = self.compute_similarity([query, nv])
        ranked.append((wsim, nword, self.as_vector(nv)))
    if not ranked:
        # Nothing to cluster; kcluster cannot run on an empty data set.
        return []

    # Best match first.
    ranked.sort()
    ranked.reverse()
    vectors = [vec for (_sim, _word, vec) in ranked]
    words = [candidate for (_sim, candidate, _vec) in ranked]

    # Never request more clusters than there are candidates.
    nclusters = min(40, len(ranked))
    labels, _, _ = Pycluster.kcluster(vectors, nclusters)

    suggestions = []
    used_groups = set()
    for label, candidate in zip(labels, words):
        if label not in used_groups:
            suggestions.append(candidate)
            used_groups.add(label)
    return suggestions
示例8: findk
def findk(x, n=1000, minK=2, maxK=20):
    """Collect the k-means error for every cluster count in [minK, maxK].

    For each candidate number of clusters the data is clustered with
    ``npass=n`` and the error reported by ``Pycluster.kcluster`` (its
    second return value) is recorded. Each cluster count is printed as it
    finishes and the full error list is printed at the end.

    Parameters:
        x: data set handed to ``Pycluster.kcluster`` (``transpose=0``).
        n: value passed as ``npass`` to every clustering call.
        minK: smallest number of clusters to try.
        maxK: largest number of clusters to try (inclusive).

    Returns:
        List of errors, one per cluster count from ``minK`` to ``maxK``;
        empty when ``minK > maxK``. Previously the list was only printed.
    """
    errors = []
    for nclusters in range(minK, maxK + 1):
        _, error, _ = Pycluster.kcluster(x, nclusters=nclusters, transpose=0, method="a", dist="e", npass=n)
        errors.append(error)
        # progress indicator
        print(nclusters)
    print(errors)
    return errors
示例9: clusters
def clusters(labels, data, k):
    """Build the co-membership matrix of one k-means run.

    ``data`` is clustered into ``k`` groups with a single pass; entry
    ``[i, j]`` of the result is 1 when items ``i`` and ``j`` were placed
    in the same cluster and 0 otherwise.

    Parameters:
        labels: sequence whose length fixes the size of the matrix; only
            ``len(labels)`` is used.
        data: data set handed to ``Pycluster.kcluster``.
        k: number of clusters.

    Returns:
        ``float32`` array of shape ``(len(labels), len(labels))``.

    Raises:
        IndexError: if ``labels`` has more entries than the clustering
            produced assignments.
    """
    assignments = numpy.asarray(Pycluster.kcluster(data, k, npass=1)[0])
    count = len(labels)
    if count > len(assignments):
        raise IndexError("labels has more entries than there are clustered items")
    ids = assignments[:count]
    # One broadcast comparison replaces the former nested Python loops.
    nx = (ids[:, None] == ids[None, :]).astype(numpy.float32)
    print("%s  of  %s" % (k, count))
    return nx
示例10: getlabels
def getlabels(x, y, n=1000, k=8):
    """Cluster ``y`` with k-means and store the labels in ``x["group"]``.

    Parameters:
        x: object supporting ``.loc`` assignment; it receives a ``"group"``
            column holding the cluster label of every row and is modified
            in place.
        y: data to cluster, or the string ``"none"`` (``None`` is accepted
            too) to cluster ``x`` itself.
        n: value passed as ``npass`` to ``Pycluster.kcluster``.
        k: number of clusters.

    Returns:
        ``x``, with the ``"group"`` column set.
    """
    # Compare against the sentinel only when y really is a string: ``==``
    # on a DataFrame or array is element-wise and has no single truth value.
    if y is None or (isinstance(y, str) and y == "none"):
        y = x
    # fit k-means clusters
    labels, _, _ = Pycluster.kcluster(y, nclusters=k, transpose=0,
                                      method='a', dist='e', npass=n)
    # write labels back
    x.loc[:, "group"] = labels
    return x
示例11: cluster_spw_rpw
def cluster_spw_rpw(list_of_recs):
    """Cluster records on their first two fields and return the labels.

    The first two entries of every record are converted to floats, the
    number of clusters is obtained from get_k_value, and the points are
    clustered with Pycluster.kcluster. An empty list is reported on
    stdout, after which processing continues as usual.
    """
    if list_of_recs == []:
        print("ERRROR")
    only_serve_return = [[float(rec[0]), float(rec[1])] for rec in list_of_recs]
    k = get_k_value(only_serve_return)
    labels, _, _ = Pycluster.kcluster(scipy.array(only_serve_return), k)
    return labels
示例12: cluster
def cluster():
    """Cluster four hard-coded two-value points into two groups and print
    the numbered input points followed by the resulting labels."""
    x = [[76.0, 32.0], [63.0, 40.0], [70.0, 30.0], [64.0, 45.0]]
    labels, _, _ = Pycluster.kcluster(scipy.array(x), 2)

    print("Input data:")
    print(" spw " + " rpw")
    for position, point in enumerate(x, 1):
        print(str(position) + ") " + str(point[0]) + " " + str(point[1]))
    print(" ")
    print("clusters: " + str(labels))
示例13: getlabels
def getlabels(x, y, n=1000, k=8):
    """Cluster ``y`` with k-means, store the labels in ``x["group"]`` and
    print the size of every group.

    Parameters:
        x: object supporting ``.loc`` assignment; it receives a ``"group"``
            column holding the cluster label of every row and is modified
            in place.
        y: data to cluster, or the string ``"none"`` (``None`` is accepted
            too) to cluster ``x`` itself.
        n: value passed as ``npass`` to ``Pycluster.kcluster``.
        k: number of clusters.

    Returns:
        ``x``, with the ``"group"`` column set.
    """
    # Compare against the sentinel only when y really is a string: ``==``
    # on a DataFrame or array is element-wise and has no single truth value.
    if y is None or (isinstance(y, str) and y == "none"):
        y = x
    # fit k-means clusters
    labels, _, _ = Pycluster.kcluster(y, nclusters=k, transpose=0, method="a", dist="e", npass=n)
    # write labels back
    x.loc[:, "group"] = labels
    # report how many items ended up in each group
    assigned = list(labels)
    for group in range(k):
        print(assigned.count(group))
    return x
示例14: pyclustertest
def pyclustertest():
    """Smoke-test the clustering module on random data.

    Generates a 100 x 4 random data set, clusters it with the default
    settings of ``pcl.kcluster``, computes the cluster centroids and
    prints both the data and the centroids.
    """
    data = sp.rand(100, 4)
    cid, _, _ = pcl.kcluster(data)
    # The centroids must come from the data that was just clustered; the
    # earlier code referenced an undefined name ``D`` here.
    centroids, _ = pcl.clustercentroids(data, clusterid=cid)
    print(data)
    print(centroids)
示例15: cluster_kmedoids
def cluster_kmedoids(self, k=2, npass=50):
    """Partition the items into ``k`` classes with k-medoids.

    Uses the distances stored in ``self.zd``; ``npass`` is forwarded to
    ``pc.kmedoids`` (the original note describes it as the number of
    iterations). The cluster assignment is combined with ``self.mat`` by
    ``partition`` and that result is returned.
    """
    assignment, _, _ = pc.kmedoids(self.zd, k, npass=npass)
    return partition(assignment, self.mat)