当前位置: 首页>>代码示例>>Python>>正文


Python Pycluster类代码示例

本文整理汇总了Python中Pycluster的典型用法代码示例。如果您正苦于以下问题:Python Pycluster类的具体用法?Python Pycluster怎么用?Python Pycluster使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了Pycluster类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: findcenters

def findcenters(x, n=1000, k=6):
    """Average the k-means centroids of ``x`` over ``n`` independent runs.

    Every run performs a single-pass k-means (``npass=1``), orders the
    resulting centroids by decreasing Euclidean norm so that centroids from
    different runs line up by rank, and adds them to a running total.

    :param x: 2-D data with one item per row; must expose ``.shape``.
    :param n: number of independent k-means runs to average over.
    :param k: number of clusters requested from each run.
    :return: ``DataFrame`` of shape ``(k, x.shape[1])`` holding the averaged
        centroids, row 0 being the one farthest from the origin.
    """
    # number of feature columns
    m = x.shape[1]
    # Running sum of the rank-ordered centroids.  A plain array is used
    # instead of the ``DataFrame.ix`` indexer, which pandas 1.0 removed.
    totals = np.zeros(shape=(k, m))

    for _run in range(n):
        labels, _, _ = Pycluster.kcluster(x, nclusters=k, transpose=0, method="a", dist="e", npass=1)
        center, _ = Pycluster.clustercentroids(x, clusterid=labels)
        # Cluster ids are arbitrary from run to run, so rank the centroids by
        # their distance to the origin before accumulating them.
        center = sorted(center, key=np.linalg.norm, reverse=True)
        totals += np.asarray(center, dtype=float)

    return DataFrame(totals / n)
开发者ID:wsy1607,项目名称:Marketing-App,代码行数:29,代码来源:similarbeers.py

示例2: _G

    def _G(self, data, K):
        """Score every column of ``data`` against ``K`` k-means centroids.

        The columns of ``data`` are the items being clustered (the transpose
        is what gets handed to Pycluster).  Entry ``[k, j]`` of the result is
        the negated Euclidean distance between column ``j`` and centroid
        ``k``, so a larger value means a closer centroid.

        :param data: 2-D array with one item per column.
        :param K: number of clusters.
        :return: array of shape ``(K, data.shape[1])``.
        """
        items = data.T
        assignment, _, _ = Pycluster.kcluster(items, K)
        centroids, _ = Pycluster.clustercentroids(items, clusterid=assignment)
        # back to the layout of ``data``: one centroid per column
        centroids = centroids.T

        scores = zeros((K, data.shape[1]))
        for idx in range(K):
            # broadcast centroid ``idx`` across all columns of ``data``
            offset = data - expand_dims(centroids[:, idx], axis=1)
            squared = multiply(offset, offset)
            # NOTE(review): ``sum`` must accept ``axis``, so it is presumably
            # numpy's sum rather than the builtin; confirm against the imports.
            distance = sqrt(sum(squared, axis=0))
            scores[idx, :] = -distance

        return scores
开发者ID:ttsuchi,项目名称:example-selection,代码行数:11,代码来源:selection.py

示例3: clustering

def clustering(file_path, k, dist_measure, PLOT):
    """
    Do the K-means clustering for input data.

    @param file_path: Input data file (comma-separated values).
    @param k: Number of centers in K-means algorithm; passed through set_k()
              together with the number of data points before it is used.
    @param dist_measure: Distance measure handed to Pycluster.kcluster as
                         ``dist`` (in this case, we use Manhattan distance).
    @param PLOT: Bool variable, check if plot the result (set it as True only in testing).
    @return: Clusters id for all data points in the input data file, or
             [-1] when the file parses to a one-dimensional array.
    """

    data = numpy.genfromtxt(file_path, delimiter=',')

    # A one-dimensional parse result cannot be clustered row by row.
    if len(data.shape) == 1:
        return [-1]

    # Single-argument print(...) behaves identically on Python 2 and 3.
    print("-- Processing file: " + file_path + "  -- Data points: " + str(len(data)))
    print("-- Start clustering")

    k = set_k(len(data), k)
    ite_num = method_name(len(data))

    # Do the K-means clustering
    cluster_id, _, _ = Pycluster.kcluster(data, nclusters=k, mask=None, weight=None, transpose=0, npass=ite_num,
                                          method='a', dist=dist_measure, initialid=None)

    if PLOT is False:
        return cluster_id

    # Draw the clustering result plot.
    centroids, _ = Pycluster.clustercentroids(data, clusterid=cluster_id)

    if PLOT:
        # Project data and centroids with the same PCA so they share axes.
        data_pca = mlab.PCA(data)
        cutoff = data_pca.fracs[1]
        data_2d = data_pca.project(data, minfrac=cutoff)
        centroids_2d = data_pca.project(centroids, minfrac=cutoff)
    else:
        # Reached only when PLOT is falsy without being the literal False
        # (e.g. None or 0): the first two raw columns are plotted as-is.
        data_2d = data
        centroids_2d = centroids

    color = ['#2200CC', '#D9007E', '#FF6600', '#FFCC00', '#ACE600', '#0099CC',
             '#8900CC', '#FF0000', '#FF9900', '#FFFF00', '#00CC01', '#0055CC']

    for i in range(k):
        # Recycle the palette when there are more clusters than colours.
        members = cluster_id == i
        scatter(data_2d[members, 0], data_2d[members, 1], color=color[i % len(color)])

    plot(centroids_2d[:, 0], centroids_2d[:, 1], 'sg', markersize=8)
    show()

    return cluster_id
开发者ID:wdwind,项目名称:ImageTrends,代码行数:51,代码来源:PyClustering.py

示例4: cluster

def cluster(parser, k):
    """
    Group the parser's sessions into k clusters with k-means.

    Sessions that pass ``config.session_filter_fn`` are converted to Markov
    models and clustered; the sessions are then bucketed by the cluster id
    assigned to them.

    Returns a tuple ``(clusters, sse)`` where ``clusters`` maps a cluster id
    to the list of sessions assigned to it and ``sse`` is the error value
    reported by ``Pycluster.kcluster``.
    """
    # index number for every page
    page_codes = parser.get_data_encoding(page_min_occurance=5)

    # keep only the page sequences accepted by the configured filter
    kept_sessions = []
    for candidate in parser.get_simple_sessions():
        if config.session_filter_fn(candidate):
            kept_sessions.append(candidate)

    # Markov chains with estimated transition probabilities.  Alternatives
    # tried by the original author: binary visit vectors via
    # session_modeling.convert_sessions_to_vector, and k-medoids with a
    # Jaccard distance via cluster_kmedoids.
    chains = session_modeling.convert_sessions_to_markov(kept_sessions, page_codes, bayes=False)
    assignment, sse, _ = Pycluster.kcluster(chains, k, method='a', dist='e')

    grouped = {}
    for session, label in zip(kept_sessions, assignment):
        if label not in grouped:
            grouped[label] = []
        grouped[label].append(session)

    return grouped, sse
开发者ID:1098896743,项目名称:pwum,代码行数:26,代码来源:clustering.py

示例5: cluster_kmedoids

def cluster_kmedoids(sessions, clusters, distance_fn=string_similarity.jaccard_distance):
    """
    Cluster sessions with k-medoids.

    A full distance matrix is computed first with ``compute_distances``,
    which makes this approach slow.

    Returns the three values produced by ``Pycluster.kmedoids``, in order:
    the cluster ids, the error, and the "nfound" count.
    """
    pairwise = compute_distances(sessions, distance_fn)
    outcome = Pycluster.kmedoids(pairwise, nclusters=clusters)
    # unpack to insist on exactly three results before handing them back
    medoid_ids, total_error, times_found = outcome
    return medoid_ids, total_error, times_found
开发者ID:1098896743,项目名称:pwum,代码行数:7,代码来源:clustering.py

示例6: testPricesDiffsVecsKmeansClustering

	def testPricesDiffsVecsKmeansClustering(self):
		"""Check that k-means over price-difference vectors yields the
		   expected three groups."""

		diff_vectors = utils.make_prices_diffs_vecs(self.data1)
		labels, _wcss, _nfound = Pycluster.kcluster(diff_vectors, 3, npass=100)
		clusters = utils.make_groups_from_labels(labels, self.data1)

		# Expected grouping, up to a renumbering of the groups.  With
		# npass=100 a different outcome is thought to be very unlikely,
		# but k-means is randomised, so it is not impossible.
		suggested_clusters = {0: ['E'], 1: ['A', 'D'], 2: ['B', 'C']}
		expected_groups = [sorted(group) for group in suggested_clusters.values()]

		# Normalise member order so groups can be compared as plain lists.
		for members in clusters.values():
			members.sort()

		num_matches = sum(
			1
			for members in clusters.values()
			for group in expected_groups
			if members == group
		)

		# Every expected group occurs in the output and there are exactly
		# three output groups, hence the two groupings are the same.
		self.assertEqual(num_matches, 3)
		self.assertEqual(len(clusters), 3)
开发者ID:kstosiek,项目名称:HDiDM2010,代码行数:31,代码来源:clusteringtest.py

示例7: suggest

    def suggest(self, word, nclusters=40):
        """Suggest indexed words related to ``word``, one per cluster.

        All indexed words are ranked by decreasing similarity to ``word``,
        their vectors are clustered with k-means, and the best-ranked word of
        each cluster is returned, so the suggestions are similar to the query
        yet spread out from one another.

        :param word: the query word, analysed with ``self.analyze``.
        :param nclusters: upper bound on the number of clusters, and hence on
            the number of suggestions.  It is capped at the number of indexed
            words because Pycluster rejects requests for more clusters than
            items.
        :return: list of suggested words in decreasing order of similarity;
            empty when the index is empty.
        """
        query_vec = self.analyze(word)

        # rank every indexed word by its similarity to the query
        ranked = []
        for nword, nv in self.ndx.items():
            wsim = self.compute_similarity([query_vec, nv])
            ranked.append((wsim, nword, self.as_vector(nv)))
        ranked.sort()
        ranked.reverse()

        if not ranked:
            # nothing to cluster
            return []

        # Distinct loop names keep the comprehensions from rebinding the
        # ``word`` parameter, which list comprehensions do on Python 2.
        vectors = [vec for (_sim, _cand, vec) in ranked]
        candidates = [cand for (_sim, cand, _vec) in ranked]
        labels, _err, _nfound = Pycluster.kcluster(vectors, min(nclusters, len(vectors)))

        # walk the ranking and keep the first word seen for each cluster
        suggestions = []
        seen_clusters = set()
        for label, cand in zip(labels, candidates):
            if label not in seen_clusters:
                suggestions.append(cand)
                seen_clusters.add(label)

        return suggestions
开发者ID:ticcky,项目名称:kwesa,代码行数:28,代码来源:esa_analyze.py

示例8: findk

def findk(x, n=1000, minK=2, maxK=20):
    """Report the k-means error for every cluster count in ``[minK, maxK]``.

    For each candidate ``k`` the data is clustered with ``n`` passes and the
    error value returned by ``Pycluster.kcluster`` is recorded.  Each ``k``
    is printed as soon as it has been processed, and the full list of errors
    is printed at the end.

    :param x: data to cluster, one item per row (``transpose=0``).
    :param n: number of k-means passes per candidate ``k``.
    :param minK: smallest cluster count to try (inclusive).
    :param maxK: largest cluster count to try (inclusive).
    :return: list of errors, one per candidate ``k`` in increasing order;
        empty when ``minK > maxK``.
    """
    errors = []
    for i in range(minK, maxK + 1):
        _, error, _ = Pycluster.kcluster(x, nclusters=i, transpose=0, method="a", dist="e", npass=n)
        errors.append(error)
        # progress indicator; print(...) with one argument works on Python 2 and 3
        print(i)
    print(errors)
    # hand the errors back so callers can use them instead of parsing stdout
    return errors
开发者ID:wsy1607,项目名称:Marketing-App,代码行数:9,代码来源:similarbeers.py

示例9: clusters

def clusters(labels, data, k):
	"""Build a co-membership matrix from a single k-means pass.

	``data`` is clustered into ``k`` groups; entry ``[i, j]`` of the result is
	1 when items ``i`` and ``j`` landed in the same cluster and 0 otherwise.

	:param labels: sequence whose length fixes the size of the matrix; only
		its length is used.
	:param data: items to cluster, handed to ``Pycluster.kcluster``.
	:param k: number of clusters.
	:return: ``float32`` array of shape ``(len(labels), len(labels))``.
	"""
	kclus = Pycluster.kcluster(data, k, npass=1)[0]
	count = len(labels)
	# Index-array lookup still raises IndexError when there are more labels
	# than clustered items, as element-wise indexing would.
	ids = numpy.asarray(kclus)[numpy.arange(count)]
	# Broadcast comparison instead of an O(n^2) Python-level double loop.
	nx = (ids[:, None] == ids[None, :]).astype(numpy.float32)
	# Same text as the former comma-separated print statement.
	print("%s  of  %s" % (k, count))
	return nx
开发者ID:kenjsc,项目名称:CAMpping,代码行数:9,代码来源:kmeans.py

示例10: getlabels

def getlabels(x, y, n=1000, k=8):
    """Cluster the rows with k-means and store the labels in ``x["group"]``.

    :param x: table that receives the ``"group"`` column (assigned through
        ``x.loc``); it is modified in place.
    :param y: features to cluster, or the string ``"none"`` (``None`` is
        accepted as well) to cluster ``x`` itself.
    :param n: number of k-means passes.
    :param k: number of clusters.
    :return: ``x``, now carrying the ``"group"`` column.
    """
    # Check the type first: comparing a DataFrame or array with == yields an
    # element-wise result whose truth value is ambiguous and raises.
    if y is None or (isinstance(y, str) and y == "none"):
        y = x
    # fit k-means clusters
    labels, _, _ = Pycluster.kcluster(y, nclusters=k, transpose=0,
                                      method='a', dist='e', npass=n)
    # write labels back
    x.loc[:, "group"] = labels
    return x
开发者ID:wsy1607,项目名称:Machine-Learning-at-BountyMe,代码行数:9,代码来源:similarbeer.py

示例11: cluster_spw_rpw

def cluster_spw_rpw(list_of_recs):
	"""Cluster records on their first two fields.

	The first two entries of every record are converted to floats (the
	function name suggests they are the "spw" and "rpw" figures), the number
	of clusters is chosen by ``get_k_value`` and the points are clustered
	with k-means.

	:param list_of_recs: records whose items 0 and 1 are convertible to float.
	:return: the cluster labels produced by ``Pycluster.kcluster``.
	"""
	if list_of_recs==[]:
		# Only reported: processing continues as before.
		print("ERRROR")
	only_serve_return = [[float(rec[0]), float(rec[1])] for rec in list_of_recs]
	k = get_k_value(only_serve_return)
	labels, _error, _nfound = Pycluster.kcluster(scipy.array(only_serve_return), k)
	return labels
开发者ID:rohanm93,项目名称:individualproject,代码行数:10,代码来源:cluster.py

示例12: cluster

def cluster():
	"""Demo: cluster four hard-coded (spw, rpw) points into two groups.

	Prints the numbered input rows followed by the labels returned by
	``Pycluster.kcluster``.  Returns nothing.
	"""
	x = [[76.0,32.0],[63.0,40.0],[70.0,30.0],[64.0,45.0]]
	k = 2
	labels, _error, _nfound = Pycluster.kcluster(scipy.array(x),k)
	# Each print(...) takes a single string, so the output is the same on
	# Python 2 and Python 3.
	print("Input data:")
	print("   spw " + "  rpw")
	for j, row in enumerate(x, 1):
		print(str(j)+") "+str(row[0]) + "  " + str(row[1]))
	print(" ")
	print("clusters: " + str(labels))
开发者ID:rohanm93,项目名称:individualproject,代码行数:12,代码来源:cluster.py

示例13: getlabels

def getlabels(x, y, n=1000, k=8):
    """Cluster the rows with k-means, label ``x`` and print the group sizes.

    :param x: table that receives the ``"group"`` column (assigned through
        ``x.loc``); it is modified in place.
    :param y: features to cluster, or the string ``"none"`` (``None`` is
        accepted as well) to cluster ``x`` itself.
    :param n: number of k-means passes.
    :param k: number of clusters.
    :return: ``x``, now carrying the ``"group"`` column.
    """
    # Check the type first: comparing a DataFrame or array with == yields an
    # element-wise result whose truth value is ambiguous and raises.
    if y is None or (isinstance(y, str) and y == "none"):
        y = x
    # fit k-means clusters
    labels, _, _ = Pycluster.kcluster(y, nclusters=k, transpose=0, method="a", dist="e", npass=n)
    # write labels back
    x.loc[:, "group"] = labels
    # count how many items in each group, one line per cluster id 0..k-1
    labels = list(labels)
    for i in range(k):
        print(labels.count(i))
    return x
开发者ID:wsy1607,项目名称:Marketing-App,代码行数:12,代码来源:similarbeers.py

示例14: pyclustertest

def pyclustertest():
    """Smoke test: cluster random data and print it with its centroids.

    Generates a 100 x 4 random array, clusters it with the default
    ``kcluster`` settings and prints the data followed by the centroids.
    Returns nothing.
    """
    data = sp.rand(100, 4)
    cid, _error, _nfound = pcl.kcluster(data)
    # Centroids must come from the array that was clustered; the previous
    # code passed ``D``, a name this function never defines.
    centroids, _cmask = pcl.clustercentroids(data, clusterid=cid)

    print(data)
    print(centroids)
开发者ID:Garyfallidis,项目名称:trn,代码行数:8,代码来源:art.py

示例15: cluster_kmedoids

	def cluster_kmedoids(self, k=2, npass=50):
		"""Partition the items into ``k`` classes with k-medoids.

		``self.zd`` is handed to ``pc.kmedoids`` (presumably the distance
		matrix; confirm where it is built) and ``npass`` is forwarded
		unchanged; the original note describes it as the number of
		iterations.

		Returns whatever ``partition`` builds from the cluster ids and
		``self.mat``.
		"""
		outcome = pc.kmedoids(self.zd, k, npass=npass)
		# only the cluster ids are needed; the error and nfound are dropped
		medoid_ids, _err, _nfound = outcome
		return partition(medoid_ids, self.mat)
开发者ID:LANCI,项目名称:kimono,代码行数:7,代码来源:kimono.py


注:本文中的Pycluster类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。