当前位置: 首页>>代码示例>>Python>>正文


Python Pycluster.kcluster方法代码示例

本文整理汇总了Python中Pycluster.kcluster方法的典型用法代码示例。如果您正苦于以下问题:Python Pycluster.kcluster方法的具体用法?Python Pycluster.kcluster怎么用?Python Pycluster.kcluster使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块Pycluster的其他用法示例。


在下文中一共展示了Pycluster.kcluster方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: multikmeans

# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import kcluster [as 别名]
	def multikmeans(self, krange=None):
		"""Run k-means for several values of k and count pairwise co-clustering.

		For every k in the range, the rows of ``self.mat`` are clustered with
		``pc.kcluster``; each time two rows land in the same cluster the
		matching cell of an agreement matrix is incremented. After each run a
		progress line with an estimate of the remaining time is printed.

		:param krange: cluster counts to try. When ``None`` the range
			``np.arange(2, len(self.mat) - 1)`` is used.
		:return: the agreement matrix divided by the last k that was run.
		:raises ValueError: if the range of k values is empty.
		"""
		lmat = len(self.mat)

		# ``is None`` rather than ``== None``: comparing a NumPy array with
		# ``==`` is element-wise and cannot be used as an ``if`` condition.
		if krange is None:
			kr = np.arange(2, lmat - 1)
		else:
			kr = krange
		if np.size(kr) == 0:
			# Without this guard the final division fails on an unbound ``k``.
			raise ValueError("multikmeans needs at least one value of k")

		# Counts how many times each pair of documents is clustered together.
		accords = np.zeros((lmat, lmat), dtype=int)

		# Only the 20 most recent timings feed the estimate, so keep just those.
		recent = deque(maxlen=20)
		t0 = time()
		cost = 0.85  # relative time weight given to one unit of k
		tunits = (np.array(kr) * cost).sum()

		for k in kr:
			t1 = time()

			# K-means
			c, err, nfound = pc.kcluster(self.mat, k)

			# Add each cluster's membership mask to the rows of its members.
			for i in np.unique(c):
				accords[c == i] += c == i

			# Estimate the remaining time from the recent per-unit durations.
			t2 = time()
			tunits -= k * cost
			recent.append((t2 - t1) / (k * cost))
			prediction = tunits * np.mean(list(recent))
			print("k={0}: \t{1} ({2} depuis le début) \t{3} à faire".format(k, human_time(t2 - t1), human_time(t2 - t0), human_time(prediction)))

		# NOTE(review): normalises by the last k, not by the number of runs --
		# confirm this is the intended scaling.
		return accords / float(k)
开发者ID:LANCI,项目名称:kimono,代码行数:35,代码来源:kimono.py

示例2: findcenters

# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import kcluster [as 别名]
def findcenters(x, n=1000, k=6):
    """Average the k-means cluster centres of ``x`` over ``n`` independent runs.

    Each run performs a single-pass k-means (arithmetic mean, Euclidean
    distance) and orders the resulting centres by decreasing distance from
    the origin, so that centres from different runs are added position by
    position.

    :param x: 2-D data; ``x.shape[1]`` is taken as the number of dimensions.
    :param n: number of k-means runs to average over.
    :param k: number of clusters.
    :return: a ``DataFrame`` of shape ``(k, x.shape[1])`` with the averaged
        centres.
    """
    m = x.shape[1]
    # Accumulate in a plain array; this avoids the ``DataFrame.ix`` indexer,
    # which no longer exists in current pandas.
    totals = np.zeros(shape=(k, m))

    for _ in range(n):
        labels, _, _ = Pycluster.kcluster(x, nclusters=k, transpose=0, method="a", dist="e", npass=1)
        center, _ = Pycluster.clustercentroids(x, clusterid=labels)
        # Farthest-from-origin first, so row j is comparable across runs.
        center = sorted(center, key=lambda t: np.linalg.norm(np.array(t)), reverse=True)
        for j in range(k):
            totals[j, :] = totals[j, :] + center[j]

    return DataFrame(totals / n)
开发者ID:wsy1607,项目名称:Marketing-App,代码行数:31,代码来源:similarbeers.py

示例3: cluster

# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import kcluster [as 别名]
def cluster(parser, k):
    """
    Cluster the parser's sessions into k groups with k-means.

    Sessions accepted by ``config.session_filter_fn`` are converted to
    Markov-chain models and clustered with arithmetic-mean centres and
    Euclidean distance.

    Returns a ``(clusters, sse)`` tuple: ``clusters`` maps each cluster id to
    the list of sessions assigned to it, and ``sse`` is the error value
    reported by ``Pycluster.kcluster``.
    """
    # Index number for every page.
    encoding = parser.get_data_encoding(page_min_occurance=5)

    # Keep only the page sequences that pass the configured filter.
    kept_sessions = []
    for candidate in parser.get_simple_sessions():
        if config.session_filter_fn(candidate):
            kept_sessions.append(candidate)

    # Markov chains built from the kept sessions.
    chains = session_modeling.convert_sessions_to_markov(kept_sessions, encoding, bayes=False)
    assignment, sse, _ = Pycluster.kcluster(chains, k, method='a', dist='e')

    grouped = {}
    for session, label in zip(kept_sessions, assignment):
        if label not in grouped:
            grouped[label] = []
        grouped[label].append(session)

    return grouped, sse
开发者ID:1098896743,项目名称:pwum,代码行数:28,代码来源:clustering.py

示例4: suggest

# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import kcluster [as 别名]
    def suggest(self, word, n_groups=40):
        """Suggest indexed words related to ``word``, one per k-means cluster.

        Every entry of ``self.ndx`` is scored against ``word`` with
        ``self.compute_similarity`` and ranked from most to least similar.
        The entry vectors are then clustered, and the best-ranked word of
        each cluster is returned.

        :param word: the query word, passed to ``self.analyze``.
        :param n_groups: upper bound on the number of clusters, and therefore
            on the number of suggestions.
        :return: list of suggested words, most similar first; empty when the
            index has no entries.
        """
        query_vec = self.analyze(word)

        ranked = []
        for candidate, candidate_vec in self.ndx.items():
            similarity = self.compute_similarity([query_vec, candidate_vec])
            ranked.append((similarity, candidate, self.as_vector(candidate_vec)))
        ranked.sort()
        ranked.reverse()

        if not ranked:
            return []

        vectors = [vec for (_, _, vec) in ranked]
        words = [name for (_, name, _) in ranked]

        # Never request more clusters than there are items to cluster.
        labels, _, _ = Pycluster.kcluster(vectors, min(n_groups, len(vectors)))

        # Walk in rank order and keep the first word seen for each cluster.
        suggestions = []
        seen_groups = set()
        for label, name in zip(labels, words):
            if label not in seen_groups:
                seen_groups.add(label)
                suggestions.append(name)

        return suggestions
开发者ID:ticcky,项目名称:kwesa,代码行数:30,代码来源:esa_analyze.py

示例5: testPricesDiffsVecsKmeansClustering

# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import kcluster [as 别名]
	def testPricesDiffsVecsKmeansClustering(self):
		"""Check that k-means on price-difference vectors yields the expected groups."""

		vectors = utils.make_prices_diffs_vecs(self.data1)
		labels, wcss, n = Pycluster.kcluster(vectors, 3, npass=100)
		clusters = utils.make_groups_from_labels(labels, self.data1)

		# Expected grouping, up to cluster numbering. k-means is randomised,
		# so even with npass=100 a different grouping is possible, though
		# the original author considered it very unlikely.
		expected = [sorted(group) for group in (['E'], ['A', 'D'], ['B', 'C'])]

		# Count how many produced groups equal one of the expected groups.
		num_matches = 0
		for group in clusters.values():
			group.sort()
			num_matches += expected.count(group)

		# Three matches plus exactly three clusters means both groupings
		# are the same.
		self.assertEqual(num_matches, 3)
		self.assertEqual(len(clusters), 3)
开发者ID:kstosiek,项目名称:HDiDM2010,代码行数:33,代码来源:clusteringtest.py

示例6: pyclustertest

# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import kcluster [as 别名]
def pyclustertest():
    """Cluster 100 random 4-D points with the ``pcl.kcluster`` defaults.

    Prints the generated data followed by the centroids of the clusters
    that were found. Returns nothing.
    """
    # Local import: presumably ``sp`` was SciPy, whose top-level ``rand``
    # alias of ``numpy.random.rand`` is deprecated.
    import numpy

    data = numpy.random.rand(100, 4)
    cid, e, n = pcl.kcluster(data)
    # Centroids must come from the array that was clustered; the original
    # passed an undefined name ``D`` here.
    centroids, cmask = pcl.clustercentroids(data, clusterid=cid)

    print(data)
    print(centroids)
开发者ID:Garyfallidis,项目名称:trn,代码行数:10,代码来源:art.py

示例7: clusters

# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import kcluster [as 别名]
def clusters(labels, data, k):
	"""Build a co-membership matrix from one k-means run over ``data``.

	:param labels: sequence with one entry per item; only its length is used.
	:param data: the data handed to ``Pycluster.kcluster``.
	:param k: number of clusters.
	:return: ``float32`` array of shape ``(len(labels), len(labels))`` whose
		cell ``[i][j]`` is 1 when items i and j share a cluster, else 0.
	:raises IndexError: if ``labels`` is longer than the cluster assignment.
	"""
	n = len(labels)
	ids = numpy.asarray(Pycluster.kcluster(data, k, npass=1)[0])
	if n > len(ids):
		raise IndexError("labels has more entries than there are clustered items")
	ids = ids[:n]

	# One broadcast comparison replaces the nested Python loops.
	nx = (ids[:, None] == ids[None, :]).astype(numpy.float32)

	print("{0}  of  {1}".format(k, n))
	return nx
开发者ID:kenjsc,项目名称:CAMpping,代码行数:11,代码来源:kmeans.py

示例8: getlabels

# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import kcluster [as 别名]
def getlabels(x, y, n=1000, k=8):
    """Cluster ``y`` with k-means and store the labels in ``x["group"]``.

    :param x: object supporting ``x.loc[:, "group"] = ...`` (presumably a
        pandas DataFrame); receives the cluster labels.
    :param y: the data to cluster. Pass the string ``"none"`` (or ``None``)
        to cluster ``x`` itself.
    :param n: number of k-means passes (``npass``).
    :param k: number of clusters.
    :return: ``x`` with the ``"group"`` column set.
    """
    # Check the type before comparing: ``y == "none"`` on an array or
    # DataFrame is element-wise and cannot be used as an ``if`` condition.
    if y is None or (isinstance(y, (str, type(u""))) and y == "none"):
        y = x

    # Fit k-means clusters.
    labels, _, _ = Pycluster.kcluster(y, nclusters=k, transpose=0,
                                      method='a', dist='e', npass=n)
    # Write labels back.
    x.loc[:, "group"] = labels
    return x
开发者ID:wsy1607,项目名称:Machine-Learning-at-BountyMe,代码行数:11,代码来源:similarbeer.py

示例9: findk

# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import kcluster [as 别名]
def findk(x, n=1000, minK=2, maxK=20):
    """Print the k-means error for every cluster count from minK to maxK.

    For each candidate count, k-means is run with ``n`` passes (arithmetic
    mean, Euclidean distance). The candidate is printed as soon as its run
    finishes, and the list of collected errors is printed at the end.
    Nothing is returned.
    """
    collected = []
    for candidate in range(minK, maxK + 1):
        # Only the error value (second element of the result) is kept.
        _, sse, _ = Pycluster.kcluster(x, nclusters=candidate, transpose=0, method="a", dist="e", npass=n)
        collected.append(sse)
        print(candidate)
    print(collected)
开发者ID:wsy1607,项目名称:Marketing-App,代码行数:11,代码来源:similarbeers.py

示例10: cluster_spw_rpw

# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import kcluster [as 别名]
def cluster_spw_rpw(list_of_recs):
	"""Cluster records on their first two fields and return the cluster labels.

	:param list_of_recs: iterable of records; elements 0 and 1 of each record
		are converted to ``float`` and used as the 2-D point.
	:return: the cluster assignment returned by ``Pycluster.kcluster``, with
		the number of clusters chosen by ``get_k_value``.
	"""
	# Local import: ``scipy.array`` was only an alias of ``numpy.array`` and
	# is deprecated in SciPy.
	import numpy

	points = [[float(rec[0]), float(rec[1])] for rec in list_of_recs]
	if not points:
		# Reported but not fatal here, as in the original flow.
		print("ERRROR")

	k = get_k_value(points)
	labels, _, _ = Pycluster.kcluster(numpy.array(points), k)
	return labels
开发者ID:rohanm93,项目名称:individualproject,代码行数:12,代码来源:cluster.py

示例11: _G

# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import kcluster [as 别名]
    def _G(self, data, K):
        """Return negative Euclidean distances from each column of ``data`` to K centres.

        The columns of ``data`` are clustered with k-means (the data is
        transposed for Pycluster), and the centroid of each cluster is
        computed. Row ``k`` of the result holds, for every column of
        ``data``, minus its distance to centroid ``k``.

        Returns an array of shape ``(K, data.shape[1])``.
        """
        samples = data.T
        assignment, _, _ = Pycluster.kcluster(samples, K)
        centroids, _ = Pycluster.clustercentroids(samples, clusterid=assignment)
        # Back to the orientation of ``data``: one centroid per column.
        centroids = centroids.T

        G = zeros((K, data.shape[1]))
        for idx in range(K):
            # Make the centroid a column vector so it broadcasts over columns.
            offset = data - expand_dims(centroids[:, idx], axis=1)
            # ``sum`` must accept ``axis`` here, so presumably it is NumPy's.
            G[idx, :] = -sqrt(sum(multiply(offset, offset), axis=0))

        return G
开发者ID:ttsuchi,项目名称:example-selection,代码行数:13,代码来源:selection.py

示例12: cluster

# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import kcluster [as 别名]
def cluster():
	"""Demo: cluster four hard-coded (spw, rpw) points into two groups.

	Prints the numbered input points followed by the cluster label assigned
	to each of them. Returns nothing.
	"""
	# Local import: ``scipy.array`` was only an alias of ``numpy.array`` and
	# is deprecated in SciPy.
	import numpy

	x = [[76.0, 32.0], [63.0, 40.0], [70.0, 30.0], [64.0, 45.0]]
	k = 2
	labels, _, _ = Pycluster.kcluster(numpy.array(x), k)

	print("Input data:")
	print("   spw " + "  rpw")
	# Rows are numbered from 1 in the printed listing.
	for j, row in enumerate(x, 1):
		print(str(j) + ") " + str(row[0]) + "  " + str(row[1]))
	print(" ")
	print("clusters: " + str(labels))
开发者ID:rohanm93,项目名称:individualproject,代码行数:14,代码来源:cluster.py

示例13: getlabels

# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import kcluster [as 别名]
def getlabels(x, y, n=1000, k=8):
    """Cluster ``y`` with k-means, store the labels in ``x["group"]``, print group sizes.

    :param x: object supporting ``x.loc[:, "group"] = ...`` (presumably a
        pandas DataFrame); receives the cluster labels.
    :param y: the data to cluster. Pass the string ``"none"`` (or ``None``)
        to cluster ``x`` itself.
    :param n: number of k-means passes (``npass``).
    :param k: number of clusters.
    :return: ``x`` with the ``"group"`` column set.
    """
    # Check the type before comparing: ``y == "none"`` on an array or
    # DataFrame is element-wise and cannot be used as an ``if`` condition.
    if y is None or (isinstance(y, (str, type(u""))) and y == "none"):
        y = x

    # Fit k-means clusters.
    labels, _, _ = Pycluster.kcluster(y, nclusters=k, transpose=0, method="a", dist="e", npass=n)
    # Write labels back.
    x.loc[:, "group"] = labels

    # Print how many items fall in each group, in group-id order.
    labels = list(labels)
    for group_id in range(k):
        print(labels.count(group_id))
    return x
开发者ID:wsy1607,项目名称:Marketing-App,代码行数:14,代码来源:similarbeers.py

示例14: kmeans

# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import kcluster [as 别名]
def kmeans(data, **kwargs):
    """
    Perform k-means clustering on unstructured N-dimensional data.

    The data (and any supplied initial centers) are first passed through the
    transform that ``tm.getMethod('log')`` returns. Without initial centers
    the clustering is done by ``pc.kcluster``; with them, by ``kmeans2``
    seeded with the first ``numClusters`` centers.

    @type data: array
    @param data: The data to be clustered
    @type kwargs: dict
    @param kwargs: The following args are accepted:
        - numClusters: The number of clusters to form (returned number of
          clusters may be less than k). Defaults to 1.
        - npasses: The number of times the k-means clustering algorithm is
          performed, each time with a different (random) initial condition.
          Only used when no initial centers are given. Defaults to 1.
        - method: describes how the center of a cluster is found:
            - method=='a': arithmetic mean (default).
            - method=='m': median.
          Only used when no initial centers are given.
        - initialCenters: a set of points that should be used as the initial
          cluster centers
        - smartCenters: accepted for compatibility; not used by this function.

    @rtype: tuple
    @return: A list where each element indicates the cluster membership of the
        corresponding index in the original data and a message string
    """
    k = int(kwargs.get('numClusters', 1))
    npasses = int(kwargs.get('npasses', 1))
    method = kwargs.get('method', 'a')
    initialCenters = kwargs.get('initialCenters')
    msg = ''

    logData = tm.getMethod('log')(data)
    # The original test was inverted: supplied centers were ignored, and a
    # missing ``initialCenters`` led to slicing ``None``.
    if initialCenters is None:
        (clusterIDs, err, nOpt) = pc.kcluster(logData, k, npass=npasses, method=method)
        msg = "Number of rounds optimal solution was found: %i" % nOpt
    else:
        logCenters = tm.getMethod('log')(np.array(initialCenters[:k]))
        (centroids, clusterIDs) = kmeans2(logData, logCenters, minit='matrix')
        # Fewer distinct ids than k means at least one cluster ended up empty.
        if len(np.unique(clusterIDs)) < k:
            wx.MessageBox('Warning: One or more of the returned clusters are empty. Please choose different initial cluster centers and re-run k-means for better results.', 'Insufficiently varied cluster centers', wx.OK | wx.ICON_WARNING)

    return clusterIDs, msg
开发者ID:smdabdoub,项目名称:find,代码行数:54,代码来源:kmeans.py

示例15: clustering

# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import kcluster [as 别名]
def clustering(file_path, k, dist_measure, PLOT):
    """
    Run k-means on the comma-separated data stored in a file.

    @param file_path: Input data file.
    @param k: Requested number of centers; adjusted by set_k before use.
    @param dist_measure: Distance measure passed to Pycluster as ``dist``.
    @param PLOT: When exactly False the function returns right after
        clustering; otherwise the clusters and their centroids are drawn.
    @return: Cluster ids for all data points in the input file, or [-1]
        when the loaded data is one-dimensional.
    """
    points = numpy.genfromtxt(file_path, delimiter=',')

    # A 1-D array is not clustered; signal this with a sentinel result.
    if len(points.shape) == 1:
        return [-1]

    print("-- Processing file: " + file_path + "  -- Data points: " + str(len(points)))
    print("-- Start clustering")

    k = set_k(len(points), k)
    passes = method_name(len(points))

    # Do the K-means clustering.
    cluster_id, _, _ = Pycluster.kcluster(points, nclusters=k, mask=None, weight=None,
                                          transpose=0, npass=passes, method='a',
                                          dist=dist_measure, initialid=None)

    if PLOT is False:
        return cluster_id

    # Everything below only draws the clustering result.
    centroids, _ = Pycluster.clustercentroids(points, clusterid=cluster_id)

    if not PLOT:
        # Falsy values other than False plot the raw first two columns.
        data_2d, centroids_2d = points, centroids
    else:
        # Project the points and the centroids with the same PCA.
        pca = mlab.PCA(points)
        threshold = pca.fracs[1]
        data_2d = pca.project(points, minfrac=threshold)
        centroids_2d = pca.project(centroids, minfrac=threshold)

    palette = ['#2200CC', '#D9007E', '#FF6600', '#FFCC00', '#ACE600', '#0099CC',
               '#8900CC', '#FF0000', '#FF9900', '#FFFF00', '#00CC01', '#0055CC']

    for group in range(k):
        members = cluster_id == group
        # Colours repeat once there are more clusters than palette entries.
        scatter(data_2d[members, 0], data_2d[members, 1], color=palette[group % 12])

    plot(centroids_2d[:, 0], centroids_2d[:, 1], 'sg', markersize=8)
    show()

    return cluster_id
开发者ID:wdwind,项目名称:ImageTrends,代码行数:53,代码来源:PyClustering.py


注:本文中的Pycluster.kcluster方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。