Python vq.kmeans函数代码示例

本文整理汇总了Python中scipy.cluster.vq.kmeans函数的典型用法代码示例。如果您正苦于以下问题：Python kmeans函数的具体用法？Python kmeans怎么用？Python kmeans使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。

在下文中一共展示了kmeans函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: clustering_scipy_kmeans

def clustering_scipy_kmeans(features, n_clust=8):
    """Explore k-means clusterings of ``features`` and plot the centroids.

    The observations are whitened, the distortion reported by ``kmeans`` for
    k = 1..11 is plotted, and the whitened data is then clustered twice:
    once with ``kmeans`` and once with ``kmeans2`` (``minit='points'``).
    Every resulting centroid is drawn in its own subplot.

    Args:
        features: observations accepted by ``whiten``.
        n_clust: number of clusters requested from ``kmeans`` and ``kmeans2``.

    Returns:
        None. Results are printed and shown in matplotlib figures.
    """
    whitened = whiten(features)
    print(whitened.shape)

    # Distortion as a function of the number of clusters.
    initial = [kmeans(whitened, i) for i in np.arange(1, 12)]
    plt.plot([var for (cent, var) in initial])
    plt.show()

    codebook, distortion = kmeans(whitened, n_clust)
    print("%s %s" % (codebook, distortion))
    _plot_centroid_grid(codebook)

    centroid, label = kmeans2(whitened, n_clust, minit='points')
    print("%s %s" % (centroid, label))
    _plot_centroid_grid(centroid)


def _plot_centroid_grid(centroids):
    """Draw each centroid in its own panel of a two-column subplot grid."""
    # Size the grid from the centroids actually returned instead of assuming
    # eight of them; for eight centroids this is still a 4 x 2 grid.
    n_rows = (len(centroids) + 1) // 2
    for position, centroid in enumerate(centroids, 1):
        # Subplot positions are numbered from 1, not 0.
        plt.subplot(n_rows, 2, position)
        plt.plot(centroid)
    plt.show()

开发者ID:kaustuvkanti，项目名称:Experiments，代码行数:30，代码来源:dump_transition_for_clustering.py

示例2: custom

def custom():
    """Build per-item vote vectors from ``my_items_likehood.txt`` and cluster them.

    Every line of the file is parsed with ``keys`` into a ``(user, item)``
    pair. For each item (at most the first 101 encountered) a feature row is
    built with ``user_matrix``; the rows are whitened and passed to ``kmeans``.
    Progress and results are printed.
    """
    users_by_item = {}
    users = []

    # The context manager closes the file even if parsing a line fails.
    with open('my_items_likehood.txt') as votes:
        for line in votes:
            user, item = keys(line)
            users.append(user)
            users_by_item.setdefault(item, []).append(user)

    sorted_users = sorted(users)
    n_users = len(sorted_users)

    features = []
    for count, item in enumerate(users_by_item):
        features.append(user_matrix(n_users, users_by_item[item], sorted_users))
        # Same cut-off as before: stop after the item with index 100.
        if count == 100:
            break

    print('whiten')
    whitened = whiten(array(features))
    print('kmeans')
    # NOTE(review): if ``kmeans`` is scipy.cluster.vq.kmeans, this call is
    # missing the required ``k_or_guess`` argument and raises TypeError.
    # The intended number of clusters is not visible here -- confirm.
    print(kmeans(whitened))
    # Report the number of distinct items read; the previous code measured a
    # dict that was never filled and therefore always printed 0.
    print("%d items voted by %d users" % (len(users_by_item), len(users)))

开发者ID:mcaprari，项目名称:rt-pub，代码行数:29，代码来源:t.py

示例3: kmeans_net

def kmeans_net(net, layers, num_c=16, initials=None):
    """Build a per-layer quantization codebook with k-means.

    Args:
        net: network object; ``net.params[layer][0].data`` is read for every
            requested layer.
        layers: names of the layers to quantize.
        num_c: number of quantization levels, either a single int applied to
            every layer or a sequence with one entry per layer.
        initials: how the centroids are initialised. ``None`` uses
            ``num_c - 1`` values linearly spaced between the smallest and
            largest nonzero weight, a NumPy array is used directly as the
            initial centroids, and ``'random'`` lets ``kmeans`` choose
            ``num_c - 1`` centroids itself.

    Returns:
        dict mapping each layer name to a 1-D array holding ``0.0`` followed
        by the centroids found for that layer.

    Raises:
        AssertionError: if ``num_c`` is a sequence whose length differs from
            ``layers``.
        ValueError: if ``initials`` is none of the supported values.
    """
    codebook = {}  # quantization codebook, one entry per layer
    if isinstance(num_c, int):
        num_c = [num_c] * len(layers)
    else:
        assert len(num_c) == len(layers)

    # Run the cluster analysis layer by layer.
    print("==============Perform K-means=============")
    for idx, layer in enumerate(layers):
        print("Eval layer: %s" % (layer,))
        W = net.params[layer][0].data.flatten()
        W = W[W != 0]  # keep only the nonzero weights
        if initials is None:
            # Default: centroids start linearly spaced over the weight range.
            initial_uni = np.linspace(np.min(W), np.max(W), num_c[idx] - 1)
            codebook[layer], _ = scv.kmeans(W, initial_uni)
        elif isinstance(initials, np.ndarray):
            codebook[layer], _ = scv.kmeans(W, initials)
        elif initials == 'random':
            codebook[layer], _ = scv.kmeans(W, num_c[idx] - 1)
        else:
            raise ValueError(
                "initials must be None, a numpy array or 'random', got %r"
                % (initials,))

        # Prepend the zero weight value as its own code.
        codebook[layer] = np.append(0.0, codebook[layer])
        print("codebook size: %d" % len(codebook[layer]))

    return codebook

开发者ID:zhang405744522，项目名称:Caffe-Python-Tutorial，代码行数:35，代码来源:quantize.py

示例4: kmeans2

def kmeans2():
    """Run k-means on the whitened output of ``locations()``.

    Rows 0 and 2 of the whitened observations are used as the two initial
    centroids.

    Returns:
        The ``(codebook, distortion)`` tuple produced by ``kmeans``.
    """
    features = locations()
    whitened = whiten(features)
    book = array((whitened[0], whitened[2]))
    # Hand the result back to the caller; previously it was discarded and a
    # pasted sample-output literal was evaluated as a no-op expression.
    return kmeans(whitened, book)

开发者ID:btoffoli，项目名称:data_mining_examples，代码行数:7，代码来源:locactionKmeans1.py

示例5: kmeans1

def kmeans1():
    """Run k-means with two seeded centroids on a fixed nine-point data set.

    The observations are whitened first; whitened rows 0 and 2 serve as the
    initial centroids.

    Returns:
        The ``(codebook, distortion)`` tuple produced by ``kmeans``.
    """
    features = array([[1.9, 2.3], [1.5, 2.5], [0.8, 0.6],
                      [0.4, 1.8], [0.1, 0.1], [0.2, 1.8],
                      [2.0, 0.5], [0.3, 1.5], [1.0, 1.0]])
    whitened = whiten(features)
    book = array((whitened[0], whitened[2]))
    # Hand the result back to the caller; previously it was discarded and a
    # pasted sample-output literal was evaluated as a no-op expression.
    return kmeans(whitened, book)

开发者ID:btoffoli，项目名称:data_mining_examples，代码行数:7，代码来源:locactionKmeans1.py

示例6: _get_larger_chroms

def _get_larger_chroms(ref_file):
    """Return the names of the larger contigs in ``ref_file`` for plotting.

    Contig sizes are split into two k-means clusters and the group holding
    the smallest sizes is kept. That group is split once more the same way,
    and every contig larger than the biggest size left in the final small
    group is reported.
    """
    from scipy.cluster.vq import kmeans, vq

    def _leading_cluster(sizes):
        """Cluster ``sizes`` into two groups; return the first run of one label."""
        centers, _ = kmeans(np.array(sizes), 2)
        labels, _ = vq(np.array(sizes), centers)
        leading_run = tz.first(
            tz.partitionby(lambda pair: pair[0], zip(labels, sizes)))
        return [size for _, size in leading_run]

    contig_sizes = sorted(
        float(contig.size) for contig in ref.file_contigs(ref_file))
    # First pass separates out the smaller chromosomes and haplotypes.
    smaller = _leading_cluster(contig_sizes)
    # Second pass, on the smaller group only, isolates the haplotypes.
    smallest = _leading_cluster(smaller)
    # Anything above the haplotype/random bin counts as a larger chromosome.
    cutoff = max(smallest)
    return [contig.name
            for contig in ref.file_contigs(ref_file)
            if contig.size > cutoff]

开发者ID:Kisun，项目名称:bcbio-nextgen，代码行数:25，代码来源:cnvkit.py

示例7: cluster

def cluster(df, means, csv_min, csv_max):
    """Cluster each selected column of ``df`` with k-means.

    Args:
        df: pandas DataFrame whose columns hold numeric values, with the
            string ``"--"`` marking entries to skip.
        means: second argument passed to ``kmeans`` (a number of clusters or
            an array of initial centroids).
        csv_min: position of the first column to process.
        csv_max: position one past the last column to process.

    Returns:
        A list with one sorted array of centroids per processed column.
    """
    data = []
    for i in range(csv_min, csv_max):
        # Positional access; ``DataFrame.ix`` no longer exists in pandas.
        column = array(df.iloc[:, i].values)
        numeric = column[column != "--"].astype(float)
        # Cluster once so the printed centroids are exactly the stored ones;
        # two separate runs can differ under random initialisation.
        centers = np.sort(kmeans(numeric, means)[0])
        print(centers)
        data.append(centers)
    return data

开发者ID:JasonAHeron，项目名称:Data-Visualization，代码行数:8，代码来源:cluster.py

示例8: test_kmeans_lost_cluster

    def test_kmeans_lost_cluster(self):
        """Exercise kmeans and kmeans2 with seeds that leave a cluster empty."""
        # These starting centroids cause one cluster to end up with no points.
        seeds = np.array([
            [-1.8127404, -0.67128041],
            [2.04621601, 0.07401111],
            [-2.31149087, -0.05160469],
        ])
        samples = np.fromfile(DATAFILE1, sep=", ").reshape((200, 2))

        kmeans(samples, seeds)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", UserWarning)
            kmeans2(samples, seeds, missing="warn")

        # With missing="raise" the empty cluster must surface as an error.
        with assert_raises(ClusterError):
            kmeans2(samples, seeds, missing="raise")

开发者ID:ymarfoq，项目名称:outilACVDesagregation，代码行数:12，代码来源:test_vq.py

示例9: test_kmeans_lost_cluster

    def test_kmeans_lost_cluster(self):
        """Exercise kmeans and kmeans2 with seeds that leave a cluster empty."""
        observations = TESTDATA_2D
        # These starting centroids cause one cluster to end up with no points.
        first = [-1.8127404, -0.67128041]
        second = [2.04621601, 0.07401111]
        third = [-2.31149087, -0.05160469]
        start_centroids = np.array([first, second, third])

        kmeans(observations, start_centroids)
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', category=UserWarning)
            kmeans2(observations, start_centroids, missing='warn')

        # With missing='raise' the empty cluster must surface as an error.
        assert_raises(
            ClusterError,
            lambda: kmeans2(observations, start_centroids, missing='raise'))

开发者ID:dyao-vu，项目名称:meta-core，代码行数:13，代码来源:test_vq.py

示例10: clusterkmeans

 def clusterkmeans(self):
     """Derive a threshold for ``self.counts`` from a two-cluster k-means.

     The counts are whitened and clustered with k = 1 and k = 2. If the
     single-cluster distortion is smaller, no threshold is produced.
     Otherwise the threshold is the mean of the two centroids, converted
     back to the original count scale.

     Returns:
         The threshold in the units of ``self.counts``, or ``None`` when
         only one cluster is found.
     """
     import numpy as np  # local import so the method does not rely on a module-level alias

     counts = np.asarray(self.counts, dtype=float)
     wh = whiten(counts)  # normalizes the counts for easier clustering
     # ``whiten`` divides by the standard deviation, so use that directly as
     # the scale. Dividing the first raw count by the first whitened count
     # yields NaN (0/0) whenever the first count is zero.
     spread = counts.std(axis=0)
     scale = np.where(spread == 0, 1.0, spread)
     # Compute kmeans for k = 1, 2, compare the distortions and keep the better one.
     one = kmeans(wh, 1)
     two = kmeans(wh, 2)
     if one[1] < two[1]:
         print('found only one cluser')
         threshold = None
     else:
         # The threshold is the average of the two centers, rescaled.
         threshold = scale * two[0].mean()
     return threshold

开发者ID:HaeffnerLab，项目名称:sqip，代码行数:13，代码来源:dataProcessor_ionSwap_backup2.py

示例11: test_kmeans_lost_cluster

    def test_kmeans_lost_cluster(self):
        """Exercise kmeans and kmeans2 with seeds that leave a cluster empty."""
        points = TESTDATA_2D
        # These starting centroids cause one cluster to end up with no points.
        guess = np.array([
            [-1.8127404, -0.67128041],
            [2.04621601, 0.07401111],
            [-2.31149087, -0.05160469],
        ])
        empty_cluster_msg = ("One of the clusters is empty. "
                             "Re-run kmean with a different initialization")

        with suppress_warnings() as quiet:
            quiet.filter(category=UserWarning, message=empty_cluster_msg)
            kmeans(points, guess)
            kmeans2(points, guess, missing='warn')

        # With missing='raise' the empty cluster must surface as an error.
        with assert_raises(ClusterError):
            kmeans2(points, guess, missing='raise')

开发者ID:Brucechen13，项目名称:scipy，代码行数:14，代码来源:test_vq.py

示例12: createdatabase

def createdatabase():
    """Build the k-means codebook and write the histogram table to ``table.htm``.

    Features computed by ``detectcompute(train1)`` are whitened and clustered
    into ``k`` groups. ``bow`` then builds one entry per image returned by
    ``singledetect(test1)``. The first and last element of every entry are
    written as the filename and histogram columns of an HTML table, and the
    codebook is finally handed to ``codebook_to_file``.
    """
    X_train = detectcompute(train1)

    print("Clustering the data with K-means")
    codebook, _ = kmeans(whiten(X_train), k)
    print("Done.\n")

    imtrain = singledetect(test1)
    Pdatabase = bow(imtrain, codebook, k)  # pseudo database with list structure

    # Write the database out as an HTML table.
    print("Converting the database into a HTML file")
    # "w" creates the file when it is missing and truncates old content;
    # "r+" failed on a missing file and could leave stale trailing bytes.
    # The context manager closes the handle even if a write fails.
    with open("table.htm", "w") as htmltable:
        # The opening tag was previously misspelled as "<htm>".
        htmltable.write("<html><body><table cellpadding=5><tr><th>Filename</th><th>Histogram</th></tr>")
        for entry in Pdatabase:
            htmltable.write(
                "<tr><td>%(filename)s</td><td>%(histogram)s</td></tr>"
                % {"filename": entry[0], "histogram": entry[-1]})
        htmltable.write("</table></body></html>")
    print("Done.\n")

    codebook_to_file(codebook)

开发者ID:MariaBarrett，项目名称:VIPExam2，代码行数:27，代码来源:main.py

示例13: worldplot

 def worldplot(self,kmeans=None,proj='merc'):
     """
     plots customer GPS location on a map with state and national boundaries.
     IN
         kmeans (int) number of means for k-means clustering, default=None
         proj (string) the map projection to use, use 'robin' to plot the whole earth, default='merc'
     OUT
         the cluster centers found by vq.kmeans, or None when kmeans is not given
     """
     # create a matplotlib Basemap object
     if proj == 'robin':
         my_map = Basemap(projection=proj,lat_0=0,lon_0=0,resolution='l',area_thresh=1000)
     else:
         my_map = Basemap(projection=proj,lat_0=33.,lon_0=-125.,resolution='l',area_thresh=1000.,
                 llcrnrlon=-130.,llcrnrlat=25,urcrnrlon=-65., urcrnrlat=50)
     my_map.drawcoastlines(color='grey')
     my_map.drawcountries(color='grey')
     my_map.drawstates(color='grey')
     my_map.drawlsmask(land_color='white',ocean_color='white')
     my_map.drawmapboundary() #my_map.fillcontinents(color='black')
     x,y = my_map(np.array(self.data['lon']),np.array(self.data['lat']))
     my_map.plot(x,y,'ro',markersize=3,alpha=.4,linewidth=0)
     # defined up front so the return below also works when no clustering is requested
     output = None
     if kmeans:
         # k-means clustering algorithm---see wikipedia for details
         # presumably the remaining columns are (lat, lon), given how output
         # is indexed below -- verify against the layout of self.data
         data_in = self.data.drop(['id','clv','level'],axis=1)
         # vq is scipy's vector quantization module
         output,distortion = vq.kmeans(data_in,kmeans)
         x1,y1 = my_map(output[:,1],output[:,0])
         my_map.plot(x1,y1,'ko',markersize=20,alpha=.4,linewidth=0)
     plt.show()
     return output

开发者ID:dgmiller，项目名称:portfolio，代码行数:29，代码来源:tools.py

示例14: test_large_features

    def test_large_features(self):
        # Generate a data set with large values, and run kmeans on it to
        # (regression for 1077).
        d = 300
        n = 100

        m1 = np.random.randn(d)
        m2 = np.random.randn(d)
        x = 10000 * np.random.randn(n, d) - 20000 * m1
        y = 10000 * np.random.randn(n, d) + 20000 * m2

        data = np.empty((x.shape[0] + y.shape[0], d), np.double)
        data[:x.shape[0]] = x
        data[x.shape[0]:] = y

        kmeans(data, 2)

开发者ID:beiko-lab，项目名称:gengis，代码行数:16，代码来源:test_vq.py

示例15: classify_kmeans

def classify_kmeans(infile, clusternumber):
    '''
    Cluster the pixel values of a raster with k-means and write each pixel's
    centroid value back into band 1 of the same file.

    infile: path of the raster; it is opened in update mode and modified.
    clusternumber: number of clusters passed to kmeans.

    Raises IOError when the raster cannot be opened.
    '''

    # Load infile in data array
    driver = gdal.GetDriverByName('GTiff')
    driver.Register()
    ds = gdal.Open(infile, gdal.GA_Update)
    if ds is None:
        # gdal.Open can return None on failure; fail with a clear message
        # instead of an AttributeError on the next line.
        raise IOError("Could not open raster for update: %s" % infile)
    databand = ds.GetRasterBand(1)

    # Read input raster into array
    data = ds.ReadAsArray()

    # The reshape below only works for a 2-D (single band) array.
    rows, cols = data.shape[0], data.shape[1]
    pixel = numpy.reshape(data, (rows * cols))
    centroids, variance = kmeans(pixel, clusternumber)
    code, distance = vq(pixel, centroids)
    centers_idx = numpy.reshape(code, (rows, cols))
    # Replace every pixel with the value of the centroid it was assigned to.
    clustered = centroids[centers_idx]

    # Write outraster to file
    databand.WriteArray(clustered)
    databand.FlushCache()

    # Close file
    databand = None
    clustered = None
    ds = None

开发者ID:NatPi，项目名称:RemoteSensing，代码行数:30，代码来源:GlacierSurfaceType_kmeans.py

注：本文中的scipy.cluster.vq.kmeans函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。