

Python vq.whiten Function Code Examples

This article collects typical usage examples of the scipy.cluster.vq.whiten function in Python. If you are wondering what the whiten function does, how to call it, or what real-world usage looks like, the curated code examples below may help.


The following presents 15 code examples of the whiten function, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
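Before diving into the examples, a minimal sketch of what whiten itself does: it rescales each column (feature) of an observation matrix by that column's standard deviation, so every feature ends up with unit variance. The data below is made up purely for illustration.

import numpy as np
from scipy.cluster.vq import whiten

# Hypothetical observation matrix: rows are observations, columns are features.
obs = np.array([[1.9, 2.3],
                [1.5, 2.5],
                [0.8, 0.6]])

# whiten divides each column by its standard deviation and returns a new array.
whitened = whiten(obs)
print(whitened.std(axis=0))  # every column now has unit standard deviation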

Example 1: sent_integrate

from scipy.cluster.vq import kmeans, vq, whiten

def sent_integrate(sim_matrix, n_class):
    # Normalize each dimension to unit variance; whiten returns a new
    # array rather than modifying its argument in place.
    sim_matrix = whiten(sim_matrix)

    centroid, distortion = kmeans(sim_matrix, n_class, iter=100, thresh=1e-05)
    labels, dist = vq(sim_matrix, centroid)
    return labels
Developer: NuitNoir, Project: MachineLearning, Lines: 7, Source: qiita_doc_sim.py
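A quick usage sketch for the function above, with made-up data (a random symmetric similarity matrix) and assuming the imports shown:

import numpy as np

sims = np.random.rand(20, 20)
sims = (sims + sims.T) / 2            # hypothetical symmetric similarity matrix
labels = sent_integrate(sims, n_class=3)
print(labels)                         # one cluster label per row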

Example 2: parse

import numpy as np
from scipy.cluster.vq import whiten

def parse(data_file_name, predict_index, ignore_indices, **options):
	data_file = open(data_file_name, 'r')
	lines = data_file.read().splitlines()
	x = []
	y = []
	for i, line in enumerate(lines):
		# skip the two header lines
		if i == 0 or i == 1:
			continue
		datas = line.split()
		x_category = []
		for j, data in enumerate(datas):  # j indexes columns, distinct from the row index i
			if j in ignore_indices:
				continue
			if j == predict_index:
				if data == 'T':
					y.append(1.0)
				elif data == 'F':
					y.append(0.0)
				else:
					y.append(float(data))
				continue
			x_category.append(float(data))
		x.append(x_category)
	x = whiten(np.array(x)) if options.get('whiten_x') else np.array(x)
	y = whiten(np.array(y)) if options.get('whiten_y') else np.array(y)
	x = x - x.mean() if options.get('mean_center_x') else x
	y = y - y.mean() if options.get('mean_center_y') else y
	return (x, y)
Developer: AustinStoneProjects, Project: statsHw2, Lines: 28, Source: regression.py

Example 3: clust_scatter

import numpy as np
from scipy.cluster.vq import whiten

def clust_scatter(samples, clusters, allocation_table, n):

    c = len(allocation_table[0])  # Columns
    r = len(allocation_table)  # Rows

    time_scat_square = 0
    mat_scatter = 0

    for j in range(0, c):  # clusters
        for t in range(0, 10):  # maturities
            for p in range(0, r):  # samples within a cluster
                index = allocation_table[p, j]
                if index != 0:
                    time_scat_square += samples.samples[index-1].scatter_maturity[t].scatter
            mat_scatter += time_scat_square**2
            time_scat_square = 0
        clusters.clusters[j].scatter = np.sqrt(mat_scatter - 10 * clusters.clusters[j].mean**2)
        mat_scatter = 0
        if n == 0 or n == 4999:
            print('clust scatter : ' + str(clusters.clusters[j].scatter))

    # Normalize clusters' scatter; whiten returns the scaled vector,
    # so its result must be kept.
    vec = np.zeros(c)  # one scatter value per cluster column
    for j in range(0, c):
        vec[j] = clusters.clusters[j].scatter

    vec = whiten(vec)
    for j in range(0, c):
        clusters.clusters[j].scatter = vec[j]

    return clusters
Developer: ArtMgn, Project: k-means-pca, Lines: 31, Source: distances_lib.py

Example 4: test1

    def test1(self):
        print("TEST 1:----------------------------------------------------------------")
        features = np.array([[1.9, 2.3],
                          [1.5, 2.5],
                          [0.8, 0.6],
                          [0.4, 1.8],
                          [0.1, 0.1],
                          [0.2, 1.8],
                          [2.0, 0.5],
                          [0.3, 1.5],
                          [1.0, 1.0]])
        whitened = whiten(features)
        book = np.array((whitened[0], whitened[2]))
        numpy_result = kmeans(whitened, book)[0]
        print(numpy_result)
        print("")

        # Same data with a constant third column appended. Note that a
        # zero-variance column cannot be scaled; recent SciPy versions
        # leave such columns unchanged and emit a RuntimeWarning.
        features2 = np.array([[1.9, 2.3, 0],
                             [1.5, 2.5, 0],
                             [0.8, 0.6, 0],
                             [0.4, 1.8, 0],
                             [0.1, 0.1, 0],
                             [0.2, 1.8, 0],
                             [2.0, 0.5, 0],
                             [0.3, 1.5, 0],
                             [1.0, 1.0, 0]])
        whitened2 = whiten(features2)
        book2 = [whitened2[0], whitened2[2]]  # initial centroids must be 3-D points
        our_result = np.array(KMeans.k_means2(whitened2.tolist(), 2, book2).centroids)[:, :-1]
        print(our_result)
Developer: jwallp, Project: 151-Assignments, Lines: 30, Source: test_kmeans.py

Example 5: LexicalFeatures

def LexicalFeatures():
    """
    Compute feature vectors for word and punctuation features
    """
    num_chapters = len(chapters)
    fvs_lexical = np.zeros((num_chapters, 3), np.float64)
    fvs_punct = np.zeros((num_chapters, 3), np.float64)
    for e, ch_text in enumerate(chapters):
        # note: the nltk.word_tokenize includes punctuation
        tokens = nltk.word_tokenize(ch_text.lower())
        words = word_tokenizer.tokenize(ch_text.lower())
        sentences = sentence_tokenizer.tokenize(ch_text)
        vocab = set(words)
        words_per_sentence = np.array([len(word_tokenizer.tokenize(s))
                                       for s in sentences])

        # average number of words per sentence
        fvs_lexical[e, 0] = words_per_sentence.mean()
        # sentence length variation
        fvs_lexical[e, 1] = words_per_sentence.std()
        # Lexical diversity
        fvs_lexical[e, 2] = len(vocab) / float(len(words))

        # Commas per sentence
        fvs_punct[e, 0] = tokens.count(',') / float(len(sentences))
        # Semicolons per sentence
        fvs_punct[e, 1] = tokens.count(';') / float(len(sentences))
        # Colons per sentence
        fvs_punct[e, 2] = tokens.count(':') / float(len(sentences))

    # apply whitening to decorrelate the features
    fvs_lexical = whiten(fvs_lexical)
    fvs_punct = whiten(fvs_punct)

    return fvs_lexical, fvs_punct
Developer: sophie-greene, Project: repo, Lines: 35, Source: author.py

Example 6: kmeansCluster

    def kmeansCluster(self, layer, distance, number):
        import scipy
        import scipy.cluster.hierarchy as sch
        from scipy.cluster.vq import vq, kmeans, whiten
        import numpy as np

        count = layer.featureCount()
        self.setProgressRange(count)
        points = []
        for f in layer.getFeatures():
            geom = f.geometry()
            x = geom.asPoint().x()
            y = geom.asPoint().y()
            point = []
            point.append(x)
            point.append(y)
            points.append(point)
            self.updateProgress()

        # supported pdist metrics here: 'euclidean', 'cityblock', 'hamming' ('cosine' also works)
        distances = {0: 'euclidean', 1: 'cityblock', 2: 'hamming'}
        disMat = sch.distance.pdist(points, distances.get(distance))
        Z = sch.linkage(disMat, method='average')
        P = sch.dendrogram(Z)
        cluster = sch.fcluster(Z, t=1, criterion='inconsistent')
        data = whiten(points)
        centroid = kmeans(data, number)[0]
        label = vq(data, centroid)[0]
        return centroid, label
Developer: GerrardYNWA, Project: KmeansClustering, Lines: 28, Source: kmeans_dialog.py

Example 7: sparse_run

def sparse_run(g, pos1):

    g2 = sparse_graph(g)

    # pos1 = nx.spring_layout(g)
    pos2 = nx.spring_layout(g2)

    features = []
    for u in g2.nodes_iter():  # in networkx >= 2.0 this would be g2.nodes()
        features.append(pos2[u])
    print("features:", len(features))
    features = ny.array(features)

    method = 2
    if method == 1:
        whitened = whiten(features)
        book = ny.array((whitened[0], whitened[2]))
        km = kmeans(whitened, book)

        print(km)
    elif method == 2:
        n_digits = 4
        km = KMeans(init='k-means++', n_clusters=n_digits, n_init=10)
        res = km.fit(features)
        print(len(km.labels_), km.labels_)
        print(res)
    return km.labels_, g2
Developer: WeiliangXing, Project: Facebook-Data-Mining, Lines: 30, Source: analyze.py

Example 8: bow

def bow(images, codebook, clusters):
	out = images
	temp = []

	print("-" * 60)
	print("Creating the pseudo database.")
	for im in images:
		c = Counter()
		bag, dist = vq(whiten(im[1]), codebook)

		for word in bag:
			c[word] += 1

		# Creating histograms: take the total before normalizing, since
		# updating c in place would change sum(c.values()) mid-loop.
		total = sum(c.values())
		for i in range(clusters):
			if i in c:
				c[i] = c[i] / total
			else:
				c[i] = 0

		temp.append(c)

	for i in range(len(temp)):
		out[i].append(temp[i])

	print("Done.\n")
	return out
Developer: MariaBarrett, Project: VIPExam2, Lines: 27, Source: main.py

Example 9: kmeans

def kmeans(d, headers, K, metric, whiten=True, categories=None):
    '''Takes in a Data object, a set of headers, and the number of clusters to create
    Computes and returns the codebook, codes and representation errors.
    If given an Nx1 matrix of categories, it uses the category labels
    to calculate the initial cluster means.
    '''

    # assign to A the result getting the data given the headers
    try:
        A = d.get_data(headers)
    except AttributeError:
        A = d

    if whiten:
        W = vq.whiten(A)
    else:
        W = A

    codebook = kmeans_init(W, K, categories)

    # assign to codebook, codes, errors, the result of calling kmeans_algorithm with W and codebook
    codebook, codes, errors = kmeans_algorithm(W, codebook, metric)

    # return the codebook, codes, and representation error
    return codebook, codes, errors
Developer: ymohanty, Project: data-analysis, Lines: 25, Source: analysis.py
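The helpers kmeans_init and kmeans_algorithm are not shown in this snippet. Purely to illustrate the idea in the docstring (category labels seeding the initial cluster means), a hypothetical kmeans_init might look like the sketch below; the real project's version may differ.

import numpy as np

def kmeans_init(W, K, categories=None):
    # Hypothetical sketch: when category labels are supplied, seed each
    # cluster mean with the mean of the rows sharing that label;
    # otherwise pick K random rows as the initial codebook.
    if categories is not None:
        labels = np.asarray(categories).ravel()
        return np.array([W[labels == c].mean(axis=0)
                         for c in np.unique(labels)])
    idx = np.random.choice(W.shape[0], K, replace=False)
    return np.asarray(W)[idx]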

Example 10: normalize

def normalize(data, mode="pca", n=10):
  """Normalize and reduce data by PCA."""

  if mode == "whiten":
    res = whiten(data)
  elif mode == "pca":
    v, P, res = pca_train(data, n, 0, 1)
    print(v)
    print("eigen ratio is", v[n-1] / v[0])
  elif mode == "pca_whiten":
    v, P, proj = pca_train(data, n, 0, 1)
    res = whiten(proj)
  else:
    res = np.array(data)

  return res
Developer: lucidfrontier45, Project: PyNumPDB, Lines: 16, Source: clustering.py

Example 11: kmeans

def kmeans(dataset, n_cluster = 625):
    import numpy
    from scipy.cluster.vq import kmeans2, whiten
    feature_matrix = numpy.asarray(dataset)
    whitened = whiten(feature_matrix)
    _, cluster_labels = kmeans2(whitened, n_cluster, iter = 100)
    return cluster_labels
Developer: persistforever, Project: sentenceEmbedding, Lines: 7, Source: cluster.py

Example 12: clustering_scipy_kmeans

def clustering_scipy_kmeans(features, n_clust = 8):
  """
  Whiten the features, inspect an elbow plot, then compare kmeans and kmeans2.
  """
  whitened = whiten(features)
  print(whitened.shape)

  # elbow plot: distortion for k = 1..11
  initial = [kmeans(whitened, i) for i in np.arange(1, 12)]
  plt.plot([var for (cent, var) in initial])
  plt.show()

  #cent, var = initial[3]
  ##use vq() to get as assignment for each obs.
  #assignment,cdist = vq(whitened,cent)
  #plt.scatter(whitened[:,0], whitened[:,1], c=assignment)
  #plt.show()

  codebook, distortion = kmeans(whitened, n_clust)
  print(codebook, distortion)
  assigned_label, dist = vq(whitened, codebook)
  for ii in range(8):
    plt.subplot(4, 2, ii + 1)
    plt.plot(codebook[ii])
  plt.show()

  centroid, label = kmeans2(whitened, n_clust, minit = 'points')
  print(centroid, label)
  for ii in range(8):
    plt.subplot(4, 2, ii + 1)  # subplot indices are 1-based
    plt.plot(centroid[ii])
  plt.show()
Developer: kaustuvkanti, Project: Experiments, Lines: 30, Source: dump_transition_for_clustering.py

Example 13: _get_cluster

    def _get_cluster(self, feat_array, k):
        # Normalise the feature array
        whitened = whiten(feat_array)

        codebook, _ = kmeans(whitened, k, iter=self.iter)
        code, _ = vq(whitened, codebook)
        return code
Developer: realstraw, Project: abathur, Lines: 7, Source: cluster.py

Example 14: do_cluster

def do_cluster(cluster_count, filename):
    """Use the scipy k-means clustering algorithms to cluster data.

    Return the item names for the smallest cluster.
    """
    input = Data(filename, -1)
    d = vq.whiten(input.data.transpose())
    codebook, avg_distortion = vq.kmeans(d, cluster_count, 150)
    codes, distortions = vq.vq(d, codebook)

    # codes is now a vector of cluster assignments
    # it is ordered the same as data elements in input

    c_sizes = {}
    small_i = 0
    if DEBUG: print("Cluster Sizes:", end=" ")
    for i in range(cluster_count):
        c_sizes[i] = count(codes, i)
        if DEBUG: print(c_sizes[i], end=" ")
    if DEBUG: print()
    for i in range(cluster_count):
        if c_sizes[i] < c_sizes[small_i]:
            small_i = i

    if DEBUG: print("Smallest cluster size: " + str(c_sizes[small_i]))

    return [input._names[i] for i in findall(codes, small_i)]
Developer: jrbl, Project: wikilytics, Lines: 27, Source: jkm.py

Example 15: cluster

    def cluster(self, graph):
        """
        Take a graph and cluster using the method in "On spectral clustering: analysis
        and an algorithm" by Ng et al., 2001.

        :param graph: the graph to cluster
        :type graph: :class:`apgl.graph.AbstractMatrixGraph`

        :returns:  An array of size graph.getNumVertices() of cluster membership 
        """
        L = graph.normalisedLaplacianSym()

        omega, Q = numpy.linalg.eig(L)
        inds = numpy.argsort(omega)

        #First normalise rows, then columns
        standardiser = Standardiser()
        V = standardiser.normaliseArray(Q[:, inds[0:self.k]].T).T
        V = vq.whiten(V)
        #Using kmeans2 here seems to result in a high variance
        #in the quality of clustering. Therefore stick to kmeans
        centroids, clusters = vq.kmeans(V, self.k, iter=self.numIterKmeans)
        clusters, distortion = vq.vq(V, centroids)

        return clusters
Developer: charanpald, Project: sandbox, Lines: 25, Source: SpectralClusterer.py
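Taken together, the examples share one pattern: whiten the features, build a codebook with kmeans, then assign observations with vq. A minimal end-to-end sketch of that pipeline, using made-up data:

import numpy as np
from scipy.cluster.vq import whiten, kmeans, vq

rng = np.random.default_rng(0)
features = rng.normal(size=(100, 4))        # hypothetical observations

whitened = whiten(features)                 # unit variance per column
codebook, distortion = kmeans(whitened, 3)  # 3 centroids via k-means
labels, dists = vq(whitened, codebook)      # nearest centroid per observation
print(labels[:10])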


Note: the scipy.cluster.vq.whiten examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are drawn from open-source projects contributed by many developers; copyright belongs to the original authors. For distribution and use, please refer to the corresponding project's License. Do not reproduce without permission.