This article collects typical usage examples of the scipy.cluster.vq.whiten function in Python. If you are wondering exactly what whiten does, how to call it, or what it looks like in real code, the curated examples below should help.
The following presents 15 code examples of the whiten function, ordered by popularity by default.
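Before the examples, here is a minimal sketch of what whiten computes (the data below is invented for illustration): it rescales each feature column to unit variance by dividing by that column's standard deviation, and it does not subtract the mean.

import numpy as np
from scipy.cluster.vq import whiten

features = np.array([[1.9, 2.3],
                     [1.5, 2.5],
                     [0.8, 0.6]])
whitened = whiten(features)
# Each column is divided by its standard deviation, so every feature
# ends up with unit variance; the data is not mean-centered.
assert np.allclose(whitened, features / features.std(axis=0))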
Example 1: sent_integrate
from scipy.cluster.vq import whiten, kmeans, vq

def sent_integrate(sim_matrix, n_class):
    # Equalize the variance of each dimension. whiten() returns a new,
    # rescaled array; it does not modify its argument in place.
    sim_matrix = whiten(sim_matrix)
    centroid, distortion = kmeans(sim_matrix, n_class, iter=100, thresh=1e-05)
    labels, dist = vq(sim_matrix, centroid)
    return labels
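A hypothetical call, assuming sim_matrix is any observations-by-features array (despite the name, nothing in the function requires it to be square); the values are random purely for illustration:

import numpy as np

rng = np.random.RandomState(0)
sim_matrix = rng.rand(20, 20)          # e.g. pairwise sentence similarities
labels = sent_integrate(sim_matrix, n_class=3)
print(labels)                          # one cluster id per row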
Example 2: parse
import numpy as np
from scipy.cluster.vq import whiten

def parse(data_file_name, predict_index, ignore_indices, **options):
    data_file = open(data_file_name, 'r')
    lines = data_file.read().splitlines()
    x = []
    y = []
    for i, line in enumerate(lines):
        if i == 0 or i == 1:  # skip the two header lines
            continue
        datas = line.split()
        x_category = []
        # use a separate index so the outer loop's `i` is not shadowed
        for j, data in enumerate(datas):
            if j in ignore_indices:
                continue
            if j == predict_index:
                if data == 'T':
                    y.append(1.0)
                elif data == 'F':
                    y.append(0.0)
                else:
                    y.append(float(data))
                continue
            x_category.append(float(data))
        x.append(x_category)
    x = whiten(np.array(x)) if options.get('whiten_x') else np.array(x)
    y = whiten(np.array(y)) if options.get('whiten_y') else np.array(y)
    x = x - x.mean() if options.get('mean_center_x') else x
    y = y - y.mean() if options.get('mean_center_y') else y
    return (x, y)
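A hypothetical call; the file contents below are invented to match the format parse() implies (two header lines, then whitespace-separated fields, with T/F in the predicted column):

with open('sample.dat', 'w') as f:
    f.write('col_a outcome col_b\n')   # header line 1 (skipped)
    f.write('units  T/F    units\n')   # header line 2 (skipped)
    f.write('1.0 T 2.0\n')
    f.write('0.5 F 3.0\n')

x, y = parse('sample.dat', predict_index=1, ignore_indices={}, whiten_x=True)
print(x)   # whitened 2-column feature matrix
print(y)   # [1.0, 0.0] from the T/F column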
Example 3: clust_scatter
import numpy as np
from scipy.cluster.vq import whiten

def clust_scatter(samples, clusters, allocation_table, n):
    c = len(allocation_table[0])  # columns
    r = len(allocation_table)     # rows
    time_scat_square = 0
    mat_scatter = 0
    for j in range(0, c):  # clusters
        for t in range(0, 10):  # maturities
            for p in range(0, r):  # samples within a cluster
                index = allocation_table[p, j]
                if index != 0:
                    time_scat_square += samples.samples[index - 1].scatter_maturity[t].scatter
            mat_scatter += time_scat_square ** 2
            time_scat_square = 0
        clusters.clusters[j].scatter = np.sqrt(mat_scatter - 10 * clusters.clusters[j].mean ** 2)
        mat_scatter = 0
        if n == 0 or n == 4999:
            print('clust scatter : ' + str(clusters.clusters[j].scatter))
    # Normalize the clusters' scatter: one entry per cluster. whiten()
    # returns the rescaled array rather than modifying vec in place.
    vec = np.zeros(c)
    for j in range(0, c):
        vec[j] = clusters.clusters[j].scatter
    vec = whiten(vec)
    for j in range(0, c):
        clusters.clusters[j].scatter = vec[j]
    return clusters
Example 4: test1
import numpy as np
from scipy.cluster.vq import whiten, kmeans
# KMeans below is the project's own module, not scikit-learn

def test1(self):
    print("TEST 1:----------------------------------------------------------------")
    features = np.array([[1.9, 2.3],
                         [1.5, 2.5],
                         [0.8, 0.6],
                         [0.4, 1.8],
                         [0.1, 0.1],
                         [0.2, 1.8],
                         [2.0, 0.5],
                         [0.3, 1.5],
                         [1.0, 1.0]])
    whitened = whiten(features)
    book = np.array((whitened[0], whitened[2]))
    numpy_result = kmeans(whitened, book)[0]
    print(numpy_result)
    print("")
    features2 = np.array([[1.9, 2.3, 0],
                          [1.5, 2.5, 0],
                          [0.8, 0.6, 0],
                          [0.4, 1.8, 0],
                          [0.1, 0.1, 0],
                          [0.2, 1.8, 0],
                          [2.0, 0.5, 0],
                          [0.3, 1.5, 0],
                          [1.0, 1.0, 0]])
    # the constant third column makes whiten() warn about zero standard deviation
    whitened2 = whiten(features2)
    # the initial book must come from the 3-column whitened2 array, not the
    # 2-column whitened array, so the dimensions match
    book2 = [whitened2[0], whitened2[2]]
    our_result = np.array(KMeans.k_means2(whitened2.tolist(), 2, book2).centroids)[:, :-1]
    print(our_result)
Example 5: LexicalFeatures
import nltk
import numpy as np
from scipy.cluster.vq import whiten
# chapters, word_tokenizer and sentence_tokenizer are defined elsewhere
# in the source project

def LexicalFeatures():
    """
    Compute feature vectors for word and punctuation features
    """
    num_chapters = len(chapters)
    fvs_lexical = np.zeros((num_chapters, 3), np.float64)
    fvs_punct = np.zeros((num_chapters, 3), np.float64)
    for e, ch_text in enumerate(chapters):
        # note: nltk.word_tokenize includes punctuation
        tokens = nltk.word_tokenize(ch_text.lower())
        words = word_tokenizer.tokenize(ch_text.lower())
        sentences = sentence_tokenizer.tokenize(ch_text)
        vocab = set(words)
        words_per_sentence = np.array([len(word_tokenizer.tokenize(s))
                                       for s in sentences])
        # average number of words per sentence
        fvs_lexical[e, 0] = words_per_sentence.mean()
        # sentence length variation
        fvs_lexical[e, 1] = words_per_sentence.std()
        # lexical diversity
        fvs_lexical[e, 2] = len(vocab) / float(len(words))
        # commas per sentence
        fvs_punct[e, 0] = tokens.count(',') / float(len(sentences))
        # semicolons per sentence
        fvs_punct[e, 1] = tokens.count(';') / float(len(sentences))
        # colons per sentence
        fvs_punct[e, 2] = tokens.count(':') / float(len(sentences))
    # rescale each feature to unit variance (note: scipy's whiten
    # rescales but does not decorrelate)
    fvs_lexical = whiten(fvs_lexical)
    fvs_punct = whiten(fvs_punct)
    return fvs_lexical, fvs_punct
Example 6: kmeansCluster
def kmeansCluster(self, layer, distance, number):
    import scipy
    import scipy.cluster.hierarchy as sch
    from scipy.cluster.vq import vq, kmeans, whiten
    import numpy as np
    count = layer.featureCount()
    self.setProgressRange(count)
    points = []
    for f in layer.getFeatures():
        geom = f.geometry()
        x = geom.asPoint().x()
        y = geom.asPoint().y()
        point = []
        point.append(x)
        point.append(y)
        points.append(point)
        self.updateProgress()
    distances = {0: 'euclidean', 1: 'cityblock', 2: 'hamming'}
    disMat = sch.distance.pdist(points, distances.get(distance))
    # hierarchical clustering, computed for the dendrogram display;
    # only the k-means result below is returned
    Z = sch.linkage(disMat, method='average')
    P = sch.dendrogram(Z)
    cluster = sch.fcluster(Z, t=1, criterion='inconsistent')
    data = whiten(points)
    centroid = kmeans(data, number)[0]
    label = vq(data, centroid)[0]
    return centroid, label
Example 7: sparse_run
import numpy as ny
import networkx as nx
from scipy.cluster.vq import whiten, kmeans
from sklearn.cluster import KMeans
# sparse_graph is defined elsewhere in the source project

def sparse_run(g, pos1):
    g2 = sparse_graph(g)
    # pos1 = nx.spring_layout(g)
    pos2 = nx.spring_layout(g2)
    features = []
    # nodes_iter() is the networkx 1.x API (use nodes() on networkx 2+)
    for u in g2.nodes_iter():
        features.append(pos2[u])
    print("features:", len(features))
    features = ny.array(features)
    method = 2
    if method == 1:
        whitened = whiten(features)
        book = ny.array((whitened[0], whitened[2]))
        km = kmeans(whitened, book)
        print(km)
    elif method == 2:
        n_digits = 4
        km = KMeans(init='k-means++', n_clusters=n_digits, n_init=10)
        res = km.fit(features)
        print(len(km.labels_), km.labels_)
        print(res)
    return km.labels_, g2
Example 8: bow
from collections import Counter
from scipy.cluster.vq import vq, whiten

def bow(images, codebook, clusters):
    out = images
    temp = []
    print("-" * 60)
    print("Creating the pseudo database.")
    for im in images:
        c = Counter()
        bag, dist = vq(whiten(im[1]), codebook)
        for word in bag:
            c[word] += 1
        # Creating histograms: compute the total once, before normalising,
        # otherwise the denominator changes as entries are overwritten.
        total = float(sum(c.values()))
        for i in range(clusters):
            if i in c:
                c[i] = c[i] / total
            else:
                c[i] = 0
        temp.append(c)
    for i in range(len(temp)):
        out[i].append(temp[i])
    print("Done.\n")
    return out
Example 9: kmeans
from scipy.cluster import vq

def kmeans(d, headers, K, metric, whiten=True, categories=None):
    '''Takes in a Data object, a set of headers, and the number of clusters to create.
    Computes and returns the codebook, codes and representation errors.
    If given an Nx1 matrix of categories, it uses the category labels
    to calculate the initial cluster means.
    '''
    # A is the data for the given headers; fall back to d itself
    # if it is already an array
    try:
        A = d.get_data(headers)
    except AttributeError:
        A = d
    if whiten:
        W = vq.whiten(A)
    else:
        W = A
    # kmeans_init and kmeans_algorithm are the project's own helpers
    codebook = kmeans_init(W, K, categories)
    codebook, codes, errors = kmeans_algorithm(W, codebook, metric)
    # return the codebook, codes, and representation error
    return codebook, codes, errors
Example 10: normalize
import numpy as np
from scipy.cluster.vq import whiten
# pca_train is the project's own PCA helper

def normalize(data, mode="pca", n=10):
    """Normalize and reduce data by PCA."""
    if mode == "whiten":
        res = whiten(data)
    elif mode == "pca":
        v, P, res = pca_train(data, n, 0, 1)
        print(v)
        print("eigen ratio is", v[n - 1] / v[0])
    elif mode == "pca_whiten":
        v, P, proj = pca_train(data, n, 0, 1)
        res = whiten(proj)
    else:
        res = np.array(data)
    return res
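Of the branches above, only mode="whiten" is self-contained (pca_train is project-specific). A quick check with made-up data:

import numpy as np

res = normalize(np.random.rand(30, 4), mode="whiten")
print(res.std(axis=0))   # approximately 1.0 for every column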
Example 11: kmeans
import numpy

def kmeans(dataset, n_cluster=625):
    from scipy.cluster.vq import kmeans2, whiten
    feature_matrix = numpy.asarray(dataset)
    whitened = whiten(feature_matrix)
    # the cluster count comes from the n_cluster argument
    _, cluster_labels = kmeans2(whitened, n_cluster, iter=100)
    return cluster_labels
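A hypothetical call with synthetic data, just to show the shapes involved:

import numpy

data = numpy.random.rand(100, 5)       # 100 observations, 5 features
labels = kmeans(data, n_cluster=10)    # far fewer clusters than the default
print(labels.shape)                    # (100,) -- one cluster id per row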
Example 12: clustering_scipy_kmeans
import numpy as np
import matplotlib.pyplot as plt
from scipy.cluster.vq import whiten, kmeans, kmeans2, vq

def clustering_scipy_kmeans(features, n_clust=8):
    """
    Cluster whitened features with k-means: first plot the distortion for
    k = 1..11 (an elbow plot), then plot the codebook vectors obtained
    from both kmeans and kmeans2.
    """
    whitened = whiten(features)
    print(whitened.shape)
    initial = [kmeans(whitened, i) for i in np.arange(1, 12)]
    plt.plot([var for (cent, var) in initial])
    plt.show()
    #cent, var = initial[3]
    ##use vq() to get an assignment for each obs.
    #assignment, cdist = vq(whitened, cent)
    #plt.scatter(whitened[:, 0], whitened[:, 1], c=assignment)
    #plt.show()
    codebook, distortion = kmeans(whitened, n_clust)
    print(codebook, distortion)
    assigned_label, dist = vq(whitened, codebook)
    for ii in range(8):
        plt.subplot(4, 2, ii + 1)
        plt.plot(codebook[ii])
    plt.show()
    centroid, label = kmeans2(whitened, n_clust, minit='points')
    print(centroid, label)
    for ii in range(8):
        # subplot indices are 1-based
        plt.subplot(4, 2, ii + 1)
        plt.plot(centroid[ii])
    plt.show()
Example 13: _get_cluster
from scipy.cluster.vq import whiten, kmeans, vq

def _get_cluster(self, feat_array, k):
    # Normalise the feature array
    whitened = whiten(feat_array)
    codebook, _ = kmeans(whitened, k, iter=self.iter)
    code, _ = vq(whitened, codebook)
    return code
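Example 13 is the whiten → kmeans → vq pipeline in its most compact form. A self-contained version with synthetic data (all names here are invented for illustration):

import numpy as np
from scipy.cluster.vq import whiten, kmeans, vq

feat_array = np.random.rand(50, 4)                    # 50 observations, 4 features
whitened = whiten(feat_array)                         # unit variance per feature
codebook, distortion = kmeans(whitened, 3, iter=20)   # 3 centroids
code, dist = vq(whitened, codebook)                   # nearest centroid per row
print(code)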
Example 14: do_cluster
from scipy.cluster import vq
# Data, count, findall and DEBUG are defined elsewhere in the source project

def do_cluster(cluster_count, filename):
    """Use the scipy k-means clustering algorithms to cluster data.
    Return the item names for the smallest cluster.
    """
    input = Data(filename, -1)
    d = vq.whiten(input.data.transpose())
    codebook, avg_distortion = vq.kmeans(d, cluster_count, 150)  # iter=150
    codes, distortions = vq.vq(d, codebook)
    # codes is now a vector of cluster assignments,
    # ordered the same as the data elements in input
    c_sizes = {}
    small_i = 0
    if DEBUG: print("Cluster Sizes:", end=' ')
    for i in range(cluster_count):
        c_sizes[i] = count(codes, i)
        if DEBUG: print(c_sizes[i], end=' ')
    if DEBUG: print()
    for i in range(cluster_count):
        if c_sizes[i] < c_sizes[small_i]:
            small_i = i
    if DEBUG: print("Smallest cluster size: " + str(c_sizes[small_i]))
    return [input._names[i] for i in findall(codes, small_i)]
Example 15: cluster
import numpy
from scipy.cluster import vq
# Standardiser is defined elsewhere in the source project

def cluster(self, graph):
    """
    Take a graph and cluster it using the method in "On Spectral
    Clustering: Analysis and an Algorithm" by Ng et al., 2001.

    :param graph: the graph to cluster
    :type graph: :class:`apgl.graph.AbstractMatrixGraph`

    :returns: An array of size graph.getNumVertices() of cluster membership
    """
    L = graph.normalisedLaplacianSym()
    omega, Q = numpy.linalg.eig(L)
    inds = numpy.argsort(omega)
    # First normalise rows, then columns
    standardiser = Standardiser()
    V = standardiser.normaliseArray(Q[:, inds[0:self.k]].T).T
    V = vq.whiten(V)
    # Using kmeans2 here seems to result in a high variance
    # in the quality of clustering. Therefore stick to kmeans.
    centroids, clusters = vq.kmeans(V, self.k, iter=self.numIterKmeans)
    clusters, distortion = vq.vq(V, centroids)
    return clusters