This article collects typical usage examples of the scipy.cluster.vq.whiten method in Python. If you are unsure what vq.whiten does, how to call it, or what it looks like in real code, the curated examples below may help. You can also read more about the module it belongs to, scipy.cluster.vq.
Nine code examples of the vq.whiten method are shown below, ordered by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
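As a quick orientation before the examples: whiten rescales each feature (column) of an observation matrix to unit variance by dividing it by its standard deviation. A minimal standalone sketch (the small array below is made up for illustration):
import numpy as np
from scipy.cluster.vq import whiten

features = np.array([[1.9, 2.3],
                     [1.5, 2.5],
                     [0.8, 0.6]])

whitened = whiten(features)          # each column divided by its standard deviation
print(whitened.std(axis=0))          # -> approximately [1. 1.]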
Example 1: Kmeans
# Required imports: from scipy.cluster import vq [as alias]
# Or: from scipy.cluster.vq import whiten [as alias]
def Kmeans(file, vocabfile, k):
    # embeddings, word_id, id_word, representatives and l2_nearest come from
    # elsewhere in the original script (built from file and vocabfile).
    np.random.seed((1000, 2000))
    whitened = whiten(embeddings)
    codebook, distortion = kmeans(whitened, k)
    clusters = [l2_nearest(embeddings, c, representatives + 1) for c in codebook]

    # output
    print(len(codebook), distortion)
    for centroid in codebook:
        print(' '.join([str(x) for x in centroid]))
    print()
    for cluster in clusters:
        print(' '.join([id_word[i] for i, d in cluster]).encode('utf-8'))
    print()

    # assign clusters to words
    codes, _ = vq(embeddings, codebook)
    for w, c in zip(word_id.keys(), codes):
        print(w, c)
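A hypothetical sketch of the scaffolding mentioned in the comment above, purely for illustration (the helper and toy data below are invented, not taken from the original script):
import numpy as np
from scipy.cluster.vq import whiten, kmeans, vq

def l2_nearest(embeddings, centroid, n):
    # Hypothetical helper: the n (index, distance) pairs closest to centroid.
    dists = np.linalg.norm(embeddings - centroid, axis=1)
    order = np.argsort(dists)[:n]
    return [(i, dists[i]) for i in order]

word_id = {'cat': 0, 'dog': 1, 'car': 2, 'bus': 3}    # toy vocabulary
id_word = {i: w for w, i in word_id.items()}
embeddings = np.random.rand(len(word_id), 50)         # toy embedding matrix
representatives = 2                                   # neighbours printed per cluster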
Example 2: findClusters_kmeans
# Required imports: from scipy.cluster import vq [as alias]
# Or: from scipy.cluster.vq import whiten [as alias]
def findClusters_kmeans(data):
    '''
        Cluster data using k-means
    '''
    # whiten the observations
    data_w = vq.whiten(data)

    # create the classifier object
    kmeans, labels = vq.kmeans2(
        data_w,
        k=4,
        iter=30
    )

    # fit the data
    return kmeans, labels
# the file name of the dataset
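A hypothetical call on synthetic data (the original script goes on to read its dataset from a file, which is what the trailing comment above refers to):
import numpy as np
from scipy.cluster import vq

data = np.random.rand(200, 3)                   # stand-in for the real dataset
centroids, labels = findClusters_kmeans(data)
print(centroids.shape)                          # (4, 3): four centers in 3-D
print(np.bincount(labels))                      # points per cluster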
Example 3: compute_labels_kmeans
# Required imports: from scipy.cluster import vq [as alias]
# Or: from scipy.cluster.vq import whiten [as alias]
def compute_labels_kmeans(fmcs, k):
    # Removing the higher frequencies seems to yield better results
    fmcs = fmcs[:, fmcs.shape[1] // 2:]

    # Pre-process
    fmcs = np.log1p(fmcs)
    wfmcs = vq.whiten(fmcs)

    # Make sure we are not using more clusters than existing segments
    if k > fmcs.shape[0]:
        k = fmcs.shape[0]

    # K-means (KMeans is sklearn.cluster.KMeans, imported at module level in the original source)
    kmeans = KMeans(n_clusters=k, n_init=100)
    kmeans.fit(wfmcs)

    return kmeans.labels_
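A hypothetical call with a made-up feature matrix; the imports below mirror what the original module relies on:
import numpy as np
from scipy.cluster import vq
from sklearn.cluster import KMeans

fmcs = np.random.rand(20, 32)           # e.g. 20 segments x 32 coefficients
labels = compute_labels_kmeans(fmcs, k=4)
print(labels)                           # one cluster label per segment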
Example 4: argparser
# Required imports: from scipy.cluster import vq [as alias]
# Or: from scipy.cluster.vq import whiten [as alias]
def argparser():
    try:
        import argparse
    except ImportError:
        import compat.argparse as argparse

    ap = argparse.ArgumentParser()
    ap.add_argument('vectors', nargs=1, metavar='FILE', help='word vectors')
    ap.add_argument('-a', '--approximate', default=False, action='store_true',
                    help='filter by approximate similarity (with -t)')
    ap.add_argument('-i', '--min-index', default=0, type=int,
                    help='index of first word (default 0)')
    ap.add_argument('-M', '--metric', default=DEFAULT_METRIC,
                    choices=sorted(metrics.keys()),
                    help='distance metric to apply')
    ap.add_argument('-n', '--normalize', default=False, action='store_true',
                    help='normalize vectors to unit length')
    ap.add_argument('-r', '--max-rank', metavar='INT', default=None,
                    type=int, help='only consider r most frequent words')
    ap.add_argument('-t', '--threshold', metavar='FLOAT', default=None,
                    type=float, help='only output distances <= t')
    ap.add_argument('-T', '--tolerance', metavar='FLOAT', default=0.1,
                    type=float, help='approximation tolerance (with -a)')
    ap.add_argument('-w', '--whiten', default=False, action='store_true',
                    help='normalize features to unit variance')
    ap.add_argument('-W', '--words', default=False, action='store_true',
                    help='output words instead of indices')
    return ap
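To try the parser on its own, the two module-level names it references (metrics and DEFAULT_METRIC) have to exist; the stand-ins below are hypothetical and only serve to make the function callable:
metrics = {'cosine': None, 'euclidean': None}   # placeholder; the real mapping lives in the source module
DEFAULT_METRIC = 'cosine'

options = argparser().parse_args(['vectors.txt', '--whiten', '--words'])
print(options.whiten, options.words)            # True True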
Example 5: process_options
# Required imports: from scipy.cluster import vq [as alias]
# Or: from scipy.cluster.vq import whiten [as alias]
def process_options(args):
    options = argparser().parse_args(args)

    if options.max_rank is not None and options.max_rank < 1:
        raise ValueError('max-rank must be >= 1')
    if options.threshold is not None and options.threshold < 0.0:
        raise ValueError('threshold must be >= 0')
    if options.tolerance is not None and options.tolerance < 0.0:
        raise ValueError('tolerance must be >= 0')
    if options.approximate and not options.threshold:
        raise ValueError('approximate only makes sense with a threshold')
    if options.approximate and options.metric != 'cosine':
        raise NotImplementedError('approximate only supported for cosine')

    wv = wvlib.load(options.vectors[0], max_rank=options.max_rank)

    if options.normalize:
        logging.info('normalize vectors to unit length')
        wv.normalize()

    words, vectors = wv.words(), wv.vectors()

    if options.whiten:
        # whitening should be implemented in wvlib so it can be combined
        # with approximate similarity
        if options.approximate:
            raise NotImplementedError
        logging.info('normalize features to unit variance')
        vectors = whiten(vectors)

    return words, vectors, wv, options
Example 6: run_kmeans
# Required imports: from scipy.cluster import vq [as alias]
# Or: from scipy.cluster.vq import whiten [as alias]
def run_kmeans(self, X, K):
    """Runs k-means and returns the labels assigned to the data."""
    wX = vq.whiten(X)
    means, dist = vq.kmeans(wX, K, iter=100)
    labels, dist = vq.vq(wX, means)
    return means, labels
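The same whiten -> kmeans -> vq pipeline outside the class, on synthetic data (shapes chosen arbitrarily for illustration):
import numpy as np
from scipy.cluster import vq

X = np.random.rand(100, 2)
wX = vq.whiten(X)                           # features scaled to unit variance
means, dist = vq.kmeans(wX, 3, iter=100)    # cluster centers in whitened space
labels, _ = vq.vq(wX, means)                # nearest-center assignment per point
print(means.shape, labels.shape)            # e.g. (3, 2) (100,)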
Example 7: compute_bic
# Required imports: from scipy.cluster import vq [as alias]
# Or: from scipy.cluster.vq import whiten [as alias]
def compute_bic(self, D, means, labels, K, R):
    """Computes the Bayesian Information Criterion."""
    D = vq.whiten(D)
    Rn = D.shape[0]
    M = D.shape[1]

    if R == K:
        return 1

    # Maximum likelihood estimate (MLE) of the variance
    mle_var = 0
    for k in range(len(means)):
        X = D[np.argwhere(labels == k)]
        X = X.reshape((X.shape[0], X.shape[-1]))
        for x in X:
            mle_var += distance.euclidean(x, means[k])
            # print(x, means[k], mle_var)
    mle_var /= float(R - K)

    # Log-likelihood of the data
    l_D = - Rn / 2. * np.log(2 * np.pi) - (Rn * M) / 2. * np.log(mle_var) - \
        (Rn - K) / 2. + Rn * np.log(Rn) - Rn * np.log(R)

    # Free parameters of the BIC
    p = (K - 1) + M * K + mle_var

    # print("BIC:", l_D, p, R, K)

    # Return the BIC
    return l_D - p / 2. * np.log(R)
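For reference, the value returned above is an X-means style BIC score. In the notation of the code ($R_n$ whitened observations of dimension $M$, $K$ clusters, $R$ total points), the log-likelihood term is computed as
$\hat{l}(D) = -\frac{R_n}{2}\log(2\pi) - \frac{R_n M}{2}\log\hat{\sigma}^2 - \frac{R_n - K}{2} + R_n\log R_n - R_n\log R$
and the method returns $\hat{l}(D) - \frac{p}{2}\log R$, where $\hat{\sigma}^2$ is the variance estimate accumulated from the Euclidean distances to the cluster means and $p$ is the parameter count computed just above the return.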
Example 8: test_kmeans
# Required imports: from scipy.cluster import vq [as alias]
# Or: from scipy.cluster.vq import whiten [as alias]
def test_kmeans(K=5):
    """Test k-means with the synthetic data."""
    X = XMeans.generate_2d_data(K=4)
    wX = vq.whiten(X)
    dic, dist = vq.kmeans(wX, K, iter=100)

    plt.scatter(wX[:, 0], wX[:, 1])
    plt.scatter(dic[:, 0], dic[:, 1], color="m")
    plt.show()
Example 9: get_load_archetypes
# Required imports: from scipy.cluster import vq [as alias]
# Or: from scipy.cluster.vq import whiten [as alias]
def get_load_archetypes(Load, k=2, x='hour', y='dayofyear', plot_diagnostics=False):
    """Extract typical load profiles using k-means and vector quantization.
    The time scale of the archetypes depends on the selected dimensions (x, y);
    with the default values, daily archetypes are extracted.

    Parameters:
        Load (pd.Series): timeseries
        k (int): number of archetypes to identify and extract
        x (str): how the timeseries will be grouped; has to be an accessor of pd.DatetimeIndex
        y (str): similar to x, for the other axis
        plot_diagnostics (bool): if True, plot a figure with an overview of the results

    Returns:
        np.ndarray: dimensions (k, len(x))
    """
    from scipy.cluster.vq import whiten, kmeans, vq

    df = reshape_timeseries(Load, x=x, y=y, aggfunc='mean').astype(float)
    df_white = whiten(df)
    clusters_center, __ = kmeans(df_white, k)
    # undo the whitening so the archetypes are in the original units
    clusters_center_dewhitened = clusters_center.T * np.array([df.std(), ] * k).T

    if plot_diagnostics:
        try:
            import matplotlib.pyplot as plt
            clusters, _ = vq(df_white, clusters_center)
            cm = _n_colors_from_colormap(k)
            ax1 = df.T.plot(legend=False, alpha=.1,
                            color=[cm[i] for i in clusters])
            # Add colored cluster centers as lines
            ax1.set_prop_cycle('color', cm)
            ax1.plot(clusters_center_dewhitened, linewidth=3, linestyle='--')
            plt.figure()  # FIXME: works only with weekdays
            day_clusters = pd.DataFrame({y: Load.resample('d').mean().index.weekday,
                                         'clusters': clusters,
                                         'val': 1})
            x_labels = "Mon Tue Wed Thu Fri Sat Sun".split()
            day_clusters.pivot_table(columns=y, index='clusters',
                                     aggfunc='count').T.plot.bar(stacked=True)
            plt.gca().set_xticklabels(x_labels)
        except Exception:  # FIXME: specify exception
            print('Works only with daily profile clustering')

    return clusters_center_dewhitened
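The de-whitening step above reverses whiten column by column: whiten divides each column by its standard deviation, so multiplying the cluster centers back by the per-column standard deviations returns the archetypes to the original units. A standalone numpy illustration of that round trip (shapes are arbitrary):
import numpy as np
from scipy.cluster.vq import whiten, kmeans

profiles = np.random.rand(30, 24)                     # e.g. 30 days x 24 hourly values
white = whiten(profiles)                              # each column scaled to unit variance
centers, _ = kmeans(white, 2)
centers_dewhitened = centers * profiles.std(axis=0)   # back to the original units
print(centers_dewhitened.shape)                       # e.g. (2, 24)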