

Python vq.whiten Method Code Examples

This article collects typical usage examples of the scipy.cluster.vq.whiten method in Python. If you are wondering what exactly vq.whiten does, how to call it, or what it looks like in real code, the curated examples below may help. You can also explore further usage examples for its containing module, scipy.cluster.vq.


The following presents 9 code examples of the vq.whiten method, ordered by popularity.
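Before the examples, here is a minimal sketch of what whiten actually does (this snippet is not taken from any of the projects below): it divides each column of the observation matrix by that column's standard deviation, so that every feature has unit variance before clustering.

import numpy as np
from scipy.cluster.vq import whiten

# Toy observation matrix: rows are observations, columns are features.
obs = np.array([[1.0, 200.0],
                [2.0, 400.0],
                [3.0, 600.0],
                [4.0, 800.0]])

whitened = whiten(obs)

# whiten(obs) is equivalent to obs / obs.std(axis=0); after whitening,
# every column has (approximately) unit standard deviation.
print(whitened.std(axis=0))  # -> [1. 1.]

This is also why Example 9 below multiplies its cluster centres back by the per-column standard deviation: it returns from whitened space to the original units.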

Example 1: Kmeans

# Required import: from scipy.cluster import vq [as alias]
# Alternatively: from scipy.cluster.vq import whiten [as alias]
def Kmeans(file, vocabfile, k):
  # Excerpt from knn.py: `embeddings`, `word_id`, `id_word`, `representatives`
  # and `l2_nearest` are defined elsewhere in that module.
  np.random.seed((1000, 2000))
  whitened = whiten(embeddings)
  codebook, distortion = kmeans(whitened, k)
  clusters = [l2_nearest(embeddings, c, representatives+1) for c in codebook]
  # output
  print(len(codebook), distortion)
  for centroid in codebook:
    print(' '.join([str(x) for x in centroid]))
  print()
  for cluster in clusters:
    print(' '.join([id_word[i] for i, d in cluster]).encode('utf-8'))
  print()
  # assign clusters to words
  codes, _ = vq(embeddings, codebook)
  for w, c in zip(word_id.keys(), codes):
    print(w, c) 
Developer: attardi, Project: deepnl, Lines of code: 19, Source file: knn.py

Example 2: findClusters_kmeans

# Required import: from scipy.cluster import vq [as alias]
# Alternatively: from scipy.cluster.vq import whiten [as alias]
def findClusters_kmeans(data):
    '''
        Cluster data using k-means
    '''
    # whiten the observations
    data_w = vq.whiten(data)

    # create the classifier object
    kmeans, labels = vq.kmeans2(
        data_w,
        k=4,
        iter=30
    )

    # fit the data
    return kmeans, labels

# the file name of the dataset 
Developer: drabastomek, Project: practicalDataAnalysisCookbook, Lines of code: 20, Source file: clustering_kmeans_alternative.py
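As a quick usage sketch (not part of the original cookbook code; it only assumes numpy and the findClusters_kmeans function defined above), the function can be applied to any 2-D observation array. vq.kmeans2 returns the four centroids in whitened space together with one cluster label per row:

import numpy as np

rng = np.random.default_rng(0)
sample = rng.normal(size=(200, 3))        # 200 observations, 3 features

centroids, labels = findClusters_kmeans(sample)
print(centroids.shape)   # (4, 3): one centroid per cluster, in whitened space
print(labels.shape)      # (200,): cluster index assigned to each observation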

Example 3: compute_labels_kmeans

# Required import: from scipy.cluster import vq [as alias]
# Alternatively: from scipy.cluster.vq import whiten [as alias]
def compute_labels_kmeans(fmcs, k):
    # Removing the higher frequencies seems to yield better results
    fmcs = fmcs[:, fmcs.shape[1] // 2:]

    # Pre-process
    fmcs = np.log1p(fmcs)
    wfmcs = vq.whiten(fmcs)

    # Make sure we are not using more clusters than existing segments
    if k > fmcs.shape[0]:
        k = fmcs.shape[0]

    # K-means (KMeans is imported elsewhere in segmenter.py; the API matches sklearn.cluster.KMeans)
    kmeans = KMeans(n_clusters=k, n_init=100)
    kmeans.fit(wfmcs)

    return kmeans.labels_ 
Developer: urinieto, Project: msaf, Lines of code: 19, Source file: segmenter.py

Example 4: argparser

# Required import: from scipy.cluster import vq [as alias]
# Alternatively: from scipy.cluster.vq import whiten [as alias]
def argparser():
    try:
        import argparse
    except ImportError:
        import compat.argparse as argparse

    ap=argparse.ArgumentParser()
    ap.add_argument('vectors', nargs=1, metavar='FILE', help='word vectors')
    ap.add_argument('-a', '--approximate', default=False, action='store_true',
                    help='filter by approximate similarity (with -t)')
    ap.add_argument('-i', '--min-index', default=0, type=int,
                    help='index of first word (default 0)')
    ap.add_argument('-M', '--metric', default=DEFAULT_METRIC, 
                    choices=sorted(metrics.keys()),
                    help='distance metric to apply')
    ap.add_argument('-n', '--normalize', default=False, action='store_true',
                    help='normalize vectors to unit length')
    ap.add_argument('-r', '--max-rank', metavar='INT', default=None, 
                    type=int, help='only consider r most frequent words')
    ap.add_argument('-t', '--threshold', metavar='FLOAT', default=None,
                    type=float, help='only output distances <= t')
    ap.add_argument('-T', '--tolerance', metavar='FLOAT', default=0.1,
                    type=float, help='approximation tolerance (with -a)')
    ap.add_argument('-w', '--whiten', default=False, action='store_true',
                    help='normalize features to unit variance')
    ap.add_argument('-W', '--words', default=False, action='store_true',
                    help='output words instead of indices')
    return ap 
Developer: cambridgeltl, Project: link-prediction_with_deep-learning, Lines of code: 30, Source file: pairdist.py

Example 5: process_options

# Required import: from scipy.cluster import vq [as alias]
# Alternatively: from scipy.cluster.vq import whiten [as alias]
def process_options(args):    
    options = argparser().parse_args(args)

    if options.max_rank is not None and options.max_rank < 1:
        raise ValueError('max-rank must be >= 1')
    if options.threshold is not None and options.threshold < 0.0:
        raise ValueError('threshold must be >= 0')
    if options.tolerance is not None and options.tolerance < 0.0:
        raise ValueError('tolerance must be >= 0')
    if options.approximate and not options.threshold:
        raise ValueError('approximate only makes sense with a threshold')
    if options.approximate and options.metric != 'cosine':
        raise NotImplementedError('approximate only supported for cosine')

    wv = wvlib.load(options.vectors[0], max_rank=options.max_rank)

    if options.normalize:
        logging.info('normalize vectors to unit length')
        wv.normalize()

    words, vectors = wv.words(), wv.vectors()

    if options.whiten:
        # whitening would need to be implemented in wvlib to work together
        # with approximate similarity
        if options.approximate:
            raise NotImplementedError('whitening is not supported with --approximate')
        logging.info('normalize features to unit variance')
        vectors = whiten(vectors)

    return words, vectors, wv, options 
Developer: cambridgeltl, Project: link-prediction_with_deep-learning, Lines of code: 33, Source file: pairdist.py

Example 6: run_kmeans

# Required import: from scipy.cluster import vq [as alias]
# Alternatively: from scipy.cluster.vq import whiten [as alias]
def run_kmeans(self, X, K):
        """Runs k-means on whitened data and returns the cluster means
        and the label assigned to each observation."""
        wX = vq.whiten(X)
        means, dist = vq.kmeans(wX, K, iter=100)
        labels, dist = vq.vq(wX, means)
        return means, labels 
Developer: urinieto, Project: msaf, Lines of code: 8, Source file: xmeans.py

Example 7: compute_bic

# Required import: from scipy.cluster import vq [as alias]
# Alternatively: from scipy.cluster.vq import whiten [as alias]
def compute_bic(self, D, means, labels, K, R):
        """Computes the Bayesian Information Criterion."""
        D = vq.whiten(D)
        Rn = D.shape[0]
        M = D.shape[1]

        if R == K:
            return 1

        # Maximum likelihood estimate (MLE)
        mle_var = 0
        for k in range(len(means)):
            X = D[np.argwhere(labels == k)]
            X = X.reshape((X.shape[0], X.shape[-1]))
            for x in X:
                mle_var += distance.euclidean(x, means[k])
                #print x, means[k], mle_var
        mle_var /= float(R - K)

        # Log-likelihood of the data
        l_D = - Rn/2. * np.log(2*np.pi) - (Rn * M)/2. * np.log(mle_var) - \
            (Rn - K) / 2. + Rn * np.log(Rn) - Rn * np.log(R)

        # Params of BIC
        p = (K-1) + M * K + mle_var

        #print "BIC:", l_D, p, R, K

        # Return the bic
        return l_D - p / 2. * np.log(R) 
Developer: urinieto, Project: msaf, Lines of code: 32, Source file: xmeans.py

Example 8: test_kmeans

# Required import: from scipy.cluster import vq [as alias]
# Alternatively: from scipy.cluster.vq import whiten [as alias]
def test_kmeans(K=5):
    """Test k-means with the synthetic data."""
    X = XMeans.generate_2d_data(K=4)
    wX = vq.whiten(X)
    dic, dist = vq.kmeans(wX, K, iter=100)

    plt.scatter(wX[:, 0], wX[:, 1])
    plt.scatter(dic[:, 0], dic[:, 1], color="m")
    plt.show() 
Developer: urinieto, Project: msaf, Lines of code: 11, Source file: xmeans.py

Example 9: get_load_archetypes

# Required import: from scipy.cluster import vq [as alias]
# Alternatively: from scipy.cluster.vq import whiten [as alias]
def get_load_archetypes(Load, k=2, x='hour', y='dayofyear', plot_diagnostics=False):
    """Extract typical load profiles using k-means and vector quantization. the time scale of archetypes depend on the selected dimensions (x,y).
    For the default values daily archetypes will be extracted.

    Parameters:
        Load (pd.Series): timeseries
        k (int): number of archetypes to identify and extract
        x (str): This will define how the timeseries will be grouped by. Has to be an accessor of pd.DatetimeIndex
        y (str): similar to above for y axis.
        plot_diagnostics (bool): If true a figure is plotted showing an overview of the results
    Returns:
        np.ndarray: dimensions (k, len(x))
    """
    from scipy.cluster.vq import whiten, kmeans, vq

    df = reshape_timeseries(Load, x=x, y=y, aggfunc='mean').astype(float)
    df_white = whiten(df)
    clusters_center, __ = kmeans(df_white, k)
    # De-whiten: rescale the cluster centres back to the original units
    # by multiplying by each column's standard deviation.
    clusters_center_dewhitened = clusters_center.T * np.array([df.std()] * k).T

    if plot_diagnostics:
        try:
            import matplotlib.pyplot as plt
            clusters, _ = vq(df_white, clusters_center)
            cm = _n_colors_from_colormap(k)
            ax1 = df.T.plot(legend=False, alpha=.1,
                            color=[cm[i] for i in clusters])
            # Add colored cluster centers as lines
            ax1.set_prop_cycle('color', cm)
            ax1.plot(clusters_center_dewhitened, linewidth=3, linestyle='--')
            plt.figure()  # FIXME: works only with weekdays
            day_clusters = pd.DataFrame({y: Load.resample('d').mean().index.weekday,
                                         'clusters': clusters,
                                         'val': 1})
            x_labels = "Mon Tue Wed Thu Fri Sat Sun".split()
            day_clusters.pivot_table(columns=y, index='clusters',
                                     aggfunc='count').T.plot.bar(stacked=True)
            plt.gca().set_xticklabels(x_labels)
        except Exception:  # FIXME: specify exception
            print('Works only with daily profile clustering')

    return clusters_center_dewhitened 
Developer: kavvkon, Project: enlopy, Lines of code: 44, Source file: analysis.py
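Example 9 relies on a detail that is easy to miss: because whiten only divides each column by its standard deviation, cluster centres computed in whitened space can be mapped back to the original units by multiplying them by the per-column standard deviation. Below is a minimal sketch of that round trip (independent of the enlopy code above; it uses numpy's ddof=0 standard deviation, which is exactly what whiten divides by).

import numpy as np
from scipy.cluster.vq import whiten, kmeans

rng = np.random.default_rng(42)
data = rng.normal(loc=[0.0, 100.0], scale=[1.0, 25.0], size=(300, 2))

std = data.std(axis=0)                    # per-column std, ddof=0, matches whiten
centers_whitened, _ = kmeans(whiten(data), 3)

# De-whiten: bring the centres back to the original feature scale.
centers = centers_whitened * std
print(centers)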


Note: The scipy.cluster.vq.whiten examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their respective developers; copyright remains with the original authors. Please consult each project's license before using or redistributing the code, and do not republish without permission.