

Python scipy.cluster Method Code Examples

This article collects typical usage examples of scipy.cluster in Python. If you are wondering what scipy.cluster does, how to call it, or what real-world usage looks like, the curated code samples below should help. You can also explore further usage examples from the scipy package it belongs to.


Fifteen code examples of scipy.cluster are shown below, sorted by popularity by default. You can upvote the examples you find useful; your votes help the system recommend better Python code samples.
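
scipy.cluster is the SciPy subpackage containing two submodules: scipy.cluster.vq (k-means and vector quantization) and scipy.cluster.hierarchy (hierarchical/agglomerative clustering). As a minimal sketch of the two import idioms the examples below rely on:

import numpy as np
import scipy.cluster           # exposes scipy.cluster.vq and scipy.cluster.hierarchy
from scipy import cluster      # equivalent aliased form used by several examples

data = np.random.default_rng(0).normal(size=(20, 2))
centroids, distortion = cluster.vq.kmeans(data, 3)  # k-means with 3 centroids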

Example 1: _signal_recompose_sum

# Required import: import scipy [as alias]
# Or: from scipy import cluster [as alias]
def _signal_recompose_sum(components, clusters):
    # Reorient components
    components = components.T

    # Reconstruct Time Series from correlated components
    clusters = [np.where(clusters == cluster)[0] for cluster in np.unique(clusters)]

    if len(clusters) == 0:
        raise ValueError("Not enough clusters of components detected. Please decrease the `threshold`.")
    # Initialize the recomposed matrix (one column per cluster)
    recomposed = np.zeros((len(components), len(clusters)))
    for i, indices in enumerate(clusters):
        recomposed[:, i] = components[:, indices].sum(axis=1)
    return recomposed.T


# =============================================================================
# Clustering Methods
# =============================================================================

# Weighted Correlation
# ---------------------------------------------------------------------------- 
Developer: neuropsychology, Project: NeuroKit, Lines: 24, Source: signal_recompose.py
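
A minimal usage sketch with hypothetical data (the helper assumes numpy imported as np): four decomposed components of a 100-sample signal are summed into two cluster series.

import numpy as np

components = np.random.default_rng(1).normal(size=(4, 100))  # (n_components, n_samples)
clusters = np.array([1, 1, 2, 2])                            # one cluster label per component
recomposed = _signal_recompose_sum(components, clusters)
print(recomposed.shape)                                      # (2, 100): one summed series per cluster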

Example 2: process_options

# Required import: import scipy [as alias]
# Or: from scipy import cluster [as alias]
def process_options(args):    
    options = argparser().parse_args(args)

    if options.max_rank is not None and options.max_rank < 1:
        raise ValueError('max-rank must be >= 1')
    if options.eps <= 0.0:
        raise ValueError('eps must be > 0')

    wv = wvlib.load(options.vectors[0], max_rank=options.max_rank)

    if options.normalize:
        logging.info('normalize vectors to unit length')
        wv.normalize()

    words, vectors = wv.words(), wv.vectors()

    if options.whiten:
        logging.info('normalize features to unit variance')
        vectors = scipy.cluster.vq.whiten(vectors)

    return words, vectors, options 
Developer: cambridgeltl, Project: link-prediction_with_deep-learning, Lines: 23, Source: dbscan.py
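
The whitening step rescales each feature column to unit variance. A small standalone illustration of scipy.cluster.vq.whiten:

import numpy as np
from scipy.cluster.vq import whiten

features = np.array([[1.0, 10.0], [2.0, 20.0], [3.0, 30.0]])
scaled = whiten(features)   # each column divided by its standard deviation
print(scaled.std(axis=0))   # [1. 1.]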

Example 3: compute_group_overlap_score

# Required import: import scipy [as alias]
# Or: from scipy import cluster [as alias]
def compute_group_overlap_score(ref_labels, pred_labels,
                                threshold_overlap_pred=0.5,
                                threshold_overlap_ref=0.5):
    """How well do the pred_labels explain the ref_labels?

    A predicted cluster explains a reference cluster if it is contained within the reference
    cluster with at least 50% (threshold_overlap_pred) of its points and these correspond
    to at least 50% (threshold_overlap_ref) of the reference cluster.
    """
    ref_unique, ref_counts = np.unique(ref_labels, return_counts=True)
    ref_dict = dict(zip(ref_unique, ref_counts))
    pred_unique, pred_counts = np.unique(pred_labels, return_counts=True)
    pred_dict = dict(zip(pred_unique, pred_counts))
    summary = []
    for true in ref_unique:
        sub_pred_unique, sub_pred_counts = np.unique(pred_labels[true == ref_labels], return_counts=True)
        relative_overlaps_pred = [sub_pred_counts[i] / pred_dict[n] for i, n in enumerate(sub_pred_unique)]
        relative_overlaps_ref = [sub_pred_counts[i] / ref_dict[true] for i, n in enumerate(sub_pred_unique)]
        pred_best_index = np.argmax(relative_overlaps_pred)
        summary.append(1 if (relative_overlaps_pred[pred_best_index] >= threshold_overlap_pred and
                             relative_overlaps_ref[pred_best_index] >= threshold_overlap_ref)
                       else 0)
        # print(true, sub_pred_unique[pred_best_index], relative_overlaps_pred[pred_best_index],
        #       relative_overlaps_ref[pred_best_index], summary[-1])
    return sum(summary)/len(summary) 
Developer: colomemaria, Project: epiScanpy, Lines: 27, Source: utils.py
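
A quick sanity check with hypothetical labels (numpy as np assumed): every reference group is explained by a dominant predicted cluster, so the score is 1.0.

import numpy as np

ref = np.array(['a', 'a', 'a', 'b', 'b', 'b'])
pred = np.array([0, 0, 0, 1, 1, 2])
print(compute_group_overlap_score(ref, pred))  # 1.0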

Example 4: hierarch_cluster

# Required import: import scipy [as alias]
# Or: from scipy import cluster [as alias]
def hierarch_cluster(M):
    """Cluster matrix using hierarchical clustering.

    Parameters
    ----------
    M : np.ndarray
        Matrix, for example, distance matrix.

    Returns
    -------
    Mclus : np.ndarray
        Clustered matrix.
    indices : np.ndarray
        Indices used to cluster the matrix.
    """
    import scipy as sp
    import scipy.cluster
    link = sp.cluster.hierarchy.linkage(M)
    indices = sp.cluster.hierarchy.leaves_list(link)
    Mclus = np.array(M[:, indices])
    Mclus = Mclus[indices, :]
    if False:  # dead debugging branch (`pl` is presumably matplotlib.pyplot)
        pl.matshow(Mclus)
        pl.colorbar()
    return Mclus, indices 
Developer: colomemaria, Project: epiScanpy, Lines: 27, Source: utils.py
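
A usage sketch on a small hypothetical matrix (numpy as np assumed). Rows 0 and 2 are the most similar observation vectors, so the returned indices place them next to each other; note that SciPy may warn that a square symmetric input looks like an uncondensed distance matrix.

import numpy as np

M = np.array([[0.0, 0.9, 0.1],
              [0.9, 0.0, 0.8],
              [0.1, 0.8, 0.0]])
Mclus, indices = hierarch_cluster(M)
print(indices)  # e.g. [1 0 2]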

Example 5: performClusteringLinkage

# Required import: import scipy [as alias]
# Or: from scipy import cluster [as alias]
def performClusteringLinkage(segmentBKTable, segmentCVTable, N_init, linkageCriterion, linkageMetric):
    from scipy.cluster.hierarchy import linkage
    from scipy import cluster
    if linkageMetric == 'jaccard':
        observations = segmentBKTable
    elif linkageMetric == 'cosine':
        observations = segmentCVTable
    else:
        observations = segmentCVTable
    clusteringTable = np.zeros([np.size(segmentCVTable, 0), N_init])
    Z = linkage(observations, method=linkageCriterion, metric=linkageMetric)
    for i in np.arange(N_init):
        # cut_tree returns an (n, 1) column; transpose and shift labels to start at 1
        clusteringTable[:, i] = cluster.hierarchy.cut_tree(Z, N_init - i).T + 1
    k = N_init
    print('done')
    return clusteringTable, k 
Developer: josepatino, Project: pyBK, Lines: 18, Source: diarizationFunctions.py
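
The loop above hinges on scipy.cluster.hierarchy.cut_tree, which cuts a linkage at a requested number of clusters (the function also assumes numpy imported as np). A minimal standalone sketch:

import numpy as np
from scipy.cluster.hierarchy import linkage, cut_tree

X = np.random.default_rng(2).normal(size=(6, 3))  # 6 hypothetical observations
Z = linkage(X, method='average', metric='cosine')
labels = cut_tree(Z, 3).flatten() + 1             # 3 flat clusters, labels starting at 1
print(labels)                                     # e.g. [1 2 1 3 2 1]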

Example 6: dendrogram

# Required import: import scipy [as alias]
# Or: from scipy import cluster [as alias]
def dendrogram(data, threshold, layer_directory):
    colnames = data.columns
    data = np.array(data)

    Z = hierarchy.linkage(data.T, 'single',  'cosine')
    plt.figure(figsize=(15, 9))
    dn = hierarchy.dendrogram(Z, labels = colnames, color_threshold=threshold)
    plt.title("Clustering of Samples Based on Mutational Signatures" )
    plt.ylabel("Cosine Distance")
    plt.xlabel("Sample IDs")
    #plt.ylim((0,1))
    plt.savefig(layer_directory + '/dendrogram.pdf', dpi=300)  # figsize belongs to plt.figure, not savefig
    # which datapoints goes to which cluster
    # The indices of the datapoints will be displayed as the ids 
    Y = hierarchy.fcluster(Z, threshold, criterion='distance', R=None, monocrit=None)
    dataframe = pd.DataFrame({"Cluster":Y, "Sample Names":list(colnames)})
    dataframe = dataframe.set_index("Sample Names")
    #print(dataframe)
    dictionary = {"clusters":Y, "informations":dn}
    
    return dataframe 


######################################## Plot the reconstruction error vs stabilities and select the optimum number of signatures #################################################### 
Developer: AlexandrovLab, Project: SigProfilerExtractor, Lines: 26, Source: subroutines.py
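
The key step above is hierarchy.fcluster with criterion='distance', which maps each sample to a flat cluster id at the chosen cosine-distance threshold (the function itself also assumes numpy, pandas, matplotlib.pyplot, and scipy.cluster.hierarchy in scope). A compact sketch of that step in isolation:

import numpy as np
from scipy.cluster import hierarchy

X = np.random.default_rng(3).normal(size=(10, 4))
Z = hierarchy.linkage(X, 'average', 'cosine')
labels = hierarchy.fcluster(Z, 0.5, criterion='distance')  # merge everything below distance 0.5
print(labels)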

Example 7: sigma_bin_walls

# Required import: import scipy [as alias]
# Or: from scipy import cluster [as alias]
def sigma_bin_walls(sigma, bins):
        import scipy, scipy.cluster, scipy.cluster.vq as vq
        std = np.std(sigma)
        if np.isclose(std, 0): return pimms.imm_array([0, np.max(sigma)])
        cl = sorted(std * vq.kmeans(sigma/std, bins)[0])
        cl = np.mean([cl[:-1],cl[1:]], axis=0)
        return pimms.imm_array(np.concatenate(([0], cl, [np.max(sigma)]))) 
Developer: noahbenson, Project: neuropythy, Lines: 9, Source: cmag.py
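
The helper standardizes sigma, runs 1-D k-means from scipy.cluster.vq, and places bin walls at the midpoints between sorted centroids (pimms and numpy are repo-level assumptions). A sketch of the k-means step on hypothetical bimodal data:

import numpy as np
from scipy.cluster import vq

rng = np.random.default_rng(4)
sigma = np.concatenate([rng.normal(1.0, 0.1, 50), rng.normal(3.0, 0.1, 50)])
std = np.std(sigma)
centroids = np.sort(vq.kmeans(sigma / std, 2)[0]) * std
print(centroids)           # roughly [1.0, 3.0]
print(np.mean(centroids))  # the single interior bin wall, ~2.0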

Example 8: _signal_recompose_wcorr

# Required import: import scipy [as alias]
# Or: from scipy import cluster [as alias]
def _signal_recompose_wcorr(components, threshold=0.5, metric="chebyshev"):
    """"""
    # Calculate the w-correlation matrix.
    wcorr = _signal_recompose_get_wcorr(components, show=False)

    # Find clusters in correlation matrix
    pairwise_distances = scipy.cluster.hierarchy.distance.pdist(wcorr, metric=metric)
    linkage = scipy.cluster.hierarchy.linkage(pairwise_distances, method="complete")
    threshold = threshold * pairwise_distances.max()
    clusters = scipy.cluster.hierarchy.fcluster(linkage, threshold, "distance")

    return clusters 
Developer: neuropsychology, Project: NeuroKit, Lines: 14, Source: signal_recompose.py
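
Since _signal_recompose_get_wcorr is not shown here, the same pipeline can be exercised directly on a hypothetical 4x4 w-correlation matrix (two correlated pairs); with the default threshold of 0.5, the pairs separate into two clusters:

import numpy as np
import scipy.cluster.hierarchy

wcorr = np.array([[1.0, 0.9, 0.1, 0.2],
                  [0.9, 1.0, 0.2, 0.1],
                  [0.1, 0.2, 1.0, 0.8],
                  [0.2, 0.1, 0.8, 1.0]])
d = scipy.cluster.hierarchy.distance.pdist(wcorr, metric="chebyshev")
Z = scipy.cluster.hierarchy.linkage(d, method="complete")
print(scipy.cluster.hierarchy.fcluster(Z, 0.5 * d.max(), "distance"))  # e.g. [1 1 2 2]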

Example 9: process_options

# Required import: import scipy [as alias]
# Or: from scipy import cluster [as alias]
def process_options(args):    
    options = argparser().parse_args(args)

    if options.max_rank is not None and options.max_rank < 1:
        raise ValueError('max-rank must be >= 1')
    if options.k is not None and options.k < 2:
        raise ValueError('cluster number must be >= 2')

    if options.method == MINIBATCH_KMEANS and not with_sklearn:
        logging.warning('minibatch kmeans not available, using kmeans (slow)')
        options.method = KMEANS

    if options.jobs != 1 and (options.method != KMEANS or not with_sklearn):
        logging.warning('jobs > 1 is only supported with scikit-learn %s' % KMEANS)
        options.jobs = 1

    wv = wvlib.load(options.vectors[0], max_rank=options.max_rank)

    if options.k is None:
        options.k = int(math.ceil((len(wv.words())/2)**0.5))
        logging.info('set k=%d (%d words)' % (options.k, len(wv.words())))

    if options.normalize:
        logging.info('normalize vectors to unit length')
        wv.normalize()

    words, vectors = wv.words(), wv.vectors()

    if options.whiten:
        logging.info('normalize features to unit variance')
        vectors = scipy.cluster.vq.whiten(vectors)

    return words, vectors, options 
Developer: cambridgeltl, Project: link-prediction_with_deep-learning, Lines: 35, Source: kmeans.py
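
The default k above follows the common sqrt(n/2) rule of thumb for picking a cluster count, e.g. for a 50,000-word vocabulary:

import math

n_words = 50000
k = int(math.ceil((n_words / 2) ** 0.5))
print(k)  # 159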

Example 10: minibatch_kmeans

# Required import: import scipy [as alias]
# Or: from scipy import cluster [as alias]
def minibatch_kmeans(vectors, k):
    if not with_sklearn:
        raise NotImplementedError
    # Sculley (http://www.eecs.tufts.edu/~dsculley/papers/fastkmeans.pdf)
    # uses batch size 1000. sklearn KMeans defaults to n_init 10
    kmeans = sklearn.cluster.MiniBatchKMeans(k, batch_size=1000, n_init=10)
    kmeans.fit(vectors)
    return kmeans.labels_ 
Developer: cambridgeltl, Project: link-prediction_with_deep-learning, Lines: 10, Source: kmeans.py
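
A hedged usage sketch, assuming scikit-learn is installed (the with_sklearn flag in the original guards exactly this):

import numpy as np
import sklearn.cluster

X = np.random.default_rng(6).normal(size=(1000, 8))
kmeans = sklearn.cluster.MiniBatchKMeans(n_clusters=5, batch_size=1000, n_init=10)
print(kmeans.fit(X).labels_[:10])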

Example 11: kmeans

# Required import: import scipy [as alias]
# Or: from scipy import cluster [as alias]
def kmeans(vectors, k, jobs=1):
    vectors = numpy.array(vectors)
    if with_sklearn:
        if jobs == 1:
            kmeans = sklearn.cluster.KMeans(k)
        else:
            kmeans = sklearn.cluster.KMeans(k, n_jobs=jobs) # sklearn > 0.10
        kmeans.fit(vectors)
        return kmeans.labels_
    else:
        codebook, distortion = scipy.cluster.vq.kmeans(vectors, k)
        cluster_ids, dist = scipy.cluster.vq.vq(vectors, codebook)
        return cluster_ids 
Developer: cambridgeltl, Project: link-prediction_with_deep-learning, Lines: 15, Source: kmeans.py
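
A sketch of the SciPy fallback path on two well-separated hypothetical blobs: vq.kmeans learns the codebook, then vq.vq assigns each point to its nearest code.

import numpy as np
import scipy.cluster.vq

rng = np.random.default_rng(7)
points = np.vstack([rng.normal(0, 0.2, (50, 2)), rng.normal(3, 0.2, (50, 2))])
codebook, distortion = scipy.cluster.vq.kmeans(points, 2)
cluster_ids, dists = scipy.cluster.vq.vq(points, codebook)
print(np.bincount(cluster_ids))  # roughly [50 50]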

Example 12: write_cluster_ids

# Required import: import scipy [as alias]
# Or: from scipy import cluster [as alias]
def write_cluster_ids(words, cluster_ids, out=None):
    """Write given list of words and their corresponding cluster ids to out."""

    assert len(words) == len(cluster_ids), 'word/cluster ids number mismatch'

    if out is None:
        out = sys.stdout
    for word, cid in izip(words, cluster_ids):
        print >> out, '%s\t%d' % (word, cid) 
Developer: cambridgeltl, Project: link-prediction_with_deep-learning, Lines: 11, Source: kmeans.py
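
The helper above is Python 2 code (print >> out, and izip from itertools). A sketch of a Python 3 equivalent under the same word/cluster-id inputs:

import sys

def write_cluster_ids_py3(words, cluster_ids, out=None):
    """Write word<TAB>cluster-id lines to out (Python 3 sketch of the helper above)."""
    assert len(words) == len(cluster_ids), 'word/cluster ids number mismatch'
    out = sys.stdout if out is None else out
    for word, cid in zip(words, cluster_ids):
        print('%s\t%d' % (word, cid), file=out)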

Example 13: _auto_color

# Required import: import scipy [as alias]
# Or: from scipy import cluster [as alias]
async def _auto_color(self, url: str, ranks):
        phrases = ["Calculating colors..."] # in case I want more
        #try:
        await self.bot.say("**{}**".format(random.choice(phrases)))
        clusters = 10

        async with aiohttp.get(url) as r:
            image = await r.content.read()
        with open('data/leveler/temp_auto.png','wb') as f:
            f.write(image)

        im = Image.open('data/leveler/temp_auto.png').convert('RGBA')
        im = im.resize((290, 290)) # resized to reduce time
        ar = scipy.misc.fromimage(im)
        shape = ar.shape
        ar = ar.reshape(scipy.product(shape[:2]), shape[2])

        codes, dist = scipy.cluster.vq.kmeans(ar.astype(float), clusters)
        vecs, dist = scipy.cluster.vq.vq(ar, codes)         # assign codes
        counts, bins = scipy.histogram(vecs, len(codes))    # count occurrences

        # sort counts
        freq_index = []
        index = 0
        for count in counts:
            freq_index.append((index, count))
            index += 1
        sorted_list = sorted(freq_index, key=operator.itemgetter(1), reverse=True)

        colors = []
        for rank in ranks:
            color_index = min(rank, len(codes) - 1)  # clamp to a valid index into sorted_list
            peak = codes[sorted_list[color_index][0]] # gets the original index
            peak = peak.astype(int)

            colors.append(''.join(format(c, '02x') for c in peak))
        return colors # returns array
        #except:
            #await self.bot.say("```Error or no scipy. Install scipy doing 'pip3 install numpy' and 'pip3 install scipy' or read here: https://github.com/AznStevy/Maybe-Useful-Cogs/blob/master/README.md```")

    # converts hex to rgb 
Developer: AznStevy, Project: Maybe-Useful-Cogs, Lines: 43, Source: leveler.py
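
scipy.misc.fromimage, scipy.product, and scipy.histogram used above were removed from modern SciPy, and the bot/HTTP calls are legacy APIs. A hedged sketch of just the dominant-color step with NumPy and Pillow (the file path comes from the original; Pillow is an added assumption):

import numpy as np
from PIL import Image
from scipy.cluster import vq

im = Image.open('data/leveler/temp_auto.png').convert('RGBA').resize((290, 290))
ar = np.asarray(im, dtype=float).reshape(-1, 4)   # one RGBA row per pixel
codes, _ = vq.kmeans(ar, 10)                      # 10 candidate colors
vecs, _ = vq.vq(ar, codes)                        # nearest code per pixel
counts = np.bincount(vecs, minlength=len(codes))  # occurrences per code
dominant = codes[counts.argmax()].astype(int)
print('%02x%02x%02x' % tuple(dominant[:3]))       # dominant color as hex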

Example 14: plot_rank_order_dendrogram

# Required import: import scipy [as alias]
# Or: from scipy import cluster [as alias]
def plot_rank_order_dendrogram(df:pd.DataFrame, threshold:float=0.8, savename:Optional[str]=None, settings:PlotSettings=PlotSettings()) \
        -> Dict[str,Union[List[str],float]]:
    r'''
    Plots a dendrogram of features in df clustered via Spearman's rank correlation coefficient.
    Also returns a sets of features with correlation coefficients greater than the threshold

    Arguments:
        df: Pandas DataFrame containing data
        threshold: Threshold on correlation coefficient
        savename: Optional name of file to which to save the plot of feature importances
        settings: :class:`~lumin.plotting.plot_settings.PlotSettings` class to control figure appearance

    Returns:
        Dict of sets of features with correlation coefficients greater than the threshold and cluster distance
    '''

    corr = np.round(scipy.stats.spearmanr(df).correlation, 4)
    corr_condensed = hc.distance.squareform(1-np.abs(corr))  # Abs because negative of a feature is a trivial transformation: information unaffected
    z = hc.linkage(corr_condensed, method='average', optimal_ordering=True)

    with sns.axes_style('white'), sns.color_palette(settings.cat_palette):
        plt.figure(figsize=(settings.w_large, (0.5*len(df.columns))))
        hc.dendrogram(z, labels=df.columns, orientation='left', leaf_font_size=settings.lbl_sz, color_threshold=1-threshold)
        plt.xlabel("Distance (1 - |Spearman's Rank Correlation Coefficient|)", fontsize=settings.lbl_sz, color=settings.lbl_col)
        plt.xticks(fontsize=settings.tk_sz, color=settings.tk_col)
        if savename is not None: plt.savefig(settings.savepath/f'{savename}{settings.format}', bbox_inches='tight')
        plt.show()

    feats = df.columns
    sets = {}
    for i, merge in enumerate(z):
        if merge[2] > 1-threshold: continue
        if merge[0] <= len(z): a = [feats[int(merge[0])]]
        else:                  a = sets.pop(int(merge[0]))['children']
        if merge[1] <= len(z): b = [feats[int(merge[1])]]
        else:                  b = sets.pop(int(merge[1]))['children']
        sets[1 + i + len(z)] = {'children': [*a, *b], 'distance': merge[2]}
    return sets 
Developer: GilesStrong, Project: lumin, Lines: 40, Source: data_viewing.py
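
A usage sketch with hypothetical data (the function assumes numpy, pandas, scipy.stats, seaborn, matplotlib.pyplot, and scipy.cluster.hierarchy as hc in scope). Columns a and b are nearly rank-identical, so they come back as one correlated set at threshold 0.8:

import numpy as np
import pandas as pd

rng = np.random.default_rng(9)
a = rng.normal(size=1000)
df = pd.DataFrame({'a': a,
                   'b': a + rng.normal(scale=0.05, size=1000),
                   'c': rng.normal(size=1000)})
print(plot_rank_order_dendrogram(df, threshold=0.8))  # e.g. {3: {'children': ['a', 'b'], 'distance': ...}}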

Example 15: _get_covariance_from_clusters

# Required import: import scipy [as alias]
# Or: from scipy import cluster [as alias]
def _get_covariance_from_clusters(self, points):
        """Compute covariance from re-centered clusters."""

        # Compute pairwise distances.
        distances = spatial.distance.pdist(points, metric='mahalanobis',
                                           VI=self.am)

        # Identify conglomerates of points by constructing a linkage matrix.
        linkages = cluster.hierarchy.single(distances)

        # Cut when linkage between clusters exceed the radius.
        clusteridxs = cluster.hierarchy.fcluster(linkages, 1.0,
                                                 criterion='distance')
        nclusters = np.max(clusteridxs)
        if nclusters == 1:
            return self._get_covariance_from_all_points(points)
        else:
            i = 0
            overlapped_points = np.empty_like(points)
            for idx in np.unique(clusteridxs):
                group_points = points[clusteridxs == idx, :]
                group_mean = group_points.mean(axis=0).reshape((1, -1))
                j = i + len(group_points)
                overlapped_points[i:j, :] = group_points - group_mean
                i = j
            return self._get_covariance_from_all_points(overlapped_points) 
Developer: joshspeagle, Project: dynesty, Lines: 28, Source: bounding.py
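
A minimal sketch of the linkage-and-cut step on two hypothetical point clouds, substituting plain Euclidean distances for the Mahalanobis metric (which needs the precision matrix VI); the module aliases cluster and spatial match the imports this example assumes:

import numpy as np
from scipy import cluster, spatial

rng = np.random.default_rng(10)
pts = np.vstack([rng.normal(0, 0.1, (20, 2)), rng.normal(5, 0.1, (20, 2))])
distances = spatial.distance.pdist(pts)  # Euclidean here
linkages = cluster.hierarchy.single(distances)
ids = cluster.hierarchy.fcluster(linkages, 1.0, criterion='distance')
print(np.max(ids))  # 2 conglomerates found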


Note: The scipy.cluster examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their respective developers; copyright in the source code remains with the original authors. For distribution and use, consult each project's license. Do not repost without permission.