当前位置: 首页>>代码示例>>Python>>正文


Python hierarchy.fcluster方法代码示例

本文整理汇总了Python中scipy.cluster.hierarchy.fcluster方法的典型用法代码示例。如果您正苦于以下问题:Python hierarchy.fcluster方法的具体用法?Python hierarchy.fcluster怎么用?Python hierarchy.fcluster使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在scipy.cluster.hierarchy的用法示例。


在下文中一共展示了hierarchy.fcluster方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: create_cluster_map

# 需要导入模块: from scipy.cluster import hierarchy [as 别名]
# 或者: from scipy.cluster.hierarchy import fcluster [as 别名]
def create_cluster_map(self, bow, srcids):
        """Group source ids by hierarchical clustering of their feature rows.

        The rows of ``bow`` are clustered with city-block distance and
        complete linkage.  The tree is cut halfway between the second and
        third smallest distinct merge distances.

        Args:
            bow: 2-D array with one row per entry of ``srcids``.
            srcids: identifiers aligned with the rows of ``bow``.

        Returns:
            dict mapping each flat-cluster id produced by ``fcluster`` to the
            list of source ids assigned to that cluster.

        Raises:
            IndexError: if the linkage has fewer than three distinct merge
                distances, so no threshold can be derived.
        """
        z = linkage(bow, metric='cityblock', method='complete')
        # A set has no defined iteration order; sort it so the indices below
        # always select the same (second and third smallest) distances.
        dists = sorted(set(z[:, 2]))
        thresh = (dists[1] + dists[2]) / 2
        self.logger.info('Threshold: {0}'.format(thresh))
        b = hier.fcluster(z, thresh, criterion='distance')
        assert bow.shape[0] == len(b)
        assert len(b) == len(srcids)

        cluster_map = {}
        for cid, srcid in zip(b, srcids):
            # Append in place instead of rebuilding the list for every item.
            cluster_map.setdefault(cid, []).append(srcid)

        # Report the number of clusters, not the number of clustered items.
        self.logger.info('# of clusters: {0}'.format(len(cluster_map)))
        self.logger.info('sizes of clustsers:{0}'.format(sorted(map(len, cluster_map.values()))))

        return cluster_map
开发者ID:plastering,项目名称:plastering,代码行数:18,代码来源:zodiac.py

示例2: hier_clustering

# 需要导入模块: from scipy.cluster import hierarchy [as 别名]
# 或者: from scipy.cluster.hierarchy import fcluster [as 别名]
def hier_clustering(d, threshold=3):
    """Cluster source ids by the TF-IDF vectors of their token lists.

    Each token list is joined with spaces, vectorised with TF-IDF (tokens
    are split on whitespace) and clustered with city-block distance and
    complete linkage.

    Args:
        d: dict mapping a source id to a list of string tokens.
        threshold: distance at which the linkage tree is cut.

    Returns:
        OrderedDict mapping the cluster id (as a string) to the list of
        source ids in that cluster, ordered from the largest cluster to the
        smallest.
    """
    # Validate the input before anything is derived from it.
    assert isinstance(d, dict)
    first_tokens = list(d.values())[0]
    assert isinstance(first_tokens, list)
    assert isinstance(first_tokens[0], str)

    srcids = list(d.keys())
    vectorizer = TfidfVectorizer(tokenizer=str.split)
    doc = [' '.join(d[srcid]) for srcid in srcids]
    vect = vectorizer.fit_transform(doc)
    #TODO: Make vect aligned to the required format
    z = linkage(vect.toarray(), metric='cityblock', method='complete')
    b = hier.fcluster(z, threshold, criterion='distance')

    cluster_dict = defaultdict(list)
    for srcid, cluster_id in zip(srcids, b):
        cluster_dict[str(cluster_id)].append(srcid)

    # Largest clusters first.
    by_size = sorted(cluster_dict.items(),
                     key=lambda item: len(item[1]),
                     reverse=True)
    return OrderedDict(by_size)
开发者ID:plastering,项目名称:plastering,代码行数:23,代码来源:common.py

示例3: dendrogram

# 需要导入模块: from scipy.cluster import hierarchy [as 别名]
# 或者: from scipy.cluster.hierarchy import fcluster [as 别名]
def dendrogram(data, threshold, layer_directory):
    """Plot a dendrogram of the columns of ``data`` and return their clusters.

    The columns are clustered with single linkage on cosine distance.  The
    dendrogram is written to ``<layer_directory>/dendrogram.pdf``.

    Args:
        data: pandas DataFrame whose columns are the items to cluster.
        threshold: cosine distance used both to colour the dendrogram and to
            cut the tree into flat clusters.
        layer_directory: directory that receives ``dendrogram.pdf``.

    Returns:
        DataFrame indexed by "Sample Names" with a single "Cluster" column
        holding the flat cluster id of every column of ``data``.
    """
    colnames = data.columns
    values = np.array(data)

    Z = hierarchy.linkage(values.T, 'single', 'cosine')
    fig = plt.figure(figsize=(15, 9))
    hierarchy.dendrogram(Z, labels=colnames, color_threshold=threshold)
    plt.title("Clustering of Samples Based on Mutational Signatures" )
    plt.ylabel("Cosine Distance")
    plt.xlabel("Sample IDs")
    # The figure size is fixed when the figure is created; savefig() has no
    # ``figsize`` option and recent Matplotlib versions reject it.
    plt.savefig(layer_directory+'/dendrogram.pdf', dpi=300)
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)

    # Flat cluster id of every column, in the order of ``colnames``.
    Y = hierarchy.fcluster(Z, threshold, criterion='distance')
    dataframe = pd.DataFrame({"Cluster":Y, "Sample Names":list(colnames)})
    dataframe = dataframe.set_index("Sample Names")

    return dataframe


######################################## Plot the reconstruction error vs stabilities and select the optimum number of signatures ####################################################
开发者ID:AlexandrovLab,项目名称:SigProfilerExtractor,代码行数:26,代码来源:subroutines.py

示例4: getClusters

# 需要导入模块: from scipy.cluster import hierarchy [as 别名]
# 或者: from scipy.cluster.hierarchy import fcluster [as 别名]
def getClusters(self, embed):
	"""Cluster the embeddings and return the member indices of each cluster.

	``embed[i]`` for every ``i`` in ``range(len(embed))`` is copied into row
	``i`` of a float32 matrix, which is clustered with the metric, linkage
	method and distance threshold configured on ``self.p``.

	Returns a list of lists: entry ``k`` holds the row indices whose
	zero-based cluster label is ``k``.
	"""
	num_items = len(embed)
	X = np.empty((num_items, self.p.embed_dims), np.float32)
	for row in range(num_items):
		X[row, :] = embed[row]

	condensed = pdist(X, metric=self.p.metric)
	tree = linkage(condensed, method=self.p.linkage)
	# Shift the labels down by one so they can be used as list positions.
	labels = fcluster(tree, t=self.p.thresh_val, criterion='distance') - 1

	clusters = [[] for _ in range(max(labels) + 1)]
	for member, label in enumerate(labels):
		clusters[label].append(member)

	return clusters
开发者ID:malllabiisc,项目名称:cesi,代码行数:19,代码来源:cluster.py

示例5: cluster

# 需要导入模块: from scipy.cluster import hierarchy [as 别名]
# 或者: from scipy.cluster.hierarchy import fcluster [as 别名]
def cluster(currents,context,original_labels,chrom,pos1,plot,plotdir,cluster=False):
    """Label the current observations and optionally plot them.

    When ``cluster`` is true and more than one observation is given, the rows
    of ``currents`` are split into at most two groups by complete-linkage
    clustering on correlation distance.  Otherwise the labels are derived
    from ``original_labels``: 1 stays 1 and every other value becomes 0.

    The labels are only handed to ``plot_w_labels`` (when ``plot`` is true);
    the function itself returns None.
    """
    colours = {'m6A':'#B4656F','A':'#55B196'} #TODO update for other labels
    if len(currents) > 1 and cluster:
        # linkage() interprets a 2-D input as raw observation vectors, so it
        # must be given the condensed distances rather than the square matrix;
        # otherwise it clusters distances between rows of the distance matrix.
        pdistance = ssd.pdist(currents, metric='correlation')
        link = linkage(pdistance, method='complete')
        klabels = fcluster(link, 2, 'maxclust')
        strategy = 'correlation'
    else:
        klabels = [1 if x == 1 else 0 for x in original_labels]
        strategy = 'classifierProb'
    if plot:
        plot_w_labels(klabels,original_labels,currents,strategy,context,'chrom.'+chrom+'.pos.'+pos1,plotdir,colours)
开发者ID:al-mcintyre,项目名称:mCaller,代码行数:18,代码来源:make_bed.py

示例6: fit_predict

# 需要导入模块: from scipy.cluster import hierarchy [as 别名]
# 或者: from scipy.cluster.hierarchy import fcluster [as 别名]
def fit_predict(self, X, y=None):
        """Build the linkage for ``X`` and return zero-based cluster labels.

        The linkage matrix is stored on ``self.model``.  The tree is cut into
        at most ``self.n_clusters`` flat clusters.  ``y`` is accepted but not
        used.
        """
        tree = linkage(X, method=self.method, metric=self.metric)
        self.model = tree
        one_based = fcluster(tree, t=self.n_clusters, criterion='maxclust')
        return one_based - 1
开发者ID:canard0328,项目名称:malss,代码行数:5,代码来源:hierarchy.py

示例7: mean_z

# 需要导入模块: from scipy.cluster import hierarchy [as 别名]
# 或者: from scipy.cluster.hierarchy import fcluster [as 别名]
def mean_z(z_all, dim_limit):
    """Derive one group assignment per column of ``z_all``.

    The columns of ``z_all`` are clustered with single linkage on Hamming
    distance and cut with ``fcluster`` at threshold 0 (default criterion).
    Any resulting group with more than ``dim_limit`` members has its labels
    replaced by the output of ``sample_multinomial``.

    Returns the array of zero-based group labels.
    """
    tree = hi.linkage(z_all.T, 'single', 'hamming')
    # Shift the flat-cluster ids down by one so they start from 0.
    z = hi.fcluster(tree, 0) - 1
    for group in np.unique(z):
        members = group == z
        group_size = np.sum(members)
        if group_size > dim_limit:
            # NOTE(review): presumably re-splits an oversized group into
            # smaller ones; the helper is defined elsewhere, so confirm there.
            z[members] = sample_multinomial([1.] * group_size, group_size, dim_limit)
    return z
开发者ID:zi-w,项目名称:Ensemble-Bayesian-Optimization,代码行数:13,代码来源:helper.py

示例8: calculate_shell_bvalues_and_indices

# 需要导入模块: from scipy.cluster import hierarchy [as 别名]
# 或者: from scipy.cluster.hierarchy import fcluster [as 别名]
def calculate_shell_bvalues_and_indices(bvalues, max_distance=20e6):
    """
    Calculates which measurements belong to different acquisition shells.
    It uses scipy's linkage clustering algorithm, which uses the max_distance
    input as a limit of including measurements in the same cluster.

    For example, if bvalues were [1, 2, 3, 4, 5] and max_distance was 1, then
    all bvalues would belong to the same cluster.
    However, if bvalues were [1, 2, 4, 5] and max_distance was 1, then this
    would result in 2 clusters.

    Fewer than two measurements cannot be clustered; they are returned as a
    single shell (or as empty arrays when there are no measurements).

    Parameters
    ----------
    bvalues: 1D numpy array (or array-like) of shape (Ndata)
        bvalues of the acquisition in s/m^2.
    max_distance: float
        maximum b-value distance for a measurement to be included in the same
        shell.

    Returns
    -------
    shell_indices: 1D numpy array of shape (Ndata)
        array of integers, starting from 0, representing to which shell a
        measurement belongs. Shells are numbered by increasing mean b-value.
    shell_bvalues: 1D numpy array of shape (Nshells)
        array of the mean bvalues for every acquisition shell, in increasing
        order.
    """
    bvalues = np.asarray(bvalues)
    if bvalues.size < 2:
        # linkage() needs at least two observations to build a tree.
        return np.zeros(bvalues.shape, dtype=int), bvalues.astype(float)

    linkage_matrix = linkage(np.c_[bvalues])
    clusters = fcluster(linkage_matrix, max_distance, criterion='distance')

    # Mean b-value of every flat cluster, then the order that sorts them.
    cluster_ids = np.unique(clusters)
    cluster_means = np.array(
        [np.mean(bvalues[clusters == ind]) for ind in cluster_ids])
    order = cluster_means.argsort()

    # Renumber the clusters so that shell 0 has the lowest mean b-value.
    shell_indices = np.empty_like(bvalues, dtype=int)
    for shell, ind in enumerate(cluster_ids[order]):
        shell_indices[clusters == ind] = shell
    return shell_indices, cluster_means[order]
开发者ID:AthenaEPI,项目名称:dmipy,代码行数:41,代码来源:acquisition_scheme.py

示例9: remove_correlated_feats

# 需要导入模块: from scipy.cluster import hierarchy [as 别名]
# 或者: from scipy.cluster.hierarchy import fcluster [as 别名]
def remove_correlated_feats(df):
    """Keep one representative row of ``df`` per group of correlated rows.

    Rows are compared through ``1 - |Spearman correlation|`` and grouped by
    average-linkage clustering cut at distance 0.1.  From each group the row
    with the smallest summed distance to the other group members is kept.
    Rows that hold a single distinct value are excluded before clustering and
    therefore never kept.

    Returns the subset of ``df`` whose index labels were selected.
    """
    features = df.T

    # Columns with a single distinct value carry no variation.
    distinct_counts = features.apply(pd.Series.nunique)
    constant_cols = distinct_counts[distinct_counts == 1].index
    features = features.drop(constant_cols, axis=1)

    spearman = scipy.stats.spearmanr(features)
    distance = 1.0 - np.absolute(spearman.correlation)
    np.fill_diagonal(distance, 0)
    # Floating point noise can break symmetry; mirror the larger entry.
    distance = np.maximum(distance, distance.transpose())

    tree = linkage(squareform(distance), method='average')
    clusters = fcluster(tree, .1, criterion='distance')
    names = features.columns.tolist()

    kept = []
    for label in range(1, len(set(clusters)) + 1):
        members = clusters == label

        # Pairwise distances restricted to the members of this cluster.
        within = distance[:, members][members, :]

        # Position (inside the cluster) of the smallest column sum.
        best = np.argmin(sum(within))

        member_names = [name for name, is_member in zip(names, members)
                        if is_member]
        kept.append(member_names[best])

    return df.loc[df.index.isin(kept)]
开发者ID:SBRG,项目名称:ssbio,代码行数:36,代码来源:atlas3.py

示例10: cluster_from_dist_matrix

# 需要导入模块: from scipy.cluster import hierarchy [as 别名]
# 或者: from scipy.cluster.hierarchy import fcluster [as 别名]
def cluster_from_dist_matrix(dist_matrix, threshold):
    """Cluster observations from a condensed distance matrix.

    Args:
        dist_matrix: pairwise distances in scipy's 1-D condensed form
        threshold: largest inter-cluster distance at which clusters are
            still merged (a higher value yields fewer clusters)

    Returns:
        list of clusters, each a list of observation indices, ordered from
        the largest cluster to the smallest; clusters of equal size keep
        the order of their cluster numbers
    """
    tree = hierarchy.linkage(dist_matrix, method='average')
    labels = hierarchy.fcluster(tree, threshold, criterion='distance')

    members = defaultdict(list)
    for obs, label in enumerate(labels):
        members[label].append(obs)

    # Walk the whole label range rather than assuming numbering starts at 1.
    lowest, highest = min(labels), max(labels)
    by_label = [members[label] for label in range(lowest, highest + 1)]

    # list.sort is stable, so ties stay in cluster-number order.
    by_label.sort(key=len, reverse=True)
    return by_label
开发者ID:broadinstitute,项目名称:catch,代码行数:34,代码来源:cluster.py

示例11: cluster_correlations

# 需要导入模块: from scipy.cluster import hierarchy [as 别名]
# 或者: from scipy.cluster.hierarchy import fcluster [as 别名]
def cluster_correlations(corr_mat, indices=None):
    """
    Reorder a correlation matrix so that clustered columns sit together.

    Based on https://github.com/TheLoneNut/CorrelationMatrixClustering/blob/master/CorrelationMatrixClustering.ipynb

    Parameters:
    -----------
    - corr_mat : a square correlation matrix (pandas DataFrame)
    - indices : cluster labels [None]; when omitted, the rows are clustered
        with complete linkage and the tree is cut at half of the largest
        pairwise distance.

    Returns:
    --------
    - corr : the matrix with rows and columns reordered by cluster label
    - indices : the cluster labels, in the original column order

    Example:
    --------
    >> assoc = associations(
        customers,
        plot=False
    )
    >> correlations = assoc['corr']
    >> correlations, _ = cluster_correlations(correlations)
    """
    if indices is None:
        distances = sch.distance.pdist(corr_mat.values)
        tree = sch.linkage(distances, method='complete')
        indices = sch.fcluster(tree, 0.5 * distances.max(), 'distance')

    names = corr_mat.columns.tolist()
    columns = [names[pos] for pos in np.argsort(indices)]
    reordered = corr_mat.reindex(columns=columns).reindex(index=columns)
    return reordered, indices
开发者ID:shakedzy,项目名称:dython,代码行数:38,代码来源:nominal.py

示例12: get_word_clusters

# 需要导入模块: from scipy.cluster import hierarchy [as 别名]
# 或者: from scipy.cluster.hierarchy import fcluster [as 别名]
def get_word_clusters(sentence_dict):
    """Cluster source ids by the TF-IDF vectors of their lowercase words.

    For every source id the metadata values are each joined into one string,
    combined, and reduced to the runs of lowercase ASCII letters they
    contain.  The resulting sentences are vectorised with TF-IDF and
    clustered with city-block distance and complete linkage; the tree is cut
    halfway between the third and fourth smallest distinct merge distances.

    Args:
        sentence_dict: dict mapping a source id to a dict whose values are
            sequences of strings.

    Returns:
        dict mapping each flat-cluster id to the list of source ids in it.

    Raises:
        IndexError: if the linkage has fewer than four distinct merge
            distances, so no threshold can be derived.
    """
    srcids = list(sentence_dict.keys())
    sentences = []
    for srcid in srcids:
        parts = [''.join(sent) for sent in sentence_dict[srcid].values()]
        words = re.findall('[a-z]+', '\n'.join(parts))
        sentences.append(' '.join(words))

    bow = TfidfVectorizer().fit_transform(sentences).toarray()
    # Errors from linkage() propagate to the caller; the earlier bare
    # ``except`` opened a debugger and then failed on the missing result.
    z = linkage(bow, metric='cityblock', method='complete')
    # A set has no defined iteration order; sort it so the indices below
    # always select the same distances.
    dists = sorted(set(z[:, 2]))
    thresh = (dists[2] + dists[3]) / 2
    print("Threshold: ", thresh)
    b = hier.fcluster(z, thresh, criterion='distance')

    cluster_dict = defaultdict(list)
    for srcid, cluster_id in zip(srcids, b):
        cluster_dict[cluster_id].append(srcid)
    return dict(cluster_dict)
开发者ID:plastering,项目名称:plastering,代码行数:29,代码来源:common.py

示例13: _offline_clustering

# 需要导入模块: from scipy.cluster import hierarchy [as 别名]
# 或者: from scipy.cluster.hierarchy import fcluster [as 别名]
def _offline_clustering(self, X):
        print('Starting offline clustering...')
        p_dist = pdist(X, metric=self._distance_metric)
        Z = linkage(p_dist, 'complete')
        cluster_index = fcluster(Z, self.max_dist, criterion='distance')
        self._extract_representatives(X, cluster_index)
        print('Processed {} instances.'.format(X.shape[0]))
        print('Found {} clusters offline.\n'.format(len(self.representatives)))
        # print('The representive vectors are:')
        # pprint.pprint(self.representatives.tolist()) 
开发者ID:logpai,项目名称:loglizer,代码行数:12,代码来源:LogClustering.py

示例14: signal_clustering

# 需要导入模块: from scipy.cluster import hierarchy [as 别名]
# 或者: from scipy.cluster.hierarchy import fcluster [as 别名]
def signal_clustering(corr_matrix:      DataFrame,
                      threshold:        float,
                      cluster_pickle:   str = "",
                      linkage_pickle:   str = "",
                      force:            bool = False):
    """Group signals whose positive correlation is high enough.

    Non-positive correlations are treated as 0, correlations are converted
    to distances with ``1 - correlation`` and the signals are clustered with
    single linkage, cut at ``threshold``.  The caller's ``corr_matrix`` is
    left unmodified.

    Args:
        corr_matrix: square correlation matrix; its index labels name the
            signals.
        threshold: distance at which the linkage tree is cut.
        cluster_pickle: path of a pickled cluster dict from an earlier run.
        linkage_pickle: path of a pickled linkage matrix from an earlier run.
        force: when true, existing pickle files are deleted first so the
            clustering is recomputed.

    Returns:
        ``[cluster_dict, Z]`` loaded from the pickles when both files exist,
        otherwise the tuple ``(cluster_dict, Z)``.  ``cluster_dict`` maps a
        flat-cluster id to the list of index labels in that cluster and ``Z``
        is the linkage matrix.
    """
    if force:
        for stale in (cluster_pickle, linkage_pickle):
            if path.isfile(stale):
                remove(stale)
    if path.isfile(cluster_pickle) and path.isfile(linkage_pickle):
        print("\nSignal clustering already completed and forcing is turned off. Using pickled data...")
        # NOTE: unpickling can execute arbitrary code; only point these
        # paths at files this program wrote itself.
        with open(cluster_pickle, "rb") as cluster_file, \
                open(linkage_pickle, "rb") as linkage_file:
            return [load(cluster_file), load(linkage_file)]

    # Zero out non-positive correlations and invert, on a copy so that the
    # caller's DataFrame is not modified.
    distances = 1 - corr_matrix.where(corr_matrix > 0, 0)
    X = distances.values  # type: ndarray
    # Clip guards against tiny negative distances.
    Y = clip(ssd.squareform(X), 0, None)
    # Z is the linkage matrix. This can serve as input to the scipy.cluster.hierarchy.dendrogram method
    Z = linkage(Y, method='single', optimal_ordering=True)
    fclus = fcluster(Z, t=threshold, criterion='distance')

    cluster_dict = {}
    for signal, cluster_label in zip(distances.index, fclus):
        cluster_dict.setdefault(cluster_label, []).append(signal)
    return cluster_dict, Z
开发者ID:brent-stone,项目名称:CAN_Reverse_Engineering,代码行数:31,代码来源:SemanticAnalysis.py

示例15: clusters_from_partitions

# 需要导入模块: from scipy.cluster import hierarchy [as 别名]
# 或者: from scipy.cluster.hierarchy import fcluster [as 别名]
def clusters_from_partitions(partitions, options):
    """Finds clusters in partitions using span-position distance and hierarchical clustering.
    Assumes that all signatures in the given partition are of the same type and on the same contig.

    Each partition is clustered on its own with average linkage and cut at
    ``options.cluster_max_distance``.  Partitions with more than 100
    signatures are reduced to a random sample of 100 before clustering.
    Empty partitions are skipped and single-signature partitions become
    one-element clusters.

    Returns:
        list of clusters, each a list of signatures.

    Raises:
        ValueError: if a partition's signature type is not one of DEL, INV,
            DUP_TAN, INS or DUP_INT.
    """
    clusters_final = []
    large_partitions = 0
    # Find clusters in each partition individually.
    for partition in partitions:
        if len(partition) == 0:
            # Nothing to cluster.
            continue
        if len(partition) == 1:
            clusters_final.append([partition[0]])
            continue
        if len(partition) > 100:
            partition_sample = sample(partition, 100)
            large_partitions += 1
        else:
            partition_sample = partition

        element_type = partition_sample[0].type
        normalizer = options.distance_normalizer
        if element_type in ("DEL", "INV", "DUP_TAN", "INS"):
            rows = [[signature.get_source()[1], signature.get_source()[2], normalizer]
                    for signature in partition_sample]
            if element_type == "INS":
                metric = span_position_distance_insertions
            else:
                metric = span_position_distance
        elif element_type == "DUP_INT":
            rows = [[signature.get_source()[1], signature.get_source()[2],
                     signature.get_destination()[1], normalizer]
                    for signature in partition_sample]
            metric = span_position_distance_intdups
        else:
            # Without this guard the linkage of the previous partition would
            # silently be reused for this one.
            raise ValueError("Unsupported signature type: %s" % element_type)
        Z = linkage(np.array(rows), method="average", metric=metric)

        cluster_indices = list(fcluster(Z, options.cluster_max_distance, criterion='distance'))
        # Flat cluster ids start at 1, hence the -1 when used as list index.
        new_clusters = [[] for _ in range(max(cluster_indices))]
        for signature_index, cluster_index in enumerate(cluster_indices):
            new_clusters[cluster_index-1].append(partition_sample[signature_index])
        clusters_final.extend(new_clusters)
    if len(partitions) > 0 and len(partitions[0]) > 0:
        logging.debug("%d out of %d partitions for %s exceeded 100 elements.",
                      large_partitions, len(partitions), partitions[0][0].type)
    return clusters_final
开发者ID:eldariont,项目名称:svim,代码行数:37,代码来源:SVIM_clustering.py


注:本文中的scipy.cluster.hierarchy.fcluster方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。