Python hierarchy.linkage函数代码示例

本文整理汇总了Python中scipy.cluster.hierarchy.linkage函数的典型用法代码示例。如果您正苦于以下问题：Python linkage函数的具体用法？Python linkage怎么用？Python linkage使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。

在下文中一共展示了linkage函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: random_distribution

def random_distribution(n):
    """Generate a random ``n`` x ``n`` matrix, cluster it and show a dendrogram heat map.

    The matrix is drawn from a normal distribution; its upper-left quadrant is
    shifted by 75 and its lower-right quadrant is replaced with Poisson
    samples. Rows and columns are clustered independently with
    ``sch.linkage`` and the matrix is reordered by the resulting leaf order
    before being handed to ``pdh.DendroHeatMap``.

    Args:
        n: Side length of the generated matrix; also used as the scale of the
            normal distribution and the rate of the Poisson distribution.
    """
    # Integer midpoint: ``n / 2`` is a float on Python 3 and is rejected as a
    # slice bound.
    half = n // 2

    # make up some data
    data = np.random.normal(scale=n, size=(n, n))
    data[0:half, 0:half] += 75
    data[half:, half:] = np.random.poisson(lam=n, size=data[half:, half:].shape)

    # cluster the rows
    # linkage() interprets a 2-D input as observation vectors, so it must be
    # given the condensed vector from pdist(), not the squareform() matrix.
    row_Z = sch.linkage(ssd.pdist(data))
    row_idxing = sch.leaves_list(row_Z)
    # NOTE(review): labels are generated in the original (unsorted) order while
    # the data below is reordered by the leaves -- confirm what DendroHeatMap
    # expects.
    row_labels = ['bar{}'.format(i) for i in range(n)]

    # cluster the columns
    col_Z = sch.linkage(ssd.pdist(data.T))
    col_idxing = sch.leaves_list(col_Z)
    col_labels = ['foo{}'.format(i) for i in range(n)]

    # reorder columns first, then rows, to match the dendrogram leaf order
    data = data[:, col_idxing][row_idxing, :]

    # make the dendrogram heat map
    heatmap = pdh.DendroHeatMap(heat_map_data=data, left_dendrogram=row_Z, top_dendrogram=col_Z, heatmap_colors=("#ffeda0", "#feb24c", "#f03b20"), window_size="auto", color_legend_displayed=False, label_color="#777777")
    heatmap.row_labels = row_labels
    heatmap.col_labels = col_labels
    heatmap.title = 'An example heatmap'
    heatmap.show()  # heatmap.save("example.png")

开发者ID:kaniblu，项目名称:dhm，代码行数:28，代码来源:example.py

示例2: getDistMatrixes

    def getDistMatrixes(cls, distDict, distMeasure, linkageCriterion):
        """
        Build the correlation matrix, linkage matrix and condensed distance vector for one measure.

        The values stored under ``distDict[distMeasure]`` are expanded with ``squareform``, so they
        are expected to be in condensed (triangular) form. For ``cls.CORR_PEARSON`` and
        ``cls.SIM_MCCONNAUGHEY`` the stored values are treated as correlations and converted to
        distances before clustering; for every other measure they are clustered as distances.

        Returns the tuple ``(correlationMatrix, linkageMatrix, triangularDistMatrix)``.
        """
        from numpy import fill_diagonal, ones
        from scipy.cluster.hierarchy import linkage
        from scipy.spatial.distance import squareform

        needsRescaling = distMeasure == cls.CORR_PEARSON or distMeasure == cls.SIM_MCCONNAUGHEY

        if not needsRescaling:
            # The stored values are already distances: cluster them as they are.
            condensedDist = distDict[distMeasure]
            linkageMatrix = linkage(cls.removeNanDistances(condensedDist), linkageCriterion)

            # Expand to a square matrix and report "1 - distance" as the correlation.
            squareDist = squareform(condensedDist)
            side = len(squareDist)
            correlationMatrix = ones((side, side)) - squareDist
            return correlationMatrix, linkageMatrix, condensedDist

        # These measures generate values between -1 and 1, so they are first
        # mapped onto a distance between 0 and 1.
        condensedCorr = distDict[distMeasure]
        condensedDist = ones(len(condensedCorr)) - [(value + 1) / 2 for value in condensedCorr]
        linkageMatrix = linkage(cls.removeNanDistances(condensedDist), linkageCriterion)

        # Square correlation matrix with ones on the diagonal.
        correlationMatrix = squareform(condensedCorr)
        fill_diagonal(correlationMatrix, 1)
        return correlationMatrix, linkageMatrix, condensedDist

开发者ID:johhorn，项目名称:gwas-clustering，代码行数:32，代码来源:CommonClusteringFunctions.py

示例3: draw_intensity

def draw_intensity(a, cmap=GREEN_CMAP, metric='euclidean', method='average', sort_x=True, sort_y=True):
    """Show ``a`` as an intensity image, optionally ordered by hierarchical clustering.

    Args:
        a: Table exposing ``.T``, ``.columns``, ``.index`` and ``.iloc``
            (a pandas DataFrame); drawn with a colour range fixed to [0, 1].
        cmap: Colour map passed to ``plt.imshow``.
        metric: Distance metric passed to ``pdist``.
        method: Linkage method passed to ``linkage``.
        sort_x: When exactly ``True``, cluster the columns, draw their
            dendrogram above the image and reorder the columns accordingly.
        sort_y: When exactly ``True``, do the same for the rows, with the
            dendrogram on the left.
    """
    main_axes = plt.gca()
    divider = make_axes_locatable(main_axes)

    if sort_x is True:
        plt.sca(divider.append_axes("top", 0.5, pad=0))
        xlinkage = linkage(pdist(a.T, metric=metric), method=method, metric=metric)
        xdendro = dendrogram(xlinkage, orientation='top', no_labels=True,
                             distance_sort='descending',
                             link_color_func=lambda x: 'black')
        plt.gca().set_axis_off()
        # Dendrogram leaves are positions, so select columns positionally.
        a = a.iloc[:, xdendro['leaves']]

    if sort_y is True:
        plt.sca(divider.append_axes("left", 1.0, pad=0))
        ylinkage = linkage(pdist(a, metric=metric), method=method, metric=metric)
        ydendro = dendrogram(ylinkage, orientation='right', no_labels=True,
                             distance_sort='descending',
                             link_color_func=lambda x: 'black')
        plt.gca().set_axis_off()
        # ``DataFrame.ix`` was removed in pandas 1.0; ``.iloc`` performs the
        # positional row selection the leaf indices call for.
        a = a.iloc[ydendro['leaves']]

    plt.sca(main_axes)
    plt.imshow(a, aspect='auto', interpolation='none',
               cmap=cmap, vmin=0.0, vmax=1.0)
    plt.colorbar(pad=0.15)
    plt.gca().yaxis.tick_right()
    plt.xticks(range(a.shape[1]), a.columns, rotation=90, size='small')
    plt.yticks(range(a.shape[0]), a.index, size='x-small')
    plt.gca().xaxis.set_ticks_position('none')
    plt.gca().yaxis.set_ticks_position('none')
    plt.gca().invert_yaxis()

    plt.show()

开发者ID:neuroinformatics，项目名称:bah2016_registration，代码行数:34，代码来源:draw_cluster.py

示例4: hierarchical_clustering

def hierarchical_clustering(data, skill,  method='single', metric='euclidean', dendrogram=True, concepts=False, cluster_number=3, corr_as_vectors=False):
    """Cluster the items (or concepts) of one skill and return flat cluster labels.

    The correlation table comes from ``compute_corr``; missing values are
    replaced by 0. It is then restricted either to the level-3 skills found
    among the skill's items (``concepts=True``) or to the skill's items whose
    visualization is not "pairing". Clustering uses either the table rows as
    observation vectors (``corr_as_vectors=True``) or ``1 - corr`` as a
    distance matrix. When ``dendrogram`` is true a labelled dendrogram is
    drawn. The result is ``hr.fcluster`` with the "maxclust" criterion and
    ``cluster_number`` clusters.
    """
    pk, level = data.get_skill_id(skill)
    items = data.get_items_df()
    skills = data.get_skills_df()
    corr = compute_corr(data, merge_skills=concepts)

    nan_total = corr.isnull().sum().sum()
    print("Corr ({}) contain total {} values and from that {} nans".format(corr.shape, corr.size, nan_total))
    corr[corr.isnull()] = 0

    # Both modes start from the items that belong to the requested skill.
    items = items[items["skill_lvl_" + str(level)] == pk]

    if concepts:
        level3 = items["skill_lvl_3"]
        skill_ids = level3[~level3.isnull()].unique()
        corr = pd.DataFrame(corr, index=skill_ids, columns=skill_ids)
        labels = list(skills.loc[corr.index]["name"])
    else:
        items = items[items["visualization"] != "pairing"]
        corr = pd.DataFrame(corr, index=items.index, columns=items.index)
        labels = [
            "{1} - {0}".format(row["name"], row["visualization"][0])
            for _, row in items.iterrows()
        ]

    if not corr_as_vectors:
        Z = hr.linkage(dst.squareform(1 - corr), method=method)
    else:
        Z = hr.linkage(corr, method=method, metric=metric)
    # Negative entries in the linkage matrix are reset to zero.
    Z[Z < 0] = 0

    if dendrogram:
        axis_name = "concepts" if concepts else 'items'
        plt.title('{}: method: {}, metric: {}, as vectors: {}'.format(skill, method, metric, corr_as_vectors))
        plt.xlabel(axis_name)
        plt.ylabel('distance')
        hr.dendrogram(Z, leaf_rotation=90., leaf_font_size=10., labels=labels)

    return hr.fcluster(Z, cluster_number, "maxclust")

开发者ID:thran，项目名称:experiments2.0，代码行数:32，代码来源:experiments_clustering.py

示例5: compare_clusters

def compare_clusters(args):
    """Compare a reference matrix against several others and write a TSV report.

    For every path in ``args['all']`` the report contains the result of
    ``mantel`` against the reference and the Pearson correlation between the
    cophenetic distances of the two average-linkage trees. A comparison that
    raises is reported with "Failed" in all four value columns.

    Args:
        args: Mapping with the keys ``'ref'`` (path of the reference table),
            ``'all'`` (iterable of table paths to compare) and ``'output'``
            (path of the report to write). Tables are tab separated with the
            first column used as index.
    """
    # ``DataFrame.as_matrix()`` was removed in pandas 1.0; ``.values`` returns
    # the same ndarray on old and new versions.
    ref_df = pd.read_table(args['ref'], sep='\t', skipinitialspace=True, index_col=0).values
    check_symmetry(ref_df)
    # NOTE(review): the symmetry check suggests these are distance matrices,
    # yet linkage()/pdist() treat 2-D input as observation vectors -- confirm
    # this is intended.
    linkage_ref = linkage(ref_df, 'average')
    _, coph_dists_ref = cophenet(linkage_ref, pdist(ref_df))

    # The context manager closes the report even if a table fails to load.
    with open(args['output'], "w") as outfile:
        # NOTE: "Manter_P-value" is misspelled but kept unchanged because
        # consumers of the report may match on the existing column name.
        outfile.write("Tree_cluster\tMantel_Correlation_Coefficient\tManter_P-value\tCophenetic_Pearson\tCophenetic_P-value\n")

        for i in args['all']:
            fst_df = pd.read_table(i, sep='\t', skipinitialspace=True, index_col=0).values
            check_symmetry(fst_df)
            try:
                mantel_coeff, p_value_mantel, _ = mantel(ref_df, fst_df)
                linkage_fst = linkage(fst_df, 'average')
                _, coph_dists_fst = cophenet(linkage_fst, pdist(fst_df))
                cophenetic_pearson, p_value_cophenetic = pearsonr(coph_dists_ref, coph_dists_fst)
            except Exception as e:
                print("Error : %s" % str(e))
                # All four columns are marked; previously a misspelled variable
                # left a stale Mantel p-value in the report.
                mantel_coeff = "Failed"
                p_value_mantel = "Failed"
                cophenetic_pearson = "Failed"
                p_value_cophenetic = "Failed"

            fields = (i, str(mantel_coeff), str(p_value_mantel), str(cophenetic_pearson), str(p_value_cophenetic))
            outfile.write("\t".join(fields) + "\n")

开发者ID:zorino，项目名称:ray，代码行数:33，代码来源:treeclust-compare.py

示例6: cluster_fps

    def cluster_fps(self):
        """Cluster ``self.dm`` with average linkage and store the flat cluster labels.

        The coarse labels (distance threshold 0.3) are stored in
        ``self.coarse_r``. The SVD-based refinement that follows is currently
        switched off by the literal ``False`` in its condition, so the method
        always stores the coarse labels in ``self.result`` and returns the
        coarse linkage twice.

        Returns:
            ``(lkg, clkg)``: the refined and the coarse linkage matrices; with
            the refinement disabled both entries are the coarse linkage.
        """
        clkg = hcluster.linkage(self.dm, method='average')
        coarse_r = hcluster.fcluster(clkg, 0.3, criterion='distance')
        self.coarse_r = coarse_r

        # Number of coarse clusters that contain more than one member.
        bcount = np.bincount(coarse_r)
        knum = len(np.nonzero(bcount > 1)[0])

        s = self.density_matrix.shape
        # The leading ``False`` deliberately disables the SVD path below; it is
        # kept so the refinement can be re-enabled by removing it.
        if False and len(s) > 1 and s[0] > 10 and s[1] > 10 and knum < min(s) / 2:
            (u, s, vt) = la.svds(self.sps_matrixs, k=knum)
            self.u = u
            # print() call form: the print statement is a SyntaxError on
            # Python 3, and a single string prints identically on Python 2.
            print('============')
        else:
            self.result = self.coarse_r
            return (clkg, clkg)

        # Only reached when the SVD path above is enabled: cluster the columns
        # of diag(s) * vt by cosine distance.
        a = np.matrix(np.diag(s)) * np.matrix(vt)
        pd = dist.pdist(np.array(a.T), 'cosine')
        # Snap tiny values (positive or negative) to exactly zero.
        pd[np.abs(pd) < 1e-11] = 0
        lkg = hcluster.linkage(pd, method='average')
        self.lkg = lkg

        self.result = hcluster.fcluster(lkg, self.svd_cluster_thr, criterion='distance')
        return (lkg, clkg)

开发者ID:lrpopeyou，项目名称:cluster_fp_user，代码行数:33，代码来源:cluster_by_grid.py

示例7: main

def main():
    """Sample three 2-D Gaussian blobs and show their dendrograms.

    Draws 900 points (300 around each of three means) and displays one
    dendrogram per linkage method: Ward, single and complete.
    """
    def _show_dendrogram(Z, title):
        # Draw one titled dendrogram and display it.
        plt.title(title)
        dendrogram(Z)
        plt.show()

    D = 2  # so we can visualize it more easily
    s = 4  # separation so we can control how far apart the means are
    mu1 = np.array([0, 0])
    mu2 = np.array([s, s])
    mu3 = np.array([0, s])

    N = 900  # number of samples
    X = np.zeros((N, D))
    X[:300, :] = np.random.randn(300, D) + mu1
    X[300:600, :] = np.random.randn(300, D) + mu2
    X[600:, :] = np.random.randn(300, D) + mu3

    Z = linkage(X, 'ward')
    # Z has the format [idx1, idx2, dist, sample_count]
    # therefore, its size will be (N-1, 4)
    # %-formatting gives the same text on Python 2 and 3 (the original print
    # statement is a SyntaxError on Python 3).
    print("Z.shape: %s" % (Z.shape,))
    _show_dendrogram(Z, "Ward")

    for method, title in (('single', "Single"), ('complete', "Complete")):
        _show_dendrogram(linkage(X, method), title)

开发者ID:AndreyDrv，项目名称:machine_learning_examples，代码行数:30，代码来源:hcluster.py

示例8: HierarchicalCluster

def HierarchicalCluster(A):
    """Plot the correlation matrix of ``A``'s columns with dendrograms on two sides.

    The correlation matrix ``np.corrcoef(A.T)`` is clustered with centroid
    linkage; the same tree is drawn on the left and on top, the matrix is
    reordered by the leaf order and shown with a colour bar. The figure is
    shown and saved as ``dendrogram.png``.
    """
    #see http://stackoverflow.com/questions/2982929/plotting-results-of-hierarchical-clustering-ontop-of-a-matrix-of-data-in-python
    Corr = np.corrcoef(A.T)
    fig = plt.figure(figsize=(8, 8))

    # Both dendrograms use the same input and method, so the linkage is
    # computed once instead of twice.
    # NOTE(review): the rows of the correlation matrix are passed to linkage()
    # as observation vectors -- confirm this is intended rather than a
    # distance derived from the correlations.
    Y = hrc.linkage(Corr, method='centroid')

    # Left dendrogram.
    ax1 = fig.add_axes([0.09, 0.1, 0.2, 0.6])
    Z1 = hrc.dendrogram(Y, orientation='right')
    ax1.set_xticks([])
    ax1.set_yticks([])

    # Top dendrogram.
    ax2 = fig.add_axes([0.3, 0.71, 0.6, 0.2])
    Z2 = hrc.dendrogram(Y)
    ax2.set_xticks([])
    ax2.set_yticks([])

    # Correlation matrix reordered by the two leaf orders.
    axmatrix = fig.add_axes([0.3, 0.1, 0.6, 0.6])
    idx1 = Z1['leaves']
    idx2 = Z2['leaves']
    Corr = Corr[idx1, :]
    Corr = Corr[:, idx2]
    im = axmatrix.matshow(Corr, aspect='auto', origin='lower')

    # Colour bar.
    axcolor = fig.add_axes([0.91, 0.1, 0.02, 0.6])
    pylab.colorbar(im, cax=axcolor)
    fig.show()
    fig.savefig('dendrogram.png')

开发者ID:izkula，项目名称:matrix-viz，代码行数:27，代码来源:npz_to_csv.py

示例9: hcluster_cols

	def hcluster_cols(self, thresh):
		"""Merge the columns of ``self.X`` that fall into the same cluster.

		Columns are clustered with complete linkage on cosine distance, falling
		back to euclidean distance when that fails, and cut at distance
		``thresh``. Each cluster of several columns is replaced by the row-wise
		mean of its columns; single-column clusters are copied unchanged. The
		result is stored in ``self.df`` and ``self.crimes`` is replaced by the
		new column names.
		"""
		try:
			link = linkage(self.X.T, method='complete', metric='cosine')
			assignments = fcluster(link, thresh, 'distance')
		except Exception:
			# Best-effort fallback kept from the original, but no longer a bare
			# ``except`` that would also swallow KeyboardInterrupt/SystemExit.
			link = linkage(self.X.T, method='complete', metric='euclidean')
			assignments = fcluster(link, thresh, 'distance')

		# Group column positions by cluster id. Plain dict grouping replaces
		# ``pd.DataFrame(zip(...))``, which relied on Python 2's list-returning zip.
		members = {}
		for col, cluster_id in zip(range(len(self.crimes)), assignments):
			members.setdefault(cluster_id, []).append(col)

		df_new = pd.DataFrame(index=np.arange(len(self.names)))
		# Ascending cluster id, the order the original groupby produced.
		for cluster_id in sorted(members):
			cols = members[cluster_id]
			if len(cols) > 1:
				df_new[str(self.crimes[cols])] = np.mean(self.X[:, cols], axis=1)
			else:
				df_new[str(self.crimes[cols[0]])] = self.X[:, cols[0]]

		self.df = df_new
		self.crimes = df_new.columns.values

开发者ID:nhu2000，项目名称:hood_project，代码行数:28，代码来源:pca_class.py

示例10: starthcc

 def starthcc(self):
     """Show a dendrogram of the points in ``self.tr``.

     The points are loaded as columns 'x' and 'y', their pairwise distances
     are computed with the metric ``self.dm`` and clustered with the linkage
     method ``self.lin``.
     """
     from scipy.spatial.distance import pdist

     # %-formatting prints the same text on Python 2 and 3; the original print
     # statement is a SyntaxError on Python 3.
     print("%s %s" % (self.dm, self.lin))
     dataFrame = pd.DataFrame(self.tr, columns=['x', 'y'])

     # linkage() needs the condensed vector returned by pdist(); a squareform()
     # matrix would be interpreted as observation vectors and clustered with a
     # second, euclidean distance on top of ``self.dm``.
     distxy = pdist(dataFrame, metric=(self.dm))

     # The original had three byte-identical branches for "single", "complete"
     # and everything else, so a single code path is equivalent.
     plt.figure()
     dendrogram(linkage(distxy, method=str(self.lin)))

     plt.xlabel('X units')
     plt.ylabel('Y units')
     plt.suptitle('Cluster Dendrogram', fontweight='bold', fontsize=14)

     plt.show()

开发者ID:vishnumani2009，项目名称:OpenSource-Open-Ended-Statistical-toolkit，代码行数:35，代码来源:heirarfrontend.py

示例11: save_mat

def save_mat(c2map, filepath):
    """Save ``c2map['mat']`` as a heat map framed by two dendrograms.

    The left dendrogram uses centroid linkage and the top one single linkage.
    The matrix rows follow the left leaf order and the columns the top leaf
    order. The figure, including a colour bar, is written to ``filepath``.
    """
    def _hide_ticks(axes):
        # Remove the tick marks on both axes of a panel.
        axes.set_xticks([])
        axes.set_yticks([])

    matrix = c2map['mat']
    figure = pylab.figure(figsize=(8, 8))

    # Left panel: first dendrogram.
    left_axes = figure.add_axes([0.09, 0.1, 0.2, 0.6])
    left_tree = sch.dendrogram(sch.linkage(matrix, method='centroid'), orientation='right')
    _hide_ticks(left_axes)

    # Top panel: second dendrogram.
    top_axes = figure.add_axes([0.3, 0.71, 0.6, 0.2])
    top_tree = sch.dendrogram(sch.linkage(matrix, method='single'))
    _hide_ticks(top_axes)

    # Centre panel: matrix with rows and columns in leaf order.
    heat_axes = figure.add_axes([0.3, 0.1, 0.6, 0.6])
    ordered = matrix[left_tree['leaves'], :][:, top_tree['leaves']]
    image = heat_axes.matshow(ordered, aspect='auto', origin='lower', cmap=pylab.cm.YlGnBu)
    _hide_ticks(heat_axes)

    # Right strip: colour bar.
    bar_axes = figure.add_axes([0.91, 0.1, 0.02, 0.6])
    pylab.colorbar(image, cax=bar_axes)

    figure.savefig(filepath)

开发者ID:andpromobile，项目名称:csipb-jamu-prj，代码行数:33，代码来源:c2map.py

示例12: plot_transition_clustermap

def plot_transition_clustermap(data_array, gene_names, pseudotimes, n_clusters=10, gradient=False):
    """Draw a row-clustered heat map of genes over pseudotime and return the gene clusters.

    Args:
        data_array: 2-D array; its transpose is plotted, so each plotted row
            corresponds to one entry of ``gene_names``.
        gene_names: Labels for the plotted rows.
        pseudotimes: Array indexed by position along ``data_array``'s first
            axis; used for the x tick labels.
        n_clusters: Maximum number of flat clusters ("maxclust" criterion).
        gradient: When true, cluster and plot the z-scored gradient along the
            second axis instead of the raw values.

    Returns:
        A ``defaultdict`` mapping ``settings.STATE_COLORS[cluster]`` to the
        list of gene names in that cluster.
    """
    if gradient:
        data_to_plot = zscore(np.gradient(data_array)[1].T, axis=0)
        scale = None
        metric = 'seuclidean'
        row_linkage = linkage(pdist(abs(data_to_plot), metric=metric), method='complete')
    else:
        data_to_plot = data_array.T
        scale = 0
        metric = 'correlation'
        row_linkage = linkage(pdist(data_to_plot, metric=metric), method='complete')

    assignments = fcluster(row_linkage, n_clusters, criterion='maxclust')
    cm = sns.clustermap(data_to_plot, col_cluster=False, standard_scale=scale,
                        yticklabels=gene_names, row_linkage=row_linkage,
                        row_colors=[settings.STATE_COLORS[i] for i in assignments])

    # Roughly ten evenly spaced ticks. Floor division keeps the positions
    # integral on Python 3 (they index ``pseudotimes``), and the step is
    # clamped to 1 so fewer than ten samples cannot produce a zero step.
    n_samples = data_array.shape[0]
    r = np.arange(10, n_samples, max(1, n_samples // 10))
    plt.setp(cm.ax_heatmap.get_yticklabels(), fontsize=5)
    cm.ax_heatmap.set_xticks(r)
    cm.ax_heatmap.set_xticklabels(['%.1f' % x for x in pseudotimes[r]])
    cm.ax_heatmap.set_xlabel('Pseudotime')
    cm.ax_heatmap.set_ylabel('Gene')

    gene_clusters = defaultdict(list)
    for i, cl in enumerate(assignments):
        gene_clusters[settings.STATE_COLORS[cl]].append(gene_names[i])
    return gene_clusters

开发者ID:dimenwarper，项目名称:scimitar，代码行数:27，代码来源:plotting.py

示例13: check_linkage_q

    def check_linkage_q(self, method):
        """Check ``linkage`` with ``method`` against the stored result for ``hierarchy_test_data.X``.

        The linkage is computed twice, once from the observation matrix and
        once from its condensed euclidean distances, and both results must
        match ``hierarchy_test_data.linkage_X_<method>`` within 1e-06.
        """
        observations = hierarchy_test_data.X

        # Directly from the observation vectors.
        from_observations = linkage(observations, method)
        reference = getattr(hierarchy_test_data, "linkage_X_" + method)
        assert_allclose(from_observations, reference, atol=1e-06)

        # From the precomputed condensed distance vector.
        condensed = scipy.spatial.distance.pdist(observations, metric="euclidean")
        from_condensed = linkage(condensed, method)
        assert_allclose(from_condensed, reference, atol=1e-06)

开发者ID:metamorph-inc，项目名称:meta-core，代码行数:9，代码来源:test_hierarchy.py

示例14: _cluster_idx

def _cluster_idx(df):
    """ sort indices by clusters """
    dcol = pdist(df.T)
    drow = pdist(df)
    lcol = linkage(dcol)
    lrow = linkage(drow)
    cols = dendrogram(lcol, no_plot=True)['leaves']
    rows = dendrogram(lrow, no_plot=True)['leaves']
    return rows,cols

开发者ID:jdrudolph，项目名称:mypy，代码行数:9，代码来源:pdplot.py

示例15: plot_clustered_heatmap

def plot_clustered_heatmap(df, genes_list, cancer, output_path, scale='binary'):
    """Write an HTML heat map of genes by patients, with genes in clustered order.

    Args:
        df: Table with one column per gene in ``genes_list`` plus the columns
            ``patient_id`` and ``somatic_mutations_count``.
        genes_list: Names of the gene columns to plot.
        cancer: Label used in the output file name.
        output_path: Prefix of the output file name.
        scale: ``'binary'`` (complete linkage on hamming distance, two-colour
            scale) or ``'logarithmic'`` (Ward linkage, stepped colour scale
            relative to the matrix maximum).

    Raises:
        ValueError: If ``scale`` is neither ``'binary'`` nor ``'logarithmic'``.
    """
    # One row per gene, one column per sample. ``as_matrix()`` was removed in
    # pandas 1.0; ``.values`` returns the same ndarray.
    X = df[genes_list].values.transpose()

    if scale == 'binary':
        Z = linkage(X, method='complete', metric='hamming')
        colorscale = [[0, "rgb(111, 168, 220)"], [1, "rgb(5, 10, 172)"]]
        colorbar = {'tick0': 0, 'dtick': 1}
    elif scale == 'logarithmic':
        Z = linkage(X, method='ward')
        X_max = X.max()
        colorscale = [[0, 'rgb(250, 250, 250)'],
                      [1./X_max, 'rgb(200, 200, 200)'],
                      [5./X_max, 'rgb(150, 150, 200)'],
                      [20./X_max, 'rgb(100, 100, 200)'],
                      [100./X_max, 'rgb(50, 50, 200)'],
                      [1., 'rgb(0, 0, 200)']]
        colorbar = {'tick0': 0,
                    'tickmode': 'array',
                    'tickvals': [0, 1, 5, 20, 100, X_max]}
    else:
        # Fail early with a clear message instead of an unbound-name error
        # further down.
        raise ValueError("unsupported scale: %r (expected 'binary' or 'logarithmic')" % (scale,))

    # NOTE(review): the cophenetic correlation is measured against euclidean
    # pdist even when the tree was built with hamming distance -- confirm.
    c, coph_dists = cophenet(Z, pdist(X))
    # %-formatting prints the same text on Python 2 and 3.
    print("Cophenetic Correlation Coefficient: %s" % c)

    # Reorder the genes by the dendrogram leaf order.
    den = dendrogram(Z, orientation='left')
    idx = den['leaves']
    X = X[idx, :]
    print("X shape: %s" % (X.shape,))
    genes_ordered = [genes_list[i] for i in idx]
    logger.info("ordered genes: %s", str(genes_ordered))

    # Plotting the heatmap (without the hierarchy)
    heatmap_trace = go.Heatmap(z=X.tolist(), x=df.patient_id, y=genes_ordered, showscale=True, colorscale=colorscale, colorbar=colorbar)
    mutation_load_trace = go.Bar(x=df.patient_id, y=df.somatic_mutations_count/30.0)
    # 29-row grid: the bar chart spans rows 1-5 and the heat map rows 6-29.
    fig = tls.make_subplots(rows=29, cols=1, specs=[[{'rowspan':5, 'colspan' : 1}]] + [[None]] * 4 + [[{'rowspan' : 24, 'colspan' : 1}]] + [[None]] * 23)
    fig.append_trace(mutation_load_trace, 1, 1)
    fig.append_trace(heatmap_trace, 6, 1)
    fig['layout']['xaxis1'].update(showticklabels = False)
    fig['layout']['xaxis1'].update(zeroline = False, showgrid=False)
    fig['layout']['yaxis1'].update(zeroline = False, showgrid = False, tickfont=dict(family='Arial', size=4))
    fig['layout']['xaxis2'].update(showticklabels = False)
    fig['layout']['xaxis2'].update(zeroline = False, showgrid=False)
    fig['layout']['yaxis2'].update(zeroline = False, showgrid = False, tickfont=dict(family='Arial', size=4))
    plot(fig, auto_open=False, filename="%s_%s_heatmap_clustered.html" % (output_path, cancer))

开发者ID:galynz，项目名称:bio_project，代码行数:57，代码来源:plot_mutations_heatmap.py

注：本文中的scipy.cluster.hierarchy.linkage函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。