本文整理汇总了 Python 中 scipy.cluster.hierarchy.linkage 函数的典型用法代码示例。如果您正苦于以下问题:Python linkage 函数的具体用法?Python linkage 怎么用?Python linkage 使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了 linkage 函数的 15 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: random_distribution
def random_distribution(n):
    """Cluster a random ``n`` x ``n`` matrix and display it as a dendro-heatmap.

    The matrix is drawn from a normal distribution; its upper-left quadrant
    is shifted by 75 and its lower-right quadrant is replaced with Poisson
    samples. Rows and columns are clustered independently, the matrix is
    reordered by the resulting leaf orders, and the result is shown with
    ``pdh.DendroHeatMap``.

    Args:
        n: Side length of the square data matrix (an integer).
    """
    # Floor division keeps the slice bounds integral on Python 3 as well.
    half = n // 2

    # make up some data
    data = np.random.normal(scale=n, size=(n, n))
    data[0:half, 0:half] += 75
    data[half:, half:] = np.random.poisson(lam=n, size=data[half:, half:].shape)

    # cluster the rows
    # linkage() expects the *condensed* distance vector; a square matrix
    # would be interpreted as n observation vectors instead of distances.
    row_Z = sch.linkage(ssd.pdist(data))
    row_idxing = sch.leaves_list(row_Z)
    row_labels = ['bar{}'.format(i) for i in range(n)]

    # cluster the columns
    col_Z = sch.linkage(ssd.pdist(data.T))
    col_idxing = sch.leaves_list(col_Z)
    col_labels = ['foo{}'.format(i) for i in range(n)]

    # make the dendrogram
    data = data[:, col_idxing][row_idxing, :]
    heatmap = pdh.DendroHeatMap(
        heat_map_data=data,
        left_dendrogram=row_Z,
        top_dendrogram=col_Z,
        heatmap_colors=("#ffeda0", "#feb24c", "#f03b20"),
        window_size="auto",
        color_legend_displayed=False,
        label_color="#777777",
    )
    heatmap.row_labels = row_labels
    heatmap.col_labels = col_labels
    heatmap.title = 'An example heatmap'
    heatmap.show()  # heatmap.save("example.png")
示例2: getDistMatrixes
def getDistMatrixes(cls, distDict, distMeasure, linkageCriterion):
    """
    Build the correlation matrix, linkage matrix and triangular distance
    matrix for the measure selected by ``distMeasure``.

    ``distDict[distMeasure]`` holds the triangular (condensed) values of the
    measure. The return value is the tuple
    ``(correlationMatrix, linkageMatrix, triangularDistMatrix)``.
    """
    from numpy import fill_diagonal, ones
    from scipy.cluster.hierarchy import linkage
    from scipy.spatial.distance import squareform

    isSignedMeasure = distMeasure == cls.CORR_PEARSON or distMeasure == cls.SIM_MCCONNAUGHEY

    if not isSignedMeasure:
        # The stored values already are distances: cluster them directly.
        triangularDistMatrix = distDict[distMeasure]
        linkageMatrix = linkage(cls.removeNanDistances(triangularDistMatrix), linkageCriterion)
        # Square correlation matrix is one minus the square distance matrix.
        squareDistMatrix = squareform(triangularDistMatrix)
        side = len(squareDistMatrix)
        correlationMatrix = ones((side, side)) - squareDistMatrix
        return correlationMatrix, linkageMatrix, triangularDistMatrix

    # These measures take values between -1 and 1, so they are first mapped
    # onto a distance between 0 and 1 before clustering.
    triangularCorrMatrix = distDict[distMeasure]
    rescaled = [(value + 1) / 2 for value in triangularCorrMatrix]
    triangularDistMatrix = ones(len(triangularCorrMatrix)) - rescaled
    linkageMatrix = linkage(cls.removeNanDistances(triangularDistMatrix), linkageCriterion)

    # Expand the correlations to a square matrix with ones on the diagonal.
    correlationMatrix = squareform(triangularCorrMatrix)
    fill_diagonal(correlationMatrix, 1)
    return correlationMatrix, linkageMatrix, triangularDistMatrix
示例3: draw_intensity
def draw_intensity(a, cmap=GREEN_CMAP, metric='euclidean', method='average', sort_x=True, sort_y=True):
    """Draw ``a`` as a heatmap on the current axes, optionally with dendrograms.

    Args:
        a: DataFrame to plot; its columns and index are used as tick labels.
        cmap: Colormap passed to ``plt.imshow``.
        metric: Distance metric passed to ``pdist``.
        method: Linkage method passed to ``linkage``.
        sort_x: When ``True``, cluster the columns, draw their dendrogram
            above the heatmap and reorder the columns by its leaf order.
        sort_y: When ``True``, do the same for the rows, with the
            dendrogram drawn to the left.
    """
    main_axes = plt.gca()
    divider = make_axes_locatable(main_axes)

    if sort_x is True:
        plt.sca(divider.append_axes("top", 0.5, pad=0))
        # The metric is applied by pdist; linkage ignores it for condensed input.
        xlinkage = linkage(pdist(a.T, metric=metric), method=method)
        xdendro = dendrogram(xlinkage, orientation='top', no_labels=True,
                             distance_sort='descending',
                             link_color_func=lambda x: 'black')
        plt.gca().set_axis_off()
        # Positional reordering: also correct when column labels repeat.
        a = a.iloc[:, xdendro['leaves']]

    if sort_y is True:
        plt.sca(divider.append_axes("left", 1.0, pad=0))
        ylinkage = linkage(pdist(a, metric=metric), method=method)
        ydendro = dendrogram(ylinkage, orientation='right', no_labels=True,
                             distance_sort='descending',
                             link_color_func=lambda x: 'black')
        plt.gca().set_axis_off()
        # DataFrame.ix no longer exists in current pandas; the dendrogram
        # leaves are positions, so iloc is the direct equivalent.
        a = a.iloc[ydendro['leaves']]

    plt.sca(main_axes)
    plt.imshow(a, aspect='auto', interpolation='none',
               cmap=cmap, vmin=0.0, vmax=1.0)
    plt.colorbar(pad=0.15)
    plt.gca().yaxis.tick_right()
    plt.xticks(range(a.shape[1]), a.columns, rotation=90, size='small')
    plt.yticks(range(a.shape[0]), a.index, size='x-small')
    plt.gca().xaxis.set_ticks_position('none')
    plt.gca().yaxis.set_ticks_position('none')
    plt.gca().invert_yaxis()
    plt.show()
示例4: hierarchical_clustering
def hierarchical_clustering(data, skill, method='single', metric='euclidean', dendrogram=True, concepts=False, cluster_number=3, corr_as_vectors=False):
    """Hierarchically cluster the items (or concepts) of one skill.

    The correlation matrix comes from ``compute_corr``; missing values are
    replaced by 0. It is either clustered directly as observation vectors
    (``corr_as_vectors``) or converted to the distance ``1 - corr`` first.

    Returns:
        The flat cluster assignment produced by ``hr.fcluster`` with the
        ``"maxclust"`` criterion and ``cluster_number`` clusters.
    """
    pk, level = data.get_skill_id(skill)
    items = data.get_items_df()
    skills = data.get_skills_df()

    corr = compute_corr(data, merge_skills=concepts)
    print("Corr ({}) contain total {} values and from that {} nans".format(corr.shape, corr.size, corr.isnull().sum().sum()))
    corr[corr.isnull()] = 0

    # Both variants only look at the items that belong to the requested skill.
    items = items[items["skill_lvl_" + str(level)] == pk]
    if concepts:
        has_concept = ~items["skill_lvl_3"].isnull()
        skill_ids = items[has_concept]["skill_lvl_3"].unique()
        corr = pd.DataFrame(corr, index=skill_ids, columns=skill_ids)
        labels = list(skills.loc[corr.index]["name"])
    else:
        items = items[items["visualization"] != "pairing"]
        corr = pd.DataFrame(corr, index=items.index, columns=items.index)
        labels = []
        for _, row in items.iterrows():
            labels.append("{1} - {0}".format(row["name"], row["visualization"][0]))

    if not corr_as_vectors:
        Z = hr.linkage(dst.squareform(1 - corr), method=method)
    else:
        Z = hr.linkage(corr, method=method, metric=metric)
    # Clamp negative entries of the linkage matrix to zero.
    Z[Z < 0] = 0

    if dendrogram:
        plt.title('{}: method: {}, metric: {}, as vectors: {}'.format(skill, method, metric, corr_as_vectors))
        plt.xlabel("concepts" if concepts else 'items')
        plt.ylabel('distance')
        hr.dendrogram(Z, leaf_rotation=90., leaf_font_size=10., labels=labels)

    return hr.fcluster(Z, cluster_number, "maxclust")
示例5: compare_clusters
def compare_clusters(args):
    """Compare a reference matrix against several matrices and write a report.

    For every path in ``args['all']`` the Mantel statistic against the
    reference (``args['ref']``) and the Pearson correlation between the two
    cophenetic distance vectors (average linkage) are written as one
    tab-separated line to ``args['output']``. When a comparison raises, the
    error is printed and all four values of that line are ``Failed``.

    Args:
        args: Mapping with the keys ``'ref'`` (path of the reference table),
            ``'all'`` (iterable of table paths) and ``'output'`` (report path).
    """
    # .values works on old and new pandas; as_matrix() was removed in 1.0.
    ref_df = pd.read_table(args['ref'], sep='\t', skipinitialspace=True, index_col=0).values
    check_symmetry(ref_df)
    linkage_ref = linkage(ref_df, 'average')
    _, coph_dists_ref = cophenet(linkage_ref, pdist(ref_df))

    # The context manager closes the report even if a table fails to load.
    with open(args['output'], "w") as outfile:
        outfile.write("Tree_cluster\tMantel_Correlation_Coefficient\tManter_P-value\tCophenetic_Pearson\tCophenetic_P-value\n")
        for i in args['all']:
            fst_df = pd.read_table(i, sep='\t', skipinitialspace=True, index_col=0).values
            check_symmetry(fst_df)
            try:
                mantel_coeff, p_value_mantel, _ = mantel(ref_df, fst_df)
                linkage_fst = linkage(fst_df, 'average')
                _, coph_dists_fst = cophenet(linkage_fst, pdist(fst_df))
                cophenetic_pearson, p_value_cophenetic = pearsonr(coph_dists_ref, coph_dists_fst)
            except Exception as e:
                # Deliberate best-effort: report the failure and keep going.
                print("Error : %s" % str(e))
                mantel_coeff = "Failed"
                p_value_mantel = "Failed"
                cophenetic_pearson = "Failed"
                p_value_cophenetic = "Failed"
            fields = [i, str(mantel_coeff), str(p_value_mantel),
                      str(cophenetic_pearson), str(p_value_cophenetic)]
            outfile.write("\t".join(fields) + "\n")
示例6: cluster_fps
def cluster_fps(self):
    """Cluster ``self.dm`` with average linkage and store the flat clusters.

    The coarse clustering cuts the tree at distance 0.3 and is stored in
    ``self.coarse_r``. An SVD-based refinement exists below but is switched
    off, so the coarse clustering is also stored as ``self.result``.

    Returns:
        A tuple ``(linkage, coarse_linkage)``. While the refinement is
        disabled both entries are the coarse linkage matrix.
    """
    clkg = hcluster.linkage(self.dm, method='average')
    coarse_r = hcluster.fcluster(clkg, 0.3, criterion='distance')
    self.coarse_r = coarse_r

    bcount = np.bincount(coarse_r)
    knum = len(np.nonzero(bcount > 1)[0])  # clusters with more than one member
    shape = self.density_matrix.shape

    # The refinement was hard-disabled in the original code (``if False and``);
    # the flag keeps that decision explicit and short-circuits the rest.
    svd_refinement_enabled = False
    refine = (svd_refinement_enabled and len(shape) > 1 and shape[0] > 10
              and shape[1] > 10 and knum < min(shape) / 2)
    if not refine:
        self.result = self.coarse_r
        return (clkg, clkg)

    (u, singular_values, vt) = la.svds(self.sps_matrixs, k=knum)
    self.u = u
    print('============')

    a = np.matrix(np.diag(singular_values)) * np.matrix(vt)
    cosine_dists = dist.pdist(np.array(a.T), 'cosine')
    # Remove numerical noise around zero before clustering.
    cosine_dists[np.abs(cosine_dists) < 1e-11] = 0
    lkg = hcluster.linkage(cosine_dists, method='average')
    self.lkg = lkg
    self.result = hcluster.fcluster(lkg, self.svd_cluster_thr, criterion='distance')
    return (lkg, clkg)
示例7: main
def main():
    """Cluster three synthetic 2-D Gaussian blobs and plot their dendrograms.

    300 points are drawn around each of three means, and one dendrogram is
    shown for each of the ward, single and complete linkage methods.
    """
    D = 2  # so we can visualize it more easily
    s = 4  # separation so we can control how far apart the means are
    means = (np.array([0, 0]), np.array([s, s]), np.array([0, s]))

    per_blob = 300
    N = per_blob * len(means)  # number of samples
    X = np.zeros((N, D))
    for k, mu in enumerate(means):
        X[k * per_blob:(k + 1) * per_blob, :] = np.random.randn(per_blob, D) + mu

    for method, title in (('ward', 'Ward'), ('single', 'Single'), ('complete', 'Complete')):
        Z = linkage(X, method)
        if method == 'ward':
            # Z has the format [idx1, idx2, dist, sample_count]
            # therefore, its size will be (N-1, 4)
            print("Z.shape: {}".format(Z.shape))
        plt.title(title)
        dendrogram(Z)
        plt.show()
示例8: HierarchicalCluster
def HierarchicalCluster(A):
    """Plot the correlation matrix of ``A``'s columns with two dendrograms.

    The correlation matrix ``np.corrcoef(A.T)`` is clustered with the
    centroid method, drawn with a dendrogram on the left and on top,
    reordered by the leaf orders and saved to ``dendrogram.png``.

    Args:
        A: 2-D array; the correlation is computed between its columns.
    """
    # see http://stackoverflow.com/questions/2982929/plotting-results-of-hierarchical-clustering-ontop-of-a-matrix-of-data-in-python
    Corr = np.corrcoef(A.T)
    fig = plt.figure(figsize=(8, 8))

    # Both dendrograms use the same linkage, so it is computed only once.
    # NOTE(review): Corr is handed to linkage() as observation vectors, not
    # as a distance matrix -- confirm that this is intended.
    Y = hrc.linkage(Corr, method='centroid')

    ax1 = fig.add_axes([0.09, 0.1, 0.2, 0.6])
    Z1 = hrc.dendrogram(Y, orientation='right')
    ax1.set_xticks([])
    ax1.set_yticks([])

    ax2 = fig.add_axes([0.3, 0.71, 0.6, 0.2])
    Z2 = hrc.dendrogram(Y)
    ax2.set_xticks([])
    ax2.set_yticks([])

    axmatrix = fig.add_axes([0.3, 0.1, 0.6, 0.6])
    idx1 = Z1['leaves']
    idx2 = Z2['leaves']
    Corr = Corr[idx1, :]
    Corr = Corr[:, idx2]
    im = axmatrix.matshow(Corr, aspect='auto', origin='lower')

    axcolor = fig.add_axes([0.91, 0.1, 0.02, 0.6])
    pylab.colorbar(im, cax=axcolor)
    fig.show()
    fig.savefig('dendrogram.png')
示例9: hcluster_cols
def hcluster_cols(self, thresh):
    """Merge the columns of ``self.X`` that fall into the same cluster.

    Columns are clustered with complete linkage on cosine distance, falling
    back to Euclidean distance when that fails, and the tree is cut at
    distance ``thresh``. Each cluster becomes one column of the new frame:
    the row-wise mean of its members, or the column itself for a singleton.
    The frame is stored in ``self.df`` and its column names replace
    ``self.crimes``.

    Args:
        thresh: Distance threshold passed to ``fcluster``.
    """
    try:
        link = linkage(self.X.T, method='complete', metric='cosine')
        assignments = fcluster(link, thresh, 'distance')
    except Exception:
        # Best-effort fallback, but without swallowing KeyboardInterrupt or
        # SystemExit the way a bare ``except`` would.
        link = linkage(self.X.T, method='complete', metric='euclidean')
        assignments = fcluster(link, thresh, 'distance')

    # Only columns that have a name take part, as with the original zip().
    labels = np.asarray(assignments)[:len(self.crimes)]

    df_new = pd.DataFrame(index=np.arange(len(self.names)))
    # np.unique yields the cluster labels in ascending order and flatnonzero
    # yields the member columns in ascending order.
    for label in np.unique(labels):
        cols = list(np.flatnonzero(labels == label))
        if len(cols) > 1:
            df_new[str(self.crimes[cols])] = np.mean(self.X[:, cols], axis=1)
        else:
            df_new[str(self.crimes[cols[0]])] = self.X[:, cols[0]]

    self.df = df_new
    self.crimes = df_new.columns.values
示例10: starthcc
def starthcc(self):
    """Show the dendrogram of the points in ``self.tr``.

    Pairwise distances use the metric ``self.dm`` and the tree is built
    with the linkage method ``self.lin``.
    """
    from scipy.spatial.distance import pdist

    print("{} {}".format(self.dm, self.lin))
    dataFrame = pd.DataFrame(self.tr, columns=['x', 'y'])

    # linkage() needs the condensed distance vector; a square matrix would
    # be treated as observation vectors rather than as distances.
    condensed = pdist(dataFrame, metric=self.dm)

    # The original branches for "single", "complete" and everything else
    # were identical, so a single code path is enough.
    plt.figure()
    dendrogram(linkage(condensed, method=str(self.lin)))
    plt.xlabel('X units')
    plt.ylabel('Y units')
    plt.suptitle('Cluster Dendrogram', fontweight='bold', fontsize=14)
    plt.show()
示例11: save_mat
def save_mat(c2map, filepath):
    """Save a heatmap of ``c2map['mat']`` flanked by two dendrograms.

    Rows are ordered by a centroid-linkage dendrogram drawn on the left and
    columns by a single-linkage dendrogram drawn on top. The figure is
    written to ``filepath``.
    """
    matrix = c2map['mat']
    figure = pylab.figure(figsize=(8, 8))

    # Left dendrogram (centroid linkage) fixes the row order.
    left_axes = figure.add_axes([0.09, 0.1, 0.2, 0.6])
    row_tree = sch.dendrogram(sch.linkage(matrix, method='centroid'), orientation='right')
    for clear_ticks in (left_axes.set_xticks, left_axes.set_yticks):
        clear_ticks([])

    # Top dendrogram (single linkage) fixes the column order.
    top_axes = figure.add_axes([0.3, 0.71, 0.6, 0.2])
    col_tree = sch.dendrogram(sch.linkage(matrix, method='single'))
    for clear_ticks in (top_axes.set_xticks, top_axes.set_yticks):
        clear_ticks([])

    # Heatmap of the reordered matrix.
    heat_axes = figure.add_axes([0.3, 0.1, 0.6, 0.6])
    reordered = matrix[row_tree['leaves'], :][:, col_tree['leaves']]
    image = heat_axes.matshow(reordered, aspect='auto', origin='lower', cmap=pylab.cm.YlGnBu)
    for clear_ticks in (heat_axes.set_xticks, heat_axes.set_yticks):
        clear_ticks([])

    # Colorbar in its own narrow axes on the right.
    bar_axes = figure.add_axes([0.91, 0.1, 0.02, 0.6])
    pylab.colorbar(image, cax=bar_axes)
    figure.savefig(filepath)
示例12: plot_transition_clustermap
def plot_transition_clustermap(data_array, gene_names, pseudotimes, n_clusters=10, gradient=False):
    """Draw a row-clustered heatmap and return the genes of every cluster.

    Args:
        data_array: 2-D array that is transposed before plotting, so each
            of its columns becomes one heatmap row.
            # presumably shaped (time points, genes) -- TODO confirm
        gene_names: Row labels; entry ``i`` names row ``i`` of the heatmap.
        pseudotimes: Array indexed by the x tick positions to label them.
        n_clusters: Maximum number of flat clusters (``maxclust``).
        gradient: When true, cluster the z-scored gradient with the
            ``seuclidean`` metric instead of the raw data with
            ``correlation``.

    Returns:
        A ``defaultdict`` mapping ``settings.STATE_COLORS[cluster]`` to the
        list of gene names in that cluster.
    """
    if gradient:
        data_to_plot = zscore(np.gradient(data_array)[1].T, axis=0)
        scale = None
        metric = 'seuclidean'
        row_linkage = linkage(pdist(abs(data_to_plot), metric=metric), method='complete')
    else:
        data_to_plot = data_array.T
        scale = 0
        metric = 'correlation'
        row_linkage = linkage(pdist(data_to_plot, metric=metric), method='complete')

    assignments = fcluster(row_linkage, n_clusters, criterion='maxclust')
    cm = sns.clustermap(data_to_plot, col_cluster=False, standard_scale=scale,
                        yticklabels=gene_names, row_linkage=row_linkage,
                        row_colors=[settings.STATE_COLORS[i] for i in assignments])

    # Tick positions double as indices into ``pseudotimes``, so they must be
    # integers (true division gives floats on Python 3). The step is at
    # least 1 so that inputs with fewer than 10 rows do not give a zero step.
    n_points = data_array.shape[0]
    r = np.arange(10, n_points, max(1, n_points // 10))

    plt.setp(cm.ax_heatmap.get_yticklabels(), fontsize=5)
    cm.ax_heatmap.set_xticks(r)
    cm.ax_heatmap.set_xticklabels(['%.1f' % x for x in pseudotimes[r]])
    cm.ax_heatmap.set_xlabel('Pseudotime')
    cm.ax_heatmap.set_ylabel('Gene')

    gene_clusters = defaultdict(list)
    for i, cl in enumerate(assignments):
        gene_clusters[settings.STATE_COLORS[cl]].append(gene_names[i])
    return gene_clusters
示例13: check_linkage_q
def check_linkage_q(self, method):
# Tests linkage(Y, method) on the Q data set.
Z = linkage(hierarchy_test_data.X, method)
expectedZ = getattr(hierarchy_test_data, "linkage_X_" + method)
assert_allclose(Z, expectedZ, atol=1e-06)
y = scipy.spatial.distance.pdist(hierarchy_test_data.X, metric="euclidean")
Z = linkage(y, method)
assert_allclose(Z, expectedZ, atol=1e-06)
示例14: _cluster_idx
def _cluster_idx(df):
""" sort indices by clusters """
dcol = pdist(df.T)
drow = pdist(df)
lcol = linkage(dcol)
lrow = linkage(drow)
cols = dendrogram(lcol, no_plot=True)['leaves']
rows = dendrogram(lrow, no_plot=True)['leaves']
return rows,cols
示例15: plot_clustered_heatmap
def plot_clustered_heatmap(df, genes_list, cancer, output_path, scale='binary'):
    """Write a plotly heatmap of genes ordered by hierarchical clustering.

    The genes in ``genes_list`` are clustered, the heatmap rows follow the
    dendrogram leaf order, and a bar chart of ``df.somatic_mutations_count``
    divided by 30 is placed above it. The figure is written to
    ``"<output_path>_<cancer>_heatmap_clustered.html"``.

    Args:
        df: DataFrame with one column per gene in ``genes_list`` plus the
            columns ``patient_id`` and ``somatic_mutations_count``.
        genes_list: Names of the gene columns to cluster and plot.
        cancer: Label used in the output file name.
        output_path: Prefix of the output file name.
        scale: ``'binary'`` (complete linkage on Hamming distance) or
            ``'logarithmic'`` (Ward linkage on Euclidean distance).

    Raises:
        ValueError: If ``scale`` is neither ``'binary'`` nor ``'logarithmic'``.
    """
    # After the transpose every row is one gene and every column one sample.
    X = df[genes_list].values.transpose()

    if scale == 'binary':
        distances = pdist(X, metric='hamming')
        Z = linkage(distances, method='complete')
        colorscale = [[0, "rgb(111, 168, 220)"], [1, "rgb(5, 10, 172)"]]
        colorbar = {'tick0': 0, 'dtick': 1}
    elif scale == 'logarithmic':
        distances = pdist(X)
        Z = linkage(distances, method='ward')
        X_max = X.max()
        colorscale = [[0, 'rgb(250, 250, 250)'],
                      [1. / X_max, 'rgb(200, 200, 200)'],
                      [5. / X_max, 'rgb(150, 150, 200)'],
                      [20. / X_max, 'rgb(100, 100, 200)'],
                      [100. / X_max, 'rgb(50, 50, 200)'],
                      [1., 'rgb(0, 0, 200)']]
        colorbar = {'tick0': 0,
                    'tickmode': 'array',
                    'tickvals': [0, 1, 5, 20, 100, X_max]}
    else:
        # Fail with a clear message instead of a NameError further down.
        raise ValueError("scale must be 'binary' or 'logarithmic', got %r" % (scale,))

    # The cophenetic correlation is measured against the same distances the
    # tree was built from (Hamming for the binary scale, not Euclidean).
    c, coph_dists = cophenet(Z, distances)
    print("Cophenetic Correlation Coefficient: {}".format(c))

    den = dendrogram(Z, orientation='left')
    idx = den['leaves']
    X = X[idx, :]
    print("X shape: {}".format(X.shape))
    genes_ordered = [genes_list[i] for i in idx]
    logger.info("ordered genes: %s", str(genes_ordered))

    # Plotting the heatmap (without the hierarchy)
    heatmap_trace = go.Heatmap(z=X.tolist(), x=df.patient_id, y=genes_ordered, showscale=True, colorscale=colorscale, colorbar=colorbar)
    mutation_load_trace = go.Bar(x=df.patient_id, y=df.somatic_mutations_count/30.0)

    # 29 grid rows: the bar chart spans the first 5, the heatmap the other 24.
    fig = tls.make_subplots(rows=29, cols=1, specs=[[{'rowspan':5, 'colspan' : 1}]] + [[None]] * 4 + [[{'rowspan' : 24, 'colspan' : 1}]] + [[None]] * 23)
    fig.append_trace(mutation_load_trace, 1, 1)
    fig.append_trace(heatmap_trace, 6, 1)

    fig['layout']['xaxis1'].update(showticklabels = False)
    fig['layout']['xaxis1'].update(zeroline = False, showgrid=False)
    fig['layout']['yaxis1'].update(zeroline = False, showgrid = False, tickfont=dict(family='Arial', size=4))
    fig['layout']['xaxis2'].update(showticklabels = False)
    fig['layout']['xaxis2'].update(zeroline = False, showgrid=False)
    fig['layout']['yaxis2'].update(zeroline = False, showgrid = False, tickfont=dict(family='Arial', size=4))

    plot(fig, auto_open=False, filename="%s_%s_heatmap_clustered.html" % (output_path, cancer))