本文整理汇总了Python中scipy.cluster.hierarchy.dendrogram方法的典型用法代码示例。如果您正苦于以下问题：Python hierarchy.dendrogram方法的具体用法？Python hierarchy.dendrogram怎么用？Python hierarchy.dendrogram使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块scipy.cluster.hierarchy的用法示例。
在下文中一共展示了hierarchy.dendrogram方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: plotdendro
# 需要导入模块: from scipy.cluster import hierarchy [as 别名]
# 或者: from scipy.cluster.hierarchy import dendrogram [as 别名]
def plotdendro(Z, ncluster, filename, rep_ind):
    """Draw a truncated dendrogram of ``Z`` and save it to ``filename``.

    Args:
        Z: Linkage matrix, passed unchanged to ``sc.dendrogram``.
        ncluster: Value of ``p`` for the ``'lastp'`` truncation mode.
        filename: Destination handed to ``plt.savefig``.
        rep_ind: Not used by this function; kept so existing callers
            that pass it keep working.
    """
    plt.figure(figsize=(10, 15))
    plt.title('Hierarchical Clustering Dendrogram')
    plt.xlabel('sample index')
    plt.ylabel('distance')
    sc.dendrogram(Z, truncate_mode='lastp', p=ncluster, orientation='right',
                  leaf_rotation=90., leaf_font_size=20.,
                  show_contracted=False)
    # ``papertype`` and ``frameon`` are no longer accepted by savefig on
    # current Matplotlib (TypeError), so they are not passed any more.
    plt.savefig(filename, dpi=100, facecolor='w', edgecolor='w',
                orientation='portrait', format=None,
                transparent=True, bbox_inches=None, pad_inches=0.1)
示例2: plot_dendrogram
# 需要导入模块: from scipy.cluster import hierarchy [as 别名]
# 或者: from scipy.cluster.hierarchy import dendrogram [as 别名]
def plot_dendrogram(self, **kwargs):
    """Plot a dendrogram built from ``self.children`` and show it.

    The linkage matrix is assembled from ``self.children`` plus two
    placeholder columns (``0 .. n_merges - 1``) used for both the
    distance and the count column, so link heights reflect merge order
    only.  Presumably ``self.children`` is an ``(n_merges, 2)`` array of
    merged node ids -- TODO confirm against the owning class.

    Args:
        **kwargs: Forwarded unchanged to ``dendrogram``.
    """
    n_merges = self.children.shape[0]
    # Placeholder distances / counts: one increasing value per merge.
    distance = np.arange(n_merges)
    position = np.arange(n_merges)

    # Create the linkage matrix expected by ``dendrogram``.
    linkage_matrix = np.column_stack(
        [self.children, distance, position]
    ).astype(float)

    # Create the figure; the dendrogram draws on the current axes.
    plt.subplots(figsize=(15, 7))
    dendrogram(linkage_matrix, **kwargs)
    # Booleans are required here: the strings 'off' are truthy on
    # Matplotlib 3.x and would leave the ticks and labels visible.
    plt.tick_params(axis='x', bottom=False, top=False, labelbottom=False)
    plt.tight_layout()
    plt.show()
示例3: run_query_associations
# 需要导入模块: from scipy.cluster import hierarchy [as 别名]
# 或者: from scipy.cluster.hierarchy import dendrogram [as 别名]
def run_query_associations(ont, aset, args):
    """Query associations for ``args.subjects`` and plot them as a heatmap.

    When ``args.dendrogram`` is truthy the work is delegated to
    ``plot_subject_term_matrix`` and nothing else happens.  Otherwise
    every queried pair is printed, converted to a matrix with
    ``tuple_to_matrix`` and sent to plotly as a labelled heatmap.

    Args:
        ont: Passed through to ``plot_subject_term_matrix`` only.
        aset: Object providing ``query_associations``; also handed to
            ``mk_axis``.
        args: Options object; ``dendrogram`` and ``subjects`` are read.
    """
    if args.dendrogram:
        plot_subject_term_matrix(ont, aset, args)
        return

    # Imported lazily so plotly is only needed for the heatmap path.
    import plotly.plotly as py
    import plotly.graph_objs as go

    tups = aset.query_associations(subjects=args.subjects)
    for subject, term in tups:
        print("{} {}".format(subject, term))

    z, xaxis, yaxis = tuple_to_matrix(tups)
    xaxis = mk_axis(xaxis, aset, args)
    yaxis = mk_axis(yaxis, aset, args)
    logging.info("PLOTTING: {} x {} = {}".format(xaxis, yaxis, z))

    heatmap = go.Heatmap(z=z, x=xaxis, y=yaxis)
    py.plot([heatmap], filename='labelled-heatmap')
#plot_dendrogram(z, xaxis, yaxis)
# TODO: fix this really dumb implementation
示例4: dendrogram
# 需要导入模块: from scipy.cluster import hierarchy [as 别名]
# 或者: from scipy.cluster.hierarchy import dendrogram [as 别名]
def dendrogram(data, threshold, layer_directory):
    """Cluster the columns of ``data``, save a dendrogram PDF, return clusters.

    Columns are linked with single linkage on cosine distance.  The plot
    is written to ``<layer_directory>/dendrogram.pdf`` and flat clusters
    are cut at ``threshold`` with the ``'distance'`` criterion.

    Args:
        data: Object with a ``columns`` attribute that converts to a 2-D
            array via ``np.array`` (presumably a pandas DataFrame).
        threshold: Used both as the dendrogram ``color_threshold`` and as
            the ``fcluster`` cut height.
        layer_directory: Directory prefix for the output PDF.

    Returns:
        DataFrame indexed by ``"Sample Names"`` with one ``"Cluster"``
        column holding the flat cluster id of each column of ``data``.
    """
    colnames = data.columns
    data = np.array(data)
    # Transpose so that columns (samples) are the clustered observations.
    Z = hierarchy.linkage(data.T, 'single', 'cosine')
    plt.figure(figsize=(15, 9))
    hierarchy.dendrogram(Z, labels=colnames, color_threshold=threshold)
    plt.title("Clustering of Samples Based on Mutational Signatures" )
    plt.ylabel("Cosine Distance")
    plt.xlabel("Sample IDs")
    # ``figsize`` is not a savefig option (TypeError on current
    # Matplotlib); the figure size is already set by plt.figure above.
    plt.savefig(layer_directory+'/dendrogram.pdf', dpi=300)

    # Which datapoint goes to which cluster.
    Y = hierarchy.fcluster(Z, threshold, criterion='distance', R=None, monocrit=None)
    dataframe = pd.DataFrame({"Cluster":Y, "Sample Names":list(colnames)})
    dataframe = dataframe.set_index("Sample Names")
    return dataframe
######################################## Plot the reconstruction error vs stabilities and select the optimum number of signature ####################################################
示例5: plot_dendrogram
# 需要导入模块: from scipy.cluster import hierarchy [as 别名]
# 或者: from scipy.cluster.hierarchy import dendrogram [as 别名]
def plot_dendrogram(self, show_chart=True, save_path=None, figsize=(8, 8),
                    threshold=None):
    """
    Draw the dendrogram of ``self.link`` with scipy, labelled by ``self.sort_ix``.

    :param show_chart: If True, the chart is displayed with ``plt.show()``.
    :param save_path: Path to save the figure to; nothing is saved when None.
    :param figsize: Tuple with the figure dimensions.
    :param threshold: Height of the dendrogram at which nodes are coloured.
        If None, scipy's standard behaviour applies, which cuts the
        dendrogram at 70% of its height (0.7*max(self.link[:,2])).
    """
    plt.figure(figsize=figsize)
    sch.dendrogram(self.link,
                   orientation='left',
                   labels=self.sort_ix,
                   color_threshold=threshold)
    plt.tight_layout()

    if save_path is not None:
        plt.savefig(save_path, pad_inches=1, dpi=400)

    if show_chart:
        plt.show()

    plt.close()
示例6: assign_colors
# 需要导入模块: from scipy.cluster import hierarchy [as 别名]
# 或者: from scipy.cluster.hierarchy import dendrogram [as 别名]
def assign_colors(self):
    """Assign a plotting colour to every node of the dendrogram.

    Reads ``self.linkage_clusters`` (mapping of cluster key to node
    indices) and ``self.n``; writes ``self.node_color``, a list of
    ``2 * n - 1`` colour names.  Nodes start out black; each cluster
    with more than one node, visited in sorted key order, takes the next
    palette colour, wrapping around when the palette is exhausted.
    """
    members_by_cluster = self.linkage_clusters
    total_nodes = 2 * self.n - 1
    palette = ["blue", "green", "red", "orange"]
    colours = ["black"] * total_nodes

    palette_pos = 0
    for key in sorted(members_by_cluster):
        members = members_by_cluster[key]
        if len(members) <= 1:
            # Singleton clusters keep the default colour.
            continue
        chosen = palette[palette_pos]
        for node in members:
            colours[node] = chosen
        palette_pos = (palette_pos + 1) % len(palette)

    # One entry per possible node in the tree.
    self.node_color = colours
示例7: dendrogram
# 需要导入模块: from scipy.cluster import hierarchy [as 别名]
# 或者: from scipy.cluster.hierarchy import dendrogram [as 别名]
def dendrogram(data,
               vectorizer,
               method="ward",
               color_threshold=1,
               size=10,
               filename=None):
    """Plot a hierarchical-clustering dendrogram of ``data``.

    Each item is vectorised with ``vectorizer.transform``, pairwise
    distances are computed, and the resulting tree is drawn with leaves
    on the right-hand orientation.

    Args:
        data: Iterable of graph objects exposing a ``graph`` dict; the
            optional ``'id'`` entry is used as the leaf label.
        vectorizer: Object whose ``transform`` maps the list of items to
            a feature matrix with one row per item.
        method: Linkage method, one of "median", "centroid", "weighted",
            "single", "ward", "complete", "average".
        color_threshold: Passed to scipy's ``dendrogram``.
        size: Side length of the (square) figure.
        filename: If given, the figure is saved there; otherwise it is
            shown interactively.
    """
    # The scipy function is imported locally because this function
    # shares its name.
    from scipy.cluster.hierarchy import linkage, dendrogram
    from scipy.spatial.distance import squareform
    from sklearn import metrics

    data = list(data)

    # Exactly one label per item: fall back to the position when an item
    # has no id, so the label count always matches the leaf count.
    labels = []
    for position, graph in enumerate(data):
        label = graph.graph.get('id', None)
        labels.append(str(position) if label is None else label)

    # Transform input into sparse vectors.
    data_matrix = vectorizer.transform(data)

    distance_matrix = metrics.pairwise.pairwise_distances(data_matrix)
    # ``linkage`` treats a 2-D input as observation vectors; condense the
    # square distance matrix so it is interpreted as distances.
    # checks=False tolerates tiny numerical asymmetries.
    condensed = squareform(distance_matrix, checks=False)
    linkage_matrix = linkage(condensed, method=method)

    plt.figure(figsize=(size, size))
    dendrogram(linkage_matrix,
               color_threshold=color_threshold,
               labels=labels,
               orientation='right')
    if filename is not None:
        plt.savefig(filename)
    else:
        plt.show()
示例8: dendrogram
# 需要导入模块: from scipy.cluster import hierarchy [as 别名]
# 或者: from scipy.cluster.hierarchy import dendrogram [as 别名]
def dendrogram(self):
    """Return the truncated dendrogram of ``self.model``.

    Uses the ``'lastp'`` truncation mode with ``p`` capped at 12, or at
    ``len(self.model)`` when that is smaller.  The call resolves
    ``dendrogram`` at module scope (presumably scipy's function --
    confirm against the file's imports).
    """
    shown = min(12, len(self.model))
    return dendrogram(self.model, truncate_mode='lastp', p=shown)
示例9: plotdendro
# 需要导入模块: from scipy.cluster import hierarchy [as 别名]
# 或者: from scipy.cluster.hierarchy import dendrogram [as 别名]
def plotdendro(Z, ncluster, filename, rep_ind):
    """Draw a truncated dendrogram of ``Z`` and save it to ``filename``.

    Args:
        Z: Linkage matrix, passed unchanged to ``sc.dendrogram``.
        ncluster: Value of ``p`` for the ``'lastp'`` truncation mode.
        filename: Destination handed to ``plt.savefig``.
        rep_ind: Not used by this function; kept so existing callers
            that pass it keep working.
    """
    plt.figure(figsize=(10, 15))
    plt.title('Hierarchical Clustering Dendrogram')
    plt.xlabel('sample index')
    plt.ylabel('distance')
    sc.dendrogram(Z, truncate_mode='lastp', p=ncluster, orientation='right',
                  leaf_rotation=90., leaf_font_size=20.,
                  show_contracted=False)
    # NOTE: an earlier, disabled version annotated each leaf position
    # with the matching ``rep_ind`` entry; the leaf-coordinate
    # bookkeeping that only served that annotation has been dropped.
    # ``papertype`` and ``frameon`` are no longer accepted by savefig on
    # current Matplotlib (TypeError), so they are not passed any more.
    plt.savefig(filename, dpi=100, facecolor='w', edgecolor='w',
                orientation='portrait', format=None,
                transparent=True, bbox_inches=None, pad_inches=0.1)
示例10: clusterdistmatfull
# 需要导入模块: from scipy.cluster import hierarchy [as 别名]
# 或者: from scipy.cluster.hierarchy import dendrogram [as 别名]
def clusterdistmatfull(distmatrixfile, sim, mode='average', plot=False):
    """Build the full hierarchical tree for ``sim`` and draw its dendrogram.

    The linkage is computed on the element-wise square of ``sim`` so
    that, with average linkage, the distance between clusters is the RMS
    distance.

    Args:
        distmatrixfile: Not used by this function.
        sim: Distance data; squared element-wise and given to
            ``sc.linkage``.
        mode: Linkage method passed to ``sc.linkage``.
        plot: Not used by this function.

    Returns:
        Tuple ``(c_list, Z)``: the array of dendrogram leaves in plotted
        order and the linkage matrix.
    """
    # Cluster on dist^2 so the average inter-cluster distance is the RMS.
    sim2 = sim * sim
    Z = sc.linkage(sim2, mode)

    # Draw the full (untruncated) tree.
    plt.figure(figsize=(10, 15))
    plt.title('Hierarchical Clustering Dendrogram')
    plt.xlabel('sample index')
    plt.ylabel('distance')
    dendo = sc.dendrogram(Z, orientation='right', leaf_rotation=90.,
                          leaf_font_size=20., show_contracted=False)

    c_list = np.array(dendo['leaves'])
    # NOTE(review): this counts distinct leaf ids of the full tree, which
    # looks like the number of samples rather than clusters -- confirm.
    nbclst = len(Counter(c_list))
    # Function-call form with explicit formatting prints the same text on
    # Python 2 and Python 3 (the old print statement is a SyntaxError on 3).
    print("Number of clusters %d" % nbclst)
    return c_list, Z
示例11: plot_rank_order_dendrogram
# 需要导入模块: from scipy.cluster import hierarchy [as 别名]
# 或者: from scipy.cluster.hierarchy import dendrogram [as 别名]
def plot_rank_order_dendrogram(df:pd.DataFrame, threshold:float=0.8, savename:Optional[str]=None, settings:PlotSettings=PlotSettings()) \
        -> Dict[str,Union[List[str],float]]:
    r'''
    Plots a dendrogram of the features in df, clustered on Spearman's rank correlation coefficient,
    and returns the groups of features whose merge distance does not exceed 1-threshold.

    Arguments:
        df: Pandas DataFrame containing data
        threshold: Threshold on correlation coefficient
        savename: Optional name of file to which to save the plot
        settings: :class:`~lumin.plotting.plot_settings.PlotSettings` class to control figure appearance

    Returns:
        Dict keyed by merge-node id; each value holds the 'children' feature names and the merge 'distance'
    '''

    max_dist = 1-threshold
    corr = np.round(scipy.stats.spearmanr(df).correlation, 4)
    # Absolute value: negating a feature is a trivial transformation, the information is unaffected
    corr_condensed = hc.distance.squareform(1-np.abs(corr))
    z = hc.linkage(corr_condensed, method='average', optimal_ordering=True)
    n_merges = len(z)

    with sns.axes_style('white'), sns.color_palette(settings.cat_palette):
        plt.figure(figsize=(settings.w_large, (0.5*len(df.columns))))
        hc.dendrogram(z, labels=df.columns, orientation='left', leaf_font_size=settings.lbl_sz, color_threshold=max_dist)
        plt.xlabel("Distance (1 - |Spearman's Rank Correlation Coefficient|)", fontsize=settings.lbl_sz, color=settings.lbl_col)
        plt.xticks(fontsize=settings.tk_sz, color=settings.tk_col)
        if savename is not None:
            plt.savefig(settings.savepath/f'{savename}{settings.format}', bbox_inches='tight')
        plt.show()

    feats = df.columns
    sets = {}

    def _members(node):
        # Ids up to len(z) index original features; larger ids refer to an earlier merge stored in sets
        if node <= n_merges:
            return [feats[int(node)]]
        return sets.pop(int(node))['children']

    for i, merge in enumerate(z):
        dist = merge[2]
        if dist > max_dist:
            continue
        left = _members(merge[0])
        right = _members(merge[1])
        sets[1 + i + n_merges] = {'children': [*left, *right], 'distance': dist}
    return sets
示例12: _sort_traces
# 需要导入模块: from scipy.cluster import hierarchy [as 别名]
# 或者: from scipy.cluster.hierarchy import dendrogram [as 别名]
def _sort_traces(self, rdt, cdt):
"""Sort row dendrogram clusters and column dendrogram clusters
so that the background trace (above threshold) is trace 0
and all other traces are ordered top-to-bottom (row dendrogram)
or left-to-right (column dendrogram).
Parameters:
- rdt (list[dict]): The row dendrogram cluster traces.
- cdt (list[dict]): The column dendrogram cluster traces.
Returns:
- tuple: The sorted row dendrogram clusters and column
dendrogram clusters.
"""
tmp_rdt = []
tmp_cdt = []
if len(rdt) > 0:
# first, find background trace: (max 'x')
rdt.sort(key=lambda t: -1 * max(list(t["x"])))
tmp_rdt.append(rdt[0])
# then, sort top-to-bottom
r = rdt[1:]
r.sort(key=lambda t: -1 * min(list(t["y"])))
tmp_rdt += r
if len(cdt) > 0:
# background trace has max 'y'
cdt.sort(key=lambda t: -1 * max(list(t["y"])))
tmp_cdt.append(cdt[0])
# sort left to right
c = cdt[1:]
c.sort(key=lambda t: min(list(t["x"])))
tmp_cdt += c
return (tmp_rdt, tmp_cdt)
示例13: heatmap_dists
# 需要导入模块: from scipy.cluster import hierarchy [as 别名]
# 或者: from scipy.cluster.hierarchy import dendrogram [as 别名]
def heatmap_dists(data, norm=False, labels=None, metric='euclidean', method='ward'):
    """Show pairwise distances between rows of ``data`` as a clustered heatmap.

    Rows are reordered by the leaf order of a hierarchical clustering so
    that similar rows end up next to each other.

    Args:
        data: 2-D array-like with a ``shape``; one observation per row.
            If it has an ``index`` attribute, that supplies default labels.
        norm: Not used by this function.
        labels: Optional sequence with one tick label per row; any
            sequence type is accepted.
        metric: Distance metric passed to ``pdist`` (and ``linkage``).
        method: Linkage method passed to ``linkage``.

    Returns:
        ``(fig, (ax, cax))``: the figure, the heatmap axes and the
        colorbar axes.

    Raises:
        AssertionError: If the number of labels differs from the number
            of rows.
    """
    fig, (ax, cax) = plt.subplots(ncols=2, figsize=(7 * 1.05, 7),
                                  gridspec_kw={"width_ratios": [1, 0.05]})
    if labels is None:
        try:
            labels = data.index
        except AttributeError:
            pass
    n = data.shape[0]
    if labels is not None:
        # Plain lists/tuples cannot be indexed with a list of positions;
        # an array can, so the reordering below works for any sequence.
        labels = np.asarray(labels)
    assert labels is None or len(labels) == n

    dists = ssd.pdist(data, metric=metric)
    link_matrix = sch.linkage(dists, metric=metric, method=method)
    # Only the leaf order is needed, so nothing is drawn here.
    order = sch.dendrogram(link_matrix, no_plot=True)['leaves']

    sq_form_dists = ssd.squareform(dists)[order][:, order]
    assert sq_form_dists.shape == (n, n)

    hmap = ax.imshow(sq_form_dists, aspect='auto')
    ax.set_xticks(np.arange(n))
    ax.set_yticks(np.arange(n))
    if labels is not None:
        ax.set_xticklabels(labels[order], rotation=90)
        ax.set_yticklabels(labels[order])
    plt.colorbar(hmap, cax=cax)
    return fig, (ax, cax)
# Tasks
示例14: plot_subject_term_matrix
# 需要导入模块: from scipy.cluster import hierarchy [as 别名]
# 或者: from scipy.cluster.hierarchy import dendrogram [as 别名]
def plot_subject_term_matrix(ont, aset, args):
    """Cluster the subject/term dataframe of ``aset`` and print the dendrogram.

    The dataframe returned by ``aset.as_dataframe`` is printed, its rows
    are compared with ``pdist``, linked with complete linkage, and the
    dict returned by scipy's ``dendrogram`` is printed.

    Args:
        ont: Not used by this function.
        aset: Object providing ``as_dataframe(subjects=...)``.
        args: Options object; only ``subjects`` is read.
    """
    import numpy as np
    import pandas as pd
    import scipy.cluster.hierarchy as sch
    import scipy.spatial as scs

    frame = aset.as_dataframe(subjects=args.subjects)
    print('DF={}'.format(frame))

    condensed = scs.distance.pdist(frame)
    tree = sch.linkage(condensed, method='complete')
    print(sch.dendrogram(tree))
示例15: make_plot
# 需要导入模块: from scipy.cluster import hierarchy [as 别名]
# 或者: from scipy.cluster.hierarchy import dendrogram [as 别名]
def make_plot(self):
    """Draw the dendrogram of ``self.data`` on a new subplot of ``self.fig``.

    Stores the linkage in ``self.z``, the axes in ``self.ax`` and the
    dendrogram dict in ``self.dendro``.  Every link is drawn in
    ``self.color``, tick labels use ``self.fp_ticklabel``, and the axes
    are restyled (white vertical grid on a light background, no spines,
    no tick marks).

    NOTE(review): the linkage is computed from the rows of ``self.data``
    while the leaf labels come from its columns -- confirm the data is
    laid out so that both agree.
    """
    self.z = hc.linkage(self.data, method='average')
    self.ax = self.fig.add_subplot(1, 1, 1)
    self.dendro = hc.dendrogram(
        self.z,
        labels=self.data.columns,
        color_threshold=0,
        orientation='left',
        ax=self.ax,
        link_color_func=lambda x: self.color,
    )

    # Apply the tick-label font, y axis first and then x axis.
    for tick_label in self.ax.get_yticklabels():
        tick_label.set_fontproperties(self.fp_ticklabel)
    for tick_label in self.ax.get_xticklabels():
        tick_label.set_fontproperties(self.fp_ticklabel)

    self.ax.xaxis.grid(True, color='#FFFFFF', lw=1, ls='solid')
    self.ax.yaxis.grid(False)
    self.ax.set_axisbelow(True)
    self.ax.set_facecolor('#EAEAF2')

    # Hide the frame by giving every spine zero line width.
    for spine in self.ax.spines.values():
        spine.set_lw(0)
    self.ax.tick_params(which='both', length=0)