本文整理汇总了Python中scipy.cluster.hierarchy.dendrogram函数的典型用法代码示例。如果您正苦于以下问题：Python dendrogram函数的具体用法？Python dendrogram怎么用？Python dendrogram使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了dendrogram函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: save_mat
def save_mat(c2map, filepath):
    """Save a clustered heat map of ``c2map['mat']`` to ``filepath``.

    Two linkages are computed from the matrix: a 'centroid' one, drawn as
    a right-oriented dendrogram in the left panel, and a 'single' one,
    drawn in the top panel. The leaf order of the first reorders the
    matrix rows and the leaf order of the second reorders its columns
    before the matrix is shown in the central panel next to a colour bar.
    """
    matrix = c2map['mat']
    figure = pylab.figure(figsize=(8, 8))

    # Left panel: dendrogram of the centroid linkage.
    left_axes = figure.add_axes([0.09, 0.1, 0.2, 0.6])
    left_tree = sch.dendrogram(
        sch.linkage(matrix, method='centroid'), orientation='right')
    left_axes.set_xticks([])
    left_axes.set_yticks([])

    # Top panel: dendrogram of the single linkage.
    top_axes = figure.add_axes([0.3, 0.71, 0.6, 0.2])
    top_tree = sch.dendrogram(sch.linkage(matrix, method='single'))
    top_axes.set_xticks([])
    top_axes.set_yticks([])

    # Central panel: the matrix, rows and columns permuted by leaf order.
    heat_axes = figure.add_axes([0.3, 0.1, 0.6, 0.6])
    reordered = matrix[left_tree['leaves'], :][:, top_tree['leaves']]
    image = heat_axes.matshow(
        reordered, aspect='auto', origin='lower', cmap=pylab.cm.YlGnBu)
    heat_axes.set_xticks([])
    heat_axes.set_yticks([])

    # Narrow strip on the right holding the colour bar.
    bar_axes = figure.add_axes([0.91, 0.1, 0.02, 0.6])
    pylab.colorbar(image, cax=bar_axes)

    figure.savefig(filepath)
示例2: plot_corr_dendrogram
def plot_corr_dendrogram(
        corr, cluster_method='weighted', **dendrogram_kwargs):
    """
    Plot a correlation matrix as a dendrogram (on the current axes).

    Every correlation c is turned into the distance (1 - c) / 2 before
    clustering, so a correlation of 1 maps to distance 0 and a correlation
    of -1 maps to distance 1.

    Parameters
    ----------
    corr : numpy ndarray or pandas DataFrame
        Square correlation matrix. For a DataFrame the index supplies the
        leaf labels; otherwise the positions 0..n-1 are used.
    cluster_method : String
        Method to use to amalgamate clusters.
        Either 'single', 'complete', 'average', or 'weighted'.
        See scipy.cluster.hierarchy.linkage for details.
    dendrogram_kwargs : Additional kwargs
        Pass to the call of scipy.cluster.hierarchy.dendrogram()

    Returns
    -------
    dict
        The value returned by scipy.cluster.hierarchy.dendrogram(), so
        callers can inspect the leaf order (or pass ``no_plot=True`` and
        only use the computed layout).
    """
    # Work on a plain ndarray in all cases, remembering the leaf labels.
    if isinstance(corr, pd.DataFrame):
        names = corr.index.tolist()
        corr = corr.values
    else:
        # A real list behaves the same on Python 2 and Python 3.
        names = list(range(len(corr)))
    dist = (1 - corr) / 2.
    # squareform() condenses the square matrix into the form linkage() expects.
    Z = linkage(squareform(dist), method=cluster_method)
    return dendrogram(Z, labels=names, **dendrogram_kwargs)
示例3: make_dendrogram_w
def make_dendrogram_w(LinkageMatrix, GraphFolder,
                      Method, Metric, CorrCoeff, Labels, Colors,
                      DisplayLevels):
    """Draw a level-truncated, left-oriented dendrogram and save it as PNG.

    The image is written into ``GraphFolder`` (created when missing) as
    ``dendrogram_<Method>-<Metric>-<DisplayLevels>.png``.

    Parameters
    ----------
    LinkageMatrix : linkage matrix handed to ``dendrogram``.
    GraphFolder : output directory.
    Method, Metric : strings used in the axis label and the file name.
    CorrCoeff, Colors : accepted for interface compatibility; not used by
        the current body.
    Labels : leaf labels handed to ``dendrogram``.
    DisplayLevels : value of ``p`` for ``truncate_mode="level"``; also
        part of the file name.
    """
    import matplotlib
    if not os.path.exists(GraphFolder):
        os.makedirs(GraphFolder)
    plt.figure(figsize=(12, 24))
    plt.title("Plays clustered by topic probabilities", fontsize=14)
    plt.xlabel("Distance\n(Parameters: "+Method+" / "+Metric+")", fontsize=12)
    # NOTE: this changes the process-wide Matplotlib default and is not
    # restored afterwards.
    matplotlib.rcParams['lines.linewidth'] = 1.2
    dendrogram(
        LinkageMatrix,
        p=DisplayLevels,
        truncate_mode="level",
        color_threshold=30,
        show_leaf_counts=True,
        no_labels=False,
        orientation="left",
        labels=Labels,
        leaf_rotation=0,    # rotation of the leaf labels
        leaf_font_size=4,   # font size of the leaf labels
    )
    file_name = "dendrogram_"+Method+"-"+Metric+"-"+str(DisplayLevels)+".png"
    # os.path.join gives the same path as plain concatenation when
    # GraphFolder ends with a separator, and keeps the file inside the
    # folder when it does not. ``figsize`` is not a savefig option (the
    # size is fixed by plt.figure above), so it is no longer passed.
    plt.savefig(os.path.join(GraphFolder, file_name), dpi=300,
                bbox_inches="tight")
    plt.close()
示例4: cengci
def cengci(data):
    """Ward-cluster ``data``, print the cophenetic correlation, plot the tree.

    ``data`` is handed unchanged to ``pdist`` and ``linkage``. The first
    value returned by ``cophenet`` is printed and the linkage is drawn
    with ``dendrogram``. Nothing is returned.
    """
    # Pairwise distances are needed once only, for the cophenetic check.
    pairwise = pdist(data)
    Z = linkage(data, 'ward')
    c, _ = cophenet(Z, pairwise)
    # Parenthesised form prints the same text on Python 2 and Python 3.
    print(c)
    dendrogram(Z)
示例5: HierarchicalCluster
def HierarchicalCluster(A):
    """Plot the correlation matrix of ``A``'s columns with two dendrograms.

    ``np.corrcoef(A.T)`` is clustered with centroid linkage; the same
    linkage is drawn right-oriented in the left panel and in the default
    orientation in the top panel. The matrix, reordered by the leaf order
    of each dendrogram, is shown in the centre with a colour bar. The
    figure is shown and written to 'dendrogram.png'.
    """
    # see http://stackoverflow.com/questions/2982929/plotting-results-of-hierarchical-clustering-ontop-of-a-matrix-of-data-in-python
    Corr = np.corrcoef(A.T)
    fig = plt.figure(figsize=(8, 8))

    # Both panels use the same linkage, so it is computed only once.
    Y = hrc.linkage(Corr, method='centroid')

    ax1 = fig.add_axes([0.09, 0.1, 0.2, 0.6])
    Z1 = hrc.dendrogram(Y, orientation='right')
    ax1.set_xticks([])
    ax1.set_yticks([])

    ax2 = fig.add_axes([0.3, 0.71, 0.6, 0.2])
    Z2 = hrc.dendrogram(Y)
    ax2.set_xticks([])
    ax2.set_yticks([])

    axmatrix = fig.add_axes([0.3, 0.1, 0.6, 0.6])
    idx1 = Z1['leaves']
    idx2 = Z2['leaves']
    # Permute rows, then columns, to follow the dendrogram leaf order.
    Corr = Corr[idx1, :]
    Corr = Corr[:, idx2]
    im = axmatrix.matshow(Corr, aspect='auto', origin='lower')

    axcolor = fig.add_axes([0.91, 0.1, 0.02, 0.6])
    pylab.colorbar(im, cax=axcolor)
    fig.show()
    fig.savefig('dendrogram.png')
示例6: dendrogram_pdf
def dendrogram_pdf(args, dm, leafLabels):
    """Write an average-linkage dendrogram of ``dm`` to a PDF file.

    The figure is 14 inches wide and 0.25 inch tall per leaf label. The
    output goes to ``<args.basedir>/tmp/<args.prefix>_dendrogram.pdf``.
    """
    from scipy.cluster.hierarchy import linkage, dendrogram
    import matplotlib
    # The PDF backend is selected before pyplot is imported.
    matplotlib.use('PDF')
    import matplotlib.pyplot as plt

    plt.figure(figsize=(14, (len(leafLabels) * 0.25)), dpi=100)

    # Margins: the wide right margin leaves room for the leaf labels.
    for side, fraction in (('left', 0.01), ('right', 0.65),
                           ('top', 0.7), ('bottom', 0.25)):
        plt.subplots_adjust(**{side: fraction})

    tree = linkage(dm, method="average")
    dendrogram(tree, color_threshold=1, leaf_font_size=6,
               orientation='right', labels=leafLabels)

    target = os.path.join(
        args.basedir, 'tmp', args.prefix + '_dendrogram.pdf')
    plt.savefig(target)
示例7: plot_dendrogram
def plot_dendrogram(self, method='complete', metric='euclidean'):
    """ Plot dendrogram

    The absolute values of ``self.array`` are clustered with
    scipy.cluster.hierarchy.linkage and drawn on a new figure, using
    ``self.df.index.values`` as leaf labels.

    Parameters
    ------------
    method: str
        method to use for scipy.cluster.hierarchy.linkage. Default
        is 'complete'
    metric: str
        metric to use for scipy.cluster.hierarchy.linkage. Default
        is 'euclidean'

    Returns
    ------------
    Dendrogram
        The dictionary returned by scipy.cluster.hierarchy.dendrogram.
    """
    # The import follows the docstring so the text above really is the
    # function's docstring.
    import scipy.cluster.hierarchy as sch

    # Get par names
    pars = self.df.index.values
    D = np.abs(self.array)
    Y = sch.linkage(D, method=method, metric=metric)
    plt.figure()
    result = sch.dendrogram(Y, labels=pars)
    plt.tight_layout()
    return result
示例8: ben_gen
def ben_gen():
    """Yield "okay" after drawing each dendrogram, closing it on resume.

    For every combination of data-set index, hierarchy index and grey
    option (0 or 1), the stored linkage is drawn twice: first with the
    default y range, then with the y axis limited to ``(0, ylimit)``.
    After each drawing the generator yields the string "okay"; when it is
    resumed the current figure is closed.

    Reads the module-level names ``data``, ``desired_hier``,
    ``all_clustering_data``, ``data_names``, ``grey_output``,
    ``names_distances`` and ``ylimit``.
    """
    for num_data, hier_num, grey_option in itertools.product(
            np.arange(len(data)), np.arange(len(desired_hier)), [0, 1]):
        hierarchy_structure = all_clustering_data[num_data][grey_option][hier_num]
        for clip_y_axis in (False, True):
            plt.figure()
            dendrogram(hierarchy_structure, color_threshold=1.6)
            if clip_y_axis:
                plt.ylim(0, ylimit)
            # NOTE(review): grey_output is not assigned in this function
            # (the loop variable is grey_option); it is presumably a
            # module-level value. Confirm it is the intended name.
            plt.title(data_names[num_data] + grey_output + names_distances[hier_num + 1])
            yield "okay"
            plt.close()
    # The generator simply ends here. An explicit ``raise StopIteration``
    # inside a generator is turned into RuntimeError by PEP 479.
示例9: plot_corr_dendrogram
def plot_corr_dendrogram(
        corr, cluster_method='weighted', distance_fun=None,
        **dendrogram_kwargs):
    """
    Plot a correlation matrix as a dendrogram (on the current axes).

    Uses scipy.cluster.hierarchy.linkage
    to compute clusters based on distance between samples.

    Since correlation is passed in, this correlation must be converted to a
    distance (using distance_fun). The default distance_fun makes highly
    correlated points have low distance, and vice versa.

    Parameters
    ----------
    corr : numpy ndarray or pandas DataFrame
        corr[i, j] is the correlation (should be between -1 and 1) of samples
        i and j. For a DataFrame the index supplies the leaf labels;
        otherwise the positions 0..n-1 are used.
    cluster_method : String
        Method to use to amalgamate clusters.
        Either 'single', 'complete', 'average', or 'weighted'.
        See scipy.cluster.hierarchy.linkage for details.
    distance_fun : callable, optional
        Called with the correlation ndarray; must return the square
        distance matrix. When omitted, (1 - corr) / 2 is used.
    dendrogram_kwargs : Additional kwargs
        Pass to the call of scipy.cluster.hierarchy.dendrogram()

    Returns
    -------
    dict
        The value returned by scipy.cluster.hierarchy.dendrogram().
    """
    # Work on a plain ndarray in all cases, remembering the leaf labels.
    if isinstance(corr, pd.DataFrame):
        names = corr.index.tolist()
        corr = corr.values
    else:
        # A real list behaves the same on Python 2 and Python 3.
        names = list(range(len(corr)))
    if distance_fun is None:
        dist = (1 - corr) / 2.
    else:
        dist = distance_fun(corr)
    # squareform() condenses the square matrix into the form linkage() expects.
    Z = linkage(squareform(dist), method=cluster_method)
    return dendrogram(Z, labels=names, **dendrogram_kwargs)
示例10: hierarchical_clustering
def hierarchical_clustering(self, data='open_shut'):
    '''
    Ward-cluster the entries of self.cluster_list and show the dendrogram.

    data selects the feature vector built for every cluster:
    'open_shut' -- [log(mean open), log(mean shut)], taken from
                   cluster._get_mean_open() and cluster._get_mean_shut()
    'popen'     -- [cluster.popen]
    'amp'       -- [cluster.mean_amp]

    Raises ValueError for any other value of data. Previously such a
    value produced an empty feature list and an unrelated error from
    linkage.
    '''
    if data == 'open_shut':
        def features(cluster):
            return [np.log(cluster._get_mean_open()),
                    np.log(cluster._get_mean_shut())]
    elif data == 'popen':
        def features(cluster):
            return [cluster.popen]
    elif data == 'amp':
        def features(cluster):
            return [cluster.mean_amp]
    else:
        raise ValueError(
            "unknown data option %r; expected 'open_shut', 'popen' or 'amp'"
            % (data,))

    feature_list = [features(cluster) for cluster in self.cluster_list]
    Z = linkage(feature_list, 'ward')
    plt.figure(figsize=(25, 10))
    plt.title('Hierarchical Clustering Dendrogram')
    plt.xlabel('sample index')
    plt.ylabel('distance')
    dendrogram(
        Z,
        leaf_rotation=90.,  # rotates the x axis labels
        leaf_font_size=8.,  # font size for the x axis labels
    )
    plt.show()
示例11: plot_dendrogram
def plot_dendrogram(model, **kwargs):
    '''
    Turn a fitted hierarchical model into a dendrogram.

    Only model.children_ is read. It is assumed to describe a complete
    merge tree, i.e. to have n_samples - 1 rows, where a child index
    below n_samples is an original sample and index n_samples + i is the
    cluster created by row i.

    The model carries no merge distances here, so the merge order
    (0, 1, 2, ...) is used as the height of each merge. The number of
    samples under every merge is computed from the tree itself, so leaf
    counts shown by truncated dendrograms are correct.

    kwargs are passed on to scipy.cluster.hierarchy.dendrogram, whose
    result is returned.
    '''
    from scipy.cluster.hierarchy import dendrogram

    # Children of hierarchical clustering
    children = model.children_
    n_merges = children.shape[0]
    n_samples = n_merges + 1

    # Uniform placeholder heights: one step per merge.
    distance = np.arange(n_merges)

    # Number of original samples below each merge.
    counts = np.zeros(n_merges)
    for i, merge in enumerate(children):
        for child in merge:
            if child < n_samples:
                counts[i] += 1  # an original sample
            else:
                counts[i] += counts[int(child) - n_samples]  # earlier merge

    # Create linkage matrix and then plot the dendrogram
    linkage_matrix = np.column_stack(
        [children, distance, counts]).astype(float)
    return dendrogram(linkage_matrix, **kwargs)
示例12: create_dendrogram
def create_dendrogram(cds, clusters=None, filename=None):
    """Average ``cds.samples`` per segment, cluster the segments and plot.

    Parameters
    ----------
    cds : object with ``shape``, ``samples`` and ``a.event_bounds``.
        ``samples`` is indexed with three axes; the last one is sliced by
        the boundaries in ``clusters``.
        NOTE(review): the first two axes appear to be subjects and voxels
        (going by the local names) -- confirm against the caller.
    clusters : sequence of boundaries, optional
        Segment i spans ``clusters[i]:clusters[i + 1]`` on the last axis.
        Defaults to ``cds.a.event_bounds``.
    filename : str, optional
        When given, the figure is also saved to this path.
    """
    num_subj = cds.shape[0]
    num_voxels = cds.shape[1]
    # ``is None`` instead of ``== None``: comparing an ndarray to None
    # elementwise makes the ``if`` raise.
    if clusters is None:
        clusters = cds.a.event_bounds
    num_scenes = len(clusters)
    ds_list = np.zeros((num_subj, num_voxels, num_scenes - 1))
    # Mean over the last axis within each pair of consecutive boundaries.
    for i in range(num_scenes - 1):
        ds_list[:, :, i] = np.mean(
            cds.samples[:, :, clusters[i]:clusters[i + 1]], axis=2)
    # Average over the first axis, then cluster the segments (rows after
    # the transpose) using correlation distance.
    Z = hierarchy.linkage(np.mean(ds_list, axis=0).T, metric='correlation')
    fig = plt.figure(figsize=(14, 8))
    hierarchy.dendrogram(Z)
    plt.show()
    if filename is not None:
        fig.savefig(filename)
示例13: cal_idf_overlap
def cal_idf_overlap():
    """Cluster ``utils.list_subject`` by pairwise IDF overlap and plot it.

    Steps:
    1. Build a token corpus from every subject (split on single spaces),
       dropping tokens found in ``get_stop_words('en')``.
    2. For every ordered pair (i, j) compute
       ``IDF.IDF(subject_i, subject_j, corpus).cal_overlap()`` and collect
       the values into a square array.
    3. Run Ward linkage on that array, write each linkage row to
       '/Users/Aaron/test.txt', and show a full and a truncated
       dendrogram.

    Progress is printed for every pair, as before.
    """
    list_subj = utils.list_subject
    stop_words = get_stop_words('en')

    tmp_corpus = []
    for subject in list_subj:
        for token in str(subject).split(" "):
            if token not in stop_words:
                tmp_corpus.append(token)

    length = len(list_subj)
    ls_distance_final = []
    for i in range(length):
        if i in (500, 1000, 1500):
            print(i)
        ls_distance_row = []
        for j in range(length):
            # %-formatting prints "i j" identically on Python 2 and 3.
            print("%s %s" % (i, j))
            idf_instance = IDF.IDF(
                str(list_subj[i]), str(list_subj[j]), tmp_corpus)
            ls_distance_row.append(idf_instance.cal_overlap())
        ls_distance_final.append(ls_distance_row)

    myarray = np.asarray(ls_distance_final)
    print(myarray)
    Z = linkage(myarray, "ward")

    # The context manager closes (and flushes) the file; the original
    # handle was never closed.
    with open('/Users/Aaron/test.txt', 'w') as thefile:
        for item in Z:
            thefile.write("%s\n" % item)

    plt.figure(figsize=(25, 10))
    plt.title('Hierarchical Clustering Dendrogram')
    plt.xlabel('sample index')
    plt.ylabel('distance')
    dendrogram(
        Z,
        leaf_rotation=90.,  # rotates the x axis labels
        leaf_font_size=8.,  # font size for the x axis labels
    )
    plt.show()

    plt.title('Hierarchical Clustering Dendrogram (truncated)')
    plt.xlabel('sample index')
    plt.ylabel('distance')
    dendrogram(
        Z,
        truncate_mode='lastp',  # show only the last p merged clusters
        p=30,  # number of merged clusters to show
        show_leaf_counts=True,  # otherwise numbers in brackets are counts
        leaf_rotation=90.,
        leaf_font_size=12.,
        show_contracted=True,  # to get a distribution impression in truncated branches
    )
    plt.show()
示例14: computeLinkage
def computeLinkage(self, printDendogram=False):
    """Compute the Ward linkage of the values of ``self.buildingAverages``.

    The values are stacked into ``self.X`` (one row per entry) and the
    linkage matrix is stored in ``self.Z`` and returned. When
    ``printDendogram`` is true the dendrogram is also drawn and shown.
    """
    # dict.values() is a view on Python 3; without list() numpy would wrap
    # the view in a 0-d object array instead of building a 2-D array.
    self.X = array(list(self.buildingAverages.values()))

    # generate the linkage matrix
    self.Z = linkage(self.X, 'ward')

    if printDendogram:
        plt.figure(figsize=(25, 10))
        plt.title('Hierarchical Clustering Dendrogram (truncated)')
        plt.xlabel('Dendogram of Dartmouth campus buildings clusters')
        plt.ylabel('distance')
        dendrogram(
            self.Z,
            show_leaf_counts=True,  # otherwise numbers in brackets are counts
            leaf_rotation=90.,
            leaf_font_size=12.,
            show_contracted=True,  # to get a distribution impression in truncated branches
        )
        plt.show()
    return self.Z
示例15: dendrogram
def dendrogram(data, vectorizer, method="ward", color_threshold=1, size=10, filename=None):
    """Vectorize ``data``, cluster it hierarchically and draw a dendrogram.

    Parameters
    ----------
    data : iterable of graphs
        Each item must expose ``item.graph`` as a mapping; its 'id' entry,
        when present and truthy, is used as the leaf label.
    vectorizer : object with a ``transform(iterable)`` method returning a
        matrix with one row per item.
    method : str
        Linkage method: "median", "centroid", "weighted", "single",
        "ward", "complete" or "average".
    color_threshold : passed to scipy's ``dendrogram``.
    size : side length of the square figure.
    filename : str, optional
        Save the figure there when given; otherwise show it.

    Raises
    ------
    TypeError
        If ``data`` has no ``__iter__`` attribute. (TypeError is a
        subclass of Exception, which was raised before.)
    """
    if not hasattr(data, '__iter__'):
        raise TypeError('ERROR: Input must be iterable')
    import itertools
    # One copy of the stream is read for labels, the other is vectorized.
    iterable_1, iterable_2 = itertools.tee(data)
    # get labels
    ids = [graph.graph.get('id', None) for graph in iterable_2]
    # transform input into sparse vectors
    X = vectorizer.transform(iterable_1)
    if any(ids):
        # Items without a usable id fall back to their position, so the
        # number of labels always matches the number of items. Skipping
        # them made the label count differ from the number of rows.
        labels = [label if label else str(i) for i, label in enumerate(ids)]
    else:
        labels = [str(i) for i in range(X.shape[0])]
    from sklearn import metrics
    from scipy.cluster.hierarchy import linkage
    from scipy.cluster.hierarchy import dendrogram as scipy_dendrogram
    # pairwise distances between the vectorized items
    D = metrics.pairwise.pairwise_distances(X)
    # NOTE(review): D is a square matrix, which linkage() interprets as
    # one observation per row rather than as precomputed distances --
    # confirm this is intended (a condensed matrix would be needed
    # otherwise).
    Z = linkage(D, method=method)
    plt.figure(figsize=(size, size))
    scipy_dendrogram(Z, color_threshold=color_threshold, labels=labels, orientation='right')
    if filename is not None:
        plt.savefig(filename)
    else:
        plt.show()