本文整理汇总了Python中scipy.cluster.hierarchy.to_tree函数的典型用法代码示例。如果您正苦于以下问题：Python to_tree函数的具体用法？Python to_tree怎么用？Python to_tree使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了to_tree函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: make_tree
def make_tree(X, C, method='single'):
    """Build a ``Tree`` from a hierarchical clustering of the input.

    Parameters
    ----------
    X : object
        Passed to ``ward`` when ``method == 'ward'``
        (presumably the raw observation matrix -- confirm with callers).
    C : object
        Passed to ``single`` / ``average`` for those methods
        (presumably a condensed distance matrix -- confirm with callers).
    method : {'single', 'ward', 'average'}
        Linkage strategy to apply.

    Returns
    -------
    Tree
        ``Tree`` whose root is ``construct_node`` applied to the root
        node returned by ``to_tree``.

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported names.
    """
    if method == 'single':
        linkage_matrix = single(C)
    elif method == 'ward':
        linkage_matrix = ward(X)
    elif method == 'average':
        linkage_matrix = average(C)
    else:
        # Fail loudly instead of hitting an unbound local further down.
        raise ValueError(
            "unknown linkage method %r; expected 'single', 'ward' or 'average'"
            % (method,)
        )
    return Tree(root=construct_node(to_tree(linkage_matrix)))
示例2: classify_by_scores
def classify_by_scores(M, threshold, loci, return_file_names=None):
    """Split the leaves of an average-linkage tree into clusters and singles.

    ``M`` is converted with ``ssd.squareform`` and clustered with
    ``linkage(..., method='average')``. Leaves are visited in
    ``leaves_list`` order; from each leaf the code climbs to the highest
    ancestor whose parent merge distance is still below ``threshold`` and
    records all leaves under that ancestor as one group.

    Parameters
    ----------
    M : array-like
        Distance data accepted by ``ssd.squareform``.
    threshold : float
        Merge distance; a node's parent is only climbed into while the
        parent's ``dist`` is strictly below this value.
    loci : sequence
        Indexed by leaf id; each item must expose ``file_name``. Only
        read when ``return_file_names`` is truthy.
    return_file_names : bool, optional
        When truthy, return base file names instead of leaf ids.

    Returns
    -------
    (singles, clusters)
        ``singles`` is a flat list of groups of size one; ``clusters`` is
        a list of groups with more than one member, largest first.
    """
    Z = linkage(ssd.squareform(M), method='average')
    # clone_graph is expected to provide the ``parent`` links used below.
    root = clone_graph(to_tree(Z))
    id2node = {node.id: node for node in get_nodes(root)}
    leaf_ids = leaves_list(Z)

    pool = []
    i = 0
    # Iterate until every leaf is consumed. The previous exit test
    # (``i >= len(leaf_ids) - 1``) stopped one leaf early and silently
    # dropped a trailing singleton.
    while i < len(leaf_ids):
        cur_node = id2node[leaf_ids[i]]
        # Stop at the root as well, so a threshold above the root distance
        # yields one big group instead of dereferencing a missing parent.
        # NOTE(review): assumes the root's ``parent`` is None -- confirm
        # against clone_graph.
        while cur_node.parent is not None and cur_node.parent.dist < threshold:
            cur_node = cur_node.parent
        cur_leaf_ids = get_leaves(cur_node)
        pool.append(list(cur_leaf_ids))
        # Skip past every leaf that was just grouped.
        i += len(cur_leaf_ids)

    clusters = sorted((group for group in pool if len(group) > 1),
                      key=len, reverse=True)
    singles = [group[0] for group in pool if len(group) == 1]

    if return_file_names:
        clusters_fn = [
            [os.path.basename(loci[leaf].file_name) for leaf in cluster]
            for cluster in clusters
        ]
        singles_fn = [os.path.basename(loci[leaf].file_name) for leaf in singles]
        return singles_fn, clusters_fn
    return singles, clusters
示例3: __init__
def __init__(self, flat_cluster, cluster = None, curve_list = None):
    """Locate the tree node for a cluster and record its two children.

    Parameters
    ----------
    flat_cluster : FlatClusters
        Must provide ``get_co_analysis()``.
    cluster : Cluster, optional
        When given, its ``list_curve_indexes()`` supplies the curve list.
    curve_list : sequence, optional
        Curve indexes to use when ``cluster`` is not given.
    """
    from scipy.cluster.hierarchy import to_tree
    from numpy import asarray, sort

    self.flat = flat_cluster  # FlatClusters object
    self.co_analysis = self.flat.get_co_analysis()  # CoAnalysis object
    self.cluster = cluster  # Cluster object
    # Identity test: an overloaded __eq__ on the cluster type must not be
    # able to make a real object look like "no cluster".
    if cluster is not None:
        self.curve_list = cluster.list_curve_indexes()
    else:
        self.curve_list = curve_list

    self.Z = self.co_analysis.get_hierarchical_cluster()
    root = to_tree(self.Z)  # root of the entire hierarchical clustering
    curves = asarray(self.curve_list)  # curves belonging to this cluster

    # Find the tree node matching the curves above
    # (search semantics live in get_cluster_node).
    self.cluster_node = get_cluster_node(root, root.left, root.right, curves)
    self.id = self.cluster_node.get_id()

    # Children of the matched node.
    self.left = self.cluster_node.left
    self.right = self.cluster_node.right

    # Sorted results of any_pre_order for the left and right children.
    self.left_list = sort(any_pre_order(root, self.left))
    self.right_list = sort(any_pre_order(root, self.right))
示例4: cluster_alchemy
def cluster_alchemy(dataset, gamma=None, filter=False):
    """Cluster a document set with average linkage and score every cluster.

    Parameters
    ----------
    dataset : object
        Passed to ``dp.DocumentsProcessor``.
    gamma : optional
        When truthy, forwarded together with ``filter`` to
        ``get_data_with_alchemy``; otherwise that call uses its defaults.
    filter : bool, optional
        Only forwarded when ``gamma`` is truthy.

    Returns
    -------
    dict
        The ``params`` dict from the document processor, extended with
        ``'avg_f_score'`` and ``'all_fscore'``.
    """
    doc_proc = dp.DocumentsProcessor(dataset)
    if gamma:
        tfidf_matrix, f_score_dict, params = doc_proc.get_data_with_alchemy(
            gamma=gamma, filter=filter)
    else:
        tfidf_matrix, f_score_dict, params = doc_proc.get_data_with_alchemy()

    # Single-argument print(...) emits the same text on Python 2 and 3.
    print('starting clustering: found %s document and %s features'
          % (tfidf_matrix.shape[0], tfidf_matrix.shape[1]))

    linkage_matrix = hr.average(tfidf_matrix.toarray())
    # rd=True returns (root, nodes); only the node list is needed here.
    _, nodes = hr.to_tree(linkage_matrix, rd=True)

    # Map each internal node id to whatever collect_leaf_nodes gathers.
    clusters = {}
    for node in nodes:
        if not node.is_leaf():
            clusters[node.get_id()] = collect_leaf_nodes(node, [])

    f = f_score(clusters, f_score_dict)
    all_scores = print_f_score_dict(f)
    params['avg_f_score'] = average_f_score(f, tfidf_matrix.shape[0])
    params['all_fscore'] = all_scores
    print('average f_score: %s' % params['avg_f_score'])
    return params
示例5: _process_block
def _process_block():
    """Fill the nested d3 dictionary from the linkage tree and return it.

    Reads ``linkage`` and ``bcluster_dendro`` from the enclosing scope:
    the tree root is handed to ``_add_node`` together with
    ``bcluster_dendro``, after which ``_label_tree`` is applied to the
    last entry of ``bcluster_dendro["children"]``.
    """
    root_node = to_tree(linkage, rd=False)
    _add_node(root_node, bcluster_dendro)
    children = bcluster_dendro["children"]
    last_child = children[-1]  # labelling starts from the last element
    _label_tree(last_child)
    return bcluster_dendro
示例6: __get_column_dendrogram__
def __get_column_dendrogram__(self):
    """Return the column clustering as a plain nested dictionary.

    Returns
    -------
    dict
        ``{"nodes": {node_id: info}}``. Leaf entries are
        ``{"count": 1, "distance": 0}``; internal entries hold ``count``,
        ``distance`` (rounded to 3 decimals), ``left_child`` and
        ``right_child``. Every node that is a child of another node also
        gets a ``parent`` key, so only the root lacks one.
    """
    # rd=True returns (root, nodes); only the flat node list is used.
    _, nodes = hcluster.to_tree(self.cluster_object.column_clustering, rd=True)

    # node id -> description dict
    node_id2node = {}
    for node in nodes:
        if node.count == 1:
            # A single original observation: a leaf.
            node_id2node[node.id] = {"count": 1, "distance": 0}
        else:
            node_id2node[node.id] = {
                "count": node.count,
                "distance": round(node.dist, 3),
                "left_child": node.get_left().id,
                "right_child": node.get_right().id,
            }

    # Second pass: back-link each child to its parent id.
    for node_id, info in node_id2node.items():
        if info["count"] != 1:
            node_id2node[info["left_child"]]["parent"] = node_id
            node_id2node[info["right_child"]]["parent"] = node_id

    return {"nodes": dict(node_id2node)}
示例7: hierarchical_clustering_to_dendrogram
def hierarchical_clustering_to_dendrogram(clustering):
    """Converts an array representing a clustering to a dendrogram.

    Args:
        clustering (ndarray): A hierarchical clustering matrix, in the form
            returned by scipy.cluster.hierarchy.linkage.

    Returns:
        (networkx.DiGraph): A dendrogram. Each node in the dendrogram has the
            'distance' attribute, which is the threshold at which its children
            are merged in the clustering. Edges point from parent to child.
    """
    root = _hierarchy.to_tree(clustering)
    tree = _nx.DiGraph()
    tree.add_node(root.id, distance=float(root.dist))

    # Breadth-first walk, one level at a time. Attributes are attached via
    # add_node(), which exists in every NetworkX release, whereas the
    # ``tree.node[...]`` accessor was removed in NetworkX 2.4.
    frontier = [root]
    while frontier:
        next_frontier = []
        for parent in frontier:
            for child in (parent.left, parent.right):
                if child is None:
                    continue
                tree.add_node(child.id, distance=float(child.dist))
                tree.add_edge(parent.id, child.id)
                next_frontier.append(child)
        frontier = next_frontier
    return tree
示例8: to_dict
def to_dict(self, correlation_matrix, linkage_matrix):
    """Describe every merge of a linkage matrix as a plain dictionary.

    Parameters
    ----------
    correlation_matrix : object
        Not used by this method; kept for interface compatibility.
    linkage_matrix : ndarray
        Linkage matrix accepted by ``scipy.cluster.hierarchy.to_tree``.

    Returns
    -------
    dict
        Maps the 1-based merge number (linkage row index + 1) to
        ``{'datasets': [...], 'height': ...}``, where ``datasets`` are the
        sorted 1-based leaf ids under that merge and ``height`` is column 2
        of the corresponding linkage row. Keys are inserted in pre-order
        (node, left subtree, right subtree).
    """
    from scipy.cluster import hierarchy

    # http://w3facility.org/question/scipy-dendrogram-to-json-for-d3-js-tree-visualisation/
    # https://gist.github.com/mdml/7537455
    root = hierarchy.to_tree(linkage_matrix, rd=False)
    n_merges = len(linkage_matrix)
    d = {}

    # An explicit stack replaces recursion so that chain-shaped dendrograms
    # cannot exhaust the interpreter's recursion limit.
    stack = [root]
    while stack:
        node = stack.pop()
        if node.is_leaf():
            continue
        # Internal node ids start at n_merges + 1, so this is the row index.
        cluster_id = node.get_id() - n_merges - 1
        d[cluster_id + 1] = {
            'datasets': [i + 1 for i in sorted(node.pre_order())],
            'height': linkage_matrix[cluster_id][2],
        }
        # Push right first so the left subtree is handled first (pre-order).
        if node.right is not None:
            stack.append(node.right)
        if node.left is not None:
            stack.append(node.left)
    return d
示例9: check_leaves_list_iris
def check_leaves_list_iris(self, method):
    # Tests leaves_list(Z) on the Iris data set: the pre-order traversal of
    # the tree built from Z must match leaves_list(Z).
    X = eo['iris']
    # linkage() is given the observations directly, so no separate pdist()
    # call is needed (its result was never used).
    Z = linkage(X, method)
    node = to_tree(Z)
    assert_equal(node.pre_order(), leaves_list(Z))
示例10: tfidf_covariance
def tfidf_covariance(texts, savepath):
    """Build (or load from cache) a tree-derived covariance over ``texts``.

    Two artefacts are cached as ``.npy`` files inside ``savepath``: the
    average-linkage matrix of the TF-IDF vectors and the covariance matrix
    derived from the linkage tree. Each is loaded when its file exists and
    computed and saved otherwise.

    Parameters
    ----------
    texts : sequence of iterables of str
        Each item is joined with spaces into one document. ``len(texts)``
        is used, so this must be a sized container, not a generator.
    savepath : str
        Cache directory; created if missing when the linkage is computed.

    Returns
    -------
    HierarchicalObservation
        Constructed from the covariance matrix.
    """
    if not savepath.endswith("/"):
        savepath = savepath + "/"
    linkage_file = savepath + "__linkage_average.npy"
    covariance_file = savepath + "__covariance__.npy"

    if os.path.exists(linkage_file):
        Z = np.load(linkage_file)
    else:
        if not os.path.exists(savepath):
            os.makedirs(savepath)
        from sklearn.feature_extraction.text import TfidfVectorizer
        # 'content' is the documented option for in-memory strings; the
        # previous value (the ``str`` type) is not a valid choice.
        vectorizer = TfidfVectorizer(input='content',
                                     strip_accents='ascii',
                                     analyzer='word',
                                     max_features=5000)
        y = vectorizer.fit_transform(" ".join(text) for text in texts)
        # toarray() gives a plain ndarray rather than an np.matrix.
        Z = linkage(y.toarray(), method='average', metric='euclidean')
        np.save(linkage_file, Z)

    if os.path.exists(covariance_file):
        Cov = np.load(covariance_file)
    else:
        root, nodes = to_tree(Z, rd=True)
        assign_parents(root)
        adj_mat = get_adjacency_matrix(nodes)
        deg_mat = get_degree_matrix(nodes)
        sigma = 5
        # Float literal: under Python 2, ``1/(sigma**2)`` is integer
        # division and evaluates to 0, removing the diagonal term.
        regulariser = (1.0 / sigma ** 2) * np.eye(len(deg_mat))
        laplacian = np.diag(deg_mat) - adj_mat + regulariser
        # Keep only the leading len(texts) x len(texts) block.
        Cov = np.linalg.inv(laplacian)[:len(texts), :len(texts)]
        np.save(covariance_file, Cov)

    return HierarchicalObservation(Cov)
示例11: plot_dendrogram
def plot_dendrogram(Z, dendogram_file_name):
    """Render the dendrogram of ``Z`` to a PDF or PNG file.

    Parameters
    ----------
    Z : ndarray
        Linkage matrix accepted by ``to_tree`` and ``fancy_dendrogram``.
    dendogram_file_name : str
        Output path; must end with ``'pdf'`` or ``'png'``.

    Returns
    -------
    float
        The cut threshold passed to the plot: one third of the root
        node's merge distance.

    Raises
    ------
    NotImplementedError
        If the file name ends with neither ``'pdf'`` nor ``'png'``.
    """
    # Validate the format up front so no figure is created (and left open)
    # for an unsupported target. ``NotImplemented`` is a constant, not an
    # exception class, so the proper error type is raised here.
    if dendogram_file_name.endswith('pdf'):
        file_format = 'pdf'
    elif dendogram_file_name.endswith('png'):
        file_format = 'png'
    else:
        raise NotImplementedError('File format has to be either png or pdf')

    root = to_tree(Z)
    threshold = root.dist / 3.0
    all_leaves = get_leaves(root)

    plt.figure(figsize=(30, 30))
    fancy_dendrogram(
        Z,
        leaf_rotation=90.,  # rotates the x axis labels
        leaf_font_size=4.,  # font size for the x axis labels
        annotate_above=10,
        max_d=threshold,
        title='Hierarchical Clustering Dendrogram( %d leaves)' % len(all_leaves),
        xlabel='loci',
        ylabel='distance'
    )
    plt.savefig(dendogram_file_name, format=file_format)
    plt.close()
    return threshold
示例12: create_cluster_heatmap
def create_cluster_heatmap(self, compress=False, compressed_value="median", write_data=True):
    """Create the cluster heatmap representation in inchlib format.

    Setting ``compress`` cuts the dendrogram at a distance chosen to reduce
    the heatmap row count to the requested number. ``compressed_value``
    ("median" or "mean") selects how merged rows are summarised; for
    nominal (text) metadata the most frequent value is used. With
    ``write_data=False`` the data features are left out of the result.

    The result is stored in ``self.dendrogram``; nothing is returned.
    """
    self.dendrogram = {"data": self.__get_cluster_heatmap__(write_data)}
    self.compress = compress
    self.compressed_value = compressed_value
    self.compress_cluster_treshold = 0

    if self.compress and self.compress >= 0:
        self.compress_cluster_treshold = self.__get_distance_treshold__(compress)
        print("Distance treshold for compression:", self.compress_cluster_treshold)
        if self.compress_cluster_treshold >= 0:
            self.__compress_data__()
    else:
        # Normalise falsy or negative requests to an explicit False.
        self.compress = False

    if self.header and write_data:
        self.dendrogram["data"]["feature_names"] = list(self.header)
    elif self.header and not write_data:
        self.dendrogram["data"]["feature_names"] = []

    if self.axis == "both" and len(self.cluster_object.column_clustering):
        # __get_column_dendrogram__ builds the column tree itself, so no
        # separate to_tree() call is made here.
        self.dendrogram["column_dendrogram"] = self.__get_column_dendrogram__()
示例13: guide_tree_from_sequences
def guide_tree_from_sequences(sequences,
                              metric=kmer_distance,
                              display_tree = False):
    """ Build a UPGMA tree by applying metric to sequences

    Parameters
    ----------
    sequences : list of skbio.Sequence objects (or subclasses)
      The sequences to be represented in the resulting guide tree.
    metric : function
      Function that returns a single distance value when given a pair of
      skbio.Sequence objects.
    display_tree : bool, optional
      Draw a dendrogram of the tree before returning.

    Returns
    -------
    scipy.cluster.hierarchy.ClusterNode
      Root node of the tree, as returned by ``to_tree``.

    """
    guide_dm = DistanceMatrix.from_iterable(
                    sequences, metric=metric, key='id')
    # Average linkage on the condensed distances (UPGMA).
    guide_lm = average(guide_dm.condensed_form())
    guide_tree = to_tree(guide_lm)
    if display_tree:
        # Called for its drawing side effect; the returned dict is unused.
        dendrogram(guide_lm, labels=guide_dm.ids, orientation='right',
                   link_color_func=lambda x: 'black')
    return guide_tree
示例14: test_Q_subtree_pre_order
def test_Q_subtree_pre_order(self):
    # pre_order() must also work on sub-trees: the root's traversal has to
    # equal the left child's traversal followed by the right child's.
    root = to_tree(linkage(hierarchy_test_data.Q_X, 'single'))
    whole_order = root.pre_order()
    left_order = root.get_left().pre_order()
    right_order = root.get_right().pre_order()
    assert_equal(whole_order, left_order + right_order)
示例15: guide_tree_from_sequences
def guide_tree_from_sequences(sequences,
                              distance_fn=kmer_distance,
                              display_tree = False):
    """ Build a UPGMA tree by applying distance_fn to sequences

    Parameters
    ----------
    sequences : skbio.SequenceCollection
      The sequences to be represented in the resulting guide tree.
    distance_fn : function
      Distance function handed to ``sequences.distances``, which is
      expected to produce an skbio.DistanceMatrix.
    display_tree : bool, optional
      Draw a dendrogram of the tree before returning.

    Returns
    -------
    scipy.cluster.hierarchy.ClusterNode
      Root node of the tree, as returned by ``to_tree``.

    """
    guide_dm = sequences.distances(distance_fn)
    # Average linkage on the condensed distances (UPGMA).
    guide_lm = average(guide_dm.condensed_form())
    guide_tree = to_tree(guide_lm)
    if display_tree:
        # Called for its drawing side effect; the returned dict is unused.
        dendrogram(guide_lm, labels=guide_dm.ids, orientation='right',
                   link_color_func=lambda x: 'black')
    return guide_tree