当前位置: 首页>>代码示例>>Python>>正文


Python hierarchy.to_tree函数代码示例

本文整理汇总了Python中scipy.cluster.hierarchy.to_tree函数的典型用法代码示例。如果您正苦于以下问题:Python to_tree函数的具体用法?Python to_tree怎么用?Python to_tree使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了to_tree函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: make_tree

def make_tree(X, C, method='single'):
    """Build a ``Tree`` from an agglomerative clustering.

    Args:
        X: Input handed to ``ward`` when ``method == 'ward'``
            (presumably the raw observations -- confirm with callers).
        C: Input handed to ``single`` / ``average`` for those methods
            (presumably a condensed distance matrix -- confirm with callers).
        method: One of ``'single'``, ``'ward'`` or ``'average'``.

    Returns:
        A ``Tree`` whose root is ``construct_node`` applied to the SciPy
        tree produced by ``to_tree``.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    if method == 'single':
        linkage_matrix = single(C)
    elif method == 'ward':
        linkage_matrix = ward(X)
    elif method == 'average':
        linkage_matrix = average(C)
    else:
        # Without this branch an unknown method fell through and failed
        # later with an opaque UnboundLocalError.
        raise ValueError(
            "unknown method %r; expected 'single', 'ward' or 'average'"
            % (method,))
    return Tree(root=construct_node(to_tree(linkage_matrix)))
开发者ID:sharadmv,项目名称:trees,代码行数:8,代码来源:agglomerative.py

示例2: classify_by_scores

def classify_by_scores(M, threshold, loci, return_file_names=None):
    """Cut an average-linkage dendrogram of ``M`` into clusters and singletons.

    The leaves are walked in dendrogram order. From each unvisited leaf the
    code climbs towards the root while the parent's merge distance is below
    ``threshold``; all leaves under the node reached form one group.

    Args:
        M: Square distance matrix; converted with ``ssd.squareform``.
        threshold: Merge distance below which a parent is still absorbed.
        loci: Sequence indexed by leaf id; only read when
            ``return_file_names`` is truthy (``loci[i].file_name``).
        return_file_names: If truthy, return file base names instead of ids.

    Returns:
        ``(singles, clusters)``: ``singles`` is a flat list of the groups of
        size one, ``clusters`` is a list of the larger groups sorted by
        decreasing size. Elements are leaf ids, or file base names when
        ``return_file_names`` is truthy.
    """
    M_array = ssd.squareform(M)
    Z = linkage(M_array, method='average')

    # NOTE(review): clone_graph presumably adds the ``parent`` links used
    # below -- confirm against its definition.
    root = clone_graph(to_tree(Z))
    id2node = {node.id: node for node in get_nodes(root)}

    leaf_ids = leaves_list(Z)

    pool = []
    i = 0
    # The original loop stopped at ``len(leaf_ids) - 1``, which silently
    # dropped the last leaf whenever it was a trailing singleton.
    while i < len(leaf_ids):
        cur_node = id2node[leaf_ids[i]]

        # Climb while the parent merge is still below the threshold. Stop at
        # the root instead of dereferencing a missing parent, which happened
        # when ``threshold`` exceeded the root distance.
        parent = getattr(cur_node, 'parent', None)
        while parent is not None and parent.dist < threshold:
            cur_node = parent
            parent = getattr(cur_node, 'parent', None)

        cur_leaf_ids = list(get_leaves(cur_node))
        pool.append(cur_leaf_ids)

        # Leaves are in dendrogram order, so skip everything just grouped.
        i += len(cur_leaf_ids)

    clusters = [group for group in pool if len(group) > 1]
    singles = [group[0] for group in pool if len(group) == 1]

    clusters = sorted(clusters, key=len, reverse=True)

    if return_file_names:
        clusters_fn = [
            [os.path.basename(loci[idx].file_name) for idx in cluster]
            for cluster in clusters
        ]
        singles_fn = [os.path.basename(loci[idx].file_name) for idx in singles]
        return singles_fn, clusters_fn

    return singles, clusters
开发者ID:kyrgyzbala,项目名称:NewSystems,代码行数:60,代码来源:dendrogram.py

示例3: __init__

    def __init__(self, flat_cluster, cluster=None, curve_list=None):
        """Locate the hierarchical-cluster node matching a set of curves.

        Args:
            flat_cluster: FlatClusters object; its ``get_co_analysis()``
                supplies the CoAnalysis object holding the linkage.
            cluster: Optional Cluster object. When given, its
                ``list_curve_indexes()`` defines the curves and
                ``curve_list`` is ignored.
            curve_list: Curve indexes to use when ``cluster`` is None.
        """
        from scipy.cluster.hierarchy import to_tree
        from numpy import asarray, sort

        self.flat = flat_cluster  # FlatClusters object
        self.co_analysis = self.flat.get_co_analysis()  # CoAnalysis object
        self.cluster = cluster  # Cluster object

        # Identity test: ``== None`` would invoke a custom ``__eq__`` on
        # Cluster objects and can give surprising results.
        if cluster is not None:
            self.curve_list = cluster.list_curve_indexes()
        else:
            self.curve_list = curve_list

        self.Z = self.co_analysis.get_hierarchical_cluster()

        root = to_tree(self.Z)  # root of the entire hierarchy
        curves = asarray(self.curve_list)  # curves belonging to this cluster

        # Node of the full tree that corresponds to exactly these curves.
        self.cluster_node = get_cluster_node(root, root.left, root.right, curves)
        self.id = self.cluster_node.get_id()

        # Direct children of that node.
        self.left = self.cluster_node.left
        self.right = self.cluster_node.right

        # Sorted results of any_pre_order for each child
        # (presumably the leaf ids under it -- confirm against any_pre_order).
        self.left_list = sort(any_pre_order(root, self.left))
        self.right_list = sort(any_pre_order(root, self.right))
开发者ID:hilaryjoyce,项目名称:smfs-data-analysis,代码行数:29,代码来源:clustering.py

示例4: cluster_alchemy

def cluster_alchemy(dataset, gamma=None, filter=False):
    """Cluster the documents of ``dataset`` and score every internal node.

    Args:
        dataset: Passed to ``dp.DocumentsProcessor``.
        gamma: When truthy, forwarded (together with ``filter``) to
            ``get_data_with_alchemy``; otherwise that method is called with
            its defaults and ``filter`` is not forwarded.
        filter: Forwarded to ``get_data_with_alchemy`` only when ``gamma``
            is truthy. The name shadows the builtin but is kept because it
            is part of the public signature.

    Returns:
        The ``params`` dict from ``get_data_with_alchemy`` with the extra
        keys ``'avg_f_score'`` and ``'all_fscore'``.
    """
    doc_proc = dp.DocumentsProcessor(dataset)
    if gamma:
        tfidf_matrix, f_score_dict, params = doc_proc.get_data_with_alchemy(
            gamma=gamma, filter=filter)
    else:
        tfidf_matrix, f_score_dict, params = doc_proc.get_data_with_alchemy()

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('starting clustering: found %s document and %s features'
          % (tfidf_matrix.shape[0], tfidf_matrix.shape[1]))

    linkage_matrix = hr.average(tfidf_matrix.toarray())

    # rd=True additionally returns the list of every node in the tree.
    _, nodes = hr.to_tree(linkage_matrix, rd=True)

    # One entry per internal node, keyed by node id.
    clusters = {}
    for node in nodes:
        if not node.is_leaf():
            clusters[node.get_id()] = collect_leaf_nodes(node, [])

    f = f_score(clusters, f_score_dict)

    all_scores = print_f_score_dict(f)

    params['avg_f_score'] = average_f_score(f, tfidf_matrix.shape[0])
    params['all_fscore'] = all_scores

    print('average f_score: %s' % params['avg_f_score'])
    return params
开发者ID:Neuro17,项目名称:LOD-doc-clustering,代码行数:30,代码来源:classifier.py

示例5: _process_block

    def _process_block():
        """Fill the nested d3 dictionary from the linkage and return it.

        Converts ``linkage`` to a tree, lets ``_add_node`` populate
        ``bcluster_dendro`` from it, then runs ``_label_tree`` on the last
        entry of ``bcluster_dendro["children"]``.
        """
        root_node = to_tree(linkage, rd=False)
        _add_node(root_node, bcluster_dendro)

        # Only the final element of the children list is labelled.
        last_child = bcluster_dendro["children"][-1]
        _label_tree(last_child)
        return bcluster_dendro
开发者ID:Lilykos,项目名称:clusterix,代码行数:7,代码来源:bcluster.py

示例6: __get_column_dendrogram__

    def __get_column_dendrogram__(self):
        """Describe the column clustering as a dict of linked node records.

        Returns:
            ``{"nodes": {node_id: record}}``. Each record holds ``"count"``
            and ``"distance"``; non-leaf records also hold ``"left_child"``
            and ``"right_child"``; every node that is somebody's child holds
            ``"parent"``.
        """
        # rd=True also returns the list of every node in the tree.
        _, nodes = hcluster.to_tree(self.cluster_object.column_clustering, rd=True)
        node_id2node = {}
        dendrogram = {"nodes": {}}

        for node in nodes:
            node_id = node.id
            if node.count == 1:
                # Leaf: no children, zero merge distance.
                node_id2node[node_id] = {"count": 1, "distance": 0}
            else:
                node_id2node[node_id] = {
                    "count": node.count,
                    "distance": round(node.dist, 3),
                    "left_child": node.get_left().id,
                    "right_child": node.get_right().id,
                }

        # Back-link every child to its parent id.
        for parent_id in node_id2node:
            record = node_id2node[parent_id]
            if record["count"] != 1:
                node_id2node[record["left_child"]]["parent"] = parent_id
                node_id2node[record["right_child"]]["parent"] = parent_id

        # dendrogram["nodes"] starts empty, so this copies every record.
        dendrogram["nodes"].update(node_id2node)

        return dendrogram
开发者ID:karandeepmadaan,项目名称:paji,代码行数:35,代码来源:inchlib_clust.py

示例7: hierarchical_clustering_to_dendrogram

def hierarchical_clustering_to_dendrogram(clustering):
    """Converts an array representing a clustering to a dendrogram.

    Args:
        clustering (ndarray): A hierarchical clustering matrix, in the form
            returned by scipy.cluster.hierarchy.linkage.

    Returns:
        (networkx.DiGraph): A dendrogram. Each node in the dendrogram has the
        'distance' attribute, which is the threshold at which its children
        are merged in the clustering. Edges point from parent to child.
    """
    root = _hierarchy.to_tree(clustering)

    tree = _nx.DiGraph()
    tree.add_node(root.id, distance=float(root.dist))

    # Always bind the queue: a single-leaf tree has no children and used to
    # hit an UnboundLocalError in the loop condition.
    queue = []
    if root.left:
        queue = [(root, root.left), (root, root.right)]

    # Breadth-first walk over (parent, child) pairs.
    while queue:
        parent, child = queue.pop(0)

        # Attach the attribute through add_node; the ``DiGraph.node``
        # accessor used previously was removed in NetworkX 2.4.
        tree.add_node(child.id, distance=float(child.dist))
        tree.add_edge(parent.id, child.id)

        if child.left:
            queue.append((child, child.left))

        if child.right:
            queue.append((child, child.right))

    return tree
开发者ID:eldridgejm,项目名称:umetric,代码行数:33,代码来源:core.py

示例8: to_dict

  def to_dict(self, correlation_matrix, linkage_matrix):
    """Summarise every merge of a linkage matrix as a dict.

    Args:
      correlation_matrix: Unused; kept for interface compatibility.
      linkage_matrix: SciPy linkage matrix.

    Returns:
      Dict keyed by 1-based cluster number (row index of the merge in
      ``linkage_matrix`` plus one). Each value has ``'datasets'``, the sorted
      1-based leaf ids under that merge, and ``'height'``, the merge
      distance from column 2 of the matrix. Entries are inserted in
      pre-order (node, left subtree, right subtree).
    """
    from scipy.cluster import hierarchy
    tree = hierarchy.to_tree(linkage_matrix, rd=False)

    d = {}

    # http://w3facility.org/question/scipy-dendrogram-to-json-for-d3-js-tree-visualisation/
    # https://gist.github.com/mdml/7537455

    # An explicit stack replaces recursion so that very unbalanced
    # dendrograms cannot exceed the interpreter recursion limit.
    n_merges = len(linkage_matrix)
    stack = [tree]
    while stack:
      node = stack.pop()
      if node.is_leaf():
        continue
      # Non-leaf ids start at n_merges + 1 (the number of leaves).
      cluster_id = node.get_id() - n_merges - 1
      row = linkage_matrix[cluster_id]
      d[cluster_id + 1] = {
        'datasets': [i + 1 for i in sorted(node.pre_order())],
        'height': row[2],
      }

      # Push right first so the left subtree is handled first (pre-order).
      if node.right is not None:
        stack.append(node.right)
      if node.left is not None:
        stack.append(node.left)

    return d
开发者ID:xia2,项目名称:xia2,代码行数:27,代码来源:MultiCrystalAnalysis.py

示例9: check_leaves_list_iris

 def check_leaves_list_iris(self, method):
     """Check that leaves_list(Z) matches the tree pre-order on Iris.

     ``method`` is the linkage method name passed to ``linkage``.
     """
     X = eo['iris']
     # The unused ``pdist(X)`` call was dropped: linkage gets X directly.
     Z = linkage(X, method)
     node = to_tree(Z)
     assert_equal(node.pre_order(), leaves_list(Z))
开发者ID:FrankZhao66,项目名称:scipy,代码行数:7,代码来源:test_hierarchy.py

示例10: tfidf_covariance

def tfidf_covariance(texts, savepath):
    """Build (or load cached) hierarchical covariance observations for texts.

    Both the linkage and the covariance matrix are cached as ``.npy`` files
    under ``savepath`` and reused when present.

    Args:
        texts: Iterable of token sequences; each is joined with spaces to
            form one document. ``len(texts)`` must be defined.
        savepath: Directory for the cache files; created if missing.

    Returns:
        ``HierarchicalObservation`` built from the covariance matrix.
    """
    if not savepath.endswith("/"):
        savepath = savepath + "/"
    linkage_path = savepath + "__linkage_average.npy"
    covariance_path = savepath + "__covariance__.npy"

    if os.path.exists(linkage_path):
        Z = np.load(linkage_path)
    else:
        if not os.path.exists(savepath):
            os.makedirs(savepath)
        from sklearn.feature_extraction.text import TfidfVectorizer
        # 'content' is the documented value for in-memory strings; the
        # ``str`` type passed before is not a valid ``input`` option.
        vectorizer = TfidfVectorizer(input='content',
                                     strip_accents='ascii',
                                     analyzer='word',
                                     max_features=5000)
        y = vectorizer.fit_transform(" ".join(text) for text in texts)
        Z = linkage(y.todense(), method='average', metric='euclidean')
        np.save(linkage_path, Z)

    if os.path.exists(covariance_path):
        Cov = np.load(covariance_path)
    else:
        root, nodes = to_tree(Z, rd=True)
        assign_parents(root)
        adj_mat = get_adjacency_matrix(nodes)
        deg_mat = get_degree_matrix(nodes)
        sigma = 5
        # 1.0 forces true division: under Python 2, ``1/(sigma**2)`` is 0
        # and the regularisation term vanishes.
        regulariser = (1.0 / sigma ** 2) * np.eye(len(deg_mat))
        laplacian = np.diag(deg_mat) - adj_mat + regulariser
        # Keep only the first len(texts) rows and columns
        # (presumably the leaf nodes -- confirm against the node ordering).
        Cov = np.linalg.inv(laplacian)[:len(texts), :len(texts)]
        np.save(covariance_path, Cov)

    return HierarchicalObservation(Cov)
开发者ID:JonathanRaiman,项目名称:PythonObjectLM,代码行数:31,代码来源:covariance.py

示例11: plot_dendrogram

def plot_dendrogram(Z, dendogram_file_name):
    """Render the dendrogram of ``Z`` to a PDF or PNG file.

    Args:
        Z: Linkage matrix accepted by ``to_tree`` and ``fancy_dendrogram``.
        dendogram_file_name: Output path; must end with ``pdf`` or ``png``.

    Returns:
        The cut threshold drawn on the plot: one third of the root distance.

    Raises:
        NotImplementedError: If the file name has an unsupported ending.
    """
    # Validate before any plotting so a bad name cannot leak an open figure.
    # The original ``raise NotImplemented(...)`` was itself a TypeError,
    # because NotImplemented is not an exception class.
    if dendogram_file_name.endswith('pdf'):
        file_format = 'pdf'
    elif dendogram_file_name.endswith('png'):
        file_format = 'png'
    else:
        raise NotImplementedError('File format has to be either png or pdf')

    root = to_tree(Z)
    threshold = root.dist / 3.0
    all_leaves = get_leaves(root)

    plt.figure(figsize=(30, 30))
    try:
        title = 'Hierarchical Clustering Dendrogram( %d leaves)' % len(all_leaves)
        xlabel = 'loci'
        ylabel = 'distance'

        fancy_dendrogram(
            Z,
            leaf_rotation=90.,  # rotates the x axis labels
            leaf_font_size=4.,  # font size for the x axis labels
            annotate_above=10,
            max_d=threshold,
            title=title,
            xlabel=xlabel,
            ylabel=ylabel
        )

        plt.savefig(dendogram_file_name, format=file_format)
    finally:
        # Release the figure even if drawing or saving fails.
        plt.close()

    return threshold
开发者ID:kyrgyzbala,项目名称:NewSystems,代码行数:32,代码来源:dendrogram.py

示例12: create_cluster_heatmap

    def create_cluster_heatmap(self, compress=False, compressed_value="median", write_data=True):
        """Create the cluster heatmap representation in InCHlib format.

        Setting ``compress`` cuts the dendrogram at a distance chosen to
        reduce the heatmap rows to the specified count. ``compressed_value``
        (median, mean) selects how merged rows are combined; for nominal
        (text) metadata the most frequent value is used. With
        ``write_data=False`` the data features are left out of the result.

        The result is stored in ``self.dendrogram``; nothing is returned.
        """
        self.dendrogram = {"data": self.__get_cluster_heatmap__(write_data)}

        self.compress = compress
        self.compressed_value = compressed_value
        self.compress_cluster_treshold = 0
        if self.compress and self.compress >= 0:
            self.compress_cluster_treshold = self.__get_distance_treshold__(compress)
            print("Distance treshold for compression:", self.compress_cluster_treshold)
            if self.compress_cluster_treshold >= 0:
                self.__compress_data__()
        else:
            self.compress = False

        if self.header:
            # Feature names are emitted only together with the data.
            self.dendrogram["data"]["feature_names"] = list(self.header) if write_data else []

        if self.axis == "both" and len(self.cluster_object.column_clustering):
            # __get_column_dendrogram__ is the only consumer of the column
            # tree; the extra to_tree call here had an unused result.
            self.dendrogram["column_dendrogram"] = self.__get_column_dendrogram__()
开发者ID:AlfiyaZi,项目名称:InCHlib.js,代码行数:27,代码来源:inchlib_clust_dev.py

示例13: guide_tree_from_sequences

def guide_tree_from_sequences(sequences,
                              metric=kmer_distance,
                              display_tree = False):
    """ Build a UPGMA tree by applying metric to sequences

    Parameters
    ----------
    sequences : list of skbio.Sequence objects (or subclasses)
      The sequences to be represented in the resulting guide tree.
    metric : function
      Function that returns a single distance value when given a pair of
      skbio.Sequence objects.
    display_tree : bool, optional
      Draw the dendrogram before returning.

    Returns
    -------
    scipy.cluster.hierarchy.ClusterNode
      Root node of the tree, as returned by ``to_tree``.

    """
    guide_dm = DistanceMatrix.from_iterable(
                    sequences, metric=metric, key='id')
    # Average linkage on the condensed distances is UPGMA.
    guide_lm = average(guide_dm.condensed_form())
    guide_tree = to_tree(guide_lm)
    if display_tree:
        # Called for its plotting side effect; the return value is not needed.
        dendrogram(guide_lm, labels=guide_dm.ids, orientation='right',
                   link_color_func=lambda x: 'black')
    return guide_tree
开发者ID:lsl5,项目名称:An-Introduction-To-Applied-Bioinformatics,代码行数:28,代码来源:__init__.py

示例14: test_Q_subtree_pre_order

 def test_Q_subtree_pre_order(self):
     """pre_order() must also work when called on sub-trees.

     The root's pre-order has to equal the left child's pre-order followed
     by the right child's.
     """
     root = to_tree(linkage(hierarchy_test_data.Q_X, 'single'))
     whole_order = root.pre_order()
     left_order = root.get_left().pre_order()
     right_order = root.get_right().pre_order()
     assert_equal(whole_order, left_order + right_order)
开发者ID:abudulemusa,项目名称:scipy,代码行数:7,代码来源:test_hierarchy.py

示例15: guide_tree_from_sequences

def guide_tree_from_sequences(sequences,
                              distance_fn=kmer_distance,
                              display_tree = False):
    """ Build a UPGMA tree by applying distance_fn to sequences

    Parameters
    ----------
    sequences : skbio.SequenceCollection
      The sequences to be represented in the resulting guide tree.
    distance_fn : function
      Passed to ``sequences.distances`` to build the distance matrix
      (presumably a pairwise sequence distance function -- confirm against
      the skbio version in use).
    display_tree : bool, optional
      Draw the dendrogram before returning.

    Returns
    -------
    scipy.cluster.hierarchy.ClusterNode
      Root node of the tree, as returned by ``to_tree``.

    """
    guide_dm = sequences.distances(distance_fn)
    # Average linkage on the condensed distances is UPGMA.
    guide_lm = average(guide_dm.condensed_form())
    guide_tree = to_tree(guide_lm)
    if display_tree:
        # Called for its plotting side effect; the return value is not needed.
        dendrogram(guide_lm, labels=guide_dm.ids, orientation='right',
                   link_color_func=lambda x: 'black')
    return guide_tree
开发者ID:gitter-badger,项目名称:An-Introduction-To-Applied-Bioinformatics,代码行数:27,代码来源:__init__.py


注:本文中的scipy.cluster.hierarchy.to_tree函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。