当前位置: 首页>>代码示例>>Python>>正文


Python hierarchy.linkage方法代码示例

本文整理汇总了Python中scipy.cluster.hierarchy.linkage方法的典型用法代码示例。如果您正苦于以下问题:Python hierarchy.linkage方法的具体用法?Python hierarchy.linkage怎么用?Python hierarchy.linkage使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在scipy.cluster.hierarchy的用法示例。


在下文中一共展示了hierarchy.linkage方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_linkage_misc

# 需要导入模块: from scipy.cluster import hierarchy [as 别名]
# 或者: from scipy.cluster.hierarchy import linkage [as 别名]
def test_linkage_misc():
    """Exercise assorted error paths and affinity options of the linkage code."""
    samples = np.random.RandomState(42).normal(size=(5, 5))

    # An unknown linkage name must raise ValueError, both through the
    # estimator and through the tree builder.
    bad_estimator = AgglomerativeClustering(linkage='foo')
    assert_raises(ValueError, bad_estimator.fit, samples)
    assert_raises(ValueError, linkage_tree, samples, linkage='foo')
    # So must a 4x4 connectivity matrix when the data has 5 rows.
    assert_raises(ValueError, linkage_tree, samples,
                  connectivity=np.ones((4, 4)))

    # Smoke test: FeatureAgglomeration fits with its default settings.
    FeatureAgglomeration().fit(samples)

    # A precomputed cosine distance matrix must produce the same first
    # return value as asking linkage_tree for the "cosine" affinity.
    precomputed = cosine_distances(samples)
    from_precomputed = linkage_tree(precomputed, affinity="precomputed")
    from_name = linkage_tree(samples, affinity="cosine")
    assert_array_equal(from_precomputed[0], from_name[0])

    # A callable affinity must agree with the equivalent named affinity.
    from_callable = linkage_tree(samples, affinity=manhattan_distances)
    from_name = linkage_tree(samples, affinity="manhattan")
    assert_array_equal(from_callable[0], from_name[0])
开发者ID:PacktPublishing,项目名称:Mastering-Elasticsearch-7.0,代码行数:22,代码来源:test_hierarchical.py

示例2: test_structured_linkage_tree

# 需要导入模块: from scipy.cluster import hierarchy [as 别名]
# 或者: from scipy.cluster.hierarchy import linkage [as 别名]
def test_structured_linkage_tree():
    """Check structured (connectivity-constrained) linkage trees.

    For every builder in ``_TREE_BUILDERS`` this verifies the node count of
    the returned tree and that invalid inputs raise ``ValueError``.
    """
    rng = np.random.RandomState(0)
    # The builtin ``bool`` is used as dtype: the ``np.bool`` alias was
    # deprecated in NumPy 1.20 and removed in NumPy 1.24.
    mask = np.ones([10, 10], dtype=bool)
    # Avoiding a mask with only 'True' entries
    mask[4:7, 4:7] = 0
    X = rng.randn(50, 100)
    # Only the shape of the mask is used to build the connectivity graph.
    connectivity = grid_to_graph(*mask.shape)
    for tree_builder in _TREE_BUILDERS.values():
        children, n_components, n_leaves, parent = \
            tree_builder(X.T, connectivity)
        n_nodes = 2 * X.shape[1] - 1
        assert len(children) + n_leaves == n_nodes
        # Check that the builder raises a ValueError with a connectivity
        # matrix of the wrong shape
        assert_raises(ValueError,
                      tree_builder, X.T, np.ones((4, 4)))
        # Check that fitting with no samples raises an error
        assert_raises(ValueError,
                      tree_builder, X.T[:0], connectivity)
开发者ID:PacktPublishing,项目名称:Mastering-Elasticsearch-7.0,代码行数:22,代码来源:test_hierarchical.py

示例3: test_unstructured_linkage_tree

# 需要导入模块: from scipy.cluster import hierarchy [as 别名]
# 或者: from scipy.cluster.hierarchy import linkage [as 别名]
def test_unstructured_linkage_tree():
    """Check unstructured linkage trees built by ward_tree and _TREE_BUILDERS."""
    data = np.random.RandomState(0).randn(50, 100)
    # Node count that every tree is checked against.
    expected_nodes = 2 * data.shape[1] - 1

    def check_builder(builder):
        # Both the full matrix and its first row are fed to the builder.
        for sample in (data, data[0]):
            # n_clusters is passed only so that the builder emits a
            # UserWarning, which exercises the warning code path.
            with ignore_warnings():
                children, _, n_leaves, _ = assert_warns(
                    UserWarning, builder, sample.T, n_clusters=10)
            assert_equal(len(children) + n_leaves, expected_nodes)

    check_builder(ward_tree)
    for builder in _TREE_BUILDERS.values():
        check_builder(builder)
开发者ID:PacktPublishing,项目名称:Mastering-Elasticsearch-7.0,代码行数:23,代码来源:test_hierarchical.py

示例4: test_identical_points

# 需要导入模块: from scipy.cluster import hierarchy [as 别名]
# 或者: from scipy.cluster.hierarchy import linkage [as 别名]
def test_identical_points():
    """Check that duplicated points end up in the same cluster.

    Ensures identical points are handled correctly when using mst with
    a sparse connectivity matrix.
    """
    # Three pairs of identical points; each pair is its own true cluster.
    X = np.array([[0, 0, 0], [0, 0, 0],
                  [1, 1, 1], [1, 1, 1],
                  [2, 2, 2], [2, 2, 2]])
    true_labels = np.array([0, 0, 1, 1, 2, 2])
    connectivity = kneighbors_graph(X, n_neighbors=3, include_self=False)
    # Symmetrize the neighbour graph.
    connectivity = 0.5 * (connectivity + connectivity.T)
    connectivity, n_components = _fix_connectivity(X,
                                                   connectivity,
                                                   'euclidean')

    # Each linkage criterion is exercised once.
    for linkage in ('single', 'complete', 'average', 'ward'):
        clustering = AgglomerativeClustering(n_clusters=3,
                                             linkage=linkage,
                                             connectivity=connectivity)
        clustering.fit(X)

        # A score of 1 means the predicted partition matches true_labels.
        assert_almost_equal(normalized_mutual_info_score(clustering.labels_,
                                                         true_labels), 1)
开发者ID:PacktPublishing,项目名称:Mastering-Elasticsearch-7.0,代码行数:23,代码来源:test_hierarchical.py

示例5: test_cluster_distances_with_distance_threshold

# 需要导入模块: from scipy.cluster import hierarchy [as 别名]
# 或者: from scipy.cluster.hierarchy import linkage [as 别名]
def test_cluster_distances_with_distance_threshold():
    """Check cluster separation when clustering with a distance threshold.

    With single linkage and ``distance_threshold=4``: inside a cluster every
    point must have a neighbour closer than the threshold, and no point may
    be closer than the threshold to a point of another cluster.
    """
    threshold = 4
    sample_count = 100
    points = np.random.RandomState(0).randint(-10, 10,
                                               size=(sample_count, 3))
    model = AgglomerativeClustering(n_clusters=None,
                                    distance_threshold=threshold,
                                    linkage="single")
    assigned = model.fit(points).labels_

    pairwise = pairwise_distances(points, metric="minkowski", p=2)
    # Mask the zero diagonal so it never wins the min() below.
    np.fill_diagonal(pairwise, np.inf)

    for cluster_id in np.unique(assigned):
        members = assigned == cluster_id
        member_rows = pairwise[members]
        # Largest nearest-neighbour distance among points of this cluster.
        widest_inside = member_rows[:, members].min(axis=0).max()
        # Smallest distance from this cluster to any outside point.
        closest_outside = member_rows[:, ~members].min(axis=0).min()
        # A single-point cluster only sees the inf diagonal, so skip it.
        if members.sum() > 1:
            assert widest_inside < threshold
        assert closest_outside >= threshold
开发者ID:PacktPublishing,项目名称:Mastering-Elasticsearch-7.0,代码行数:26,代码来源:test_hierarchical.py

示例6: create_newick_file_from_matrix_file

# 需要导入模块: from scipy.cluster import hierarchy [as 别名]
# 或者: from scipy.cluster.hierarchy import linkage [as 别名]
def create_newick_file_from_matrix_file(observation_matrix_path, output_file_path, linkage=constants.linkage_method_default,
                         distance=constants.distance_metric_default, norm='l1', progress=progress, transpose=False,
                         items_order_file_path=None):
    """Cluster a TAB-delimited matrix file and write the tree in newick form.

    Args:
        observation_matrix_path: path of the TAB-delimited input matrix.
        output_file_path: where the newick string is written (skipped when
            falsy).
        linkage: linkage method forwarded to ``get_newick_from_matrix``.
        distance: distance metric forwarded to ``get_newick_from_matrix``.
        norm: normalization forwarded to ``get_newick_from_matrix``.
        progress: accepted for interface compatibility; not used here.
        transpose: forwarded to the matrix reader.
        items_order_file_path: optional path that receives the item names
            in tree order, one per line.
    """
    # Validate the inputs and the output locations before any work is done.
    is_distance_and_linkage_compatible(distance, linkage)
    filesnpaths.is_file_exists(observation_matrix_path)
    filesnpaths.is_file_tab_delimited(observation_matrix_path)

    filesnpaths.is_output_file_writable(output_file_path)
    if items_order_file_path:
        filesnpaths.is_output_file_writable(items_order_file_path)

    id_to_sample_dict, _, _, vectors = utils.get_vectors_from_TAB_delim_matrix(observation_matrix_path, transpose=transpose)

    vectors = np.array(vectors)

    newick = get_newick_from_matrix(vectors, distance, linkage, norm, id_to_sample_dict)

    # Context managers guarantee that the files are flushed and closed.
    if output_file_path:
        with open(output_file_path, 'w') as output_file:
            output_file.write(newick.strip() + '\n')

    if items_order_file_path:
        with open(items_order_file_path, 'w') as items_order_file:
            items_order_file.write('\n'.join(utils.get_names_order_from_newick_tree(newick)) + '\n')
开发者ID:merenlab,项目名称:anvio,代码行数:24,代码来源:clustering.py

示例7: _get_clusters

# 需要导入模块: from scipy.cluster import hierarchy [as 别名]
# 或者: from scipy.cluster.hierarchy import linkage [as 别名]
def _get_clusters(self):
    """Compute linkage matrices for the requested dimensions.

    Returns:
    - tuple: ``(Zcol, Zrow)``; an entry is ``None`` when that dimension
      is not selected by ``self._cluster``.
    """
    want_cols = self._cluster in ["col", "all"]
    want_rows = self._cluster in ["row", "all"]
    leaf_order = self._optimal_leaf_order

    col_linkage = None
    if want_cols:
        # Columns are clustered by computing distances on the transpose.
        col_distances = self._dist_fun(np.transpose(self._data),
                                       metric=self._col_dist)
        col_linkage = self._link_fun(col_distances,
                                     optimal_ordering=leaf_order)

    row_linkage = None
    if want_rows:
        # Rows are clustered for both the 'row' and the 'all' setting.
        row_distances = self._dist_fun(self._data, metric=self._row_dist)
        row_linkage = self._link_fun(row_distances,
                                     optimal_ordering=leaf_order)

    return (col_linkage, row_linkage)
开发者ID:plotly,项目名称:dash-bio,代码行数:23,代码来源:_clustergram.py

示例8: get_col_linkage

# 需要导入模块: from scipy.cluster import hierarchy [as 别名]
# 或者: from scipy.cluster.hierarchy import linkage [as 别名]
def get_col_linkage(combined_df, method='ward', metric='cosine'):
    """Return the column linkage of ``combined_df``, cached on disk.

    The linkage is stored under ``~/cache/alt_splice_heatmap/sqtl`` in a
    file whose name contains ``method`` and ``metric``. When that file
    exists it is loaded, and the cached index and column names are asserted
    to match ``combined_df``; otherwise the linkage is computed and saved
    together with the index and the column names.
    """
    CACHE_DIR = os.path.expanduser('~/cache/alt_splice_heatmap/sqtl')
    if not os.path.exists(CACHE_DIR):
        os.makedirs(CACHE_DIR)
    col_linkage_cache_path = os.path.join(CACHE_DIR, 'col_linkage_%s_%s.npy' %(method, metric))
    idx_linkage_cache_path = os.path.join(CACHE_DIR, 'idx.npy')
    col_name_cache_path = os.path.join(CACHE_DIR, 'col_names.npy')
    if os.path.exists(col_linkage_cache_path):
        # Parenthesised single-argument print works on Python 2 and 3.
        print("Loading linkage from %s" % col_linkage_cache_path)
        col_linkage = np.load(col_linkage_cache_path)
        # Guard against reusing a cache built from a different data frame.
        assert np.array_equal(np.load(idx_linkage_cache_path), combined_df.index)
        assert np.array_equal(np.load(col_name_cache_path), combined_df.columns)
    else:
        print("Calculating linkage")
        # NOTE(review): pdist runs with its default metric and returns a
        # condensed distance vector; per the SciPy docs linkage ignores
        # `metric` for condensed input, so `metric` currently only affects
        # the cache file name. Confirm whether that is intended.
        col_linkage = hc.linkage(sp.distance.pdist(combined_df.values.T), method=method, metric=metric)
        np.save(col_linkage_cache_path, col_linkage)
        np.save(idx_linkage_cache_path, combined_df.index)
        np.save(col_name_cache_path, combined_df.columns)
    return col_linkage
开发者ID:ratschlab,项目名称:pancanatlas_code_public,代码行数:20,代码来源:sqtl.py

示例9: create_cluster_map

# 需要导入模块: from scipy.cluster import hierarchy [as 别名]
# 或者: from scipy.cluster.hierarchy import linkage [as 别名]
def create_cluster_map(self, bow, srcids):
    """Group ``srcids`` by hierarchical clustering of their ``bow`` rows.

    The rows of ``bow`` are clustered with complete linkage on city-block
    distances. The tree is cut half-way between the second and third
    smallest distinct merge distances.

    Args:
        bow: 2-D array with one row per entry of ``srcids``.
        srcids: identifiers, in the same order as the rows of ``bow``.

    Returns:
        dict mapping each cluster id to the list of srcids in it.

    Raises:
        IndexError: if the tree has fewer than three distinct merge
            distances.
    """
    cluster_map = {}
    z = linkage(bow, metric='cityblock', method='complete')
    # Sort the distinct merge distances: a set has no defined order, so
    # indexing it directly would pick arbitrary distances.
    dists = sorted(set(z[:, 2]))
    thresh = (dists[1] + dists[2]) / 2
    self.logger.info('Threshold: {0}'.format(thresh))
    b = hier.fcluster(z, thresh, criterion='distance')
    assert bow.shape[0] == len(b)
    assert len(b) == len(srcids)
    for cid, srcid in zip(b, srcids):
        cluster_map.setdefault(cid, []).append(srcid)

    # len(b) is the number of samples; the cluster count is the map size.
    self.logger.info('# of clusters: {0}'.format(len(cluster_map)))
    self.logger.info('sizes of clustsers:{0}'.format(sorted(map(len, cluster_map.values()))))

    return cluster_map
开发者ID:plastering,项目名称:plastering,代码行数:18,代码来源:zodiac.py

示例10: hier_clustering

# 需要导入模块: from scipy.cluster import hierarchy [as 别名]
# 或者: from scipy.cluster.hierarchy import linkage [as 别名]
def hier_clustering(d, threshold=3):
    """Cluster the keys of ``d`` by the TF-IDF vectors of their token lists.

    Args:
        d: dict mapping a source id to a list of string tokens.
        threshold: distance at which the linkage tree is cut.

    Returns:
        OrderedDict mapping the cluster id (as a string) to the list of
        source ids in that cluster, largest cluster first.
    """
    # Validate the expected input shape before using it.
    assert isinstance(d, dict)
    assert isinstance(list(d.values())[0], list)
    assert isinstance(list(d.values())[0][0], str)

    # Snapshot the key order once; it is reused when pairing with labels.
    srcids = list(d)

    def split_on_whitespace(text):
        return text.split()

    vectorizer = TfidfVectorizer(tokenizer=split_on_whitespace)
    doc = [' '.join(d[srcid]) for srcid in srcids]
    vect = vectorizer.fit_transform(doc)
    #TODO: Make vect aligned to the required format
    z = linkage(vect.toarray(), metric='cityblock', method='complete')
    b = hier.fcluster(z, threshold, criterion='distance')

    cluster_dict = defaultdict(list)
    for srcid, cluster_id in zip(srcids, b):
        cluster_dict[str(cluster_id)].append(srcid)

    def member_count(item):
        return len(item[1])

    return OrderedDict(
        sorted(cluster_dict.items(), key=member_count, reverse=True))
开发者ID:plastering,项目名称:plastering,代码行数:23,代码来源:common.py

示例11: generate_graphs

# 需要导入模块: from scipy.cluster import hierarchy [as 别名]
# 或者: from scipy.cluster.hierarchy import linkage [as 别名]
def generate_graphs(clusters_list, output, size, linkage, cutoff, distances, traj):
    """
    DESCRIPTION
    Produce the clustering figures: the per-frame coloured barplot, the
    dendrogram, the histogram and, when there are fewer than 10000 frames,
    the distance matrix plot.
    Args:
        clusters_list (list): list of cluster
        output (string): output name for graph
        size (int): number of frames
        linkage (numpy array): matrix linkage
        cutoff (float): cutoff distance value for clustering (in the dendrogram)
        distances(numpy array): distance matrix
        traj (Trajectory): trajectory for time usage in axis barplot
    Return:
        colors_list (list) to be used with 2D distance projection graph
    """
    # The barplot determines the colours that the later plots reuse.
    palette = plot_barplot(clusters_list, output, size, traj)
    plot_dendro(linkage, output, cutoff, palette, clusters_list)
    plot_hist(clusters_list, output, palette)

    frame_count = distances.shape[0]
    if frame_count >= 10000:
        printScreenLogfile("Too many frames! The RMSD distance matrix will not be generated")
    else:
        plot_distmat(distances, output)
    return palette
开发者ID:tubiana,项目名称:TTClust,代码行数:26,代码来源:ttclust.py

示例12: get_hrp

# 需要导入模块: from scipy.cluster import hierarchy [as 别名]
# 或者: from scipy.cluster.hierarchy import linkage [as 别名]
def get_hrp(cov, corr):
    """Build a hierarchical portfolio from covariance and correlation.

    Params
    ------
    cov: pd.DataFrame
    corr: pd.DataFrame

    Returns
    -------
    pd.Series
    """
    # Single-linkage clustering on the correlation-derived distances.
    linkage_matrix = sch.linkage(get_corr_dist(corr), 'single')
    ordered_positions = get_quasi_diag(linkage_matrix)
    # Translate positional indices back into the labels of `corr`.
    ordered_labels = corr.index[ordered_positions].tolist()
    allocation = get_rec_bipart(cov, ordered_labels)
    return allocation.sort_index()
开发者ID:jjakimoto,项目名称:finance_ml,代码行数:21,代码来源:allocation.py

示例13: performClusteringLinkage

# 需要导入模块: from scipy.cluster import hierarchy [as 别名]
# 或者: from scipy.cluster.hierarchy import linkage [as 别名]
def performClusteringLinkage(segmentBKTable, segmentCVTable, N_init, linkageCriterion,linkageMetric ):
    """Cut one linkage tree into N_init, N_init-1, ..., 1 clusters.

    ``segmentBKTable`` is clustered when ``linkageMetric`` is 'jaccard';
    for every other metric ``segmentCVTable`` is clustered.

    Returns:
        (clusteringTable, k): ``clusteringTable[:, i]`` holds the 1-based
        cluster labels for ``N_init - i`` clusters, and ``k`` is ``N_init``.
    """
    from scipy.cluster.hierarchy import cut_tree, linkage

    use_binary_keys = linkageMetric == 'jaccard'
    features = segmentBKTable if use_binary_keys else segmentCVTable

    n_segments = np.size(segmentCVTable, 0)
    labels_per_cut = np.zeros([n_segments, N_init])
    tree = linkage(features, method=linkageCriterion, metric=linkageMetric)
    for column in range(N_init):
        n_clusters = N_init - column
        # cut_tree labels start at 0; shift them so they start at 1.
        labels_per_cut[:, column] = cut_tree(tree, n_clusters).T + 1
    print('done')
    return labels_per_cut, N_init
开发者ID:josepatino,项目名称:pyBK,代码行数:18,代码来源:diarizationFunctions.py

示例14: dendrogram

# 需要导入模块: from scipy.cluster import hierarchy [as 别名]
# 或者: from scipy.cluster.hierarchy import linkage [as 别名]
def dendrogram(data, threshold, layer_directory):
    """Cluster the columns of ``data``, save a dendrogram and return clusters.

    Args:
        data: DataFrame whose columns are clustered with single linkage on
            cosine distance.
        threshold: distance used both to colour the dendrogram and to cut
            the tree into flat clusters.
        layer_directory: directory that receives ``dendrogram.pdf``.

    Returns:
        DataFrame indexed by "Sample Names" with one "Cluster" column.
    """
    colnames = data.columns
    data = np.array(data)

    Z = hierarchy.linkage(data.T, 'single',  'cosine')
    fig = plt.figure(figsize=(15, 9))
    try:
        # Called for its drawing side effect; the returned dict is not used.
        hierarchy.dendrogram(Z, labels = colnames, color_threshold=threshold)
        plt.title("Clustering of Samples Based on Mutational Signatures" )
        plt.ylabel("Cosine Distance")
        plt.xlabel("Sample IDs")
        # The figure size is set when the figure is created; savefig has no
        # `figsize` keyword, so none is passed here.
        plt.savefig(layer_directory+'/dendrogram.pdf', dpi=300)
    finally:
        # Release the figure so repeated calls do not accumulate figures.
        plt.close(fig)
    # which datapoints goes to which cluster
    # The indices of the datapoints will be displayed as the ids
    Y = hierarchy.fcluster(Z, threshold, criterion='distance', R=None, monocrit=None)
    dataframe = pd.DataFrame({"Cluster":Y, "Sample Names":list(colnames)})
    dataframe = dataframe.set_index("Sample Names")

    return dataframe


######################################## Plot the reconstruction error vs stabilities and select the optimum number of signatures ####################################################
开发者ID:AlexandrovLab,项目名称:SigProfilerExtractor,代码行数:26,代码来源:subroutines.py

示例15: __agglomerative__

# 需要导入模块: from scipy.cluster import hierarchy [as 别名]
# 或者: from scipy.cluster.hierarchy import linkage [as 别名]
def __agglomerative__(self,markings):
    """
    runs an initial agglomerative clustering over the given markings
    :param markings: sequence of equal-length numeric points, one per marking
    :return: the linkage matrix produced by Ward clustering of the markings
    """
    points = np.asarray(markings, dtype=float)
    # linkage() treats a 2-D input as observation vectors, so a square
    # distance matrix must not be passed to it. Pass the condensed
    # pairwise-distance vector instead.
    condensed_dist = pdist(points, metric='euclidean')
    # use ward linkage to do the actual clustering
    row_clusters = linkage(condensed_dist, method='ward')

    return row_clusters
开发者ID:zooniverse,项目名称:aggregation,代码行数:19,代码来源:agglomerative.py


注:本文中的scipy.cluster.hierarchy.linkage方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。