当前位置: 首页>>代码示例>>Python>>正文


Python hierarchy.ward函数代码示例

本文整理汇总了Python中scipy.cluster.hierarchy.ward函数的典型用法代码示例。如果您正苦于以下问题:Python ward函数的具体用法?Python ward怎么用?Python ward使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了ward函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: demoFourGs

def demoFourGs():
    '''
    Demonstrate the performance of LCC on points drawn from four gaussians.

    Generates the points with genNormalClusters, then renders three images
    of the same points: coloured by the labels returned with the data
    ("truth"), by Latent Configuration Clustering, and by Ward hierarchical
    clustering. The three images are shown side by side in one montage
    window titled "Truth vs. LCC vs. HC".
    '''
    s = (640, 480)
    dat = genNormalClusters(N=100, size=s)
    cList = ['red', 'blue', 'green', 'yellow']
    # dat[1] is used directly as an index into cList for the reference image.
    img_truth = plotClusts(dat[0], dat[1], size=s,
                           colors=[cList[i] for i in dat[1]], window=None)

    # generate normal hierarchical clustering off euclidean data points
    # print(...) with one string argument is valid on Python 3 and prints
    # the same text as the old print statement on Python 2.
    print("Generating Hierarchical Clustering on Raw Data")
    Z2 = spc.ward(scipy.array(dat[0]))
    clusts2 = spc.fcluster(Z2, 4, criterion="maxclust")
    # Cluster labels are shifted down by one before indexing cList.
    img_HC = plotClusts(dat[0], clusts2, size=s,
                        colors=[cList[i - 1] for i in clusts2], window=None)

    # generate LCC clustering
    print("Generating LCC Clustering")
    (clusts, _, _, _) = pf.LatentConfigurationClustering(dat[0], pt_dist, 4, numtrees=27)
    img_LCC = plotClusts(dat[0], clusts, size=s,
                         colors=[cList[i - 1] for i in clusts], window=None)

    im = pv.ImageMontage([img_truth, img_LCC, img_HC], layout=(1, 3), gutter=3,
                         tileSize=(320, 240), labels=None)
    im.show(window="Truth vs. LCC vs. HC")
开发者ID:Sciumo,项目名称:ProximityForest,代码行数:27,代码来源:LatentConfigurationClustering_Demo.py

示例2: test_scikit_vs_scipy

def test_scikit_vs_scipy():
    """Test scikit ward with full connectivity (i.e. unstructured) vs scipy

    For five random data sets, the tree built by ``ward_tree`` with an
    all-ones connectivity matrix and the tree built by ``hierarchy.ward``
    are both cut into ``k`` clusters and must give the same labelling.
    """
    from scipy.sparse import lil_matrix

    n, p, k = 10, 5, 3
    rnd = np.random.RandomState(0)

    connectivity = lil_matrix(np.ones((n, n)))
    for i in range(5):
        X = 0.1 * rnd.normal(size=(n, p))
        # Shift every row by a multiple of its index, then centre each row.
        X -= 4 * np.arange(n)[:, np.newaxis]
        X -= X.mean(axis=1)[:, np.newaxis]

        out = hierarchy.ward(X)

        # The first two linkage columns hold the merged node indices.
        # Use the builtin int: the np.int alias was removed from NumPy.
        children_ = out[:, :2].astype(int)
        children, _, n_leaves, _ = ward_tree(X, connectivity)

        cut = _hc_cut(k, children, n_leaves)
        cut_ = _hc_cut(k, children_, n_leaves)
        assess_same_labelling(cut, cut_)

    # Test error management in _hc_cut
    assert_raises(ValueError, _hc_cut, n_leaves + 1, children, n_leaves)
开发者ID:VirgileFritsch,项目名称:scikit-learn,代码行数:25,代码来源:test_hierarchical.py

示例3: make_tree

def make_tree(X, C, method='single'):
    """Build a ``Tree`` from an agglomerative clustering.

    Parameters
    ----------
    X
        Input handed to ``ward`` when ``method == 'ward'``.
    C
        Input handed to ``single`` or ``average`` for those methods.
    method : {'single', 'ward', 'average'}
        Linkage function to use.

    Returns
    -------
    Tree
        ``Tree`` whose root is ``construct_node`` applied to the converted
        linkage tree.

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported names. Previously this
        case surfaced as an UnboundLocalError on ``tree``.
    """
    if method == 'single':
        linkage = single(C)
    elif method == 'ward':
        linkage = ward(X)
    elif method == 'average':
        linkage = average(C)
    else:
        raise ValueError(
            "unknown method %r; expected 'single', 'ward' or 'average'"
            % (method,))
    return Tree(root=construct_node(to_tree(linkage)))
开发者ID:sharadmv,项目名称:trees,代码行数:8,代码来源:agglomerative.py

示例4: plotHierarchichalClusterGraph

def plotHierarchichalClusterGraph(tf_idf_matrix, headlines_utf):
    """Draw a Ward-linkage dendrogram of the documents and save it as a PNG.

    Parameters
    ----------
    tf_idf_matrix
        Matrix passed to ``cosine_similarity``.
    headlines_utf
        Leaf labels passed to ``dendrogram``.

    The figure is written to ``../plots/hierachichal_clusters.png``.
    """
    dist = 1 - cosine_similarity(tf_idf_matrix)
    # NOTE(review): dist is a square matrix; ward() appears to treat 2-D
    # input as observation vectors rather than precomputed distances --
    # confirm this is what is wanted.
    linkage_matrix = ward(dist)
    plt.subplots(figsize=(15, 20))  # set size
    dendrogram(linkage_matrix, orientation="right", labels=headlines_utf)

    # tick_params expects booleans here; the legacy 'off' strings are
    # non-empty and therefore truthy, so they do not hide anything.
    plt.tick_params(axis='x', which='both', bottom=False, top=False,
                    labelbottom=False)
    plt.tight_layout()
    plt.savefig('../plots/hierachichal_clusters.png', dpi=200)
开发者ID:rubyagarwal,项目名称:NewsClustering,代码行数:9,代码来源:clusterInfoProcessor.py

示例5: setUp

    def setUp(self):
        """Build ``self.tree`` from a Ward linkage over 10 random values.

        Distances are the absolute differences between the values. Every
        node gets branch length 1 and every internal node is named
        ``y<postorder index>``.
        """
        np.random.seed(0)
        x = np.random.rand(10)
        dm = DistanceMatrix.from_iterable(x, lambda a, b: np.abs(a - b))
        lm = ward(dm.condensed_form())
        # Builtin str: the np.str alias was removed from NumPy.
        ids = np.arange(len(x)).astype(str)
        self.tree = TreeNode.from_linkage_matrix(lm, ids)

        # initialize tree with branch length and named internal nodes
        for i, n in enumerate(self.tree.postorder(include_self=True)):
            n.length = 1
            if not n.is_tip():
                n.name = "y%d" % i
开发者ID:biocore,项目名称:gneiss,代码行数:13,代码来源:test_dendrogram.py

示例6: hierarchyCluster

def hierarchyCluster(dist, titles):
    """Show a right-oriented Ward dendrogram for ``dist`` labelled by ``titles``.

    Parameters
    ----------
    dist
        Input passed to ``ward``.
    titles
        Leaf labels passed to ``dendrogram``.
    """
    # NOTE(review): if dist is a square distance matrix, ward() appears to
    # treat its rows as observation vectors, not as precomputed distances
    # -- confirm against the caller.
    linkage_matrix = ward(dist)
    plt.subplots(figsize=(15, 20))  # set size
    # dendrogram() draws on the current axes; its return value is not needed.
    dendrogram(linkage_matrix, orientation="right", labels=titles)

    # Booleans instead of the legacy 'on'/'off' strings, which are
    # non-empty and therefore truthy regardless of their text.
    plt.tick_params(
        axis='x',          # changes apply to the x-axis
        which='major',     # only major ticks are affected
        bottom=False,      # ticks along the bottom edge are off
        top=False,         # ticks along the top edge are off
        labelbottom=True)  # labels along the bottom edge stay on

    plt.tight_layout()  # show plot with tight layout
    plt.show()
开发者ID:tuling56,项目名称:Python,代码行数:14,代码来源:mtextcluster_fun.py

示例7: _ward_cluster

def _ward_cluster(X):
    """Split the rows of ``X`` into at most two clusters with Ward linkage.

    Parameters
    ----------
    X
        Observations, one per row, as accepted by ``pdist``.

    Returns
    -------
    Flat cluster labels produced by ``fcluster`` with ``maxclust`` set to 2.
    """
    # Pairwise correlation distance between the rows.
    corr_dist = pdist(X, metric="correlation")

    # Ward linkage over those distances, cut at the top split.
    tree = ward(corr_dist)
    labels = fcluster(tree, 2, criterion="maxclust")
    return labels
开发者ID:jknox13,项目名称:cortical_paper,代码行数:14,代码来源:clustering.py

示例8: hierachical_clustering

    def hierachical_clustering(self):
        """Show a right-oriented Ward dendrogram for the stored distance matrix."""
        # NOTE(review): if the stored matrix is square, ward() appears to
        # treat its rows as observation vectors, not precomputed distances.
        linkage_matrix = ward(self.__dist_matrix)

        fig, _ = plt.subplots(figsize=(15, 9))  # set size
        # NOTE(review): `titles` is not defined in this method; it must come
        # from an enclosing/module scope -- confirm.
        dendrogram(linkage_matrix, orientation="right", labels=titles)

        # Booleans instead of the legacy 'off' strings, which are non-empty
        # and therefore truthy.
        plt.tick_params(
            axis='x',           # changes apply to the x-axis
            which='both',       # both major and minor ticks are affected
            bottom=False,       # ticks along the bottom edge are off
            top=False,          # ticks along the top edge are off
            labelbottom=False)  # labels along the bottom edge are off

        fig.set_tight_layout(True)  # show plot with tight layout
        plt.show()
开发者ID:adisorn711,项目名称:comp6237cw2,代码行数:15,代码来源:AJTokenizer.py

示例9: test_cache_ntips

    def test_cache_ntips(self):
        """Check ``leafcount`` on every node after ``_cache_ntips``.

        The tree comes from a Ward linkage over the points 0, 1, 2, 3 with
        absolute-difference distances.
        """
        dm = DistanceMatrix.from_iterable([0, 1, 2, 3],
                                          lambda x, y: np.abs(x-y))
        lm = ward(dm.condensed_form())
        # Builtin str: the np.str alias was removed from NumPy.
        ids = np.arange(4).astype(str)
        t = mock.from_linkage_matrix(lm, ids)

        t._cache_ntips()

        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual(t.leafcount, 4)
        self.assertEqual(t.children[0].leafcount, 2)
        self.assertEqual(t.children[1].leafcount, 2)
        self.assertEqual(t.children[0].children[0].leafcount, 1)
        self.assertEqual(t.children[0].children[1].leafcount, 1)
        self.assertEqual(t.children[1].children[0].leafcount, 1)
        self.assertEqual(t.children[1].children[1].leafcount, 1)
开发者ID:biocore,项目名称:gneiss,代码行数:16,代码来源:test_dendrogram.py

示例10: knn

def knn(df, axis=None, labels=None):
    """Draw a right-oriented Ward dendrogram for the rows of ``df``.

    Parameters
    ----------
    df
        Object whose ``.values`` is passed to ``cosine_similarity``.
    axis
        Not used by this function.
    labels
        Leaf labels passed to ``dendrogram``.
    """
    dist = 1 - cosine_similarity(df.values)
    # NOTE(review): dist is a square matrix; ward() appears to treat 2-D
    # input as observation vectors rather than precomputed distances.
    linkage_matrix = ward(dist)

    plt.subplots(figsize=(15, 20))  # set size
    # dendrogram() draws on the current axes; its return value is not needed.
    dendrogram(linkage_matrix, orientation="right", labels=labels)

    # Booleans instead of the legacy 'off' strings, which are non-empty
    # and therefore truthy.
    plt.tick_params(
        axis='x',           # changes apply to the x-axis
        which='both',       # both major and minor ticks are affected
        bottom=False,       # ticks along the bottom edge are off
        top=False,          # ticks along the top edge are off
        labelbottom=False)  # labels along the bottom edge are off

    plt.tight_layout()
开发者ID:heggy231,项目名称:social-deprivation,代码行数:16,代码来源:dataAnalysis.py

示例11: create_hierarchy

    def create_hierarchy(self, sim_matrix):
        """Draw a Ward dendrogram labelled by ``self.titles`` and save it.

        Parameters
        ----------
        sim_matrix
            Input passed to ``ward``.

        The figure is written to ``ward_clusters.png``.
        """
        # NOTE(review): if sim_matrix is square, ward() appears to treat its
        # rows as observation vectors rather than pairwise distances.
        linkage_matrix = ward(sim_matrix)
        plt.subplots(figsize=(15, 20))  # set size
        # dendrogram() draws on the current axes; its return value is not needed.
        dendrogram(linkage_matrix, orientation="right", labels=self.titles)

        # Booleans instead of the legacy 'off' strings, which are non-empty
        # and therefore truthy.
        plt.tick_params(
            axis='x',           # changes apply to the x-axis
            which='both',       # both major and minor ticks are affected
            bottom=False,       # ticks along the bottom edge are off
            top=False,          # ticks along the top edge are off
            labelbottom=False)  # labels along the bottom edge are off

        plt.tight_layout()  # show plot with tight layout

        plt.savefig('ward_clusters.png', dpi=200)  # save figure as ward_clusters
开发者ID:MoizRauf,项目名称:OQuant_Wiki_Clustering,代码行数:17,代码来源:ClusteringAlgo.py

示例12: lsa_dendrogram

def lsa_dendrogram(lessonpath):
    """Show a Ward dendrogram of the documents loaded from ``lessonpath``.

    Distances are ``1 - cosine_similarity`` of the document-term matrix,
    rounded to 10 decimals, and the leaves are labelled with the document
    indices returned by ``dtm_matrix``.
    """
    # Document-term matrix and document indices; the third value is unused.
    term_matrix, doc_labels, _lesson_name = dtm_matrix(lessonpath)

    # NOTE(review): the LSA reconstruction (dimension 3) is computed but the
    # distances below come from the original matrix -- confirm which one
    # was intended.
    _reduced = LSA_dtm(term_matrix, 3)

    # Cosine-based distance, rounded before clustering.
    cosine_dist = np.round(1 - cosine_similarity(term_matrix), 10)

    tree_links = ward(cosine_dist)

    show(dendrogram(tree_links, orientation="right", labels=doc_labels))
开发者ID:dizcology,项目名称:cogitatio_2,代码行数:17,代码来源:LSA_code.py

示例13: find_clusters

 def find_clusters(self, features):
     '''Cluster the rows of ``features`` and return labels and centroids.

     The requested number of clusters is
     ``int(round(features.shape[0] / 2))``.

     Returns a tuple ``(clusters, centroid)``: the flat cluster labels from
     ``fcluster`` and an array with one row per requested cluster holding
     the mean of that cluster's rows of ``features``.
     '''
     # 1. Cluster the data.
     totalClusters = int(round(features.shape[0] / 2))
     # NOTE(review): pairwise_distances(metric="cosine") presumably already
     # returns a distance, so 1 - ... would be a similarity -- confirm.
     distance = 1 - pairwise_distances(features, metric="cosine")
     # Ward minimizes the sum of squared differences within all clusters.
     # It is a variance-minimizing approach, which is similar to the k-means objective function.
     linkage_matrix = ward(distance)
     clusters = fcluster(linkage_matrix, totalClusters, criterion='maxclust')
     # A single formatted string prints the same text on Python 2 and 3.
     print("Number of clusters: %d" % totalClusters)

     # 2. Find the centroid for each cluster.
     centroid = np.empty([totalClusters, features.shape[1]])
     # Labels i run from 1 to totalClusters and are stored at row i - 1.
     for i in range(1, totalClusters + 1):
         nCluster = np.where(clusters == i)
         centroid[i - 1, :] = np.mean(features[nCluster], axis=0)
     return (clusters, centroid)
开发者ID:yxy-github,项目名称:Twitter,代码行数:17,代码来源:twitterAlgorithms.py

示例14: get_clusters

	def get_clusters(self, data, features=None, text_features=None, n_clusters=8, centroid_features=10, random_seeds=True,
		weights=None):

		"""
		Applies Agglomerative hierarchical clustering using Ward's linkage

		The encoded features are turned into ``1 - cosine_similarity`` and the
		resulting Ward linkage is stored on ``self.linkage_matrix``.

		Parameters
		----------
		data : Pandas DataFrame
			Data on which to apply clustering
		features : list, optional, default : all columns used as features
			Subset of columns in the data frame to be used as features
		text_features : list, optional, default : None
			List of features that are of type text. These are then vectorized using
			TfidfVectorizer. None is treated as an empty list.
		n_clusters : int, optional, default: 8
			The number of clusters to form as well as the number of centroids to generate.
			Not read by the code in this method.
		centroid_features : int, optional, default: 10
			The number of most-important-features to return against each cluster centroid.
			Not read by the code in this method.
		random_seeds : boolean, optional, default: True
			If False, uses clusters from kernel density estimation followed by thresholding
			as initial seeds. The number of clusters is also determined by results of kde and
			thus n_clusters parameter is ignored.
			Not read by the code in this method.
		weights : list, optional, default : None
			Not read by the code in this method.

		Returns
		-------
		result : tuple (labels, centroid_features)
			labels :
				cluster numbers against each row of the data passed
			centroids : dictionary
				map of most important features of each cluster
		"""

		# Fresh list per call rather than one mutable default shared by all calls.
		if text_features is None:
			text_features = []

		X = self.encode_features(data, features, text_features)

		# NOTE(review): looks like a leftover interactive debugging hook -- confirm and remove.
		ipshell()

		dist = 1 - cosine_similarity(X)

		self.linkage_matrix = ward(dist)

		# NOTE(review): `km` and `centroids` are not defined in this method, so this
		# line depends on names from an outer scope -- confirm.
		return (km.labels_, centroids)
开发者ID:fahadsultan,项目名称:datalib,代码行数:42,代码来源:ward.py

示例15: setUp

    def setUp(self):
        """Create the fixtures: a 5x5 random table, a dendrogram and metadata.

        ``self.t`` is a ``SquareDendrogram`` built from a Ward linkage over
        five random values with absolute-difference distances. Internal
        nodes of the source tree are named ``y<postorder index>`` and every
        node gets a random branch length below 3.
        """
        np.random.seed(0)
        self.table = pd.DataFrame(np.random.random((5, 5)),
                                  index=['0', '1', '2', '3', '4'],
                                  columns=['0', '1', '2', '3', '4'])

        num_otus = 5  # otus
        x = np.random.rand(num_otus)
        dm = DistanceMatrix.from_iterable(x, lambda a, b: np.abs(a - b))
        lm = ward(dm.condensed_form())
        # Builtin str: the np.str alias was removed from NumPy.
        t = TreeNode.from_linkage_matrix(lm, np.arange(len(x)).astype(str))
        self.t = SquareDendrogram.from_tree(t)
        self.md = pd.Series(['a', 'a', 'a', 'b', 'b'],
                            index=['0', '1', '2', '3', '4'])
        for i, n in enumerate(t.postorder()):
            if not n.is_tip():
                n.name = "y%d" % i
            n.length = np.random.rand()*3

        # One row of two colour strings for each of the nodes 'y8' and 'y6'.
        self.highlights = pd.DataFrame({'y8': ['#FF0000', '#00FF00'],
                                        'y6': ['#0000FF', '#F0000F']}).T
开发者ID:biocore,项目名称:gneiss,代码行数:21,代码来源:test_heatmap.py


注:本文中的scipy.cluster.hierarchy.ward函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。