本文整理汇总了Python中scipy.cluster.hierarchy.ward函数的典型用法代码示例。如果您正苦于以下问题:Python ward函数的具体用法?Python ward怎么用?Python ward使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了ward函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: demoFourGs
def demoFourGs():
    '''
    Demonstrate the performance of LCC
    on points drawn from a four gaussians
    '''
    # Canvas size (width, height) in pixels for all plots.
    s=(640,480)
    # dat[0] = list of points, dat[1] = ground-truth cluster labels.
    dat = genNormalClusters(N=100, size=s)
    cList = ['red', 'blue','green','yellow']
    # Ground-truth plot: colour each point by its generating cluster.
    img_truth = plotClusts(dat[0], dat[1], size=s,
                           colors=[cList[i] for i in dat[1]], window=None)
    #generate normal hierarchical clustering off euclidean data points
    print "Generating Hierarchical Clustering on Raw Data"
    Z2 = spc.ward(scipy.array(dat[0]))
    # fcluster labels are 1-based, hence i-1 when indexing colours below.
    clusts2 = spc.fcluster(Z2, 4, criterion="maxclust")
    img_HC = plotClusts(dat[0], clusts2, size=s,
                        colors=[cList[i-1] for i in clusts2], window=None)
    #generate LCC clustering
    print "Generating LCC Clustering"
    (clusts, _,_,_) = pf.LatentConfigurationClustering(dat[0], pt_dist, 4, numtrees=27)
    img_LCC = plotClusts(dat[0], clusts, size=s,
                         colors=[cList[i-1] for i in clusts], window=None)
    # Side-by-side montage: ground truth vs. LCC vs. hierarchical clustering.
    im = pv.ImageMontage([img_truth, img_LCC, img_HC], layout=(1,3), gutter=3,
                         tileSize=(320,240), labels=None )
    im.show(window="Truth vs. LCC vs. HC")
示例2: test_scikit_vs_scipy
def test_scikit_vs_scipy():
    """Test scikit ward with full connectivity (i.e. unstructured) vs scipy
    """
    from scipy.sparse import lil_matrix
    n, p, k = 10, 5, 3
    rnd = np.random.RandomState(0)

    # A dense connectivity matrix makes the structured ward_tree equivalent
    # to unstructured clustering, so it should agree with scipy's ward.
    connectivity = lil_matrix(np.ones((n, n)))
    for i in range(5):
        X = 0.1 * rnd.normal(size=(n, p))
        X -= 4 * np.arange(n)[:, np.newaxis]
        X -= X.mean(axis=1)[:, np.newaxis]

        out = hierarchy.ward(X)
        # `np.int` was removed in NumPy 1.24; the builtin `int` is equivalent.
        children_ = out[:, :2].astype(int)
        children, _, n_leaves, _ = ward_tree(X, connectivity)

        cut = _hc_cut(k, children, n_leaves)
        cut_ = _hc_cut(k, children_, n_leaves)
        assess_same_labelling(cut, cut_)

    # Test error management in _hc_cut
    assert_raises(ValueError, _hc_cut, n_leaves + 1, children, n_leaves)
示例3: make_tree
def make_tree(X, C, method='single'):
    """Build a Tree via hierarchical clustering.

    Parameters
    ----------
    X : array_like
        Raw observations; only used by the 'ward' method.
    C : array_like
        Precomputed distances; used by 'single' and 'average'.
    method : str, optional
        Linkage method: 'single' (default), 'ward' or 'average'.

    Raises
    ------
    ValueError
        If `method` is not one of the supported linkage methods.
    """
    if method == 'single':
        root = to_tree(single(C))
    elif method == 'ward':
        root = to_tree(ward(X))
    elif method == 'average':
        root = to_tree(average(C))
    else:
        # Previously an unknown method fell through and raised an opaque
        # UnboundLocalError on `tree`; fail fast with a clear message.
        raise ValueError("unknown linkage method: %r" % (method,))
    return Tree(root=construct_node(root))
示例4: plotHierarchichalClusterGraph
def plotHierarchichalClusterGraph(tf_idf_matrix, headlines_utf):
    """Save a Ward dendrogram of the headlines to ../plots/hierachichal_clusters.png.

    NOTE(review): the square cosine-distance matrix is passed to ward(), which
    interprets it as raw observations rather than precomputed pairwise
    distances — a common tutorial idiom, but confirm it is intended.
    """
    # Cosine distance between tf-idf rows.
    dist = 1 - cosine_similarity(tf_idf_matrix)
    linkage_matrix = ward(dist)
    fig, ax = plt.subplots(figsize=(15, 20))
    dendrogram(linkage_matrix, orientation="right", labels=headlines_utf)
    # Hide all x-axis ticks and labels.
    plt.tick_params(axis='x', which='both', bottom='off', top='off',
                    labelbottom='off')
    plt.tight_layout()
    plt.savefig('../plots/hierachichal_clusters.png', dpi=200)
示例5: setUp
def setUp(self):
    """Build a small random tree fixture with unit branch lengths and
    named internal nodes."""
    np.random.seed(0)
    x = np.random.rand(10)
    dm = DistanceMatrix.from_iterable(x, lambda x, y: np.abs(x-y))
    lm = ward(dm.condensed_form())
    # `np.str` was removed in NumPy 1.24; the builtin `str` is equivalent.
    ids = np.arange(len(x)).astype(str)
    self.tree = TreeNode.from_linkage_matrix(lm, ids)
    # initialize tree with branch length and named internal nodes
    for i, n in enumerate(self.tree.postorder(include_self=True)):
        n.length = 1
        if not n.is_tip():
            n.name = "y%d" % i
示例6: hierarchyCluster
def hierarchyCluster(dist, titles):
    """Show a Ward dendrogram for a precomputed distance matrix.

    Parameters
    ----------
    dist : array_like
        Precomputed distances, handed straight to scipy's ward().
    titles : sequence
        Leaf labels for the dendrogram.
    """
    linkage_matrix = ward(dist)
    fig, ax = plt.subplots(figsize=(15, 20))
    ax = dendrogram(linkage_matrix, orientation="right", labels=titles)
    # Hide the major x-axis ticks but keep the bottom labels visible.
    plt.tick_params(axis='x', which='major', bottom='off', top='off',
                    labelbottom='on')
    plt.tight_layout()
    plt.show()
示例7: _ward_cluster
def _ward_cluster(X):
"""Clusters 1-corr using Ward distance
Parameters
----------
X
Returns
-------
"""
# pairwise (1-corr) of zscores
D = pdist( X, metric="correlation" )
# return top branch split using ward linkage
return fcluster( ward(D), 2, criterion="maxclust" )
示例8: hierachical_clustering
def hierachical_clustering(self):
    """Display a Ward dendrogram of this instance's precomputed distance matrix."""
    linkage_matrix = ward(self.__dist_matrix) #define the linkage_matrix using ward clustering pre-computed distances
    fig, ax = plt.subplots(figsize=(15, 9)) # set size
    # NOTE(review): `titles` is not defined in this method or any visible
    # scope — this line raises NameError at runtime; an instance attribute
    # (e.g. self.titles) was probably intended. Confirm and fix.
    ax = dendrogram(linkage_matrix, orientation="right", labels=titles);
    plt.tick_params(\
        axis= 'x',          # changes apply to the x-axis
        which='both',       # both major and minor ticks are affected
        bottom='off',       # ticks along the bottom edge are off
        top='off',          # ticks along the top edge are off
        labelbottom='off')  # labels along the bottom edge are off
    fig.set_tight_layout(True) #show plot with tight layout
    plt.show()
示例9: test_cache_ntips
def test_cache_ntips(self):
    """_cache_ntips should record the tip count on every node of a
    balanced 4-leaf tree (root=4, children=2, grandchildren=1)."""
    dm = DistanceMatrix.from_iterable([0, 1, 2, 3],
                                      lambda x, y: np.abs(x-y))
    lm = ward(dm.condensed_form())
    # `np.str` was removed in NumPy 1.24; the builtin `str` is equivalent.
    ids = np.arange(4).astype(str)
    t = mock.from_linkage_matrix(lm, ids)
    t._cache_ntips()
    # `assertEquals` is a deprecated alias of `assertEqual` (removed in 3.12).
    self.assertEqual(t.leafcount, 4)
    self.assertEqual(t.children[0].leafcount, 2)
    self.assertEqual(t.children[1].leafcount, 2)
    self.assertEqual(t.children[0].children[0].leafcount, 1)
    self.assertEqual(t.children[0].children[1].leafcount, 1)
    self.assertEqual(t.children[1].children[0].leafcount, 1)
    self.assertEqual(t.children[1].children[1].leafcount, 1)
示例10: knn
def knn(df, axis=None, labels=None):
    """Draw a Ward dendrogram of the DataFrame's rows using cosine distance.

    NOTE: despite the name, this performs hierarchical clustering, not
    k-nearest-neighbours. `axis` is accepted but unused.
    """
    # Cosine distance between rows of the frame's values.
    dist = 1 - cosine_similarity(df.values)
    linkage_matrix = ward(dist)
    fig, ax = plt.subplots(figsize=(15, 20))
    ax = dendrogram(linkage_matrix, orientation="right", labels=labels)
    # Hide every x-axis tick and label.
    plt.tick_params(axis='x', which='both', bottom='off', top='off',
                    labelbottom='off')
    plt.tight_layout()
示例11: create_hierarchy
def create_hierarchy(self, sim_matrix):
    """Plot a Ward dendrogram of self.titles and save it as ward_clusters.png.

    NOTE(review): `sim_matrix` is handed straight to ward(); if it really is a
    similarity matrix it may need converting to distances (e.g. 1 - sim)
    first — confirm against the caller.
    """
    linkage_matrix = ward(sim_matrix)
    fig, ax = plt.subplots(figsize=(15, 20))
    ax = dendrogram(linkage_matrix, orientation="right", labels=self.titles)
    # Hide every x-axis tick and label.
    plt.tick_params(axis='x', which='both', bottom='off', top='off',
                    labelbottom='off')
    plt.tight_layout()
    plt.savefig('ward_clusters.png', dpi=200)
    return
示例12: lsa_dendrogram
def lsa_dendrogram(lessonpath):
    """Display a Ward dendrogram of a lesson's documents using cosine distance."""
    # Document-term matrix plus document indices used as leaf labels.
    dtm, docindex, lessonname = dtm_matrix(lessonpath)
    # LSA reconstruction over a rank-3 subspace.
    # NOTE(review): `dtm2` is computed but never used — the distances below
    # are taken on the raw `dtm`. Confirm whether cosine_similarity(dtm2)
    # was intended.
    dtm2 = LSA_dtm(dtm, 3)
    # Cosine distance, rounded to suppress tiny float-error residues.
    dist = np.round(1 - cosine_similarity(dtm), 10)
    linkage_matrix = ward(dist)
    show(dendrogram(linkage_matrix, orientation="right", labels=docindex))
示例13: find_clusters
def find_clusters(self, features):
''' Returns the clusters and their centroids.'''
# 1. Cluster the data.
totalClusters = int(round(features.shape[0] / 2))
distance = 1 - pairwise_distances(features, metric = "cosine")
# Ward minimizes the sum of squared differences within all clusters.
# It is a variance-minimizing approach, which is similar to the k-means objective function.
linkage_matrix = ward(distance)
clusters = fcluster(linkage_matrix, totalClusters, criterion = 'maxclust')
print "Number of clusters:", totalClusters
# 2. Find the centroid for each cluster.
centroid = np.empty([totalClusters, features.shape[1]])
for i in range(1, totalClusters + 1):
nCluster = np.where(clusters == i)
centroid[i-1,:] = np.mean(features[nCluster], axis = 0)
return (clusters, centroid)
示例14: get_clusters
def get_clusters(self, data, features=None, text_features=[], n_clusters=8, centroid_features=10, random_seeds=True,
                 weights=[]):
    """
    Applies Agglomerative hierarchical clustering using Ward's linkage.

    Parameters
    ----------
    data : Pandas DataFrame
        Data on which to apply clustering.
    features : list, optional, default : all columns used as features
        Subset of columns in the data frame to be used as features.
    text_features : list, optional, default : None
        List of features that are of type text. These are then vectorized
        using TfidfVectorizer.
    n_clusters : int, optional, default: 8
        The number of flat clusters to form.
    centroid_features : int, optional, default: 10
        The number of most-important-features to return for each cluster.
    random_seeds : boolean, optional, default: True
        Currently unused; kept for interface compatibility.

    Returns
    -------
    result : tuple (labels, centroids)
        labels :
            1-based cluster number against each row of the data passed.
        centroids : dictionary
            Maps each cluster number to the indices of its `centroid_features`
            highest-mean features.
    """
    # Local imports keep the method self-contained; `weights` is also
    # currently unused (interface compatibility).
    import numpy as np
    from scipy.cluster.hierarchy import fcluster

    X = self.encode_features(data, features, text_features)
    # Cosine distance between encoded rows.
    dist = 1 - cosine_similarity(X)
    self.linkage_matrix = ward(dist)
    # Cut the Ward tree into the requested number of flat clusters.
    # The previous version left an `ipshell()` debug call here and returned
    # the undefined names `km`/`centroids`, crashing with NameError.
    labels = fcluster(self.linkage_matrix, n_clusters, criterion='maxclust')
    # Most important features per cluster: indices of the largest mean
    # feature values among the cluster's members. Densify sparse tf-idf
    # output so plain NumPy indexing works.
    dense = np.asarray(X.todense()) if hasattr(X, 'todense') else np.asarray(X)
    centroids = {}
    for c in np.unique(labels):
        mean_vec = np.asarray(dense[labels == c].mean(axis=0)).ravel()
        centroids[c] = np.argsort(mean_vec)[::-1][:centroid_features]
    return (labels, centroids)
示例15: setUp
def setUp(self):
    """Build fixtures: a random 5x5 table, a matching dendrogram tree with
    named internal nodes, sample metadata, and highlight colours."""
    np.random.seed(0)
    self.table = pd.DataFrame(np.random.random((5, 5)),
                              index=['0', '1', '2', '3', '4'],
                              columns=['0', '1', '2', '3', '4'])
    num_otus = 5  # otus
    x = np.random.rand(num_otus)
    dm = DistanceMatrix.from_iterable(x, lambda x, y: np.abs(x-y))
    lm = ward(dm.condensed_form())
    # `np.str` was removed in NumPy 1.24; the builtin `str` is equivalent.
    t = TreeNode.from_linkage_matrix(lm, np.arange(len(x)).astype(str))
    self.t = SquareDendrogram.from_tree(t)
    self.md = pd.Series(['a', 'a', 'a', 'b', 'b'],
                        index=['0', '1', '2', '3', '4'])
    # Name internal nodes and give them random branch lengths.
    # NOTE(review): source indentation was lost — assuming both assignments
    # belong inside the `if not n.is_tip()` branch; confirm against upstream.
    for i, n in enumerate(t.postorder()):
        if not n.is_tip():
            n.name = "y%d" % i
            n.length = np.random.rand()*3
    self.highlights = pd.DataFrame({'y8': ['#FF0000', '#00FF00'],
                                    'y6': ['#0000FF', '#F0000F']}).T