本文整理汇总了Python中sklearn.cluster.Ward类的典型用法代码示例。如果您正苦于以下问题：Python Ward类的具体用法？Python Ward怎么用？Python Ward使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Ward类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __hieclu
def __hieclu(self):
    """Cluster ``self.data_matrix`` into ``self.k`` groups with Ward.

    Prints a progress message, then returns whatever
    ``Ward.fit_predict`` yields for ``self.data_matrix``.
    """
    # Parenthesised single-argument form prints the same text on
    # Python 2 and Python 3.
    print('using hierarchical clustering......')
    ac = Ward(n_clusters=self.k)
    # fit_predict() fits the model itself; a separate fit() call first
    # would only run the clustering twice.
    return ac.fit_predict(self.data_matrix)
示例2: constraint
def constraint(self, nodes, edges, lables):
    """Cluster ``self.A`` with Ward, constrained by one group of nodes.

    Nodes are grouped by their entry in ``lables``. Every pair of nodes in
    group 36 is marked with 1.0 in an N x N matrix (N = ``len(nodes)``),
    which is handed to ``Ward`` as its ``connectivity`` argument. The group
    sizes, the matrix and the predicted labels are printed; nothing is
    returned.

    Args:
        nodes: list of node identifiers.
        edges: not used by this method.
        lables: group label of each node, indexed like ``nodes``.

    Raises:
        KeyError: if no node carries the label 36.
    """
    if len(nodes) != len(lables):
        print("#nodes(%d) != #clusters(%d)" % (len(nodes), len(lables)))
    N = len(nodes)
    guidance_matrix = sp.zeros([N, N])

    # Group the nodes by their label.
    circles = {}
    for i in range(N):
        circles.setdefault(lables[i], []).append(nodes[i])
    # Iterating the dict directly works on Python 2 and 3; iterkeys() is
    # Python 2 only.
    for key in circles:
        print(key, len(circles[key]))

    c = 36  # NOTE(review): hard-coded group label -- confirm it is intended
    # Resolve each member's row index once instead of once per pair.
    member_rows = [nodes.index(member) for member in circles[c]]
    for i in member_rows:
        for j in member_rows:
            guidance_matrix[i, j] = 1.0
    guidance_matrix = sparse.lil_matrix(guidance_matrix)
    print(guidance_matrix)
    ward = Ward(n_clusters=6, n_components=2, connectivity=guidance_matrix)
    predicts = ward.fit_predict(self.A)
    print(predicts)
示例3: agglomerate
def agglomerate(self, nodes, edges, clusters):
    """Merge first-level clusters into 6 groups with Ward clustering.

    Builds a cluster-by-cluster matrix whose entry ``[a, b]`` counts how
    many recorded neighbors of cluster ``a``'s members belong to cluster
    ``b``, clusters the rows of that matrix with ``Ward(n_clusters=6)``,
    and maps the result back onto the nodes.

    Args:
        nodes: node identifiers (used as dict keys).
        edges: iterable of pairs; neighbors are recorded only in the
            ``edge[0]`` -> ``edge[1]`` direction.
        clusters: cluster id of each node, indexed like ``nodes``. The ids
            are used directly as matrix indices, so they are presumably
            the integers 0..N-1 -- verify against the caller.

    Returns:
        list: the merged-cluster label of every node, in ``nodes`` order.
    """
    if len(nodes) != len(clusters):
        print("#nodes(%d) != #clusters(%d)" % (len(nodes), len(clusters)))
    neighbors = {}
    for edge in edges:
        neighbors.setdefault(edge[0], []).append(edge[1])

    node_clusters = {}  # node: its cluster id
    communities = {}  # cluster id: all neighbors of its members
    for i, node in enumerate(nodes):
        cluster = clusters[i]
        # Extend a list owned by `communities`, so a per-node neighbor list
        # is never aliased and then mutated. Nodes without outgoing edges
        # contribute nothing instead of raising KeyError.
        communities.setdefault(cluster, []).extend(neighbors.get(node, ()))
        node_clusters[node] = cluster

    N = len(communities)
    affinity_matrix = sp.zeros([N, N])
    for comm, targets in communities.items():
        degree = Counter(node_clusters[target] for target in targets)
        for key, count in degree.items():
            affinity_matrix[comm, key] = count
    ward = Ward(n_clusters=6)
    predicts = ward.fit_predict(affinity_matrix)
    return [predicts[node_clusters[node]] for node in nodes]
示例4: test_connectivity_popagation
def test_connectivity_popagation():
    """
    Verify that the ward tree keeps connectivity consistent while merging.

    The test passes as long as fitting does not raise.
    """
    from sklearn.neighbors import NearestNeighbors

    # 15 points with many exact duplicates.
    points = (
        [(0.014, 0.120), (0.014, 0.099), (0.014, 0.097)]
        + [(0.017, 0.153)] * 2
        + [(0.018, 0.153)] * 7
        + [(0.018, 0.152), (0.018, 0.149), (0.018, 0.144)]
    )
    X = np.array(points)
    knn = NearestNeighbors(n_neighbors=10).fit(X)
    graph = knn.kneighbors_graph(X)
    model = Ward(n_clusters=4, connectivity=graph)
    # When merges fail to propagate connectivity, this call raises
    # IndexError.
    model.fit(X)
示例5: hierarchicalClustering
def hierarchicalClustering(x,k):
    """Cluster the rows of ``x`` into ``k`` Ward clusters.

    Returns a tuple ``(centroids, labels)``: ``centroids`` is a list of
    ``k`` lists holding the per-dimension mean of each cluster's rows, and
    ``labels`` is the output of ``Ward.fit_predict``.
    """
    labels = Ward(n_clusters=k).fit_predict(np.asarray(x))

    n_dims = len(x[0])
    # One running-sum vector per cluster, plus a member counter.
    centroids = [[0] * n_dims for _ in range(k)]
    ctrs = np.zeros(k)

    # Accumulate every row into the sum vector of its cluster.
    for row in range(len(x)):
        cluster = labels[row]
        sums = centroids[cluster]
        for d in range(len(sums)):
            sums[d] += x[row][d]
        ctrs[cluster] += 1

    # Turn the sums into means.
    for cluster, sums in enumerate(centroids):
        centroids[cluster] = [total / ctrs[cluster] for total in sums]
    return (centroids, labels)
示例6: hieclu
def hieclu(data_matrix, k):
    """Cluster ``data_matrix`` into ``k`` groups with Ward.

    Prints a progress message, then returns whatever
    ``Ward.fit_predict`` yields for ``data_matrix``.
    """
    # Parenthesised single-argument form prints the same text on
    # Python 2 and Python 3.
    print('using hierarchical clustering......')
    ac = Ward(n_clusters=k)
    # fit_predict() fits the model itself; a separate fit() call first
    # would only run the clustering twice.
    return ac.fit_predict(data_matrix)
示例7: test_ward_clustering
def test_ward_clustering():
    """
    Check that we obtain the correct number of clusters with Ward clustering.
    """
    rnd = np.random.RandomState(0)
    # Builtin bool: the np.bool alias was deprecated in NumPy 1.20 and
    # removed in 1.24, where it raises AttributeError.
    mask = np.ones([10, 10], dtype=bool)
    X = rnd.randn(100, 50)
    # Only the mask's shape is used, to build a 10 x 10 grid graph.
    connectivity = grid_to_graph(*mask.shape)
    clustering = Ward(n_clusters=10, connectivity=connectivity)
    clustering.fit(X)
    assert_true(np.size(np.unique(clustering.labels_)) == 10)
示例8: test_connectivity_fixing_non_lil
def test_connectivity_fixing_non_lil():
    """
    Non-regression test: fitting must work when the connectivity matrix has
    more than one component and does not support item assignment.
    """
    # Dummy data.
    samples = np.array([[0, 0], [1, 1]])
    # A mask with several components forces connectivity fixing.
    mask = np.array([[True, False], [False, True]])
    graph = grid_to_graph(n_x=2, n_y=2, mask=mask)
    model = Ward(connectivity=graph)
    model.fit(samples)
示例9: cluster_ward
def cluster_ward(classif_data, vect_data):
    """Cluster the training vectors into 10 Ward clusters.

    Prints "Ward" followed by the Euclidean silhouette score of the
    clustering.

    Args:
        classif_data: not used.
        vect_data: mapping whose ``"train_vect"`` entry holds the training
            vectors.

    Returns:
        The cluster labels produced by ``Ward.fit_predict``.
    """
    ward = Ward(n_clusters=10)
    # The topic labels and test vectors were converted to arrays here but
    # never used, so those conversions are gone.
    np_arr_train = np.array(vect_data["train_vect"])
    labels = ward.fit_predict(np_arr_train)
    # Parenthesised single-argument prints behave the same on Python 2 and 3.
    print("Ward")
    sil_score = metrics.silhouette_score(np_arr_train, labels, metric='euclidean')
    print(sil_score)
    return labels
示例10: get_km_segments
def get_km_segments(x, image, sps, n_segments=25):
    """Group the items described by ``x`` into ``n_segments`` Ward clusters.

    Despite the name, no k-means is run: the clustering is done by ``Ward``
    with a connectivity graph built from the edge list.

    Args:
        x: a 2-item ``(feats, edges)`` or 3-item ``(feats, edges, extra)``
            sequence; the third item is ignored.
        image, sps: forwarded to ``get_colors`` / ``get_centers``
            (``sps`` presumably is a superpixel map -- confirm).
        n_segments: number of clusters requested from ``Ward``.

    Returns:
        The labels from ``Ward.fit_predict`` on the stacked colour and
        (half-weighted) centre features.
    """
    if len(x) != 2:
        feats, edges, _ = x
    else:
        feats, edges = x
    colors_ = get_colors(image, sps)
    centers = get_centers(sps)

    n_spixel = len(feats)
    weights = np.ones(edges.shape[0])
    adjacency = sparse.coo_matrix((weights, edges.T), shape=(n_spixel, n_spixel))
    # Add the transpose so every edge is present in both directions.
    symmetric = adjacency + adjacency.T

    model = Ward(n_clusters=n_segments, connectivity=symmetric)
    color_feats = np.hstack([colors_, centers * 0.5])
    return model.fit_predict(color_feats)
示例11: spectral_cluster
def spectral_cluster(data, n_clusters, method='sl'):
    """Spectral clustering of ``data`` followed by a plot of the result.

    A Laplacian matrix is built according to ``method``, its eigenvectors
    are sorted by eigenvalue, the first ``n_clusters`` of them are
    normalised with ``normal_eigen`` and clustered with ``Ward``. The
    labels are passed to ``mat_plot_cluster_sample``; nothing is returned.

    Args:
        data: samples, forwarded to the Laplacian builder and the plotter.
        n_clusters: number of eigenvectors kept and of clusters requested.
        method: ``'NJW'`` or ``'self-tuning'`` (eigenvalues sorted in
            descending order); any other value uses ``get_lap_matrix_sl``
            with ascending order.
    """
    # Build the Laplacian matrix. Only the builder and the sort direction
    # differ between methods.
    if method == 'NJW':
        lap_matrix = get_lap_matrix_njw(data, 0.1)
        descending = True
    elif method == 'self-tuning':
        lap_matrix = get_lap_matrix_self_tuning(data)
        descending = True
    else:
        lap_matrix = get_lap_matrix_sl(data, 0.1)
        descending = False

    eigenvalues, eigenvectors = np.linalg.eig(lap_matrix)
    idx = eigenvalues.argsort()
    if descending:
        idx = idx[::-1]
    eigenvectors = eigenvectors[:, idx]

    # Keep the first n_clusters eigenvectors.
    x_matrix = eigenvectors[:, 0:n_clusters]
    # Normalise the eigenvector matrix.
    y_matrix = normal_eigen(x_matrix)

    # Cluster with sklearn's hierarchical (Ward) method.
    hie_cluster = Ward(n_clusters)
    hie_cluster.fit(y_matrix)
    mat_plot_cluster_sample(data, hie_cluster.labels_, method)
示例12: ward
def ward(self, X, n_clusters, plot=True):
    """Fit Ward clustering on ``X`` and optionally scatter-plot the result.

    Args:
        X: data passed to ``Ward.fit`` and, when plotting, to a
            2-component ``RandomizedPCA``.
        n_clusters: number of clusters requested from ``Ward``.
        plot: when true, draw every sample of the 2-D projection coloured
            by its cluster label and show the figure.

    Returns:
        The fitted model's ``labels_``.
    """
    model = Ward(n_clusters=n_clusters, copy=False, compute_full_tree=True, memory="cache")
    model.fit(X)
    labels = model.labels_
    # The figure is reset on every call, whether or not anything is drawn.
    pl.close('all')
    pl.figure(1)
    pl.clf()
    if plot:
        colors = "rbgcmybgrcmybgrcmybgrcm" * 10
        X2d = RandomizedPCA(n_components=2).fit_transform(X)
        # Direct iteration replaces the Python-2-only xrange index loop.
        for point, label in zip(X2d, labels):
            # Wrap around the palette so a label beyond its length cannot
            # raise IndexError.
            color = colors[label % len(colors)]
            pl.plot(point[0], point[1], "o", markerfacecolor=color, markeredgecolor=color, alpha=0.035)
        pl.show()
    return labels
示例13: cluster_ward
def cluster_ward(self, calpha=True):
    '''
    Cluster the positively predicted residues using the Ward method.

    Only C-alpha based clustering (``calpha=True``) is implemented.

    Returns a dict mapping a rank (0 = largest cluster) to the residue
    numbers belonging to that cluster. Returns an empty dict, after
    printing ``self.pdbid`` and the coordinate shape, when fewer than 4
    atoms are available.

    Raises NotImplementedError when ``calpha`` is false.
    '''
    if not calpha:
        # This path used to fall through to an unbound `data_atoms` and
        # fail with UnboundLocalError; fail with a clear message instead.
        raise NotImplementedError('cluster_ward only supports calpha=True')
    data_atoms = self.positive_surface_residues.ca
    # Fetch the coordinates once and reuse them below.
    coords = data_atoms.getCoords()
    if coords.shape[0] < 4:
        # Same text as the Python 2 statement `print a, b`.
        print('%s %s' % (self.pdbid, coords.shape))
        return {}
    connectivity = kneighbors_graph(coords, 5)
    ward = Ward(n_clusters=self.WARD_N_CLUSTERS, connectivity=connectivity)
    ward.fit(coords)
    resnums = data_atoms.getResnums()
    reslabels = ward.labels_
    # Largest cluster first.
    clusters = sorted([resnums[reslabels == i] for i in set(reslabels)], key=len, reverse=True)
    return dict(enumerate(clusters))
示例14: compute_clusters
def compute_clusters(dataset,features_vector):
    """
    Apply the clustering method named by ``cmd_options.clust_method``.

    The number of clusters is the number of distinct values in
    ``dataset.target``.

    Returns the fitted ``Ward`` model for ``"hclust"`` (fitted on the
    dense ``features_vector.toarray()``), the fitted ``KMeans`` model for
    ``"kmeans"``, and None for any other method name.
    """
    labels = dataset.target
    true_k = np.unique(labels).shape[0]
    method = cmd_options.clust_method
    # Run clustering method
    # Formatted to match the output of the original comma-separated
    # Python 2 print statement, while also being valid on Python 3.
    print("%s %s" % ("Performing clustering with method ", method.upper()))
    print("")
    if method == "hclust":
        result = features_vector.toarray()
        ward = Ward(n_clusters=true_k)
        ward.fit(result)
        return ward
    if method == "kmeans":
        km = KMeans(n_clusters=true_k, init='k-means++', max_iter=1000, verbose=1)
        km.fit(features_vector)
        return km
    # Unrecognised method name: return None, as the original fall-through
    # did implicitly.
    return None
示例15: ward
def ward(X, n_clust):
    """Fit a ``Ward`` model with ``n_clust`` clusters on ``X`` and return it."""
    model = Ward(n_clusters=n_clust)
    model.fit(X)
    return model