This article collects typical usage examples of the Python method sklearn.cluster.AgglomerativeClustering.fit. If you have been wondering what AgglomerativeClustering.fit does, how to call it, or what real code that uses it looks like, the curated examples below should help. You can also explore further usage of the class it belongs to, sklearn.cluster.AgglomerativeClustering.
The following 15 code examples of AgglomerativeClustering.fit are sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
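Before the longer examples, here is a minimal sketch of the basic fit workflow on synthetic data; the blob parameters and cluster count below are illustrative choices, not taken from any of the examples that follow.

import numpy as np
from sklearn.cluster import AgglomerativeClustering
from sklearn.datasets import make_blobs

# Toy data: three Gaussian blobs (illustrative values).
X, _ = make_blobs(n_samples=150, centers=3, random_state=0)

# Hierarchical (agglomerative) clustering with Ward linkage.
model = AgglomerativeClustering(n_clusters=3, linkage="ward")
model.fit(X)          # fit() assigns a cluster index to every sample
print(model.labels_)  # array of shape (n_samples,) with values in {0, 1, 2}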
Example 1: train_agglomerative
# Required import: from sklearn.cluster import AgglomerativeClustering [as alias]
# Or: from sklearn.cluster.AgglomerativeClustering import fit [as alias]
def train_agglomerative():
    # num_clusters, aggl_affinity, aggl_linkage and X are defined at module level.
    print("starting agglomerative clustering...")
    model = AgglomerativeClustering(n_clusters=num_clusters, affinity=aggl_affinity,
                                    linkage=aggl_linkage)
    model.fit(X)
    labels = model.labels_
    print(labels)
Example 2: cluster_agg
# Required import: from sklearn.cluster import AgglomerativeClustering [as alias]
# Or: from sklearn.cluster.AgglomerativeClustering import fit [as alias]
def cluster_agg(cluster_data):
    # df is a module-level DataFrame containing a 'Player' column.
    clstr = AgglomerativeClustering(n_clusters=11, linkage='ward')
    clstr.fit(cluster_data)
    df['tier'] = clstr.labels_
    results = df[['Player', 'tier']]
    return results
Example 3: eval_dist
# Required import: from sklearn.cluster import AgglomerativeClustering [as alias]
# Or: from sklearn.cluster.AgglomerativeClustering import fit [as alias]
def eval_dist(linkage='ward'):
    # x, y, calc_distance() and metrics are defined/imported elsewhere in the module.
    a_score = []
    idx = []
    d = [[] for i in range(3)]
    for k in range(2, 50 + 1):
        print('k={}'.format(k))
        est = AgglomerativeClustering(n_clusters=k, linkage=linkage)
        est.fit(x)
        ari_v = metrics.adjusted_rand_score(y, est.labels_)
        ds = calc_distance(k, est.labels_)
        for i in range(3):
            d[i].append(ds[i])
        print(ari_v)
        a_score.append(ari_v)
        idx.append(k)
    fig, axes = plt.subplots(nrows=1, ncols=2)
    axes[0].plot(idx, a_score)
    # plt.xlim(0, 220)
    axes[0].set_ylim(bottom=0)
    axes[0].set_ylabel('ARI')
    axes[0].set_xlabel('# of clusters')
    # plt.savefig('figs/hc_ari.png')
    # plt.show()
    # plt.close()
    labels = ['Minimum', 'Maximum', 'Average']
    # for i in range(3):
    #     axes[1].plot(idx, d[i], label=labels[i])
    axes[1].plot(idx, d[1], label=labels[1])
    axes[1].legend()
    axes[1].set_ylabel('distance')
    axes[1].set_xlabel('# of clusters')
    # plt.savefig('figs/hc_distance.png')
    plt.show()
Example 4: __generate_dummy_data
# Required import: from sklearn.cluster import AgglomerativeClustering [as alias]
# Or: from sklearn.cluster.AgglomerativeClustering import fit [as alias]
def __generate_dummy_data():
    from sklearn.cluster import AgglomerativeClustering
    import itertools
    X = np.array([[ -5.27453240e-01, -6.14130238e-01, -1.63611427e+00,
                    -9.26556498e-01,  7.82296885e-01, -1.06286220e+00,
                    -1.24368729e+00, -1.16151964e+00, -2.25816923e-01,
                    -3.32354552e-02],
                  [ -2.01273137e-01,  5.25758359e-01,  1.37940072e+00,
                    -7.63256657e-01, -1.27275323e+00, -1.31618084e+00,
                    -7.00167331e-01,  2.21410669e+00,  9.15456567e-01,
                     7.93076923e-01],
                  [  1.53249104e-01, -5.48642411e-01, -1.06559060e+00,
                    -3.05253203e-01, -1.93393495e+00,  1.39827978e-01,
                     1.73359830e-01,  2.85576854e-02, -1.19427027e+00,
                     1.04395610e+00],
                  [  1.00595172e+02,  1.01661346e+02,  1.00115635e+02,
                     9.86884249e+01,  9.86506406e+01,  1.02214982e+02,
                     1.01144087e+02,  1.00642778e+02,  1.01635339e+02,
                     9.88981171e+01],
                  [  1.01506262e+02,  1.00525318e+02,  9.93021764e+01,
                     9.92514163e+01,  1.01199015e+02,  1.01771241e+02,
                     1.00464097e+02,  9.97482396e+01,  9.96888274e+01,
                     9.88297336e+01]])
    model = AgglomerativeClustering(linkage="average", affinity="cosine")
    model.fit(X)
    # Log each merge of the tree; DEBUG is the module's logging helper.
    ii = itertools.count(X.shape[0])
    DEBUG(str([{'node_id': next(ii), 'left': x[0], 'right': x[1]} for x in model.children_]))
    return model, model.labels_
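The children_ attribute logged above encodes the merge tree: it has one row per merge, child indices below n_samples refer to original samples (leaves), and larger indices refer to nodes created by earlier merges. A minimal sketch that walks it with the same counting convention as the example (the toy data here is illustrative):

import numpy as np
from sklearn.cluster import AgglomerativeClustering

X = np.random.RandomState(0).randn(6, 2)  # toy data, illustrative only
model = AgglomerativeClustering(n_clusters=2).fit(X)

n_samples = X.shape[0]
for node_id, (left, right) in enumerate(model.children_, start=n_samples):
    # Children with index < n_samples are leaves; >= n_samples are earlier merges.
    print({'node_id': node_id, 'left': left, 'right': right})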
Example 5: knn_connectivity
# Required import: from sklearn.cluster import AgglomerativeClustering [as alias]
# Or: from sklearn.cluster.AgglomerativeClustering import fit [as alias]
def knn_connectivity(self, X):
    # Compare unstructured clustering with clustering constrained by a k-NN graph.
    knn_graph = kneighbors_graph(X, 30, include_self=False)
    for connectivity in (None, knn_graph):
        n_clusters = 4
        plt.figure(figsize=(10, 4))
        for index, linkage in enumerate(('average', 'complete', 'ward')):
            plt.subplot(1, 3, index + 1)
            model = AgglomerativeClustering(linkage=linkage,
                                            connectivity=connectivity,
                                            n_clusters=n_clusters)
            t0 = time.time()
            model.fit(X)
            elapsed_time = time.time() - t0
            plt.scatter(X[:, 0], X[:, 1], c=model.labels_,
                        cmap=plt.cm.spectral)  # plt.cm.nipy_spectral on newer matplotlib
            plt.title('linkage=%s (time %.2fs)' % (linkage, elapsed_time),
                      fontdict=dict(verticalalignment='top'))
            plt.axis('equal')
            plt.axis('off')
            plt.subplots_adjust(bottom=0, top=.89, wspace=0,
                                left=0, right=1)
            plt.suptitle('n_cluster=%i, connectivity=%r' %
                         (n_clusters, connectivity is not None), size=17)
        plt.show()
Example 6: wardHierarchical
# Required import: from sklearn.cluster import AgglomerativeClustering [as alias]
# Or: from sklearn.cluster.AgglomerativeClustering import fit [as alias]
def wardHierarchical(img):
    print("Compute structured hierarchical clustering...")
    st = time.time()
    n_clusters = 15  # number of regions
    # Cluster a downsampled copy so the labels can be reshaped back to its grid.
    face = sp.misc.imresize(img, 0.10) / 255.
    connectivity = grid_to_graph(*face.shape)
    ward = AgglomerativeClustering(n_clusters=n_clusters, linkage='ward',
                                   connectivity=connectivity)
    X = np.reshape(face, (-1, 1))
    ward.fit(X)
    label = np.reshape(ward.labels_, face.shape)
    print("Elapsed time: ", time.time() - st)
    print("Number of pixels: ", label.size)
    print("Number of clusters: ", np.unique(label).size)
    plt.figure(figsize=(5, 5))
    plt.imshow(face, cmap=plt.cm.gray)
    for l in range(n_clusters):
        plt.contour(label == l, contours=1,
                    colors=[plt.cm.spectral(l / float(n_clusters)), ])
    plt.xticks(())
    plt.yticks(())
    plt.show()
Example 7: plot_mfi
# Required import: from sklearn.cluster import AgglomerativeClustering [as alias]
# Or: from sklearn.cluster.AgglomerativeClustering import fit [as alias]
def plot_mfi(self, outputfile='embeddings.pdf', nb_clusters=8, weights='NA'):
    # collect embeddings for mfi:
    X = np.asarray([self.w2v_model[w] for w in self.mfi
                    if w in self.w2v_model], dtype='float32')
    # dimension reduction:
    tsne = TSNE(n_components=2)
    coor = tsne.fit_transform(X)  # unsparsify
    plt.clf()
    sns.set_style('dark')
    plt.rcParams['axes.linewidth'] = 0.4  # sns.plt in the original; removed from newer seaborn
    fig, ax1 = plt.subplots()
    labels = self.mfi
    # first plot slices:
    x1, x2 = coor[:, 0], coor[:, 1]
    ax1.scatter(x1, x2, 100, edgecolors='none', facecolors='none')
    # clustering on top (add some colouring):
    clustering = AgglomerativeClustering(linkage='ward',
                                         affinity='euclidean', n_clusters=nb_clusters)
    clustering.fit(coor)
    # add names:
    for x, y, name, cluster_label in zip(x1, x2, labels, clustering.labels_):
        ax1.text(x, y, name, ha='center', va="center",
                 color=plt.cm.spectral(cluster_label / 10.),
                 fontdict={'family': 'Arial', 'size': 8})
    # control aesthetics:
    ax1.set_xlabel('')
    ax1.set_ylabel('')
    ax1.set_xticklabels([])
    ax1.set_xticks([])
    ax1.set_yticklabels([])
    ax1.set_yticks([])
    plt.savefig(outputfile, bbox_inches=0)
Example 8: agglomerative_clusters
# Required import: from sklearn.cluster import AgglomerativeClustering [as alias]
# Or: from sklearn.cluster.AgglomerativeClustering import fit [as alias]
def agglomerative_clusters(self, word_vectors):
    # Pre-calculate BallTree object
    starting = time.time()
    Ball_Tree = BallTree(word_vectors, leaf_size=200, metric="minkowski")
    print("BallTree object in " + str(time.time() - starting))
    # Pre-calculate k_neighbors graph; `workers` is a module-level worker count.
    starting = time.time()
    connectivity_graph = kneighbors_graph(Ball_Tree,
                                          n_neighbors=1,
                                          mode="connectivity",
                                          metric="minkowski",
                                          p=2,
                                          include_self=False,
                                          n_jobs=workers
                                          )
    print("Pre-compute connectivity graph in " + str(time.time() - starting))
    # Agglomerative clustering
    starting = time.time()
    Agl = AgglomerativeClustering(n_clusters=100,
                                  affinity="minkowski",
                                  connectivity=connectivity_graph,
                                  compute_full_tree=True,
                                  linkage="average"
                                  )
    Agl.fit(word_vectors)
    print("Agglomerative clustering in " + str(time.time() - starting))
    clusters = Agl.labels_
    return clusters
Example 9: clustering
# Required import: from sklearn.cluster import AgglomerativeClustering [as alias]
# Or: from sklearn.cluster.AgglomerativeClustering import fit [as alias]
def clustering(data, params):
    # parse parameters: exec() injects each key of `params` (n_clusters, affinity,
    # linkage) as a variable; note that exec() cannot reliably create new local
    # variables inside a function in Python 3.
    for item in params:
        if isinstance(params[item], str):
            exec(item + '=' + '"' + params[item] + '"')
        else:
            exec(item + '=' + str(params[item]))
    # apply Agglomerative Clustering to the reduced data
    clusters = AgglomerativeClustering(n_clusters=n_clusters,
                                       affinity=affinity, linkage=linkage)
    clusters.fit(data)
    # Agglomerative Clustering does not give cluster centers,
    # so use the mean of each cluster instead (mean/array come from numpy).
    cluster_centers = []
    for i in range(n_clusters):
        mask = (clusters.labels_ == i)
        cluster_centers.append(mean(data[mask], axis=0))
    cluster_centers = array(cluster_centers)
    return [cluster_centers, clusters.labels_]
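Because exec() cannot create new local variables inside a Python 3 function, the example above is fragile. A minimal sketch of the same idea using a plain dictionary lookup; the parameter names are taken from the example, while the function name and default values are illustrative assumptions:

import numpy as np
from sklearn.cluster import AgglomerativeClustering

def clustering_from_params(data, params):
    # Read the expected keys directly, with illustrative defaults, instead of exec().
    n_clusters = int(params.get('n_clusters', 2))
    affinity = params.get('affinity', 'euclidean')
    linkage = params.get('linkage', 'ward')

    clusters = AgglomerativeClustering(n_clusters=n_clusters,
                                       affinity=affinity, linkage=linkage)
    clusters.fit(data)

    # Approximate cluster "centers" as the per-cluster mean.
    cluster_centers = np.array([data[clusters.labels_ == i].mean(axis=0)
                                for i in range(n_clusters)])
    return [cluster_centers, clusters.labels_]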
Example 10: test_connectivity_propagation
# Required import: from sklearn.cluster import AgglomerativeClustering [as alias]
# Or: from sklearn.cluster.AgglomerativeClustering import fit [as alias]
def test_connectivity_propagation():
    # Check that connectivity in the ward tree is propagated correctly during
    # merging.
    X = np.array(
        [
            (0.014, 0.120),
            (0.014, 0.099),
            (0.014, 0.097),
            (0.017, 0.153),
            (0.017, 0.153),
            (0.018, 0.153),
            (0.018, 0.153),
            (0.018, 0.153),
            (0.018, 0.153),
            (0.018, 0.153),
            (0.018, 0.153),
            (0.018, 0.153),
            (0.018, 0.152),
            (0.018, 0.149),
            (0.018, 0.144),
        ]
    )
    connectivity = kneighbors_graph(X, 10, include_self=False)
    ward = AgglomerativeClustering(n_clusters=4, connectivity=connectivity, linkage="ward")
    # If changes are not propagated correctly, fit crashes with an IndexError
    ward.fit(X)
Example 11: test_agglomerative_clustering_with_distance_threshold
# Required import: from sklearn.cluster import AgglomerativeClustering [as alias]
# Or: from sklearn.cluster.AgglomerativeClustering import fit [as alias]
def test_agglomerative_clustering_with_distance_threshold(linkage):
    # Check that we obtain the correct number of clusters with
    # agglomerative clustering with distance_threshold.
    rng = np.random.RandomState(0)
    mask = np.ones([10, 10], dtype=bool)
    n_samples = 100
    X = rng.randn(n_samples, 50)
    connectivity = grid_to_graph(*mask.shape)
    # test when distance threshold is set to 10
    distance_threshold = 10
    for conn in [None, connectivity]:
        clustering = AgglomerativeClustering(
            n_clusters=None,
            distance_threshold=distance_threshold,
            connectivity=conn, linkage=linkage)
        clustering.fit(X)
        clusters_produced = clustering.labels_
        num_clusters_produced = len(np.unique(clustering.labels_))
        # test if the clusters produced match the point in the linkage tree
        # where the distance exceeds the threshold
        tree_builder = _TREE_BUILDERS[linkage]
        children, n_components, n_leaves, parent, distances = \
            tree_builder(X, connectivity=conn, n_clusters=None,
                         return_distance=True)
        num_clusters_at_threshold = np.count_nonzero(
            distances >= distance_threshold) + 1
        # test number of clusters produced
        assert num_clusters_at_threshold == num_clusters_produced
        # test clusters produced
        clusters_at_threshold = _hc_cut(n_clusters=num_clusters_produced,
                                        children=children,
                                        n_leaves=n_leaves)
        assert np.array_equiv(clusters_produced,
                              clusters_at_threshold)
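Outside of a test suite you do not need the private _TREE_BUILDERS/_hc_cut helpers; the same distance_threshold behaviour is available through the public API on recent scikit-learn versions. A minimal sketch (the data and threshold value below are illustrative):

import numpy as np
from sklearn.cluster import AgglomerativeClustering

X = np.random.RandomState(0).randn(100, 5)  # toy data, illustrative only

# With distance_threshold set, n_clusters must be None; the number of clusters
# is then determined by where a merge would exceed the threshold.
model = AgglomerativeClustering(n_clusters=None, distance_threshold=10,
                                linkage='ward')
model.fit(X)
print(len(np.unique(model.labels_)), 'clusters')
print(model.distances_[:5])  # merge distances, exposed when a threshold is used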
Example 12: classify_core
# Required import: from sklearn.cluster import AgglomerativeClustering [as alias]
# Or: from sklearn.cluster.AgglomerativeClustering import fit [as alias]
def classify_core(self, N_CLUSTERS, clusterType, data_for_trial_type, begin_time, end_time):
    BEGIN_TIME_FRAME = begin_time * self.griddy.TIME_GRID_SPACING
    END_TIME_FRAME = end_time * self.griddy.TIME_GRID_SPACING
    data = data_for_trial_type[:, BEGIN_TIME_FRAME:END_TIME_FRAME, self.griddy.VEL_X]
    labels = None
    if clusterType == 'kmeans':
        kmeans = KMeans(n_clusters=N_CLUSTERS)
        kmeans.fit(data)
        labels = kmeans.labels_
    elif clusterType == 'affinity_propagation':
        ap = AffinityPropagation(damping=0.75)
        ap.fit(data)
        labels = ap.labels_
        N_CLUSTERS = np.max(labels) + 1  # originally np.max(self.labels), likely a typo
    elif clusterType == 'DBSCAN':
        dbscan = DBSCAN()
        dbscan.fit(data)
        labels = dbscan.labels_
        N_CLUSTERS = np.max(labels) + 1
        print('N_CLUSTERS=' + str(N_CLUSTERS))
    elif clusterType == 'AgglomerativeClustering':
        ac = AgglomerativeClustering(n_clusters=N_CLUSTERS)
        ac.fit(data)
        labels = ac.labels_
    else:
        print('ERROR: clusterType: ' + clusterType + ' is not recognized')
    return (labels, N_CLUSTERS)
Author: SashaRayshubskiy, Project: osmotropotaxis_analysis_python, Lines: 33, Source: fly_trajectory_classifier.py
Example 13: programmer_3
# Required import: from sklearn.cluster import AgglomerativeClustering [as alias]
# Or: from sklearn.cluster.AgglomerativeClustering import fit [as alias]
def programmer_3():
    standardizedfile = "data/standardized.xls"
    k = 3
    data = pd.read_excel(standardizedfile, index_col=u"基站编号")  # index column: base-station ID
    # hierarchical clustering
    model = AgglomerativeClustering(n_clusters=k, linkage="ward")
    model.fit(data)
    # attach the original data together with its assigned cluster label
    r = pd.concat([data, pd.Series(model.labels_, index=data.index)], axis=1)
    r.columns = list(data.columns) + [u"聚类类别"]  # "cluster label" column
    # plot each cluster, using a different line style per cluster
    style = ["ro-", "go-", "bo-"]
    xlabels = [u"工作日人均停留时间", u"凌晨人均停留时间", u"周末人均停留时间", u"日均人流量"]
    pic_output = "tmp/type_"
    for i in range(k):
        plt.figure()
        tmp = r[r[u"聚类类别"] == i].iloc[:, :4]
        for j in range(len(tmp)):
            plt.plot(range(1, 5), tmp.iloc[j], style[i])
        plt.xticks(range(1, 5), xlabels, rotation=20)
        plt.title(u"商圈类别%s" % (i + 1))  # "business-district category i+1"
        # adjust the bottom margin
        plt.subplots_adjust(bottom=0.15)
        plt.savefig(u"%s%s.png" % (pic_output, i + 1))
Example 14: test_compute_full_tree
# Required import: from sklearn.cluster import AgglomerativeClustering [as alias]
# Or: from sklearn.cluster.AgglomerativeClustering import fit [as alias]
def test_compute_full_tree():
    """Test that the full tree is computed if n_clusters is small"""
    rng = np.random.RandomState(0)
    X = rng.randn(10, 2)
    connectivity = kneighbors_graph(X, 5, include_self=False)
    # When n_clusters is small, the full tree should be built,
    # i.e. the number of merges should be n_samples - 1
    agc = AgglomerativeClustering(n_clusters=2, connectivity=connectivity)
    agc.fit(X)
    n_samples = X.shape[0]
    n_nodes = agc.children_.shape[0]
    assert_equal(n_nodes, n_samples - 1)
    # When n_clusters is large (greater than max(100, 0.02 * n_samples)),
    # we should stop once n_clusters clusters remain.
    n_clusters = 101
    X = rng.randn(200, 2)
    connectivity = kneighbors_graph(X, 10, include_self=False)
    agc = AgglomerativeClustering(n_clusters=n_clusters,
                                  connectivity=connectivity)
    agc.fit(X)
    n_samples = X.shape[0]
    n_nodes = agc.children_.shape[0]
    assert_equal(n_nodes, n_samples - n_clusters)
Example 15: clustering_tweets_hc
# Required import: from sklearn.cluster import AgglomerativeClustering [as alias]
# Or: from sklearn.cluster.AgglomerativeClustering import fit [as alias]
def clustering_tweets_hc(labeled_tweets, num_cluster):
    # cst_vectorizer and param are defined elsewhere in the module.
    vectorizer = cst_vectorizer.StemmedTfidfVectorizer(**param)
    tweet_vec = vectorizer.fit_transform(labeled_tweets).toarray()
    # print(tweet_vec)
    n_clusters = num_cluster
    from sklearn.neighbors import kneighbors_graph
    knn_graph = kneighbors_graph(tweet_vec, 1, include_self=False)
    # print(knn_graph)
    connectivity = knn_graph
    from sklearn.cluster import AgglomerativeClustering
    model = AgglomerativeClustering(linkage='ward', connectivity=connectivity, n_clusters=n_clusters)
    model.fit(tweet_vec)
    c = model.labels_
    # print(c, len(c))
    # concatenate the tweets belonging to each cluster into one string
    clustered_tweets = []
    for i in range(0, num_cluster):
        similar_indices = (c == i).nonzero()[0]
        sent = ''
        for sid in similar_indices:
            sent = labeled_tweets[sid] + ' ' + sent
        clustered_tweets.append(sent)
    return clustered_tweets