本文整理匯總了Python中sklearn.cluster.AgglomerativeClustering方法的典型用法代碼示例。如果您正苦於以下問題:Python cluster.AgglomerativeClustering方法的具體用法?Python cluster.AgglomerativeClustering怎麼用?Python cluster.AgglomerativeClustering使用的例子?那麼,這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類sklearn.cluster的用法示例。
在下文中一共展示了cluster.AgglomerativeClustering方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: print_labeled_tests
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import AgglomerativeClustering [as 別名]
def print_labeled_tests(w, y, test_size=0.2, seed=42):
    """
    Run the label-prediction test and, for single-label targets, a
    clustering evaluation on ``w``.

    ``LabelPrediction`` is always called with ``w``, ``y``, ``test_size``
    and ``seed``. When ``y`` has fewer than two dimensions, ``w`` is also
    clustered with average-linkage cosine agglomerative clustering (one
    cluster per unique value in ``y``) and the output of
    ``evalClusteringOnLabels`` is merged into the result.

    Returns the label-prediction result, merged with the clustering
    result when clustering was performed.
    """
    # NOTE(review): the four split arrays are never read in this function.
    X_train, X_test, y_train, y_test = train_test_split(
        w, y, random_state=seed, test_size=test_size)
    # Label prediction scores
    scores = LabelPrediction(w, y, test_size=test_size, seed=seed)
    # Clustering is only evaluated for single-label (not multioutput) targets
    if len(y.shape) >= 2:
        return scores
    agglo = cluster.AgglomerativeClustering(
        n_clusters=np.unique(y).size,
        affinity='cosine',
        linkage='average',
    )
    predicted = agglo.fit(w).labels_
    cluster_scores = evalClusteringOnLabels(predicted, y, verbose=True)
    # On duplicate keys the clustering entries take precedence
    return {**scores, **cluster_scores}
示例2: test_linkage_misc
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import AgglomerativeClustering [as 別名]
def test_linkage_misc():
    # Miscellaneous checks on linkage handling
    X = np.random.RandomState(42).normal(size=(5, 5))

    # An unknown linkage name, or a 4x4 connectivity matrix for the 5 rows
    # of X, is expected to raise ValueError
    bad_linkage_model = AgglomerativeClustering(linkage='foo')
    assert_raises(ValueError, bad_linkage_model.fit, X)
    assert_raises(ValueError, linkage_tree, X, linkage='foo')
    small_connectivity = np.ones((4, 4))
    assert_raises(ValueError, linkage_tree, X,
                  connectivity=small_connectivity)

    # Smoke test: FeatureAgglomeration with default arguments fits X
    FeatureAgglomeration().fit(X)

    # A precomputed cosine distance matrix must give the same first output
    # as asking linkage_tree for the "cosine" affinity on X
    from_precomputed = linkage_tree(cosine_distances(X),
                                    affinity="precomputed")
    from_cosine = linkage_tree(X, affinity="cosine")
    assert_array_equal(from_precomputed[0], from_cosine[0])

    # A callable affinity must give the same first output as the
    # "manhattan" affinity name
    from_callable = linkage_tree(X, affinity=manhattan_distances)
    from_name = linkage_tree(X, affinity="manhattan")
    assert_array_equal(from_callable[0], from_name[0])
示例3: test_identical_points
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import AgglomerativeClustering [as 別名]
def test_identical_points():
    # Ensure identical points are handled correctly when using mst with
    # a sparse connectivity matrix
    X = np.array([[0, 0, 0], [0, 0, 0],
                  [1, 1, 1], [1, 1, 1],
                  [2, 2, 2], [2, 2, 2]])
    true_labels = np.array([0, 0, 1, 1, 2, 2])
    connectivity = kneighbors_graph(X, n_neighbors=3, include_self=False)
    # Average the graph with its transpose to make it symmetric
    connectivity = 0.5 * (connectivity + connectivity.T)
    # Only the returned connectivity is needed; the second return value
    # is not used by this test
    connectivity, _ = _fix_connectivity(X, connectivity, 'euclidean')

    # Exercise each linkage name exactly once
    for linkage in ('single', 'average', 'complete', 'ward'):
        clustering = AgglomerativeClustering(n_clusters=3,
                                             linkage=linkage,
                                             connectivity=connectivity)
        clustering.fit(X)

        # A normalized mutual information of 1 means the predicted
        # partition matches true_labels
        assert_almost_equal(normalized_mutual_info_score(clustering.labels_,
                                                         true_labels), 1)
示例4: test_compute_full_tree
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import AgglomerativeClustering [as 別名]
def test_compute_full_tree():
    # Check how many merges are recorded for a small and a large n_clusters
    rng = np.random.RandomState(0)

    # Small n_clusters: the full tree is expected, i.e. the number of
    # merges equals n_samples - 1
    X = rng.randn(10, 2)
    knn_graph = kneighbors_graph(X, 5, include_self=False)
    model = AgglomerativeClustering(n_clusters=2, connectivity=knn_graph)
    model.fit(X)
    assert_equal(model.children_.shape[0], X.shape[0] - 1)

    # Large n_clusters (greater than the max of 100 and 0.02 * n_samples):
    # merging is expected to stop once n_clusters remain
    n_clusters = 101
    X = rng.randn(200, 2)
    knn_graph = kneighbors_graph(X, 10, include_self=False)
    model = AgglomerativeClustering(connectivity=knn_graph,
                                    n_clusters=n_clusters)
    model.fit(X)
    assert_equal(model.children_.shape[0], X.shape[0] - n_clusters)
示例5: test_cluster_distances_with_distance_threshold
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import AgglomerativeClustering [as 別名]
def test_cluster_distances_with_distance_threshold():
    # Single-linkage clustering with a distance threshold: compare the
    # distances inside each cluster and towards other clusters with the
    # threshold
    n_samples = 100
    distance_threshold = 4
    rng = np.random.RandomState(0)
    X = rng.randint(-10, 10, size=(n_samples, 3))
    model = AgglomerativeClustering(
        linkage="single",
        distance_threshold=distance_threshold,
        n_clusters=None,
    )
    labels = model.fit(X).labels_
    D = pairwise_distances(X, metric="minkowski", p=2)
    # put inf on the diagonal so min() never picks the zero self-distance
    np.fill_diagonal(D, np.inf)
    for label in np.unique(labels):
        members = labels == label
        member_rows = D[members]
        within = member_rows[:, members]
        outside = member_rows[:, ~members]
        max_in_cluster_distance = within.min(axis=0).max()
        min_out_cluster_distance = outside.min(axis=0).min()
        # a one-point cluster only holds its inf diagonal entry, so the
        # in-cluster check is skipped for it
        if members.sum() > 1:
            assert max_in_cluster_distance < distance_threshold
        assert min_out_cluster_distance >= distance_threshold
示例6: detection_with_agglomaritve_clustering
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import AgglomerativeClustering [as 別名]
def detection_with_agglomaritve_clustering(image_set):
    """
    Split the images into two clusters with complete-linkage agglomerative
    clustering and return the normalized cluster labels.

    Really good if the classes you are analyzing are close to what the network learned.
    :param image_set: The bottleneck values of the relevant images.
    :return: Predictions vector (the cluster labels after ``normalize_predictions``)
    N.B : The detector breaks with a full black image.
    """
    # http://scikit-learn.org/stable/auto_examples/cluster/plot_agglomerative_clustering.html#sphx-glr-auto-examples-cluster-plot-agglomerative-clustering-py
    model = cluster.AgglomerativeClustering(
        linkage="complete",
        affinity="l2",
        n_clusters=2,
    )
    model.fit(image_set)
    return normalize_predictions(model.labels_)
示例7: perform_clustering
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import AgglomerativeClustering [as 別名]
def perform_clustering(X, connectivity, title, num_clusters=3, linkage='ward'):
    """Cluster ``X`` agglomeratively and scatter-plot every cluster.

    A new figure is opened through ``plt``, the model is fitted on ``X``
    and the points of each cluster are drawn with their own marker.

    Parameters:
        X: data to cluster; columns 0 and 1 are used as plot coordinates.
        connectivity: forwarded to ``AgglomerativeClustering``.
        title: title set on the figure.
        num_clusters: number of clusters requested from the model.
        linkage: linkage name forwarded to the model.

    Returns:
        None. The function only draws on the figure it creates.
    """
    plt.figure()
    model = AgglomerativeClustering(linkage=linkage,
                                    connectivity=connectivity,
                                    n_clusters=num_clusters)
    model.fit(X)

    # extract labels
    labels = model.labels_

    # marker shapes for the clusters; they are reused cyclically so that
    # clusters beyond the third are still drawn instead of being skipped
    markers = '.vx'

    for i in range(num_clusters):
        # plot the points that belong to the current cluster
        in_cluster = labels == i
        plt.scatter(X[in_cluster, 0], X[in_cluster, 1], s=50,
                    marker=markers[i % len(markers)], color='k',
                    facecolors='none')

    plt.title(title)
開發者ID:PacktPublishing,項目名稱:Python-Machine-Learning-Cookbook-Second-Edition,代碼行數:20,代碼來源:agglomerative.py
示例8: _cluster_documents
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import AgglomerativeClustering [as 別名]
def _cluster_documents(self):
    """Cluster ``self.document_vectors`` and group document indices by label.

    Reads ``self.params['cluster_method']`` and
    ``self.params['cluster_n_clusters']``. The method value ``'kmeans'``
    selects ``KMeans``; any other value selects complete-linkage cosine
    ``AgglomerativeClustering``.

    Returns:
        dict mapping each cluster label to the list of positions (in
        ``self.document_vectors``) of the documents assigned to it.
    """
    method = self.params['cluster_method']
    requested_clusters = int(self.params['cluster_n_clusters'])
    n_samples = len(self.document_vectors)
    # Never request more clusters than there are documents
    n_clusters = min(requested_clusters, n_samples)

    if method == 'kmeans':
        clusterer = KMeans(n_clusters=n_clusters, init='k-means++', max_iter=100, n_init=1)
    else:
        clusterer = AgglomerativeClustering(n_clusters=n_clusters, linkage='complete', affinity='cosine')

    cluster_labels = clusterer.fit(self.document_vectors).labels_

    # Group document positions under their cluster label
    clusters = {}
    for document_id, cluster_label in enumerate(cluster_labels):
        clusters.setdefault(cluster_label, []).append(document_id)
    return clusters
示例9: test_objectmapper
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import AgglomerativeClustering [as 別名]
def test_objectmapper(self):
    # Each estimator reached through ``ModelFrame.cluster`` must be the very
    # same object as the attribute of the same name on ``cluster``
    df = pdml.ModelFrame([])

    estimator_names = (
        'AffinityPropagation',
        'AgglomerativeClustering',
        'Birch',
        'DBSCAN',
        'FeatureAgglomeration',
        'KMeans',
        'MiniBatchKMeans',
        'MeanShift',
        'SpectralClustering',
    )
    for name in estimator_names:
        self.assertIs(getattr(df.cluster, name), getattr(cluster, name))

    # Same identity check for the estimators under ``cluster.bicluster``
    for name in ('SpectralBiclustering', 'SpectralCoclustering'):
        self.assertIs(getattr(df.cluster.bicluster, name),
                      getattr(cluster.bicluster, name))
示例10: clustering
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import AgglomerativeClustering [as 別名]
def clustering(math_regions, char_data, image, algorithm, thresh_votes):
    """Cluster math regions by their centers and select the regions per cluster.

    Each region is indexed at positions 0-3; its center is the midpoint of
    elements 0 and 2 and of elements 1 and 3. The centers are clustered with
    ``AgglomerativeClustering`` using its default arguments.

    Parameters:
        math_regions: sequence of regions, each indexable at 0..3.
        char_data, image, algorithm, thresh_votes: accepted but not read by
            the current body.

    Returns:
        None.
    """
    centers = [
        [(region[0] + region[2]) / 2, (region[1] + region[3]) / 2]
        for region in math_regions
    ]

    # Named `model` so the local does not shadow this function's own name
    model = AgglomerativeClustering().fit(centers)

    # One label per region; the boolean mask below must be built from these,
    # not from the unique label values, to match the number of regions
    region_labels = model.labels_
    # An array is required for boolean-mask indexing (a plain list is not)
    region_array = np.asarray(math_regions)

    for label in np.unique(region_labels):
        # NOTE(review): `regions` is not consumed yet; the per-cluster
        # processing appears to be unfinished
        regions = region_array[region_labels == label]
示例11: test_agglomerative_clustering_wrong_arg_memory
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import AgglomerativeClustering [as 別名]
def test_agglomerative_clustering_wrong_arg_memory():
    # fit is expected to raise ValueError when ``memory`` is neither a str
    # nor a joblib.Memory instance; an integer is used here
    n_samples = 100
    X = np.random.RandomState(0).randn(n_samples, 50)
    model = AgglomerativeClustering(memory=5)
    assert_raises(ValueError, model.fit, X)
示例12: test_single_linkage_clustering
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import AgglomerativeClustering [as 別名]
def test_single_linkage_clustering():
    # Single linkage with two clusters must reproduce the generated labels
    # in two emblematic cases: the moons data and the circles data
    datasets = (
        make_moons(noise=0.05, random_state=42),
        make_circles(factor=0.5, noise=0.025, random_state=42),
    )
    for points, expected_labels in datasets:
        model = AgglomerativeClustering(n_clusters=2, linkage='single')
        model.fit(points)
        score = normalized_mutual_info_score(model.labels_, expected_labels)
        assert_almost_equal(score, 1)
示例13: test_connectivity_propagation
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import AgglomerativeClustering [as 別名]
def test_connectivity_propagation():
    # Connectivity in the ward tree must be propagated correctly while
    # clusters are merged.
    points = [
        (.014, .120), (.014, .099), (.014, .097),
        (.017, .153), (.017, .153), (.018, .153),
        (.018, .153), (.018, .153), (.018, .153),
        (.018, .153), (.018, .153), (.018, .153),
        (.018, .152), (.018, .149), (.018, .144),
    ]
    X = np.array(points)
    knn_graph = kneighbors_graph(X, 10, include_self=False)
    model = AgglomerativeClustering(
        linkage='ward', connectivity=knn_graph, n_clusters=4)
    # No explicit assertion: if changes are not propagated correctly,
    # fit crashes with an IndexError
    model.fit(X)
示例14: test_connectivity_callable
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import AgglomerativeClustering [as 別名]
def test_connectivity_callable():
    # A callable connectivity must produce the same labels as the matrix
    # built by the same kneighbors_graph call
    X = np.random.RandomState(0).rand(20, 5)
    knn_graph = kneighbors_graph(X, 3, include_self=False)
    graph_builder = partial(kneighbors_graph, n_neighbors=3,
                            include_self=False)
    from_matrix = AgglomerativeClustering(connectivity=knn_graph)
    from_callable = AgglomerativeClustering(connectivity=graph_builder)
    from_matrix.fit(X)
    from_callable.fit(X)
    assert_array_equal(from_matrix.labels_, from_callable.labels_)
示例15: test_connectivity_ignores_diagonal
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import AgglomerativeClustering [as 別名]
def test_connectivity_ignores_diagonal():
    # Labels must be identical whether or not the k-neighbors graph counts
    # each sample as its own neighbor
    X = np.random.RandomState(0).rand(20, 5)
    graph_without_self = kneighbors_graph(X, 3, include_self=False)
    graph_with_self = kneighbors_graph(X, 3, include_self=True)
    model_without_self = AgglomerativeClustering(
        connectivity=graph_without_self)
    model_with_self = AgglomerativeClustering(connectivity=graph_with_self)
    model_without_self.fit(X)
    model_with_self.fit(X)
    assert_array_equal(model_without_self.labels_, model_with_self.labels_)