本文整理汇总了 Python 中 sklearn.datasets.samples_generator.make_blobs 方法的典型用法代码示例。如果您正苦于以下问题：Python samples_generator.make_blobs 方法的具体用法？Python samples_generator.make_blobs 怎么用？Python samples_generator.make_blobs 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块 sklearn.datasets.samples_generator 的用法示例。
在下文中一共展示了 samples_generator.make_blobs 方法的 15 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: make_easy_visual_data
# 需要导入模块: from sklearn.datasets import samples_generator [as 别名]
# 或者: from sklearn.datasets.samples_generator import make_blobs [as 别名]
def make_easy_visual_data(path, N=600):
    """Generate three 2D blobs whose centers lie on one line, then scale and save.

    The centers are placed at x = -4, 0 and 6 on the line y = 1.5 * x + 1, so
    a single coordinate of a center is enough to locate it along that line.
    The samples are rescaled with ``MinMaxScaler``, passed to
    ``save_misc_data`` together with their labels, and returned.

    Args:
        path: forwarded unchanged to ``save_misc_data``.
        N: total number of samples requested from ``make_blobs``.

    Returns:
        Tuple ``(X, labels)`` with the scaled samples and their blob labels.
    """
    slope, intercept = 1.5, 1
    centers = [(x, x * slope + intercept) for x in (-4, 0, 6)]
    X, labels = make_blobs(
        n_samples=N,
        centers=centers,
        cluster_std=[1, 1, 1.5],
        n_features=len(centers[0]),
    )
    # Fit the scaler on the raw samples and rescale them in one chain.
    X = MinMaxScaler().fit(X).transform(X)
    save_misc_data(path, X, labels, N)
    return X, labels
示例2: test_dbscan_optics_parity
# 需要导入模块: from sklearn.datasets import samples_generator [as 别名]
# 或者: from sklearn.datasets.samples_generator import make_blobs [as 别名]
def test_dbscan_optics_parity(eps, min_samples):
    """OPTICS with DBSCAN-style extraction must stay within 5% of DBSCAN.

    Both estimators are fitted on the same blobs with the same ``eps`` and
    ``min_samples``; the rounded share of differently labelled samples must
    not exceed 0.05.
    """
    blob_centers = [[1, 1], [-1, -1], [1, -1]]
    X, _ = make_blobs(
        n_samples=750, centers=blob_centers, cluster_std=0.4, random_state=0
    )

    # OPTICS, with labels extracted the DBSCAN way at the given eps
    optics = OPTICS(
        min_samples=min_samples, cluster_method='dbscan', eps=eps
    ).fit(X)
    # plain DBSCAN with the same parameters
    dbscan = DBSCAN(eps=eps, min_samples=min_samples).fit(X)

    table = contingency_matrix(dbscan.labels_, optics.labels_)
    best_per_column = np.sum(np.max(table, axis=0))
    best_per_row = np.sum(np.max(table, axis=1))
    n_total = X.shape[0]
    n_disagree = n_total - min(best_per_column, best_per_row)

    # One sample is subtracted before the ratio is rounded to two decimals.
    mismatch_ratio = np.round((n_disagree - 1) / n_total, 2)

    # verify label mismatch is <= 5% of the labels
    assert mismatch_ratio <= 0.05
示例3: test_elkan_results
# 需要导入模块: from sklearn.datasets import samples_generator [as 别名]
# 或者: from sklearn.datasets.samples_generator import make_blobs [as 别名]
def test_elkan_results(distribution):
    """KMeans must give the same result with the 'full' and 'elkan' algorithms.

    The data is drawn from a normal distribution when ``distribution`` is
    ``'normal'`` and from ``make_blobs`` otherwise, using one shared
    ``RandomState(0)`` in both cases.
    """
    rng = np.random.RandomState(0)
    if distribution != 'normal':
        X, _ = make_blobs(random_state=rng)
    else:
        X = rng.normal(size=(50, 10))

    # Identical settings for both estimators; only the algorithm differs.
    shared = dict(n_clusters=5, random_state=0, n_init=1)
    lloyd = KMeans(algorithm='full', **shared)
    elkan = KMeans(algorithm='elkan', **shared)
    lloyd.fit(X)
    elkan.fit(X)

    assert_array_almost_equal(elkan.cluster_centers_, lloyd.cluster_centers_)
    assert_array_equal(elkan.labels_, lloyd.labels_)
示例4: test_minibatch_sensible_reassign_fit
# 需要导入模块: from sklearn.datasets import samples_generator [as 别名]
# 或者: from sklearn.datasets.samples_generator import make_blobs [as 别名]
def test_minibatch_sensible_reassign_fit():
    """MiniBatchKMeans.fit must not leave too many all-zero cluster centers.

    Checks that identical initial clusters are reassigned; also a regression
    test for when there are more desired reassignments than samples.
    """
    zeroed_X, _ = make_blobs(
        n_samples=100, centers=5, cluster_std=1., random_state=42
    )
    # Every second sample is overwritten with zeros.
    zeroed_X[::2, :] = 0

    # 10 is an ordinary mini-batch; 201 is a batch size > X.shape[0]
    # (regression test).
    for batch_size in (10, 201):
        model = MiniBatchKMeans(
            n_clusters=20, batch_size=batch_size, random_state=42,
            init="random",
        )
        model.fit(zeroed_X)
        # there should not be too many exact zero cluster centers
        n_nonzero_centers = model.cluster_centers_.any(axis=1).sum()
        assert_greater(n_nonzero_centers, 10)
示例5: test_affinity_propagation_class
# 需要导入模块: from sklearn.datasets import samples_generator [as 别名]
# 或者: from sklearn.datasets.samples_generator import make_blobs [as 别名]
def test_affinity_propagation_class(self):
    """AffinityPropagation fitted through a ModelFrame must match plain sklearn.

    The same estimator settings are fitted once via ``pdml.ModelFrame`` and
    once directly on the array; center indices and labels must be equal.
    """
    from sklearn.datasets.samples_generator import make_blobs

    X, labels_true = make_blobs(
        n_samples=300,
        centers=[[1, 1], [-1, -1], [1, -1]],
        cluster_std=0.5,
        random_state=0,
    )

    # Estimator created and fitted through the ModelFrame accessor.
    frame = pdml.ModelFrame(data=X, target=labels_true)
    via_frame = frame.cluster.AffinityPropagation(preference=-50)
    frame.fit(via_frame)

    # Reference estimator fitted directly on the raw array.
    reference = cluster.AffinityPropagation(preference=-50).fit(X)

    tm.assert_numpy_array_equal(
        via_frame.cluster_centers_indices_,
        reference.cluster_centers_indices_,
    )
    tm.assert_numpy_array_equal(via_frame.labels_, reference.labels_)
示例6: test_spectral_unknown_mode
# 需要导入模块: from sklearn.datasets import samples_generator [as 别名]
# 或者: from sklearn.datasets.samples_generator import make_blobs [as 别名]
def test_spectral_unknown_mode():
    """spectral_clustering must raise ValueError for an unknown eigen_solver."""
    # Three centers on the diagonal: (0, 0, 0), (10, 10, 10), (20, 20, 20).
    centers = np.array([[c] * 3 for c in (0., 10., 20.)])
    X, _ = make_blobs(
        n_samples=100, centers=centers, cluster_std=1., random_state=42
    )

    distances = pairwise_distances(X)
    # Turn distances into similarities, then store them as a sparse matrix.
    similarity = sparse.coo_matrix(np.max(distances) - distances)

    assert_raises(
        ValueError,
        spectral_clustering,
        similarity,
        n_clusters=2,
        random_state=0,
        eigen_solver="<unknown>",
    )
示例7: test_spectral_unknown_assign_labels
# 需要导入模块: from sklearn.datasets import samples_generator [as 别名]
# 或者: from sklearn.datasets.samples_generator import make_blobs [as 别名]
def test_spectral_unknown_assign_labels():
    """spectral_clustering must raise ValueError for an unknown assign_labels."""
    blob_centers = np.array([
        [0., 0., 0.],
        [10., 10., 10.],
        [20., 20., 20.],
    ])
    X, _ = make_blobs(
        n_samples=100, centers=blob_centers, cluster_std=1., random_state=42
    )

    dist = pairwise_distances(X)  # distance matrix
    affinity = np.max(dist) - dist  # similarity matrix
    affinity = sparse.coo_matrix(affinity)

    bad_options = dict(
        n_clusters=2, random_state=0, assign_labels="<unknown>"
    )
    assert_raises(ValueError, spectral_clustering, affinity, **bad_options)
示例8: test_spectral_clustering_sparse
# 需要导入模块: from sklearn.datasets import samples_generator [as 别名]
# 或者: from sklearn.datasets.samples_generator import make_blobs [as 别名]
def test_spectral_clustering_sparse():
    """SpectralClustering on a sparse precomputed affinity recovers both blobs.

    The RBF kernel is shifted down by 1e-4 and clipped at zero before being
    converted to a COO matrix; the resulting labels must give an adjusted
    Rand score of exactly 1 against the true labels.
    """
    X, y = make_blobs(
        n_samples=20,
        random_state=0,
        centers=[[1, 1], [-1, -1]],
        cluster_std=0.01,
    )

    kernel = rbf_kernel(X, gamma=1)
    affinity = sparse.coo_matrix(np.maximum(kernel - 1e-4, 0))

    model = SpectralClustering(
        random_state=0, n_clusters=2, affinity='precomputed'
    )
    labels = model.fit(affinity).labels_

    assert adjusted_rand_score(y, labels) == 1
示例9: test_parallel
# 需要导入模块: from sklearn.datasets import samples_generator [as 别名]
# 或者: from sklearn.datasets.samples_generator import make_blobs [as 别名]
def test_parallel():
    """MeanShift with n_jobs=2 must match MeanShift with default settings."""
    # Blob centers shifted by +10 in both coordinates.
    centers = 10 + np.array([[1, 1], [-1, -1], [1, -1]])
    X, _ = make_blobs(
        n_samples=50,
        n_features=2,
        centers=centers,
        cluster_std=0.4,
        shuffle=True,
        random_state=11,
    )

    two_jobs = MeanShift(n_jobs=2)
    two_jobs.fit(X)

    default = MeanShift()
    default.fit(X)

    assert_array_almost_equal(
        two_jobs.cluster_centers_, default.cluster_centers_
    )
    assert_array_equal(two_jobs.labels_, default.labels_)
示例10: test_bad_extract
# 需要导入模块: from sklearn.datasets import samples_generator [as 别名]
# 或者: from sklearn.datasets.samples_generator import make_blobs [as 别名]
def test_bad_extract():
    """Fitting OPTICS with eps=0.3 and max_eps=5.0 * 0.03 must raise ValueError.

    Tests an extraction eps that is too close to the original eps.
    """
    expected_message = "Specify an epsilon smaller than 0.15. Got 0.3."

    X, _ = make_blobs(
        n_samples=750,
        centers=[[1, 1], [-1, -1], [1, -1]],
        cluster_std=0.4,
        random_state=0,
    )

    # Build the estimator only; the error is expected from fit().
    optics = OPTICS(
        max_eps=5.0 * 0.03,
        cluster_method='dbscan',
        eps=0.3,
        min_samples=10,
    )
    assert_raise_message(ValueError, expected_message, optics.fit, X)
示例11: test_bad_reachability
# 需要导入模块: from sklearn.datasets import samples_generator [as 别名]
# 或者: from sklearn.datasets.samples_generator import make_blobs [as 别名]
def test_bad_reachability():
    """OPTICS with a very small max_eps must emit the expected UserWarning."""
    expected_warning = "All reachability values are inf. Set a larger max_eps."

    X, _ = make_blobs(
        n_samples=750,
        centers=[[1, 1], [-1, -1], [1, -1]],
        cluster_std=0.4,
        random_state=0,
    )

    # Construction and fitting both stay inside the warning context.
    with pytest.warns(UserWarning, match=expected_warning):
        optics = OPTICS(max_eps=5.0 * 0.003, min_samples=10, eps=0.015)
        optics.fit(X)
示例12: test_close_extract
# 需要导入模块: from sklearn.datasets import samples_generator [as 别名]
# 或者: from sklearn.datasets.samples_generator import make_blobs [as 别名]
def test_close_extract():
    """OPTICS extraction with eps close to the scaled max_eps finds 3 clusters."""
    X, _ = make_blobs(
        n_samples=750,
        centers=[[1, 1], [-1, -1], [1, -1]],
        cluster_std=0.4,
        random_state=0,
    )

    # Compute OPTICS
    optics = OPTICS(
        max_eps=1.0, cluster_method='dbscan', eps=0.3, min_samples=10
    )
    optics.fit(X)

    # Cluster labels start at 0, so a maximum label of 2 means 3 clusters.
    highest_label = max(optics.labels_)
    assert_equal(highest_label, 2)
示例13: test_minibatch_sensible_reassign_partial_fit
# 需要导入模块: from sklearn.datasets import samples_generator [as 别名]
# 或者: from sklearn.datasets.samples_generator import make_blobs [as 别名]
def test_minibatch_sensible_reassign_partial_fit():
    """Repeated partial_fit must not leave too many all-zero cluster centers."""
    # NOTE(review): n_samples is not defined in this block; presumably a
    # module-level constant of the test file -- confirm.
    zeroed_X, _labels = make_blobs(
        n_samples=n_samples, centers=5, cluster_std=1., random_state=42
    )
    # Every second sample is overwritten with zeros.
    zeroed_X[::2, :] = 0

    model = MiniBatchKMeans(n_clusters=20, random_state=42, init="random")
    for _ in range(100):
        model.partial_fit(zeroed_X)

    # there should not be too many exact zero cluster centers
    n_nonzero_centers = model.cluster_centers_.any(axis=1).sum()
    assert_greater(n_nonzero_centers, 10)
示例14: random_classification_problem
# 需要导入模块: from sklearn.datasets import samples_generator [as 别名]
# 或者: from sklearn.datasets.samples_generator import make_blobs [as 别名]
def random_classification_problem(n_ex, n_classes, n_in, seed=0):
    """Create a blob classification dataset and split it into train and test.

    Args:
        n_ex: number of samples passed to ``make_blobs``.
        n_classes: number of blob centers.
        n_in: number of features per sample.
        seed: random state used for both blob generation and the split.

    Returns:
        Tuple ``(X_train, y_train, X_test, y_test)``; 30% of the samples are
        requested for the test split.
    """
    X, y = make_blobs(
        n_samples=n_ex,
        centers=n_classes,
        n_features=n_in,
        random_state=seed,
    )
    split = train_test_split(X, y, test_size=0.3, random_state=seed)
    # train_test_split yields (X_train, X_test, y_train, y_test); the return
    # value groups the training pair first.
    X_train, X_test, y_train, y_test = split
    return X_train, y_train, X_test, y_test
#######################################################################
# Plots #
#######################################################################
示例15: plot
# 需要导入模块: from sklearn.datasets import samples_generator [as 别名]
# 或者: from sklearn.datasets.samples_generator import make_blobs [as 别名]
def plot():
    """Fit a GMM to 16 random blob datasets and save a 4x4 grid of plots.

    For every subplot a dataset with 2 or 3 blobs is generated, centered, and
    fitted 10 times; the fit with the highest ``best_elbo`` is drawn with
    ``plot_clusters``.  The figure is written to ``img/plot.png``; the output
    directory is created when it does not exist yet.
    """
    import os

    fig, axes = plt.subplots(4, 4)
    fig.set_size_inches(10, 10)
    for i, ax in enumerate(axes.flatten()):
        n_ex = 150
        n_in = 2
        n_classes = np.random.randint(2, 4)
        X, y = make_blobs(
            n_samples=n_ex, centers=n_classes, n_features=n_in, random_state=i
        )
        X -= X.mean(axis=0)

        # take best fit over 10 runs
        # NOTE(review): every run builds GMM with the same seed (i * 3);
        # whether the runs actually differ depends on how GMM uses the
        # seed -- confirm against the GMM implementation.
        best_elbo = -np.inf
        for _ in range(10):
            _G = GMM(C=n_classes, seed=i * 3)
            ret = _G.fit(X, max_iter=100, verbose=False)
            # A non-zero return value is treated as a failed fit; retry
            # on the same model until it returns 0.
            while ret != 0:
                print("Components collapsed; Refitting")
                ret = _G.fit(X, max_iter=100, verbose=False)

            if _G.best_elbo > best_elbo:
                best_elbo = _G.best_elbo
                G = _G

        ax = plot_clusters(G, X, ax)
        ax.xaxis.set_ticklabels([])
        ax.yaxis.set_ticklabels([])
        ax.set_title("# Classes: {}; Final VLB: {:.2f}".format(n_classes, G.best_elbo))

    plt.tight_layout()
    # savefig does not create missing directories, so make sure the target
    # directory exists before writing the file.
    out_path = "img/plot.png"
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    plt.savefig(out_path, dpi=300)
    plt.close("all")