本文整理汇总了Python中sklearn.cluster.MeanShift.fit_predict方法的典型用法代码示例。如果您正苦于以下问题:Python MeanShift.fit_predict方法的具体用法?Python MeanShift.fit_predict怎么用?Python MeanShift.fit_predict使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.cluster.MeanShift的用法示例。
在下文中一共展示了MeanShift.fit_predict方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: CombinedMeanShift
# 需要导入模块: from sklearn.cluster import MeanShift [as 别名]
# 或者: from sklearn.cluster.MeanShift import fit_predict [as 别名]
def CombinedMeanShift(self, h, alpha,
                      PrincComp=None,
                      njobs=-2,
                      mbf=1):
    """Run scikit-learn Mean Shift on the data plus weighted components.

    Arguments:
    h -- bandwidth passed to MeanShift
    alpha -- factor the principal components are multiplied by before
             they are stacked under the spatial data
    PrincComp -- already-computed principal components; when None they
                 are obtained from self.ShapePCA(2)
    njobs -- passed to MeanShift as n_jobs (default -2, described by the
             original author as "n. of CPU - 1")
    mbf -- passed to MeanShift as min_bin_freq

    Nothing is returned: the labels and the transposed cluster centres
    are stored on the instance.
    """
    clusterer = MeanShift(bin_seeding=True, bandwidth=h, cluster_all=True,
                          min_bin_freq=mbf, n_jobs=njobs)
    components = self.ShapePCA(2) if PrincComp is None else PrincComp

    print("Starting sklearn Mean Shift... ")
    stdout.flush()

    # Stack the weighted components under the data; the transpose turns
    # the stacked rows into feature columns for the clusterer.
    stacked = np.vstack((self.__data, alpha * components))
    clusterer.fit_predict(stacked.T)

    self.__ClusterID = clusterer.labels_
    self.__c = clusterer.cluster_centers_.T

    print("done.")
    stdout.flush()
示例2: main
# 需要导入模块: from sklearn.cluster import MeanShift [as 别名]
# 或者: from sklearn.cluster.MeanShift import fit_predict [as 别名]
def main():
    """Load image, collect pixels, cluster, create segment images, plot.

    Each pixel becomes a 5-dimensional sample made of its three HSV
    channels and its row/column position scaled to the range [0, 2).
    The samples are clustered with mean shift and up to eight clusters,
    largest first, are shown next to the source image.
    """
    # load image
    img_rgb = data.coffee()
    # NOTE(review): scipy.misc.imresize is no longer shipped by recent
    # SciPy releases; confirm the pinned SciPy version before running.
    img_rgb = misc.imresize(img_rgb, (256, 256)) / 255.0
    img = color.rgb2hsv(img_rgb)
    height, width, channels = img.shape
    print("Image shape is: ", img.shape)

    # collect pixels as rows of (h, s, v, y, x), in row-major pixel order
    print("Collecting pixels...")
    rows, cols = np.indices((height, width))
    pixels = np.column_stack((
        img[..., :3].reshape(-1, 3),
        (rows.ravel() / height) * 2.0,
        (cols.ravel() / width) * 2.0,
    ))
    print("Found %d pixels to cluster" % (len(pixels)))

    # cluster the pixels using mean shift
    print("Clustering...")
    bandwidth = estimate_bandwidth(pixels, quantile=0.05, n_samples=500)
    clusterer = MeanShift(bandwidth=bandwidth, bin_seeding=True)
    labels = clusterer.fit_predict(pixels)

    # Order the labels by cluster size, largest first. The stable sort
    # keeps ties in ascending label order.
    unique_labels, label_sizes = np.unique(labels, return_counts=True)
    largest_first = np.argsort(-label_sizes, kind="mergesort")
    labels_unique = unique_labels[largest_first]
    nb_clusters = len(labels_unique)
    print("Found %d clusters" % (nb_clusters))
    print(labels.shape)

    # Build one image per plotted cluster: the source image dimmed to
    # 25%, with channel 0 set to 1.0 on the pixels of that cluster.
    print("Creating images of segments...")
    label_image = labels.reshape(height, width)
    images = [img_rgb]
    titles = ["Image"]
    for rank, label in enumerate(labels_unique[:8]):
        segment = img_rgb * 0.25
        segment[label_image == label, 0] = 1.0
        images.append(segment)
        titles.append("Segment %d" % (rank))

    print("Plotting...")
    plot_images(images, titles)
示例3: evaluate_learners
# 需要导入模块: from sklearn.cluster import MeanShift [as 别名]
# 或者: from sklearn.cluster.MeanShift import fit_predict [as 别名]
def evaluate_learners(X):
    '''
    Cluster X with several estimators so their results can be compared.

    Generator: yields one (title, predicted classes) tuple per learner,
    in a fixed order, fitting each learner only when it is requested.
    '''
    from sklearn.cluster import (MeanShift, MiniBatchKMeans,
                                 SpectralClustering, AgglomerativeClustering)

    configurations = (
        # bandwidth=None leaves the choice of bandwidth, and with it the
        # number of clusters, to the estimator's own heuristic.
        ('Mean Shift clusters', MeanShift, {'bandwidth': None}),
        ('K Means clusters', MiniBatchKMeans, {'n_clusters': 2}),
        ('Spectral clusters', SpectralClustering, {'n_clusters': 2}),
        ('Agglomerative clusters (N=2)', AgglomerativeClustering,
         {'n_clusters': 2}),
        ('Agglomerative clusters (N=5)', AgglomerativeClustering,
         {'n_clusters': 5}),
    )
    for title, learner_cls, params in configurations:
        learner = learner_cls(**params)
        yield title, learner.fit_predict(X)
示例4: obtainClusters
# 需要导入模块: from sklearn.cluster import MeanShift [as 别名]
# 或者: from sklearn.cluster.MeanShift import fit_predict [as 别名]
def obtainClusters(self, hist):
    """Standardise the histogram rows and cluster them with mean shift.

    hist is converted to a float array and passed through
    StandardScaler; the bandwidth is estimated from the scaled data with
    quantile 0.3. Returns the labels produced by fit_predict.
    """
    print('Obatining clusters using MeanShift from skilean...')
    features = np.array(hist).astype(float)
    standardized = StandardScaler().fit_transform(features)
    ms = MEANSHIFT(
        bandwidth=estimate_bandwidth(standardized, quantile=0.3),
        bin_seeding=True,
    )
    clusters = ms.fit_predict(standardized)
    print('Clusters obtained using MeanShift')
    return clusters
示例5: mean_shift
# 需要导入模块: from sklearn.cluster import MeanShift [as 别名]
# 或者: from sklearn.cluster.MeanShift import fit_predict [as 别名]
def mean_shift(data,metric):
    """Cluster data with mean shift and report cluster count, score, time.

    data -- samples to cluster
    metric -- forwarded unchanged to accuracy.getAccuracy

    Returns the tuple ('Mean Shift', number of distinct labels, score,
    elapsed time). The elapsed time is the difference of two time()
    calls around the bandwidth estimate, the fit and the scoring. The
    score is the string 'None' when every sample got label 0.
    """
    t0 = time()
    bandwidth = estimate_bandwidth(data, quantile=0.2, n_samples=len(data))
    # The estimated bandwidth was previously computed and then dropped,
    # so the model silently fell back to its own default estimate.
    model = MeanShift(bandwidth=bandwidth, cluster_all=True)
    labels = model.fit_predict(data)
    # Only score when at least one label is non-zero.
    if np.count_nonzero(labels) != 0:
        score = accuracy.getAccuracy(data, labels, len(data), metric)
    else:
        score = 'None'
    t1 = time()
    n_clusters_ = len(np.unique(labels))
    return ('Mean Shift', n_clusters_, score, t1 - t0)
示例6: compute_clusters
# 需要导入模块: from sklearn.cluster import MeanShift [as 别名]
# 或者: from sklearn.cluster.MeanShift import fit_predict [as 别名]
def compute_clusters():
    '''
    Calculates the centroid centers based on the reports
    on the database.

    Reads latitude, longitude and category of every Report, clusters the
    coordinates with mean shift (bandwidth settings.THRESHOLD) and hands
    two lists to _update_clusters: (label, centre) pairs and
    (label, category) pairs, the latter with one entry per report.
    '''
    # Materialise the rows once so coordinates and categories are taken
    # from the same snapshot in the same order.
    data = list(
        Report.objects.all().values('latitude', 'longitude', 'category'))
    X = np.array([[d['latitude'], d['longitude']] for d in data])
    model = MeanShift(bandwidth=settings.THRESHOLD)

    # Getting metrics for each cluster
    labels = model.fit_predict(X)
    categories = [d['category'] for d in data]
    # Lists rather than zip objects, so the callee may iterate them more
    # than once on Python 3.
    label_metrics = list(zip(labels, categories))

    # Look each centre up by its label instead of zipping a set of labels
    # against the centre array: set order is not guaranteed, and a centre
    # with no assigned sample would shift every following pair.
    centers = model.cluster_centers_
    clusters = [(label, centers[label]) for label in np.unique(labels)]
    _update_clusters(clusters, label_metrics)
示例7: mean_shift
# 需要导入模块: from sklearn.cluster import MeanShift [as 别名]
# 或者: from sklearn.cluster.MeanShift import fit_predict [as 别名]
def mean_shift():
    """
    MeanShift discovers blobs in a smooth density of samples. It is a centroid
    algorithm which works by updating candidates for centroids to be the mean
    of the positions within a given region. These candidates are then filtered
    in a post-processing stage to eliminate near-duplicates and form the final
    list of centroids.

    This demo draws 3000 samples around three fixed centres, clusters
    them, prints the number of centroids found and the accuracy against
    the generated blob labels, and shows a scatter plot of the clusters.
    """
    from collections import Counter

    # Set a generic data sample.
    centers = [[-1., 0.], [0., 1.], [1., 0.]]
    n_samples = 3000
    std = 0.5
    seed = 0
    data, target = make_blobs(n_samples=n_samples, centers=centers,
                              random_state=seed, cluster_std=std)

    # Set bandwidth for the mean shift classifier.
    width = estimate_bandwidth(data, quantile=0.2,
                               n_samples=int(n_samples / 5))

    # Setup the classifier.
    clf = MeanShift(bandwidth=width, bin_seeding=True)
    ms_y = clf.fit_predict(data)

    # Evaluate accuracy against the generated blob labels. The previous
    # code compared ms_y with clf.labels_, which are the same labels, so
    # it always reported 0. Cluster ids are arbitrary, so each cluster is
    # credited with its most frequent true label.
    correct = 0
    for cluster_id in set(ms_y.tolist()):
        members = target[ms_y == cluster_id]
        correct += Counter(members.tolist()).most_common(1)[0][1]
    acc = float(correct) / float(n_samples)

    # Print results.
    print('Approximated number of centroids ', len(clf.cluster_centers_))
    print('Accuracy ', acc)

    # Plot clusters.
    plt.figure(figsize=(8, 8))
    plt.scatter(data[:, 0], data[:, 1], c=ms_y, s=30)
    plt.title('Clusters found with the Mean-shift method')
    plt.show()
示例8: predictMeanShift
# 需要导入模块: from sklearn.cluster import MeanShift [as 别名]
# 或者: from sklearn.cluster.MeanShift import fit_predict [as 别名]
def predictMeanShift(X, labels):
    """Cluster X with mean shift and plot the result in two PCA dimensions.

    X -- samples to cluster
    labels -- accepted for interface compatibility only; the original
              code overwrote it before use, so it is ignored here

    Prints the predicted label of every sample and the number of
    clusters found, then shows a scatter plot coloured by cluster.
    Returns None.
    """
    # Estimate the bandwidth from the data instead of hard-coding it.
    bandwidth = estimate_bandwidth(X, quantile=0.2, n_samples=500)
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
    results = ms.fit_predict(X)
    print(list(results))

    n_clusters_ = len(np.unique(results))
    print("number of estimated clusters : %d" % n_clusters_)

    # Project the samples onto two principal components for plotting.
    plot_columns = PCA(2).fit_transform(X)

    # Scatter plot of every sample, shaded according to cluster assignment.
    plt.scatter(x=plot_columns[:, 0], y=plot_columns[:, 1], c=results)
    # Report the estimated cluster count instead of a fixed "4".
    plt.title("Mean Shift- %d clusters" % n_clusters_)
    plt.show()
示例9: test_meanshift_predict
# 需要导入模块: from sklearn.cluster import MeanShift [as 别名]
# 或者: from sklearn.cluster.MeanShift import fit_predict [as 别名]
def test_meanshift_predict():
    """fit_predict and a later predict must label the same data alike."""
    estimator = MeanShift(bandwidth=1.2)
    fitted_labels = estimator.fit_predict(X)
    assert_array_equal(fitted_labels, estimator.predict(X))
示例10: DataFrame
# 需要导入模块: from sklearn.cluster import MeanShift [as 别名]
# 或者: from sklearn.cluster.MeanShift import fit_predict [as 别名]
map_sizes

# <codecell>

from sklearn.cluster import MeanShift

# Cluster the 'Start' values of each TF separately (bandwidth 10) and
# collect the labelled rows of all TFs into one frame.
cluster_data = DataFrame(
    columns=['Patient ID', 'Visit Number', 'TFName', 'Start', 'Cluster'])

for tf in tf_counts.index:
    # NOTE(review): .ix is not available in current pandas releases;
    # confirm the pandas version this notebook targets.
    data = tf_grouped.ix[tf].reset_index()
    data['TFName'] = tf
    clust = MeanShift(bandwidth=10)
    data['Cluster'] = clust.fit_predict(data[['Start']].values)
    cluster_data = concat([cluster_data, data], axis=0, ignore_index=True)

# <codecell>

# Cross-tabulate (patient, visit) rows against (TF, cluster) columns.
# NOTE(review): rows=/cols= are the keyword names of old pandas
# versions; confirm against the installed pandas.
res = crosstab(
    rows=[cluster_data['Patient ID'], cluster_data['Visit Number']],
    cols=[cluster_data['TFName'], cluster_data['Cluster']])

# <codecell>

from sklearn.cluster import k_means, mean_shift

# Cluster the rows of the cross-tabulation with mean shift.
centroids, labels = mean_shift(res.values)
示例11: range
# 需要导入模块: from sklearn.cluster import MeanShift [as 别名]
# 或者: from sklearn.cluster.MeanShift import fit_predict [as 别名]
print(result)

# Candidate quantiles for the bandwidth estimate.
index = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
for quantile in index:
    print("quantile : %f" % quantile)
    # Use the quantile of this iteration. The previous code always passed
    # index[1], so every pass used 0.2 whatever it printed.
    bandwidth = estimate_bandwidth(data, quantile=quantile,
                                   n_samples=len(data))
    print ("bandwidth : %f"% bandwidth)
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
    # Fit once; the model used to be fitted three times per iteration.
    labels = ms.fit_predict(data)
    print(ms)
    print ("labels : ",labels)
    cluster_centers = ms.cluster_centers_
    labels_unique = np.unique(labels)
示例12: mean_shift_clustering
# 需要导入模块: from sklearn.cluster import MeanShift [as 别名]
# 或者: from sklearn.cluster.MeanShift import fit_predict [as 别名]
def mean_shift_clustering(features, labels):
    """Cluster features with a default MeanShift, print impurity, plot.

    features -- samples to cluster
    labels -- reference labels, passed to get_impurity and
              plot_clustering together with the predictions
    """
    predictions = MeanShift().fit_predict(features)
    impurity = get_impurity(predictions, labels)
    print(impurity)
    plot_clustering(features, labels, predictions)
示例13: get_joly_scenes_sementation
# 需要导入模块: from sklearn.cluster import MeanShift [as 别名]
# 或者: from sklearn.cluster.MeanShift import fit_predict [as 别名]
# Split the frames into scenes. Alternative kept from the original
# author: get_scenes_segmentation(diffs, nb_std_above_mean_th=2.5)
scenes = get_joly_scenes_sementation(frames, nb_std_above_mean_th=2.)
del frames  # the frames are not needed any more; free the memory early

# One hash per scene, built from the slice of L between its boundaries.
scenes_hashes = [get_hash_of_hashes(L[start:stop]) for start, stop in scenes]
# tqdm.write(pformat(Counter(scenes_hashes)))

# Pairwise Hamming distance between all scene hashes.
n_scenes = len(scenes_hashes)
distance_matrix = np.zeros([n_scenes] * 2)
for i in trange(n_scenes):
    for j in range(n_scenes):
        distance_matrix[i, j] = hamming(scenes_hashes[i], scenes_hashes[j])

# Flag pairs whose distance is below 64 - (mean + 3 * std).
similarity_threshold = 64 - (distance_matrix.mean() + distance_matrix.std() * 3)
similar_scenes_matrix = distance_matrix < similarity_threshold

# Cluster the rows of the similarity matrix with mean shift.
cluster_builder = MeanShift(bandwidth=1)
scenes_clusters = cluster_builder.fit_predict(similar_scenes_matrix)

# Keep the clusters whose size exceeds mean + 2.5 * std of all sizes.
clusters_counter = Counter(scenes_clusters)
clusters_freq = np.array(list(clusters_counter.values()))
clusters_freq_th = clusters_freq.mean() + clusters_freq.std() * 2.5
frequent_clusters_id = [
    cluster_id for cluster_id in clusters_counter
    if clusters_counter[cluster_id] > clusters_freq_th
]

# Hashes of the scenes that fall into one of those frequent clusters.
scenes_hashes_idx = np.array(
    [cluster_id in frequent_clusters_id for cluster_id in scenes_clusters])
generics_scenes_hashes = np.array(scenes_hashes, dtype=np.uint64)[scenes_hashes_idx]

# Indexes of every scene whose hash is one of the generic hashes.
generics_scenes_idx = [
    position for position, scene_hash in enumerate(scenes_hashes)
    if scene_hash in generics_scenes_hashes
]

# Boundaries of the generic scenes.
generics_scenes = [scenes[position] for position in generics_scenes_idx]
示例14: print
# 需要导入模块: from sklearn.cluster import MeanShift [as 别名]
# 或者: from sklearn.cluster.MeanShift import fit_predict [as 别名]
# Standardised copy of the features. Alternatives the original author
# left disabled: shifting the result to be positive, and a RobustScaler
# without centring applied to featMatrix.
featMatrixSTD = StandardScaler().fit_transform(featMatrix)
print(featMatrixSTD.min())

# 10-component truncated SVD of the standardised features.
nmfTrf = TruncatedSVD(n_components=10)
nmfFeats = nmfTrf.fit_transform(featMatrixSTD)
dfTest = paDataFrame(featMatrixSTD[:, :10])

# Sample-by-sample product of the raw (unstandardised) features.
corr = np.dot(featMatrix, featMatrix.T)
print(corr.shape)

# Mean shift on the raw features, using 70% of the estimated bandwidth.
bandwidth = estimate_bandwidth(featMatrix, quantile=0.2, n_samples=500)
ms = MeanShift(bandwidth=bandwidth * 0.7, bin_seeding=True)
print('bandwidth', bandwidth)
labels = ms.fit_predict(featMatrix)
# Disabled alternative from the original author: DBSCAN(eps=0.2,
# min_samples=10, metric='precomputed') fitted on 1. - corr.
print(np.unique(labels))

# Reorder rows and columns of corr so samples with equal labels are
# adjacent.
sorted_labels = np.argsort(labels)
print(sorted_labels)
corrSorted = corr[sorted_labels, :][:, sorted_labels]
print(corr.shape, corrSorted.shape)

# Sample indexes of the clusters labelled 1 and 2.
lab1 = np.flatnonzero(labels == 1)
lab2 = np.flatnonzero(labels == 2)
示例15: open
# 需要导入模块: from sklearn.cluster import MeanShift [as 别名]
# 或者: from sklearn.cluster.MeanShift import fit_predict [as 别名]
# NOTE(review): this handle is not closed in the visible part of the
# script; make sure the code that writes to it closes it.
Html_file = open("clustering_files/meanshift.html", "w")

# Consider at most 10000 rows (mean shift complexity). The mask has
# exactly one entry per row of X, so inputs with fewer than 10000 rows
# are kept whole instead of producing a mask longer than X.
n_rows = X.shape[0]
n_keep = min(10000, n_rows)
ind = np.zeros(n_rows, dtype=bool)
ind[:n_keep] = True
ind = shuffle(ind)

# Keep the unscaled sampled rows for plotting, under the column names
# of `data`.
data_thr10 = pd.DataFrame(X[ind])
data_thr10.columns = data.columns

# Scale all rows, then cluster only the sampled ones.
scaler = StandardScaler()
X = scaler.fit_transform(X)
X = X[ind]
km = MeanShift(cluster_all=False)
preds = km.fit_predict(X)
# Samples labelled -1 are moved to a new id one past the largest label,
# so every label is a valid non-negative bincount index.
preds[preds == -1] = preds.max() + 1
print("components %s" % (set(preds),))
print(np.bincount(preds))
data_thr10['preds'] = pd.Series(preds).astype("category")

# Plot configuration.
color_key = ["red", "blue", "yellow", "grey", "black", "purple", "pink",
             "brown", "green", "orange"] * 2
title = str(np.bincount(preds))
TOOLS = "wheel_zoom,box_zoom,reset,box_select,pan"
plot_width = 900
plot_height = 300
x_name = 'rateCA'
y_name = 'rate'