本文整理汇总了Python中sklearn.cluster.MeanShift类的典型用法代码示例。如果您正苦于以下问题：Python MeanShift类的具体用法？Python MeanShift怎么用？Python MeanShift使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了MeanShift类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: meanShift
def meanShift(flat_image):
    """Cluster the rows of ``flat_image`` with mean shift.

    The kernel bandwidth is estimated from the data via
    ``estimate_bandwidth(quantile=0.2, n_samples=500)`` and the estimator is
    created with ``bin_seeding=True``.

    :param flat_image: data handed unchanged to ``estimate_bandwidth`` and
        ``MeanShift.fit`` (presumably one row per pixel -- confirm against
        the caller).
    :return: tuple ``(labels, cluster_centers)`` read from the fitted
        estimator's ``labels_`` and ``cluster_centers_`` attributes.
    """
    bandwidth = estimate_bandwidth(flat_image, quantile=0.2, n_samples=500)
    # Pass the bandwidth by keyword: the constructor arguments of MeanShift
    # are keyword-only in current scikit-learn releases, so the positional
    # form fails there.
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
    ms.fit(flat_image)
    return ms.labels_, ms.cluster_centers_
示例2: meanshift_for_hough_line
def meanshift_for_hough_line(self):
    """Split the pixels of every Hough line into mean-shift clusters.

    For each key of ``self.points_of_hough_line`` the matching pixel list in
    ``self.pixels_of_hough_line`` is clustered.  Every resulting cluster gets
    a new key made of the original line key with the cluster index appended.
    Afterwards both ``self.pixels_of_hough_line`` and
    ``self.points_of_hough_line`` are replaced by dictionaries keyed by these
    extended labels; the latter holds ``self.img.get_bgr_value(pixel)`` for
    every pixel of the cluster.

    The values are stored as real lists (not ``map`` objects) so they can be
    iterated more than once on Python 3 as well as on Python 2.
    """
    pixels_of_label = {}
    for hough_line in self.points_of_hough_line:
        pixels = np.array(self.pixels_of_hough_line[hough_line])
        bandwidth = estimate_bandwidth(pixels, quantile=QUANTILE, n_samples=500)
        if bandwidth == 0:
            # A zero bandwidth cannot be used for clustering; fall back to
            # the fixed value the original code used.
            bandwidth = 2
        ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
        ms.fit(pixels)
        labels = ms.labels_
        for k in range(len(np.unique(labels))):
            label = tuple(hough_line) + (k,)
            pixels_of_label[label] = [tuple(p) for p in pixels[labels == k]]

    points_of_label = {}
    for label, cluster_pixels in pixels_of_label.items():
        points_of_label[label] = [
            self.img.get_bgr_value(pixel) for pixel in cluster_pixels
        ]

    self.pixels_of_hough_line = pixels_of_label
    self.points_of_hough_line = points_of_label
示例3: _fit_mean_shift
def _fit_mean_shift(self, x):
    """Fit mean-shift models over a grid of bandwidth quantiles.

    For every index ``c`` in ``range(len(self.crange))`` the quantile
    ``0.015 * (c + 1)`` is used, and for every ``r`` in
    ``range(self.repeats)`` the bandwidth is estimated with
    ``random_state=r``.  Results are stored at ``idx = c * self.repeats + r``:

    * ``self._labels[idx]``     -- labels of the mean-shift fit
    * ``self._parameters[idx]`` -- cluster centres of the mean-shift fit
    * ``self._ll[idx]``         -- summed score of an equivalent GMM
    * ``self._gof[idx]``        -- AIC or BIC of that GMM, depending on
      ``self.gof_type`` (left untouched for any other value)

    :param x: data passed to ``estimate_bandwidth``, ``MeanShift.fit`` and
        the GMM.
    """
    for c in range(len(self.crange)):
        quant = 0.015 * (c + 1)
        for r in range(self.repeats):
            bandwidth = estimate_bandwidth(x, quantile=quant, random_state=r)
            idx = c * self.repeats + r
            model = MeanShift(bandwidth=bandwidth, bin_seeding=True)
            model.fit(x)
            self._labels[idx] = model.labels_
            self._parameters[idx] = model.cluster_centers_

            # Build an equivalent GMM: means are the cluster centres and the
            # weights are the cluster proportions.
            k = model.cluster_centers_.shape[0]
            model_gmm = GMM(n_components=k, covariance_type=self.cvtype,
                            init_params='c', n_iter=0)
            model_gmm.means_ = model.cluster_centers_
            counts = sp.array(
                [(model.labels_ == i).sum() for i in range(k)], dtype=float)
            # Mixture weights must sum to one; the raw member counts used
            # previously shifted every log-likelihood by a constant.
            model_gmm.weights_ = counts / counts.sum()
            model_gmm.fit(x)

            # Evaluate goodness of fit.
            self._ll[idx] = model_gmm.score(x).sum()
            if self.gof_type == 'aic':
                self._gof[idx] = model_gmm.aic(x)
            elif self.gof_type == 'bic':
                self._gof[idx] = model_gmm.bic(x)
            # Single-argument print works on both Python 2 and Python 3 and
            # yields the same space-separated output as before.
            print("%s %s %s" % (quant, k, self._gof[idx]))
示例4: cluster_pixels_ms
def cluster_pixels_ms(self):
    """Cluster the foreground pixels' descriptors with mean shift.

    The descriptor of every key ``(r, c)`` of ``self.img.fg_pixels`` is read
    from ``self.descriptor_map``, reduced with PCA to
    ``int(VECTOR_DIMENSION) // 2`` components and clustered.  The label of
    each pixel is written into ``self.labels_map`` at that pixel's index.
    Finally, for every label in ``range(K)``,
    ``self.pixels_of_hough_line_in_sphere[label]`` receives the indices and
    ``self.cluster_bgr[label]`` the ``self.img.bgr`` values of the pixels
    carrying that label.

    :type self: ColorRemover
    """
    # Materialize the keys: they are indexed/iterated together with the
    # labels below and a Python 3 keys() view is not indexable.
    fg_pixels = list(self.img.fg_pixels.keys())
    descriptors = np.array([self.descriptor_map[r][c] for r, c in fg_pixels])
    # Floor division keeps n_components an integer on Python 3 as well (a
    # float would be interpreted by PCA as a variance ratio).
    n_components = int(VECTOR_DIMENSION) // 2
    descriptors = PCA(n_components=n_components).fit_transform(descriptors)

    bandwidth = estimate_bandwidth(descriptors, quantile=0.05)
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
    ms.fit(descriptors)

    for xy, label in zip(fg_pixels, ms.labels_):
        # Index assignment replaces ndarray.itemset, which newer NumPy
        # releases no longer provide.
        self.labels_map[xy] = label

    # Save the indices and BGR values of each cluster, keyed by label.
    # NOTE(review): K is defined elsewhere; mean shift chooses its own number
    # of clusters, so K may not match the labels actually produced -- confirm.
    for label in range(K):
        mask = self.labels_map == label
        self.pixels_of_hough_line_in_sphere[label] = [
            tuple(index) for index in np.argwhere(mask)
        ]
        self.cluster_bgr[label] = [tuple(bgr) for bgr in self.img.bgr[mask]]
示例5: applyMeanShift
def applyMeanShift(data, quantileValue=0.2, clusterall=False):
    """Run mean shift on ``data`` and return member indices and centres.

    :param data: samples passed to ``estimate_bandwidth`` and
        ``MeanShift.fit``.
    :param quantileValue: quantile forwarded to ``estimate_bandwidth``.
    :param clusterall: forwarded as ``cluster_all``; when false, points that
        belong to no cluster are labelled ``-1``.
    :return: ``[result, barycenters]`` where ``result`` holds, for every
        distinct label in ascending order, the list of sample indices with
        that label, and ``barycenters`` holds the cluster centres.  If the
        label ``-1`` occurs, a zero vector is prepended to ``barycenters``
        so that rows stay aligned with ``result``.
    """
    result = []
    n_samples = len(data)
    print("Nombre de points du dataset: %d" % n_samples)

    bandwidth = estimate_bandwidth(data, quantile=quantileValue)
    ms = MeanShift(bandwidth=bandwidth, cluster_all=clusterall)
    # Apply mean shift.
    clustereddata = ms.fit(data)
    clusteredlabels = clustereddata.labels_
    barycenters = ms.cluster_centers_

    labels_unique = np.unique(clusteredlabels)
    has_orphans = -1 in labels_unique
    # The -1 label marks unclustered points, not a cluster, so it must not be
    # counted in the reported number of clusters.
    nbOfClusters = len(labels_unique) - (1 if has_orphans else 0)
    print("number of estimated clusters : %d" % nbOfClusters)

    for i in labels_unique:
        print("###Indices des points du cluster %d : ###" % i)
        result.append(
            [indice[0] for indice in np.argwhere(clusteredlabels == i)])

    # Add a zero-coordinate vector to account for the fact that the -1
    # "cluster" does not have a barycenter.
    if has_orphans:
        zero_row = [[0 for _ in range(len(barycenters[0]))]]
        barycenters = np.append(zero_row, barycenters, axis=0)
    return [result, barycenters]
示例6: cluster_data
def cluster_data(data, clustering_method, num_clusters):
    """Cluster ``data`` with the algorithm named by ``clustering_method``.

    Supported names: ``'KMeans'``, ``'MeanShift'``, ``'AffinityPropagation'``,
    ``'AgglomerativeClustering'`` and ``'DBSCAN'``.  Any other name leaves
    every returned value ``None``.

    :param data: samples to cluster.
    :param clustering_method: name of the algorithm (see above).
    :param num_clusters: number of clusters; only used by KMeans and
        AgglomerativeClustering.
    :return: tuple ``(labels, cluster_centers, labels_unique, extra)``.
        ``cluster_centers`` is only set for KMeans, MeanShift and
        AffinityPropagation; ``extra`` is the boolean core-sample mask for
        DBSCAN and ``None`` otherwise.
    """
    cluster_centers = labels_unique = labels = extra = None

    if clustering_method == 'KMeans':
        # http://scikit-learn.org/stable/modules/generated/sklearn.cluster.KMeans.html#sklearn.cluster.KMeans
        # NOTE(review): precompute_distances and n_jobs are kept as in the
        # original call; verify they exist in the installed scikit-learn.
        k_means = KMeans(n_clusters=num_clusters, init='k-means++', n_init=10,
                         max_iter=100, tol=0.0001, precompute_distances=True,
                         verbose=0, random_state=None, copy_x=True, n_jobs=1)
        k_means.fit(data)
        labels = k_means.labels_
        cluster_centers = k_means.cluster_centers_
    elif clustering_method == 'MeanShift':
        ms = MeanShift(bin_seeding=True, cluster_all=False)
        ms.fit(data)
        labels = ms.labels_
        cluster_centers = ms.cluster_centers_
    elif clustering_method == 'AffinityPropagation':
        af = AffinityPropagation().fit(data)
        cluster_centers = [data[i] for i in af.cluster_centers_indices_]
        labels = af.labels_
    elif clustering_method == "AgglomerativeClustering":
        # Floor division: n_neighbors has to stay an integer on Python 3 too.
        n_neighbors = min(10, len(data) // 2)
        connectivity = kneighbors_graph(data, n_neighbors=n_neighbors)
        ward = AgglomerativeClustering(n_clusters=num_clusters,
                                       connectivity=connectivity,
                                       linkage='ward').fit(data)
        labels = ward.labels_
    elif clustering_method == "DBSCAN":
        db = DBSCAN().fit(data)
        core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
        core_samples_mask[db.core_sample_indices_] = True
        extra = core_samples_mask
        labels = db.labels_

    if labels is not None:
        labels_unique = np.unique(labels)
    return labels, cluster_centers, labels_unique, extra
示例7: mean_shift_cluster_analysis
def mean_shift_cluster_analysis(x, y, quantile=0.2, n_samples=1000):
    """Cluster the 2-D points ``(x, y)`` with mean shift and plot the result.

    Adapted from
    http://scikit-learn.org/stable/auto_examples/cluster/plot_mean_shift.html#example-cluster-plot-mean-shift-py

    Each cluster's members and its centre (drawn as ``+``) are plotted in one
    colour; axis limits are padded by 3 % of the data range, and the figure
    is shown with ``plt.show()``.

    :param x: array whose ``shape[0]`` equals its number of elements.
    :param y: array of the same form as ``x``.
    :param quantile: forwarded to ``estimate_bandwidth``.
    :param n_samples: forwarded to ``estimate_bandwidth``.
    :return: the labels of the fitted estimator.
    """
    X = np.hstack((x.reshape((x.shape[0], 1)), y.reshape((y.shape[0], 1))))
    # The bandwidth is detected automatically from the data.
    bandwidth = estimate_bandwidth(X, quantile=quantile, n_samples=n_samples)
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
    ms.fit(X)
    labels = ms.labels_
    cluster_centers = ms.cluster_centers_
    n_clusters_ = len(np.unique(labels))

    # Cycle through the palette so any number of clusters can be drawn; the
    # first 49 colours are the same as with the former fixed-length string.
    palette = 'bgrcmyk'
    for i in range(n_clusters_):
        my_members = labels == i
        cluster_center = cluster_centers[i]
        colour = palette[i % len(palette)]
        plt.scatter(X[my_members, 0], X[my_members, 1], s=90, c=colour,
                    alpha=0.7)
        plt.scatter(cluster_center[0], cluster_center[1], marker='+', s=280,
                    c=colour)

    x_min, x_max = X[:, 0].min(), X[:, 0].max()
    y_min, y_max = X[:, 1].min(), X[:, 1].max()
    tolx = (x_max - x_min) * 0.03
    toly = (y_max - y_min) * 0.03
    plt.xlim(x_min - tolx, x_max + tolx)
    plt.ylim(y_min - toly, y_max + toly)
    plt.show()
    return labels
示例8: simplify_data1
def simplify_data1(x):
    """Replace every value of ``x`` by the mean of its mean-shift cluster.

    The values are embedded as points ``(value, 0)``, clustered with a
    bandwidth estimated at quantile 0.2, and each member of a cluster is then
    overwritten with that cluster's average.  Progress is traced on stdout.

    :param x: sequence of numbers.
    :return: 1-D float array, aligned with ``x``, holding the cluster means.
    """
    # list(zip(...)) and the builtin float keep this working on Python 3 and
    # on NumPy releases that no longer provide the np.float alias.
    X = np.array(list(zip(x, np.zeros(len(x)))), dtype=float)
    bandwidth = estimate_bandwidth(X, quantile=0.2)
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
    ms.fit(X)
    labels = ms.labels_
    n_clusters_ = len(np.unique(labels))

    print(x)
    for k in range(n_clusters_):
        my_members = labels == k
        members = X[my_members, 0]
        value = np.average(members)
        print("cluster {0}: {1} {2}".format(k, members, value))

        # Trace the running sum over the cluster's real members.  The former
        # code walked a contiguous index range instead, which only matched
        # the members when the input happened to be grouped by label.
        val2 = 0
        for i, is_member in enumerate(my_members):
            if not is_member:
                continue
            val2 += X[i][0]
            print("{0} {1} {2}".format(val2, X[i][0], i))
        print("FINAL {0}".format(val2 / len(members)))

        # Overwrite exactly the members of this cluster with their mean.
        X[my_members, 0] = value
    return X[:, 0]
示例9: Mean_Shift
def Mean_Shift(path):
    """Cluster the latitude/longitude points of a CSV file with mean shift.

    The CSV at ``path`` is read, duplicate rows are dropped and the
    ``latitude``/``longitude`` columns are clustered.  Points labelled ``-1``
    (not assigned to any cluster) are discarded.  The remaining points are
    written to ``output/points.csv``, the cluster centres to
    ``output/centers.csv``, and both are passed to ``plot_meanshift``.

    :param path: path (or buffer) of the input CSV file.
    :return: always ``0``.
    """
    # Import the data.
    data = pandas.read_csv(filepath_or_buffer=path, delimiter=',',
                           encoding='utf-8')
    # drop_duplicates returns a new frame; the result must be kept, otherwise
    # the call has no effect.
    data = data.drop_duplicates()
    print(data)

    # Read the coordinates.
    values = data[['latitude', 'longitude']].values
    print("printing values")
    print(values)

    # Mean shift.
    print("Clustering data Meanshift algorithm")
    bandwidth = estimate_bandwidth(values, quantile=0.003, n_samples=None)
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True, min_bin_freq=25,
                   cluster_all=False)
    ms.fit(values)

    data['cluster'] = ms.labels_
    # DataFrame.sort(columns=...) no longer exists; sort_values is its
    # replacement.
    data = data.sort_values(by='cluster')
    # copy() so the column assignment below acts on an independent frame.
    data = data[data['cluster'] != -1].copy()
    print(data['cluster'])
    data['cluster'] = data['cluster'].apply(lambda x: "cluster" + str(x))

    # Drop the orphan label explicitly.  Deleting the first unique label
    # removed a real cluster whenever no point was labelled -1.
    labels_unique = [
        label for label in np.unique(ms.labels_).tolist() if label != -1
    ]

    # Filter cluster centers according to the data filter.
    # NOTE(review): assumes every centre owns at least one point, so that the
    # labels line up with the rows of cluster_centers_ -- confirm.
    cluster_centers = DataFrame(ms.cluster_centers_,
                                columns=['latitude', 'longitude'])
    cluster_centers['cluster'] = labels_unique
    print(cluster_centers)
    n_centers_ = len(cluster_centers)
    print("number of clusters is :%d" % n_centers_)

    # "columns" is the current name of the former "cols" keyword of to_csv.
    data.to_csv(path_or_buf="output/points.csv",
                columns=['user', 'latitude', 'longitude', 'cluster',
                         'picture', 'datetaken'],
                encoding='utf-8')
    cluster_centers['cluster'] = cluster_centers['cluster'].apply(
        lambda x: "cluster" + str(x))
    cluster_centers.to_csv(path_or_buf="output/centers.csv",
                           columns=['latitude', 'longitude', 'cluster'],
                           encoding='utf-8')
    plot_meanshift(data, cluster_centers, n_centers_)
    return 0
示例10: meanShift
def meanShift(points):
    """Return the mean-shift cluster label of every column of ``points``.

    A ``MeanShift`` estimator with default settings is fitted on the
    transpose of ``points``; its labels are returned as a new array.
    """
    fitted = MeanShift().fit(points.T)
    return np.array(fitted.labels_)
示例11: ms_algo
def ms_algo(X, bandwidth=None):
    """Run mean shift on ``X``, print the centres and return them.

    :param X: sample array; ``X.shape[0]`` is used as the sample count when
        the bandwidth has to be estimated.
    :param bandwidth: kernel bandwidth.  When ``None`` it is estimated with
        ``estimate_bandwidth(X, quantile=0.2, n_samples=X.shape[0])``.
    :return: the cluster centres of the fitted estimator.
    """
    # Identity test: "== None" would be evaluated element-wise (and be
    # ambiguous) if an array were ever passed.
    if bandwidth is None:
        n_samples = X.shape[0]
        bandwidth = estimate_bandwidth(X, quantile=0.2, n_samples=n_samples)

    # Apply the mean shift algorithm from the sklearn library.
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
    ms.fit(X)

    # Collect the labels and the cluster centres from the fitted model.
    labels = ms.labels_
    cluster_centers = ms.cluster_centers_
    n_clusters_ = len(np.unique(labels))  # number of clusters

    # Print section.
    print("The number of clusters is: %d" % n_clusters_)
    print("The centers are:")
    for i in range(n_clusters_):
        # One line per cluster: "<index> <centre>", same output as the former
        # pair of Python 2 print statements.
        print("%d %s" % (i, cluster_centers[i]))
    return cluster_centers
示例12: find_clusters
def find_clusters(feature, items, bandwidth=None, min_bin_freq=None, cluster_all=True, n_jobs=1):
    """
    Cluster a list of items on one feature using the mean-shift algorithm.

    Each feature value is embedded as the point ``(value, 0)`` before
    clustering.

    :param feature: key used to retrieve the value to cluster on from each item
    :param items: iterable of mappings that contain ``feature``
    :param bandwidth: forwarded to ``MeanShift``
    :param min_bin_freq: forwarded to ``MeanShift``; ``None`` selects 1
    :param cluster_all: forwarded to ``MeanShift``; when false, points that
        belong to no cluster are labelled -1 and are left out of the result
    :param n_jobs: forwarded to ``MeanShift``
    :return: list with one dict per cluster, in ascending label order, with
        the keys ``'center'`` (median of the members), ``'sd'`` (standard
        deviation of the members) and ``'items'`` (array of member values)
    """
    values = [item[feature] for item in items]
    # Builtin float: the np.float alias no longer exists in current NumPy.
    X = np.array(list(zip(values, np.zeros(len(values)))), dtype=float)

    if min_bin_freq is None:
        # MeanShift expects an integer here; 1 is its documented default.
        min_bin_freq = 1
    ms = MeanShift(bandwidth=bandwidth, min_bin_freq=min_bin_freq,
                   cluster_all=cluster_all, n_jobs=n_jobs)
    ms.fit(X)
    labels = ms.labels_

    clusters = []
    # Iterate over the labels that actually occur.  Counting them and looping
    # over range(count) never met -1, so with orphan points it produced one
    # extra, empty cluster (NaN centre) instead of skipping the orphans.
    for k in np.unique(labels):
        if k == -1:
            continue
        members = X[labels == k, 0]
        clusters.append({
            'center': np.median(members),
            'sd': np.std(members),
            'items': members,
        })
    return clusters
示例13: do_meanshift
def do_meanshift(s_path, band1, band2, band3, band4, colour1, colour2,
                 make_plot):
    '''Meanshift clustering to determine the number of clusters in the
    data, which is passed to KMEANS function.

    ``colour1`` and ``colour2`` are stacked into a two-column array, scaled
    with ``preprocessing.scale`` and clustered with ``cluster_all=False``.
    When ``"meanshift"`` is contained in ``make_plot``, ``make_ms_plots`` is
    called with the unscaled data, the fitted estimator and the remaining
    arguments (``s_path`` and ``band1``..``band4`` are only forwarded there).

    Returns the tuple ``(n_clusters, bandwidth)`` where ``n_clusters`` counts
    the distinct non-negative labels.
    '''
    # Truncate data
    X = np.vstack([colour1, colour2]).T

    # Compute clustering with MeanShift.
    # Scale data because meanshift generates circular clusters
    X_scaled = preprocessing.scale(X)

    # The bandwidth is detected automatically with estimate_bandwidth; it can
    # also be set manually.  It has to be estimated on the same (scaled) data
    # the model is fitted on -- a bandwidth measured on the unscaled values
    # is in different units.
    bandwidth = estimate_bandwidth(X_scaled)

    # Meanshift clustering
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True, cluster_all=False)
    ms.fit(X_scaled)
    labels_unique = np.unique(ms.labels_)
    # Labels below zero mark points that were not assigned to a cluster.
    objects = ms.labels_[ms.labels_ >= 0]
    n_clusters = len(labels_unique[labels_unique >= 0])

    # Make plot
    if "meanshift" in make_plot:
        make_ms_plots(s_path, colour1, colour2, n_clusters, X, ms,
                      band1, band2, band3, band4, objects)
    return (n_clusters, bandwidth)
示例14: make
def make(filename, precision, source='test.geojson'):
    """Snap the end points of GeoJSON features to mean-shift cluster centres.

    The features of ``source`` accepted by ``pred`` are expected to carry
    exactly two 2-D points each: their coordinates are reshaped to
    ``(2 * count, 2)`` and written back two per feature.  All points are
    clustered together and every point is replaced by the centre of its
    cluster.  The modified document is dumped to ``filename``.

    :param filename: path of the JSON file to write.
    :param precision: divisor applied to the estimated bandwidth; larger
        values give a smaller bandwidth.
    :param source: path of the GeoJSON file to read; defaults to the path
        that used to be hard-coded.
    """
    with open(source) as f:
        data = json.load(f)

    # Select the features once so the points and the write-back loop below
    # are guaranteed to walk the same features in the same order.
    selected = [geo for geo in data['features'] if pred(geo)]
    points = [geo['geometry']["coordinates"] for geo in selected]
    print(points)

    ar_points = array(points).reshape(len(points) * 2, 2)
    print(ar_points)

    bandwidth = estimate_bandwidth(ar_points) / precision
    cluster = MeanShift(bandwidth=bandwidth)
    cluster.fit(ar_points)
    labels = cluster.labels_
    cluster_centers = cluster.cluster_centers_
    print('clusters: %d' % len(unique(labels)))

    for i, geo in enumerate(selected):
        # Points 2*i and 2*i + 1 of ar_points belong to the i-th feature.
        geo['geometry']["coordinates"] = [
            list(cluster_centers[labels[i * 2 + j]])
            for j in range(2)
        ]

    with open(filename, 'w') as f:
        json.dump(data, f)
示例15: centers_y_clusters
def centers_y_clusters(self, graph_db, nodes, consulta, cyprop):
    """Cluster per-node counts with mean shift and group the nodes by cluster.

    For every node a Cypher query is built from ``consulta``, the node id and
    ``cyprop``; the ``cuenta`` value of each returned row is one sample.  The
    samples are clustered in one dimension and each node is put into the
    group of the centre nearest to its count (the boundaries are the
    midpoints between neighbouring centres, a value on a boundary going to
    the upper group).

    :param graph_db: graph database handle passed to ``neo4j.CypherQuery``.
    :param nodes: iterable of nodes exposing an ``id`` attribute.
    :param consulta: leading part of the Cypher query.
    :param cyprop: text inserted after ``count(distinct(e))``.
    :return: tuple ``(cluster_centers, group)``: the centres as an ascending
        list of floats and, per centre, the list of nodes assigned to it (a
        node appears once per row its query returned).  Both are empty when
        no sample was collected.
    """
    # Run each query once and keep its counts; they are needed for the
    # clustering and again for the grouping.
    counts_per_node = []
    todo = []
    for n in nodes:
        # NOTE(review): the query is assembled by string concatenation; make
        # sure consulta and cyprop never contain untrusted input.
        query = (consulta + " where id(n) =" + str(n.id)
                 + " return count(distinct(e))" + cyprop + " as cuenta")
        counts = [r.cuenta for r in neo4j.CypherQuery(graph_db, query).execute()]
        counts_per_node.append((n, counts))
        todo.extend([cuenta] for cuenta in counts)

    if not todo:
        # Nothing to cluster; fitting an empty array would raise.
        return [], []

    ms = MeanShift(bin_seeding=True)
    ms.fit(np.asarray(todo))
    cluster_centers = sorted(float(center[0]) for center in ms.cluster_centers_)

    # Midpoints between consecutive centres separate the groups.  The outer
    # groups are unbounded, so no value is dropped (the former fixed limits
    # -9999 / 99999 silently skipped anything outside them).
    boundaries = [
        (low + high) / 2
        for low, high in zip(cluster_centers, cluster_centers[1:])
    ]
    group = [[] for _ in cluster_centers]
    for n, counts in counts_per_node:
        for valor in counts:
            # Number of boundaries at or below the value == index of its group.
            idx = sum(1 for limit in boundaries if valor >= limit)
            group[idx].append(n)
    return cluster_centers, group