This article collects typical usage examples of the MeanShift.fit method from Python's sklearn.cluster module. If you are wondering what MeanShift.fit does, how to call it, or want to see it in real code, the curated examples below should help. You can also read more about the class it belongs to, sklearn.cluster.MeanShift.
The following presents 15 code examples of MeanShift.fit, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code samples.
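Before the examples, here is a minimal, self-contained sketch of the basic MeanShift.fit pattern that most of them follow (the synthetic data and parameter values are illustrative only):
import numpy as np
from sklearn.cluster import MeanShift, estimate_bandwidth
from sklearn.datasets import make_blobs

# Toy 2-D data with three well-separated blobs
X, _ = make_blobs(n_samples=300, centers=3, cluster_std=0.6, random_state=0)
# Estimate a kernel bandwidth from the data, then fit the model
bandwidth = estimate_bandwidth(X, quantile=0.2, n_samples=300)
ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
ms.fit(X)
print("estimated clusters: %d" % len(np.unique(ms.labels_)))
print(ms.cluster_centers_)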
Example 1: meanShift
# Required import: from sklearn.cluster import MeanShift [as alias]
# Or: from sklearn.cluster.MeanShift import fit [as alias]
def meanShift(flat_image):
    # Estimate bandwidth from the data
    bandwidth = estimate_bandwidth(flat_image, quantile=0.2, n_samples=500)
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
    ms.fit(flat_image)
    labels = ms.labels_
    return ms.labels_, ms.cluster_centers_
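A hypothetical way to call this helper for colour-based image segmentation (the random "image" below is a stand-in for a real photo; any (height, width, 3) array reshaped to a pixel list would do):
import numpy as np
from sklearn.cluster import MeanShift, estimate_bandwidth

image = np.random.RandomState(0).randint(0, 255, size=(40, 40, 3)).astype(np.float64)
flat_image = image.reshape(-1, 3)                   # one row per pixel, columns = RGB
labels, centers = meanShift(flat_image)
segmented = centers[labels].reshape(image.shape)    # each pixel replaced by its cluster centre
print("segments found: %d" % len(centers))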
Example 2: meanshift_for_hough_line
# Required import: from sklearn.cluster import MeanShift [as alias]
# Or: from sklearn.cluster.MeanShift import fit [as alias]
def meanshift_for_hough_line(self):
    # init mean shift
    pixels_of_label = {}
    points_of_label = {}
    for hough_line in self.points_of_hough_line:
        pixels = self.pixels_of_hough_line[hough_line]
        pixels = np.array(pixels)
        bandwidth = estimate_bandwidth(pixels, quantile=QUANTILE, n_samples=500)
        if bandwidth == 0:
            bandwidth = 2
        ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
        ms.fit(pixels)
        labels = ms.labels_
        labels_unique = np.unique(labels)
        n_clusters_ = len(labels_unique)
        for k in range(n_clusters_):
            label = list(hough_line)
            label.append(k)
            pixels_of_label[tuple(label)] = map(tuple, pixels[labels == k])
    for label in pixels_of_label:
        pixels = pixels_of_label[label]
        points = map(self.img.get_bgr_value, pixels)
        points_of_label[label] = points
    self.pixels_of_hough_line = pixels_of_label
    self.points_of_hough_line = points_of_label
Example 3: simplify_data1
# Required import: from sklearn.cluster import MeanShift [as alias]
# Or: from sklearn.cluster.MeanShift import fit [as alias]
def simplify_data1(x):
    X = np.array(zip(x, np.zeros(len(x))), dtype=np.float)
    bandwidth = estimate_bandwidth(X, quantile=0.2)
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
    ms.fit(X)
    labels = ms.labels_
    cluster_centers = ms.cluster_centers_
    labels_unique = np.unique(labels)
    n_clusters_ = len(labels_unique)
    #print n_clusters_
    #exit()
    start = 0
    value = 0
    print x
    for k in range(n_clusters_):
        my_members = labels == k
        print "cluster {0}: {1}".format(k, X[my_members, 0]), np.average(X[my_members, 0])
        value = np.average(X[my_members, 0])
        val2 = 0
        for i in xrange(start, start + len(X[my_members, 0])):
            val2 += X[i][0]
            print val2, X[i][0], i
            X[i][0] = value
        print "FINAL", val2 / len(X[my_members, 0])
        start += len(X[my_members, 0])
    return X[:, 0]
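A hypothetical call (Python 2, since the helper relies on zip() and xrange returning lists). Note that the index arithmetic with start assumes each cluster's members occupy a contiguous block of the input sequence:
values = [1.0, 1.2, 0.9, 5.0, 5.1, 4.8]
print simplify_data1(values)   # each contiguous block of similar values collapses to a cluster average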
Example 4: _fit_mean_shift
# Required import: from sklearn.cluster import MeanShift [as alias]
# Or: from sklearn.cluster.MeanShift import fit [as alias]
def _fit_mean_shift(self, x):
    for c in xrange(len(self.crange)):
        quant = 0.015 * (c + 1)
        for r in xrange(self.repeats):
            bandwidth = estimate_bandwidth(
                x, quantile=quant, random_state=r)
            idx = c * self.repeats + r
            model = MeanShift(
                bandwidth=bandwidth, bin_seeding=True)
            model.fit(x)
            self._labels[idx] = model.labels_
            self._parameters[idx] = model.cluster_centers_
            # build equivalent gmm
            k = model.cluster_centers_.shape[0]
            model_gmm = GMM(n_components=k, covariance_type=self.cvtype,
                            init_params='c', n_iter=0)
            model_gmm.means_ = model.cluster_centers_
            model_gmm.weights_ = sp.array(
                [(model.labels_ == i).sum() for i in xrange(k)])
            model_gmm.fit(x)
            # evaluate goodness of fit
            self._ll[idx] = model_gmm.score(x).sum()
            if self.gof_type == 'aic':
                self._gof[idx] = model_gmm.aic(x)
            if self.gof_type == 'bic':
                self._gof[idx] = model_gmm.bic(x)
            print quant, k, self._gof[idx]
Example 5: mean_shift_cluster_analysis
# Required import: from sklearn.cluster import MeanShift [as alias]
# Or: from sklearn.cluster.MeanShift import fit [as alias]
def mean_shift_cluster_analysis(x, y, quantile=0.2, n_samples=1000):
    # ADAPTED FROM:
    # http://scikit-learn.org/stable/auto_examples/cluster/plot_mean_shift.html#example-cluster-plot-mean-shift-py
    # The following bandwidth can be automatically detected using estimate_bandwidth
    X = np.hstack((x.reshape((x.shape[0], 1)), y.reshape((y.shape[0], 1))))
    bandwidth = estimate_bandwidth(X, quantile=quantile, n_samples=n_samples)
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
    ms.fit(X)
    labels = ms.labels_
    cluster_centers = ms.cluster_centers_
    labels_unique = np.unique(labels)
    n_clusters_ = len(labels_unique)
    #print("number of estimated clusters : %d" % n_clusters_)
    colors = 'bgrcmykbgrcmykbgrcmykbgrcmykbgrcmykbgrcmykbgrcmyk'  # cycle('bgrcmykbgrcmykbgrcmykbgrcmyk')
    for i in xrange(len(np.unique(labels))):
        my_members = labels == i
        cluster_center = cluster_centers[i]
        plt.scatter(X[my_members, 0], X[my_members, 1], s=90, c=colors[i], alpha=0.7)
        plt.scatter(cluster_center[0], cluster_center[1], marker='+', s=280, c=colors[i])
    tolx = (X[:, 0].max() - X[:, 0].min()) * 0.03
    toly = (X[:, 1].max() - X[:, 1].min()) * 0.03
    plt.xlim(X[:, 0].min() - tolx, X[:, 0].max() + tolx)
    plt.ylim(X[:, 1].min() - toly, X[:, 1].max() + toly)
    plt.show()
    return labels
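A hypothetical call with two synthetic point clouds (the function opens a window via plt.show(), so it needs an interactive matplotlib backend, and it uses xrange and therefore Python 2):
import numpy as np

rng = np.random.RandomState(0)
x = np.concatenate([rng.normal(0, 1, 200), rng.normal(6, 1, 200)])
y = np.concatenate([rng.normal(0, 1, 200), rng.normal(6, 1, 200)])
labels = mean_shift_cluster_analysis(x, y, quantile=0.2, n_samples=400)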
Example 6: train
# Required import: from sklearn.cluster import MeanShift [as alias]
# Or: from sklearn.cluster.MeanShift import fit [as alias]
def train(trainingData, pklFile, clusteringAll, numberOfClusters=None):
    # ========================================================================= #
    # =============== STEP 1. DEFINE OUTPUT LEARNT MODEL FILE ================= #
    # ========================================================================= #
    if (pklFile == ''):
        os.system('rm -rf learntModel && mkdir learntModel')
        pklFile = 'learntModel/learntModel.pkl'
    # ========================================================================= #
    # =============== STEP 2. PERFORM CLUSTERING TO THE DATA ================== #
    # ========================================================================= #
    if (numberOfClusters == None):
        print "Running MeanShift Model..."
        bandwidth = estimate_bandwidth(trainingData)
        ms = MeanShift(bandwidth=bandwidth, bin_seeding=False, cluster_all=clusteringAll)
        ms.fit(trainingData)
        joblib.dump(ms, pklFile)
        return {"numberOfClusters": len(ms.cluster_centers_), "labels": ms.labels_, "clusterCenters": ms.cluster_centers_}
    elif (numberOfClusters != None):
        print "Running K-Means Model..."
        kMeans = KMeans(init='k-means++', n_clusters=numberOfClusters)
        kMeans.fit(trainingData)
        joblib.dump(kMeans, pklFile)
        return {"numberOfClusters": len(kMeans.cluster_centers_), "labels": kMeans.labels_, "clusterCenters": kMeans.cluster_centers_}
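A hypothetical call (note the function shells out to rm/mkdir, so it assumes a Unix-like system, and it writes learntModel/learntModel.pkl in the current directory):
import numpy as np

rng = np.random.RandomState(1)
data = np.vstack([rng.normal(0, 0.5, (50, 2)), rng.normal(5, 0.5, (50, 2))])
result = train(data, '', clusteringAll=True)                        # MeanShift branch
print("MeanShift clusters: %d" % result["numberOfClusters"])
result = train(data, '', clusteringAll=True, numberOfClusters=2)    # K-Means branch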
Example 7: cluster_data
# Required import: from sklearn.cluster import MeanShift [as alias]
# Or: from sklearn.cluster.MeanShift import fit [as alias]
def cluster_data(data, clustering_method, num_clusters):
    cluster_centers = labels_unique = labels = extra = None
    if clustering_method == 'KMeans':
        # http://scikit-learn.org/stable/modules/generated/sklearn.cluster.KMeans.html#sklearn.cluster.KMeans
        k_means = KMeans(n_clusters=num_clusters, init='k-means++', n_init=10, max_iter=100, tol=0.0001,
                         precompute_distances=True, verbose=0, random_state=None, copy_x=True, n_jobs=1)
        k_means.fit(data)
        labels = k_means.labels_
        cluster_centers = k_means.cluster_centers_
    elif clustering_method == 'MeanShift':
        ms = MeanShift(bin_seeding=True, cluster_all=False)
        ms.fit(data)
        labels = ms.labels_
        cluster_centers = ms.cluster_centers_
    elif clustering_method == 'AffinityPropagation':
        af = AffinityPropagation().fit(data)
        cluster_centers = [data[i] for i in af.cluster_centers_indices_]
        labels = af.labels_
    elif clustering_method == "AgglomerativeClustering":
        n_neighbors = min(10, len(data) / 2)
        connectivity = kneighbors_graph(data, n_neighbors=n_neighbors)
        ward = AgglomerativeClustering(n_clusters=num_clusters, connectivity=connectivity,
                                       linkage='ward').fit(data)
        labels = ward.labels_
    elif clustering_method == "DBSCAN":
        db = DBSCAN().fit(data)
        core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
        core_samples_mask[db.core_sample_indices_] = True
        extra = core_samples_mask
        labels = db.labels_
    if labels is not None:
        labels_unique = np.unique(labels)
    return labels, cluster_centers, labels_unique, extra
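A hypothetical call exercising the MeanShift branch (the other branches need their respective estimators imported, and the KMeans branch uses the old precompute_distances argument, so this sketch sticks to MeanShift):
import numpy as np
from sklearn.datasets import make_blobs

data, _ = make_blobs(n_samples=300, centers=4, cluster_std=0.7, random_state=42)
labels, centers, unique_labels, extra = cluster_data(data, 'MeanShift', num_clusters=None)
print("estimated clusters (excluding orphans): %d" % len(unique_labels[unique_labels >= 0]))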
Example 8: meanshift
# Required import: from sklearn.cluster import MeanShift [as alias]
# Or: from sklearn.cluster.MeanShift import fit [as alias]
def meanshift(raw_data, t):
    # Compute clustering with MeanShift
    # The following bandwidth can be automatically detected using estimate_bandwidth
    #data = [ [(raw_data[i, 1]+raw_data[i, 5]), (raw_data[i, 2]+raw_data[i,6])] for i in range(raw_data.shape[0]) ]
    data = np.zeros((raw_data.shape[0], 2))
    X = raw_data[:, 1] + raw_data[:, 5]
    Y = raw_data[:, 2] + raw_data[:, 6]
    #X = raw_data[:,1] ; Y = raw_data[:,2];
    data = np.transpose(np.concatenate((np.mat(X), np.mat(Y)), axis=0))
    bandwidth = estimate_bandwidth(data, quantile=0.2, n_samples=500)
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
    ms.fit(data)
    labels = ms.labels_
    cluster_centers = ms.cluster_centers_
    labels_unique = np.unique(labels)
    n_clusters_ = len(labels_unique)
    print("number of estimated clusters : %d" % n_clusters_)
    # Plot result
    plt.figure(t)
    plt.clf()
    colors = cycle('bgrcmykbgrcmykbgrcmykbgrcmyk')
    for k, col in zip(range(n_clusters_), colors):
        my_members = labels == k
        cluster_center = cluster_centers[k]
        plt.plot(data[my_members, 0], data[my_members, 1], col + '.')
        plt.plot(cluster_center[0], cluster_center[1], 'o', markerfacecolor=col,
                 markeredgecolor='k', markersize=14)
    plt.title('Estimated number of clusters: %d' % n_clusters_)
    plt.axis('equal')
    plt.show()
Example 9: mean_shift
# Required import: from sklearn.cluster import MeanShift [as alias]
# Or: from sklearn.cluster.MeanShift import fit [as alias]
def mean_shift(X):
    bandwidth = estimate_bandwidth(X, quantile=0.2, n_samples=1000)
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True, cluster_all=False)
    ms.fit(X)
    labels = ms.labels_
    cluster_centers = ms.cluster_centers_
    return labels, cluster_centers
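A hypothetical call; because cluster_all=False, points that fall outside every kernel get the label -1:
import numpy as np
from sklearn.datasets import make_blobs

X, _ = make_blobs(n_samples=500, centers=3, cluster_std=0.5, random_state=7)
labels, centers = mean_shift(X)
print("clusters found: %d" % len(np.unique(labels[labels >= 0])))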
Example 10: make
# Required import: from sklearn.cluster import MeanShift [as alias]
# Or: from sklearn.cluster.MeanShift import fit [as alias]
def make(filename, precision):
    with open('test.geojson') as f:
        data = json.load(f)
    features = data['features']
    points = [
        geo['geometry']["coordinates"]
        for geo in features if pred(geo)
    ]
    print points
    ar_points = array(points).reshape(len(points) * 2, 2)
    print ar_points
    bandwidth = estimate_bandwidth(ar_points) / precision
    cluster = MeanShift(bandwidth=bandwidth)
    cluster.fit(ar_points)
    labels = cluster.labels_
    cluster_centers = cluster.cluster_centers_
    print 'clusters:', len(unique(labels))
    for i, geo in enumerate(filter(pred, features)):
        geo['geometry']["coordinates"] = [
            list(cluster_centers[labels[i*2 + j]])
            for j in range(2)
        ]
    with open(filename, 'w') as f:
        json.dump(data, f)
Example 11: ms_algo
# Required import: from sklearn.cluster import MeanShift [as alias]
# Or: from sklearn.cluster.MeanShift import fit [as alias]
def ms_algo(X, bandwidth=None):
    if bandwidth is None:
        n_samples = X.shape[0]
        bandwidth = estimate_bandwidth(X, quantile=0.2, n_samples=n_samples)
    # Apply the mean shift algorithm from the sklearn library
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
    ms.fit(X)
    # Collect the labels and the cluster centers found by mean shift
    labels = ms.labels_
    cluster_centers = ms.cluster_centers_
    labels_unique = np.unique(labels)
    n_clusters_ = len(labels_unique)  # Number of clusters
    # Print section
    print("The number of clusters is: %d" % n_clusters_)
    print("The centers are:")
    for i in range(n_clusters_):
        print i,
        print cluster_centers[i]
    return cluster_centers
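A hypothetical call (the helper itself uses Python 2 print statements); the bandwidth can either be estimated from all samples or passed explicitly:
import numpy as np
from sklearn.datasets import make_blobs

X, _ = make_blobs(n_samples=400, centers=3, cluster_std=0.6, random_state=1)
centers = ms_algo(X)                  # bandwidth estimated internally
centers = ms_algo(X, bandwidth=1.5)   # or supplied by the caller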
Example 12: do_meanshift
# Required import: from sklearn.cluster import MeanShift [as alias]
# Or: from sklearn.cluster.MeanShift import fit [as alias]
def do_meanshift(s_path, band1, band2, band3, band4, colour1, colour2,
                 make_plot):
    '''Meanshift clustering to determine the number of clusters in the
    data, which is passed to the KMEANS function'''
    # Truncate data
    X = np.vstack([colour1, colour2]).T
    # Compute clustering with MeanShift
    # Scale data because meanshift generates circular clusters
    X_scaled = preprocessing.scale(X)
    # The following bandwidth can be automatically detected using
    # the routine estimate_bandwidth(X). Bandwidth can also be set manually.
    bandwidth = estimate_bandwidth(X)
    #bandwidth = 0.65
    # Meanshift clustering
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True, cluster_all=False)
    ms.fit(X_scaled)
    labels_unique = np.unique(ms.labels_)
    objects = ms.labels_[ms.labels_ >= 0]
    n_clusters = len(labels_unique[labels_unique >= 0])
    # Make plot
    if "meanshift" in make_plot:
        make_ms_plots(s_path, colour1, colour2, n_clusters, X, ms,
                      band1, band2, band3, band4, objects)
    return (n_clusters, bandwidth)
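A hypothetical call with synthetic colours; passing an empty make_plot list skips the call to the external make_ms_plots helper, so only the cluster count and bandwidth come back (the band arguments are then just passed through):
import numpy as np

rng = np.random.RandomState(3)
colour1 = np.concatenate([rng.normal(0, 0.3, 150), rng.normal(2, 0.3, 150)])
colour2 = np.concatenate([rng.normal(0, 0.3, 150), rng.normal(2, 0.3, 150)])
n_clusters, bw = do_meanshift('.', 'g', 'r', 'i', 'z', colour1, colour2, make_plot=[])
print("clusters: %d, bandwidth: %.3f" % (n_clusters, bw))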
Example 13: centers_y_clusters
# Required import: from sklearn.cluster import MeanShift [as alias]
# Or: from sklearn.cluster.MeanShift import fit [as alias]
def centers_y_clusters(self, graph_db, nodes, consulta, cyprop):
    group = []
    todo = []
    rr = []
    for n in nodes:
        tiene = neo4j.CypherQuery(graph_db, consulta + " where id(n) =" + str(n.id) + " return count(distinct(e))" + cyprop + " as cuenta").execute()
        for r in tiene:
            todo.append([r.cuenta])
            rr.append(r.cuenta)
    ms = MeanShift(bin_seeding=True)
    ms.fit(np.asarray(todo))
    labels = ms.labels_
    cluster_centers = sorted(ms.cluster_centers_, key=lambda x: x[0])
    for idx, cl in enumerate(cluster_centers):
        cluster_centers[idx] = float(cl[0])
    for u in cluster_centers:
        group.append([])
    for n in nodes:
        tiene = neo4j.CypherQuery(graph_db, consulta + " where id(n) =" + str(n.id) + " return count(distinct(e))" + cyprop + " as cuenta").execute()
        for r in tiene:
            valor = r.cuenta
            for idx, v in enumerate(cluster_centers):
                if idx == 0:
                    temp1 = -9999
                else:
                    temp1 = (cluster_centers[idx-1] + cluster_centers[idx]) / 2
                if idx == len(cluster_centers) - 1:
                    temp2 = 99999
                else:
                    temp2 = (cluster_centers[idx+1] + cluster_centers[idx]) / 2
                if temp1 <= valor < temp2:
                    group[idx].append(n)
    return cluster_centers, group
Example 14: BA_meanshift_cluster
# Required import: from sklearn.cluster import MeanShift [as alias]
# Or: from sklearn.cluster.MeanShift import fit [as alias]
def BA_meanshift_cluster(mark, chrom):
    '''
    @param:
    @return:
    perform mean shift cluster on 2D data:
    ((chromStart+chromEnd)*0.5, chromEnd-chromStart)
    '''
    path = os.path.join(get_data_dir(), "tmp", mark, "{0}-{1}.csv".format(chrom, mark))
    DF = pd.read_csv(path, sep='\t')
    S_x = 0.5 * (DF.loc[:, 'chromEnd'].values + DF.loc[:, 'chromStart'].values)
    S_y = DF.loc[:, 'chromEnd'].values - DF.loc[:, 'chromStart'].values
    X = np.hstack((np.atleast_2d(S_x[7000:8000]).T, np.atleast_2d(S_y[7000:8000]).T))
    print X
    bandwidth = estimate_bandwidth(X, quantile=0.1, n_samples=1000)
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
    ms.fit(X)
    labels = ms.labels_
    print list(set(labels))
    import matplotlib.pyplot as plt
    from itertools import cycle
    colors = cycle('bgrcmykbgrcmykbgrcmykbgrcmyk')
    for k, col in zip(range(len(list(set(labels)))), colors):
        my_members = labels == k
        plt.plot(X[my_members, 0], X[my_members, 1], col + '.')
    plt.title('Estimated number of clusters: %d' % len(list(set(labels))))
    plt.show()
Example 15: get_clusters
# Required import: from sklearn.cluster import MeanShift [as alias]
# Or: from sklearn.cluster.MeanShift import fit [as alias]
def get_clusters(self, in_file, cc_file, clf_file, arrivals_file, chunk_size=1710671):
    df = pd.read_csv(open(in_file), chunksize=chunk_size)
    dests = []
    part = 1
    lines = 1710671 / chunk_size
    try:
        dest = cPickle.load(open(arrivals_file))
    except IOError:
        for d in df:
            print "%d / %d" % (part, lines)
            part += 1
            for row in d.values:
                # print eval(row[-1])
                tmp = eval(row[-1])
                if len(tmp) > 0:
                    dests.append(tmp[-1])
        dest = np.array(dests)
        cPickle.dump(dest, open(arrivals_file, "w"), protocol=cPickle.HIGHEST_PROTOCOL)
    print "Destination points loaded"
    try:
        ms = cPickle.load(open(clf_file))
    except IOError:
        bw = 0.001
        ms = MeanShift(bandwidth=bw, bin_seeding=True, min_bin_freq=5, n_jobs=-2)
        ms.fit(dest)
        cPickle.dump(ms, open(clf_file, "w"), protocol=cPickle.HIGHEST_PROTOCOL)
    print "Mean shift loaded"
    cluster_centers = ms.cluster_centers_
    cPickle.dump(cluster_centers, open(cc_file, "w"), protocol=cPickle.HIGHEST_PROTOCOL)
    print "Clusters dumped"