当前位置: 首页>>代码示例>>Python>>正文


Python MeanShift.fit方法代码示例

本文整理汇总了Python中sklearn.cluster.MeanShift.fit方法的典型用法代码示例。如果您正苦于以下问题：Python MeanShift.fit方法的具体用法？Python MeanShift.fit怎么用？Python MeanShift.fit使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.cluster.MeanShift的用法示例。


在下文中一共展示了MeanShift.fit方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: meanShift

# 需要导入模块: from sklearn.cluster import MeanShift [as 别名]
# 或者: from sklearn.cluster.MeanShift import fit [as 别名]
def meanShift(flat_image):
    """Cluster the samples in ``flat_image`` with mean shift.

    The kernel bandwidth is estimated from the data (``quantile=0.2``,
    ``n_samples=500``) and the estimator is fitted with bin seeding enabled.

    Args:
        flat_image: Sample array handed to ``estimate_bandwidth`` and
            ``MeanShift.fit`` (presumably one row per pixel -- confirm
            against the caller).

    Returns:
        tuple: ``(labels, cluster_centers)`` taken from the fitted
        estimator's ``labels_`` and ``cluster_centers_`` attributes.
    """
    bandwidth = estimate_bandwidth(flat_image, quantile=0.2, n_samples=500)
    # Pass the bandwidth by keyword: current scikit-learn releases declare the
    # MeanShift constructor arguments as keyword-only.
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
    ms.fit(flat_image)
    return ms.labels_, ms.cluster_centers_
开发者ID:amitkumarx86,项目名称:project_python,代码行数:9,代码来源:kmeans_meanshift.py

示例2: meanshift_for_hough_line

# 需要导入模块: from sklearn.cluster import MeanShift [as 别名]
# 或者: from sklearn.cluster.MeanShift import fit [as 别名]
 def meanshift_for_hough_line(self):
     """Split the pixels of every Hough line into mean-shift clusters.

     For each key of ``self.points_of_hough_line`` the matching entry of
     ``self.pixels_of_hough_line`` is clustered.  Each cluster is stored
     under a new key made of the original line key extended with the
     cluster index.  Afterwards both ``self.pixels_of_hough_line`` and
     ``self.points_of_hough_line`` are replaced by the per-cluster
     dictionaries; the latter holds ``self.img.get_bgr_value(pixel)`` for
     every pixel of the cluster.
     """
     pixels_of_label = {}
     for hough_line in self.points_of_hough_line:
         pixels = np.array(self.pixels_of_hough_line[hough_line])
         bandwidth = estimate_bandwidth(pixels, quantile=QUANTILE, n_samples=500)
         if bandwidth == 0:
             # A zero estimate is replaced by a fixed bandwidth of 2.
             bandwidth = 2
         ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
         ms.fit(pixels)
         labels = ms.labels_
         n_clusters_ = len(np.unique(labels))
         for k in range(n_clusters_):
             label = tuple(hough_line) + (k,)
             # Build real lists (not lazy ``map`` objects) so the stored
             # values can be iterated more than once on Python 3 as well.
             pixels_of_label[label] = [tuple(p) for p in pixels[labels == k]]

     get_bgr_value = self.img.get_bgr_value
     points_of_label = {}
     for label, members in pixels_of_label.items():
         points_of_label[label] = [get_bgr_value(p) for p in members]

     self.pixels_of_hough_line = pixels_of_label
     self.points_of_hough_line = points_of_label
开发者ID:catbaron-,项目名称:hough_transform_color_removal,代码行数:27,代码来源:main_k.py

示例3: simplify_data1

# 需要导入模块: from sklearn.cluster import MeanShift [as 别名]
# 或者: from sklearn.cluster.MeanShift import fit [as 别名]
def simplify_data1(x):
    """Replace every value of ``x`` by the mean of its mean-shift cluster.

    The values are embedded as the first column of a two-column matrix (the
    second column is all zeros), clustered with mean shift, and each value is
    then overwritten with the average of the cluster it was assigned to.

    The previous implementation wrote each cluster mean to a *contiguous*
    run of indices, which is only correct when the samples happen to be
    ordered by cluster label; the mean is now written through the cluster's
    boolean membership mask.  The per-index debug output that depended on
    that contiguous walk has been removed.

    Args:
        x: Sequence of numeric values.

    Returns:
        numpy.ndarray: 1-D float array, same length as ``x``, holding the
        cluster mean for every input value.
    """
    # ``float`` replaces the removed ``np.float`` alias; ``column_stack``
    # avoids feeding a lazy ``zip`` object to ``np.array`` on Python 3.
    X = np.column_stack((x, np.zeros(len(x)))).astype(float)
    bandwidth = estimate_bandwidth(X, quantile=0.2)
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
    ms.fit(X)
    labels = ms.labels_

    print(x)
    for k in np.unique(labels):
        my_members = labels == k
        value = np.average(X[my_members, 0])
        print("cluster {0}: {1} {2}".format(k, X[my_members, 0], value))
        # Overwrite exactly the members of this cluster.
        X[my_members, 0] = value
    return X[:, 0]
开发者ID:leaguilar,项目名称:playground,代码行数:28,代码来源:plot_data.py

示例4: _fit_mean_shift

# 需要导入模块: from sklearn.cluster import MeanShift [as 别名]
# 或者: from sklearn.cluster.MeanShift import fit [as 别名]
    def _fit_mean_shift(self, x):
        """Fit and score one mean-shift model per (quantile step, repeat).

        For every index of ``self.crange`` a quantile of
        ``0.015 * (index + 1)`` is used, and for every repeat the bandwidth
        is estimated with the repeat number as ``random_state``.  Results
        are stored at slot ``index * self.repeats + repeat`` of
        ``self._labels``, ``self._parameters``, ``self._ll`` and
        ``self._gof``.

        Args:
            x: Sample array passed to the estimators.
        """
        for step in range(len(self.crange)):
            quantile = 0.015 * (step + 1)
            for rep in range(self.repeats):
                width = estimate_bandwidth(
                    x, quantile=quantile, random_state=rep)
                slot = step * self.repeats + rep
                shifter = MeanShift(bandwidth=width, bin_seeding=True)
                shifter.fit(x)
                self._labels[slot] = shifter.labels_
                self._parameters[slot] = shifter.cluster_centers_

                # Mixture model with one component per mean-shift cluster:
                # means are the cluster centers, weights the member counts.
                n_comp = shifter.cluster_centers_.shape[0]
                mixture = GMM(n_components=n_comp,
                              covariance_type=self.cvtype,
                              init_params='c', n_iter=0)
                mixture.means_ = shifter.cluster_centers_
                member_counts = [(shifter.labels_ == i).sum()
                                 for i in range(n_comp)]
                mixture.weights_ = sp.array(member_counts)
                mixture.fit(x)

                # Goodness of fit: summed score plus the selected criterion.
                self._ll[slot] = mixture.score(x).sum()
                if self.gof_type == 'aic':
                    self._gof[slot] = mixture.aic(x)
                elif self.gof_type == 'bic':
                    self._gof[slot] = mixture.bic(x)

                print("%s %s %s" % (quantile, n_comp, self._gof[slot]))

示例5: mean_shift_cluster_analysis

# 需要导入模块: from sklearn.cluster import MeanShift [as 别名]
# 或者: from sklearn.cluster.MeanShift import fit [as 别名]
def mean_shift_cluster_analysis(x,y,quantile=0.2,n_samples=1000):
    """Cluster the points ``(x, y)`` with mean shift and plot the result.

    Adapted from the scikit-learn ``plot_mean_shift`` example.  Every
    cluster is drawn as a scatter of its members plus a ``+`` marker at its
    center, the axes are padded by 3% of the data range, and the figure is
    shown with ``plt.show()``.

    Args:
        x: Array of x coordinates (reshaped to a column).
        y: Array of y coordinates (reshaped to a column).
        quantile: Forwarded to ``estimate_bandwidth``.
        n_samples: Forwarded to ``estimate_bandwidth``.

    Returns:
        The ``labels_`` array of the fitted estimator.
    """
    X = np.hstack((x.reshape((x.shape[0],1)),y.reshape((y.shape[0],1))))
    bandwidth = estimate_bandwidth(X, quantile=quantile, n_samples=n_samples)

    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
    ms.fit(X)
    labels = ms.labels_
    cluster_centers = ms.cluster_centers_
    n_clusters_ = len(np.unique(labels))

    # Cycle through the palette with a modulo so that any number of clusters
    # gets a colour; indexing a fixed-length colour string raised IndexError
    # once the cluster count exceeded its length.
    palette = 'bgrcmyk'
    for i in range(n_clusters_):
        colour = palette[i % len(palette)]
        my_members = labels == i
        cluster_center = cluster_centers[i]
        plt.scatter(X[my_members, 0], X[my_members, 1],s=90,c=colour,alpha=0.7)
        plt.scatter(cluster_center[0], cluster_center[1],marker='+',s=280,c=colour)

    # Pad the axis limits by 3% of the data range on each side.
    x_min, x_max = X[:,0].min(), X[:,0].max()
    y_min, y_max = X[:,1].min(), X[:,1].max()
    tolx = (x_max-x_min)*0.03
    toly = (y_max-y_min)*0.03
    plt.xlim(x_min-tolx,x_max+tolx)
    plt.ylim(y_min-toly,y_max+toly)
    plt.show()
    return labels
开发者ID:armatita,项目名称:GEOMS2,代码行数:30,代码来源:cerena_multivariate_utils.py

示例6: train

# 需要导入模块: from sklearn.cluster import MeanShift [as 别名]
# 或者: from sklearn.cluster.MeanShift import fit [as 别名]
def train(trainingData, pklFile, clusteringAll, numberOfClusters=None):
    """Fit a clustering model, persist it with joblib and summarise it.

    MeanShift is used when ``numberOfClusters`` is ``None`` (bandwidth
    estimated from the data, no bin seeding); otherwise KMeans with
    ``k-means++`` initialisation and the requested cluster count is used.

    Args:
        trainingData: Samples passed to the estimator's ``fit``.
        pklFile: Path the fitted model is dumped to.  An empty string
            selects ``learntModel/learntModel.pkl`` and recreates the
            ``learntModel`` directory from scratch.
        clusteringAll: Forwarded as ``cluster_all`` to MeanShift.
        numberOfClusters: Cluster count for KMeans, or ``None`` to run
            MeanShift.

    Returns:
        dict: keys ``"numberOfClusters"``, ``"labels"`` and
        ``"clusterCenters"`` taken from the fitted model.
    """
    import shutil  # local import: only needed to reset the default directory

    # STEP 1. Define the output file of the learnt model.
    if pklFile == '':
        # The former shell command ``rm -rf learntModel & mkdir learntModel``
        # put ``rm`` in the background (single ``&``), so removal and
        # creation raced each other.  Do both steps sequentially in-process.
        shutil.rmtree('learntModel', ignore_errors=True)
        if not os.path.isdir('learntModel'):
            os.mkdir('learntModel')
        pklFile = 'learntModel/learntModel.pkl'

    # STEP 2. Perform the clustering.
    if numberOfClusters is None:
        print("Running MeanShift Model...")
        bandwidth = estimate_bandwidth(trainingData)
        model = MeanShift(bandwidth=bandwidth, bin_seeding=False, cluster_all=clusteringAll)
    else:
        print("Running K-Means Model...")
        model = KMeans(init='k-means++', n_clusters=numberOfClusters)

    model.fit(trainingData)
    joblib.dump(model, pklFile)
    return {"numberOfClusters":len(model.cluster_centers_), "labels": model.labels_, "clusterCenters":model.cluster_centers_}
开发者ID:ZAZAZakari,项目名称:ML-Algorithm,代码行数:27,代码来源:clustering.py

示例7: cluster_data

# 需要导入模块: from sklearn.cluster import MeanShift [as 别名]
# 或者: from sklearn.cluster.MeanShift import fit [as 别名]
def cluster_data(data,clustering_method,num_clusters):
    """Cluster ``data`` with the algorithm named by ``clustering_method``.

    Args:
        data: Samples passed to the selected estimator's ``fit``.
        clustering_method: ``'KMeans'``, ``'MeanShift'``,
            ``'AffinityPropagation'``, ``'AgglomerativeClustering'`` or
            ``'DBSCAN'``.  Any other value performs no clustering.
        num_clusters: Cluster count; only used by KMeans and
            AgglomerativeClustering.

    Returns:
        tuple: ``(labels, cluster_centers, labels_unique, extra)``.
        ``cluster_centers`` stays ``None`` for AgglomerativeClustering and
        DBSCAN; ``extra`` is the boolean core-sample mask for DBSCAN and
        ``None`` otherwise; all four are ``None`` for an unknown method.
    """
    cluster_centers = labels_unique = labels = extra = None
    if clustering_method == 'KMeans':
        # http://scikit-learn.org/stable/modules/generated/sklearn.cluster.KMeans.html#sklearn.cluster.KMeans
        k_means = KMeans(n_clusters=num_clusters,init='k-means++',n_init=10,max_iter=100,tol=0.0001,
                                precompute_distances=True, verbose=0, random_state=None, copy_x=True, n_jobs=1)
        k_means.fit(data)
        labels = k_means.labels_
        cluster_centers = k_means.cluster_centers_
    elif clustering_method == 'MeanShift':
        ms = MeanShift(bin_seeding=True,cluster_all=False)
        ms.fit(data)
        labels = ms.labels_
        cluster_centers = ms.cluster_centers_
    elif clustering_method == 'AffinityPropagation':
        af = AffinityPropagation().fit(data)
        # The exemplars are rows of ``data`` selected by index.
        cluster_centers = [data[i] for i in af.cluster_centers_indices_]
        labels = af.labels_
    elif clustering_method == "AgglomerativeClustering":
        # Floor division keeps ``n_neighbors`` an integer on Python 3 too;
        # plain ``/`` would produce a float there.
        n_neighbors = min(10, len(data) // 2)
        connectivity = kneighbors_graph(data, n_neighbors=n_neighbors)
        ward = AgglomerativeClustering(n_clusters=num_clusters, connectivity=connectivity,
                               linkage='ward').fit(data)
        labels = ward.labels_
    elif clustering_method == "DBSCAN":
        db = DBSCAN().fit(data)
        core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
        core_samples_mask[db.core_sample_indices_] = True
        extra = core_samples_mask
        labels = db.labels_

    if labels is not None:
        labels_unique = np.unique(labels)
    return labels,cluster_centers,labels_unique,extra
开发者ID:ColtonH,项目名称:UnemploymentDataMining,代码行数:36,代码来源:views.py

示例8: meanshift

# 需要导入模块: from sklearn.cluster import MeanShift [as 别名]
# 或者: from sklearn.cluster.MeanShift import fit [as 别名]
def meanshift(raw_data, t):
    """Cluster two derived features of ``raw_data`` and plot the clusters.

    The clustered features are ``raw_data[:, 1] + raw_data[:, 5]`` and
    ``raw_data[:, 2] + raw_data[:, 6]``.  The number of clusters is printed
    and the members and center of every cluster are drawn on figure ``t``.

    Args:
        raw_data: 2-D array with at least seven columns (columns 1, 2, 5
            and 6 are read).
        t: Figure identifier passed to ``plt.figure``.
    """
    xs = raw_data[:,1] + raw_data[:,5]
    ys = raw_data[:,2] + raw_data[:,6]
    # Build a plain (n, 2) ndarray.  The previous np.mat/concatenate/transpose
    # chain produced an ``np.matrix``, which NumPy discourages and newer
    # scikit-learn versions refuse as estimator input.
    data = np.column_stack((xs, ys))

    bandwidth = estimate_bandwidth(data, quantile=0.2, n_samples=500)
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
    ms.fit(data)
    labels = ms.labels_
    cluster_centers = ms.cluster_centers_
    n_clusters_ = len(np.unique(labels))
    print("number of estimated clusters : %d" % n_clusters_)

    # Plot result
    plt.figure(t)
    plt.clf()
    colors = cycle('bgrcmykbgrcmykbgrcmykbgrcmyk')
    for k, col in zip(range(n_clusters_), colors):
        my_members = labels == k
        cluster_center = cluster_centers[k]
        plt.plot(data[my_members, 0], data[my_members, 1], col + '.')
        plt.plot(cluster_center[0], cluster_center[1], 'o', markerfacecolor=col,
                 markeredgecolor='k', markersize=14)
    plt.title('Estimated number of clusters: %d' % n_clusters_)
    plt.axis('equal')
    plt.show()

示例9: mean_shift

# 需要导入模块: from sklearn.cluster import MeanShift [as 别名]
# 或者: from sklearn.cluster.MeanShift import fit [as 别名]
def mean_shift(X):
    """Run mean shift on ``X`` and return its labels and cluster centers.

    The bandwidth is estimated with ``quantile=0.2`` and ``n_samples=1000``;
    the estimator uses bin seeding and ``cluster_all=False``.

    Args:
        X: Sample array passed to ``estimate_bandwidth`` and ``fit``.

    Returns:
        tuple: ``(labels, cluster_centers)`` from the fitted estimator.
    """
    estimator = MeanShift(
        bandwidth=estimate_bandwidth(X, quantile=0.2, n_samples=1000),
        bin_seeding=True,
        cluster_all=False,
    )
    estimator.fit(X)
    return estimator.labels_, estimator.cluster_centers_
开发者ID:athoune,项目名称:Palette,代码行数:9,代码来源:colors.py

示例10: make

# 需要导入模块: from sklearn.cluster import MeanShift [as 别名]
# 或者: from sklearn.cluster.MeanShift import fit [as 别名]
def make(filename, precision, source='test.geojson'):
    """Snap the coordinates of selected GeoJSON features to cluster centers.

    Features accepted by ``pred`` are read from ``source``, their
    coordinates are clustered with mean shift, every coordinate is replaced
    by the center of its cluster, and the whole document is written to
    ``filename``.

    Args:
        filename: Path of the JSON file to write.
        precision: Divisor applied to the estimated bandwidth; larger values
            give a smaller bandwidth.
        source: Path of the GeoJSON file to read.  Defaults to
            ``'test.geojson'``, the path that used to be hard-coded.
    """
    with open(source) as f:
        data = json.load(f)

    # Evaluate the predicate once so that the features whose coordinates are
    # clustered and the features that are rewritten are the same objects in
    # the same order.
    selected = [geo for geo in data['features'] if pred(geo)]
    points = [geo['geometry']["coordinates"] for geo in selected]
    print(points)
    # The reshape requires every selected feature to contribute exactly two
    # 2-component coordinates.
    ar_points = array(points).reshape(len(points) * 2, 2)
    print(ar_points)
    bandwidth = estimate_bandwidth(ar_points) / precision
    cluster = MeanShift(bandwidth=bandwidth)
    cluster.fit(ar_points)
    labels = cluster.labels_
    cluster_centers = cluster.cluster_centers_
    print('clusters: %s' % len(unique(labels)))

    for i, geo in enumerate(selected):
        # Rows 2*i and 2*i + 1 of ``ar_points`` belong to feature ``i``.
        geo['geometry']["coordinates"] = [
            list(cluster_centers[labels[i*2 + j]])
            for j in range(2)
        ]

    with open(filename, 'w') as f:
        json.dump(data, f)

示例11: ms_algo

# 需要导入模块: from sklearn.cluster import MeanShift [as 别名]
# 或者: from sklearn.cluster.MeanShift import fit [as 别名]
def ms_algo(X, bandwidth=None):
    """Run mean shift on ``X``, print a summary and return the centers.

    Args:
        X: 2-D sample array (``X.shape[0]`` is used as the sample count).
        bandwidth: Kernel bandwidth.  When ``None`` it is estimated from
            all samples with ``quantile=0.2``.

    Returns:
        The ``cluster_centers_`` array of the fitted estimator.
    """
    # Identity test: ``== None`` would be evaluated element-wise if an
    # array-like bandwidth were ever passed.
    if bandwidth is None:
        n_samples = X.shape[0]
        bandwidth = estimate_bandwidth(X, quantile=0.2, n_samples=n_samples)

    # Apply the mean shift algorithm from the sklearn library
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
    ms.fit(X)

    # Collect the labels and the cluster centers from the fitted estimator
    labels = ms.labels_
    cluster_centers = ms.cluster_centers_
    n_clusters_ = len(np.unique(labels))  # Number of clusters

    # Print section
    print("The number of clusters is: %d" % n_clusters_)
    print("The centers are:")
    for i in range(n_clusters_):
        print("%s %s" % (i, cluster_centers[i]))

    return cluster_centers
开发者ID:PFAWeb2Control,项目名称:combined_results,代码行数:28,代码来源:meanshift.py

示例12: do_meanshift

# 需要导入模块: from sklearn.cluster import MeanShift [as 别名]
# 或者: from sklearn.cluster.MeanShift import fit [as 别名]
def do_meanshift(s_path, band1, band2, band3, band4, colour1, colour2,
                 make_plot):
    """Estimate the number of clusters in colour-colour space.

    ``colour1`` and ``colour2`` are stacked into a two-column matrix,
    standardised with ``preprocessing.scale`` and clustered with mean shift
    (``cluster_all=False``, so points may be left unassigned with a negative
    label).

    Args:
        s_path: Forwarded to ``make_ms_plots``.
        band1, band2, band3, band4: Forwarded to ``make_ms_plots``.
        colour1: First feature (1-D array).
        colour2: Second feature (1-D array).
        make_plot: Container; plots are produced when it contains
            ``"meanshift"``.

    Returns:
        tuple: ``(n_clusters, bandwidth)`` where ``n_clusters`` counts the
        distinct non-negative labels and ``bandwidth`` is the value used
        for the clustering, in the units of the scaled data.
    """
    X = np.vstack([colour1, colour2]).T
    # Scale data because meanshift generates circular clusters
    X_scaled = preprocessing.scale(X)
    # Estimate the bandwidth on the same (scaled) data that is clustered.
    # It used to be estimated on the unscaled ``X``, i.e. in different units
    # from the data handed to ``fit``.  It can also be set manually.
    bandwidth = estimate_bandwidth(X_scaled)
    # Meanshift clustering
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True, cluster_all=False)
    ms.fit(X_scaled)
    labels_unique = np.unique(ms.labels_)
    n_clusters = len(labels_unique[labels_unique >= 0])
    # Make plot
    if "meanshift" in make_plot:
        # Labels of the samples that were assigned to a cluster.
        objects = ms.labels_[ms.labels_ >= 0]
        make_ms_plots(s_path, colour1, colour2, n_clusters, X, ms,
                      band1, band2, band3, band4, objects)
    return n_clusters, bandwidth
开发者ID:PBarmby,项目名称:m83_clustering,代码行数:27,代码来源:Clustering_Analysis.py

示例13: centers_y_clusters

# 需要导入模块: from sklearn.cluster import MeanShift [as 别名]
# 或者: from sklearn.cluster.MeanShift import fit [as 别名]
 def centers_y_clusters(self,graph_db,nodes,consulta,cyprop):
     """Group ``nodes`` by the mean-shift cluster of a per-node count.

     For every node a Cypher query built from ``consulta`` and ``cyprop`` is
     executed and its ``cuenta`` result is collected.  The counts are
     clustered in one dimension, the centers are sorted ascending, and each
     node is placed in the group whose interval (bounded by the midpoints
     between neighbouring centers) contains its count.

     Compared with the earlier version, each node is queried only once, the
     outermost intervals are unbounded (the finite sentinels -9999 / 99999
     silently dropped counts outside that range), and a node whose query
     yields no row is skipped instead of reusing the previous node's count.

     Args:
         graph_db: Graph database handle passed to ``neo4j.CypherQuery``.
         nodes: Iterable of nodes exposing an ``id`` attribute.
         consulta: Leading part of the Cypher query.
         cyprop: Fragment appended after ``count(distinct(e))``.

     Returns:
         tuple: ``(cluster_centers, group)`` where ``cluster_centers`` is an
         ascending list of floats and ``group[i]`` is the list of nodes
         assigned to ``cluster_centers[i]``.
     """
     todo = []          # one [count] sample per result row
     node_counts = []   # (node, last count returned for that node)
     for n in nodes:
         tiene = neo4j.CypherQuery(graph_db, consulta+" where id(n) ="+str(n.id)+" return count(distinct(e))"+cyprop+" as cuenta").execute()
         valor = None
         for r in tiene:
             todo.append([r.cuenta])
             valor = r.cuenta
         if valor is not None:
             node_counts.append((n, valor))

     ms = MeanShift(bin_seeding=True)
     ms.fit(np.asarray(todo))
     cluster_centers = sorted(float(c[0]) for c in ms.cluster_centers_)

     # Interval i is [lower[i], upper[i]); the bounds do not depend on the
     # node, so compute them once.
     midpoints = [(a + b) / 2 for a, b in zip(cluster_centers, cluster_centers[1:])]
     lower = [float('-inf')] + midpoints
     upper = midpoints + [float('inf')]

     group = [[] for _ in cluster_centers]
     for n, valor in node_counts:
         for idx in range(len(cluster_centers)):
             if lower[idx] <= valor < upper[idx]:
                 group[idx].append(n)
                 break  # the intervals are disjoint
     return cluster_centers, group
开发者ID:palmagro,项目名称:mrrf,代码行数:36,代码来源:id3.py

示例14: BA_meanshift_cluster

# 需要导入模块: from sklearn.cluster import MeanShift [as 别名]
# 或者: from sklearn.cluster.MeanShift import fit [as 别名]
def BA_meanshift_cluster(mark, chrom):
    '''
    Mean-shift cluster a slice of one chromosome/mark table and plot it.

    Reads the tab-separated file ``<data dir>/tmp/<mark>/<chrom>-<mark>.csv``
    and clusters rows 7000-7999 on the 2D features
        ((chromStart+chromEnd)*0.5, chromEnd-chromStart)
    The feature matrix and the distinct labels are printed, then the
    members of every cluster are plotted and the figure is shown.

    @param mark: name used for the sub-directory and the file name
    @param chrom: name used for the file name
    @return: None
    '''
    csv_name = "{0}-{1}.csv".format(chrom, mark)
    table = pd.read_csv(os.path.join(get_data_dir(), "tmp", mark, csv_name),
                        sep='\t')
    starts = table.loc[:, 'chromStart'].values
    ends = table.loc[:, 'chromEnd'].values
    midpoints = 0.5*(ends+starts)
    widths = ends-starts
    # Two columns: region midpoint and region width, rows 7000..7999 only.
    X = np.column_stack((midpoints[7000:8000], widths[7000:8000]))
    print(X)

    estimator = MeanShift(
        bandwidth=estimate_bandwidth(X, quantile=0.1, n_samples=1000),
        bin_seeding=True)
    estimator.fit(X)
    labels = estimator.labels_
    distinct = list(set(labels))
    print(distinct)

    import matplotlib.pyplot as plt
    from itertools import cycle
    palette = cycle('bgrcmykbgrcmykbgrcmykbgrcmyk')
    n_found = len(distinct)
    for k in range(n_found):
        col = next(palette)
        mask = labels == k
        plt.plot(X[mask, 0], X[mask, 1], col + '.')
    plt.title('Estimated number of clusters: %d' % n_found)
    plt.show()

示例15: get_clusters

# 需要导入模块: from sklearn.cluster import MeanShift [as 别名]
# 或者: from sklearn.cluster.MeanShift import fit [as 别名]
    def get_clusters(self, in_file, cc_file, clf_file, arrivals_file, chunk_size=1710671):
        """Cluster trip destinations with mean shift, caching every stage.

        Destination points are loaded from the ``arrivals_file`` pickle, or
        rebuilt from the ``in_file`` CSV (last element of the evaluated last
        column of every row) and cached there.  The fitted estimator is
        loaded from ``clf_file`` or fitted and cached there.  The cluster
        centers are always written to ``cc_file``.

        Args:
            in_file: CSV path, read in chunks only when the destination
                cache is missing.
            cc_file: Pickle path for the cluster centers (always written).
            clf_file: Pickle path caching the fitted estimator.
            arrivals_file: Pickle path caching the destination array.
            chunk_size: Rows per CSV chunk.
        """
        # Pickle files are opened in binary mode and closed deterministically:
        # HIGHEST_PROTOCOL is a binary format, and the handles were previously
        # left for the garbage collector to close.
        try:
            with open(arrivals_file, "rb") as fh:
                dest = cPickle.load(fh)
        except IOError:
            # Number of chunks shown in the progress line; 1710671 is the row
            # count the progress display was written for.
            lines = 1710671 // chunk_size
            dests = []
            chunks = pd.read_csv(in_file, chunksize=chunk_size)
            for part, d in enumerate(chunks, 1):
                print("%d / %d" % (part, lines))
                for row in d.values:
                    # SECURITY NOTE(review): eval() executes whatever the CSV
                    # cell contains.  Only use with trusted input files, or
                    # switch to ast.literal_eval once the cell format is
                    # confirmed to be a plain literal.
                    tmp = eval(row[-1])
                    if len(tmp) > 0:
                        dests.append(tmp[-1])
            dest = np.array(dests)
            with open(arrivals_file, "wb") as fh:
                cPickle.dump(dest, fh, protocol=cPickle.HIGHEST_PROTOCOL)
        print("Destination points loaded")

        try:
            with open(clf_file, "rb") as fh:
                ms = cPickle.load(fh)
        except IOError:
            bw = 0.001
            ms = MeanShift(bandwidth=bw, bin_seeding=True, min_bin_freq=5, n_jobs=-2)
            ms.fit(dest)
            with open(clf_file, "wb") as fh:
                cPickle.dump(ms, fh, protocol=cPickle.HIGHEST_PROTOCOL)
        print("Mean shift loaded")

        cluster_centers = ms.cluster_centers_
        with open(cc_file, "wb") as fh:
            cPickle.dump(cluster_centers, fh, protocol=cPickle.HIGHEST_PROTOCOL)
        print("Clusters dumped")
开发者ID:marek1840,项目名称:Eksploracja,代码行数:33,代码来源:mean_shift.py


注:本文中的sklearn.cluster.MeanShift.fit方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。