当前位置: 首页>>代码示例>>Python>>正文


Python DBSCAN.fit_predict方法代码示例

本文整理汇总了 Python 中 sklearn.cluster.DBSCAN.fit_predict 方法的典型用法代码示例。如果您正苦于以下问题:Python DBSCAN.fit_predict 方法的具体用法?Python DBSCAN.fit_predict 怎么用?Python DBSCAN.fit_predict 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 sklearn.cluster.DBSCAN 的用法示例。


在下文中一共展示了DBSCAN.fit_predict方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: plot_dbscan

# 需要导入模块: from sklearn.cluster import DBSCAN [as 别名]
# 用法: DBSCAN(...).fit_predict(X)  (fit_predict 是 DBSCAN 的实例方法,不能通过 from ... import 单独导入)
def plot_dbscan():
    """Plot DBSCAN assignments on a toy dataset for a grid of settings.

    Generates 12 blob samples and, for every combination of ``min_samples``
    in (2, 3, 5) and ``eps`` in (1, 1.5, 2, 3), fits DBSCAN, prints the
    resulting labels and draws the points into one cell of a 3x4 subplot
    grid. Core samples are drawn a second time with a larger marker size.
    """
    X, _ = make_blobs(random_state=0, n_samples=12)

    fig, axes = plt.subplots(3, 4, figsize=(11, 8),
                             subplot_kw={'xticks': (), 'yticks': ()})
    # Plot clusters as red, green and blue, and outliers (-1) as white
    colors = ['r', 'g', 'b']
    markers = ['o', '^', 'v']

    # iterate over settings of min_samples and eps
    for i, min_samples in enumerate([2, 3, 5]):
        for j, eps in enumerate([1, 1.5, 2, 3]):
            # instantiate DBSCAN with a particular setting
            dbscan = DBSCAN(min_samples=min_samples, eps=eps)
            # get cluster assignments
            clusters = dbscan.fit_predict(X)
            print("min_samples: %d eps: %f  cluster: %s" % (min_samples, eps, clusters))
            if np.any(clusters == -1):
                # noise is present: prepend the style used for label -1
                c = ['w'] + colors
                m = ['o'] + markers
            else:
                c = colors
                m = markers
            discrete_scatter(X[:, 0], X[:, 1], clusters, ax=axes[i, j], c=c, s=8, markers=m)
            inds = dbscan.core_sample_indices_
            # visualize core samples and clusters.
            if len(inds):
                discrete_scatter(X[inds, 0], X[inds, 1], clusters[inds],
                                 ax=axes[i, j], s=15, c=colors,
                                 markers=markers)
            axes[i, j].set_title("min_samples: %d eps: %.1f" % (min_samples, eps))
    fig.tight_layout()
开发者ID:ABcDexter,项目名称:introduction_to_ml_with_python,代码行数:37,代码来源:plot_dbscan.py

示例2: dbscan

# 需要导入模块: from sklearn.cluster import DBSCAN [as 别名]
# 用法: DBSCAN(...).fit_predict(X)  (fit_predict 是 DBSCAN 的实例方法,不能通过 from ... import 单独导入)
def dbscan(similarity, concepts=2, euclid=False):
    """Cluster with DBSCAN (eps=0.6, min_samples=10) and return the labels.

    When ``euclid`` is true, ``similarity`` is handed to DBSCAN unchanged with
    the default metric. Otherwise ``1 - similarity`` is passed as a
    precomputed distance matrix. ``concepts`` is accepted but not used.
    """
    settings = dict(eps=0.6, min_samples=10, algorithm='auto', leaf_size=30)
    if euclid:
        data = similarity
    else:
        settings['metric'] = 'precomputed'
        data = 1 - similarity
    return DBSCAN(**settings).fit_predict(data)
开发者ID:thran,项目名称:experiments2.0,代码行数:9,代码来源:clusterings.py

示例3: search_charges

# 需要导入模块: from sklearn.cluster import DBSCAN [as 别名]
# 用法: DBSCAN(...).fit_predict(X)  (fit_predict 是 DBSCAN 的实例方法,不能通过 from ... import 单独导入)
    def search_charges(self, data, z=0, threshold=30):
        """Locate local extrema of ``data`` and register them as charges.

        ``deriv(data, z)`` supplies six arrays ``A[0]..A[5]`` that are used
        here as the coefficients of a second-order expansion
        ``A0 + A1*dx + A2*dy + A3*dx**2/2 + A4*dx*dy + A5*dy**2/2``
        (presumably value, gradient and Hessian entries per pixel -- confirm
        against ``deriv``). For every pixel the stationary point of that
        expansion is computed; pixels whose offset is below one pixel in both
        directions, whose extrapolated value exceeds ``threshold`` in absolute
        value and whose determinant is positive are kept. Nearby candidates
        are merged with DBSCAN (eps=1, min_samples=1) and each cluster's mean
        position and mean value are passed to ``self.set_charges``.

        Returns ``self``.
        """
        A = deriv(data, z)

        print('Searching charges...')
        time0 = time.time()

        # Determinant of the 2x2 matrix [[A3, A4], [A4, A5]].
        det = A[3]*A[5] - A[4]**2

        # Offset of the stationary point: -H^{-1} g with g = (A1, A2).
        dx = -(A[1]*A[5] - A[2]*A[4])/det
        dy = -(A[2]*A[3] - A[1]*A[4])/det

        datamax = A[0] + A[1]*dx + A[2]*dy + A[3]*dx**2/2 + A[4]*dx*dy + A[5]*dy**2/2
        t = np.where((np.abs(dx) < 1)*(np.abs(dy) < 1)*(np.abs(datamax) > threshold)*(det > 0))

        # Sub-pixel candidate positions as rows of (column + dx, row + dy).
        x = np.array([t[1] + dx[t], t[0] + dy[t]]).T

        db = DBSCAN(min_samples=1, eps=1)
        db.fit_predict(x)

        n_charges = np.max(db.labels_) + 1
        qi = np.zeros(n_charges)
        xi = np.zeros((3, n_charges))  # third coordinate row is left at zero

        for i in range(0, n_charges):
            members = db.labels_ == i
            xi[0:2, i] = np.mean(x[members, :], axis=0)
            qi[i] = np.mean(datamax[t][members])

        self.set_charges(qi, xi)
        print('Done! Elapsed time: ' + str(time.time() - time0))
        return self
开发者ID:temik42,项目名称:lib,代码行数:33,代码来源:pyfield.py

示例4: _fit_dbscan

# 需要导入模块: from sklearn.cluster import DBSCAN [as 别名]
# 用法: DBSCAN(...).fit_predict(X)  (fit_predict 是 DBSCAN 的实例方法,不能通过 from ... import 单独导入)
    def _fit_dbscan(self, x):
        """Run DBSCAN ``self.repeats`` times on ``x`` and score every run.

        For each repeat ``r`` the DBSCAN labels are stored in
        ``self._labels[r]`` and the core sample indices in
        ``self._parameters[r]``. A GMM with one component per found cluster
        (noise label -1 excluded) is then populated by hand and used to fill
        ``self._ll[r]`` and, depending on ``self.gof_type`` ('aic' or 'bic'),
        ``self._gof[r]``.

        When ``self.debug`` is True one line per repeat is printed. It is
        emitted after the fit because the cluster count it reports is only
        known at that point.
        """
        # clustering
        for r in range(self.repeats):
            # fit and evaluate model
            model = DBSCAN(eps=1.0, min_samples=100)
            model.fit_predict(x)
            # number of clusters, not counting the noise label
            k = len(set(model.labels_)) - (1 if -1 in model.labels_ else 0)
            self._labels[r] = model.labels_
            self._parameters[r] = model.core_sample_indices_

            # build equivalent gmm
            model_gmm = GMM(n_components=k, covariance_type="full")
            # NOTE(review): means_ receives sample *indices*, not coordinates,
            # and covars_ is (k, input_dim) despite covariance_type="full" --
            # kept as in the original, confirm this is intended.
            model_gmm.means_ = model.core_sample_indices_
            model_gmm.covars_ = sp.ones(
                (k, self.input_dim)) * self.sigma_factor
            model_gmm.weights_ = sp.array(
                [(self._labels[r] == i).sum() for i in range(k)])

            # evaluate goodness of fit
            self._ll[r] = model_gmm.score(x).sum()
            if self.gof_type == 'aic':
                self._gof[r] = model_gmm.aic(x)
            if self.gof_type == 'bic':
                self._gof[r] = model_gmm.bic(x)

            # debug info
            if self.debug is True:
                print('\t[%s][c:%d][r:%d] %s' % (
                    self.clus_type, k, r + 1, self._gof[r]))
开发者ID:pmeier82,项目名称:BOTMpy,代码行数:34,代码来源:cluster.py

示例5: current_datapoints_dbscan

# 需要导入模块: from sklearn.cluster import DBSCAN [as 别名]
# 用法: DBSCAN(...).fit_predict(X)  (fit_predict 是 DBSCAN 的实例方法,不能通过 from ... import 单独导入)
	def current_datapoints_dbscan(self):
		"""
		Cluster the points held in self.current_datapoints with DBSCAN.

		The 'ids', 'array' and 'weights' entries of every network in
		self.current_datapoints are concatenated and clustered with
		DBSCAN(eps=self.eps, min_samples=5). Noise points (label -1) are dropped.

		Returns a dict mapping each cluster label to a list of dicts with the
		keys 'id', 'lng', 'lat', 'weight' and 'is_core'; 'lng' and 'lat' are
		columns 0 and 1 of the coordinate array. Returns an empty dict when
		there are no networks or no points.
		"""
		nets = list(self.current_datapoints)
		if not nets:
			# concatenate() raises on an empty sequence, so bail out early.
			return {}
		ids = concatenate([self.current_datapoints[x]['ids'] for x in nets])
		coords = concatenate([self.current_datapoints[x]['array'] for x in nets])
		weights = concatenate([self.current_datapoints[x]['weights'] for x in nets])
		if len(ids) == 0:
			return {}

		clustering = DBSCAN(eps=self.eps, min_samples=5)
		labels = clustering.fit_predict(coords)
		# A set gives O(1) membership tests instead of scanning an array per point.
		core_ids = set(ids[clustering.core_sample_indices_])

		clustered = labels > -1
		ret_tab = {}
		for label, point_id, coord, weight in zip(labels[clustered], ids[clustered], coords[clustered], weights[clustered]):
			ret_tab.setdefault(label, []).append({'id':point_id, 'lng':coord[0], 'lat':coord[1], 'weight':weight, 'is_core':point_id in core_ids})
		return ret_tab
开发者ID:city-pulse,项目名称:mskpulse.backend,代码行数:28,代码来源:detector.py

示例6: cluster_DBSCAN

# 需要导入模块: from sklearn.cluster import DBSCAN [as 别名]
# 用法: DBSCAN(...).fit_predict(X)  (fit_predict 是 DBSCAN 的实例方法,不能通过 from ... import 单独导入)
def cluster_DBSCAN(args):
	"""
	Cluster graphs with DBSCAN after vectorization and truncated SVD.

	Reads graphs from args.input_file, vectorizes them with graph.Vectorizer
	(args.radius, args.distance, args.nbits), projects the result to
	args.n_components dimensions with TruncatedSVD and clusters the projection
	with DBSCAN(eps=args.eps). The vectorizer is written to args.output_dir_path
	as "vectorizer" and the labels as "labels" in text format.
	"""
	stats_fmt = 'Instances: %d Features: %d with an avg of %d features per instance'

	# load and vectorize the input graphs
	graphs = node_link_data.node_link_data_to_eden(input = args.input_file, input_type = "file")
	vectorizer = graph.Vectorizer(r = args.radius, d = args.distance, nbits = args.nbits)
	logger.info('Vectorizer: %s' % vectorizer)

	X = vectorizer.transform(graphs, n_jobs = args.n_jobs)
	logger.info(stats_fmt % (X.shape[0], X.shape[1], X.getnnz() / X.shape[0]))

	# project to a lower dimensional space to use clustering algorithms
	svd = TruncatedSVD(n_components=args.n_components)
	X_dense = svd.fit_transform(X)

	# log statistics on data
	# NOTE(review): the average below is still computed from the original X, not X_dense.
	logger.info(('Dimensionality reduction ' + stats_fmt) % (X_dense.shape[0], X_dense.shape[1], X.getnnz() / X.shape[0]))

	# clustering
	labels = DBSCAN(eps = args.eps).fit_predict(X_dense)
	logger.info('Predictions statistics: ' + util.report_base_statistics(labels))

	# save model for vectorizer
	vectorizer_name = "vectorizer"
	eden_io.dump(vectorizer, output_dir_path = args.output_dir_path, out_file_name = vectorizer_name)
	logger.info("Written file: %s/%s", args.output_dir_path, vectorizer_name)

	# save result
	labels_name = "labels"
	eden_io.store_matrix(matrix = labels, output_dir_path = args.output_dir_path, out_file_name = labels_name, output_format = "text")
	logger.info("Written file: %s/%s", args.output_dir_path, labels_name)
开发者ID:nickgentoo,项目名称:pyEDeN,代码行数:37,代码来源:cluster_DBSCAN.py

示例7: cluster_with_dbscan

# 需要导入模块: from sklearn.cluster import DBSCAN [as 别名]
# 用法: DBSCAN(...).fit_predict(X)  (fit_predict 是 DBSCAN 的实例方法,不能通过 from ... import 单独导入)
def cluster_with_dbscan(vectors, epsilon=0.5, min_samples=5, distances=None, metric="euclidean"):
    """Return DBSCAN labels computed from a precomputed distance matrix.

    If ``distances`` is None the matrix is built from ``vectors`` with
    ``pairwise_distances`` using ``metric`` and ``n_jobs=-1``; otherwise the
    supplied matrix is used and ``vectors``/``metric`` are ignored.
    """
    if distances is not None:
        dist_matrix = distances
    else:
        # Computing the distances up front lets us use multiple cores.
        dist_matrix = pairwise_distances(vectors, n_jobs=-1, metric=metric)

    clusterer = DBSCAN(eps=epsilon, min_samples=min_samples, metric="precomputed")
    labels = clusterer.fit_predict(dist_matrix)
    return labels
开发者ID:CylanceSPEAR,项目名称:NMAP-Cluster,代码行数:9,代码来源:clustering.py

示例8: dbscan_outliers

# 需要导入模块: from sklearn.cluster import DBSCAN [as 别名]
# 用法: DBSCAN(...).fit_predict(X)  (fit_predict 是 DBSCAN 的实例方法,不能通过 from ... import 单独导入)
def dbscan_outliers(df):
    """
    Find outliers (noise points) using DBSCAN.

    The data is standardized with StandardScaler and clustered with a
    default-parameter DBSCAN. Rows that DBSCAN labels as noise (label -1)
    are returned from the original, unscaled frame.

    Parameters
    ----------
    df: A pandas.DataFrame

    Returns
    -------
    A tuple of (the fitted sklearn DBSCAN instance, a pandas.DataFrame
    holding only the noise rows of ``df``)
    """
    scaled = StandardScaler().fit_transform(df)

    dbs = DBSCAN()
    # A single fit_predict both fits the estimator and yields the labels.
    labels = dbs.fit_predict(scaled)

    # Noise is labelled -1; selecting "non-zero" labels would also pick up
    # every regular cluster other than cluster 0.
    df_o = df.loc[labels == -1]

    return dbs, df_o
开发者ID:nwngeek212,项目名称:MachineLearningConcepts,代码行数:27,代码来源:helper.py

示例9: _cluster

# 需要导入模块: from sklearn.cluster import DBSCAN [as 别名]
# 用法: DBSCAN(...).fit_predict(X)  (fit_predict 是 DBSCAN 的实例方法,不能通过 from ... import 单独导入)
def _cluster(params):
    """Cluster the shared matrix with the configured method and score it.

    The method name and the matrix are read from ``sh.getConst('method')``
    and ``sh.getConst('mat')``. Only 'dbscan' is implemented; it uses
    ``params['eps']`` and ``params['min_samples']`` with a precomputed
    metric.

    Returns a tuple ``(labels, perf)`` where ``perf`` is a dict with the keys
    'silhouette_score' and 'calinski_harabaz_score'. Both values are None
    unless the number of distinct labels is between 2 and ``len(labels) - 1``.

    Raises AssertionError for 'kmedoid' (not implemented) and for unknown
    method names.
    """
    method = sh.getConst('method')
    # Raised explicitly: ``assert`` statements vanish under ``python -O``.
    if method == 'kmedoid':
        raise AssertionError('FATAL: kmedoid clustering is not implemented')
    elif method == 'dbscan':
        from sklearn.cluster import DBSCAN
        cls = DBSCAN(eps=params['eps'], min_samples=params['min_samples'],
                     metric='precomputed')
    else:
        raise AssertionError('FATAL: unknown cluster method')

    mat = sh.getConst('mat')
    labels = cls.fit_predict(mat)
    # NOTE(review): the noise label -1, if present, counts as a label here.
    nLabels = len(set(labels))

    sil = None
    cal = None
    if 2 <= nLabels <= len(labels) - 1:
        sil = met.silhouette_score(mat, labels, metric='precomputed')
        # NOTE(review): mat is passed here as if it were a feature matrix -- confirm.
        cal = met.calinski_harabaz_score(mat, labels)
    perf = dict(silhouette_score=sil, calinski_harabaz_score=cal)

    return (labels, perf)
开发者ID:tttor,项目名称:csipb-jamu-prj,代码行数:29,代码来源:cluster.py

示例10: DBScan_Flux

# 需要导入模块: from sklearn.cluster import DBSCAN [as 别名]
# 用法: DBSCAN(...).fit_predict(X)  (fit_predict 是 DBSCAN 的实例方法,不能通过 from ... import 单独导入)
def DBScan_Flux(phots, ycenters, xcenters, dbsClean=0, useTheForce=False):
    """Flag the samples DBSCAN assigns to the cluster labelled ``dbsClean``.

    ``phots`` is flattened and copied, and its non-finite entries are
    replaced by the median of the finite ones. ``ycenters``, ``xcenters`` and
    the cleaned ``phots`` are each standardized independently and stacked
    into a three-column feature matrix that is clustered with a
    default-parameter DBSCAN.

    Args:
        phots: Array of flux values; flattened before use.
        ycenters: 1-D array of y positions (indexed as ``ycenters[:, None]``).
        xcenters: 1-D array of x positions (indexed as ``xcenters[:, None]``).
        dbsClean: Cluster label regarded as "clean"; defaults to 0.
        useTheForce: Accepted but not used.

    Returns:
        Boolean array that is True where the DBSCAN label equals ``dbsClean``.
    """
    phots = np.copy(phots.ravel())
    finite = np.isfinite(phots)
    phots[~finite] = np.median(phots[finite])

    # Each column is standardized on its own; the scaler is refitted each time.
    stdScaler = StandardScaler()
    featuresNow = np.transpose([
        stdScaler.fit_transform(column[:, None]).ravel()
        for column in (ycenters, xcenters, phots)
    ])

    dbsPhotsPred = DBSCAN().fit_predict(featuresNow)

    return dbsPhotsPred == dbsClean
开发者ID:exowanderer,项目名称:ExoplanetTSO,代码行数:31,代码来源:bak_auxiliary.py

示例11: get_clusters

# 需要导入模块: from sklearn.cluster import DBSCAN [as 别名]
# 用法: DBSCAN(...).fit_predict(X)  (fit_predict 是 DBSCAN 的实例方法,不能通过 from ... import 单独导入)
def get_clusters(tracks):
    """Return DBSCAN labels for the (mean_x, mean_y) position of each track.

    ``min_samples`` is read from ``g.m.neighborsSpin`` and ``eps`` from
    ``g.m.neighborDistanceSpin``.
    """
    min_neighbors = g.m.neighborsSpin.value()
    max_distance = g.m.neighborDistanceSpin.value()
    positions = np.array([[track['mean_x'], track['mean_y']] for track in tracks])
    return DBSCAN(eps=max_distance, min_samples=min_neighbors).fit_predict(positions)
开发者ID:BrettJSettle,项目名称:MotilityTracking,代码行数:9,代码来源:MotilityTracking.py

示例12: cluster_dbscan

# 需要导入模块: from sklearn.cluster import DBSCAN [as 别名]
# 用法: DBSCAN(...).fit_predict(X)  (fit_predict 是 DBSCAN 的实例方法,不能通过 from ... import 单独导入)
def cluster_dbscan(matrix, distance_measure="sts", eps=1):
    """Cluster ``matrix`` with DBSCAN using ``min_samples=2``.

    With ``distance_measure == "sts"`` the matrix is treated as a precomputed
    distance matrix. Any other value is forwarded to DBSCAN as its ``metric``,
    e.g. 'cityblock', 'cosine', 'euclidean', 'l1', 'l2', 'manhattan',
    'braycurtis', 'canberra', 'chebyshev', 'correlation', 'dice', 'hamming',
    'jaccard', 'kulsinski', 'mahalanobis', 'matching', 'minkowski',
    'rogerstanimoto', 'russellrao', 'seuclidean', 'sokalmichener',
    'sokalsneath', 'sqeuclidean', 'yule'.

    Parameters
    ----------
    matrix: np.matrix
        For "sts", the sts distance matrix. For any other measure, the
        time-series matrix of size ngenes x nsamples.
    distance_measure: str
        The distance measure; defaults to "sts" (short time-series distance).
        Note: multiple time-series is NOT supported for distances other than
        "sts".
    eps: number
        Value passed to DBSCAN as ``eps``; defaults to 1.

    Returns
    -------
    cluster_labels
        The labels returned by ``DBSCAN.fit_predict``, one per row of
        ``matrix``.
    """
    metric = 'precomputed' if distance_measure == "sts" else distance_measure
    return DBSCAN(eps=eps, metric=metric, min_samples=2).fit_predict(matrix)
开发者ID:beiko-lab,项目名称:ananke,代码行数:33,代码来源:_cluster.py

示例13: cluster_lvl1

# 需要导入模块: from sklearn.cluster import DBSCAN [as 别名]
# 用法: DBSCAN(...).fit_predict(X)  (fit_predict 是 DBSCAN 的实例方法,不能通过 from ... import 单独导入)
 def cluster_lvl1(self, data):
     """Cluster the cells of ``data`` and return a label map.

     Every cell becomes a float32 sample (row index, column index, value).
     The samples are turned into a distance matrix by
     ``self.distances_for_lvl1`` and clustered with DBSCAN (eps=2,
     min_samples=2, precomputed metric). The labels are returned reshaped to
     ``(self.map_height, self.map_width)``.
     """
     grid = np.mgrid[:self.map_height, :self.map_width].reshape(2, -1)
     stacked = np.vstack([grid, data.ravel()])
     samples = np.float32(stacked).T
     pairwise = self.distances_for_lvl1(samples)
     clusterer = DBSCAN(eps=2., min_samples=2, metric='precomputed')
     labels = clusterer.fit_predict(pairwise)
     return labels.reshape(self.map_height, self.map_width)
开发者ID:ahmedassal,项目名称:ml-playground,代码行数:10,代码来源:main.py

示例14: regroup

# 需要导入模块: from sklearn.cluster import DBSCAN [as 别名]
# 用法: DBSCAN(...).fit_predict(X)  (fit_predict 是 DBSCAN 的实例方法,不能通过 from ... import 单独导入)
    def regroup(self, maxdistance, minsize, algo = 'auto'):
        """Cluster ``self.primarylist`` with DBSCAN and log the label count.

        ``maxdistance`` is used as ``eps``, ``minsize`` as ``min_samples`` and
        ``algo`` as ``algorithm``. The number of distinct labels is logged; it
        includes the noise label -1 when noise is present. Returns None.
        """
        self.__loginfo('Regrouping')
        # One fit_predict call is enough; fitting first and then calling
        # fit_predict would run the clustering twice.
        dbsresult = DBSCAN(eps=maxdistance, min_samples=minsize, algorithm=algo).fit_predict(self.primarylist)
        self.__loginfo('Group label count: %s' % len(set(dbsresult)))
开发者ID:qunox,项目名称:arah,代码行数:11,代码来源:mapanalysis.py

示例15: main

# 需要导入模块: from sklearn.cluster import DBSCAN [as 别名]
# 用法: DBSCAN(...).fit_predict(X)  (fit_predict 是 DBSCAN 的实例方法,不能通过 from ... import 单独导入)
def main(datafile, feature1, feature2, normalize, clusteroutput, percentile, copula):
    """Cluster two features of a dataset with DBSCAN, save and plot the result.

    Loads ``datafile`` via ``read_sah_h5``, takes the columns named
    ``feature1`` and ``feature2``, draws 10000 random rows (with replacement),
    optionally rescales them, clusters with DBSCAN(eps=100.0, min_samples=1),
    writes ``id,label`` lines to ``clusteroutput`` and shows a scatter plot
    coloured by label.

    Args:
        datafile: Path handed to ``read_sah_h5``.
        feature1: Name of the feature plotted on the x axis.
        feature2: Name of the feature plotted on the y axis.
        normalize: If true, standardize each column (zero std treated as 1).
        clusteroutput: Path of the text file receiving ``id,label`` lines.
        percentile: If > 0, the axes are limited to the
            [percentile, 100 - percentile] range of the full feature columns.
        copula: If true and ``normalize`` is false, apply
            ``copula_transform`` to each column.
    """
    X, features = read_sah_h5(datafile, just_good=False)
    if 'id' not in features:
        ids = np.arange(len(X))
    else:
        ids = X[:, features.index('id')]
    x = X[:, features.index(feature1)]
    y = X[:, features.index(feature2)]
    D = np.column_stack([x, y])

    # Random subsample; randint draws with replacement.
    idx = np.random.randint(len(X), size=10000)

    D = D[idx]
    ids = ids[idx]

    if normalize:
        mean = np.average(D, axis=0)
        std = np.std(D, axis=0)
        std[np.nonzero(std == 0.0)] = 1.0 # Avoid NaNs
        Dnorm = (D - mean) / std
    elif copula:
        Dnorm = np.column_stack([copula_transform(f) for f in D.T])
    else:
        Dnorm = D

    dbscan = DBSCAN(eps=100.0, min_samples=1)
    C = dbscan.fit_predict(Dnorm)
    print(C)

    with open(clusteroutput, 'w+') as f:
        for c, i in zip(C, ids):
            f.write('%d,%d\n' % (i, c))

    # With a single cluster the largest label is 0; avoid dividing by zero.
    label_scale = max(np.max(C), 1)
    # ``spectral`` was renamed to ``nipy_spectral`` in newer matplotlib.
    if hasattr(pl.cm, 'nipy_spectral'):
        cmap = pl.cm.nipy_spectral
    else:
        cmap = pl.cm.spectral
    pl.scatter(D[:, 0], D[:, 1], color=cmap(C.astype(float) / label_scale))

    if percentile > 0:
        pl.xlim(
            scoreatpercentile(x, percentile),
            scoreatpercentile(x, 100-percentile)
        )
        pl.ylim(
            scoreatpercentile(y, percentile),
            scoreatpercentile(y, 100-percentile)
        )

    pl.xlabel(feature1)
    pl.ylabel(feature2)
    pl.show()
开发者ID:UCBerkeleySETI,项目名称:blml,代码行数:58,代码来源:pairwise_cluster.py


注:本文中的sklearn.cluster.DBSCAN.fit_predict方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。