当前位置: 首页>>代码示例>>Python>>正文


Python DBSCAN.fit方法代码示例

本文整理汇总了Python中sklearn.cluster.DBSCAN.fit方法的典型用法代码示例。如果您正苦于以下问题:Python DBSCAN.fit方法的具体用法?Python DBSCAN.fit怎么用?Python DBSCAN.fit使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sklearn.cluster.DBSCAN的用法示例。


在下文中一共展示了DBSCAN.fit方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: train_dbscan

# 需要导入模块: from sklearn.cluster import DBSCAN [as 别名]
# 或者: from sklearn.cluster.DBSCAN import fit [as 别名]
def train_dbscan():
	"""Cluster the module-level feature matrix X with DBSCAN and print a summary.

	Reads the module-level configuration globals dbs_eps, dbs_min_samples,
	dbs_metric, output_core_points and output_cluster_members.
	"""
	print("starting dbscan clustering...")
	model = DBSCAN(eps=dbs_eps, min_samples=dbs_min_samples, metric=dbs_metric, algorithm='auto')
	model.fit(X)

	# Fix: the original assigned the indices to 'core_ponts' but printed the
	# undefined name 'core_points', raising a NameError whenever
	# output_core_points was enabled.
	core_points = model.core_sample_indices_
	if output_core_points:
		print("core points data index")
		print(core_points)
	print("num of core points %d" % (len(core_points)))

	print("all points cluster index")
	cluster_index = model.labels_
	if output_cluster_members:
		# group sample indices by their cluster label
		cluster_members = {}
		for i, c in enumerate(cluster_index):
			cluster_members.setdefault(c, []).append(i)
		for cl, indx_list in cluster_members.items():
			# Fix: DBSCAN marks noise with label -1; the original 'cl > 0'
			# wrongly reported cluster 0 as noise.
			if cl >= 0:
				print("cluster index %d  size %d" % (cl, len(indx_list)))
			else:
				print("noise points size %d" % (len(indx_list)))
			print(indx_list)

	# labels are 0..k-1 (noise is -1), so max+1 is the number of clusters
	print("num of clusters %d" % (cluster_index.max() + 1))
开发者ID:ahnqirage,项目名称:avenir,代码行数:30,代码来源:cluster.py

示例2: cluster

# 需要导入模块: from sklearn.cluster import DBSCAN [as 别名]
# 或者: from sklearn.cluster.DBSCAN import fit [as 别名]
def cluster():
    """Grid-search DBSCAN over eps and min_samples, score with ARI, and plot.

    Clusters the module-level data x against labels y. For every parameter
    pair, appends (ari, n_clusters, n_noise) to the global list res, then
    draws a heat map of the ARI scores.
    """
    eps_set = 0.5 * np.arange(1, 7)
    npt_set = np.arange(1, 6)
    scores = []
    global res
    res = []
    for eps in eps_set:
        for npt in npt_set:
            est = DBSCAN(eps=eps, min_samples=npt)
            est.fit(x)
            ari = metrics.adjusted_rand_score(y, est.labels_)
            scores.append(ari)
            # label -1 marks noise points
            n_noise = len([l for l in est.labels_ if l == -1])
            res.append((ari, np.max(est.labels_) + 1, n_noise))
            print(ari)
    max_score = np.max(scores)
    max_idx = scores.index(max_score)
    # Fix: explicit floor division so the row index stays an integer
    # (plain '/' yields a float under Python 3 and breaks the indexing).
    max_eps = eps_set[max_idx // len(npt_set)]
    max_npt = npt_set[max_idx % len(npt_set)]
    print("%s %s %s" % (max_score, max_eps, max_npt))
    scores = np.array(scores).reshape(len(eps_set), len(npt_set))
    pl.imshow(scores, interpolation='nearest', cmap=pl.cm.spectral)
    pl.colorbar()
    pl.xticks(np.arange(len(npt_set)), npt_set)
    pl.yticks(np.arange(len(eps_set)), eps_set)
    pl.ylabel('eps')
    pl.xlabel('min_samples')
    pl.show()
开发者ID:harrylclc,项目名称:ist557,代码行数:30,代码来源:dbscan.py

示例3: dbscan_algo

# 需要导入模块: from sklearn.cluster import DBSCAN [as 别名]
# 或者: from sklearn.cluster.DBSCAN import fit [as 别名]
 def dbscan_algo(self,cluster,X=None):
     """Cluster the given company-name strings with DBSCAN.

     With the 'levenstein' metric, X is expected to be a precomputed
     distance matrix; otherwise TF-IDF vectors are built from the names
     and clustered by cosine distance. Every cluster's members are
     printed, and clusters with more than one member are written to
     self.result.
     """
     if self.dMetric == 'levenstein':
         clust = DBSCAN(eps=self.epsilon, min_samples=1, metric="precomputed")
         clust.fit(X)
     else:
         tfidf_matrix = TfidfVectorizer().fit_transform(cluster)
         dataX = TfidfTransformer(norm='l1', smooth_idf=True, use_idf=True, sublinear_tf=False).fit_transform(tfidf_matrix)
         clust = DBSCAN(eps=self.epsilon, metric="cosine", min_samples=3, algorithm='brute')
         clust.fit(dataX)

     companyNames = cluster
     preds = clust.labels_
     for label in np.unique(preds):
         if label < 0:
             # -1 is DBSCAN's noise label; skip noise points
             continue
         member_ids = np.where(preds == label)[0]
         members = [companyNames[member_id] for member_id in member_ids]
         clusteritems = ",".join(members)
         print(clusteritems)
         if len(member_ids) > 1:
             self.result.write("Clustered: %s" % clusteritems)
             self.result.write('\n')
开发者ID:SiddarthaNagireddy,项目名称:easyClustering,代码行数:29,代码来源:dbscan.py

示例4: cluster_mappings

# 需要导入模块: from sklearn.cluster import DBSCAN [as 别名]
# 或者: from sklearn.cluster.DBSCAN import fit [as 别名]
def cluster_mappings(vector_inpath, do_pca=False, target_dim=100, indices_inpath=None, epsilon=2.5, min_s=20):
	"""Load mapping vectors, optionally PCA-reduce them, and cluster with DBSCAN.

	:param vector_inpath: path of the mapping model to load
	:param do_pca: when True, truncate the vectors to target_dim dimensions first
	:param target_dim: number of PCA components to keep
	:param indices_inpath: when given, resolve the cluster indices afterwards
	:param epsilon: DBSCAN neighbourhood radius (eps)
	:param min_s: DBSCAN min_samples
	"""
	print(alt("Load mappings..."))
	indices, model = load_mappings_from_model(vector_inpath)
	points = numpy.array([model[key] for key in indices])
	if do_pca:
		print(alt("Truncate vectors with PCA to %i dimensions..." % (target_dim)))
		pca = PCA(n_components=target_dim)
		pca.fit(points)
		points = pca.transform(points)
	print(alt("Cluster points..."))
	dbscan = DBSCAN(eps=epsilon, min_samples=min_s)
	dbscan.fit(points)
	labels = dbscan.labels_
	print(get_cluster_size(labels))
	print(alt("Finished clustering!"))
	sscore = silhouette_score(points, labels)
	print("Silhouette Coefficient: %0.3f" % (sscore))
	if indices_inpath:
		resolve_indices(indices, labels, indices_inpath, model)
开发者ID:dboth,项目名称:thesis_ba,代码行数:27,代码来源:cluster_mappings.py

示例5: classify_core

# 需要导入模块: from sklearn.cluster import DBSCAN [as 别名]
# 或者: from sklearn.cluster.DBSCAN import fit [as 别名]
    def classify_core(self, N_CLUSTERS, clusterType, data_for_trial_type, begin_time, end_time):
        """Cluster per-trial x-velocity traces over a time window.

        The window [begin_time, end_time) is scaled by the grid spacing, the
        VEL_X channel is sliced out, and the chosen algorithm is run on it.

        Returns a (labels, N_CLUSTERS) tuple; for algorithms that discover
        the cluster count themselves, N_CLUSTERS is updated accordingly.
        """
        BEGIN_TIME_FRAME = begin_time*self.griddy.TIME_GRID_SPACING
        END_TIME_FRAME = end_time*self.griddy.TIME_GRID_SPACING

        data = data_for_trial_type[:,BEGIN_TIME_FRAME:END_TIME_FRAME,self.griddy.VEL_X]

        labels = None
        if clusterType == 'kmeans':
            kmeans = KMeans(n_clusters=N_CLUSTERS)
            kmeans.fit(data)
            labels = kmeans.labels_
        elif clusterType == 'affinity_propagation':
            ap = AffinityPropagation(damping=0.75)
            ap.fit(data)
            labels = ap.labels_
            # Fix: the original read np.max(self.labels), an attribute that is
            # never set in this method (AttributeError); the fitted labels are
            # the local 'labels', matching the DBSCAN branch below.
            N_CLUSTERS = np.max(labels)+1
        elif clusterType == 'DBSCAN':
            dbscan = DBSCAN()
            dbscan.fit(data)
            labels = dbscan.labels_
            N_CLUSTERS = np.max(labels)+1
            print('N_CLUSTERS=' + str(N_CLUSTERS))
        elif clusterType == 'AgglomerativeClustering':
            ac = AgglomerativeClustering(n_clusters=N_CLUSTERS)
            ac.fit(data)
            labels = ac.labels_
        else:
            print('ERROR: clusterType: ' + clusterType + ' is not recognized')

        return (labels, N_CLUSTERS)
开发者ID:SashaRayshubskiy,项目名称:osmotropotaxis_analysis_python,代码行数:33,代码来源:fly_trajectory_classifier.py

示例6: on_squaremsg_received

# 需要导入模块: from sklearn.cluster import DBSCAN [as 别名]
# 或者: from sklearn.cluster.DBSCAN import fit [as 别名]
    def on_squaremsg_received(self, msg):
        """Handle an incoming squares message.

        Accumulates the detected squares into the recent-frames buffer,
        clusters all buffered squares by (center, hue) with DBSCAN, tags
        each clustered square with a tracking colour, and republishes them.
        Noise points (label -1) are dropped.
        """
        detected_squares = [TrackedSquare.from_msg(square_msg) for square_msg in msg.squares]
        self._prev_squares.append(detected_squares)

        all_squares = list(itertools.chain.from_iterable(self._prev_squares))
        data = np.array([list(square.center) + [square.hue] for square in all_squares])

        clusterer = DBSCAN(eps=64, min_samples=3)
        clusterer.fit(data)
        labels = clusterer.labels_

        ts_msg = TrackedSquares()
        for i, square in enumerate(all_squares):
            label = np.int0(labels[i])
            if label < 0:
                # noise point - not part of any tracked cluster
                continue

            square.tracking_colour = TrackedSquare.TRACKING_COLOURS[label % len(TrackedSquare.TRACKING_COLOURS)]
            square.tracking_detected = True
            ts_msg.squares.append(square.to_msg())

        self._squares_pub.publish(ts_msg)
开发者ID:Knifa,项目名称:Glasgow-Baxter,代码行数:29,代码来源:understanding.py

示例7: test

# 需要导入模块: from sklearn.cluster import DBSCAN [as 别名]
# 或者: from sklearn.cluster.DBSCAN import fit [as 别名]
def test():
    """Run DBSCAN once on the global data x and report labels plus ARI against y."""
    global est
    est = DBSCAN(eps=1, min_samples=1)
    est.fit(x)
    print(est.labels_)
    ari = metrics.adjusted_rand_score(y, est.labels_)
    print(ari)
开发者ID:harrylclc,项目名称:ist557,代码行数:9,代码来源:dbscan.py

示例8: clusterMalwareNames

# 需要导入模块: from sklearn.cluster import DBSCAN [as 别名]
# 或者: from sklearn.cluster.DBSCAN import fit [as 别名]
def clusterMalwareNames(malwareNames):
    """Lexically cluster malware names with DBSCAN over a precomputed distance matrix.

    Builds a symmetric pairwise matrix from computeSimilarity, clusters it,
    and returns a dict mapping the comma-joined unique member names of each
    cluster to that cluster's size. Noise points (label -1) are ignored.
    """
    wordCount = {}
    n = len(malwareNames)

    # Build the symmetric distance matrix. The original looped over the full
    # n x n grid and skipped already-filled entries with a zero check; filling
    # the upper triangle (including the diagonal) once and mirroring computes
    # each pair exactly once with the same resulting matrix.
    matrix = np.zeros((n, n))
    for i in range(n):
        for j in range(i, n):
            sim = computeSimilarity(malwareNames[i], malwareNames[j])
            matrix[i, j] = sim
            matrix[j, i] = sim

    # Scikit-Learn's DBSCAN implementation to cluster the malware-names
    clust = DBSCAN(eps=0.1, min_samples=5, metric="precomputed")
    clust.fit(matrix)

    preds = clust.labels_
    for label in np.unique(preds):
        if label < 0:
            # -1 is DBSCAN's noise label
            continue

        cmem_ids = np.where(preds == label)[0]
        cmembers = [malwareNames[cmem_id] for cmem_id in cmem_ids]
        wordCount[", ".join(uniqueList(cmembers))] = len(cmem_ids)
    return wordCount
开发者ID:M0nk2y,项目名称:malware-crawler,代码行数:33,代码来源:vtTool.py

示例9: find_tracks

# 需要导入模块: from sklearn.cluster import DBSCAN [as 别名]
# 或者: from sklearn.cluster.DBSCAN import fit [as 别名]
def find_tracks(data, eps=20, min_samples=20):
    """Applies the DBSCAN algorithm from scikit-learn to find tracks in the data.

    Parameters
    ----------
    data : array-like
        An array of (x, y, z, hits) data points
    eps : number, optional
        The minimum distance between adjacent points in a cluster
    min_samples : number, optional
        The min number of points in a cluster

    Returns
    -------
    tracks : list
        A list of tracks. Each track is an ndarray of points.

    """
    # cluster on the spatial columns (x, y, z) only
    clusterer = DBSCAN(eps=eps, min_samples=min_samples)
    clusterer.fit(data[:, 0:3])
    labels = clusterer.labels_

    tracks = []
    for cluster_id in np.unique(labels):
        if cluster_id == -1:
            # -1 marks noise points, which belong to no track
            continue
        member_rows = np.where(labels == cluster_id)[0]
        tracks.append(data[member_rows])

    return tracks
开发者ID:tarvos14,项目名称:pytpc,代码行数:29,代码来源:tracking.py

示例10: cluster_dbscan

# 需要导入模块: from sklearn.cluster import DBSCAN [as 别名]
# 或者: from sklearn.cluster.DBSCAN import fit [as 别名]
    def cluster_dbscan(self, calpha=False, cluster_diameter=6, cluster_min_size=10):
        '''
        Cluster the positive residues using the DBSCAN method.

        cluster_diameter is passed to DBSCAN as the neighborhood radius (eps)
        and cluster_min_size as the neighborhood connectivity (min_samples).
        When calpha is True only C-alpha atoms are clustered; otherwise all
        side-chain and C-alpha atoms are clustered and each residue takes the
        most common label among its non-outlier atoms. Label ``-1`` marks an
        outlier point that belongs to no cluster.

        Returns a dict mapping rank (0, 1, ...) to the residue-number arrays
        of the clusters, ordered by descending self.conf_sum; empty dict when
        there are no positive residues.
        '''

        if not self.positive_residues:
            return {}
        
        # choose the atoms to feed to DBSCAN
        if calpha:
            data_atoms = self.positive_residues.select('ca')
        else:
            data_atoms = self.positive_residues.select('sidechain or ca').copy()
        
        # the selection must still cover every positive residue
        assert (
                data_atoms.getHierView().numResidues() == 
                self.positive_residues.getHierView().numResidues()
                )
        
        OUTLIER_LABEL = -1
        
        db_clust = DBSCAN(eps=cluster_diameter, min_samples=cluster_min_size)
        db_clust.fit(data_atoms.getCoords())

        db_labels = db_clust.labels_.astype(int)
        if calpha:
            # one atom per residue, so atom labels are residue labels
            residue_labels = db_labels
        
        else:
            # several atoms per residue: vote per residue over its atom labels
            residues = list(data_atoms.getHierView().iterResidues())
            residue_labels = np.zeros(len(residues), dtype=int)
            
            def most_common(lst):
                # mode of lst; falls back to OUTLIER_LABEL when lst is empty
                lst = list(lst)
                return max(set(lst) or [OUTLIER_LABEL], key=lst.count)
            
            # stash the atom labels in the beta column so each residue can
            # read back its own atoms' labels
            data_atoms.setBetas(db_labels)
            for i, res in enumerate(residues):
                atom_labels = res.getBetas()
                residue_labels[i] = most_common(atom_labels[atom_labels!=OUTLIER_LABEL])
                
        assert len(residue_labels) == self.positive_residues.getHierView().numResidues()
        
        # collect the residue numbers of each real cluster (label != -1),
        # largest-confidence clusters first
        residue_numbers = self.positive_residues.ca.getResnums()
        clusters = sorted(
                [residue_numbers[residue_labels==i] for i in
                    set(residue_labels) if i!=-1], 
                key=self.conf_sum, 
                reverse=True,
                )
        return dict(enumerate(clusters))
开发者ID:asaladin,项目名称:peptalk,代码行数:58,代码来源:peptalk.py

示例11: dbscan

# 需要导入模块: from sklearn.cluster import DBSCAN [as 别名]
# 或者: from sklearn.cluster.DBSCAN import fit [as 别名]
 def dbscan(self, eps=0.75, min_samples=3):
     """Cluster this collection's precomputed distance matrix with DBSCAN.

     :param eps: maximum distance between two points in the same neighbourhood
     :param min_samples: minimum number of points forming a neighbourhood
     :return: a Partition wrapping the cluster labels
     """
     estimator = DBSCAN(metric='precomputed', eps=eps, min_samples=min_samples)
     estimator.fit(self.get_dm(False))
     return Partition(estimator.labels_)
开发者ID:kgori,项目名称:treeCl,代码行数:12,代码来源:clustering.py

示例12: fit

# 需要导入模块: from sklearn.cluster import DBSCAN [as 别名]
# 或者: from sklearn.cluster.DBSCAN import fit [as 别名]
def fit(fvecs, params):
	"""Run DBSCAN on the feature vectors and return the cluster labels.

	params holds [eps, min_samples, metric]; the metric may be any
	sklearn-supported distance such as "euclidean", "l1", "l2",
	"manhattan", "cosine" or "precomputed". Prints the number of
	distinct labels found (clusters plus the noise label).
	"""
	eps_value = int(params[0])
	min_samples_value = int(params[1])
	metric_name = params[2]

	model = DBSCAN(eps=eps_value, min_samples=min_samples_value, metric=metric_name)
	model.fit(fvecs)
	print(len(set(model.labels_)))
	return model.labels_
开发者ID:s1van,项目名称:cse5243,代码行数:12,代码来源:dbscan.py

示例13: score_sam

# 需要导入模块: from sklearn.cluster import DBSCAN [as 别名]
# 或者: from sklearn.cluster.DBSCAN import fit [as 别名]
def score_sam(min_val, max_val, incr=1):
    """Sweep DBSCAN's min_samples over range(min_val, max_val, incr).

    Clusters the module-level X_scaled at eps=2 for each candidate and
    collects silhouette scores; appends 0 when DBSCAN produces a single
    label (silhouette score is undefined there).
    """
    scores = []
    for min_samples in range(min_val, max_val, incr):
        db = DBSCAN(eps=2, min_samples=min_samples)
        db.fit(X_scaled)
        labels = db.labels_
        if len(set(labels)) > 1:
            scores.append(metrics.silhouette_score(X_scaled, labels))
        else:
            scores.append(0)
    return scores
开发者ID:dieuwe,项目名称:sfdat22_dva,代码行数:13,代码来源:lab.py

示例14: db

# 需要导入模块: from sklearn.cluster import DBSCAN [as 别名]
# 或者: from sklearn.cluster.DBSCAN import fit [as 别名]
def db(lngs, lats, city, cluster_diameter):
	"""Cluster geographic points with DBSCAN and return the label-to-index groups.

	Longitudes are compressed by cos(latitude of the city) so east-west
	spacing is comparable to north-south spacing before clustering.
	(Removed the unused locals city_area/city_lng from the original.)

	NOTE(review): math.cos expects radians - confirm city["lat"] is stored
	in radians; if it is in degrees this scaling is wrong.
	TODO(review): cluster_diameter is accepted but never used, so DBSCAN
	runs with its default eps; kept for interface compatibility.
	"""
	city_lat = city["lat"]
	lngs = np.array(lngs)*math.cos(city_lat)

	dbscan = DBSCAN(metric='euclidean')
	dbscan.fit(np.array([lngs, lats]).transpose())
	cluster_labels = np.array(dbscan.labels_)

	return labels_to_index(cluster_labels)
开发者ID:avisochek,项目名称:scastrap_data_pipeline,代码行数:13,代码来源:clustering_algorithms.py

示例15: simple_clustering

# 需要导入模块: from sklearn.cluster import DBSCAN [as 别名]
# 或者: from sklearn.cluster.DBSCAN import fit [as 别名]
	def simple_clustering(x):
		"""Objective for the parameter search: negated, size-normalised silhouette.

		x holds (eps, min_samples, p); clusters the enclosing data X with
		DBSCAN and returns -silhouette / (number of labels - 1), so that
		minimising this value favours high-quality, coarse clusterings.
		"""
		print(alt("Current parameters: %s" % (str(x))))
		clusterer = DBSCAN(eps=x[0], min_samples=x[1], p=x[2])
		clusterer.fit(X)
		cluster_sizes = get_cluster_size(clusterer.labels_)
		print(alt("Current cluster sizes: %s" % (cluster_sizes)))
		sscore = silhouette_score(X, clusterer.labels_)
		tscore = sscore / (len(cluster_sizes.keys()) - 1)
		print(alt("Current value of objective function: %.5f" % (tscore)))
		print("-" * 50)
		return -1.0 * tscore
开发者ID:dboth,项目名称:thesis_ba,代码行数:13,代码来源:cluster_mappings.py


注:本文中的sklearn.cluster.DBSCAN.fit方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。