This article collects typical code examples for the Python method sklearn.cluster.DBSCAN.fit. If you have been wondering what exactly DBSCAN.fit does, how to call it, or what real uses of it look like, the hand-picked examples below should help. You can also explore further usage examples of the containing class, sklearn.cluster.DBSCAN.
The following shows 15 code examples of DBSCAN.fit, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
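Before the examples, here is a minimal, self-contained sketch of the basic DBSCAN.fit call pattern. The toy data and parameter values are ours, for illustration only, and are not taken from any example below:

import numpy as np
from sklearn.cluster import DBSCAN

# Two tight groups plus one far-away outlier.
X = np.array([[1.0, 1.1], [1.2, 0.9], [0.9, 1.0],
              [8.0, 8.1], [8.2, 7.9], [7.9, 8.0],
              [25.0, 25.0]])

model = DBSCAN(eps=0.5, min_samples=2)  # eps: neighborhood radius; min_samples: density threshold
model.fit(X)

print(model.labels_)               # one cluster label per point; -1 marks noise
print(model.core_sample_indices_)  # indices of the core samples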
Example 1: train_dbscan
# Import needed: from sklearn.cluster import DBSCAN [as alias]
# Or alternatively: from sklearn.cluster.DBSCAN import fit [as alias]
def train_dbscan():
    print("starting dbscan clustering...")
    model = DBSCAN(eps=dbs_eps, min_samples=dbs_min_samples, metric=dbs_metric, algorithm='auto')
    model.fit(X)
    core_points = model.core_sample_indices_
    if output_core_points:
        print("core points data index")
        print(core_points)
    print("num of core points %d" % (len(core_points)))
    print("all points cluster index")
    cluster_index = model.labels_
    if output_cluster_members:
        #print(cluster_index)
        cluster_members = {}
        for i, c in enumerate(cluster_index):
            index_list = cluster_members.get(c, list())
            index_list.append(i)
            cluster_members[c] = index_list
        for cl, indx_list in cluster_members.items():
            if cl >= 0:  # DBSCAN labels noise as -1, so 0 is a real cluster
                print("cluster index %d size %d" % (cl, len(indx_list)))
            else:
                print("noise points size %d" % (len(indx_list)))
            print(indx_list)
    print("num of clusters %d" % (cluster_index.max() + 1))
Example 2: cluster
# Import needed: from sklearn.cluster import DBSCAN [as alias]
# Or alternatively: from sklearn.cluster.DBSCAN import fit [as alias]
def cluster():
    eps_set = 0.5 * np.arange(1, 7)
    npt_set = np.arange(1, 6)
    scores = []
    global res
    res = []
    for eps in eps_set:
        for npt in npt_set:
            est = DBSCAN(eps=eps, min_samples=npt)
            est.fit(x)
            ari = metrics.adjusted_rand_score(y, est.labels_)
            scores.append(ari)
            n_noise = len([l for l in est.labels_ if l == -1])
            res.append((ari, np.max(est.labels_) + 1, n_noise))
            print(ari)
    max_score = np.max(scores)
    max_idx = scores.index(max_score)
    max_eps = eps_set[max_idx // len(npt_set)]  # integer division: scores are laid out row-major over (eps, npt)
    max_npt = npt_set[max_idx % len(npt_set)]
    print(max_score, max_eps, max_npt)
    scores = np.array(scores).reshape(len(eps_set), len(npt_set))
    pl.imshow(scores, interpolation='nearest', cmap=pl.cm.nipy_spectral)  # 'spectral' was removed from matplotlib; nipy_spectral is the closest replacement
    pl.colorbar()
    pl.xticks(np.arange(len(npt_set)), npt_set)
    pl.yticks(np.arange(len(eps_set)), eps_set)
    pl.ylabel('eps')
    pl.xlabel('min_samples')
    pl.show()
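Example 2 grid-searches eps and min_samples and scores each combination with the adjusted Rand index, which requires ground-truth labels. It assumes globals x (features), y (true labels), np, metrics, and pylab imported as pl; one illustrative way to wire those up:

import numpy as np
import pylab as pl
from sklearn import metrics
from sklearn.cluster import DBSCAN
from sklearn.datasets import make_blobs

x, y = make_blobs(n_samples=300, centers=3, cluster_std=0.6, random_state=0)
cluster()  # runs the sweep and shows the ARI heat map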
Example 3: dbscan_algo
# Import needed: from sklearn.cluster import DBSCAN [as alias]
# Or alternatively: from sklearn.cluster.DBSCAN import fit [as alias]
def dbscan_algo(self, cluster, X=None):
    if self.dMetric == 'levenstein':
        clust = DBSCAN(eps=self.epsilon, min_samples=1, metric="precomputed")
        clust.fit(X)
    else:
        vectorizer = TfidfVectorizer().fit_transform(cluster)
        dataX = TfidfTransformer(norm='l1', smooth_idf=True, use_idf=True, sublinear_tf=False).fit_transform(vectorizer)
        clust = DBSCAN(eps=self.epsilon, metric="cosine", min_samples=3, algorithm='brute')
        clust.fit(dataX)
    companyNames = cluster
    preds = clust.labels_
    clabels = np.unique(preds)
    for i in range(clabels.shape[0]):
        if clabels[i] < 0:
            continue
        cmem_ids = np.where(preds == clabels[i])[0]
        cmembers = []
        for cmem_id in cmem_ids:
            cmembers.append(companyNames[cmem_id])
        clusteritems = ",".join(cmembers)
        print(clusteritems)
        if len(cmem_ids) > 1:
            self.result.write("Clustered: %s" % clusteritems)
            self.result.write('\n')
Example 4: cluster_mappings
# Import needed: from sklearn.cluster import DBSCAN [as alias]
# Or alternatively: from sklearn.cluster.DBSCAN import fit [as alias]
def cluster_mappings(vector_inpath, do_pca=False, target_dim=100, indices_inpath=None, epsilon=2.5, min_s=20):
    # TODO: Clustering parameters
    # TODO: Metric - cosine similarity or euclidean distance
    print(alt("Load mappings..."))
    indices, model = load_mappings_from_model(vector_inpath)
    X = numpy.array([model[key] for key in indices])
    # del model
    if do_pca:
        print(alt("Truncate vectors with PCA to %i dimensions..." % (target_dim)))
        pca = PCA(n_components=target_dim)
        pca.fit(X)
        X = pca.transform(X)
    print(alt("Cluster points..."))
    # k = 2 * X[0].shape[0] - 1
    # min_pts = k + 1
    # dbscan = DBSCAN(eps=0.1, min_samples=20, metric='cosine', algorithm='brute')
    dbscan = DBSCAN(eps=epsilon, min_samples=min_s)
    dbscan.fit(X)
    labels = dbscan.labels_
    print(get_cluster_size(labels))
    print(alt("Finished clustering!"))
    sscore = silhouette_score(X, labels)
    print("Silhouette Coefficient: %0.3f" % (sscore))
    if indices_inpath:
        resolve_indices(indices, labels, indices_inpath, model)
Example 5: classify_core
# Import needed: from sklearn.cluster import DBSCAN [as alias]
# Or alternatively: from sklearn.cluster.DBSCAN import fit [as alias]
def classify_core(self, N_CLUSTERS, clusterType, data_for_trial_type, begin_time, end_time):
    BEGIN_TIME_FRAME = begin_time * self.griddy.TIME_GRID_SPACING
    END_TIME_FRAME = end_time * self.griddy.TIME_GRID_SPACING
    data = data_for_trial_type[:, BEGIN_TIME_FRAME:END_TIME_FRAME, self.griddy.VEL_X]
    labels = None
    if clusterType == 'kmeans':
        kmeans = KMeans(n_clusters=N_CLUSTERS)
        kmeans.fit(data)
        labels = kmeans.labels_
    elif clusterType == 'affinity_propagation':
        ap = AffinityPropagation(damping=0.75)
        ap.fit(data)
        labels = ap.labels_
        N_CLUSTERS = np.max(labels) + 1  # was np.max(self.labels), which is undefined in this scope
    elif clusterType == 'DBSCAN':
        dbscan = DBSCAN()
        dbscan.fit(data)
        labels = dbscan.labels_
        N_CLUSTERS = np.max(labels) + 1
        print('N_CLUSTERS=' + str(N_CLUSTERS))
    elif clusterType == 'AgglomerativeClustering':
        ac = AgglomerativeClustering(n_clusters=N_CLUSTERS)
        ac.fit(data)
        labels = ac.labels_
    else:
        print('ERROR: clusterType: ' + clusterType + ' is not recognized')
    return (labels, N_CLUSTERS)
Developer: SashaRayshubskiy, Project: osmotropotaxis_analysis_python, Lines: 33, Source: fly_trajectory_classifier.py
Example 6: on_squaremsg_received
# Import needed: from sklearn.cluster import DBSCAN [as alias]
# Or alternatively: from sklearn.cluster.DBSCAN import fit [as alias]
def on_squaremsg_received(self, msg):
    detected_squares = []
    for square_msg in msg.squares:
        detected_squares.append(TrackedSquare.from_msg(square_msg))
    self._prev_squares.append(detected_squares)
    all_squares = list(itertools.chain.from_iterable(self._prev_squares))
    square_centers = [list(s.center) + [s.hue] for s in all_squares]
    data = np.array(square_centers)
    ms = DBSCAN(eps=64, min_samples=3)
    ms.fit(data)
    labels = ms.labels_
    ts_msg = TrackedSquares()
    for i, s in enumerate(all_squares):
        label = int(labels[i])  # np.int0 in the original; plain int is equivalent here and survives NumPy 2.0
        if label < 0:
            continue
        s.tracking_colour = TrackedSquare.TRACKING_COLOURS[label % len(TrackedSquare.TRACKING_COLOURS)]
        s.tracking_detected = True
        ts_msg.squares.append(s.to_msg())
    self._squares_pub.publish(ts_msg)
Example 7: test
# Import needed: from sklearn.cluster import DBSCAN [as alias]
# Or alternatively: from sklearn.cluster.DBSCAN import fit [as alias]
def test():
    global est
    est = DBSCAN(eps=1, min_samples=1)
    est.fit(x)
    print(est.labels_)
    ari = metrics.adjusted_rand_score(y, est.labels_)
    print(ari)
Example 8: clusterMalwareNames
# Import needed: from sklearn.cluster import DBSCAN [as alias]
# Or alternatively: from sklearn.cluster.DBSCAN import fit [as alias]
def clusterMalwareNames(malwareNames):
    # strictly lexical clustering over malware-names
    wordCount = {}
    # create a distance matrix
    matrix = np.zeros((len(malwareNames), len(malwareNames)))
    for i in range(len(malwareNames)):
        for j in range(len(malwareNames)):
            if matrix[i, j] == 0.0:
                matrix[i, j] = computeSimilarity(malwareNames[i], malwareNames[j])
                matrix[j, i] = matrix[i, j]
    # Scikit-Learn's DBSCAN implementation to cluster the malware-names
    clust = DBSCAN(eps=0.1, min_samples=5, metric="precomputed")
    clust.fit(matrix)
    preds = clust.labels_
    clabels = np.unique(preds)
    # create Word-Count Map
    for i in range(clabels.shape[0]):
        if clabels[i] < 0:
            continue
        cmem_ids = np.where(preds == clabels[i])[0]
        cmembers = []
        for cmem_id in cmem_ids:
            cmembers.append(malwareNames[cmem_id])
        wordCount[", ".join(uniqueList(cmembers))] = len(cmem_ids)
    return wordCount
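Example 8 relies on two helpers that are not shown: computeSimilarity, which must return a distance in [0, 1] for the precomputed metric (eps=0.1 then means "names at least 90% similar"), and uniqueList, which deduplicates a list. Plausible sketches of both, as assumptions rather than the original implementations:

import difflib

def computeSimilarity(a, b):
    # Distance in [0, 1]: 0.0 for identical strings, near 1.0 for unrelated ones.
    return 1.0 - difflib.SequenceMatcher(None, a.lower(), b.lower()).ratio()

def uniqueList(items):
    # Drop duplicates while preserving the original order.
    seen = set()
    return [x for x in items if not (x in seen or seen.add(x))]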
Example 9: find_tracks
# Import needed: from sklearn.cluster import DBSCAN [as alias]
# Or alternatively: from sklearn.cluster.DBSCAN import fit [as alias]
def find_tracks(data, eps=20, min_samples=20):
    """Applies the DBSCAN algorithm from scikit-learn to find tracks in the data.

    Parameters
    ----------
    data : array-like
        An array of (x, y, z, hits) data points
    eps : number, optional
        The maximum distance between two points for them to be considered neighbors
    min_samples : number, optional
        The minimum number of points needed to form a cluster

    Returns
    -------
    tracks : list
        A list of tracks. Each track is an ndarray of points.
    """
    xyz = data[:, 0:3]
    dbs = DBSCAN(eps=eps, min_samples=min_samples)
    dbs.fit(xyz)
    tracks = []
    for track in (np.where(dbs.labels_ == n)[0] for n in np.unique(dbs.labels_) if n != -1):
        tracks.append(data[track])
    return tracks
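A usage sketch for find_tracks with synthetic (x, y, z, hits) points: one dense track along a line, plus scattered noise. The data and parameters are illustrative:

import numpy as np

rng = np.random.default_rng(0)
line = np.linspace(0, 100, 200)
track = np.column_stack([line, line, line, rng.random(200)])             # dense diagonal track
noise = np.column_stack([rng.uniform(0, 100, (20, 3)), rng.random(20)])  # sparse noise hits
data = np.vstack([track, noise])

tracks = find_tracks(data, eps=5, min_samples=10)
print(len(tracks))  # number of tracks recovered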
Example 10: cluster_dbscan
# Import needed: from sklearn.cluster import DBSCAN [as alias]
# Or alternatively: from sklearn.cluster.DBSCAN import fit [as alias]
def cluster_dbscan(self, calpha=False, cluster_diameter=6, cluster_min_size=10):
    '''
    Cluster the residues using the DBSCAN method.
    The parameters here are the neighborhood diameter (eps) and neighborhood
    connectivity (min_samples).

    Returns a dict mapping cluster rank to the residue numbers in that
    cluster; points that DBSCAN labels ``-1`` are outliers belonging to no
    cluster and are excluded.
    '''
    if not self.positive_residues:
        return {}
    if calpha:
        data_atoms = self.positive_residues.select('ca')
    else:
        data_atoms = self.positive_residues.select('sidechain or ca').copy()
    assert (
        data_atoms.getHierView().numResidues() ==
        self.positive_residues.getHierView().numResidues()
    )
    OUTLIER_LABEL = -1
    db_clust = DBSCAN(eps=cluster_diameter, min_samples=cluster_min_size)
    db_clust.fit(data_atoms.getCoords())
    db_labels = db_clust.labels_.astype(int)
    # print(db_labels, len(db_labels))
    if calpha:
        residue_labels = db_labels
    else:
        residues = list(data_atoms.getHierView().iterResidues())
        residue_labels = np.zeros(len(residues), dtype=int)

        def most_common(lst):
            lst = list(lst)
            return max(set(lst) or [OUTLIER_LABEL], key=lst.count)

        data_atoms.setBetas(db_labels)
        for i, res in enumerate(residues):
            atom_labels = res.getBetas()
            residue_labels[i] = most_common(atom_labels[atom_labels != OUTLIER_LABEL])
    assert len(residue_labels) == self.positive_residues.getHierView().numResidues()
    residue_numbers = self.positive_residues.ca.getResnums()
    clusters = sorted(
        [residue_numbers[residue_labels == i] for i in
         set(residue_labels) if i != -1],
        key=self.conf_sum,
        reverse=True,
    )
    return dict(enumerate(clusters))
Example 11: dbscan
# Import needed: from sklearn.cluster import DBSCAN [as alias]
# Or alternatively: from sklearn.cluster.DBSCAN import fit [as alias]
def dbscan(self, eps=0.75, min_samples=3):
    """
    Cluster the precomputed distance matrix with DBSCAN.
    :param eps: maximum distance between two points in the same neighbourhood
    :param min_samples: number of points required to form a neighbourhood
    :return: Partition built from the DBSCAN cluster labels
    """
    est = DBSCAN(metric='precomputed', eps=eps, min_samples=min_samples)
    est.fit(self.get_dm(False))
    return Partition(est.labels_)
Example 12: fit
# Import needed: from sklearn.cluster import DBSCAN [as alias]
# Or alternatively: from sklearn.cluster.DBSCAN import fit [as alias]
def fit(fvecs, params):
    eps_ = int(params[0])
    min_s = int(params[1])
    metric_ = params[2]
    # metric options: 'euclidean', 'l1', 'l2', 'manhattan', 'cosine', or 'precomputed'
    model = DBSCAN(eps=eps_, min_samples=min_s, metric=metric_)
    model.fit(fvecs)
    print(len(set(model.labels_)))
    return model.labels_
Example 13: score_sam
# Import needed: from sklearn.cluster import DBSCAN [as alias]
# Or alternatively: from sklearn.cluster.DBSCAN import fit [as alias]
def score_sam(min_val, max_val, incr=1):
    sam_range = range(min_val, max_val, incr)
    scores = []
    for k in sam_range:
        db = DBSCAN(eps=2, min_samples=k)
        db.fit(X_scaled)
        if len(set(db.labels_)) > 1:
            scores.append(metrics.silhouette_score(X_scaled, db.labels_))
        else:
            scores.append(0)
    return scores
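Example 13 sweeps min_samples at a fixed eps=2 and scores each run with the silhouette coefficient, falling back to 0 when DBSCAN finds only one label. It assumes a global X_scaled; a typical setup and call, with illustrative data:

import numpy as np
from sklearn import metrics
from sklearn.cluster import DBSCAN
from sklearn.datasets import make_blobs
from sklearn.preprocessing import StandardScaler

X_raw, _ = make_blobs(n_samples=200, centers=4, random_state=1)
X_scaled = StandardScaler().fit_transform(X_raw)

scores = score_sam(2, 30)
print(int(np.argmax(scores)) + 2)  # min_samples value with the best silhouette score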
Example 14: db
# Import needed: from sklearn.cluster import DBSCAN [as alias]
# Or alternatively: from sklearn.cluster.DBSCAN import fit [as alias]
def db(lngs, lats, city, cluster_diameter):
    city_area = city["area"]
    city_lng = city["lng"]
    city_lat = city["lat"]
    # Rescale longitudes so east-west degrees are comparable to north-south
    # degrees (assumes city_lat is in radians).
    lngs = np.array(lngs) * math.cos(city_lat)
    # Note: cluster_diameter is accepted but never used; DBSCAN runs with its default eps.
    dbscan = DBSCAN(metric='euclidean')
    dbscan.fit(np.array([lngs, lats]).transpose())
    cluster_labels = np.array(dbscan.labels_)
    return labels_to_index(cluster_labels)
Example 15: simple_clustering
# Import needed: from sklearn.cluster import DBSCAN [as alias]
# Or alternatively: from sklearn.cluster.DBSCAN import fit [as alias]
def simple_clustering(x):
    print(alt("Current parameters: %s" % (str(x))))
    dbscan = DBSCAN(eps=x[0], min_samples=x[1], p=x[2])
    dbscan.fit(X)
    cluster_sizes = get_cluster_size(dbscan.labels_)
    print(alt("Current cluster sizes: %s" % (cluster_sizes)))
    sscore = silhouette_score(X, dbscan.labels_)
    # Scale by the cluster count (the -1 presumably discounts the noise bucket).
    tscore = (sscore / (len(cluster_sizes.keys()) - 1))
    print(alt("Current value of objective function: %.5f" % (tscore)))
    print("-" * 50)
    return -1.0 * tscore  # negated so a minimizer maximizes the score