本文整理匯總了 Python 中 sklearn.cluster.DBSCAN 類的典型用法代碼示例。如果您正苦於以下問題:Python cluster.DBSCAN 類的具體用法?Python cluster.DBSCAN 怎麼用?Python cluster.DBSCAN 使用的例子?那麼,這裡精選的代碼示例或許可以為您提供幫助。您也可以進一步了解該類所在模塊 sklearn.cluster 的用法示例。
在下文中一共展示了 cluster.DBSCAN 類的 15 個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的 Python 代碼示例。
示例1: initDBScan
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import DBSCAN [as 別名]
def initDBScan(self):
    """Seed the potential micro-clusters from a DBSCAN pass over the buffer.

    Fits ``DBSCAN(eps=0.05, min_samples=2)`` on ``self.buffer``. For every
    cluster label found (noise excluded) one ``MicroCluster`` is filled with
    that cluster's samples and inserted into ``self.pMicroCluster``.
    """
    model = DBSCAN(eps=0.05, min_samples=2)
    model.fit(self.buffer)

    # Shift the labels by one so that noise (-1) becomes 0 and the real
    # clusters are numbered 1..k; the loop below starts at 1 and thereby
    # skips the noise points.
    shifted = pd.DataFrame(model.labels_ + 1)
    highest_label = shifted[0].max()

    for cluster_id in range(1, highest_label + 1):
        member_index = shifted[shifted[0] == cluster_id].index
        members = self.buffer[member_index]

        # NOTE(review): this sample is overwritten by the first loop
        # iteration below and looks redundant; kept in case constructing a
        # Sample has side effects -- confirm before removing.
        sample = Sample(members[0], 0)
        sample.setTimestamp(1)

        micro = MicroCluster(1, self.lamb, self.pMicroCluster.N + 1)
        for position in range(len(members)):
            sample = Sample(members[position], position)
            # Timestamps are 1-based positions inside the cluster.
            sample.setTimestamp(position + 1)
            micro.insertSample(sample, self.currentTimestamp)

        self.pMicroCluster.insert(micro)
示例2: get_classer
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import DBSCAN [as 別名]
def get_classer(self, algo_name, classer, algo_dir):
    """Return a fitted estimator for *algo_name*, using an on-disk cache.

    If ``<algo_dir>/<algo_name>_classer.npy`` exists it is unpickled and
    returned without touching *classer*. Otherwise *classer* is fitted on
    ``self.sdc_all_speech``, pickled to that path and returned.

    :param algo_name: name used for log messages and as the file-name prefix
    :param classer: unfitted estimator; which method is called depends on
        what it offers (``fit``+``predict``, ``fit_transform`` or
        ``fit_predict``)
    :param algo_dir: directory for the cache files; created if missing
    :return: the cached or freshly fitted estimator
    """
    if not os.path.exists(algo_dir):
        os.mkdir(algo_dir)
    base_path = os.path.join(algo_dir, algo_name)
    classer_fn = '{}_classer.npy'.format(base_path)
    trafoed_fn = '{}_trafoed.npy'.format(base_path)

    if os.path.isfile(classer_fn):
        # NOTE(security): unpickling executes arbitrary code from the file;
        # only safe as long as algo_dir is written exclusively by this code.
        with open(classer_fn, mode='rb') as cached:
            return pickle.load(cached)

    if algo_name == 'DBSCAN':
        self.loop_estimate_bandwidth()
    logger.info('clustering all speech with {}'.format(algo_name))

    if hasattr(classer, 'fit') and hasattr(classer, 'predict'):
        classer.fit(self.sdc_all_speech)
    elif hasattr(classer, 'fit_transform'):  # e.g. TSNE
        all_speech_trafoed = classer.fit_transform(self.sdc_all_speech)
        # The transformed data is stored next to the pickled estimator.
        with open(trafoed_fn, mode='wb') as trafoed_file:
            np.save(trafoed_file, all_speech_trafoed)
    else:  # e.g. DBSCAN
        classer.fit_predict(self.sdc_all_speech)

    logger.info(classer.get_params())
    logger.info('dumping classifier')
    # Context managers make sure the handles are flushed and closed, which
    # the previous bare open() calls left to the garbage collector.
    with open(classer_fn, mode='wb') as classer_file:
        pickle.dump(classer, classer_file)
    return classer
示例3: DBSCAN_cluster
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import DBSCAN [as 別名]
def DBSCAN_cluster(psi_matrix, eventid_lst, dist, minpts, metric):
    """Cluster the rows of *psi_matrix* with DBSCAN.

    :param psi_matrix: data handed to ``DBSCAN.fit``
    :param eventid_lst: event ids, paired positionally with the labels
    :param dist: value passed as DBSCAN ``eps``
    :param minpts: value passed as DBSCAN ``min_samples``
    :param metric: distance metric name passed to DBSCAN
    :return: tuple ``(eventid_labels_dict, labels)`` where the dict maps each
        event id to its cluster label and ``labels`` is ``db.labels_``

    If fitting fails the error is logged and the process exits with
    status 1.
    """
    # Setting logging preferences
    logger = logging.getLogger(__name__)

    # The "cosine" metric only works with the "brute" algorithm.
    alg = 'brute' if metric == "cosine" else 'auto'

    try:
        db = DBSCAN(eps=dist, min_samples=minpts, metric=metric, algorithm=alg).fit(psi_matrix)
    except Exception:
        # Exception (not a bare except) so that KeyboardInterrupt and
        # SystemExit propagate instead of being reported as "Unknown error".
        logger.error("Unknown error: {}".format(sys.exc_info()))
        sys.exit(1)

    labels = db.labels_
    eventid_labels_dict = dict(zip(eventid_lst, labels))
    return eventid_labels_dict, labels
示例4: cluster_analysis
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import DBSCAN [as 別名]
def cluster_analysis(dpsi, psivec, sig_threshold, dpsi_threshold, eps, minpts, metric, indexes, clustering,
                     separation, output):
    """Cluster events with DBSCAN or OPTICS and write the results.

    Changes the working directory to the directory containing *dpsi*, builds
    the clustering input, labels every event with the selected algorithm and
    then writes the averaged cluster output and the cluster scores.

    *clustering* selects the algorithm: ``"DBSCAN"`` uses ``DBSCAN_cluster``,
    any other value uses the OPTICS implementation (*separation* is only
    used in that case).
    """
    work_dir = os.path.dirname(os.path.realpath(dpsi))
    os.chdir(work_dir)

    psi_matrix, eventid_lst = process_cluster_input(dpsi, psivec, sig_threshold, dpsi_threshold, indexes)

    if clustering != "DBSCAN":
        # OPTICS: turn the rows of psi_matrix into the Points used by optics.py
        points_list = create_points_list(psi_matrix, eventid_lst)
        # eps is the maximum radius to be considered, minpts the minimum cluster size
        optics = Optics(points_list, eps, minpts)
        optics.run()
        # separation is the upper threshold used to split the clusters
        clusters = optics.cluster(separation)
        eventid_labels_dict, labels = generate_labels(clusters, eventid_lst)
    else:
        # eventid_labels_dict holds the cluster label of each event
        eventid_labels_dict, labels = DBSCAN_cluster(psi_matrix, eventid_lst, eps, minpts, metric)

    # Both algorithms share the same reporting steps.
    write_averaged_cluster_output(psi_matrix, eventid_lst, eventid_labels_dict, output)
    calculate_cluster_scores(psi_matrix, labels, output)
示例5: plot_res
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import DBSCAN [as 別名]
def plot_res(labels: list, n_cluster: int, num: int):
    """Plot the DBSCAN result, save it as a PNG and show it.

    Core samples are drawn with large markers and non-core samples with
    small ones; points labelled -1 (noise) are drawn in black. The figure is
    saved to ``settings.PLOT_DIR + 'db-<n_cluster>-<num>.png'``.

    Reads the module-level ``trainingData`` and ``core_samples_mask``.

    :param labels: cluster label per sample (list or array)
    :param n_cluster: first number used in the output file name
    :param num: second number used in the output file name
    """
    # A plain list compared with ``==`` yields a single bool instead of an
    # element-wise mask, so make sure we are working with an array.
    labels = np.asarray(labels)
    unique_labels = set(labels)
    colors = plt.cm.Spectral(np.linspace(0, 1, len(unique_labels)))

    for k, col in zip(unique_labels, colors):
        if k == -1:
            # Black used for noise.
            col = 'k'

        class_member_mask = (labels == k)

        xy = trainingData[class_member_mask & core_samples_mask]
        plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=col,
                 markeredgecolor='k', markersize=10)

        xy = trainingData[class_member_mask & ~core_samples_mask]
        plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=col,
                 markeredgecolor='k', markersize=6)

    plt.title('DBSCAN')
    plt.savefig(settings.PLOT_DIR + 'db-%d-%d.png' % (n_cluster, num))
    plt.show()
示例6: __init__
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import DBSCAN [as 別名]
def __init__(self, cluster_algorithm=cluster.DBSCAN(eps=0.5,min_samples=3), nr_cubes=10,
             overlap_perc=0.1, scaler=preprocessing.MinMaxScaler(), reducer=None, color_function="distance_origin",
             link_local=False, verbose=1):
    # Stores the configuration on the instance, resets the working lists and,
    # when verbose > 0, prints a summary of the chosen settings.
    #
    # NOTE(review): the default clusterer and scaler are instances created
    # once when this function is defined, so every object built with the
    # defaults shares them -- confirm this is intended.

    # Estimators
    self.clf = cluster_algorithm
    self.scaler = scaler
    self.reducer = reducer

    # Covering and display options
    self.nr_cubes = nr_cubes
    self.overlap_perc = overlap_perc
    self.color_function = color_function
    self.link_local = link_local
    self.verbose = verbose

    # Working state, filled later
    self.chunk_dist = []
    self.overlap_dist = []
    self.d = []

    if verbose > 0:
        summary_values = (nr_cubes, overlap_perc, link_local, str(cluster_algorithm), str(scaler))
        print("\nnr_cubes = %s \n\noverlap_perc = %s\n\nlink_local = %s\n\nClusterer = %s\n\nScaler = %s\n\n"%summary_values)
示例7: _cluster_v2
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import DBSCAN [as 別名]
def _cluster_v2(prediction):
    """
    Cluster *prediction* with DBSCAN (eps=0.7, min_samples=200).

    :param prediction: samples handed to ``DBSCAN.fit``
    :return: tuple ``(num_clusters, db_labels, cluster_centers)``: the number
        of distinct labels other than -1, the label of every sample, and the
        fitted model's ``components_``
    """
    model = DBSCAN(eps=0.7, min_samples=200).fit(prediction)
    db_labels = model.labels_

    # Label -1 is excluded from the cluster count.
    real_labels = [label for label in np.unique(db_labels) if label != -1]
    num_clusters = len(real_labels)
    log.info('聚類簇個數為: {:d}'.format(num_clusters))

    # NOTE(review): scikit-learn documents components_ as the core samples,
    # not per-cluster centroids -- confirm callers expect that.
    return num_clusters, db_labels, model.components_
示例8: get_cluster
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import DBSCAN [as 別名]
def get_cluster(self):
    """Group the candidates into runs of equal DBSCAN label.

    Candidates from ``self.all_candidates`` are ranked by their highest
    ``'stats'`` value (descending), a matrix built by
    ``ParserFinder.get_matrix`` from those values is clustered with
    ``DBSCAN(metric='precomputed')`` and consecutive candidates with the same
    label are collected into one list. The result is stored in
    ``self.clusters``.
    """
    ranked = sorted(
        [(name, max(info['stats'])) for name, info in self.all_candidates.items()],
        key=lambda pair: pair[1],
        reverse=True,
    )
    best_stats = [pair[1] for pair in ranked]
    X = np.matrix(ParserFinder.get_matrix(best_stats))
    labels = list(DBSCAN(metric='precomputed').fit(X).labels_)

    clusters = []
    current_run = []
    previous_label = 0
    for position, label in enumerate(labels):
        name = ranked[position][0]
        if label != previous_label:
            # The label changed: close the run collected so far.
            clusters.append(current_run)
            current_run = []
        current_run.append(name)
        previous_label = label

    # NOTE(review): the final run is never appended, and an empty list is
    # emitted first when the first label is not 0 -- confirm whether dropping
    # the last group is intentional.
    self.clusters = list(clusters)
示例9: createRanges
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import DBSCAN [as 別名]
def createRanges(pts):
    """Merge neighbouring ``(epsilon, minPts)`` points into epsilon ranges.

    The points are clustered with ``DBSCAN(eps=step+0.05, min_samples=1)``
    (``step`` is a module-level value). Every cluster must share one
    ``minPts`` value and contributes a ``(minPts, epsilon_min, epsilon_max)``
    tuple to the returned list.
    """
    X_ = np.array(pts)
    labels = DBSCAN(eps=step+0.05, min_samples=1).fit(X_).labels_

    ranges = []
    for label in set(labels):
        members = X_[labels == label]
        # Transpose the rows into one tuple per column.
        epsilons, min_pts_values = zip(*members)
        lowest, highest = min(epsilons), max(epsilons)
        assert(min(min_pts_values) == max(min_pts_values))
        ranges.append((min_pts_values[0], lowest, highest))
    return ranges
示例10: get_user_pts
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import DBSCAN [as 別名]
def get_user_pts(markings):
    """Group *markings* into DBSCAN clusters.

    The markings are clustered with ``DBSCAN(eps=10, min_samples=3)``.
    Points labelled -1 (noise) are discarded.

    :param markings: array-like of points, one row per marking
    :return: list with one array per cluster, ordered by ascending cluster
        label; each array holds the rows of *markings* in that cluster
    """
    X = np.asarray(markings)
    labels = DBSCAN(eps=10, min_samples=3).fit(X).labels_

    # np.unique yields the labels in ascending order, which keeps the
    # cluster order deterministic (a set gives no ordering guarantee).
    return [X[labels == k] for k in np.unique(labels) if k != -1]
示例11: cluster_center_candidates
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import DBSCAN [as 別名]
def cluster_center_candidates(points, max_dist=100, min_samples=1):
    """ merge center candidates by density clustering and average each group

    :param [[float]] points: points
    :param float max_dist: maximal distance among points (DBSCAN ``eps``)
    :param int min_samples: minimal number of samples (DBSCAN ``min_samples``)
    :return (ndarray, [int]): mean point of every cluster, label of every point
    """
    points = np.array(points)
    if len(points) == 0:
        # Nothing to cluster: hand back the empty array and no labels.
        return points, []

    model = cluster.DBSCAN(eps=max_dist, min_samples=min_samples)
    model.fit(points)
    labels = model.labels_.copy()

    # Labels run from 0 to max(labels); -1 is never visited by the range.
    groups = (points[labels == idx] for idx in range(max(labels) + 1))
    centers = [np.mean(group, axis=0) for group in groups if len(group) > 0]
    return np.array(centers), labels
示例12: test_objectmapper
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import DBSCAN [as 別名]
def test_objectmapper(self):
    # The ``cluster`` accessor of a ModelFrame must expose the very same
    # objects as the ``cluster`` module itself.
    df = pdml.ModelFrame([])

    top_level_names = (
        'AffinityPropagation',
        'AgglomerativeClustering',
        'Birch',
        'DBSCAN',
        'FeatureAgglomeration',
        'KMeans',
        'MiniBatchKMeans',
        'MeanShift',
        'SpectralClustering',
    )
    for name in top_level_names:
        self.assertIs(getattr(df.cluster, name), getattr(cluster, name))

    # Same check for the names living in the bicluster sub-module.
    for name in ('SpectralBiclustering', 'SpectralCoclustering'):
        self.assertIs(getattr(df.cluster.bicluster, name),
                      getattr(cluster.bicluster, name))
示例13: affinityPropagationForPoints
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import DBSCAN [as 別名]
def affinityPropagationForPoints(dataArray,epsValue):
    """Cluster *dataArray* with DBSCAN and return the labels.

    Despite its name, this function runs DBSCAN (``min_samples=3``,
    euclidean metric), not affinity propagation.

    Tuning notes carried over from the original comment: ``eps`` is the
    neighbourhood distance threshold and ``min_samples`` the number of
    neighbours a point needs to become a core point. For longitude/latitude
    data, metres and degrees can be converted with
    ``meter = degree * (2 * math.pi * 6378137.0) / 360``; for example
    ``eps=0.0008`` is roughly 90 m, so POI points closer than about 90 m end
    up in one cluster. Both parameters have to be tuned for the data at hand.

    :param dataArray: samples handed to ``DBSCAN.fit_predict``
    :param epsValue: value passed as DBSCAN ``eps``
    :return: tuple ``(pred, unique_labels)``: the label of every sample and
        the distinct labels
    """
    db = cluster.DBSCAN(eps=epsValue, min_samples=3, metric='euclidean')
    pred = db.fit_predict(dataArray)
    return pred, np.unique(pred)
#convert points .shp to raster 將點數據寫入為raster數據。使用raster.SetGeoTransform,柵格化數據。參考GDAL官方代碼
示例14: affinityPropagationForPoints
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import DBSCAN [as 別名]
def affinityPropagationForPoints(dataArray,epsValue):
    """Cluster *dataArray* with DBSCAN, printing progress and timing.

    Runs ``DBSCAN(eps=epsValue, min_samples=3, metric='euclidean')`` and
    prints the elapsed clustering time, the labels and the number of
    distinct labels.

    Tuning notes: ``eps`` is the neighbourhood distance threshold and
    ``min_samples`` the number of neighbours a point needs to become a core
    point. For longitude/latitude data, metres and degrees can be converted
    with ``meter = degree * (2 * math.pi * 6378137.0) / 360``; for example
    ``eps=0.0008`` is roughly 90 m. Both have to be tuned for the data.

    :return: tuple ``(pred, unique_labels)``: the label of every sample and
        the distinct labels
    """
    print("--------------------Clustering")

    started = time.time()
    model = cluster.DBSCAN(eps=epsValue, min_samples=3, metric='euclidean')
    pred = model.fit_predict(dataArray)
    # Time spent constructing and fitting the model.
    elapsed = time.time() - started
    print(elapsed)

    distinct_labels = np.unique(pred)
    print(pred, len(distinct_labels))

    print("-------------------cluster Finishing")
    return pred, distinct_labels
示例15: affinityPropagationForPoints
# 需要導入模塊: from sklearn import cluster [as 別名]
# 或者: from sklearn.cluster import DBSCAN [as 別名]
def affinityPropagationForPoints(data):
    """Cluster *data* with DBSCAN, plot the result and report timings.

    Despite its name, this function runs DBSCAN (``eps=16``,
    ``min_samples=3``, euclidean metric); adjust ``eps`` and ``min_samples``
    to obtain a suitable clustering. The first two columns of *data* are
    drawn as a scatter plot coloured by cluster label.

    :param data: array of samples; columns 0 and 1 are used for plotting
    :return: tuple ``(pred, unique_labels)``: the label of every sample and
        the distinct labels
    """
    t1 = time.time()
    db = cluster.DBSCAN(eps=16, min_samples=3, metric='euclidean')
    pred = db.fit_predict(data)
    t2 = time.time()
    # Time spent constructing and fitting the model.
    print("模型訓練持續時間:", t2 - t1)
    print("預測類標,與簇數:", pred, len(np.unique(pred)))

    t3 = time.time()
    plt.close('all')
    plt.figure(1, figsize=(15, 15))
    plt.clf()
    # The colormap is passed by name: plt.cm.get_cmap() was removed in
    # Matplotlib 3.9, while a name string works on old and new versions.
    plt.scatter(data[..., 0], data[..., 1], s=10, alpha=0.8, c=pred, cmap='nipy_spectral')
    plt.show()
    t4 = time.time()
    # Time spent building and showing the figure.
    print("圖表顯示持續時間:", t4 - t3)

    return pred, np.unique(pred)