This article collects typical usage examples of the sklearn.cluster.DBSCAN.predict method in Python. If you have been wondering how to use Python's DBSCAN.predict, how it works, or what real examples of it look like, the hand-picked code samples below may help. You can also explore further usage examples for the containing class, sklearn.cluster.DBSCAN.
Two code examples of DBSCAN.predict are shown below, ordered by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
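Before looking at the examples, note that in current scikit-learn releases DBSCAN itself exposes fit, fit_predict, and the fitted labels_ attribute rather than a standalone predict method, which is why Example 2 below falls back to labels_ whenever that attribute is available. The following minimal sketch shows the usual fit/labels_ pattern that both examples build on; the toy data and the eps/min_samples values are illustrative assumptions, not taken from either example.

# Minimal DBSCAN sketch (illustrative only; toy data and parameters are assumptions).
import numpy as np
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler

# Two dense blobs plus one outlier.
X = np.array([[1.0, 1.1], [1.1, 1.0], [0.9, 1.0],
              [8.0, 8.1], [8.1, 7.9], [7.9, 8.0],
              [25.0, 25.0]])

# Scale features before clustering, as both examples below do.
X_scaled = StandardScaler().fit_transform(X)

db = DBSCAN(eps=0.5, min_samples=2)
labels = db.fit_predict(X_scaled)   # equivalent to db.fit(X_scaled); db.labels_

# Noise points are labelled -1; count the remaining clusters.
n_clusters = len(set(labels)) - (1 if -1 in labels else 0)
print(labels, n_clusters)

Example 1 applies this same pattern per taxonomy group of a coverage/GC table, and Example 2 applies it per TDC to wire-chamber hit data read from a ROOT file.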
Example 1: main
# Required module import: from sklearn.cluster import DBSCAN [as alias]
# Or: from sklearn.cluster.DBSCAN import predict [as alias]
def main(argv):
    dbscan_heuristic_mode = False
    dpgmm_mode = False
    do_plot_clusters = False
    do_dump_clusters = False
    try:
        opts, args = getopt.getopt(argv, "hegdp")
    except getopt.GetoptError:
        print('elviz_cluster.py [-h] [-e] [-g] [-d] [-p]')
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print('elviz_cluster.py [-h] [-e]')
            print(' -h = help, -e = run dbscan' +
                  ' epsilon heuristic plot generation code')
            print(' -g = use a DPGMM for clustering')
            print(' -p = plot the clusters to a PDF file')
            print(' -d = dump the clusters to a text file')
            sys.exit()
        elif opt == '-e':
            dbscan_heuristic_mode = True
        elif opt == '-g':
            dpgmm_mode = True
        elif opt == '-p':
            do_plot_clusters = True
        elif opt == '-d':
            do_dump_clusters = True

    [elviz_data, combined_df] = read_pickle_or_CSVs(DATA_PICKLE, RAW_DATA_DIR)

    # Set up plotting limits
    print("determining plotting limits")
    limits = {"x": [combined_df['Average fold'].min(), MAX_AVG_FOLD],
              "y": [combined_df['Reference GC'].min(),
                    combined_df['Reference GC'].max()]}
    # The x maximum was changed in favor of the fixed MAX_AVG_FOLD above:
    # limits["x"] = [combined_df['Average fold'].min(), combined_df['Average fold'].max()]

    print("normalizing data prior to clustering")
    # normalize the combined data to retrieve the normalization parameters
    scaler = StandardScaler().fit(combined_df[CLUSTER_COLUMNS])

    # serializing outputs
    if dbscan_heuristic_mode:
        print("making DBSCAN heuristic plots")
        dbscan_heuristic(elviz_data, scaler)
        os.sys.exit()

    print("serially processing files")
    for filename in elviz_data.keys():
        pdf_filename = filename.replace("csv", "pdf")
        # skip if the PDF already exists
        if os.path.isfile(RESULTS_DIR + pdf_filename):
            print("skipping file %s" % filename)
            continue
        print("processing file %s" % filename)
        df = elviz_data[filename]
        # create a multipage PDF for storing the plots
        with PdfPages(RESULTS_DIR + pdf_filename) as pdf:
            # find unique values of taxonomy columns
            dfgb = df.groupby(['Kingdom', 'Phylum', 'Class', 'Order',
                               'Family', 'Genus', 'Species'])
            for key in dfgb.indices.keys():
                idx = dfgb.indices[key]
                tax_rows = df.iloc[idx]
                if len(tax_rows) < MIN_ROWS:
                    continue
                # normalize all dimensions used in clustering (e.g. GC, coverage, rpk),
                # reusing the scaler fit on all of the data for the transform
                tax_rows_cluster_columns = scaler.transform(tax_rows[CLUSTER_COLUMNS])
                if not dpgmm_mode:
                    db = DBSCAN(eps=EPS, min_samples=MIN_SAMPLES)
                    db.fit(tax_rows_cluster_columns)
                    core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
                    core_samples_mask[db.core_sample_indices_] = True
                    labels = db.labels_
                else:
                    db = mixture.DPGMM(n_components=DPGMM_N_COMPONENTS, n_iter=100,
                                       covariance_type='full', alpha=100, verbose=0)
                    db.fit(tax_rows_cluster_columns)
                    Y_ = db.predict(tax_rows_cluster_columns)
                    for i, (mean, covar) in enumerate(zip(
                            db.means_, db._get_covars())):
                        if not np.any(Y_ == i):
                            continue
                        # plt.scatter(X[Y_ == i, 0], X[Y_ == i, 1], .8, color=color)
                    labels = Y_
                    core_samples_mask = np.zeros_like(labels, dtype=bool)
                    core_samples_mask[:] = True
                # print(labels)
                # print(type(labels))
                # number of clusters in labels, ignoring noise if present
                n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
                if n_clusters_ < 1:
#......... the rest of this code is omitted here .........
Example 2: get_hits
# Required module import: from sklearn.cluster import DBSCAN [as alias]
# Or: from sklearn.cluster.DBSCAN import predict [as alias]
def get_hits(file_path):
    branch_list = [
        #'spill',
        #'tdc_time_stamp',
        #'trigger_counter',
        'number_hits',
        'tdc_number',
        'hit_channel',
        'hit_time_bin',
    ]
    arr = rnp.root2array(file_path, 'DataQuality/mwpc', branch_list)

    #spill = arr['spill'].astype(np.int64)
    #tdc_time_stamp = arr['tdc_time_stamp'].astype(np.int64)  #/ 106.208  # microseconds
    #trigger_counter = arr['trigger_counter']
    number_hits = arr['number_hits']
    tdc_number = arr['tdc_number']
    hit_channel = arr['hit_channel']
    hit_time_bin = arr['hit_time_bin']

    number_entries = arr.size
    number_tdcs = 16

    time_bin_scaling = 1.0 / 1280.0
    channel_scaling = 1.0 / 64.0

    dbscan = DBSCAN(eps=4.0/64.0, min_samples=1)

    #colors = np.array([ x for x in 'bgrcmykbgrcmykbgrcmykbgrcmyk' ])
    #colors = np.hstack([colors] * 20)

    good_hit_array = [ [] for i in range(number_tdcs) ]
    bad_hit_array = [ [] for i in range(number_tdcs) ]

    for entry in xrange(number_entries):
        hit_time_buffer = [ [] for i in range(number_tdcs) ]
        hit_channel_buffer = [ [] for i in range(number_tdcs) ]

        for tdc_index in xrange(number_tdcs):
            flag = (tdc_number[entry] == tdc_index + 1)
            hit_time_buffer[tdc_index].extend(hit_time_bin[entry][flag])
            hit_channel_buffer[tdc_index].extend(hit_channel[entry][flag])

        for tdc_index in xrange(number_tdcs):
            data = np.array([
                np.array(hit_channel_buffer[tdc_index]).astype(np.int64),
                np.array(hit_time_buffer[tdc_index]).astype(np.int64)
            ]).T
            scaled_data = np.array([
                np.array(hit_channel_buffer[tdc_index]).astype(np.int64)
                * channel_scaling,
                np.array(hit_time_buffer[tdc_index]).astype(np.int64)
                * time_bin_scaling
            ]).T

            if len(scaled_data) != 0:
                dbscan.fit(scaled_data)
                if hasattr(dbscan, 'labels_'):
                    y_pred = dbscan.labels_.astype(np.int)
                else:
                    y_pred = dbscan.predict(scaled_data)
                #print '///////////////////////////////////////'
                cluster_indices = np.unique(y_pred)
                #print cluster_indices
                #print data.shape
                #print y_pred.shape
                #print y_pred
                for cluster_index in cluster_indices:
                    cluster = data[y_pred == cluster_index]
                    if len(cluster) > 10:
                        bad_hit_array[tdc_index].extend(cluster)
                        continue
                    # z is the earliest hit in the cluster
                    z = cluster[np.where(cluster[:, 1] == cluster[:, 1].min())]
                    if len(z) == 1:
                        good_hit_array[tdc_index].append(z[0])
                    else:
                        bad_hit_array[tdc_index].extend(cluster)
                        continue
                    #else:
                    #    mean_channel = np.mean(z[:, 0])
                    dtype = np.dtype((np.void, (cluster.shape[1] *
                                                cluster.dtype.itemsize)))
                    mask = np.in1d(cluster.view(dtype), z.view(dtype))
                    bad_hits = cluster[~mask]
                    bad_hit_array[tdc_index].extend(bad_hits)
                #print '///////////////////////////////////////'

    good_hit_array = np.array(
        [ np.array(hits) for hits in good_hit_array ]
    )
    bad_hit_array = np.array(
        [ np.array(hits) for hits in bad_hit_array ]
    )
#......... the rest of this code is omitted here .........