本文整理汇总了Python中`_get_covars`方法的典型用法代码示例。注意:尽管标题写作 sklearn.cluster.DBSCAN._get_covars,但在下面的示例代码中,`_get_covars()` 实际上是在 `sklearn.mixture.DPGMM` 实例上调用的(见 `db = mixture.DPGMM(...)` 之后的 `db._get_covars()`);`sklearn.cluster.DBSCAN` 本身并没有该方法,该方法也属于早已废弃的私有 API(新版 scikit-learn 中应改用 `GaussianMixture.covariances_` 属性)。
在下文中一共展示了该方法的1个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: main
# 需要导入模块: from sklearn.cluster import DBSCAN 以及 from sklearn import mixture [as 别名]
# 说明: 示例中 _get_covars 是在 mixture.DPGMM 实例上调用的(DBSCAN 并无此方法)
def main(argv):
dbscan_heuristic_mode = False
dpgmm_mode = False
do_plot_clusters = False
do_dump_clusters = False
try:
opts, args = getopt.getopt(argv,"hegdp")
except getopt.GetoptError:
print('elviz_cluster.py [-h] [-e] [-g] [-d] [-p]')
sys.exit(2)
for opt, arg in opts:
if opt == '-h':
print('elviz_cluster.py [-h] [-e]')
print(' -h = help, -e = run dbscan' +
' epsilon heuristic plot generation code')
print(' -g = use a DPGMM for clustering')
print(' -p = plot the clusters to a PDF file')
print(' -d = dump the clusters to a text file')
sys.exit()
elif opt == '-e':
dbscan_heuristic_mode = True
elif opt == '-g':
dpgmm_mode = True
elif opt == '-p':
do_plot_clusters = True
elif opt == '-d':
do_dump_clusters = True
[elviz_data, combined_df] = read_pickle_or_CSVs(DATA_PICKLE, RAW_DATA_DIR)
# Setup plotting limits
print("determining plotting limits")
limits = {"x": [combined_df['Average fold'].min(), MAX_AVG_FOLD],
"y": [combined_df['Reference GC'].min(), combined_df['Reference GC'].max()]}
# Below changed in favor of fixed MAX
# limits["x"] = [combined_df['Average fold'].min(), combined_df['Average fold'].max()]
# fixed MAX below
print("normalizing data prior to clustering")
# normalize the combined data to retrieve the normalization parameters
scaler = StandardScaler().fit(combined_df[CLUSTER_COLUMNS])
# serializing outputs
if dbscan_heuristic_mode:
print("making DBSCAN heuristic plots")
dbscan_heuristic(elviz_data, scaler)
os.sys.exit()
print("serially processing files")
for filename in elviz_data.keys():
pdf_filename = filename.replace("csv", "pdf")
# skip if the PDF already exists
if os.path.isfile(RESULTS_DIR + pdf_filename):
print("skiping file %s" % filename)
continue
print("processing file %s" % filename)
df = elviz_data[filename]
# create a multipage PDF for storing the plots
with PdfPages(RESULTS_DIR + pdf_filename) as pdf:
# find unique values of taxonomy columns
dfgb = df.groupby(['Kingdom', 'Phylum', 'Class', 'Order', 'Family', 'Genus', 'Species'])
for key in dfgb.indices.keys():
idx = dfgb.indices[key]
tax_rows = df.iloc[idx]
if len(tax_rows) < MIN_ROWS:
continue
# normalize all dimensions to be used in clustering, e.g. GC, coverage, rpk
# reuse the scaler we created from all of the data for the transform
tax_rows_cluster_columns = scaler.transform(tax_rows[CLUSTER_COLUMNS])
if not dpgmm_mode:
db = DBSCAN(eps=EPS, min_samples=MIN_SAMPLES)
db.fit(tax_rows_cluster_columns)
core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
core_samples_mask[db.core_sample_indices_] = True
labels = db.labels_
else:
db = mixture.DPGMM(n_components=DPGMM_N_COMPONENTS, n_iter=100,
covariance_type='full', alpha=100, verbose=0)
db.fit(tax_rows_cluster_columns)
Y_ = db.predict(tax_rows_cluster_columns)
for i, (mean, covar) in enumerate(zip(
db.means_, db._get_covars())):
if not np.any(Y_ == i):
continue
#plt.scatter(X[Y_ == i, 0], X[Y_ == i, 1], .8, color=color)
labels = Y_
core_samples_mask = np.zeros_like(labels, dtype=bool)
core_samples_mask[:] = True
#print(labels)
#print(type(labels))
# number of clusters in labels, ignoring noise if present.
n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
if n_clusters_ < 1:
#.........这里部分代码省略.........