当前位置: 首页>>代码示例>>Python>>正文


Python DBSCAN._get_covars方法代码示例

本文整理汇总了Python中sklearn.cluster.DBSCAN._get_covars方法的典型用法代码示例。如果您正苦于以下问题:Python DBSCAN._get_covars方法的具体用法?Python DBSCAN._get_covars怎么用?Python DBSCAN._get_covars使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。需要注意的是,在下面的示例中,_get_covars实际上是在mixture.DPGMM实例(即-g模式的分支)上调用的,DBSCAN分支并没有调用该方法。您也可以进一步了解sklearn.cluster.DBSCAN类的用法示例。


下文共展示了1个DBSCAN._get_covars方法的代码示例;有多个示例时,默认按受欢迎程度排序。您可以为喜欢或者觉得有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: main

# Required import: from sklearn.cluster import DBSCAN [as alias]
# Note: _get_covars cannot be imported on its own; below it is called on the
# mixture.DPGMM instance (presumably sklearn.mixture), not on DBSCAN.
def main(argv):
    dbscan_heuristic_mode = False
    dpgmm_mode = False
    do_plot_clusters = False
    do_dump_clusters = False
    try:
        opts, args = getopt.getopt(argv,"hegdp")
    except getopt.GetoptError:
        print('elviz_cluster.py [-h] [-e] [-g] [-d] [-p]')
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print('elviz_cluster.py [-h] [-e]')
            print('  -h = help, -e = run dbscan' +
                  ' epsilon heuristic plot generation code')
            print('  -g = use a DPGMM for clustering')
            print('  -p = plot the clusters to a PDF file')
            print('  -d = dump the clusters to a text file')
            sys.exit()
        elif opt == '-e':
            dbscan_heuristic_mode = True
        elif opt == '-g':
            dpgmm_mode = True
        elif opt == '-p':
            do_plot_clusters = True
        elif opt == '-d':
            do_dump_clusters = True

    [elviz_data, combined_df] = read_pickle_or_CSVs(DATA_PICKLE, RAW_DATA_DIR)

    # Setup plotting limits
    print("determining plotting limits")
    limits = {"x": [combined_df['Average fold'].min(), MAX_AVG_FOLD],
              "y": [combined_df['Reference GC'].min(), combined_df['Reference GC'].max()]}
    # Below changed in favor of fixed MAX
    # limits["x"] = [combined_df['Average fold'].min(), combined_df['Average fold'].max()]
    # fixed MAX below

    print("normalizing data prior to clustering")
    # normalize the combined data to retrieve the normalization parameters
    scaler = StandardScaler().fit(combined_df[CLUSTER_COLUMNS])
    # serializing outputs

    if dbscan_heuristic_mode:
        print("making DBSCAN heuristic plots")
        dbscan_heuristic(elviz_data, scaler)
        os.sys.exit()

    print("serially processing files")
    for filename in elviz_data.keys():
        pdf_filename = filename.replace("csv", "pdf")
        # skip if the PDF already exists
        if os.path.isfile(RESULTS_DIR + pdf_filename):
            print("skiping file %s" % filename)
            continue
        print("processing file %s" % filename)

        df = elviz_data[filename]

        # create a multipage PDF for storing the plots
        with PdfPages(RESULTS_DIR + pdf_filename) as pdf:
            # find unique values of taxonomy columns
            dfgb = df.groupby(['Kingdom', 'Phylum', 'Class', 'Order', 'Family', 'Genus', 'Species'])
            for key in dfgb.indices.keys():
                idx = dfgb.indices[key]
                tax_rows = df.iloc[idx]
                if len(tax_rows) < MIN_ROWS:
                    continue
                # normalize all dimensions to be used in clustering, e.g. GC, coverage, rpk
                # reuse the scaler we created from all of the data for the transform
                tax_rows_cluster_columns = scaler.transform(tax_rows[CLUSTER_COLUMNS])

                if not dpgmm_mode:
                    db = DBSCAN(eps=EPS, min_samples=MIN_SAMPLES)
                    db.fit(tax_rows_cluster_columns)

                    core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
                    core_samples_mask[db.core_sample_indices_] = True
                    labels = db.labels_
                else:
                    db = mixture.DPGMM(n_components=DPGMM_N_COMPONENTS, n_iter=100,
                                       covariance_type='full', alpha=100, verbose=0)
                    db.fit(tax_rows_cluster_columns)
                    Y_ = db.predict(tax_rows_cluster_columns)
                    for i, (mean, covar) in enumerate(zip(
                        db.means_, db._get_covars())):
                        if not np.any(Y_ == i):
                            continue
                        #plt.scatter(X[Y_ == i, 0], X[Y_ == i, 1], .8, color=color)
                    labels = Y_
                    core_samples_mask = np.zeros_like(labels, dtype=bool)
                    core_samples_mask[:] = True
                            
                #print(labels)
                #print(type(labels))

                # number of clusters in labels, ignoring noise if present.
                n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)

                if n_clusters_ < 1:
#.........这里部分代码省略.........
开发者ID:JanetMatsen,项目名称:elvizAnalysis,代码行数:103,代码来源:elviz_cluster.py


注:本文中的sklearn.cluster.DBSCAN._get_covars方法示例由纯净天空整理自GitHub/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。