当前位置: 首页>>代码示例>>Python>>正文


Python umap.UMAP类代码示例

本文整理汇总了Python中umap.UMAP类的典型用法代码示例。如果您正苦于以下问题:Python umap.UMAP类的具体用法?Python umap.UMAP怎么用?Python umap.UMAP使用的例子?那么,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块umap的用法示例。


在下文中一共展示了umap.UMAP类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: project

# 需要导入模块: import umap [as 别名]
# 或者: from umap import UMAP [as 别名]
def project(self, projection_model=None):
        '''
        Project the stored embeddings with an unsupervised model.

        :param projection_model: sklearn-style unsupervised model exposing ``fit_transform``
            (e.g., PCA). When None, ``umap.UMAP(min_dist=0.5, metric='cosine')`` is used,
            which requires umap-learn to be installed.
        :return: the array returned by ``projection_model.fit_transform(self.embeddings_)``
        :raises Exception: if ``self.embeddings_`` is None, or if the default model is
            requested but ``umap`` cannot be imported
        '''
        if self.embeddings_ is None:
            raise Exception("Run set_embeddings_model or set_embeddings to get embeddings")
        if projection_model is None:
            try:
                import umap
            except ImportError as import_error:
                # Only a failed import means "not installed"; anything else (e.g. Ctrl-C)
                # must not be rewritten into an installation hint.
                raise Exception(
                    "Please install umap (pip install umap-learn) to use the default projection_model."
                ) from import_error
            projection_model = umap.UMAP(min_dist=0.5, metric='cosine')
        return projection_model.fit_transform(self.embeddings_)
开发者ID:JasonKessler,项目名称:scattertext,代码行数:19,代码来源:EmbeddingsResolver.py

示例2: project_embeddings

# 需要导入模块: import umap [as 别名]
# 或者: from umap import UMAP [as 别名]
def project_embeddings(self, projection_model=None, x_dim=0, y_dim=1):
        '''
        Project the embeddings and restrict the corpus to the terms that received coordinates.

        :param projection_model: sklearn unsupervised model (e.g., PCA), forwarded to ``self.project``;
            when None that method falls back to its default model
        :param x_dim: int, default 0, dimension of transformation matrix for x-axis
        :param y_dim: int, default 1, dimension of transformation matrix for y-axis
        :return: tuple ``(corpus, word_axes)``: the updated ``self.corpus_`` and a DataFrame
            indexed by term with ``'x'`` and ``'y'`` columns
        '''
        projected = self.project(projection_model).T
        coords_by_term = pd.DataFrame({'term': list(self.vocab_),
                                       'x': projected[x_dim],
                                       'y': projected[y_dim]}).set_index('term')

        # Align to the corpus terms; terms without coordinates come out as NaN and are dropped.
        corpus_terms = pd.Series(self.corpus_.get_terms())
        word_axes = coords_by_term.reindex(corpus_terms).dropna()

        # Drop corpus terms that have no coordinates, then re-align to the reduced corpus.
        terms_without_coords = set(self.corpus_.get_terms()) - set(word_axes.index)
        self.corpus_ = self.corpus_.remove_terms(terms_without_coords)
        word_axes = word_axes.reindex(self.corpus_.get_terms()).dropna()

        return self.corpus_, word_axes
开发者ID:JasonKessler,项目名称:scattertext,代码行数:22,代码来源:EmbeddingsResolver.py

示例3: test_ingest_map_embedding_umap

# 需要导入模块: import umap [as 别名]
# 或者: from umap import UMAP [as 别名]
def test_ingest_map_embedding_umap():
    """Check that Ingest's UMAP mapping of new data agrees with UMAP's own transform."""
    reference = sc.AnnData(X)
    query = sc.AnnData(T)

    sc.pp.neighbors(reference, method='umap', use_rep='X', n_neighbors=4, random_state=0)
    sc.tl.umap(reference, random_state=0)

    ingest = sc.tl.Ingest(reference)
    ingest.fit(query)
    ingest.map_embedding(method='umap')

    # Expected result: UMAP fitted directly on X with the same seed and neighbour count.
    fitted = UMAP(min_dist=0.5, random_state=0, n_neighbors=4)
    fitted.fit(X)
    expected = fitted.transform(T)

    assert np.allclose(ingest._obsm['X_umap'], expected)
开发者ID:theislab,项目名称:scanpy,代码行数:20,代码来源:test_ingest.py

示例4: bsoid_umap_embed

# 需要导入模块: import umap [as 别名]
# 或者: from umap import UMAP [as 别名]
def bsoid_umap_embed(f_10fps_sc, umap_params=UMAP_PARAMS):
    """
    Fit UMAP (unsupervised) on the transposed feature matrix.
    :param f_10fps_sc: 2D array, standardized/session features; transposed before fitting
    :param umap_params: dict, extra keyword arguments forwarded to umap.UMAP
    :return trained_umap: object, fitted UMAP transformer
    :return umap_embeddings: 2D array, the fitted transformer's ``embedding_``
    """
    feats_train = f_10fps_sc.T
    n_instances = feats_train.shape[0]
    n_features = feats_train.shape[1]
    logging.info('Transforming all {} instances from {} D into {} D'.format(
        n_instances, n_features, umap_params.get('n_components')))

    # Neighbourhood size scales with the square root of the number of instances.
    n_neighbors = int(round(np.sqrt(n_instances)))
    trained_umap = umap.UMAP(n_neighbors=n_neighbors, **umap_params).fit(feats_train)
    umap_embeddings = trained_umap.embedding_

    logging.info('Done non-linear transformation with UMAP from {} D into {} D.'.format(
        n_features, umap_embeddings.shape[1]))
    return trained_umap, umap_embeddings
开发者ID:YttriLab,项目名称:B-SOID,代码行数:20,代码来源:train.py

示例5: bsoid_hdbscan

# 需要导入模块: import umap [as 别名]
# 或者: from umap import UMAP [as 别名]
def bsoid_hdbscan(umap_embeddings, hdbscan_params=HDBSCAN_PARAMS):
    """
    Trains HDBSCAN (unsupervised) given learned UMAP space, keeping the fit that yields
    the most distinct labels over a sweep of minimum cluster sizes.
    :param umap_embeddings: 2D array, embedded UMAP space
    :param hdbscan_params: dict, extra keyword arguments forwarded to hdbscan.HDBSCAN
    :return assignments: labels_ of the selected HDBSCAN fit
    :return soft_clusters: membership vectors from hdbscan.all_points_membership_vectors
    :return soft_assignments: argmax of soft_clusters along axis 1
    """
    # np.inf rather than the np.infty alias, which was removed in NumPy 2.0.
    highest_numulab = -np.inf
    best_clf = None
    n_instances = umap_embeddings.shape[0]
    logging.info('Running HDBSCAN on {} instances in {} D space...'.format(*umap_embeddings.shape))
    # min_cluster_size is swept from 0.6% to 2.0% of the number of instances.
    for min_c in range(6, 21):
        trained_classifier = hdbscan.HDBSCAN(prediction_data=True,
                                             min_cluster_size=int(round(0.001 * min_c * n_instances)),
                                             **hdbscan_params).fit(umap_embeddings)
        n_labels = len(np.unique(trained_classifier.labels_))
        # Strict comparison: on ties the earlier (smaller min_cluster_size) fit is kept.
        if n_labels > highest_numulab:
            logging.info('Adjusting minimum cluster size to maximize cluster number...')
            highest_numulab = n_labels
            best_clf = trained_classifier
    assignments = best_clf.labels_
    soft_clusters = hdbscan.all_points_membership_vectors(best_clf)
    soft_assignments = np.argmax(soft_clusters, axis=1)
    logging.info('Done predicting labels for {} instances in {} D space...'.format(*umap_embeddings.shape))
    return assignments, soft_clusters, soft_assignments
开发者ID:YttriLab,项目名称:B-SOID,代码行数:27,代码来源:train.py

示例6: main

# 需要导入模块: import umap [as 别名]
# 或者: from umap import UMAP [as 别名]
def main(train_folders: list):
    """
    Run the training pipeline: preprocessing, feature extraction, UMAP embedding,
    HDBSCAN clustering and classifier training; optionally save plots when PLOT is set.
    :param train_folders: list, training data folders
    :return f_10fps: 2D array, features
    :return f_10fps_sc: second output of bsoid_feats, the input to the UMAP embedding
    :return umap_embeddings: 2D array, embedded UMAP space
    :return hdb_assignments: HDBSCAN assignments
    :return soft_assignments: soft-cluster assignments used to train the classifier
    :return soft_clusters: soft-cluster output of bsoid_hdbscan
    :return nn_classifier: obj, MLP classifier
    :return scores: 1D array, cross-validated accuracy
    :return nn_assignments: neural net predictions
    """
    import bsoid_umap.utils.likelihoodprocessing
    filenames, training_data, perc_rect = bsoid_umap.utils.likelihoodprocessing.main(train_folders)
    f_10fps, f_10fps_sc = bsoid_feats(training_data)
    trained_umap, umap_embeddings = bsoid_umap_embed(f_10fps_sc)
    hdb_assignments, soft_clusters, soft_assignments = bsoid_hdbscan(umap_embeddings)
    nn_classifier, scores, nn_assignments = bsoid_nn(f_10fps, soft_assignments)
    if PLOT:
        timestr = time.strftime("_%Y%m%d_%H%M")
        # Plot only points with a non-negative label (presumably excludes HDBSCAN noise; confirm).
        labelled = hdb_assignments >= 0
        fig1 = plot_classes(umap_embeddings[labelled], hdb_assignments[labelled])
        svg_name = ''.join(('hdb_soft_assignments', timestr, '.svg'))
        fig1.savefig(os.path.join(OUTPUT_PATH, svg_name))
        plot_accuracy(scores)
    return (f_10fps, f_10fps_sc, umap_embeddings, hdb_assignments, soft_assignments, soft_clusters,
            nn_classifier, scores, nn_assignments)
开发者ID:YttriLab,项目名称:B-SOID,代码行数:25,代码来源:train.py

示例7: test_umap_sparse_transform_on_iris

# 需要导入模块: import umap [as 别名]
# 或者: from umap import UMAP [as 别名]
def test_umap_sparse_transform_on_iris(iris, iris_selection):
    """Fit UMAP on a sparse subset of iris and check that transforming the rest is trustworthy."""
    data = sparse.csr_matrix(iris.data[iris_selection])
    assert sparse.issparse(data)
    fitter = UMAP(
        n_neighbors=10,
        min_dist=0.01,
        random_state=42,
        n_epochs=100,
        force_approximation_algorithm=True,
    ).fit(data)

    # The held-out rows are the complement of the selection used for fitting.
    new_data = sparse.csr_matrix(iris.data[~iris_selection])
    assert sparse.issparse(new_data)
    embedding = fitter.transform(new_data)

    trust = trustworthiness(new_data, embedding, 10)
    assert_greater_equal(
        trust,
        0.80,
        # Single literal: the former adjacent literals concatenated to "...forifis"-style text
        # without a separating space.
        "Insufficiently trustworthy transform for iris dataset: {}".format(trust),
    )


# UMAP Clusterability on Iris
# --------------------------- 
开发者ID:lmcinnes,项目名称:umap,代码行数:27,代码来源:test_umap_on_iris.py

示例8: test_metric_supervised_umap_trustworthiness

# 需要导入模块: import umap [as 别名]
# 或者: from umap import UMAP [as 别名]
def test_metric_supervised_umap_trustworthiness():
    """Supervised UMAP with the "l1" target metric should give a trustworthy blobs embedding."""
    data, labels = make_blobs(50, cluster_std=0.5, random_state=42)
    embedding = UMAP(
        n_neighbors=10,
        min_dist=0.01,
        target_metric="l1",
        target_weight=0.8,
        n_epochs=100,
        random_state=42,
    ).fit_transform(data, labels)
    trust = trustworthiness(data, embedding, 10)
    assert_greater_equal(
        trust,
        0.95,
        # Single literal so the message keeps the space between "for" and "blobs".
        "Insufficiently trustworthy embedding for blobs dataset: {}".format(trust),
    )
开发者ID:lmcinnes,项目名称:umap,代码行数:18,代码来源:test_umap_trustworthiness.py

示例9: test_string_metric_supervised_umap_trustworthiness

# 需要导入模块: import umap [as 别名]
# 或者: from umap import UMAP [as 别名]
def test_string_metric_supervised_umap_trustworthiness():
    """Supervised UMAP with string labels and the "string" target metric should stay trustworthy."""
    data, labels = make_blobs(50, cluster_std=0.5, random_state=42)
    # Map the integer blob labels onto string labels.
    labels = np.array(["this", "that", "other"])[labels]
    embedding = UMAP(
        n_neighbors=10,
        min_dist=0.01,
        target_metric="string",
        target_weight=0.8,
        n_epochs=100,
        random_state=42,
    ).fit_transform(data, labels)
    trust = trustworthiness(data, embedding, 10)
    assert_greater_equal(
        trust,
        0.95,
        # Single literal so the message keeps the space between "for" and "blobs".
        "Insufficiently trustworthy embedding for blobs dataset: {}".format(trust),
    )
开发者ID:lmcinnes,项目名称:umap,代码行数:19,代码来源:test_umap_trustworthiness.py

示例10: test_discrete_metric_supervised_umap_trustworthiness

# 需要导入模块: import umap [as 别名]
# 或者: from umap import UMAP [as 别名]
def test_discrete_metric_supervised_umap_trustworthiness():
    """Supervised UMAP with the "ordinal" target metric should give a trustworthy blobs embedding."""
    data, labels = make_blobs(50, cluster_std=0.5, random_state=42)
    embedding = UMAP(
        n_neighbors=10,
        min_dist=0.01,
        target_metric="ordinal",
        target_weight=0.8,
        n_epochs=100,
        random_state=42,
    ).fit_transform(data, labels)
    trust = trustworthiness(data, embedding, 10)
    assert_greater_equal(
        trust,
        0.95,
        # Single literal so the message keeps the space between "for" and "blobs".
        "Insufficiently trustworthy embedding for blobs dataset: {}".format(trust),
    )
开发者ID:lmcinnes,项目名称:umap,代码行数:18,代码来源:test_umap_trustworthiness.py

示例11: cal_UMAP

# 需要导入模块: import umap [as 别名]
# 或者: from umap import UMAP [as 别名]
def cal_UMAP(code, pca_dim=50, n_neighbors=30, min_dist=0.1, n_components=2, metric='cosine'):
    """ Reduce `code` with UMAP, optionally preceded by PCA.
    Args:
        code: num_cells * num_features
        pca_dim: PCA is applied first (down to pca_dim components) when code has more
            than pca_dim columns
        n_neighbors: UMAP parameter
        min_dist: UMAP parameter
        n_components: UMAP parameter
        metric: UMAP parameter
    Returns:
        umap_code: num_cells * n_components
    """
    too_wide = code.shape[1] > pca_dim
    if too_wide:
        code = PCA(n_components=pca_dim).fit_transform(code)

    # random_state is pinned to 0 for the UMAP step.
    umap_kwargs = dict(
        n_neighbors=n_neighbors,
        min_dist=min_dist,
        n_components=n_components,
        metric=metric,
        random_state=0,
    )
    return umap.UMAP(**umap_kwargs).fit_transform(code)
开发者ID:txWang,项目名称:BERMUDA,代码行数:25,代码来源:helper.py

示例12: fit

# 需要导入模块: import umap [as 别名]
# 或者: from umap import UMAP [as 别名]
def fit(self, X, y=None):
        """
        Fit the model using X as training data.

        :param X: array-like training data; validated with ``check_array`` as floating point.
        :param y: forwarded to the underlying ``umap.UMAP.fit``; kept in for pipeline support
        :return: Returns an instance of self.
        :raises ValueError: if ``n_components`` is below two or ``threshold`` is ``None``
        """
        X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
        if self.n_components < 2:
            raise ValueError("Number of components must be at least two.")
        # Explicit None check: a truthiness test would also reject a threshold of 0
        # while reporting that the value is None.
        if self.threshold is None:
            raise ValueError("The `threshold` value cannot be `None`.")

        self.umap_ = umap.UMAP(
            n_components=self.n_components,
            n_neighbors=self.n_neighbors,
            min_dist=self.min_dist,
            metric=self.metric,
            random_state=self.random_state,
        )
        self.umap_.fit(X, y)
        self.offset_ = -self.threshold
        return self
开发者ID:koaning,项目名称:scikit-lego,代码行数:26,代码来源:umap_reconstruction.py

示例13: umap

# 需要导入模块: import umap [as 别名]
# 或者: from umap import UMAP [as 别名]
def umap(features, dims=2, write_to=None):
    ''' Reduces the features in the parsed pd.DataFrame 'features' into 2
    dimensions with UMAP. The first column of 'features' is treated as a unique
    ID and carried over unchanged as the first column of the result.

    'dims' is accepted for interface compatibility; any value other than 2 only
    prints a notice and the reduction is still performed with UMAP's default
    number of components. Writes the output to 'write_to' if provided, in .csv
    format; a failed write is reported as a warning and does not raise.

    Returns the reduced DataFrame (ID column followed by the embedding columns).
    '''

    if dims != 2:
        print('UMAP: Not currently supporting anything but 2-dim reduction')

    id_col_name = features.columns[0]

    print('UMAP: Reducing features to 2 dimensions')

    # Don't consider the first unique ID column
    features_salient = features.drop(columns=[id_col_name])

    reduced = pd.DataFrame(UMAP(spread=0.5).fit_transform(features_salient))
    # Insert the IDs positionally: 'reduced' has a fresh default index, so inserting a
    # pandas object would align on index and yield NaN IDs when 'features' is indexed
    # by anything other than 0..n-1.
    reduced.insert(0, id_col_name, features[id_col_name].values)

    print('Success')

    if write_to is not None:
        try:
            reduced.to_csv(write_to, index=False)
            print('Wrote reduced features to "{}"'.format(write_to))
        except Exception as e:
            # Best effort: the reduced frame is still returned to the caller.
            print('\nWARNING - Could not write results to file: "{}"'.format(e))

    return reduced
开发者ID:zegami,项目名称:image-similarity-clustering,代码行数:31,代码来源:umap_reducer.py

示例14: plot_umap

# 需要导入模块: import umap [as 别名]
# 或者: from umap import UMAP [as 别名]
def plot_umap(trainer):
    """Plot a joint UMAP embedding of the trainer's two latent spaces in three panels.

    Panel 1 overlays both datasets; panels 2 and 3 colour each dataset by its cell types.
    """
    latent_seq, latent_fish = trainer.get_latent()
    n_seq = latent_seq.shape[0]

    # Embed both latent spaces together, then split the result back by row count.
    embedded = umap.UMAP().fit_transform(np.concatenate([latent_seq, latent_fish]))
    latent2d_seq = embedded[:n_seq]
    latent2d_fish = embedded[n_seq:]

    data_seq, data_fish = [p.gene_dataset for p in trainer.all_dataset]

    colors = sns.color_palette(n_colors=30)
    plt.figure(figsize=(25, 10))

    overview_ax = plt.subplot(1, 3, 1)
    overview_ax.scatter(*latent2d_seq.T, color="r", label="seq", alpha=0.5, s=0.5)
    overview_ax.scatter(*latent2d_fish.T, color="b", label="osm", alpha=0.5, s=0.5)
    overview_ax.legend()

    # (subplot position, dataset, 2D points, title, legend-label formatter)
    panels = (
        (2, data_seq, latent2d_seq, "Seq cells", lambda name: name[:12]),
        (3, data_fish, latent2d_fish, "Spatial cells", lambda name: name),
    )
    for position, dataset, points, title, legend_label in panels:
        panel_ax = plt.subplot(1, 3, position)
        labels = dataset.labels.ravel()
        for i, cell_type in enumerate(dataset.cell_types):
            panel_ax.scatter(
                *points[labels == i].T,
                color=colors[i],
                label=legend_label(cell_type),
                alpha=0.5,
                s=5
            )
        panel_ax.legend()
        panel_ax.set_title(title)
开发者ID:YosefLab,项目名称:scVI,代码行数:38,代码来源:gimvi_tutorial.py

示例15: project_separate

# 需要导入模块: import umap [as 别名]
# 或者: from umap import UMAP [as 别名]
def project_separate(self, projector=None):
        """
        Project both categories' embeddings together and label each row with its category.

        :param projector: model exposing ``fit_transform``; when None,
            ``UMAP(n_components=2, metric='cosine')`` is used
        :return: DataFrame indexed by ``self.labeled_terms`` with columns 'x', 'y' and 'category'
        """
        if projector is None:
            from umap import UMAP
            projector = UMAP(n_components=2, metric='cosine')

        # Category-1 rows are stacked above category-2 rows.
        stacked = np.vstack([self.cat1_dwe_ar_norm, self.cat2_dwe_ar_norm])
        coords = projector.fit_transform(stacked)

        projected_df = pd.DataFrame(coords, columns=['x', 'y'], index=self.labeled_terms)
        n_terms = len(self.terms)
        projected_df['category'] = [self.category1] * n_terms + [self.category2] * n_terms
        return projected_df
开发者ID:JasonKessler,项目名称:scattertext,代码行数:12,代码来源:CategoryEmbeddings.py


注:本文中的umap.UMAP类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。