This article collects typical usage examples of the umap.UMAP attribute in Python. If you are wondering what umap.UMAP does and how to use it, the curated examples below may help; you can also read further about the umap module that provides this attribute.
Fifteen code examples of umap.UMAP are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the site recommend better Python code examples.
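Before the examples, here is a minimal, self-contained sketch of the basic umap.UMAP workflow. The data and parameter values below are illustrative assumptions, not taken from any of the examples that follow.

import numpy as np
import umap  # pip install umap-learn

# Illustrative data: 200 samples with 10 features, randomly generated.
X = np.random.RandomState(42).rand(200, 10)

# Reduce to 2 dimensions; n_neighbors and min_dist are common starting values.
reducer = umap.UMAP(n_components=2, n_neighbors=15, min_dist=0.1, random_state=42)
embedding = reducer.fit_transform(X)  # shape (200, 2)
print(embedding.shape)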
Example 1: project
# Required import: import umap [as alias]
# Or: from umap import UMAP [as alias]
def project(self, projection_model=None):
'''
    :param projection_model: sklearn-style unsupervised model (e.g., PCA). If None, umap.UMAP is used by default,
        which requires the umap-learn package to be installed.
    :return: array, shape (vocab size, num dimensions)
'''
if self.embeddings_ is None:
raise Exception("Run set_embeddings_model or set_embeddings to get embeddings")
if projection_model is None:
try:
import umap
        except ImportError:
raise Exception("Please install umap (pip install umap-learn) to use the default projection_model.")
projection_model = umap.UMAP(min_dist=0.5, metric='cosine')
axes = projection_model.fit_transform(self.embeddings_)
return axes
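As the docstring notes, any sklearn-style unsupervised transformer exposing fit_transform can be passed as projection_model. A standalone sketch comparing the default UMAP path with a PCA alternative; the toy embedding matrix is an assumption standing in for self.embeddings_.

import numpy as np
import umap
from sklearn.decomposition import PCA

embeddings = np.random.rand(100, 50)  # assumed stand-in for self.embeddings_: 100 terms x 50 dimensions

axes_umap = umap.UMAP(min_dist=0.5, metric='cosine').fit_transform(embeddings)  # the default path above
axes_pca = PCA(n_components=2).fit_transform(embeddings)                        # an alternative projection_model
print(axes_umap.shape, axes_pca.shape)  # both (100, 2)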
Example 2: project_embeddings
# Required import: import umap [as alias]
# Or: from umap import UMAP [as alias]
def project_embeddings(self, projection_model=None, x_dim=0, y_dim=1):
'''
    :param projection_model: sklearn-style unsupervised model (e.g., PCA). If None, umap.UMAP is used by default,
        which requires the umap-learn package to be installed.
    :param x_dim: int, default 0, dimension of the projection to use as the x-axis
    :param y_dim: int, default 1, dimension of the projection to use as the y-axis
    :return: (corpus, word_axes): the corpus restricted to the projected terms, and a DataFrame of term coordinates
'''
axes = self.project(projection_model)
word_axes = (pd.DataFrame({'term': [w for w in self.vocab_],
'x': axes.T[x_dim],
'y': axes.T[y_dim]})
.set_index('term')
.reindex(pd.Series(self.corpus_.get_terms()))
.dropna())
self.corpus_ = self.corpus_.remove_terms(set(self.corpus_.get_terms()) - set(word_axes.index))
word_axes = word_axes.reindex(self.corpus_.get_terms()).dropna()
return self.corpus_, word_axes
Example 3: test_ingest_map_embedding_umap
# Required import: import umap [as alias]
# Or: from umap import UMAP [as alias]
def test_ingest_map_embedding_umap():
adata_ref = sc.AnnData(X)
adata_new = sc.AnnData(T)
sc.pp.neighbors(
adata_ref, method='umap', use_rep='X', n_neighbors=4, random_state=0
)
sc.tl.umap(adata_ref, random_state=0)
ing = sc.tl.Ingest(adata_ref)
ing.fit(adata_new)
ing.map_embedding(method='umap')
reducer = UMAP(min_dist=0.5, random_state=0, n_neighbors=4)
reducer.fit(X)
umap_transformed_t = reducer.transform(T)
assert np.allclose(ing._obsm['X_umap'], umap_transformed_t)
Example 4: bsoid_umap_embed
# Required import: import umap [as alias]
# Or: from umap import UMAP [as alias]
def bsoid_umap_embed(f_10fps_sc, umap_params=UMAP_PARAMS):
"""
Trains UMAP (unsupervised) given a set of features based on (x,y) positions
:param f_10fps_sc: 2D array, standardized/session features
:param umap_params: dict, UMAP params in GLOBAL_CONFIG
:return trained_umap: object, trained UMAP transformer
:return umap_embeddings: 2D array, embedded UMAP space
"""
feats_train = f_10fps_sc.T
logging.info('Transforming all {} instances from {} D into {} D'.format(feats_train.shape[0],
feats_train.shape[1],
umap_params.get('n_components')))
trained_umap = umap.UMAP(n_neighbors=int(round(np.sqrt(feats_train.shape[0]))), # power law
**umap_params).fit(feats_train)
umap_embeddings = trained_umap.embedding_
logging.info('Done non-linear transformation with UMAP from {} D into {} D.'.format(feats_train.shape[1],
umap_embeddings.shape[1]))
return trained_umap, umap_embeddings
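A hedged usage sketch for bsoid_umap_embed, assuming the function above and its imports are in scope. The feature matrix and the parameter dictionary are illustrative assumptions; in B-SOiD the real values come from the GLOBAL_CONFIG constants.

import numpy as np

f_10fps_sc = np.random.rand(50, 1000)  # assumed layout: 50 standardized features x 1000 frames
example_umap_params = {'n_components': 3, 'min_dist': 0.0}  # illustrative stand-in for UMAP_PARAMS
trained_umap, umap_embeddings = bsoid_umap_embed(f_10fps_sc, umap_params=example_umap_params)
print(umap_embeddings.shape)  # (1000, 3)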
Example 5: bsoid_hdbscan
# Required import: import umap [as alias]
# Or: from umap import UMAP [as alias]
def bsoid_hdbscan(umap_embeddings, hdbscan_params=HDBSCAN_PARAMS):
"""
Trains HDBSCAN (unsupervised) given learned UMAP space
:param umap_embeddings: 2D array, embedded UMAP space
:param hdbscan_params: dict, HDBSCAN params in GLOBAL_CONFIG
    :return assignments: 1D array, HDBSCAN hard cluster assignments
    :return soft_clusters: 2D array, all-points soft membership vectors
    :return soft_assignments: 1D array, argmax of the soft membership vectors
"""
    highest_numulab = -np.inf
numulab = []
min_cluster_range = range(6, 21)
logging.info('Running HDBSCAN on {} instances in {} D space...'.format(*umap_embeddings.shape))
for min_c in min_cluster_range:
trained_classifier = hdbscan.HDBSCAN(prediction_data=True,
min_cluster_size=int(round(0.001 * min_c * umap_embeddings.shape[0])),
**hdbscan_params).fit(umap_embeddings)
numulab.append(len(np.unique(trained_classifier.labels_)))
if numulab[-1] > highest_numulab:
logging.info('Adjusting minimum cluster size to maximize cluster number...')
highest_numulab = numulab[-1]
best_clf = trained_classifier
assignments = best_clf.labels_
soft_clusters = hdbscan.all_points_membership_vectors(best_clf)
soft_assignments = np.argmax(soft_clusters, axis=1)
logging.info('Done predicting labels for {} instances in {} D space...'.format(*umap_embeddings.shape))
return assignments, soft_clusters, soft_assignments
Example 6: main
# Required import: import umap [as alias]
# Or: from umap import UMAP [as alias]
def main(train_folders: list):
"""
:param train_folders: list, training data folders
    :return f_10fps: 2D array, features
    :return f_10fps_sc: 2D array, standardized features
    :return umap_embeddings: 2D array, embedded UMAP space
    :return hdb_assignments: 1D array, HDBSCAN hard cluster assignments
    :return soft_assignments: 1D array, HDBSCAN soft cluster assignments
    :return soft_clusters: 2D array, HDBSCAN soft cluster membership vectors
    :return nn_classifier: obj, MLP classifier
    :return scores: 1D array, cross-validated accuracy
    :return nn_assignments: 1D array, neural net predictions
"""
import bsoid_umap.utils.likelihoodprocessing
filenames, training_data, perc_rect = bsoid_umap.utils.likelihoodprocessing.main(train_folders)
f_10fps, f_10fps_sc = bsoid_feats(training_data)
trained_umap, umap_embeddings = bsoid_umap_embed(f_10fps_sc)
hdb_assignments, soft_clusters, soft_assignments = bsoid_hdbscan(umap_embeddings)
nn_classifier, scores, nn_assignments = bsoid_nn(f_10fps, soft_assignments)
if PLOT:
timestr = time.strftime("_%Y%m%d_%H%M")
fig1 = plot_classes(umap_embeddings[hdb_assignments >= 0], hdb_assignments[hdb_assignments >= 0])
my_file1 = 'hdb_soft_assignments'
fig1.savefig(os.path.join(OUTPUT_PATH, str.join('', (my_file1, timestr, '.svg'))))
plot_accuracy(scores)
return f_10fps, f_10fps_sc, umap_embeddings, hdb_assignments, soft_assignments, soft_clusters, \
nn_classifier, scores, nn_assignments
Example 7: test_umap_sparse_transform_on_iris
# Required import: import umap [as alias]
# Or: from umap import UMAP [as alias]
def test_umap_sparse_transform_on_iris(iris, iris_selection):
data = sparse.csr_matrix(iris.data[iris_selection])
assert sparse.issparse(data)
fitter = UMAP(
n_neighbors=10,
min_dist=0.01,
random_state=42,
n_epochs=100,
force_approximation_algorithm=True,
).fit(data)
new_data = sparse.csr_matrix(iris.data[~iris_selection])
assert sparse.issparse(new_data)
embedding = fitter.transform(new_data)
trust = trustworthiness(new_data, embedding, 10)
assert_greater_equal(
trust,
0.80,
"Insufficiently trustworthy transform for" "iris dataset: {}".format(trust),
)
Example 8: test_metric_supervised_umap_trustworthiness
# Required import: import umap [as alias]
# Or: from umap import UMAP [as alias]
def test_metric_supervised_umap_trustworthiness():
data, labels = make_blobs(50, cluster_std=0.5, random_state=42)
embedding = UMAP(
n_neighbors=10,
min_dist=0.01,
target_metric="l1",
target_weight=0.8,
n_epochs=100,
random_state=42,
).fit_transform(data, labels)
trust = trustworthiness(data, embedding, 10)
assert_greater_equal(
trust,
0.95,
"Insufficiently trustworthy embedding for" "blobs dataset: {}".format(trust),
)
Example 9: test_string_metric_supervised_umap_trustworthiness
# Required import: import umap [as alias]
# Or: from umap import UMAP [as alias]
def test_string_metric_supervised_umap_trustworthiness():
data, labels = make_blobs(50, cluster_std=0.5, random_state=42)
labels = np.array(["this", "that", "other"])[labels]
embedding = UMAP(
n_neighbors=10,
min_dist=0.01,
target_metric="string",
target_weight=0.8,
n_epochs=100,
random_state=42,
).fit_transform(data, labels)
trust = trustworthiness(data, embedding, 10)
assert_greater_equal(
trust,
0.95,
"Insufficiently trustworthy embedding for" "blobs dataset: {}".format(trust),
)
Example 10: test_discrete_metric_supervised_umap_trustworthiness
# Required import: import umap [as alias]
# Or: from umap import UMAP [as alias]
def test_discrete_metric_supervised_umap_trustworthiness():
data, labels = make_blobs(50, cluster_std=0.5, random_state=42)
embedding = UMAP(
n_neighbors=10,
min_dist=0.01,
target_metric="ordinal",
target_weight=0.8,
n_epochs=100,
random_state=42,
).fit_transform(data, labels)
trust = trustworthiness(data, embedding, 10)
assert_greater_equal(
trust,
0.95,
"Insufficiently trustworthy embedding for" "blobs dataset: {}".format(trust),
)
Example 11: cal_UMAP
# Required import: import umap [as alias]
# Or: from umap import UMAP [as alias]
def cal_UMAP(code, pca_dim=50, n_neighbors=30, min_dist=0.1, n_components=2, metric='cosine'):
""" Calculate UMAP dimensionality reduction
Args:
code: num_cells * num_features
pca_dim: if dimensionality of code > pca_dim, apply PCA first
n_neighbors: UMAP parameter
min_dist: UMAP parameter
n_components: UMAP parameter
metric: UMAP parameter
Returns:
umap_code: num_cells * n_components
"""
if code.shape[1] > pca_dim:
pca = PCA(n_components=pca_dim)
code = pca.fit_transform(code)
fit = umap.UMAP(n_neighbors=n_neighbors,
min_dist=min_dist,
n_components=n_components,
metric=metric,
random_state=0)
umap_code = fit.fit_transform(code)
return umap_code
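A brief usage sketch for cal_UMAP, assuming the function above and its imports (umap and sklearn's PCA) are in scope; the cell-by-feature matrix is randomly generated for illustration.

import numpy as np

code = np.random.rand(500, 200)  # assumed: 500 cells x 200 features (> pca_dim, so PCA runs first)
umap_code = cal_UMAP(code)       # PCA to 50 dimensions, then UMAP to 2 dimensions
print(umap_code.shape)           # (500, 2)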
Example 12: fit
# Required import: import umap [as alias]
# Or: from umap import UMAP [as alias]
def fit(self, X, y=None):
"""
Fit the model using X as training data.
:param X: array-like, shape=(n_columns, n_samples,) training data.
:param y: ignored but kept in for pipeline support
:return: Returns an instance of self.
"""
X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
if self.n_components < 2:
raise ValueError("Number of components must be at least two.")
if not self.threshold:
raise ValueError(f"The `threshold` value cannot be `None`.")
self.umap_ = umap.UMAP(
n_components=self.n_components,
n_neighbors=self.n_neighbors,
min_dist=self.min_dist,
metric=self.metric,
random_state=self.random_state,
)
self.umap_.fit(X, y)
self.offset_ = -self.threshold
return self
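The docstring keeps y only for pipeline support: scikit-learn passes y to every step's fit, so estimators following this convention can sit inside a Pipeline. Since the surrounding class is not shown here, the runnable sketch below uses umap.UMAP directly, which exposes the same fit(X, y=None) signature; the data and parameters are assumptions.

import numpy as np
import umap
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

X = np.random.rand(200, 15)  # illustrative data
pipe = Pipeline([('scale', StandardScaler()),
                 ('embed', umap.UMAP(n_components=2, random_state=0))])
embedding = pipe.fit_transform(X)  # y defaults to None, as in the fit() above
print(embedding.shape)             # (200, 2)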
Example 13: umap
# Required import: import umap [as alias]
# Or: from umap import UMAP [as alias]
def umap(features, dims=2, write_to=None):
''' Reduces the features in the parsed pd.DataFrame 'features' into 'dims'
dimensions (default 2). Writes the output to 'write_to' if provided, in
    .csv format. Returns the reduced DataFrame.
'''
if dims != 2:
print('UMAP: Not currently supporting anything but 2-dim reduction')
id_col_name = features.columns[0]
    print('UMAP: Reducing features to 2 dimensions')
# Don't consider the first unique ID column
features_salient = features.copy().drop(columns=[id_col_name], axis=1)
reduced = pd.DataFrame(UMAP(spread=0.5).fit_transform(features_salient))
reduced.insert(0, id_col_name, features[[id_col_name]])
print('Success')
if write_to is not None:
try:
reduced.to_csv(write_to, index=False)
print('Wrote reduced features to "{}"'.format(write_to))
except Exception as e:
print('\nWARNING - Could not write results to file: "{}"'.format(e))
return reduced
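A short usage sketch for the umap helper above, assuming it and its imports are in scope (and the pandas version it was written for). The DataFrame layout follows the function's own assumption that the first column is a unique ID; the output path is hypothetical.

import numpy as np
import pandas as pd

features = pd.DataFrame(np.random.rand(100, 6), columns=['f{}'.format(i) for i in range(6)])
features.insert(0, 'sample_id', range(100))       # the first column is treated as the ID
reduced = umap(features, write_to='reduced.csv')  # hypothetical output path
print(reduced.shape)                              # (100, 3): ID column plus 2 UMAP dimensions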
Example 14: plot_umap
# Required import: import umap [as alias]
# Or: from umap import UMAP [as alias]
def plot_umap(trainer):
latent_seq, latent_fish = trainer.get_latent()
latent2d = umap.UMAP().fit_transform(np.concatenate([latent_seq, latent_fish]))
latent2d_seq = latent2d[: latent_seq.shape[0]]
latent2d_fish = latent2d[latent_seq.shape[0] :]
data_seq, data_fish = [p.gene_dataset for p in trainer.all_dataset]
colors = sns.color_palette(n_colors=30)
plt.figure(figsize=(25, 10))
ax = plt.subplot(1, 3, 1)
ax.scatter(*latent2d_seq.T, color="r", label="seq", alpha=0.5, s=0.5)
ax.scatter(*latent2d_fish.T, color="b", label="osm", alpha=0.5, s=0.5)
ax.legend()
ax = plt.subplot(1, 3, 2)
labels = data_seq.labels.ravel()
for i, label in enumerate(data_seq.cell_types):
ax.scatter(
*latent2d_seq[labels == i].T,
color=colors[i],
label=label[:12],
alpha=0.5,
s=5
)
ax.legend()
ax.set_title("Seq cells")
ax = plt.subplot(1, 3, 3)
labels = data_fish.labels.ravel()
for i, label in enumerate(data_fish.cell_types):
ax.scatter(
*latent2d_fish[labels == i].T, color=colors[i], label=label, alpha=0.5, s=5
)
ax.legend()
ax.set_title("Spatial cells")
Example 15: project_separate
# Required import: import umap [as alias]
# Or: from umap import UMAP [as alias]
def project_separate(self, projector=None):
if projector is None:
from umap import UMAP
projector = UMAP(n_components=2, metric='cosine')
both_category_embeddings = np.vstack([self.cat1_dwe_ar_norm,
self.cat2_dwe_ar_norm])
projected_ar = projector.fit_transform(both_category_embeddings)
df = pd.DataFrame(projected_ar, columns=['x', 'y'], index=self.labeled_terms)
df['category'] = [self.category1] * len(self.terms) + [self.category2] * len(self.terms)
return df