This article collects typical usage examples of faiss.Clustering in Python. If you are unsure what faiss.Clustering does or how to call it, the curated code examples below may help; you can also look further into the faiss module it belongs to.
The following shows 6 code examples of faiss.Clustering, ordered by popularity.
Example 1: do_clustering
# Required import: import faiss
# Or: from faiss import Clustering
import logging
import time

import numpy as np

# Assumes the helper functions preprocess_features, train_kmeans and
# compute_cluster_assignment are defined elsewhere in the same project
# (train_kmeans is shown in Example 6 below).
def do_clustering(features, num_clusters, gpu_ids=None,
                  num_pca_components=None, niter=100, nredo=1, verbose=0):
    logging.debug('FAISS: using GPUs {}'.format(gpu_ids))
    # Flatten each sample to a vector; faiss expects float32 input.
    features = np.asarray(features.reshape(features.shape[0], -1), dtype=np.float32)
    if num_pca_components is not None:
        features = preprocess_features(features, d=num_pca_components,
                                       niter=niter, nredo=nredo, verbose=verbose)
    logging.debug('FAISS: clustering...')
    t0 = time.time()
    centroids = train_kmeans(features, num_clusters, gpu_ids=gpu_ids, verbose=1)
    labels = compute_cluster_assignment(centroids, features)
    t1 = time.time()
    logging.debug("FAISS: Clustering total elapsed time: %.3f m" % ((t1 - t0) / 60.0))
    return labels
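A minimal usage sketch (not part of the original snippet), assuming the helper functions above are importable; the feature matrix and cluster count are illustrative placeholders:

import numpy as np

# 1,000 random 64-dimensional feature vectors, clustered into 10 groups on the CPU.
features = np.random.rand(1000, 64).astype(np.float32)
cluster_ids = do_clustering(features, num_clusters=10, gpu_ids=None)
print(len(cluster_ids))  # one cluster assignment per input row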
Example 2: test_nmi_faiss
# Required import: import faiss
# Or: from faiss import Clustering
import numpy as np
import faiss
from sklearn.metrics import normalized_mutual_info_score


def test_nmi_faiss(embeddings, labels):
    res = faiss.StandardGpuResources()
    flat_config = faiss.GpuIndexFlatConfig()
    flat_config.device = 0
    unique_labels = np.unique(labels)
    d = embeddings.shape[1]
    # One cluster per ground-truth class.
    kmeans = faiss.Clustering(d, unique_labels.size)
    kmeans.verbose = True
    kmeans.niter = 300
    kmeans.nredo = 10
    kmeans.seed = 0
    index = faiss.GpuIndexFlatL2(res, d, flat_config)
    kmeans.train(embeddings, index)
    # Assign each embedding to its nearest centroid.
    dists, pred_labels = index.search(embeddings, 1)
    pred_labels = pred_labels.squeeze()
    nmi = normalized_mutual_info_score(labels, pred_labels)
    print("NMI: {}".format(nmi))
    return nmi
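A hypothetical call, assuming a GPU build of faiss (faiss-gpu) and a visible GPU 0; the embeddings and labels below are random, so the resulting NMI is only meaningful as a smoke test:

import numpy as np

embeddings = np.random.rand(500, 128).astype(np.float32)
labels = np.random.randint(0, 5, size=500)  # 5 fake ground-truth classes
test_nmi_faiss(embeddings, labels)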
Example 3: train_coarse_quantizer
# Required import: import faiss
# Or: from faiss import Clustering
import faiss


def train_coarse_quantizer(data, quantizer_path, num_clusters, hnsw=False, niter=10, cuda=False):
    d = data.shape[1]
    index_flat = faiss.IndexFlatL2(d)
    # make it into a gpu index
    if cuda:
        res = faiss.StandardGpuResources()
        index_flat = faiss.index_cpu_to_gpu(res, 0, index_flat)
    clus = faiss.Clustering(d, num_clusters)
    clus.verbose = True
    clus.niter = niter
    clus.train(data, index_flat)
    # Copy the trained centroids out of the Clustering object.
    centroids = faiss.vector_float_to_array(clus.centroids)
    centroids = centroids.reshape(num_clusters, d)
    if hnsw:
        # HNSW-based coarse quantizer for faster (approximate) centroid lookup.
        quantizer = faiss.IndexHNSWFlat(d, 32)
        quantizer.hnsw.efSearch = 128
        quantizer.train(centroids)
        quantizer.add(centroids)
    else:
        quantizer = faiss.IndexFlatL2(d)
        quantizer.add(centroids)
    faiss.write_index(quantizer, quantizer_path)
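A hedged usage sketch; the data, output path and cluster count are illustrative placeholders, not values from the original project:

import numpy as np
import faiss

data = np.random.rand(10000, 256).astype(np.float32)
train_coarse_quantizer(data, "coarse_quantizer.index", num_clusters=100,
                       hnsw=False, niter=10, cuda=False)
# The stored quantizer can later be reloaded with faiss.read_index("coarse_quantizer.index").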
Example 4: run_kmeans
# Required import: import faiss
# Or: from faiss import Clustering
import numpy as np
import faiss


def run_kmeans(x, nmb_clusters, verbose=False):
    """Runs kmeans on 1 GPU.
    Args:
        x: data
        nmb_clusters (int): number of clusters
    Returns:
        (list, float): cluster id of each data point and the final k-means loss
    """
    n_data, d = x.shape
    # faiss implementation of k-means
    clus = faiss.Clustering(d, nmb_clusters)
    # Change faiss seed at each k-means so that the randomly picked
    # initialization centroids do not correspond to the same feature ids
    # from an epoch to another.
    clus.seed = np.random.randint(1234)
    clus.niter = 20
    clus.max_points_per_centroid = 10000000
    res = faiss.StandardGpuResources()
    flat_config = faiss.GpuIndexFlatConfig()
    flat_config.useFloat16 = False
    flat_config.device = 0
    index = faiss.GpuIndexFlatL2(res, d, flat_config)
    # perform the training
    clus.train(x, index)
    _, I = index.search(x, 1)
    # Per-iteration objective values of the k-means run.
    losses = faiss.vector_to_array(clus.obj)
    if verbose:
        print('k-means loss evolution: {0}'.format(losses))
    return [int(n[0]) for n in I], losses[-1]
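A hypothetical call on random data, assuming faiss-gpu and a visible GPU 0; shapes and cluster count are placeholders:

import numpy as np

x = np.random.rand(2000, 64).astype(np.float32)
cluster_ids, final_loss = run_kmeans(x, nmb_clusters=50, verbose=True)
print(len(cluster_ids), final_loss)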
Example 5: run_kmeans
# Required import: import faiss
# Or: from faiss import Clustering
import logging

import numpy as np
import faiss

# modified from https://github.com/facebookresearch/faiss/wiki/Faiss-building-blocks:-clustering,-PCA,-quantization
def run_kmeans(x, nmb_clusters):
    """
    Args:
        x: data
        nmb_clusters (int): number of clusters
    Returns:
        list: ids of data in each cluster
    """
    n_data, d = x.shape
    logging.info("running k-means clustering with k=%d" % nmb_clusters)
    logging.info("embedding dimensionality is %d" % d)
    # faiss implementation of k-means
    clus = faiss.Clustering(d, nmb_clusters)
    clus.niter = 20
    clus.max_points_per_centroid = 10000000
    index = faiss.IndexFlatL2(d)
    # Use every available GPU if there is one, otherwise stay on the CPU.
    if faiss.get_num_gpus() > 0:
        index = faiss.index_cpu_to_all_gpus(index)
    # perform the training
    clus.train(x, index)
    _, idxs = index.search(x, 1)
    return [int(n[0]) for n in idxs]
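A minimal, CPU-friendly usage sketch for this variant of run_kmeans (values are illustrative only):

import numpy as np

x = np.random.rand(1000, 32).astype(np.float32)
cluster_ids = run_kmeans(x, nmb_clusters=20)
print(cluster_ids[:10])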
Example 6: train_kmeans
# Required import: import faiss
# Or: from faiss import Clustering
import numpy as np
import faiss


def train_kmeans(x, num_clusters=1000, gpu_ids=None, niter=100, nredo=1, verbose=0):
    """
    Runs k-means clustering on one or several GPUs
    """
    assert np.all(~np.isnan(x)), 'x contains NaN'
    assert np.all(np.isfinite(x)), 'x contains Inf'
    if isinstance(gpu_ids, int):
        gpu_ids = [gpu_ids]
    assert gpu_ids is None or len(gpu_ids)

    d = x.shape[1]
    kmeans = faiss.Clustering(d, num_clusters)
    kmeans.verbose = bool(verbose)
    kmeans.niter = niter
    kmeans.nredo = nredo
    # otherwise the kmeans implementation sub-samples the training set
    kmeans.max_points_per_centroid = 10000000

    if gpu_ids is not None:
        res = [faiss.StandardGpuResources() for i in gpu_ids]
        flat_config = []
        for i in gpu_ids:
            cfg = faiss.GpuIndexFlatConfig()
            cfg.useFloat16 = False
            cfg.device = i
            flat_config.append(cfg)
        if len(gpu_ids) == 1:
            index = faiss.GpuIndexFlatL2(res[0], d, flat_config[0])
        else:
            # Combine one flat index per GPU behind a single proxy index.
            indexes = [faiss.GpuIndexFlatL2(res[i], d, flat_config[i])
                       for i in range(len(gpu_ids))]
            index = faiss.IndexProxy()
            for sub_index in indexes:
                index.addIndex(sub_index)
    else:
        index = faiss.IndexFlatL2(d)

    # perform the training
    kmeans.train(x, index)
    centroids = faiss.vector_float_to_array(kmeans.centroids)
    objective = faiss.vector_float_to_array(kmeans.obj)
    # logging.debug("Final objective: %.4g" % objective[-1])
    return centroids.reshape(num_clusters, d)
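A hedged usage sketch: train centroids on the CPU, then assign each vector to its nearest centroid with a flat index (this assignment step mirrors what compute_cluster_assignment in Example 1 presumably does); shapes and cluster count are made up for illustration:

import numpy as np
import faiss

x = np.random.rand(5000, 64).astype(np.float32)
centroids = train_kmeans(x, num_clusters=100, gpu_ids=None, niter=50, verbose=1)

# Nearest-centroid assignment: index the centroids and search with the data.
index = faiss.IndexFlatL2(centroids.shape[1])
index.add(centroids)
_, assignments = index.search(x, 1)
print(assignments.ravel()[:10])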