当前位置: 首页>>代码示例>>Python>>正文


Python faiss.Clustering方法代码示例

本文整理汇总了Python中faiss.Clustering方法的典型用法代码示例。如果您正苦于以下问题：Python faiss.Clustering方法的具体用法？Python faiss.Clustering怎么用？Python faiss.Clustering使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块faiss的用法示例。


在下文中一共展示了faiss.Clustering方法的6个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: do_clustering

# 需要导入模块: import faiss [as 别名]
# 或者: from faiss import Clustering [as 别名]
def do_clustering(features, num_clusters, gpu_ids=None,
                  num_pca_components=None, niter=100, nredo=1, verbose=0):
    """Cluster feature vectors with FAISS k-means and return the assignments.

    Args:
        features: array exposing ``shape`` and ``reshape``; it is flattened to
            ``(features.shape[0], -1)`` and converted to float32.
        num_clusters: number of k-means centroids to fit.
        gpu_ids: forwarded to ``train_kmeans``; also written to the debug log.
        num_pca_components: when not None, the features are first passed
            through ``preprocess_features`` with ``d=num_pca_components``.
        niter: forwarded to ``preprocess_features`` and ``train_kmeans``.
        nredo: forwarded to ``preprocess_features`` and ``train_kmeans``.
        verbose: forwarded to ``preprocess_features``.

    Returns:
        The result of ``compute_cluster_assignment(centroids, features)``.
    """
    logging.debug('FAISS: using GPUs %s', gpu_ids)
    features = np.asarray(features.reshape(features.shape[0], -1), dtype=np.float32)

    if num_pca_components is not None:
        features = preprocess_features(features, d=num_pca_components,
                                       niter=niter, nredo=nredo, verbose=verbose)

    logging.debug('FAISS: clustering...')
    t0 = time.time()
    # niter / nredo are forwarded so that caller-supplied values actually
    # reach the k-means step instead of being silently replaced by the
    # defaults of train_kmeans.
    # NOTE(review): k-means verbosity is pinned to 1 regardless of
    # ``verbose``; left unchanged to preserve the existing console output --
    # confirm whether it should follow the argument.
    centroids = train_kmeans(features, num_clusters, gpu_ids=gpu_ids,
                             niter=niter, nredo=nredo, verbose=1)
    labels = compute_cluster_assignment(centroids, features)
    t1 = time.time()
    logging.debug("FAISS: Clustering total elapsed time: %.3f m", (t1 - t0) / 60.0)
    return labels
开发者ID:CompVis,项目名称:metric-learning-divide-and-conquer,代码行数:18,代码来源:faissext.py

示例2: test_nmi_faiss

# 需要导入模块: import faiss [as 别名]
# 或者: from faiss import Clustering [as 别名]
def test_nmi_faiss(embeddings, labels):
    """Score a FAISS k-means clustering of ``embeddings`` against ``labels``.

    K-means is configured with one centroid per distinct value in ``labels``
    and trained through a flat L2 index on GPU device 0. That index is then
    queried for the single nearest entry of every embedding, and the returned
    ids are compared with ``labels`` via normalized mutual information.

    Args:
        embeddings: 2-D array; ``embeddings.shape[1]`` is the vector dimension.
        labels: ground-truth labels passed to ``normalized_mutual_info_score``.

    Returns:
        The NMI score, which is also printed.
    """
    gpu_resources = faiss.StandardGpuResources()
    gpu_config = faiss.GpuIndexFlatConfig()
    gpu_config.device = 0

    num_classes = np.unique(labels).size
    dim = embeddings.shape[1]

    clustering = faiss.Clustering(dim, num_classes)
    # Fixed k-means settings, applied in the same order as plain assignments.
    for option, value in (("verbose", True), ("niter", 300),
                          ("nredo", 10), ("seed", 0)):
        setattr(clustering, option, value)

    gpu_index = faiss.GpuIndexFlatL2(gpu_resources, dim, gpu_config)
    clustering.train(embeddings, gpu_index)

    # Only the ids of the nearest entries are needed; distances are dropped.
    _, nearest_ids = gpu_index.search(embeddings, 1)
    predicted = nearest_ids.squeeze()

    score = normalized_mutual_info_score(labels, predicted)
    print("NMI: {}".format(score))
    return score
开发者ID:azgo14,项目名称:classification_metric_learning,代码行数:27,代码来源:nmi.py

示例3: train_coarse_quantizer

# 需要导入模块: import faiss [as 别名]
# 或者: from faiss import Clustering [as 别名]
def train_coarse_quantizer(data, quantizer_path, num_clusters, hnsw=False, niter=10, cuda=False):
    """Fit k-means centroids on ``data`` and write them out as a FAISS index.

    Args:
        data: 2-D array of training vectors; ``data.shape[1]`` is the dimension.
        quantizer_path: destination handed to ``faiss.write_index``.
        num_clusters: number of centroids to fit.
        hnsw: when true the centroids are stored in an ``IndexHNSWFlat``
            (second argument 32, ``efSearch`` 128); otherwise in ``IndexFlatL2``.
        niter: number of k-means iterations.
        cuda: when true the flat index used during training is moved to GPU 0.
    """
    dim = data.shape[1]

    training_index = faiss.IndexFlatL2(dim)
    if cuda:
        # Move the training index onto GPU device 0.
        gpu_resources = faiss.StandardGpuResources()
        training_index = faiss.index_cpu_to_gpu(gpu_resources, 0, training_index)

    kmeans = faiss.Clustering(dim, num_clusters)
    kmeans.verbose = True
    kmeans.niter = niter
    kmeans.train(data, training_index)

    # The centroids come back as a flat float vector; restore (k, dim).
    flat_centroids = faiss.vector_float_to_array(kmeans.centroids)
    centroids = flat_centroids.reshape(num_clusters, dim)

    if not hnsw:
        quantizer = faiss.IndexFlatL2(dim)
    else:
        quantizer = faiss.IndexHNSWFlat(dim, 32)
        quantizer.hnsw.efSearch = 128
        quantizer.train(centroids)
    quantizer.add(centroids)

    faiss.write_index(quantizer, quantizer_path)
开发者ID:uwnlp,项目名称:denspi,代码行数:27,代码来源:run_index.py

示例4: run_kmeans

# 需要导入模块: import faiss [as 别名]
# 或者: from faiss import Clustering [as 别名]
def run_kmeans(x, nmb_clusters, verbose=False):
    """Cluster the rows of ``x`` with FAISS k-means on GPU device 0.

    Args:
        x: 2-D data array (its ``shape`` is unpacked into two values)
        nmb_clusters (int): number of clusters
        verbose (bool): when true, print the array of k-means loss values
    Returns:
        tuple: a list holding, for every row of ``x``, the integer id returned
            by the 1-nearest-neighbour search on the trained index, and the
            last entry of the loss array
    """
    _, dim = x.shape

    kmeans = faiss.Clustering(dim, nmb_clusters)

    # A fresh seed per call keeps the randomly chosen initial centroids from
    # mapping to the same feature ids from one epoch to the next.
    kmeans.seed = np.random.randint(1234)
    kmeans.niter = 20
    kmeans.max_points_per_centroid = 10000000

    gpu_resources = faiss.StandardGpuResources()
    gpu_config = faiss.GpuIndexFlatConfig()
    gpu_config.useFloat16 = False
    gpu_config.device = 0
    gpu_index = faiss.GpuIndexFlatL2(gpu_resources, dim, gpu_config)

    # Train, then look up the single nearest index entry for every row.
    kmeans.train(x, gpu_index)
    _, nearest = gpu_index.search(x, 1)

    losses = faiss.vector_to_array(kmeans.obj)
    if verbose:
        print('k-means loss evolution: {0}'.format(losses))

    assignments = [int(hit[0]) for hit in nearest]
    return assignments, losses[-1]
开发者ID:XiaohangZhan,项目名称:cdp,代码行数:36,代码来源:faiss_kmeans.py

示例5: run_kmeans

# 需要导入模块: import faiss [as 别名]
# 或者: from faiss import Clustering [as 别名]
def run_kmeans(x, nmb_clusters):
    """
    Cluster the rows of ``x`` with FAISS k-means, using every visible GPU
    when ``faiss.get_num_gpus()`` reports at least one.

    Args:
        x: 2-D data array (its ``shape`` is unpacked into two values)
        nmb_clusters (int): number of clusters
    Returns:
        list: for every row of ``x``, the integer id returned by the
            1-nearest-neighbour search on the trained index
    """
    _, dim = x.shape
    logging.info("running k-means clustering with k=%d" % nmb_clusters)
    logging.info("embedding dimensionality is %d" % dim)

    kmeans = faiss.Clustering(dim, nmb_clusters)
    kmeans.niter = 20
    kmeans.max_points_per_centroid = 10000000

    # Start from a CPU flat index and replicate it to the GPUs if any exist.
    search_index = faiss.IndexFlatL2(dim)
    gpu_available = faiss.get_num_gpus() > 0
    if gpu_available:
        search_index = faiss.index_cpu_to_all_gpus(search_index)

    kmeans.train(x, search_index)
    _, nearest = search_index.search(x, 1)

    return [int(hit[0]) for hit in nearest]


# modified from https://github.com/facebookresearch/faiss/wiki/Faiss-building-blocks:-clustering,-PCA,-quantization 
开发者ID:KevinMusgrave,项目名称:pytorch-metric-learning,代码行数:29,代码来源:stat_utils.py

示例6: train_kmeans

# 需要导入模块: import faiss [as 别名]
# 或者: from faiss import Clustering [as 别名]
def train_kmeans(x, num_clusters=1000, gpu_ids=None, niter=100, nredo=1, verbose=0):
    """
    Runs k-means clustering on one or several GPUs, or on the CPU when
    ``gpu_ids`` is None.

    Args:
        x: 2-D array of training vectors; ``x.shape[1]`` is the dimension.
            Must not contain NaN or infinite values.
        num_clusters: number of centroids to fit.
        gpu_ids: None for a CPU index, a single int, or a non-empty
            sequence of GPU device ids.
        niter: number of k-means iterations.
        nredo: value assigned to the clustering object's ``nredo`` setting.
        verbose: truthy to enable FAISS' own progress output.

    Returns:
        The centroids reshaped to ``(num_clusters, x.shape[1])``.

    Raises:
        AssertionError: if ``x`` contains NaN/Inf or ``gpu_ids`` is empty.
    """
    # Kept as asserts so callers keep seeing the same exception type.
    assert np.all(~np.isnan(x)), 'x contains NaN'
    assert np.all(np.isfinite(x)), 'x contains Inf'
    if isinstance(gpu_ids, int):
        gpu_ids = [gpu_ids]
    assert gpu_ids is None or len(gpu_ids)

    d = x.shape[1]
    kmeans = faiss.Clustering(d, num_clusters)
    kmeans.verbose = bool(verbose)
    kmeans.niter = niter
    kmeans.nredo = nredo

    # otherwise the kmeans implementation sub-samples the training set
    kmeans.max_points_per_centroid = 10000000

    if gpu_ids is None:
        index = faiss.IndexFlatL2(d)
    else:
        res = [faiss.StandardGpuResources() for _ in gpu_ids]

        flat_config = []
        for device in gpu_ids:
            cfg = faiss.GpuIndexFlatConfig()
            cfg.useFloat16 = False
            cfg.device = device
            flat_config.append(cfg)

        if len(gpu_ids) == 1:
            index = faiss.GpuIndexFlatL2(res[0], d, flat_config[0])
        else:
            # Keep the per-GPU indexes referenced for the whole training run.
            indexes = [faiss.GpuIndexFlatL2(gpu_res, d, cfg)
                       for gpu_res, cfg in zip(res, flat_config)]
            # Newer FAISS releases expose this class as IndexReplicas; older
            # ones only have IndexProxy. Use whichever is available.
            replicas_cls = getattr(faiss, 'IndexReplicas', None) or faiss.IndexProxy
            index = replicas_cls()
            for sub_index in indexes:
                index.addIndex(sub_index)

    # perform the training
    kmeans.train(x, index)
    centroids = faiss.vector_float_to_array(kmeans.centroids)

    return centroids.reshape(num_clusters, d)
开发者ID:CompVis,项目名称:metric-learning-divide-and-conquer,代码行数:50,代码来源:faissext.py


注:本文中的faiss.Clustering方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。