当前位置: 首页>>代码示例>>Python>>正文


Python faiss.index_factory方法代码示例

本文整理汇总了Python中faiss.index_factory方法的典型用法代码示例。如果您正苦于以下问题：Python faiss.index_factory方法的具体用法？Python faiss.index_factory怎么用？Python faiss.index_factory使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块faiss的用法示例。


在下文中一共展示了faiss.index_factory方法的10个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: execute

# 需要导入模块: import faiss [as 别名]
# 或者: from faiss import index_factory [as 别名]
def execute(cls, ctx, op):
        """Create the faiss index described by ``op`` and train it on the input.

        The single input chunk is read from ``ctx`` under ``op.input.key`` and
        moved to ``op.device`` via ``as_same_device``.  The trained index is
        passed through ``_store_index`` and the result is written back to
        ``ctx`` under the key of ``op.outputs[0]``.
        """
        inputs, device_id, _ = as_same_device(
            [ctx[op.input.key]], device=op.device, ret_extra=True)
        (train_data,) = inputs
        on_gpu = device_id >= 0

        with device(device_id):
            index = faiss.index_factory(
                train_data.shape[1], op.faiss_index, op.faiss_metric_type)

            if not on_gpu:
                index.train(train_data)
            else:  # pragma: no cover
                # GPU: move the index to the device first, then train through
                # the low-level ``train_c`` entry point, which takes the row
                # count plus whatever the pointer helper returns for the array.
                index = _index_to_gpu(index, device_id)
                index.train_c(train_data.shape[0],
                              _swig_ptr_from_cupy_float32_array(train_data))

            ctx[op.outputs[0].key] = _store_index(ctx, op, index, device_id)
开发者ID:mars-project,项目名称:mars,代码行数:19,代码来源:_faiss.py

示例2: faiss_train

# 需要导入模块: import faiss [as 别名]
# 或者: from faiss import index_factory [as 别名]
def faiss_train(fn_feature, root_path, index_path='train.index', id_path='data.json'):
    """Build, train and persist an inner-product IVF index over per-folder features.

    Every entry of ``root_path`` is treated as one class: ``fn_feature`` is
    called with the entry's path and must return a 2-D array holding one
    feature vector per row.  All vectors of an entry share the entry's
    position in the directory listing as their faiss id.

    :param fn_feature: callable mapping a folder path to a 2-D feature array
    :param root_path: directory whose entries are the classes to index
    :param index_path: file the trained faiss index is written to
    :param id_path: JSON file receiving the ``{str(id): folder_name}`` mapping
    :return: tuple ``(index, id_json)``
    :raises ValueError: if ``root_path`` is empty or yields no feature vectors
    """
    folder_names = os.listdir(root_path)
    logging.info('directory %s ', folder_names)
    id_json = {}
    print(folder_names)

    # Collect the batches and concatenate once at the end; concatenating inside
    # the loop re-copies everything accumulated so far on each iteration.
    feature_chunks = []
    id_chunks = []
    for idx, folder_name in enumerate(folder_names):
        id_json[str(idx)] = folder_name
        feature_val = fn_feature(os.path.join(root_path, folder_name))
        feature_chunks.append(feature_val)
        # faiss expects 64-bit ids; the platform default int may be narrower.
        id_chunks.append(np.full(feature_val.shape[0], idx, dtype=np.int64))

    if not feature_chunks:
        raise ValueError('no feature folders found in {!r}'.format(root_path))

    # Newest batch first, matching the row order the index has always received.
    vals = np.concatenate(feature_chunks[::-1], axis=0)
    ids = np.concatenate(id_chunks[::-1], axis=0)

    N, dim = vals.shape
    if N == 0:
        raise ValueError('no feature vectors extracted from {!r}'.format(root_path))

    # About 2*sqrt(N) inverted lists, but never more lists than training vectors.
    x = max(1, min(int(2 * math.sqrt(N)), N))
    index_description = "IVF{x},Flat".format(x=x)
    # Use the real feature dimensionality rather than a hard-coded 7 * 7 * 512.
    index = faiss.index_factory(dim, index_description, faiss.METRIC_INNER_PRODUCT)
    index.train(vals)
    index.add_with_ids(vals, ids)
    faiss.write_index(index, index_path)
    with open(id_path, 'w', encoding='utf-8') as f:
        json.dump(id_json, f, ensure_ascii=False, indent=4)
    print(id_json)
    return index, id_json
开发者ID:seongahjo,项目名称:Mosaicer,代码行数:27,代码来源:train.py

示例3: _execute_one_chunk

# 需要导入模块: import faiss [as 别名]
# 或者: from faiss import index_factory [as 别名]
def _execute_one_chunk(cls, ctx, op):
        """Build a complete faiss index from a single input chunk.

        Steps: create the index from ``op.faiss_index``, optionally move it to
        the GPU, train it on ``op.n_sample`` randomly chosen rows if the index
        reports it is untrained, add all rows, and store the result in ``ctx``
        under the key of ``op.outputs[0]``.
        """
        (chunk,), device_id, xp = as_same_device(
            [ctx[inp.key] for inp in op.inputs], device=op.device, ret_extra=True)
        on_gpu = device_id >= 0

        with device(device_id):
            index = faiss.index_factory(
                chunk.shape[1], op.faiss_index, op.faiss_metric_type)
            if on_gpu:  # pragma: no cover
                index = _index_to_gpu(index, device_id)

            # Only index types that need training report ``is_trained == False``.
            if not index.is_trained:
                assert op.n_sample is not None
                picked_rows = xp.random.choice(
                    chunk.shape[0], size=op.n_sample, replace=False)
                index.train(chunk[picked_rows])

            if op.metric == 'cosine':
                # faiss has no native cosine distance; vectors are L2-normalised
                # (in place) before being added, see:
                # https://github.com/facebookresearch/faiss/wiki/FAQ#how-can-i-index-vectors-for-cosine-distance
                faiss.normalize_L2(chunk)

            if on_gpu:  # pragma: no cover
                chunk = chunk.astype(np.float32, copy=False)
                index.add_c(chunk.shape[0],
                            _swig_ptr_from_cupy_float32_array(chunk))
            else:
                index.add(chunk)

            ctx[op.outputs[0].key] = _store_index(ctx, op, index, device_id)
开发者ID:mars-project,项目名称:mars,代码行数:37,代码来源:_faiss.py

示例4: build_faiss_index

# 需要导入模块: import faiss [as 别名]
# 或者: from faiss import index_factory [as 别名]
def build_faiss_index(X, index_name='auto', n_sample=None, metric="euclidean",
                      random_state=None, same_distribution=True,
                      accuracy=False, memory_require=None, **kw):
    """Validate the arguments and apply a ``FaissBuildIndex`` operand to ``X``.

    :param X: data to index; converted with ``astensor``
    :param index_name: faiss factory string, or ``'auto'`` to skip validation here
    :param n_sample: forwarded to the operand
    :param metric: must be a key of ``METRIC_TO_FAISS_METRIC_TYPE``
    :param random_state: source for the single seed handed to the operand
    :param same_distribution: forwarded to the operand
    :param accuracy: forwarded to the operand
    :param memory_require: memory grade; defaults to ``MemoryRequirementGrade.low``
    :param kw: extra keyword arguments forwarded to the operand
    :return: result of calling the operand on ``X``
    :raises ValueError: for an unknown metric or a factory string faiss rejects
    """
    X = astensor(X)

    if metric not in METRIC_TO_FAISS_METRIC_TYPE:
        raise ValueError('unknown metric: {}'.format(metric))

    if index_name != 'auto':
        faiss_metric_type = METRIC_TO_FAISS_METRIC_TYPE[metric]
        # Dry-run the factory so a bad index string fails early.
        try:
            faiss.index_factory(X.shape[1], index_name, faiss_metric_type)
        except RuntimeError:
            raise ValueError('illegal faiss index: {}'.format(index_name))

    rs = check_random_state(random_state)
    if isinstance(rs, RandomState):
        rs = rs.to_numpy()
    seed = gen_random_seeds(1, rs)[0]

    memory_require = (MemoryRequirementGrade.low if memory_require is None
                      else _get_memory_require(memory_require))

    build_op = FaissBuildIndex(
        faiss_index=index_name, metric=metric, n_sample=n_sample,
        gpu=X.op.gpu, seed=seed, same_distribution=same_distribution,
        accuracy=accuracy, memory_require=memory_require, **kw)
    return build_op(X)
开发者ID:mars-project,项目名称:mars,代码行数:29,代码来源:_faiss.py

示例5: fit

# 需要导入模块: import faiss [as 别名]
# 或者: from faiss import index_factory [as 别名]
def fit(self, X):
        """Train a GPU IVF-Flat (L2) index on ``X`` and add all of its rows.

        ``X`` is converted to float32 first.  ``self._n_bits`` is passed as the
        third ``GpuIndexIVFFlat`` argument and ``self._n_probes`` is applied
        once the data has been added.
        """
        X = X.astype(numpy.float32)
        n_dims = len(X[0])
        gpu_index = faiss.GpuIndexIVFFlat(self._res, n_dims, self._n_bits,
                                          faiss.METRIC_L2)
        self._index = gpu_index
        # Disabled alternative kept for reference: build through index_factory
        # on the CPU, then clone to the GPU with float16 storage.
        # self._index = faiss.index_factory(len(X[0]),
        #                                   "IVF%d,Flat" % self._n_bits)
        # co = faiss.GpuClonerOptions()
        # co.useFloat16 = True
        # self._index = faiss.index_cpu_to_gpu(self._res, 0,
        #                                      self._index, co)
        gpu_index.train(X)
        gpu_index.add(X)
        gpu_index.setNumProbes(self._n_probes)
开发者ID:erikbern,项目名称:ann-benchmarks,代码行数:15,代码来源:faiss_gpu.py

示例6: cluster

# 需要导入模块: import faiss [as 别名]
# 或者: from faiss import index_factory [as 别名]
def cluster(features, th_knn, max_size=300, labels=None):
    '''
    Build an 80-nearest-neighbour table with faiss and cluster it.

    Unlike face-train, the similarities used here are not passed through a
    ``1 - x`` conversion.

    :param features: 2-D array, unpacked as (size, dim)
    :param th_knn: forwarded unchanged to ``cluster_by_knns``
    :param max_size: forwarded unchanged to ``cluster_by_knns``
    :param labels: forwarded unchanged to ``cluster_by_knns``
    :return: the result of ``cluster_by_knns``
    '''
    top_k = 80
    nprobe = 8

    # knn
    size, dim = features.shape
    nlist = min(4096, 8 * round(math.sqrt(size)))

    # Choose the factory string by dataset size.
    if size < 40000:
        fac_str = "Flat"
    elif size < 800000:
        fac_str = "IVF{},Flat".format(nlist)
    elif size < 2000000:
        fac_str = "IVF16384,Flat"
    else:
        fac_str = "IVF16384,PQ8"
    logger.info("cdp cluster fac str %s", fac_str)

    index = faiss.index_factory(dim, fac_str, faiss.METRIC_INNER_PRODUCT)
    index.train(features)
    index.nprobe = min(nprobe, nlist)
    assert index.is_trained
    logger.info('cdp cluster nlist: {}, nprobe: {}'.format(nlist, nprobe))
    index.add(features)

    sims, ners = index.search(features, k=top_k)
    if "Flat" not in fac_str:
        # Without "Flat" in the string the similarities are recomputed from
        # the raw features for the neighbours that were found.
        sims = sim_by_feature(features, features, ners)

    # Stack similarities and neighbour ids along a new axis 1.
    sims_block = sims[:, np.newaxis].astype(np.float32)
    ners_block = ners[:, np.newaxis].astype(np.float32)
    knns = np.concatenate([sims_block, ners_block], axis=1)

    return cluster_by_knns(knns, features, th_knn, max_size, labels)
开发者ID:Kestrong,项目名称:capture_reid,代码行数:40,代码来源:cdp.py

示例7: _execute_map

# 需要导入模块: import faiss [as 别名]
# 或者: from faiss import index_factory [as 别名]
def _execute_map(cls, ctx, op):
        """Add one data chunk to a faiss index and store the result in ``ctx``.

        When ``op`` has two inputs, the second one is an already trained index
        that is loaded with ``_load_index``.  Otherwise a fresh index is
        created here and trained on this chunk only if
        ``op.same_distribution`` is false.
        """
        (chunk,), device_id, _ = as_same_device(
            [ctx[op.inputs[0].key]], device=op.device, ret_extra=True)
        stored_index = ctx[op.inputs[1].key] if len(op.inputs) == 2 else None
        on_gpu = device_id >= 0

        with device(device_id):
            if stored_index is None:
                trained_index = faiss.index_factory(
                    chunk.shape[1], op.faiss_index, op.faiss_metric_type)
                if not op.same_distribution:
                    # distributions differ between chunks, so train per chunk
                    trained_index.train(chunk)

                if on_gpu:  # pragma: no cover
                    trained_index = _index_to_gpu(trained_index, device_id)
            else:
                # fetch the trained index
                trained_index = _load_index(ctx, op, stored_index, device_id)
                return_index_type = _get_index_type(op.return_index_type, ctx)
                if return_index_type == 'object':
                    # Work on a clone: faiss gives no thread-safety guarantee
                    # for operations that modify an index, see
                    # https://github.com/facebookresearch/faiss/wiki/Threads-and-asynchronous-calls#thread-safety
                    trained_index = faiss.clone_index(trained_index)

            if op.metric == 'cosine':
                # faiss has no native cosine distance; vectors are L2-normalised
                # (in place) before being added, see:
                # https://github.com/facebookresearch/faiss/wiki/FAQ#how-can-i-index-vectors-for-cosine-distance
                faiss.normalize_L2(chunk)

            # add data into index
            if on_gpu:  # pragma: no cover
                trained_index.add_c(chunk.shape[0],
                                    _swig_ptr_from_cupy_float32_array(chunk))
            else:
                trained_index.add(chunk)

            ctx[op.outputs[0].key] = _store_index(ctx, op, trained_index, device_id)
开发者ID:mars-project,项目名称:mars,代码行数:44,代码来源:_faiss.py

示例8: testGenIndexStringAndSampleCount

# 需要导入模块: import faiss [as 别名]
# 或者: from faiss import index_factory [as 别名]
def testGenIndexStringAndSampleCount(self):
        """Check the index string and sample count chosen per size and memory grade."""
        d = 32

        # accuracy=True, could be Flat only
        exact_ret = _gen_index_string_and_sample_count((10 ** 9, d), None, True, 'minimum')
        self.assertEqual(exact_ret, ('Flat', None))

        # (number of rows, memory grade, expected result,
        #  whether the freshly created index is already trained)
        approximate_cases = [
            # no memory concern
            (10 ** 5, 'maximum', ('HNSW32', None), True),
            # memory concern not much
            (10 ** 5, 'high', ('IVF1580,Flat', 47400), False),
            # memory quite important
            (5 * 10 ** 6, 'low', ('PCAR16,IVF65536_HNSW32,SQ8', 32 * 65536), False),
            # memory very important
            (10 ** 8, 'minimum', ('OPQ16_32,IVF1048576_HNSW32,PQ16', 64 * 65536), False),
            (10 ** 10, 'low', ('PCAR16,IVF1048576_HNSW32,SQ8', 64 * 65536), False),
        ]
        for n_rows, memory, expected, trained in approximate_cases:
            ret = _gen_index_string_and_sample_count((n_rows, d), None, False, memory)
            self.assertEqual(ret, expected)
            index = faiss.index_factory(d, ret[0])
            if trained:
                self.assertTrue(index.is_trained)
            else:
                self.assertFalse(index.is_trained)

        # (memory grade, extra keyword arguments) that must be rejected
        rejected_cases = [
            # M > 64 raise error
            ('maximum', dict(M=128)),
            # M > 64
            ('minimum', dict(M=128)),
            # dim should be multiple of M
            ('minimum', dict(M=16, dim=17)),
            ('low', dict(k=5)),
        ]
        for memory, extra in rejected_cases:
            with self.assertRaises(ValueError):
                _gen_index_string_and_sample_count((10 ** 5, d), None, False, memory, **extra)
开发者ID:mars-project,项目名称:mars,代码行数:52,代码来源:test_faiss.py

示例9: __init__

# 需要导入模块: import faiss [as 别名]
# 或者: from faiss import index_factory [as 别名]
def __init__(self,
                 feats,
                 k,
                 index_path='',
                 index_key='',
                 nprobe=128,
                 omp_num_threads=None,
                 rebuild_index=True,
                 verbose=True,
                 **kwargs):
        """Build (or load) a faiss inner-product index and compute top-k neighbours.

        :param feats: 2-D feature array, one vector per row
        :param k: number of neighbours to retrieve per vector
        :param index_path: where the index is saved to / loaded from; ``''``
            disables saving.  ``index_path + '.npz'`` is probed for
            precomputed neighbours.
        :param index_key: ``''`` for an exact ``IndexFlatIP``, ``'IVF'`` for an
            ``IndexIVFFlat`` on top of it, anything else is handed to
            ``faiss.index_factory``
        :param nprobe: upper bound for the ``nprobe`` set on a trained index
        :param omp_num_threads: if given, passed to ``faiss.omp_set_num_threads``
        :param rebuild_index: when false and ``index_path`` exists, the index
            is read from disk instead of being rebuilt
        :param verbose: stored on the instance and forwarded to ``Timer``
        :param kwargs: accepted and ignored

        The result is stored in ``self.knns``: either the ``'data'`` entry of
        the ``.npz`` file, or a list of ``(neighbour ids as int32,
        1 - similarity as float32)`` pairs.
        """
        import faiss
        if omp_num_threads is not None:
            faiss.omp_set_num_threads(omp_num_threads)
        self.verbose = verbose
        with Timer('[faiss] build index', verbose):
            if index_path != '' and not rebuild_index and os.path.exists(
                    index_path):
                print('[faiss] read index from {}'.format(index_path))
                index = faiss.read_index(index_path)
            else:
                feats = feats.astype('float32')
                size, dim = feats.shape
                index = faiss.IndexFlatIP(dim)
                if index_key != '':
                    assert index_key.find(
                        'HNSW') < 0, 'HNSW returns distances insted of sims'
                    metric = faiss.METRIC_INNER_PRODUCT
                    nlist = min(4096, 8 * round(math.sqrt(size)))
                    if index_key == 'IVF':
                        # reuse the flat index as the coarse quantizer
                        quantizer = index
                        index = faiss.IndexIVFFlat(quantizer, dim, nlist,
                                                   metric)
                    else:
                        index = faiss.index_factory(dim, index_key, metric)
                    if index_key.find('Flat') < 0:
                        assert not index.is_trained
                    index.train(feats)
                    index.nprobe = min(nprobe, nlist)
                    assert index.is_trained
                    print('nlist: {}, nprobe: {}'.format(nlist, nprobe))
                index.add(feats)
                if index_path != '':
                    print('[faiss] save index to {}'.format(index_path))
                    mkdir_if_no_exists(index_path)
                    faiss.write_index(index, index_path)
        with Timer('[faiss] query topk {}'.format(k), verbose):
            knn_ofn = index_path + '.npz'
            if os.path.exists(knn_ofn):
                print('[faiss] read knns from {}'.format(knn_ofn))
                self.knns = np.load(knn_ofn)['data']
            else:
                # When the index was read from disk, ``feats`` has not been
                # converted above, so convert here: the query is then always
                # a C-contiguous float32 array on both paths.
                queries = np.ascontiguousarray(feats, dtype=np.float32)
                sims, nbrs = index.search(queries, k=k)
                self.knns = [(np.array(nbr, dtype=np.int32),
                              1 - np.array(sim, dtype=np.float32))
                             for nbr, sim in zip(nbrs, sims)]
开发者ID:yl-1993,项目名称:learn-to-cluster,代码行数:57,代码来源:knn.py

示例10: __init__

# 需要导入模块: import faiss [as 别名]
# 或者: from faiss import index_factory [as 别名]
def __init__(self,
                 target,
                 nprobe=128,
                 index_factory_str=None,
                 verbose=False,
                 mode='proxy',
                 using_gpu=True):
        """Build a multi-GPU faiss index over ``target`` and add every row to it.

        :param target: 2-D array of vectors; row ``i`` is added with id ``i``
        :param nprobe: value assigned to ``nprobe`` of the CPU index
        :param index_factory_str: faiss factory string; when ``None`` an
            ``IVF<nlist>,PQ32`` string is derived from the number of rows
        :param verbose: assigned to ``verbose`` of the final index
        :param mode: ``'proxy'`` puts one sub-index per GPU into an
            ``IndexProxy``; ``'shard'`` clones to all GPUs with sharding
        :param using_gpu: in ``'proxy'`` mode, whether sub-indexes are cloned
            to the GPU or the CPU index is used; not consulted in ``'shard'``
            mode
        :raises KeyError: if ``mode`` is neither ``'proxy'`` nor ``'shard'``
        """
        import re

        self._res_list = []

        num_gpu = faiss.get_num_gpus()
        print('[faiss gpu] #GPU: {}'.format(num_gpu))

        size, dim = target.shape
        assert size > 0, "size: {}".format(size)
        if index_factory_str is None:
            # Force an int: depending on the NumPy version, round() on a NumPy
            # scalar may return a float, which would render as "IVF48.0".
            default_nlist = min(8192, 16 * int(round(np.sqrt(size))))
            index_factory_str = "IVF{},PQ{}".format(default_nlist, 32)
        cpu_index = faiss.index_factory(dim, index_factory_str)
        cpu_index.nprobe = nprobe

        if mode == 'proxy':
            co = faiss.GpuClonerOptions()
            co.useFloat16 = True
            co.usePrecomputed = False

            index = faiss.IndexProxy()
            for i in range(num_gpu):
                res = faiss.StandardGpuResources()
                # keep the resources referenced for the lifetime of the object
                self._res_list.append(res)
                sub_index = faiss.index_cpu_to_gpu(
                    res, i, cpu_index, co) if using_gpu else cpu_index
                index.addIndex(sub_index)
        elif mode == 'shard':
            co = faiss.GpuMultipleClonerOptions()
            co.useFloat16 = True
            co.usePrecomputed = False
            co.shard = True
            index = faiss.index_cpu_to_all_gpus(cpu_index,
                                                co,
                                                ngpu=num_gpu)
        else:
            raise KeyError("Unknown index mode")

        index = faiss.IndexIDMap(index)
        index.verbose = verbose

        # training
        if not index.is_trained:
            # nlist decides how many samples are drawn for training.  It is
            # parsed only when training is needed, and by pattern so that
            # tokens such as "IVF65536_HNSW32" are handled as well.
            ivf_token = re.search(r'IVF(\d+)', index_factory_str)
            if ivf_token is None:
                # no IVF component to size the sample by: use all vectors
                train_vectors = target
            else:
                nlist = int(ivf_token.group(1))
                indexes_sample_for_train = np.random.randint(
                    0, size, nlist * 256)
                train_vectors = target[indexes_sample_for_train]
            index.train(train_vectors)

        # add with ids
        target_ids = np.arange(0, size)
        index.add_with_ids(target, target_ids)
        self.index = index
开发者ID:yl-1993,项目名称:learn-to-cluster,代码行数:63,代码来源:faiss_gpu.py


注:本文中的faiss.index_factory方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。