当前位置: 首页>>代码示例>>Python>>正文


Python faiss.read_index方法代码示例

本文整理汇总了Python中faiss.read_index方法的典型用法代码示例。如果您正苦于以下问题:Python faiss.read_index方法的具体用法?Python faiss.read_index怎么用?Python faiss.read_index使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在faiss的用法示例。


在下文中一共展示了faiss.read_index方法的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: _load_index

# 需要导入模块: import faiss [as 别名]
# 或者: from faiss import read_index [as 别名]
def _load_index(ctx, op, index, device_id):
    """Turn ``index`` into a usable faiss index object.

    How ``index`` is interpreted depends on the index type resolved by
    ``_get_index_type(op.return_index_type, ctx)``:

    * ``'object'``: ``index`` is returned unchanged.
    * ``'filename'``: ``index`` is a path that is read with
      ``faiss.read_index``.
    * anything else: ``index`` is a bytes-like serialized index. It is
      written to a temporary file, read back with ``faiss.read_index`` and,
      when ``device_id >= 0``, moved to that GPU via ``_index_to_gpu``.

    Args:
        ctx: Execution context forwarded to ``_get_index_type``.
        op: Operand whose ``return_index_type`` attribute selects the branch.
        index: Index object, file name, or serialized bytes (see above).
        device_id: GPU id; only consulted in the serialized-bytes branch.

    Returns:
        The loaded faiss index (or ``index`` itself in the ``'object'`` case).
    """
    import os

    return_index_type = _get_index_type(op.return_index_type, ctx)

    if return_index_type == 'object':
        # local
        return index
    elif return_index_type == 'filename':
        # local cluster
        return faiss.read_index(index)
    else:
        # distributed
        # mkstemp hands back an already-open descriptor; wrap it so it gets
        # closed instead of leaking, and remove the file once it is loaded.
        fd, fn = tempfile.mkstemp('.index', prefix='faiss_')
        try:
            with os.fdopen(fd, 'wb') as f:
                f.write(index)
            index = faiss.read_index(fn)
        finally:
            os.remove(fn)
        if device_id >= 0:  # pragma: no cover
            index = _index_to_gpu(index, device_id)
        return index
开发者ID:mars-project,项目名称:mars,代码行数:20,代码来源:_faiss.py

示例2: __init__

# 需要导入模块: import faiss [as 别名]
# 或者: from faiss import read_index [as 别名]
def __init__(self, phrase_dump_dir, start_index_path, idx2id_path, max_answer_length, para=False,
                 num_dummy_zeros=0, cuda=False):
        """Open the phrase dumps and the start index used for search.

        Args:
            phrase_dump_dir: Either a directory whose entries containing
                ``'hdf5'`` in their name are opened, or the path of a single
                dump file.
            start_index_path: Path of the faiss index, read with
                ``IO_FLAG_ONDISK_SAME_DIR``.
            idx2id_path: Path handed to ``self.load_idx_f``.
            max_answer_length: Stored on the instance as-is.
            para: Stored on the instance as-is.
            num_dummy_zeros: Stored on the instance as-is.
            cuda: Stored on the instance as-is.
        """
        if not os.path.isdir(phrase_dump_dir):
            # A single dump file was given; no ``dump_ranges`` is set here.
            self.phrase_dump_paths = [phrase_dump_dir]
        else:
            found = []
            for entry in os.listdir(phrase_dump_dir):
                if 'hdf5' in entry:
                    found.append(os.path.join(phrase_dump_dir, entry))
            self.phrase_dump_paths = sorted(found)

            # File stems look like "<int>-<int>"; parse them into int lists.
            ranges = []
            for dump_path in self.phrase_dump_paths:
                stem = os.path.splitext(os.path.basename(dump_path))[0]
                ranges.append([int(part) for part in stem.split('-')])
            self.dump_ranges = ranges

        opened = []
        for dump_path in self.phrase_dump_paths:
            opened.append(h5py.File(dump_path, 'r'))
        self.phrase_dumps = opened

        self.max_answer_length = max_answer_length
        self.para = para

        print('reading %s' % start_index_path)
        self.start_index = faiss.read_index(start_index_path, faiss.IO_FLAG_ONDISK_SAME_DIR)
        self.idx_f = self.load_idx_f(idx2id_path)
        # Without a top-level 'doc' entry the mapping is treated as offset-keyed.
        self.has_offset = 'doc' not in self.idx_f

        self.num_dummy_zeros = num_dummy_zeros
        self.cuda = cuda
开发者ID:uwnlp,项目名称:denspi,代码行数:26,代码来源:mips.py

示例3: train_index

# 需要导入模块: import faiss [as 别名]
# 或者: from faiss import read_index [as 别名]
def train_index(data, quantizer_path, trained_index_path, fine_quant='SQ8', cuda=False):
    """Train an IVF index on top of a saved coarse quantizer and write it out.

    Args:
        data: Training vectors handed to the index's ``train`` method.
        quantizer_path: Path of the coarse quantizer, read with
            ``faiss.read_index``. Its ``d`` and ``ntotal`` are used as the
            dimensionality and number of inverted lists.
        trained_index_path: Path the trained index is written to.
        fine_quant: ``'SQ8'`` for a scalar-quantizer IVF index or ``'PQ<m>'``
            for an ``IndexIVFPQ`` with ``m`` sub-quantizers of 8 bits.
        cuda: Train on GPU 0 when supported for the chosen quantizer.

    Raises:
        ValueError: If ``fine_quant`` is neither ``'SQ8'`` nor ``'PQ<m>'``.
    """
    quantizer = faiss.read_index(quantizer_path)
    if fine_quant == 'SQ8':
        # NOTE(review): faiss documents the 4th positional argument of
        # IndexIVFScalarQuantizer as the quantizer type, not the metric;
        # confirm that passing METRIC_L2 here selects the intended 8-bit
        # quantizer. Left unchanged to stay compatible with existing indexes.
        trained_index = faiss.IndexIVFScalarQuantizer(quantizer, quantizer.d, quantizer.ntotal, faiss.METRIC_L2)
    elif fine_quant.startswith('PQ'):
        m = int(fine_quant[2:])
        trained_index = faiss.IndexIVFPQ(quantizer, quantizer.d, quantizer.ntotal, m, 8)
    else:
        raise ValueError(fine_quant)

    train_on_gpu = cuda and not fine_quant.startswith('PQ')
    if cuda and not train_on_gpu:
        print('PQ not supported on GPU; keeping CPU.')

    if train_on_gpu:
        res = faiss.StandardGpuResources()
        gpu_index = faiss.index_cpu_to_gpu(res, 0, trained_index)
        gpu_index.train(data)
        trained_index = faiss.index_gpu_to_cpu(gpu_index)
    else:
        # Also covers the PQ + cuda fallback, which previously skipped
        # training entirely and wrote an untrained index.
        trained_index.train(data)
    faiss.write_index(trained_index, trained_index_path)
开发者ID:uwnlp,项目名称:denspi,代码行数:23,代码来源:run_index.py

示例4: do_indexing

# 需要导入模块: import faiss [as 别名]
# 或者: from faiss import read_index [as 别名]
def do_indexing(word2vec_model=None):
    """Return the inner-product index, building and saving it if necessary.

    If ``INDEX_FILE_PATH`` already exists it is loaded and returned. Otherwise
    a flat inner-product index is built from the model's ``wv.syn0norm``
    vectors, written to ``INDEX_FILE_PATH`` and returned.

    Args:
        word2vec_model: Model providing ``vector_size`` and ``wv.syn0norm``.
            Only needed when the index file does not exist yet.

    Returns:
        The loaded or newly built faiss index.

    Raises:
        ValueError: If the index file is missing and no model was supplied.
    """
    if os.path.isfile(INDEX_FILE_PATH):
        return faiss.read_index(INDEX_FILE_PATH)

    if word2vec_model is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError(
            'word2vec_model is required to build the index because %s '
            'does not exist' % INDEX_FILE_PATH)

    index = faiss.IndexFlatIP(word2vec_model.vector_size)
    index.add(word2vec_model.wv.syn0norm)
    faiss.write_index(index, INDEX_FILE_PATH)
    return index
开发者ID:ynqa,项目名称:faiss-server,代码行数:10,代码来源:indexing.py

示例5: __init__

# 需要导入模块: import faiss [as 别名]
# 或者: from faiss import read_index [as 别名]
def __init__(self, file_name, dim_vector=500, sentence_max_len=50):
        """Load the faiss index at ``file_name`` and store the length limit.

        ``dim_vector`` is accepted for interface compatibility but is not
        used by this constructor.
        """
        loaded_index = faiss.read_index(file_name)  # type: faiss.Index
        self.u = loaded_index
        self.sentence_max_length = sentence_max_len
开发者ID:HendrikStrobelt,项目名称:Seq2Seq-Vis,代码行数:5,代码来源:faissVectorIndex.py

示例6: deserialize_from

# 需要导入模块: import faiss [as 别名]
# 或者: from faiss import read_index [as 别名]
def deserialize_from(self, index_file: str):
        """Replace ``self.index`` with the faiss index stored at ``index_file``.

        Logs the source path before loading and the index type and size
        (``ntotal``) afterwards.
        """
        logger.info("Loading index from %s", index_file)
        loaded = faiss.read_index(index_file)
        self.index = loaded
        index_type = type(self.index)
        index_size = self.index.ntotal
        logger.info("Loaded index of type %s and size %d", index_type, index_size)


# DenseFlatIndexer does exact search 
开发者ID:facebookresearch,项目名称:BLINK,代码行数:11,代码来源:faiss_indexer.py

示例7: __init_indexes

# 需要导入模块: import faiss [as 别名]
# 或者: from faiss import read_index [as 别名]
def __init_indexes(self):
        """Load every index file matching ``self.pattern`` under ``self.base_dir``.

        The integer after the last underscore of each file stem is used as
        the key into ``self.indexes``.
        """
        for index_file in self.base_dir.glob(self.pattern):
            print(index_file)
            suffix = index_file.stem.rsplit('_', 1)[-1]
            loaded = faiss.read_index(str(index_file))
            self.indexes[int(suffix)] = loaded
开发者ID:bhoov,项目名称:exbert,代码行数:7,代码来源:index_wrapper.py

示例8: load

# 需要导入模块: import faiss [as 别名]
# 或者: from faiss import read_index [as 别名]
def load(self, path: str, device: Optional[str] = None) -> None:
        r"""Load the index and meta data from ``path`` directory.

        Reads ``index.faiss`` and ``index.meta_data`` from ``path``. When the
        device name starts with ``"gpu"`` the index is copied to that GPU;
        if the requested GPU id is not available, GPU 0 is used and a
        warning is logged.

        Args:
            path (str): A path to the directory to load the index from.
            device (optional str): Device to load the index into. If None,
                value will be picked from hyperparameters.

        Raises:
            ValueError: If ``path`` does not exist.

        """

        if not os.path.exists(path):
            raise ValueError(f"Failed to load the index. {path} "
                             f"does not exist.")

        cpu_index = faiss.read_index(f"{path}/index.faiss")

        if device is None:
            device = self._config.device

        if device.lower().startswith("gpu"):
            gpu_resource = faiss.StandardGpuResources()
            gpu_id = int(device[3:])
            num_gpus = faiss.get_num_gpus()
            # GPU ids are zero-based, so valid ids are 0 .. num_gpus - 1;
            # an id equal to num_gpus must also trigger the fallback.
            if gpu_id >= num_gpus:
                gpu_id = 0
                logging.warning("Cannot create the index on device %s. "
                                "Total number of GPUs on this machine is "
                                "%s. Using the gpu0 for the index.",
                                device, num_gpus)
            self._index = faiss.index_cpu_to_gpu(
                gpu_resource, gpu_id, cpu_index)

        else:
            self._index = cpu_index

        # NOTE: pickle can execute arbitrary code while loading; only load
        # meta data files that come from a trusted source.
        with open(f"{path}/index.meta_data", "rb") as f:
            self._meta_data = pickle.load(f)
开发者ID:asyml,项目名称:forte,代码行数:38,代码来源:embedding_based_indexer.py

示例9: calculate

# 需要导入模块: import faiss [as 别名]
# 或者: from faiss import read_index [as 别名]
def calculate(self, images):
        """Return the label of the nearest indexed vector for each image.

        Loads the faiss index from ``self.index_path`` and the id-to-label
        JSON mapping from ``self.id_path``, extracts features with
        ``self.feature.get_feature`` and looks up the single nearest
        neighbour of each feature row.
        """
        index = faiss.read_index(self.index_path)
        with open(self.id_path) as id_file:
            labels_by_id = json.load(id_file)
        logging.info('database load')
        features = self.feature.get_feature(images)
        _, neighbours = index.search(features, k=1)
        # JSON object keys are strings, hence the str() around the row id.
        return [labels_by_id[str(row[0])] for row in neighbours]
开发者ID:seongahjo,项目名称:Mosaicer,代码行数:13,代码来源:label.py

示例10: main

# 需要导入模块: import faiss [as 别名]
# 或者: from faiss import read_index [as 别名]
def main(args):
    """Serve nearest-neighbour queries against a faiss index over XML-RPC.

    The index is read from ``args.index`` with an ``.idx`` suffix and its
    metadata from the same path with a ``.json`` suffix. A single ``query``
    function is registered on a ``SimpleXMLRPCServer`` bound to
    ``args.host``/``args.port``; the server runs until interrupted.
    """
    index_file = args.index.with_suffix(".idx")
    index = read_index(str(index_file))
    index.nprobe = args.num_probes

    metadata_file = args.index.with_suffix(".json")
    with metadata_file.open() as fp:
        metadata = json.load(fp)

    def query(batch, n):
        # ``batch.data`` is a raw float32 buffer; fold it into rows of
        # ``args.dimension`` values each.
        flat = np.frombuffer(batch.data, dtype=np.float32)
        feats = rearrange(flat, "(n d) -> n d", d=args.dimension)
        assert len(feats.shape) == 2
        assert feats.shape[1] == args.dimension
        assert feats.dtype == np.float32

        dists, indices = index.search(feats, n)

        meta = []
        for row in indices:
            meta.append([metadata[i] for i in row])

        return dists.tolist(), indices.tolist(), meta

    address = (args.host, args.port)
    with SimpleXMLRPCServer(address, logRequests=False) as server:
        server.register_function(query)

        try:
            print("⏳ Waiting for similarity calls on {}:{}".format(args.host, args.port), file=sys.stderr)
            server.serve_forever()
        except KeyboardInterrupt:
            print("\n⌛ Done", file=sys.stderr)
开发者ID:moabitcoin,项目名称:ig65m-pytorch,代码行数:30,代码来源:server.py

示例11: remove_doc_ids

# 需要导入模块: import faiss [as 别名]
# 或者: from faiss import read_index [as 别名]
def remove_doc_ids(args):
    """Remove the vectors of ignored documents from faiss indexes.

    Two modes, selected by whether ``args.subindex_dir`` is a directory:

    * Directory: every ``*.faiss`` file in it is paired with the ``.hdf5``
      file of the same name. ``args.ignore_path`` is a JSON mapping of
      document id to count; documents whose count exceeds
      ``sum(counts) * args.ratio`` are removed. Indexes that contain such
      documents are rewritten under ``args.target_dir``.
    * Single file: ``args.subindex_dir`` is the index path,
      ``args.target_dir`` the output path, and ``args.ignore_path`` a JSON
      collection of document ids to remove. The id mapping is read from the
      path obtained by replacing ``index.faiss`` with ``idx2id.hdf5``, whose
      groups are keyed by integer offsets.

    In both modes nothing is written for an index with no matching ids.
    """
    if os.path.isdir(args.subindex_dir):
        names = os.listdir(args.subindex_dir)
        index_names = [name for name in names if name.endswith('.faiss')]
        index_paths = [os.path.join(args.subindex_dir, name) for name in index_names]
        target_paths = [os.path.join(args.target_dir, name) for name in index_names]
        idx2id_paths = [path.replace('.faiss', '.hdf5') for path in index_paths]
        if not os.path.exists(args.target_dir):
            os.makedirs(args.target_dir)

        with open(args.ignore_path, 'r') as fp:
            ignore_counter = json.load(fp)
        count = sum(ignore_counter.values())
        th = count * args.ratio
        ignores = [int(key) for key, val in ignore_counter.items() if val > th]
        print('thresholding at %.1f, removing following document ids:' % th)
        for ignore in ignores:
            print(ignore)

        for idx2id_path, index_path, target_path in zip(idx2id_paths, tqdm(index_paths), target_paths):
            with h5py.File(idx2id_path, 'r') as f:
                doc_ids = f['doc'][:]
                offset = f.attrs['offset']
            # Positions whose document id matches any ignored id.
            idxs, = np.where(np.any(np.expand_dims(doc_ids, 1) == ignores, 1))
            if len(idxs) > 0:
                # Shift local positions by the file's offset to get index ids.
                idxs = idxs + offset
                print('found %d ids to remove' % len(idxs))
                index = faiss.read_index(index_path)
                index.remove_ids(idxs)
                faiss.write_index(index, target_path)
            else:
                print('no ignore list found at %s' % index_path)
    else:
        index_path = args.subindex_dir
        target_path = args.target_dir
        idx2id_path = args.subindex_dir.replace('index.faiss', 'idx2id.hdf5')
        with open(args.ignore_path, 'r') as fp:
            ignores = np.array(list(map(int, json.load(fp))))

        # Load the index lazily and only once so removals from every offset
        # group accumulate; re-reading it per group would discard earlier
        # removals whenever target_path differs from index_path.
        index = None
        with h5py.File(idx2id_path, 'r') as f:
            for offset, group in f.items():
                doc_ids = group['doc'][:]
                offset = int(offset)
                idxs, = np.where(np.any(np.expand_dims(doc_ids, 1) == ignores, 1))
                if len(idxs) > 0:
                    idxs = idxs + offset
                    print(idxs)
                    if index is None:
                        index = faiss.read_index(index_path)
                    index.remove_ids(idxs)
                else:
                    print('no ignore list found at %d' % offset)
        if index is not None:
            faiss.write_index(index, target_path)
开发者ID:uwnlp,项目名称:denspi,代码行数:53,代码来源:remove_doc_id.py

示例12: merge_indexes

# 需要导入模块: import faiss [as 别名]
# 或者: from faiss import read_index [as 别名]
def merge_indexes(subindex_dir, trained_index_path, target_index_path, target_idx2id_path, target_inv_path):
    """Merge the sub-indexes in ``subindex_dir`` into one on-disk index.

    Steps, in order:

    1. Copy the ``doc``/``para``/``word`` datasets of every ``*.hdf5`` file
       in ``subindex_dir`` into offset-named groups of a new HDF5 file at
       ``target_idx2id_path``.
    2. Open every ``*.faiss`` file with ``IO_FLAG_MMAP`` and collect its
       inverted lists.
    3. Read the trained index from ``trained_index_path``, merge all
       collected inverted lists into an ``OnDiskInvertedLists`` stored at
       ``target_inv_path``, attach them to the trained index and write the
       result to ``target_index_path``.

    Args:
        subindex_dir: Directory holding the ``.faiss`` and ``.hdf5`` files.
        trained_index_path: Path of the trained index used as the output shell.
        target_index_path: Path the merged index is written to.
        target_idx2id_path: Path of the merged id-mapping HDF5 file (created
            with mode ``'w'``).
        target_inv_path: Path backing the on-disk inverted lists.
    """
    # target_inv_path = merged_index.ivfdata
    names = os.listdir(subindex_dir)
    idx2id_paths = [os.path.join(subindex_dir, name) for name in names if name.endswith('.hdf5')]
    index_paths = [os.path.join(subindex_dir, name) for name in names if name.endswith('.faiss')]

    print('copying idx2id')
    with h5py.File(target_idx2id_path, 'w') as out:
        for idx2id_path in tqdm(idx2id_paths, desc='copying idx2id'):
            with h5py.File(idx2id_path, 'r') as in_:
                for key, g in in_.items():
                    # Each top-level entry must be named after its own offset attribute.
                    offset = str(g.attrs['offset'])
                    assert key == offset
                    group = out.create_group(offset)
                    # NOTE(review): the datasets are read from the file root
                    # (``in_``) rather than from the entry ``g`` being
                    # iterated - confirm against the actual HDF5 layout
                    # whether ``g['doc']`` etc. was intended.
                    group.create_dataset('doc', data=in_['doc'])
                    group.create_dataset('para', data=in_['para'])
                    group.create_dataset('word', data=in_['word'])

    print('loading invlists')
    ivfs = []
    for index_path in tqdm(index_paths, desc='loading invlists'):
        # the IO_FLAG_MMAP is to avoid actually loading the data thus
        # the total size of the inverted lists can exceed the
        # available RAM
        index = faiss.read_index(index_path,
                                 faiss.IO_FLAG_MMAP)
        ivfs.append(index.invlists)

        # avoid that the invlists get deallocated with the index
        index.own_invlists = False

    # construct the output index
    index = faiss.read_index(trained_index_path)

    # prepare the output inverted lists. They will be written
    # to merged_index.ivfdata
    invlists = faiss.OnDiskInvertedLists(
        index.nlist, index.code_size,
        target_inv_path)

    # merge all the inverted lists
    print('merging')
    ivf_vector = faiss.InvertedListsPtrVector()
    for ivf in tqdm(ivfs):
        ivf_vector.push_back(ivf)

    print("merge %d inverted lists " % ivf_vector.size())
    # merge_from returns the total that is stored as the index's ntotal below.
    ntotal = invlists.merge_from(ivf_vector.data(), ivf_vector.size())
    print(ntotal)

    # now replace the inverted lists in the output index
    index.ntotal = ntotal
    index.replace_invlists(invlists)

    print('writing index')
    faiss.write_index(index, target_index_path) 
开发者ID:uwnlp,项目名称:denspi,代码行数:58,代码来源:run_index.py

示例13: run_index

# 需要导入模块: import faiss [as 别名]
# 或者: from faiss import read_index [as 别名]
def run_index(args):
    """Run the indexing pipeline stage(s) selected by ``args.stage``.

    Stages:
        * ``coarse``: sample data, save the max norm and train the coarse
          quantizer.
        * ``fine``: train the index on top of the coarse quantizer.
        * ``add``: add the phrase dumps to the trained index.
        * ``all``: run ``coarse``, ``fine`` and ``add`` in that order.
        * ``merge``: merge the sub-indexes into one index.
        * ``move``: attach on-disk inverted lists to the trained index and
          write it to ``args.index_path``.

    Except for ``move``, a stage is skipped when its output already exists
    and ``args.replace`` is false.
    """
    stage = args.stage

    # Use the single phrase dump if present, else every .hdf5 under phrase/.
    single_dump = os.path.join(args.dump_dir, 'phrase.hdf5')
    if os.path.exists(single_dump):
        dump_paths = [single_dump]
    else:
        phrase_dir = os.path.join(args.dump_dir, 'phrase')
        dump_paths = []
        for name in os.listdir(phrase_dir):
            if name.endswith('.hdf5'):
                dump_paths.append(os.path.join(phrase_dir, name))

    # Sampled training data, shared between the coarse and fine stages.
    data = None

    if stage in ('all', 'coarse') and (args.replace or not os.path.exists(args.quantizer_path)):
        if not os.path.exists(args.index_dir):
            os.makedirs(args.index_dir)
        data, max_norm = sample_data(dump_paths, max_norm=args.max_norm, para=args.para,
                                     doc_sample_ratio=args.doc_sample_ratio,
                                     vec_sample_ratio=args.vec_sample_ratio,
                                     max_norm_cf=args.max_norm_cf, num_dummy_zeros=args.num_dummy_zeros,
                                     norm_th=args.norm_th)
        with open(args.max_norm_path, 'w') as fp:
            json.dump(max_norm, fp)
        train_coarse_quantizer(data, args.quantizer_path, args.num_clusters, cuda=args.cuda)

    if stage in ('all', 'fine') and (args.replace or not os.path.exists(args.trained_index_path)):
        with open(args.max_norm_path, 'r') as fp:
            max_norm = json.load(fp)
        if data is None:
            # The coarse stage did not run in this call, so sample now.
            data, _ = sample_data(dump_paths, max_norm=max_norm, para=args.para,
                                  doc_sample_ratio=args.doc_sample_ratio,
                                  vec_sample_ratio=args.vec_sample_ratio,
                                  num_dummy_zeros=args.num_dummy_zeros, norm_th=args.norm_th)
        train_index(data, args.quantizer_path, args.trained_index_path, fine_quant=args.fine_quant, cuda=args.cuda)

    if stage in ('all', 'add') and (args.replace or not os.path.exists(args.index_path)):
        with open(args.max_norm_path, 'r') as fp:
            max_norm = json.load(fp)
        if args.dump_paths is not None:
            # Explicit dump paths override the discovered ones.
            dump_paths = args.dump_paths
            if not os.path.exists(args.subindex_dir):
                os.makedirs(args.subindex_dir)
        add_to_index(dump_paths, args.trained_index_path, args.index_path, args.idx2id_path,
                     max_norm=max_norm, para=args.para, num_dummy_zeros=args.num_dummy_zeros, cuda=args.cuda,
                     num_docs_per_add=args.num_docs_per_add, offset=args.offset, norm_th=args.norm_th,
                     fine_quant=args.fine_quant)

    if stage == 'merge' and (args.replace or not os.path.exists(args.index_path)):
        merge_indexes(args.subindex_dir, args.trained_index_path, args.index_path, args.idx2id_path, args.inv_path)

    if stage == 'move':
        index = faiss.read_index(args.trained_index_path)
        on_disk_lists = faiss.OnDiskInvertedLists(index.nlist, index.code_size, args.inv_path)
        index.replace_invlists(on_disk_lists)
        faiss.write_index(index, args.index_path)
开发者ID:uwnlp,项目名称:denspi,代码行数:58,代码来源:run_index.py

示例14: __init__

# 需要导入模块: import faiss [as 别名]
# 或者: from faiss import read_index [as 别名]
def __init__(self,
                 feats,
                 k,
                 index_path='',
                 index_key='',
                 nprobe=128,
                 omp_num_threads=None,
                 rebuild_index=True,
                 verbose=True,
                 **kwargs):
        """Build (or load) a faiss index over ``feats`` and compute k-NN lists.

        Args:
            feats: 2-D feature array; used both to build the index and as
                the query set.
            k: Number of neighbours to retrieve per row.
            index_path: Where the index is cached. An existing file is
                loaded only when ``rebuild_index`` is false. ``index_path +
                '.npz'`` is checked for cached k-NN results.
            index_key: ``''`` for a flat inner-product index, ``'IVF'`` for
                ``IndexIVFFlat``, otherwise a faiss ``index_factory`` string.
                Keys containing ``'HNSW'`` are rejected.
            nprobe: Upper bound for the index's ``nprobe`` (capped at nlist).
            omp_num_threads: If given, sets faiss' OpenMP thread count.
            rebuild_index: Rebuild even if ``index_path`` exists.
            verbose: Forwarded to ``Timer`` and stored on the instance.
            **kwargs: Accepted and ignored.

        The result is stored in ``self.knns``: either the ``'data'`` array
        from the cached ``.npz`` file, or a list of ``(neighbour_ids,
        1 - similarity)`` tuples, one per row of ``feats``.
        """
        import faiss
        if omp_num_threads is not None:
            faiss.omp_set_num_threads(omp_num_threads)
        self.verbose = verbose
        with Timer('[faiss] build index', verbose):
            if index_path != '' and not rebuild_index and os.path.exists(
                    index_path):
                print('[faiss] read index from {}'.format(index_path))
                index = faiss.read_index(index_path)
            else:
                feats = feats.astype('float32')
                size, dim = feats.shape
                index = faiss.IndexFlatIP(dim)
                if index_key != '':
                    assert index_key.find(
                        'HNSW') < 0, 'HNSW returns distances insted of sims'
                    metric = faiss.METRIC_INNER_PRODUCT
                    nlist = min(4096, 8 * round(math.sqrt(size)))
                    if index_key == 'IVF':
                        # Reuse the flat index as the coarse quantizer.
                        quantizer = index
                        index = faiss.IndexIVFFlat(quantizer, dim, nlist,
                                                   metric)
                    else:
                        index = faiss.index_factory(dim, index_key, metric)
                    if index_key.find('Flat') < 0:
                        assert not index.is_trained
                    index.train(feats)
                    index.nprobe = min(nprobe, nlist)
                    assert index.is_trained
                    print('nlist: {}, nprobe: {}'.format(nlist, nprobe))
                index.add(feats)
                if index_path != '':
                    print('[faiss] save index to {}'.format(index_path))
                    mkdir_if_no_exists(index_path)
                    faiss.write_index(index, index_path)
        with Timer('[faiss] query topk {}'.format(k), verbose):
            knn_ofn = index_path + '.npz'
            if os.path.exists(knn_ofn):
                print('[faiss] read knns from {}'.format(knn_ofn))
                self.knns = np.load(knn_ofn)['data']
            else:
                # When the index was loaded from disk, feats has not been
                # converted yet; cast it so the query is float32 either way.
                if feats.dtype != np.float32:
                    feats = feats.astype('float32')
                sims, nbrs = index.search(feats, k=k)
                # Inner-product similarities are turned into distances.
                self.knns = [(np.array(nbr, dtype=np.int32),
                              1 - np.array(sim, dtype=np.float32))
                             for nbr, sim in zip(nbrs, sims)]
开发者ID:yl-1993,项目名称:learn-to-cluster,代码行数:57,代码来源:knn.py


注:本文中的faiss.read_index方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。