当前位置: 首页>>代码示例>>Python>>正文


Python faiss.IndexFlatIP方法代码示例

本文整理汇总了Python中faiss.IndexFlatIP方法的典型用法代码示例。如果您正苦于以下问题：Python faiss.IndexFlatIP方法的具体用法？Python faiss.IndexFlatIP怎么用？Python faiss.IndexFlatIP使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块faiss的用法示例。


在下文中一共展示了faiss.IndexFlatIP方法的13个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: main

# 需要导入模块: import faiss [as 别名]
# 或者: from faiss import IndexFlatIP [as 别名]
def main():
    """Build a flat inner-product Faiss index from an HDF5 dataset.

    Opens the HDF5 file ``opt.states``, reads the three-dimensional dataset
    named ``opt.data`` (its shape is unpacked as sequences x tokens x
    states), adds every state vector to a ``faiss.IndexFlatIP`` in chunks of
    ``opt.stepsize`` sequences, and writes the index to ``opt.output``.
    """
    # The context manager closes the file even if indexing raises.
    with h5py.File(opt.states, "r") as f:
        data = f[opt.data]
        seqs, slens, hid = data.shape

        print("Processing {} Sequences".format(seqs))
        print("with {} tokens each".format(slens))
        print("and {} states".format(hid))

        # Initialize a new index
        index = faiss.IndexFlatIP(hid)
        # Fill it. Iterate up to `seqs` (not `seqs - stepsize`): slicing
        # clamps the final chunk, so the trailing sequences are indexed too
        # instead of being silently dropped.
        for ix in tqdm(range(0, seqs, opt.stepsize)):
            cdata = np.array(data[ix:ix + opt.stepsize]
                             .reshape(-1, hid), dtype="float32")
            index.add(cdata)

    faiss.write_index(index, opt.output)
开发者ID:HendrikStrobelt,项目名称:Seq2Seq-Vis,代码行数:21,代码来源:h5_to_faiss.py

示例2: _build_approximate_index

# 需要导入模块: import faiss [as 别名]
# 或者: from faiss import IndexFlatIP [as 别名]
def _build_approximate_index(self,
                             data: np.ndarray):
    """Build, train and fill an IVF-Flat index on the GPU.

    The metric follows ``self.kernel_name``: L2 for the ``'rbf'`` kernel,
    inner product for everything else. The resulting GPU index is stored
    in ``self.index``.

    :param data: 2-D array with one vector per row; ``data.shape[1]`` is
        used as the index dimensionality. (Presumably float32, as Faiss
        expects -- confirm against callers.)
    """
    dimensionality = data.shape[1]
    # Use 100 inverted lists only when there are more than 100 vectors.
    nlist = 100 if data.shape[0] > 100 else 2

    if self.kernel_name in {'rbf'}:
        quantizer = faiss.IndexFlatL2(dimensionality)
        metric = faiss.METRIC_L2
    else:
        quantizer = faiss.IndexFlatIP(dimensionality)
        # IndexIVFFlat defaults to METRIC_L2 when no metric is given, so the
        # inner-product metric must be passed explicitly to match the
        # quantizer.
        metric = faiss.METRIC_INNER_PRODUCT
    cpu_index_flat = faiss.IndexIVFFlat(quantizer, dimensionality, nlist, metric)

    gpu_index_ivf = faiss.index_cpu_to_gpu(self.resource, 0, cpu_index_flat)
    gpu_index_ivf.train(data)
    gpu_index_ivf.add(data)
    self.index = gpu_index_ivf
开发者ID:uclnlp,项目名称:gntp,代码行数:18,代码来源:faiss.py

示例3: do_indexing

# 需要导入模块: import faiss [as 别名]
# 或者: from faiss import IndexFlatIP [as 别名]
def do_indexing(word2vec_model=None):
    """Return the Faiss index stored at ``INDEX_FILE_PATH``, creating it if absent.

    If the index file already exists it is loaded and returned, and
    ``word2vec_model`` is not used. Otherwise a flat inner-product index is
    built from ``word2vec_model.wv.syn0norm``, written to
    ``INDEX_FILE_PATH`` and returned.
    """
    # Fast path: a previously saved index is simply read back.
    if os.path.isfile(INDEX_FILE_PATH):
        return faiss.read_index(INDEX_FILE_PATH)

    built = faiss.IndexFlatIP(word2vec_model.vector_size)
    built.add(word2vec_model.wv.syn0norm)
    faiss.write_index(built, INDEX_FILE_PATH)
    return built
开发者ID:ynqa,项目名称:faiss-server,代码行数:10,代码来源:indexing.py

示例4: __init__

# 需要导入模块: import faiss [as 别名]
# 或者: from faiss import IndexFlatIP [as 别名]
def __init__(self, dim=None):
    """Create the wrapper around a flat inner-product Faiss index.

    :param dim: dimensionality passed to ``faiss.IndexFlatIP``.
    """
    super(Index, self).__init__()
    # Both start unset (presumably filled with search distances / indices
    # elsewhere in the class -- not visible here).
    self.D = None
    self.I = None
    flat_index = faiss.IndexFlatIP(dim)
    self.index = flat_index
开发者ID:iesl,项目名称:diora,代码行数:6,代码来源:phrase_embed.py

示例5: __init__

# 需要导入模块: import faiss [as 别名]
# 或者: from faiss import IndexFlatIP [as 别名]
def __init__(self, vector_sz: int = 1, buffer_size: int = 50000):
    """Set up an indexer backed by a flat inner-product Faiss index.

    :param vector_sz: dimensionality of the vectors to be indexed.
    :param buffer_size: forwarded unchanged to the parent constructor.
    """
    super(DenseFlatIndexer, self).__init__(buffer_size=buffer_size)
    flat_ip_index = faiss.IndexFlatIP(vector_sz)
    self.index = flat_ip_index
开发者ID:facebookresearch,项目名称:BLINK,代码行数:5,代码来源:faiss_indexer.py

示例6: get_nn_avg_dist

# 需要导入模块: import faiss [as 别名]
# 或者: from faiss import IndexFlatIP [as 别名]
def get_nn_avg_dist(emb, query, knn):
    """
    Return, for every row of `query`, the mean inner product with its
    `knn` highest-scoring rows of `emb`.

    When Faiss is available the search runs on a flat inner-product index
    (GPU variant if this Faiss build exposes `StandardGpuResources`);
    otherwise it falls back to batched torch matrix products.
    The result is a numpy array with one value per query.
    """
    if not FAISS_AVAILABLE:
        # Pure-torch fallback: score queries against all embeddings in
        # batches of 1024 rows and keep the top `knn` scores per query.
        batch_size = 1024
        emb_t = emb.transpose(0, 1).contiguous()
        batch_means = []
        for start in range(0, query.shape[0], batch_size):
            scores = query[start:start + batch_size].mm(emb_t)
            top_scores, _ = scores.topk(knn, dim=1, largest=True, sorted=True)
            batch_means.append(top_scores.mean(1).cpu())
        return torch.cat(batch_means).numpy()

    emb_np = emb.cpu().numpy()
    query_np = query.cpu().numpy()
    dim = emb_np.shape[1]
    if hasattr(faiss, 'StandardGpuResources'):
        # gpu mode
        gpu_res = faiss.StandardGpuResources()
        gpu_cfg = faiss.GpuIndexFlatConfig()
        gpu_cfg.device = 0
        index = faiss.GpuIndexFlatIP(gpu_res, dim, gpu_cfg)
    else:
        # cpu mode
        index = faiss.IndexFlatIP(dim)
    index.add(emb_np)
    top_sims, _ = index.search(query_np, knn)
    return top_sims.mean(1)
开发者ID:violet-zct,项目名称:DeMa-BWE,代码行数:33,代码来源:utils.py

示例7: train_indexes

# 需要导入模块: import faiss [as 别名]
# 或者: from faiss import IndexFlatIP [as 别名]
def train_indexes(ce: CorpusDataWrapper, stepsize=100, drop_null=True):
    """Build one flat inner-product Faiss index per layer for embeddings and contexts.

    Parameters:
    ===========
    - ce: Wrapper around HDF5 file for easy access to data
    - stepsize: How many sentences to add to the indexes at once
    - drop_null: If true, read the `zero_special_*` arrays of each sentence
      instead of the raw ones (intended to keep special tokens such as
      [CLS] and [SEP], whose spacy POS are null, out of the results)

    Returns:
    ========
    A tuple `(embedding_indexes, context_indexes)`, each a list with one
    index per layer.
    """
    n_layers = ce.n_layers  # includes the input layer

    embedding_indexes = [faiss.IndexFlatIP(ce.embedding_dim) for _ in range(n_layers)]
    context_indexes = [faiss.IndexFlatIP(ce.embedding_dim) for _ in range(n_layers)]

    # Choose once which per-sentence attributes to read.
    if drop_null:
        emb_attr, ctx_attr = 'zero_special_embeddings', 'zero_special_contexts'
    else:
        emb_attr, ctx_attr = 'embeddings', 'contexts'

    for start in range(0, len(ce), stepsize):
        batch = ce[start:start + stepsize]

        # Join the sentences of the batch along axis 1; the leading axis is
        # then indexed by layer below.
        embeddings = np.concatenate([getattr(s, emb_attr) for s in batch], axis=1)
        contexts = np.concatenate([getattr(s, ctx_attr) for s in batch], axis=1)

        for layer, (emb_index, ctx_index) in enumerate(zip(embedding_indexes, context_indexes)):
            emb_index.add(embeddings[layer])
            ctx_index.add(contexts[layer])

    return embedding_indexes, context_indexes
开发者ID:bhoov,项目名称:exbert,代码行数:31,代码来源:create_faiss.py

示例8: predict_landmark_id

# 需要导入模块: import faiss [as 别名]
# 或者: from faiss import IndexFlatIP [as 别名]
def predict_landmark_id(ids_query, feats_query, ids_train, feats_train, landmark_dict, voting_k=3):
    """Predict a landmark id per query by similarity-weighted voting over neighbours.

    Every query feature is searched against a flat inner-product Faiss index
    of the training features. The landmark ids of the `voting_k` retrieved
    training images are accumulated, weighted by their similarity, and the
    landmark with the largest total wins.

    Args:
        ids_query: ids of the query images; becomes the 'id' column/index.
        feats_query: query feature matrix passed to `index.search`.
        ids_train: ids of the training images, indexed with the neighbour
            index matrix (assumes a numpy array of strings -- TODO confirm).
        feats_train: training feature matrix; `shape[1]` is the index
            dimensionality.
        landmark_dict: mapping from training image id (the part after the
            last '/') to landmark id.
        voting_k: number of neighbours retrieved and used for voting.

    Returns:
        A DataFrame indexed by 'id' with columns 'landmarks'
        ("<landmark_id> <score>" string), 'landmark_id' (string) and
        'score' (float32, summed similarity divided by `voting_k`).
    """
    print('build index...')
    cpu_index = faiss.IndexFlatIP(feats_train.shape[1])
    cpu_index.add(feats_train)
    # sims[q, j] / topk_idx[q, j]: similarity and training-row index of the
    # j-th neighbour of query q.
    sims, topk_idx = cpu_index.search(x=feats_query, k=voting_k)
    print('query search done.')

    df = pd.DataFrame(ids_query, columns=['id'])
    # One space-separated string of neighbour ids per query.
    # NOTE(review): np.apply_along_axis sizes its output from the first
    # row's result; confirm joined strings of different lengths are not
    # truncated (harmless if all ids have the same length).
    df['images'] = np.apply_along_axis(' '.join, axis=1, arr=ids_train[topk_idx])

    rows = []
    for imidx, (_, r) in tqdm.tqdm(enumerate(df.iterrows()), total=len(df)):
        # Keep only the basename of each neighbour id.
        image_ids = [name.split('/')[-1] for name in r.images.split(' ')]
        counter = Counter()
        for i, image_id in enumerate(image_ids[:voting_k]):
            landmark_id = landmark_dict[image_id]

            # Weight each vote by the neighbour's similarity.
            counter[landmark_id] += sims[imidx, i]

        # Landmark with the highest accumulated similarity.
        landmark_id, score = counter.most_common(1)[0]
        rows.append({
            'id': r['id'],
            'landmarks': f'{landmark_id} {score:.9f}',
        })

    pred = pd.DataFrame(rows).set_index('id')
    # Split the "<landmark_id> <score>" strings back into two columns.
    pred['landmark_id'], pred['score'] = list(
        zip(*pred['landmarks'].apply(lambda x: str(x).split(' '))))
    # Average the summed similarity over the number of voters.
    pred['score'] = pred['score'].astype(np.float32) / voting_k

    return pred
开发者ID:lyakaap,项目名称:Landmark2019-1st-and-3rd-Place-Solution,代码行数:33,代码来源:submit_retrieval.py

示例9: __init__

# 需要导入模块: import faiss [as 别名]
# 或者: from faiss import IndexFlatIP [as 别名]
def __init__(self, database, method):
    """Create the Faiss index for `method` and populate it.

    :param database: forwarded to the parent constructor.
    :param method: 'cosine' (flat inner-product index) or 'euclidean'
        (flat L2 index); any other value raises ``KeyError``.
    """
    super().__init__(database, method)
    index_types = {
        'cosine': faiss.IndexFlatIP,
        'euclidean': faiss.IndexFlatL2,
    }
    index_cls = index_types[method]
    self.index = index_cls(self.D)
    # Move the index to all GPUs only when CUDA_VISIBLE_DEVICES is set and
    # non-empty.
    if os.environ.get('CUDA_VISIBLE_DEVICES'):
        print('CUDA', os.environ.get('CUDA_VISIBLE_DEVICES'))
        self.index = faiss.index_cpu_to_all_gpus(self.index)
    self.add()
开发者ID:lyakaap,项目名称:Landmark2019-1st-and-3rd-Place-Solution,代码行数:10,代码来源:reranking.py

示例10: _build_exact_index

# 需要导入模块: import faiss [as 别名]
# 或者: from faiss import IndexFlatIP [as 别名]
def _build_exact_index(self,
                       data: np.ndarray):
    """Build an exact (flat) index over `data` and store it in ``self.index``.

    The ``'rbf'`` kernel uses an L2 index; every other kernel uses an inner
    product index. The CPU index is kept in ``self.cpu_index_flat``; unless
    ``self.cpu`` is truthy it is moved to GPU 0 before the data is added.

    :param data: 2-D array with one vector per row.
    """
    dimensionality = data.shape[1]

    flat_cls = faiss.IndexFlatL2 if self.kernel_name in {'rbf'} else faiss.IndexFlatIP
    self.cpu_index_flat = flat_cls(dimensionality)

    if self.cpu:
        self.index = self.cpu_index_flat
    else:
        self.index = faiss.index_cpu_to_gpu(self.resource, 0, self.cpu_index_flat)
    self.index.add(data)
开发者ID:uclnlp,项目名称:gntp,代码行数:16,代码来源:faiss.py

示例11: get_index

# 需要导入模块: import faiss [as 别名]
# 或者: from faiss import IndexFlatIP [as 别名]
def get_index():
    """Build three 128-dimensional flat Faiss indexes over the movie embeddings.

    Returns a dict with keys 'L2' (L2 index), 'IP' (inner-product index on
    the raw embeddings) and 'COS' (inner-product index on L2-normalised
    embeddings).
    """
    import faiss
    from sklearn.preprocessing import normalize

    dim = 128
    # test indexes
    indexes = {
        'L2': faiss.IndexFlatL2(dim),
        'IP': faiss.IndexFlatIP(dim),
        'COS': faiss.IndexFlatIP(dim),
    }

    embeddings, _, _ = get_embeddings()
    embeddings = embeddings.numpy().astype('float32')
    indexes['L2'].add(embeddings)
    indexes['IP'].add(embeddings)
    # Normalised rows for the 'COS' index.
    indexes['COS'].add(normalize(embeddings, axis=1, norm='l2'))
    return indexes
开发者ID:awarebayes,项目名称:RecNN,代码行数:16,代码来源:streamlit_demo.py

示例12: __init__

# 需要导入模块: import faiss [as 别名]
# 或者: from faiss import IndexFlatIP [as 别名]
def __init__(self, database, method):
    """Create the Faiss index matching `method` and fill it via ``self.add()``.

    :param database: forwarded to the parent constructor.
    :param method: 'cosine' selects a flat inner-product index,
        'euclidean' a flat L2 index; other values raise ``KeyError``.
    """
    super().__init__(database, method)
    if method == 'cosine':
        index_cls = faiss.IndexFlatIP
    elif method == 'euclidean':
        index_cls = faiss.IndexFlatL2
    else:
        raise KeyError(method)
    self.index = index_cls(self.D)
    # Replicate onto all GPUs when CUDA_VISIBLE_DEVICES is set and non-empty.
    if os.environ.get('CUDA_VISIBLE_DEVICES'):
        self.index = faiss.index_cpu_to_all_gpus(self.index)
    self.add()
开发者ID:fyang93,项目名称:diffusion,代码行数:9,代码来源:knn.py

示例13: __init__

# 需要导入模块: import faiss [as 别名]
# 或者: from faiss import IndexFlatIP [as 别名]
def __init__(self,
             feats,
             k,
             index_path='',
             index_key='',
             nprobe=128,
             omp_num_threads=None,
             rebuild_index=True,
             verbose=True,
             **kwargs):
    """Build (or load) a Faiss inner-product index and compute k nearest neighbours.

    The result is stored in ``self.knns``: either the ``'data'`` array
    loaded from ``index_path + '.npz'`` when that file exists, or a list
    with one ``(neighbour_indices int32, 1 - similarity float32)`` tuple
    per row of `feats`.

    :param feats: 2-D feature matrix, one vector per row.
    :param k: number of neighbours to retrieve per row.
    :param index_path: where to read/write the index; '' disables both.
    :param index_key: '' for a plain flat index, 'IVF' for IVF-Flat, any
        other value is passed to ``faiss.index_factory``. Keys containing
        'HNSW' are rejected.
    :param nprobe: requested number of probes (capped at ``nlist``).
    :param omp_num_threads: if given, sets the Faiss OpenMP thread count.
    :param rebuild_index: when false, an existing file at `index_path` is
        loaded instead of rebuilding.
    :param verbose: stored on the instance and forwarded to ``Timer``.
    :param kwargs: accepted and ignored.
    """
    import faiss
    if omp_num_threads is not None:
        faiss.omp_set_num_threads(omp_num_threads)
    self.verbose = verbose
    with Timer('[faiss] build index', verbose):
        if index_path != '' and not rebuild_index and os.path.exists(
                index_path):
            print('[faiss] read index from {}'.format(index_path))
            index = faiss.read_index(index_path)
        else:
            feats = feats.astype('float32')
            size, dim = feats.shape
            index = faiss.IndexFlatIP(dim)
            if index_key != '':
                assert index_key.find(
                    'HNSW') < 0, 'HNSW returns distances insted of sims'
                metric = faiss.METRIC_INNER_PRODUCT
                # Number of inverted lists grows with sqrt(size), capped
                # at 4096.
                nlist = min(4096, 8 * round(math.sqrt(size)))
                if index_key == 'IVF':
                    # The flat index built above becomes the quantizer.
                    quantizer = index
                    index = faiss.IndexIVFFlat(quantizer, dim, nlist,
                                               metric)
                else:
                    index = faiss.index_factory(dim, index_key, metric)
                if index_key.find('Flat') < 0:
                    assert not index.is_trained
                index.train(feats)
                index.nprobe = min(nprobe, nlist)
                assert index.is_trained
                print('nlist: {}, nprobe: {}'.format(nlist, nprobe))
            index.add(feats)
            if index_path != '':
                print('[faiss] save index to {}'.format(index_path))
                mkdir_if_no_exists(index_path)
                faiss.write_index(index, index_path)
    with Timer('[faiss] query topk {}'.format(k), verbose):
        knn_ofn = index_path + '.npz'
        if os.path.exists(knn_ofn):
            print('[faiss] read knns from {}'.format(knn_ofn))
            self.knns = np.load(knn_ofn)['data']
        else:
            # When the index was loaded from disk `feats` has not been
            # converted yet; Faiss expects C-contiguous float32 input, so
            # normalise here (no copy if it already is).
            queries = np.ascontiguousarray(feats, dtype='float32')
            sims, nbrs = index.search(queries, k=k)
            # Store inner-product similarities as distances (1 - sim).
            self.knns = [(np.array(nbr, dtype=np.int32),
                          1 - np.array(sim, dtype=np.float32))
                         for nbr, sim in zip(nbrs, sims)]
开发者ID:yl-1993,项目名称:learn-to-cluster,代码行数:57,代码来源:knn.py


注:本文中的faiss.IndexFlatIP方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。