This article collects typical usage examples of Python's faiss.IndexFlatIP. If you are wondering what faiss.IndexFlatIP does, how to call it, or what real code using it looks like, the curated examples below may help. You can also browse further usage examples from the faiss module to which it belongs.
A total of 13 code examples of faiss.IndexFlatIP are shown below, sorted by popularity by default.
Example 1: main
# Required import: import faiss [as alias]
# Or: from faiss import IndexFlatIP [as alias]
def main():
    f = h5py.File(opt.states, "r")
    data = f[opt.data]
    seqs, slens, hid = data.shape
    print("Processing {} Sequences".format(seqs))
    print("with {} tokens each".format(slens))
    print("and {} states".format(hid))
    # Initialize a new index
    index = faiss.IndexFlatIP(hid)
    # Fill it
    for ix in tqdm(range(0, seqs - opt.stepsize, opt.stepsize)):
        cdata = np.array(data[ix:ix + opt.stepsize].reshape(-1, hid),
                         dtype="float32")
        index.add(cdata)
    f.close()
    faiss.write_index(index, opt.output)
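For context, a minimal sketch of how the index written above might be loaded and queried afterwards; the file name and the random query array are illustrative assumptions, not part of the original example:
import faiss
import numpy as np

index = faiss.read_index("states.faiss")                 # assumed value of opt.output
queries = np.random.rand(5, index.d).astype("float32")   # placeholder query states
scores, ids = index.search(queries, 10)                  # top-10 inner-product matches per query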
Example 2: _build_approximate_index
# Required import: import faiss [as alias]
# Or: from faiss import IndexFlatIP [as alias]
def _build_approximate_index(self,
                             data: np.ndarray):
    dimensionality = data.shape[1]
    nlist = 100 if data.shape[0] > 100 else 2
    if self.kernel_name in {'rbf'}:
        quantizer = faiss.IndexFlatL2(dimensionality)
        cpu_index_flat = faiss.IndexIVFFlat(quantizer, dimensionality, nlist, faiss.METRIC_L2)
    else:
        quantizer = faiss.IndexFlatIP(dimensionality)
        cpu_index_flat = faiss.IndexIVFFlat(quantizer, dimensionality, nlist)
    gpu_index_ivf = faiss.index_cpu_to_gpu(self.resource, 0, cpu_index_flat)
    gpu_index_ivf.train(data)
    gpu_index_ivf.add(data)
    self.index = gpu_index_ivf
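Note that an IVF index only scans nprobe of its nlist inverted lists at search time, so nprobe usually needs to be tuned before searching (in GPU builds this is typically done through faiss.GpuParameterSpace). A minimal CPU-side sketch, with illustrative shapes and values that are not taken from the example above:
import faiss
import numpy as np

data = np.random.rand(10000, 64).astype("float32")
quantizer = faiss.IndexFlatIP(64)
index = faiss.IndexIVFFlat(quantizer, 64, 100, faiss.METRIC_INNER_PRODUCT)
index.train(data)
index.add(data)
index.nprobe = 16                          # scan 16 of the 100 inverted lists
scores, ids = index.search(data[:5], 10)   # top-10 neighbors for 5 queries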
Example 3: do_indexing
# Required import: import faiss [as alias]
# Or: from faiss import IndexFlatIP [as alias]
def do_indexing(word2vec_model=None):
    if not os.path.isfile(INDEX_FILE_PATH):
        index = faiss.IndexFlatIP(word2vec_model.vector_size)
        index.add(word2vec_model.wv.syn0norm)
        faiss.write_index(index, INDEX_FILE_PATH)
        return index
    else:
        return faiss.read_index(INDEX_FILE_PATH)
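Because IndexFlatIP scores by raw inner product, the cosine ranking here relies on syn0norm already being unit-normalized. A minimal query sketch against the index built above; the word "king" and the older gensim attributes (word_vec with use_norm, index2word) are assumptions matching the syn0norm-era API:
# query with a unit-normalized word vector so inner product == cosine similarity
query = word2vec_model.wv.word_vec("king", use_norm=True).astype("float32")
sims, idx = index.search(query.reshape(1, -1), 10)
neighbors = [word2vec_model.wv.index2word[i] for i in idx[0]]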
Example 4: __init__
# Required import: import faiss [as alias]
# Or: from faiss import IndexFlatIP [as alias]
def __init__(self, dim=None):
    super(Index, self).__init__()
    self.D, self.I = None, None
    self.index = faiss.IndexFlatIP(dim)
Example 5: __init__
# Required import: import faiss [as alias]
# Or: from faiss import IndexFlatIP [as alias]
def __init__(self, vector_sz: int = 1, buffer_size: int = 50000):
    super(DenseFlatIndexer, self).__init__(buffer_size=buffer_size)
    self.index = faiss.IndexFlatIP(vector_sz)
Example 6: get_nn_avg_dist
# Required import: import faiss [as alias]
# Or: from faiss import IndexFlatIP [as alias]
def get_nn_avg_dist(emb, query, knn):
    """
    Compute the average distance of the `knn` nearest neighbors
    for a given set of embeddings and queries.
    Use Faiss if available.
    """
    if FAISS_AVAILABLE:
        emb = emb.cpu().numpy()
        query = query.cpu().numpy()
        if hasattr(faiss, 'StandardGpuResources'):
            # gpu mode
            res = faiss.StandardGpuResources()
            config = faiss.GpuIndexFlatConfig()
            config.device = 0
            index = faiss.GpuIndexFlatIP(res, emb.shape[1], config)
        else:
            # cpu mode
            index = faiss.IndexFlatIP(emb.shape[1])
        index.add(emb)
        distances, _ = index.search(query, knn)
        return distances.mean(1)
    else:
        bs = 1024
        all_distances = []
        emb = emb.transpose(0, 1).contiguous()
        for i in range(0, query.shape[0], bs):
            distances = query[i:i + bs].mm(emb)
            best_distances, _ = distances.topk(knn, dim=1, largest=True, sorted=True)
            all_distances.append(best_distances.mean(1).cpu())
        all_distances = torch.cat(all_distances)
        return all_distances.numpy()
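A minimal usage sketch; it assumes the module-level FAISS_AVAILABLE flag from the surrounding code and unit-normalized embeddings, so the inner products behave like cosine similarities:
import torch

emb = torch.randn(1000, 300)
emb = emb / emb.norm(dim=1, keepdim=True)        # normalize so IP == cosine
query = emb[:10].clone()
avg_dist = get_nn_avg_dist(emb, query, knn=10)   # numpy array of shape (10,)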
Example 7: train_indexes
# Required import: import faiss [as alias]
# Or: from faiss import IndexFlatIP [as alias]
def train_indexes(ce: CorpusDataWrapper, stepsize=100, drop_null=True):
    """
    Parameters:
    ===========
    - corpus_embedding: Wrapper around HDF5 file for easy access to data
    - stepsize: How many sentences to train with at once
    - drop_null: Don't index the embeddings of special tokens (e.g., [CLS] and [SEP]) whose spacy POS are null
    """
    NUM_LAYERS = ce.n_layers  # want to account for the input layer, which for attentions + contexts is all value 0
    embedding_indexes = [faiss.IndexFlatIP(ce.embedding_dim) for i in range(NUM_LAYERS)]
    context_indexes = [faiss.IndexFlatIP(ce.embedding_dim) for i in range(NUM_LAYERS)]
    for ix in range(0, len(ce), stepsize):
        cdata = ce[ix:ix + stepsize]
        if drop_null:
            embeddings = np.concatenate([c.zero_special_embeddings for c in cdata], axis=1)
            contexts = np.concatenate([c.zero_special_contexts for c in cdata], axis=1)
        else:
            embeddings = np.concatenate([c.embeddings for c in cdata], axis=1)
            contexts = np.concatenate([c.contexts for c in cdata], axis=1)
        for i in range(NUM_LAYERS):
            embedding_indexes[i].add(embeddings[i])
            context_indexes[i].add(contexts[i])
    return embedding_indexes, context_indexes
Example 8: predict_landmark_id
# Required import: import faiss [as alias]
# Or: from faiss import IndexFlatIP [as alias]
def predict_landmark_id(ids_query, feats_query, ids_train, feats_train, landmark_dict, voting_k=3):
    print('build index...')
    cpu_index = faiss.IndexFlatIP(feats_train.shape[1])
    cpu_index.add(feats_train)
    sims, topk_idx = cpu_index.search(x=feats_query, k=voting_k)
    print('query search done.')

    df = pd.DataFrame(ids_query, columns=['id'])
    df['images'] = np.apply_along_axis(' '.join, axis=1, arr=ids_train[topk_idx])

    rows = []
    for imidx, (_, r) in tqdm.tqdm(enumerate(df.iterrows()), total=len(df)):
        image_ids = [name.split('/')[-1] for name in r.images.split(' ')]
        counter = Counter()
        for i, image_id in enumerate(image_ids[:voting_k]):
            landmark_id = landmark_dict[image_id]
            counter[landmark_id] += sims[imidx, i]
        landmark_id, score = counter.most_common(1)[0]
        rows.append({
            'id': r['id'],
            'landmarks': f'{landmark_id} {score:.9f}',
        })

    pred = pd.DataFrame(rows).set_index('id')
    pred['landmark_id'], pred['score'] = list(
        zip(*pred['landmarks'].apply(lambda x: str(x).split(' '))))
    pred['score'] = pred['score'].astype(np.float32) / voting_k
    return pred
Example 9: __init__
# Required import: import faiss [as alias]
# Or: from faiss import IndexFlatIP [as alias]
def __init__(self, database, method):
    super().__init__(database, method)
    self.index = {'cosine': faiss.IndexFlatIP,
                  'euclidean': faiss.IndexFlatL2}[method](self.D)
    if os.environ.get('CUDA_VISIBLE_DEVICES'):
        print('CUDA', os.environ.get('CUDA_VISIBLE_DEVICES'))
        self.index = faiss.index_cpu_to_all_gpus(self.index)
    self.add()
Example 10: _build_exact_index
# Required import: import faiss [as alias]
# Or: from faiss import IndexFlatIP [as alias]
def _build_exact_index(self,
                       data: np.ndarray):
    dimensionality = data.shape[1]
    if self.kernel_name in {'rbf'}:
        self.cpu_index_flat = faiss.IndexFlatL2(dimensionality)
    else:
        self.cpu_index_flat = faiss.IndexFlatIP(dimensionality)
    if not self.cpu:
        self.index = faiss.index_cpu_to_gpu(self.resource, 0, self.cpu_index_flat)
    else:
        self.index = self.cpu_index_flat
    self.index.add(data)
Example 11: get_index
# Required import: import faiss [as alias]
# Or: from faiss import IndexFlatIP [as alias]
def get_index():
    import faiss
    from sklearn.preprocessing import normalize
    # test indexes
    indexL2 = faiss.IndexFlatL2(128)
    indexIP = faiss.IndexFlatIP(128)
    indexCOS = faiss.IndexFlatIP(128)
    mov_mat, _, _ = get_embeddings()
    mov_mat = mov_mat.numpy().astype('float32')
    indexL2.add(mov_mat)
    indexIP.add(mov_mat)
    indexCOS.add(normalize(mov_mat, axis=1, norm='l2'))
    return {'L2': indexL2, 'IP': indexIP, 'COS': indexCOS}
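For the COS variant, queries have to be L2-normalized the same way as the indexed vectors, otherwise the inner product is no longer a cosine similarity. A minimal sketch; the random query vector is a placeholder:
import numpy as np
from sklearn.preprocessing import normalize

indexes = get_index()
query = np.random.rand(1, 128).astype('float32')
query = normalize(query, axis=1, norm='l2').astype('float32')  # keep float32 for faiss
scores, ids = indexes['COS'].search(query, 10)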
Example 12: __init__
# Required import: import faiss [as alias]
# Or: from faiss import IndexFlatIP [as alias]
def __init__(self, database, method):
    super().__init__(database, method)
    self.index = {'cosine': faiss.IndexFlatIP,
                  'euclidean': faiss.IndexFlatL2}[method](self.D)
    if os.environ.get('CUDA_VISIBLE_DEVICES'):
        self.index = faiss.index_cpu_to_all_gpus(self.index)
    self.add()
Example 13: __init__
# Required import: import faiss [as alias]
# Or: from faiss import IndexFlatIP [as alias]
def __init__(self,
             feats,
             k,
             index_path='',
             index_key='',
             nprobe=128,
             omp_num_threads=None,
             rebuild_index=True,
             verbose=True,
             **kwargs):
    import faiss
    if omp_num_threads is not None:
        faiss.omp_set_num_threads(omp_num_threads)
    self.verbose = verbose

    with Timer('[faiss] build index', verbose):
        if index_path != '' and not rebuild_index and os.path.exists(
                index_path):
            print('[faiss] read index from {}'.format(index_path))
            index = faiss.read_index(index_path)
        else:
            feats = feats.astype('float32')
            size, dim = feats.shape
            index = faiss.IndexFlatIP(dim)
            if index_key != '':
                assert index_key.find(
                    'HNSW') < 0, 'HNSW returns distances instead of sims'
                metric = faiss.METRIC_INNER_PRODUCT
                nlist = min(4096, 8 * round(math.sqrt(size)))
                if index_key == 'IVF':
                    quantizer = index
                    index = faiss.IndexIVFFlat(quantizer, dim, nlist,
                                               metric)
                else:
                    index = faiss.index_factory(dim, index_key, metric)
                if index_key.find('Flat') < 0:
                    assert not index.is_trained
                index.train(feats)
                index.nprobe = min(nprobe, nlist)
                assert index.is_trained
                print('nlist: {}, nprobe: {}'.format(nlist, nprobe))
            index.add(feats)
            if index_path != '':
                print('[faiss] save index to {}'.format(index_path))
                mkdir_if_no_exists(index_path)
                faiss.write_index(index, index_path)

    with Timer('[faiss] query topk {}'.format(k), verbose):
        knn_ofn = index_path + '.npz'
        if os.path.exists(knn_ofn):
            print('[faiss] read knns from {}'.format(knn_ofn))
            self.knns = np.load(knn_ofn)['data']
        else:
            sims, nbrs = index.search(feats, k=k)
            self.knns = [(np.array(nbr, dtype=np.int32),
                          1 - np.array(sim, dtype=np.float32))
                         for nbr, sim in zip(nbrs, sims)]
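Each stored pair converts the inner-product similarity to a distance as 1 - sim, which is a cosine distance when feats are L2-normalized. A small hypothetical helper for reading the neighbors back, not part of the original class:
def get_knn(knns, i):
    """Return (neighbor ids, inner-product similarities) for sample i."""
    nbrs, dists = knns[i]
    return nbrs, 1.0 - dists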