This article collects typical usage examples of the Python method faiss.index_factory. If you are unsure exactly how to use faiss.index_factory, how to call it, or what real examples look like, the curated code examples below may help. You can also explore further usage examples of the faiss module that this method belongs to.
The following shows 10 code examples of the faiss.index_factory method, sorted by popularity by default.
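Before looking at the examples, here is a minimal, self-contained sketch of what faiss.index_factory does: it builds an index from a textual description such as "IVF100,Flat" (an inverted-file index over raw vectors with 100 cells). The dimensions, array sizes, and parameter values below are arbitrary placeholders for illustration, not taken from any of the examples.

import faiss
import numpy as np

d = 64                                            # vector dimensionality
xb = np.random.rand(10000, d).astype(np.float32)  # database vectors
xq = np.random.rand(5, d).astype(np.float32)      # query vectors

# build an IVF index with 100 cells that stores raw ("Flat") vectors
index = faiss.index_factory(d, "IVF100,Flat", faiss.METRIC_L2)
index.train(xb)             # IVF indexes must be trained before adding data
index.add(xb)
index.nprobe = 10           # visit 10 of the 100 cells per query
D, I = index.search(xq, 5)  # distances and ids of the 5 nearest neighbours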
Example 1: execute
# Module to import: import faiss [as alias]
# Or: from faiss import index_factory [as alias]
def execute(cls, ctx, op):
    (data,), device_id, _ = as_same_device(
        [ctx[op.input.key]], device=op.device, ret_extra=True)

    with device(device_id):
        index = faiss.index_factory(data.shape[1], op.faiss_index,
                                    op.faiss_metric_type)

        if device_id >= 0:  # pragma: no cover
            # GPU
            index = _index_to_gpu(index, device_id)
            index.train_c(data.shape[0], _swig_ptr_from_cupy_float32_array(data))
        else:
            index.train(data)

        ctx[op.outputs[0].key] = _store_index(
            ctx, op, index, device_id)
Example 2: faiss_train
# Module to import: import faiss [as alias]
# Or: from faiss import index_factory [as alias]
def faiss_train(fn_feature, root_path, index_path='train.index', id_path='data.json'):
    folder_names = os.listdir(root_path)
    logging.info('directory %s ', folder_names)
    ids = None
    vals = None
    id_json = {}
    print(folder_names)
    for idx, folder_name in enumerate(folder_names):
        id_json[str(idx)] = folder_name
        now_path = os.path.join(root_path, folder_name)
        feature_val = fn_feature(now_path)
        vals = np.concatenate((feature_val, vals), axis=0) if vals is not None else feature_val
        id_np = np.asarray([idx] * feature_val.shape[0])
        ids = np.concatenate((id_np, ids), axis=0) if ids is not None else id_np
    N, dim = vals.shape
    x = int(2 * math.sqrt(N))
    index_description = "IVF{x},Flat".format(x=x)
    # dim is expected to equal 7 * 7 * 512 for the features produced by fn_feature
    index = faiss.index_factory(7 * 7 * 512, index_description, faiss.METRIC_INNER_PRODUCT)
    index.train(vals)
    index.add_with_ids(vals, ids)
    faiss.write_index(index, index_path)
    with open(id_path, 'w', encoding='utf-8') as f:
        json.dump(id_json, f, ensure_ascii=False, indent=4)
    print(id_json)
    return index, id_json
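A possible query-time counterpart to Example 2 (this sketch is not part of the original snippet; it assumes the same fn_feature extractor producing 7 * 7 * 512-dimensional float32 rows, and the function name faiss_search is hypothetical): load the saved index and id map, search, and translate the returned ids back to folder names.

def faiss_search(fn_feature, query_path, index_path='train.index',
                 id_path='data.json', topk=5, nprobe=8):
    # load the IVF index written by faiss_train and the id -> folder map
    index = faiss.read_index(index_path)
    index.nprobe = nprobe                    # number of IVF cells to visit
    with open(id_path, 'r', encoding='utf-8') as f:
        id_json = json.load(f)
    query = fn_feature(query_path).astype(np.float32)
    sims, ids = index.search(query, topk)    # inner-product similarities
    # ids of -1 mean "no neighbour found"; map the rest to folder names
    return [[id_json[str(i)] for i in row if i >= 0] for row in ids]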
Example 3: _execute_one_chunk
# Module to import: import faiss [as alias]
# Or: from faiss import index_factory [as alias]
def _execute_one_chunk(cls, ctx, op):
    (inp,), device_id, xp = as_same_device(
        [ctx[c.key] for c in op.inputs], device=op.device, ret_extra=True)

    with device(device_id):
        # create index
        index = faiss.index_factory(inp.shape[1], op.faiss_index,
                                    op.faiss_metric_type)
        # GPU
        if device_id >= 0:  # pragma: no cover
            index = _index_to_gpu(index, device_id)

        # train index
        if not index.is_trained:
            assert op.n_sample is not None
            sample_indices = xp.random.choice(inp.shape[0],
                                              size=op.n_sample, replace=False)
            sampled = inp[sample_indices]
            index.train(sampled)

        if op.metric == 'cosine':
            # faiss does not support cosine distances directly,
            # data needs to be normalized before adding it to the index,
            # refer to:
            # https://github.com/facebookresearch/faiss/wiki/FAQ#how-can-i-index-vectors-for-cosine-distance
            faiss.normalize_L2(inp)

        # add vectors to index
        if device_id >= 0:  # pragma: no cover
            # gpu
            inp = inp.astype(np.float32, copy=False)
            index.add_c(inp.shape[0], _swig_ptr_from_cupy_float32_array(inp))
        else:
            index.add(inp)

        ctx[op.outputs[0].key] = _store_index(ctx, op, index, device_id)
Example 4: build_faiss_index
# Module to import: import faiss [as alias]
# Or: from faiss import index_factory [as alias]
def build_faiss_index(X, index_name='auto', n_sample=None, metric="euclidean",
                      random_state=None, same_distribution=True,
                      accuracy=False, memory_require=None, **kw):
    X = astensor(X)

    if metric not in METRIC_TO_FAISS_METRIC_TYPE:
        raise ValueError('unknown metric: {}'.format(metric))
    if index_name != 'auto':
        try:
            faiss.index_factory(X.shape[1], index_name,
                                METRIC_TO_FAISS_METRIC_TYPE[metric])
        except RuntimeError:
            raise ValueError('illegal faiss index: {}'.format(index_name))

    rs = check_random_state(random_state)
    if isinstance(rs, RandomState):
        rs = rs.to_numpy()
    seed = gen_random_seeds(1, rs)[0]
    if memory_require is None:
        memory_require = MemoryRequirementGrade.low
    else:
        memory_require = _get_memory_require(memory_require)
    op = FaissBuildIndex(faiss_index=index_name, metric=metric,
                         n_sample=n_sample, gpu=X.op.gpu, seed=seed,
                         same_distribution=same_distribution,
                         accuracy=accuracy, memory_require=memory_require, **kw)
    return op(X)
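A heavily hedged usage sketch for Example 4: the function comes from the Mars project, so the import path, the chunk_size value, and the execute() call below are assumptions about how that code base exposes it, not something shown in the snippet itself.

import numpy as np
import mars.tensor as mt
from mars.learn.neighbors._faiss import build_faiss_index  # assumed import path

raw = np.random.rand(10000, 32).astype(np.float32)
X = mt.tensor(raw, chunk_size=2500)       # a chunked Mars tensor

# 'auto' lets Mars pick a faiss factory string from the data size
index = build_faiss_index(X, index_name='auto', metric='euclidean',
                          random_state=0)
index.execute()                           # trigger the deferred computation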
Example 5: fit
# Module to import: import faiss [as alias]
# Or: from faiss import index_factory [as alias]
def fit(self, X):
    X = X.astype(numpy.float32)
    self._index = faiss.GpuIndexIVFFlat(self._res, len(X[0]), self._n_bits,
                                        faiss.METRIC_L2)
    # self._index = faiss.index_factory(len(X[0]),
    #                                   "IVF%d,Flat" % self._n_bits)
    # co = faiss.GpuClonerOptions()
    # co.useFloat16 = True
    # self._index = faiss.index_cpu_to_gpu(self._res, 0,
    #                                      self._index, co)
    self._index.train(X)
    self._index.add(X)
    self._index.setNumProbes(self._n_probes)
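The commented-out lines in Example 5 hint at an alternative construction: build the same IVF index on the CPU with faiss.index_factory and clone it to the GPU. Below is a rough, hedged version of that path, assuming a faiss build with GPU support; the dimensions and list counts are placeholders, and GpuParameterSpace is used to set nprobe on the cloned index.

import faiss
import numpy as np

d, nlist, nprobe = 128, 1024, 32
X = np.random.rand(100000, d).astype(np.float32)

cpu_index = faiss.index_factory(d, "IVF%d,Flat" % nlist, faiss.METRIC_L2)

res = faiss.StandardGpuResources()
co = faiss.GpuClonerOptions()
co.useFloat16 = True                     # store data as float16 on the GPU
gpu_index = faiss.index_cpu_to_gpu(res, 0, cpu_index, co)

gpu_index.train(X)
gpu_index.add(X)
# set nprobe on the cloned GPU index (assumes a GPU-enabled faiss build)
faiss.GpuParameterSpace().set_index_parameter(gpu_index, 'nprobe', nprobe)
D, I = gpu_index.search(X[:5], 10)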
Example 6: cluster
# Module to import: import faiss [as alias]
# Or: from faiss import index_factory [as alias]
def cluster(features, th_knn, max_size=300, labels=None):
    '''
    Unlike face-train, the similarities used for clustering here are not
    converted via 1 - sim.
    :param features:
    :param th_knn:
    :param max_size:
    :return:
    '''
    k = 80
    nprobe = 8
    # knn
    size, dim = features.shape
    metric = faiss.METRIC_INNER_PRODUCT
    nlist = min(4096, 8 * round(math.sqrt(size)))
    if size < 4 * 10000:
        fac_str = "Flat"  # same
    elif size < 80 * 10000:
        fac_str = "IVF" + str(nlist) + ",Flat"  # same
    elif size < 200 * 10000:
        fac_str = "IVF16384,Flat"  # same
    else:
        fac_str = "IVF16384,PQ8"  # same
    logger.info("cdp cluster fac str %s", fac_str)
    index = faiss.index_factory(dim, fac_str, metric)
    index.train(features)
    index.nprobe = min(nprobe, nlist)
    assert index.is_trained
    logger.info('cdp cluster nlist: {}, nprobe: {}'.format(nlist, nprobe))
    index.add(features)
    sims, ners = index.search(features, k=k)
    if "Flat" not in fac_str:
        sims = sim_by_feature(features, features, ners)
    knns = np.concatenate([sims[:, np.newaxis].astype(np.float32),
                           ners[:, np.newaxis].astype(np.float32)], axis=1)
    # del features
    return cluster_by_knns(knns, features, th_knn, max_size, labels)
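Because Example 6 builds its index with faiss.METRIC_INNER_PRODUCT, the returned inner products only equal cosine similarities if the features are L2-normalized beforehand, as the faiss FAQ linked in Examples 3 and 7 points out. A small preprocessing sketch, assuming features is a NumPy array:

features = np.ascontiguousarray(features, dtype=np.float32)
faiss.normalize_L2(features)  # in-place row-wise L2 normalization
# after this, the inner products returned by index.search are cosine similarities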
Example 7: _execute_map
# Module to import: import faiss [as alias]
# Or: from faiss import index_factory [as alias]
def _execute_map(cls, ctx, op):
    (data,), device_id, _ = as_same_device(
        [ctx[op.inputs[0].key]], device=op.device, ret_extra=True)
    index = ctx[op.inputs[1].key] if len(op.inputs) == 2 else None

    with device(device_id):
        if index is not None:
            # fetch the trained index
            trained_index = _load_index(ctx, op, index, device_id)
            return_index_type = _get_index_type(op.return_index_type, ctx)
            if return_index_type == 'object':
                # clone a new one, because faiss does not guarantee
                # thread-safety for operations that change the index
                # https://github.com/facebookresearch/faiss/wiki/Threads-and-asynchronous-calls#thread-safety
                trained_index = faiss.clone_index(trained_index)
        else:
            trained_index = faiss.index_factory(data.shape[1], op.faiss_index,
                                                op.faiss_metric_type)
            if op.same_distribution:
                # no need to train, just create index
                pass
            else:
                # distribution is not the same, train on each chunk
                trained_index.train(data)

            if device_id >= 0:  # pragma: no cover
                trained_index = _index_to_gpu(trained_index, device_id)

        if op.metric == 'cosine':
            # faiss does not support cosine distances directly,
            # data needs to be normalized before adding it to the index,
            # refer to:
            # https://github.com/facebookresearch/faiss/wiki/FAQ#how-can-i-index-vectors-for-cosine-distance
            faiss.normalize_L2(data)

        # add data into index
        if device_id >= 0:  # pragma: no cover
            # gpu
            trained_index.add_c(data.shape[0], _swig_ptr_from_cupy_float32_array(data))
        else:
            trained_index.add(data)

        ctx[op.outputs[0].key] = _store_index(ctx, op, trained_index, device_id)
Example 8: testGenIndexStringAndSampleCount
# Module to import: import faiss [as alias]
# Or: from faiss import index_factory [as alias]
def testGenIndexStringAndSampleCount(self):
    d = 32

    # accuracy=True, could be Flat only
    ret = _gen_index_string_and_sample_count((10 ** 9, d), None, True, 'minimum')
    self.assertEqual(ret, ('Flat', None))

    # no memory concern
    ret = _gen_index_string_and_sample_count((10 ** 5, d), None, False, 'maximum')
    self.assertEqual(ret, ('HNSW32', None))
    index = faiss.index_factory(d, ret[0])
    self.assertTrue(index.is_trained)

    # memory concern not much
    ret = _gen_index_string_and_sample_count((10 ** 5, d), None, False, 'high')
    self.assertEqual(ret, ('IVF1580,Flat', 47400))
    index = faiss.index_factory(d, ret[0])
    self.assertFalse(index.is_trained)

    # memory quite important
    ret = _gen_index_string_and_sample_count((5 * 10 ** 6, d), None, False, 'low')
    self.assertEqual(ret, ('PCAR16,IVF65536_HNSW32,SQ8', 32 * 65536))
    index = faiss.index_factory(d, ret[0])
    self.assertFalse(index.is_trained)

    # memory very important
    ret = _gen_index_string_and_sample_count((10 ** 8, d), None, False, 'minimum')
    self.assertEqual(ret, ('OPQ16_32,IVF1048576_HNSW32,PQ16', 64 * 65536))
    index = faiss.index_factory(d, ret[0])
    self.assertFalse(index.is_trained)

    ret = _gen_index_string_and_sample_count((10 ** 10, d), None, False, 'low')
    self.assertEqual(ret, ('PCAR16,IVF1048576_HNSW32,SQ8', 64 * 65536))
    index = faiss.index_factory(d, ret[0])
    self.assertFalse(index.is_trained)

    with self.assertRaises(ValueError):
        # M > 64 raises an error
        _gen_index_string_and_sample_count((10 ** 5, d), None, False, 'maximum', M=128)

    with self.assertRaises(ValueError):
        # M > 64
        _gen_index_string_and_sample_count((10 ** 5, d), None, False, 'minimum', M=128)

    with self.assertRaises(ValueError):
        # dim should be a multiple of M
        _gen_index_string_and_sample_count((10 ** 5, d), None, False, 'minimum', M=16, dim=17)

    with self.assertRaises(ValueError):
        _gen_index_string_and_sample_count((10 ** 5, d), None, False, 'low', k=5)
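The factory strings asserted in Example 8 compose a preprocessing stage, a coarse quantizer, and an encoding: for instance 'OPQ16_32,IVF1048576_HNSW32,PQ16' means an OPQ rotation to 32 dimensions in 16 sub-blocks, an IVF with 2^20 lists whose coarse quantizer is an HNSW graph, and 16-byte PQ codes. A scaled-down sketch of the same shape (the smaller nlist of 1024 and the training-set size are arbitrary choices so the example trains quickly):

import faiss
import numpy as np

d = 32
index = faiss.index_factory(d, "OPQ16_32,IVF1024_HNSW32,PQ16")
assert not index.is_trained            # OPQ, IVF, and PQ all require training

xt = np.random.rand(50000, d).astype(np.float32)
index.train(xt)
index.add(xt)
D, I = index.search(xt[:3], 5)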
Example 9: __init__
# Module to import: import faiss [as alias]
# Or: from faiss import index_factory [as alias]
def __init__(self,
             feats,
             k,
             index_path='',
             index_key='',
             nprobe=128,
             omp_num_threads=None,
             rebuild_index=True,
             verbose=True,
             **kwargs):
    import faiss
    if omp_num_threads is not None:
        faiss.omp_set_num_threads(omp_num_threads)
    self.verbose = verbose

    with Timer('[faiss] build index', verbose):
        if index_path != '' and not rebuild_index and os.path.exists(index_path):
            print('[faiss] read index from {}'.format(index_path))
            index = faiss.read_index(index_path)
        else:
            feats = feats.astype('float32')
            size, dim = feats.shape
            index = faiss.IndexFlatIP(dim)
            if index_key != '':
                assert index_key.find(
                    'HNSW') < 0, 'HNSW returns distances instead of sims'
                metric = faiss.METRIC_INNER_PRODUCT
                nlist = min(4096, 8 * round(math.sqrt(size)))
                if index_key == 'IVF':
                    quantizer = index
                    index = faiss.IndexIVFFlat(quantizer, dim, nlist, metric)
                else:
                    index = faiss.index_factory(dim, index_key, metric)
                if index_key.find('Flat') < 0:
                    assert not index.is_trained
                index.train(feats)
                index.nprobe = min(nprobe, nlist)
                assert index.is_trained
                print('nlist: {}, nprobe: {}'.format(nlist, nprobe))
            index.add(feats)
            if index_path != '':
                print('[faiss] save index to {}'.format(index_path))
                mkdir_if_no_exists(index_path)
                faiss.write_index(index, index_path)

    with Timer('[faiss] query topk {}'.format(k), verbose):
        knn_ofn = index_path + '.npz'
        if os.path.exists(knn_ofn):
            print('[faiss] read knns from {}'.format(knn_ofn))
            self.knns = np.load(knn_ofn)['data']
        else:
            sims, nbrs = index.search(feats, k=k)
            self.knns = [(np.array(nbr, dtype=np.int32),
                          1 - np.array(sim, dtype=np.float32))
                         for nbr, sim in zip(nbrs, sims)]
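The knns attribute built at the end of Example 9 stores, for every sample, its neighbour ids together with 1 - sim, i.e. cosine-style distances under the inner-product index. A small hedged snippet for turning that list back into dense arrays (only the name knns is taken from the example):

nbrs = np.array([nbr for nbr, dist in knns], dtype=np.int32)      # (N, k) neighbour ids
dists = np.array([dist for nbr, dist in knns], dtype=np.float32)  # (N, k) values of 1 - sim
sims = 1.0 - dists                                                # recover the similarities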
Example 10: __init__
# Module to import: import faiss [as alias]
# Or: from faiss import index_factory [as alias]
def __init__(self,
             target,
             nprobe=128,
             index_factory_str=None,
             verbose=False,
             mode='proxy',
             using_gpu=True):
    self._res_list = []

    num_gpu = faiss.get_num_gpus()
    print('[faiss gpu] #GPU: {}'.format(num_gpu))

    size, dim = target.shape
    assert size > 0, "size: {}".format(size)
    index_factory_str = "IVF{},PQ{}".format(
        min(8192, 16 * round(np.sqrt(size))),
        32) if index_factory_str is None else index_factory_str
    cpu_index = faiss.index_factory(dim, index_factory_str)
    cpu_index.nprobe = nprobe

    if mode == 'proxy':
        co = faiss.GpuClonerOptions()
        co.useFloat16 = True
        co.usePrecomputed = False

        index = faiss.IndexProxy()
        for i in range(num_gpu):
            res = faiss.StandardGpuResources()
            self._res_list.append(res)
            sub_index = faiss.index_cpu_to_gpu(
                res, i, cpu_index, co) if using_gpu else cpu_index
            index.addIndex(sub_index)
    elif mode == 'shard':
        co = faiss.GpuMultipleClonerOptions()
        co.useFloat16 = True
        co.usePrecomputed = False
        co.shard = True
        index = faiss.index_cpu_to_all_gpus(cpu_index, co, ngpu=num_gpu)
    else:
        raise KeyError("Unknown index mode")

    index = faiss.IndexIDMap(index)
    index.verbose = verbose

    # get nlist to decide how many samples are used for training
    nlist = int([
        item for item in index_factory_str.split(",") if 'IVF' in item
    ][0].replace("IVF", ""))

    # training
    if not index.is_trained:
        indexes_sample_for_train = np.random.randint(0, size, nlist * 256)
        index.train(target[indexes_sample_for_train])

    # add with ids
    target_ids = np.arange(0, size)
    index.add_with_ids(target, target_ids)
    self.index = index
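After __init__ in Example 10 finishes, self.index is an IndexIDMap wrapping the (possibly multi-GPU) IVF,PQ index, so querying is just the standard search call. A minimal hedged continuation (the method name search_nearest is hypothetical, not part of the original class):

def search_nearest(self, queries, topk=10):
    queries = np.ascontiguousarray(queries, dtype=np.float32)
    # distances are L2 here because index_factory was called without a metric
    distances, ids = self.index.search(queries, topk)
    return distances, ids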