本文整理汇总了Python中annoy.AnnoyIndex方法的典型用法代码示例。如果您正苦于以下问题：Python annoy.AnnoyIndex方法的具体用法？Python annoy.AnnoyIndex怎么用？Python annoy.AnnoyIndex使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类annoy的用法示例。
在下文中一共展示了annoy.AnnoyIndex方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: nn_approx
# 需要导入模块: import annoy [as 别名]
# 或者: from annoy import AnnoyIndex [as 别名]
def nn_approx(ds1, ds2, knn=KNN, metric='manhattan', n_trees=10):
    """Match rows of ``ds1`` to their approximate nearest rows of ``ds2``.

    An Annoy index using ``metric`` and ``n_trees`` trees is built over the
    rows of ``ds2``; every row of ``ds1`` is then queried for ``knn``
    neighbours.

    Returns:
        A set of ``(ds1_row, ds2_row)`` index pairs.
    """
    # Index every row of ds2 under its own row number.
    index = AnnoyIndex(ds2.shape[1], metric=metric)
    for row in range(ds2.shape[0]):
        index.add_item(row, ds2[row, :])
    index.build(n_trees)

    # One query per row of ds1.
    neighbours = np.array([
        index.get_nns_by_vector(ds1[row, :], knn, search_k=-1)
        for row in range(ds1.shape[0])
    ])

    # Pair each query row with every neighbour returned for it.
    pairs = set()
    for query, hits in enumerate(neighbours):
        pairs.update((query, hit) for hit in hits)
    return pairs
# Find mutual nearest neighbors.
示例2: label_approx
# 需要导入模块: import annoy [as 别名]
# 或者: from annoy import AnnoyIndex [as 别名]
def label_approx(X, sites, site_labels, k=1):
    """Label each row of ``X`` by majority vote among its nearest sites.

    A euclidean Annoy index with 10 trees is built over the rows of
    ``sites``. For each row of ``X`` the ``k`` nearest sites are looked up
    and the most common ``site_labels`` entry among them is chosen; a row
    for which the lookup returns nothing is labelled ``None``.

    Returns:
        A NumPy array holding one label per row of ``X``.
    """
    from annoy import AnnoyIndex

    n_dims = sites.shape[1]
    assert(X.shape[1] == n_dims)

    # Build index over site points.
    site_index = AnnoyIndex(n_dims, metric='euclidean')
    for site_id in range(sites.shape[0]):
        site_index.add_item(site_id, sites[site_id, :])
    site_index.build(10)

    def vote(point):
        # Most common label among the nearest site points, or None.
        neighbours = site_index.get_nns_by_vector(point, k)
        if not neighbours:
            return None
        tally = Counter(site_labels[n] for n in neighbours)
        return tally.most_common(1)[0][0]

    return np.array([vote(X[row, :]) for row in range(X.shape[0])])
示例3: get_approx_index_chunks
# 需要导入模块: import annoy [as 别名]
# 或者: from annoy import AnnoyIndex [as 别名]
def get_approx_index_chunks(self):
    """Yield decompressed chunks of the AnnoyIndex file stored in the database.

    Rows of the ``magnitude_approx`` table whose ``trees`` column equals
    ``self.approx_trees`` are read through a connection obtained from
    ``self._db(force_new=True)``; each row's ``index_file`` value is fed
    through one shared LZ4 frame decompressor and the result is yielded.

    Iteration stops early once ``self.closed`` is true, and an exception
    raised while ``self.closed`` is true is swallowed instead of re-raised.
    """
    try:
        connection = self._db(force_new=True)
        with lz4.frame.LZ4FrameDecompressor() as lz4_stream:
            rows = connection.execute(
                """
                    SELECT rowid,index_file
                    FROM `magnitude_approx`
                    WHERE trees = ?
                """, (self.approx_trees,))
            for row in rows:
                # Column 1 is index_file (column 0 is rowid).
                yield lz4_stream.decompress(row[1])
                if self.closed:
                    return
    except Exception as err:
        # Errors are only reported while the object is still open.
        if not self.closed:
            raise err
示例4: run
# 需要导入模块: import annoy [as 别名]
# 或者: from annoy import AnnoyIndex [as 别名]
def run(self):
    """Query the saved Annoy index for every item in this worker's range.

    Loads the angular index at ``self.index_filepath`` and, for each item
    id in ``range(self.data_indices[0], self.data_indices[1])``, puts an
    ``IndexNeighbours`` record with that item's neighbour ids (as a
    ``uint32`` array) on ``self.results_queue``.

    Any exception is stored on ``self.exception`` instead of propagating,
    and the results queue is closed in every case.
    """
    try:
        annoy_index = AnnoyIndex(self.n_dims, metric='angular')
        annoy_index.load(self.index_filepath)

        first, stop = self.data_indices[0], self.data_indices[1]
        for row in range(first, stop):
            hits = annoy_index.get_nns_by_item(
                row, self.k, search_k=self.search_k,
                include_distances=False)
            record = IndexNeighbours(
                row_index=row,
                neighbour_list=np.array(hits, dtype=np.uint32))
            self.results_queue.put(record)
    except Exception as err:
        # Stored on the instance instead of being raised from here.
        self.exception = err
    finally:
        self.results_queue.close()
示例5: test_build_sparse_annoy_index
# 需要导入模块: import annoy [as 别名]
# 或者: from annoy import AnnoyIndex [as 别名]
def test_build_sparse_annoy_index(annoy_index_file):
    """An index built from a sparse matrix is saved and reloads identically."""
    n_rows, n_dims = 10, 5
    dense = np.random.choice([0, 1], size=(n_rows, n_dims))

    built = build_annoy_index(csr_matrix(dense), annoy_index_file)
    assert os.path.exists(annoy_index_file)

    # Load the file that was just written into a fresh index object.
    reloaded = AnnoyIndex(n_dims, metric='angular')
    reloaded.load(annoy_index_file)

    assert built.f == reloaded.f == n_dims
    assert built.get_n_items() == reloaded.get_n_items() == n_rows
    assert built.get_nns_by_item(0, 5) == reloaded.get_nns_by_item(0, 5)

    built.unload()
    reloaded.unload()
示例6: create_tree
# 需要导入模块: import annoy [as 别名]
# 或者: from annoy import AnnoyIndex [as 别名]
def create_tree(data, approx, metric, use_faiss, n_trees):
    '''
    Build the nearest neighbour lookup structure for one batch.

    The structure returned depends on the arguments:

    * ``approx`` truthy: an annoy index with ``n_trees`` trees;
    * euclidean metric, ``use_faiss`` truthy and faiss already imported:
      a ``faiss.IndexFlatL2``;
    * euclidean metric otherwise: a ``cKDTree``;
    * any other metric: a ``KDTree`` using that metric.

    All undescribed input as in ``bbknn.bbknn()``.

    Input
    -----
    data : ``numpy.array``
        PCA coordinates of a batch's cells to index.
    '''
    if approx:
        tree = AnnoyIndex(data.shape[1], metric=metric)
        for row in np.arange(data.shape[0]):
            tree.add_item(row, data[row, :])
        tree.build(n_trees)
        return tree

    if metric != 'euclidean':
        return KDTree(data, metric=metric)

    # faiss is only used when requested and when the module has been imported.
    if use_faiss and 'faiss' in sys.modules:
        tree = faiss.IndexFlatL2(data.shape[1])
        tree.add(data)
        return tree

    return cKDTree(data)
示例7: srs_positive_annoy
# 需要导入模块: import annoy [as 别名]
# 或者: from annoy import AnnoyIndex [as 别名]
def srs_positive_annoy(X, N, seed=None, replace=False, prenormalized=False):
    """Pick ``N`` row indices of ``X`` using random-direction Annoy lookups.

    ``X`` is shifted so every column has minimum 0 and, unless
    ``prenormalized`` is true, passed through ``normalize`` and cast to
    float32 (``normalize`` is defined outside this block, presumably
    scikit-learn's row normalisation; confirm). In each of ``N`` rounds a
    euclidean Annoy index is built over the rows not chosen so far, a
    random unit vector is drawn, and the row nearest to it is added to the
    selection.

    Args:
        X: 2-D array of shape ``(n_samples, n_features)``.
        N: Number of rounds / indices requested.
        seed: If not ``None``, passed to ``np.random.seed`` before sampling.
        replace: When false, ``N`` may not exceed ``n_samples`` and
            ``N == n_samples`` short-circuits to all indices.
            NOTE(review): already chosen rows are excluded from later
            rounds regardless of this flag.
        prenormalized: Skip the ``normalize`` step when true.

    Returns:
        A sorted list of the selected row indices.

    Raises:
        ValueError: If ``replace`` is false and ``N > n_samples``.
    """
    n_samples, n_features = X.shape

    if not replace:
        if N > n_samples:
            raise ValueError('Cannot sample {} elements from {} elements '
                             'without replacement'.format(N, n_samples))
        if N == n_samples:
            # Return a list so the type matches the sorted list produced by
            # the general path below.
            return list(range(N))

    # Imported only when an index is actually needed, so the validation and
    # the trivial case above work even without annoy installed.
    from annoy import AnnoyIndex

    if seed is not None:
        np.random.seed(seed)

    X = X - X.min(0)
    if not prenormalized:
        X = normalize(X).astype('float32')

    srs_idx = set()
    for _ in range(N):
        # Rebuild the index each round over the rows not selected yet.
        aindex = AnnoyIndex(n_features, metric='euclidean')
        for row in range(n_samples):
            if row not in srs_idx:
                aindex.add_item(row, X[row, :])
        aindex.build(10)

        # Random direction on the unit sphere.
        Phi_i = np.random.normal(size=(n_features))
        Phi_i /= np.linalg.norm(Phi_i)

        nearest_site = aindex.get_nns_by_vector(Phi_i, 1)
        srs_idx.add(nearest_site[0])

    return sorted(srs_idx)
示例8: fit
# 需要导入模块: import annoy [as 别名]
# 或者: from annoy import AnnoyIndex [as 别名]
def fit(self, Ciu, show_progress=True):
    """Train the model, then build the Annoy indexes enabled on this instance.

    After the parent class ``fit`` has run:

    * if ``self.approximate_similar_items`` is set, an angular index over
      the rows of ``self.item_factors`` is stored as
      ``self.similar_items_index``;
    * if ``self.approximate_recommend`` is set, ``self.item_factors`` is
      passed through ``augment_inner_product_matrix`` and an angular index
      over the augmented rows is stored as ``self.recommend_index`` (the
      accompanying norm is kept in ``self.max_norm``).

    Both indexes are built with ``self.n_trees`` trees.
    """
    # annoy is imported here, not at module level, in case it is not
    # installed
    import annoy

    # train the model
    super(AnnoyAlternatingLeastSquares, self).fit(Ciu, show_progress)

    # index over the item factors, used for calculating similar items
    if self.approximate_similar_items:
        log.debug("Building annoy similar items index")
        self.similar_items_index = similar_index = annoy.AnnoyIndex(
            self.item_factors.shape[1], 'angular')
        for item_id, factors in enumerate(self.item_factors):
            similar_index.add_item(item_id, factors)
        similar_index.build(self.n_trees)

    # separate index for the inner product, used by the recommend methods
    if self.approximate_recommend:
        log.debug("Building annoy recommendation index")
        self.max_norm, augmented = augment_inner_product_matrix(
            self.item_factors)
        self.recommend_index = recommend_index = annoy.AnnoyIndex(
            augmented.shape[1], 'angular')
        for item_id, factors in enumerate(augmented):
            recommend_index.add_item(item_id, factors)
        recommend_index.build(self.n_trees)
示例9: generateAnnoy
# 需要导入模块: import annoy [as 别名]
# 或者: from annoy import AnnoyIndex [as 别名]
def generateAnnoy(real, artificial, annoyFilename, dimensions):
    """Build an angular Annoy index over ``artificial[2]`` and save it.

    Args:
        real: Not used by this function.
        artificial: Indexable object whose element ``2`` holds the vectors
            to index; item ``j`` of the index is vector ``j``.
        annoyFilename: Path the built index is saved to.
        dimensions: Vector length passed to ``AnnoyIndex``.

    Returns:
        The built ``AnnoyIndex``, built with ``TREESIZE`` trees.
    """
    idx2vec = np.array(artificial[2])

    # Name the metric explicitly: 'angular' is what the implicit default
    # resolves to, and omitting it triggers a FutureWarning in current annoy.
    t = AnnoyIndex(dimensions, 'angular')
    for j, vec in enumerate(idx2vec):
        t.add_item(j, vec)
    print('Done Adding items to AnnoyIndex')

    t.build(TREESIZE)
    print('Done Building AnnoyIndex')

    t.save(annoyFilename)
    return t
示例10: create_annoy
# 需要导入模块: import annoy [as 别名]
# 或者: from annoy import AnnoyIndex [as 别名]
def create_annoy(target_features):
    """Index ``target_features`` with Annoy and save it as ``annoy.ann``.

    Each feature is added under its position in ``target_features``. The
    index has ``layer_dimension`` dimensions, is built with 10 trees and is
    written to ``annoy.ann`` inside ``work_dir`` (both names are defined
    outside this function).
    """
    # Name the metric explicitly: 'angular' is what the implicit default
    # resolves to, and omitting it triggers a FutureWarning in current annoy.
    t = AnnoyIndex(layer_dimension, 'angular')
    for idx, target_feature in enumerate(target_features):
        t.add_item(idx, target_feature)
    t.build(10)
    t.save(os.path.join(work_dir, 'annoy.ann'))
示例11: test_tree
# 需要导入模块: import annoy [as 别名]
# 或者: from annoy import AnnoyIndex [as 别名]
def test_tree(self):
    """An index holding a single 5-dimensional item builds successfully."""
    index = AnnoyIndex(5, 'angular')
    index.add_item(1, [1, 2, 3, 4, 5])
    built = index.build(1)
    self.assertTrue(built)
示例12: __build_index
# 需要导入模块: import annoy [as 别名]
# 或者: from annoy import AnnoyIndex [as 别名]
def __build_index(self, index_file):
    """Build the Annoy index over ``self.embeddings`` and optionally save it.

    Row ``i`` of ``self.embeddings`` is indexed as item ``i`` in an angular
    index built with ``self.n_trees`` trees. If ``self.id_map`` is ``None``
    it becomes the identity mapping over the row numbers, and
    ``self.inverse_id_map`` is set to its inverse.

    When ``index_file`` is truthy the Annoy index is saved to
    ``index_file + '.embeddings'`` and a pickled dict holding
    ``embedding_size`` and ``id_map`` is written to ``index_file``.
    """
    n_rows = self.embeddings.shape[0]
    self.embedding_size = self.embeddings.shape[1]

    self.index = an.AnnoyIndex(self.embedding_size, metric='angular')
    for row in range(n_rows):
        self.index.add_item(row, self.embeddings[row, :])
    self.index.build(self.n_trees)

    if self.id_map is None:
        self.id_map = {row: row for row in range(n_rows)}
    self.inverse_id_map = {
        value: key for key, value in self.id_map.items()
    }

    if not index_file:
        return

    # The Annoy data goes in a sibling file; the pickle keeps the metadata.
    self.index.save(index_file + '.embeddings')
    state = {
        'embedding_size': self.embedding_size,
        'id_map': self.id_map,
    }
    with open(index_file, 'wb') as state_file:
        pickle.dump(state, state_file)
示例13: __load_index
# 需要导入模块: import annoy [as 别名]
# 或者: from annoy import AnnoyIndex [as 别名]
def __load_index(self, index_file):
    """Restore the index state from ``index_file`` and its embeddings file.

    ``index_file`` is a pickle holding ``embedding_size`` and ``id_map``;
    the Annoy data is loaded from ``index_file + '.embeddings'`` into an
    angular index of that size. ``self.inverse_id_map`` is set to the
    inverse of the loaded ``id_map``.
    """
    log.info('Loading index file from {}'.format(index_file))

    # NOTE: pickle.load can execute arbitrary code from the file, so only
    # index files from a trusted source should be loaded here.
    with open(index_file, 'rb') as state_file:
        state = pickle.load(state_file)

    self.embedding_size = state['embedding_size']
    self.id_map = state['id_map']

    self.index = an.AnnoyIndex(self.embedding_size, metric='angular')
    self.index.load(index_file + '.embeddings')

    self.inverse_id_map = {
        value: key for key, value in self.id_map.items()
    }
示例14: __init__
# 需要导入模块: import annoy [as 别名]
# 或者: from annoy import AnnoyIndex [as 别名]
def __init__(self, vecs):
    """Build an angular Annoy index over ``vecs``.

    Args:
        vecs: Non-empty sequence of vectors; the length of the first one
            is used as the index dimension and vector ``i`` becomes item
            ``i``.

    The index is built with 50 trees and stored as ``self.annoy_model``.
    """
    assert len(vecs)>0, 'no vecs available to init AnnoyIndex'
    size = len(vecs[0])
    # Name the metric explicitly: 'angular' is what the implicit default
    # resolves to, and omitting it triggers a FutureWarning in current annoy.
    self.annoy_model = AnnoyIndex(size, 'angular')
    for idx, vec in enumerate(vecs):
        self.annoy_model.add_item(idx, vec)
    self.annoy_model.build(50)
示例15: __init__
# 需要导入模块: import annoy [as 别名]
# 或者: from annoy import AnnoyIndex [as 别名]
def __init__(self, file_name, dim_vector=500):
    """Load a saved angular Annoy index from ``file_name``.

    Args:
        file_name: Path of the saved index file.
        dim_vector: Vector length passed to ``AnnoyIndex`` before loading
            (presumably it must match the saved index; confirm).

    The loaded index is stored as ``self.u``.
    """
    # Name the metric explicitly: 'angular' is what the implicit default
    # resolves to, and omitting it triggers a FutureWarning in current annoy.
    self.u = AnnoyIndex(dim_vector, 'angular')
    self.u.load(file_name)