本文整理匯總了Python中nearpy.Engine.store_vector方法的典型用法代碼示例。如果您正苦於以下問題：Python Engine.store_vector方法的具體用法？Python Engine.store_vector怎麼用？Python Engine.store_vector使用的例子？那麼，這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類nearpy.Engine的用法示例。
在下文中一共展示了Engine.store_vector方法的15個代碼示例，這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚，您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: index_user_vectors
# 需要導入模塊: from nearpy import Engine [as 別名]
# 或者: from nearpy.Engine import store_vector [as 別名]
def index_user_vectors():
    """Build the module-level ``engine_perm`` index from ``user_vector``.

    A ``RandomBinaryProjections`` hash is wrapped in a ``HashPermutations``
    meta-hash and used as the only hash of a new cosine-distance ``Engine``.
    Every key ``u`` of the module-level ``user_vector`` is stored as
    ``user_vector[u]`` with ``u`` attached as its data, after which the
    permuted index is built and the elapsed wall-clock time is printed.

    Reads the module-level names ``k_dimen``, ``d_dimen`` and ``user_vector``
    and rebinds the global ``engine_perm``. Returns nothing.
    """
    global engine_perm

    print('Performing indexing with HashPermutations...')
    t0 = time.time()
    print('%s %s' % (k_dimen, d_dimen))

    # Child hash: constructed with d_dimen, then reset with k_dimen.
    rbp_perm = RandomBinaryProjections('rbp_perm', d_dimen)
    rbp_perm.reset(k_dimen)

    # Create permutations meta-hash and register rbp_perm as its child hash.
    permutations = HashPermutations('permut')
    rbp_conf = {'num_permutation': 50, 'beam_size': 10, 'num_neighbour': 250}
    permutations.add_child_hash(rbp_perm, rbp_conf)

    # Create engine
    engine_perm = Engine(k_dimen, lshashes=[permutations],
                         distance=CosineDistance())
    for u in user_vector:
        engine_perm.store_vector(user_vector[u], data=u)

    # The permuted index is only built after all vectors have been stored.
    permutations.build_permuted_index()

    t1 = time.time()
    # Fixed: the original passed the elapsed time as a second print argument
    # (',' instead of '%'), so the '%f' placeholder was printed literally.
    print('Indexing took %f seconds' % (t1 - t0))
示例2: knn
# 需要導入模塊: from nearpy import Engine [as 別名]
# 或者: from nearpy.Engine import store_vector [as 別名]
def knn(data, k):
    """Query the approximate ``k`` nearest neighbours of every row of ``data``.

    All rows are stored in a nearpy ``Engine`` hashed by a
    ``RandomBinaryProjections`` built with ``min(dimension, 10)``, with the
    row index as payload. Each row is then queried with a
    ``NearestFilter(k + 1)`` and the first hit of every result is dropped.

    Args:
        data: 2-D array-like exposing ``.shape`` and row indexing.
        k: number of neighbours wanted per row; must be ``<= len(data) - 1``.

    Returns:
        ``(N, dist, ind)`` where ``N`` is the raw result of the last query,
        ``dist`` holds the per-row distance lists and ``ind`` the per-row
        payload (row index) lists.
    """
    assert k <= len(data) - 1, 'The number of neighbors must be smaller than the data cardinality (minus one)'
    # One extra neighbour is requested because the first hit is discarded.
    wanted = k + 1
    n_rows, dimension = data.shape

    projection = RandomBinaryProjections('rbp', min(dimension, 10))
    engine = Engine(dimension, lshashes=[projection],
                    vector_filters=[NearestFilter(wanted)])

    for row_id in range(n_rows):
        engine.store_vector(data[row_id], row_id)

    ind, dist = [], []
    for row_id in range(n_rows):
        N = engine.neighbours(data[row_id])
        ind.append([hit[1] for hit in N][1:])
        dist.append([hit[2] for hit in N][1:])
    return N, dist, ind
示例3: TestEngine
# 需要導入模塊: from nearpy import Engine [as 別名]
# 或者: from nearpy.Engine import store_vector [as 別名]
class TestEngine(unittest.TestCase):
    """Round-trip tests: a stored vector is expected back as the first neighbour."""

    def setUp(self):
        # Every test starts with a fresh engine created as Engine(1000).
        self.engine = Engine(1000)

    def test_retrieval(self):
        """Store one dense random vector and query it back, 100 times over."""
        for _ in range(100):
            self.engine.clean_all_buckets()
            vec = numpy.random.randn(1000)
            payload = 'data'
            self.engine.store_vector(vec, payload)

            found_vec, found_payload, found_dist = self.engine.neighbours(vec)[0]

            self.assertTrue((found_vec == vec).all())
            self.assertEqual(found_payload, payload)
            self.assertEqual(found_dist, 0.0)

    def test_retrieval_sparse(self):
        """Same round trip with a 1000x1 scipy sparse vector (density 0.05)."""
        for _ in range(100):
            self.engine.clean_all_buckets()
            vec = scipy.sparse.rand(1000, 1, density=0.05)
            payload = 'data'
            self.engine.store_vector(vec, payload)

            found_vec, found_payload, found_dist = self.engine.neighbours(vec)[0]

            self.assertTrue((found_vec - vec).sum() == 0.0)
            self.assertEqual(found_payload, payload)
            self.assertEqual(found_dist, 0.0)
示例4: StateDBEngine
# 需要導入模塊: from nearpy import Engine [as 別名]
# 或者: from nearpy.Engine import store_vector [as 別名]
class StateDBEngine(object):
    """Thin wrapper around a nearpy ``Engine`` for storing and looking up vectors."""

    def __init__(self):
        # Set up the nearpy engine with dimension 4 and a single
        # RandomBinaryProjections hash constructed as ('rbp', 100).
        self.dim = 4
        self.rbp = RandomBinaryProjections('rbp', 100)
        self.engine = Engine(self.dim, lshashes=[self.rbp])

        # Performance counter: number of add() calls so far.
        self.counter = 0

    def add(self, x, data):
        """Store vector ``x`` with payload ``data`` and bump the counter."""
        self.engine.store_vector(x, data)
        self.counter += 1

    def lookup(self, x, THRESHOLD=0.1):
        """Return the payload of the first neighbour of ``x`` if it is close enough.

        Returns ``None`` when the engine reports no neighbours or when the
        first neighbour's distance is not strictly below ``THRESHOLD``.
        """
        hits = self.engine.neighbours(x)
        if len(hits) == 0:
            return None

        # Each hit unpacks as (vector, payload, distance); only the first is used.
        _, payload, distance = hits[0]
        return payload if distance < THRESHOLD else None
示例5: NearPy
# 需要導入模塊: from nearpy import Engine [as 別名]
# 或者: from nearpy.Engine import store_vector [as 別名]
class NearPy(NearestNeighbor):
    """Nearest-neighbour search backed by a nearpy ``Engine``.

    Uses ``self.dist_metric_``, ``self.phi_`` and ``self.featurize``, none of
    which are defined in this class; presumably they are provided by
    ``NearestNeighbor`` (verify against the base class).
    """

    def __init__(self, dist=EuclideanDistance(), phi=lambda x: x):
        # NOTE: the default EuclideanDistance() is evaluated once, when the
        # class body runs, and is shared by every instance built with defaults.
        NearestNeighbor.__init__(self, dist, phi)

    def _create_engine(self, k, lshashes=None):
        """(Re)build the engine for ``k`` neighbours and store all features.

        Each element of ``self.featurized_`` is stored with its position as
        payload; it is transposed first when ``self.transpose_`` is set.
        """
        self.k_ = k
        self.engine_ = Engine(self.dimension_, lshashes,
                              distance=self.dist_metric_,
                              vector_filters=[NearestFilter(k)])

        for i, feature in enumerate(self.featurized_):
            vector = feature.T if self.transpose_ else feature
            self.engine_.store_vector(vector, i)

    def train(self, data, k=10):
        """Featurize ``data`` and index it for ``k``-nearest-neighbour queries.

        Sets ``data_``, ``featurized_``, ``transpose_`` and ``dimension_``
        from the shape of the first feature, then builds the engine.

        Raises:
            AssertionError: if the first feature has more than two axes, or
                has two axes and neither has length 1.
        """
        self.data_ = np.array(data)
        self.featurized_ = self.featurize(data)

        # Fixed: the original read the undefined name `featurized` here,
        # which raised NameError on every call.
        shape = self.featurized_[0].shape
        assert len(shape) <= 2, 'Feature shape must be (1, N), (N, 1), or (N,)'
        if len(shape) == 1:
            self.transpose_ = False
            self.dimension_ = shape[0]
        else:
            assert 1 in shape, 'Feature shape must be (1, N) or (N, 1)'
            # Row vectors (1, N) are transposed before storing and querying.
            self.transpose_ = (shape[0] == 1)
            self.dimension_ = shape[1] if self.transpose_ else shape[0]

        logging.info('Constructing nearest neighbor data structure.')
        # time.time() replaces time.clock(), which was removed in Python 3.8;
        # the elapsed time is now logged instead of being discarded.
        train_start = time.time()
        self._create_engine(k)
        train_end = time.time()
        logging.info('Took %f sec', train_end - train_start)

    def within_distance(self, x, dist=0.5, return_indices=False):
        """Not implemented; always raises ``NotImplementedError``.

        Fixed: the original signature omitted ``self``, so a call on an
        instance bound the instance to ``x``.
        """
        raise NotImplementedError

    def nearest_neighbors(self, x, k, return_indices=False):
        """Return the neighbours of ``x`` reported by the engine.

        Args:
            x: query item; it is passed through ``self.phi_`` before querying.
            k: number of neighbours; a value different from the one the
                engine was built with triggers a full rebuild.
            return_indices: when true, return stored indices instead of the
                corresponding rows of ``self.data_``.

        Returns:
            ``(items, distances)`` as two lists; ``([], [])`` when the engine
            returns no result.
        """
        # HACK: load all data back into new engine if k doesn't match
        if k != self.k_:
            self._create_engine(k)

        feature = self.phi_(x)
        query = feature.T if self.transpose_ else feature
        query_result = self.engine_.neighbours(query)

        if len(query_result) == 0:
            return [], []

        # Each result unpacks as (feature, index, distance).
        features, indices, distances = zip(*query_result)
        if return_indices:
            return list(indices), list(distances)

        indices = np.array(indices)
        return list(self.data_[indices]), list(distances)
示例6: main
# 需要導入模塊: from nearpy import Engine [as 別名]
# 或者: from nearpy.Engine import store_vector [as 別名]
def main(args):
    """Benchmark nearpy LSH for every (nbits, ntbls) configuration.

    For each pair from ``args.nbits`` x ``args.ntbls`` an ``Engine`` with
    ``ntbl`` ``RandomBinaryProjections`` hashes of ``nbit`` bits is built
    over ``data.base``, every row of ``data.query`` is searched, and the
    result is appended to ``<args.exp_dir>/report.txt``.

    Args:
        args: object exposing ``dataset``, ``topk``, ``nbits``, ``ntbls``
            and ``exp_dir``.

    Relies on ``Dataset``, ``tic``, ``toc`` and ``compute_stats`` being
    defined elsewhere in the module.
    """
    data = Dataset(args.dataset)
    num, dim = data.base.shape

    # Filter objects are created once and reused by every engine:
    # NearestFilter(args.topk) and UniqueFilter().
    nearest = NearestFilter(args.topk)
    unique = UniqueFilter()

    # Create engines for all configurations
    for nbit, ntbl in itertools.product(args.nbits, args.ntbls):
        logging.info("Creating Engine ...")
        lshashes = [RandomBinaryProjections('rbp%d' % i, nbit)
                    for i in xrange(ntbl)]
        engine = Engine(dim, lshashes=lshashes,
                        vector_filters=[unique, nearest])
        logging.info("\tDone!")

        logging.info("Adding items ...")
        for i in xrange(num):
            engine.store_vector(data.base[i, :], i)
            if i % 100000 == 0:
                # Fixed: the original read the misspelled attribute
                # `data.nbae`; `num` is the row count of data.base.
                logging.info("\t%d/%d", i, num)
        logging.info("\tDone!")

        # Builtin int replaces the `np.int` alias, which newer NumPy removed.
        ids = np.zeros((data.nqry, args.topk), int)
        logging.info("Searching ...")
        tic()
        for i in xrange(data.nqry):
            # Each result unpacks as a 3-tuple; only the middle element (the
            # stored row index) is kept. The original wrapped the result in
            # np.array(), which is unnecessary for plain iteration.
            reti = [item_id
                    for _, item_id, _ in engine.neighbours(data.query[i])]
            # Rows with fewer than topk results keep their trailing zeros.
            ids[i, :len(reti)] = reti
            if i % 100 == 0:
                logging.info("\t%d/%d", i, data.nqry)
        time_costs = toc()
        logging.info("\tDone!")

        report = os.path.join(args.exp_dir, "report.txt")
        # The banner is written before compute_stats() runs, as in the
        # original, so it reaches the file even if the statistics step fails.
        with open(report, "a") as rptf:
            rptf.write("*" * 64 + "\n")
            rptf.write("* %s\n" % time.asctime())
            rptf.write("*" * 64 + "\n")

        r_at_k = compute_stats(data.groundtruth, ids, args.topk)[-1][-1]

        with open(report, "a") as rptf:
            rptf.write("=" * 64 + "\n")
            rptf.write("index_%s-nbit_%d-ntbl_%d\n" % ("NearPy", nbit, ntbl))
            rptf.write("-" * 64 + "\n")
            # NOTE(review): the label in this listing had been replaced by an
            # e-mail-obfuscation placeholder ("[email protected]");
            # "recall@" is a reconstruction - confirm against the upstream file.
            rptf.write("recall@%-8d%.4f\n" % (args.topk, r_at_k))
            rptf.write("time cost (ms): %.3f\n" %
                       (time_costs * 1000 / data.nqry))
示例7: build_index
# 需要導入模塊: from nearpy import Engine [as 別名]
# 或者: from nearpy.Engine import store_vector [as 別名]
def build_index(self, X):
    """Store every row of ``X`` in a new nearpy ``Engine`` and return it.

    The engine dimension is ``X.shape[1]`` and its only hash is
    ``RandomBinaryProjections('rbp', 32)``. Row ``i`` is stored with the
    string payload ``'data_<i>'``.
    """
    n_features = X.shape[1]
    n_rows = X.shape[0]

    index = Engine(n_features, lshashes=[RandomBinaryProjections('rbp', 32)])
    for row in range(n_rows):
        label = 'data_%d' % row
        index.store_vector(X[row], label)
    return index
示例8: get_engine
# 需要導入模塊: from nearpy import Engine [as 別名]
# 或者: from nearpy.Engine import store_vector [as 別名]
def get_engine(self, vocab, vecs):
    """Build a nearpy ``Engine`` holding every row of ``vecs``.

    The engine uses one ``PCABinaryProjections`` hash built from the
    transpose of the first 1000 rows of ``vecs`` with
    ``self.args.projections`` projections, and is created with
    ``distance=[]`` and ``vector_filters=[]``. Row ``i`` is stored with
    payload ``i``. ``vocab`` is not used.

    Returns:
        The populated engine.
    """
    logging.info('{} hash functions'.format(self.args.projections))

    training_sample = vecs[:1000, :].T
    pca_hash = PCABinaryProjections('ne1v', self.args.projections,
                                    training_sample)
    engine = Engine(vecs.shape[1], lshashes=[pca_hash], distance=[],
                    vector_filters=[])

    for ind, vec in enumerate(vecs):
        # Progress message every 100000 rows (including row 0).
        if ind % 100000 == 0:
            logging.info('{} words added to nearpy engine'.format(ind))
        engine.store_vector(vec, ind)
    return engine
示例9: test_storage_issue
# 需要導入模塊: from nearpy import Engine [as 別名]
# 或者: from nearpy.Engine import store_vector [as 別名]
def test_storage_issue(self):
    """Vectors stored in one engine must not appear in another engine."""
    filled = Engine(100)
    untouched = Engine(100)

    for _ in range(1000):
        vec = numpy.random.randn(100)
        payload = 'data'
        filled.store_vector(vec, payload)

    # Each engine should have its own default storage
    bucket_count = len(untouched.storage.buckets)
    self.assertTrue(bucket_count == 0)
示例10: TestPermutation
# 需要導入模塊: from nearpy import Engine [as 別名]
# 或者: from nearpy.Engine import store_vector [as 別名]
class TestPermutation(unittest.TestCase):
    """Smoke test comparing a ``HashPermutations`` engine with a plain one."""

    def setUp(self):
        logging.basicConfig(level=logging.WARNING)

        # Binary child hash plus the configuration handed to the meta-hash.
        child_hash = RandomBinaryProjections('rbp1', 4)
        child_conf = {'num_permutation':50,'beam_size':10,'num_neighbour':100}

        # Permutations meta-hash with the binary hash registered as its child.
        self.permutations = HashPermutations('permut')
        self.permutations.add_child_hash(child_hash, child_conf)

        # Engine using the meta-hash, with cosine distance.
        self.engine_perm = Engine(200, lshashes=[self.permutations],
                                  distance=CosineDistance())

        # Engine using the binary hash directly, without the meta-hash.
        self.engine = Engine(200, lshashes=[child_hash],
                             distance=CosineDistance())

    def test_runnable(self):
        """Index 1000 random vectors, query both engines, print the distances."""
        # Index the same random vectors in both engines and keep a copy of
        # them in `matrix` for the exact comparison at the end.
        matrix = numpy.zeros((1000, 200))
        for row in xrange(1000):
            vec = numpy.random.randn(200)
            matrix[row] = vec
            self.engine.store_vector(vec)
            self.engine_perm.store_vector(vec)

        # The permuted index is built only after all vectors are stored.
        self.permutations.build_permuted_index()

        # Random query against the engine with the permutations meta-hash.
        print('\nNeighbour distances with permuted index:')
        query = numpy.random.randn(200)
        perm_hits = self.engine_perm.neighbours(query)
        print([hit[2] for hit in perm_hits])

        # Same query against the engine without the meta-hash.
        print('\nNeighbour distances without permuted index (distances should be larger):')
        plain_hits = self.engine.neighbours(query)
        print([hit[2] for hit in plain_hits])

        # Distances from the query to every indexed vector, ten smallest.
        print('\nReal neighbour distances:')
        query = query.reshape((1, 200))
        exact = CosineDistance().distance_matrix(matrix, query)
        exact = sorted(exact.reshape((-1,)))
        print(exact[:10])
示例11: test_sparse
# 需要導入模塊: from nearpy import Engine [as 別名]
# 或者: from nearpy.Engine import store_vector [as 別名]
def test_sparse():
    """Index random sparse columns, query one, then open an IPython shell.

    Builds a 500x1000 random sparse training matrix and a 500x1 test
    matrix with ``ss.rand``, stores every training column in an ``Engine``
    hashed by ``RandomBinaryProjections('rbp', 10)``, queries each test
    column, prints the result of the last query and finally calls
    ``IPython.embed()``.
    """
    dim = 500
    num_train = 1000
    num_test = 1

    train_data = ss.rand(dim, num_train)
    test_data = ss.rand(dim, num_test)

    engine = Engine(dim, lshashes=[RandomBinaryProjections('rbp', 10)])

    for col in range(num_train):
        engine.store_vector(train_data.getcol(col))

    for col in range(num_test):
        N = engine.neighbours(test_data.getcol(col))

    print(N)

    # Interactive inspection hook kept from the original example.
    IPython.embed()
示例12: TestEngine
# 需要導入模塊: from nearpy import Engine [as 別名]
# 或者: from nearpy.Engine import store_vector [as 別名]
class TestEngine(unittest.TestCase):
    """Engine tests: storage isolation and round trips against ``unitvec(x)``."""

    def setUp(self):
        # Every test starts with a fresh engine created as Engine(1000).
        self.engine = Engine(1000)

    def test_storage_issue(self):
        """Vectors stored in one engine must not appear in another engine."""
        filled = Engine(100)
        untouched = Engine(100)
        for _ in range(1000):
            vec = numpy.random.randn(100)
            payload = 'data'
            filled.store_vector(vec, payload)
        # Each engine should have its own default storage
        bucket_count = len(untouched.storage.buckets)
        self.assertTrue(bucket_count == 0)

    def test_retrieval(self):
        """Dense round trip: the first hit must match ``unitvec(x)`` within 1e-9."""
        for _ in range(100):
            self.engine.clean_all_buckets()
            vec = numpy.random.randn(1000)
            payload = 'data'
            self.engine.store_vector(vec, payload)

            hits = self.engine.neighbours(vec)
            found_vec, found_payload, found_dist = hits[0]

            expected_vec = unitvec(vec)
            tolerance = 0.000000001
            largest_gap = numpy.abs((expected_vec - found_vec)).max()
            self.assertAlmostEqual(largest_gap, 0, delta=tolerance)
            self.assertEqual(found_payload, payload)
            self.assertAlmostEqual(found_dist, 0.0, delta=tolerance)

    def test_retrieval_sparse(self):
        """Same round trip with a 1000x1 scipy sparse vector (density 0.05)."""
        for _ in range(100):
            self.engine.clean_all_buckets()
            vec = scipy.sparse.rand(1000, 1, density=0.05)
            payload = 'data'
            self.engine.store_vector(vec, payload)

            hits = self.engine.neighbours(vec)
            found_vec, found_payload, found_dist = hits[0]

            expected_vec = unitvec(vec)
            tolerance = 0.000000001
            largest_gap = numpy.abs((expected_vec - found_vec)).max()
            self.assertAlmostEqual(largest_gap, 0, delta=tolerance)
            self.assertEqual(found_payload, payload)
            self.assertAlmostEqual(found_dist, 0.0, delta=tolerance)
示例13: TestEngine
# 需要導入模塊: from nearpy import Engine [as 別名]
# 或者: from nearpy.Engine import store_vector [as 別名]
class TestEngine(unittest.TestCase):
    """Engine tests: storage isolation and exact round-trip retrieval."""

    def setUp(self):
        # Every test starts with a fresh engine created as Engine(1000).
        self.engine = Engine(1000)

    def test_storage_issue(self):
        """Vectors stored in one engine must not appear in another engine."""
        filled = Engine(100)
        untouched = Engine(100)
        for _ in range(1000):
            vec = numpy.random.randn(100)
            payload = 'data'
            filled.store_vector(vec, payload)
        # Each engine should have its own default storage
        bucket_count = len(untouched.storage.buckets)
        self.assertTrue(bucket_count == 0)

    def test_retrieval(self):
        """Dense round trip: the first hit must equal the stored vector exactly."""
        for _ in range(100):
            self.engine.clean_all_buckets()
            vec = numpy.random.randn(1000)
            payload = 'data'
            self.engine.store_vector(vec, payload)

            best = self.engine.neighbours(vec)[0]
            found_vec, found_payload, found_dist = best[0], best[1], best[2]

            self.assertTrue((found_vec == vec).all())
            self.assertEqual(found_payload, payload)
            self.assertEqual(found_dist, 0.0)

    def test_retrieval_sparse(self):
        """Same round trip with a 1000x1 scipy sparse vector (density 0.05)."""
        for _ in range(100):
            self.engine.clean_all_buckets()
            vec = scipy.sparse.rand(1000, 1, density=0.05)
            payload = 'data'
            self.engine.store_vector(vec, payload)

            best = self.engine.neighbours(vec)[0]
            found_vec, found_payload, found_dist = best[0], best[1], best[2]

            self.assertTrue((found_vec - vec).sum() == 0.0)
            self.assertEqual(found_payload, payload)
            self.assertEqual(found_dist, 0.0)
示例14: __init__
# 需要導入模塊: from nearpy import Engine [as 別名]
# 或者: from nearpy.Engine import store_vector [as 別名]
class lshsearcher:
    """Wrapper around a nearpy ``Engine`` using a ``HashPermutations`` meta-hash.

    ``conf()`` must be called before any data is added: until then the
    engine and the meta-hash are ``None`` and ``getData``, ``commitData``
    and ``find`` do nothing.
    """

    def __init__(self):
        # Nothing exists until conf() is called.
        self.__dimension = None
        self.__engine_perm = None
        self.__permutations = None

    def _set_confval(self, dimension=None):
        """Remember ``dimension``; a ``None`` value leaves the setting untouched."""
        if dimension is not None:
            self.__dimension = dimension

    def _engine_on(self):
        """Create the meta-hash, its child hash and the cosine-distance engine."""
        # Binary child hash and the configuration passed along with it.
        child_hash = RandomBinaryProjections('rbp_perm', 14)
        child_conf = {'num_permutation':50,'beam_size':10,'num_neighbour':100}

        # Permutations meta-hash with the binary hash as its child.
        self.__permutations = HashPermutations('permut')
        self.__permutations.add_child_hash(child_hash, child_conf)

        self.__engine_perm = Engine(self.__dimension,
                                    lshashes=[self.__permutations],
                                    distance=CosineDistance())

    def conf(self, dimension):
        """Set the dimension and (re)create the engine."""
        self._set_confval(dimension)
        self._engine_on()

    def getData(self, v):
        """Store vector ``v`` in the engine, if the engine exists."""
        if self.__engine_perm is None:
            return
        self.__engine_perm.store_vector(v)

    def commitData(self):
        """Build the permuted index, if the meta-hash exists."""
        if self.__permutations is None:
            return
        self.__permutations.build_permuted_index()

    def find(self, v):
        """Return the engine's neighbours of ``v``, or ``None`` without an engine."""
        if self.__engine_perm is None:
            return None
        return self.__engine_perm.neighbours(v)
示例15: example2
# 需要導入模塊: from nearpy import Engine [as 別名]
# 或者: from nearpy.Engine import store_vector [as 別名]
def example2():
# Dimension of feature space
DIM = 100
# Number of data points (dont do too much because of exact search)
POINTS = 20000
##########################################################
print 'Performing indexing with HashPermutations...'
t0 = time.time()
# Create permutations meta-hash
permutations = HashPermutations('permut')
# Create binary hash as child hash
rbp_perm = RandomBinaryProjections('rbp_perm', 14)
rbp_conf = {'num_permutation':50,'beam_size':10,'num_neighbour':100}
# Add rbp as child hash of permutations hash
permutations.add_child_hash(rbp_perm, rbp_conf)
# Create engine
engine_perm = Engine(DIM, lshashes=[permutations], distance=CosineDistance())
# First index some random vectors
matrix = numpy.zeros((POINTS,DIM))
for i in xrange(POINTS):
v = numpy.random.randn(DIM)
matrix[i] = v
engine_perm.store_vector(v)
# Then update permuted index
permutations.build_permuted_index()
t1 = time.time()
print 'Indexing took %f seconds' % (t1-t0)
# Get random query vector
query = numpy.random.randn(DIM)
# Do random query on engine 3
print '\nNeighbour distances with HashPermutations:'
print ' -> Candidate count is %d' % engine_perm.candidate_count(query)
results = engine_perm.neighbours(query)
dists = [x[2] for x in results]
print dists
# Real neighbours
print '\nReal neighbour distances:'
query = query.reshape((1,DIM))
dists = CosineDistance().distance_matrix(matrix,query)
dists = dists.reshape((-1,))
dists = sorted(dists)
print dists[:10]
##########################################################
print '\nPerforming indexing with HashPermutationMapper...'
t0 = time.time()
# Create permutations meta-hash
permutations2 = HashPermutationMapper('permut2')
# Create binary hash as child hash
rbp_perm2 = RandomBinaryProjections('rbp_perm2', 14)
# Add rbp as child hash of permutations hash
permutations2.add_child_hash(rbp_perm2)
# Create engine
engine_perm2 = Engine(DIM, lshashes=[permutations2], distance=CosineDistance())
# First index some random vectors
matrix = numpy.zeros((POINTS,DIM))
for i in xrange(POINTS):
v = numpy.random.randn(DIM)
matrix[i] = v
engine_perm2.store_vector(v)
t1 = time.time()
print 'Indexing took %f seconds' % (t1-t0)
# Get random query vector
query = numpy.random.randn(DIM)
# Do random query on engine 4
print '\nNeighbour distances with HashPermutationMapper:'
print ' -> Candidate count is %d' % engine_perm2.candidate_count(query)
results = engine_perm2.neighbours(query)
dists = [x[2] for x in results]
print dists
# Real neighbours
print '\nReal neighbour distances:'
query = query.reshape((1,DIM))
dists = CosineDistance().distance_matrix(matrix,query)
dists = dists.reshape((-1,))
dists = sorted(dists)
#.........這裏部分代碼省略.........