This article collects typical code examples of the Python method nearpy.Engine.neighbours: what Engine.neighbours does, how to call it, and how it is used in practice. If that is what you are looking for, the curated examples below should help; you can also explore further usage of its containing class, nearpy.Engine.
The following shows 15 code examples of Engine.neighbours, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
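Before the examples, here is a minimal, self-contained sketch of the typical Engine.neighbours workflow (index vectors, then query). The dimension, vector count, hash size, and the 'data_%d' payload labels are arbitrary illustration values, not taken from any example below:
import numpy
from nearpy import Engine
from nearpy.hashes import RandomBinaryProjections

# Index 1000 random 100-dimensional vectors under a single
# 10-bit random binary projection hash.
dimension = 100
engine = Engine(dimension, lshashes=[RandomBinaryProjections('rbp', 10)])
for index in range(1000):
    engine.store_vector(numpy.random.randn(dimension), 'data_%d' % index)

# neighbours() returns a list of (vector, data, distance) tuples.
query = numpy.random.randn(dimension)
for vector, data, distance in engine.neighbours(query):
    print(data, distance)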
Example 1: TestEngine
# Required imports: from nearpy import Engine [as alias]
# Or: from nearpy.Engine import neighbours [as alias]
class TestEngine(unittest.TestCase):
def setUp(self):
self.engine = Engine(1000)
def test_retrieval(self):
for k in range(100):
self.engine.clean_all_buckets()
x = numpy.random.randn(1000)
x_data = 'data'
self.engine.store_vector(x, x_data)
n = self.engine.neighbours(x)
y = n[0][0]
y_data = n[0][1]
y_distance = n[0][2]
self.assertTrue((y == x).all())
self.assertEqual(y_data, x_data)
self.assertEqual(y_distance, 0.0)
def test_retrieval_sparse(self):
for k in range(100):
self.engine.clean_all_buckets()
x = scipy.sparse.rand(1000, 1, density=0.05)
x_data = 'data'
self.engine.store_vector(x, x_data)
n = self.engine.neighbours(x)
y = n[0][0]
y_data = n[0][1]
y_distance = n[0][2]
self.assertTrue((y - x).sum() == 0.0)
self.assertEqual(y_data, x_data)
self.assertEqual(y_distance, 0.0)
Example 2: NearPy
# Required imports: from nearpy import Engine [as alias]
# Or: from nearpy.Engine import neighbours [as alias]
class NearPy(NearestNeighbor):
def __init__(self, dist=EuclideanDistance(), phi=lambda x: x):
NearestNeighbor.__init__(self, dist, phi)
def _create_engine(self, k, lshashes=None):
self.k_ = k
self.engine_ = Engine(self.dimension_, lshashes,
distance=self.dist_metric_,
vector_filters=[NearestFilter(k)])
for i, feature in enumerate(self.featurized_):
if self.transpose_:
self.engine_.store_vector(feature.T, i)
else:
self.engine_.store_vector(feature, i)
def train(self, data, k=10):
self.data_ = np.array(data)
self.featurized_ = self.featurize(data)
        shape = self.featurized_[0].shape
assert len(shape) <= 2, 'Feature shape must be (1, N), (N, 1), or (N,)'
if len(shape) == 1:
self.transpose_ = False
self.dimension_ = shape[0]
else:
assert 1 in shape, 'Feature shape must be (1, N) or (N, 1)'
self.transpose_ = (shape[0] == 1)
self.dimension_ = shape[1] if self.transpose_ else shape[0]
logging.info('Constructing nearest neighbor data structure.')
train_start = time.clock()
self._create_engine(k)
train_end = time.clock()
# logging.info('Took %f sec' %(train_end - train_start))
    def within_distance(self, x, dist=0.5, return_indices=False):
raise NotImplementedError
def nearest_neighbors(self, x, k, return_indices=False):
# HACK: load all data back into new engine if k doesn't match
if k != self.k_:
self._create_engine(k)
feature = self.phi_(x)
if self.transpose_:
query_result = self.engine_.neighbours(feature.T)
else:
query_result = self.engine_.neighbours(feature)
if len(query_result) == 0:
return [], []
features, indices, distances = zip(*query_result)
if return_indices:
return list(indices), list(distances)
else:
indices = np.array(indices)
return list(self.data_[indices]), list(distances)
Example 3: TestPermutation
# Required imports: from nearpy import Engine [as alias]
# Or: from nearpy.Engine import neighbours [as alias]
class TestPermutation(unittest.TestCase):
def setUp(self):
logging.basicConfig(level=logging.WARNING)
# Create permutations meta-hash
self.permutations = HashPermutations('permut')
# Create binary hash as child hash
rbp = RandomBinaryProjections('rbp1', 4)
rbp_conf = {'num_permutation':50,'beam_size':10,'num_neighbour':100}
# Add rbp as child hash of permutations hash
self.permutations.add_child_hash(rbp, rbp_conf)
# Create engine with meta hash and cosine distance
self.engine_perm = Engine(200, lshashes=[self.permutations], distance=CosineDistance())
# Create engine without permutation meta-hash
self.engine = Engine(200, lshashes=[rbp], distance=CosineDistance())
def test_runnable(self):
# First index some random vectors
matrix = numpy.zeros((1000,200))
for i in xrange(1000):
v = numpy.random.randn(200)
matrix[i] = v
self.engine.store_vector(v)
self.engine_perm.store_vector(v)
# Then update permuted index
self.permutations.build_permuted_index()
# Do random query on engine with permutations meta-hash
print '\nNeighbour distances with permuted index:'
query = numpy.random.randn(200)
results = self.engine_perm.neighbours(query)
dists = [x[2] for x in results]
print dists
# Do random query on engine without permutations meta-hash
print '\nNeighbour distances without permuted index (distances should be larger):'
results = self.engine.neighbours(query)
dists = [x[2] for x in results]
print dists
# Real neighbours
print '\nReal neighbour distances:'
query = query.reshape((1,200))
dists = CosineDistance().distance_matrix(matrix,query)
dists = dists.reshape((-1,))
dists = sorted(dists)
print dists[:10]
Example 4: StateDBEngine
# Required imports: from nearpy import Engine [as alias]
# Or: from nearpy.Engine import neighbours [as alias]
class StateDBEngine(object):
def __init__(self):
# initialize "nearby" library
self.dim = 4
self.rbp = RandomBinaryProjections('rbp', 100)
self.engine = Engine(self.dim, lshashes=[self.rbp])
# performance counter
self.counter = 0
def add(self, x, data):
# print 'add data = ', data
self.engine.store_vector(x, data)
self.counter += 1
def lookup(self, x, THRESHOLD=0.1):
naver = self.engine.neighbours(x)
if len(naver) == 0:
return None
pt, data, d = naver[0]
# print 'lhs, rhs', x, pt,
# print 'd = ', d, (d < THRESHOLD), (data is None)
if d < THRESHOLD:
return data
else:
return None
Example 5: knn
# Required imports: from nearpy import Engine [as alias]
# Or: from nearpy.Engine import neighbours [as alias]
def knn(data, k):
    """Approximate kNN via LSH. For each row of data, collects the indices
    and distances of its k nearest neighbours (the point itself is excluded);
    also returns the raw result list of the last query."""
assert k<=len(data)-1, 'The number of neighbors must be smaller than the data cardinality (minus one)'
k=k+1
n,dimension = data.shape
ind = []
dist = []
if(dimension<10):
rbp = RandomBinaryProjections('rbp', dimension)
else:
rbp = RandomBinaryProjections('rbp',10)
engine = Engine(dimension, lshashes=[rbp], vector_filters=[NearestFilter(k)])
for i in range(n):
engine.store_vector(data[i], i)
for i in range(n):
N = engine.neighbours(data[i])
ind.append([x[1] for x in N][1:])
dist.append([x[2] for x in N][1:])
return N,dist,ind
Example 6: main
# Required imports: from nearpy import Engine [as alias]
# Or: from nearpy.Engine import neighbours [as alias]
def main(args):
""" Main entry.
"""
data = Dataset(args.dataset)
num, dim = data.base.shape
# We are looking for the ten closest neighbours
nearest = NearestFilter(args.topk)
# We want unique candidates
unique = UniqueFilter()
# Create engines for all configurations
for nbit, ntbl in itertools.product(args.nbits, args.ntbls):
logging.info("Creating Engine ...")
lshashes = [RandomBinaryProjections('rbp%d' % i, nbit)
for i in xrange(ntbl)]
# Create engine with this configuration
engine = Engine(dim, lshashes=lshashes,
vector_filters=[unique, nearest])
logging.info("\tDone!")
logging.info("Adding items ...")
for i in xrange(num):
engine.store_vector(data.base[i, :], i)
if i % 100000 == 0:
logging.info("\t%d/%d" % (i, data.nbae))
logging.info("\tDone!")
ids = np.zeros((data.nqry, args.topk), np.int)
logging.info("Searching ...")
tic()
for i in xrange(data.nqry):
reti = [y for x, y, z in
np.array(engine.neighbours(data.query[i]))]
ids[i, :len(reti)] = reti
if i % 100 == 0:
logging.info("\t%d/%d" % (i, data.nqry))
time_costs = toc()
logging.info("\tDone!")
report = os.path.join(args.exp_dir, "report.txt")
with open(report, "a") as rptf:
rptf.write("*" * 64 + "\n")
rptf.write("* %s\n" % time.asctime())
rptf.write("*" * 64 + "\n")
r_at_k = compute_stats(data.groundtruth, ids, args.topk)[-1][-1]
with open(report, "a") as rptf:
rptf.write("=" * 64 + "\n")
rptf.write("index_%s-nbit_%d-ntbl_%d\n" % ("NearPy", nbit, ntbl))
rptf.write("-" * 64 + "\n")
rptf.write("[email protected]%-8d%.4f\n" % (args.topk, r_at_k))
rptf.write("time cost (ms): %.3f\n" %
(time_costs * 1000 / data.nqry))
Example 7: TestEngine
# Required imports: from nearpy import Engine [as alias]
# Or: from nearpy.Engine import neighbours [as alias]
class TestEngine(unittest.TestCase):
def setUp(self):
self.engine = Engine(1000)
def test_storage_issue(self):
engine1 = Engine(100)
engine2 = Engine(100)
for k in range(1000):
x = numpy.random.randn(100)
x_data = 'data'
engine1.store_vector(x, x_data)
# Each engine should have its own default storage
self.assertTrue(len(engine2.storage.buckets)==0)
def test_retrieval(self):
for k in range(100):
self.engine.clean_all_buckets()
x = numpy.random.randn(1000)
x_data = 'data'
self.engine.store_vector(x, x_data)
n = self.engine.neighbours(x)
y = n[0][0]
y_data = n[0][1]
y_distance = n[0][2]
self.assertTrue((y == x).all())
self.assertEqual(y_data, x_data)
self.assertEqual(y_distance, 0.0)
def test_retrieval_sparse(self):
for k in range(100):
self.engine.clean_all_buckets()
x = scipy.sparse.rand(1000, 1, density=0.05)
x_data = 'data'
self.engine.store_vector(x, x_data)
n = self.engine.neighbours(x)
y = n[0][0]
y_data = n[0][1]
y_distance = n[0][2]
self.assertTrue((y - x).sum() == 0.0)
self.assertEqual(y_data, x_data)
self.assertEqual(y_distance, 0.0)
Example 8: TestEngine
# Required imports: from nearpy import Engine [as alias]
# Or: from nearpy.Engine import neighbours [as alias]
class TestEngine(unittest.TestCase):
def setUp(self):
self.engine = Engine(1000)
def test_storage_issue(self):
engine1 = Engine(100)
engine2 = Engine(100)
for k in range(1000):
x = numpy.random.randn(100)
x_data = 'data'
engine1.store_vector(x, x_data)
# Each engine should have its own default storage
self.assertTrue(len(engine2.storage.buckets)==0)
def test_retrieval(self):
for k in range(100):
self.engine.clean_all_buckets()
x = numpy.random.randn(1000)
x_data = 'data'
self.engine.store_vector(x, x_data)
n = self.engine.neighbours(x)
y, y_data, y_distance = n[0]
normalized_x = unitvec(x)
delta = 0.000000001
self.assertAlmostEqual(numpy.abs((normalized_x - y)).max(), 0, delta=delta)
self.assertEqual(y_data, x_data)
self.assertAlmostEqual(y_distance, 0.0, delta=delta)
def test_retrieval_sparse(self):
for k in range(100):
self.engine.clean_all_buckets()
x = scipy.sparse.rand(1000, 1, density=0.05)
x_data = 'data'
self.engine.store_vector(x, x_data)
n = self.engine.neighbours(x)
y, y_data, y_distance = n[0]
normalized_x = unitvec(x)
delta = 0.000000001
self.assertAlmostEqual(numpy.abs((normalized_x - y)).max(), 0, delta=delta)
self.assertEqual(y_data, x_data)
self.assertAlmostEqual(y_distance, 0.0, delta=delta)
Example 9: test_sparse
# Required imports: from nearpy import Engine [as alias]
# Or: from nearpy.Engine import neighbours [as alias]
def test_sparse():
dim = 500
num_train = 1000
num_test = 1
    train_data = ss.rand(dim, num_train)  # pickle.load('/home/jmahler/Downloads/feature_objects.p')
test_data = ss.rand(dim, num_test)
rbp = RandomBinaryProjections('rbp', 10)
engine = Engine(dim, lshashes=[rbp])
for i in range(num_train):
engine.store_vector(train_data.getcol(i))
for j in range(num_test):
N = engine.neighbours(test_data.getcol(j))
print N
IPython.embed()
Example 10: __init__
# Required imports: from nearpy import Engine [as alias]
# Or: from nearpy.Engine import neighbours [as alias]
class lshsearcher:
def __init__(self):
self.__dimension = None
self.__engine_perm = None
self.__permutations = None
def _set_confval(self, dimension=None):
if dimension is None:
return None
else:
self.__dimension = dimension
def _engine_on(self):
# Create permutations meta-hash
self.__permutations = HashPermutations('permut')
# Create binary hash as child hash
rbp_perm = RandomBinaryProjections('rbp_perm', 14)
rbp_conf = {'num_permutation':50,'beam_size':10,'num_neighbour':100}
# Add rbp as child hash of permutations hash
self.__permutations.add_child_hash(rbp_perm, rbp_conf)
# Create engine
self.__engine_perm = Engine(self.__dimension, lshashes=[self.__permutations], distance=CosineDistance())
def conf(self, dimension):
self._set_confval(dimension)
self._engine_on()
def getData(self, v):
if self.__engine_perm is not None:
self.__engine_perm.store_vector(v)
def commitData(self):
if self.__permutations is not None:
self.__permutations.build_permuted_index()
def find(self, v):
if self.__engine_perm is not None:
return self.__engine_perm.neighbours(v)
Example 11: __init__
# Required imports: from nearpy import Engine [as alias]
# Or: from nearpy.Engine import neighbours [as alias]
class LSHSearch:
def __init__(self, feature_file, dimension, neighbour, lsh_project_num):
self.feature_file = feature_file
self.dimension = dimension
self.neighbour = neighbour
self.face_feature = defaultdict(str)
self.ground_truth = defaultdict(int)
# Create permutations meta-hash
permutations2 = HashPermutationMapper('permut2')
tmp_feature = defaultdict(str)
with open(feature_file, 'rb') as f:
reader = csv.reader(f, delimiter=' ')
for name, feature in reader:
tmp_feature[name] = feature
matrix = []
label = []
for item in tmp_feature.keys():
v = map(float, tmp_feature[item].split(','))
matrix.append(np.array(v))
label.append(item)
random.shuffle(matrix)
        print 'PCA matrix : ', len(matrix)
rbp_perm2 = PCABinaryProjections('testPCABPHash', lsh_project_num, matrix)
permutations2.add_child_hash(rbp_perm2)
# Create engine
nearest = NearestFilter(self.neighbour)
self.engine = Engine(self.dimension, lshashes=[permutations2], distance=CosineDistance(), vector_filters=[nearest])
def build(self):
with open(self.feature_file, 'rb') as f:
reader = csv.reader(f, delimiter=' ')
for name, feature in reader:
self.face_feature[name] = feature
person = '_'.join(name.split('_')[:-1])
self.ground_truth[person] += 1
for item in self.face_feature.keys():
v = map(float, self.face_feature[item].split(','))
self.engine.store_vector(v, item)
def query(self, person_list):
dists = []
scores = []
for person in person_list:
query = map(float, self.face_feature[person].split(','))
            print '\nNeighbour distances with multiple binary hashes:'
print ' -> Candidate count is %d' % self.engine.candidate_count(query)
results = self.engine.neighbours(query)
dists = dists + [x[1] for x in results]
scores = scores + [x[2] for x in results]
t_num = [self.ground_truth['_'.join(x.split('_')[:-1])] for x in dists]
res = zip(dists, scores, t_num)
res.sort(key = lambda t: t[1])
res1 = self.f7(res, person_list)
return res1[:self.neighbour]
def true_num(self, person):
return self.ground_truth[person]
def f7(self, zip_seq, person_list):
seen = set()
seen_add = seen.add
return [ x for x in zip_seq if not (x[0] in seen or seen_add(x[0]) or x[0] in person_list)]
Example 12: TestRandomBinaryProjectionTree
# Required imports: from nearpy import Engine [as alias]
# Or: from nearpy.Engine import neighbours [as alias]
class TestRandomBinaryProjectionTree(unittest.TestCase):
def setUp(self):
self.memory = MemoryStorage()
self.redis_object = Redis(host='localhost',
port=6379, db=0)
self.redis_storage = RedisStorage(self.redis_object)
def test_retrieval(self):
# We want 12 projections, 20 results at least
rbpt = RandomBinaryProjectionTree('testHash', 12, 20)
# Create engine for 100 dimensional feature space, do not forget to set
# nearest filter to 20, because default is 10
self.engine = Engine(100, lshashes=[rbpt], vector_filters=[NearestFilter(20)])
# First insert 200000 random vectors
#print 'Indexing...'
for k in range(200000):
x = numpy.random.randn(100)
x_data = 'data'
self.engine.store_vector(x, x_data)
# Now do random queries and check result set size
#print 'Querying...'
for k in range(10):
x = numpy.random.randn(100)
n = self.engine.neighbours(x)
#print "Candidate count = %d" % self.engine.candidate_count(x)
#print "Result size = %d" % len(n)
self.assertEqual(len(n), 20)
def test_storage_memory(self):
# We want 10 projections, 20 results at least
rbpt = RandomBinaryProjectionTree('testHash', 10, 20)
# Create engine for 100 dimensional feature space
self.engine = Engine(100, lshashes=[rbpt], vector_filters=[NearestFilter(20)])
# First insert 2000 random vectors
for k in range(2000):
x = numpy.random.randn(100)
x_data = 'data'
self.engine.store_vector(x, x_data)
self.memory.store_hash_configuration(rbpt)
rbpt2 = RandomBinaryProjectionTree(None, None, None)
rbpt2.apply_config(self.memory.load_hash_configuration('testHash'))
self.assertEqual(rbpt.dim, rbpt2.dim)
self.assertEqual(rbpt.hash_name, rbpt2.hash_name)
self.assertEqual(rbpt.projection_count, rbpt2.projection_count)
for i in range(rbpt.normals.shape[0]):
for j in range(rbpt.normals.shape[1]):
self.assertEqual(rbpt.normals[i, j], rbpt2.normals[i, j])
# Now do random queries and check result set size
for k in range(10):
x = numpy.random.randn(100)
keys1 = rbpt.hash_vector(x, querying=True)
keys2 = rbpt2.hash_vector(x, querying=True)
self.assertEqual(len(keys1), len(keys2))
for k in range(len(keys1)):
self.assertEqual(keys1[k], keys2[k])
def test_storage_redis(self):
# We want 10 projections, 20 results at least
rbpt = RandomBinaryProjectionTree('testHash', 10, 20)
# Create engine for 100 dimensional feature space
self.engine = Engine(100, lshashes=[rbpt], vector_filters=[NearestFilter(20)])
# First insert 2000 random vectors
for k in range(2000):
x = numpy.random.randn(100)
x_data = 'data'
self.engine.store_vector(x, x_data)
self.redis_storage.store_hash_configuration(rbpt)
rbpt2 = RandomBinaryProjectionTree(None, None, None)
rbpt2.apply_config(self.redis_storage.load_hash_configuration('testHash'))
self.assertEqual(rbpt.dim, rbpt2.dim)
self.assertEqual(rbpt.hash_name, rbpt2.hash_name)
self.assertEqual(rbpt.projection_count, rbpt2.projection_count)
for i in range(rbpt.normals.shape[0]):
for j in range(rbpt.normals.shape[1]):
self.assertEqual(rbpt.normals[i, j], rbpt2.normals[i, j])
# Now do random queries and check result set size
for k in range(10):
x = numpy.random.randn(100)
keys1 = rbpt.hash_vector(x, querying=True)
keys2 = rbpt2.hash_vector(x, querying=True)
self.assertEqual(len(keys1), len(keys2))
#......... (part of the code omitted) .........
Example 13: __init__
# Required imports: from nearpy import Engine [as alias]
# Or: from nearpy.Engine import neighbours [as alias]
#......... (part of the code omitted) .........
self.engine_ = Engine(self.KPCA_.alphas_.shape[1], lshashes=[rbp])
transformed_vectors = self.KPCA_.transform(vector_set.T)
for i in range(len(list(self.training_))):
#vector=vector_set[:,i]
#vector=np.reshape(vector,(self.biggest,1))
#vector=self.KPCA_.transform(vector)
self.engine_.store_vector(transformed_vectors[i,:], self.training_[i])
def load_FICA(self,vector_set):
rbp = RandomBinaryProjections('rbp',10)
self.engine_ = Engine(self.biggest, lshashes=[rbp])
for i in range(len(list(self.training_))):
vector=vector_set[:,i]
vector=np.reshape(vector,(self.biggest,1))
vector=self.FICA_.transform(vector)
self.engine_.store_vector(vector[:,0],self.training_[i])
def load_DL(self,vector_set):
rbp = RandomBinaryProjections('rbp',10)
self.engine_ = Engine(self.biggest, lshashes=[rbp])
for i in range(len(list(self.training_))):
vector=vector_set[:,i]
vector=np.reshape(vector,(self.biggest,1))
vector=self.DL_[-1].transform(vector)
self.engine_.store_vector(vector[:,0],self.training_[i])
def engine_query(self,test_vector):
"""
queries the engine with a (self.biggest,1) dimension vector and returns the file_names of nearest
neighbors and the results
"""
#print test_vector
#reshaped=np.reshape(test_vector,(self.biggest,1))
results = self.engine_.neighbours(test_vector.T)
file_names = [i[1] for i in results]
return file_names, results
def setup_confusion(self):
"""
reinitializes the self.confusion_ confusion matrix variable
"""
self.confusion_={}
self.confusion_[UNKNOWN_TAG] = {}
for file_ in self.all_files_:
category = cat50_file_category(file_)
self.confusion_[category] = {}
for query_cat in self.confusion_.keys():
for pred_cat in self.confusion_.keys():
self.confusion_[query_cat][pred_cat] = 0
"""
Makes a test vector by taking in an SDF, reshaping it, normalizing it, then returns a transformed
version of that vector based on the corresponding decomposition model that was already trained
"""
def make_test_vector(self,sdf_array,vector_type):
if vector_type=="PCA":
return self.make_PCA_test_vector(sdf_array)
elif vector_type=="FA":
return self.make_FA_test_vector(sdf_array)
elif vector_type=="KPCA":
return self.make_KPCA_test_vector(sdf_array)
elif vector_type=="FICA":
return self.make_FICA_test_vector(sdf_array)
elif vector_type=="DL":
return self.make_DL_test_vector(sdf_array)
Example 14: example1
# Required imports: from nearpy import Engine [as alias]
# Or: from nearpy.Engine import neighbours [as alias]
def example1():
# Dimension of feature space
DIM = 100
# Number of data points (dont do too much because of exact search)
POINTS = 10000
print 'Creating engines'
# We want 12 projections, 20 results at least
rbpt = RandomBinaryProjectionTree('rbpt', 20, 20)
# Create engine 1
engine_rbpt = Engine(DIM, lshashes=[rbpt], distance=CosineDistance())
# Create binary hash as child hash
rbp = RandomBinaryProjections('rbp1', 20)
# Create engine 2
engine = Engine(DIM, lshashes=[rbp], distance=CosineDistance())
# Create permutations meta-hash
permutations = HashPermutations('permut')
# Create binary hash as child hash
rbp_perm = RandomBinaryProjections('rbp_perm', 20)
rbp_conf = {'num_permutation':50,'beam_size':10,'num_neighbour':100}
# Add rbp as child hash of permutations hash
permutations.add_child_hash(rbp_perm, rbp_conf)
# Create engine 3
engine_perm = Engine(DIM, lshashes=[permutations], distance=CosineDistance())
# Create permutations meta-hash
permutations2 = HashPermutationMapper('permut2')
# Create binary hash as child hash
rbp_perm2 = RandomBinaryProjections('rbp_perm2', 12)
# Add rbp as child hash of permutations hash
permutations2.add_child_hash(rbp_perm2)
# Create engine 3
engine_perm2 = Engine(DIM, lshashes=[permutations2], distance=CosineDistance())
print 'Indexing %d random vectors of dimension %d' % (POINTS, DIM)
# First index some random vectors
matrix = numpy.zeros((POINTS,DIM))
for i in xrange(POINTS):
v = numpy.random.randn(DIM)
matrix[i] = v
engine.store_vector(v)
engine_rbpt.store_vector(v)
engine_perm.store_vector(v)
engine_perm2.store_vector(v)
print 'Buckets 1 = %d' % len(engine.storage.buckets['rbp1'].keys())
print 'Buckets 2 = %d' % len(engine_rbpt.storage.buckets['rbpt'].keys())
print 'Building permuted index for HashPermutations'
# Then update permuted index
permutations.build_permuted_index()
print 'Generate random data'
# Get random query vector
query = numpy.random.randn(DIM)
# Do random query on engine 1
print '\nNeighbour distances with RandomBinaryProjectionTree:'
print ' -> Candidate count is %d' % engine_rbpt.candidate_count(query)
results = engine_rbpt.neighbours(query)
dists = [x[2] for x in results]
print dists
# Do random query on engine 2
print '\nNeighbour distances with RandomBinaryProjections:'
print ' -> Candidate count is %d' % engine.candidate_count(query)
results = engine.neighbours(query)
dists = [x[2] for x in results]
print dists
# Do random query on engine 3
print '\nNeighbour distances with HashPermutations:'
print ' -> Candidate count is %d' % engine_perm.candidate_count(query)
results = engine_perm.neighbours(query)
dists = [x[2] for x in results]
print dists
# Do random query on engine 4
print '\nNeighbour distances with HashPermutations2:'
print ' -> Candidate count is %d' % engine_perm2.candidate_count(query)
results = engine_perm2.neighbours(query)
dists = [x[2] for x in results]
print dists
#......... (part of the code omitted) .........
Example 15: __init__
# Required imports: from nearpy import Engine [as alias]
# Or: from nearpy.Engine import neighbours [as alias]
class LSHSearch:
def __init__(self, feature_file, dimension, neighbour, lsh_project_num):
self.feature_file = feature_file
self.dimension = dimension
self.neighbour = neighbour
self.face_feature = defaultdict(str)
self.ground_truth = defaultdict(int)
# Create permutations meta-hash
self.permutations2 = HashPermutationMapper('permut2')
tmp_feature = defaultdict(str)
with open(feature_file, 'rb') as f:
reader = csv.reader(f, delimiter=' ')
for name, feature in reader:
tmp_feature[name] = feature
matrix = []
label = []
for item in tmp_feature.keys():
v = map(float, tmp_feature[item].split(','))
matrix.append(np.array(v))
label.append(item)
random.shuffle(matrix)
        print 'PCA matrix : ', len(matrix)
rbp_perm2 = PCABinaryProjections(
'testPCABPHash', lsh_project_num, matrix)
self.permutations2.add_child_hash(rbp_perm2)
# Create engine
nearest = NearestFilter(self.neighbour)
self.engine = Engine(
self.dimension,
lshashes=[self.permutations2],
distance=CosineDistance(),
vector_filters=[nearest])
def build(self):
with open(self.feature_file, 'rb') as f:
reader = csv.reader(f, delimiter=' ')
for name, feature in reader:
self.face_feature[name] = feature
person = '_'.join(name.split('_')[:-1])
self.ground_truth[person] += 1
for item in self.face_feature.keys():
v = map(float, self.face_feature[item].split(','))
self.engine.store_vector(v, item)
def update(self, person, feature):
print feature
v = map(float, feature.split(','))
epoch_time = long(time.time())
f_name = person + '_' + str(epoch_time)
print f_name
self.engine.store_vector(v, f_name)
def query(self, person_feature):
dists = []
scores = []
query = map(float, person_feature.split(','))
        # print '\nNeighbour distances with multiple binary hashes:'
# print ' -> Candidate count is %d' % self.engine.candidate_count(query)
results = self.engine.neighbours(query)
dists = dists + [x[1] for x in results]
scores = scores + [x[2] for x in results]
res = zip(dists, scores)
res.sort(key=lambda t: t[1])
return res[:self.neighbour]