本文整理匯總了Python中nearpy.Engine類的典型用法代碼示例。如果您正苦於以下問題:Python Engine類的具體用法?Python Engine怎麼用?Python Engine使用的例子?那麼,這裡精選的類代碼示例或許可以為您提供幫助。
在下文中一共展示了Engine類的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: StateDBEngine
class StateDBEngine(object):
    """Small vector -> payload store backed by a nearpy ``Engine``.

    Vectors are stored together with an arbitrary payload. ``lookup``
    returns the payload of the first neighbour reported by the engine,
    but only if that neighbour is closer than a distance threshold.
    """

    def __init__(self):
        # Set up the nearpy index: engine dimension 4, hashed with a
        # RandomBinaryProjections hash configured with 100.
        self.dim = 4
        self.rbp = RandomBinaryProjections('rbp', 100)
        self.engine = Engine(self.dim, lshashes=[self.rbp])
        # Performance counter: number of calls to add().
        self.counter = 0

    def add(self, x, data):
        """Store vector ``x`` with payload ``data`` and bump the counter."""
        self.engine.store_vector(x, data)
        self.counter += 1

    def lookup(self, x, THRESHOLD=0.1):
        """Return the payload stored next to ``x``, or ``None``.

        ``None`` is returned when the engine reports no neighbours, or
        when the first reported neighbour is not strictly closer than
        ``THRESHOLD``.
        """
        hits = self.engine.neighbours(x)
        if len(hits) == 0:
            return None
        # Each hit is unpacked as a (vector, payload, distance) triple.
        _, payload, distance = hits[0]
        return payload if distance < THRESHOLD else None
示例2: index_user_vectors
def index_user_vectors():
    """Build the global ``engine_perm`` index over ``user_vector``.

    A RandomBinaryProjections hash is wrapped in a HashPermutations
    meta-hash, every entry of the module-level ``user_vector`` mapping is
    stored with its key as payload, and the permuted index is built
    afterwards. Progress and the elapsed wall-clock time are printed.

    Reads the module-level names ``k_dimen``, ``d_dimen`` and
    ``user_vector``; rebinds the module-level ``engine_perm``.
    """
    global engine_perm

    print('Performing indexing with HashPermutations...')
    t0 = time.time()
    print('%s %s' % (k_dimen, d_dimen))

    rbp_perm = RandomBinaryProjections('rbp_perm', d_dimen)
    # The child hash is reset explicitly for k_dimen before it is attached
    # to the meta-hash.
    rbp_perm.reset(k_dimen)

    # Create permutations meta-hash
    permutations = HashPermutations('permut')
    rbp_conf = {'num_permutation': 50, 'beam_size': 10, 'num_neighbour': 250}
    # Add rbp as child hash of permutations hash
    permutations.add_child_hash(rbp_perm, rbp_conf)

    # Create engine
    engine_perm = Engine(k_dimen, lshashes=[permutations],
                         distance=CosineDistance())
    for u in user_vector:
        engine_perm.store_vector(user_vector[u], data=u)

    # Then update permuted index
    permutations.build_permuted_index()

    t1 = time.time()
    # Fixed: the value is now interpolated into the message; previously the
    # raw format string (with a literal "%f") was printed next to it.
    print('Indexing took %f seconds' % (t1 - t0))
示例3: TestEngine
class TestEngine(unittest.TestCase):
    """Round-trip tests: a stored vector must come back as its own neighbour."""

    def setUp(self):
        # Engine with dimension 1000 and otherwise default configuration.
        self.engine = Engine(1000)

    def test_retrieval(self):
        """Dense vectors: store one, query it, expect an exact match."""
        for _ in range(100):
            self.engine.clean_all_buckets()
            vec = numpy.random.randn(1000)
            payload = 'data'
            self.engine.store_vector(vec, payload)
            best = self.engine.neighbours(vec)[0]
            found_vec = best[0]
            found_payload = best[1]
            found_distance = best[2]
            self.assertTrue((found_vec == vec).all())
            self.assertEqual(found_payload, payload)
            self.assertEqual(found_distance, 0.0)

    def test_retrieval_sparse(self):
        """Sparse column vectors: same round trip as the dense case."""
        for _ in range(100):
            self.engine.clean_all_buckets()
            vec = scipy.sparse.rand(1000, 1, density=0.05)
            payload = 'data'
            self.engine.store_vector(vec, payload)
            best = self.engine.neighbours(vec)[0]
            found_vec = best[0]
            found_payload = best[1]
            found_distance = best[2]
            # Equality is checked through the sum of the difference.
            self.assertTrue((found_vec - vec).sum() == 0.0)
            self.assertEqual(found_payload, payload)
            self.assertEqual(found_distance, 0.0)
示例4: knn
def knn(data,k):
    """Approximate k-nearest-neighbour search of every row against all rows.

    All rows of ``data`` are stored in a nearpy engine with their row index
    as payload, then each row is queried. The first hit of every query is
    dropped (presumably the query row itself -- verify), which is why
    ``k + 1`` neighbours are requested.

    Returns a tuple ``(N, dist, ind)``: ``N`` is the raw neighbour list of
    the *last* query, ``dist`` and ``ind`` hold, per row, the distances and
    the payload indices of the remaining hits.
    """
    assert k<=len(data)-1, 'The number of neighbors must be smaller than the data cardinality (minus one)'
    k += 1
    n, dimension = data.shape

    # Use at most 10 projections, fewer for low-dimensional data.
    projection_count = min(dimension, 10)
    rbp = RandomBinaryProjections('rbp', projection_count)
    engine = Engine(dimension, lshashes=[rbp],
                    vector_filters=[NearestFilter(k)])

    for row in range(n):
        engine.store_vector(data[row], row)

    ind, dist = [], []
    for row in range(n):
        N = engine.neighbours(data[row])
        ind.append([hit[1] for hit in N][1:])
        dist.append([hit[2] for hit in N][1:])
    return N, dist, ind
示例5: main
def main(args):
    """Benchmark NearPy engines over a grid of hash configurations.

    For every ``(nbit, ntbl)`` pair from ``args.nbits`` x ``args.ntbls`` an
    engine with ``ntbl`` RandomBinaryProjections hashes (each configured
    with ``nbit``) is filled with the rows of ``data.base``, every entry of
    ``data.query`` is searched, and a summary is appended to
    ``<args.exp_dir>/report.txt``.

    Args:
        args: object providing ``dataset``, ``topk``, ``nbits``, ``ntbls``
            and ``exp_dir``.
    """
    data = Dataset(args.dataset)
    num, dim = data.base.shape

    # Filter keeping the args.topk closest candidates
    nearest = NearestFilter(args.topk)
    # We want unique candidates
    unique = UniqueFilter()

    # Create engines for all configurations
    for nbit, ntbl in itertools.product(args.nbits, args.ntbls):
        logging.info("Creating Engine ...")
        lshashes = [RandomBinaryProjections('rbp%d' % i, nbit)
                    for i in range(ntbl)]

        # Create engine with this configuration
        engine = Engine(dim, lshashes=lshashes,
                        vector_filters=[unique, nearest])
        logging.info("\tDone!")

        logging.info("Adding items ...")
        for i in range(num):
            engine.store_vector(data.base[i, :], i)
            if i % 100000 == 0:
                # Fixed: report progress against `num`; the previous
                # `data.nbae` attribute was read on the very first iteration.
                logging.info("\t%d/%d" % (i, num))
        logging.info("\tDone!")

        # Fixed: `np.int` was removed from NumPy; it was an alias of the
        # builtin `int`, so the dtype is unchanged.
        ids = np.zeros((data.nqry, args.topk), int)
        logging.info("Searching ...")
        tic()
        for i in range(data.nqry):
            # Neighbours are (vector, payload, distance) triples; iterate
            # them directly instead of wrapping the ragged tuples in
            # np.array first.
            reti = [payload for _, payload, _ in
                    engine.neighbours(data.query[i])]
            ids[i, :len(reti)] = reti
            if i % 100 == 0:
                logging.info("\t%d/%d" % (i, data.nqry))
        time_costs = toc()
        logging.info("\tDone!")

        report = os.path.join(args.exp_dir, "report.txt")
        with open(report, "a") as rptf:
            rptf.write("*" * 64 + "\n")
            rptf.write("* %s\n" % time.asctime())
            rptf.write("*" * 64 + "\n")

        r_at_k = compute_stats(data.groundtruth, ids, args.topk)[-1][-1]

        with open(report, "a") as rptf:
            rptf.write("=" * 64 + "\n")
            rptf.write("index_%s-nbit_%d-ntbl_%d\n" % ("NearPy", nbit, ntbl))
            rptf.write("-" * 64 + "\n")
            # NOTE(review): "[email protected]" looks like an e-mail
            # obfuscation artifact replacing the original metric label
            # (probably something like "recall@..."); restore it from the
            # upstream source -- it is left untouched here.
            rptf.write("[email protected]%-8d%.4f\n" % (args.topk, r_at_k))
            rptf.write("time cost (ms): %.3f\n" %
                       (time_costs * 1000 / data.nqry))
示例6: build_index
def build_index(self, X):
    """Return a new nearpy engine holding every row of ``X``.

    The engine dimension is ``X.shape[1]``; row ``i`` is stored with the
    payload string ``'data_<i>'``.
    """
    row_count = X.shape[0]
    engine = Engine(X.shape[1],
                    lshashes=[RandomBinaryProjections('rbp', 32)])
    for row in range(row_count):
        engine.store_vector(X[row], 'data_%d' % row)
    return engine
示例7: test_storage_issue
def test_storage_issue(self):
    """Storing into one engine must not leak into another engine's storage."""
    filled = Engine(100)
    untouched = Engine(100)

    for _ in range(1000):
        vec = numpy.random.randn(100)
        payload = 'data'
        filled.store_vector(vec, payload)

    # Each engine should have its own default storage
    self.assertTrue(len(untouched.storage.buckets) == 0)
示例8: get_engine
def get_engine(self, vocab, vecs):
    """Build a nearpy engine over the rows of ``vecs``.

    A PCABinaryProjections hash is created from the transposed first 1000
    rows of ``vecs``; each row is then stored with its row index as
    payload. ``vocab`` is not used here.
    """
    logging.info('{} hash functions'.format(self.args.projections))
    training_sample = vecs[:1000,:].T
    pca_hash = PCABinaryProjections('ne1v', self.args.projections,
                                    training_sample)
    # NOTE(review): distance and vector_filters are passed as empty lists,
    # presumably to make the engine return raw, unfiltered candidates --
    # confirm against nearpy's Engine.
    engine = Engine(vecs.shape[1], lshashes=[pca_hash], distance=[],
                    vector_filters=[])
    for row_index, row in enumerate(vecs):
        # Log progress every 100000 rows, starting with row 0.
        if row_index % 100000 == 0:
            logging.info(
                '{} words added to nearpy engine'.format(row_index))
        engine.store_vector(row, row_index)
    return engine
示例9: test_storage_memory
def test_storage_memory(self):
    """A hash configuration stored in ``self.memory`` must round-trip.

    A RandomBinaryProjectionTree is filled through an engine, its
    configuration is stored and loaded into a second, blank tree, and both
    trees must then agree on their attributes and on the keys produced for
    random query vectors.
    """
    # We want 10 projections, 20 results at least
    tree = RandomBinaryProjectionTree('testHash', 10, 20)

    # Create engine for 100 dimensional feature space
    self.engine = Engine(100, lshashes=[tree],
                         vector_filters=[NearestFilter(20)])

    # First insert 2000 random vectors
    for _ in range(2000):
        vec = numpy.random.randn(100)
        payload = 'data'
        self.engine.store_vector(vec, payload)

    self.memory.store_hash_configuration(tree)

    restored = RandomBinaryProjectionTree(None, None, None)
    restored.apply_config(self.memory.load_hash_configuration('testHash'))

    self.assertEqual(tree.dim, restored.dim)
    self.assertEqual(tree.hash_name, restored.hash_name)
    self.assertEqual(tree.projection_count, restored.projection_count)

    # The normals matrices must match element by element.
    for row in range(tree.normals.shape[0]):
        for col in range(tree.normals.shape[1]):
            self.assertEqual(tree.normals[row, col],
                             restored.normals[row, col])

    # Now do random queries and check result set size
    for _ in range(10):
        vec = numpy.random.randn(100)
        expected_keys = tree.hash_vector(vec, querying=True)
        actual_keys = restored.hash_vector(vec, querying=True)
        self.assertEqual(len(expected_keys), len(actual_keys))
        for pos in range(len(expected_keys)):
            self.assertEqual(expected_keys[pos], actual_keys[pos])
示例10: __init__
def __init__(self):
    """Create the nearpy engine and reset the performance counter."""
    # Performance counter, starts at zero.
    self.counter = 0
    # nearpy index: engine dimension 4 with a single
    # RandomBinaryProjections hash configured with 100.
    self.dim = 4
    self.rbp = RandomBinaryProjections('rbp', 100)
    self.engine = Engine(self.dim, lshashes=[self.rbp])
示例11: __init__
def __init__(self, feature_file, dimension, neighbour, lsh_project_num):
    """Load feature vectors from ``feature_file`` and set up the engine.

    The file is read as space-delimited rows of ``name feature``, where
    ``feature`` is a comma-separated list of floats. A later row with the
    same name replaces the earlier one. The loaded vectors are shuffled
    and used as the training set of a PCABinaryProjections hash, which is
    attached to a HashPermutationMapper.

    Args:
        feature_file: path of the feature file.
        dimension: dimension passed to the engine.
        neighbour: passed to NearestFilter.
        lsh_project_num: passed to PCABinaryProjections.
    """
    self.feature_file = feature_file
    self.dimension = dimension
    self.neighbour = neighbour
    self.face_feature = defaultdict(str)
    self.ground_truth = defaultdict(int)

    # Create permutations meta-hash
    permutations2 = HashPermutationMapper('permut2')

    tmp_feature = defaultdict(str)
    # NOTE(review): binary mode is the Python 2 csv convention; on Python 3
    # csv.reader needs a text-mode file (newline='').
    with open(feature_file, 'rb') as f:
        reader = csv.reader(f, delimiter=' ')
        for name, feature in reader:
            tmp_feature[name] = feature

    # A list comprehension (rather than map) keeps np.array from receiving
    # a lazy map object on Python 3. The previously built, never-read
    # `label` list has been dropped.
    matrix = [np.array([float(value) for value in feature.split(',')])
              for feature in tmp_feature.values()]
    random.shuffle(matrix)

    # Same output as the former two-item print statement.
    print('PCA matric :  %d' % len(matrix))

    rbp_perm2 = PCABinaryProjections('testPCABPHash', lsh_project_num, matrix)
    permutations2.add_child_hash(rbp_perm2)

    # Create engine
    nearest = NearestFilter(self.neighbour)
    self.engine = Engine(self.dimension, lshashes=[permutations2],
                         distance=CosineDistance(), vector_filters=[nearest])
示例12: load_DL
def load_DL(self,vector_set):
    """Create ``self.engine_`` and index the transformed columns of ``vector_set``.

    Column ``i`` of ``vector_set`` is reshaped to ``(self.biggest, 1)``,
    passed through ``self.DL_[-1].transform`` and the first column of the
    result is stored with ``self.training_[i]`` as payload.
    """
    hasher = RandomBinaryProjections('rbp',10)
    self.engine_ = Engine(self.biggest, lshashes=[hasher])
    sample_count = len(list(self.training_))
    for idx in range(sample_count):
        column = np.reshape(vector_set[:,idx], (self.biggest,1))
        transformed = self.DL_[-1].transform(column)
        self.engine_.store_vector(transformed[:,0], self.training_[idx])
示例13: test_sparse
def test_sparse():
    """Manual smoke test: index random sparse columns and query one.

    Prints the neighbours of each test column and then drops into an
    interactive IPython shell.
    """
    dim = 500
    num_train = 1000
    num_test = 1

    # Random sparse matrices with one vector per column.
    train_data = ss.rand(dim, num_train)
    test_data = ss.rand(dim, num_test)

    engine = Engine(dim, lshashes=[RandomBinaryProjections('rbp', 10)])
    for col in range(num_train):
        engine.store_vector(train_data.getcol(col))

    for col in range(num_test):
        N = engine.neighbours(test_data.getcol(col))
        print(N)

    IPython.embed()
示例14: load_KPCA
def load_KPCA(self,vector_set):
    """Create ``self.engine_`` and index the KPCA-transformed vectors.

    ``vector_set`` is transposed and transformed in one call to
    ``self.KPCA_.transform``; row ``i`` of the result is stored with
    ``self.training_[i]`` as payload. The engine dimension is
    ``self.KPCA_.alphas_.shape[1]``.
    """
    hasher = RandomBinaryProjections('rbp',10)
    self.engine_ = Engine(self.KPCA_.alphas_.shape[1], lshashes=[hasher])
    # Transform everything at once instead of column by column.
    projected = self.KPCA_.transform(vector_set.T)
    sample_count = len(list(self.training_))
    for idx in range(sample_count):
        self.engine_.store_vector(projected[idx,:], self.training_[idx])
示例15: setUp
def setUp(self):
    """Create two engines sharing one binary hash: with and without permutations."""
    logging.basicConfig(level=logging.WARNING)

    # Binary hash used both as child hash and on its own.
    child_hash = RandomBinaryProjections('rbp1', 4)
    child_conf = {'num_permutation':50,'beam_size':10,'num_neighbour':100}

    # Permutations meta-hash with the binary hash as its child.
    self.permutations = HashPermutations('permut')
    self.permutations.add_child_hash(child_hash, child_conf)

    # Engine using the meta-hash and cosine distance.
    self.engine_perm = Engine(200, lshashes=[self.permutations],
                              distance=CosineDistance())

    # Engine using the plain binary hash, without the permutation meta-hash.
    self.engine = Engine(200, lshashes=[child_hash],
                         distance=CosineDistance())