本文整理汇总了Python中lshash.LSHash.query方法的典型用法代码示例。如果您正苦于以下问题：Python LSHash.query方法的具体用法？Python LSHash.query怎么用？Python LSHash.query使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类lshash.LSHash的用法示例。
在下文中一共展示了LSHash.query方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_lshash_redis_extra_val
# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import query [as 别名]
def test_lshash_redis_extra_val():
    """
    Test external lshash module with Redis storage and extra data.

    Every element is indexed twice together with its name.  The test then
    checks that no two keys of the first hash table hold identical item
    lists, that every stored entry is a known (vector, name) pair, and that
    querying an indexed vector returns a known pair at distance 0.

    Relies on the module-level fixtures ``num_elements``, ``els`` and
    ``el_names`` and on a Redis server at localhost:6379; db 15 is flushed
    before and after the test.
    """
    config = {"redis": {"host": 'localhost', "port": 6379, "db": 15}}
    sr = StrictRedis(**config['redis'])
    sr.flushdb()
    try:
        lsh = LSHash(6, 8, 1, config)
        for i in range(num_elements):
            lsh.index(list(els[i]), el_names[i])
            lsh.index(list(els[i]), el_names[i])  # multiple insertions
        hasht = lsh.hash_tables[0]
        itms = [hasht.get_list(k) for k in hasht.keys()]
        for itm in itms:
            assert itms.count(itm) == 1
            for el in itm:
                assert el[0] in els
                assert el[1] in el_names
        for el in els:
            # query() returns a list, so select the first entry only
            res = lsh.query(list(el), num_results=1, distance_func='euclidean')[0]
            # vector and name are in the first element of the tuple res[0]
            el_v, el_name = res[0]
            # the distance is in the second element of the tuple
            el_dist = res[1]
            assert el_v in els
            assert el_name in el_names
            assert el_dist == 0
        del lsh
    finally:
        # Clean the test database even when an assertion above fails, so a
        # failing run cannot leak entries into the next one.
        sr.flushdb()
示例2: test
# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import query [as 别名]
def test():
    """
    Hash and index every vector of the 'fc7' test set, then print a query.

    For each vector the hash computed with the first set of uniform planes
    is printed before the vector is indexed under its position.  Finally
    the result of querying with the fourth vector (3 results requested) is
    printed.  Always returns None.
    """
    import utils

    _true_ids, test_set = utils.load_test_set('fc7', 'raw', 0)
    input_dim = np.shape(test_set[0])[0]
    lsh = LSHash(128, input_dim, matrices_filename='lsh_planes.data.npz', overwrite=True)
    for position, point in enumerate(test_set):
        hash_value = lsh._hash(lsh.uniform_planes[0], point.tolist())
        print(hash_value)
        lsh.index(point, position)
    neighbours = lsh.query(test_set[3], 3)
    print(neighbours)
    return None
示例3: test_lshash_redis
# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import query [as 别名]
def test_lshash_redis():
    """
    Test external lshash module with Redis storage (no extra data).

    Every element is indexed twice; the library is expected to prevent the
    duplicate insertion.  The test checks that no two keys of the first
    hash table hold identical item lists, that every stored entry is a
    known vector, and that querying an indexed vector returns a known
    vector at distance 0.

    Relies on the module-level fixtures ``num_elements`` and ``els`` and on
    a Redis server at localhost:6379; db 15 is flushed before and after the
    test.
    """
    config = {"redis": {"host": 'localhost', "port": 6379, "db": 15}}
    sr = StrictRedis(**config['redis'])
    sr.flushdb()
    try:
        lsh = LSHash(6, 8, 1, config)
        for i in range(num_elements):
            lsh.index(list(els[i]))
            lsh.index(list(els[i]))  # multiple insertions should be prevented by the library
        hasht = lsh.hash_tables[0]
        itms = [hasht.get_list(k) for k in hasht.keys()]
        for itm in itms:
            # have multiple insertions been prevented?  Checked once per
            # bucket: the result does not depend on the inner loop variable.
            assert itms.count(itm) == 1
            for el in itm:
                assert el in els
        for el in els:
            # res is a tuple containing the vector and the distance
            res = lsh.query(list(el), num_results=1, distance_func='euclidean')[0]
            el_v, el_dist = res
            assert el_v in els
            assert el_dist == 0
        del lsh
    finally:
        # Clean the test database even when an assertion above fails.
        sr.flushdb()
示例4: eventIdentification
# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import query [as 别名]
def eventIdentification(dictionaryFile, corpusFile, outputFile):
    """
    Group the documents of a corpus into events and write them to a file.

    Each document is converted to a dense vector and looked up in an LSH
    index.  The first query hit accepted by ``getDistance`` decides which
    existing event the document joins; if there is no hit, or none is
    accepted, the document starts a new event.  Every vector is then added
    to the index.

    :param dictionaryFile: path loaded with ``corpora.Dictionary.load``
    :param corpusFile: path opened with ``corpora.MmCorpus``
    :param outputFile: path written with one event per line, as
        comma-separated document indices
    """
    outputVector = []  # outputVector[e] lists the document indices of event e
    tempDict = {}      # tuple(dense vector) -> position of its event in outputVector
    corpus = corpora.MmCorpus(corpusFile)
    dictionary = corpora.Dictionary.load(dictionaryFile)
    lsh = LSHash(20, len(dictionary))

    for index in range(len(corpus)):
        denseVector = getDenseVector(corpus[index], lsh.input_dim)

        event = None
        for candidate in lsh.query(denseVector):
            # The explicit comparison with True is kept because the return
            # type of getDistance is not visible here.
            if getDistance(candidate, denseVector) == True:  # noqa: E712
                # candidate[0] is used as the stored vector of the hit.
                event = tempDict[tuple(candidate[0])]
                break

        if event is None:
            # No similar document (empty result or no accepted hit):
            # open a new event for this one.
            event = len(outputVector)
            outputVector.append([index])
        else:
            outputVector[event].append(index)
        tempDict[tuple(denseVector)] = event

        # NOTE(review): indentation was lost in the source this was taken
        # from.  Indexing every vector is assumed, because assigned vectors
        # are also recorded in tempDict for later lookups -- confirm.
        lsh.index(denseVector)

    with open(outputFile, 'w') as out:
        for members in outputVector:
            out.write(",".join(str(member) for member in members) + "\n")
示例5: test_lshash
# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import query [as 别名]
def test_lshash():
    """
    Round-trip test of LSHash with its default storage.

    Each element is indexed twice.  Afterwards no two keys of the first
    hash table may hold identical item lists, every stored item must be a
    known element, and querying an element must return a known vector at
    distance 0.  Uses the module-level ``num_elements`` and ``els``.
    """
    lsh = LSHash(6, 8, 1)
    for idx in range(num_elements):
        for _ in range(2):  # multiple insertions
            lsh.index(list(els[idx]))

    table = lsh.hash_tables[0]
    buckets = [table.get_list(key) for key in table.keys()]
    for bucket in buckets:
        assert buckets.count(bucket) == 1
        for stored in bucket:
            assert stored in els

    for el in els:
        hits = lsh.query(list(el), num_results=1, distance_func='euclidean')
        # the first hit is a tuple containing the vector and the distance
        el_v, el_dist = hits[0]
        assert el_v in els
        assert el_dist == 0
    del lsh
示例6: LshIndexer
# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import query [as 别名]
class LshIndexer(Indexer):
    """Indexer that stores features in an LSHash instance (``self.store``)."""

    # Default settings; 'storage' is passed to LSHash as its fourth argument.
    PARAMETERS = {
        'hash_size': 6,
        'input_dim': 128,
        'num_of_hashtables': 1,
        'storage': {'redis': {'host': 'localhost', 'port': 6379}},
    }

    def initialize_store(self, parameters):
        """Build ``self.store`` from a mapping with the PARAMETERS keys."""
        hash_size = parameters['hash_size']
        input_dim = parameters['input_dim']
        table_count = parameters['num_of_hashtables']
        storage = parameters['storage']
        self.store = LSHash(hash_size, input_dim, table_count, storage)

    def index(self, features):
        """Index each feature's ``data`` with its ``file_id`` as extra value."""
        store = self.store
        for item in features:
            store.index(item.data, item.file_id)

    def query(self, feature, num_results=5):
        """Return the store's query result for *feature* (at most *num_results*)."""
        hits = self.store.query(feature, num_results)
        return hits
示例7: detect_subevent
# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import query [as 别名]
def detect_subevent(filename):
    """
    Cluster the documents of ``<filename>.mm`` into sub-events.

    Reads the dictionary from ``<filename>.dict`` and the corpus from
    ``<filename>.mm``.  A document whose LSH query returns nothing becomes
    the seed of a new sub-event and is added to the index; any other
    document is attached to the first query hit that is a known seed.
    Only seeds are indexed.

    The result is written to ``<filename>.out``, one line per seed:
    the seed index followed by the attached document indices, separated
    by ", ".
    """
    dictionaryFile = filename + ".dict"
    corpusFile = filename + ".mm"
    outputFile = filename + ".out"
    tempDict = {}    # sparse form of an indexed vector -> its document index
    outputdict = {}  # seed document index -> attached document indices
    corpus = corpora.MmCorpus(corpusFile)
    dictionary = corpora.Dictionary.load(dictionaryFile)
    lsh = LSHash(30, len(dictionary))

    for index in range(len(corpus)):
        denseVector = getDenseVector(corpus[index], lsh.input_dim)
        result = lsh.query(denseVector, num_results=50, distance_func="euclidean")
        if not result:
            # no similar tweets: this document seeds a new sub-event
            outputdict[index] = []
            tempDict[getSparseVector(denseVector)] = index
            lsh.index(denseVector)
        else:
            for r in result:
                # r[0] is used as the stored vector of the hit
                seed = tempDict[getSparseVector(r[0])]
                if seed in outputdict:
                    outputdict[seed].append(index)
                    break

    with open(outputFile, 'w') as out:
        for key in outputdict:
            members = [key] + outputdict[key]
            out.write(", ".join(str(member) for member in members) + "\n")
    print("Please check the output file: %s" % outputFile)
示例8: test_lshash_extra_val
# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import query [as 别名]
def test_lshash_extra_val():
    """
    Round-trip test of LSHash with an extra value stored per vector.

    Each element is indexed once together with its name.  Every item stored
    in the first hash table must be a known (vector, name) pair, and
    querying an element must return a known pair at distance 0.  Uses the
    module-level ``num_elements``, ``els`` and ``el_names``.
    """
    lsh = LSHash(6, 8, 1)
    for idx in range(num_elements):
        lsh.index(list(els[idx]), el_names[idx])

    table = lsh.hash_tables[0]
    buckets = [table.get_list(key) for key in table.keys()]
    for bucket in buckets:
        for stored in bucket:
            assert stored[0] in els
            assert stored[1] in el_names

    for el in els:
        hits = lsh.query(list(el), num_results=1, distance_func='euclidean')
        # query() returns a list, so only its first entry is inspected
        best = hits[0]
        # vector and name are in the first element of the tuple
        el_v, el_name = best[0]
        # the distance is in the second element of the tuple
        el_dist = best[1]
        assert el_v in els
        assert el_name in el_names
        assert el_dist == 0
    del lsh
示例9: subEventDetection
# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import query [as 别名]
def subEventDetection(dictionaryFile, corpusFile, outputFile):
    """
    Group the documents of a corpus into sub-events and write them out.

    Each document is converted to a dense vector and queried against an
    LSH index (up to 50 results, cosine distance).  The first hit accepted
    by ``getDistance`` decides which existing sub-event the document
    joins; otherwise the document starts a new sub-event.  Every vector is
    then added to the index.

    Fixes over the previous version:
    * ``assiged`` was a typo for ``assigned`` and raised NameError;
    * an empty query result skipped indexing, so the index stayed empty
      and no document could ever match another;
    * ``tempDict`` was read with the whole result entry as key although it
      is written with ``tuple(denseVector)``.

    :param dictionaryFile: path loaded with ``corpora.Dictionary.load``
    :param corpusFile: path opened with ``corpora.MmCorpus``
    :param outputFile: path written with one sub-event per line, as
        ", "-separated document indices
    """
    outputVector = []  # outputVector[c] lists the document indices of sub-event c
    tempDict = {}      # tuple(dense vector) -> position of its sub-event in outputVector
    corpus = corpora.MmCorpus(corpusFile)
    dictionary = corpora.Dictionary.load(dictionaryFile)
    lsh = LSHash(30, len(dictionary))

    for index in range(len(corpus)):
        denseVector = getDenseVector(corpus[index], lsh.input_dim)
        result = lsh.query(denseVector, num_results=50, distance_func="cosine")

        cluster = None
        for vector in result:
            # The explicit comparison with True is kept because the return
            # type of getDistance is not visible here.
            if getDistance(vector, denseVector) == True:  # noqa: E712
                # Assumes vector[0] is the stored vector of the hit, the
                # same key form eventIdentification uses -- TODO confirm.
                cluster = tempDict[tuple(vector[0])]
                break

        if cluster is None:
            # no similar tweets: open a new sub-event
            cluster = len(outputVector)
            outputVector.append([index])
        else:
            outputVector[cluster].append(index)

        lsh.index(denseVector)
        tempDict[tuple(denseVector)] = cluster

    with open(outputFile, 'w') as out:
        for members in outputVector:
            out.write(", ".join(str(member) for member in members) + "\n")
    print("Please check the output file: %s" % outputFile)
示例10: __init__
# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import query [as 别名]
class Searcher:
    """
    Nearest-neighbour search over a ``{key: vector}`` dataset using LSHash.

    The vectors are indexed with their keys as extra data, and queries
    return a ``{key: distance}`` mapping.
    """

    # Distance function names accepted by query().
    _DIST_FUNCTIONS = ["hamming", "euclidean", "true_euclidean", "centred_euclidean", "cosine", "l1norm"]
    # The LSHash instance; set by create_index().
    index = None

    def __init__(self, dataset):
        """Build the index from *dataset*, a mapping of key -> vector."""
        self.create_index(dataset)

    def create_index(self, items, hash_size=6):
        """
        (Re)build ``self.index`` from *items*.

        The input dimension is taken from the length of the first vector.

        :param items: mapping of key -> vector
        :param hash_size: first argument passed to ``LSHash``
        :returns: True
        :raises IndexError: if *items* is empty
        """
        if not items:
            # Same exception type the old ``items.values()[0]`` produced,
            # but with a message that names the real problem.
            raise IndexError("cannot build an index from an empty dataset")
        # next(iter(...)) works on Python 2 and 3 and avoids copying all
        # values just to look at the first one.
        input_dim = len(next(iter(items.values())))
        self.index = LSHash(hash_size, input_dim)
        for key in items:
            self.index.index(items[key], extra_data=key)
        return True

    def query(self, query_item, num_results=10, distance_function='cosine'):
        """
        Return ``{key: distance}`` for the neighbours of *query_item*.

        :raises ValueError: if *distance_function* is not one of
            ``_DIST_FUNCTIONS`` (ValueError is a subclass of the Exception
            raised previously, so existing handlers keep working)
        """
        if distance_function not in self._DIST_FUNCTIONS:
            raise ValueError("{0} not supported".format(distance_function))
        results = self.index.query(query_item, num_results=num_results, distance_func=distance_function)
        return self.parse_results(results)

    def parse_results(self, results):
        """
        Convert raw query results into a ``{key: distance}`` dict.

        Each entry is read as ``((vector, key), distance)``; entries with
        the same key overwrite each other, the last one wins.
        """
        return {entry[0][1]: entry[1] for entry in results}
示例11: vectorize
# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import query [as 别名]
def vectorize(string):
    """
    Encode *string* as a fixed-length integer vector of code points.

    Position ``i`` of the result holds ``ord(string[i])``; unused
    positions stay 0.

    :param string: text of at most 25 characters
    :returns: ``numpy.ndarray`` of length 25 with an integer dtype
    :raises IndexError: if *string* is longer than 25 characters
    """
    # ``numpy.int`` was only an alias of the builtin ``int`` and has been
    # removed from NumPy; the builtin gives the same dtype.
    vec = numpy.zeros(25, dtype=int)
    for position, char in enumerate(string):
        vec[position] = ord(char)
    return vec
def decode(vec):
    """
    Turn a vector of code points back into text, dropping NUL padding.

    Every element of *vec* is converted with ``int()`` and mapped to its
    character; characters equal to ``'\\x00'`` are skipped.

    :param vec: iterable of numeric code points
    :returns: the decoded text
    """
    try:
        to_char = unichr  # Python 2: builds unicode characters
    except NameError:
        to_char = chr  # Python 3: chr already returns text
    chars = (to_char(int(code)) for code in vec)
    # join() replaces the previous one-character-at-a-time concatenation.
    return u''.join(char for char in chars if char != u'\x00')
# Index every token of the pickled token list and look up the neighbours
# of 'orang' using the l1norm distance.
lsh = LSHash(1, 25, storage_config={'dict':'9'}, matrices_filename = '../advs/THE FIVE ORANGE PIPS.npz')
# NOTE(review): pickle.load can execute arbitrary code; only load token
# files from a trusted source.
# The file is opened in binary mode, as the pickle documentation requires,
# and closed by the with-block (the handle used to be leaked).
# NOTE(review): if the .tok file was written in text mode on Windows,
# confirm that it still loads in binary mode.
with open('../advs/THE FIVE ORANGE PIPS.tok', 'rb') as f:
    tok = pickle.load(f)
for word in tok:
    lsh.index(vectorize(word))
res = lsh.query(vectorize('orang'), num_results = 3, distance_func = 'l1norm')
print(len(res))
# r[0] is used as the stored vector of each hit
print([decode(r[0]) for r in res])
示例12: LSHash
# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import query [as 别名]
from lshash import LSHash
# Minimal LSHash example with LMDB storage: index three labelled
# 8-dimensional points and print the query result for a nearby point.
lsh = LSHash(
    hash_size=6,
    input_dim=8,
    num_hashtables=1,
    storage_config={"lmdb": {'path': '/Users/christianburger/Downloads/testlmdb'}},
)
for _point, _label in (
    ([1, 2, 3, 4, 5, 6, 7, 8], 'a'),
    ([2, 3, 4, 5, 6, 7, 8, 9], 'b'),
    ([10, 12, 99, 1, 5, 31, 2, 3], 'c'),
):
    lsh.index(_point, _label)
print(lsh.query([1, 2, 3, 4, 5, 6, 7, 7]))
示例13: enumerate
# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import query [as 别名]
# Evaluate the index on the held-out samples: a sample counts as correct
# when the extra value of its nearest hit (rs[0][0][1]) equals the expected
# response.
#
# Recorded measurements for earlier variants of the query line:
#rs = lsh.query(get_img(fn), num_results=1, distance_func="cosine") # test rate: 91.326531, 196 files took 52901.431 ms
#rs = lsh.query(get_img(fn), num_results=1, distance_func="l1norm") # test rate: 91.326531, 196 files took 35271.345 ms
#rs = lsh.query(get_img(fn), num_results=1, distance_func="euclidean") # test rate: 90.816327, 196 files took 24904.888 ms
#rs = lsh.query(get_img(fn), num_results=1, distance_func="true_euclidean") # test rate: 89.795918, 196 files took 17713.646 ms
#rs = lsh.query(get_img(fn), num_results=1, distance_func="centred_euclidean") # test rate: 52.040816, 196 files took 9000.577 ms
# BLOCKS = 1, ORIENTATIONS = (8, 8, 3), DIMENSION = 57, test rate: 89.285714, 196 files took 9997.003 ms
# BLOCKS = 2, ORIENTATIONS = (8, 8, 3), DIMENSION = 228, test rate: 91.326531, 196 files took 17227.878 ms
# BLOCKS = 3, ORIENTATIONS = (8, 8, 3), DIMENSION = 513, test rate: 98.469388, 196 files took 64944.190 ms
# BLOCKS = 4, ORIENTATIONS = (8, 8, 4), DIMENSION = 960, test rate: 95.408163, 196 files took 47667.006 ms
# BLOCKS = 5, ORIENTATIONS = (8, 8, 3), DIMENSION = 1425, test rate: 93.367347, 196 files took 71029.642 ms
#rs = lsh.query(leargist.color_gist(Image.open(fn), nblocks=BLOCKS, orientations=ORIENTATIONS), num_results=1, distance_func="l1norm")

# Slice the expected responses once instead of on every iteration.
test_responses = responses[train_n:]
for i, sample in enumerate(samples[train_n:]):
    total += 1
    rs = lsh.query(sample, num_results=1, distance_func=DISTANCE_FUNC)
    if rs and rs[0][0][1] == test_responses[i]:
        correct += 1
    # Alternative check kept for reference (match anywhere in the results):
    # if rs:
    #     rs = [r[0][1] for r in rs]
    #     try:
    #         idx = rs.index(responses[train_n:][i])
    #     except ValueError:
    #         idx = -1
    #     if idx != -1:
    #         correct = correct + 1
    #else:
    #    print CHARS[rs[0][0][1]], " => ", CHARS[responses[train_n:][i]]
t2 = time.time()
print("test rate: %f, %d files took %0.3f ms" % (correct/float(total)*100, total, (t2 - t1) * 1000.0))
示例14: LSHash
# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import query [as 别名]
from __future__ import print_function
from __future__ import division
from scipy.spatial.distance import cosine
from tqdm import tqdm
import numpy
from lshash import LSHash
import time
# Benchmark: index 20000 random 300-dimensional vectors, keep every 500th
# one as a query sample, then time cosine-distance queries for the samples.
start = time.time()
lsh = LSHash(8, 300)
sample_word_embeds = []
for i in tqdm(range(20000)):
    word_embed = numpy.random.rand(300)
    lsh.index(word_embed)
    if i % 500 == 0:
        sample_word_embeds.append(word_embed)
print("Indexing takes {} seconds".format(time.time() - start))
start = time.time()
for word_embed in sample_word_embeds:
    print('-' * 80)
    results = lsh.query(word_embed, num_results=None, distance_func='cosine')
    print("Num result: {}".format(len(results)))
    print('Nearest neighbor cosine distance:')
    # results[1] is reported rather than results[0] -- presumably because
    # the query vector itself was indexed above and comes back first;
    # confirm the result ordering against the lshash documentation.
    if len(results) > 1:
        neighbor = results[1]
        print(" {} | {}".format(neighbor[1], cosine(neighbor[0], word_embed)))
    else:
        # The query's bucket can hold nothing but the query itself;
        # indexing results[1] used to raise IndexError in that case.
        print(" (no neighbor besides the query itself)")
print('Query takes average {} seconds'.format((time.time() - start) / len(sample_word_embeds)))
示例15: range
# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import query [as 别名]
# -*- coding: utf-8 -*-
"""
Created on Fri Dec 04 15:29:56 2015
@author: MaGesh
"""
import numpy as np
from scipy.ndimage import imread
from lshash import LSHash
# Index 100000 bitmap images and collect the query results for 10 query
# images.  Images are read as grey scale and flattened to 1-D vectors.
DATASET_DIR = "F:\\Fall 2015\\Data Mining\\Programming Assignments\\PA5\\data\\dataset\\"
QUERY_DIR = "F:\\Fall 2015\\Data Mining\\Programming Assignments\\PA5\\data\\Query\\"


def _load_flat_gray(path):
    """Read the image at *path* as grey scale and return it flattened."""
    # NOTE(review): scipy.ndimage.imread was removed in SciPy 1.2 --
    # confirm the SciPy version this script is pinned to.
    return imread(path, flatten=True).flatten()


# 32*32 is the input dimension; 20 is the hash size (the first LSHash
# argument), not a number of buckets.
lsh = LSHash(20, 32 * 32)
resultSet = []
for i in range(1, 100001):
    print(i)
    # hash each image into its bucket
    lsh.index(_load_flat_gray(DATASET_DIR + str(i) + ".bmp"))
for i in range(1, 11):
    print("%d for querying" % i)
    # query the nearest points
    value = lsh.query(_load_flat_gray(QUERY_DIR + str(i) + ".bmp"), distance_func="euclidean")
    resultSet.append(value)