This article collects typical usage examples of the LSHash class from the Python lshash module. If you are unsure what LSHash is for, how to use it, or what working code looks like, the curated class examples below may help.
The following presents 15 code examples of the LSHash class, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
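Before the individual examples, here is a minimal, self-contained sketch of the typical LSHash workflow; the 8-dimensional vectors and the labels 'a'/'b'/'c' below are made up purely for illustration:

from lshash import LSHash

# 6-bit hashes over 8-dimensional input vectors, one hash table
lsh = LSHash(6, 8, num_hashtables=1)

# index a few points; extra_data is an optional label stored with each vector
lsh.index([1, 2, 3, 4, 5, 6, 7, 8], extra_data='a')
lsh.index([2, 3, 4, 5, 6, 7, 8, 9], extra_data='b')
lsh.index([10, 12, 99, 1, 5, 31, 2, 3], extra_data='c')

# querying an indexed point returns it with distance 0; each result is
# ((vector, extra_data), distance) because extra_data was supplied,
# and plain (vector, distance) otherwise
results = lsh.query([1, 2, 3, 4, 5, 6, 7, 8], num_results=2,
                    distance_func='euclidean')
(closest_vector, closest_label), closest_distance = results[0]
assert closest_label == 'a' and closest_distance == 0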
Example 1: test_lshash_redis_extra_val
def test_lshash_redis_extra_val():
"""
Test external lshash module
"""
config = {"redis": {"host": 'localhost', "port": 6379, "db": 15}}
sr = StrictRedis(**config['redis'])
sr.flushdb()
lsh = LSHash(6, 8, 1, config)
for i in xrange(num_elements):
lsh.index(list(els[i]), el_names[i])
lsh.index(list(els[i]), el_names[i]) # multiple insertions
hasht = lsh.hash_tables[0]
itms = [hasht.get_list(k) for k in hasht.keys()]
for itm in itms:
assert itms.count(itm) == 1
for el in itm:
assert el[0] in els
assert el[1] in el_names
for el in els:
res = lsh.query(list(el), num_results=1, distance_func='euclidean')[0]
# vector and name are in the first element of the tuple, res[0]
el_v, el_name = res[0]
# the distance is in the second element of the tuple
el_dist = res[1]
assert el_v in els
assert el_name in el_names
assert el_dist == 0
del lsh
sr.flushdb()
Example 2: test_lshash_redis
def test_lshash_redis():
"""
Test external lshash module
"""
config = {"redis": {"host": 'localhost', "port": 6379, "db": 15}}
sr = StrictRedis(**config['redis'])
sr.flushdb()
lsh = LSHash(6, 8, 1, config)
for i in xrange(num_elements):
lsh.index(list(els[i]))
lsh.index(list(els[i])) # multiple insertions should be prevented by the library
hasht = lsh.hash_tables[0]
itms = [hasht.get_list(k) for k in hasht.keys()]
for itm in itms:
for el in itm:
assert itms.count(itm) == 1 # have multiple insertions been prevented?
assert el in els
for el in els:
res = lsh.query(list(el), num_results=1, distance_func='euclidean')[0]
el_v, el_dist = res
assert el_v in els
assert el_dist == 0
del lsh
sr.flushdb()
Example 3: build_index
def build_index(self, X):
f = X.shape[1]
n = X.shape[0]
lsh = LSHash(hash_size = 32, input_dim = f, num_hashtables = 100)
for i in range(n):
lsh.index(X[i], i)
return lsh
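The sketch below shows one possible round trip through the kind of index that build_index creates (the random data and hash parameters here are made up): because the row number i is stored as extra_data, each query result carries the index of the matching row of X.

import numpy as np
from lshash import LSHash

X = np.random.rand(100, 16)                    # made-up data: 100 points, 16 dims
lsh = LSHash(hash_size=32, input_dim=16, num_hashtables=4)
for i in range(X.shape[0]):
    lsh.index(X[i], i)                         # the row number is stored as extra_data

(vector, row_id), distance = lsh.query(X[1], num_results=1,
                                       distance_func='euclidean')[0]
# row_id is the `i` stored at indexing time, so X[row_id] is the matching row
assert row_id == 1 and distance == 0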
Example 4: dump_lsh_data_to_pickle
def dump_lsh_data_to_pickle(bits_tid_pickle, lsh_pickle):
f = file(bits_tid_pickle, "rb")
data = pickle.load(f)
f.close()
# These parameters can be tuned; see https://github.com/kayzh/LSHash for details
lsh = LSHash(13, 128, num_hashtables=1)
map(lambda x:lsh.index(np.array([int(tmp) for tmp in x])), data.keys())
out = file(lsh_pickle,"wb")
pickle.dump(lsh, out, -1)
out.close()
Example 5: generateSingleHash
def generateSingleHash(X, planesFileName, n_bits=64):
"""
Generate an n_bits-long hash for each input in X
:param X:
:param n_bits:
:return:
"""
import utils
# reuse the random projection planes stored in fileName (overwrite=False); new ones are generated only if the file is missing
fileName = os.path.join(utils.lsh_planes_dir, planesFileName + '.npz')
lsh = LSHash(n_bits, np.shape(X)[0], matrices_filename=fileName, overwrite=False)
return lsh._hash(lsh.uniform_planes[0], X.tolist())
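A small illustration of why generateSingleHash passes matrices_filename (the file name planes_demo.npz and the 32-dimensional random point are assumptions for this sketch): with overwrite=False the random projection planes are written out on first use and reloaded afterwards, so the same input keeps hashing to the same bit string across separate LSHash instances.

import numpy as np
from lshash import LSHash

planes_file = 'planes_demo.npz'                # hypothetical planes file in the cwd
point = np.random.rand(32)

lsh_a = LSHash(64, 32, matrices_filename=planes_file, overwrite=False)
lsh_b = LSHash(64, 32, matrices_filename=planes_file, overwrite=False)

# _hash projects the point onto the stored planes and returns a '0'/'1' string
h_a = lsh_a._hash(lsh_a.uniform_planes[0], point.tolist())
h_b = lsh_b._hash(lsh_b.uniform_planes[0], point.tolist())
assert h_a == h_b                              # same planes, same 64-bit hash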
Example 6: create_hash2img
def create_hash2img():
img2gist = get_img2gist()
lsh = LSHash(hash_len, 960, storage_config=redis_config,
matrices_filename=matrices_file)
count = 0
total_num = len(img2gist)
for name, gist_v in img2gist.iteritems():
count += 1
lsh.index(gist_v, name)
sys.stdout.write('%d/%d\r ' % (count, total_num))
sys.stdout.flush()
print 'bucket ratio: %d/%d' % (len(lsh.hash_tables[0].keys()), 2 ** hash_len)
return lsh
Example 7: init
def init():
parser = argparse.ArgumentParser(description = 'Tools for hamming distance-based image retrieval by cuda')
parser.add_argument('-f', help = 'The filename of image raw features (SIFT).')
parser.add_argument('-v', default = 'fvecs', help = 'The format of image raw features.')
parser.add_argument('-s', default = 'dict', help = 'The method of indexing storage.')
parser.add_argument('-d', default = '128', help = 'Dimensions of raw image feature.')
parser.add_argument('-o', default = '0', help = 'Offset of accessing raw image features.')
parser.add_argument('-n', default = '1', help = 'Number of raw image features to read.')
parser.add_argument('-i', default = 'n', help = 'Whether to perform indexing step.')
parser.add_argument('-e', help = 'The dirname of indexing folder.')
parser.add_argument('-k', default = '10', help = 'Number of retrieved images.')
parser.add_argument('-r', default = '32', help = 'Number of dimensions randomly sampled.')
parser.add_argument('-c', default = 'n', help = 'Whether to perform compressing step.')
parser.add_argument('-q', default = 'n', help = 'Whether to sequentially sampling.')
parser.add_argument('-p', default = 'n', help = 'Whether to perform querying in compressed domain.')
parser.add_argument('-g', default = 'y', help = 'GPU mode. default is "yes".')
parser.add_argument('-l', default = 'n', help = 'VLQ base64 mode. Load VLQ base64 encoding compressed dict.')
parser.add_argument('-b', default = '1', help = 'Expanding level of search buckets.')
parser.add_argument('-t', default = 'int32', help = 'FastDict type (int32, int8, string).')
args = parser.parse_args()
d = int(args.d)
nuse = int(args.n)
off = int(args.o)
random_dims = int(args.r)
random_sampling = True
if args.q == 'y':
random_sampling = False
lsh = LSHash(64, d, random_sampling, args.t, random_dims, 1, storage_config = args.s, matrices_filename = 'project_plane.npz')
np_feature_vecs = load_features(args.f, args.v, nuse, d, off)
if args.c != 'y' and args.i != 'y' and args.e != None and args.s == 'random':
if args.p == 'y':
print "loading compressed index."
lsh.load_compress_index(args.e, (args.l == 'y'))
print "loading done."
else:
print "loading index."
lsh.load_index(args.e)
print "loading done."
print "indexing done. Ready for querying."
return (lsh, np_feature_vecs, args)
Example 8: __init__
def __init__(self):
self.lshIndexList = []
# create a list of lsh indexes
self.lsh = LSHash(NUMBER_OF_BITS_PER_HASH, NUM_TOPICS, num_hashtables=NUMBER_OF_LSH_INDEXES,
storage_config={"redis": {"host": "localhost", "port": 6379}})
Example 9: LshIndexer
class LshIndexer(Indexer):
PARAMETERS = {'hash_size': 6,
'input_dim': 128,
'num_of_hashtables': 1,
'storage': {'redis': {'host':'localhost', 'port': 6379}}}
def initialize_store(self, parameters):
self.store = LSHash(parameters['hash_size'],
parameters['input_dim'],
parameters['num_of_hashtables'],
parameters['storage'])
def index(self, features):
for feature in features:
self.store.index(feature.data, feature.file_id)
def query(self, feature, num_results=5):
return self.store.query(feature, num_results)
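A hedged usage sketch for LshIndexer: Feature is a hypothetical stand-in (index() only relies on the .data and .file_id attributes), and the snippet assumes the Indexer base class needs no constructor arguments and that a Redis server is reachable on localhost:6379.

from collections import namedtuple

Feature = namedtuple('Feature', ['data', 'file_id'])   # hypothetical feature type

indexer = LshIndexer()
indexer.initialize_store(LshIndexer.PARAMETERS)
indexer.index([Feature(data=[0.1] * 128, file_id='img_001.jpg'),
               Feature(data=[0.2] * 128, file_id='img_002.jpg')])
matches = indexer.query([0.1] * 128, num_results=2)    # candidates sorted by distance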
Example 10: test_lshash
def test_lshash():
lsh = LSHash(6, 8, 1)
for i in xrange(num_elements):
lsh.index(list(els[i]))
lsh.index(list(els[i])) # multiple insertions
hasht = lsh.hash_tables[0]
itms = [hasht.get_list(k) for k in hasht.keys()]
for itm in itms:
assert itms.count(itm) == 1
for el in itm:
assert el in els
for el in els:
res = lsh.query(list(el), num_results=1, distance_func='euclidean')[0]
# res is a tuple containing the vector and the distance
el_v, el_dist = res
assert el_v in els
assert el_dist == 0
del lsh
Example 11: generateHashes
def generateHashes(X, scalar, planesFileName, n_bits=64):
"""
Generate a n_bits long hash for each input in X
:param X:
:param n_bits:
:return:
"""
import utils
# reuse the random projection planes stored in fileName (overwrite=False); new ones are generated only if the file is missing
fileName = os.path.join(utils.lsh_planes_dir, planesFileName + '.npz')
lsh = LSHash(n_bits, np.shape(X[0])[0], matrices_filename=fileName, overwrite=False)
hashValues = []
for input_point in X:
input_point = scalar.transform(input_point)
hashValues.append(lsh._hash(lsh.uniform_planes[0], input_point))
return hashValues
Example 12: learn
def learn(routes):
global global_training_route
global next_hop_index
extra_data_len = 2 #destination, next_hop
ndims = len(routes[0]) - extra_data_len #Number of dimensions
hash_length = len(routes[0]) * 2 #arbitrarily chosen hash_length
next_hop_index = len(routes[0]) - 1 #NextHop index at the last
for i in range(0, len(routes) - 1):
if(routes[i][next_hop_index] >= routes[i+1][next_hop_index]):
routes[i][next_hop_index] = i+1
else:
routes[i][next_hop_index] = -1
global_training_route = routes
lsh = LSHash(hash_length, ndims)
for entry in routes:
lsh.index(entry[:-extra_data_len], extra_data = entry[-extra_data_len:])
return lsh
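For completeness, a self-contained miniature of the same pattern showing how the stored (destination, next_hop) pair comes back from a query; the routes below are made up, and the extra fields are passed as a tuple here rather than a list so the stored values stay hashable.

from lshash import LSHash

routes_demo = [
    [0.1, 0.2, 0.3, 0.4, 'D1', 'hop-A'],       # features..., destination, next_hop
    [0.9, 0.8, 0.7, 0.6, 'D2', 'hop-B'],
]
lsh_demo = LSHash(16, 4)                        # 16-bit hash over 4 feature dimensions
for entry in routes_demo:
    lsh_demo.index(entry[:-2], extra_data=tuple(entry[-2:]))

(vector, extra), distance = lsh_demo.query(routes_demo[0][:-2], num_results=1)[0]
destination, next_hop = extra                   # ('D1', 'hop-A'); distance == 0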
Example 13: detect_subevent
def detect_subevent(filename):
dictionaryFile = filename + ".dict"
corpusFile = filename + ".mm"
outputFile = filename + ".out"
outputVector = []
tempDict = {}
outputdict={}
corpus = corpora.MmCorpus(corpusFile)
dictionary = corpora.Dictionary.load(dictionaryFile)
lsh = LSHash(30, dictionary.__len__())
index = 0
for index in range(len(corpus)):
#print str(index)+",",
#print corpus[index]
denseVector = getDenseVector(corpus[index], lsh.input_dim)
#print getSparseVector(denseVector)
result = lsh.query(denseVector, num_results = 50, distance_func = "euclidean")
#print result
#no similar tweets
if(result == []):
outputdict[index]=[]
tempDict[getSparseVector(denseVector)] = index
lsh.index(denseVector)
#continue
else:
for r in result:
if(outputdict.has_key(tempDict[getSparseVector(r[0])])):
outputdict[tempDict[getSparseVector(r[0])]].append(index)
break
#print outputdict
with open(outputFile, 'w') as out:
for key in outputdict.iterkeys():
line = str(key)
for i in outputdict[key]:
line += ", " + str(i)
out.write(line+"\n")
print "Please check the output file:", outputFile
Example 14: getBuckets
def getBuckets(fromFile):
global nameDict
global lsh
nameDict = {}
lsh = LSHash(bWidth,26, num_hashtables = num_ht)
if fromFile:
f = open(datafile, 'r')
nameList = f.readlines()
else:
nameList = surnames.dic.keys()
for l in nameList:
name = l.split(" ")[0].strip()
nameArr = getvec(name)
arrStr = toStr(nameArr)
if arrStr in nameDict:
nameDict[arrStr].append(name)
else:
nameDict[arrStr] = [name]
for k in nameDict.keys():
lsh.index(toArr(k))
Example 15: test_lshash_extra_val
def test_lshash_extra_val():
lsh = LSHash(6, 8, 1)
for i in xrange(num_elements):
lsh.index(list(els[i]), el_names[i])
hasht = lsh.hash_tables[0]
itms = [hasht.get_list(k) for k in hasht.keys()]
for itm in itms:
for el in itm:
assert el[0] in els
assert el[1] in el_names
for el in els:
# res is a list, so we need to select the first entry only
res = lsh.query(list(el), num_results=1, distance_func='euclidean')[0]
# vector and name are in the first element of the tuple, res[0]
el_v, el_name = res[0]
# the distance is in the second element of the tuple
el_dist = res[1]
assert el_v in els
assert el_name in el_names
assert el_dist == 0
del lsh