本文整理汇总了Python中lshash.LSHash.index方法的典型用法代码示例。如果您正苦于以下问题:Python LSHash.index方法的具体用法?Python LSHash.index怎么用?Python LSHash.index使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类lshash.LSHash的用法示例。
在下文中一共展示了LSHash.index方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_lshash_redis_extra_val
# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import index [as 别名]
def test_lshash_redis_extra_val():
    """
    Test the external lshash module with Redis storage and extra values.

    Every vector taken from the module-level ``els`` is indexed twice
    together with its name from ``el_names``.  The test then checks that
    no bucket list of the first hash table occurs more than once, that
    every stored entry is a known (vector, name) pair, and that querying
    each vector returns a known pair at distance 0.

    The Redis database is flushed before the test and again in a
    ``finally`` clause, so entries written by a failing run are removed
    as well.
    """
    config = {"redis": {"host": 'localhost', "port": 6379, "db": 15}}
    sr = StrictRedis(**config['redis'])
    sr.flushdb()
    try:
        # presumably (hash_size, input_dim, num_hashtables, storage_config)
        lsh = LSHash(6, 8, 1, config)
        for i in xrange(num_elements):
            lsh.index(list(els[i]), el_names[i])
            lsh.index(list(els[i]), el_names[i])  # multiple insertions
        hasht = lsh.hash_tables[0]
        itms = [hasht.get_list(k) for k in hasht.keys()]
        for itm in itms:
            assert itms.count(itm) == 1
            for el in itm:
                assert el[0] in els
                assert el[1] in el_names
        for el in els:
            res = lsh.query(list(el), num_results=1, distance_func='euclidean')[0]
            # vector and name are in the first element of the tuple res[0]
            el_v, el_name = res[0]
            # the distance is in the second element of the tuple
            el_dist = res[1]
            assert el_v in els
            assert el_name in el_names
            assert el_dist == 0
        del lsh
    finally:
        # Runs on success and on assertion failure alike.
        sr.flushdb()
示例2: test_lshash_redis
# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import index [as 别名]
def test_lshash_redis():
    """
    Test the external lshash module with Redis storage.

    Every vector taken from the module-level ``els`` is indexed twice.
    The test then checks that no bucket list of the first hash table
    occurs more than once (i.e. the repeated insertion was prevented),
    that every stored entry is a known vector, and that querying each
    vector returns a known vector at distance 0.

    The Redis database is flushed before the test and again in a
    ``finally`` clause, so entries written by a failing run are removed
    as well.
    """
    config = {"redis": {"host": 'localhost', "port": 6379, "db": 15}}
    sr = StrictRedis(**config['redis'])
    sr.flushdb()
    try:
        # presumably (hash_size, input_dim, num_hashtables, storage_config)
        lsh = LSHash(6, 8, 1, config)
        for i in xrange(num_elements):
            lsh.index(list(els[i]))
            lsh.index(list(els[i]))  # multiple insertions should be prevented by the library
        hasht = lsh.hash_tables[0]
        itms = [hasht.get_list(k) for k in hasht.keys()]
        for itm in itms:
            # have multiple insertions been prevented?  Checked once per
            # bucket: the result does not depend on the bucket's elements.
            assert itms.count(itm) == 1
            for el in itm:
                assert el in els
        for el in els:
            res = lsh.query(list(el), num_results=1, distance_func='euclidean')[0]
            # res is a tuple containing the vector and the distance
            el_v, el_dist = res
            assert el_v in els
            assert el_dist == 0
        del lsh
    finally:
        # Runs on success and on assertion failure alike.
        sr.flushdb()
示例3: build_index
# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import index [as 别名]
def build_index(self, X):
    """
    Build an LSHash index over the rows of ``X``.

    ``X`` must expose ``shape`` (rows first, features second) and support
    row access through ``X[i]``.  Each row is indexed with its row number
    as the extra data.

    Returns the populated LSHash instance.
    """
    row_count, feature_count = X.shape[0], X.shape[1]
    table = LSHash(hash_size=32, input_dim=feature_count, num_hashtables=100)
    row_id = 0
    while row_id < row_count:
        table.index(X[row_id], row_id)
        row_id += 1
    return table
示例4: create_hash2img
# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import index [as 别名]
def create_hash2img():
    """
    Index every image's GIST vector in an LSHash and return the index.

    Vectors come from ``get_img2gist()`` (a mapping of name -> vector) and
    are indexed with the image name as extra data.  Progress is written to
    stdout on a single, carriage-return-refreshed line; afterwards the
    number of keys in the first hash table is printed against
    ``2 ** hash_len``.
    """
    img2gist = get_img2gist()
    lsh = LSHash(hash_len, 960,
                 storage_config=redis_config,
                 matrices_filename=matrices_file)
    total_num = len(img2gist)
    for done, (img_name, gist_vec) in enumerate(img2gist.iteritems(), 1):
        lsh.index(gist_vec, img_name)
        progress = '%d/%d\r ' % (done, total_num)
        sys.stdout.write(progress)
        sys.stdout.flush()
    used_buckets = len(lsh.hash_tables[0].keys())
    print('bucket ratio: %d/%d' % (used_buckets, 2 ** hash_len))
    return lsh
示例5: subEventDetection
# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import index [as 别名]
def subEventDetection(dictionaryFile, corpusFile, outputFile):
    """
    Group the entries of a corpus into sub-events using LSH and write the
    groups to ``outputFile``.

    Each corpus entry is turned into a dense vector and looked up in an
    LSHash index (cosine distance, at most 50 candidates).  The first
    candidate accepted by ``getDistance`` decides the group: the entry
    joins the group that already contains the matched entry.  An entry
    with no accepted candidate starts a new group.  Every entry is then
    added to the index so that later entries can match it.

    Args:
        dictionaryFile: path handed to ``corpora.Dictionary.load``.
        corpusFile: path handed to ``corpora.MmCorpus``.
        outputFile: path of the text file to write; one group per line,
            corpus positions separated by ", ".
    """
    outputVector = []  # list of groups; each group is a list of corpus positions
    tempDict = {}      # indexed vector (as tuple) -> corpus position
    corpus = corpora.MmCorpus(corpusFile)
    dictionary = corpora.Dictionary.load(dictionaryFile)
    lsh = LSHash(30, len(dictionary))
    for index in range(len(corpus)):
        denseVector = getDenseVector(corpus[index], lsh.input_dim)
        result = lsh.query(denseVector, num_results=50, distance_func="cosine")
        assigned = False
        # An empty result simply skips this loop, so the entry falls through
        # to "start a new group" and is still indexed below.  (Indexing must
        # not be skipped here, otherwise the index would stay empty forever.)
        for vector in result:
            # Explicit comparison kept: getDistance's return type is not
            # visible from this function.
            if getDistance(vector, denseVector) == True:
                # assumes each result item carries the stored vector at
                # position 0, as in eventIdentification -- TODO confirm
                matchedEntry = tempDict[tuple(vector[0])]
                for group in outputVector:
                    if matchedEntry in group:
                        group.append(index)
                        break
                assigned = True
                break
        if not assigned:
            outputVector.append([index])
        lsh.index(denseVector)
        tempDict[tuple(denseVector)] = index
    with open(outputFile, 'w') as out:
        for group in outputVector:
            out.write(", ".join(str(entry) for entry in group) + "\n")
    print("Please check the output file: %s" % (outputFile,))
示例6: eventIdentification
# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import index [as 别名]
def eventIdentification(dictionaryFile, corpusFile, outputFile):
    """
    Assign every corpus entry to an event using LSH and write the events
    to ``outputFile``.

    Each entry is converted to a dense vector and queried against an
    LSHash index.  The first candidate accepted by ``getDistance``
    determines the event the entry joins; with no accepted candidate (or
    no candidates at all) the entry opens a new event.  The entry's vector
    is remembered with its event number and added to the index.

    Args:
        dictionaryFile: path handed to ``corpora.Dictionary.load``.
        corpusFile: path handed to ``corpora.MmCorpus``.
        outputFile: path of the text file to write; one event per line,
            corpus positions separated by ",".
    """
    events = []           # event number -> list of corpus positions
    event_of_vector = {}  # indexed vector (as tuple) -> event number
    corpus = corpora.MmCorpus(corpusFile)
    dictionary = corpora.Dictionary.load(dictionaryFile)
    lsh = LSHash(20, dictionary.__len__())
    for position in range(len(corpus)):
        dense = getDenseVector(corpus[position], lsh.input_dim)
        event_id = None
        for candidate in lsh.query(dense):
            # Explicit comparison kept: getDistance's return type is not
            # visible from this function.
            if getDistance(candidate, dense) == True:
                event_id = event_of_vector[tuple(candidate[0])]
                break
        if event_id is None:
            # No similar entry: open a new event.
            event_id = len(events)
            events.append([position])
        else:
            events[event_id].append(position)
        event_of_vector[tuple(dense)] = event_id
        # NOTE(review): the source's indentation was lost; this assumes every
        # entry (matched or not) is added to the index -- confirm.
        lsh.index(dense)
    with open(outputFile, 'w') as out:
        for members in events:
            out.write(",".join(str(member) for member in members) + "\n")
示例7: test
# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import index [as 别名]
def test():
    """
    Ad-hoc smoke run of LSHash on the 'fc7' test set.

    Loads the test set through ``utils.load_test_set``, prints the hash of
    every point against the first set of uniform planes, indexes each point
    with its position as extra data, and finally prints the result of
    querying with the fourth point (second argument 3, presumably the
    number of results).
    """
    import utils
    trueIds, testSet = utils.load_test_set('fc7', 'raw', 0)
    input_dim = np.shape(testSet[0])[0]
    lsh = LSHash(128, input_dim,
                 matrices_filename='lsh_planes.data.npz',
                 overwrite=True)
    position = 0
    for point in testSet:
        print(lsh._hash(lsh.uniform_planes[0], point.tolist()))
        lsh.index(point, position)
        position += 1
    neighbours = lsh.query(testSet[3], 3)
    print(neighbours)
    return None
示例8: LshIndexer
# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import index [as 别名]
class LshIndexer(Indexer):
    """Indexer that keeps feature vectors in an LSHash store."""

    # Parameter set in the shape expected by initialize_store().
    PARAMETERS = {
        'hash_size': 6,
        'input_dim': 128,
        'num_of_hashtables': 1,
        'storage': {'redis': {'host':'localhost', 'port': 6379}},
    }

    def initialize_store(self, parameters):
        """Create ``self.store`` from a mapping shaped like ``PARAMETERS``."""
        positional_keys = ('hash_size', 'input_dim', 'num_of_hashtables', 'storage')
        self.store = LSHash(*[parameters[key] for key in positional_keys])

    def index(self, features):
        """Add every feature's ``data`` to the store, tagged with its ``file_id``."""
        for item in features:
            vector, owner = item.data, item.file_id
            self.store.index(vector, owner)

    def query(self, feature, num_results=5):
        """Return the store's query result for ``feature``."""
        matches = self.store.query(feature, num_results)
        return matches
示例9: test_lshash
# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import index [as 别名]
def test_lshash():
    """
    Round-trip test of the in-memory LSHash.

    Indexes every vector from the module-level ``els`` twice, then checks
    that no bucket list of the first hash table occurs more than once, that
    every stored entry is a known vector, and that each vector's best match
    is a known vector at distance 0.
    """
    lsh = LSHash(6, 8, 1)
    for position in xrange(num_elements):
        # Two passes on purpose: the "multiple insertions" case.
        for _ in (0, 1):
            lsh.index(list(els[position]))
    table = lsh.hash_tables[0]
    buckets = [table.get_list(key) for key in table.keys()]
    for bucket in buckets:
        assert buckets.count(bucket) == 1
        assert all(stored in els for stored in bucket)
    for vector in els:
        # the first hit is a tuple containing the vector and the distance
        nearest, distance = lsh.query(list(vector), num_results=1,
                                      distance_func='euclidean')[0]
        assert nearest in els
        assert distance == 0
    del lsh
示例10: learn
# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import index [as 别名]
def learn(routes):
    """
    Rewrite the next-hop column of ``routes`` in place and index the routes.

    For every route except the last, the next-hop value becomes the 1-based
    position of that route when it is >= the following route's next-hop
    value, and -1 otherwise.  The modified ``routes`` is published through
    the global ``global_training_route`` and the column position through
    ``next_hop_index``.  Each route is then indexed in a fresh LSHash: all
    columns but the last two form the vector, the last two are passed as
    extra data.

    Returns the populated LSHash instance.
    """
    global global_training_route
    global next_hop_index

    row_width = len(routes[0])
    extra_data_len = 2              # trailing columns: destination, next_hop
    ndims = row_width - extra_data_len
    hash_length = row_width * 2     # arbitrarily chosen hash length
    next_hop_index = row_width - 1  # next hop is stored in the last column

    for rank, (current, following) in enumerate(zip(routes, routes[1:]), 1):
        # `following` has not been rewritten yet when it is compared.
        not_smaller = current[next_hop_index] >= following[next_hop_index]
        current[next_hop_index] = rank if not_smaller else -1

    global_training_route = routes

    lsh = LSHash(hash_length, ndims)
    for route in routes:
        features = route[:-extra_data_len]
        payload = route[-extra_data_len:]
        lsh.index(features, extra_data=payload)
    return lsh
示例11: detect_subevent
# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import index [as 别名]
def detect_subevent(filename):
    """
    Detect sub-events in the corpus stored under ``filename`` and write
    them to ``filename + ".out"``.

    Reads the dictionary from ``filename + ".dict"`` and the corpus from
    ``filename + ".mm"``.  Each corpus entry is converted to a dense vector
    and queried against an LSHash index (euclidean distance, at most 50
    candidates).  An entry with no candidates becomes the seed of a new
    sub-event and is added to the index; otherwise the entry is attached to
    the first candidate whose seed is known.

    Each output line holds the seed position followed by the attached
    positions, separated by ", ".
    """
    dictionaryFile = filename + ".dict"
    corpusFile = filename + ".mm"
    outputFile = filename + ".out"
    tempDict = {}    # sparse form of an indexed vector -> its corpus position
    outputdict = {}  # seed position -> positions attached to that seed
    corpus = corpora.MmCorpus(corpusFile)
    dictionary = corpora.Dictionary.load(dictionaryFile)
    lsh = LSHash(30, len(dictionary))
    for index in range(len(corpus)):
        denseVector = getDenseVector(corpus[index], lsh.input_dim)
        result = lsh.query(denseVector, num_results=50, distance_func="euclidean")
        if result == []:
            # no similar tweets: this entry seeds a new sub-event
            outputdict[index] = []
            tempDict[getSparseVector(denseVector)] = index
            lsh.index(denseVector)
        else:
            for r in result:
                # Look the seed up once; `in` replaces the deprecated has_key().
                seed = tempDict[getSparseVector(r[0])]
                if seed in outputdict:
                    outputdict[seed].append(index)
                    break
    with open(outputFile, 'w') as out:
        for key in outputdict:
            fields = [str(key)] + [str(i) for i in outputdict[key]]
            out.write(", ".join(fields) + "\n")
    print("Please check the output file: %s" % (outputFile,))
示例12: dump_lsh_data_to_pickle
# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import index [as 别名]
def dump_lsh_data_to_pickle(bits_tid_pickle, lsh_pickle):
    """
    Build an LSHash from a pickled mapping and pickle the resulting index.

    Args:
        bits_tid_pickle: path of a pickle file holding a mapping; each key
            is an iterable whose items are converted with ``int`` and
            indexed as one vector.
        lsh_pickle: path the populated LSHash is pickled to (highest
            protocol).
    """
    # NOTE(review): pickle.load can execute arbitrary code; only feed this
    # function files from a trusted source.
    with open(bits_tid_pickle, "rb") as f:
        data = pickle.load(f)
    # These parameters can be tuned; see https://github.com/kayzh/LSHash
    lsh = LSHash(13, 128, num_hashtables=1)
    # Explicit loop: map() used only for side effects would never run where
    # map() is lazy.
    for bits in data.keys():
        lsh.index(np.array([int(tmp) for tmp in bits]))
    with open(lsh_pickle, "wb") as out:
        pickle.dump(lsh, out, -1)
示例13: getBuckets
# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import index [as 别名]
def getBuckets(fromFile):
    """
    Rebuild the global ``nameDict`` and ``lsh`` from a list of names.

    Names are read from ``datafile`` when ``fromFile`` is true, otherwise
    taken from the keys of ``surnames.dic``.  Only the text before the first
    space of each entry is used.  Names are grouped in ``nameDict`` by the
    string form of their vector (``toStr(getvec(name))``), and each distinct
    vector is indexed once in ``lsh``.

    Args:
        fromFile: truthy to read names from ``datafile``.
    """
    global nameDict
    global lsh
    nameDict = {}
    lsh = LSHash(bWidth, 26, num_hashtables=num_ht)
    if fromFile:
        # Context manager so the file handle is released once read.
        with open(datafile, 'r') as f:
            nameList = f.readlines()
    else:
        nameList = surnames.dic.keys()
    for l in nameList:
        name = l.split(" ")[0].strip()
        arrStr = toStr(getvec(name))
        nameDict.setdefault(arrStr, []).append(name)
    for k in nameDict.keys():
        lsh.index(toArr(k))
示例14: test_lshash_extra_val
# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import index [as 别名]
def test_lshash_extra_val():
    """
    Round-trip test of the in-memory LSHash with extra values.

    Indexes every vector from the module-level ``els`` together with its
    name from ``el_names``, then checks that every stored entry is a known
    (vector, name) pair and that each vector's best match is a known pair
    at distance 0.
    """
    lsh = LSHash(6, 8, 1)
    for position in xrange(num_elements):
        lsh.index(list(els[position]), el_names[position])
    table = lsh.hash_tables[0]
    buckets = [table.get_list(key) for key in table.keys()]
    for bucket in buckets:
        for stored in bucket:
            stored_vec, stored_name = stored[0], stored[1]
            assert stored_vec in els
            assert stored_name in el_names
    for vector in els:
        hits = lsh.query(list(vector), num_results=1, distance_func='euclidean')
        # query returns a list, so only its first entry is inspected
        best = hits[0]
        # best[0] holds the vector and its name, best[1] the distance
        found_vec, found_name = best[0]
        distance = best[1]
        assert found_vec in els
        assert found_name in el_names
        assert distance == 0
    del lsh
示例15: __init__
# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import index [as 别名]
class Searcher:
    """Nearest-neighbour search over a keyed dataset, backed by LSHash."""

    # Distance function names accepted by query().
    _DIST_FUNCTIONS = ["hamming", "euclidean", "true_euclidean", "centred_euclidean", "cosine", "l1norm"]
    # The LSHash instance; set by create_index().
    index = None

    def __init__(self, dataset):
        """Build the index from ``dataset``, a mapping of key -> vector."""
        self.create_index(dataset)

    def create_index(self, items, hash_size=6):
        """
        (Re)build ``self.index`` from ``items``.

        Args:
            items: mapping of key -> vector.  The input dimension is the
                length of one of its vectors; each vector is indexed with
                its key as extra data.
            hash_size: first argument handed to LSHash.

        Returns:
            True once all items are indexed.

        Raises:
            IndexError: if ``items`` is empty.
        """
        if not items:
            raise IndexError("cannot build an index from an empty dataset")
        # next(iter(...)) also works where values() is a view that cannot be
        # subscripted.
        input_dim = len(next(iter(items.values())))
        self.index = LSHash(hash_size, input_dim)
        for key in items:
            self.index.index(items[key], extra_data=key)
        return True

    def query(self, query_item, num_results=10, distance_function='cosine'):
        """
        Look up the nearest indexed items to ``query_item``.

        Returns:
            dict mapping each hit's key to its distance (see parse_results).

        Raises:
            ValueError: if ``distance_function`` is not in ``_DIST_FUNCTIONS``.
        """
        if distance_function not in self._DIST_FUNCTIONS:
            raise ValueError("{0} not supported".format(distance_function))
        results = self.index.query(query_item, num_results=num_results, distance_func=distance_function)
        return self.parse_results(results)

    def parse_results(self, results):
        """
        Convert raw query results into ``{key: distance}``.

        Each result is read as ``((vector, key), distance)``: the key is
        ``x[0][1]`` and the distance is ``x[1]``.
        """
        return {x[0][1]: x[1] for x in results}