当前位置: 首页>>代码示例>>Python>>正文


Python LSHash.query方法代码示例

本文整理汇总了Python中lshash.LSHash.query方法的典型用法代码示例。如果您正苦于以下问题:Python LSHash.query方法的具体用法?Python LSHash.query怎么用?Python LSHash.query使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在lshash.LSHash的用法示例。


在下文中一共展示了LSHash.query方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_lshash_redis_extra_val

# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import query [as 别名]
def test_lshash_redis_extra_val():
    """
    Check indexing and querying with extra data on a Redis-configured LSHash.

    Every vector is indexed twice together with its name.  The test then
    asserts that no bucket list of the first hash table occurs more than
    once, that every stored entry is a known (vector, name) pair, and that
    querying each vector returns such a pair at distance 0.  The Redis
    database selected by the config is flushed before and after the run.
    """
    config = {"redis": {"host": 'localhost', "port": 6379, "db": 15}}
    sr = StrictRedis(**config['redis'])
    sr.flushdb()

    lsh = LSHash(6, 8, 1, config)
    for position in xrange(num_elements):
        # The same (vector, name) pair is inserted twice on purpose.
        for _attempt in (1, 2):
            lsh.index(list(els[position]), el_names[position])

    first_table = lsh.hash_tables[0]
    buckets = [first_table.get_list(key) for key in first_table.keys()]
    for bucket in buckets:
        assert buckets.count(bucket) == 1
        for entry in bucket:
            assert entry[0] in els
            assert entry[1] in el_names

    for vector in els:
        hits = lsh.query(list(vector), num_results=1, distance_func='euclidean')
        best = hits[0]
        # The vector and its name are in the first element of the hit ...
        found_vector, found_name = best[0]
        # ... and the distance is in the second element.
        distance = best[1]
        assert found_vector in els
        assert found_name in el_names
        assert distance == 0

    del lsh
    sr.flushdb()
开发者ID:disheng,项目名称:LSHash,代码行数:32,代码来源:test_lsh.py

示例2: test

# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import query [as 别名]
def test():
    """
    Smoke-test LSHash on the test set returned by utils.load_test_set.

    For every test vector, print its hash against the first set of uniform
    planes and index the vector under its position in the test set.
    Finally print the result of querying with the fourth test vector,
    asking for 3 results.  Always returns None.
    """
    import utils

    _true_ids, testSet = utils.load_test_set('fc7', 'raw', 0)

    input_dim = np.shape(testSet[0])[0]
    lsh = LSHash(128, input_dim, matrices_filename='lsh_planes.data.npz', overwrite=True)

    for position, point in enumerate(testSet):
        hash_value = lsh._hash(lsh.uniform_planes[0], point.tolist())
        print(hash_value)

        lsh.index(point, position)

    neighbours = lsh.query(testSet[3], 3)
    print(neighbours)

    return None
开发者ID:TieSKey,项目名称:database_dcnn,代码行数:18,代码来源:lsh_compressor.py

示例3: test_lshash_redis

# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import query [as 别名]
def test_lshash_redis():
    """
    Check indexing and querying on a Redis-configured LSHash.

    Every vector is indexed twice; the library is expected to prevent the
    duplicate insertion.  The test asserts that the bucket lists of the
    first hash table are unique and only hold known vectors, and that
    querying each vector returns a known vector at distance 0.  The Redis
    database selected by the config is flushed before and after the run.
    """
    config = {"redis": {"host": 'localhost', "port": 6379, "db": 15}}
    sr = StrictRedis(**config['redis'])
    sr.flushdb()

    lsh = LSHash(6, 8, 1, config)
    for position in xrange(num_elements):
        # Second insertion should be prevented by the library.
        for _attempt in (1, 2):
            lsh.index(list(els[position]))

    first_table = lsh.hash_tables[0]
    buckets = [first_table.get_list(key) for key in first_table.keys()]
    for bucket in buckets:
        for stored in bucket:
            # Have multiple insertions been prevented?
            assert buckets.count(bucket) == 1
            assert stored in els

    for vector in els:
        hits = lsh.query(list(vector), num_results=1, distance_func='euclidean')
        found_vector, distance = hits[0]
        assert found_vector in els
        assert distance == 0

    del lsh
    sr.flushdb()
开发者ID:disheng,项目名称:LSHash,代码行数:27,代码来源:test_lsh.py

示例4: eventIdentification

# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import query [as 别名]
def eventIdentification(dictionaryFile, corpusFile, outputFile):
    """
    Group the documents of a corpus into events and write the groups out.

    Each document is converted to a dense vector and queried against the
    LSH index holding the documents processed so far.  The document joins
    the event of the first query result accepted by getDistance();
    if there is none it starts a new event.  The document is then added to
    the index.  One line per event is written to the output file, listing
    the event's document indices separated by commas.

    dictionaryFile -- path handed to corpora.Dictionary.load
    corpusFile     -- path handed to corpora.MmCorpus
    outputFile     -- path of the text file that is written
    """
    corpus = corpora.MmCorpus(corpusFile)
    dictionary = corpora.Dictionary.load(dictionaryFile)
    lsh = LSHash(20, len(dictionary))

    events = []            # events[k] holds the document indices of event k
    event_of_vector = {}   # dense vector (as a tuple) -> event number

    for doc_no in range(len(corpus)):
        dense = getDenseVector(corpus[doc_no], lsh.input_dim)
        candidates = lsh.query(dense)

        # With no candidates the loop body never runs and the document
        # falls through to the "new event" case below.
        event_no = None
        for candidate in candidates:
            # Compared against True explicitly: getDistance's return type
            # is not visible from here.
            if getDistance(candidate, dense) == True:
                event_no = event_of_vector[tuple(candidate[0])]
                break

        if event_no is None:
            event_no = len(events)
            events.append([])

        events[event_no].append(doc_no)
        event_of_vector[tuple(dense)] = event_no
        lsh.index(dense)

    with open(outputFile, 'w') as out:
        for members in events:
            out.write(",".join([str(member) for member in members]) + "\n")
开发者ID:kansal,项目名称:Sub-Event-Detection,代码行数:59,代码来源:utilities.py

示例5: test_lshash

# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import query [as 别名]
def test_lshash():
    """
    Check indexing and querying on an LSHash created without storage config.

    Every vector is indexed twice.  The test asserts that no bucket list of
    the first hash table occurs more than once, that the buckets only hold
    known vectors, and that querying each vector returns a known vector at
    distance 0.
    """
    lsh = LSHash(6, 8, 1)
    for position in xrange(num_elements):
        # The same vector is inserted twice on purpose.
        for _attempt in (1, 2):
            lsh.index(list(els[position]))

    first_table = lsh.hash_tables[0]
    buckets = [first_table.get_list(key) for key in first_table.keys()]
    for bucket in buckets:
        assert buckets.count(bucket) == 1
        for stored in bucket:
            assert stored in els

    for vector in els:
        hits = lsh.query(list(vector), num_results=1, distance_func='euclidean')
        # Each hit is a tuple holding the vector and the distance.
        found_vector, distance = hits[0]
        assert found_vector in els
        assert distance == 0

    del lsh
开发者ID:disheng,项目名称:LSHash,代码行数:20,代码来源:test_lsh.py

示例6: LshIndexer

# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import query [as 别名]
class LshIndexer(Indexer):
    """Indexer whose store is an LSHash instance."""

    # Default settings; the keys are the ones initialize_store() reads.
    PARAMETERS = {
        'hash_size': 6,
        'input_dim': 128,
        'num_of_hashtables': 1,
        'storage': {'redis': {'host': 'localhost', 'port': 6379}},
    }

    def initialize_store(self, parameters):
        """Create self.store from the entries of the parameters mapping."""
        hash_size = parameters['hash_size']
        input_dim = parameters['input_dim']
        table_count = parameters['num_of_hashtables']
        storage = parameters['storage']
        self.store = LSHash(hash_size, input_dim, table_count, storage)

    def index(self, features):
        """Add the data of every feature to the store, tagged with its file_id."""
        for item in features:
            self.store.index(item.data, item.file_id)

    def query(self, feature, num_results=5):
        """Return the store's query result for feature, asking for num_results hits."""
        hits = self.store.query(feature, num_results)
        return hits
开发者ID:balasanjeevi,项目名称:goldeneye,代码行数:20,代码来源:lsh_indexer.py

示例7: detect_subevent

# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import query [as 别名]
def detect_subevent(filename):
	dictionaryFile = filename + ".dict"
	corpusFile = filename + ".mm"
	outputFile = filename + ".out"
	outputVector = []
	tempDict = {}
	outputdict={}
	corpus = corpora.MmCorpus(corpusFile)
	dictionary = corpora.Dictionary.load(dictionaryFile)
	lsh = LSHash(30, dictionary.__len__())
	index = 0
	for index in range(len(corpus)):
		#print str(index)+",",
		#print corpus[index]
		denseVector = getDenseVector(corpus[index], lsh.input_dim)
		#print getSparseVector(denseVector)
		result = lsh.query(denseVector, num_results = 50, distance_func = "euclidean")
		#print result
		#no similar tweets
		
		if(result == []):
			outputdict[index]=[]
			tempDict[getSparseVector(denseVector)] = index
			lsh.index(denseVector)
			#continue
		
		else:
			for r in result:
				if(outputdict.has_key(tempDict[getSparseVector(r[0])])):
					outputdict[tempDict[getSparseVector(r[0])]].append(index)
					break
			
		
		
	#print outputdict
	with open(outputFile, 'w') as out:
		for key in outputdict.iterkeys():
			line = str(key) 
			for i in outputdict[key]:
				line += ", " + str(i)
			out.write(line+"\n")
	
	print "Please check the output file:", outputFile
开发者ID:dreamrulez07,项目名称:SubEventDetectionIRE,代码行数:45,代码来源:pallav_parse.py

示例8: test_lshash_extra_val

# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import query [as 别名]
def test_lshash_extra_val():
    """
    Check indexing and querying with extra data on an LSHash.

    Every vector is indexed once together with its name.  The test asserts
    that every entry of the first hash table is a known (vector, name)
    pair and that querying each vector returns such a pair at distance 0.
    """
    lsh = LSHash(6, 8, 1)
    for position in xrange(num_elements):
        lsh.index(list(els[position]), el_names[position])

    first_table = lsh.hash_tables[0]
    buckets = [first_table.get_list(key) for key in first_table.keys()]
    for bucket in buckets:
        for entry in bucket:
            assert entry[0] in els
            assert entry[1] in el_names

    for vector in els:
        hits = lsh.query(list(vector), num_results=1, distance_func='euclidean')
        # query() returns a list, so only its first entry is looked at.
        best = hits[0]
        # The vector and its name are in the first element of the hit ...
        found_vector, found_name = best[0]
        # ... and the distance is in the second element.
        distance = best[1]
        assert found_vector in els
        assert found_name in el_names
        assert distance == 0

    del lsh
开发者ID:disheng,项目名称:LSHash,代码行数:23,代码来源:test_lsh.py

示例9: subEventDetection

# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import query [as 别名]
def subEventDetection(dictionaryFile, corpusFile, outputFile):
	outputVector = []
	tempDict = {}
	corpus = corpora.MmCorpus(corpusFile)
	dictionary = corpora.Dictionary.load(dictionaryFile)
	lsh = LSHash(30, dictionary.__len__())
	index = 0
	for index in range(len(corpus)):
		denseVector = getDenseVector(corpus[index], lsh.input_dim)
		result = lsh.query(denseVector, num_results = 50, distance_func = "cosine")
		#no similar tweets
		if(result == []):
			outputVector.append([index])
			continue
		assigned = False
		for vector in result:
			if(getDistance(vector, denseVector) == True):
				for ind in range(len(outputVector)):
					done = False
					for tweetNo in outputVector[ind]:
						if (tweetNo == tempDict[vector]):
							outputVector[ind].append(index)
							done = True
							break
					if done == True:
						break
				assigned = True
				break
		if assiged == False:
			outputVector.append([index])
		lsh.index(denseVector)
		tempDict[tuple(denseVector)] = index
	with open(outputFile, 'w') as out:
		for vector in outputVector:
			line = ""
			for index in vector:
				line += ", " + str(index)
			out.write(line[2:]+"\n")
	print "Please check the output file:", outputFile
开发者ID:kansal,项目名称:Sub-Event-Detection,代码行数:41,代码来源:utilities.py

示例10: __init__

# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import query [as 别名]
class Searcher:
    """Similarity search over a keyed dataset, backed by an LSHash index."""

    # Values accepted for the distance_function argument of query().
    _DIST_FUNCTIONS = ["hamming", "euclidean", "true_euclidean", "centred_euclidean", "cosine", "l1norm"]
    index = None

    def __init__(self, dataset):
        """Build the index from dataset, a mapping of key -> vector."""
        self.create_index(dataset)

    def create_index(self, items, hash_size=6):
        """
        Create the LSHash index and add every vector with its key as extra data.

        The input dimension is taken from the length of the first value of
        items.  Returns True.
        """
        first_vector = items.values()[0]
        lsh = LSHash(hash_size, len(first_vector))
        self.index = lsh
        for key in items:
            lsh.index(items[key], extra_data=key)
        return True

    def query(self, query_item, num_results=10, distance_function='cosine'):
        """
        Query the index and return a dict mapping item key -> distance.

        Raises Exception when distance_function is not one of
        _DIST_FUNCTIONS.
        """
        supported = distance_function in self._DIST_FUNCTIONS
        if not supported:
            raise Exception("{0} not supported".format(distance_function))
        hits = self.index.query(query_item, num_results=num_results, distance_func=distance_function)
        return self.parse_results(hits)

    def parse_results(self, results):
        """Turn ((vector, key), distance) entries into a {key: distance} dict."""
        parsed = {}
        for hit in results:
            parsed[hit[0][1]] = hit[1]
        return parsed
开发者ID:ivolima,项目名称:py-diversification-similarity,代码行数:25,代码来源:searcher.py

示例11: vectorize

# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import query [as 别名]
def vectorize(string):
    """
    Encode a string as a fixed-length vector of character codes.

    Returns a numpy integer array of length 25 whose i-th entry is
    ord(string[i]); positions past the end of the string stay 0.

    Raises IndexError if the string has more than 25 characters.
    """
    # numpy.int was only an alias of the builtin int and no longer exists in
    # current NumPy releases; the builtin yields the same dtype.
    vec = numpy.zeros(25, dtype=int)
    for i, char in enumerate(string):
        vec[i] = ord(char)
    return vec

def decode(vec):
    """
    Turn a vector of character codes back into a string.

    Every entry is converted with int() and mapped to its character;
    entries equal to 0 (the padding written by vectorize) are skipped.
    Returns an empty string for an empty or all-zero vector.
    """
    # unichr only exists on Python 2; chr is its Python 3 counterpart.
    try:
        to_char = unichr
    except NameError:
        to_char = chr
    codes = [int(value) for value in vec]
    return ''.join([to_char(code) for code in codes if code != 0])


# Index every token of the story as a 25-dimensional character-code vector,
# then look up the neighbours of the token 'orang'.
lsh = LSHash(1, 25, storage_config={'dict':'9'}, matrices_filename = '../advs/THE FIVE ORANGE PIPS.npz')

# Pickle data is binary, so the file is opened in binary mode, and the
# context manager closes the handle as soon as the tokens are loaded.
# NOTE: pickle.load can execute code embedded in the file; only load token
# files from a trusted source.
with open('../advs/THE FIVE ORANGE PIPS.tok', 'rb') as f:
    tok = pickle.load(f)

for word in tok:
    lsh.index(vectorize(word))

res = lsh.query(vectorize('orang'), num_results = 3,  distance_func = 'l1norm')

# Single-argument prints are parenthesised so they read the same on
# Python 2 and Python 3.
print(len(res))
print([decode(r[0]) for r in res])
开发者ID:shivin9,项目名称:search_engine,代码行数:31,代码来源:test_lshash.py

示例12: LSHash

# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import query [as 别名]
from lshash import LSHash
# Create an index that uses the "lmdb" storage configuration at a fixed local path.
storage = {"lmdb": {'path': '/Users/christianburger/Downloads/testlmdb'}}
lsh = LSHash(hash_size=6, input_dim=8, num_hashtables=1, storage_config=storage)

# Index three 8-dimensional vectors, each with a one-letter label as extra data.
labelled_vectors = (
    ([1, 2, 3, 4, 5, 6, 7, 8], 'a'),
    ([2, 3, 4, 5, 6, 7, 8, 9], 'b'),
    ([10, 12, 99, 1, 5, 31, 2, 3], 'c'),
)
for vector, label in labelled_vectors:
    lsh.index(vector, label)

# Query with a vector that differs from the first one only in its last entry.
print(lsh.query([1, 2, 3, 4, 5, 6, 7, 7]))
开发者ID:herrbuerger,项目名称:LSHash,代码行数:8,代码来源:test.py

示例13: enumerate

# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import query [as 别名]
# Classify every sample from position train_n onwards by its single nearest
# neighbour in the index and count how many predicted labels match.
#
# Recorded results per distance function (196 files each):
#   cosine             test rate 91.326531, took 52901.431 ms
#   l1norm             test rate 91.326531, took 35271.345 ms
#   euclidean          test rate 90.816327, took 24904.888 ms
#   true_euclidean     test rate 89.795918, took 17713.646 ms
#   centred_euclidean  test rate 52.040816, took  9000.577 ms
#
# Recorded results per BLOCKS / ORIENTATIONS setting (196 files each):
#   BLOCKS = 1, ORIENTATIONS = (8, 8, 3), DIMENSION = 57:   test rate 89.285714, took  9997.003 ms
#   BLOCKS = 2, ORIENTATIONS = (8, 8, 3), DIMENSION = 228:  test rate 91.326531, took 17227.878 ms
#   BLOCKS = 3, ORIENTATIONS = (8, 8, 3), DIMENSION = 513:  test rate 98.469388, took 64944.190 ms
#   BLOCKS = 4, ORIENTATIONS = (8, 8, 4), DIMENSION = 960:  test rate 95.408163, took 47667.006 ms
#   BLOCKS = 5, ORIENTATIONS = (8, 8, 3), DIMENSION = 1425: test rate 93.367347, took 71029.642 ms
for i, sample in enumerate(samples[train_n:]):
    total += 1
    rs = lsh.query(sample, num_results=1, distance_func=DISTANCE_FUNC)
    if not rs:
        # Nothing was returned for this sample; it counts as a miss.
        continue
    # rs[0][0][1] is the extra data stored with the nearest vector,
    # which is compared against the expected response.
    predicted = rs[0][0][1]
    if predicted == responses[train_n:][i]:
        correct += 1

t2 = time.time()
elapsed_ms = (t2 - t1) * 1000.0
print("test rate: %f, %d files took %0.3f ms" % (correct/float(total)*100, total, elapsed_ms))
开发者ID:xieyanfu,项目名称:playground,代码行数:33,代码来源:test.py

示例14: LSHash

# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import query [as 别名]
from __future__ import print_function
from __future__ import division

from scipy.spatial.distance import cosine
from tqdm import tqdm
import numpy
from lshash import LSHash
import time

# Benchmark: index 20000 random 300-dimensional vectors, then time cosine
# queries for every 500th of them.
start = time.time()
lsh = LSHash(8, 300)
sample_word_embeds = []
# range() rather than xrange(): the file already opts into Python 3 style
# print and division, and xrange does not exist on Python 3.
for i in tqdm(range(20000)):
    word_embed = numpy.random.rand(300)
    lsh.index(word_embed)

    # Keep every 500th vector as a query sample.
    if i % 500 == 0:
        sample_word_embeds.append(word_embed)

print("Indexing takes {} seconds".format(time.time() - start))

start = time.time()
for word_embed in sample_word_embeds:
    print('-' * 80)
    results = lsh.query(word_embed, num_results=None, distance_func='cosine')
    print("Num result: {}".format(len(results)))
    print('Nearest neighbor cosine distance:')
    if len(results) > 1:
        # results[0] is presumably the query vector itself, since it was
        # indexed above; results[1] is then the nearest other vector.
        # Each entry is a (vector, distance) pair; the distance is
        # recomputed with scipy for comparison.
        print("    {} | {}".format(results[1][1], cosine(results[1][0], word_embed)))
    else:
        # Fewer than two entries came back, so there is no second result to report.
        print("    (no other vector was returned for this query)")

print('Query takes average {} seconds'.format((time.time() - start) / len(sample_word_embeds)))
开发者ID:khayrallah,项目名称:mtma17-scripts,代码行数:32,代码来源:test_lshash.py

示例15: range

# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import query [as 别名]
# -*- coding: utf-8 -*-
"""
Created on Fri Dec 04 15:29:56 2015

@author: MaGesh
"""
import numpy as np
from scipy.ndimage import imread
from lshash import LSHash
lsh=LSHash(20,32*32) #32*32 is the dimension with 20 hash buckets
resultSet=[]
for i in range(1,100001):
    print i;
    X="F:\\Fall 2015\\Data Mining\\Programming Assignments\\PA5\\data\\dataset\\"+str(i)+".bmp"
    im=imread(X,flatten=True)
    single_array=im.flatten()
    lsh.index(single_array)#hashing the each values in to the bucket
for i in range(1,11):
    print i,"for querying"    
    X1="F:\\Fall 2015\\Data Mining\\Programming Assignments\\PA5\\data\\Query\\"+str(i)+".bmp"
    imQ=imread(X1,flatten=True) #converting to grey scale
    imFlatten=imQ.flatten()
    value=lsh.query(imFlatten,distance_func="euclidean") #querying the nearest points
    resultSet.append(value)
开发者ID:rmagesh148,项目名称:LSH-Implementation-for-Image-Files,代码行数:26,代码来源:lshForImageFiles.py


注:本文中的lshash.LSHash.query方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。