Python LSHash.index方法代码示例

本文整理汇总了Python中lshash.LSHash.index方法的典型用法代码示例。如果您正苦于以下问题：Python LSHash.index方法的具体用法？Python LSHash.index怎么用？Python LSHash.index使用的例子？那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类lshash.LSHash的用法示例。

在下文中一共展示了LSHash.index方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_lshash_redis_extra_val

# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import index [as 别名]
def test_lshash_redis_extra_val():
    """
    Test external lshash module
    """
    config = {"redis": {"host": 'localhost', "port": 6379, "db": 15}}
    sr = StrictRedis(**config['redis'])
    sr.flushdb()

    lsh = LSHash(6, 8, 1, config)
    for i in xrange(num_elements):
        lsh.index(list(els[i]), el_names[i])
        lsh.index(list(els[i]), el_names[i])  # multiple insertions
    hasht = lsh.hash_tables[0]
    itms = [hasht.get_list(k) for k in hasht.keys()]
    for itm in itms:
        assert itms.count(itm) == 1
        for el in itm:
            assert el[0] in els
            assert el[1] in el_names
    for el in els:
        res = lsh.query(list(el), num_results=1, distance_func='euclidean')[0]
        # vector an name are in the first element of the tuple res[0]
        el_v, el_name = res[0]
        # the distance is in the second element of the tuple
        el_dist = res[1]
        assert el_v in els
        assert el_name in el_names
        assert el_dist == 0
    del lsh
    sr.flushdb()

开发者ID:disheng，项目名称:LSHash，代码行数:32，代码来源:test_lsh.py

示例2: test_lshash_redis

# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import index [as 别名]
def test_lshash_redis():
    """
    Test external lshash module
    """
    config = {"redis": {"host": 'localhost', "port": 6379, "db": 15}}
    sr = StrictRedis(**config['redis'])
    sr.flushdb()

    lsh = LSHash(6, 8, 1, config)
    for i in xrange(num_elements):
        lsh.index(list(els[i]))
        lsh.index(list(els[i]))  # multiple insertions should be prevented by the library
    hasht = lsh.hash_tables[0]
    itms = [hasht.get_list(k) for k in hasht.keys()]
    for itm in itms:
        for el in itm:
            assert itms.count(itm) == 1  # have multiple insertions been prevented?
            assert el in els
    for el in els:
        res = lsh.query(list(el), num_results=1, distance_func='euclidean')[0]
        el_v, el_dist = res
        assert el_v in els
        assert el_dist == 0
    del lsh
    sr.flushdb()

开发者ID:disheng，项目名称:LSHash，代码行数:27，代码来源:test_lsh.py

示例3: build_index

# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import index [as 别名]
    def build_index(self, X):
        f = X.shape[1]
        n = X.shape[0]

        lsh = LSHash(hash_size = 32, input_dim = f, num_hashtables = 100)
        for i in range(n):
            lsh.index(X[i], i)

        return lsh

开发者ID:BeifeiZhou，项目名称:Performance_evaluations_ANN，代码行数:11，代码来源:evaluation_functions.py

示例4: create_hash2img

# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import index [as 别名]
def create_hash2img():
    img2gist = get_img2gist()
    lsh = LSHash(hash_len, 960, storage_config=redis_config,
                 matrices_filename=matrices_file)
    count = 0
    total_num = len(img2gist)
    for name, gist_v in img2gist.iteritems():
        count += 1
        lsh.index(gist_v, name)
        sys.stdout.write('%d/%d\r    ' % (count, total_num))
        sys.stdout.flush()

    print 'bucket ratio: %d/%d' % (len(lsh.hash_tables[0].keys()), 2 ** hash_len)
    return lsh

开发者ID:thjashin，项目名称:dip，代码行数:16，代码来源:retrieval.py

示例5: subEventDetection

# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import index [as 别名]
def subEventDetection(dictionaryFile, corpusFile, outputFile):
	outputVector = []
	tempDict = {}
	corpus = corpora.MmCorpus(corpusFile)
	dictionary = corpora.Dictionary.load(dictionaryFile)
	lsh = LSHash(30, dictionary.__len__())
	index = 0
	for index in range(len(corpus)):
		denseVector = getDenseVector(corpus[index], lsh.input_dim)
		result = lsh.query(denseVector, num_results = 50, distance_func = "cosine")
		#no similar tweets
		if(result == []):
			outputVector.append([index])
			continue
		assigned = False
		for vector in result:
			if(getDistance(vector, denseVector) == True):
				for ind in range(len(outputVector)):
					done = False
					for tweetNo in outputVector[ind]:
						if (tweetNo == tempDict[vector]):
							outputVector[ind].append(index)
							done = True
							break
					if done == True:
						break
				assigned = True
				break
		if assiged == False:
			outputVector.append([index])
		lsh.index(denseVector)
		tempDict[tuple(denseVector)] = index
	with open(outputFile, 'w') as out:
		for vector in outputVector:
			line = ""
			for index in vector:
				line += ", " + str(index)
			out.write(line[2:]+"\n")
	print "Please check the output file:", outputFile

开发者ID:kansal，项目名称:Sub-Event-Detection，代码行数:41，代码来源:utilities.py

示例6: eventIdentification

# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import index [as 别名]
def eventIdentification(dictionaryFile, corpusFile, outputFile):
	outputVector = []
	tempDict = {}
	
	corpus = corpora.MmCorpus(corpusFile)
	dictionary = corpora.Dictionary.load(dictionaryFile)
	#print "Unique Tokens:", dictionary.__len__()
	lsh = LSHash(20, dictionary.__len__())
	index = 0
	for index in range(len(corpus)):
		denseVector = getDenseVector(corpus[index], lsh.input_dim)
		result = lsh.query(denseVector)
		
		#print denseVector
		
		#no similar tweets
		if(result == []):
			#print "No Similar Tweets for: ", index
			tempDict[tuple(denseVector)] = len(outputVector)
			outputVector.append([index])
			lsh.index(denseVector)
			continue
		
		assigned = False
		for vector in result:
			if(getDistance(vector, denseVector) == True):
				ev = tempDict[tuple(vector[0])]
				outputVector[ev].append(index)
				tempDict[tuple(denseVector)] = ev
				#for ind in range(len(outputVector)):
					#done = False
					#for tweetNo in outputVector[ind]:
						#if (tweetNo == tempDict[tuple(vector[0])]):
							#outputVector[ind].append(index)
							#done = True
							#break
					#if done == True:
						#break
				assigned = True
				break
		
		if assigned == False:
			tempDict[tuple(denseVector)] = len(outputVector)
			outputVector.append([index])
			
		lsh.index(denseVector)
		
		
	with open(outputFile, 'w') as out:
		for vector in outputVector:
			line = ""
			for index in vector:
				line += "," + str(index)
			out.write(line[1:]+"\n")
	
	del outputVector
	del tempDict

开发者ID:kansal，项目名称:Sub-Event-Detection，代码行数:59，代码来源:utilities.py

示例7: test

# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import index [as 别名]
def test():
    import utils

    trueIds, testSet = utils.load_test_set('fc7', 'raw', 0)

    lsh = LSHash(128, np.shape(testSet[0])[0], matrices_filename='lsh_planes.data.npz', overwrite=True)

    for idx, input_point in enumerate(testSet):
        hastValue = lsh._hash(lsh.uniform_planes[0], input_point.tolist())
        print hastValue

        lsh.index(input_point, idx)

    print lsh.query(testSet[3], 3)

    return None

开发者ID:TieSKey，项目名称:database_dcnn，代码行数:18，代码来源:lsh_compressor.py

示例8: LshIndexer

# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import index [as 别名]
class LshIndexer(Indexer):
	PARAMETERS = {'hash_size': 6,
				  'input_dim': 128,
				  'num_of_hashtables': 1,
				  'storage': {'redis': {'host':'localhost', 'port': 6379}}}

	def initialize_store(self, parameters):
		self.store = LSHash(parameters['hash_size'],
							parameters['input_dim'],
							parameters['num_of_hashtables'],
							parameters['storage'])

	def index(self, features):
		for feature in features:
			self.store.index(feature.data, feature.file_id)

	def query(self, feature, num_results=5):
		return self.store.query(feature, num_results)

开发者ID:balasanjeevi，项目名称:goldeneye，代码行数:20，代码来源:lsh_indexer.py

示例9: test_lshash

# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import index [as 别名]
def test_lshash():
    lsh = LSHash(6, 8, 1)
    for i in xrange(num_elements):
        lsh.index(list(els[i]))
        lsh.index(list(els[i]))  # multiple insertions
    hasht = lsh.hash_tables[0]
    itms = [hasht.get_list(k) for k in hasht.keys()]
    for itm in itms:
        assert itms.count(itm) == 1
        for el in itm:
            assert el in els
    for el in els:
        res = lsh.query(list(el), num_results=1, distance_func='euclidean')[0]
        # res is a tuple containing the vector and the distance
        el_v, el_dist = res
        assert el_v in els
        assert el_dist == 0
    del lsh

开发者ID:disheng，项目名称:LSHash，代码行数:20，代码来源:test_lsh.py

示例10: learn

# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import index [as 别名]
def learn(routes):
    global global_training_route
    global next_hop_index
    
    extra_data_len = 2    #destination, next_hop
    ndims = len(routes[0]) - extra_data_len   #Number of dimensions
    hash_length = len(routes[0]) * 2   #arbitrarily chosen hash_length
    next_hop_index = len(routes[0]) - 1   #NextHop index at the last
    
    for i in range(0, len(routes) - 1):
        if(routes[i][next_hop_index] >= routes[i+1][next_hop_index]):
            routes[i][next_hop_index] = i+1
        else:
            routes[i][next_hop_index] = -1
    global_training_route = routes
    lsh = LSHash(hash_length, ndims)
    for entry in routes:
        lsh.index(entry[:-extra_data_len], extra_data = entry[-extra_data_len:])
    return lsh

开发者ID:obsc，项目名称:flight-planner，代码行数:21，代码来源:knn_load.py

示例11: detect_subevent

# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import index [as 别名]
def detect_subevent(filename):
	dictionaryFile = filename + ".dict"
	corpusFile = filename + ".mm"
	outputFile = filename + ".out"
	outputVector = []
	tempDict = {}
	outputdict={}
	corpus = corpora.MmCorpus(corpusFile)
	dictionary = corpora.Dictionary.load(dictionaryFile)
	lsh = LSHash(30, dictionary.__len__())
	index = 0
	for index in range(len(corpus)):
		#print str(index)+",",
		#print corpus[index]
		denseVector = getDenseVector(corpus[index], lsh.input_dim)
		#print getSparseVector(denseVector)
		result = lsh.query(denseVector, num_results = 50, distance_func = "euclidean")
		#print result
		#no similar tweets
		
		if(result == []):
			outputdict[index]=[]
			tempDict[getSparseVector(denseVector)] = index
			lsh.index(denseVector)
			#continue
		
		else:
			for r in result:
				if(outputdict.has_key(tempDict[getSparseVector(r[0])])):
					outputdict[tempDict[getSparseVector(r[0])]].append(index)
					break
			
		
		
	#print outputdict
	with open(outputFile, 'w') as out:
		for key in outputdict.iterkeys():
			line = str(key) 
			for i in outputdict[key]:
				line += ", " + str(i)
			out.write(line+"\n")
	
	print "Please check the output file:", outputFile

开发者ID:dreamrulez07，项目名称:SubEventDetectionIRE，代码行数:45，代码来源:pallav_parse.py

示例12: dump_lsh_data_to_pickle

# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import index [as 别名]
def dump_lsh_data_to_pickle(bits_tid_pickle, lsh_pickle):
	f = file(bits_tid_pickle, "rb")
	data = pickle.load(f)
	f.close()
	#这里的参数可以调整，具体见https://github.com/kayzh/LSHash
	lsh = LSHash(13, 128, num_hashtables=1)
	map(lambda x:lsh.index(np.array([int(tmp) for tmp in x])), data.keys())	
	out = file(lsh_pickle,"wb")
	pickle.dump(lsh, out, -1)
	out.close()

开发者ID:ChengduoZhao，项目名称:image_retrieval，代码行数:12，代码来源:dump_lsh_data_to_pickle.py

示例13: getBuckets

# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import index [as 别名]
def getBuckets(fromFile):
    global nameDict
    global lsh
    nameDict = {}
    lsh = LSHash(bWidth,26, num_hashtables = num_ht)
    if fromFile:
        f = open(datafile, 'r')   
        nameList = f.readlines()
    else:
        nameList = surnames.dic.keys()
    for l in nameList:
        name = l.split(" ")[0].strip()
        nameArr = getvec(name)
        arrStr = toStr(nameArr)
        if arrStr in nameDict:
            nameDict[arrStr].append(name)
        else:
            nameDict[arrStr] = [name]
    for k in nameDict.keys():
        lsh.index(toArr(k))

开发者ID:brendobendo，项目名称:ethnicityClassification，代码行数:22，代码来源:namedistance_clean.py

示例14: test_lshash_extra_val

# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import index [as 别名]
def test_lshash_extra_val():
    lsh = LSHash(6, 8, 1)
    for i in xrange(num_elements):
        lsh.index(list(els[i]), el_names[i])
    hasht = lsh.hash_tables[0]
    itms = [hasht.get_list(k) for k in hasht.keys()]
    for itm in itms:
        for el in itm:
            assert el[0] in els
            assert el[1] in el_names
    for el in els:
        # res is a list, so we need to select the first entry only
        res = lsh.query(list(el), num_results=1, distance_func='euclidean')[0]
        # vector an name are in the first element of the tuple res[0]
        el_v, el_name = res[0]
        # the distance is in the second element of the tuple
        el_dist = res[1]
        assert el_v in els
        assert el_name in el_names
        assert el_dist == 0
    del lsh

开发者ID:disheng，项目名称:LSHash，代码行数:23，代码来源:test_lsh.py

示例15: init

# 需要导入模块: from lshash import LSHash [as 别名]
# 或者: from lshash.LSHash import index [as 别名]
class Searcher:

    _DIST_FUNCTIONS = ["hamming", "euclidean", "true_euclidean", "centred_euclidean", "cosine", "l1norm"]
    index = None

    def __init__(self, dataset):
        self.create_index(dataset)

    def create_index(self, items, hash_size=6):
        input_dim = len(items.values()[0])
        self.index = LSHash(hash_size, input_dim)
        for key in items:
            self.index.index(items[key], extra_data=key)
        return True

    def query(self, query_item, num_results=10, distance_function='cosine'):
        if distance_function not in self._DIST_FUNCTIONS:
            raise Exception("{0} not supported".format(distance_function))
        results = self.index.query(query_item, num_results=num_results, distance_func=distance_function)
        return self.parse_results(results)

    def parse_results(self, results):
        return {x[0][1]:x[1] for x in results}

开发者ID:ivolima，项目名称:py-diversification-similarity，代码行数:25，代码来源:searcher.py

注：本文中的lshash.LSHash.index方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。