本文整理汇总了 Python 中 mmh3.hash 函数的典型用法代码示例。如果您正苦于以下问题:Python mmh3.hash 函数的具体用法?Python mmh3.hash 怎么用?Python mmh3.hash 使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了hash函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _hash_bits
def _hash_bits(self, key):
# http://spyced.blogspot.com/2009
# /01/all-you-ever-wanted-to-know-about.html
hash1 = mmh3.hash(key, 0)
hash2 = mmh3.hash(key, hash1)
for i in range(self._hash_funcs):
yield abs((hash1 + i * hash2) % self._bits_per_slice)
示例2: data
def data(path, label_path=None):
    """Yield hashed feature rows parsed from a CSV file.

    The first line of ``path`` (and of ``label_path``, when given) is
    skipped as a header.  For every remaining line the first column is
    parsed as the integer ID, and every other column ``m`` is replaced by
    ``abs(mmh3.hash("<m>_<value>")) % D`` (``D`` is a module-level name).
    One extra hashed feature is then stored for every pair of columns
    listed in ``hash_cols``.

    Args:
        path: CSV file holding the feature rows.
        label_path: Optional CSV file with one label row per feature row;
            its first column is ignored and the rest are read as floats.

    Yields:
        ``(ID, x, y)`` when ``label_path`` is given, otherwise ``(ID, x)``.
        ``x`` is the *same* list object on every iteration and is
        overwritten in place, so copy it if a row must outlive the next
        iteration.
    """
    hash_cols = [3, 4, 34, 35, 61, 64, 65, 91, 94, 95]
    npairs = len(hash_cols)
    # Floor division keeps the list length an int on Python 3 as well.
    x = [0] * (146 + npairs * (npairs - 1) // 2)

    label = None
    # Both files are closed when the generator is exhausted or closed.
    with open(path) as fd:
        fd.readline()  # skip headers
        try:
            if label_path:
                label = open(label_path)
                label.readline()  # skip headers

            for line in fd:
                # parse x
                row = line.rstrip().split(',')
                for m, feat in enumerate(row):
                    if m == 0:
                        ID = int(feat)
                    else:
                        # one-hot encode everything with hash trick
                        # categorical: one-hotted
                        # boolean: ONE-HOTTED
                        # numerical: ONE-HOTTED!
                        # note, the built-in hash(), although fast, is not
                        # stable, i.e., the same value won't always have
                        # the same hash on different machines
                        x[m] = abs(mmh3.hash(str(m) + '_' + feat)) % D

                # Pairwise features continue numbering after the last
                # column index left in ``m`` by the loop above.
                for i in range(npairs):
                    for j in range(i + 1, npairs):
                        m += 1
                        pair = row[hash_cols[i]] + "_x_" + row[hash_cols[j]]
                        x[m] = abs(mmh3.hash(str(m) + '_' + pair)) % D

                # parse y, if provided
                if label_path:
                    # use float() to prevent future type casting,
                    # [1:] to ignore id
                    y = [float(value)
                         for value in label.readline().split(',')[1:]]
                    yield (ID, x, y)
                else:
                    yield (ID, x)
        finally:
            if label is not None:
                label.close()
示例3: authenticate
def authenticate(self, name, password, certificates, certhash, certstrong, current=None):
    """Authenticate ``name``/``password`` against the user and guest tables.

    ``'SuperUser'`` is answered with ``RET_FALLTHROUGH``.  Otherwise a
    ``User`` whose ``user_id`` equals ``name`` is looked up first; when
    none exists, ``name`` must parse as a UUID and identify a
    ``GuestUser`` that is not banned.

    Args:
        name: Login name supplied by the client.
        password: Password supplied by the client.
        certificates, certhash, certstrong, current: Accepted but not used
            by this implementation.

    Returns:
        ``RET_FALLTHROUGH`` or ``RET_DENIED``, or on success a tuple
        ``(numeric id, display name, groups)``.
    """
    with self.app.app_context():
        if name == 'SuperUser':
            return RET_FALLTHROUGH

        user = User.query.filter_by(user_id=name).first()
        if user:
            if user.mumble_password != password:
                return RET_DENIED
            # NOTE(review): unlike the guest id below, this hash is not
            # wrapped in abs() and can be negative -- confirm intended.
            # Built once so the logged values are exactly the returned ones.
            result = (
                mmh3.hash(user.user_id),
                '[{}] {}'.format(self.get_ticker(user.corporation_name), user.main_character),
                user.groups,
            )
            self.app.logger.debug('Authenticating user with: {} {} {}'.format(*result))
            return result

        # No registered user: only names that parse as a UUID may be guests.
        try:
            uuid.UUID(name, version=4)
        except ValueError:
            return RET_DENIED

        guest_user = GuestUser.query.get(name)
        if not guest_user:
            return RET_DENIED
        if guest_user.password != password or guest_user.banned:
            return RET_DENIED

        guest_id = abs(mmh3.hash(guest_user.id.hex))
        if guest_user.corporation:
            display_name = '[{}][GUEST] {}'.format(self.get_ticker(guest_user.corporation), guest_user.name)
            result = (guest_id, display_name, [u'Guest'])
        else:
            display_name = '[GUEST] {}'.format(guest_user.name)
            result = (guest_id, display_name, ['Guest'])
        self.app.logger.debug('Authenticating guest with: {} {} {}'.format(*result))
        return result
示例4: hash
def hash(self, string):
    """Return ``self.k`` bucket indexes for ``string``.

    Index ``n`` is ``abs((first + n * second) % self.m)``, where ``first``
    is the murmur3 hash with seed 0 and ``second`` is the murmur3 hash
    seeded with ``first``.
    """
    first = mmh3.hash(string, 0)
    second = mmh3.hash(string, first)
    return [abs((first + n * second) % self.m) for n in range(self.k)]
示例5: get_hash
def get_hash(label, namespace, feature, stride, mask):
    """Return the masked hash index for ``feature`` under ``label``.

    The namespace hash (0 when ``namespace`` is falsy) is added to numeric
    features and used as the murmur3 seed for all other features.  The
    result is multiplied by ``stride``, offset by ``label - 1`` and ANDed
    with ``mask``.
    """
    seed = mmh3.hash(namespace, 0) if namespace else 0
    if is_number(feature):
        hashed = int(feature) + seed
    else:
        hashed = mmh3.hash(feature, seed)
    return (hashed * stride + label - 1) & mask
示例6: Hashmap_WordVector
def Hashmap_WordVector(self, nbits):
    """Build an ``nbits``-wide bloom vector from ``self.Words_Vector``.

    For every word three bucket indexes are appended to ``self.hashmap1``,
    ``self.hashmap2`` and ``self.hashmap3``, and the matching bits of
    ``self.bloom_vector`` are set to 1.
    """
    self.bl_bits = nbits
    self.bloom_vector = self.bl_bits * bitarray('0')
    for i, word in enumerate(self.Words_Vector):
        self.hashmap1.append(mmh3.hash(word) % self.bl_bits)
        # Seeds are read back by position ``i``; presumably the three
        # lists are empty on entry so this is the value just appended --
        # verify against the caller.
        self.hashmap2.append(mmh3.hash(word, self.hashmap1[i]) % self.bl_bits)
        self.hashmap3.append(mmh3.hash(word, self.hashmap2[i]) % self.bl_bits)
        for table in (self.hashmap1, self.hashmap2, self.hashmap3):
            self.bloom_vector[table[i]] = 1
示例7: parse_block
def parse_block(block):
    """Build index entries from the files named in ``block``.

    Each line of each file is stripped and split on single spaces.  One
    entry ``(hash of field 0, [hash of the file path, field 1])`` is
    produced per line, in reading order.
    """
    entries = []
    for file_path in block:
        path_hash = mmh3.hash(file_path)
        with open(file_path, 'r') as input_file:
            rows = (raw.strip().split(' ') for raw in input_file)
            entries.extend(
                (mmh3.hash(fields[0]), [path_hash, fields[1]])
                for fields in rows
            )
    return entries
示例8: getHash
def getHash(word):
    '''
    Return the murmur3 hash of ``word`` masked to its low 32 bits.

    ANDing with 0xffffffff maps a negative hash to the equivalent
    non-negative 32-bit value.
    '''
    # Plain literal: the Python 2-only ``L`` suffix is a syntax error on
    # Python 3, and Python 2 promotes the value to long by itself when
    # needed.
    return mmh3.hash(word) & 0xffffffff
示例9: add_document_indexes
def add_document_indexes(self, text, url, is_print=False):
    """Register ``url`` as a document and index every word of ``text``.

    ``url`` is appended to ``self.documents`` and its position becomes the
    document id.  Each word returned by ``self._split_text`` for the
    lower-cased text is UTF-8 encoded, hashed into one of
    ``self.count_of_files`` buckets of ``self.full_index``, and the
    document id is appended to that word's ``"docs"`` list.

    Args:
        text: Document text to index.
        url: Identifier stored in ``self.documents``.
        is_print: When true, print every word with its bucket number.

    Errors raised while indexing a single word are printed and skipped so
    that the remaining words are still indexed.
    """
    # TODO: Maybe, it is good idea to change key from string to hash
    self.documents.append(url)
    doc_id = len(self.documents) - 1
    for word in self._split_text(text.lower()):
        try:
            word = word.encode('utf-8')
            w_hash = mmh3.hash(word) % self.count_of_files
            if is_print:
                # Single-argument form prints the same on Python 2 and 3.
                print("%s %s" % (word, w_hash))
            r_index = self.full_index[w_hash]
            # ``in`` replaces dict.has_key(), which Python 3 removed.
            if word in r_index:
                r_index[word]["docs"].append(doc_id)
            else:
                r_index[word] = {"docs": [doc_id]}
            if 'encoding' not in r_index:
                r_index['encoding'] = self._encoding
        except Exception:
            # Deliberate best effort: report the word and carry on.
            print("%s %s" % ("EXCEPRION", word))
            traceback.print_exc()
示例10: readHash
def readHash(self):
    """Hash every stdin line into an ``Hll`` built from ``self.p`` and print its count.

    Reading stops at end of input or at the first empty line, whichever
    comes first.
    """
    hll = Hll(self.p)
    # iter(callable, sentinel) ends the loop as soon as a stripped line
    # is empty, which covers both EOF and a blank line.
    for item in iter(lambda: sys.stdin.readline().rstrip('\n'), ''):
        hll.AddItem(mmh3.hash(item))
    # Parenthesized single argument prints the same on Python 2 and 3.
    print(hll.Count())
示例11: save_cursor
def save_cursor(self, cursor_data):
    """Index ``cursor_data`` under an id derived from ``self.data_from``.

    The document goes to index ``"lookup"`` with doc type ``"data"``; the
    id is the murmur3 hash of ``self.data_from``.  Returns ``None``.
    """
    document_id = mmh3.hash(self.data_from)
    self.es.index(
        index="lookup",
        doc_type="data",
        id=document_id,
        body=cursor_data
    )
示例12: _bit_offsets
def _bit_offsets(self, value):
'''The bit offsets to set/check in this Bloom filter for a given value.
Instantiate a Bloom filter:
>>> dilberts = BloomFilter(
... num_values=100,
... false_positives=0.01,
... key='dilberts',
... )
Now let's look at a few examples:
>>> tuple(dilberts._bit_offsets('rajiv'))
(183, 319, 787, 585, 8, 471, 711)
>>> tuple(dilberts._bit_offsets('raj'))
(482, 875, 725, 667, 109, 714, 595)
>>> tuple(dilberts._bit_offsets('dan'))
(687, 925, 954, 707, 615, 914, 620)
Thus, if we want to insert the value 'rajiv' into our Bloom filter,
then we must set bits 183, 319, 787, 585, 8, 471, and 711 all to 1. If
any/all of them are already 1, no problems.
Similarly, if we want to check to see if the value 'rajiv' is in our
Bloom filter, then we must check to see if the bits 183, 319, 787, 585,
8, 471, and 711 are all set to 1. If even one of those bits is set to
0, then the value 'rajiv' must never have been inserted into our Bloom
filter. But if all of those bits are set to 1, then the value 'rajiv'
was *probably* inserted into our Bloom filter.
'''
encoded_value = self._encode(value)
for seed in range(self.num_hashes()):
yield mmh3.hash(encoded_value, seed=seed) % self.size()
示例13: add
def add(self, string):
    """Set one bit of ``Bloom.bit`` for every seeded hash of ``string``.

    Seeds run over ``range(Bloom.numberofhash)``; each murmur3 hash is
    reduced modulo 1000000 to get the bit position.
    """
    # Hash the string.  ``range`` is used instead of the Python 2-only
    # ``xrange``; all positions are computed before any bit is written.
    positions = [mmh3.hash(string, seed=seed) % 1000000
                 for seed in range(Bloom.numberofhash)]
    for position in positions:
        Bloom.bit[position] = 1
示例14: lookup
def lookup(self, string):
    """Return True unless a bit selected for ``string`` is 0.

    One position is probed per seed in ``range(self.hash_count)``: the
    murmur3 hash of ``string`` modulo ``self.size``.  Probing stops at the
    first position whose entry in ``self.bit_array`` equals 0.
    """
    return all(
        self.bit_array[mmh3.hash(string, seed) % self.size] != 0
        for seed in range(self.hash_count)
    )
示例15: select_hash
def select_hash(hashkind, line):
    """Select the kind of hashing for the line.

    :param hashkind: -- (str) The name of the hash: "md5", "sha1", "crc"
        or "murmur".
    :param line: -- (str) The string to hash (a byte string on Python 3,
        as required by hashlib).
    :return: -- (str) The hash of the line as text.
    :raises ValueError: if hashkind is not one of the supported names.

    This function is a kind of hash selector which will use the hash passed
    in argument to hash the string also passed in argument.

    """
    if hashkind == "md5":
        hashline = hashlib.md5(line).hexdigest()
    elif hashkind == "sha1":
        hashline = hashlib.sha1(line).hexdigest()
    elif hashkind == "crc":
        crc32 = crcmod.Crc(0x104c11db7, initCrc=0, xorOut=0xFFFFFFFF)
        crc32.update(line)
        hashline = crc32.hexdigest()
    elif hashkind == "murmur":
        hashline = mmh3.hash(line)
    else:
        # Previously fell through to an UnboundLocalError on ``hashline``.
        raise ValueError("Unsupported hash kind: {!r}".format(hashkind))
    return str(hashline)