本文整理汇总了Python中pybloom.BloomFilter.fromfile方法的典型用法代码示例。如果您正苦于以下问题:Python BloomFilter.fromfile方法的具体用法?Python BloomFilter.fromfile怎么用?Python BloomFilter.fromfile使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pybloom.BloomFilter的用法示例。
在下文中一共展示了BloomFilter.fromfile方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_insert_then_test
# 需要导入模块: from pybloom import BloomFilter [as 别名]
# 或者: from pybloom.BloomFilter import fromfile [as 别名]
def test_insert_then_test(self):
    """Index CSV fields 1 and 2 and check each filter holds only its own words.

    Runs ``create_index`` over ``self.test_file``, verifies the per-index
    counts it returns, then loads both generated ``.bfindex`` files and
    checks membership of the expected words in each filter.
    """
    result = create_index(
        '/tmp/fake.csv',  # input filename
        self.test_file,   # file-like object
        0.0001,           # error rate
        1,                # skip lines
        [1, 2],           # fields
        ',',              # delimiter
        False)            # recursive domain
    self.assertEqual(
        {'/tmp/fake.csv.2.bfindex': 6,
         '/tmp/fake.csv.1.bfindex': 5},
        result)
    # Load through context managers so the index files are closed even when
    # an assertion further down fails.
    with open('/tmp/fake.csv.1.bfindex', 'rb') as index_1:
        b1 = BloomFilter.fromfile(index_1)
    with open('/tmp/fake.csv.2.bfindex', 'rb') as index_2:
        b2 = BloomFilter.fromfile(index_2)
    # Presumably the column headers, which the "skip lines" argument of 1
    # is expected to keep out of the filters -- confirm against the fixture.
    self.assertNotIn('FieldA', b1)
    self.assertNotIn('FieldB', b2)
    for word in ('apple', 'banana', 'orange', 'pear', 'pineapple'):
        self.assertIn(word, b1)
        self.assertNotIn(word, b2)
    for word in ('carrot', 'potato', 'leek', 'cauliflower', 'bean'):
        self.assertIn(word, b2)
        self.assertNotIn(word, b1)
示例2: jaccard_ind
# 需要导入模块: from pybloom import BloomFilter [as 别名]
# 或者: from pybloom.BloomFilter import fromfile [as 别名]
def jaccard_ind(filename_1, filename_2):
    """Print the Jaccard index of two serialized Bloom filters.

    Both files are deserialized with ``BloomFilter.fromfile``. The index is
    the number of set bits in the filters' intersection divided by the
    number of set bits in their union. The first file name is printed on
    its own line, followed by ``"<file1> <file2> <index>"``.

    When the union has no set bits at all the index is reported as 1.0
    (two empty sets are treated as identical) instead of raising
    ZeroDivisionError.
    """
    # The context manager closes both files; no explicit close() is needed.
    with open(filename_1, 'rb') as f_1, open(filename_2, 'rb') as f_2:
        print(filename_1)
        b_1 = BloomFilter.fromfile(f_1)
        b_2 = BloomFilter.fromfile(f_2)
    bits_inter = b_1.intersection(b_2).bitarray.count(True)
    bits_union = b_1.union(b_2).bitarray.count(True)
    if bits_union:
        j_i = float(bits_inter) / float(bits_union)
    else:
        j_i = 1.0
    print("%s %s %f" % (filename_1, filename_2, j_i))
示例3: update_bf
# 需要导入模块: from pybloom import BloomFilter [as 别名]
# 或者: from pybloom.BloomFilter import fromfile [as 别名]
def update_bf(request):
global proxies
temp_list = request.split('\n')
#print temp_list
updated_proxy = temp_list[1] # ip of other proxy
# write the bloom filter to a file bf_upd
temp_bf_recv = open('bf_upd', "w")
temp_bf_recv.write(temp_list[2])
temp_bf_recv.write('\n')
temp_bf_recv.write(temp_list[3])
temp_bf_recv.close()
temp_bf_recv = open('bf_upd', 'r')
# de-serialize the bloom filter
temp_bf = BloomFilter.fromfile(temp_bf_recv)
temp_bf_recv.close()
if os.path.isfile('bf_upd'): # remove the file used to hold the bloom filter
os.remove('bf_upd')
index = 0;
for proxy in proxies:
if proxy[0] == updated_proxy: # update only the proxy who's bloom filter is updated
print "UPDATING BF OF PROXY: ", proxy[0]
curr_port = proxy[1]
break
index = index + 1
proxies[index] = (updated_proxy, curr_port, temp_bf) # update entry in the list
示例4: __init__
# 需要导入模块: from pybloom import BloomFilter [as 别名]
# 或者: from pybloom.BloomFilter import fromfile [as 别名]
def __init__(self):
    """Load the Bloom filter stored in FILTER_FILE, or start a fresh one.

    If the file cannot be opened or read (IOError), a new filter with
    capacity 10,000,000 and error rate 0.001 is created instead.
    ``self.num`` always starts at 0.
    """
    try:
        # Binary mode: the file holds a serialized filter, not text.
        with open(FILTER_FILE, 'rb') as f:
            self.f = BloomFilter.fromfile(f)
    except IOError:
        self.f = BloomFilter(capacity=10000000, error_rate=0.001)
    self.num = 0
示例5: test
# 需要导入模块: from pybloom import BloomFilter [as 别名]
# 或者: from pybloom.BloomFilter import fromfile [as 别名]
def test():
with open('blooms/boys', 'r') as f:
boys = BloomFilter.fromfile(f)
with open('blooms/girls', 'r') as f:
girls = BloomFilter.fromfile(f)
print "Enter a name:"
while True:
name = raw_input().strip().lower()
if name in boys and name not in girls:
print "That is a boy's name."
elif name not in boys and name in girls:
print "That is a girl's name."
elif name in boys and name in girls:
print "That could be either a boy's or a girl's name."
else:
print "That doesn't look like a boy's or a girl's name."
示例6: add_proxy
# 需要导入模块: from pybloom import BloomFilter [as 别名]
# 或者: from pybloom.BloomFilter import fromfile [as 别名]
def add_proxy(request, conn):
global proxies
recv_list = request.split('\n')
#print "RECV LIST: ",recv_list
new_proxy = recv_list[1]
new_port = int(recv_list[2])
temp_bf = open("temp_bf_rec", "w+") # write the bloom filter string to a file
temp_bf.write(recv_list[3])
temp_bf.write('\n');
temp_bf.write(recv_list[4])
temp_bf.close()
temp_bf = open("temp_bf_rec", 'r')
new_bf = BloomFilter.fromfile(temp_bf) # de-serialize the bloom filter
temp_bf.close()
if os.path.isfile('temp_bf_rec'): # remove the temp file
os.remove('temp_bf_rec')
print "GOT BLOOM FILTER"
proxies.append((new_proxy,new_port,new_bf))
#print "NEW PROXIES: ",proxies
temp_list = ['NEW LIST OF PROXIES']
for proxy in proxies:
temp_list.append('\n')
temp_list.append(proxy[0]) # add IP
temp_list.append('\n')
temp_list.append(str(proxy[1])) # add port
temp_list.append('\n')
temp_bf = open('temp_bf_send', "w") # serialize the bloom filter for sending
proxy[2].tofile(temp_bf)
temp_bf.close()
temp_bf = open('temp_bf_send', "r")
# create the a string so the bloom filter can be sent over socket connect
temp2 = ''
while 1:
temp = temp_bf.read()
#print "Reading from file: ", len(temp)
if len(temp) > 0:
temp2 = temp2 + temp
else:
break
temp_bf.close()
temp_list.append(temp2) # add bloom filter
if os.path.isfile('temp_bf_send'): # remove temp file
os.remove('temp_bf_send')
#print temp_list
temp_string = ''.join(temp_list) # create a string representation of the list
for proxy in proxies:
if proxy[0] != bootstrap_proxy: #only bootstrapping proxy will do this
temp_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
temp_sock.connect((proxy[0],proxy[1])) # connect to each proxy and send the new list of proxies
temp_sock.send(temp_string)
temp_sock.close()
示例7: _fromfile_
# 需要导入模块: from pybloom import BloomFilter [as 别名]
# 或者: from pybloom.BloomFilter import fromfile [as 别名]
def _fromfile_(self):
    """Load the cached Bloom filter from '../out/filter', or create a new one.

    On success ``self.count`` is taken from the loaded filter's ``count``.
    On any failure the exception is printed and an empty filter with
    capacity 10,000,000 and error rate 0.00001 is used, with
    ``self.count`` reset to 0.
    """
    try:
        # Binary mode plus a context manager: the handle is closed even when
        # deserialization raises.
        with open('../out/filter', 'rb') as f:
            self.bloom_cache = BloomFilter.fromfile(f)
        self.count = self.bloom_cache.count
    except Exception as ex:
        # Deliberately broad: any unreadable or corrupt file falls back to a
        # fresh filter.
        print(Exception, ex)
        self.bloom_cache = BloomFilter(capacity=10000000, error_rate=0.00001)
        self.count = 0
示例8: __init__
# 需要导入模块: from pybloom import BloomFilter [as 别名]
# 或者: from pybloom.BloomFilter import fromfile [as 别名]
def __init__(self, name):
    """Set up the instance and load an existing filter from `name` if present.

    When a regular file exists at `name`, its first four bytes are decoded
    as a big-endian unsigned 32-bit integer, which is passed as the second
    argument to ``BloomFilter.fromfile`` (presumably the byte length of the
    serialized filter that follows). Otherwise ``self._bf`` stays ``None``.
    """
    super(BloomZip, self).__init__()
    self.__data = StringIO()
    self._name = name
    self._bf = None
    if not os.path.isfile(self._name):
        return
    with open(self._name, 'rb') as f:
        header = f.read(4)
        (length,) = struct.unpack(">L", header)
        self._bf = BloomFilter.fromfile(f, length)
示例9: get_values_by_key_data
# 需要导入模块: from pybloom import BloomFilter [as 别名]
# 或者: from pybloom.BloomFilter import fromfile [as 别名]
def get_values_by_key_data(self, token, word_freq, offset, bloom_filter_dump_size):
    """Return ``(codes, prob_filter, word_freq)`` for `token`.

    * If `word_freq` is ``None``, returns an empty numpy array, ``None``
      and 0 without touching the cache or the values file.
    * If `token` is cached, the cache entry is refreshed through
      ``update_cache`` and its first three items are returned.
    * Otherwise the values file is positioned at `offset`, `codes` is
      unpickled from it and, when `bloom_filter_dump_size` is non-zero, a
      Bloom filter is read right after it with that size. The result is
      stored in the cache before being returned.
    """
    if word_freq is None:
        return numpy.zeros(0), None, 0
    if token in self.cache:
        self.update_cache(token)
        return self.cache[token][:3]
    self.values_file.seek(offset)
    # NOTE(review): pickle.load can execute arbitrary code; this is only
    # safe if values_file comes from a trusted source -- confirm.
    codes = pickle.load(self.values_file)
    prob_filter = None
    if bloom_filter_dump_size:
        prob_filter = BloomFilter.fromfile(self.values_file, bloom_filter_dump_size)
    self.update_cache(token, (codes, prob_filter, word_freq, offset, bloom_filter_dump_size))
    return codes, prob_filter, word_freq
示例10: fromfile
# 需要导入模块: from pybloom import BloomFilter [as 别名]
# 或者: from pybloom.BloomFilter import fromfile [as 别名]
def fromfile(cls, f):
    """Rebuild a ScalableBloomFilter from the open file object `f`.

    Read in order: the fixed header described by ``cls.FILE_FMT`` (handed
    to ``_setup``), a little-endian signed 32-bit sub-filter count, one
    little-endian unsigned 64-bit value per sub-filter, and then the
    sub-filters themselves, each loaded with
    ``BloomFilter.fromfile(f, value)``. A count of zero or less yields an
    empty ``filters`` list.
    """
    scalable = cls()
    header_size = calcsize(cls.FILE_FMT)
    scalable._setup(*unpack(cls.FILE_FMT, f.read(header_size)))
    (nfilters,) = unpack("<l", f.read(calcsize("<l")))
    if nfilters <= 0:
        scalable.filters = []
        return scalable
    lengths_fmt = "<" + "Q" * nfilters
    raw_lengths = f.read(calcsize(lengths_fmt))
    for length in unpack(lengths_fmt, raw_lengths):
        scalable.filters.append(BloomFilter.fromfile(f, length))
    return scalable
示例11: waitForBloom
# 需要导入模块: from pybloom import BloomFilter [as 别名]
# 或者: from pybloom.BloomFilter import fromfile [as 别名]
def waitForBloom(from_ip, print_labels=False, frame=None, print_start=0):
# Create file for bloom filter import
f = open('bloomFileIn', 'wb')
# Connect to IP address
host = from_ip
port = 10000
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
# Keep trying until a connection is made
while True:
try:
s.bind((host, port))
except socket.error:
pass
else:
break
s.listen(1)
print "Waiting for data..."
conn, addr = s.accept()
print "Connection from " + addr[0]
# Receive bloom filter in increments
data = conn.recv(1024)
size = sys.getsizeof(data)
while(data):
f.write(data)
data = conn.recv(1024)
size += sys.getsizeof(data)
print "Received " + str(size/1000) + " KB"
# Print to GUI
if print_labels:
ttk.Label(frame, text=("Received bloom filter (" + str(size/1000) + " KB)")).grid(row=print_start,column=0)
# Cleanup & bloom filter creation
f.close()
f = open('bloomFileIn', 'rb')
bloom = BloomFilter.fromfile(f)
f.close()
conn.close()
s.close()
os.remove('bloomFileIn')
return bloom
示例12: fromfiles
# 需要导入模块: from pybloom import BloomFilter [as 别名]
# 或者: from pybloom.BloomFilter import fromfile [as 别名]
def fromfiles(cls, name, bufferSize=1024, dataDir=""):
    """Rebuild a factory from the ``<name>-*.bloom`` files in `dataDir`.

    Returns ``None`` when no bloom file matches. Otherwise a factory is
    created with ``cls(name, bufferSize=..., dataDir=...)`` and one
    ``GadgetBox`` is appended to ``factory.boxes`` for each bloom file that
    has a counterpart in the ``<name>-*.data`` list (paired by sorted
    position). Each box is named after the bloom file's base name without
    its extension, and its ``filter`` is loaded from that file.
    """
    # glob returns paths in no particular order; sort for a stable result.
    bloomFiles = sorted(glob.glob(os.path.join(dataDir, "%s-*.bloom" % name)))
    dataFiles = sorted(glob.glob(os.path.join(dataDir, "%s-*.data" % name)))
    if not bloomFiles:
        return None
    factory = cls(name, bufferSize=bufferSize, dataDir=dataDir)
    # zip stops at the shorter list, so surplus data files no longer cause
    # an IndexError.
    for bloom_path, _data_path in zip(bloomFiles, dataFiles):
        # basename/splitext instead of splitting on "." so dots in the
        # directory part (e.g. "./data") cannot corrupt the box name.
        box_name = os.path.splitext(os.path.basename(bloom_path))[0]
        box = GadgetBox(box_name, bufferSize, dataDir=dataDir)
        # glob results already include dataDir; joining it again would
        # double a relative directory prefix.
        with open(bloom_path, "rb") as f:
            box.filter = BloomFilter.fromfile(f)
        factory.boxes.append(box)
    return factory
示例13: main
# 需要导入模块: from pybloom import BloomFilter [as 别名]
# 或者: from pybloom.BloomFilter import fromfile [as 别名]
def main():
parser = argparse.ArgumentParser(prog='blacktop/nsrl')
parser.add_argument("-v", "--verbose", help="Display verbose output message", action="store_true", required=False)
parser.add_argument('name', metavar='FILE', type=str, nargs='+', help='a file name to search for.')
args = parser.parse_args()
with open('nsrl.bloom', 'rb') as nb:
bf = BloomFilter.fromfile(nb)
for file_name in args.name:
if args.verbose:
if file_name in bf:
print "File {} found in NSRL Database.".format(file_name)
else:
print "File {} was NOT found in NSRL Database.".format(file_name)
else:
print file_name in bf
return
示例14: main
# 需要导入模块: from pybloom import BloomFilter [as 别名]
# 或者: from pybloom.BloomFilter import fromfile [as 别名]
def main():
parser = argparse.ArgumentParser(prog='blacktop/nsrl')
parser.add_argument("-v", "--verbose", help="Display verbose output message", action="store_true", required=False)
parser.add_argument('hash', metavar='MD5', type=str, nargs='+', help='a md5 hash to search for.')
args = parser.parse_args()
with open('nsrl.bloom', 'rb') as nb:
bf = BloomFilter.fromfile(nb)
for hash_hex in args.hash:
hash = binascii.unhexlify(hash_hex)
if args.verbose:
if hash in bf:
print "Hash {} found in NSRL Database.".format(hash)
else:
print "Hash {} was NOT found in NSRL Database.".format(hash)
else:
print hash in bf
return
示例15: test_recursive_domains
# 需要导入模块: from pybloom import BloomFilter [as 别名]
# 或者: from pybloom.BloomFilter import fromfile [as 别名]
def test_recursive_domains(self):
    """Index field 3 with the recursive-domain flag on and check all suffixes.

    Runs ``create_index`` over ``self.test_file`` with the last argument
    set to True, verifies the reported count of 9, then loads the generated
    ``.bfindex`` file and checks that every listed domain and parent-domain
    string is a member of the filter.
    """
    result = create_index(
        '/tmp/fake.csv',  # input filename
        self.test_file,   # file-like object
        0.0001,           # error rate
        1,                # skip lines
        [3],              # fields
        ',',              # delimiter
        True)             # recursive domain
    self.assertEqual(
        {'/tmp/fake.csv.3.bfindex': 9},
        result)
    # Load through a context manager so the index file is closed even when
    # an assertion below fails.
    with open('/tmp/fake.csv.3.bfindex', 'rb') as index_file:
        b = BloomFilter.fromfile(index_file)
    for word in ('subdomain.yahoo.com', 'yahoo.com', 'com',
                 'example.domain.com', 'domain.com', 'www.google.co.uk',
                 'google.co.uk', 'co.uk', 'uk'):
        self.assertIn(word, b)