当前位置: 首页>>代码示例>>Python>>正文


Python BloomFilter.fromfile方法代码示例

本文整理汇总了Python中pybloom.BloomFilter.fromfile方法的典型用法代码示例。如果您正苦于以下问题:Python BloomFilter.fromfile方法的具体用法?Python BloomFilter.fromfile怎么用?Python BloomFilter.fromfile使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在pybloom.BloomFilter的用法示例。


在下文中一共展示了BloomFilter.fromfile方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_insert_then_test

# 需要导入模块: from pybloom import BloomFilter [as 别名]
# 或者: from pybloom.BloomFilter import fromfile [as 别名]
    def test_insert_then_test(self):
        """Index fields 1 and 2 of the fixture and check both bloom filters.

        Verifies the per-index counts returned by ``create_index``, then
        reloads each ``.bfindex`` file and checks that every expected word is
        found in its own field's filter and not in the other one.
        """
        result = create_index(
            '/tmp/fake.csv',  # input filename
            self.test_file,   # file-like object
            0.0001,           # error rate
            1,                # skip lines
            [1, 2],           # fields
            ',',              # delimiter
            False)            # recursive domain
        self.assertEqual(
            {'/tmp/fake.csv.2.bfindex': 6,
             '/tmp/fake.csv.1.bfindex': 5},
            result)

        # Context managers close the index files even when fromfile raises;
        # the bare open() calls used before leaked both handles.
        with open('/tmp/fake.csv.1.bfindex', 'rb') as index_1:
            b1 = BloomFilter.fromfile(index_1)
        with open('/tmp/fake.csv.2.bfindex', 'rb') as index_2:
            b2 = BloomFilter.fromfile(index_2)

        # Presumably the header cells, dropped because one line is skipped.
        self.assertEqual(False, 'FieldA' in b1)
        self.assertEqual(False, 'FieldB' in b2)

        for word in ('apple', 'banana', 'orange', 'pear', 'pineapple'):
            self.assertEqual(True, word in b1)
            self.assertEqual(False, word in b2)

        for word in ('carrot', 'potato', 'leek', 'cauliflower', 'bean'):
            self.assertEqual(True, word in b2)
            self.assertEqual(False, word in b1)
开发者ID:andyyuan78,项目名称:bloom-filter-indexer,代码行数:28,代码来源:test.py

示例2: jaccard_ind

# 需要导入模块: from pybloom import BloomFilter [as 别名]
# 或者: from pybloom.BloomFilter import fromfile [as 别名]
def jaccard_ind(filename_1, filename_2):
    """Print the Jaccard index of two serialized bloom filters.

    The index is computed on the filters' bit arrays: the number of set bits
    in their intersection divided by the number of set bits in their union.
    The first file name is printed on its own line, followed by a line of the
    form ``<filename_1> <filename_2> <index>``.

    Args:
        filename_1: Path of the first serialized BloomFilter.
        filename_2: Path of the second serialized BloomFilter.

    Raises:
        ZeroDivisionError: If the union has no set bits.
    """
    # A single `with` closes both files on exit, so no explicit close() calls
    # are needed afterwards.
    with open(filename_1, 'rb') as f_1, open(filename_2, 'rb') as f_2:
        print(filename_1)
        b_1 = BloomFilter.fromfile(f_1)
        b_2 = BloomFilter.fromfile(f_2)

    b_inter = b_1.intersection(b_2)
    b_union = b_1.union(b_2)
    bits_inter = b_inter.bitarray.count(True)
    bits_union = b_union.bitarray.count(True)
    j_i = float(bits_inter) / float(bits_union)
    print("%s %s %f" % (filename_1, filename_2, j_i))
开发者ID:chubbymaggie,项目名称:Cardinal,代码行数:17,代码来源:bloom_jaccard_ind.py

示例3: update_bf

# 需要导入模块: from pybloom import BloomFilter [as 别名]
# 或者: from pybloom.BloomFilter import fromfile [as 别名]
def update_bf(request):
    global proxies
    temp_list = request.split('\n')
    #print temp_list
    updated_proxy = temp_list[1] # ip of other proxy
    # write the bloom filter to a file bf_upd
    temp_bf_recv = open('bf_upd', "w") 
    temp_bf_recv.write(temp_list[2])
    temp_bf_recv.write('\n')
    temp_bf_recv.write(temp_list[3])
    temp_bf_recv.close()
    temp_bf_recv = open('bf_upd', 'r')
    # de-serialize the bloom filter
    temp_bf = BloomFilter.fromfile(temp_bf_recv)
    temp_bf_recv.close()

    if os.path.isfile('bf_upd'): # remove the file used to hold the bloom filter
        os.remove('bf_upd')

    index = 0;
    for proxy in proxies: 
        if proxy[0] == updated_proxy: # update only the proxy who's bloom filter is updated
            print "UPDATING BF OF PROXY: ", proxy[0]
            curr_port = proxy[1]
            break
        index = index + 1
    proxies[index] = (updated_proxy, curr_port, temp_bf) # update entry in the list
开发者ID:vtang02,项目名称:comp112-project,代码行数:29,代码来源:proxy.py

示例4: __init__

# 需要导入模块: from pybloom import BloomFilter [as 别名]
# 或者: from pybloom.BloomFilter import fromfile [as 别名]
 def __init__(self):
     """Load the bloom filter from FILTER_FILE, or start with an empty one.

     If the file cannot be opened or read (IOError), a new filter with a
     capacity of 10,000,000 and an error rate of 0.001 is created instead.
     `self.num` starts at 0.
     """
     try:
         # The serialized filter is binary data, so open it in binary mode;
         # text mode can mangle or fail to decode the bytes.
         with open(FILTER_FILE, 'rb') as f:
             self.f = BloomFilter.fromfile(f)
     except IOError:
         self.f = BloomFilter(capacity=10000000, error_rate=0.001)
     self.num = 0
开发者ID:luotigerlsx,项目名称:DataAnalysis_ML,代码行数:9,代码来源:urlfilter_svc.py

示例5: test

# 需要导入模块: from pybloom import BloomFilter [as 别名]
# 或者: from pybloom.BloomFilter import fromfile [as 别名]
def test():
    with open('blooms/boys', 'r') as f:
        boys = BloomFilter.fromfile(f)
    with open('blooms/girls', 'r') as f:
        girls = BloomFilter.fromfile(f)

    print "Enter a name:"
    while True:
        name = raw_input().strip().lower()
        if name in boys and name not in girls:
            print "That is a boy's name."
        elif name not in boys and name in girls:
            print "That is a girl's name."
        elif name in boys and name in girls:
            print "That could be either a boy's or a girl's name."
        else:
            print "That doesn't look like a boy's or a girl's name."
开发者ID:Glank,项目名称:rdp,代码行数:19,代码来源:compile_names.py

示例6: add_proxy

# 需要导入模块: from pybloom import BloomFilter [as 别名]
# 或者: from pybloom.BloomFilter import fromfile [as 别名]
def add_proxy(request, conn):
    global proxies
    recv_list = request.split('\n')
    #print "RECV LIST: ",recv_list
    new_proxy = recv_list[1]
    new_port = int(recv_list[2])
    temp_bf = open("temp_bf_rec", "w+") # write the bloom filter string to a file
    temp_bf.write(recv_list[3])
    temp_bf.write('\n');
    temp_bf.write(recv_list[4])
    temp_bf.close()
    temp_bf = open("temp_bf_rec", 'r')
    new_bf = BloomFilter.fromfile(temp_bf)     # de-serialize the bloom filter
    temp_bf.close()

    if os.path.isfile('temp_bf_rec'): # remove the temp file
        os.remove('temp_bf_rec')
    print "GOT BLOOM FILTER"

    proxies.append((new_proxy,new_port,new_bf))
    #print "NEW PROXIES: ",proxies
    temp_list = ['NEW LIST OF PROXIES']
    for proxy in proxies:
        temp_list.append('\n')
        temp_list.append(proxy[0]) # add IP
        temp_list.append('\n')
        temp_list.append(str(proxy[1])) # add port
        temp_list.append('\n')
        temp_bf = open('temp_bf_send', "w") # serialize the bloom filter for sending
        proxy[2].tofile(temp_bf)
        temp_bf.close()
        temp_bf = open('temp_bf_send', "r")

        # create the a string so the bloom filter can be sent over socket connect
        temp2 = ''
        while 1:
            temp = temp_bf.read()
            #print "Reading from file: ", len(temp)
            if len(temp) > 0:
                temp2 = temp2 + temp
            else:
                break

        temp_bf.close()
        temp_list.append(temp2)  # add bloom filter   

    if os.path.isfile('temp_bf_send'): # remove temp file
        os.remove('temp_bf_send')

    #print temp_list
    temp_string = ''.join(temp_list) # create a string representation of the list

    for proxy in proxies:
        if proxy[0] != bootstrap_proxy: #only bootstrapping proxy will do this
            temp_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            temp_sock.connect((proxy[0],proxy[1])) # connect to each proxy and send the new list of proxies
            temp_sock.send(temp_string)
            temp_sock.close()
开发者ID:vtang02,项目名称:comp112-project,代码行数:60,代码来源:proxy.py

示例7: _fromfile_

# 需要导入模块: from pybloom import BloomFilter [as 别名]
# 或者: from pybloom.BloomFilter import fromfile [as 别名]
 def _fromfile_(self):
     """Load the bloom filter cache from ``../out/filter``.

     On success `self.bloom_cache` holds the loaded filter and `self.count`
     its `count`. On any error the exception is printed and an empty filter
     (capacity 10,000,000, error rate 0.00001) is created with a count of 0.
     """
     try:
         # Binary mode for binary data; `with` closes the file even when
         # fromfile raises, which the manual close() did not.
         with open('../out/filter', 'rb') as f:
             self.bloom_cache = BloomFilter.fromfile(f)
         self.count = self.bloom_cache.count
     except Exception as ex:
         # Deliberate best effort: any failure falls back to a fresh filter.
         print(Exception, ex)
         self.bloom_cache = BloomFilter(capacity=10000000, error_rate=0.00001)
         self.count = 0
开发者ID:caidao,项目名称:zhihu_spider,代码行数:12,代码来源:filter.py

示例8: __init__

# 需要导入模块: from pybloom import BloomFilter [as 别名]
# 或者: from pybloom.BloomFilter import fromfile [as 别名]
    def __init__(self, name):
        """Set up an empty data buffer and load an existing filter, if any.

        When a file called `name` exists, its first four bytes are read as a
        big-endian unsigned 32-bit integer and passed, together with the open
        file, to ``BloomFilter.fromfile``. Otherwise `self._bf` stays None.
        """
        super(BloomZip, self).__init__()
        self.__data = StringIO()
        self._name = name
        self._bf = None

        # Nothing on disk yet: leave the filter unset.
        if not os.path.isfile(self._name):
            return

        with open(self._name, 'rb') as archive:
            (bf_length,) = struct.unpack(">L", archive.read(4))
            self._bf = BloomFilter.fromfile(archive, bf_length)
开发者ID:stiege,项目名称:bloomzip,代码行数:12,代码来源:bloomzip.py

示例9: get_values_by_key_data

# 需要导入模块: from pybloom import BloomFilter [as 别名]
# 或者: from pybloom.BloomFilter import fromfile [as 别名]
 def get_values_by_key_data(self, token, word_freq, offset, bloom_filter_dump_size):
     """Return ``(codes, prob_filter, word_freq)`` for `token`.

     * If `word_freq` is None, returns an empty numpy array, None and 0.
     * If `token` is in `self.cache`, the cache is touched via
       `update_cache(token)` and the first three items of the cached entry
       are returned.
     * Otherwise `codes` is unpickled from `self.values_file` at `offset`.
       If `bloom_filter_dump_size` is truthy, a BloomFilter of that size is
       read from the same file right after it. The result is handed to
       `update_cache` before being returned.
     """
     if word_freq is None:
         return numpy.zeros(0), None, 0
     if token in self.cache:
         self.update_cache(token)
         return self.cache[token][:3]
     self.values_file.seek(offset)
     # NOTE(review): pickle.load can execute arbitrary code; this is only safe
     # if values_file is produced by trusted code. Confirm its origin.
     codes = pickle.load(self.values_file)
     prob_filter = None
     if bloom_filter_dump_size:
         prob_filter = BloomFilter.fromfile(self.values_file, bloom_filter_dump_size)
     self.update_cache(token, (codes, prob_filter, word_freq, offset, bloom_filter_dump_size))
     return codes, prob_filter, word_freq
开发者ID:mavlyutovrus,项目名称:light_search,代码行数:15,代码来源:word_index.py

示例10: fromfile

# 需要导入模块: from pybloom import BloomFilter [as 别名]
# 或者: from pybloom.BloomFilter import fromfile [as 别名]
    def fromfile(cls, f):
        """Deserialize the ScalableBloomFilter in file object `f`.

        Reads a header laid out as `cls.FILE_FMT` and passes its fields to
        `_setup`, then a little-endian 32-bit count of sub-filters. When the
        count is positive, one unsigned 64-bit length per sub-filter follows,
        and each sub-filter is read with ``BloomFilter.fromfile(f, length)``.
        """
        instance = cls()
        setup_args = unpack(cls.FILE_FMT, f.read(calcsize(cls.FILE_FMT)))
        instance._setup(*setup_args)

        (nfilters,) = unpack("<l", f.read(calcsize("<l")))
        if nfilters <= 0:
            # No sub-filters stored: start from an empty list.
            instance.filters = []
            return instance

        header_fmt = "<" + "Q" * nfilters
        filter_lengths = unpack(header_fmt, f.read(calcsize(header_fmt)))
        for length in filter_lengths:
            instance.filters.append(BloomFilter.fromfile(f, length))
        return instance
开发者ID:Mondego,项目名称:pyreco,代码行数:17,代码来源:allPythonContent.py

示例11: waitForBloom

# 需要导入模块: from pybloom import BloomFilter [as 别名]
# 或者: from pybloom.BloomFilter import fromfile [as 别名]
def waitForBloom(from_ip, print_labels=False, frame=None, print_start=0):
	# Create file for bloom filter import
	f = open('bloomFileIn', 'wb')

	# Connect to IP address
	host = from_ip
	port = 10000
	s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

	# Keep trying until a connection is made
	while True:
		try:
			s.bind((host, port))
		except socket.error:
			pass
		else:
			break

	s.listen(1)
	print "Waiting for data..."
	conn, addr = s.accept()
	print "Connection from " + addr[0]

	# Receive bloom filter in increments
	data = conn.recv(1024)
	size = sys.getsizeof(data)
	while(data):
		f.write(data)
		data = conn.recv(1024)
		size += sys.getsizeof(data)

	print "Received " + str(size/1000) + " KB"

	# Print to GUI
	if print_labels:
		ttk.Label(frame, text=("Received bloom filter (" + str(size/1000) + " KB)")).grid(row=print_start,column=0)

	# Cleanup & bloom filter creation
	f.close()
	f = open('bloomFileIn', 'rb')
	bloom = BloomFilter.fromfile(f)
	f.close()
	conn.close()
	s.close()
	os.remove('bloomFileIn')
	return bloom
开发者ID:boukisn,项目名称:EC504-Final-Project,代码行数:48,代码来源:dataExchange.py

示例12: fromfiles

# 需要导入模块: from pybloom import BloomFilter [as 别名]
# 或者: from pybloom.BloomFilter import fromfile [as 别名]
  def fromfiles( cls, name, bufferSize=1024, dataDir="" ):
    """Rebuild a factory from the ``<name>-*.bloom`` files found in `dataDir`.

    Returns None when no bloom file matches. Otherwise a new `cls` instance
    is created and one GadgetBox per data file is appended to its `boxes`,
    each carrying the filter loaded from the bloom file paired with it.
    A box is named after its bloom file, without directory or extension.
    """
    bloomFiles = glob.glob( os.path.join( dataDir, "%s-*.bloom" % name ) )
    dataFiles = glob.glob( os.path.join( dataDir, "%s-*.data" % name ) )

    if not bloomFiles:
      return None

    factory = cls( name, bufferSize=bufferSize, dataDir=dataDir )

    # zip() still yields one box per data file, but stops instead of raising
    # IndexError when there are fewer bloom files than data files.
    for bloomPath, _ in zip( bloomFiles, dataFiles ):
      # basename/splitext instead of split("."): a dot in the directory part
      # (e.g. "./data") no longer truncates the box name.
      boxName = os.path.splitext( os.path.basename( bloomPath ) )[0]
      box = GadgetBox( boxName, bufferSize, dataDir=dataDir )
      # glob already returns the path including dataDir; joining dataDir
      # again pointed at "<dataDir>/<dataDir>/..." for relative directories.
      with open( bloomPath, "rb" ) as f:
        box.filter = BloomFilter.fromfile( f )

      factory.boxes.append( box )

    return factory
开发者ID:streed,项目名称:gadgetStore,代码行数:20,代码来源:gadget.py

示例13: main

# 需要导入模块: from pybloom import BloomFilter [as 别名]
# 或者: from pybloom.BloomFilter import fromfile [as 别名]
def main():
    parser = argparse.ArgumentParser(prog='blacktop/nsrl')
    parser.add_argument("-v", "--verbose", help="Display verbose output message", action="store_true", required=False)
    parser.add_argument('name', metavar='FILE', type=str, nargs='+', help='a file name to search for.')
    args = parser.parse_args()

    with open('nsrl.bloom', 'rb') as nb:
        bf = BloomFilter.fromfile(nb)

        for file_name in args.name:
            if args.verbose:
                if file_name in bf:
                    print "File {} found in NSRL Database.".format(file_name)
                else:
                    print "File {} was NOT found in NSRL Database.".format(file_name)
            else:
                print file_name in bf
    return
开发者ID:blacktop,项目名称:docker-nsrl,代码行数:20,代码来源:search.py

示例14: main

# 需要导入模块: from pybloom import BloomFilter [as 别名]
# 或者: from pybloom.BloomFilter import fromfile [as 别名]
def main():
    parser = argparse.ArgumentParser(prog='blacktop/nsrl')
    parser.add_argument("-v", "--verbose", help="Display verbose output message", action="store_true", required=False)
    parser.add_argument('hash', metavar='MD5', type=str, nargs='+', help='a md5 hash to search for.')
    args = parser.parse_args()

    with open('nsrl.bloom', 'rb') as nb:
        bf = BloomFilter.fromfile(nb)

        for hash_hex in args.hash:
            hash = binascii.unhexlify(hash_hex)
            if args.verbose:
                if hash in bf:
                    print "Hash {} found in NSRL Database.".format(hash)
                else:
                    print "Hash {} was NOT found in NSRL Database.".format(hash)
            else:
                print hash in bf
    return
开发者ID:kost,项目名称:docker-kf,代码行数:21,代码来源:search.py

示例15: test_recursive_domains

# 需要导入模块: from pybloom import BloomFilter [as 别名]
# 或者: from pybloom.BloomFilter import fromfile [as 别名]
    def test_recursive_domains(self):
        """Index field 3 with the recursive-domain flag set and check it.

        Verifies the count returned by ``create_index``, then reloads the
        ``.bfindex`` file and checks that each full domain and each of its
        parent domains listed below is found in the filter.
        """
        result = create_index(
            '/tmp/fake.csv',  # input filename
            self.test_file,   # file-like object
            0.0001,           # error rate
            1,                # skip lines
            [3],              # fields
            ',',              # delimiter
            True)             # recursive domain
        self.assertEqual(
            {'/tmp/fake.csv.3.bfindex': 9},
            result)

        # The context manager closes the index file even when fromfile
        # raises; the bare open() call used before leaked the handle.
        with open('/tmp/fake.csv.3.bfindex', 'rb') as index_file:
            b = BloomFilter.fromfile(index_file)

        for word in ('subdomain.yahoo.com', 'yahoo.com', 'com',
                     'example.domain.com', 'domain.com', 'www.google.co.uk',
                     'google.co.uk', 'co.uk', 'uk'):
            self.assertEqual(True, word in b)
开发者ID:andyyuan78,项目名称:bloom-filter-indexer,代码行数:21,代码来源:test.py


注:本文中的pybloom.BloomFilter.fromfile方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。