本文整理汇总了Python中pybloom.ScalableBloomFilter.fromfile方法的典型用法代码示例。如果您正苦于以下问题：Python ScalableBloomFilter.fromfile方法的具体用法？Python ScalableBloomFilter.fromfile怎么用？Python ScalableBloomFilter.fromfile使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pybloom.ScalableBloomFilter的用法示例。
在下文中一共展示了ScalableBloomFilter.fromfile方法的5个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __init__
# 需要导入模块: from pybloom import ScalableBloomFilter [as 别名]
# 或者: from pybloom.ScalableBloomFilter import fromfile [as 别名]
def __init__(self, filterfile):
    """Load the bloom filter stored at ``filterfile``, or start a new one.

    Args:
        filterfile: Path of the serialized ScalableBloomFilter. It is
            remembered on ``self.filterfile`` whether or not it exists yet.
    """
    self.filterfile = filterfile
    # If filterfile is present load the bloom filter from it, else create a new one.
    if os.path.exists(filterfile):
        # Use a context manager so the handle is closed as soon as the
        # filter has been deserialized instead of leaking until GC.
        with open(filterfile, "rb") as handle:
            self.bf = ScalableBloomFilter.fromfile(handle)
        # Parenthesized single-argument form prints the same text on
        # Python 2 and Python 3.
        print("available signatures = %d" % len(self.bf))
    else:
        self.bf = ScalableBloomFilter(mode=ScalableBloomFilter.SMALL_SET_GROWTH)
示例2: _load_from_file
# 需要导入模块: from pybloom import ScalableBloomFilter [as 别名]
# 或者: from pybloom.ScalableBloomFilter import fromfile [as 别名]
def _load_from_file(self):
    """Populate ``self.deduper_`` from 'data/bloom.data', or create a new filter.

    When the cache file is missing, an error is logged and a fresh
    ScalableBloomFilter is built from the positional arguments
    ``100000, 0.0001, 4`` (presumably initial capacity, error rate and
    growth mode -- confirm against the pybloom signature).
    """
    self.logger_.info('loading data from cache file...')
    if not os.path.isfile('data/bloom.data'):
        self.logger_.error('bloom cache file not found, create one instead.')
        self.deduper_ = ScalableBloomFilter(100000, 0.0001, 4)
    else:
        # The serialized filter is binary data: it must be opened in 'rb'.
        # Text mode decodes to str on Python 3 and translates line endings
        # on Windows, either of which breaks deserialization.
        with open('data/bloom.data', 'rb') as f:
            self.deduper_ = ScalableBloomFilter.fromfile(f)
示例3: load
# 需要导入模块: from pybloom import ScalableBloomFilter [as 别名]
# 或者: from pybloom.ScalableBloomFilter import fromfile [as 别名]
def load(cls, filename):
    """Deserialize a digest from ``filename``.

    The file begins with a fixed-size header described by
    ``cls.transformer`` (``size`` bytes, decoded with ``unpack``) and is
    followed by a serialized ScalableBloomFilter.

    NOTE(review): the function takes ``cls``, so it is presumably wrapped
    with ``@classmethod`` outside this excerpt -- confirm.

    Args:
        filename: Path of the serialized digest file.

    Returns:
        An instance of ``cls`` built from the header fields and the loaded
        filter, with ``urlcount`` restored from the header.

    Raises:
        ValueError: If fewer than ``cls.transformer.size`` header bytes
            could be read.
    """
    t = cls.transformer
    size = t.size
    with open(filename, "rb") as serialized_digest:
        readdata = serialized_digest.read(size)
        if len(readdata) != size:
            msg = 'invalid amount read from file for format %r: %r (should have been %d)'
            detail = msg % (t.format, readdata, size)
            Logger("digest.load").log(detail)
            # Carry the same text on the exception so callers see the cause.
            raise ValueError(detail)
        nonce, maxcapacity, urlcount, meta = t.unpack(readdata)
        # If the class has a conversion from the string repr of meta, use it.
        # The original referenced an undefined ``self`` here and called the
        # converter without the unpacked value.
        # NOTE(review): assumes meta_from_string accepts the packed meta
        # value as its only argument -- confirm against its definition.
        if hasattr(cls, 'meta_from_string'):
            meta = cls.meta_from_string(meta)
        # The filter payload starts right after the header in the same file.
        filterS = ScalableBloomFilter.fromfile(serialized_digest)
    digest = cls(maxcapacity, meta, filename, filterS=filterS, nonce=nonce)
    digest.urlcount = urlcount
    return digest
示例4: load
# 需要导入模块: from pybloom import ScalableBloomFilter [as 别名]
# 或者: from pybloom.ScalableBloomFilter import fromfile [as 别名]
def load(cls, filename):
    """
    Override of the base class loader that unpacks the header into a SigInfo.

    Reads ``cls.transformer.size`` header bytes from ``filename``, decodes
    them with ``cls.transformer.unpack``, then reads the ScalableBloomFilter
    that follows in the same file. Raises ValueError when the header is
    shorter than expected.
    """
    codec = cls.transformer
    expected = codec.size
    with open(filename, "rb") as stream:
        header = stream.read(expected)
        if len(header) != expected:
            template = 'invalid amount read from file for format %r: %r (should have been %d)'
            Logger("scandigest.load").log(template % (codec.format, header, expected))
            raise ValueError
        (nonce, maxcapacity, urlcount,
         scannervv, sigversion, sigtimestamp) = codec.unpack(header)
        # The timestamp was written with mktime, so interpret it as local
        # (non-UTC) time when converting back.
        signed_at = datetime.datetime.fromtimestamp(sigtimestamp)
        siginfo = SigInfo(scannervv, sigversion, signed_at)
        bloom = ScalableBloomFilter.fromfile(stream)
    loaded = cls(maxcapacity, siginfo, filename, filterS=bloom, nonce=nonce)
    loaded.urlcount = urlcount
    return loaded
示例5: open
# 需要导入模块: from pybloom import ScalableBloomFilter [as 别名]
# 或者: from pybloom.ScalableBloomFilter import fromfile [as 别名]
# Coding systems known up front; the FHIR systems are appended below.
RECOGNIZED = [
    LOINC, SNOMED, RXNORM, ICD9, ICD10, CPT, CVX, UNITS_OF_MEASURE,
]
VALUE_SETS = []

# Listing every FHIR system by hand would be a waste of time, so they are
# read from the generated JSON files instead.
with open('./data/fhir/systems.json') as fhir_handle:
    RECOGNIZED.extend(json.load(fhir_handle))
with open('./data/fhir/daf.json') as daf_handle:
    VALUE_SETS.extend(json.load(daf_handle))
with open('./data/fhir/argo.json') as argo_handle:
    VALUE_SETS.extend(json.load(argo_handle))

# Build the bloom filter from its serialized form when available.
try:
    with open('./data/codes.bf', 'rb') as handle:
        BLOOM = ScalableBloomFilter.fromfile(handle)
except FileNotFoundError:
    # No generated filter on disk: fall back to an empty one.
    BLOOM = ScalableBloomFilter()
def validate_coding(coding):
""" If the coding system is recognized, check the code.
"""
if coding.get('system') not in RECOGNIZED:
raise SystemNotRecognized(coding.get('system'))
if not coding.get('code'):
return False
key = coding['system'] + '|' + coding['code']