本文整理汇总了Python中pybloomfilter.BloomFilter.update方法的典型用法代码示例。如果您正苦于以下问题:Python BloomFilter.update方法的具体用法?Python BloomFilter.update怎么用?Python BloomFilter.update使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pybloomfilter.BloomFilter
的用法示例。
在下文中一共展示了BloomFilter.update方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: create_ref_bloom_filter
# 需要导入模块: from pybloomfilter import BloomFilter [as 别名]
# 或者: from pybloomfilter.BloomFilter import update [as 别名]
def create_ref_bloom_filter(reference_file, error_rate, bf_file, format="fasta"):
    """Build an on-disk bloom filter from the reads of a reference sequence.

    Parameters
    ----------
    reference_file : str
        Path to the reference sequence file.
    error_rate : float
        Desired false-positive rate of the bloom filter.
    bf_file : str
        Path where the bloom filter file is written.
    format : str
        Input format, either ``"fasta"`` or ``"fastq"``.

    Raises
    ------
    ValueError
        If `format` is not ``"fasta"`` or ``"fastq"`` (previously an
        unknown format caused a NameError further down).
    """
    if format == "fasta":
        file_it = FastaIterator
        record = lambda it: (seq.seq for seq in it)
    elif format == "fastq":
        file_it = FastqGeneralIterator
        record = lambda it: (seq for _, seq, _ in it)
    else:
        raise ValueError("format must be 'fasta' or 'fastq', got %r" % (format,))
    capacity = total_reads(reference_file)
    read_len = 109  # NOTE(review): hard-coded read length -- confirm against the data set.
    bf = BloomFilter(capacity, error_rate, bf_file)
    try:
        with open(reference_file) as handle:
            # Iterate over *all* records, not just the first one: the
            # original used the Py2-only `read_it.next()` and therefore
            # silently ignored every record after the first, even though
            # the capacity was sized with total_reads().
            for sequence in record(file_it(handle)):
                # Non-overlapping windows of read_len characters.  The
                # original sliced `i:i + read_len - 1`, an off-by-one that
                # dropped the last base of every read.
                for i in range(0, len(sequence), read_len):
                    # NOTE(review): BloomFilter.update() treats its argument
                    # as an iterable, so a str read adds individual
                    # characters -- kept as in the original; confirm intent.
                    bf.update(sequence[i:i + read_len])
    finally:
        bf.close()
示例2: _process_one
# 需要导入模块: from pybloomfilter import BloomFilter [as 别名]
# 或者: from pybloomfilter.BloomFilter import update [as 别名]
def _process_one(data_file):
    """Generate (if absent) the bloom filter file for one output file.

    Parameters
    ----------
    data_file : str
        Path to an output file produced by a previous run; its parent
        directory name is mirrored under ``bloom_filters/``.

    Returns
    -------
    str
        Path of the bloom filter file
        (``bloom_filters/<parent_dir>/<dump_name>``), whether it was just
        created or already existed.
    """
    path, dump_name = os.path.split(data_file)
    _, parent_dir = os.path.split(path)
    bf_dir_path = os.path.join('bloom_filters', parent_dir)
    # makedirs(exist_ok=True) also creates the 'bloom_filters' root and is
    # safe under concurrent calls, unlike the original isdir()+mkdir() pair
    # which raised if the root was missing or if another process won the race.
    os.makedirs(bf_dir_path, exist_ok=True)
    bf_file_path = os.path.join(bf_dir_path, dump_name)
    if not os.path.isfile(bf_file_path):
        ncpu, _, nparts, _, _, _, ids = read_output(data_file, header_only=False)
        # Capacity = number of particles; error rate shrinks with CPU count.
        bf = BloomFilter(nparts, 1. / ncpu, bf_file_path)
        bf.update(ids)
        bf.close()  # ensure the memory-mapped filter is flushed to disk
    return bf_file_path
示例3: __init__
# 需要导入模块: from pybloomfilter import BloomFilter [as 别名]
# 或者: from pybloomfilter.BloomFilter import update [as 别名]
#.........这里部分代码省略.........
break
except aiohttp.ClientError as client_error:
LOGGER.info('try %r for %r raised %r',
tries, url, client_error)
exception = client_error
tries += 1
else:
# We never broke out of the loop: all tries failed.
LOGGER.error('%r failed after %r tries',
url, self.max_tries)
self.record_statistic(FetchStatistic(url=url,
next_url=None,
status=None,
exception=exception,
size=0,
content_type=None,
encoding=None,
num_urls=0,
num_new_urls=0))
return
try:
if is_redirect(response):
location = response.headers['location']
next_url = urllib.parse.urljoin(url, location)
self.record_statistic(FetchStatistic(url=url,
next_url=next_url,
status=response.status,
exception=None,
size=0,
content_type=None,
encoding=None,
num_urls=0,
num_new_urls=0))
if next_url in self.seen_urls:
return
if max_redirect > 0:
LOGGER.info('redirect to %r from %r', next_url, url)
self.add_url(next_url, max_redirect - 1)
else:
LOGGER.error('redirect limit reached for %r from %r',
next_url, url)
else:
stat, links = await self.parse_links(response)
self.record_statistic(stat)
for link in utils.difference(links, self.seen_urls):
# for link in links.difference(self.seen_urls):
self.q.put_nowait((link, self.max_redirect))
# self.seen_urls.update(links)
self.seen_urls.update(links)
finally:
await response.release()
async def work(self):
    """Pull (url, max_redirect) pairs off the queue and fetch them, forever."""
    try:
        while True:
            item = await self.q.get()
            target, redirects_left = item
            # Every queued URL must already have been recorded as seen.
            assert target in self.seen_urls
            LOGGER.info("url:%s", target)
            LOGGER.info("max_redirect:%s", redirects_left)
            await self.fetch(target, redirects_left)
            self.q.task_done()
    except asyncio.CancelledError:
        # Cancellation is the normal shutdown path for a worker task.
        pass
def url_allowed(self, url):
if self.exclude and re.search(self.exclude, url):
return False
parts = urllib.parse.urlparse(url)
if parts.scheme not in ('http', 'https'):
LOGGER.debug('skipping non-http scheme in %r', url)
return False
host, port = urllib.parse.splitport(parts.netloc)
if not self.host_okay(host):
LOGGER.debug('skipping non-root host in %r', url)
return False
return True
def add_url(self, url, max_redirect=None):
    """Record `url` as seen and enqueue it for fetching.

    `max_redirect` defaults to the crawler-wide redirect budget.
    (Note: the URL is added unconditionally; callers are expected to
    filter against seen_urls first.)
    """
    remaining = self.max_redirect if max_redirect is None else max_redirect
    LOGGER.debug('adding %r %r', url, remaining)
    self.seen_urls.add(url)
    self.q.put_nowait((url, remaining))
async def crawl(self):
    """Run the crawler until all queued work is finished.

    Spawns ``max_tasks`` worker tasks, waits for the queue to drain,
    then cancels the workers.  Wall-clock start/end times are recorded
    in ``self.t0`` / ``self.t1``.
    """
    # loop.create_task replaces asyncio.Task(..., loop=...): the `loop`
    # argument was removed from asyncio.Task in Python 3.10.
    workers = [self.loop.create_task(self.work())
               for _ in range(self.max_tasks)]
    self.t0 = time.time()
    # BUG FIX: the original `yield self.q.join()` inside `async def` turned
    # this coroutine into an async generator and never actually waited for
    # the queue; inside a coroutine the join must be awaited.
    await self.q.join()
    self.t1 = time.time()
    for w in workers:
        w.cancel()