当前位置: 首页>>代码示例>>Python>>正文


Python BloomFilter.update方法代码示例

本文整理汇总了Python中pybloomfilter.BloomFilter.update方法的典型用法代码示例。如果您正苦于以下问题:Python BloomFilter.update方法的具体用法?Python BloomFilter.update怎么用?Python BloomFilter.update使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在pybloomfilter.BloomFilter的用法示例。


在下文中一共展示了BloomFilter.update方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: create_ref_bloom_filter

# 需要导入模块: from pybloomfilter import BloomFilter [as 别名]
# 或者: from pybloomfilter.BloomFilter import update [as 别名]
def create_ref_bloom_filter(reference_file, error_rate, bf_file, format="fasta"):
    """Create a bloom filter file from the first sequence of a reference file.

    The first record of ``reference_file`` is cut into consecutive windows
    and every window is handed to ``BloomFilter.update``. Each window is also
    printed to standard output.

    Args:
        reference_file: Path of the FASTA/FASTQ file to read.
        error_rate: Error rate passed to the ``BloomFilter`` constructor.
        bf_file: Path passed to the ``BloomFilter`` constructor.
        format: ``"fasta"`` (default) or ``"fastq"``; selects the parser.

    Raises:
        ValueError: If ``format`` is neither ``"fasta"`` nor ``"fastq"``.
        StopIteration: If the file contains no records.
    """
    if format == "fasta":
        file_it = FastaIterator
        record = lambda it: (seq.seq for seq in it)
    elif format == "fastq":
        file_it = FastqGeneralIterator
        record = lambda it: (seq for _, seq, _ in it)
    else:
        # Fail with a clear message instead of a NameError further down.
        raise ValueError("unsupported format: %r" % (format,))

    capacity = total_reads(reference_file)
    read_len = 109

    with open(reference_file) as handle:
        read_it = record(file_it(handle))

        bf = BloomFilter(capacity, error_rate, bf_file)
        try:
            # Only the first record of the file is processed.
            # next() works on both Python 2.6+ and Python 3.
            sequence = next(read_it)

            for start in range(0, len(sequence), read_len):
                # NOTE(review): windows are read_len - 1 long but advance by
                # read_len, so one position per window is never included.
                # Looks like an off-by-one -- confirm before changing, since
                # it alters the filter contents.
                read = sequence[start:start + read_len - 1]
                print(read)
                # NOTE(review): update() receives the window itself; if it
                # iterates its argument, single characters rather than the
                # whole read are added -- confirm add() was not intended.
                bf.update(read)
        finally:
            # Close the filter even when reading or updating fails.
            bf.close()
开发者ID:vals,项目名称:Boutonniere,代码行数:36,代码来源:boutonniere.py

示例2: _process_one

# 需要导入模块: from pybloomfilter import BloomFilter [as 别名]
# 或者: from pybloomfilter.BloomFilter import update [as 别名]
def _process_one(data_file):
    ''' Process one output file to generate a bloom filter'''
    path, dump_name = os.path.split(data_file)
    _, parent_dir = os.path.split(path)

    # ensure the containing folder exists
    bf_dir_path = os.path.join('bloom_filters', parent_dir)
    if not os.path.isdir(bf_dir_path):
        os.mkdir(bf_dir_path)
    bf_file_path = os.path.join(bf_dir_path, dump_name)

    if not os.path.isfile(bf_file_path):
        ncpu, _, nparts, _, _, _, ids = read_output(data_file, header_only=False)
        bf = BloomFilter(nparts, 1./ncpu, bf_file_path)
        bf.update(ids)

    return bf_file_path
开发者ID:cphyc,项目名称:cosmo_z17to0,代码行数:19,代码来源:sort_galaxy.py

示例3: __init__

# 需要导入模块: from pybloomfilter import BloomFilter [as 别名]
# 或者: from pybloomfilter.BloomFilter import update [as 别名]

#.........这里部分代码省略.........

                break
            except aiohttp.ClientError as client_error:
                LOGGER.info('try %r for %r raised %r',
                            tries, url, client_error)
                exception = client_error

            tries += 1
        else:
            # We never broke out of the loop: all tries failed.
            LOGGER.error('%r failed after %r tries',
                         url, self.max_tries)
            self.record_statistic(FetchStatistic(url=url,
                                                 next_url=None,
                                                 status=None,
                                                 exception=exception,
                                                 size=0,
                                                 content_type=None,
                                                 encoding=None,
                                                 num_urls=0,
                                                 num_new_urls=0))
            return

        try:
            if is_redirect(response):
                location = response.headers['location']
                next_url = urllib.parse.urljoin(url, location)
                self.record_statistic(FetchStatistic(url=url,
                                                     next_url=next_url,
                                                     status=response.status,
                                                     exception=None,
                                                     size=0,
                                                     content_type=None,
                                                     encoding=None,
                                                     num_urls=0,
                                                     num_new_urls=0))

                if next_url in self.seen_urls:
                    return
                if max_redirect > 0:
                    LOGGER.info('redirect to %r from %r', next_url, url)
                    self.add_url(next_url, max_redirect - 1)
                else:
                    LOGGER.error('redirect limit reached for %r from %r',
                                 next_url, url)
            else:
                stat, links = await self.parse_links(response)
                self.record_statistic(stat)
                for link in utils.difference(links, self.seen_urls):

                    # for link in links.difference(self.seen_urls):
                    self.q.put_nowait((link, self.max_redirect))
                # self.seen_urls.update(links)
                self.seen_urls.update(links)
        finally:
            await response.release()

    async def work(self):
        """Worker loop: take (url, max_redirect) items off the queue and fetch them.

        Runs until the task is cancelled; the cancellation is swallowed so the
        coroutine ends quietly.
        """
        try:
            while True:
                item = await self.q.get()
                url, max_redirect = item
                # Every queued URL is expected to have been recorded as seen.
                assert url in self.seen_urls
                LOGGER.info("url:%s", url)
                LOGGER.info("max_redirect:%s", max_redirect)
                await self.fetch(url, max_redirect)
                # Mark the item as processed so q.join() can complete.
                self.q.task_done()
        except asyncio.CancelledError:
            return

    def url_allowed(self, url):
        """Return True when `url` passes the exclude, scheme and host checks.

        A URL is rejected when it matches the `self.exclude` pattern (if one
        is set), when its scheme is not http/https, or when `self.host_okay`
        rejects its host.
        """
        excluded = self.exclude and re.search(self.exclude, url)
        if excluded:
            return False

        parsed = urllib.parse.urlparse(url)
        if parsed.scheme != 'http' and parsed.scheme != 'https':
            LOGGER.debug('skipping non-http scheme in %r', url)
            return False

        # NOTE(review): splitport is a deprecated urllib.parse helper; kept
        # so the host string handed to host_okay stays exactly the same.
        host = urllib.parse.splitport(parsed.netloc)[0]
        if self.host_okay(host):
            return True

        LOGGER.debug('skipping non-root host in %r', url)
        return False

    def add_url(self, url, max_redirect=None):
        """Record `url` as seen and enqueue it for fetching.

        When `max_redirect` is None, `self.max_redirect` is used as the
        redirect budget stored alongside the URL.
        """
        redirects_left = self.max_redirect if max_redirect is None else max_redirect
        LOGGER.debug('adding %r %r', url, redirects_left)
        self.seen_urls.add(url)
        entry = (url, redirects_left)
        self.q.put_nowait(entry)

    async def crawl(self):
        """Run the crawler until all queued items have been processed.

        Starts `self.max_tasks` worker tasks running `self.work()`, waits for
        the queue to be fully processed, then cancels the workers. The wall
        clock times before and after the wait are stored in `self.t0` and
        `self.t1`.
        """
        workers = [asyncio.Task(self.work(), loop=self.loop)
                   for _ in range(self.max_tasks)]
        self.t0 = time.time()
        # Must be `await`: a `yield` here turns this function into an async
        # generator, which cannot be awaited and never waits for the queue.
        await self.q.join()
        self.t1 = time.time()
        for w in workers:
            w.cancel()
开发者ID:ramsayleung,项目名称:betacat,代码行数:104,代码来源:crawling.py


注:本文中的pybloomfilter.BloomFilter.update方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。