

Python Queue.task_done Method Code Examples

This article collects typical usage examples of the Python asyncio.Queue.task_done method. If you are wondering what Queue.task_done does, how to call it, or what real-world code that uses it looks like, the hand-picked examples below should help. You can also explore further usage examples of asyncio.Queue, the class this method belongs to.


The following shows 10 code examples of Queue.task_done, ordered by popularity by default.
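Before the examples, here is a minimal, self-contained sketch of the contract behind task_done (not taken from any of the projects below; the worker/main names and the sleep-based workload are illustrative assumptions): every await queue.get() is balanced by exactly one queue.task_done() call, which is what lets await queue.join() return once all enqueued items have been processed.

import asyncio

async def worker(queue: asyncio.Queue) -> None:
    # Consume items until a None sentinel arrives.
    while True:
        item = await queue.get()
        try:
            if item is None:
                return
            await asyncio.sleep(0.01)  # stand-in for real work
        finally:
            # Every get() is balanced by task_done(); otherwise
            # queue.join() below would block forever.
            queue.task_done()

async def main() -> None:
    queue: asyncio.Queue = asyncio.Queue()
    task = asyncio.create_task(worker(queue))
    for i in range(5):
        queue.put_nowait(i)
    queue.put_nowait(None)  # sentinel telling the worker to stop
    await queue.join()      # resumes once task_done() was called for every item
    await task

asyncio.run(main())

The examples below follow the same contract; they differ mainly in whether the worker stops on a sentinel value (Examples 1 and 3-5) or keeps running until it is cancelled after queue.join() returns (the crawler examples).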

Example 1: process_partitions_queue

# Required import: from asyncio import Queue [as alias]
# Or: from asyncio.Queue import task_done [as alias]
async def process_partitions_queue(
    loop: asyncio.BaseEventLoop,
    partitions_queue: asyncio.Queue,
    results_queue: asyncio.Queue,
    server_address: URL,
    mission_template: Template,
    mission_loader: str,
    width: int,
    scale: int,
) -> Awaitable[None]:

    mission_name = mission_loader.split('/', 1)[0]

    async with aiohttp.ClientSession() as http:
        while True:
            partition = await partitions_queue.get()

            if partition is None:
                partitions_queue.task_done()
                return

            await process_partition(
                loop=loop,
                results_queue=results_queue,
                server_address=server_address,
                http=http,
                partition=partition,
                mission_template=mission_template,
                mission_loader=mission_loader,
                mission_name=mission_name,
                width=width,
                scale=scale,
            )
            partitions_queue.task_done()
Author: IL2HorusTeam, Project: il2-heightmap-creator, Lines of code: 36, Source file: creation.py

Example 2: udp_writer

# Required import: from asyncio import Queue [as alias]
# Or: from asyncio.Queue import task_done [as alias]
async def udp_writer(s: socket, oqueue: Queue) -> None:
    """Forward packets to the UDP socket."""

    while True:
        peer, data = await oqueue.get()
        try:
            s.sendto(data, peer)
        finally:
            oqueue.task_done()
Author: AndreLouisCaron, Project: aiotk, Lines of code: 11, Source file: _udp.py

Example 3: call

# Required import: from asyncio import Queue [as alias]
# Or: from asyncio.Queue import task_done [as alias]
async def call(loop, inq: asyncio.Queue):
    while True:
        v = await inq.get()
        logger.debug("consume[S]	v:%s", v)
        if v is None:
            inq.task_done()
            break
        v = await afn(v)
        logger.debug("consume[E]	v:%s", v)
        inq.task_done()
    await inq.join()
    logger.debug("consume[CLOSE]")
Author: podhmo, Project: individual-sandbox, Lines of code: 14, Source file: 04funout.py

Example 4: __call__

# Required import: from asyncio import Queue [as alias]
# Or: from asyncio.Queue import task_done [as alias]
async def __call__(self, inq: asyncio.Queue):
    while True:
        v = await inq.get()
        logger.debug("aggregate[S]	v:%s", v)
        if v is None:
            inq.task_done()
            break
        await asyncio.sleep(0.1, loop=self.loop)
        print(v)
        logger.debug("aggregate[E]	v:%s", v)
        inq.task_done()
    await inq.join()
    logger.debug("aggregate[CLOSE]")
Author: podhmo, Project: individual-sandbox, Lines of code: 15, Source file: 03funout.py

Example 5: process_results_queue

# Required import: from asyncio import Queue [as alias]
# Or: from asyncio.Queue import task_done [as alias]
async def process_results_queue(
    results_queue: asyncio.Queue,
    total_points: int,
    output_file_path: Path,
) -> Awaitable[None]:

    point_size = calcsize(HEIGHT_PACK_FORMAT)
    output_size = point_size * total_points

    natural_size = humanize.naturalsize(
        output_size,
        binary=True,
        format='%.3f',
    )
    LOG.debug(f"output size: {natural_size}")

    processed_points = 0

    output_file_path.parent.parent.mkdir(parents=True, exist_ok=True)

    with output_file_path.open('wb') as f:
        f.truncate(output_size)

        while True:
            data = await results_queue.get()
            if not data:
                results_queue.task_done()
                return

            partition, values = data
            start = partition.start * point_size

            processed_points += (partition.end - partition.start) + 1
            progress = (processed_points / total_points) * 100

            LOG.debug(
                f"gather results for range "
                f"[{partition.start}:{partition.end}], "
                f"progress: {progress:.2f}%"
            )

            f.seek(start)
            f.write(values)

            results_queue.task_done()
Author: IL2HorusTeam, Project: il2-heightmap-creator, Lines of code: 47, Source file: creation.py

Example 6: __init__

# Required import: from asyncio import Queue [as alias]
# Or: from asyncio.Queue import task_done [as alias]
class Crawler:
    def __init__(self, root_url, max_redirect):
        self.max_tasks = 10
        self.max_redirect = max_redirect
        self.q = Queue()
        self.seen_urls = set()

        # aiohttp's ClientSession does connection pooling and
        # HTTP keep-alives for us.
        self.session = aiohttp.ClientSession(loop=loop)

        # Put (URL, max_redirect) in the Queue
        self.q.put_nowait((root_url, self.max_redirect))
        
    @asyncio.coroutine
    def crawl(self):
        '''Run the crawler until all work is done.'''
        workers = [asyncio.Task(self.work())
                   for _ in range(self.max_tasks)]

        # When all work is done, exit.
        yield from self.q.join()
        for w in workers:
            w.cancel()

    @asyncio.coroutine
    def work(self):
        while True:
            url, max_redirect = yield from self.q.get()

            # Download page and add new links to self.q
            yield from self.fetch(url, max_redirect)
            self.q.task_done()

    @asyncio.coroutine
    def fetch(self, url, max_redirect):
        # Handle redirects ourselves.
        response = yield from self.session.get(
            url, allow_redirects=False)

        try:
            if is_redirect(response):
                if max_redirect > 0:
                    next_url = response.headers['location']
                    if next_url in self.seen_urls:
                        # We have done this before.
                        return

                    # Remember we have seen this url.
                    self.seen_urls.add(next_url)

                    # Follow the redirect. One less redirect remains.
                    self.q.put_nowait((next_url, max_redirect - 1))
            else:
                links = yield from self.parse_links(response)
                # Python set-logic:
                for link in links.difference(self.seen_urls):
                    self.q.put_nowait((link, self.max_redirect))
                self.seen_urls.update(links)
        finally:
            # Return connection to pool.
            yield from response.release()
Author: Chaogebruce, Project: Webcrawler, Lines of code: 64, Source file: app_asyncio.py

Example 7: __init__

# Required import: from asyncio import Queue [as alias]
# Or: from asyncio.Queue import task_done [as alias]

#......... part of the code is omitted here .........

                break
            except aiohttp.ClientError as client_error:
                LOGGER.info('try %r for %r raised %r',
                            tries, url, client_error)
                exception = client_error

            tries += 1
        else:
            # We never broke out of the loop: all tries failed.
            LOGGER.error('%r failed after %r tries',
                         url, self.max_tries)
            self.record_statistic(FetchStatistic(url=url,
                                                 next_url=None,
                                                 status=None,
                                                 exception=exception,
                                                 size=0,
                                                 content_type=None,
                                                 encoding=None,
                                                 num_urls=0,
                                                 num_new_urls=0))
            return

        try:
            if is_redirect(response):
                location = response.headers['location']
                next_url = urllib.parse.urljoin(url, location)
                self.record_statistic(FetchStatistic(url=url,
                                                     next_url=next_url,
                                                     status=response.status,
                                                     exception=None,
                                                     size=0,
                                                     content_type=None,
                                                     encoding=None,
                                                     num_urls=0,
                                                     num_new_urls=0))

                if next_url in self.seen_urls:
                    return
                if max_redirect > 0:
                    LOGGER.info('redirect to %r from %r', next_url, url)
                    self.add_url(next_url, max_redirect - 1)
                else:
                    LOGGER.error('redirect limit reached for %r from %r',
                                 next_url, url)
            else:
                stat, links = await self.parse_links(response)
                self.record_statistic(stat)
                for link in utils.difference(links, self.seen_urls):

                    # for link in links.difference(self.seen_urls):
                    self.q.put_nowait((link, self.max_redirect))
                # self.seen_urls.update(links)
                self.seen_urls.update(links)
        finally:
            await response.release()

    async def work(self):
        """Process queue items forever."""
        try:
            while True:
                url, max_redirect = await self.q.get()
                assert url in self.seen_urls
                LOGGER.info("url:%s", url)
                LOGGER.info("max_redirect:%s", max_redirect)
                await self.fetch(url, max_redirect)
                self.q.task_done()
        except asyncio.CancelledError:
            pass

    def url_allowed(self, url):
        if self.exclude and re.search(self.exclude, url):
            return False
        parts = urllib.parse.urlparse(url)
        if parts.scheme not in ('http', 'https'):
            LOGGER.debug('skipping non-http scheme in %r', url)
            return False
        host, port = urllib.parse.splitport(parts.netloc)
        if not self.host_okay(host):
            LOGGER.debug('skipping non-root host in %r', url)
            return False
        return True

    def add_url(self, url, max_redirect=None):
        """Add a URL to the queue if not seen before."""
        if max_redirect is None:
            max_redirect = self.max_redirect
        LOGGER.debug('adding %r %r', url, max_redirect)
        self.seen_urls.add(url)
        self.q.put_nowait((url, max_redirect))

    async def crawl(self):
        """Run the crawler until all finished."""
        workers = [asyncio.Task(self.work(), loop=self.loop)
                   for _ in range(self.max_tasks)]
        self.t0 = time.time()
        await self.q.join()
        self.t1 = time.time()
        for w in workers:
            w.cancel()
Author: ramsayleung, Project: betacat, Lines of code: 104, Source file: crawling.py

Example 8: __init__

# Required import: from asyncio import Queue [as alias]
# Or: from asyncio.Queue import task_done [as alias]

#......... part of the code is omitted here .........
                content_type, pdict = cgi.parse_header(content_type)

            encoding = pdict.get('charset', 'utf-8')
            if content_type in ('text/html', 'application/xml'):
                text = yield from response.text()

                # Replace href with (?:href|src) to follow image links.
                urls = set(re.findall(r'''(?i)href=["']([^\s"'<>]+)''', text))
                if urls:
                    LOGGER.info('got %r distinct urls from %r', len(urls), response.url)
                for url in urls:
                    normalized = urllib.parse.urljoin(response.url, url)
                    defragmented, frag = urllib.parse.urldefrag(normalized)
                    if self.url_allowed(defragmented):
                        links.add(defragmented)

        stat = FetchStatistic(
            url=response.url,
            next_url=None,
            status=response.status,
            exception=None,
            size=len(body),
            content_type=content_type,
            encoding=encoding,
            num_urls=len(links),
            num_new_urls=len(links - self.seen_urls))

        return stat, links

    @asyncio.coroutine
    def fetch(self, url, max_redirect):
        """Fetch one URL."""
        tries = 0
        exception = None
        while tries < self.max_tries:
            try:
                response = yield from self.session.get(url, allow_redirects=False)  #1
                break  #2
            except aiohttp.ClientError as client_error:
                LOGGER.info('try %r for %r raised %r', tries, url, client_error)
                exception = client_error
            tries += 1
        else:
            return
            
        try:
            if is_redirect(response):
                location = response.headers['location']

            else:  #4
                stat, links = yield from self.parse_links(response)
                self.record_statistic(stat)
                for link in links.difference(self.seen_urls):
                    self.q.put_nowait((link, self.max_redirect))
                self.seen_urls.update(links)
        finally:
            yield from response.release()

    @asyncio.coroutine
    def work(self):
        """Process queue items forever."""
        try:
            while True:
                url, max_redirect = yield from self.q.get()  # q.get(): remove and return an item from the queue; if the queue is empty, wait until an item is available.
                #print('url',url, 'max_redirect', max_redirect)
                assert url in self.seen_urls   # assert raises immediately if the URL was never recorded as seen
                yield from self.fetch(url, max_redirect)
                self.q.task_done()  # Indicate that a formerly enqueued task is complete.
        except asyncio.CancelledError:
            pass

    def url_allowed(self, url):
        if self.exclude and re.search(self.exclude, url):
            return False
        parts = urllib.parse.urlparse(url)
        if parts.scheme not in ('http', 'https'):
            LOGGER.debug('skipping non-http scheme in %r', url)
            return False
        host, port = urllib.parse.splitport(parts.netloc)
        if not self.host_okay(host):
            LOGGER.debug('skipping non-root host in %r', url)
            return False
        return True

    def add_url(self, url, max_redirect=None):
        """Add a URL to the queue if not seen before."""
        if max_redirect is None:
            max_redirect = self.max_redirect
        LOGGER.debug('adding %r %r', url, max_redirect)
        self.seen_urls.add(url)
        self.q.put_nowait((url, max_redirect))  # put_nowait(): put an item into the queue without blocking; this is actually the first statement to run.

    @asyncio.coroutine
    def crawl(self):
        """Run the crawler until all finished."""
        workers = [asyncio.Task(self.work(), loop=self.loop) for _ in range(self.max_tasks)]
        self.t0 = time.time()
        yield from self.q.join()  # Block until all items in the queue have been gotten and processed.
        self.t1 = time.time()
        for w in workers:
            w.cancel()
Author: penglee87, Project: lpython, Lines of code: 104, Source file: crawl_01.py

Example 9: Crawler

# Required import: from asyncio import Queue [as alias]
# Or: from asyncio.Queue import task_done [as alias]

#......... part of the code is omitted here .........
        self.record_statistic(url=url, next_url=next_url, status=response.status)
        if next_url in self.seen_urls:
            return
        if max_redirect > 0:
            LOGGER.info("redirect to %r from %r max_redir: %i", next_url, url, max_redirect - 1)
            self.add_urls(next_url, max_redirect - 1)
        else:
            LOGGER.error("redirect limit reached for %r from %r", next_url, url)
        return

    @asyncio.coroutine
    def fetch(self, url, max_redirect, sem):
        """Fetch one URL."""
        tries = 0
        web_page = None
        exception = None
        _url = None
        _encoding = None
        _content_type = None
        sleep_time = 0
        while tries < self.max_tries:
            try:
                with (yield from sem):
                    response = yield from asyncio.wait_for(
                        self.session.get(url, allow_redirects=False), 10, loop=self.loop
                    )
                if tries > 1:
                    LOGGER.debug("try %r for %r success", tries, url)
                break
            except Exception as client_error:
                sleep_time += 5
                yield from asyncio.sleep(sleep_time)
                LOGGER.error("try %r for %r raised %r", tries, url, client_error)
                exception = client_error
            tries += 1
        else:
            # We never broke out of the loop: all tries failed.
            LOGGER.error("%r failed after %r tries", url, self.max_tries)
            self.record_statistic(url=url, exception=exception)
            return (web_page, _url, _content_type, _encoding)
        try:
            _url, _content_type, _encoding = get_content_type_and_encoding(response)
            if is_redirect(response):
                self.handle_redirect(response, url, max_redirect)
                web_page = "redirect"
            elif response.status == 200 and _content_type in ("text/html", "application/xml"):
                web_page = yield from response.text()
            else:
                self.record_statistic(
                    url=response.url, status=response.status, content_type=_content_type, encoding=_encoding
                )
        except Exception as e:
            print("*******error**********")
        finally:
            yield from response.release()
        return (web_page, _url, _content_type, _encoding)

    def add_urls(self, urls, max_redirect=None):
        """Add a URL to the queue if not seen before."""
        if max_redirect is None:
            max_redirect = self.max_redirect
        if not isinstance(urls, str):
            urls = set(urls)
            for link in urls.difference(self.seen_urls):
                self.q.put_nowait((link, max_redirect))
            self.seen_urls.update(urls)
        elif urls not in self.seen_urls:
            self.q.put_nowait((urls, max_redirect))
            self.seen_urls.add(urls)

    @asyncio.coroutine
    def work(self, sem):
        """Process queue items forever."""
        try:
            while True:
                url, max_redirect = yield from self.q.get()
                # assert url in self.seen_urls
                web_page, url, content_type, encoding = yield from self.fetch(url, max_redirect, sem)
                if web_page and web_page != "redirect":
                    new_links = yield from self.parse_links(web_page, url, content_type, encoding)
                    if self.scraper:
                        data = self.scraper.scrape(url, web_page)
                    if self.data_handler:
                        self.data_handler.handle(data)
                    self.add_urls(new_links)
                self.q.task_done()
        except (asyncio.CancelledError,):
            print("error")

    @asyncio.coroutine
    def crawl(self):
        sem = asyncio.Semaphore(value=self.max_connections_per_host, loop=self.loop)
        """Run the crawler until all finished."""
        LOGGER.info("Starting crawl...")
        workers = [asyncio.Task(self.work(sem), loop=self.loop) for _ in range(self.max_tasks)]
        self.t0 = time.time()
        yield from self.q.join()
        self.t1 = time.time()
        for w in workers:
            w.cancel()
Author: koolkt, Project: python_crawler, Lines of code: 104, Source file: crawling.py

Example 10: __init__

# Required import: from asyncio import Queue [as alias]
# Or: from asyncio.Queue import task_done [as alias]

#......... part of the code is omitted here .........
                    
                    # remove trailing slash
                    if url[-1] == '/':
                        url = url[:-1]
                    
                    # we have not seen this url, so we fetch it and add it
                    if url not in self.processed:
                        self.processed.add(url)
                        
                        # suspend execution until we get data from our HTTP request
                        resp = await self.fetch(url)
                        
                        if resp != None:
                            # add to sites
                            self.data.append(resp)
                        
                            # go through each link and add them to the queue if we have not traversed them
                            links = [x for x in resp['links'] if x.startswith('/') or x.startswith(url)]
                            for link in links:
                                
                                # formatting
                                if not link.startswith(self.basePath):
                                    link = self.basePath + link
                                
                                if '#' in link:
                                    link = link[:link.index('#')]
                                
                                # add it to our queue for processing
                                if link not in self.processed:
                                    if link != '' and link != None:
                                        self.queue.put_nowait(link)
                                    
                    # this task is done
                    self.queue.task_done()

                except Exception as err:
                    pass
                    
        except asyncio.CancelledError:
            pass
  
    
    '''
        Parse a url for links and other stuff too
    '''
    def parse(self, data, url):
        # parse a single url
        s = soup(data.decode('utf-8', 'ignore'), "html.parser")
        
        # get links
        links = [ x['href'] for x in s.findAll('a') if x.has_attr('href') ]
        
        # get assets 
        assets = self.get_static(s, url)
        
        # get title
        title = s.find('title')
        
        if title != None:
            title = title.text
        else:
            title = ''
            
        return {
Author: HashCollision, Project: WebCrawler, Lines of code: 70, Source file: crawler.py


Note: The asyncio.Queue.task_done examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by various developers; copyright of the source code belongs to the original authors, and any distribution or use should follow the corresponding project's license. Do not reproduce without permission.