This article collects typical usage examples of the asyncio.Queue.task_done method in Python. If you have been wondering what Queue.task_done actually does, how to call it, or what real code that uses it looks like, the curated examples below should help. You can also explore further usage examples of the class it belongs to, asyncio.Queue.
Ten code examples of the Queue.task_done method are shown below, sorted by popularity by default.
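Before going through the examples, it helps to recall the contract they all rely on: every item obtained with get() must eventually be acknowledged with task_done(), and join() returns only once every enqueued item has been acknowledged. Here is a minimal, self-contained sketch of that contract (the names worker and main are ours, not taken from any example below):

import asyncio

async def worker(queue: asyncio.Queue) -> None:
    while True:
        item = await queue.get()
        try:
            await asyncio.sleep(0.01)  # stand-in for real work on the item
        finally:
            queue.task_done()  # pair every get() with exactly one task_done()

async def main() -> None:
    queue: asyncio.Queue = asyncio.Queue()
    workers = [asyncio.create_task(worker(queue)) for _ in range(3)]
    for i in range(10):
        queue.put_nowait(i)
    await queue.join()  # unblocks once all 10 items have been task_done()
    for w in workers:
        w.cancel()  # the workers loop forever, so cancel them explicitly
    await asyncio.gather(*workers, return_exceptions=True)

asyncio.run(main())

Note that calling task_done() more times than there were items raises ValueError, which is why the examples below are careful to call it exactly once per get(), including for sentinel values.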
Example 1: process_partitions_queue
# Required import: from asyncio import Queue [as alias]
# Or: from asyncio.Queue import task_done [as alias]
async def process_partitions_queue(
loop: asyncio.BaseEventLoop,
partitions_queue: asyncio.Queue,
results_queue: asyncio.Queue,
server_address: URL,
mission_template: Template,
mission_loader: str,
width: int,
scale: int,
) -> Awaitable[None]:
mission_name = mission_loader.split('/', 1)[0]
async with aiohttp.ClientSession() as http:
while True:
partition = await partitions_queue.get()
if partition is None:
partitions_queue.task_done()
return
await process_partition(
loop=loop,
results_queue=results_queue,
server_address=server_address,
http=http,
partition=partition,
mission_template=mission_template,
mission_loader=mission_loader,
mission_name=mission_name,
width=width,
scale=scale,
)
partitions_queue.task_done()
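The excerpt does not show how these workers are driven. A plausible driver (run_workers is a hypothetical name, not part of the original code) enqueues the partitions, adds one None sentinel per worker so that each loop exits, and awaits join() so that every task_done() above is accounted for:

async def run_workers(partitions, worker_count, **worker_kwargs):
    # Hypothetical driver; worker_kwargs would carry loop, server_address,
    # mission_template, mission_loader, width and scale.
    partitions_queue: asyncio.Queue = asyncio.Queue()
    results_queue: asyncio.Queue = asyncio.Queue()
    for partition in partitions:
        partitions_queue.put_nowait(partition)
    for _ in range(worker_count):
        partitions_queue.put_nowait(None)  # one sentinel per worker
    workers = [
        asyncio.ensure_future(process_partitions_queue(
            partitions_queue=partitions_queue,
            results_queue=results_queue,
            **worker_kwargs))
        for _ in range(worker_count)
    ]
    await partitions_queue.join()  # every partition and sentinel was task_done()
    await asyncio.gather(*workers)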
Example 2: udp_writer
# Required import: from asyncio import Queue [as alias]
# Or: from asyncio.Queue import task_done [as alias]
async def udp_writer(s: socket, oqueue: Queue) -> None:
"""Forward packets to the UDP socket."""
while True:
peer, data = await oqueue.get()
try:
s.sendto(data, peer)
finally:
oqueue.task_done()
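A sketch of how a writer like this might be wired up (the socket setup and the ('127.0.0.1', 9999) peer are illustrative assumptions, not taken from the original code): create a non-blocking UDP socket and a queue, start the writer as a task, enqueue (peer, data) tuples, and await join(), which the task_done() call in the finally block eventually releases:

import asyncio
import socket

async def main() -> None:
    s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    s.setblocking(False)
    oqueue: asyncio.Queue = asyncio.Queue()
    writer = asyncio.ensure_future(udp_writer(s, oqueue))
    await oqueue.put((('127.0.0.1', 9999), b'ping'))
    await oqueue.join()  # released once task_done() runs for the datagram
    writer.cancel()
    s.close()

asyncio.run(main())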
Example 3: call
# Required import: from asyncio import Queue [as alias]
# Or: from asyncio.Queue import task_done [as alias]
async def call(loop, inq: asyncio.Queue):
while True:
v = await inq.get()
logger.debug("consume[S] v:%s", v)
if v is None:
inq.task_done()
break
v = await afn(v)
logger.debug("consume[E] v:%s", v)
inq.task_done()
await inq.join()
logger.debug("consume[CLOSE]")
Example 4: __call__
# Required import: from asyncio import Queue [as alias]
# Or: from asyncio.Queue import task_done [as alias]
async def __call__(self, inq: asyncio.Queue):
while True:
v = await inq.get()
logger.debug("aggregate[S] v:%s", v)
if v is None:
inq.task_done()
break
await asyncio.sleep(0.1, loop=self.loop)
print(v)
logger.debug("aggregate[E] v:%s", v)
inq.task_done()
await inq.join()
logger.debug("aggregate[CLOSE]")
Example 5: process_results_queue
# Required import: from asyncio import Queue [as alias]
# Or: from asyncio.Queue import task_done [as alias]
async def process_results_queue(
results_queue: asyncio.Queue,
total_points: int,
output_file_path: Path,
) -> Awaitable[None]:
point_size = calcsize(HEIGHT_PACK_FORMAT)
output_size = point_size * total_points
natural_size = humanize.naturalsize(
output_size,
binary=True,
format='%.3f',
)
LOG.debug(f"output size: {natural_size}")
processed_points = 0
output_file_path.parent.parent.mkdir(parents=True, exist_ok=True)
with output_file_path.open('wb') as f:
f.truncate(output_size)
while True:
data = await results_queue.get()
if not data:
results_queue.task_done()
return
partition, values = data
start = partition.start * point_size
processed_points += (partition.end - partition.start) + 1
progress = (processed_points / total_points) * 100
LOG.debug(
f"gather results for range "
f"[{partition.start}:{partition.end}], "
f"progress: {progress:.2f}%"
)
f.seek(start)
f.write(values)
results_queue.task_done()
Example 6: __init__
# Required import: from asyncio import Queue [as alias]
# Or: from asyncio.Queue import task_done [as alias]
class Crawler:
def __init__(self, root_url, max_redirect):
self.max_tasks = 10
self.max_redirect = max_redirect
self.q = Queue()
self.seen_urls = set()
# aiohttp's ClientSession does connection pooling and
# HTTP keep-alives for us.
self.session = aiohttp.ClientSession(loop=loop)
# Put (URL, max_redirect) in the Queue
        self.q.put_nowait((root_url, self.max_redirect))  # Queue.put() is a coroutine; put_nowait() works from plain __init__
@asyncio.coroutine
def crawl(self):
        '''Run the crawler until all work is done.'''
workers = [asyncio.Task(self.work())
for _ in range(self.max_tasks)]
# When all work is done, exit.
yield from self.q.join()
for w in workers:
w.cancel()
@asyncio.coroutine
def work(self):
while True:
url, max_redirect = yield from self.q.get()
# Download page and add new links to self.q
yield from self.fetch(url, max_redirect)
self.q.task_done()
@asyncio.coroutine
def fetch(self, url, max_redirect):
# Handle redirects ourselves.
response = yield from self.session.get(
url, allow_redirects=False)
try:
if is_redirect(response):
if max_redirect > 0:
next_url = response.headers['location']
if next_url in self.seen_urls:
# We have done this before.
return
# Remember we have seen this url.
self.seen_urls.add(next_url)
# Follow the redirect. One less redirect remains.
                    self.q.put_nowait((next_url, max_redirect - 1))
else:
links = yield from self.parse_links(response)
# Python set-logic:
for link in links.difference(self.seen_urls):
self.q.put_nowait((link, self.max_redirect))
self.seen_urls.update(links)
finally:
# Return connection to pool.
yield from response.release()
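The class above is adapted from the well-known asyncio crawler walkthrough; the excerpt omits how it is started. A typical driver (the URL and redirect limit are placeholders) creates the event loop first, since __init__ hands a module-level loop to aiohttp.ClientSession, then runs crawl() to completion:

loop = asyncio.get_event_loop()
crawler = Crawler('http://example.com/', max_redirect=10)
loop.run_until_complete(crawler.crawl())
crawler.session.close()  # a coroutine in recent aiohttp versions; await/run it there instead
loop.close()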
Example 7: __init__
# Required import: from asyncio import Queue [as alias]
# Or: from asyncio.Queue import task_done [as alias]
# ......... part of the code is omitted here .........
break
except aiohttp.ClientError as client_error:
LOGGER.info('try %r for %r raised %r',
tries, url, client_error)
exception = client_error
tries += 1
else:
# We never broke out of the loop: all tries failed.
LOGGER.error('%r failed after %r tries',
url, self.max_tries)
self.record_statistic(FetchStatistic(url=url,
next_url=None,
status=None,
exception=exception,
size=0,
content_type=None,
encoding=None,
num_urls=0,
num_new_urls=0))
return
try:
if is_redirect(response):
location = response.headers['location']
next_url = urllib.parse.urljoin(url, location)
self.record_statistic(FetchStatistic(url=url,
next_url=next_url,
status=response.status,
exception=None,
size=0,
content_type=None,
encoding=None,
num_urls=0,
num_new_urls=0))
if next_url in self.seen_urls:
return
if max_redirect > 0:
LOGGER.info('redirect to %r from %r', next_url, url)
self.add_url(next_url, max_redirect - 1)
else:
LOGGER.error('redirect limit reached for %r from %r',
next_url, url)
else:
stat, links = await self.parse_links(response)
self.record_statistic(stat)
for link in utils.difference(links, self.seen_urls):
# for link in links.difference(self.seen_urls):
self.q.put_nowait((link, self.max_redirect))
# self.seen_urls.update(links)
self.seen_urls.update(links)
finally:
await response.release()
async def work(self):
"""Process queue items forever."""
try:
while True:
url, max_redirect = await self.q.get()
assert url in self.seen_urls
LOGGER.info("url:%s", url)
LOGGER.info("max_redirect:%s", max_redirect)
await self.fetch(url, max_redirect)
self.q.task_done()
except asyncio.CancelledError:
pass
def url_allowed(self, url):
if self.exclude and re.search(self.exclude, url):
return False
parts = urllib.parse.urlparse(url)
if parts.scheme not in ('http', 'https'):
LOGGER.debug('skipping non-http scheme in %r', url)
return False
host, port = urllib.parse.splitport(parts.netloc)
if not self.host_okay(host):
LOGGER.debug('skipping non-root host in %r', url)
return False
return True
def add_url(self, url, max_redirect=None):
"""Add a URL to the queue if not seen before."""
if max_redirect is None:
max_redirect = self.max_redirect
LOGGER.debug('adding %r %r', url, max_redirect)
self.seen_urls.add(url)
self.q.put_nowait((url, max_redirect))
async def crawl(self):
"""Run the crawler until all finished."""
workers = [asyncio.Task(self.work(), loop=self.loop)
for _ in range(self.max_tasks)]
self.t0 = time.time()
        await self.q.join()
self.t1 = time.time()
for w in workers:
w.cancel()
Example 8: __init__
# Required import: from asyncio import Queue [as alias]
# Or: from asyncio.Queue import task_done [as alias]
# ......... part of the code is omitted here .........
content_type, pdict = cgi.parse_header(content_type)
encoding = pdict.get('charset', 'utf-8')
if content_type in ('text/html', 'application/xml'):
text = yield from response.text()
# Replace href with (?:href|src) to follow image links.
urls = set(re.findall(r'''(?i)href=["']([^\s"'<>]+)''',text))
if urls:
LOGGER.info('got %r distinct urls from %r',len(urls), response.url)
for url in urls:
normalized = urllib.parse.urljoin(response.url, url)
defragmented, frag = urllib.parse.urldefrag(normalized)
if self.url_allowed(defragmented):
links.add(defragmented)
stat = FetchStatistic(
url=response.url,
next_url=None,
status=response.status,
exception=None,
size=len(body),
content_type=content_type,
encoding=encoding,
num_urls=len(links),
num_new_urls=len(links - self.seen_urls))
return stat, links
@asyncio.coroutine
def fetch(self, url, max_redirect):
"""Fetch one URL."""
tries = 0
exception = None
while tries < self.max_tries:
try:
                response = yield from self.session.get(url, allow_redirects=False)
                break
            except aiohttp.ClientError as client_error:
                LOGGER.info('try %r for %r raised %r', tries, url, client_error)
                exception = client_error
            tries += 1  # without this increment the retry loop never terminates on repeated errors
else:
return
try:
if is_redirect(response):
location = response.headers['location']
            else:
stat, links = yield from self.parse_links(response)
self.record_statistic(stat)
for link in links.difference(self.seen_urls):
self.q.put_nowait((link, self.max_redirect))
self.seen_urls.update(links)
finally:
yield from response.release()
@asyncio.coroutine
def work(self):
"""Process queue items forever."""
try:
while True:
url, max_redirect = yield from self.q.get() #q.get() Remove and return an item from the queue. If queue is empty, wait until an item is available.
#print('url',url, 'max_redirect', max_redirect)
                assert url in self.seen_urls  # assertion: raises AssertionError immediately if the URL was never recorded
yield from self.fetch(url, max_redirect)
                self.q.task_done()  # Indicate that a formerly enqueued task is complete.
except asyncio.CancelledError:
pass
def url_allowed(self, url):
if self.exclude and re.search(self.exclude, url):
return False
parts = urllib.parse.urlparse(url)
if parts.scheme not in ('http', 'https'):
LOGGER.debug('skipping non-http scheme in %r', url)
return False
host, port = urllib.parse.splitport(parts.netloc)
if not self.host_okay(host):
LOGGER.debug('skipping non-root host in %r', url)
return False
return True
def add_url(self, url, max_redirect=None):
"""Add a URL to the queue if not seen before."""
if max_redirect is None:
max_redirect = self.max_redirect
LOGGER.debug('adding %r %r', url, max_redirect)
self.seen_urls.add(url)
        self.q.put_nowait((url, max_redirect))  # put_nowait() puts an item into the queue without blocking; the seed URL is enqueued here before anything else runs
@asyncio.coroutine
def crawl(self):
"""Run the crawler until all finished."""
workers = [asyncio.Task(self.work(), loop=self.loop) for _ in range(self.max_tasks)]
self.t0 = time.time()
        yield from self.q.join()  # Block until all items in the queue have been gotten and processed.
self.t1 = time.time()
for w in workers:
w.cancel()
Example 9: Crawler
# Required import: from asyncio import Queue [as alias]
# Or: from asyncio.Queue import task_done [as alias]
# ......... part of the code is omitted here .........
self.record_statistic(url=url, next_url=next_url, status=response.status)
if next_url in self.seen_urls:
return
if max_redirect > 0:
LOGGER.info("redirect to %r from %r max_redir: %i", next_url, url, max_redirect - 1)
self.add_urls(next_url, max_redirect - 1)
else:
LOGGER.error("redirect limit reached for %r from %r", next_url, url)
return
@asyncio.coroutine
def fetch(self, url, max_redirect, sem):
"""Fetch one URL."""
tries = 0
web_page = None
exception = None
_url = None
_encoding = None
_content_type = None
sleep_time = 0
while tries < self.max_tries:
try:
with (yield from sem):
response = yield from asyncio.wait_for(
self.session.get(url, allow_redirects=False), 10, loop=self.loop
)
if tries > 1:
LOGGER.debug("try %r for %r success", tries, url)
break
except Exception as client_error:
sleep_time += 5
yield from asyncio.sleep(sleep_time)
LOGGER.error("try %r for %r raised %r", tries, url, client_error)
exception = client_error
tries += 1
else:
# We never broke out of the loop: all tries failed.
LOGGER.error("%r failed after %r tries", url, self.max_tries)
self.record_statistic(url=url, exception=exception)
return (web_page, _url, _content_type, _encoding)
try:
_url, _content_type, _encoding = get_content_type_and_encoding(response)
if is_redirect(response):
self.handle_redirect(response, url, max_redirect)
web_page = "redirect"
elif response.status == 200 and _content_type in ("text/html", "application/xml"):
web_page = yield from response.text()
else:
self.record_statistic(
url=response.url, status=response.status, content_type=_content_type, encoding=_encoding
)
except Exception as e:
print("*******error**********")
finally:
yield from response.release()
return (web_page, _url, _content_type, _encoding)
def add_urls(self, urls, max_redirect=None):
"""Add a URL to the queue if not seen before."""
if max_redirect is None:
max_redirect = self.max_redirect
if not isinstance(urls, str):
urls = set(urls)
for link in urls.difference(self.seen_urls):
self.q.put_nowait((link, max_redirect))
self.seen_urls.update(urls)
elif urls not in self.seen_urls:
self.q.put_nowait((urls, max_redirect))
self.seen_urls.add(urls)
@asyncio.coroutine
def work(self, sem):
"""Process queue items forever."""
try:
while True:
url, max_redirect = yield from self.q.get()
# assert url in self.seen_urls
web_page, url, content_type, encoding = yield from self.fetch(url, max_redirect, sem)
if web_page and web_page != "redirect":
new_links = yield from self.parse_links(web_page, url, content_type, encoding)
if self.scraper:
data = self.scraper.scrape(url, web_page)
if self.data_handler:
self.data_handler.handle(data)
self.add_urls(new_links)
self.q.task_done()
except (asyncio.CancelledError,):
print("error")
@asyncio.coroutine
def crawl(self):
sem = asyncio.Semaphore(value=self.max_connections_per_host, loop=self.loop)
"""Run the crawler until all finished."""
LOGGER.info("Starting crawl...")
workers = [asyncio.Task(self.work(sem), loop=self.loop) for _ in range(self.max_tasks)]
self.t0 = time.time()
yield from self.q.join()
self.t1 = time.time()
for w in workers:
w.cancel()
Example 10: __init__
# Required import: from asyncio import Queue [as alias]
# Or: from asyncio.Queue import task_done [as alias]
# ......... part of the code is omitted here .........
# remove trailing slash
if url[-1] == '/':
url = url[:-1]
# we have not seen this url, so we fetch it and add it
if url not in self.processed:
self.processed.add(url)
# suspend execution until we get data from our HTTP request
resp = await self.fetch(url)
if resp != None:
# add to sites
self.data.append(resp)
# go through each link and add them to the queue if we have not traversed them
links = [x for x in resp['links'] if x.startswith('/') or x.startswith(url)]
for link in links:
# formatting
if not link.startswith(self.basePath):
link = self.basePath + link
if '#' in link:
link = link[:link.index('#')]
# add it to our queue for processing
if link not in self.processed:
if link != '' and link != None:
self.queue.put_nowait(link)
# this task is done
self.queue.task_done()
except Exception as err:
pass
except asyncio.CancelledError:
pass
    '''
    Parse a URL's HTML for links and other page data.
    '''
def parse(self, data, url):
# parse a single url
s = soup(data.decode('utf-8', 'ignore'), "html.parser")
# get links
links = [ x['href'] for x in s.findAll('a') if x.has_attr('href') ]
# get assets
assets = self.get_static(s, url)
# get title
title = s.find('title')
if title != None:
title = title.text
else:
title = ''
return {