本文整理汇总了Python中eventlet.Queue.join方法的典型用法代码示例。如果您正苦于以下问题:Python Queue.join方法的具体用法?Python Queue.join怎么用?Python Queue.join使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类eventlet.Queue的用法示例。
在下文中一共展示了Queue.join方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: ECWriter
# 需要导入模块: from eventlet import Queue [as 别名]
# 或者: from eventlet.Queue import join [as 别名]
class ECWriter(object):
"""
Writes an EC chunk
"""
def __init__(self, chunk, conn):
self._chunk = chunk
self._conn = conn
self.failed = False
self.bytes_transferred = 0
self.checksum = hashlib.md5()
@property
def chunk(self):
return self._chunk
@property
def conn(self):
return self._conn
@classmethod
def connect(cls, chunk, sysmeta, reqid=None):
    """Open a chunked PUT request for ``chunk`` and wrap it in a writer.

    :param chunk: chunk description; its ``"url"`` and ``"pos"`` entries
        are read here
    :param sysmeta: content metadata; the ``id``, ``content_path``,
        ``chunk_method``, ``container_id``, ``policy`` and ``version``
        entries are copied into the request headers
    :param reqid: optional request id, sent as ``X-oio-req-id`` when truthy
    :returns: a new instance of ``cls`` bound to ``chunk`` and the
        connection
    """
    url_parts = urlparse(chunk["url"])
    # The last path segment of the URL is used as the chunk id.
    chunk_id = url_parts.path.rpartition('/')[2]

    headers = {
        "transfer-encoding": "chunked",
        chunk_headers["content_id"]: sysmeta['id'],
        chunk_headers["content_path"]: sysmeta['content_path'],
        chunk_headers["content_chunkmethod"]: sysmeta['chunk_method'],
        chunk_headers["container_id"]: sysmeta['container_id'],
        chunk_headers["chunk_pos"]: chunk["pos"],
        chunk_headers["chunk_id"]: chunk_id,
        chunk_headers["content_policy"]: sysmeta['policy'],
        chunk_headers["content_version"]: sysmeta['version'],
    }
    if reqid:
        headers['X-oio-req-id'] = reqid

    # The metachunk size and hash are announced here and sent in the
    # trailer, after the body.
    headers["Trailer"] = (chunk_headers["metachunk_size"],
                          chunk_headers["metachunk_hash"])

    # The connection attempt runs under the ConnectionTimeout context.
    with ConnectionTimeout(io.CONNECTION_TIMEOUT):
        connection = io.http_connect(
            url_parts.netloc, 'PUT', url_parts.path, headers)
        connection.chunk = chunk
    return cls(chunk, connection)
def start(self, pool):
    """Create the data queue and launch the send coroutine in ``pool``.

    :param pool: object whose ``spawn`` method schedules ``self._send``
    """
    # Data reaches the send coroutine through a Queue created with
    # io.PUT_QUEUE_DEPTH as its size argument.
    depth = io.PUT_QUEUE_DEPTH
    self.queue = Queue(depth)
    # The send coroutine runs for as long as the pool keeps it alive.
    pool.spawn(self._send)
def _send(self):
# this is the send coroutine loop
while True:
# fetch input data from the queue
d = self.queue.get()
# use HTTP transfer encoding chunked
# to write data to RAWX
if not self.failed:
# format the chunk
to_send = "%x\r\n%s\r\n" % (len(d), d)
try:
with ChunkWriteTimeout(io.CHUNK_TIMEOUT):
self.conn.send(to_send)
self.bytes_transferred += len(d)
except (Exception, ChunkWriteTimeout) as e:
self.failed = True
msg = str(e)
logger.warn("Failed to write to %s (%s)", self.chunk, msg)
self.chunk['error'] = msg
self.queue.task_done()
def wait(self):
# wait until all data in the queue
# has been processed by the send coroutine
if self.queue.unfinished_tasks:
self.queue.join()
def send(self, data):
# do not send empty data because
# this will end the chunked body
if not data:
return
# put the data to send into the queue
# it will be processed by the send coroutine
self.queue.put(data)
def finish(self, metachunk_size, metachunk_hash):
parts = [
'0\r\n',
'%s: %s\r\n' % (chunk_headers['metachunk_size'],
metachunk_size),
'%s: %s\r\n' % (chunk_headers['metachunk_hash'],
metachunk_hash),
'\r\n'
]
to_send = "".join(parts)
#.........这里部分代码省略.........
示例2: Crawler
# 需要导入模块: from eventlet import Queue [as 别名]
# 或者: from eventlet.Queue import join [as 别名]
class Crawler(object):
    """
    Traverse all the pages of a site and process their content in a
    user-defined way.

    Subclasses must implement ``save``. ``parse`` also calls
    ``self.find_links(response)``, which is not defined in this class and
    is presumably supplied by the subclass as well.

    :param init_urls: the very first urls to start with.
    :param max_workers: number of workers started by ``run``.

    Attributes:
        q: the queue that stores all urls to be crawled
        urls: a set of all urls already seen (queued or crawled)
        s: the ``requests`` session used by ``fetch``
        root_hosts: ``get_netloc()`` of every initial url
    """

    def __init__(self, init_urls, max_workers=200):
        self.init_urls = init_urls
        self.max_workers = max_workers
        self.q = Queue()
        self.urls = set()
        self.s = requests.Session()
        self.root_hosts = set()
        for url in init_urls:
            self.root_hosts.add(get_netloc(url))
            # A url listed twice in init_urls is queued only once, so it
            # is not crawled twice.
            if url in self.urls:
                continue
            self.urls.add(url)
            self.q.put(url)

    def url_allowed(self, url):
        """Check if given url will be crawled.

        Currently, only if the url belongs to the same host as one of
        the initial urls.
        """
        return get_netloc(url) in self.root_hosts

    def save(self, response):
        """Save data at the given url.

        :raises NotImplementedError: always; subclasses must override.
        """
        raise NotImplementedError(
            "Please implement your own save logic in subclass.")

    def parse(self, response):
        """Save ``response`` and queue every new, allowed link found in it.

        A link is new when it is not yet in ``self.urls``; it is recorded
        there as soon as it is queued, so each url is queued at most once.
        """
        self.save(response)
        found = 0
        for url in self.find_links(response):
            if url not in self.urls and self.url_allowed(url):
                self.urls.add(url)
                self.q.put(url)
                found += 1
        if found:
            print("Find %d new urls to crawl" % found)

    def fetch(self, url):
        """Fetch content of the url from network."""
        response = self.s.get(url)
        print("Getting content from %s, length: %d" % (url,
              len(response.content)))
        return response

    def work(self, i):
        """Define the work process.

        Retrieve a url from queue, fetch the content from it,
        process it and get new urls to crawl.
        Continue the process until the worker is killed.

        A failure while handling one url is reported and the worker moves
        on to the next url. The url is always acknowledged with
        ``task_done()``; otherwise ``run`` would block forever in
        ``q.join()``.

        :param i: indicate the worker number
        """
        while True:
            # Kept outside the try block: if get() is interrupted there is
            # no task to acknowledge.
            url = self.q.get()
            try:
                print("Worker %d: Getting url %s from queue." % (i, url))
                response = self.fetch(url)
                self.parse(response)
            except Exception as exc:
                print("Worker %d: Failed to process %s (%s)" % (i, url, exc))
            finally:
                self.q.task_done()

    def run(self):
        """Start the crawling process.

        This is the main entrance for our crawler. It will start several
        workers, crawling in parallel, and return once every queued url
        has been processed.
        """
        pool = eventlet.GreenPool()
        start = time.time()
        workers = [pool.spawn(self.work, i)
                   for i in range(self.max_workers)]
        try:
            self.q.join()
        finally:
            # The workers loop forever; once the queue is drained they are
            # all blocked in q.get(), so kill them instead of leaking them.
            for worker in workers:
                worker.kill()
        end = time.time()
        print("Finished crawling, takes %s seconds." % str(end - start))
        print("Have fun hacking!")