This article collects typical usage examples of the ThreadPool.map method from the Python threadpool module. If you have been wondering what exactly Python ThreadPool.map does, how to use it, or what it looks like in practice, the curated code example here may help. You can also read more about the containing class, threadpool.ThreadPool.
One code example of the ThreadPool.map method is shown below; examples are sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
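Judging from Example 1 below, this ThreadPool exposes a map(worker, callback, items) call that runs the worker over each item on the pool threads and hands each result to the callback. The following is a minimal sketch of that call pattern only; the names do_work and on_result, the pool size, and the exact semantics are assumptions drawn from the example, not a documented API.
# Minimal sketch of the ThreadPool.map call pattern used in Example 1 below.
# Assumption: map(worker, callback, items) applies worker to every item on the
# pool threads and passes each result to callback; names here are illustrative.
from threadpool import ThreadPool

def do_work(item):
    return item.upper()        # per-item worker run on a pool thread

def on_result(result):
    print result               # invoked with each worker's result

pool = ThreadPool(4)
pool.map(do_work, on_result, ["a", "b", "c"])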
Example 1: LinkExtractorTPool
# Required import: from threadpool import ThreadPool [as alias]
# Or: from threadpool.ThreadPool import map [as alias]
# Imports implied by the example (assumption: etree here is lxml.etree and the
# code targets Python 2, hence Queue and urlparse):
import re
import Queue
from urlparse import urlparse
from lxml import etree
from threadpool import ThreadPool
class LinkExtractorTPool(object):
    """ TO BE FIXED """

    def __init__(self, feed=True):
        # Thread pool
        if feed:
            self.__tpool = ThreadPool(100)
        # Queues where the parsed xhtml etree(s) and the extracted URL types are stored
        self.__etree_q = Queue.Queue()
        self.__site_links_q = Queue.Queue()
        self.__media_links_q = Queue.Queue()
        self.__scripts_links_q = Queue.Queue()
        self.__undefined_links_q = Queue.Queue()
        # Default queue returned by the iterator or callable interface of this class
        self.__call_q = self.__etree_q
        # XPath objects for extracting the URL types
        self.__extract_site_urls = etree.XPath("/html/body//a/@href")
        self.__extract_media_urls = etree.XPath("//src")
        self.__extract_scripts_urls = etree.XPath("//src")
        self._url_href = re.compile('<a href="([^"]+)"')
    def __iter__(self):
        """Be careful when using the LinkExtractor as an iterator."""
        return self

    def next(self):
        """Be careful: as an 'Iterator' the class returns the etree queue by default,
        or the one selected by the proper method below."""
        try:
            print "GET ITER"
            return self.__call_q.get(timeout=1)  # the timeout value may need tuning
        except Queue.Empty:
            print "EMPTY ITER"
            raise StopIteration

    def __call__(self):
        """Be careful: as a 'Callable' the class returns the etree queue by default,
        or the one selected by the proper method below."""
        try:
            return self.__call_q.get(timeout=1)  # the timeout value may need tuning
        except Queue.Empty:
            return False
    def feed(self, xhtml):
        self.__tpool.dispatch(self.__parseto_xtree, xhtml, self.__callback_chain)

    def l_feed(self, xhtml_l):
        if isinstance(xhtml_l, list):
            self.__tpool.map(self.__parseto_xtree, self.__callback_chain, xhtml_l)
        else:
            raise Exception("LinkExtractor.l_feed() Error: List() argument was expected")
    def __callback_chain(self, etree):
        # Put the etree on the etree queue so all available URLs can be retrieved
        self.__etree_q.put(etree)
        # Find links to other sites and put them in the queue
        site_links = self.__site_links(etree)
        if site_links:
            self.__site_links_q.put(site_links)
        # Find links to media and put them in the queue
        media_links = self.__media_links(etree)
        if media_links:
            self.__media_links_q.put(media_links)
        # Find links to scripts and put them in the queue
        script_links = self.__scripts_links(etree)
        if script_links:
            self.__scripts_links_q.put(script_links)
        # Find any remaining, undefined links and put them in the queue
        undefined_links = self.__undefined_links(etree)
        if undefined_links:
            self.__undefined_links_q.put(undefined_links)
    def all_links(self, etree):
        links = list()
        for link in etree.iterlinks():
            links.append(link)
        return links

    def sites_links(self, xhtml):
        url_l = self._url_href.findall(xhtml['xhtml_s'])
        for i, url in enumerate(url_l):
            # Blank out fragment URLs, then prefix URLs without a netloc with the base URL
            if url.find('#') > -1:
                url = ""
            prsd_url = urlparse(url)
            if not prsd_url.netloc:
                url_l[i] = xhtml['base_url'] + url
        return url_l

    def media_links(self, xhtml):
        return None  # to be fixed

    def scripts_links(self, xhtml):
        return None  # to be fixed

    def undefined_links(self, xhtml):
        return None  # to be fixed

    def __site_links(self, etree):
        return self.__extract_site_urls(etree)

    def __media_links(self, etree):
# ......... part of the code is omitted here .........
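A hedged usage sketch of Example 1 follows, assuming the omitted __parseto_xtree method parses each fed document into an etree that is handed to the callback chain, and that xhtml_docs is a list of documents in whatever format __parseto_xtree expects; neither name is defined in the excerpt above.
# Usage sketch (assumptions: xhtml_docs is a list of documents in the format
# expected by the omitted __parseto_xtree; parsing fans out via ThreadPool.map).
extractor = LinkExtractorTPool(feed=True)
extractor.l_feed(xhtml_docs)
for parsed in extractor:    # drains the default etree queue until it is empty
    print parsed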