当前位置: 首页>>代码示例>>Python>>正文


Python ThreadPool.map方法代码示例

本文整理汇总了 Python 中 threadpool.ThreadPool.map 方法的典型用法代码示例。如果您正苦于以下问题:Python ThreadPool.map 方法的具体用法是什么?Python ThreadPool.map 怎么用?有哪些 Python ThreadPool.map 的使用例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 threadpool.ThreadPool 的用法示例。


在下文中一共展示了ThreadPool.map方法的1个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: LinkExtractorTPool

# Required import: from threadpool import ThreadPool [as alias]
# Note: map is a method called on a ThreadPool instance (e.g. ThreadPool(100).map(...)); it cannot be imported on its own.
class LinkExtractorTPool(object):
    """                                    TO BE FIXED                         """
    def __init__(self, feed=True):
        """Prepare the URL extractors and, when requested, the feeding pipeline.

        Args:
            feed: When true, a ``ThreadPool(100)`` and the result queues are
                created. When false only the XPath/regex extractors are set
                up, so the pool and the queues do not exist on the instance.
        """
        # Extractors for the different URL types; built regardless of ``feed``.
        self.__extract_site_urls = etree.XPath("/html/body//a/@href")
        # NOTE(review): media and scripts use the very same "//src" expression,
        # which selects elements named "src" rather than src attributes --
        # confirm whether something like "//@src" was intended.
        self.__extract_media_urls = etree.XPath("//src")
        self.__extract_scripts_urls = etree.XPath("//src")
        # Regex used by sites_links() to pull href values out of raw markup.
        self._url_href = re.compile('<a href="([^"]+)"')

        if not feed:
            return

        # Thread pool that runs the parsing jobs.
        self.__tpool = ThreadPool(100)
        # One queue for the parsed trees plus one queue per link category.
        new_queue = Queue.Queue
        self.__etree_q = new_queue()
        self.__site_links_q = new_queue()
        self.__media_links_q = new_queue()
        self.__scripts_links_q = new_queue()
        self.__undefined_links_q = new_queue()
        # Queue served when the instance is iterated over or called; the
        # parsed-tree queue is the default.
        self.__call_q = self.__etree_q
        
        
    def __iter__(self):
        """Return the extractor itself so it can be used as an iterator.

        Be careful when using the LinkExtractor as an iterator: the items
        come from ``next()``, which ends the iteration as soon as the
        selected queue stays empty for its timeout.
        """
        return self
    
    def next(self):
        """Be careful: class as 'Iterator' returns etrees queue, by default, 
        or the one defined but the proper function bellow"""
        try:
            print "GET ITER"
            return self.__call_q.get(timeout=1) #timeout maybe should be trimmed 
        except Queue.Empty:
            print "EMPTY ITER" 
            raise StopIteration 
    
    def __call__(self):
        """Be careful: class as 'Callable' returns etrees queue, by default, 
        or the one defined but the proper function bellow"""
        try:
            return self.__call_q.get(timeout=1) #timeout maybe should be trimmed
        except Queue.Empty:
            return False 
    
    def feed(self, xhtml):
        """Submit a single document to the thread pool.

        The pool's ``dispatch`` receives the parser, the document and the
        callback chain, in that order.
        Presumably the callback is invoked with the parser's result --
        confirm against ThreadPool.dispatch.

        Args:
            xhtml: The document handed to the parser.
        """
        parser = self.__parseto_xtree
        on_parsed = self.__callback_chain
        self.__tpool.dispatch(parser, xhtml, on_parsed)
        
    def l_feed(self, xhtml_l):
        if isinstance(xhtml_l, list):
            self.__tpool.map(self.__parseto_xtree,  self.__callback_chain, xhtml_l)
        else:
            raise Exception("LinkExtractor.l_feed() Error: List() argument was expected")
            
    
    def __callback_chain(self, etree):
        """Queue a parsed tree and every non-empty group of links found in it.

        The tree itself always goes to the parsed-tree queue. Site, media,
        script and undefined links are then extracted in turn, and each
        result is put on its own queue only when it is truthy.

        Args:
            etree: The parsed document passed to the extractors (note that
                the parameter name shadows any module-level ``etree`` inside
                this method).
        """
        # Put the tree on the parsed-tree queue so all URLs stay reachable.
        self.__etree_q.put(etree)
        # Links to other sites.
        site_links = self.__site_links(etree)
        if site_links:
            self.__site_links_q.put(site_links)
        # Links to media.
        media_links = self.__media_links(etree)
        if media_links:
            self.__media_links_q.put(media_links)
        # Links to scripts. This used to call the media extractor a second
        # time (copy-paste slip), so the scripts queue only ever received
        # media results. The scripts XPath object built in __init__ is used
        # directly, mirroring what __site_links does for site URLs.
        script_links = self.__extract_scripts_urls(etree)
        if script_links:
            self.__scripts_links_q.put(script_links)
        # Links that fit none of the categories above.
        undefined_links = self.__undefined_links(etree)
        if undefined_links:
            self.__undefined_links_q.put(undefined_links)
    
    def all_links(self, etree):
        links = list()
        for link in etree.iterlinks():
            links.append(link)
            
    def sites_links(self, xhtml): 
        url_l = self._url_href.findall(xhtml['xhtml_s'])
        for i, url in enumerate(url_l):
            if url.find('#') > -1:
                url = ""
            prsd_url = urlparse(url)
            if not prsd_url.netloc:
                url_l[i] = xhtml['base_url'] + url
        return url_l
    
    def media_links(self, xhtml):
        """Placeholder for media-link extraction; currently always returns None.

        Args:
            xhtml: Accepted for interface symmetry with ``sites_links``; unused.
        """
        return None  # TODO: not implemented yet
    
    def scripts_links(self, xhtml):
        """Placeholder for script-link extraction; currently always returns None.

        Args:
            xhtml: Accepted for interface symmetry with ``sites_links``; unused.
        """
        return None  # TODO: not implemented yet
    
    def undefined_links(self, xhtml):
        """Placeholder for uncategorised-link extraction; currently always returns None.

        Args:
            xhtml: Accepted for interface symmetry with ``sites_links``; unused.
        """
        return None  # TODO: not implemented yet
    
    def __site_links(self, etree):
        return self.__extract_site_urls(etree)
    
    def __media_links(self, etree):
#.........这里部分代码省略.........
开发者ID:dpritsos,项目名称:synergeticprocessing,代码行数:103,代码来源:linkextractors.py


注:本文中的threadpool.ThreadPool.map方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。