本文整理汇总了 Python 中 threadPool.ThreadPool.close 方法的典型用法代码示例。如果您正苦于以下问题:Python ThreadPool.close 方法的具体用法?Python ThreadPool.close 怎么用?Python ThreadPool.close 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 threadPool.ThreadPool 的用法示例。
在下文中一共展示了ThreadPool.close方法的1个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: Scheduler
# 需要导入模块: from threadPool import ThreadPool [as 别名]
# 或者: from threadPool.ThreadPool import close [as 别名]
class Scheduler(object):
    '''Initialise and start every crawler module, and feed URLs to the downloaders.

    The scheduler owns three shared queues (pending URLs, downloaded HTML
    awaiting parsing, parsed data awaiting storage), one private queue per
    downloader, and the thread pool on which the downloader, parser and
    storage requests are run.
    '''

    # Seconds that must elapse after the main loop starts before the
    # "everything is drained" exit condition is evaluated, so the crawler
    # does not quit before the first pages have been fetched.
    EXIT_CHECK_DELAY = 30

    # Seconds the main loop sleeps after a pass in which no URL was handed
    # out; prevents the loop from spinning at full CPU while idle.
    IDLE_SLEEP = 0.01

    def __init__(self, dbName, threadNum, logLevel, startUrls, depth, keyword, downloadMode):
        '''Store the crawl configuration and create the queues and thread pool.

        dbName       -- forwarded to Storage in start()
        threadNum    -- number of Downloader requests created in start()
        logLevel     -- stored only; not otherwise used by this class
        startUrls    -- iterable of seed URLs; also forwarded to Parser
        depth        -- forwarded to Parser
        keyword      -- forwarded to Parser
        downloadMode -- forwarded to every Downloader
        '''
        self.__threadNum = threadNum
        self.__startUrls = startUrls
        self.__depth = depth
        self.__keyword = keyword
        self.__downloadMode = downloadMode
        self.__dbName = dbName
        self.__logLevel = logLevel
        # Set when the crawl is finished; handed to every worker request.
        self.__exitEvent = threading.Event()
        # URL nodes waiting to be downloaded.
        self.__urlQueue = Queue.Queue()
        # HTML nodes that have been downloaded and are waiting to be parsed.
        self.__htmlQueue = Queue.Queue()
        # Parsed HTML nodes that qualify for being written to the database.
        self.__dataQueue = Queue.Queue()
        # One download queue per downloader, filled by the main loop.
        self.__downloadQueueList = []
        # start() submits threadNum downloaders plus one parser and one
        # storage request, hence threadNum + 2.
        self.__threadPool = ThreadPool(threadNum + 2)
        # NOTE(review): this int is passed to each Downloader by value and is
        # never written by this class, so the `< 1` test in the exit
        # condition always passes here. Sharing in-flight state with the
        # downloaders would need a mutable object -- confirm against
        # Downloader before relying on it.
        self.__downloadingFlag = 0

    def __initUrlQueue(self, urlList):
        '''Wrap each URL in the internal node format and enqueue it.'''
        for url in urlList:
            urlNode = UrlModel(url, '', timestamp(), 0)
            self.__urlQueue.put(urlNode)

    def __isDrained(self):
        '''Return True when all three shared queues are empty and the
        downloading flag reports no download in progress.'''
        return (self.__urlQueue.qsize() < 1 and
                self.__htmlQueue.qsize() < 1 and
                self.__dataQueue.qsize() < 1 and
                self.__downloadingFlag < 1)

    def start(self):
        '''Create and start every module, then run the dispatch loop.

        Blocks until the crawl is considered finished: at least
        EXIT_CHECK_DELAY seconds have passed since the loop started and all
        queues are empty. It then sets the exit event, closes the thread
        pool and returns.
        '''
        logger.debug('Init start urls...')
        self.__initUrlQueue(self.__startUrls)

        # Start threadNum downloaders, each with its own download queue.
        logger.debug('Put downloader to thread pool...')
        for _ in range(self.__threadNum):
            dlQueue = Queue.Queue()
            self.__downloadQueueList.append(dlQueue)
            downloadReq = Downloader(dlQueue, self.__downloadMode, self.__htmlQueue,
                                     self.__exitEvent, self.__downloadingFlag)
            self.__threadPool.putRequest(downloadReq)

        # Create the parser module and run it on the thread pool.
        logger.debug('Put parser to thread pool...')
        parserReq = Parser(self.__depth, self.__startUrls, self.__keyword,
                           self.__htmlQueue, self.__dataQueue, self.__urlQueue,
                           self.__exitEvent)
        self.__threadPool.putRequest(parserReq)

        # Create the storage module and run it on the thread pool.
        logger.debug('Put storage to thread pool...')
        storageReq = Storage(self.__dbName, self.__dataQueue, self.__exitEvent)
        self.__threadPool.putRequest(storageReq)

        # Main loop: hand URLs to idle download queues and log queue sizes.
        logger.debug('start main loop...')
        startTime = time.time()
        lastTime = startTime
        while True:
            dispatched = False
            for dlQueue in self.__downloadQueueList:
                # Give a downloader at most one pending URL at a time.
                if self.__urlQueue.qsize() > 0 and dlQueue.qsize() < 1:
                    dlQueue.put(self.__urlQueue.get())
                    dispatched = True

            now = time.time()
            if now - lastTime > PRINT_TIME_INTERVAL:
                logger.info('URL QUEUE SIZE : %d', self.__urlQueue.qsize())
                logger.info('HTML QUEUE SIZE : %d', self.__htmlQueue.qsize())
                logger.info('DATA QUEUE SIZE : %d', self.__dataQueue.qsize())
                logger.info('REPEAT SET SIZE : %d', parserReq.getRepeatSetSize())
                # Delay the exit check so the program does not quit right
                # after start-up. The delay is measured from the start of the
                # loop rather than from the last log line; otherwise a log
                # interval shorter than the delay would reset the timer
                # forever and the crawler could never terminate.
                if now - startTime > self.EXIT_CHECK_DELAY and self.__isDrained():
                    self.__exitEvent.set()
                    self.__threadPool.close(True)
                    return
                lastTime = now

            if not dispatched:
                # Nothing to hand out this pass: yield the CPU instead of
                # busy-waiting so worker threads are not starved.
                time.sleep(self.IDLE_SLEEP)