本文整理汇总了Python中dispatcher.Dispatcher.processinq方法的典型用法代码示例。如果您正苦于以下问题:Python Dispatcher.processinq方法的具体用法?Python Dispatcher.processinq怎么用?Python Dispatcher.processinq使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类dispatcher.Dispatcher的用法示例。
在下文中一共展示了Dispatcher.processinq方法的1个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: CrawlJob
# Required import: from dispatcher import Dispatcher [as alias]
# or: from dispatcher.Dispatcher import processinq [as alias]
class CrawlJob(object):
def __init__(self, hq, jobname):
    '''Set up the mapper, scheduler and incoming queue for one crawl job.

    hq      -- object providing ``jobconfigs`` (copied onto this job) and
               ``get_domaininfo()`` (used when the dispatcher is built).
    jobname -- job name; used to look up the workset directory, the
               incoming-queue directory and the job's 'dispatcher' setting.
    '''
    self.hq = hq
    self.jobconfigs = hq.jobconfigs
    self.jobname = jobname

    self.mapper = CrawlMapper(self, hqconfig.NWORKSETS_BITS)
    workset_dir = hqconfig.worksetdir(jobname)
    self.scheduler = Scheduler(workset_dir, self.mapper)
    inq_dir = hqconfig.inqdir(jobname)
    self.inq = PooledIncomingQueue(qdir=inq_dir, buffsize=1000)

    # per-job 'dispatcher' setting; 'internal' is passed as the second
    # argument to hqconfig.get() (presumably the default -- confirm)
    mode_key = ('jobs', jobname, 'dispatcher')
    self._dispatcher_mode = hqconfig.get(mode_key, 'internal')
    # the dispatcher is not created here; init_dispatcher() builds it
    self.dispatcher = None

    # currently disabled by default - too slow
    self.use_crawlinfo = False
    self.save_crawlinfo = False

    self.last_inq_count = 0
# (attribute name, type) pairs. Presumably the job parameters that may be
# changed from outside and the type each value should have -- confirm
# against the code that reads PARAMS.
PARAMS = [
    ('use_crawlinfo', bool),
    ('save_crawlinfo', bool),
    ('dispatcher_mode', str),
]
@property
def dispatcher_mode(self):
    '''Current dispatcher mode, as stored in ``_dispatcher_mode``.'''
    mode = self._dispatcher_mode
    return mode

@dispatcher_mode.setter
def dispatcher_mode(self, value):
    '''Store the new mode; setting 'external' also calls shutdown_dispatcher().'''
    self._dispatcher_mode = value
    going_external = value == 'external'
    if going_external:
        # init_dispatcher() refuses to build a dispatcher in this mode,
        # so drop any one that already exists
        self.shutdown_dispatcher()
def init_dispatcher(self):
    '''Create the Dispatcher for this job if there is none yet.

    Does nothing when ``self.dispatcher`` is already set. Otherwise builds
    a Dispatcher from the hq's domain info, the job name, and this job's
    mapper, scheduler and the incoming queue's ``rqfile``.

    Raises RuntimeError when ``dispatcher_mode`` is 'external'.
    '''
    if self.dispatcher:
        return
    if self.dispatcher_mode == 'external':
        # call-style raise: the "raise E, msg" statement form is a syntax
        # error on Python 3; this spelling behaves the same on 2 and 3
        raise RuntimeError('dispatcher mode is %s' % self.dispatcher_mode)
    self.dispatcher = Dispatcher(self.hq.get_domaininfo(),
                                 self.jobname,
                                 mapper=self.mapper,
                                 scheduler=self.scheduler,
                                 inq=self.inq.rqfile)
def shutdown_dispatcher(self):
    '''Shut down and forget the current dispatcher; no-op when there is none.'''
    dispatcher = self.dispatcher
    if dispatcher:
        logging.info("shutting down dispatcher")
        dispatcher.shutdown()
        # cleared only after shutdown() returned
        self.dispatcher = None
def shutdown(self):
    '''Shut the job down: scheduler first, then the incoming queue
    (flush, then close), then the dispatcher.'''
    log = logging.info

    log("shutting down scheduler")
    self.scheduler.shutdown()

    log("closing incoming queues")
    inq = self.inq
    inq.flush()
    inq.close()

    self.shutdown_dispatcher()
    log("done.")
def get_status(self):
    '''Return a status dict for this job.

    Keys: 'job' (job name), 'oid' (``id()`` of this object), 'sch' and
    'inq' (``get_status()`` of the scheduler / incoming queue, or the
    falsy attribute value itself when that attribute is unset).
    '''
    status = {'job': self.jobname, 'oid': id(self)}

    scheduler = self.scheduler
    status['sch'] = scheduler.get_status() if scheduler else scheduler

    inq = self.inq
    status['inq'] = inq.get_status() if inq else inq

    return status
def get_workset_status(self):
    '''Return a dict with the job name and ``id()`` of this object, plus,
    when a scheduler is set, its ``id()`` under 'sch' and its
    ``get_workset_status()`` result under 'worksets'.'''
    status = {'job': self.jobname, 'crawljob': id(self)}
    scheduler = self.scheduler
    if not scheduler:
        return status
    status['sch'] = id(scheduler)
    status['worksets'] = scheduler.get_workset_status()
    return status
def workset_activating(self, *args):
    '''Make sure the dispatcher exists, then pass ``args`` unchanged to its
    workset_activating().'''
    self.init_dispatcher()
    dispatcher = self.dispatcher
    dispatcher.workset_activating(*args)
def schedule(self, curis):
    '''Schedule every item of curis directly on the scheduler, bypassing
    the seen-check. Typically used for starting a new crawl cycle.

    Returns a dict whose 'processed' and 'scheduled' values are both the
    number of items handed to the scheduler.'''
    count = 0
    for count, curi in enumerate(curis, 1):
        self.scheduler.schedule(curi)
    return {'processed': count, 'scheduled': count}
def discovered(self, curis):
    '''Add curis to the incoming queue; returns whatever ``inq.add()`` returns.'''
    result = self.inq.add(curis)
    return result
def processinq(self, maxn):
    '''Make sure the dispatcher exists, then return the result of its
    processinq(maxn).'''
    self.init_dispatcher()
    dispatcher = self.dispatcher
    return dispatcher.processinq(maxn)
def makecuri(self, o):
# temporary rescue measure. delete after everything's got fixed.
a = o.get('a')
#.........这里部分代码省略.........