本文整理汇总了Python中BTrees.OIBTree.OIBTree.clear方法的典型用法代码示例。如果您正苦于以下问题:Python OIBTree.clear方法的具体用法?Python OIBTree.clear怎么用?Python OIBTree.clear使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类BTrees.OIBTree.OIBTree的用法示例。
在下文中一共展示了OIBTree.clear方法的1个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: LinkCheckTool
# Required import: from BTrees.OIBTree import OIBTree
# Note: clear() is a method of OIBTree instances (e.g. self.index.clear()); it cannot be imported on its own.
class LinkCheckTool(SimpleItem):
security = ClassSecurityInfo()
def __init__(self, id=None):
    """Create an empty link-check tool.

    ``id`` is passed through unchanged to the base-class initializer.
    All queues, the link database, the indexes and the counter start
    out empty / at zero.
    """
    super(LinkCheckTool, self).__init__(id)

    # Work queue: whatever is put here is scheduled for a link
    # validity check.
    self.queue = CompositeQueue()

    # Separate queue used by the internal crawler to revalidate the
    # site.
    self.crawl_queue = CompositeQueue()

    # Link database: maps a hyperlink index to a
    # (timestamp, status, referers) tuple.
    self.checked = IOBTree()

    # Indexes. ``index`` is looked up by URL elsewhere in this class.
    self.index = OIBTree()
    self.links = IOBTree()

    # Running counter that lets new hyperlinks be added and given an
    # index quickly.
    self.counter = 0
security.declarePrivate("is_available")
def is_available(self):
    """Tell whether the core storage attributes are present.

    Returns True only if ``index``, ``checked``, ``queue`` and
    ``counter`` are all found on the instance by ``hasattr``; the
    attributes are probed in that order and probing stops at the first
    one that is missing.
    """
    required = ('index', 'checked', 'queue', 'counter')
    return all(hasattr(self, name) for name in required)
security.declarePrivate("clear")
def clear(self):
    """Drain both queues and reset the link database, indexes and counter.

    Each queue is emptied by calling ``pull()`` repeatedly until it
    raises ``IndexError``; afterwards ``checked``, ``index`` and
    ``links`` are cleared and ``counter`` is set back to 0.
    """
    for queue_name in ('queue', 'crawl_queue'):
        pending = getattr(self, queue_name)
        while True:
            try:
                pending.pull()
            except IndexError:
                # Nothing left to pull from this queue.
                break

    for mapping_name in ('checked', 'index', 'links'):
        getattr(self, mapping_name).clear()

    self.counter = 0
security.declarePrivate("crawl")
def crawl(self):
    """Reset the tool, then run the link check on catalogued content.

    The catalog query is restricted by the ``content_types`` and
    ``workflow_states`` settings whenever those are non-empty. For
    every matching brain the object is loaded, its ``@@linkcheck``
    view is invoked, and an info message is logged.
    """
    self.clear()

    settings = getUtility(IRegistry).forInterface(ISettings)
    filters = (
        ('portal_type', settings.content_types),
        ('review_state', settings.workflow_states),
    )
    query = {}
    for key, wanted in filters:
        # Empty settings place no restriction on the query.
        if wanted:
            query[key] = wanted

    catalog = api.portal.get_tool('portal_catalog')
    for brain in catalog(query):
        # Asynchronous crawling is not working yet, so each object is
        # checked inline here instead of going through
        # self.crawl_enqueue(brain.UID).
        content = brain.getObject()
        linkcheck_view = content.restrictedTraverse('@@linkcheck')
        linkcheck_view()
        message = 'Crawling: checked {0}'.format(brain.getURL())
        logger.info(message)
security.declarePrivate("enqueue")
def enqueue(self, url):
    """Put ``url`` on the work queue and return the index that was queued.

    * URL not in ``self.index``: it is stored and the new index is used.
    * URL known with a non-empty ``checked`` entry: the entry's first
      element (the timestamp slot of the link database tuple) is set
      to None, the other two elements are kept, and the existing index
      is reused.
    * URL known but its ``checked`` entry is missing or empty: the URL
      is removed and stored again, and the fresh index is used.
    """
    known = self.index.get(url)
    if known is None:
        # A really new url.
        index = self.store(url)
    else:
        entry = self.checked.get(known)
        if entry:
            self.checked[known] = (None, entry[1], entry[2])
            index = known
        else:
            # Reset the empty entry by re-registering the url.
            self.remove(url)
            index = self.store(url)

    self.queue.put(index)
    return index
security.declarePrivate("register")
def register(self, hrefs, referer, timestamp):
"""Add or update link presence information.
If a link has not been checked since the provided timestamp,
it will be added to the queue (or if it is not in the
database).
"""
referer = self.index.get(referer) or self.store(referer)
#.........这里部分代码省略.........