本文整理汇总了Python中classes.Registry.Registry.drop方法的典型用法代码示例。如果您正苦于以下问题:Python Registry.drop方法的具体用法?Python Registry.drop怎么用?Python Registry.drop使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类classes.Registry.Registry的用法示例。
在下文中一共展示了Registry.drop方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: prepare_first_pages
# 需要导入模块: from classes.Registry import Registry [as 别名]
# 或者: from classes.Registry.Registry import drop [as 别名]
def prepare_first_pages(host):
    """Rebuild the ``spider_urls`` MongoDB collection with the first pages of a host.

    The collection is dropped and refilled from the URLs that ``UrlsModel``
    lists for the current ``pData`` id and ``host``.  If that list is empty,
    the root URL ``/`` is added for the host (and the fact is logged) and the
    list is fetched again.

    Each stored row carries the parsed path/query, a hash of ``path + query``
    and zeroed bookkeeping fields.  Afterwards a unique index on ``hash`` and
    a plain index on ``checked`` are created.

    :param host: host name whose URLs are loaded into the collection.
    """
    pid = Registry().get('pData')['id']

    coll = Registry().get('mongo').spider_urls
    coll.drop()

    Urls = UrlsModel()
    urls = Urls.list_by_host_name_for_spider(pid, host)
    if not len(urls):
        Registry().get('logger').log("Spider: Root URL was added automaticaly")
        Urls.add(
            pid, HostsModel().get_id_by_name(pid, host), '/', who_add='spider'
        )
        urls = Urls.list_by_host_name_for_spider(pid, host)

    # Rows with an already-seen hash are skipped up front.  The unique index
    # below used to rely on ``dropDups`` to discard them, but MongoDB 3.0+
    # ignores that option and the index build fails on duplicate values.
    # NOTE(review): assumes md5() returns a hashable digest (e.g. a hex
    # string) -- confirm against the project's helper.
    seen_hashes = set()
    for url in urls:
        url = urlparse(url['url'])
        url_hash = md5(str(url.path + url.query))
        if url_hash in seen_hashes:
            continue
        seen_hashes.add(url_hash)

        data = {
            'hash': url_hash,
            'path': url.path,
            'query': url.query,
            'time': 0,
            'code': 0,
            'checked': 0,
            'getted': 0,
            'referer': '',
            'size': 0,
            'founder': 'spider',
        }
        coll.insert(data)

    # ``dropDups`` is kept so behaviour on pre-3.0 servers is unchanged.
    coll.create_index([('hash', 1)], unique=True, dropDups=True)
    coll.create_index([('checked', 1)])
示例2: MongoJob
# 需要导入模块: from classes.Registry import Registry [as 别名]
# 或者: from classes.Registry.Registry import drop [as 别名]
class MongoJob(WSJob):
""" Common base class for jobs that work with MongoDB """
# Passed as the ``unique`` option when load_dom() creates the index on
# 'name'; changed through set_unique().
unique = True
# MongoDB collection used by the job; assigned in __init__ from
# collection_name.
collection = None
# Not referenced by any method visible in this listing.
# NOTE(review): get() compares qsize() against a literal 50 -- confirm
# whether that threshold was meant to follow this attribute.
select_limit = 50
# Changed through set_skip_blank_rows(); not read by any method visible in
# this listing.
skip_blank_rows = True
# Incremented by one on every task_done() call.
counter = 0
# Key used in __init__ to look the collection up in the 'mongo' registry
# entry.  None here -- presumably overridden by subclasses; confirm.
collection_name = None
def __init__(self, maxsize=0):
    """Initialise the WSJob base and bind this job's MongoDB collection.

    :param maxsize: forwarded unchanged to ``WSJob.__init__``.
    """
    WSJob.__init__(self, maxsize)
    mongo_db = Registry().get('mongo')
    self.collection = mongo_db[self.collection_name]
def build_row(self, _str):
    """Build the document stored for one word.

    :param _str: raw word; surrounding whitespace is stripped.
    :return: dict with the stripped word under ``name`` and the ``checked``
        and ``getted`` flags both set to 0.
    """
    row = dict(name=_str.strip(), checked=0, getted=0)
    return row
def qsize(self):
    """Return the number of rows in the collection whose ``checked`` is 0."""
    unchecked = self.collection.find({"checked": 0})
    return unchecked.count()
def set_unique(self, unique=True):
    """Store the ``unique`` flag on the job.

    The flag is later handed to ``create_index`` as its ``unique`` option.

    :param unique: new value of the flag (defaults to True).
    """
    self.unique = unique
def set_skip_blank_rows(self, value=True):
    """Store the ``skip_blank_rows`` flag on the job.

    :param value: new value of the flag (defaults to True); it is meant to
        control whether blank rows are skipped when the queue is filled.
    """
    self.skip_blank_rows = value
def task_done(self, name):
    """Mark the row called ``name`` as processed.

    Bumps ``counter``, sets ``checked`` to 1 on the row matching ``name``
    with ``getted`` equal to 1, then delegates to ``WSJob.task_done``.

    :param name: value of the row's ``name`` field.
    """
    self.counter += 1
    selector = {'name': str(unicode(name)), "getted": 1}
    change = {"$set": {"checked": 1}}
    self.collection.update(selector, change)
    WSJob.task_done(self)
def get(self, block=False, timeout=None):
    """Return the next item, loading more rows from MongoDB first if needed.

    :param block: forwarded to ``WSJob.get``.
    :param timeout: forwarded to ``WSJob.get``.
    :raises Queue.Empty: when nothing is queued even after loading.
    """
    # Load more when nothing is queued or fewer than 50 unchecked rows remain.
    needs_refill = self.empty() or self.qsize() < 50
    if needs_refill:
        self.load_data()

    if not self.empty():
        return WSJob.get(self, block, timeout)
    raise Queue.Empty
def load_data(self):
    """Move the next batch of untouched rows from MongoDB into the queue.

    Rows with ``checked`` and ``getted`` both 0 are fetched, limited to the
    ``mongo_data_load_per_once`` value from the ``main`` config section.
    Each row's name is put on the queue and the row is flagged ``getted``.

    :return: always True.
    """
    untouched = {"checked": 0, "getted": 0}
    batch = self.collection.find(
        untouched,
        limit=int(Registry().get('config')['main']['mongo_data_load_per_once'])
    )
    for row in batch:
        name = row['name']
        self.put(name)
        self.collection.update({"name": name}, {"$set": {"getted": 1}})
    return True
def load_dict(self, dict_for_load, drop=True):
    """Fill the collection from an iterable of words, then load the queue.

    :param dict_for_load: iterable yielding the words (lines) to store.
    :param drop: when true (the default) the collection is dropped first.
    :return: number of rows inserted; lines that raise
        ``UnicodeDecodeError`` are reported, skipped and not counted.
    """
    if drop:
        self.collection.drop()

    # The error message used to be formatted with the builtin ``file`` type,
    # which renders as "<type 'file'>".  Use the input's own name when it has
    # one (e.g. an open file object) and a plain label otherwise.
    source = getattr(dict_for_load, 'name', 'dict')

    counter = 0
    last = "START OF FILE"

    for line in dict_for_load:
        try:
            line = line.strip()
            # Conversion is done only for its side effect: it raises
            # UnicodeDecodeError for words that cannot be decoded.
            unicode(line)
            self.collection.insert(self.build_row(line))
        except UnicodeDecodeError:
            _str = " UNICODE ERROR: In file '{0}' skip word '{1}', after word '{2}' !".format(source, line, last)
            if Registry().isset('logger'):
                Registry().get('logger').log(_str)
            else:
                # Single-argument call form prints the same text under the
                # Python 2 print statement and the Python 3 function.
                print(_str)
            continue

        counter += 1
        last = line

    self.load_data()

    return counter
def load_dom(self, dom):
""" Refill the collection from a DictOfMask source.

Drops the collection, then inserts one build_row() document for every word
returned by ``dom.get()`` until it returns None, and finally creates an
index on 'name'.

NOTE(review): the listing is cut off right after this method, so further
statements of the method may be missing here -- confirm against the full
source.
"""
self.collection.drop()
while True:
word = dom.get()
# None from dom.get() ends the loop
if word is None:
break
self.collection.insert(self.build_row(word))
# Index uniqueness follows the job's ``unique`` flag
self.collection.create_index('name', drop_dups=True, unique=self.unique)
#.........这里部分代码省略.........