本文整理汇总了Python中classes.Registry.Registry.find方法的典型用法代码示例。如果您正苦于以下问题：Python Registry.find方法的具体用法？Python Registry.find怎么用？Python Registry.find使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类classes.Registry.Registry的用法示例。
在下文中一共展示了Registry.find方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _get_codes_stat
# 需要导入模块: from classes.Registry import Registry [as 别名]
# 或者: from classes.Registry.Registry import find [as 别名]
def _get_codes_stat(self):
    """Group the spidered links by their HTTP response code.

    Reads the ``spider_urls`` collection of the registry's 'mongo' entry.

    Returns:
        dict: maps each distinct ``code`` value (converted with ``int``) to
        the list of links stored with that code. A link is ``path?query``
        when the stored query is non-empty, otherwise just ``path``.
    """
    urls = Registry().get('mongo').spider_urls
    stat = {}
    # group() on the 'code' key yields one document per distinct code.
    for group in urls.group({'code': True}, '', {}, 'function () {}'):
        http_code = group['code']
        # Only path and query are needed to rebuild the link.
        cursor = urls.find({'code': http_code}, {'path': 1, 'query': 1})
        stat[int(http_code)] = [
            (row['path'] + '?' + row['query']) if row['query'] else row['path']
            for row in mongo_result_to_list(cursor)
        ]
    return stat
示例2: _get_extensions
# 需要导入模块: from classes.Registry import Registry [as 别名]
# 或者: from classes.Registry.Registry import find [as 别名]
def _get_extensions(self):
    """Map every file extension found in spidered paths to its links.

    Reads the ``spider_urls`` collection of the registry's 'mongo' entry.
    An extension is the tail of a path starting at its last dot, accepted
    only when that tail (dot included) is at most 5 characters long.

    NOTE(review): the tail is taken from the whole path, so a dot in a
    directory name near the end (e.g. '/a.b/c') is also treated as an
    extension -- confirm whether that is intended.

    Returns:
        dict: extension (with the leading dot) -> list of links whose path
        ends with it. A link is ``path?query`` when the stored query is
        non-empty, otherwise just ``path``.
    """
    result = {}
    coll = Registry().get('mongo').spider_urls
    # group() on the 'path' key yields one document per distinct path.
    rows = mongo_result_to_list(coll.group({'path': True}, '', {}, 'function () {}'))

    exts = set()
    for row in rows:
        path = row['path']
        dot_pos = path.rfind('.')
        if dot_pos > -1 and len(path) - dot_pos <= 5:
            exts.add(path[dot_pos:])

    for ext in exts:
        # Extensions come from crawled data and may contain regex
        # metacharacters ('.c++', '.a(b'); escape the whole extension
        # instead of only the leading dot.
        pattern = re.compile(re.escape(ext) + '$')
        matches = mongo_result_to_list(coll.find({'path': pattern}))
        result[ext] = [
            (link['path'] + '?' + link['query']) if link['query'] else link['path']
            for link in matches
        ]
    return result
示例3: MongoJob
# 需要导入模块: from classes.Registry import Registry [as 别名]
# 或者: from classes.Registry.Registry import find [as 别名]
class MongoJob(WSJob):
""" Common base class for jobs that work with MongoDB """
# Passed as `unique=` when load_dom() creates the index on 'name'; changed via set_unique().
unique = True
# Bound in __init__ to Registry().get('mongo')[self.collection_name].
collection = None
# NOTE(review): not referenced in the visible part of the class; presumably a batch size -- confirm.
select_limit = 50
# Changed via set_skip_blank_rows(); not read in the visible part of the class.
skip_blank_rows = True
# Incremented by task_done() for every finished row.
counter = 0
# Key used by __init__ to pick the collection; None here, presumably set by subclasses -- confirm.
collection_name = None
def __init__(self, maxsize=0):
    """Initialise the WSJob base and bind the job's Mongo collection.

    Args:
        maxsize: forwarded unchanged to ``WSJob.__init__``.

    The collection is looked up by ``self.collection_name`` in the
    registry's 'mongo' entry and stored in ``self.collection``.
    """
    WSJob.__init__(self, maxsize)
    mongo = Registry().get('mongo')
    self.collection = mongo[self.collection_name]
def build_row(self, _str):
    """Build the document stored in the collection for one word.

    Args:
        _str: raw word; surrounding whitespace is stripped.

    Returns:
        dict: ``name`` holds the stripped word, and both the ``checked``
        and ``getted`` flags start at 0.
    """
    row = dict(name=_str.strip(), checked=0, getted=0)
    return row
def qsize(self):
    """Return how many rows in the collection are still unchecked.

    The number is the count of documents with ``checked == 0`` in
    ``self.collection``.
    """
    pending = self.collection.find({"checked": 0})
    return pending.count()
def set_unique(self, unique=True):
    """Store the duplicate-removal flag on the instance.

    Args:
        unique: new value for ``self.unique`` (defaults to True).
    """
    self.unique = unique
def set_skip_blank_rows(self, value=True):
    """Store the blank-row flag on the instance.

    Args:
        value: new value for ``self.skip_blank_rows`` (defaults to True).

    According to the original description, a true value means blank rows
    are skipped when the queue is filled from a dict or file; the code
    that reads the flag is not part of this method.
    """
    self.skip_blank_rows = value
def task_done(self, name):
    """Mark the row called ``name`` as checked and finish the queue task.

    Args:
        name: name of the row that has been processed.

    Increments ``self.counter``, sets ``checked`` to 1 on the document
    whose ``name`` matches and whose ``getted`` flag is 1, then delegates
    to ``WSJob.task_done``.
    """
    self.counter += 1
    selector = {'name': str(unicode(name)), "getted": 1}
    change = {"$set": {"checked": 1}}
    self.collection.update(selector, change)
    WSJob.task_done(self)
def get(self, block=False, timeout=None):
    """Return the next item, refilling from MongoDB when needed.

    Args:
        block: forwarded to ``WSJob.get``.
        timeout: forwarded to ``WSJob.get``.

    Raises:
        Queue.Empty: when the queue is still empty after the refill attempt.
    """
    # Refill when nothing is queued or fewer than 50 unchecked rows remain
    # (qsize() counts unchecked rows in the collection).
    running_low = self.empty() or self.qsize() < 50
    if running_low:
        self.load_data()
    if not self.empty():
        return WSJob.get(self, block, timeout)
    raise Queue.Empty
def load_data(self):
    """Move the next batch of untouched rows from MongoDB into the queue.

    Selects documents with ``checked == 0`` and ``getted == 0``, limited to
    the ``mongo_data_load_per_once`` value from the 'main' section of the
    registry's config. Each selected name is put into the queue and its
    document is flagged ``getted = 1``.

    Returns:
        bool: always True.
    """
    per_once = int(Registry().get('config')['main']['mongo_data_load_per_once'])
    untouched = {"checked": 0, "getted": 0}
    for row in self.collection.find(untouched, limit=per_once):
        name = row['name']
        self.put(name)
        # Flag the row so a later batch does not pick it up again.
        self.collection.update({"name": name}, {"$set": {"getted": 1}})
    return True
def load_dict(self, dict_for_load, drop=True):
    """Fill the collection from an iterable of words, then load the queue.

    Args:
        dict_for_load: iterable of lines (e.g. a list or an open file).
        drop: when true, the collection is dropped before filling.

    Each line is stripped and inserted as a row (see ``build_row``). A line
    that raises ``UnicodeDecodeError`` is skipped and reported through the
    registry's 'logger' entry when one is set, otherwise printed.

    NOTE(review): ``self.skip_blank_rows`` is not consulted here, so blank
    lines are inserted as rows with an empty name -- confirm this is intended.

    Returns:
        int: number of lines processed without a decode error.
    """
    if drop:
        self.collection.drop()

    # The message used to format the builtin `file` type instead of a file
    # name; use the source's own name when it has one (file objects do).
    source_name = getattr(dict_for_load, 'name', '<dict>')

    counter = 0
    last = "START OF FILE"
    for line in dict_for_load:
        try:
            line = line.strip()
            # Decoding is only a validity check; the result is discarded.
            unicode(line)
            self.collection.insert(self.build_row(line))
        except UnicodeDecodeError:
            _str = " UNICODE ERROR: In file '{0}' skip word '{1}', after word '{2}' !".format(source_name, line, last)
            if Registry().isset('logger'):
                Registry().get('logger').log(_str)
            else:
                print(_str)
            continue
        counter += 1
        last = line

    self.load_data()
    return counter
def load_dom(self, dom):
""" Fill the collection from a DictOfMask word source.

Drops the current collection, inserts one row (see build_row) for every
word returned by dom.get() until it returns None, then creates an index
on 'name'.

NOTE(review): the listing is cut right after this method, so it may
contain more statements than shown here -- confirm against the full source.
"""
# Start from an empty collection.
self.collection.drop()
while True:
word = dom.get()
# None from dom.get() ends the loop.
if word is None:
break
self.collection.insert(self.build_row(word))
# The index is created after all inserts; its uniqueness follows self.unique.
# NOTE(review): drop_dups is presumably the legacy option that removes duplicates while building a unique index -- verify against the pymongo version in use.
self.collection.create_index('name', drop_dups=True, unique=self.unique)
#.........这里部分代码省略.........