当前位置: 首页>>代码示例>>Python>>正文


Python Registry.find方法代码示例

本文整理汇总了Python中classes.Registry.Registry.find方法的典型用法代码示例。如果您正苦于以下问题:Python Registry.find方法的具体用法?Python Registry.find怎么用?Python Registry.find使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类classes.Registry.Registry的用法示例。


在下文中一共展示了Registry.find方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: _get_codes_stat

# 需要导入模块: from classes.Registry import Registry [as 别名]
# 或者: from classes.Registry.Registry import find [as 别名]
    def _get_codes_stat(self):
        """ Map every http-code found in spider_urls to the list of links stored with it """
        urls = Registry().get('mongo').spider_urls
        stat = {}

        # Every grouped row carries one 'code' value; the links are fetched per code
        for grouped in urls.group({'code': True}, '', {}, 'function () {}'):
            http_code = grouped['code']
            found = urls.find({'code': http_code}, {'path': 1, 'query': 1})
            # A link is 'path?query' when the query is non-empty, otherwise just 'path'
            stat[int(http_code)] = [
                row['path'] + '?' + row['query'] if row['query'] else row['path']
                for row in mongo_result_to_list(found)
            ]

        return stat
开发者ID:Sts0mrg0,项目名称:ws-cli,代码行数:17,代码来源:SpiderResult.py

示例2: _get_extensions

# 需要导入模块: from classes.Registry import Registry [as 别名]
# 或者: from classes.Registry.Registry import find [as 别名]
    def _get_extensions(self):
        """
        Build files extensions list

        Takes the paths returned by grouping spider_urls on 'path', picks the
        tail after the last dot of each path when that tail (dot included) is
        at most 5 characters long, and collects for every such extension the
        links whose path ends with it.

        :return: dict mapping extension (with the leading dot) to a list of
                 links; a link is 'path?query' when the query is non-empty,
                 otherwise just 'path'
        """
        result = {}
        coll = Registry().get('mongo').spider_urls
        links = coll.group({'path': True}, '', {}, 'function () {}')
        links = mongo_result_to_list(links)

        exts = set()
        for link in links:
            path = link['path']
            dot_pos = path.rfind('.')
            # Only a short tail after the last dot counts as an extension
            if dot_pos > -1 and len(path) - dot_pos <= 5:
                exts.add(path[dot_pos:])

        for ext in exts:
            # re.escape() neutralises every regex metacharacter of the extension
            # (e.g. '.c++'), not only the leading dot, so the pattern always
            # matches the literal extension at the end of the path
            pattern = re.compile(re.escape(ext) + '$')
            found = mongo_result_to_list(coll.find({'path': pattern}))

            result[ext] = [
                link['path'] + '?' + link['query'] if link['query'] else link['path']
                for link in found
            ]

        return result
开发者ID:Sts0mrg0,项目名称:ws-cli,代码行数:25,代码来源:SpiderResult.py

示例3: MongoJob

# 需要导入模块: from classes.Registry import Registry [as 别名]
# 或者: from classes.Registry.Registry import find [as 别名]
class MongoJob(WSJob):
    """ Common base class for jobs whose work queue is stored in a MongoDB collection """
    # Passed as the `unique` option when load_dom() creates the index on 'name';
    # changed through set_unique()
    unique = True
    # Collection object; assigned in __init__ from the 'mongo' registry entry
    collection = None
    # NOTE(review): not referenced in the visible part of the class - presumably
    # a batch size used by code outside this excerpt; confirm before changing
    select_limit = 50
    # Toggled through set_skip_blank_rows()
    skip_blank_rows = True
    # Incremented by task_done() for every finished row
    counter = 0
    # Key used in __init__ to pick the collection; None here, so presumably
    # subclasses are expected to override it - TODO confirm
    collection_name = None

    def __init__(self, maxsize=0):
        """ Init the parent job with maxsize and bind the collection named by collection_name """
        WSJob.__init__(self, maxsize)
        mongo = Registry().get('mongo')
        self.collection = mongo[self.collection_name]

    def build_row(self, _str):
        """ Make a fresh row (not checked, not getted) for the stripped string """
        row = dict(checked=0, getted=0)
        row["name"] = _str.strip()
        return row

    def qsize(self):
        """ Number of rows in the collection which are not checked yet """
        unchecked = self.collection.find({"checked": 0})
        return unchecked.count()

    def set_unique(self, unique=True):
        """ Turn removing of duplicate rows in queue on (True) or off (False) """
        self.unique = unique

    def set_skip_blank_rows(self, value=True):
        """ If True - blank rows will be skipped while filling queue from dict or file """
        self.skip_blank_rows = value

    def task_done(self, name):
        """ Count the finished row and flag it as checked in the collection """
        self.counter += 1
        # Only a row which was already handed out (getted=1) is marked as checked
        row_filter = {'name': str(unicode(name)), "getted": 1}
        self.collection.update(row_filter, {"$set": {"checked": 1}})
        WSJob.task_done(self)

    def get(self, block=False, timeout=None):
        """ Return next item; data is loaded from MongoDB first when the queue runs low. Raises Queue.Empty if nothing left """
        need_refill = self.empty() or self.qsize() < 50
        if need_refill:
            self.load_data()

        if not self.empty():
            return WSJob.get(self, block, timeout)

        raise Queue.Empty

    def load_data(self):
        """ Put next portion of untouched rows from MongoDB into queue and flag them as getted """
        # Portion size comes from config: main / mongo_data_load_per_once
        per_once = int(Registry().get('config')['main']['mongo_data_load_per_once'])
        rows = self.collection.find({"checked": 0, "getted": 0}, limit=per_once)

        for row in rows:
            name = row['name']
            self.put(name)
            self.collection.update({"name": name}, {"$set": {"getted": 1}})

        return True

    def load_dict(self, dict_for_load, drop=True):
        """
        Fill collection from dict

        Every word is stripped and inserted as a separate row. Blank words are
        skipped while skip_blank_rows is True. Words which cannot be converted
        by unicode() are reported (to the registry logger when it is set,
        otherwise to stdout) and skipped. Finally the queue is filled through
        load_data().

        :param dict_for_load: iterable of words (strings)
        :param drop: if True the collection is dropped before filling
        :return: number of rows inserted
        """
        if drop:
            self.collection.drop()

        counter = 0
        last = "START OF DICT"

        for line in dict_for_load:
            line = line.strip()

            # Honour the flag controlled by set_skip_blank_rows()
            if self.skip_blank_rows and not line:
                continue

            try:
                # Check only: raises UnicodeDecodeError for a word which cannot be decoded
                unicode(line)
                self.collection.insert(self.build_row(line))
            except UnicodeDecodeError:
                # There is no file here, so the message names only the bad word
                # and the last good one (the old text formatted the builtin `file`)
                _str = " UNICODE ERROR: In dict skip word '{0}', after word '{1}' !".format(line, last)
                if Registry().isset('logger'):
                    Registry().get('logger').log(_str)
                else:
                    print(_str)

                continue

            counter += 1
            last = line

        self.load_data()

        return counter

    def load_dom(self, dom):
        """ Fill queue from DictOfMask """
        self.collection.drop()
        while True:
            word = dom.get()
            if word is None:
                break
            self.collection.insert(self.build_row(word))
        self.collection.create_index('name', drop_dups=True, unique=self.unique)
#.........这里部分代码省略.........
开发者ID:hack4sec,项目名称:ws-cli,代码行数:103,代码来源:MongoJob.py


注:本文中的classes.Registry.Registry.find方法示例由纯净天空整理自GitHub/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。