当前位置: 首页>>代码示例>>Python>>正文


Python AccessChecker.parsed_access方法代码示例

本文整理汇总了Python中logger.accesschecker.AccessChecker.parsed_access方法的典型用法代码示例。如果您正苦于以下问题:Python AccessChecker.parsed_access方法的具体用法?Python AccessChecker.parsed_access怎么用?Python AccessChecker.parsed_access使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在logger.accesschecker.AccessChecker的用法示例。


在下文中一共展示了AccessChecker.parsed_access方法的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_parsed_access_valid_pdf_access

# 需要导入模块: from logger.accesschecker import AccessChecker [as 别名]
# 或者: from logger.accesschecker.AccessChecker import parsed_access [as 别名]
    def test_parsed_access_valid_pdf_access(self):
        # A PDF request whose journal acronym ('bjmbr') is present in the
        # mocked acronym->ISSN mapping must be parsed into a full record.
        collections = [u'scl', u'arg']
        issn_by_acronym = {u'zool': u'1984-4670', u'bjmbr': u'1414-431X'}

        # Record phase: each patched call is followed by its canned result.
        patched = self.mocker.patch(AccessChecker)
        patched._allowed_collections()
        self.mocker.result(collections)
        patched._acronym_to_issn_dict()
        self.mocker.result(issn_by_acronym)
        self.mocker.replay()

        checker = AccessChecker(collection='scl')

        log_line = '201.14.120.2 - - [30/May/2013:00:01:01 -0300] "GET http://www.scielo.br/pdf/bjmbr/v14n4/03.pdf HTTP/1.1" 206 4608 "-" "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)"'

        # The PDF path is expected both as the access code and as 'pdf_path'.
        pdf_path = '/pdf/bjmbr/v14n4/03.pdf'
        expected = {
            'ip': '201.14.120.2',
            'access_type': 'PDF',
            'code': pdf_path,
            'pdf_path': pdf_path,
            'pdf_issn': u'1414-431X',
            'script': '',
            'query_string': None,
            'original_date': '[30/May/2013:00:01:01 -0300]',
            'original_agent': 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)',
            'iso_datetime': '2013-05-30T00:01:01',
            'iso_date': '2013-05-30',
            'year': '2013',
            'month': '05',
            'day': '30',
        }

        parsed = checker.parsed_access(log_line)
        self.assertEqual(parsed, expected)
开发者ID:,项目名称:,代码行数:33,代码来源:

示例2: test_parsed_access_valid_html_access

# 需要导入模块: from logger.accesschecker import AccessChecker [as 别名]
# 或者: from logger.accesschecker.AccessChecker import parsed_access [as 别名]
    def test_parsed_access_valid_html_access(self):
        # A scielo.php request carrying 'pid' and 'script' in its query string
        # must be parsed as an HTML access, with the pid used as the code.
        collections = [u'scl', u'arg']
        issn_by_acronym = {u'zool': u'1984-4670', u'bjmbr': u'1414-431X'}

        # Record phase: each patched call is followed by its canned result.
        patched = self.mocker.patch(AccessChecker)
        patched._allowed_collections()
        self.mocker.result(collections)
        patched._acronym_to_issn_dict()
        self.mocker.result(issn_by_acronym)
        self.mocker.replay()

        checker = AccessChecker(collection='scl')

        log_line = '187.19.211.179 - - [30/May/2013:00:01:01 -0300] "GET http://www.scielo.br/scielo.php?pid=S1414-431X2000000300007&script=sci_arttext HTTP/1.1" 200 25084 "-" "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)"'

        query_string = {
            'pid': 'S1414-431X2000000300007',
            'script': 'sci_arttext',
        }
        expected = {
            'ip': '187.19.211.179',
            'access_type': 'HTML',
            'code': 'S1414-431X2000000300007',
            'script': 'sci_arttext',
            'query_string': query_string,
            'original_date': '[30/May/2013:00:01:01 -0300]',
            'original_agent': 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)',
            'iso_datetime': '2013-05-30T00:01:01',
            'iso_date': '2013-05-30',
            'year': '2013',
            'month': '05',
            'day': '30',
        }

        parsed = checker.parsed_access(log_line)
        self.assertEqual(parsed, expected)
开发者ID:,项目名称:,代码行数:34,代码来源:

示例3: test_parsed_access_valid_pdf_access_GET_string_without_domain

# 需要导入模块: from logger.accesschecker import AccessChecker [as 别名]
# 或者: from logger.accesschecker.AccessChecker import parsed_access [as 别名]
    def test_parsed_access_valid_pdf_access_GET_string_without_domain(self):
        # Same as the plain PDF case, but the GET target is a bare path
        # ('/pdf/...') with no scheme or host in front of it.
        collections = [u'scl', u'arg']
        issn_by_acronym = {u'zool': u'1984-4670', u'bjmbr': u'1414-431X'}

        # Record phase: each patched call is followed by its canned result.
        patched = self.mocker.patch(AccessChecker)
        patched._allowed_collections()
        self.mocker.result(collections)
        patched._acronym_to_issn_dict()
        self.mocker.result(issn_by_acronym)
        self.mocker.replay()

        checker = AccessChecker(collection='scl')

        log_line = '66.249.73.80 - - [30/May/2013:00:01:01 -0300] "GET /pdf/bjmbr/v29n4/18781.pdf HTTP/1.1" 200 32061 "-" "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)"'

        # The PDF path is expected both as the access code and as 'pdf_path'.
        pdf_path = '/pdf/bjmbr/v29n4/18781.pdf'
        expected = {
            'ip': '66.249.73.80',
            'access_type': 'PDF',
            'code': pdf_path,
            'pdf_path': pdf_path,
            'pdf_issn': u'1414-431X',
            'script': '',
            'query_string': None,
            'iso_datetime': '2013-05-30T00:01:01',
            'iso_date': '2013-05-30',
            'year': '2013',
            'month': '05',
            'day': '30',
        }

        parsed = checker.parsed_access(log_line)
        self.assertEqual(parsed, expected)
开发者ID:fabiobatalha,项目名称:Logger,代码行数:31,代码来源:test_accesseschecker.py

示例4: bulk

# 需要导入模块: from logger.accesschecker import AccessChecker [as 别名]
# 或者: from logger.accesschecker.AccessChecker import parsed_access [as 别名]
def bulk(collection=None):
    """Process every log file directly under ``LOG_DIR`` and register accesses.

    For each regular, non-hidden file in ``LOG_DIR`` (in sorted name order):

    * skip it if its path is already recorded in the processed-files
      collection, otherwise record it there before reading;
    * parse each line with ``AccessChecker.parsed_access`` and ignore lines
      for which it returns a falsy value;
    * when ``COUNTER_COMPLIANT`` is true, add a lock id built from the
      access ip, code and script to a ``TimedSet`` (lock time 30 for PDF
      accesses, 10 otherwise) and register the access; a ``ValueError``
      raised in that step makes the line be skipped;
    * otherwise register every parsed access;
    * finally call ``rq.send(slp=SLEEP)`` for that file.

    The directory is listed with ``os.listdir`` instead of piping a shell
    ``ls`` through ``os.popen``, so paths with spaces or shell
    metacharacters work, no pipe is left open, and sub-directories are
    skipped. File paths keep the ``LOG_DIR + '/' + name`` form.

    :param collection: value handed to ``AccessChecker`` and ``Local``.
    """
    _logger.info('Running as bulk')

    if COUNTER_COMPLIANT:
        ts = TimedSet(expired=checkdatelock)

    ac = AccessChecker(collection)

    proc_coll = get_proc_collection()
    # The returned collection is not used in this function; the call itself
    # is kept in case it has side effects. NOTE(review): confirm.
    get_proc_robots_collection()

    try:
        names = sorted(os.listdir(LOG_DIR))
    except OSError as e:
        # The previous ``ls`` based listing produced no entries when the
        # directory could not be read; keep that outcome but log the reason.
        _logger.error('Could not list log directory %s: %s' % (LOG_DIR, e))
        names = []

    for name in names:
        # A shell ``*`` never matched hidden entries; keep ignoring them.
        if name.startswith('.'):
            continue

        logfile = '%s/%s' % (LOG_DIR, name)
        if not os.path.isfile(logfile):
            continue

        # Check whether the file has already been processed.
        if proc_coll.find({'file_name': logfile}).count() > 0:
            _logger.debug('File already processed %s' % logfile)
            continue

        # Record the new file in the processed-files database.
        _logger.info("Processing: %s" % logfile)
        proc_coll.insert({'file_name': logfile})

        rq = Local(MONGO_URI, collection)

        with open(logfile, 'rb') as f:

            for log_file_line, raw_line in enumerate(f, 1):
                _logger.debug("Reading line {0} from file {1}".format(str(log_file_line), logfile))
                parsed_line = ac.parsed_access(raw_line)

                if not parsed_line:
                    continue

                if COUNTER_COMPLIANT:
                    # Counter Mode Accesses
                    locktime = 30 if parsed_line['access_type'] == "PDF" else 10
                    try:
                        lockid = '_'.join([parsed_line['ip'],
                                           parsed_line['code'],
                                           parsed_line['script']])
                        ts.add(lockid, parsed_line['iso_datetime'], locktime)
                        register_access(rq, parsed_line)
                    except ValueError:
                        continue
                else:
                    # SciELO Mode Accesses
                    register_access(rq, parsed_line)

        rq.send(slp=SLEEP)
        del(rq)
开发者ID:fabiobatalha,项目名称:Logger,代码行数:59,代码来源:loadlogs.py

示例5: test_parsed_access_valid_pdf_with_any_different_access

# 需要导入模块: from logger.accesschecker import AccessChecker [as 别名]
# 或者: from logger.accesschecker.AccessChecker import parsed_access [as 别名]
    def test_parsed_access_valid_pdf_with_any_different_access(self):
        # A request for a static image (neither a PDF nor a scielo.php page)
        # must not yield a parsed access.
        collections = [u'scl', u'arg']
        issn_by_acronym = {u'zool': u'1984-4670', u'bjmbr': u'1414-431X'}

        # Record phase: each patched call is followed by its canned result.
        patched = self.mocker.patch(AccessChecker)
        patched._allowed_collections()
        self.mocker.result(collections)
        patched._acronym_to_issn_dict()
        self.mocker.result(issn_by_acronym)
        self.mocker.replay()

        checker = AccessChecker(collection='scl')

        log_line = '177.191.212.233 - - [30/May/2013:00:01:01 -0300] "GET http://www.scielo.br/img/pt/author.gif HTTP/1.1" 304 0 "http://www.scielo.br/scielo.php?script=sci_serial&pid=1415-4757&nrm=iso&rep=&lng=pt" "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.94 Safari/537.36"'

        parsed = checker.parsed_access(log_line)
        self.assertEqual(parsed, None)
开发者ID:,项目名称:,代码行数:16,代码来源:

示例6: test_parsed_access_valid_pdf_with_not_allowed_acronym

# 需要导入模块: from logger.accesschecker import AccessChecker [as 别名]
# 或者: from logger.accesschecker.AccessChecker import parsed_access [as 别名]
    def test_parsed_access_valid_pdf_with_not_allowed_acronym(self):
        # A PDF request whose acronym is absent from the mocked
        # acronym->ISSN mapping must not yield a parsed access.
        collections = [u'scl', u'arg']
        issn_by_acronym = {u'zool': u'1984-4670', u'bjmbr': u'1414-431X'}

        # Record phase: each patched call is followed by its canned result.
        patched = self.mocker.patch(AccessChecker)
        patched._allowed_collections()
        self.mocker.result(collections)
        patched._acronym_to_issn_dict()
        self.mocker.result(issn_by_acronym)
        self.mocker.replay()

        checker = AccessChecker(collection='scl')

        log_line = '201.14.120.2 - - [30/May/2013:00:01:01 -0300] "GET http://www.scielo.br/pdf/not_allowed_acronym/v14n4/03.pdf HTTP/1.1" 206 4608 "-" "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)"'

        parsed = checker.parsed_access(log_line)
        self.assertEqual(parsed, None)
开发者ID:,项目名称:,代码行数:16,代码来源:

示例7: test_parsed_access_invalid_article_access_without_query_string

# 需要导入模块: from logger.accesschecker import AccessChecker [as 别名]
# 或者: from logger.accesschecker.AccessChecker import parsed_access [as 别名]
    def test_parsed_access_invalid_article_access_without_query_string(self):
        # A scielo.php request with no query string at all must not yield a
        # parsed access.
        collections = [u'scl', u'arg']
        issn_by_acronym = {u'zool': u'1984-4670', u'bjmbr': u'1414-431X'}

        # Record phase: each patched call is followed by its canned result.
        patched = self.mocker.patch(AccessChecker)
        patched._allowed_collections()
        self.mocker.result(collections)
        patched._acronym_to_issn_dict()
        self.mocker.result(issn_by_acronym)
        self.mocker.replay()

        checker = AccessChecker(collection='scl')

        log_line = '187.19.211.179 - - [30/May/2013:00:01:01 -0300] "GET http://www.scielo.br/scielo.php HTTP/1.1" 200 25084 "-" "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)"'

        parsed = checker.parsed_access(log_line)
        self.assertEqual(parsed, None)
开发者ID:,项目名称:,代码行数:16,代码来源:

示例8: test_is_bot_Spider_sample

# 需要导入模块: from logger.accesschecker import AccessChecker [as 别名]
# 或者: from logger.accesschecker.AccessChecker import parsed_access [as 别名]
    def test_is_bot_Spider_sample(self):
        # A PDF request sent with a Baiduspider user agent must not yield a
        # parsed access.
        collections = [u'scl', u'arg']
        issn_by_acronym = {u'zool': u'1984-4670', u'bjmbr': u'1414-431X'}

        # Record phase: each patched call is followed by its canned result.
        patched = self.mocker.patch(AccessChecker)
        patched._allowed_collections()
        self.mocker.result(collections)
        patched._acronym_to_issn_dict()
        self.mocker.result(issn_by_acronym)
        self.mocker.replay()

        checker = AccessChecker(collection='scl')

        log_line = '180.76.5.118 - - [24/Dec/2013:04:49:09 -0200] "GET http://www.scielo.br/pdf/csc/v11n2/30434.pdf HTTP/1.1" 200 79618 "-" "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)"'

        parsed = checker.parsed_access(log_line)
        self.assertEqual(parsed, None)
开发者ID:,项目名称:,代码行数:16,代码来源:

示例9: test_is_bot_Bing_sample

# 需要导入模块: from logger.accesschecker import AccessChecker [as 别名]
# 或者: from logger.accesschecker.AccessChecker import parsed_access [as 别名]
    def test_is_bot_Bing_sample(self):
        # An article request sent with a bingbot user agent must not yield a
        # parsed access.
        collections = [u'scl', u'arg']
        issn_by_acronym = {u'zool': u'1984-4670', u'bjmbr': u'1414-431X'}

        # Record phase: each patched call is followed by its canned result.
        patched = self.mocker.patch(AccessChecker)
        patched._allowed_collections()
        self.mocker.result(collections)
        patched._acronym_to_issn_dict()
        self.mocker.result(issn_by_acronym)
        self.mocker.replay()

        checker = AccessChecker(collection='scl')

        log_line = '13245  157.56.92.164 - - [30/Nov/2013:03:53:26 -0200] "GET http://www.scielo.br/scielo.php?script=sci_arttext&pid=S0104-87752010000200013 HTTP/1.1" 200 108777 "-" "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)"'

        parsed = checker.parsed_access(log_line)
        self.assertEqual(parsed, None)
开发者ID:,项目名称:,代码行数:16,代码来源:

示例10: test_is_bot_GoogleBot_sample

# 需要导入模块: from logger.accesschecker import AccessChecker [as 别名]
# 或者: from logger.accesschecker.AccessChecker import parsed_access [as 别名]
    def test_is_bot_GoogleBot_sample(self):
        # An article request sent with a Googlebot user agent must not yield
        # a parsed access.
        collections = [u'scl', u'arg']
        issn_by_acronym = {u'zool': u'1984-4670', u'bjmbr': u'1414-431X'}

        # Record phase: each patched call is followed by its canned result.
        patched = self.mocker.patch(AccessChecker)
        patched._allowed_collections()
        self.mocker.result(collections)
        patched._acronym_to_issn_dict()
        self.mocker.result(issn_by_acronym)
        self.mocker.replay()

        checker = AccessChecker(collection='scl')

        log_line = '66.249.75.131 - - [24/Dec/2013:04:49:09 -0200] "GET http://www.scielo.br/scielo.php?script=sci_arttext&pid=S0102-79722002000200013 HTTP/1.1" 200 102967 "-" "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"'

        parsed = checker.parsed_access(log_line)
        self.assertEqual(parsed, None)
开发者ID:,项目名称:,代码行数:16,代码来源:

示例11: Bulk

# 需要导入模块: from logger.accesschecker import AccessChecker [as 别名]
# 或者: from logger.accesschecker.AccessChecker import parsed_access [as 别名]
class Bulk(object):

    def __init__(self, collection, mongo_uri=MONGO_URI, logs_source=LOGS_SOURCE, counter_compliant=None, skipped_log_dir=None):
        """Store the configuration and build the helpers used while reading logs.

        :param collection: collection identifier; appended to ``mongo_uri``
            and handed to ``AccessChecker``.
        :param mongo_uri: base URI; the stored URI is ``"<mongo_uri>_<collection>"``.
        :param logs_source: stored as ``self._logs_source``.
        :param counter_compliant: stored as ``self._counter_compliant``.
        :param skipped_log_dir: directory for the skipped-lines log; the file
            itself is only opened in ``__enter__``.
        """
        # Plain configuration values.
        self._collection = collection
        self._logs_source = logs_source
        self._counter_compliant = counter_compliant
        self._skipped_log_dir = skipped_log_dir
        self._skipped_log = None

        # The URI must be set before get_proc_collection(), which connects
        # through self._mongo_uri.
        self._mongo_uri = "%s_%s" % (mongo_uri, collection)
        self._proc_coll = self.get_proc_collection()

        self._ts = utils.TimedSet(expired=utils.checkdatelock)
        self._ac = AccessChecker(self._collection)

    def __enter__(self):
        """Open the skipped-lines log, when a directory is configured, and return self.

        The file is created inside ``self._skipped_log_dir`` and named after
        the current local time in ISO format. When no directory is configured
        nothing is opened.

        :returns: this instance, for use as the ``with`` target.
        :raises ValueError: if ``open`` rejects the path with a ValueError;
            the message names the offending path. Any ``IOError`` raised by
            ``open`` propagates unchanged.
        """
        if self._skipped_log_dir:
            now = datetime.datetime.now().isoformat()
            skipped_log = '/'.join([self._skipped_log_dir, now]).replace('//', '/')
            try:
                self._skipped_log = open(skipped_log, 'w')
            except ValueError:
                # Raising a bare string is itself a TypeError, which hid the
                # intended message; raise a real exception of the same type
                # that was caught.
                raise ValueError("Invalid directory or file name: %s" % skipped_log)

        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Drop the TimedSet and AccessChecker references and close the skipped log.

        Returns None, so an exception raised inside the ``with`` body is not
        suppressed.
        """
        self._ts = self._ac = None

        skipped = self._skipped_log
        if skipped:
            skipped.close()

    def _mongodb_connect(self, mdb_database):
        """Connect using ``self._mongo_uri`` and return ``db[mdb_database]``.

        The host and port come from the parsed URI and the database name is
        the URI path without its first character. Authentication is attempted
        only when the URI carries both a username and a password.

        :param mdb_database: key looked up on the database object (the
            visible caller passes ``'proc_files'``).
        """
        parsed = urlparse.urlparse(self._mongo_uri)
        client = pymongo.MongoClient(host=parsed.hostname, port=parsed.port)
        database = client[parsed.path[1:]]

        user, password = parsed.username, parsed.password
        if user and password:
            database.authenticate(user, password)

        return database[mdb_database]

    def get_proc_collection(self):
        """
        Return the 'proc_files' collection, which stores the name of every
        log file already processed so that files are not processed twice.
        An index on 'file_name' is ensured before returning.
        """
        processed_files = self._mongodb_connect('proc_files')
        processed_files.ensure_index('file_name')
        return processed_files

    def write_skipped_log(self, line):
        """Append ``line`` to the skipped-lines log; do nothing if no log is open.

        Each entry is written as the line followed by a space and CRLF.
        """
        target = self._skipped_log
        if not target:
            return
        target.write("%s \r\n" % line)

    def read_log(self, logfile):
        logfile = logfile.strip()

        # Verifica se arquivo já foi processado.
        if self._proc_coll.find({'file_name': logfile}).count() > 0:
            logger.info('File already processed %s' % logfile)
            return None

        reader = codecs
        if utils.check_file_format(logfile) == 'gzip':
            reader = gzip

        # Registra em base de dados de arquivos processados o novo arquivo.
        logger.info("Processing: %s" % logfile)
        self._proc_coll.insert({'file_name': logfile})

        with reader.open(logfile, 'rb') as f:
            with Local(self._mongo_uri, self._collection) as rq:
                log_file_line = 0
                for raw_line in f:
                    log_file_line += 1
                    logger.debug("Reading line {0} from file {1}".format(
                        str(log_file_line), logfile))
                    logger.debug(raw_line)

                    try:
                        parsed_line = self._ac.parsed_access(raw_line)
                    except ValueError as e:
                        logger.error("%s: %s" % (e.message, raw_line))
                        continue

                    if not parsed_line:
                        continue

                    if COUNTER_COMPLIANT:
                        # Counter Mode Accesses
                        locktime = 10
                        if parsed_line['access_type'] == "PDF":
                            locktime = 30
                        try:
                            lockid = '_'.join([parsed_line['ip'],
                                               parsed_line['code'],
                                               parsed_line['script']])
                            self._ts.add(lockid, parsed_line['iso_datetime'], locktime)
                            rq.register_access(parsed_line)
                        except ValueError:
#.........这里部分代码省略.........
开发者ID:scieloorg,项目名称:Logger,代码行数:103,代码来源:scielo.py


注:本文中的logger.accesschecker.AccessChecker.parsed_access方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。