本文整理汇总了Python中logger.accesschecker.AccessChecker.parsed_access方法的典型用法代码示例。如果您正苦于以下问题：Python AccessChecker.parsed_access方法的具体用法？Python AccessChecker.parsed_access怎么用？Python AccessChecker.parsed_access使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类logger.accesschecker.AccessChecker的用法示例。
在下文中一共展示了AccessChecker.parsed_access方法的11个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_parsed_access_valid_pdf_access
# 需要导入模块: from logger.accesschecker import AccessChecker [as 别名]
# 或者: from logger.accesschecker.AccessChecker import parsed_access [as 别名]
def test_parsed_access_valid_pdf_access(self):
    # A GET for a PDF of a journal acronym present in the mocked
    # acronym -> ISSN map must be parsed into a complete access record.
    #
    # Record phase: the two private lookups are declared on the patched
    # class (call first, then its result) before replay() is invoked.
    patched = self.mocker.patch(AccessChecker)
    patched._allowed_collections()
    self.mocker.result([u'scl', u'arg'])
    patched._acronym_to_issn_dict()
    self.mocker.result({u'zool': u'1984-4670', u'bjmbr': u'1414-431X'})
    self.mocker.replay()

    checker = AccessChecker(collection='scl')

    log_line = '201.14.120.2 - - [30/May/2013:00:01:01 -0300] "GET http://www.scielo.br/pdf/bjmbr/v14n4/03.pdf HTTP/1.1" 206 4608 "-" "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)"'

    expected = {
        # Who / what was accessed.
        'ip': '201.14.120.2',
        'code': '/pdf/bjmbr/v14n4/03.pdf',
        'access_type': 'PDF',
        'pdf_issn': u'1414-431X',
        'pdf_path': '/pdf/bjmbr/v14n4/03.pdf',
        'script': '',
        'query_string': None,
        # When it was accessed.
        'iso_date': '2013-05-30',
        'iso_datetime': '2013-05-30T00:01:01',
        'year': '2013',
        'month': '05',
        'day': '30',
        # Raw fields carried over from the log line.
        'original_agent': 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)',
        'original_date': '[30/May/2013:00:01:01 -0300]',
    }

    parsed = checker.parsed_access(log_line)
    self.assertEqual(parsed, expected)
示例2: test_parsed_access_valid_html_access
# 需要导入模块: from logger.accesschecker import AccessChecker [as 别名]
# 或者: from logger.accesschecker.AccessChecker import parsed_access [as 别名]
def test_parsed_access_valid_html_access(self):
    # A scielo.php request carrying both `pid` and `script` must be parsed
    # as an HTML access whose code is the pid.
    #
    # Record phase: declare the mocked private lookups, then replay.
    patched = self.mocker.patch(AccessChecker)
    patched._allowed_collections()
    self.mocker.result([u'scl', u'arg'])
    patched._acronym_to_issn_dict()
    self.mocker.result({u'zool': u'1984-4670', u'bjmbr': u'1414-431X'})
    self.mocker.replay()

    checker = AccessChecker(collection='scl')

    log_line = '187.19.211.179 - - [30/May/2013:00:01:01 -0300] "GET http://www.scielo.br/scielo.php?pid=S1414-431X2000000300007&script=sci_arttext HTTP/1.1" 200 25084 "-" "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)"'

    expected_query = {
        'pid': 'S1414-431X2000000300007',
        'script': 'sci_arttext'
    }
    expected = {
        # Who / what was accessed.
        'ip': '187.19.211.179',
        'code': 'S1414-431X2000000300007',
        'access_type': 'HTML',
        'script': 'sci_arttext',
        'query_string': expected_query,
        # When it was accessed.
        'iso_date': '2013-05-30',
        'iso_datetime': '2013-05-30T00:01:01',
        'year': '2013',
        'month': '05',
        'day': '30',
        # Raw fields carried over from the log line.
        'original_agent': 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)',
        'original_date': '[30/May/2013:00:01:01 -0300]',
    }

    parsed = checker.parsed_access(log_line)
    self.assertEqual(parsed, expected)
示例3: test_parsed_access_valid_pdf_access_GET_string_without_domain
# 需要导入模块: from logger.accesschecker import AccessChecker [as 别名]
# 或者: from logger.accesschecker.AccessChecker import parsed_access [as 别名]
def test_parsed_access_valid_pdf_access_GET_string_without_domain(self):
    # Same as the plain PDF case, but the request target is a bare path
    # ("/pdf/...") with no scheme or host in front of it.
    #
    # Record phase: declare the mocked private lookups, then replay.
    patched = self.mocker.patch(AccessChecker)
    patched._allowed_collections()
    self.mocker.result([u'scl', u'arg'])
    patched._acronym_to_issn_dict()
    self.mocker.result({u'zool': u'1984-4670', u'bjmbr': u'1414-431X'})
    self.mocker.replay()

    checker = AccessChecker(collection='scl')

    log_line = '66.249.73.80 - - [30/May/2013:00:01:01 -0300] "GET /pdf/bjmbr/v29n4/18781.pdf HTTP/1.1" 200 32061 "-" "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)"'

    # Note: unlike the other PDF test, this expectation carries no
    # 'original_agent' / 'original_date' keys.
    expected = {
        # Who / what was accessed.
        'ip': '66.249.73.80',
        'code': '/pdf/bjmbr/v29n4/18781.pdf',
        'access_type': 'PDF',
        'pdf_issn': u'1414-431X',
        'pdf_path': '/pdf/bjmbr/v29n4/18781.pdf',
        'script': '',
        'query_string': None,
        # When it was accessed.
        'iso_date': '2013-05-30',
        'iso_datetime': '2013-05-30T00:01:01',
        'year': '2013',
        'month': '05',
        'day': '30',
    }

    parsed = checker.parsed_access(log_line)
    self.assertEqual(parsed, expected)
示例4: bulk
# 需要导入模块: from logger.accesschecker import AccessChecker [as 别名]
# 或者: from logger.accesschecker.AccessChecker import parsed_access [as 别名]
def bulk(collection=None):
    """Parse every not-yet-processed log file under LOG_DIR and register its accesses.

    For each file matching ``LOG_DIR/*`` that is not already recorded in the
    processed-files collection, the file name is recorded, every line is
    handed to ``AccessChecker.parsed_access`` and each successfully parsed
    line is registered through ``register_access``.  The queued accesses are
    sent once per file via ``rq.send(slp=SLEEP)``.

    When ``COUNTER_COMPLIANT`` is truthy, an access is only registered if
    ``ts.add`` accepts its ``ip_code_script`` lock id; a ``ValueError``
    raised inside that step makes the line be skipped.

    :param collection: collection identifier forwarded to ``AccessChecker``
        and ``Local``.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import section.
    import glob

    _logger.info('Running as bulk')

    # `ts` is only needed (and only referenced) in counter-compliant mode.
    if COUNTER_COMPLIANT:
        ts = TimedSet(expired=checkdatelock)

    ac = AccessChecker(collection)
    proc_coll = get_proc_collection()
    # The returned collection is not used in this function; the call itself
    # is kept in case it performs setup work — TODO confirm and drop if not.
    get_proc_robots_collection()

    # List the files directly instead of shelling out to `ls`: no shell is
    # spawned, paths with spaces/metacharacters work, and sub-directories do
    # not inject `ls` header lines.  The '%s/*' pattern is kept verbatim so
    # the resulting names match those already stored in `proc_coll`.
    for logfile in sorted(glob.glob('%s/*' % LOG_DIR)):
        logfile = logfile.strip()

        # Skip files that were already processed.
        if proc_coll.find({'file_name': logfile}).count() > 0:
            _logger.debug('File already processed %s' % logfile)
            continue

        # Record the new file in the processed-files collection up front.
        _logger.info("Processing: %s" % logfile)
        proc_coll.insert({'file_name': logfile})

        rq = Local(MONGO_URI, collection)

        with open(logfile, 'rb') as f:
            for log_file_line, raw_line in enumerate(f, 1):
                _logger.debug("Reading line {0} from file {1}".format(str(log_file_line), logfile))

                parsed_line = ac.parsed_access(raw_line)
                if not parsed_line:
                    continue

                if not COUNTER_COMPLIANT:
                    # SciELO mode: every parsed access is registered.
                    register_access(rq, parsed_line)
                    continue

                # Counter mode: PDF accesses use a longer lock time.
                locktime = 30 if parsed_line['access_type'] == "PDF" else 10
                try:
                    lockid = '_'.join([parsed_line['ip'],
                                       parsed_line['code'],
                                       parsed_line['script']])
                    ts.add(lockid, parsed_line['iso_datetime'], locktime)
                    register_access(rq, parsed_line)
                except ValueError:
                    # Presumably raised by `ts.add` while the lock id is
                    # still held — verify against TimedSet.
                    continue

        # Flush the accesses queued for this file, then drop the queue.
        rq.send(slp=SLEEP)
        del rq
示例5: test_parsed_access_valid_pdf_with_any_different_access
# 需要导入模块: from logger.accesschecker import AccessChecker [as 别名]
# 或者: from logger.accesschecker.AccessChecker import parsed_access [as 别名]
def test_parsed_access_valid_pdf_with_any_different_access(self):
    # A request for a resource that is neither a PDF nor a scielo.php page
    # (here a .gif image) must yield None.
    #
    # Record phase: declare the mocked private lookups, then replay.
    patched = self.mocker.patch(AccessChecker)
    patched._allowed_collections()
    self.mocker.result([u'scl', u'arg'])
    patched._acronym_to_issn_dict()
    self.mocker.result({u'zool': u'1984-4670', u'bjmbr': u'1414-431X'})
    self.mocker.replay()

    checker = AccessChecker(collection='scl')

    log_line = '177.191.212.233 - - [30/May/2013:00:01:01 -0300] "GET http://www.scielo.br/img/pt/author.gif HTTP/1.1" 304 0 "http://www.scielo.br/scielo.php?script=sci_serial&pid=1415-4757&nrm=iso&rep=&lng=pt" "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.94 Safari/537.36"'

    parsed = checker.parsed_access(log_line)
    self.assertEqual(parsed, None)
示例6: test_parsed_access_valid_pdf_with_not_allowed_acronym
# 需要导入模块: from logger.accesschecker import AccessChecker [as 别名]
# 或者: from logger.accesschecker.AccessChecker import parsed_access [as 别名]
def test_parsed_access_valid_pdf_with_not_allowed_acronym(self):
    # A PDF request whose journal acronym is absent from the mocked
    # acronym -> ISSN map must yield None.
    #
    # Record phase: declare the mocked private lookups, then replay.
    patched = self.mocker.patch(AccessChecker)
    patched._allowed_collections()
    self.mocker.result([u'scl', u'arg'])
    patched._acronym_to_issn_dict()
    self.mocker.result({u'zool': u'1984-4670', u'bjmbr': u'1414-431X'})
    self.mocker.replay()

    checker = AccessChecker(collection='scl')

    log_line = '201.14.120.2 - - [30/May/2013:00:01:01 -0300] "GET http://www.scielo.br/pdf/not_allowed_acronym/v14n4/03.pdf HTTP/1.1" 206 4608 "-" "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)"'

    parsed = checker.parsed_access(log_line)
    self.assertEqual(parsed, None)
示例7: test_parsed_access_invalid_article_access_without_query_string
# 需要导入模块: from logger.accesschecker import AccessChecker [as 别名]
# 或者: from logger.accesschecker.AccessChecker import parsed_access [as 别名]
def test_parsed_access_invalid_article_access_without_query_string(self):
    # A scielo.php request with no query string at all must yield None.
    #
    # Record phase: declare the mocked private lookups, then replay.
    patched = self.mocker.patch(AccessChecker)
    patched._allowed_collections()
    self.mocker.result([u'scl', u'arg'])
    patched._acronym_to_issn_dict()
    self.mocker.result({u'zool': u'1984-4670', u'bjmbr': u'1414-431X'})
    self.mocker.replay()

    checker = AccessChecker(collection='scl')

    log_line = '187.19.211.179 - - [30/May/2013:00:01:01 -0300] "GET http://www.scielo.br/scielo.php HTTP/1.1" 200 25084 "-" "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)"'

    parsed = checker.parsed_access(log_line)
    self.assertEqual(parsed, None)
示例8: test_is_bot_Spider_sample
# 需要导入模块: from logger.accesschecker import AccessChecker [as 别名]
# 或者: from logger.accesschecker.AccessChecker import parsed_access [as 别名]
def test_is_bot_Spider_sample(self):
    # A request whose user agent is Baiduspider must yield None.
    #
    # NOTE(review): the requested acronym 'csc' is also missing from the
    # mocked acronym -> ISSN map, so this line may be rejected for that
    # reason rather than for the bot user agent — confirm.
    #
    # Record phase: declare the mocked private lookups, then replay.
    patched = self.mocker.patch(AccessChecker)
    patched._allowed_collections()
    self.mocker.result([u'scl', u'arg'])
    patched._acronym_to_issn_dict()
    self.mocker.result({u'zool': u'1984-4670', u'bjmbr': u'1414-431X'})
    self.mocker.replay()

    checker = AccessChecker(collection='scl')

    log_line = '180.76.5.118 - - [24/Dec/2013:04:49:09 -0200] "GET http://www.scielo.br/pdf/csc/v11n2/30434.pdf HTTP/1.1" 200 79618 "-" "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)"'

    parsed = checker.parsed_access(log_line)
    self.assertEqual(parsed, None)
示例9: test_is_bot_Bing_sample
# 需要导入模块: from logger.accesschecker import AccessChecker [as 别名]
# 或者: from logger.accesschecker.AccessChecker import parsed_access [as 别名]
def test_is_bot_Bing_sample(self):
    # A request whose user agent is bingbot must yield None.
    #
    # NOTE(review): the sample line starts with an extra '13245 ' token in
    # front of the IP address; the line may therefore be rejected as
    # malformed rather than as a bot access — confirm this is intentional.
    #
    # Record phase: declare the mocked private lookups, then replay.
    patched = self.mocker.patch(AccessChecker)
    patched._allowed_collections()
    self.mocker.result([u'scl', u'arg'])
    patched._acronym_to_issn_dict()
    self.mocker.result({u'zool': u'1984-4670', u'bjmbr': u'1414-431X'})
    self.mocker.replay()

    checker = AccessChecker(collection='scl')

    log_line = '13245 157.56.92.164 - - [30/Nov/2013:03:53:26 -0200] "GET http://www.scielo.br/scielo.php?script=sci_arttext&pid=S0104-87752010000200013 HTTP/1.1" 200 108777 "-" "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)"'

    parsed = checker.parsed_access(log_line)
    self.assertEqual(parsed, None)
示例10: test_is_bot_GoogleBot_sample
# 需要导入模块: from logger.accesschecker import AccessChecker [as 别名]
# 或者: from logger.accesschecker.AccessChecker import parsed_access [as 别名]
def test_is_bot_GoogleBot_sample(self):
    # An otherwise well-formed article request whose user agent is
    # Googlebot must yield None.
    #
    # Record phase: declare the mocked private lookups, then replay.
    patched = self.mocker.patch(AccessChecker)
    patched._allowed_collections()
    self.mocker.result([u'scl', u'arg'])
    patched._acronym_to_issn_dict()
    self.mocker.result({u'zool': u'1984-4670', u'bjmbr': u'1414-431X'})
    self.mocker.replay()

    checker = AccessChecker(collection='scl')

    log_line = '66.249.75.131 - - [24/Dec/2013:04:49:09 -0200] "GET http://www.scielo.br/scielo.php?script=sci_arttext&pid=S0102-79722002000200013 HTTP/1.1" 200 102967 "-" "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"'

    parsed = checker.parsed_access(log_line)
    self.assertEqual(parsed, None)
示例11: Bulk
# 需要导入模块: from logger.accesschecker import AccessChecker [as 别名]
# 或者: from logger.accesschecker.AccessChecker import parsed_access [as 别名]
class Bulk(object):
def __init__(self, collection, mongo_uri=MONGO_URI, logs_source=LOGS_SOURCE, counter_compliant=None, skipped_log_dir=None):
    """Set up the processed-files collection, lock set and access checker.

    :param collection: collection identifier; appended to ``mongo_uri``
        and passed to ``AccessChecker``.
    :param mongo_uri: base MongoDB URI; the effective URI is
        ``"<mongo_uri>_<collection>"``.
    :param logs_source: stored as ``_logs_source``.
    :param counter_compliant: stored as ``_counter_compliant``.
    :param skipped_log_dir: directory in which ``__enter__`` opens the
        skipped-lines log; falsy disables it.
    """
    # The URI must be in place first: get_proc_collection() connects
    # through it.
    self._mongo_uri = "%s_%s" % (mongo_uri, collection)
    self._proc_coll = self.get_proc_collection()

    # Plain configuration values.
    self._collection = collection
    self._logs_source = logs_source
    self._counter_compliant = counter_compliant

    self._ts = utils.TimedSet(expired=utils.checkdatelock)

    # The skipped-lines log file itself is only opened in __enter__.
    self._skipped_log_dir = skipped_log_dir
    self._skipped_log = None

    self._ac = AccessChecker(self._collection)
def __enter__(self):
    """Enter the context, opening the skipped-lines log when configured.

    When ``_skipped_log_dir`` is set, a file named after the current
    ISO-formatted timestamp is opened for writing inside that directory
    and kept in ``_skipped_log``.

    :returns: this instance.
    :raises ValueError: if ``open`` rejects the computed file name with a
        ``ValueError``.  Other errors raised by ``open`` propagate
        unchanged.
    """
    if self._skipped_log_dir:
        now = datetime.datetime.now().isoformat()
        # Collapse the double slash produced when the directory already
        # ends with '/'.
        skipped_log = '/'.join([self._skipped_log_dir, now]).replace('//', '/')

        try:
            self._skipped_log = open(skipped_log, 'w')
        except ValueError:
            # Raising a plain string is not allowed and would itself fail
            # with a TypeError; raise a real exception with the message.
            raise ValueError("Invalid directory or file name: %s" % skipped_log)

    return self
def __exit__(self, exc_type, exc_value, traceback):
    """Leave the context: drop helper references and close the skipped log.

    Returns ``None``, so an exception raised inside the ``with`` body is
    not suppressed.
    """
    self._ts = self._ac = None

    skipped_log = self._skipped_log
    if skipped_log:
        skipped_log.close()
def _mongodb_connect(self, mdb_database):
    """Connect to the database named in ``_mongo_uri`` and return one collection.

    The host, port, credentials and database name (URI path without the
    leading '/') are taken from ``_mongo_uri``.  Authentication is only
    attempted when the URI carries both a user name and a password.

    :param mdb_database: name of the collection to return.
    :returns: ``db[mdb_database]`` for the connected database.
    """
    uri = urlparse.urlparse(self._mongo_uri)

    client = pymongo.MongoClient(host=uri.hostname, port=uri.port)
    database = client[uri.path[1:]]

    user, password = uri.username, uri.password
    if user and password:
        database.authenticate(user, password)

    return database[mdb_database]
def get_proc_collection(self):
    """
    Return the 'proc_files' collection, indexed on 'file_name'.

    This collection keeps the name of each processed file, so that the
    same file is not processed again.
    """
    proc_files = self._mongodb_connect('proc_files')
    proc_files.ensure_index('file_name')
    return proc_files
def write_skipped_log(self, line):
    """Append ``line`` to the skipped-lines log; no-op when no log is open."""
    skipped_log = self._skipped_log
    if not skipped_log:
        return

    # Each entry is terminated by a space followed by CRLF.
    skipped_log.write("%s \r\n" % line)
def read_log(self, logfile):
logfile = logfile.strip()
# Verifica se arquivo já foi processado.
if self._proc_coll.find({'file_name': logfile}).count() > 0:
logger.info('File already processed %s' % logfile)
return None
reader = codecs
if utils.check_file_format(logfile) == 'gzip':
reader = gzip
# Registra em base de dados de arquivos processados o novo arquivo.
logger.info("Processing: %s" % logfile)
self._proc_coll.insert({'file_name': logfile})
with reader.open(logfile, 'rb') as f:
with Local(self._mongo_uri, self._collection) as rq:
log_file_line = 0
for raw_line in f:
log_file_line += 1
logger.debug("Reading line {0} from file {1}".format(
str(log_file_line), logfile))
logger.debug(raw_line)
try:
parsed_line = self._ac.parsed_access(raw_line)
except ValueError as e:
logger.error("%s: %s" % (e.message, raw_line))
continue
if not parsed_line:
continue
if COUNTER_COMPLIANT:
# Counter Mode Accesses
locktime = 10
if parsed_line['access_type'] == "PDF":
locktime = 30
try:
lockid = '_'.join([parsed_line['ip'],
parsed_line['code'],
parsed_line['script']])
self._ts.add(lockid, parsed_line['iso_datetime'], locktime)
rq.register_access(parsed_line)
except ValueError:
#.........这里部分代码省略.........