This article collects typical usage examples of the Python method common.logger.Logger.log. If you have been wondering exactly how Logger.log is used and what it looks like in real code, the hand-picked examples below may help. You can also read more about its enclosing class, common.logger.Logger.
Five code examples of Logger.log are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code samples.
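Before diving into the examples, here is a minimal sketch of the two call patterns they share. This is only an illustration inferred from the snippets below: the constructor arguments, the optional numeric second argument to log(), and the class-level call style are assumptions, not a documented API of common.logger. (Examples 2 and 3 call log directly on the class, so in that project it appears to be a static or class method.)

# A sketch of the call patterns seen in the examples below; argument
# values are placeholders and the numeric "level" argument is an assumption.
from common.logger import Logger

logger = Logger("crawler", "example.com")                 # instance style (Examples 1, 4, 5)
logger.log("crawling url %s" % "http://example.com/", 2)

Logger.log("some message")                                # class-level style (Examples 2, 3)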
Example 1: BatchCrawler
# Required module import: from common.logger import Logger [as alias]
# Or: from common.logger.Logger import log [as alias]
class BatchCrawler():
    MAX_DOCS_NUM = 100

    def __init__(self, database_config_path, source_name, domain, encode, request_interval):
        self.logger = Logger("crawler", domain)
        self.adapter = DocRawAdapter(database_config_path, source_name, self.logger)
        self.domain = domain
        self.encode = encode
        self.request_interval = request_interval

    def run(self):
        while True:
            count = 0
            try:
                for url_hash, url in self.adapter.load_uncrawled_docs(BatchCrawler.MAX_DOCS_NUM):
                    count += 1
                    self.logger.log("crawling url %s"%url, 2)
                    page = common_utils.page_crawl(url)
                    if page == None:
                        self.adapter.update_doc_raw_as_crawled_failed(url_hash)
                        continue
                    if self.encode != "utf-8":
                        page = unicode(page, self.encode).encode("utf-8")
                    self.adapter.update_doc_raw_with_crawled_page(url_hash, "utf-8", page)
                    time.sleep(float(self.request_interval))
                if count < BatchCrawler.MAX_DOCS_NUM:
                    break
            except:
                self.logger.log("mongo error")
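A minimal usage sketch for the class above, assuming a database config file and a crawl source already exist; every argument value here is a hypothetical placeholder rather than a value from the original project.

# Hypothetical invocation; all argument values are placeholders.
crawler = BatchCrawler("conf/database.json", "example_source",
                       "example.com", "utf-8", request_interval=1.0)
crawler.run()  # loops until a batch yields fewer than MAX_DOCS_NUM uncrawled docs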
Example 2: DumpPass
# Required module import: from common.logger import Logger [as alias]
# Or: from common.logger.Logger import log [as alias]
def DumpPass(outputFilename, passName):
    c1File = ParseC1visualizerStream(os.path.basename(outputFilename), open(outputFilename, "r"))
    compiler_pass = c1File.findPass(passName)
    if compiler_pass:
        maxLineNo = compiler_pass.startLineNo + len(compiler_pass.body)
        lenLineNo = len(str(maxLineNo)) + 2
        curLineNo = compiler_pass.startLineNo
        for line in compiler_pass.body:
            Logger.log((str(curLineNo) + ":").ljust(lenLineNo) + line)
            curLineNo += 1
    else:
        Logger.fail("Pass \"" + passName + "\" not found in the output")
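A minimal call sketch for DumpPass, assuming a C1visualizer output file is already on disk; both the file name and the pass name are placeholders.

# Hypothetical call; file name and pass name are placeholders.
DumpPass("compiler_output.cfg", "liveness")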
Example 3: ListPasses
# Required module import: from common.logger import Logger [as alias]
# Or: from common.logger.Logger import log [as alias]
def ListPasses(outputFilename):
    c1File = ParseC1visualizerStream(os.path.basename(outputFilename), open(outputFilename, "r"))
    for compiler_pass in c1File.passes:
        Logger.log(compiler_pass.name)
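And the corresponding sketch for ListPasses; the file name is again a placeholder.

# Hypothetical call; the file name is a placeholder.
ListPasses("compiler_output.cfg")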
Example 4: SpiderBase
# Required module import: from common.logger import Logger [as alias]
# Or: from common.logger.Logger import log [as alias]
class SpiderBase():
    def __init__(self, data_adapter_config_path, source_name, encode = "utf-8", parse_try_limit = 3):
        self.logger = Logger("spider", source_name)
        self.doc_raw_adapter = DocRawAdapter(data_adapter_config_path, source_name, self.logger)
        self.data_raw_adapter = DataRawAdapter(data_adapter_config_path, source_name, self.logger)
        self.image_store_adapter = ImageStoreAdapter(data_adapter_config_path, self.logger)
        self.source_name = source_name
        self.encode = encode
        self.parse_try_limit = parse_try_limit
        self.exploring_times = 0

    def url_exists_in_doc_raw(self, url):
        url_hash = common_utils.gen_url_hash(url)
        return self.doc_raw_adapter.has_doc_raw_by_url_hash(url_hash)

    def url_hash_exists_in_data_raw(self, url_hash):
        return self.data_raw_adapter.has_data_raw_by_url_hash(url_hash)

    def parse(self, url_hash, page, encode, stage, context, created_at, page_crawled_at):
        '''
        you must override this function
        '''
        self.logger.log("what the hell!!!you have to override to implement parse logic!!!")
        features = {}
        images = []
        images.append({"name" : "test_image_name", "url" : "test_image_url", "image_format" : "jpg"})
        next_update_time = None
        children = []
        children.append({"url" : "test_url", "stage" : "test_stage", "context" : "test_context", "operation_flag" : SpiderChildNodeOperationFlag.NEW_ADD})
        return features, images, next_update_time, children

    def explore_child(self, father_url_hash, url, url_hash, stage, context, operation_flag):
        if operation_flag == SpiderChildNodeOperationFlag.NEW_ADD:
            if not self.doc_raw_adapter.has_doc_raw_by_url_hash(url_hash):
                self.doc_raw_adapter.create_doc_raw(url_hash, url, stage, context, father_url_hash)
                self.logger.log("child [%s] %s new added."%(url_hash, url))
        else:
            if self.doc_raw_adapter.has_doc_raw_by_url_hash(url_hash):
                if operation_flag == SpiderChildNodeOperationFlag.UPDATE_INFO_ONLY:
                    self.doc_raw_adapter.update_doc_raw_with_node_info(url_hash,
                                                                       stage = stage,
                                                                       context = context,
                                                                       father = father_url_hash)
                    self.logger.log("child [%s]'s info is updated."%(url_hash))
                elif operation_flag == SpiderChildNodeOperationFlag.FORCE_TO_REPARSE:
                    self.doc_raw_adapter.update_doc_raw_with_node_info(url_hash,
                                                                       stage = stage,
                                                                       context = context,
                                                                       father = father_url_hash,
                                                                       status_flag = DocRawStatus.PAGE_CRAWLED)
                    self.logger.log("child [%s] is set to reparse data."%(url_hash))
                elif operation_flag == SpiderChildNodeOperationFlag.FORCE_TO_RECRAWL:
                    self.doc_raw_adapter.update_doc_raw_with_node_info(url_hash,
                                                                       stage = stage,
                                                                       context = context,
                                                                       father = father_url_hash,
                                                                       status_flag = DocRawStatus.NEW_ADDED)
                    self.logger.log("child [%s]'s is set to recrawled page."%(url_hash))

    def spider_run(self):
        for url_hash, url, stage, page, encode, context, created_at, page_crawled_at in self.doc_raw_adapter.load_unparsed_doc_raw():
            try:
                self.logger.log("parsing [%s]."%(url_hash))
                features, images, next_update_time, children = self.parse(url_hash, page, encode, stage, context, created_at, page_crawled_at)
                if images != None:
                    for i in range(0, len(images)):
                        try:
                            image_id = common_utils.gen_url_hash(images[i]["url"])
                            if not self.image_store_adapter.has_image_index_by_image_id(image_id):
                                images[i]["image_id"] = image_id
                                self.image_store_adapter.create_image_index(image_id, images[i]["image_format"], images[i]["url"])
                                self.logger.log("image [%s] created for [%s]."%(image_id, url_hash))
                        except BaseException, e:
                            self.logger.log("Error occured when creating image index: %s"%(e))
                if features != None:
                    if not self.url_hash_exists_in_data_raw(url_hash):
                        self.data_raw_adapter.create_data_raw(url_hash, url, features, images)
                        self.logger.log("features for [%s] is added."%(url_hash))
                    else:
                        self.data_raw_adapter.update_data_raw(url_hash, features, images)
                        self.logger.log("features for [%s] is updated."%(url_hash))
                children_url_hashes = None
                if children != None:
                    children_url_hashes = []
                    for child in children:
                        try:
                            url_new = child["url"]
                            url_hash_new = common_utils.gen_url_hash(child["url"])
#.........part of the code is omitted here.........
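Because parse() is meant to be overridden, a minimal subclass sketch may make the contract clearer. The subclass name, the config path, and the returned values below are hypothetical; only the shape of the return tuple (features, images, next_update_time, children) comes from the base class above.

# Hypothetical subclass; names and values are placeholders.
class ExampleSpider(SpiderBase):
    def parse(self, url_hash, page, encode, stage, context, created_at, page_crawled_at):
        self.logger.log("parsing page for [%s]" % url_hash)
        features = {"title": "example"}   # fields extracted from the page
        images = []                       # no images in this sketch
        next_update_time = None           # no scheduled re-crawl
        children = []                     # no child URLs discovered
        return features, images, next_update_time, children

spider = ExampleSpider("conf/data_adapter.json", "example_source")
spider.spider_run()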
Example 5: __init__
# Required module import: from common.logger import Logger [as alias]
# Or: from common.logger.Logger import log [as alias]
class ImageCrawler:
    NUM_PER_FETCH = 100
    NUM_PROCESSES = 10

    def __init__(self, database_config_path):
        self.queue = JoinableQueue()
        self.logger = Logger("image_crawler")
        self.adapter = ImageStoreAdapter(database_config_path, self.logger)

    def produce(self):
        while True:
            if self.queue.empty():
                for image_id, link in self.adapter.load_undownloaded_images(self.NUM_PER_FETCH):
                    self.logger.log("Producer: add new image to crawl:" + image_id + " " + link)
                    self.queue.put((image_id, link))
            time.sleep(10)

    def consume(self, process_id):
        while True:
            self.logger.log("Consumer process:" + str(process_id) + " fetch new image from queue")
            if not self.queue.empty():
                image_id, link = self.queue.get()
                self.logger.log("Consumer process:" + str(process_id) + " start crawling " + str(link))
                image = common_utils.page_crawl(link)
                if image != None:
                    self.logger.log(link + "crawled successfully")
                    self.adapter.store_image(image_id, image)
                else:
                    self.logger.log(link + " failed at crawling")
                    self.adapter.update_image_status(image_id, ImageIndexStatus.DOWNLOAD_FAILED)
                self.queue.task_done()
                time.sleep(1)
            else:
                self.logger.log("Queue empty")
                time.sleep(10)

    def run(self):
        producer = Process(target=self.produce)
        producer.start()
        consumers = []
        for i in range(self.NUM_PROCESSES):
            consumer = Process(target=self.consume, args=(i,))
            consumers.append(consumer)
            consumer.start()
        for consumer in consumers:
            consumer.join()
        producer.join()
        self.queue.join()
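A minimal launch sketch for the crawler above; the config path is a placeholder. The __main__ guard is worth keeping because run() starts multiprocessing.Process workers, and on some platforms child processes re-import the launching module.

# Hypothetical entry point; the config path is a placeholder.
if __name__ == "__main__":
    ImageCrawler("conf/database.json").run()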