本文整理汇总了Python中db.Database.set_website_status方法的典型用法代码示例。如果您正苦于以下问题:Python Database.set_website_status方法的具体用法?Python Database.set_website_status怎么用?Python Database.set_website_status使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类db.Database
的用法示例。
在下文中一共展示了Database.set_website_status方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: save_result_to_database
# 需要导入模块: from db import Database [as 别名]
# 或者: from db.Database import set_website_status [as 别名]
def save_result_to_database(website):
    """Persist a finished website's crawl result to the database.

    Marks the website row as 'finished' and stores the JSON-serialized
    result produced by the website object itself. Failures are logged
    (with traceback) and swallowed.
    """
    try:
        serialized = website.result_to_json()
        Database.set_website_status(id=website.id, status='finished',
                                    result=serialized)
        log.info('Result Save successfully :: {0}'.format(website.url))
    except Exception as e:
        log.exception('Error in saving result in database')
示例2: save_result_to_database
# 需要导入模块: from db import Database [as 别名]
# 或者: from db.Database import set_website_status [as 别名]
def save_result_to_database(website):
    """Persist crawl results held in redis to the database.

    Reads the whole result list for this website from redis, stores it
    with a 'finished' status, then flushes the redis database. Failures
    are logged (with traceback) and swallowed.
    """
    try:
        stored = rDB.lrange(website.id + ':result', 0, -1)
        result = stored if stored else []
        Database.set_website_status(id=website.id, status='finished',
                                    result=result)
        log.info('Result Save successfully :: {0}'.format(website.url))
        # NOTE(review): flushdb wipes *every* key in the current redis DB,
        # including counters of any other in-flight website — confirm this
        # single-site-at-a-time assumption is intended.
        rDB.flushdb()
    except Exception as e:
        log.exception('Error in saving result in database')
示例3: dispatch_website
# 需要导入模块: from db import Database [as 别名]
# 或者: from db.Database import set_website_status [as 别名]
def dispatch_website(id, url, keywords):
    """Queue a website for crawling.

    Marks the website as 'queued', builds the Website and root Page
    objects, initialises the per-site page counters in redis, and
    enqueues the first crawl_page job. Failures are logged (with
    traceback) and swallowed.
    """
    try:
        Database.set_website_status(id=id, status='queued')
        # Build the website and its root page for the first crawl job.
        site = Website(id=id, url=url, keywords=keywords)
        site.preInit()
        root_page = Page(site.url, site.url)
        # Redis counters used later to detect crawl completion:
        # one page queued (the root), none crawled yet.
        rDB.set(site.id + ':pages_queued', 1)
        rDB.set(site.id + ':pages_crawled', 0)
        # Hand the root page to the redis-queue worker.
        qL.enqueue(crawl_page, site, root_page)
        log.debug('Website Added in Queue :: {0}'.format(url))
    except Exception as e:
        log.exception('Error occurred in dispatch website')
示例4: crawl_page
# 需要导入模块: from db import Database [as 别名]
# 或者: from db.Database import set_website_status [as 别名]
def crawl_page(website, page):
    """Crawl a single page and drive the per-website crawl bookkeeping.

    Fetches the page content, matches keywords, collects internal and
    external links, checks their status codes, enqueues follow-up pages,
    and records broken ("rotto") links in redis. When the queued and
    crawled counters match, the website is considered fully crawled: the
    result save is enqueued and the user is e-mailed.

    Parameters:
        website: project Website object; its crawl state lives in redis
                 keys prefixed with ``website.id``.
        page: project Page object for the URL to crawl.

    Failures are logged (with traceback) and swallowed.
    """
    try:
        # Fixed: the two Python-2-only ``print`` statements here were the
        # only non-logger output in the module; replaced with log.debug
        # for consistency (and Python 3 compatibility).
        log.debug('Pages Crawled:: {0}'.format(rDB.get(website.id + ':pages_crawled')))
        log.debug('Pages Queued:: {0}'.format(rDB.get(website.id + ':pages_queued')))
        # First page of the site: flip status from 'queued' to 'started'.
        # NOTE(review): this compares the redis value against a str, so it
        # assumes the redis client returns decoded strings (decode_responses)
        # — confirm, since redis-py returns bytes by default on Python 3.
        if rDB.get(website.id + ':pages_queued') == '1':
            Database.set_website_status(id=website.id, status='started')
        log.debug('Crawling :: {0}'.format(page.url))
        # Fetch the raw page content.
        log.info('Getting Page Content :: {0}'.format(page.url))
        page.get_content()
        # Match the website's keywords (Aho-Corasick automaton).
        keys = page.get_keywords_matched(website.aho)
        log.info('Matched Keywords :: {0}'.format(keys))
        # External links: extraction call is disabled; only the count of
        # whatever page.external_links already holds is reported.
        # page.get_external_links()
        log.info('Found External Links :: {0}'.format(len(page.external_links)))
        # Discover internal links to crawl next.
        page.get_internal_links(website)
        log.info('Found Internal Links :: {0}'.format(len(page.internal_links)))
        # Probe status codes of all discovered links.
        log.info('Getting Status of all Links')
        page.get_status_codes_of_links(website)
        log.info('Enqueueing New Jobs ')
        # Enqueue the non-broken internal links, bumping the queued counter
        # for each so completion detection stays accurate.
        for p in page.crawl_pages:
            log.info('Enqueued :: {0}'.format(p.url))
            rDB.incr(website.id + ':pages_queued')
            qH.enqueue(crawl_page, website, p)
        log.info('Adding Result to website')
        # Record broken links found on this page in the redis result list.
        if page.rotto_links:
            log.info('Broken Links Found :: {0}'.format(page.rotto_links))
            rDB.rpush(website.id + ':result', Website.result_to_json(page))
        log.debug('Crawled :: {0}'.format(page.url))
        # This page is done: bump the crawled counter.
        rDB.incr(website.id + ':pages_crawled')
        log.info('Pages Queued:: {0}'.format(rDB.get(website.id + ':pages_queued')))
        log.info('Pages Crawled:: {0}'.format(rDB.get(website.id + ':pages_crawled')))
        # Crawl complete when every queued page has been crawled.
        # NOTE(review): string/bytes equality of the two redis values —
        # same client-decoding assumption as above.
        if rDB.get(website.id + ':pages_queued') == rDB.get(website.id + ':pages_crawled'):
            log.info('Website {0} crawled Completely'.format(website.url))
            # Persist accumulated results asynchronously.
            log.info('Saving results to database')
            qH.enqueue(save_result_to_database, website)
            # Notify the user that the crawl has finished.
            log.info('Sending email to user')
            send_mail_to_user(website)
    except Exception as e:
        log.exception('Error in crawling :: {0}'.format(page.url))