

Python Database.set_website_status Method Code Examples

This article collects typical usage examples of the db.Database.set_website_status method in Python. If you have been wondering what Database.set_website_status does or how to call it, the curated examples below should help. You can also explore further usage examples of the db.Database class it belongs to.


Four code examples of the Database.set_website_status method are shown below, sorted by popularity by default.
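Before diving into the examples, here is a minimal sketch of the interface that db.Database.set_website_status appears to expose, inferred purely from the call sites in the examples below. The in-memory dict backend is an illustration only; the actual storage layer of the db module is not shown in these excerpts.

class Database(object):
    """Hypothetical stand-in for db.Database, inferred from the examples."""

    _store = {}  # assumption: the real project persists to a database, not a dict

    @classmethod
    def set_website_status(cls, id, status, result=None):
        # record the crawl status of a website ('queued', 'started', 'finished')
        # and, once finished, its JSON-serialized result
        record = cls._store.setdefault(id, {'status': None, 'result': None})
        record['status'] = status
        if result is not None:
            record['result'] = result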

Example 1: save_result_to_database

# Required import: from db import Database [as alias]
# Or alternatively: from db.Database import set_website_status [as alias]
def save_result_to_database(website):
    """
    Saves result to database
    """
    try:
        Database.set_website_status(id=website.id, status='finished', result=website.result_to_json())
        log.info('Result saved successfully :: {0}'.format(website.url))
    except Exception as e:
        log.exception('Error saving result to database')
Developer ID: sunnykrGupta, Project: Rotto-Links-Scraper, Lines of code: 11, Source file: tasks.py
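The examples reference rDB (a Redis client) and the queues qL and qH without ever showing how they are built. A hedged sketch of that wiring, assuming a local Redis instance and python-rq with illustrative queue names:

import redis
from rq import Queue

# assumptions: a local Redis server and these queue names; the excerpts
# only use rDB, qL and qH, they never show their construction
rDB = redis.Redis(host='localhost', port=6379, db=0)
qL = Queue('low', connection=rDB)
qH = Queue('high', connection=rDB)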

Example 2: save_result_to_database

# Required import: from db import Database [as alias]
# Or alternatively: from db.Database import set_website_status [as alias]
def save_result_to_database(website):
    """
    Saves result to database
    """
    try:
        result = rDB.lrange(website.id+':result', 0, -1)
        if not result:
            result = []
        Database.set_website_status(id=website.id, status='finished', result=result)
        log.info('Result saved successfully :: {0}'.format(website.url))

        # flush redis database
        rDB.flushdb()
    except Exception as e:
        log.exception('Error saving result to database')
Developer ID: KodeKracker, Project: Rotto-Links-Scraper, Lines of code: 17, Source file: tasks.py
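Note that rDB.flushdb() wipes every key in the current Redis database, including the counters of any other website still being crawled. A narrower cleanup inside save_result_to_database, assuming the key-naming convention used throughout these examples, could delete only this website's keys in place of the flush:

        # delete just this website's bookkeeping keys instead of
        # flushing the entire Redis database
        rDB.delete(website.id+':result',
                   website.id+':pages_queued',
                   website.id+':pages_crawled')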

Example 3: dispatch_website

# Required import: from db import Database [as alias]
# Or alternatively: from db.Database import set_website_status [as alias]
def dispatch_website(id, url, keywords):
    """
    Dispatcher to start crawling a website
    """
    try:
        Database.set_website_status(id=id, status='queued')

        # create and set website and page object for a job
        website = Website(id=id, url=url, keywords=keywords)
        website.preInit()
        page = Page(website.url, website.url)

        # set website watch variables in redis db
        rDB.set(website.id+':pages_queued', 1)
        rDB.set(website.id+':pages_crawled', 0)

        # Enqueue job in redis-queue
        job = qL.enqueue(crawl_page, website, page)

        log.debug('Website Added in Queue :: {0}'.format(url))
    except Exception as e:
        log.exception('Error occurred in dispatch_website')
Developer ID: KodeKracker, Project: Rotto-Links-Scraper, Lines of code: 24, Source file: tasks.py
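A hypothetical call site for the dispatcher; the id format and keywords below are illustrative, not taken from the project:

# illustrative values only: in the project the call presumably comes
# from the web layer after a user submits a URL and keywords
dispatch_website(id='5534d1c2e13823094c8b4567',
                 url='http://example.com',
                 keywords=['python', 'scraper'])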

Example 4: crawl_page

# Required import: from db import Database [as alias]
# Or alternatively: from db.Database import set_website_status [as alias]
def crawl_page(website, page):
    """
    Crawl a single page at a time, check whether the crawl of the
    whole website is done, and take the required follow-up steps
    """
    try:
        # set website status to 'started' if this is the first page of the website
        print('Pages Crawled:: {0}'.format(rDB.get(website.id+':pages_crawled')))
        print('Pages Queued:: {0}'.format(rDB.get(website.id+':pages_queued')))

        if rDB.get(website.id+':pages_queued') == '1':
            Database.set_website_status(id=website.id, status='started')

        log.debug('Crawling :: {0}'.format(page.url))

        # get page content
        log.info('Getting Page Content :: {0}'.format(page.url))
        page.get_content()

        # get keywords matched
        keys = page.get_keywords_matched(website.aho)
        log.info('Matched Keywords :: {0}'.format(keys))

        # get external links
        # page.get_external_links()
        log.info('Found External Links :: {0}'.format(len(page.external_links)))

        # get internal links
        page.get_internal_links(website)
        log.info('Found Internal Links :: {0}'.format(len(page.internal_links)))

        # get status code of all links
        log.info('Getting Status of all Links')
        page.get_status_codes_of_links(website)

        log.info('Enqueueing new jobs')
        # enqueue the un-broken internal links
        for p in page.crawl_pages:
            log.info('Enqueued :: {0}'.format(p.url))
            rDB.incr(website.id+':pages_queued')
            qH.enqueue(crawl_page, website, p)


        log.info('Adding Result to website')
        # add rotto links to result
        if page.rotto_links:
            log.info('Broken Links Found :: {0}'.format(page.rotto_links))
            rDB.rpush(website.id+':result', Website.result_to_json(page))

        log.debug('Crawled :: {0}'.format(page.url))

        # increment website crawled page counter
        rDB.incr(website.id+':pages_crawled')

        log.info('Pages Queued:: {0}'.format(rDB.get(website.id+':pages_queued')))
        log.info('Pages Crawled:: {0}'.format(rDB.get(website.id+':pages_crawled')))

        # checks if website crawled completely or not
        if rDB.get(website.id+':pages_queued') == rDB.get(website.id+':pages_crawled'):

            log.info('Website {0} crawled Completely'.format(website.url))

            # save results to database
            log.info('Saving results to database')
            qH.enqueue(save_result_to_database, website)

            # send the email to user
            log.info('Sending email to user')
            send_mail_to_user(website)

    except Exception as e:
        log.exception('Error in crawling :: {0}'.format(page.url))
Developer ID: KodeKracker, Project: Rotto-Links-Scraper, Lines of code: 74, Source file: tasks.py
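One portability note on Example 4: the counter checks compare the values returned by rDB.get() directly against strings, which works under Python 2, where redis-py returns str. Under Python 3 redis-py returns bytes, so a port would either build the client with decode_responses=True or compare integers explicitly. A sketch, assuming the same key names:

import redis

# decode_responses=True makes get() return str instead of bytes, so
# comparisons like == '1' keep working under Python 3
rDB = redis.Redis(host='localhost', port=6379, db=0, decode_responses=True)

def website_crawled_completely(website_id):
    """True once every queued page of the website has been crawled."""
    queued = int(rDB.get(website_id+':pages_queued') or 0)
    crawled = int(rDB.get(website_id+':pages_crawled') or 0)
    return queued == crawled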


Note: The db.Database.set_website_status examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by various developers, and copyright remains with the original authors. For distribution and use, refer to each project's License; do not reproduce without permission.