当前位置: 首页>>代码示例>>Python>>正文


Python Domain.get_active_domains方法代码示例

本文整理汇总了 Python 中 holmes.models.Domain.get_active_domains 方法的典型用法代码示例。如果您正苦于以下问题:Python Domain.get_active_domains 方法的具体用法是什么?Python Domain.get_active_domains 怎么用?有哪些 Python Domain.get_active_domains 的使用例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 holmes.models.Domain 的用法示例。


在下文中一共展示了 Domain.get_active_domains 方法的 6 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。

示例1: test_can_get_active_domains

# 需要导入模块: from holmes.models import Domain [as 别名]
# 或者: from holmes.models.Domain import get_active_domains [as 别名]
    def test_can_get_active_domains(self):
        """``Domain.get_active_domains`` returns only the domain created as active."""
        # Remove every existing Domain row so the length assertion below is exact.
        self.db.query(Domain).delete()

        active_domain = DomainFactory(is_active=True)
        DomainFactory(is_active=False)  # the test expects this one to be left out

        found = Domain.get_active_domains(self.db)

        expect(found).to_length(1)
        expect(found[0].id).to_equal(active_domain.id)
开发者ID:ricardodani,项目名称:holmes-api,代码行数:12,代码来源:test_domain.py

示例2: get_next_jobs_count

# 需要导入模块: from holmes.models import Domain [as 别名]
# 或者: from holmes.models.Domain import get_active_domains [as 别名]
    def get_next_jobs_count(cls, db, config):
        """Return the number of ``Page`` rows whose domain is currently active.

        ``config`` is accepted but not used by this method.
        """
        from holmes.models import Domain

        domain_ids = [domain.id for domain in Domain.get_active_domains(db)]

        count_query = db.query(sa.func.count(Page.id))
        count_query = count_query.filter(Page.domain_id.in_(domain_ids))
        return count_query.scalar()
开发者ID:ricardodani,项目名称:holmes-api,代码行数:14,代码来源:page.py

示例3: get_next_job_list

# 需要导入模块: from holmes.models import Domain [as 别名]
# 或者: from holmes.models.Domain import get_active_domains [as 别名]
    def get_next_job_list(cls, db, expiration, current_page=1, page_size=200):
        """Return one slice of pages from active domains, highest score first.

        Each row carries ``uuid``, ``url``, ``score`` and ``last_review_date``.
        ``current_page`` is 1-based: page 1 covers rows ``0`` to ``page_size - 1``.
        ``expiration`` is accepted but not used by this method.
        """
        from holmes.models import Domain

        start = (current_page - 1) * page_size
        stop = start + page_size

        domain_ids = [domain.id for domain in Domain.get_active_domains(db)]

        columns = (Page.uuid, Page.url, Page.score, Page.last_review_date)
        query = db.query(*columns)
        query = query.filter(Page.domain_id.in_(domain_ids))
        query = query.order_by(Page.score.desc())

        return query[start:stop]
开发者ID:ricardodani,项目名称:holmes-api,代码行数:22,代码来源:page.py

示例4: _verify_workers_limits

# 需要导入模块: from holmes.models import Domain [as 别名]
# 或者: from holmes.models.Domain import get_active_domains [as 别名]
 def _verify_workers_limits(self, url, avg_links_per_page=10):
     """Return ``LimiterModel.has_limit_to_work`` for ``url`` against the active domains."""
     return LimiterModel.has_limit_to_work(
         self.db,
         Domain.get_active_domains(self.db),
         url,
         avg_links_per_page,
     )
开发者ID:ricardodani,项目名称:holmes-api,代码行数:5,代码来源:worker.py

示例5: get_next_job

# 需要导入模块: from holmes.models import Domain [as 别名]
# 或者: from holmes.models.Domain import get_active_domains [as 别名]
    def get_next_job(cls, db, expiration, cache, lock_expiration, avg_links_per_page=10):
        """Pick the next page to review and try to lock it.

        For every active domain, up to ``number_of_workers`` pages are loaded,
        highest score first. The per-domain lists are interleaved round-robin,
        and the first candidate that passes ``Limiter.has_limit_to_work`` and
        for which ``cache.has_next_job_lock`` returns a non-``None`` lock wins.

        ``expiration`` is accepted but not used by this method.

        Returns:
            A dict with keys ``'page'`` (the uuid as ``str``), ``'url'``,
            ``'score'`` and ``'lock'``, or ``None`` when there are no candidate
            pages or none of them could be locked.
        """
        from holmes.models import Settings, Worker, Domain, Limiter  # Avoid circular dependency

        settings = Settings.instance(db)
        number_of_workers = len(db.query(Worker).all())

        active_domains = Domain.get_active_domains(db)

        # One list of candidate pages per domain, kept in active-domain order.
        # A plain list (instead of indexing ``dict.keys()``) works on both
        # Python 2 and Python 3, where dict views are not subscriptable.
        pending = []
        for domain in active_domains:
            pages = db \
                .query(
                    Page.uuid,
                    Page.url,
                    Page.score,
                    Page.last_review_date
                ) \
                .filter(Page.domain_id == domain.id) \
                .order_by(Page.score.desc())[:number_of_workers]
            if pages:
                pending.append(pages)

        # Round-robin across domains: take one page from each in turn.
        pages_in_need_of_review = []
        current_domain = 0
        while pending:
            if current_domain >= len(pending):
                current_domain = 0

            domain_pages = pending[current_domain]
            pages_in_need_of_review.append(domain_pages.pop(0))

            if domain_pages:
                current_domain += 1
            else:
                # Deleting the exhausted domain shifts the next one into this
                # slot, so the index must stay put or that domain is skipped.
                del pending[current_domain]

        if not pages_in_need_of_review:
            return None

        if settings.lambda_score > 0 and settings.lambda_score > pages_in_need_of_review[0].score:
            cls.update_pages_score_by(settings, settings.lambda_score, db)

        for candidate in pages_in_need_of_review:
            if not Limiter.has_limit_to_work(db, active_domains, candidate.url, avg_links_per_page):
                continue

            lock = cache.has_next_job_lock(candidate.url, lock_expiration)
            if lock is not None:
                return {
                    'page': str(candidate.uuid),
                    'url': candidate.url,
                    'score': candidate.score,
                    'lock': lock
                }

        return None
开发者ID:ricardodani,项目名称:holmes-api,代码行数:74,代码来源:page.py

示例6: fill_job_bucket

# 需要导入模块: from holmes.models import Domain [as 别名]
# 或者: from holmes.models.Domain import get_active_domains [as 别名]
    def fill_job_bucket(self, expiration, look_ahead_pages=1000, avg_links_per_page=10.0):
        """Top up the ``next-job-bucket`` redis key with pages that need a review.

        While holding ``next-job-fill-bucket-lock``, up to ``look_ahead_pages``
        pages are loaded per active domain whose ``last_review_date`` is unset
        or at least ``expiration`` seconds in the past. Pages are then taken
        round-robin across domains and handed to ``add_next_job_bucket`` until
        the running count (which starts at the current ``zcard`` of
        ``next-job-bucket``) reaches ``look_ahead_pages`` or no candidates are
        left. A page is skipped when a limiter bucket matching its URL has no
        slots available.

        Returns ``None``. A ``LockTimeout`` raised while acquiring the lock is
        logged and swallowed.
        """
        try:
            with Lock('next-job-fill-bucket-lock', redis=self.redis):
                logging.info('Refilling job bucket. Lock acquired...')
                expired_time = datetime.utcnow() - timedelta(seconds=expiration)

                active_domains = Domain.get_active_domains(self.db)

                if not active_domains:
                    return

                limiter_buckets = self.get_limiter_buckets(active_domains, avg_links_per_page)

                # One list of candidate pages per domain that has any.
                pending = []
                for domain in active_domains:
                    pages = self.db \
                        .query(
                            Page.uuid,
                            Page.url,
                            Page.score,
                            Page.last_review_date
                        ) \
                        .filter(Page.domain_id == domain.id) \
                        .filter(or_(
                            # ``== None`` is intentional: SQLAlchemy turns it into IS NULL.
                            Page.last_review_date == None,
                            Page.last_review_date <= expired_time
                        ))[:look_ahead_pages]

                    if pages:
                        pending.append(pages)

                logging.debug(
                    'Total of %d pages found to add to redis.',
                    sum(len(domain_pages) for domain_pages in pending)
                )

                item_count = int(self.redis.zcard('next-job-bucket'))
                added = 0  # pages added by this call only, reported in the final log line
                current_domain = 0
                while item_count < look_ahead_pages and pending:
                    if current_domain >= len(pending):
                        current_domain = 0

                    domain_pages = pending[current_domain]
                    item = domain_pages.pop(0)

                    has_limit = True
                    logging.debug('Available Limit Buckets: %s', limiter_buckets)
                    # NOTE(review): a bucket matched earlier in this loop stays
                    # decremented even if a later matching bucket rejects the page.
                    for index, (limit, available) in enumerate(limiter_buckets):
                        if limit.matches(item.url):
                            if available <= 0:
                                has_limit = False
                                break
                            limiter_buckets[index] = (limit, available - 1)

                    if has_limit:
                        self.add_next_job_bucket(item.uuid, item.url)
                        item_count += 1
                        added += 1

                    if domain_pages:
                        current_domain += 1
                    else:
                        # Deleting the exhausted domain shifts the next one into
                        # this slot; advancing the index as well would skip it.
                        del pending[current_domain]

                logging.debug('ADDED A TOTAL of %d ITEMS TO REDIS...', added)

        except LockTimeout:
            logging.info("Can't acquire lock. Moving on...")
开发者ID:marcelometal,项目名称:holmes-api,代码行数:69,代码来源:cache.py


注:本文中的holmes.models.Domain.get_active_domains方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。