當前位置: 首頁>>代碼示例>>Python>>正文


Python Domain.get_active_domains方法代碼示例

本文整理匯總了Python中holmes.models.Domain.get_active_domains方法的典型用法代碼示例。如果您正苦於以下問題:Python Domain.get_active_domains方法的具體用法?Python Domain.get_active_domains怎麼用?Python Domain.get_active_domains使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類holmes.models.Domain的用法示例。


在下文中一共展示了Domain.get_active_domains方法的6個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點贊,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: test_can_get_active_domains

# 需要導入模塊: from holmes.models import Domain [as 別名]
# 或者: from holmes.models.Domain import get_active_domains [as 別名]
    def test_can_get_active_domains(self):
        """Only domains flagged ``is_active`` come back from ``get_active_domains``."""
        # Start from an empty table so the created rows are the only ones.
        self.db.query(Domain).delete()

        # One active and one inactive domain; only the first should be listed.
        active_domain = DomainFactory(is_active=True)
        DomainFactory(is_active=False)

        found = Domain.get_active_domains(self.db)

        expect(found).to_length(1)
        expect(found[0].id).to_equal(active_domain.id)
開發者ID:ricardodani,項目名稱:holmes-api,代碼行數:12,代碼來源:test_domain.py

示例2: get_next_jobs_count

# 需要導入模塊: from holmes.models import Domain [as 別名]
# 或者: from holmes.models.Domain import get_active_domains [as 別名]
    def get_next_jobs_count(cls, db, config):
        """Count the pages that belong to currently active domains.

        Args:
            db: Database session exposing ``query``; also handed to
                ``Domain.get_active_domains``.
            config: Accepted for interface compatibility; not read here.

        Returns:
            The number of ``Page`` rows whose ``domain_id`` is the id of an
            active domain, or ``0`` when no domain is active.
        """
        # Imported locally, presumably to avoid a circular import.
        from holmes.models import Domain

        active_domains_ids = [item.id for item in Domain.get_active_domains(db)]

        # An empty IN () list can never match a row, so answer directly
        # instead of sending a query that is known to count nothing.
        if not active_domains_ids:
            return 0

        return db \
            .query(sa.func.count(Page.id)) \
            .filter(Page.domain_id.in_(active_domains_ids)) \
            .scalar()
開發者ID:ricardodani,項目名稱:holmes-api,代碼行數:14,代碼來源:page.py

示例3: get_next_job_list

# 需要導入模塊: from holmes.models import Domain [as 別名]
# 或者: from holmes.models.Domain import get_active_domains [as 別名]
    def get_next_job_list(cls, db, expiration, current_page=1, page_size=200):
        """Return one page of the job list: pages of active domains, best score first.

        Args:
            db: Database session exposing ``query``; also handed to
                ``Domain.get_active_domains``.
            expiration: Accepted for interface compatibility; not read here.
            current_page: 1-based number of the result page to return.
            page_size: Number of rows per result page.

        Returns:
            The ``[lower_bound:upper_bound]`` slice of the
            ``(uuid, url, score, last_review_date)`` rows of pages in active
            domains, ordered by descending score. An empty list when no
            domain is active.
        """
        # Imported locally, presumably to avoid a circular import.
        from holmes.models import Domain

        active_domains_ids = [item.id for item in Domain.get_active_domains(db)]

        # An empty IN () list can never match a row, so skip the query.
        if not active_domains_ids:
            return []

        lower_bound = (current_page - 1) * page_size
        upper_bound = lower_bound + page_size

        pages_query = db \
            .query(
                Page.uuid,
                Page.url,
                Page.score,
                Page.last_review_date
            ) \
            .filter(Page.domain_id.in_(active_domains_ids)) \
            .order_by(Page.score.desc())

        return pages_query[lower_bound:upper_bound]
開發者ID:ricardodani,項目名稱:holmes-api,代碼行數:22,代碼來源:page.py

示例4: _verify_workers_limits

# 需要導入模塊: from holmes.models import Domain [as 別名]
# 或者: from holmes.models.Domain import get_active_domains [as 別名]
 def _verify_workers_limits(self, url, avg_links_per_page=10):
     """Ask ``LimiterModel`` whether ``url`` may be worked on given the active domains.

     Returns whatever ``LimiterModel.has_limit_to_work`` returns for the
     current set of active domains.
     """
     domains_in_use = Domain.get_active_domains(self.db)
     return LimiterModel.has_limit_to_work(
         self.db,
         domains_in_use,
         url,
         avg_links_per_page
     )
開發者ID:ricardodani,項目名稱:holmes-api,代碼行數:5,代碼來源:worker.py

示例5: get_next_job

# 需要導入模塊: from holmes.models import Domain [as 別名]
# 或者: from holmes.models.Domain import get_active_domains [as 別名]
    def get_next_job(cls, db, expiration, cache, lock_expiration, avg_links_per_page=10):
        """Pick the next page to review, rotating fairly between active domains.

        For every active domain the highest-scored pages are fetched (at most
        one per registered worker). The per-domain lists are interleaved
        round-robin, and the first candidate that both passes the limiter and
        yields a lock from ``cache`` is returned.

        Args:
            db: Database session exposing ``query``.
            expiration: Accepted for interface compatibility; not read here.
            cache: Object providing ``has_next_job_lock(url, lock_expiration)``.
            lock_expiration: Forwarded unchanged to ``cache.has_next_job_lock``.
            avg_links_per_page: Forwarded unchanged to
                ``Limiter.has_limit_to_work``.

        Returns:
            A dict with the keys ``'page'`` (the page uuid as ``str``),
            ``'url'``, ``'score'`` and ``'lock'``, or ``None`` when there is
            no candidate page or none of them could be locked.
        """
        from holmes.models import Settings, Worker, Domain, Limiter  # Avoid circular dependency

        settings = Settings.instance(db)
        number_of_workers = len(db.query(Worker).all())

        active_domains = Domain.get_active_domains(db)

        # One list of candidate rows per active domain, best score first.
        # A plain list of lists (instead of a dict indexed through ``keys()``)
        # keeps the rotation order deterministic and works on Python 3, where
        # ``dict.keys()`` cannot be subscripted.
        candidates_per_domain = []
        for domain in active_domains:
            pages = db \
                .query(
                    Page.uuid,
                    Page.url,
                    Page.score,
                    Page.last_review_date
                ) \
                .filter(Page.domain_id == domain.id) \
                .order_by(Page.score.desc())[:number_of_workers]
            if pages:
                candidates_per_domain.append(pages)

        # Round-robin interleave: the best page of every domain, then the
        # second best of every domain, and so on. Taking whole "ranks" at a
        # time means a domain running out of pages never costs another domain
        # its turn.
        pages_in_need_of_review = []
        rank = 0
        while True:
            same_rank = [pages[rank] for pages in candidates_per_domain if rank < len(pages)]
            if not same_rank:
                break
            pages_in_need_of_review.extend(same_rank)
            rank += 1

        if not pages_in_need_of_review:
            return None

        if settings.lambda_score > 0 and settings.lambda_score > pages_in_need_of_review[0].score:
            cls.update_pages_score_by(settings, settings.lambda_score, db)

        for candidate in pages_in_need_of_review:
            if not Limiter.has_limit_to_work(db, active_domains, candidate.url, avg_links_per_page):
                continue

            lock = cache.has_next_job_lock(candidate.url, lock_expiration)

            # ``None`` means no lock was obtained for this url; try the next one.
            if lock is not None:
                return {
                    'page': str(candidate.uuid),
                    'url': candidate.url,
                    'score': candidate.score,
                    'lock': lock
                }

        return None
開發者ID:ricardodani,項目名稱:holmes-api,代碼行數:74,代碼來源:page.py

示例6: fill_job_bucket

# 需要導入模塊: from holmes.models import Domain [as 別名]
# 或者: from holmes.models.Domain import get_active_domains [as 別名]
    def fill_job_bucket(self, expiration, look_ahead_pages=1000, avg_links_per_page=10.0):
        """Top up the ``next-job-bucket`` with pages that are due for review.

        While holding the ``next-job-fill-bucket-lock`` lock, pages of every
        active domain that were never reviewed, or whose last review is older
        than ``expiration`` seconds, are collected. They are then handed to
        ``self.add_next_job_bucket`` round-robin across domains until the
        bucket holds ``look_ahead_pages`` items or no candidates remain.
        Candidates whose url matches a limiter with no capacity left are
        dropped.

        Args:
            expiration: Age in seconds after which a reviewed page is due again.
            look_ahead_pages: Target size of the bucket, and also the maximum
                number of candidate pages fetched per domain.
            avg_links_per_page: Forwarded unchanged to
                ``self.get_limiter_buckets``.

        Returns:
            None. If the lock cannot be acquired (``LockTimeout``) the method
            logs that fact and returns without doing any work.
        """
        try:
            with Lock('next-job-fill-bucket-lock', redis=self.redis):
                logging.info('Refilling job bucket. Lock acquired...')
                expired_time = datetime.utcnow() - timedelta(seconds=expiration)

                active_domains = Domain.get_active_domains(self.db)

                if not active_domains:
                    return

                active_domains_ids = [item.id for item in active_domains]

                limiter_buckets = self.get_limiter_buckets(active_domains, avg_links_per_page)

                # One list of candidate rows per domain that has any.
                all_domains_pages_in_need_of_review = []

                for domain_id in active_domains_ids:
                    # ``== None`` is intentional: it is the column expression
                    # that becomes ``IS NULL`` in the generated SQL.
                    pages = self.db \
                        .query(
                            Page.uuid,
                            Page.url,
                            Page.score,
                            Page.last_review_date
                        ) \
                        .filter(Page.domain_id == domain_id) \
                        .filter(or_(
                            Page.last_review_date == None,
                            Page.last_review_date <= expired_time
                        ))[:look_ahead_pages]

                    if pages:
                        all_domains_pages_in_need_of_review.append(pages)

                # Logging arguments are passed separately so the message is
                # only formatted when the debug level is actually enabled.
                logging.debug(
                    'Total of %d pages found to add to redis.',
                    sum(len(item) for item in all_domains_pages_in_need_of_review)
                )

                # Current bucket size; counts up as items are added.
                item_count = int(self.redis.zcard('next-job-bucket'))
                added_count = 0
                current_domain = 0
                while item_count < look_ahead_pages and all_domains_pages_in_need_of_review:
                    if current_domain >= len(all_domains_pages_in_need_of_review):
                        current_domain = 0

                    domain_pages = all_domains_pages_in_need_of_review[current_domain]
                    item = domain_pages.pop(0)

                    has_limit = True
                    logging.debug('Available Limit Buckets: %s', limiter_buckets)
                    for index, (limit, available) in enumerate(limiter_buckets):
                        if limit.matches(item.url):
                            if available <= 0:
                                has_limit = False
                                break
                            # Consume one unit of this limiter's capacity.
                            limiter_buckets[index] = (limit, available - 1)

                    if has_limit:
                        self.add_next_job_bucket(item.uuid, item.url)
                        item_count += 1
                        added_count += 1

                    if domain_pages:
                        current_domain += 1
                    else:
                        # No more pages in this domain: remove it from the
                        # list. The following domain slides into this index,
                        # so the index must not advance or that domain would
                        # lose its turn.
                        del all_domains_pages_in_need_of_review[current_domain]

                # Report what this run added, not the bucket's total size.
                logging.debug('ADDED A TOTAL of %d ITEMS TO REDIS...', added_count)

        except LockTimeout:
            logging.info("Can't acquire lock. Moving on...")
開發者ID:marcelometal,項目名稱:holmes-api,代碼行數:69,代碼來源:cache.py


注:本文中的holmes.models.Domain.get_active_domains方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。