当前位置: 首页>>代码示例>>Python>>正文


Python Page.select方法代码示例

本文整理汇总了Python中models.Page.select方法的典型用法代码示例。如果您正苦于以下问题：Python Page.select方法的具体用法？Python Page.select怎么用？Python Page.select使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类models.Page的用法示例。


在下文中一共展示了Page.select方法的8个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_save_zhuanlan

# 需要导入模块: from models import Page [as 别名]
# 或者: from models.Page import select [as 别名]
def test_save_zhuanlan():
  """Log and save Zhihu article pages whose title contains a fixed keyword.

  Joins Page with Task, keeps rows whose task has page_type 'zhihu_article'
  and whose title contains the keyword, groups by task with a HAVING clause
  on the maximum watch_date, and calls to_local_file(folder='deep',
  fetch_images=False) on each result (at most 9999 rows).
  """
  # Name each predicate so the query reads as a sequence of steps.
  is_article = Task.page_type == 'zhihu_article'
  title_matches = Page.title.contains('无痛的机器学习')
  is_latest_watch = Page.watch_date == fn.MAX(Page.watch_date)

  joined = Page.select(Page, Task).join(Task)
  filtered = joined.where(is_article & title_matches)
  per_task = filtered.group_by(Page.task).having(is_latest_watch)

  for entry in per_task.limit(9999):
    log(entry.title)
    log(entry.task)
    entry.to_local_file(folder='deep', fetch_images=False)
开发者ID:probe301,项目名称:the-north-remembers,代码行数:13,代码来源:loop_watch.py

示例2: test_to_local_file_3

# 需要导入模块: from models import Page [as 别名]
# 或者: from models.Page import select [as 别名]
def test_to_local_file_3():
  """Log and save pages whose topic contains a fixed keyword.

  Joins Page with Task, filters on Page.topic, groups by task with a HAVING
  clause on the maximum watch_date, and calls to_local_file(folder='deep',
  fetch_images=False) on each result (at most 8800 rows).
  """
  topic_matches = Page.topic.contains('矩阵')
  is_latest_watch = Page.watch_date == fn.MAX(Page.watch_date)

  joined = Page.select(Page, Task).join(Task)
  per_task = (joined.where(topic_matches)
              .group_by(Page.task)
              .having(is_latest_watch))

  for entry in per_task.limit(8800):
    log(entry.title)
    entry.to_local_file(folder='deep', fetch_images=False)
开发者ID:probe301,项目名称:the-north-remembers,代码行数:14,代码来源:loop_watch.py

示例3: main

# 需要导入模块: from models import Page [as 别名]
# 或者: from models.Page import select [as 别名]
def main():
    """Fetch every page with language 'jquery' via wget and record its save path.

    For each matching page a destination directory
    pages/<language>/<query>/<rank> is created if missing and run_wget is
    invoked.  When wget exits with code 5 (SSL error) the link is re-checked
    with requests; if requests accepts the certificate, wget is retried with
    certificate checks skipped, otherwise the page is counted as failed.
    On success the first "Saving to:" location reported by wget is stored in
    page.dest and the page is saved.
    """
    pages = Page.select().where(Page.language == 'jquery')

    # Set up progress bar.  It is sized to the pages that are actually
    # iterated; sizing it to the whole table left the bar and the ETA wrong
    # whenever the table held pages of other languages.
    widgets = [
        'Progress: ', Percentage(), ' ', Bar(marker=RotatingMarker()), ' ', ETA(),
        ' Downloaded ', Counter(), ' sites.'
    ]
    pbar = ProgressBar(widgets=widgets, maxval=pages.count())
    pbar.start()

    for page_index, page in enumerate(pages, start=1):

        dest = os.path.join('pages', page.language, page.query, str(page.rank))
        if not os.path.isdir(dest):
            os.makedirs(dest)

        # Run wget to fetch page and all its dependencies.
        output, return_code = run_wget(page.link, dest)
        bad_ssl = False
        if return_code == 5:  # SSL error -- double-check with requests package
            try:
                requests.get(page.link)
                output, return_code = run_wget(page.link, dest, skip_certificate=True)
            except requests.exceptions.SSLError:
                bad_ssl = True

        # First downloaded file is the index.  Store where it's saved.
        save_locs = re.findall(r"^Saving to: '(.*)'$", output, re.MULTILINE)
        if bad_ssl or not save_locs:
            # logging.warn is a deprecated alias of logging.warning.
            logging.warning("Failed fetch (code=%d): %s", return_code, page.link)
        else:
            page.dest = save_locs[0]
            page.save()
            logging.info("Fetched file (code=%d): %s", return_code, page.link)

        pbar.update(page_index)

    pbar.finish()
开发者ID:andrewhead,项目名称:StackSkim,代码行数:42,代码来源:fetch_pages.py

示例4: label_pages

# 需要导入模块: from models import Page [as 别名]
# 或者: from models.Page import select [as 别名]
def label_pages(start_index, unknown_only=False, purpose=None):

    page_query = \
        (Page.select()
             .group_by(Page.link)
             .where(
                 Page.language == 'regex',
                 Page.has_example == 1,
                 ))
    pages = [p for p in page_query]
    random.shuffle(pages)

    browser = webdriver.Firefox()
    print "Press enter to open page.",
    raw_input()
    for i, p in enumerate(pages[start_index:], start=start_index):

        if ((unknown_only and p.purpose != 'unknown') or
           (purpose is not None and p.purpose != purpose)):
            continue

        link = build_local_url(p)
        browser.get(link)

        pshort = ''
        while pshort not in PURPOSES.keys():
            pshort = raw_input(
                "Page {idx} loaded. Type class ({opts}): "
                .format(idx=i, opts=','.join(PURPOSES.keys())))
            pshort = pshort.lower()

        for same_page in Page.select().where(Page.link == p.link):
            same_page.purpose = PURPOSES[pshort]
            same_page.save()

    print "You have labeled the purposes of all pages."
    browser.close()
开发者ID:andrewhead,项目名称:StackSkim,代码行数:39,代码来源:mark_purposes.py

示例5: order_pages

# 需要导入模块: from models import Page [as 别名]
# 或者: from models.Page import select [as 别名]
def order_pages(language, random_seed):
    """Return the pages of `language` that have an example, in seeded random order.

    The random module is seeded with random_seed * hash(language) so the order
    differs per language.  Matching pages are grouped by link, their ids are
    shuffled, and each id is then loaded again with Page.get.
    """
    # NOTE(review): hash() of a str is only stable across runs when hash
    # randomization is disabled (it is enabled by default on Python 3) --
    # confirm the interpreter settings if reproducible orderings matter.
    random.seed(random_seed * hash(language))

    matching = (
        Page.select()
        .group_by(Page.link)
        .where(
            Page.language == language,
            Page.has_example == 1,
        )
    )

    shuffled_ids = []
    for row in matching:
        shuffled_ids.append(row.id)
    random.shuffle(shuffled_ids)

    return [Page.get(Page.id == page_id) for page_id in shuffled_ids]
开发者ID:andrewhead,项目名称:StackSkim,代码行数:18,代码来源:order.py

示例6: test_to_local_file

# 需要导入模块: from models import Page [as 别名]
# 或者: from models.Page import select [as 别名]
def test_to_local_file():
  """Log and save the pages written by one fixed author.

  Joins Page with Task, filters on Page.author, groups by task with a HAVING
  clause on the maximum watch_date, and calls to_local_file(folder='test',
  fetch_images=False) on each result (at most 8800 rows).
  """
  by_author = Page.author == '十年寒霜'
  is_latest_watch = Page.watch_date == fn.MAX(Page.watch_date)

  joined = Page.select(Page, Task).join(Task)
  per_task = (joined.where(by_author)
              .group_by(Page.task)
              .having(is_latest_watch))

  for entry in per_task.limit(8800):
    log(entry.title)
    entry.to_local_file(folder='test', fetch_images=False)
开发者ID:probe301,项目名称:the-north-remembers,代码行数:19,代码来源:loop_watch.py

示例7: main

# 需要导入模块: from models import Page [as 别名]
# 或者: from models.Page import select [as 别名]
def main(start_index):

    page_query = \
        (Page.select()
             .group_by(Page.link)
             .where(
                 Page.language == 'regex',
                 Page.has_example == 1,
                 ))
    pages = [p for p in page_query]
    random.shuffle(pages)

    with open(OUTPUT_FILE, 'a') as outfile:

        browser = webdriver.Firefox()
        print "Press enter to open page.",
        raw_input()
        for i, p in enumerate(pages[start_index:], start=start_index):
            link = build_local_url(p)
            browser.get(link)
            class_ = raw_input("Page {idx} loaded. Type class: ".format(idx=i))
            outfile.write(',,,'.join([link, class_]) + '\n')
            outfile.flush()
开发者ID:andrewhead,项目名称:StackSkim,代码行数:25,代码来源:random_page.py

示例8: main

# 需要导入模块: from models import Page [as 别名]
# 或者: from models.Page import select [as 别名]
def main():
    print "\t".join(["domain", "language", "query", "has_example", "notfound"])
    for page in Page.select().group_by(Page.link):
        print "\t".join(
            str(_) for _ in [urlparse(page.link).netloc, page.language, page.query, page.has_example, page.notfound]
        )
开发者ID:andrewhead,项目名称:StackSkim,代码行数:8,代码来源:data_to_tsv.py


注：本文中的models.Page.select方法示例由纯净天空整理自GitHub/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。