本文整理汇总了 Python 中 models.Page.select 方法的典型用法代码示例。如果您正苦于以下问题：Python Page.select 方法的具体用法？Python Page.select 怎么用？Python Page.select 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 models.Page 的用法示例。
在下文中一共展示了Page.select方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_save_zhuanlan
# 需要导入模块: from models import Page [as 别名]
# 或者: from models.Page import select [as 别名]
def test_save_zhuanlan():
    """Save matching Zhihu article pages to the 'deep' folder.

    Selects Page rows joined to Task where the task's page_type is
    'zhihu_article' and the page title contains '无痛的机器学习', grouped by
    task and restricted to rows whose watch_date equals the group's
    MAX(watch_date). Each result's title and task are logged before
    ``to_local_file`` is called on it without fetching images.
    """
    joined = Page.select(Page, Task).join(Task)

    is_zhihu_article = Task.page_type == 'zhihu_article'
    title_matches = Page.title.contains('无痛的机器学习')
    filtered = joined.where(is_zhihu_article & title_matches)

    # Keep, per task, only the row carrying the maximum watch_date.
    has_max_watch_date = Page.watch_date == fn.MAX(Page.watch_date)
    query = filtered.group_by(Page.task).having(has_max_watch_date).limit(9999)

    for page in query:
        log(page.title)
        log(page.task)
        page.to_local_file(folder='deep', fetch_images=False)
示例2: test_to_local_file_3
# 需要导入模块: from models import Page [as 别名]
# 或者: from models.Page import select [as 别名]
def test_to_local_file_3():
    """Save pages whose topic contains '矩阵' to the 'deep' folder.

    Selects Page rows joined to Task, filtered on the topic substring,
    grouped by task and restricted to rows whose watch_date equals the
    group's MAX(watch_date). Each result's title is logged before
    ``to_local_file`` is called on it without fetching images.
    """
    joined = Page.select(Page, Task).join(Task)
    filtered = joined.where(Page.topic.contains('矩阵'))

    # Keep, per task, only the row carrying the maximum watch_date.
    has_max_watch_date = Page.watch_date == fn.MAX(Page.watch_date)
    query = filtered.group_by(Page.task).having(has_max_watch_date).limit(8800)

    for page in query:
        log(page.title)
        page.to_local_file(folder='deep', fetch_images=False)
示例3: main
# 需要导入模块: from models import Page [as 别名]
# 或者: from models.Page import select [as 别名]
def main():
    """Fetch every 'jquery' page with wget and record where its index was saved.

    For each Page whose language is 'jquery', the page is downloaded into
    ``pages/<language>/<query>/<rank>``. When wget reports return code 5
    (SSL error), the link is re-checked with ``requests``; if that succeeds,
    wget is re-run with certificate checking skipped, otherwise the fetch is
    treated as failed. On success the first "Saving to" path from wget's
    output is stored in ``page.dest`` and the page is saved.
    """
    pages = Page.select().where(Page.language == 'jquery')

    # Set up progress bar.
    widgets = [
        'Progress: ', Percentage(), ' ', Bar(marker=RotatingMarker()), ' ', ETA(),
        ' Downloaded ', Counter(), ' sites.'
    ]
    # Size the bar by the pages that are actually fetched; counting every
    # Page (regardless of language) made the percentage and ETA wrong.
    pbar = ProgressBar(widgets=widgets, maxval=pages.count())
    pbar.start()

    for page_index, page in enumerate(pages, start=1):
        dest = os.path.join('pages', page.language, page.query, str(page.rank))
        if not os.path.isdir(dest):
            os.makedirs(dest)

        # Run wget to fetch page and all its dependencies.
        output, return_code = run_wget(page.link, dest)

        bad_ssl = False
        if return_code == 5:  # SSL error -- double-check with requests package
            try:
                requests.get(page.link)
                output, return_code = run_wget(page.link, dest, skip_certificate=True)
            except requests.exceptions.SSLError:
                bad_ssl = True

        # First downloaded file is the index. Store where it's saved.
        save_locs = re.findall(r"^Saving to: '(.*)'$", output, re.MULTILINE)
        if bad_ssl or not save_locs:
            logging.warning("Failed fetch (code=%d): %s", return_code, page.link)
        else:
            page.dest = save_locs[0]
            page.save()
            logging.info("Fetched file (code=%d): %s", return_code, page.link)

        pbar.update(page_index)

    pbar.finish()
示例4: label_pages
# 需要导入模块: from models import Page [as 别名]
# 或者: from models.Page import select [as 别名]
def label_pages(start_index, unknown_only=False, purpose=None):
page_query = \
(Page.select()
.group_by(Page.link)
.where(
Page.language == 'regex',
Page.has_example == 1,
))
pages = [p for p in page_query]
random.shuffle(pages)
browser = webdriver.Firefox()
print "Press enter to open page.",
raw_input()
for i, p in enumerate(pages[start_index:], start=start_index):
if ((unknown_only and p.purpose != 'unknown') or
(purpose is not None and p.purpose != purpose)):
continue
link = build_local_url(p)
browser.get(link)
pshort = ''
while pshort not in PURPOSES.keys():
pshort = raw_input(
"Page {idx} loaded. Type class ({opts}): "
.format(idx=i, opts=','.join(PURPOSES.keys())))
pshort = pshort.lower()
for same_page in Page.select().where(Page.link == p.link):
same_page.purpose = PURPOSES[pshort]
same_page.save()
print "You have labeled the purposes of all pages."
browser.close()
示例5: order_pages
# 需要导入模块: from models import Page [as 别名]
# 或者: from models.Page import select [as 别名]
def order_pages(language, random_seed):
    """Return the example-bearing pages of ``language`` in seeded random order.

    The random module is seeded with ``random_seed * hash(language)`` so the
    seed differs per language. One page per distinct link is selected, the
    page ids are shuffled, and each page is then fetched again by id in the
    shuffled order.

    NOTE(review): ``hash()`` of a str is salted per process on Python 3
    unless PYTHONHASHSEED is fixed, which would make the order vary between
    runs -- confirm the target interpreter.
    """
    random.seed(random_seed * hash(language))

    page_query = Page.select().group_by(Page.link).where(
        Page.language == language,
        Page.has_example == 1,
    )
    shuffled_ids = [page.id for page in page_query]
    random.shuffle(shuffled_ids)

    # Look every page up again so the result follows the shuffled id order.
    ordered_pages = []
    for page_id in shuffled_ids:
        ordered_pages.append(Page.get(Page.id == page_id))
    return ordered_pages
示例6: test_to_local_file
# 需要导入模块: from models import Page [as 别名]
# 或者: from models.Page import select [as 别名]
def test_to_local_file():
    """Save pages written by author '十年寒霜' to the 'test' folder.

    Selects Page rows joined to Task, filtered on the author, grouped by task
    and restricted to rows whose watch_date equals the group's
    MAX(watch_date). Each result's title is logged before ``to_local_file``
    is called on it without fetching images.
    """
    joined = Page.select(Page, Task).join(Task)

    # An alternative filter used previously: Page.topic.contains('建筑')
    filtered = joined.where(Page.author == '十年寒霜')

    # Keep, per task, only the row carrying the maximum watch_date.
    has_max_watch_date = Page.watch_date == fn.MAX(Page.watch_date)
    query = filtered.group_by(Page.task).having(has_max_watch_date).limit(8800)

    for page in query:
        log(page.title)
        page.to_local_file(folder='test', fetch_images=False)
示例7: main
# 需要导入模块: from models import Page [as 别名]
# 或者: from models.Page import select [as 别名]
def main(start_index):
page_query = \
(Page.select()
.group_by(Page.link)
.where(
Page.language == 'regex',
Page.has_example == 1,
))
pages = [p for p in page_query]
random.shuffle(pages)
with open(OUTPUT_FILE, 'a') as outfile:
browser = webdriver.Firefox()
print "Press enter to open page.",
raw_input()
for i, p in enumerate(pages[start_index:], start=start_index):
link = build_local_url(p)
browser.get(link)
class_ = raw_input("Page {idx} loaded. Type class: ".format(idx=i))
outfile.write(',,,'.join([link, class_]) + '\n')
outfile.flush()
示例8: main
# 需要导入模块: from models import Page [as 别名]
# 或者: from models.Page import select [as 别名]
def main():
print "\t".join(["domain", "language", "query", "has_example", "notfound"])
for page in Page.select().group_by(Page.link):
print "\t".join(
str(_) for _ in [urlparse(page.link).netloc, page.language, page.query, page.has_example, page.notfound]
)