本文整理汇总了 Python 中 models.Document.hash_url 方法的典型用法代码示例。如果您正苦于以下问题：Python Document.hash_url 方法的具体用法？Python Document.hash_url 怎么用？Python Document.hash_url 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 models.Document 的用法示例。
在下文中一共展示了 Document.hash_url 方法的 2 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: parse_bookxcess_html
# 需要导入模块: from models import Document [as 别名]
# 或者: from models.Document import hash_url [as 别名]
def parse_bookxcess_html(document, headers, filename=None):
    """Parse a Bookxcess book listings page and queue its linked files.

    Every ``<a>``/``<area>`` tag carrying an ``href`` is inspected.  Links
    whose (lower-cased) path ends in ``.htm`` or ``.html`` are queued with
    this function as the callback, ``.pdf`` links with
    ``parse_bookxcess_pdf``; links with any other extension are ignored.

    Args:
        document: object whose ``contents`` attribute holds the page markup.
        headers: not used by this function (presumably part of the common
            download-callback signature -- confirm against ``download_page``).
        filename: not used by this function.

    Returns:
        None.  The effect is the set of ``download_page`` tasks deferred onto
        the ``downloader`` queue.
    """
    parsers = {
        '.htm': parse_bookxcess_html,
        '.html': parse_bookxcess_html,
        '.pdf': parse_bookxcess_pdf,
    }
    soup = BeautifulSoup(document.contents)

    # Keyed by URL so a link that appears several times on the page is
    # queued only once.
    urls = {}
    for link in soup.findAll(['a', 'area'], href=True):
        url = link['href'].strip()
        # Anything without an explicit scheme is treated as site-relative.
        # https:// must count as absolute too; otherwise the site prefix
        # would be glued in front of an already complete URL.
        if not url.startswith(('http://', 'https://')):
            url = BOOKXCESS + url
        path = urlsplit(url).path.lower()
        parser = parsers.get(splitext(path)[1])
        if parser is not None:
            urls[url] = (parser, {"filename": basename(path)})

    for url, (parser, args) in urls.items():
        # The task name is derived from the URL, so deferring the same URL
        # again collides with the earlier task instead of duplicating it.
        task_name = 'download-%s' % Document.hash_url(url)
        logging.info('parse_bookxcess_html: downloading %s in task %s',
                     url, task_name)
        try:
            deferred.defer(download_page, url, callback=parser, args=args,
                           _name=task_name, _queue='downloader')
        except (taskqueue.TaskAlreadyExistsError, taskqueue.TombstonedTaskError):
            # A task with this name already exists (or existed); skipping
            # the URL is the intended best-effort behaviour.
            pass
示例2: get
# 需要导入模块: from models import Document [as 别名]
# 或者: from models.Document import hash_url [as 别名]
def get(self):
    """Queue downloads for a named URL set of a configured cron source.

    The response is always served as ``text/plain``.  When the ``dbg``
    request parameter is set, the handler delegates to ``self.debug()``.
    Otherwise the ``source`` and ``name`` request parameters select a list
    of URLs from ``cron.tasks``; each URL is echoed to the response and a
    ``download_page`` task is deferred for it on the ``downloader`` queue.
    If no URLs or no known callback can be resolved, a 500 error with an
    explanatory message is returned instead.
    """
    self.response.headers['content-type'] = 'text/plain'

    if self.request.get('dbg'):
        self.debug()
        return

    source = self.request.get('source')
    name = self.request.get('name')

    # Maps the callback names used in the cron configuration to the parser
    # functions that handle the downloaded pages.
    callbacks = {
        'mph_rss': parse_mph_rss,
        'bookxcess_pdf': parse_bookxcess_pdf
    }
    tasks = dict(cron.tasks)

    urls = None
    call = None
    if source in tasks:
        task = tasks[source]
        named_urls = dict(task['urls'])
        if name in named_urls:
            urls = named_urls[name]
            call = callbacks.get(task['callback'], None)

    if not (urls and call):
        self.error(500)
        self.response.out.write('No URLs or callback found: %s, %s' % (urls, call))
        return

    for url in urls:
        path = urlsplit(url).path.lower()
        # Build the task options before echoing the URL so that a failure
        # while hashing the URL happens before anything is written.
        options = dict(
            _queue='downloader',
            _name='download-%s' % Document.hash_url(url),
            callback=call,
            args={"filename": basename(path)},
        )
        self.response.out.write("%s\n" % url)
        try:
            deferred.defer(download_page, url, **options)
        except (taskqueue.TaskAlreadyExistsError, taskqueue.TombstonedTaskError):
            # A task with this name already exists (or existed); skip it.
            pass