This page collects representative usage examples of the Python method models.SyndicatedPost.query_by_originals. If you are wondering what SyndicatedPost.query_by_originals does, how to use it, or where to find examples of it, the curated code examples below should help. You can also read more about the containing class, models.SyndicatedPost.

Below is 1 code example of the SyndicatedPost.query_by_originals method. Examples are sorted by popularity by default; upvoting the ones you like or find useful helps the system recommend better Python examples.
Example 1: _process_author
# Required import: from models import SyndicatedPost [as alias]
# Or: from models.SyndicatedPost import query_by_originals [as alias]
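# Note: beyond the import above, the snippet references several other
# module-level names. The imports sketched below are an educated guess
# from the calls in the body (urlparse suggests Python 2), not the
# original module's actual header:
#
#   import logging
#   import urlparse
#   import mf2py
#   import requests
#   from bs4 import BeautifulSoup
#   import util  # assumed to provide get_webmention_target()
#
# HTTP_TIMEOUT and _process_entry are assumed to be defined elsewhere in
# the same module.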
def _process_author(source, author_url, refetch_blanks=False):
  """Fetch the author's domain URL, and look for syndicated posts.

  Args:
    source: a subclass of models.Source
    author_url: the author's homepage URL
    refetch_blanks: boolean, if true, refetch SyndicatedPosts that have
      previously been marked as not having a rel=syndication link

  Return:
    a dict of syndicated_url to models.SyndicatedPost
  """
  # for now use whether the url is a valid webmention target
  # as a proxy for whether it's worth searching it.
  # TODO skip sites we know don't have microformats2 markup
  author_url, _, ok = util.get_webmention_target(author_url)
  if not ok:
    return {}

  try:
    logging.debug('fetching author domain %s', author_url)
    author_resp = requests.get(author_url, timeout=HTTP_TIMEOUT)
    # TODO for error codes that indicate a temporary error, should we make
    # a certain number of retries before giving up forever?
    author_resp.raise_for_status()
  except AssertionError:
    raise  # for unit tests
  except BaseException:
    # TODO limit allowed failures, cache the author's h-feed url
    # or the # of times we've failed to fetch it
    logging.warning('Could not fetch author url %s', author_url, exc_info=True)
    return {}

  author_dom = BeautifulSoup(author_resp.text)
  author_parser = mf2py.Parser(url=author_url, doc=author_dom)
  author_parsed = author_parser.to_dict()

  # look for canonical feed url (if it isn't this one) using
  # rel='feed', type='text/html'
  for rel_feed_node in (author_dom.find_all('link', rel='feed')
                        + author_dom.find_all('a', rel='feed')):
    feed_url = rel_feed_node.get('href')
    if not feed_url:
      continue
    feed_url = urlparse.urljoin(author_url, feed_url)
    feed_type = rel_feed_node.get('type')
    if not feed_type:
      # type is not specified, use this to confirm that it's text/html
      feed_url, _, feed_type_ok = util.get_webmention_target(feed_url)
    else:
      feed_type_ok = feed_type == 'text/html'

    if feed_url == author_url:
      logging.debug('author url is the feed url, proceeding')
      break
    elif not feed_type_ok:
      logging.debug('skipping feed of type %s', feed_type)
      continue

    try:
      logging.debug("fetching author's h-feed %s", feed_url)
      feed_resp = requests.get(feed_url, timeout=HTTP_TIMEOUT)
      feed_resp.raise_for_status()
      logging.debug("author's h-feed fetched successfully %s", feed_url)
      author_parsed = mf2py.Parser(
        url=feed_url, doc=feed_resp.text).to_dict()
      break
    except AssertionError:
      raise  # reraise assertions for unit tests
    except BaseException:
      logging.warning('Could not fetch h-feed url %s.', feed_url, exc_info=True)

  feeditems = author_parsed['items']
  hfeed = next((item for item in feeditems
                if 'h-feed' in item['type']), None)
  if hfeed:
    feeditems = hfeed.get('children', [])
  else:
    logging.info('No h-feed found, fallback to top-level h-entrys.')

  permalinks = set()
  for child in feeditems:
    if 'h-entry' in child['type']:
      # TODO if this h-entry in the h-feed has u-syndication links, we
      # can just use it without fetching its permalink page
      # TODO maybe limit to first ~30 entries? (do that here rather than
      # below because we want the *first* n entries)
      for permalink in child['properties'].get('url', []):
        permalinks.add(permalink)

  # query all preexisting permalinks at once, instead of once per link
  preexisting = {r.original: r for r in
                 SyndicatedPost.query_by_originals(source, permalinks)}

  results = {}
  for permalink in permalinks:
    logging.debug('processing permalink: %s', permalink)
    results.update(_process_entry(source, permalink, refetch_blanks,
                                  preexisting))
#......... some of the code is omitted here .........
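For orientation, a minimal usage sketch follows. How the source entity is obtained and the author URL are placeholders invented for this illustration; per the docstring above, the helper returns a dict mapping each discovered syndication URL to its models.SyndicatedPost.

# Hypothetical usage sketch -- load_source_somehow() and the URL are
# placeholders, not part of the original module.
source = load_source_somehow()  # a stored models.Source subclass entity
relationships = _process_author(source, 'http://author.example.com/',
                                refetch_blanks=False)
for syndication_url, synd_post in relationships.items():
  logging.debug('original %s was syndicated to %s',
                synd_post.original, syndication_url)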