本文整理汇总了Python中models.SyndicatedPost.get_or_insert_by_syndication_url方法的典型用法代码示例。如果您正苦于以下问题：Python SyndicatedPost.get_or_insert_by_syndication_url方法的具体用法？Python SyndicatedPost.get_or_insert_by_syndication_url怎么用？Python SyndicatedPost.get_or_insert_by_syndication_url使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类models.SyndicatedPost的用法示例。
在下文中一共展示了SyndicatedPost.get_or_insert_by_syndication_url方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _posse_post_discovery
# 需要导入模块: from models import SyndicatedPost [as 别名]
# 或者: from models.SyndicatedPost import get_or_insert_by_syndication_url [as 别名]
def _posse_post_discovery(source, activity, author_url, syndication_url,
                          fetch_hfeed):
    """Run the core of posse-post-discovery for one syndicated copy.

    Kept separate from discover() so that it can run inside a transaction.

    Args:
      source: models.Source subclass
      activity: activity dict
      author_url: author's url configured in their silo profile
      syndication_url: url of the syndicated copy whose original we are
        trying to find
      fetch_hfeed: boolean, whether to fetch and parse the author's feed
        when no relationship has been stored yet

    Return:
      the same activity, with the original post url appended to
      'upstreamDuplicates' when one is known
    """
    logging.info(
        'starting posse post discovery with author %s and syndicated %s',
        author_url, syndication_url)

    found = SyndicatedPost.query_by_syndication(source, syndication_url)
    if not found and fetch_hfeed:
        # First time we see this syndicated post: look through the
        # author's h-feed for a matching original.
        discovered = _process_author(source, author_url)
        found = discovered.get(syndication_url)

    if found:
        logging.debug('posse post discovery found relationship %s -> %s',
                      syndication_url, found.original)
        original = found.original
        if original:
            # Attach to the nested object when there is one, otherwise to
            # the activity itself.
            target = activity.get('object') or activity
            target.setdefault('upstreamDuplicates', []).append(original)
    else:
        # Nothing matched. Store a placeholder so this syndicated post is
        # not reprocessed on every run.
        logging.debug('posse post discovery found no relationship for %s',
                      syndication_url)
        SyndicatedPost.get_or_insert_by_syndication_url(
            source, syndication_url, None)

    return activity
示例2: test_get_or_insert_by_syndication_replace
# 需要导入模块: from models import SyndicatedPost [as 别名]
# 或者: from models.SyndicatedPost import get_or_insert_by_syndication_url [as 别名]
def test_get_or_insert_by_syndication_replace(self):
    """Make sure we replace original=None with original=something
    when it is discovered.

    NOTE(review): presumably the fixture already stores a blank
    relationship for 'http://silo/no-original'; that setup is not visible
    here -- confirm against setUp.
    """
    r = SyndicatedPost.get_or_insert_by_syndication_url(
        self.source, 'http://silo/no-original',
        'http://original/newly-discovered')
    self.assertIsNotNone(r)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual('http://original/newly-discovered', r.original)

    # make sure it's in NDB: exactly one entity for this syndication url,
    # now carrying the newly discovered original
    rs = SyndicatedPost.query(
        SyndicatedPost.syndication == 'http://silo/no-original',
        ancestor=self.source.key
    ).fetch()
    self.assertEqual(1, len(rs))
    self.assertEqual('http://original/newly-discovered', rs[0].original)
    self.assertEqual('http://silo/no-original', rs[0].syndication)
示例3: test_get_or_insert_by_syndication_do_not_replace
# 需要导入模块: from models import SyndicatedPost [as 别名]
# 或者: from models.SyndicatedPost import get_or_insert_by_syndication_url [as 别名]
def test_get_or_insert_by_syndication_do_not_replace(self):
    """Make sure we don't replace original=something with
    original=something else (in practice, that would mean another task
    is running discovery concurrently and found a different url).

    NOTE(review): presumably the fixture already stores
    'http://silo/post/url' -> 'http://original/post/url'; that setup is
    not visible here -- confirm against setUp.
    """
    r = SyndicatedPost.get_or_insert_by_syndication_url(
        self.source, 'http://silo/post/url',
        'http://original/different/url')
    self.assertIsNotNone(r)
    # The returned entity must keep the original that was already stored.
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual('http://original/post/url', r.original)

    # make sure it's unchanged in NDB
    rs = SyndicatedPost.query(
        SyndicatedPost.syndication == 'http://silo/post/url',
        ancestor=self.source.key
    ).fetch()
    self.assertEqual(1, len(rs))
    self.assertEqual('http://original/post/url', rs[0].original)
    self.assertEqual('http://silo/post/url', rs[0].syndication)
示例4: _process_entry
# 需要导入模块: from models import SyndicatedPost [as 别名]
# 或者: from models.SyndicatedPost import get_or_insert_by_syndication_url [as 别名]
def _process_entry(source, permalink, refetch_blanks, preexisting):
"""Fetch and process an h-entry, saving a new SyndicatedPost to the
DB if successful.
Args:
permalink: url of the unprocessed post
syndication_url: url of the syndicated content
refetch_blanks: boolean whether we should ignore blank preexisting
SyndicatedPosts
preexisting: dict of original url to SyndicatedPost
Return:
a dict from syndicated url to new models.SyndicatedPosts
"""
results = {}
preexisting_relationship = preexisting.get(permalink)
# if the post has already been processed, do not add to the results
# since this method only returns *newly* discovered relationships.
if preexisting_relationship:
# if we're refetching blanks and this one is blank, do not return
if refetch_blanks and not preexisting_relationship.syndication:
logging.debug('ignoring blank relationship for original %s', permalink)
else:
return results
syndication_urls = set()
parsed = None
try:
logging.debug('fetching post permalink %s', permalink)
permalink, _, type_ok = util.get_webmention_target(permalink)
if type_ok:
resp = requests.get(permalink, timeout=HTTP_TIMEOUT)
resp.raise_for_status()
parsed = mf2py.Parser(url=permalink, doc=resp.text).to_dict()
except BaseException:
# TODO limit the number of allowed failures
logging.warning('Could not fetch permalink %s', permalink, exc_info=True)
if parsed:
relsynd = parsed.get('rels').get('syndication', [])
logging.debug('rel-syndication links: %s', relsynd)
syndication_urls.update(relsynd)
# there should only be one h-entry on a permalink page, but
# we'll check all of them just in case.
for hentry in (item for item in parsed['items']
if 'h-entry' in item['type']):
usynd = hentry.get('properties', {}).get('syndication', [])
logging.debug('u-syndication links: %s', usynd)
syndication_urls.update(usynd)
# save the results (or lack thereof) to the db, and put them in a
# map for immediate use
for syndication_url in syndication_urls:
# follow redirects to give us the canonical syndication url --
# gives the best chance of finding a match.
syndication_url = util.follow_redirects(syndication_url).url
# source-specific logic to standardize the URL. (e.g., replace facebook
# username with numeric id)
syndication_url = source.canonicalize_syndication_url(syndication_url)
# check that the syndicated url belongs to this source TODO save future
# lookups by saving results for other sources too (note: query the
# appropriate source subclass by author.domains, rather than
# author.domain_urls)
parsed = urlparse.urlparse(syndication_url)
if util.domain_from_link(parsed.netloc) == source.AS_CLASS.DOMAIN:
logging.debug('saving discovered relationship %s -> %s',
syndication_url, permalink)
relationship = SyndicatedPost.get_or_insert_by_syndication_url(
source, syndication=syndication_url, original=permalink)
results[syndication_url] = relationship
if not results:
logging.debug('no syndication links from %s to current source %s. '
'saving empty relationship so that it will not be '
'searched again', permalink, source.label())
# remember that this post doesn't have syndication links for this
# particular source
SyndicatedPost(parent=source.key, original=permalink,
syndication=None).put()
logging.debug('discovered relationships %s', results)
return results