本文整理汇总了Python中util.domain_from_link函数的典型用法代码示例。如果您正苦于以下问题:Python domain_from_link函数的具体用法?Python domain_from_link怎么用?Python domain_from_link使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了domain_from_link函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _urls_and_domains
def _urls_and_domains(self, auth_entity, user_url):
  """Returns this user's valid (not webmention-blacklisted) URLs and domains.

  Converts the auth entity's user_json to an ActivityStreams actor and uses
  its 'urls' and 'url' fields. May be overridden by subclasses.

  Args:
    auth_entity: oauth_dropins.models.BaseAuth
    user_url: string, optional URL passed in when authorizing

  Returns: ([string url, ...], [string domain, ...])
  """
  actor = self.gr_source.user_to_actor(json.loads(auth_entity.user_json))
  logging.debug('Converted to actor: %s', json.dumps(actor, indent=2))

  # collect candidate URLs: the one the user typed plus everything on the
  # actor, de-duplicated and with Nones/empties dropped.
  candidates = util.trim_nulls(util.uniquify(
      [user_url, actor.get('url')] +
      [u.get('value') for u in actor.get('urls', [])]))

  # keep only URLs whose domain resolves and isn't webmention-blacklisted
  good = []
  for candidate in candidates:
    domain = util.domain_from_link(candidate)
    if domain and not util.in_webmention_blacklist(domain.lower()):
      good.append(candidate)

  deduped = util.dedupe_urls(good)
  return deduped, [util.domain_from_link(u).lower() for u in deduped]
示例2: test_domain_from_link
def test_domain_from_link(self):
  """Tests util.domain_from_link() on hosts, subdomains, and bad input."""
  self.assertEqual('localhost', util.domain_from_link('http://localhost/foo'))
  self.assertEqual('a.b.c.d', util.domain_from_link('http://a.b.c.d/foo'))

  # all of these should normalize to the bare domain, regardless of scheme,
  # www/m/mobile subdomain, port, path, query, or fragment.
  for good_link in ('asdf.com', 'www.asdf.com', 'https://asdf.com/',
                    'asdf.com/foo?bar#baz', 'm.asdf.com', 'asdf.com:1234',
                    'mobile.asdf.com/foo?bar#baz', '//asdf.com/foo/bar',
                    'https://m.asdf.com/foo?bar#baz'):
    actual = util.domain_from_link(good_link)
    self.assertEqual('asdf.com', actual, '%s returned %s' % (good_link, actual))

  # a trailing dot on the host is preserved
  self.assertEqual('asdf.com.', util.domain_from_link('http://asdf.com./x'))

  # empty or invalid links return None. (assertIsNone instead of the
  # deprecated assertEquals alias used previously.)
  for bad_link in '', ' ', 'a&b.com', 'http://', 'file:///':
    self.assertIsNone(util.domain_from_link(bad_link))
示例3: _urls_and_domains
def _urls_and_domains(self, auth_entity, user_url):
  """Returns this user's valid (not webmention-blacklisted) URLs and domains.

  Converts the auth entity's user_json to an ActivityStreams actor and uses
  its 'urls' and 'url' fields. May be overridden by subclasses.

  Args:
    auth_entity: oauth_dropins.models.BaseAuth
    user_url: string, optional URL passed in when authorizing

  Returns: ([string url, ...], [string domain, ...])
  """
  actor = self.gr_source.user_to_actor(json.loads(auth_entity.user_json))
  logging.debug('Converted to actor: %s', json.dumps(actor, indent=2))

  candidates = util.trim_nulls(util.uniquify(
      [user_url] + microformats2.object_urls(actor)))

  if len(candidates) > MAX_AUTHOR_URLS:
    logging.warning('Too many profile links! Only resolving the first %s: %s',
                    MAX_AUTHOR_URLS, candidates)

  urls = []
  for i, candidate in enumerate(candidates):
    # only resolve redirects for the first MAX_AUTHOR_URLS candidates, to
    # bound the number of outbound fetches
    resolved, _, send = util.get_webmention_target(
        candidate, resolve=i < MAX_AUTHOR_URLS)
    if send:
      urls.append(resolved)

  urls = util.dedupe_urls(urls)  # normalizes domains to lower case
  return urls, [util.domain_from_link(u) for u in urls]
示例4: _process_syndication_urls
def _process_syndication_urls(source, permalink, syndication_urls):
  """Process a list of syndication URLs looking for one that matches the
  current source. If one is found, stores a new SyndicatedPost in the db.

  Args:
    source: a models.Source subclass
    permalink: a string. the current h-entry permalink
    syndication_urls: a collection of strings. the unfiltered list
      of syndication_urls

  Returns:
    dict mapping syndication URL to a list of stored relationships
  """
  # save the results (or lack thereof) to the db, and put them in a
  # map for immediate use
  results = {}
  for raw_url in syndication_urls:
    # follow redirects to get the canonical syndication url -- gives the
    # best chance of finding a match.
    url = util.follow_redirects(raw_url).url
    # source-specific logic to standardize the URL. (e.g., replace facebook
    # username with numeric id)
    url = source.canonicalize_syndication_url(url)
    # only keep syndicated urls that belong to this source. TODO save future
    # lookups by saving results for other sources too (note: query the
    # appropriate source subclass by author.domains, rather than
    # author.domain_urls)
    if util.domain_from_link(url) != source.AS_CLASS.DOMAIN:
      continue
    logging.debug('saving discovered relationship %s -> %s', url, permalink)
    relationship = SyndicatedPost.insert(
        source, syndication=url, original=permalink)
    results.setdefault(url, []).append(relationship)
  return results
示例5: post
def post(self):
  """Handles a user-submitted URL: enqueues discover tasks for it."""
  source = self.load_source()

  # validate URL, find silo post
  url = util.get_required_param(self, 'url')
  domain = util.domain_from_link(url)
  path = urlparse.urlparse(url).path

  # optimistic default message, overwritten below on any failure
  msg = 'Discovering now. Refresh in a minute to see the results!'

  if domain == source.GR_CLASS.DOMAIN:
    # URL is on the silo itself: enqueue a discover task for the post
    post_id = source.GR_CLASS.post_id(url)
    if not post_id:
      msg = "Sorry, that doesn't look like a %s post URL." % source.GR_CLASS.NAME
    else:
      type = 'event' if path.startswith('/events/') else None
      util.add_discover_task(source, post_id, type=type)
  elif util.domain_or_parent_in(domain, source.domains):
    # URL is on the user's own site: look for syndication links there
    synd_links = original_post_discovery.process_entry(source, url, {}, False, [])
    if not synd_links:
      msg = 'Failed to fetch %s or find a %s syndication link.' % (
        util.pretty_link(url), source.GR_CLASS.NAME)
    else:
      for link in synd_links:
        util.add_discover_task(source, source.GR_CLASS.post_id(link))
      source.updates = {'last_syndication_url': util.now_fn()}
      models.Source.put_updates(source)
  else:
    msg = 'Please enter a URL on either your web site or %s.' % source.GR_CLASS.NAME

  self.messages.add(msg)
  self.redirect(source.bridgy_url(self))
示例6: new
def new(handler, auth_entity=None, **kwargs):
  """Creates and returns a WordPress for the logged in user.

  Args:
    handler: the current RequestHandler
    auth_entity: oauth_dropins.wordpress.WordPressAuth

  Returns:
    WordPress, or None if the site info couldn't be fetched
  """
  # NOTE: the previous version also computed auth_entity.key.id() into an
  # unused local (auth_domain); removed as dead code.
  site_info = WordPress.get_site_info(handler, auth_entity)
  if site_info is None:
    return

  urls = util.dedupe_urls(util.trim_nulls(
      [site_info.get('URL'), auth_entity.blog_url]))
  domains = [util.domain_from_link(u) for u in urls]

  avatar = (json.loads(auth_entity.user_json).get('avatar_URL')
            if auth_entity.user_json else None)
  return WordPress(id=domains[0],
                   auth_entity=auth_entity.key,
                   name=auth_entity.user_display_name(),
                   picture=avatar,
                   superfeedr_secret=util.generate_secret(),
                   url=urls[0],
                   domain_urls=urls,
                   domains=domains,
                   site_info=site_info,
                   **kwargs)
示例7: new
def new(handler, auth_entity=None, **kwargs):
  """Creates and returns a WordPress for the logged in user.

  Args:
    handler: the current RequestHandler
    auth_entity: oauth_dropins.wordpress.WordPressAuth
  """
  auth_domain = auth_entity.key.id()

  # Fetch blog's site info
  site_info = json.loads(auth_entity.urlopen(
      API_SITE_URL % auth_entity.blog_id).read())

  site_url = site_info.get('URL')
  if site_url:
    urls = [site_url, auth_entity.blog_url]
    domains = [util.domain_from_link(site_url), auth_domain]
  else:
    urls = [auth_entity.blog_url]
    domains = [auth_domain]

  avatar = (json.loads(auth_entity.user_json).get('avatar_URL')
            if auth_entity.user_json else None)
  return WordPress(id=domains[0],
                   auth_entity=auth_entity.key,
                   name=auth_entity.user_display_name(),
                   picture=avatar,
                   superfeedr_secret=util.generate_secret(),
                   url=urls[0],
                   domain_urls=urls,
                   domains=domains,
                   site_info=site_info,
                   **kwargs)
示例8: handle_feed
def handle_feed(feed, source):
  """Handles a Superfeedr JSON feed.

  Creates :class:`models.BlogPost` entities and adds propagate-blogpost tasks
  for new items.

  http://documentation.superfeedr.com/schema.html#json
  http://documentation.superfeedr.com/subscribers.html#pubsubhubbubnotifications

  Args:
    feed: unicode string, Superfeedr JSON feed
    source: Blogger, Tumblr, or WordPress
  """
  logging.info('Source: %s %s', source.label(), source.key.string_id())
  logging.info('Raw feed: %s', feed)

  # drop feeds for sources that aren't enabled or don't have webmention on
  if source.status != 'enabled':
    logging.info('Dropping because source is %s', source.status)
    return
  elif 'webmention' not in source.features:
    logging.info("Dropping because source doesn't have webmention feature")
    return

  for item in json.loads(feed).get('items', []):
    url = item.get('permalinkUrl') or item.get('id')
    if not url:
      logging.error('Dropping feed item without permalinkUrl or id!')
      continue

    # extract links from content, discarding self links.
    #
    # i don't use get_webmention_target[s]() here because they follows redirects
    # and fetch link contents, and this handler should be small and fast and try
    # to return a response to superfeedr successfully.
    #
    # TODO: extract_links currently has a bug that makes it drop trailing
    # slashes. ugh. fix that.
    content = item.get('content') or item.get('summary', '')
    links = [util.clean_url(util.unwrap_t_umblr_com(l))
             for l in util.extract_links(content)
             if util.domain_from_link(l) not in source.domains]

    # drop individual links that are too long to store
    unique = []
    for link in util.dedupe_urls(links):
      if len(link) <= _MAX_STRING_LENGTH:
        unique.append(link)
      else:
        logging.info('Giving up on link over %s chars! %s', _MAX_STRING_LENGTH, link)

    logging.info('Found links: %s', unique)
    if len(url) > _MAX_KEYPART_BYTES:
      # post URL exceeds the datastore key limit: truncate the id and store
      # the links as failed= rather than unsent=. NOTE(review): presumably so
      # propagation never runs for these links — confirm against BlogPost.
      logging.warning('Blog post URL is too long (over 500 chars)! Giving up.')
      bp = models.BlogPost(id=url[:_MAX_KEYPART_BYTES], source=source.key,
                           feed_item=item, failed=unique)
    else:
      bp = models.BlogPost(id=url, source=source.key, feed_item=item, unsent=unique)

    bp.get_or_save()
示例9: post
def post(self):
  """Leases the BlogPost entity and sends its webmentions, minus self links."""
  logging.debug('Params: %s', self.request.params)
  if self.lease(ndb.Key(urlsafe=self.request.params['key'])):
    # skip "self" links to this blog's domain
    source_domains = self.entity.source.get().domains
    pairs = ((link, util.domain_from_link(link))
             for link in self.entity.unsent)
    self.entity.unsent = list({link for link, domain in pairs
                               if domain and domain not in source_domains})
    self.send_webmentions()
示例10: search_for_links
def search_for_links(self):
  """Searches for activities with links to any of this source's web sites.

  G+ search supports OR:
  https://developers.google.com/+/api/latest/activities/search

  Returns: sequence of ActivityStreams activity dicts
  """
  # quote each (fragment-stripped) URL and OR them together, skipping any
  # whose domain is webmention-blacklisted
  terms = ['"%s"' % util.fragmentless(u) for u in self.domain_urls
           if not util.in_webmention_blacklist(util.domain_from_link(u))]
  return self.get_activities(
      search_query=' OR '.join(terms), group_id=gr_source.SEARCH,
      etag=self.last_activities_etag, fetch_replies=False,
      fetch_likes=False, fetch_shares=False, count=50)
示例11: _urls_and_domains
def _urls_and_domains(auth_entity, blog_name=None):
  """Returns this blog's URL and domain.

  Args:
    auth_entity: oauth_dropins.tumblr.TumblrAuth
    blog_name: which blog. optional. matches the 'name' field for one of the
      blogs in auth_entity.user_json['user']['blogs'].

  Returns: ([string url], [string domain])
  """
  blogs = json.loads(auth_entity.user_json).get('user', {}).get('blogs', [])
  for blog in blogs:
    # when blog_name is given, match it by name; otherwise take the primary
    wanted = (blog_name == blog.get('name') if blog_name
              else blog.get('primary'))
    if wanted:
      url = blog['url']
      return [url], [util.domain_from_link(url).lower()]
  return [], []
示例12: _url_and_domain
def _url_and_domain(auth_entity, blog_name=None):
  """Returns the blog URL and domain.

  Args:
    auth_entity: oauth_dropins.tumblr.TumblrAuth
    blog_name: which blog. optional. matches the 'name' field for one of the
      blogs in auth_entity.user_json['user']['blogs'].

  Returns: (string url, string domain, boolean ok)
  """
  blogs = json.loads(auth_entity.user_json).get('user', {}).get('blogs', [])
  for blog in blogs:
    # when blog_name is given, match it by name; otherwise take the primary
    wanted = (blog_name == blog.get('name') if blog_name
              else blog.get('primary'))
    if wanted:
      url = blog['url']
      return url, util.domain_from_link(url), True
  return None, None, False
示例13: handle_feed
def handle_feed(feed, source):
  """Handles a Superfeedr JSON feed.

  Creates BlogPost entities and adds propagate-blogpost tasks for new items.

  http://documentation.superfeedr.com/schema.html#json
  http://documentation.superfeedr.com/subscribers.html#pubsubhubbubnotifications

  Args:
    feed: string, Superfeedr JSON feed
    source: Blogger, Tumblr, or WordPress
  """
  logging.info('Source: %s %s', source.label(), source.key.string_id())
  logging.info('Raw feed: %s', feed)

  # drop feeds for sources that aren't enabled or don't have webmention on
  if source.status != 'enabled':
    logging.warning('Dropping because source is %s', source.status)
    return
  elif 'webmention' not in source.features:
    logging.warning("Dropping because source doesn't have webmention feature")
    return

  for item in json.loads(feed).get('items', []):
    url = item.get('permalinkUrl') or item.get('id')
    if not url:
      logging.error('Dropping feed item without permalinkUrl or id!')
      continue

    # source-specific preprocessing hook (behavior depends on the subclass)
    source.preprocess_superfeedr_item(item)

    # extract links from content, discarding self links.
    #
    # i don't use get_webmention_target[s]() here because they follows redirects
    # and fetch link contents, and this handler should be small and fast and try
    # to return a response to superfeedr successfully.
    #
    # TODO: extract_links currently has a bug that makes it drop trailing
    # slashes. ugh. fix that.
    content = item.get('content') or item.get('summary', '')
    links = [l for l in util.extract_links(content)
             if util.domain_from_link(l) not in source.domains]

    logging.info('Found links: %s', links)
    models.BlogPost(id=url,
                    source=source.key,
                    feed_item=item,
                    unsent=links,
                    ).get_or_save()
示例14: _urls_and_domains
def _urls_and_domains(self, auth_entity, user_url):
  """Returns this user's valid (not webmention-blacklisted) URLs and domains.

  Converts the auth entity's user_json to an ActivityStreams actor and uses
  its 'urls' and 'url' fields. May be overridden by subclasses.

  Args:
    auth_entity: :class:`oauth_dropins.models.BaseAuth`
    user_url: string, optional URL passed in when authorizing

  Returns:
    ([string url, ...], [string domain, ...])
  """
  actor = self.gr_source.user_to_actor(json.loads(auth_entity.user_json))
  logging.debug('Converted to actor: %s', json.dumps(actor, indent=2))

  candidates = util.trim_nulls(util.uniquify(
      [user_url] + microformats2.object_urls(actor)))

  if len(candidates) > MAX_AUTHOR_URLS:
    logging.info('Too many profile links! Only resolving the first %s: %s',
                 MAX_AUTHOR_URLS, candidates)

  urls = []
  for i, url in enumerate(candidates):
    # resolve redirects only for the first MAX_AUTHOR_URLS candidates, which
    # bounds the number of outbound fetches per call
    final, domain, ok = util.get_webmention_target(url, resolve=i < MAX_AUTHOR_URLS)
    if ok:
      final = final.lower()
      if util.schemeless(final).startswith(util.schemeless(url.lower())):
        # redirected to a deeper path. use the original higher level URL. #652
        final = url
      # If final has a path segment check if root has a matching rel=me.
      match = re.match(r'^(https?://[^/]+)/.+', final)
      if match and i < MAX_AUTHOR_URLS:
        root = match.group(1)
        # NOTE(review): raise_for_status() propagates an HTTP error for any
        # unfetchable root page, aborting the whole method — confirm intended.
        resp = util.requests_get(root)
        resp.raise_for_status()
        data = util.mf2py_parse(resp.text, root)
        me_urls = data.get('rels', {}).get('me', [])
        if final in me_urls:
          # the root page claims this URL via rel=me, so prefer the root
          final = root
      urls.append(final)

  urls = util.dedupe_urls(urls)  # normalizes domains to lower case
  domains = [util.domain_from_link(url) for url in urls]
  return urls, domains
示例15: authorize
def authorize(self):
  """Check for a backlink to brid.gy/publish/SILO."""
  # on production, accept both the bare and www hosts; elsewhere (e.g. a dev
  # instance), accept only this instance's own host URL
  if util.domain_from_link(self.request.host_url) == 'brid.gy':
    bases = {'brid.gy', 'www.brid.gy'}  # also accept www
  else:
    bases = {self.request.host_url}

  expected = ['%s/publish/%s' % (base, self.source.SHORT_NAME) for base in bases]

  html = self.entity.html
  if html:
    # accept either the raw URL or its percent-encoded form in the page
    for candidate in expected:
      if candidate in html or urllib.quote(candidate, safe='') in html:
        return True

  self.error("Couldn't find link to %s" % expected[0])
  return False