本文整理汇总了Python中utils.get_domain函数的典型用法代码示例。如果您正苦于以下问题:Python get_domain函数的具体用法?Python get_domain怎么用?Python get_domain使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了get_domain函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: get_source_node
def get_source_node(self):
    """Locate the DOM node that names the article's source, or None.

    Tries, in order: an explicit CSS selector from the options, a scan of
    every node under <body>, and finally an external link whose anchor
    text looks like a source attribution.
    """
    selector = self.options.get('source_selector')
    if selector:
        matches = self.doc.cssselect(selector)
        if len(matches) == 1:
            return matches[0]
        for candidate in matches:
            found = self.has_source(candidate)
            if found is not None:
                return found
    body = self.doc.find('body')
    if body is None:
        return None
    for candidate in body.iter():
        found = self.has_source(candidate)
        if found is not None:
            return found
    # Last resort: an absolute link to a different domain whose text ends
    # with 报 ("...daily/paper") but not 举报 ("report abuse").
    own_domain = get_domain(self.url)
    for anchor in self.doc.iter('a'):
        href = anchor.get('href')
        if href and href.startswith('http') and get_domain(href) != own_domain:
            label = self.get_block_text(anchor)
            if len(label) > 2 and label.endswith(u'报') \
                    and not label.endswith(u'举报'):
                return anchor
示例2: data_needs
def data_needs(request, template="needs.html"):
    """Django view: render the data-needs page for all themes.

    Themes are ordered by display name; add_ordered_needs_lists supplies
    both the ordered list and a per-theme lookup dict for the template.
    """
    all_themes = Theme.objects.all().order_by("display_name")
    ordered, lookup = add_ordered_needs_lists(all_themes)
    ctx = {
        "themes": all_themes,
        "theme_dict": lookup,
        "ordered_themes": ordered,
        "domain": get_domain(8000),
        "domain8010": get_domain(),
    }
    return render_to_response(template, RequestContext(request, ctx))
示例3: csw_listing
def csw_listing(request, template='pycsw_catalog_view.html'):
    """Django view: render all pycsw catalog records.

    Each record is tagged with a sequential ``html_id`` so the template
    can build unique element ids.
    """
    if logger:
        logger.info("Start csw_listing")
    csw_recs = pycsw_records.objects.using('pycsw_test').all().order_by('organization')
    # enumerate replaces the original hand-rolled html_id counter.
    for html_id, rec in enumerate(csw_recs):
        rec.html_id = html_id
    context = {'records': csw_recs, 'domain': get_domain(8000), 'domain8010': get_domain()}
    if logger:
        logger.info("End csw_listing")
    return render_to_response(template, RequestContext(request, context))
示例4: add
def add(self, cate):
    """Insert a category record into the URL trie rooted at self.root.

    The trie is keyed first by the url's domain (as returned by
    get_domain), then by subdomain components under 'sub', then by path
    segments under 'path', with an optional query-string leaf.  The
    category dict itself is stored on the final node under 'cate'.

    :param cate: category dict; must contain a 'url' key.
    """
    url = cate['url']
    domain = get_domain(url)
    subdomains = get_subdomains(url)
    paths = get_path(url).split('/')
    query = urlparse.urlparse(url).query
    if domain not in self.root:
        self.root[domain] = {'sub':{}, 'path':{}}
    node = self.root[domain]
    # Descend into subdomain nodes only when there is a non-trivial
    # subdomain (anything other than a lone 'www').
    if len(subdomains) > 1 or len(subdomains) == 1 and subdomains[0] != 'www':
        for sub in subdomains:
            if sub not in node['sub']:
                node['sub'][sub] = {'sub':{}, 'path':{}}
            node = node['sub'][sub]
    for path in paths:
        if path not in node['path']:
            node['path'][path] = {'path':{}}
        node = node['path'][path]
    # A query string gets its own child node; the 'query___' prefix keeps
    # it from colliding with real path segments.
    if query:
        node['path']['query___' + query] = {'path':{}}
        node = node['path']['query___' + query]
    node['cate'] = cate
示例5: bookmark_link
def bookmark_link(self):
    """Return the bookmark URL for this layer.

    A layer with its own bookmark returns it unchanged.  A sublayer
    without one inherits the parent's bookmark with the <layer_id>
    placeholder replaced by this layer's id.  Otherwise a default
    planner URL is built from the domain and this layer's slug.
    """
    if self.bookmark:
        return self.bookmark
    if self.is_sublayer and self.parent.bookmark:
        return self.parent.bookmark.replace('<layer_id>', str(self.id))
    return '%s/planner/#%s' % (get_domain(8000), self.slug)
示例6: __init__
def __init__(self, link, base_url):
    """Wrap an anchor element, caching its text, class, href and domain.

    :param link: a parsed anchor node (exposes .parent; also fed to the
        get_text/get_class/get_href helpers)
    :param base_url: page url; presumably used by get_href to resolve
        relative hrefs — confirm in that helper.
    """
    self.text = self.get_text(link)
    self.class_ = self.get_class(link)
    self.href = self.get_href(link, base_url)
    # Domain of the (resolved) target, not of the containing page.
    self.domain = get_domain(self.href)
    self.parent = link.parent
    self.base_url = base_url
示例7: introspect
def introspect(domain):
    """Flask view: render the index filtered to items matching *domain*."""
    def matches(item):
        # item[1] is the stored JSON payload; compare domains case-insensitively.
        return get_domain(loads(item[1])).lower() in domain.lower()

    pages, requested_page = get_effective_page(request.args.get("page", 0),
                                               matches)
    items = get_items(matches, g.db_file, requested_page)
    return render_template("index.html", items=items, pages=pages,
                           requested_page=requested_page,
                           current_page=request.args.get('page', 0))
示例8: is_image_link
def is_image_link(url):
    """Heuristically decide whether *url* points at an image.

    True when the url's extension is a known image extension, or when
    its domain contains a known image-sharing host component.
    """
    if url.split('.')[-1] in img_extensions:
        return True
    host_parts = get_domain(url).split('.')
    return any(sharer in host_parts for sharer in img_sharers)
示例9: add_learn_links
def add_learn_links(themes):
    """Pair each theme with its /portal/learn/ URL.

    :return: list of {'theme': theme, 'learn_link': url} dicts.
    """
    domain = get_domain()
    return [{'theme': theme,
             'learn_link': '%s/portal/learn/%s' % (domain, linkify(theme.name))}
            for theme in themes]
示例10: get_allowed_from
def get_allowed_from(self, child_urls):
    """
    :param child_urls: List of child urls to check robots.txt on
    :return: A list of allowed child urls to crawl
    """
    allowed = []
    domains = list(set('{0}'.format(get_domain(url)) for url in child_urls))
    # Group children by domain so robots.txt is fetched once per domain.
    # (List comprehension instead of filter(): same result, and safe on
    # Python 3 where filter() returns a one-shot iterator.)
    domain_to_children = {domain: [u for u in child_urls if get_domain(u) == domain]
                          for domain in domains}
    for domain in domain_to_children:
        try:
            rules = self.robots.fetch(domain)
            for url in domain_to_children[domain]:
                if rules.allowed(url, self._agent):
                    allowed.append(url)
        except Exception:
            # Best effort: if robots.txt can't be fetched or parsed, allow
            # all of that domain's children rather than dropping them.
            # (Narrowed from a bare except, which also swallowed
            # KeyboardInterrupt/SystemExit.)
            allowed.extend(domain_to_children[domain])
    return allowed
示例11: top_things
def top_things(db_file):
    """Tally the most-referenced domains and people in the kyotocabinet DB.

    Walks every record newest-first, counting occurrences of each domain
    (via get_domain) and each record's 'person', and builds a bipartite
    graph linking domains to the people who shared them.

    :param db_file: path to the kyotocabinet database file
    :return: (urls, people, graph) — urls and people are (key, count)
             lists sorted by count descending; graph maps each
             domain/person key to {"is_person", "data", ...}.
    """
    urls = {}
    people = {}
    graph = {}
    db = DB()
    if not db.open("{0}".format(db_file), DB.OREADER | DB.OCREATE):
        print("Could not open database. (Top things)")
    cur = db.cursor()
    cur.jump_back()
    while True:
        rec = cur.get(False)
        if not rec:
            break
        loaded_rec = loads(rec[1])
        split = get_domain(loaded_rec)
        urls[split] = urls.get(split, 0) + 1
        person = loaded_rec['person']
        people[person] = people.get(person, 0) + 1
        # Original used `is not ""` (identity, not equality) — truthiness
        # correctly skips both None and empty-string keys.
        if split and person:
            # Build a relational graph between domains and people.
            if split not in graph:
                graph[split] = {"is_person": False, "data": [person], "linked_to_count": 1}
            elif person not in graph[split]["data"]:
                # Fix: the original tested `person not in graph[split]`,
                # which checked the dict's KEYS ("is_person", "data", ...)
                # and therefore never deduplicated; check the data list.
                graph[split]["data"].append(person)
                graph[split]["linked_to_count"] += 1
            if person not in graph:
                graph[person] = {"is_person": True, "data": [split]}
            elif split not in graph[person]["data"]:
                graph[person]["data"].append(split)
        cur.step_back()
    cur.disable()
    db.close()

    def get_one(x):
        return x[1]
    return (sorted(urls.items(), key=get_one, reverse=True),
            sorted(people.items(), key=get_one, reverse=True),
            graph)
示例12: __init__
def __init__(self, input, **options):
    """Parse *input* HTML and record extraction options.

    Recognized options: url, debug, title, pages, texts.  The cleaned
    document, its text content and a word count are computed up front.
    """
    self.input = input
    self.url = options.get('url', '')
    self.debug = options.get('debug', False)
    self.title = options.get('title', '^^')
    self.pages = options.get('pages', None)
    self.texts = options.get('texts', None)
    self.domain = get_domain(self.url)
    self.options = options
    self.doc = clean_html(input, return_doc=True)
    self.text = self.doc.text_content()
    # Empty documents get a word count of 0 rather than calling the counter.
    self.len = word_count(self.text) if self.text else 0
示例13: fetch_from
def fetch_from(self, urls):
    """
    :param urls: A list of urls to fetch sitemaps of
    :return: A list of urls that was found within each sitemap of given urls
    """
    unique_domains = list(set(get_domain(u) for u in urls))
    sitemaps = self._try_fetch_sitemaps(unique_domains)
    found = []
    for key in sitemaps:
        for content in self.requests_getter.get_content_from(sitemaps[key]):
            locations = self.sitemap_url_extractor.extract_from(content)
            # Skip nested sitemap indexes; keep only page locations.
            found.extend(loc for loc in locations if not loc.endswith('.xml'))
    return found
示例14: fetch_stories
def fetch_stories(self, correlation_id=-1):
    """Fetches new stories from the datasource. Uses the last story external id to
    fetch only new stories.

    Each fetched tweet is recorded via add_read_story/add_user; failures
    are logged per-story and per-fetch rather than raised.
    """
    try:
        url = "http://%s/twitter_sensor/?user=%s&password=%s" % (get_domain(), self.user.user_name, self.user.user_password)
        tweets = urllib.urlopen(url).read()
        tweets = json.loads(tweets)
        print(tweets)
        for key in tweets:
            try:
                authors = [tweets[key]]
                self.add_read_story(key, authors)
                self.add_user(tweets[key])
            except Exception:
                # Narrowed from a bare except (which also swallowed
                # KeyboardInterrupt/SystemExit); still best-effort per story.
                log_event("fetch_stories_failed", "AgentCell", self.id, "Adding fetched story %s failed, for %s" % (key, self.user), correlation_id)
    except Exception:
        log_event("fetch_stories_failed", "AgentCell", self.id, "Failed to fetch stories for %s" % self.user, correlation_id)
示例15: article
def article():
    """Flask view: extract and cache the article at ?url=...

    Cache hits (mongo) bump the view counter and deserialize the stored
    sibling-url map; misses run the extraction pipeline, collect up to
    10 sibling urls sharing the page's template, and store the result.
    """
    url = request.args.get('url')
    article = mongo.article.find_one({'_id': url})
    if not article:
        try:
            html = get_or_cache(url)
            article = html2article(html, url, selector=True, merge=True)
            if article and not article['src_name']:
                # Fall back to the bare domain when no source name was found.
                article['src_name'] = get_domain(url)
            tpl = url2tpl(url)
            urls = html2urls(html, url)
            # For each candidate url keep its longest anchor text.
            texts = dict(map(lambda x: (x[0], max(x[1], key=lambda y: len(y))), urls.iteritems()))
            tmp = dict(map(lambda x: (x, url2tpl(x)), texts.iterkeys()))
            # Keep at most 10 sibling urls that share this page's url template.
            urls = {}
            for u, t in tmp.iteritems():
                if u != url and t == tpl:
                    urls[u] = texts[u]
                    if len(urls) >= 10:
                        break
            if article:
                article['urls'] = urls
                article['_id'] = url
                article['view'] = 1
                article['last'] = time.time()
                # The mongo copy stores the url map as a JSON string.
                copy = article.copy()
                copy['urls'] = json.dumps(copy['urls'])
                mongo.article.save(copy)
        except Exception:
            # Best-effort extraction: render whatever we have on failure.
            # Narrowed from a bare except, which also hid real bugs and
            # swallowed KeyboardInterrupt/SystemExit.
            pass
    else:
        article['urls'] = json.loads(article['urls'])
        mongo.article.update({'_id': url}, {'$set': {'view': article['view'] + 1}})
    if article:
        article['pubtime'] = article['pubtime'][:10]
    return render_template('extract/article.html', article=article, url=url)