本文整理汇总了Python中utils.get_domain函数的典型用法代码示例。如果您正苦于以下问题:Python get_domain函数的具体用法?Python get_domain怎么用?Python get_domain使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了get_domain函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: get_source_node
def get_source_node(self):
    """Locate the DOM node that names the article's source, or None.

    Tries, in order: an explicit CSS selector from the options, a scan of
    every node under <body>, and finally an external link whose anchor
    text looks like a source attribution.
    """
    selector = self.options.get('source_selector')
    if selector:
        matches = self.doc.cssselect(selector)
        if len(matches) == 1:
            return matches[0]
        for candidate in matches:
            found = self.has_source(candidate)
            if found is not None:
                return found
    body = self.doc.find('body')
    if body is None:
        return None
    for candidate in body.iter():
        found = self.has_source(candidate)
        if found is not None:
            return found
    # Last resort: an absolute link to a different domain whose text ends
    # with 报 ("...daily/paper") but not 举报 ("report abuse").
    own_domain = get_domain(self.url)
    for anchor in self.doc.iter('a'):
        href = anchor.get('href')
        if href and href.startswith('http') and get_domain(href) != own_domain:
            label = self.get_block_text(anchor)
            if len(label) > 2 and label.endswith(u'报') \
                    and not label.endswith(u'举报'):
                return anchor
示例2: data_needs
def data_needs(request, template="needs.html"):
    """Django view: render the data-needs page for all themes.

    Themes are ordered by display name; add_ordered_needs_lists supplies
    both the ordered list and a per-theme lookup dict for the template.
    """
    all_themes = Theme.objects.all().order_by("display_name")
    ordered, lookup = add_ordered_needs_lists(all_themes)
    ctx = {
        "themes": all_themes,
        "theme_dict": lookup,
        "ordered_themes": ordered,
        "domain": get_domain(8000),
        "domain8010": get_domain(),
    }
    return render_to_response(template, RequestContext(request, ctx))
示例3: csw_listing
def csw_listing(request, template='pycsw_catalog_view.html'):
    """Django view: render all pycsw catalog records.

    Each record is tagged with a sequential ``html_id`` so the template
    can build unique element ids.
    """
    if logger:
        logger.info("Start csw_listing")
    csw_recs = pycsw_records.objects.using('pycsw_test').all().order_by('organization')
    # enumerate replaces the original hand-rolled html_id counter.
    for html_id, rec in enumerate(csw_recs):
        rec.html_id = html_id
    context = {'records': csw_recs, 'domain': get_domain(8000), 'domain8010': get_domain()}
    if logger:
        logger.info("End csw_listing")
    return render_to_response(template, RequestContext(request, context))
示例4: add
def add(self, cate):
    """Insert a category record into the URL trie rooted at self.root.

    The trie is keyed first by the url's domain (as returned by
    get_domain), then by subdomain components under 'sub', then by path
    segments under 'path', with an optional query-string leaf.  The
    category dict itself is stored on the final node under 'cate'.

    :param cate: category dict; must contain a 'url' key.
    """
    url = cate['url']
    domain = get_domain(url)
    subdomains = get_subdomains(url)
    paths = get_path(url).split('/')
    query = urlparse.urlparse(url).query
    if domain not in self.root:
        self.root[domain] = {'sub':{}, 'path':{}}
    node = self.root[domain]
    # Descend into subdomain nodes only when there is a non-trivial
    # subdomain (anything other than a lone 'www').
    if len(subdomains) > 1 or len(subdomains) == 1 and subdomains[0] != 'www':
        for sub in subdomains:
            if sub not in node['sub']:
                node['sub'][sub] = {'sub':{}, 'path':{}}
            node = node['sub'][sub]
    for path in paths:
        if path not in node['path']:
            node['path'][path] = {'path':{}}
        node = node['path'][path]
    # A query string gets its own child node; the 'query___' prefix keeps
    # it from colliding with real path segments.
    if query:
        node['path']['query___' + query] = {'path':{}}
        node = node['path']['query___' + query]
    node['cate'] = cate
示例5: bookmark_link
def bookmark_link(self):
    """Return the bookmark URL for this layer.

    A layer with its own bookmark returns it unchanged.  A sublayer
    without one inherits the parent's bookmark with the <layer_id>
    placeholder replaced by this layer's id.  Otherwise a default
    planner URL is built from the domain and this layer's slug.
    """
    if self.bookmark:
        return self.bookmark
    if self.is_sublayer and self.parent.bookmark:
        return self.parent.bookmark.replace('<layer_id>', str(self.id))
    return '%s/planner/#%s' % (get_domain(8000), self.slug)
示例6: __init__
def __init__(self, link, base_url):
    """Wrap an anchor element, caching its text, class, href and domain.

    :param link: a parsed anchor node (exposes .parent; also fed to the
        get_text/get_class/get_href helpers)
    :param base_url: page url; presumably used by get_href to resolve
        relative hrefs — confirm in that helper.
    """
    self.text = self.get_text(link)
    self.class_ = self.get_class(link)
    self.href = self.get_href(link, base_url)
    # Domain of the (resolved) target, not of the containing page.
    self.domain = get_domain(self.href)
    self.parent = link.parent
    self.base_url = base_url
示例7: introspect
def introspect(domain):
    """Flask view: render the index filtered to items matching *domain*."""
    def matches(item):
        # item[1] is the stored JSON payload; compare domains case-insensitively.
        return get_domain(loads(item[1])).lower() in domain.lower()

    pages, requested_page = get_effective_page(request.args.get("page", 0),
                                               matches)
    items = get_items(matches, g.db_file, requested_page)
    return render_template("index.html", items=items, pages=pages,
                           requested_page=requested_page,
                           current_page=request.args.get('page', 0))
示例8: is_image_link
def is_image_link(url):
    """Heuristically decide whether *url* points at an image.

    True when the url's extension is a known image extension, or when
    its domain contains a known image-sharing host component.
    """
    if url.split('.')[-1] in img_extensions:
        return True
    host_parts = get_domain(url).split('.')
    return any(sharer in host_parts for sharer in img_sharers)
示例9: add_learn_links
def add_learn_links(themes):
    """Pair each theme with its /portal/learn/ URL.

    :return: list of {'theme': theme, 'learn_link': url} dicts.
    """
    domain = get_domain()
    return [{'theme': theme,
             'learn_link': '%s/portal/learn/%s' % (domain, linkify(theme.name))}
            for theme in themes]
示例10: get_allowed_from
def get_allowed_from(self, child_urls):
    """
    :param child_urls: List of child urls to check robots.txt on
    :return: A list of allowed child urls to crawl
    """
    allowed = []
    domains = list(set('{0}'.format(get_domain(url)) for url in child_urls))
    # Group children by domain so robots.txt is fetched once per domain.
    # (List comprehension instead of filter(): same result, and safe on
    # Python 3 where filter() returns a one-shot iterator.)
    domain_to_children = {domain: [u for u in child_urls if get_domain(u) == domain]
                          for domain in domains}
    for domain in domain_to_children:
        try:
            rules = self.robots.fetch(domain)
            for url in domain_to_children[domain]:
                if rules.allowed(url, self._agent):
                    allowed.append(url)
        except Exception:
            # Best effort: if robots.txt can't be fetched or parsed, allow
            # all of that domain's children rather than dropping them.
            # (Narrowed from a bare except, which also swallowed
            # KeyboardInterrupt/SystemExit.)
            allowed.extend(domain_to_children[domain])
    return allowed
示例11: top_things
def top_things(db_file):
    """Tally the most-referenced domains and people in the kyotocabinet DB.

    Walks every record newest-first, counting occurrences of each domain
    (via get_domain) and each record's 'person', and builds a bipartite
    graph linking domains to the people who shared them.

    :param db_file: path to the kyotocabinet database file
    :return: (urls, people, graph) — urls and people are (key, count)
             lists sorted by count descending; graph maps each
             domain/person key to {"is_person", "data", ...}.
    """
    urls = {}
    people = {}
    graph = {}
    db = DB()
    if not db.open("{0}".format(db_file), DB.OREADER | DB.OCREATE):
        print("Could not open database. (Top things)")
    cur = db.cursor()
    cur.jump_back()
    while True:
        rec = cur.get(False)
        if not rec:
            break
        loaded_rec = loads(rec[1])
        split = get_domain(loaded_rec)
        urls[split] = urls.get(split, 0) + 1
        person = loaded_rec['person']
        people[person] = people.get(person, 0) + 1
        # Original used `is not ""` (identity, not equality) — truthiness
        # correctly skips both None and empty-string keys.
        if split and person:
            # Build a relational graph between domains and people.
            if split not in graph:
                graph[split] = {"is_person": False, "data": [person], "linked_to_count": 1}
            elif person not in graph[split]["data"]:
                # Fix: the original tested `person not in graph[split]`,
                # which checked the dict's KEYS ("is_person", "data", ...)
                # and therefore never deduplicated; check the data list.
                graph[split]["data"].append(person)
                graph[split]["linked_to_count"] += 1
            if person not in graph:
                graph[person] = {"is_person": True, "data": [split]}
            elif split not in graph[person]["data"]:
                graph[person]["data"].append(split)
        cur.step_back()
    cur.disable()
    db.close()

    def get_one(x):
        return x[1]
    return (sorted(urls.items(), key=get_one, reverse=True),
            sorted(people.items(), key=get_one, reverse=True),
            graph)
示例12: __init__
def __init__(self, input, **options):
    """Parse *input* HTML and record extraction options.

    Recognized options: url, debug, title, pages, texts.  The cleaned
    document, its text content and a word count are computed up front.
    """
    self.input = input
    self.url = options.get('url', '')
    self.debug = options.get('debug', False)
    self.title = options.get('title', '^^')
    self.pages = options.get('pages', None)
    self.texts = options.get('texts', None)
    self.domain = get_domain(self.url)
    self.options = options
    self.doc = clean_html(input, return_doc=True)
    self.text = self.doc.text_content()
    # Empty documents get a word count of 0 rather than calling the counter.
    self.len = word_count(self.text) if self.text else 0
示例13: fetch_from
def fetch_from(self, urls):
    """
    :param urls: A list of urls to fetch sitemaps of
    :return: A list of urls that was found within each sitemap of given urls
    """
    unique_domains = list(set(get_domain(u) for u in urls))
    sitemaps = self._try_fetch_sitemaps(unique_domains)
    found = []
    for key in sitemaps:
        for content in self.requests_getter.get_content_from(sitemaps[key]):
            locations = self.sitemap_url_extractor.extract_from(content)
            # Skip nested sitemap indexes; keep only page locations.
            found.extend(loc for loc in locations if not loc.endswith('.xml'))
    return found
示例14: fetch_stories
def fetch_stories(self, correlation_id=-1):
    """Fetches new stories from the datasource. Uses the last story external id to
    fetch only new stories.

    Each fetched tweet is recorded via add_read_story/add_user; failures
    are logged per-story and per-fetch rather than raised.
    """
    try:
        url = "http://%s/twitter_sensor/?user=%s&password=%s" % (get_domain(), self.user.user_name, self.user.user_password)
        tweets = urllib.urlopen(url).read()
        tweets = json.loads(tweets)
        print(tweets)
        for key in tweets:
            try:
                authors = [tweets[key]]
                self.add_read_story(key, authors)
                self.add_user(tweets[key])
            except Exception:
                # Narrowed from a bare except (which also swallowed
                # KeyboardInterrupt/SystemExit); still best-effort per story.
                log_event("fetch_stories_failed", "AgentCell", self.id, "Adding fetched story %s failed, for %s" % (key, self.user), correlation_id)
    except Exception:
        log_event("fetch_stories_failed", "AgentCell", self.id, "Failed to fetch stories for %s" % self.user, correlation_id)
示例15: article
def article():
    """Flask view: extract and cache the article at ?url=...

    Cache hits (mongo) bump the view counter and deserialize the stored
    sibling-url map; misses run the extraction pipeline, collect up to
    10 sibling urls sharing the page's template, and store the result.
    """
    url = request.args.get('url')
    article = mongo.article.find_one({'_id': url})
    if not article:
        try:
            html = get_or_cache(url)
            article = html2article(html, url, selector=True, merge=True)
            if article and not article['src_name']:
                # Fall back to the bare domain when no source name was found.
                article['src_name'] = get_domain(url)
            tpl = url2tpl(url)
            urls = html2urls(html, url)
            # For each candidate url keep its longest anchor text.
            texts = dict(map(lambda x: (x[0], max(x[1], key=lambda y: len(y))), urls.iteritems()))
            tmp = dict(map(lambda x: (x, url2tpl(x)), texts.iterkeys()))
            # Keep at most 10 sibling urls that share this page's url template.
            urls = {}
            for u, t in tmp.iteritems():
                if u != url and t == tpl:
                    urls[u] = texts[u]
                    if len(urls) >= 10:
                        break
            if article:
                article['urls'] = urls
                article['_id'] = url
                article['view'] = 1
                article['last'] = time.time()
                # The mongo copy stores the url map as a JSON string.
                copy = article.copy()
                copy['urls'] = json.dumps(copy['urls'])
                mongo.article.save(copy)
        except Exception:
            # Best-effort extraction: render whatever we have on failure.
            # Narrowed from a bare except, which also hid real bugs and
            # swallowed KeyboardInterrupt/SystemExit.
            pass
    else:
        article['urls'] = json.loads(article['urls'])
        mongo.article.update({'_id': url}, {'$set': {'view': article['view'] + 1}})
    if article:
        article['pubtime'] = article['pubtime'][:10]
    return render_template('extract/article.html', article=article, url=url)