本文整理汇总了Python中utils.get_page函数的典型用法代码示例。如果您正苦于以下问题:Python get_page函数的具体用法?Python get_page怎么用?Python get_page使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了get_page函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: get_project
def get_project(name):
    """Scrape basic metadata for the Open Hub project *name*.

    Fetches the project page plus its /enlistments page and returns a
    dict with name, tags, similar projects, manager, licenses and a
    tab-joined list of code locations. Missing fields become ''.
    """
    base_url = 'https://www.openhub.net/p/' + name
    page = etree.HTML(get_page(base_url).decode('utf-8'))

    def first_text(xpath):
        # Text of the first node matching *xpath* on the project page.
        return page.xpath(xpath)[0].text

    enlistments = etree.HTML(get_page(base_url + '/enlistments').decode('utf-8'))
    rows = enlistments.xpath(u"//table//tbody")[0].getchildren()
    code_location = '\t'.join(row.getchildren()[0].text.strip() for row in rows)
    project = {
        "update_time": datetime.datetime.now().isoformat(),
        "project_name": first_text(u"//*[@id=\"project_header\"]/div[1]/h1/a"),
        "project_tag": first_text(u"//*[@id=\"project_tags\"]/p"),
        "similar_projects": first_text(u"//*[@id=\"similar_projects\"]"),
        "manager": first_text(u"//*[@id=\"page_contents\"]/div[3]/div[2]/div/dl/dd[5]/a"),
        "licenses": first_text(u"//*[@id=\"page_contents\"]/div[3]/div[2]/div/dl/dd[3]"),
        "code_location": code_location,
    }
    # Normalize missing values so callers always receive strings.
    return dict((k, '' if v is None else v) for k, v in project.items())
# //*[@id="analyses_language_table"]
示例2: _task_list_handle
def _task_list_handle(self, request, template):
    """Shared handler for the full task list of this application type.

    Filters tasks assigned to the current operator by optional GET
    parameters (name, email, duty, result), orders by allotment time
    and paginates. Returns a (request, template, context) tuple;
    context is None when the user is not an operator.
    """
    dicts = self._get_role(request)
    # dict.has_key() is deprecated (removed in Python 3); use `in`.
    if 'is_operator' not in dicts:
        return request, get_template(template), None
    name = request.GET.get('name')
    email = request.GET.get('email')
    duty = request.GET.get('duty')
    result = request.GET.get('result')
    als = Task.objects.filter(operator=request.user,
                              uuid__category=self.ApplicationModel.__name__)
    if name:
        als = als.filter(uuid__name__contains=name)
    if email:
        als = als.filter(uuid__email__contains=email)
    if duty:
        als = als.filter(operator_duty=duty)
    if result:
        als = als.filter(result=result)
    als = als.order_by('-allot_time')
    dicts['p'] = get_page(als, request)
    dicts['result_list'] = dict(Task.result_choices)
    dicts['duty_list'] = dict(HelpDeskUser.duty_choices)
    return request, get_template(template), dicts
示例3: POST
def POST(self, favorite_id):
    """
    Save edits to an existing favorite.

    Reloads the stored favorite, honours the get_title / save_page
    checkboxes (re-fetching the page via utils.get_page when either is
    set), updates the DB record and redirects home.
    """
    post_sent = web.input()
    #post_sent['date'] = datetime.datetime.now() # uncomment if you want to update the date!
    old_favorite = db.load_fav_by_id(favorite_id)  # load it again
    flag_get_title = False
    flag_save_page = False
    # Carry over the old page_path; utils.get_page overwrites it when
    # save_page is requested, otherwise the old value is kept.
    post_sent['page_path'] = old_favorite['page_path']
    # Checkboxes only appear in the POST data when ticked.
    # dict.has_key() is deprecated (removed in Python 3); use `in`.
    if 'get_title' in post_sent:
        flag_get_title = True
    if 'save_page' in post_sent:
        db.delete_saved_page(favorite_id)  # remove previously saved copy
        flag_save_page = True
    # Only hit the network when at least one checkbox was ticked.
    if flag_get_title or flag_save_page:
        post_sent = utils.get_page(post_sent, flag_save_page, flag_get_title)
    db.update_favorite(favorite_id, post_sent)  # update
    raise web.seeother('/')  # go home
示例4: history
def history(self, request, template):
    """Application history for the logged-in applicant.

    Filters this user's own applications by optional GET parameters
    (name, email, status), paginates and renders *template*. Redirects
    to the login page when the user is not an applicant.
    """
    dicts = self._get_role(request)
    # dict.has_key() is deprecated (removed in Python 3); use `in`.
    if 'is_applicant' not in dicts:
        return HttpResponseRedirect(self._get_login_url(request))
    name = request.GET.get('name')
    email = request.GET.get('email')
    status = request.GET.get('status')
    # NOTE(review): assumes an applicant record (role='0') exists for
    # this user; an empty queryset would raise IndexError — confirm.
    user = HelpDeskUser.objects.filter(user=request.user, role='0')
    user = user[0].user
    # Only the logged-in user's own applications, newest first.
    apps = self.ApplicationModel.objects.filter(submit_user=user).order_by('-apply_time')
    if name:
        apps = apps.filter(name__contains=name)
    if email:
        apps = apps.filter(email__contains=email)
    if status:
        apps = apps.filter(status=status)
    dicts['p'] = get_page(apps, request)
    dicts['statuslist'] = dict(self.ApplicationModel.status_choices)
    return render_to_response(request, get_template(template), dicts)
示例5: ajax_list
def ajax_list(request):
'通过ajax的方式请求和返回新闻列表'
if request.session['stunum']: # 已经登录
print request.session['stunum']
news = News.objects.all()
if 'date' in request.GET:
date = request.GET['date']
startDate = date.split('--')[0]
endDate = date.split('--')[1]
# print startDate, endDate
news = news.filter(rel_time__gte=startDate) # 大于等于开始日期
news = news.filter(rel_time__lte=endDate) # 小于等于结束日期
# 加len(news) >0 的原因是,如果之前过滤后就没有数据了,其他条件就不用过滤了
if len(news) > 0 and 'key_word' in request.GET:
keyword = request.GET['key_word']
news = news.filter(title__contains=keyword)
if len(news) > 0 and 'department' in request.GET:
department = request.GET['department']
news = news.filter(section=department)
if len(news) > 0 and 'zhuanti' in request.GET:
zhuanti = request.GET['zhuanti']
news = utils.handle_zhuanti(zhuanti, news)
reDict = utils.get_page(news, 10, request.GET['page'])
# reDict['data_list'] = json.dumps(reDict['data_list'])
# print reDict#['data_list']
return HttpResponse(json.dumps(reDict), content_type='application/json')
else:
return HttpResponse(u'error')
示例6: findPerson
def findPerson(query):
    """Return the two-word capitalized name that appears most often in
    the pages returned by a search for *query*.

    arguments:
        query -- string of the question
    return:
        name of a person, or None when no candidate is found
    """
    # Load the common-words blacklist. `with` guarantees the handle is
    # closed (the original leaked it and shadowed the builtin `file`).
    with open("words.txt") as words_file:
        words = words_file.read()
    pages = utils.search(query)
    # Two capitalized words, the first at least three letters long.
    exp = "[A-Z][a-z][a-z]+ [A-Z][a-z]+"
    good_words = []
    for page in pages:
        text = re.sub("[\t\n ]", " ", utils.get_page(page))
        for candidate in re.findall(exp, text):
            parts = candidate.split(" ")
            # Keep only candidates where neither word is a common word.
            if parts[0].lower() not in words and parts[1].lower() not in words:
                good_words.append(candidate)
    # Count occurrences of each candidate name.
    wordcounts = {}
    for word in good_words:
        wordcounts[word] = wordcounts.get(word, 0) + 1
    if not wordcounts:
        # Original crashed (IndexError) with no candidates; be explicit.
        return None
    return max(wordcounts, key=wordcounts.get)
示例7: list
def list(self, request, template):
    """
    Generic list view: queryset filtering, paging, CSV export and
    XLS upload, plus a post-pagination data hook (_deal_page_data).
    Filters are declared in self.list_args (GET-param -> ORM lookup).
    """
    u = request.user
    ls = self._get_list(request)
    # A subclass override of _get_list may return None; fall back to an
    # empty queryset so the filter chain below does not crash.
    if ls == None:
        ls = self.DefaultModel.objects.none()
    args = {}
    for ak in self.list_args.keys():
        if re.search('_doption$', ak):
            # "_doption" keys carry a YYYY-MM month filter: normalized
            # to a datetime on the first day of that month.
            if request.GET.get(ak , None):
                datestr = (request.GET.get(ak, None)).split('-')
                args[str(self.list_args.get(ak))] = datetime.strptime((''.join((datestr[0],'-',datestr[1],'-01'))), '%Y-%m-%d')
        elif re.search('_option$', self.list_args.get(ak)):
            # "_option" lookups pair the value with an ORM operator
            # chosen by the companion "<ak>_option" GET parameter.
            if request.GET.get(ak, None) and request.GET.get(ak + '_option', None):
                args[str(ak+'__'+request.GET.get(ak + '_option', None))] = str(request.GET.get(ak, None))
        # elif re.search('_extra$', self.list_args.get(ak)):
        #     if request.GET.get(ak, None):
        #         ls = self._extra_filter(request, ls, ak,self.list_args[ak])
        else:
            if request.GET.get(ak, None):
                try:
                    args[str(self.list_args.get(ak))] = str(request.GET.get(ak, None))
                except UnicodeEncodeError:
                    # Non-ASCII filter values cannot be coerced with
                    # str() under Python 2; keep the unicode object.
                    args[str(self.list_args.get(ak))] = request.GET.get(ak, None)
    ls = ls.filter(**args)
    ls = self._extra_filter(request,ls)
    if(request.GET.get('excel')):
        if request.method == "POST":
            cols = request.POST.getlist("cols")
            return self.csv_export(request, ls, cols)
    try:
        p = get_page(ls, request)
    except EmptyPage:
        # Out-of-range page number: restart at the list's first page.
        return HttpResponseRedirect('./')
    c_list = []
    if self.csv_columns:
        for c in self.csv_columns:
            c_list.append(c[0].decode("utf-8"));
    # Hook for subclasses to post-process the rows of the current page.
    p = self._deal_page_data(request,p)
    list_dicts = {'p':p, 'excel_cs':c_list}
    list_dicts.update(self._get_list_dicts(request))
    if(request.GET.get('upload')):
        if request.method == "POST":
            return self.upload(request, template, list_dicts)
    return render_to_response(request, template, list_dicts )
示例8: dump_single
def dump_single(number, image_format=None):
    """Download one Megatokyo strip.

    When *image_format* is unknown, the strip page is fetched first to
    discover the image file name; otherwise the name is built directly
    from the number and format. Existing files are not overwritten.
    """
    if image_format:
        strip_name = str(number) + image_format
    else:
        # Scrape the strip page to learn the real image file name.
        html = utils.get_page('http://megatokyo.com/strip/{0}'.format(number))
        strip_name = strip_image(html)[0]
    utils.store('http://megatokyo.com/strips/{0}'.format(strip_name), strip_name, overwrite=False)
示例9: download_episode
def download_episode(number):
    """Download an episode of musicforprogramming.net.

    *number* may be an integer episode index (resolved via
    get_episodes()) or a string code the site understands directly.
    """
    try:
        # Integers expose .real; strings raise AttributeError, which
        # routes them to the direct-download path below.
        download_episode(get_episodes()[(number.real) - 1])
    except AttributeError:
        query = "c=" + number if not number.startswith("c") else number
        page = utils.get_page("http://musicforprogramming.net/?{0}".format(query))
        url, songname = re.findall(r"(http:\/\/datashat\.net\/(music_for_programming_.+\.mp3))\"", page)[0]
        print(url, songname)
        utils.store(url, songname, overwrite=False)
示例10: imguralbum
def imguralbum(url, opt_store=True):
    """Collect (and, when *opt_store* is true, download) every image of
    an imgur album page.

    Returns the list of image file names found on the page.
    """
    html = utils.get_page(url)
    names = []
    for link in re.findall(r"<a.+?class=\"zoom\".+?href=\"(.+?)\">", html):
        match = re.search(r"([^/]+?)(.png|.jpg|.jpeg)$", link)
        filename = match.group(1) + match.group(2)
        if opt_store:
            utils.store("https:" + link, filename)
        names.append(filename)
    return names
示例11: last_comic
def last_comic(download=True, return_number=False):
    """Find (and optionally download) the newest Megatokyo strip.

    Returns the strip number when *return_number* is True, else None.
    """
    text = utils.get_page('http://megatokyo.com')
    # Parse the image name once (the original re-ran strip_image for
    # every use).
    image_name = strip_image(text)[0]
    strip_number = int(image_name[:-4])  # drop the ".png" / ".gif" suffix
    if download:
        dump_single(strip_number, image_format=image_name[-4:])
    if return_number:
        return strip_number
示例12: parse_category_by_type
def parse_category_by_type(category, subcategory, link, project_index, type = 'popular'):
    """Scrape one Kickstarter category/subcategory listing.

    Walks paginated '?page=N' listing pages under *link* (ordered by
    *type*, default 'popular') until an empty page or MAX_PAGE_PARSE is
    hit. New projects — deduplicated via hashit() against
    *project_index*, which is mutated — are returned as a list of dicts.
    """
    if subcategory:
        message = 'Parse subcategory "{0}" of "{1}"'.format(subcategory, category)
    else:
        message = 'Parse category "{0}"'.format(category)
    print message
    projects = []
    stop = False
    page_count = 1
    # A negative MAX_PAGE_PARSE means "no page limit".
    while not stop and (page_count <= MAX_PAGE_PARSE or MAX_PAGE_PARSE < 0):
        page = get_page('{0}{2}/?page={1}'.format(link, page_count, type))
        page_count+=1
        project_blocks = page.cssselect('.project')
        # An empty listing page marks the end of the category.
        stop = len(project_blocks) == 0
        for block in project_blocks:
            try:
                location = block.cssselect('.location-name')[0].text.strip()
            except Exception:
                # Some project cards have no location element.
                location = ''
            project = {
                'category': category,
                'subcategory': subcategory,
                'name': block.cssselect('.project-card > h2 > strong > a')[0].text.strip(),
                'description': block.cssselect('.project-card > p')[0].text.strip(),
                'location': location,
                # [3:] presumably drops a "by " prefix — TODO confirm.
                'founder': block.cssselect('.project-card > h2 > span')[0].text.strip()[3:],
                'funded': None,
                'funded_date': None,
                'pledged': None,
                'days left': None,
            }
            stats = block.cssselect('.project-stats > li')
            for stat in stats:
                stat_name = ''.join(stat.xpath("text()")).strip()
                if stat_name in {'funded', 'pledged'}:
                    # Strip %, $ and thousands separators before parsing.
                    value = stat.cssselect('strong')[0].text.replace('%', '').replace('$', '').replace(',', '').strip()
                    project[stat_name] = float(value)
                elif stat_name == 'days left':
                    value = stat.cssselect('.num')[0].text.strip()
                    project[stat_name] = int(value)
                elif stat_name in ['hours left', 'hour left', 'min left', 'mins left']:
                    # Less than a day remaining.
                    project['days left'] = 0
                else:
                    # Any other stat text is treated as a finished
                    # project's end date ('%b %d, %Y').
                    value = stat_name
                    project['days left'] = -1
                    project['funded_date'] = str(datetime.datetime.strptime(value, '%b %d, %Y'))
            # Deduplicate across pages/categories via a content hash.
            h = hashit(project)
            if h not in project_index:
                project_index.add(h)
                projects.append(project)
    print '{0}. Ended!!'.format(message)
    return projects
示例13: get_project_news
def get_project_news(name):
    """Scrape volatile statistics for the Open Hub project *name*.

    Performs five page fetches to gather team size, project age,
    activity, factoid comments, community rating, COCOMO cost
    estimates and the per-language code breakdown. Missing fields are
    normalized to '' before the dict is returned.
    """
    project_page = etree.HTML(get_page('https://www.openhub.net/p/' + name).decode('utf-8'))
    project_age = project_page.xpath(u"//*[@id=\"factoids\"]/li[3]/div/span[1]/a")[0].text.strip()
    team_size = project_page.xpath(u"//*[@id=\"factoids\"]/li[1]/div/a[2]")[0].text.strip()
    project_activity = project_page.xpath(u"//*[@id=\"project_header_activity_indicator\"]/div")[0].text.strip()
    factoids_page = etree.HTML(get_page('https://www.openhub.net/p/' + name + '/factoids').decode('utf-8'))
    # Flatten all factoid text; \xa0 (non-breaking space) is an HTML
    # rendering artifact and is stripped.
    comments = ''.join(factoids_page.xpath(u"//*[@id=\"page_contents\"]")[0].itertext()).replace(u'\xa0', '').strip()
    # team_size_per_month = project_page.xpath(u"//*[@id=\"factoids\"]/li[3]/div/span[2]/a")[0].text
    # print(team_size_per_month)
    # contributor = project_page.xpath(u"")[0].text
    # print(contributor)
    ratings_page = etree.HTML(get_page('https://www.openhub.net/p/' + name + '/reviews/summary').decode('utf-8'))
    # NOTE(review): community_score is computed but never used below —
    # confirm whether it should be part of the returned dict.
    community_score = ratings_page.xpath(u"//*[@id=\"average_rating_details_2\"]")[0].text.replace(u'\xa0', '').strip()
    cost_page = etree.HTML(get_page('https://www.openhub.net/p/' + name + '/estimated_cost').decode('utf-8'))
    # Each "controls" div holds one COCOMO figure; [1:] presumably
    # drops a non-data control — TODO confirm against the page layout.
    costs = [''.join(i.itertext()).strip().replace(',', '').split('\n') for i in cost_page.xpath('.//div[@class="controls"]')][1:]
    lines = [i.attrib['value'] for i in cost_page.xpath('.//option')]
    codebase_size = int(costs[0][0])
    estimated_effort = int(costs[1][0])
    estimated_cost = int(costs[2][1])
    cocomo = { 'codebase_size': codebase_size, 'estimated_effort': estimated_effort, 'estimated_cost': estimated_cost, "all_code": lines[0], 'logic_code_only': lines[1], 'markup_only': lines[2], 'build_scripts_only': lines[3] }
    language_page = etree.HTML(get_page('https://www.openhub.net/p/' + name + '/analyses/latest/languages_summary').decode('utf-8'))
    languages_table = language_page.xpath(u"//*[@id=\"analyses_language_table\"]")[0]
    # [2:-2] presumably removes header and totals rows; the inner [1:]
    # skips the first cell of each row — TODO confirm table structure.
    data = [x for c in languages_table.getchildren() for x in c.getchildren()][2:-2]
    data = [[''.join(j.itertext()).strip() for j in i.getchildren()][1:] for i in data]
    languages = [{"code_name": line[0], "code_lines": line[1], "comment_lines": line[2], "comment_ratio": line[3], "blank_lines" : line[4], "total_lines": line[5], "total_percentage" : line[6]} for line in data]
    project_news = {"update_time": datetime.datetime.now().isoformat(), 'team_size': team_size, 'project_age': project_age, 'activity': project_activity, 'comments': comments, 'languages': json.dumps(languages), 'cost': json.dumps(cocomo) }
    # Normalize missing values so callers always receive strings.
    for key in project_news:
        if project_news[key] is None:
            project_news[key] = ''
    return project_news
示例14: _task_handle
def _task_handle(self, request, template):
    """Shared handler for the pending-task list (result == '0').

    Returns a (request, template, context) tuple; context is None when
    the current user is not an operator.
    """
    dicts = self._get_role(request)
    # dict.has_key() is deprecated (removed in Python 3); use `in`.
    if 'is_operator' not in dicts:
        return request, get_template(template), None
    al = Task.objects.filter(operator=request.user, result='0',
                             uuid__category=self.ApplicationModel.__name__)
    dicts['p'] = get_page(al, request)
    return request, get_template(template), dicts
示例15: search
def search(request):
    """Full-text board search backed by Sphinx.

    GET params: term (query), c (category id), page, adv_submit.x
    (advanced search: interpret term as a username). Renders a
    paginated result page; raises Http404 on bad page/category input.
    """
    from sphinxapi import SphinxClient, SPH_MATCH_EXTENDED, SPH_SORT_RELEVANCE
    term = request.GET.get('term', '')
    category = None
    args = [u'term=%s'%term]
    template_name = 'board/search.html'
    if term:
        sphinx = SphinxClient()
        sphinx.SetServer(settings.SPHINX_SERVER, settings.SPHINX_PORT)
        sphinx.SetMatchMode(SPH_MATCH_EXTENDED)
        sphinx.SetSortMode(SPH_SORT_RELEVANCE)
        cid = request.GET.get('c')
        if cid:
            try:
                cid = int(cid)
            except (TypeError, ValueError):
                # int() raises ValueError for non-numeric strings; the
                # original caught only TypeError and would crash here.
                raise Http404
            # NOTE(review): positional arg — looks like it should be
            # get_object_or_404(Category, pk=cid); confirm.
            category = get_object_or_404(Category, cid)
        if category:
            # NOTE(review): SetFilter receives the Category object, not
            # an id — presumably should be [category.id]; confirm.
            sphinx.SetFilter('category_id', [category])
            args.append(u'c=%s'%cid)
        user_settings = get_user_settings(request.user)
        try:
            page = int(request.GET.get('page', '1'))
            if page < 1:
                raise Http404
        except ValueError:
            raise Http404
        #sphinx.SetLimits(page * user_settings.ppp, user_settings.ppp)
        if request.GET.get('adv_submit.x'):
            # Advanced search: match posts by the author's username.
            template_name = 'board/advanced_search.html'
            u = User.objects.filter(username=term)
            if u:
                q = QuerySetPaginator(Post.objects.filter(user=u),
                                      user_settings.ppp)
            else:
                q = Paginator([], 1).page(1)
        else:
            result = sphinx.Query(u'@@relaxed %s'%term)
            # dict.has_key() is deprecated (removed in Python 3).
            if 'total_found' not in result:
                template_name = 'board/search_unavailable.html'
            pages = result.get('total_found', 0) / user_settings.ppp
            if pages > 0 and page > pages:
                raise Http404
            ids = [m['id'] for m in result.get('matches', [])]
            q = QuerySetPaginator(Post.view_manager.filter(id__in=ids),
                                  user_settings.ppp)
            q = get_page(request.GET.get('page', 1), q)
    else:
        q = Paginator([], 1).page(1)
    return render_to_response(template_name, {
        'result': q,
        'term': term,
        'category': category,
        'args': u'&'.join(['']+args),
    }, context_instance=RequestContext(request, processors=extra_processors))