本文整理汇总了Python中models.Author.email_domain方法的典型用法代码示例。如果您正苦于以下问题：Python Author.email_domain方法的具体用法？Python Author.email_domain怎么用？Python Author.email_domain使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类models.Author的用法示例。
在下文中一共展示了Author.email_domain方法的1个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: crawl_author
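# Required import: from models import Author [as alias]
# Note: email_domain is an attribute set on Author instances (see the assignment in the example), not a name that can be imported from models.Author.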
def crawl_author():
"""
Crawls Google Scholar in order to retrieve information about an author.
"""
# The ID of the author in Google Scholar.
scholar_id = request.form['scholar_id']
print 'Crawl author ' + scholar_id + '.'
# Retrieve the author with that ID (if any).
author = Author.query.filter_by(scholar_id = scholar_id).first()
if author is None:
author = Author()
cookie_jar = CookieJar()
opener = build_opener(HTTPCookieProcessor(cookie_jar))
install_opener(opener)
url = 'https://scholar.google.com/citations';
params = urlencode({'hl': 'en', 'view_op': 'list_works', 'sortby': 'pubdate',
'user': scholar_id, 'cstart': 0, 'pagesize': 20})
req = Request(url + '?' + params)
opener.open(req)
res = opener.open(req)
doc = html.parse(res)
no_content = doc.xpath('.//div[contains(text(), "Sorry, no content found for this URL")]')
if len(no_content):
print 'Author ' + scholar_id + ' not found.'
return 'Done.'
author.scholar_id = scholar_id
nname = doc.find('.//div[@id="gsc_prf_in"]')
if nname is not None:
# The name of the author.
author.name = nname.text_content()
nemaildomain = doc.find('.//div[@id="gsc_prf_ivh"]')
if nemaildomain is not None:
# The domain where the author has an email.
author.email_domain = nemaildomain.text_content().split(" - ")[0].split()[-1]
ncitations = doc.find('.//table[@id="gsc_rsb_st"]')
if ncitations is not None:
# The total citations for the author.
author.total_citations = ncitations.xpath('.//tr[2]/td')[1].text
# The h-index for the author.
author.h_index = ncitations.xpath('.//tr[3]/td')[1].text
# The i10-index for the author.
author.i10_index = ncitations.xpath('.//tr[4]/td')[1].text
params = urlencode({'hl': 'en', 'view_op': 'citations_histogram',
'user': scholar_id})
req = Request(url + '?' + params)
opener.open(req)
res = opener.open(req)
doc = html.parse(res)
# The citations per year for the author.
author_citations_per_year = []
nhistogram = doc.find('.//div[@id="gsc_md_hist_b"]')
if nhistogram is not None:
years = [x.text for x in nhistogram.xpath('.//span[@class="gsc_g_t"]')]
for a in nhistogram.xpath('.//a[@class="gsc_g_a"]'):
i = int(a.get('style').split('z-index:')[1])
year = int(years[-i])
citations_per_year = AuthorCitationsPerYear.query.filter_by(author_id = author.id, year = year).first()
if citations_per_year is None:
citations_per_year = AuthorCitationsPerYear()
citations_per_year.year = int(years[-i])
citations_per_year.citations = int(a.xpath('./span[@class="gsc_g_al"]')[0].text)
author_citations_per_year.append(citations_per_year)
author.citations_per_year = author_citations_per_year
params = urlencode({'hl': 'en', 'view_op': 'list_colleagues', 'user': scholar_id})
req = Request(url + '?' + params)
opener.open(req)
res = opener.open(req)
doc = html.parse(res)
# The co-authors of the author.
author_coauthors = []
for a in doc.xpath('.//h3[@class="gsc_1usr_name"]//a'):
co_scholar_id = a.get('href').split('user=')[1].split('&hl')[0]
coauthor = Author.query.filter_by(scholar_id = co_scholar_id).first()
if coauthor is None:
coauthor = Author()
coauthor.scholar_id = co_scholar_id
author_coauthors.append(coauthor)
author.coauthors = author_coauthors
# The publications.
author_publications = []
#.........这里部分代码省略.........