本文整理汇总了Python中models.Author.retrieved_at方法的典型用法代码示例。如果您正苦于以下问题:Python Author.retrieved_at方法的具体用法?Python Author.retrieved_at怎么用?Python Author.retrieved_at使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类models.Author的用法示例。
在下文中一共展示了Author.retrieved_at方法的1个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: crawl_author
# 需要导入模块: from models import Author [as 别名]
# 或者: from models.Author import retrieved_at [as 别名]
#.........这里部分代码省略.........
# NOTE(review): this is the tail of crawl_author; the beginning of the function
# is omitted and the original indentation was lost in extraction, so the block
# nesting must be restored before this can run. The names `res`, `author`,
# `scholar_id`, `url` and `opener` are bound in the part that is not shown.
# Parse the response that was fetched in the omitted part of the function.
doc = html.parse(res)
# The citations per year for the author.
author_citations_per_year = []
# The histogram container; absent (None) when the page has no such div.
nhistogram = doc.find('.//div[@id="gsc_md_hist_b"]')
if nhistogram is not None:
# Text of the year labels, in document order.
years = [x.text for x in nhistogram.xpath('.//span[@class="gsc_g_t"]')]
for a in nhistogram.xpath('.//a[@class="gsc_g_a"]'):
# The bar's z-index (read from its style attribute) is used as an offset
# from the END of the year labels to find the year the bar belongs to.
i = int(a.get('style').split('z-index:')[1])
year = int(years[-i])
# Reuse the stored row for this (author, year) pair when there is one.
citations_per_year = AuthorCitationsPerYear.query.filter_by(author_id = author.id, year = year).first()
if citations_per_year is None:
citations_per_year = AuthorCitationsPerYear()
# NOTE(review): recomputes the same value as `year` above.
citations_per_year.year = int(years[-i])
# The citation count is the text of the first gsc_g_al span inside the bar.
citations_per_year.citations = int(a.xpath('./span[@class="gsc_g_al"]')[0].text)
author_citations_per_year.append(citations_per_year)
author.citations_per_year = author_citations_per_year
# Request the list-of-colleagues view for the same scholar id.
params = urlencode({'hl': 'en', 'view_op': 'list_colleagues', 'user': scholar_id})
req = Request(url + '?' + params)
# NOTE(review): the request is opened twice and the first response is
# discarded - confirm whether this is intentional or a duplicated line.
opener.open(req)
res = opener.open(req)
doc = html.parse(res)
# The co-authors of the author.
author_coauthors = []
for a in doc.xpath('.//h3[@class="gsc_1usr_name"]//a'):
# The co-author's scholar id is the text between 'user=' and '&hl' in the href.
co_scholar_id = a.get('href').split('user=')[1].split('&hl')[0]
# Reuse the stored author with that scholar id, or start a new record.
coauthor = Author.query.filter_by(scholar_id = co_scholar_id).first()
if coauthor is None:
coauthor = Author()
coauthor.scholar_id = co_scholar_id
author_coauthors.append(coauthor)
author.coauthors = author_coauthors
# The publications.
author_publications = []
# Paging state sent as the 'cstart' and 'pagesize' query parameters.
cstart = 0
pagesize = 100
while True:
params = urlencode({'hl': 'en', 'view_op': 'list_works', 'sortby': 'pubdate',
'user': scholar_id, 'cstart': cstart, 'pagesize': pagesize})
req = Request(url + '?' + params)
# NOTE(review): opened twice here as well; the first response is discarded.
opener.open(req)
res = opener.open(req)
doc = html.parse(res)
for tr in doc.xpath('.//tr[@class="gsc_a_tr"]'):
a = tr.find('.//td[@class="gsc_a_t"]//a')
# NOTE: When there are no publications, there is a single tr.
# <tr class="gsc_a_tr"><td class="gsc_a_e" colspan="3">There are no articles in this profile.</td></tr>
if a is None:
continue
purl = a.get('href')
# The ID of the publication in Google Scholar.
pub_scholar_id = purl.split('citation_for_view=')[1]
# Retrieve the publication with that ID (if any).
publication = Publication.query.filter_by(scholar_id = pub_scholar_id).first()
if publication is None:
publication = Publication()
publication.scholar_id = pub_scholar_id
# The title of the publication.
publication.title = a.text_content()
pub_nyear = tr.find('.//td[@class="gsc_a_y"]//span')
if pub_nyear is not None:
year_of_publication = pub_nyear.text_content().strip()
# An empty year cell leaves year_of_publication untouched.
if year_of_publication:
# The year of the publication.
publication.year_of_publication = int(year_of_publication)
pub_ncitations = tr.find('.//a[@class="gsc_a_ac"]')
if pub_ncitations is not None:
total_citations = pub_ncitations.text_content().strip()
# An empty citation cell leaves total_citations untouched.
if total_citations:
# The total citations for the publication.
publication.total_citations = int(total_citations)
author_publications.append(publication)
# Stop paging once the "next" button carries a disabled attribute.
# NOTE(review): the [0] raises IndexError if the button is not in the page.
if doc.xpath('.//button[@id="gsc_bpf_next"]')[0].get("disabled"):
break
# NOTE(review): advances by a literal 100, which currently equals pagesize;
# the two must be kept in sync if pagesize ever changes.
cstart += 100
author.publications = author_publications
# When information about the author was retrieved from Google Scholar.
# NOTE(review): datetime.now() without a tz argument yields a naive local time.
author.retrieved_at = datetime.datetime.now()
# Persist the author together with the collections assigned above.
db.session.add(author)
db.session.commit()
# Python 2 print statement.
print 'Crawled author ' + scholar_id + '.'
return 'Done.'