当前位置: 首页>>代码示例>>Python>>正文


Python Author.retrieved_at 属性代码示例

本文整理汇总了 Python 中 models.Author.retrieved_at 属性的典型用法代码示例。如果您正苦于以下问题:Python Author.retrieved_at 属性的具体用法是什么?Python Author.retrieved_at 怎么用?有哪些 Python Author.retrieved_at 的使用例子?那么恭喜您,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该属性所在类 models.Author 的用法示例。


在下文中一共展示了 Author.retrieved_at 属性的 1 个代码示例,示例默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。

示例1: crawl_author

# Required import: from models import Author [as alias]
# Note: retrieved_at is an attribute set on Author instances (author.retrieved_at); it cannot be imported on its own.

#.........这里部分代码省略.........
  doc = html.parse(res)

  # The citations per year for the author.
  author_citations_per_year = []
  nhistogram = doc.find('.//div[@id="gsc_md_hist_b"]')
  if nhistogram is not None:
    years = [x.text for x in nhistogram.xpath('.//span[@class="gsc_g_t"]')]
    for a in nhistogram.xpath('.//a[@class="gsc_g_a"]'):
      i = int(a.get('style').split('z-index:')[1])
      year = int(years[-i])
      citations_per_year = AuthorCitationsPerYear.query.filter_by(author_id = author.id, year = year).first()
      if citations_per_year is None:
        citations_per_year = AuthorCitationsPerYear()
      citations_per_year.year = int(years[-i])
      citations_per_year.citations = int(a.xpath('./span[@class="gsc_g_al"]')[0].text)
      author_citations_per_year.append(citations_per_year)
  author.citations_per_year = author_citations_per_year

  params = urlencode({'hl': 'en', 'view_op': 'list_colleagues', 'user': scholar_id})
  req = Request(url + '?' + params)
  opener.open(req)
  res = opener.open(req)
  doc = html.parse(res)

  # The co-authors of the author.
  author_coauthors = []
  for a in doc.xpath('.//h3[@class="gsc_1usr_name"]//a'):
    co_scholar_id = a.get('href').split('user=')[1].split('&hl')[0]
    coauthor = Author.query.filter_by(scholar_id = co_scholar_id).first()
    if coauthor is None:
      coauthor = Author()
    coauthor.scholar_id = co_scholar_id
    author_coauthors.append(coauthor)
  author.coauthors = author_coauthors

  # The publications.
  author_publications = []
  cstart = 0
  pagesize = 100
  while True:
    params = urlencode({'hl': 'en', 'view_op': 'list_works', 'sortby': 'pubdate',
                        'user': scholar_id, 'cstart': cstart, 'pagesize': pagesize})
    req = Request(url + '?' + params)
    opener.open(req)
    res = opener.open(req)
    doc = html.parse(res)

    for tr in doc.xpath('.//tr[@class="gsc_a_tr"]'):
      a = tr.find('.//td[@class="gsc_a_t"]//a')
      # NOTE: When there are no publications, there is a single tr.
      # <tr class="gsc_a_tr"><td class="gsc_a_e" colspan="3">There are no articles in this profile.</td></tr>
      if a is None:
        continue
      purl = a.get('href')

      # The ID of the publication in Google Scholar.
      pub_scholar_id = purl.split('citation_for_view=')[1]

      # Retrieve the publication with that ID (if any).
      publication = Publication.query.filter_by(scholar_id = pub_scholar_id).first()
      if publication is None:
        publication = Publication()
      publication.scholar_id = pub_scholar_id

      # The title of the publication.
      publication.title = a.text_content()

      pub_nyear = tr.find('.//td[@class="gsc_a_y"]//span')
      if pub_nyear is not None:
        year_of_publication = pub_nyear.text_content().strip()
        if year_of_publication:

          # The year of the publication.
          publication.year_of_publication = int(year_of_publication)

      pub_ncitations = tr.find('.//a[@class="gsc_a_ac"]')

      if pub_ncitations is not None:
        total_citations = pub_ncitations.text_content().strip()
        if total_citations:

          # The total citations for the publication.
          publication.total_citations = int(total_citations)

      author_publications.append(publication)

    if doc.xpath('.//button[@id="gsc_bpf_next"]')[0].get("disabled"):
      break

    cstart += 100
  author.publications = author_publications

  # When information about the author was retrieved from Google Scholar.
  author.retrieved_at = datetime.datetime.now()

  db.session.add(author)
  db.session.commit()

  print 'Crawled author ' + scholar_id + '.'
  return 'Done.'
开发者ID:ipeirotis,项目名称:Citation-Analysis,代码行数:104,代码来源:crawlers.py


注:本文中的 models.Author.retrieved_at 属性示例由纯净天空整理自 GitHub/MSDocs 等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的 License;未经允许,请勿转载。