This article collects typical usage examples of the Bill.extras['summary'] attribute from the Python module pupa.scrape. If you are wondering what Bill.extras['summary'] is for or how to use it, the hand-picked example below should help; you can also read more about its containing class, pupa.scrape.Bill.
The following shows the 1 available code example for Bill.extras['summary'], sorted by popularity by default. You can upvote the examples you find useful; your votes help the system recommend better Python code samples.
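Before the full scraper example, here is a minimal sketch of the attribute in isolation: extras is an ordinary dict carried by pupa scrape objects, so a summary is attached simply by assigning to extras['summary']. Every concrete value below (identifier, session, title, URL) is a placeholder, not taken from the example.

from pupa.scrape import Bill

# Minimal sketch; all concrete values here are placeholders.
bill = Bill('HB 1',
            legislative_session='2018',
            chamber='lower',
            title='Short title of the measure',
            classification='bill')
bill.extras['summary'] = 'Longer descriptive title, stored as a free-form summary'
bill.add_source('http://example.com/hb1.xml')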
Example 1: scrape_bills
# Required import: from pupa.scrape import Bill [as alias]
# Attribute exercised below: Bill.extras['summary']
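# The example also relies on two third-party modules, used below for XML parsing
# and for the HTTPError raised on failed requests; they must be importable in the
# scraper's environment:
import lxml.etree
import scrapelib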
def scrape_bills(self, chamber_to_scrape, session):
    url = 'http://billstatus.ls.state.ms.us/%s/pdf/all_measures/allmsrs.xml' % session
    bill_dir_page = self.get(url)
    root = lxml.etree.fromstring(bill_dir_page.content)
    for mr in root.xpath('//LASTACTION/MSRGROUP'):
        bill_id = mr.xpath('string(MEASURE)').replace(" ", "")
        if bill_id[0] == "S":
            chamber = "upper"
        else:
            chamber = "lower"
        bill_type = {'B': 'bill', 'C': 'concurrent resolution',
                     'R': 'resolution', 'N': 'nomination'}[bill_id[1]]
        # just skip past bills that are of the wrong chamber
        if chamber != chamber_to_scrape:
            continue
        link = mr.xpath('string(ACTIONLINK)').replace("..", "")
        main_doc = mr.xpath('string(MEASURELINK)').replace("../../../", "")
        main_doc_url = 'http://billstatus.ls.state.ms.us/%s' % main_doc
        bill_details_url = 'http://billstatus.ls.state.ms.us/%s/pdf%s' % (session, link)
        try:
            details_page = self.get(bill_details_url)
        except scrapelib.HTTPError:
            self.warning('Bill page not loading for {}; skipping'.format(bill_id))
            continue
        page = details_page.content
        # Some pages have the (invalid) byte 11 sitting around. Just drop
        # them out. Might as well.
        details_root = lxml.etree.fromstring(page)
        title = details_root.xpath('string(//SHORTTITLE)')
        longtitle = details_root.xpath('string(//LONGTITLE)')
        bill = Bill(bill_id,
                    legislative_session=session,
                    chamber=chamber,
                    title=title,
                    classification=bill_type)
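        # The long title has no dedicated field on Bill, so it is kept in the
        # free-form extras dict (the usage this page documents).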
        bill.extras['summary'] = longtitle
        bill.add_source(main_doc_url)
        # sponsors
        main_sponsor = details_root.xpath('string(//P_NAME)').split()
        if main_sponsor:
            main_sponsor = main_sponsor[0]
            main_sponsor_link = details_root.xpath('string(//P_LINK)').replace(" ", "_")
            main_sponsor_url = ('http://billstatus.ls.state.ms.us/%s/'
                                'pdf/%s') % (session, main_sponsor_link.strip('../'))
            type = "primary"
            bill.add_source(main_sponsor_url)
            bill.add_sponsorship(main_sponsor,
                                 classification=type,
                                 entity_type='person',
                                 primary=True)
        for author in details_root.xpath('//AUTHORS/ADDITIONAL'):
            leg = author.xpath('string(CO_NAME)').replace(" ", "_")
            if leg:
                leg_url = ('http://billstatus.ls.state.ms.us/%s/'
                           'pdf/House_authors/%s.xml') % (session, leg)
                type = "cosponsor"
                bill.add_source(leg_url)
                bill.add_sponsorship(leg,
                                     classification=type,
                                     entity_type='person',
                                     primary=False)
        # Versions
        curr_version = details_root.xpath('string(//CURRENT_OTHER'
                                          ')').replace("../../../../", "")
        if curr_version != "":
            curr_version_url = "http://billstatus.ls.state.ms.us/" + curr_version
            bill.add_version_link("Current version", curr_version_url,
                                  on_duplicate="ignore",
                                  media_type="text/html")
        intro_version = details_root.xpath('string(//INTRO_OTHER)').replace("../../../../", "")
        if intro_version != "":
            intro_version_url = "http://billstatus.ls.state.ms.us/" + intro_version
            bill.add_version_link("As Introduced", intro_version_url,
                                  on_duplicate='ignore',
                                  media_type='text/html')
        comm_version = details_root.xpath('string(//CMTESUB_OTHER'
                                          ')').replace("../../../../", "")
        if comm_version.find("documents") != -1:
            comm_version_url = "http://billstatus.ls.state.ms.us/" + comm_version
            bill.add_version_link("Committee Substitute", comm_version_url,
                                  on_duplicate='ignore',
                                  media_type='text/html')
        passed_version = details_root.xpath('string(//PASSED_OTHER'
                                            ')').replace("../../../../", "")
        if passed_version.find("documents") != -1:
            passed_version_url = "http://billstatus.ls.state.ms.us/" + passed_version
            # ......... remainder of the code omitted .........
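For context, a pupa-based scraper is normally driven through a scrape() entry point that yields the objects built by methods like the one above. Below is a minimal sketch of such a wrapper, assuming scrape_bills yields its Bill objects (any yield would sit in the omitted tail of the example) and using a hypothetical class name:

from pupa.scrape import Scraper

class MSBillScraper(Scraper):  # hypothetical name, for illustration only
    def scrape(self, chamber=None, session=None):
        # Run the example method for the requested chamber, or for both.
        chambers = [chamber] if chamber else ['upper', 'lower']
        for ch in chambers:
            yield from self.scrape_bills(ch, session)

    # scrape_bills from Example 1 would be defined on this class.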