本文整理汇总了Python中pupa.scrape.Bill.extras["BDR"]属性的典型用法代码示例。如果您正苦于以下问题:Python Bill.extras["BDR"]的具体用法?Python Bill.extras["BDR"]怎么用?Python Bill.extras["BDR"]使用的例子?那么恭喜您,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该属性所在类pupa.scrape.Bill的用法示例。
在下文中一共展示了Bill.extras["BDR"]的1个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: parse_bill
# Required import: from pupa.scrape import Bill
# Note: extras is a dict attribute on Bill instances and cannot be imported on its own; access it as bill.extras["BDR"]
def parse_bill(self, chamber, session, bill_id, url):
    """Scrape a single bill detail page and yield the resulting ``Bill``.

    The page at ``url`` is fetched with ``self.lxmlize``.  Nothing is
    yielded when the fetch raises ``scrapelib.HTTPError``, when the
    'Last Action' text contains "withdrawn" (case-insensitive), or when
    the 'Bill Documents' field is missing.

    Args:
        chamber: Chamber value passed straight through to ``Bill``.
        session: Passed to ``Bill`` as ``legislative_session``.
        bill_id: Bill identifier; also used to infer the classification
            and to look up subjects in ``self._subjects``.
        url: Address of the bill detail page; recorded as the bill source.

    Yields:
        One ``Bill`` carrying its version link, fiscal-note documents,
        primary sponsors and, when present, ``extras["BDR"]``.
    """
    def media_type_for(href):
        # Derive the media type from the file extension.  Unknown
        # extensions map to an empty string instead of leaving the value
        # unset (which used to raise UnboundLocalError, or silently reuse
        # the previous document's type inside the fiscal-note loop).
        # NOTE(review): '' is believed to match pupa's own default for
        # ``media_type`` -- confirm against the pupa version in use.
        if href.endswith('.doc'):
            return 'application/msword'
        if href.endswith('.pdf'):
            return 'application/pdf'
        return ''

    try:
        page = self.lxmlize(url)
    except scrapelib.HTTPError as e:
        self.logger.warning(e)
        return

    last_action = self.parse_bill_field(
        page, 'Last Action').xpath('text()')[0]
    if 'WITHDRAWN' in last_action.upper():
        self.info("{} Withdrawn, skipping".format(bill_id))
        return

    version = self.parse_bill_field(page, 'Bill Documents')
    # This guard must run before any use of ``version``; previously it sat
    # after the xpath calls and could never be reached.
    if version is None:
        # Bill withdrawn
        self.logger.warning('Bill withdrawn.')
        return
    source_url = version.xpath('a[1]/@href')[0]
    version_title = version.xpath('a[1]/text()')[0].strip()
    mimetype = media_type_for(source_url)

    title = self.parse_bill_field(page, 'Title').text_content()

    # Order matters: 'CR' and 'JR' must be tested before the bare 'R'.
    if 'CR' in bill_id:
        bill_type = 'concurrent resolution'
    elif 'JR' in bill_id:
        bill_type = 'joint resolution'
    elif 'R' in bill_id:
        bill_type = 'resolution'
    else:
        bill_type = 'bill'

    bill = Bill(bill_id, legislative_session=session, chamber=chamber,
                title=title, classification=bill_type)
    # presumably self._subjects maps bill_id -> subjects and is populated
    # before this method runs; a missing key raises KeyError -- verify.
    bill.subject = self._subjects[bill_id]
    bill.add_source(url)
    bill.add_version_link(version_title, source_url, media_type=mimetype)

    self.parse_actions(page, bill, chamber)
    self.parse_subjects(page, bill)

    # LM is "Locally Mandated fiscal impact"
    for fiscal_note in page.xpath('//a[contains(@href, "/LM.pdf")]'):
        note_url = fiscal_note.attrib['href']
        bill.add_document_link(
            "Fiscal Note", note_url, media_type=media_type_for(note_url))

    for link in page.xpath(
            "//td/span/a[contains(@href, 'Legislator-Profile')]"):
        bill.add_sponsorship(link.text.strip(), classification='primary',
                             entity_type='person', primary=True)

    # Evaluate the xpath once and reuse the result.
    bdr_text = self.parse_bill_field(
        page, 'Bill Request Number').xpath('text()')
    if bdr_text:
        bill.extras["BDR"] = bdr_text[0].strip()

    yield bill