本文整理汇总了Python中pupa.scrape.Bill.title方法的典型用法代码示例。如果您正苦于以下问题:Python Bill.title方法的具体用法?Python Bill.title怎么用?Python Bill.title使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pupa.scrape.Bill的用法示例。
在下文中一共展示了Bill.title方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: scrape_bill_list
# 需要导入模块: from pupa.scrape import Bill [as 别名]
# 或者: from pupa.scrape.Bill import title [as 别名]
def scrape_bill_list(self, url):
bill_list = self._get_bill_list(url)
for bill_info in bill_list:
(bill_id, ) = bill_info.xpath('td[1]/font/input/@value')
(sponsor, ) = bill_info.xpath('td[2]/font/input/@value')
(subject, ) = bill_info.xpath('td[3]//text()')
subject = subject.strip()
chamber = self.CHAMBERS[bill_id[0]]
if 'B' in bill_id:
bill_type = 'bill'
elif 'JR' in bill_id:
bill_type = 'joint resolution'
elif 'R' in bill_id:
bill_type = 'resolution'
else:
raise AssertionError(
"Unknown bill type for bill '{}'".format(bill_id))
bill = Bill(
bill_id,
legislative_session=self.session,
chamber=chamber,
title='',
classification=bill_type,
)
if subject:
bill.subject = [subject]
if sponsor:
bill.add_sponsorship(
name=sponsor,
entity_type='person',
classification='primary',
primary=True,
)
bill.add_source(url)
bill_url = ('http://alisondb.legislature.state.al.us/Alison/'
'SESSBillStatusResult.aspx?BILL={}'.format(bill_id))
bill.add_source(bill_url)
bill_html = self._get_bill_response(bill_url)
if bill_html is None:
self.warning("Bill {} has no webpage, and will be skipped".
format(bill_id))
continue
bill_doc = lxml.html.fromstring(bill_html)
if (bill_doc.xpath('//span[@id="ContentPlaceHolder1_lblShotTitle"]')):
title = bill_doc.xpath(
'//span[@id="ContentPlaceHolder1_lblShotTitle"]'
)[0].text_content().strip()
if not title:
title = "[No title given by state]"
bill.title = title
version_url_base = (
'http://alisondb.legislature.state.al.us/ALISON/'
'SearchableInstruments/{0}/PrintFiles/{1}-'.
format(self.session, bill_id))
versions = bill_doc.xpath(
'//table[@class="box_versions"]/tr/td[2]/font/text()')
for version in versions:
name = version
if version == "Introduced":
version_url = version_url_base + 'int.pdf'
elif version == "Engrossed":
version_url = version_url_base + 'eng.pdf'
elif version == "Enrolled":
version_url = version_url_base + 'enr.pdf'
else:
raise NotImplementedError(
"Unknown version type found: '{}'".format(name))
bill.add_version_link(
name,
version_url,
media_type='application/pdf',
on_duplicate='ignore',
)
# Fiscal notes exist, but I can't figure out how to build their URL
fiscal_notes = bill_doc.xpath(
'//table[@class="box_fiscalnote"]')[1:]
for fiscal_note in fiscal_notes:
pass
# Budget Isolation Resolutions are handled as extra actions/votes
birs = bill_doc.xpath(
'//div[@class="box_bir"]//table//table/tr')[1:]
for bir in birs:
bir_action = bir.xpath('td[1]')[0].text_content().strip()
# Sometimes ALISON's database puts another bill's
# actions into the BIR action list; ignore these
if bill_id not in bir_action:
self.warning(
"BIR action found ({}) ".format(bir_action) +
"that doesn't match the bill ID ({})".format(bill_id))
#.........这里部分代码省略.........
示例2: scrape_bill_type
# 需要导入模块: from pupa.scrape import Bill [as 别名]
# 或者: from pupa.scrape.Bill import title [as 别名]
def scrape_bill_type(self, chamber, session, bill_type, type_abbr,
committee_abbr_regex=get_committee_name_regex()):
bills = self.session.query(CABill).filter_by(
session_year=session).filter_by(
measure_type=type_abbr)
for bill in bills:
bill_session = session
if bill.session_num != '0':
bill_session += ' Special Session %s' % bill.session_num
bill_id = bill.short_bill_id
fsbill = Bill(bill_id, session, title='', chamber=chamber)
if ((bill_id.startswith('S') and chamber == 'lower') or
(bill_id.startswith('A') and chamber == 'upper')):
print("!!!! BAD ID/CHAMBER PAIR !!!!", bill)
continue
# # Construct session for web query, going from '20092010' to '0910'
# source_session = session[2:4] + session[6:8]
# # Turn 'AB 10' into 'ab_10'
# source_num = "%s_%s" % (bill.measure_type.lower(),
# bill.measure_num)
# Construct a fake source url
source_url = ('http://leginfo.legislature.ca.gov/faces/'
'billNavClient.xhtml?bill_id=%s') % bill.bill_id
fsbill.add_source(source_url)
fsbill.add_version_link(bill_id, source_url, media_type='text/html')
title = ''
type_ = ['bill']
subject = ''
all_titles = set()
# Get digest test (aka "summary") from latest version.
if bill.versions:
version = bill.versions[-1]
nsmap = version.xml.nsmap
xpath = '//caml:DigestText/xhtml:p'
els = version.xml.xpath(xpath, namespaces=nsmap)
chunks = []
for el in els:
t = etree_text_content(el)
t = re.sub(r'\s+', ' ', t)
t = re.sub(r'\)(\S)', lambda m: ') %s' % m.group(1), t)
chunks.append(t)
summary = '\n\n'.join(chunks)
for version in bill.versions:
if not version.bill_xml:
continue
version_date = self._tz.localize(version.bill_version_action_date)
# create a version name to match the state's format
# 02/06/17 - Enrolled
version_date_human = version_date.strftime(
'%m/%d/%y')
version_name = "{} - {}".format(
version_date_human, version.bill_version_action)
version_base = "https://leginfo.legislature.ca.gov/faces"
version_url_pdf = "{}/billPdf.xhtml?bill_id={}&version={}".format(
version_base, version.bill_id, version.bill_version_id)
fsbill.add_version_link(
version_name,
version_url_pdf,
media_type='application/pdf',
date=version_date.date())
# CA is inconsistent in that some bills have a short title
# that is longer, more descriptive than title.
if bill.measure_type in ('AB', 'SB'):
impact_clause = clean_title(version.title)
title = clean_title(version.short_title)
else:
impact_clause = None
if len(version.title) < len(version.short_title) and \
not version.title.lower().startswith('an act'):
title = clean_title(version.short_title)
else:
title = clean_title(version.title)
if title:
all_titles.add(title)
type_ = [bill_type]
if version.appropriation == 'Yes':
type_.append('appropriation')
tags = []
if version.fiscal_committee == 'Yes':
tags.append('fiscal committee')
#.........这里部分代码省略.........
示例3: scrape
# 需要导入模块: from pupa.scrape import Bill [as 别名]
# 或者: from pupa.scrape.Bill import title [as 别名]
def scrape(self, session=None, chambers=None):
# Bills endpoint can sometimes take a very long time to load
self.timeout = 300
if not session:
session = self.latest_session()
self.info('no session, using %s', session)
if int(session) < 128:
raise AssertionError("No data for period {}".format(session))
elif int(session) < 131:
# they changed their data format starting in 131st and added
# an undocumented API
yield from self.old_scrape(session)
else:
chamber_dict = {"Senate": "upper", "House": "lower",
"House of Representatives": "lower",
"house": "lower", "senate": "upper"}
# so presumanbly not everything passes, but we haven't
# seen anything not pass yet, so we'll need to wait
# till it fails and get the right language in here
vote_results = {"approved": True,
"passed": True,
"adopted": True,
"true": True,
"false": False,
"failed": False,
True: True,
False: False}
action_dict = {"ref_ctte_100": "referral-committee",
"intro_100": "introduction",
"intro_101": "introduction",
"pass_300": "passage",
"intro_110": "reading-1",
"refer_210": "referral-committee",
"crpt_301": None,
"crpt_317": None,
"concur_606": "passage",
"pass_301": "passage",
"refer_220": "referral-committee",
"intro_102": ["introduction", "passage"],
"intro_105": ["introduction", "passage"],
"intro_ref_ctte_100": "referral-committee",
"refer_209": None,
"intro_108": ["introduction", "passage"],
"intro_103": ["introduction", "passage"],
"msg_reso_503": "passage",
"intro_107": ["introduction", "passage"],
"imm_consid_360": "passage",
"refer_213": None,
"adopt_reso_100": "passage",
"adopt_reso_110": "passage",
"msg_507": "amendment-passage",
"confer_713": None,
"concur_603": None,
"confer_712": None,
"msg_506": "amendment-failure",
"receive_message_100": "passage",
"motion_920": None,
"concur_611": None,
"confer_735": None,
"third_429": None,
"final_501": None,
"concur_608": None,
}
base_url = "http://search-prod.lis.state.oh.us"
first_page = base_url
first_page += "/solarapi/v1/general_assembly_{session}/".format(session=session)
legislators = self.get_legislator_ids(first_page)
all_amendments = self.get_other_data_source(first_page, base_url, "amendments")
all_fiscals = self.get_other_data_source(first_page, base_url, "fiscals")
all_synopsis = self.get_other_data_source(first_page, base_url, "synopsiss")
all_analysis = self.get_other_data_source(first_page, base_url, "analysiss")
for row in self.get_bill_rows(session):
spacer, number_link, _ga, title, primary_sponsor, status, spacer = row.xpath('td')
# S.R.No.1 -> SR1
bill_id = number_link.text_content().replace('No.', '')
bill_id = bill_id.replace('.', '').replace(' ', '')
# put one space back in between type and number
bill_id = re.sub(r'([a-zA-Z]+)(\d+)', r'\1 \2', bill_id)
title = title.text_content().strip()
title = re.sub(r'^Title', '', title)
chamber = 'lower' if 'H' in bill_id else 'upper'
classification = 'bill' if 'B' in bill_id else 'resolution'
bill = Bill(bill_id, legislative_session=session, chamber=chamber,
title=title, classification=classification)
bill.add_source(number_link.xpath('a/@href')[0])
# get bill from API
bill_api_url = ('http://search-prod.lis.state.oh.us/solarapi/v1/'
#.........这里部分代码省略.........