This article collects typical usage examples of the add_source method of Python's pupa.scrape.Bill class: what Bill.add_source does and how it is used in practice. Shown below are 15 code examples of Bill.add_source, drawn from open-source projects and sorted by popularity by default. You can also explore further usage examples of the containing class, pupa.scrape.Bill.
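Every example follows the same underlying pattern: construct a Bill, then record the page or API endpoint the data came from with add_source. Here is a minimal sketch of that pattern before diving into the real scrapers (the identifier, session, title, and URL are hypothetical placeholders, not values from any of the examples below):

from pupa.scrape import Bill

# Minimal usage sketch; all values here are hypothetical placeholders.
bill = Bill(identifier='HB 1',
            legislative_session='2024',
            chamber='lower',
            title='An example act',
            classification='bill')
# Attach the URL the bill data was scraped from; the optional note
# distinguishes multiple sources attached to the same bill.
bill.add_source('https://example.com/bills/hb1', note='detail page')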
Example 1: handle_list_item
# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_source [as alias]
def handle_list_item(self, item):
    bill_id = item.text.strip()
    title = item.xpath("string(../following-sibling::td[1])").strip()
    sponsor = item.xpath("string(../following-sibling::td[2])").strip()
    bill_url = item.attrib['href'] + '/ByCategory'

    if bill_id.startswith(('SB ', 'HB ', 'SPB ', 'HPB ')):
        bill_type = 'bill'
    elif bill_id.startswith(('HR ', 'SR ')):
        bill_type = 'resolution'
    elif bill_id.startswith(('HJR ', 'SJR ')):
        bill_type = 'joint resolution'
    elif bill_id.startswith(('SCR ', 'HCR ')):
        bill_type = 'concurrent resolution'
    elif bill_id.startswith(('SM ', 'HM ')):
        bill_type = 'memorial'
    else:
        raise ValueError('Failed to identify bill type.')

    bill = Bill(bill_id, self.kwargs['session'], title,
                chamber='lower' if bill_id[0] == 'H' else 'upper',
                classification=bill_type)
    bill.add_source(bill_url)

    # normalize id from HB 0004 to H4
    subj_bill_id = re.sub(r'(H|S)\w+ 0*(\d+)', r'\1\2', bill_id)
    bill.subject = list(self.kwargs['subjects'][subj_bill_id])

    sponsor = re.sub(r'^(?:Rep|Sen)\.\s', "", sponsor)
    for sp in sponsor.split(', '):
        bill.add_sponsorship(sp, 'primary', 'person', True)

    yield from self.scrape_page_items(BillDetail, url=bill_url, obj=bill)
    yield bill
Example 2: scrape_bill
# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_source [as alias]
def scrape_bill(self, row, chamber, session):
    bill_id = row['LegislationNumber']

    # TODO: re-evaluate if these should be separate bills
    if 'SA' in bill_id or 'HA' in bill_id:
        self.warning('skipping amendment %s', bill_id)
        return

    bill_type = self.classify_bill(bill_id)
    bill = Bill(identifier=bill_id,
                legislative_session=session,
                chamber=chamber,
                title=row['LongTitle'],
                classification=bill_type)
    if row['Synopsis']:
        bill.add_abstract(row['Synopsis'], 'synopsis')
    if row['ShortTitle']:
        bill.add_title(row['ShortTitle'], 'short title')
    if row['SponsorPersonId']:
        self.add_sponsor_by_legislator_id(bill, row['SponsorPersonId'], 'primary')

    # TODO: Is there a way to get additional sponsors and cosponsors, and
    # versions/fiscal notes, via the API?
    html_url = 'https://legis.delaware.gov/BillDetail?LegislationId={}'.format(
        row['LegislationId']
    )
    bill.add_source(html_url, note='text/html')
    html = self.lxmlize(html_url)

    # Additional Sponsors: '//label[text()="Additional Sponsor(s):"]/following-sibling::div/a'
    additional_sponsors = html.xpath('//label[text()="Additional Sponsor(s):"]'
                                     '/following-sibling::div/a/@href')
    for sponsor_url in additional_sponsors:
        sponsor_id = sponsor_url.replace('https://legis.delaware.gov/LegislatorDetail?'
                                         'personId=', '')
        self.add_sponsor_by_legislator_id(bill, sponsor_id, 'primary')

    # CoSponsors: '//label[text()="Co-Sponsor(s):"]/following-sibling::div/a'
    cosponsors = html.xpath('//label[text()="Co-Sponsor(s):"]/'
                            'following-sibling::div/a/@href')
    for sponsor_url in cosponsors:
        sponsor_id = sponsor_url.replace('https://legis.delaware.gov/LegislatorDetail?'
                                         'personId=', '')
        self.add_sponsor_by_legislator_id(bill, sponsor_id, 'cosponsor')

    versions = html.xpath('//label[text()="Original Text:"]/following-sibling::div/a/@href')
    for version_url in versions:
        media_type = self.mime_from_link(version_url)
        version_name = 'Bill Text'
        # on_duplicate='error'
        bill.add_version_link(version_name, version_url, media_type=media_type)

    fiscals = html.xpath('//div[contains(@class,"fiscalNote")]/a/@href')
    for fiscal in fiscals:
        self.scrape_fiscal_note(bill, fiscal)

    self.scrape_actions(bill, row['LegislationId'])
    yield from self.scrape_votes(bill, row['LegislationId'], session)
    yield bill
Example 3: get_bill
# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_source [as alias]
def get_bill(self, bill_id, **kwargs):
    url = kwargs.pop('url')
    agenda_item = kwargs.pop('agenda_item')
    _type = self.get_type(bill_id)
    bill = Bill(bill_id, self.session, type=_type, **kwargs)
    bill.add_source(url, note='detail')
    return bill
Example 4: scrape
# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_source [as alias]
def scrape(self):
    for i, page in enumerate(self.searchLegislation()):
        for legislation_summary in self.parseSearchResults(page):
            title = legislation_summary['Title'].strip()
            if title == "":
                continue

            if legislation_summary['Type'].lower() in ('order',
                                                       'claim',
                                                       'communication',
                                                       'report',
                                                       'oath of office'):
                continue
            else:
                bill_type = legislation_summary['Type'].lower()

            bill_session = self.session(legislation_summary['Intro\xa0Date'])
            bill = Bill(identifier=legislation_summary['Record #'],
                        legislative_session=bill_session,
                        title=title,
                        classification=bill_type,
                        from_organization=self.jurisdiction.name)
            bill.add_source(legislation_summary['url'])

            bill, votes = self.addDetails(bill, legislation_summary['url'])

            yield bill
            for vote in votes:
                yield vote
Example 5: scrape_bill
# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_source [as alias]
def scrape_bill(self, chamber, session, bill_id, session_id):
    """
    Scrapes documents, actions, vote counts and votes for
    a given bill.
    """
    bill_json_url = 'https://apps.azleg.gov/api/Bill/?billNumber={}&sessionId={}&' \
                    'legislativeBody={}'.format(bill_id, session_id, self.chamber_map[chamber])
    response = self.get(bill_json_url)
    page = json.loads(response.content.decode('utf-8'))

    bill_title = page['ShortTitle']
    bill_id = page['Number']
    internal_id = page['BillId']
    bill_type = self.get_bill_type(bill_id)
    bill = Bill(
        bill_id,
        legislative_session=session,
        chamber=chamber,
        title=bill_title,
        classification=bill_type,
    )
    bill = self.scrape_actions(bill, page, chamber)
    bill = self.scrape_versions(bill, internal_id)
    bill = self.scrape_sponsors(bill, internal_id)
    bill = self.scrape_subjects(bill, internal_id)

    bill_url = 'https://apps.azleg.gov/BillStatus/BillOverview/{}?SessionId={}'.format(
        internal_id, session_id)
    bill.add_source(bill_url)

    bill = self.sort_bill_actions(bill)

    yield bill
Example 6: scrape_bill
# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_source [as alias]
def scrape_bill(self, chamber, session, bill_id, session_id):
    bill_json_url = 'https://apps.azleg.gov/api/Bill/?billNumber={}&sessionId={}&' \
                    'legislativeBody={}'.format(bill_id, session_id, self.chamber_map[chamber])
    response = self.get(bill_json_url)
    page = json.loads(response.content.decode('utf-8'))

    if not page:
        self.warning('null page for %s', bill_id)
        return

    bill_title = page['ShortTitle']
    bill_id = page['Number']
    internal_id = page['BillId']
    bill_type = self.get_bill_type(bill_id)
    bill = Bill(
        bill_id,
        legislative_session=session,
        chamber=chamber,
        title=bill_title,
        classification=bill_type,
    )
    self.scrape_actions(bill, page, chamber)
    self.scrape_versions_and_documents(bill, internal_id)
    self.scrape_sponsors(bill, internal_id)
    self.scrape_subjects(bill, internal_id)
    yield from self.scrape_votes(bill, page)

    bill_url = 'https://apps.azleg.gov/BillStatus/BillOverview/{}?SessionId={}'.format(
        internal_id, session_id)
    bill.add_source(bill_url)

    self.sort_bill_actions(bill)

    yield bill
Example 7: scrape_bills
# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_source [as alias]
def scrape_bills(self, session):
    session_key = SESSION_KEYS[session]
    measures_response = self.api_client.get('measures', page=500, session=session_key)
    legislators = index_legislators(self, session_key)

    for measure in measures_response:
        bid = '{} {}'.format(measure['MeasurePrefix'], measure['MeasureNumber'])
        chamber = self.chamber_code[bid[0]]
        bill = Bill(
            bid.replace(' ', ''),
            legislative_session=session,
            chamber=chamber,
            title=measure['RelatingTo'],
            classification=self.bill_types[measure['MeasurePrefix'][1:]]
        )
        bill.add_abstract(measure['MeasureSummary'].strip(), note='summary')

        for sponsor in measure['MeasureSponsors']:
            legislator_code = sponsor['LegislatoreCode']  # typo in API
            if legislator_code:
                try:
                    legislator = legislators[legislator_code]
                except KeyError:
                    logger.warn('Legislator {} not found in session {}'.format(
                        legislator_code, session))
                    legislator = legislator_code

                bill.add_sponsorship(
                    name=legislator,
                    classification={'Chief': 'primary', 'Regular': 'cosponsor'}[
                        sponsor['SponsorLevel']],
                    entity_type='person',
                    primary=True if sponsor['SponsorLevel'] == 'Chief' else False
                )

        bill.add_source(
            "https://olis.leg.state.or.us/liz/{session}/Measures/Overview/{bid}".format(
                session=session_key, bid=bid.replace(' ', ''))
        )

        for document in measure['MeasureDocuments']:
            # TODO: probably mixing documents & versions here - should revisit
            try:
                bill.add_version_link(document['VersionDescription'], document['DocumentUrl'],
                                      media_type='application/pdf')
            except ValueError:
                logger.warn('Duplicate link found for {}'.format(document['DocumentUrl']))

        for action in measure['MeasureHistoryActions']:
            classifiers = self.determine_action_classifiers(action['ActionText'])
            when = datetime.datetime.strptime(action['ActionDate'], '%Y-%m-%dT%H:%M:%S')
            when = self.tz.localize(when)
            bill.add_action(action['ActionText'], when,
                            chamber=self.chamber_code[action['Chamber']],
                            classification=classifiers)

        yield bill
Example 8: toy_bill
# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_source [as alias]
def toy_bill():
    b = Bill(
        identifier="HB 2017",
        legislative_session="2012A",
        title="A bill for an act to raise the cookie budget by 200%",
        from_organization="Foo Senate",
        classification="bill",
    )
    b.add_source("http://uri.example.com/", note="foo")
    return b
Example 9: get_bill
# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_source [as alias]
def get_bill(self, bill_id, **kwargs):
    if bill_id == '1':
        assert kwargs == {'extra': 'param'}
        raise self.ContinueScraping
    else:
        assert bill_id == '2'
        assert kwargs == {}
        b = Bill('1', self.session, 'title')
        b.add_source('http://example.com')
        return b
Example 10: scrape_bill_info
# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_source [as alias]
def scrape_bill_info(self, session, chambers):
    info_url = "ftp://ftp.cga.ct.gov/pub/data/bill_info.csv"
    data = self.get(info_url)
    page = open_csv(data)

    chamber_map = {'H': 'lower', 'S': 'upper'}

    for row in page:
        bill_id = row['bill_num']
        chamber = chamber_map[bill_id[0]]

        if chamber not in chambers:
            continue

        # assert that the bill data is from this session, CT is tricky
        assert row['sess_year'] == session

        if re.match(r'^(S|H)J', bill_id):
            bill_type = 'joint resolution'
        elif re.match(r'^(S|H)R', bill_id):
            bill_type = 'resolution'
        else:
            bill_type = 'bill'

        bill = Bill(identifier=bill_id,
                    legislative_session=session,
                    title=row['bill_title'],
                    classification=bill_type,
                    chamber=chamber)
        bill.add_source(info_url)

        for introducer in self._introducers[bill_id]:
            bill.add_sponsorship(name=str(introducer),
                                 classification='primary',
                                 primary=True,
                                 entity_type='person')

        try:
            for subject in self._subjects[bill_id]:
                bill.subject.append(subject)

            self.bills[bill_id] = [bill, chamber]
            yield from self.scrape_bill_page(bill)
        except SkipBill:
            self.warning('no such bill: ' + bill_id)
Example 11: parse_bill
# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_source [as alias]
def parse_bill(self, chamber, session, special, link):
    bill_num = link.text.strip()
    type_abbr = re.search('type=(B|R|)', link.attrib['href']).group(1)

    if type_abbr == 'B':
        btype = ['bill']
    elif type_abbr == 'R':
        btype = ['resolution']

    bill_id = "%s%s %s" % (utils.bill_abbr(chamber), type_abbr, bill_num)
    url = utils.info_url(chamber, session, special, type_abbr, bill_num)
    page = self.get(url).text
    page = lxml.html.fromstring(page)
    page.make_links_absolute(url)

    xpath = '/'.join([
        '//div[contains(@class, "BillInfo-ShortTitle")]',
        'div[@class="BillInfo-Section-Data"]',
    ])
    title = page.xpath(xpath).pop().text_content().strip()
    if not title:
        return

    bill = Bill(bill_id, legislative_session=session, title=title, chamber=chamber,
                classification=btype)
    bill.add_source(url)

    self.parse_bill_versions(bill, page)
    self.parse_history(bill, chamber, utils.history_url(chamber, session, special,
                                                        type_abbr, bill_num))

    # only fetch votes if votes were seen in history
    # if vote_count:
    yield from self.parse_votes(
        bill,
        utils.vote_url(chamber, session, special, type_abbr, bill_num),
    )

    # Dedupe sources; iterate over a copy so removals don't skip entries.
    sources = bill.sources
    for source in list(sources):
        if sources.count(source) > 1:
            sources.remove(source)

    yield bill
Example 12: scrape
# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_source [as alias]
def scrape(self):
    self.session = '2011'

    for i, page in enumerate(self.searchLegislation()):
        for legislation_summary in self.parseSearchResults(page):
            title = legislation_summary['Title'].strip()
            if title == "":
                continue

            bill = Bill(name=legislation_summary['Record #'],
                        session=self.session,
                        title=title,
                        type=[legislation_summary['Type'].lower()],
                        organization=self.jurisdiction.name)
            bill.add_source(legislation_summary['URL'])

            legislation_details = self.expandLegislationSummary(legislation_summary)

            for related_bill in legislation_details.get('Related files', []):
                bill.add_related_bill(name=related_bill,
                                      session=self.session,
                                      relation='other-session',
                                      chamber=None)

            for i, sponsor in enumerate(legislation_details.get('Sponsors', [])):
                if i == 0:
                    primary = True
                    sponsorship_type = "Primary"
                else:
                    primary = False
                    sponsorship_type = "Regular"
                bill.add_sponsor(sponsor, sponsorship_type, 'person', primary)

            for subject in legislation_details.get(u'Topics', []):
                bill.add_subject(subject)

            for attachment in legislation_details.get(u'Attachments', []):
                bill.add_version_link('PDF',
                                      attachment['url'],
                                      mimetype="application/pdf")

            yield bill
Example 13: createBill
# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_source [as alias]
def createBill(self, agenda_item):
    title = agenda_item['Title'].replace('\n', ' ')
    title, primary_role, primary_sponsor, secondary_role, secondary_sponsor = \
        re.match(agenda_item_title_re, title).groups()

    bill = {
        'identifier': agenda_item['Item No.'],
        'title': title,
        'legislative_session': agenda_item['session'],
        # TODO: Add agenda_item type to OCD
        'classification': 'bill',
        'from_organization': {'name': self.jurisdiction.name},
    }

    b = Bill(**bill)
    b.add_source(agenda_item['url'], note='web')

    if primary_sponsor and secondary_sponsor:
        b.add_sponsorship(primary_sponsor, 'mover', 'person', True)
        b.add_sponsorship(secondary_sponsor, 'seconder', 'person', False)

    return b
Example 14: scrape_chamber
# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_source [as alias]
def scrape_chamber(self, chamber, session):
    chamber_abbrev = {'upper': 'SF', 'lower': 'HB'}[chamber]
    url = ("http://legisweb.state.wy.us/%s/billreference/"
           "BillReference.aspx?type=%s" % (session, chamber_abbrev))
    page = self.lxmlize(url)

    for tr in page.xpath("//table[contains(@id,'cphContent_gvBills')]//tr")[1:]:
        bill_id = tr.xpath("string(td[1])").strip()
        title = tr.xpath("string(td[2])").strip()

        if bill_id[0:2] in ['SJ', 'HJ']:
            bill_type = 'joint resolution'
        else:
            bill_type = 'bill'

        bill = Bill(bill_id, legislative_session=session, title=title, chamber=chamber,
                    classification=bill_type)

        yield from self.scrape_digest(bill, chamber)

        # versions
        for a in (tr.xpath('td[8]//a') + tr.xpath('td[11]//a') +
                  tr.xpath('td[12]//a')):
            # skip references to other bills
            if a.text.startswith('See'):
                continue
            bill.add_version_link(a.text, a.get('href'),
                                  media_type='application/pdf')

        # documents
        fnote = tr.xpath('td[9]//a')
        if fnote:
            bill.add_document_link('Fiscal Note', fnote[0].get('href'))
        summary = tr.xpath('td[14]//a')
        if summary:
            bill.add_document_link('Summary', summary[0].get('href'))

        bill.add_source(url)
        yield bill
Example 15: handle_page
# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_source [as alias]
def handle_page(self):
    bills = self.doc.xpath('//ul[@class="linkSect"]/li')
    for bill in bills:
        link = bill.getchildren()[0]
        bill_id = str(link.text_content())

        if not bill_id.startswith(('S', 'H')):
            continue

        # create a bill
        desc = bill.xpath('text()')[0].strip()
        chamber = {
            'H': 'lower',
            'S': 'upper',
        }[bill_id[0]]
        bill_type = {
            'B': 'bill',
            'J': 'joint resolution',
            'R': 'resolution',
        }[bill_id[1]]
        bill = Bill(bill_id, self.kwargs['session'], desc,
                    chamber=chamber, classification=bill_type)

        bill_url = link.get('href')
        sponsor_url = BASE_URL + URL_PATTERNS['sponsors'].format(
            self.kwargs['session_id'],
            bill_id.replace(' ', ''),
        )

        list(self.scrape_page_items(BillSponsorPage, url=sponsor_url, obj=bill))
        yield from self.scrape_page_items(BillDetailPage, url=bill_url, obj=bill)
        bill.subject = self.kwargs['subjects'][bill_id]
        bill.add_source(bill_url)
        yield bill

    next_url = self.doc.xpath('//a/b[text()="More..."]/../@href')
    if next_url:
        yield from self.scrape_page_items(BillListPage, url=next_url[0], **self.kwargs)