This article collects typical code examples of the Python method pupa.scrape.Bill.add_sponsorship: what the method does, how it is called, and what real-world usage looks like. The curated examples below may help, and you can also explore the containing class pupa.scrape.Bill for further usage examples.
The following 15 code examples of Bill.add_sponsorship are shown, sorted by popularity by default.
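Before the examples, here is a minimal sketch of the call pattern they all share. The bill identifier, session, title, source URL, and sponsor names below are placeholders invented for illustration:

from pupa.scrape import Bill

# A throwaway bill; identifier, session, and title are placeholders.
bill = Bill('HB 1', legislative_session='2017', title='An example act',
            chamber='lower', classification='bill')
bill.add_source('https://example.com/hb1')  # pupa expects at least one source

# add_sponsorship takes a name, a classification (usually 'primary' or
# 'cosponsor'), an entity_type ('person' or 'organization'), and a
# primary flag, as the examples below show.
bill.add_sponsorship('Jane Doe', classification='primary',
                     entity_type='person', primary=True)
bill.add_sponsorship('John Roe', classification='cosponsor',
                     entity_type='person', primary=False)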
Example 1: handle_list_item

# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_sponsorship [as alias]
def handle_list_item(self, item):
    bill_id = item.text.strip()
    title = item.xpath("string(../following-sibling::td[1])").strip()
    sponsor = item.xpath("string(../following-sibling::td[2])").strip()
    bill_url = item.attrib['href'] + '/ByCategory'

    if bill_id.startswith(('SB ', 'HB ', 'SPB ', 'HPB ')):
        bill_type = 'bill'
    elif bill_id.startswith(('HR ', 'SR ')):
        bill_type = 'resolution'
    elif bill_id.startswith(('HJR ', 'SJR ')):
        bill_type = 'joint resolution'
    elif bill_id.startswith(('SCR ', 'HCR ')):
        bill_type = 'concurrent resolution'
    elif bill_id.startswith(('SM ', 'HM ')):
        bill_type = 'memorial'
    else:
        raise ValueError('Failed to identify bill type.')

    bill = Bill(bill_id, self.kwargs['session'], title,
                chamber='lower' if bill_id[0] == 'H' else 'upper',
                classification=bill_type)
    bill.add_source(bill_url)

    # normalize id from HB 0004 to H4
    subj_bill_id = re.sub(r'(H|S)\w+ 0*(\d+)', r'\1\2', bill_id)
    bill.subject = list(self.kwargs['subjects'][subj_bill_id])

    sponsor = re.sub(r'^(?:Rep|Sen)\.\s', "", sponsor)
    for sp in sponsor.split(', '):
        bill.add_sponsorship(sp, 'primary', 'person', True)

    yield from self.scrape_page_items(BillDetail, url=bill_url, obj=bill)
    yield bill
Example 2: scrape_bill

# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_sponsorship [as alias]
def scrape_bill(self, session, bill_id, chamber):
    # https://malegislature.gov/Bills/189/SD2739
    session_for_url = self.replace_non_digits(session)
    bill_url = 'https://malegislature.gov/Bills/{}/{}'.format(session_for_url, bill_id)

    try:
        response = requests.get(bill_url)
    except requests.exceptions.RequestException as e:
        self.warning(u'Server Error on {}'.format(bill_url))
        return False

    html = response.text
    page = lxml.html.fromstring(html)

    if not page.xpath('//div[contains(@class, "followable")]/h1/text()'):
        self.warning(u'Server Error on {}'.format(bill_url))
        return False

    bill_title = page.xpath('//div[@id="contentContainer"]/div/div/h2/text()')[0]

    bill_id = re.sub(r'[^S|H|D|\d]', '', bill_id)

    bill = Bill(bill_id, legislative_session=session, chamber=chamber,
                title=bill_title, classification='bill')

    bill_summary = None
    if page.xpath('//p[@id="pinslip"]/text()'):
        bill_summary = page.xpath('//p[@id="pinslip"]/text()')[0]
    if bill_summary:
        bill.add_abstract(bill_summary, 'summary')

    bill.add_source(bill_url)

    # https://malegislature.gov/Bills/189/SD2739 has a presenter
    # https://malegislature.gov/Bills/189/S2168 no sponsor
    # Find the non-blank text of the dt following Sponsor or Presenter,
    # including any child link text.
    sponsor = page.xpath(
        '//dt[text()="Sponsor:" or text()="Presenter:"]/'
        'following-sibling::dd/descendant-or-self::*/text()[normalize-space()]')
    if sponsor:
        sponsor = sponsor[0].strip()
        bill.add_sponsorship(sponsor, classification='primary', primary=True,
                             entity_type='person')

    self.scrape_cosponsors(bill, bill_url)

    version = page.xpath("//div[contains(@class, 'modalBtnGroup')]/"
                         "a[contains(text(), 'Download PDF') and not(@disabled)]/@href")
    if version:
        version_url = "https://malegislature.gov{}".format(version[0])
        bill.add_version_link('Bill Text', version_url, media_type='application/pdf')

    # yield back votes and bill
    yield from self.scrape_actions(bill, bill_url, session)
    yield bill
Example 3: scrape_bills

# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_sponsorship [as alias]
def scrape_bills(self, session):
    session_key = SESSION_KEYS[session]
    measures_response = self.api_client.get('measures', page=500, session=session_key)

    legislators = index_legislators(self, session_key)

    for measure in measures_response:
        bid = '{} {}'.format(measure['MeasurePrefix'], measure['MeasureNumber'])

        chamber = self.chamber_code[bid[0]]
        bill = Bill(
            bid.replace(' ', ''),
            legislative_session=session,
            chamber=chamber,
            title=measure['RelatingTo'],
            classification=self.bill_types[measure['MeasurePrefix'][1:]]
        )
        bill.add_abstract(measure['MeasureSummary'].strip(), note='summary')

        for sponsor in measure['MeasureSponsors']:
            legislator_code = sponsor['LegislatoreCode']  # typo in API
            if legislator_code:
                try:
                    legislator = legislators[legislator_code]
                except KeyError:
                    logger.warn('Legislator {} not found in session {}'.format(
                        legislator_code, session))
                    legislator = legislator_code

                bill.add_sponsorship(
                    name=legislator,
                    classification={'Chief': 'primary', 'Regular': 'cosponsor'}[
                        sponsor['SponsorLevel']],
                    entity_type='person',
                    primary=sponsor['SponsorLevel'] == 'Chief'
                )

        bill.add_source(
            "https://olis.leg.state.or.us/liz/{session}/Measures/Overview/{bid}".format(
                session=session_key, bid=bid.replace(' ', ''))
        )

        for document in measure['MeasureDocuments']:
            # TODO: probably mixing documents & versions here - should revisit
            try:
                bill.add_version_link(document['VersionDescription'], document['DocumentUrl'],
                                      media_type='application/pdf')
            except ValueError:
                logger.warn('Duplicate link found for {}'.format(document['DocumentUrl']))

        for action in measure['MeasureHistoryActions']:
            classifiers = self.determine_action_classifiers(action['ActionText'])
            when = datetime.datetime.strptime(action['ActionDate'], '%Y-%m-%dT%H:%M:%S')
            when = self.tz.localize(when)
            bill.add_action(action['ActionText'], when,
                            chamber=self.chamber_code[action['Chamber']],
                            classification=classifiers)

        yield bill
Example 4: scrape_bill

# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_sponsorship [as alias]
def scrape_bill(self, chamber, session):
    url = "ftp://www.arkleg.state.ar.us/dfadooas/LegislativeMeasures.txt"
    page = self.get(url).text
    page = unicode_csv_reader(StringIO(page), delimiter='|')

    for row in page:
        bill_chamber = {'H': 'lower', 'S': 'upper'}[row[0]]
        if bill_chamber != chamber:
            continue

        bill_id = "%s%s %s" % (row[0], row[1], row[2])

        type_spec = re.match(r'(H|S)([A-Z]+)\s', bill_id).group(2)
        bill_type = {
            'B': 'bill',
            'R': 'resolution',
            'JR': 'joint resolution',
            'CR': 'concurrent resolution',
            'MR': 'memorial',
            'CMR': 'concurrent memorial'}[type_spec]

        if row[-1] != self.slug:
            continue

        bill = Bill(bill_id, legislative_session=session,
                    chamber=chamber, title=row[3], classification=bill_type)
        bill.add_source(url)

        primary = row[11]
        if not primary:
            primary = row[12]

        if primary:
            bill.add_sponsorship(primary, classification='primary',
                                 entity_type='person', primary=True)

        # ftp://www.arkleg.state.ar.us/Bills/
        # TODO: Keep an eye on this post-2017 to see if they apply R going forward.
        session_code = '2017R' if session == '2017' else session

        version_url = ("ftp://www.arkleg.state.ar.us/Bills/"
                       "%s/Public/%s.pdf" % (
                           session_code, bill_id.replace(' ', '')))
        bill.add_version_link(bill_id, version_url, media_type='application/pdf')

        yield from self.scrape_bill_page(bill)
        self.bills[bill_id] = bill
Example 5: scrape_bill_info

# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_sponsorship [as alias]
def scrape_bill_info(self, session, chambers):
    info_url = "ftp://ftp.cga.ct.gov/pub/data/bill_info.csv"
    data = self.get(info_url)
    page = open_csv(data)

    chamber_map = {'H': 'lower', 'S': 'upper'}

    for row in page:
        bill_id = row['bill_num']
        chamber = chamber_map[bill_id[0]]

        if chamber not in chambers:
            continue

        # assert that the bill data is from this session, CT is tricky
        assert row['sess_year'] == session

        if re.match(r'^(S|H)J', bill_id):
            bill_type = 'joint resolution'
        elif re.match(r'^(S|H)R', bill_id):
            bill_type = 'resolution'
        else:
            bill_type = 'bill'

        bill = Bill(identifier=bill_id,
                    legislative_session=session,
                    title=row['bill_title'],
                    classification=bill_type,
                    chamber=chamber)
        bill.add_source(info_url)

        for introducer in self._introducers[bill_id]:
            bill.add_sponsorship(name=str(introducer),
                                 classification='primary',
                                 primary=True,
                                 entity_type='person')

        try:
            for subject in self._subjects[bill_id]:
                bill.subject.append(subject)

            self.bills[bill_id] = [bill, chamber]
            yield from self.scrape_bill_page(bill)
        except SkipBill:
            self.warning('no such bill: ' + bill_id)
Example 6: scrape_bill

# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_sponsorship [as alias]
def scrape_bill(self, chamber, session):
    url = "ftp://www.arkleg.state.ar.us/SessionInformation/LegislativeMeasures.txt"
    page = csv.reader(get_utf_16_ftp_content(url).splitlines(), delimiter='|')

    for row in page:
        bill_chamber = {'H': 'lower', 'S': 'upper'}[row[0]]
        if bill_chamber != chamber:
            continue

        bill_id = "%s%s %s" % (row[0], row[1], row[2])

        type_spec = re.match(r'(H|S)([A-Z]+)\s', bill_id).group(2)
        bill_type = {
            'B': 'bill',
            'R': 'resolution',
            'JR': 'joint resolution',
            'CR': 'concurrent resolution',
            'MR': 'memorial',
            'CMR': 'concurrent memorial'}[type_spec]

        if row[-1] != self.slug:
            continue

        bill = Bill(bill_id, legislative_session=session,
                    chamber=chamber, title=row[3], classification=bill_type)
        bill.add_source(url)

        primary = row[11]
        if not primary:
            primary = row[12]

        if primary:
            bill.add_sponsorship(primary, classification='primary',
                                 entity_type='person', primary=True)

        version_url = ("ftp://www.arkleg.state.ar.us/Bills/"
                       "%s/Public/Searchable/%s.pdf" % (
                           self.slug, bill_id.replace(' ', '')))
        bill.add_version_link(bill_id, version_url, media_type='application/pdf')

        yield from self.scrape_bill_page(bill)
        self.bills[bill_id] = bill
Example 7: createBill

# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_sponsorship [as alias]
def createBill(self, agenda_item):
    title = agenda_item['Title'].replace('\n', ' ')
    title, primary_role, primary_sponsor, secondary_role, secondary_sponsor = re.match(
        agenda_item_title_re, title).groups()

    bill = {
        'identifier': agenda_item['Item No.'],
        'title': title,
        'legislative_session': agenda_item['session'],
        # TODO: Add agenda_item type to OCD
        'classification': 'bill',
        'from_organization': {'name': self.jurisdiction.name},
    }

    b = Bill(**bill)
    b.add_source(agenda_item['url'], note='web')

    if primary_sponsor and secondary_sponsor:
        b.add_sponsorship(primary_sponsor, 'mover', 'person', True)
        b.add_sponsorship(secondary_sponsor, 'seconder', 'person', False)

    return b
Example 8: scrape

# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_sponsorship [as alias]
# ......... preceding code omitted .........
        # add title if no short title
        if not bill.title:
            bill.title = data['items'][0]['longtitle']
        bill.add_title(data['items'][0]['longtitle'], 'long title')

        # this stuff is version-specific
        for version in data['items']:
            version_name = version["version"]
            version_link = base_url + version["pdfDownloadLink"]
            bill.add_version_link(version_name, version_link, media_type='application/pdf')

        # we'll use latest bill_version for everything else
        bill_version = data['items'][0]
        bill.add_source(bill_api_url)

        # subjects
        for subj in bill_version["subjectindexes"]:
            try:
                bill.add_subject(subj["primary"])
            except KeyError:
                pass
            try:
                secondary_subj = subj["secondary"]
            except KeyError:
                secondary_subj = ""
            if secondary_subj:
                bill.add_subject(secondary_subj)

        # sponsors
        sponsors = bill_version["sponsors"]
        for sponsor in sponsors:
            sponsor_name = self.get_sponsor_name(sponsor)
            bill.add_sponsorship(
                sponsor_name,
                classification='primary',
                entity_type='person',
                primary=True
            )
        cosponsors = bill_version["cosponsors"]
        for sponsor in cosponsors:
            sponsor_name = self.get_sponsor_name(sponsor)
            bill.add_sponsorship(
                sponsor_name,
                classification='cosponsor',
                entity_type='person',
                primary=False,
            )

        try:
            action_doc = self.get(base_url + bill_version["action"][0]["link"])
        except scrapelib.HTTPError:
            pass
        else:
            actions = action_doc.json()
            for action in reversed(actions["items"]):
                actor = chamber_dict[action["chamber"]]
                action_desc = action["description"]
                try:
                    action_type = action_dict[action["actioncode"]]
                except KeyError:
                    self.warning("Unknown action {desc} with code {code}."
                                 " Add it to the action_dict"
                                 ".".format(desc=action_desc,
# ......... remaining code omitted .........
Example 9: scrape

# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_sponsorship [as alias]
def scrape(self):
    unreachable_urls = []

    for leg_summary in self.legislation(created_after=datetime.datetime(2015, 5, 17)):
        title = leg_summary['Title'].strip()
        if not title or not leg_summary['Intro\xa0Date']:
            # https://chicago.legistar.com/LegislationDetail.aspx?ID=1800754&GUID=29575A7A-5489-4D8B-8347-4FC91808B201&Options=Advanced&Search=
            # doesn't have an intro date
            continue

        bill_type = BILL_TYPES[leg_summary['Type']]
        bill_session = self.session(self.toTime(leg_summary['Intro\xa0Date']))
        bill = Bill(identifier=leg_summary['Record #'],
                    legislative_session=bill_session,
                    title=title,
                    classification=bill_type,
                    from_organization={"name": "Chicago City Council"})

        bill.add_source(leg_summary['url'])

        try:
            leg_details = self.legDetails(leg_summary['url'])
        except IndexError:
            unreachable_urls.append(leg_summary['url'])
            yield bill
            continue

        for related_bill in leg_details.get('Related files', []):
            lower_title = title.lower()
            if "sundry" in lower_title or "miscellaneous" in lower_title:  # these are omnibus
                bill.add_related_bill(identifier=related_bill['label'],
                                      legislative_session=bill.legislative_session,
                                      relation_type='replaces')
            # for now we're skipping related bills if they
            # don't contain words that make us think they're
            # in an omnibus relationship with each other

        for i, sponsor in enumerate(leg_details.get('Sponsors', [])):
            if i == 0:
                primary = True
                sponsorship_type = "Primary"
            else:
                primary = False
                sponsorship_type = "Regular"

            sponsor_name = sponsor['label']

            # Does the Mayor/Clerk introduce legislation as
            # individual role holders or as the Office of the City
            # Clerk and the Office of the Mayor?
            entity_type = 'person'
            if sponsor_name.startswith(('City Clerk',
                                        'Mendoza, Susana')):
                sponsor_name = 'Office of the City Clerk'
                entity_type = 'organization'
            elif sponsor_name.startswith(('Emanuel, Rahm',)):
                sponsor_name = 'Office of the Mayor'
                entity_type = 'organization'

            if not sponsor_name.startswith(('Misc. Transmittal',
                                            'No Sponsor',
                                            'Dept./Agency')):
                bill.add_sponsorship(sponsor_name,
                                     sponsorship_type,
                                     entity_type,
                                     primary,
                                     entity_id=_make_pseudo_id(name=sponsor_name))

        if 'Topic' in leg_details:
            for subject in leg_details[u'Topic'].split(','):
                bill.add_subject(subject)

        for attachment in leg_details.get('Attachments', []):
            if attachment['label']:
                bill.add_version_link(attachment['label'],
                                      attachment['url'],
                                      media_type="application/pdf")

        for action in self.history(leg_summary['url']):
            action_description = action['Action']
            try:
                action_date = self.toTime(action['Date']).date().isoformat()
            except AttributeError:  # https://chicago.legistar.com/LegislationDetail.aspx?ID=1424866&GUID=CEC53337-B991-4268-AE8A-D4D174F8D492
                continue

            if action_description:
                try:
                    responsible_org = action['Action\xa0By']['label']
                except TypeError:
                    responsible_org = action['Action\xa0By']
                if responsible_org == 'City Council':
                    responsible_org = 'Chicago City Council'

                act = bill.add_action(action_description,
                                      action_date,
                                      organization={'name': responsible_org},
                                      classification=ACTION_CLASSIFICATION[action_description])

                if action_description == 'Referred':
# ......... remaining code omitted .........
Example 10: _scrape_bill

# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_sponsorship [as alias]
def _scrape_bill(self, session, bill_data):
    details = self._parse_bill_details(bill_data)

    (senate_url, assembly_url, bill_chamber, bill_type, bill_id,
     title, (prefix, number, active_version)) = details

    bill = Bill(
        bill_id,
        legislative_session=session,
        chamber=bill_chamber,
        title=title or bill_data['summary'],
        classification=bill_type,
    )

    if bill_data['summary']:
        bill.add_abstract(bill_data['summary'], note='')

    bill_active_version = bill_data['amendments']['items'][active_version]

    # Parse sponsors.
    if bill_data['sponsor'] is not None:
        if bill_data['sponsor']['rules'] is True:
            bill.add_sponsorship(
                'Rules Committee',
                entity_type='organization',
                classification='primary',
                primary=True,
            )
        elif not bill_data['sponsor']['budget']:
            primary_sponsor = bill_data['sponsor']['member']
            bill.add_sponsorship(
                primary_sponsor['shortName'],
                entity_type='person',
                classification='primary',
                primary=True,
            )

            # There *shouldn't* be cosponsors if there is no sponsor.
            cosponsors = bill_active_version['coSponsors']['items']
            for cosponsor in cosponsors:
                bill.add_sponsorship(
                    cosponsor['shortName'],
                    entity_type='person',
                    classification='cosponsor',
                    primary=False,
                )

    # List companion bill.
    same_as = bill_active_version.get('sameAs', {})
    # Check whether "sameAs" property is populated with at least one bill.
    if same_as.get('items'):
        # Get companion bill ID.
        companion_bill_id = same_as['items'][0]['basePrintNo']

        # Build companion bill session.
        start_year = same_as['items'][0]['session']
        end_year = start_year + 1
        companion_bill_session = '-'.join([str(start_year), str(end_year)])

        # Attach companion bill data.
        bill.add_related_bill(
            companion_bill_id,
            companion_bill_session,
            relation_type='companion',
        )

    # Parse actions.
    chamber_map = {
        'senate': 'upper',
        'assembly': 'lower',
    }

    for action in bill_data['actions']['items']:
        chamber = chamber_map[action['chamber'].lower()]
        action_datetime = datetime.datetime.strptime(action['date'], '%Y-%m-%d')
        action_date = action_datetime.date()

        types, _ = NYBillScraper.categorizer.categorize(action['text'])

        bill.add_action(
            action['text'],
            action_date.strftime('%Y-%m-%d'),
            chamber=chamber,
            classification=types,
        )

    # Handling of sources follows. Sources serving either chamber
    # maintain duplicate data, so we can see certain bill data
    # through either chamber's resources. However, we have to refer
    # to a specific chamber's resources if we want to grab certain
    # specific information such as vote data.
    #
    # As such, I'm placing all potential sources in the interest of
    # thoroughness. - Andy Lo

    # List Open Legislation API endpoint as a source.
    api_url = self.api_client.root + self.api_client.resources['bill'].format(
        session_year=session,
        bill_id=bill_id,
        summary='',
        detail='')
# ......... remaining code omitted .........
Example 11: parse_bill_status_page

# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_sponsorship [as alias]
def parse_bill_status_page(self, status_url, bill_url, session, chamber):
    status_page = lxml.html.fromstring(self.get(status_url).text)
    # see 2007 HB 2... weird.
    bill_re = r'.*?/([A-Z]+)0*(\d+)\.pdf'
    bill_xpath = '//a[contains(@href, ".pdf") and contains(@href, "billpdf")]/@href'
    bill_id = re.search(bill_re, status_page.xpath(bill_xpath)[0],
                        re.IGNORECASE).groups()
    bill_id = "{0} {1}".format(bill_id[0], int(bill_id[1]))

    try:
        xp = '//b[text()="Short Title:"]/../following-sibling::td/text()'
        title = status_page.xpath(xp).pop()
    except IndexError:
        title = status_page.xpath('//tr[1]/td[2]')[0].text_content()

    # Add bill type.
    _bill_id = bill_id.lower()
    if 'b' in _bill_id:
        classification = 'bill'
    elif 'j' in _bill_id or 'jr' in _bill_id:
        classification = 'joint resolution'
    elif 'cr' in _bill_id:
        classification = 'concurrent resolution'
    elif 'r' in _bill_id:
        classification = 'resolution'

    bill = Bill(bill_id, legislative_session=session, chamber=chamber,
                title=title, classification=classification)

    self.add_actions(bill, status_page)
    votes = self.add_votes(bill, status_page, status_url)

    tabledata = self._get_tabledata(status_page)

    # Add sponsor info.
    bill.add_sponsorship(tabledata['primary sponsor:'][0], classification='primary',
                         entity_type='person', primary=True)

    # Add the various "plus" fields MT provides.
    plus_fields = [
        'requester',
        ('chapter number:', 'chapter'),
        'transmittal date:',
        'drafter',
        'fiscal note probable:',
        'bill draft number:',
        'preintroduction required:',
        'by request of',
        'category:']

    for x in plus_fields:
        if isinstance(x, tuple):
            _key, key = x
        else:
            _key = key = x
        key = key.replace(' ', '_')

        try:
            val = tabledata[_key]
        except KeyError:
            continue

        if len(val) == 1:
            val = val[0]

        bill.extras[key] = val

    # Add bill subjects.
    xp = '//th[contains(., "Revenue/Approp.")]/ancestor::table/tr'
    subjects = []
    for tr in status_page.xpath(xp):
        try:
            subj = tr.xpath('td')[0].text_content()
        except IndexError:
            continue
        subjects.append(subj)

    for s in subjects:
        bill.add_subject(s)

    self.add_fiscal_notes(status_page, bill)

    return bill, list(votes)
Example 12: scrape

# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_sponsorship [as alias]
# ......... preceding code omitted .........
                result='pass' if vote_['Yeas'] > vote_['Nays'] else 'fail',
                classification='passage',
                bill=bill,
            )
            vote.set_count('yes', vote_['Yeas'])
            vote.set_count('no', vote_['Nays'])
            vote.set_count('other', vote_['Excused'] + vote_['NotVoting'])
            vote.add_source(self.vsource)

            methods = {'Yea': 'yes', 'Nay': 'no'}

            for vdetail in vote_['Votes'][0]:
                whom = vdetail['Member']
                how = vdetail['MemberVoted']
                vote.vote(methods.get(how, 'other'), whom['Name'])

            yield vote

        ccommittees = defaultdict(list)
        committees = instrument['Committees']
        if committees:
            for committee in committees[0]:
                ccommittees[{
                    'House': 'lower',
                    'Senate': 'upper',
                }[committee['Type']]].append(committee['Name'])

        for action in actions:
            action_chamber = chamber_map[action['code'][0]]

            try:
                action_types = action_code_map[action['code']]
            except KeyError:
                error_msg = 'Code {code} for action {action} not recognized.'.format(
                    code=action['code'], action=action['action'])
                self.logger.warning(error_msg)
                action_types = None

            committees = []
            if action_types and any(('committee' in x for x in action_types)):
                committees = [str(x) for x in ccommittees.get(
                    action_chamber, [])]

            act = bill.add_action(
                action['action'], action['date'].strftime('%Y-%m-%d'),
                classification=action_types,
                chamber=action_chamber)
            for committee in committees:
                act.add_related_entity(committee, 'organization')
            act.extras = {
                'code': action['code'],
                'guid': action['_guid'],
            }

        sponsors = []
        if instrument['Authors']:
            sponsors = instrument['Authors']['Sponsorship']
        if 'Sponsors' in instrument and instrument['Sponsors']:
            sponsors += instrument['Sponsors']['Sponsorship']

        sponsors = [
            (x['Type'], self.get_member(x['MemberId'])) for x in sponsors
        ]

        for typ, sponsor in sponsors:
            name = '{First} {Last}'.format(**dict(sponsor['Name']))
            bill.add_sponsorship(
                name,
                entity_type='person',
                classification='primary' if 'Author' in typ else 'secondary',
                primary='Author' in typ,
            )

        for version in instrument['Versions']['DocumentDescription']:
            name, url, doc_id, version_id = [
                version[x] for x in [
                    'Description',
                    'Url',
                    'Id',
                    'Version'
                ]
            ]
            # link = bill.add_version_link(
            #     name, url, media_type='application/pdf')
            # link['extras'] = {
            #     '_internal_document_id': doc_id,
            #     '_version_id': version_id
            # }

        bill.add_source(self.msource)
        bill.add_source(self.lsource)
        bill.add_source(SOURCE_URL.format(**{
            'session': session,
            'bid': guid,
        }))

        yield bill
Example 13: scrape_chamber

# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_sponsorship [as alias]
def scrape_chamber(self, chamber, session):
    chamber_name = 'Senate' if chamber == 'upper' else 'House'
    chamber_letter = chamber_name[0]
    # perhaps we should save this data so we can make one request for both?
    bill_request = self.get(ksapi.url + 'bill_status/').text
    bill_request_json = json.loads(bill_request)
    bills = bill_request_json['content']

    for bill_data in bills:
        bill_id = bill_data['BILLNO']

        # filter other chambers
        if not bill_id.startswith(chamber_letter):
            continue

        if 'CR' in bill_id:
            btype = 'concurrent resolution'
        elif 'R' in bill_id:
            btype = 'resolution'
        elif 'B' in bill_id:
            btype = 'bill'

        title = bill_data['SHORTTITLE'] or bill_data['LONGTITLE']

        # main
        bill = Bill(
            bill_id,
            session,
            title,
            chamber=chamber,
            classification=btype,
        )
        bill.extras = {'status': bill_data['STATUS']}

        bill.add_source(ksapi.url + 'bill_status/' + bill_id.lower())

        if (bill_data['LONGTITLE'] and
                bill_data['LONGTITLE'] != bill.title):
            bill.add_title(bill_data['LONGTITLE'])

        for sponsor in bill_data['SPONSOR_NAMES']:
            stype = ('primary' if len(bill_data['SPONSOR_NAMES']) == 1
                     else 'cosponsor')
            if sponsor:
                bill.add_sponsorship(
                    name=sponsor,
                    entity_type='person',
                    primary=stype == 'primary',
                    classification=stype,
                )

        # history is backwards
        for event in reversed(bill_data['HISTORY']):
            actor = ('upper' if event['chamber'] == 'Senate'
                     else 'lower')

            date = datetime.datetime.strptime(event['occurred_datetime'], "%Y-%m-%dT%H:%M:%S")
            # append committee names if present
            if 'committee_names' in event:
                action = (event['status'] + ' ' +
                          ' and '.join(event['committee_names']))
            else:
                action = event['status']

            if event['action_code'] not in ksapi.action_codes:
                self.warning('unknown action code on %s: %s %s' %
                             (bill_id, event['action_code'],
                              event['status']))
                atype = None
            else:
                atype = ksapi.action_codes[event['action_code']]
            bill.add_action(
                action, date.strftime('%Y-%m-%d'), chamber=actor, classification=atype)

        try:
            yield from self.scrape_html(bill, session)
        except scrapelib.HTTPError:
            self.warning('unable to fetch HTML for bill {0}'.format(bill_id))

        yield bill
Example 14: scrape_matter

# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_sponsorship [as alias]
def scrape_matter(self, matter_link, sess):
    matter_types = {
        "Additions": "other",
        "Administrative Order": "order",
        "Annual Evaluation": "other",
        "Bid Advertisement": "other",
        "Bid Awards": "other",
        "Bid Contract": "contract",
        "Bid Protest": "other",
        "Bid Rejection": "other",
        "Birthday Scroll": "commemoration",
        "Certificate of Appreciation": "commemoration",
        "Change Order": "order",
        "Citizen's Presentation": "other",
        "Commendation": "commemoration",
        "Conflict Waiver": "other",
        "Congratulatory Certificate": "commemoration",
        "Deferrals": "other",
        "Discussion Item": "other",
        "Distinguished Visitor": "other",
        "Joint Meeting/Workshop": "other",
        "Mayoral Veto": "other",
        "Miscellaneous": "other",
        "Nomination": "nomination",
        "Oath of Office": "other",
        "Omnibus Reserve": "bill",
        "Ordinance": "ordinance",
        "Plaque": "commemoration",
        "Presentation": "other",
        "Proclamation": "proclamation",
        "Professional Service Agreement": "contract",
        "Public Hearing": "other",
        "Report": "other",
        "Request for Proposals": "other",
        "Request for Qualifications": "other",
        "Request to Advertise": "other",
        "Resolution": "resolution",
        "Resolution of Sympathy": "resolution",
        "Service Awards": "commemoration",
        "Special Item": "other",
        "Special Presentation": "other",
        "Supplement": "other",
        "Swearing-In": "other",
        "Time Sensitive Items": "other",
        "Withdrawals": "other",
        "Workshop Item": "other",
        "Zoning": "other",
        "Zoning Resolution": "resolution"
    }

    matter_doc = self.lxmlize(matter_link)
    info_dict = self.matter_table_to_dict(matter_doc)

    # we're going to use the year of the intro date as the session
    # until/unless we come up with something better
    intro_date = datetime.strptime(info_dict["Introduced"], "%m/%d/%Y")
    session = sess["identifier"]

    category = matter_types[info_dict["File Type"]]

    if 'File Name' in info_dict:
        title = info_dict["File Name"]
    elif "Title" in info_dict and info_dict["Title"].strip():
        title = info_dict["Title"].strip()
    else:
        self.warning("bill has no title")
        return

    if category == 'other':
        bill = Bill(identifier=info_dict["File Number"],
                    legislative_session=session,
                    title=title
                    )
    else:
        bill = Bill(identifier=info_dict["File Number"],
                    legislative_session=session,
                    title=title,
                    classification=category
                    )

    for spons in info_dict["Sponsors"]:
        if spons == "NONE":
            continue
        try:
            name, spons_type = spons.rsplit(",", 1)
        except ValueError:
            name = spons
            spons_type = "Sponsor"
        primary = "Prime Sponsor" in spons_type
        entity = "person"
        if "committee" in name:
            entity = "organization"
        bill.add_sponsorship(name, spons_type, entity, primary)

    if "Indexes" in info_dict:
        for subj in info_dict["Indexes"]:
            if subj.strip() and subj.strip() != "NONE":
                bill.add_subject(subj.strip())

    if "Title" in info_dict and info_dict["Title"].strip():
        note = "bill's long title"
        if "Note" in info_dict and info_dict["Note"].strip():
            note = info_dict["Note"]
        bill.add_abstract(abstract=info_dict["Title"], note=note)

    self.process_action_table(matter_doc, bill)
    bill.add_source(matter_link, note='web')

    yield bill
Example 15: scrape_assem_bills

# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_sponsorship [as alias]
def scrape_assem_bills(self, chamber, insert, session, year):
    doc_type = {1: 'bill', 3: 'resolution', 5: 'concurrent resolution',
                6: 'joint resolution', 9: 'petition'}
    for docnum, bill_type in doc_type.items():
        parentpage_url = 'http://www.leg.state.nv.us/Session/%s/' \
                         'Reports/HistListBills.cfm?DoctypeID=%s' % (insert, docnum)
        links = self.scrape_links(parentpage_url)
        count = 0
        for link in links:
            count += 1
            page_path = 'http://www.leg.state.nv.us/Session/%s/Reports/%s' % (insert, link)

            page = self.get(page_path).text
            page = page.replace(u"\xa0", " ")
            root = lxml.html.fromstring(page)
            root.make_links_absolute("http://www.leg.state.nv.us/")

            bill_id = root.xpath('string(/html/body/div[@id="content"]'
                                 '/table[1]/tr[1]/td[1]/font)')
            title = self.get_node(
                root,
                '//div[@id="content"]/table/tr[preceding-sibling::tr/td/'
                'b[contains(text(), "By:")]]/td/em/text()')

            bill = Bill(bill_id, legislative_session=session, chamber=chamber,
                        title=title, classification=bill_type)

            bill.subject = list(set(self.subject_mapping[bill_id]))

            billtext = root.xpath("//b[text()='Bill Text']")[0].getparent().getnext()
            text_urls = billtext.xpath("./a")
            for text_url in text_urls:
                version_name = text_url.text.strip()
                version_url = text_url.attrib['href']
                bill.add_version_link(note=version_name, url=version_url,
                                      media_type='application/pdf')

            primary, secondary = self.scrape_sponsors(page)

            for leg in primary:
                bill.add_sponsorship(classification='primary',
                                     name=leg, entity_type='person',
                                     primary=True)
            for leg in secondary:
                bill.add_sponsorship(classification='cosponsor',
                                     name=leg, entity_type='person',
                                     primary=False)

            minutes_count = 2
            for mr in root.xpath('//table[4]/tr/td[3]/a'):
                minutes = mr.xpath("string(@href)")
                minutes_url = "http://www.leg.state.nv.us" + minutes
                minutes_date_path = "string(//table[4]/tr[%s]/td[2])" % minutes_count
                minutes_date = mr.xpath(minutes_date_path).split()
                minutes_date = minutes_date[0] + minutes_date[1] + minutes_date[2] + " Minutes"
                bill.add_document_link(note=minutes_date, url=minutes_url)
                minutes_count += 1

            self.scrape_actions(root, bill, "lower")
            yield from self.scrape_votes(page, page_path, bill, insert, year)
            bill.add_source(page_path)
            yield bill