This article collects typical usage examples of the Python method pupa.scrape.Bill.add_action. If you are wondering what Bill.add_action does, how to call it, or what it looks like in practice, the curated examples below should help. You can also explore further usage of the containing class, pupa.scrape.Bill.
The 15 code examples below are ordered by popularity by default.
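Before the examples, here is a minimal sketch of the method's basic shape. Everything in it is illustrative: the bill identifier, session, source URL, and action text are invented rather than taken from any real scraper.

from pupa.scrape import Bill

# Construct a bill; identifier, session, and title are required.
bill = Bill(
    'HB 101',                      # hypothetical identifier
    legislative_session='2017',    # hypothetical session name
    chamber='lower',
    title='An Example Act',
    classification='bill',
)
bill.add_source('https://example.com/bills/hb101')  # hypothetical URL

# add_action records one step of the bill's history. The date is an
# ISO-formatted string, and classification, when given, should be an
# OCD action type such as 'introduction' or 'passage'.
action = bill.add_action(
    'Introduced in House',
    '2017-02-14',
    chamber='lower',
    classification='introduction',
)

# The returned action object can carry related entities, a pattern
# several of the examples below rely on.
action.add_related_entity('Committee on Examples', entity_type='organization')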
Example 1: scrape_bills
# Required import: from pupa.scrape import Bill [as alias]
# Alternatively: from pupa.scrape.Bill import add_action [as alias]
def scrape_bills(self, session):
session_key = SESSION_KEYS[session]
measures_response = self.api_client.get('measures', page=500, session=session_key)
legislators = index_legislators(self, session_key)
for measure in measures_response:
bid = '{} {}'.format(measure['MeasurePrefix'], measure['MeasureNumber'])
chamber = self.chamber_code[bid[0]]
bill = Bill(
bid.replace(' ', ''),
legislative_session=session,
chamber=chamber,
title=measure['RelatingTo'],
classification=self.bill_types[measure['MeasurePrefix'][1:]]
)
bill.add_abstract(measure['MeasureSummary'].strip(), note='summary')
for sponsor in measure['MeasureSponsors']:
legislator_code = sponsor['LegislatoreCode'] # typo in API
if legislator_code:
try:
legislator = legislators[legislator_code]
except KeyError:
                    logger.warning('Legislator {} not found in session {}'.format(
                        legislator_code, session))
legislator = legislator_code
bill.add_sponsorship(
name=legislator,
classification={'Chief': 'primary', 'Regular': 'cosponsor'}[
sponsor['SponsorLevel']],
entity_type='person',
                    primary=sponsor['SponsorLevel'] == 'Chief'
)
bill.add_source(
"https://olis.leg.state.or.us/liz/{session}/Measures/Overview/{bid}".format(
session=session_key, bid=bid.replace(' ', ''))
)
for document in measure['MeasureDocuments']:
# TODO: probably mixing documents & versions here - should revisit
try:
bill.add_version_link(document['VersionDescription'], document['DocumentUrl'],
media_type='application/pdf')
except ValueError:
                logger.warning('Duplicate link found for {}'.format(document['DocumentUrl']))
for action in measure['MeasureHistoryActions']:
classifiers = self.determine_action_classifiers(action['ActionText'])
when = datetime.datetime.strptime(action['ActionDate'], '%Y-%m-%dT%H:%M:%S')
when = self.tz.localize(when)
bill.add_action(action['ActionText'], when,
chamber=self.chamber_code[action['Chamber']],
classification=classifiers)
yield bill
Example 2: test_vote_event_bill_actions_two_stage
# Required import: from pupa.scrape import Bill [as alias]
# Alternatively: from pupa.scrape.Bill import add_action [as alias]
def test_vote_event_bill_actions_two_stage():
    # this test is very similar to what we're testing in test_vote_event_bill_actions w/
    # ve3 and ve4: two vote events that reference the same bill action won't conflict w/
    # the OneToOneField, but in this case we do it in two stages so that the conflict is
    # found even if the votes weren't in the same scrape
j = create_jurisdiction()
j.legislative_sessions.create(name='1900', identifier='1900')
org1 = ScrapeOrganization(name='House', classification='lower')
bill = ScrapeBill('HB 1', '1900', 'Axe & Tack Tax Act', from_organization=org1._id)
bill.add_action(description='passage', date='1900-04-02', chamber='lower')
ve1 = ScrapeVoteEvent(legislative_session='1900', motion_text='passage',
start_date='1900-04-02', classification='passage:bill',
result='pass', bill_chamber='lower', bill='HB 1',
bill_action='passage',
organization=org1._id)
ve2 = ScrapeVoteEvent(legislative_session='1900', motion_text='passage',
start_date='1900-04-02', classification='passage:bill',
result='pass', bill_chamber='lower', bill='HB 1',
bill_action='passage',
organization=org1._id)
# disambiguate them
ve1.pupa_id = 'one'
ve2.pupa_id = 'two'
oi = OrganizationImporter('jid')
oi.import_data([org1.as_dict()])
bi = BillImporter('jid', oi, DumbMockImporter())
bi.import_data([bill.as_dict()])
# first imports just fine
VoteEventImporter('jid', DumbMockImporter(), oi, bi).import_data([
ve1.as_dict(),
])
votes = list(VoteEvent.objects.all())
assert len(votes) == 1
assert votes[0].bill_action is not None
# when second is imported, ensure that action stays pinned to first just as it would
# have if they were both in same import
VoteEventImporter('jid', DumbMockImporter(), oi, bi).import_data([
ve1.as_dict(),
ve2.as_dict(),
])
votes = list(VoteEvent.objects.all())
assert len(votes) == 2
assert votes[0].bill_action is not None
assert votes[1].bill_action is None
Example 3: scrape_bill
# Required import: from pupa.scrape import Bill [as alias]
# Alternatively: from pupa.scrape.Bill import add_action [as alias]
#......... portion of code omitted .........
new.add_subject(subj)
for spon in old.pop('sponsors'):
if spon.get('committee_id') is not None:
entity_type = 'organization'
elif spon.get('leg_id') is not None:
entity_type = 'person'
else:
entity_type = ''
new.add_sponsorship(spon['name'], spon['type'], entity_type,
spon['type'] == 'primary')
for act in old.pop('actions'):
actor = act['actor']
if actor.lower() in ('governor', 'mayor', 'secretary of state'):
actor = 'executive'
elif actor.lower() == 'house' or (actor.lower().startswith('lower (') and self.state == 'ca'):
actor = 'lower'
            elif actor.lower() in ('senate', 'upper') or (actor.lower().startswith('upper (') and self.state == 'ca'):
actor = 'upper'
elif actor in ('joint', 'other', 'Data Systems', 'Speaker', 'clerk',
'Office of the Legislative Fiscal Analyst', 'Became Law w',
'conference') or (actor.lower().startswith('legislature (') and self.state == 'ca'):
actor = 'legislature'
if actor in ('committee', 'sponsor') and self.state == 'pr':
actor = 'legislature'
# nebraska & DC
if actor in ('upper','council') and self.state in ('ne', 'dc'):
actor = 'legislature'
if act['action']:
newact = new.add_action(act['action'], act['date'][:10], chamber=actor,
classification=[action_types[c] for c in act['type'] if c != 'other'])
                for entity in act.get('related_entities', []):
                    if entity['type'] == 'committee':
                        entity['type'] = 'organization'
                    elif entity['type'] == 'legislator':
                        entity['type'] = 'person'
                    newact.add_related_entity(entity['name'], entity['type'])
        for comp in old.pop('companions', []):
            if self.state in ('nj', 'ny', 'mn'):
                rtype = 'companion'
                new.add_related_bill(comp['bill_id'], comp['session'], rtype)
for abid in old.pop('alternate_bill_ids', []) + old.pop('+alternate_bill_ids', []):
new.add_identifier(abid)
# generic OpenStates stuff
        for os_id in old.pop('all_ids'):
            new.add_identifier(os_id, scheme='openstates')
for source in old.pop('sources'):
source.pop('retrieved', None)
new.add_source(**source)
ext_title = old.pop('+extended_title', None)
if ext_title:
new.add_title(ext_title, note='Extended Title')
official_title = old.pop('+official_title', None)
if official_title:
new.add_title(official_title, note='Official Title')
Example 4: scrape_bill
# Required import: from pupa.scrape import Bill [as alias]
# Alternatively: from pupa.scrape.Bill import add_action [as alias]
def scrape_bill(self, chamber, session, bill_id):
# try and get bill for the first year of the session biennium
url = 'http://legislature.mi.gov/doc.aspx?%s-%s' % (
session[:4], bill_id.replace(' ', '-'))
html = self.get(url).text
# Otherwise, try second year of the session biennium
if ('Page Not Found' in html or
'The bill you are looking for is not available yet' in html):
url = 'http://legislature.mi.gov/doc.aspx?%s-%s' % (
session[-4:], bill_id.replace(' ', '-'))
html = self.get(url).text
if ('Page Not Found' in html or
'The bill you are looking for is not available yet' in html):
self.warning("Cannot open bill page for {}; skipping".format(bill_id))
return
doc = lxml.html.fromstring(html)
doc.make_links_absolute('http://legislature.mi.gov')
title = doc.xpath('//span[@id="frg_billstatus_ObjectSubject"]')[0].text_content()
# get B/R/JR/CR part and look up bill type
bill_type = bill_types[bill_id.split(' ')[0][1:]]
bill = Bill(bill_id, session, title, chamber=chamber,
classification=bill_type)
bill.add_source(url)
# sponsors
sponsors = doc.xpath('//span[@id="frg_billstatus_SponsorList"]/a')
for sponsor in sponsors:
name = sponsor.text.replace(u'\xa0', ' ')
# sometimes district gets added as a link
if name.isnumeric():
continue
if len(sponsors) > 1:
classification = (
'primary'
if sponsor.tail and 'primary' in sponsor.tail
else 'cosponsor'
)
else:
classification = 'primary'
bill.add_sponsorship(
name=name,
chamber=chamber,
entity_type='person',
primary=classification == 'primary',
classification=classification,
)
bill.subject = doc.xpath('//span[@id="frg_billstatus_CategoryList"]/a/text()')
# actions (skip header)
for row in doc.xpath('//table[@id="frg_billstatus_HistoriesGridView"]/tr')[1:]:
tds = row.xpath('td') # date, journal link, action
date = tds[0].text_content()
journal = tds[1].text_content()
action = tds[2].text_content()
date = TIMEZONE.localize(datetime.datetime.strptime(date, "%m/%d/%Y"))
# instead of trusting upper/lower case, use journal for actor
actor = 'upper' if 'SJ' in journal else 'lower'
classification = categorize_action(action)
bill.add_action(action, date, chamber=actor, classification=classification)
# check if action mentions a sub
submatch = re.search(r'WITH SUBSTITUTE\s+([\w\-\d]+)', action, re.IGNORECASE)
if submatch and tds[2].xpath('a'):
version_url = tds[2].xpath('a/@href')[0]
version_name = tds[2].xpath('a/text()')[0].strip()
version_name = 'Substitute {}'.format(version_name)
self.info("Found Substitute {}".format(version_url))
            if version_url.lower().endswith('.pdf'):
                mimetype = 'application/pdf'
            elif version_url.lower().endswith('.htm'):
                mimetype = 'text/html'
            else:
                mimetype = 'application/octet-stream'  # fallback for unexpected extensions
            bill.add_version_link(version_name, version_url, media_type=mimetype)
# check if action mentions a vote
rcmatch = re.search(r'Roll Call # (\d+)', action, re.IGNORECASE)
if rcmatch:
rc_num = rcmatch.groups()[0]
# in format mileg.aspx?page=getobject&objectname=2011-SJ-02-10-011
journal_link = tds[1].xpath('a/@href')
if journal_link:
objectname = journal_link[0].rsplit('=', 1)[-1]
chamber_name = {'upper': 'Senate', 'lower': 'House'}[actor]
vote_url = BASE_URL + '/documents/%s/Journal/%s/htm/%s.htm' % (
session, chamber_name, objectname)
results = self.parse_roll_call(vote_url, rc_num)
vote = VoteEvent(
start_date=date,
chamber=actor,
bill=bill,
motion_text=action,
result='pass' if len(results['yes']) > len(results['no']) else 'fail',
classification='passage',
)
#......... portion of code omitted .........
Example 5: scrape_chamber
# Required import: from pupa.scrape import Bill [as alias]
# Alternatively: from pupa.scrape.Bill import add_action [as alias]
def scrape_chamber(self, chamber, session):
chamber_name = 'Senate' if chamber == 'upper' else 'House'
chamber_letter = chamber_name[0]
# perhaps we should save this data so we can make one request for both?
bill_request = self.get(ksapi.url + 'bill_status/').text
bill_request_json = json.loads(bill_request)
bills = bill_request_json['content']
for bill_data in bills:
bill_id = bill_data['BILLNO']
# filter other chambers
if not bill_id.startswith(chamber_letter):
continue
if 'CR' in bill_id:
btype = 'concurrent resolution'
elif 'R' in bill_id:
btype = 'resolution'
        elif 'B' in bill_id:
            btype = 'bill'
        else:
            btype = 'bill'  # fallback for unrecognized bill numbers
title = bill_data['SHORTTITLE'] or bill_data['LONGTITLE']
# main
bill = Bill(
bill_id,
session,
title,
chamber=chamber,
classification=btype,
)
bill.extras = {'status': bill_data['STATUS']}
bill.add_source(ksapi.url + 'bill_status/' + bill_id.lower())
if (bill_data['LONGTITLE'] and
bill_data['LONGTITLE'] != bill.title):
bill.add_title(bill_data['LONGTITLE'])
# An "original sponsor" is the API's expression of "primary sponsor"
for primary_sponsor in bill_data['ORIGINAL_SPONSOR']:
bill.add_sponsorship(
name=primary_sponsor,
entity_type='organization' if "committee" in primary_sponsor.lower()
else 'person',
primary=True,
classification="original sponsor"
)
for sponsor in bill_data['SPONSOR_NAMES']:
if sponsor in bill_data['ORIGINAL_SPONSOR']:
continue
bill.add_sponsorship(
name=sponsor,
entity_type='organization' if "committee" in sponsor.lower() else 'person',
primary=False,
classification='cosponsor',
)
# history is backwards
for event in reversed(bill_data['HISTORY']):
actor = ('upper' if event['chamber'] == 'Senate'
else 'lower')
date = event['session_date']
# append committee names if present
if 'committee_names' in event:
action = (event['status'] + ' ' +
' and '.join(event['committee_names']))
else:
action = event['status']
if event['action_code'] not in ksapi.action_codes:
self.warning('unknown action code on %s: %s %s' %
(bill_id, event['action_code'],
event['status']))
atype = None
else:
atype = ksapi.action_codes[event['action_code']]
bill.add_action(
action, date, chamber=actor, classification=atype)
        # Versions are exposed in `bill_data['versions']`,
        # but lack any descriptive text or identifiers;
        # continue to scrape these from the HTML
yield from self.scrape_html(bill, session)
yield bill
Example 6: scrape_bill
# Required import: from pupa.scrape import Bill [as alias]
# Alternatively: from pupa.scrape.Bill import add_action [as alias]
def scrape_bill(self, bill_url, bill_id, session_id):
page = self.lxmlize(bill_url)
# create bill
title = page.xpath("//h1/text()")[0]
bill = Bill(identifier=bill_id,
legislative_session=session_id,
title=title)
bill.add_source(bill_url, note="detail")
# add additional fields
# abstract
try:
# abstract is directly above <h2>Legislative History</h2>
leg_his = page.xpath("//h2[text()='Legislative History']")[0]
abstract = leg_his.xpath("preceding-sibling::p/text()")[0]
bill.add_abstract(abstract=abstract.strip(), note="summary")
    except IndexError:
        self.warning("No abstract for bill {} in session {}".format(bill_id, session_id))
# the rest of the fields are found inside this <table>
data_table = page.xpath("//table[contains(@class, 'data')]")[0]
# sponsor
sponsor_name = data_table.xpath(self.bill_table_query("Sponsor") + "/text()")[0]
bill.add_sponsorship(name=sponsor_name,
classification="Primary",
entity_type="person",
primary=True
)
# actions
action_lines = data_table.xpath(self.bill_table_query("Actions") + "/text()")
for line in action_lines:
        line = line.strip()
try:
for date_str, action_type in self.parse_actions(line):
bill.add_action(date=date_str,
description=action_type,
classification=action_type)
except ValueError:
print("failed to parse these actions: {}".format([line]))
# co-sponsors
co_sponsors = data_table.xpath(self.bill_table_query("Co-Sponsors") + "/text()")
co_sponsors = [name.strip() for name in co_sponsors if name.strip()]
for name in co_sponsors:
bill.add_sponsorship(name=name,
classification="co-sponsor",
entity_type="person",
primary=False)
# committee (stored as another sponsorship in OCD)
committees = data_table.xpath(self.bill_table_query("Committee") + "/a/text()")
for comm in committees:
bill.add_sponsorship(name=comm,
classification="secondary", # classification ?
entity_type="organization",
primary=False)
return bill
Example 7: scrape_chamber
# Required import: from pupa.scrape import Bill [as alias]
# Alternatively: from pupa.scrape.Bill import add_action [as alias]
def scrape_chamber(self, chamber, session):
chamber_name = 'Senate' if chamber == 'upper' else 'House'
chamber_letter = chamber_name[0]
# perhaps we should save this data so we can make one request for both?
bill_request = self.get(ksapi.url + 'bill_status/').text
bill_request_json = json.loads(bill_request)
bills = bill_request_json['content']
for bill_data in bills:
bill_id = bill_data['BILLNO']
# filter other chambers
if not bill_id.startswith(chamber_letter):
continue
if 'CR' in bill_id:
btype = 'concurrent resolution'
elif 'R' in bill_id:
btype = 'resolution'
        elif 'B' in bill_id:
            btype = 'bill'
        else:
            btype = 'bill'  # fallback for unrecognized bill numbers
title = bill_data['SHORTTITLE'] or bill_data['LONGTITLE']
# main
bill = Bill(
bill_id,
session,
title,
chamber=chamber,
classification=btype,
)
bill.extras = {'status': bill_data['STATUS']}
bill.add_source(ksapi.url + 'bill_status/' + bill_id.lower())
if (bill_data['LONGTITLE'] and
bill_data['LONGTITLE'] != bill.title):
bill.add_title(bill_data['LONGTITLE'])
for sponsor in bill_data['SPONSOR_NAMES']:
stype = ('primary' if len(bill_data['SPONSOR_NAMES']) == 1
else 'cosponsor')
if sponsor:
bill.add_sponsorship(
name=sponsor,
entity_type='person',
primary=stype == 'primary',
classification=stype,
)
# history is backwards
for event in reversed(bill_data['HISTORY']):
actor = ('upper' if event['chamber'] == 'Senate'
else 'lower')
date = datetime.datetime.strptime(event['occurred_datetime'], "%Y-%m-%dT%H:%M:%S")
# append committee names if present
if 'committee_names' in event:
action = (event['status'] + ' ' +
' and '.join(event['committee_names']))
else:
action = event['status']
if event['action_code'] not in ksapi.action_codes:
self.warning('unknown action code on %s: %s %s' %
(bill_id, event['action_code'],
event['status']))
atype = None
else:
atype = ksapi.action_codes[event['action_code']]
bill.add_action(
action, date.strftime('%Y-%m-%d'), chamber=actor, classification=atype)
try:
yield from self.scrape_html(bill, session)
except scrapelib.HTTPError as e:
            self.warning('unable to fetch HTML for bill {0}'.format(
                bill.identifier))
yield bill
Example 8: _scrape_bill
# Required import: from pupa.scrape import Bill [as alias]
# Alternatively: from pupa.scrape.Bill import add_action [as alias]
def _scrape_bill(self, session, bill_data):
details = self._parse_bill_details(bill_data)
(senate_url, assembly_url, bill_chamber, bill_type, bill_id,
title, (prefix, number, active_version)) = details
bill = Bill(
bill_id,
legislative_session=session,
chamber=bill_chamber,
title=title or bill_data['summary'],
classification=bill_type,
)
if bill_data['summary']:
bill.add_abstract(bill_data['summary'], note='')
bill_active_version = bill_data['amendments']['items'][active_version]
# Parse sponsors.
if bill_data['sponsor'] is not None:
if bill_data['sponsor']['rules'] is True:
bill.add_sponsorship(
'Rules Committee',
entity_type='organization',
classification='primary',
primary=True,
)
elif not bill_data['sponsor']['budget']:
primary_sponsor = bill_data['sponsor']['member']
bill.add_sponsorship(
primary_sponsor['shortName'],
entity_type='person',
classification='primary',
primary=True,
)
# There *shouldn't* be cosponsors if there is no sponsor.
cosponsors = bill_active_version['coSponsors']['items']
for cosponsor in cosponsors:
bill.add_sponsorship(
cosponsor['shortName'],
entity_type='person',
classification='cosponsor',
primary=False,
)
# List companion bill.
same_as = bill_active_version.get('sameAs', {})
# Check whether "sameAs" property is populated with at least one bill.
    if same_as.get('items'):
# Get companion bill ID.
companion_bill_id = same_as['items'][0]['basePrintNo']
# Build companion bill session.
start_year = same_as['items'][0]['session']
end_year = start_year + 1
companion_bill_session = '-'.join([str(start_year), str(end_year)])
# Attach companion bill data.
bill.add_related_bill(
companion_bill_id,
companion_bill_session,
relation_type='companion',
)
# Parse actions.
chamber_map = {
'senate': 'upper',
'assembly': 'lower',
}
for action in bill_data['actions']['items']:
chamber = chamber_map[action['chamber'].lower()]
action_datetime = datetime.datetime.strptime(action['date'], '%Y-%m-%d')
action_date = action_datetime.date()
types, _ = NYBillScraper.categorizer.categorize(action['text'])
bill.add_action(
action['text'],
action_date.strftime('%Y-%m-%d'),
chamber=chamber,
classification=types,
)
# Handling of sources follows. Sources serving either chamber
# maintain duplicate data, so we can see certain bill data
# through either chamber's resources. However, we have to refer
# to a specific chamber's resources if we want to grab certain
# specific information such as vote data.
#
# As such, I'm placing all potential sources in the interest of
# thoroughness. - Andy Lo
# List Open Legislation API endpoint as a source.
api_url = self.api_client.root + self.api_client.resources['bill'].format(
session_year=session,
bill_id=bill_id,
summary='',
detail='')
#......... portion of code omitted .........
Example 9: scrape
# Required import: from pupa.scrape import Bill [as alias]
# Alternatively: from pupa.scrape.Bill import add_action [as alias]
#......... portion of code omitted .........
if "Signed by Governor" in action['FullStatus']:
actor = 'executive'
elif action['ChamberCode'] == 'H':
actor = 'lower'
elif action['ChamberCode'] == 'S':
actor = 'upper'
else:
raise AssertionError("Unknown actor for bill action")
# Categorize action
if "Signed by Governor" in action['FullStatus']:
# assert chambers_passed == set("HS")
action_type = 'executive-signature'
elif "Vetoed by the Governor" in action['FullStatus']:
action_type = 'executive-veto'
elif "Read first time" in action['FullStatus'] \
or "Read 1st time" in action['FullStatus']:
action_type = 'introduction'
elif "Reported favorably" in action['FullStatus']:
action_type = 'committee-passage-favorable'
elif actor == 'lower' and any(x.lower().startswith('aspassed')
for x in action['keywords'].split(';')):
action_type = 'passage'
chambers_passed.add("H")
elif actor == 'upper' and any(x.lower().startswith(' aspassed')
or x.lower().startswith('aspassed')
for x in action['keywords'].split(';')):
action_type = 'passage'
chambers_passed.add("S")
else:
action_type = None
bill.add_action(
description=re.sub(HTML_TAGS_RE, "", action['FullStatus']),
date=datetime.datetime.strftime(
datetime.datetime.strptime(action['StatusDate'], '%m/%d/%Y'),
'%Y-%m-%d'
),
chamber=actor,
classification=action_type
)
# Capture votes
votes_url = 'http://legislature.vermont.gov/bill/loadBillRollCalls/{0}/{1}'.format(
year_slug, internal_bill_id)
votes_json = self.get(votes_url).text
votes = json.loads(votes_json)['data']
bill.add_source(votes_url)
for vote in votes:
roll_call_id = vote['VoteHeaderID']
roll_call_url = ('http://legislature.vermont.gov/bill/'
'loadBillRollCallDetails/{0}/{1}'.format(
year_slug, roll_call_id))
roll_call_json = self.get(roll_call_url).text
roll_call = json.loads(roll_call_json)['data']
roll_call_yea = []
roll_call_nay = []
roll_call_not_voting = []
for member in roll_call:
(member_name, _district) = member['MemberName'].split(" of ")
member_name = member_name.strip()
if member['MemberVote'] == "Yea":
Example 10: scrape_bill_list
# Required import: from pupa.scrape import Bill [as alias]
# Alternatively: from pupa.scrape.Bill import add_action [as alias]
#......... portion of code omitted .........
bill.add_version_link(
name,
version_url,
media_type='application/pdf',
on_duplicate='ignore',
)
# Fiscal notes exist, but I can't figure out how to build their URL
fiscal_notes = bill_doc.xpath(
'//table[@class="box_fiscalnote"]')[1:]
for fiscal_note in fiscal_notes:
pass
# Budget Isolation Resolutions are handled as extra actions/votes
birs = bill_doc.xpath(
'//div[@class="box_bir"]//table//table/tr')[1:]
for bir in birs:
bir_action = bir.xpath('td[1]')[0].text_content().strip()
# Sometimes ALISON's database puts another bill's
# actions into the BIR action list; ignore these
if bill_id not in bir_action:
self.warning(
"BIR action found ({}) ".format(bir_action) +
"that doesn't match the bill ID ({})".format(bill_id))
continue
bir_date = datetime.datetime.strptime(
bir.xpath('td[2]/font/text()')[0], self.DATE_FORMAT)
bir_type = bir.xpath('td[1]/font/text()')[0].split(" ")[0]
bir_chamber = self.CHAMBERS[bir_type[0]]
bir_text = "{0}: {1}".format(
bir_type, bir.xpath('td[3]/font/text()')[0].strip())
bill.add_action(
bir_text,
TIMEZONE.localize(bir_date),
chamber=bir_chamber,
classification='other',
)
try:
(bir_vote_id, ) = bir.xpath('td[4]/font/input/@value')
except ValueError:
bir_vote_id = ''
bir_vote_id = bir_vote_id.strip()
if bir_vote_id.startswith("Roll "):
bir_vote_id = bir_vote_id.split(" ")[-1]
yield from self.scrape_vote(
bill=bill,
vote_chamber=bir_type[0],
bill_id="{0}%20for%20{1}".format(bir_type, bill_id),
vote_id=bir_vote_id,
vote_date=TIMEZONE.localize(bir_date),
action_text=bir_text
)
actions = bill_doc.xpath('//table[@id="ContentPlaceHolder1_gvHistory"]/tr')[1:]
action_date = None
for action in actions:
# If actions occur on the same day, only one date will exist
if (action.xpath('td[1]/font/text()')[0].
encode('ascii', 'ignore').strip()):
action_date = datetime.datetime.strptime(
action.xpath('td[1]/font/text()')[0], self.DATE_FORMAT)
Example 11: scrape_bill_type
# Required import: from pupa.scrape import Bill [as alias]
# Alternatively: from pupa.scrape.Bill import add_action [as alias]
#......... portion of code omitted .........
act_str = act_str.replace(abbr, committee)
if not act_str.endswith('.'):
act_str = act_str + '.'
# Determine which chamber the action originated from.
changed = False
for committee_chamber in ['upper', 'lower', 'legislature']:
if actor.startswith(committee_chamber):
actor = committee_chamber
changed = True
break
if not changed:
actor = 'legislature'
if actor != action.actor:
actor_info = kwargs.get('actor_info', {})
actor_info['details'] = action.actor
kwargs['actor_info'] = actor_info
# Add strings for related legislators, if any.
rgx = r'(?:senator|assembly[mwp][^ .,:;]+)\s+[^ .,:;]+'
legislators = re.findall(rgx, action.action, re.I)
if legislators:
kwargs['legislators'] = legislators
date = action.action_date
date = self._tz.localize(date)
date = date.date()
if (actor, act_str, date) in seen_actions:
continue
kwargs.update(self.categorizer.categorize(act_str))
action = fsbill.add_action(act_str, date.strftime('%Y-%m-%d'), chamber=actor,
classification=kwargs['classification'])
for committee in kwargs.get('committees', []):
action.add_related_entity(
committee, entity_type='organization')
seen_actions.add((actor, act_str, date))
for vote_num, vote in enumerate(bill.votes):
if vote.vote_result == '(PASS)':
result = True
else:
result = False
if not vote.location:
continue
full_loc = vote.location.description
first_part = full_loc.split(' ')[0].lower()
if first_part in ['asm', 'assembly']:
vote_chamber = 'lower'
# vote_location = ' '.join(full_loc.split(' ')[1:])
elif first_part.startswith('sen'):
vote_chamber = 'upper'
# vote_location = ' '.join(full_loc.split(' ')[1:])
else:
raise ScrapeError("Bad location: %s" % full_loc)
if vote.motion:
motion = vote.motion.motion_text or ''
else:
motion = ''
if "Third Reading" in motion or "3rd Reading" in motion:
Example 12: scrape
# Required import: from pupa.scrape import Bill [as alias]
# Alternatively: from pupa.scrape.Bill import add_action [as alias]
def scrape(self) :
three_days_ago = datetime.datetime.now() - datetime.timedelta(3)
for matter in self.matters(three_days_ago) :
matter_id = matter['MatterId']
date = matter['MatterIntroDate']
title = matter['MatterTitle']
identifier = matter['MatterFile']
if not all((date, title, identifier)) :
continue
bill_session = self.session(self.toTime(date))
bill_type = BILL_TYPES[matter['MatterTypeName']]
if identifier.startswith('S'):
alternate_identifiers = [identifier]
identifier = identifier[1:]
else:
alternate_identifiers = []
bill = Bill(identifier=identifier,
legislative_session=bill_session,
title=title,
classification=bill_type,
from_organization={"name":"Chicago City Council"})
legistar_web = self.legislation_detail_url(matter_id)
legistar_api = 'http://webapi.legistar.com/v1/chicago/matters/{0}'.format(matter_id)
bill.add_source(legistar_web, note='web')
bill.add_source(legistar_api, note='api')
for identifier in alternate_identifiers:
bill.add_identifier(identifier)
for action, vote in self.actions(matter_id) :
act = bill.add_action(**action)
if action['description'] == 'Referred' :
body_name = matter['MatterBodyName']
if body_name != 'City Council' :
act.add_related_entity(body_name,
'organization',
entity_id = _make_pseudo_id(name=body_name))
result, votes = vote
if result :
vote_event = VoteEvent(legislative_session=bill.legislative_session,
motion_text=action['description'],
organization=action['organization'],
classification=None,
start_date=action['date'],
result=result,
bill=bill)
vote_event.add_source(legistar_web)
vote_event.add_source(legistar_api + '/histories')
for vote in votes :
raw_option = vote['VoteValueName'].lower()
clean_option = self.VOTE_OPTIONS.get(raw_option,
raw_option)
vote_event.vote(clean_option,
vote['VotePersonName'].strip())
yield vote_event
for sponsorship in self.sponsorships(matter_id) :
bill.add_sponsorship(**sponsorship)
for topic in self.topics(matter_id) :
bill.add_subject(topic['MatterIndexName'].strip())
for attachment in self.attachments(matter_id) :
if attachment['MatterAttachmentName'] :
bill.add_version_link(attachment['MatterAttachmentName'],
attachment['MatterAttachmentHyperlink'],
media_type="application/pdf")
bill.extras = {'local_classification' : matter['MatterTypeName']}
text = self.text(matter_id)
if text :
if text['MatterTextPlain'] :
bill.extras['plain_text'] = text['MatterTextPlain']
if text['MatterTextRtf'] :
bill.extras['rtf_text'] = text['MatterTextRtf'].replace(u'\u0000', '')
yield bill
Example 13: scrape_bill
# Required import: from pupa.scrape import Bill [as alias]
# Alternatively: from pupa.scrape.Bill import add_action [as alias]
def scrape_bill(self, chamber, session, bill_id, url):
try:
page = lxml.html.fromstring(self.get(url).text)
except scrapelib.HTTPError as e:
self.warning('error (%s) fetching %s, skipping' % (e, url))
return
title = page.xpath(
"string(//span[contains(@id, 'PlaceHolder1_txtST')])").strip()
if 'JR' in bill_id:
bill_type = ['joint resolution']
elif 'CR' in bill_id:
bill_type = ['concurrent resolution']
elif 'R' in bill_id:
bill_type = ['resolution']
else:
bill_type = ['bill']
bill = Bill(bill_id,
legislative_session=session,
chamber=chamber,
title=title,
classification=bill_type)
bill.add_source(url)
bill.subject = self.subject_map[bill_id]
for link in page.xpath("//a[contains(@id, 'Auth')]"):
name = link.xpath("string()").strip()
if ':' in name:
raise Exception(name)
if 'otherAuth' in link.attrib['id']:
bill.add_sponsorship(name, classification='cosponsor',
entity_type='person', primary=False)
else:
bill.add_sponsorship(name, classification='primary',
entity_type='person', primary=True)
act_table = page.xpath("//table[contains(@id, 'Actions')]")[0]
for tr in act_table.xpath("tr")[2:]:
action = tr.xpath("string(td[1])").strip()
if not action or action == 'None':
continue
date = tr.xpath("string(td[3])").strip()
date = datetime.datetime.strptime(date, "%m/%d/%Y").date()
actor = tr.xpath("string(td[4])").strip()
if actor == 'H':
actor = 'lower'
elif actor == 'S':
actor = 'upper'
attrs = self.categorizer.categorize(action)
related_entities = []
for item in attrs['committees']:
related_entities.append({
'type': 'committee',
'name': item
})
for item in attrs['legislators']:
related_entities.append({
'type': 'legislator',
'name': item
})
bill.add_action(description=action,
date=date.strftime('%Y-%m-%d'),
chamber=actor,
classification=attrs['classification'],
related_entities=related_entities)
version_table = page.xpath("//table[contains(@id, 'Versions')]")[0]
# Keep track of already seen versions to prevent processing duplicates.
version_urls = []
for link in version_table.xpath(".//a[contains(@href, '.PDF')]"):
version_url = link.attrib['href']
if version_url in version_urls:
self.warning('Skipping duplicate version URL.')
continue
else:
version_urls.append(version_url)
name = link.text.strip()
if re.search('COMMITTEE REPORTS|SCHEDULED CCR', version_url, re.IGNORECASE):
bill.add_document_link(note=name, url=version_url,
media_type='application/pdf')
continue
bill.add_version_link(note=name, url=version_url,
media_type='application/pdf')
for link in page.xpath(".//a[contains(@href, '_VOTES')]"):
if 'HT_' not in link.attrib['href']:
yield from self.scrape_votes(bill, self.urlescape(link.attrib['href']))
    # If the bill has no actions and no versions, it's a bogus bill on
    # their website, which appears to happen occasionally. Skip.
has_no_title = (bill.title == "Short Title Not Found.")
if has_no_title:
#......... portion of code omitted .........
Example 14: scrape_details
# Required import: from pupa.scrape import Bill [as alias]
# Alternatively: from pupa.scrape.Bill import add_action [as alias]
def scrape_details(self, bill_detail_url, session, chamber, bill_id):
"""
    Create the Bill, add the information obtained from the provided
    bill_detail_url, and then yield the bill object.
:param bill_detail_url:
:param session:
:param chamber:
:param bill_id:
:return:
"""
page = self.get(bill_detail_url).text
if 'INVALID BILL NUMBER' in page:
self.warning('INVALID BILL %s' % bill_detail_url)
return
doc = lxml.html.fromstring(page)
doc.make_links_absolute(bill_detail_url)
bill_div = doc.xpath('//div[@style="margin:0 0 40px 0;"]')[0]
bill_type = bill_div.xpath('span/text()')[0]
if 'General Bill' in bill_type:
bill_type = 'bill'
elif 'Concurrent Resolution' in bill_type:
bill_type = 'concurrent resolution'
elif 'Joint Resolution' in bill_type:
bill_type = 'joint resolution'
elif 'Resolution' in bill_type:
bill_type = 'resolution'
else:
raise ValueError('unknown bill type: %s' % bill_type)
# this is fragile, but less fragile than it was
b = bill_div.xpath('./b[text()="Summary:"]')[0]
bill_summary = b.getnext().tail.strip()
bill = Bill(
bill_id,
        legislative_session=session,  # session name from metadata's `legislative_sessions`
chamber=chamber, # 'upper' or 'lower'
title=bill_summary,
classification=bill_type
)
subjects = list(self._subjects[bill_id])
for subject in subjects:
bill.add_subject(subject)
# sponsors
for sponsor in doc.xpath('//a[contains(@href, "member.php")]/text()'):
bill.add_sponsorship(
name=sponsor,
classification='primary',
primary=True,
entity_type='person'
)
for sponsor in doc.xpath('//a[contains(@href, "committee.php")]/text()'):
sponsor = sponsor.replace(u'\xa0', ' ').strip()
bill.add_sponsorship(
name=sponsor,
classification='primary',
primary=True,
entity_type='organization'
)
# find versions
version_url = doc.xpath('//a[text()="View full text"]/@href')[0]
version_html = self.get(version_url).text
version_doc = lxml.html.fromstring(version_html)
version_doc.make_links_absolute(version_url)
for version in version_doc.xpath('//a[contains(@href, "/prever/")]'):
# duplicate versions with same date, use first appearance
bill.add_version_link(
note=version.text, # Description of the version from the state;
# eg, 'As introduced', 'Amended', etc.
url=version.get('href'),
on_duplicate='ignore',
media_type='text/html' # Still a MIME type
)
# actions
for row in bill_div.xpath('table/tr'):
date_td, chamber_td, action_td = row.xpath('td')
date = datetime.datetime.strptime(date_td.text, "%m/%d/%y")
action_chamber = {'Senate': 'upper',
'House': 'lower',
None: 'legislature'}[chamber_td.text]
action = action_td.text_content()
action = action.split('(House Journal')[0]
action = action.split('(Senate Journal')[0].strip()
atype = action_type(action)
bill.add_action(
#......... portion of code omitted .........
Example 15: bill_info
# Required import: from pupa.scrape import Bill [as alias]
# Alternatively: from pupa.scrape.Bill import add_action [as alias]
def bill_info(self, bill_link, session, main_url):
bill_page = self.lxmlize(bill_link)
long_title = self.get_node(
bill_page,
'//div[@class="main-content"]/div[1]/div/h2').text.split()
bill_number = long_title[0]
    title = ' '.join(long_title[2:])
if not title:
self.error('no title, skipping %s', bill_number)
return
bill_type = 'resolution' if 'LR' in bill_number else 'bill'
bill = Bill(bill_number, session, title, classification=bill_type)
bill.add_source(main_url)
bill.add_source(bill_link)
introduced_by = self.get_node(
bill_page,
'//div[@class="main-content"]/div[3]/div[1]/ul/li[1]/a[1]/text()')
if not introduced_by:
introduced_by = self.get_node(
bill_page,
'//div[@class="main-content"]/div[3]/div[1]/ul/li[1]/text()')
introduced_by = introduced_by.split('Introduced By:')[1].strip()
bill.add_sponsorship(
name=introduced_by,
entity_type='person',
primary=True,
classification='primary',
)
action_nodes = self.get_nodes(
bill_page,
'//div[@class="main-content"]/div[5]//table/tbody/tr')
for action_node in action_nodes:
date = self.get_node(
action_node,
'./td[1]').text
date = datetime.strptime(date, '%b %d, %Y')
# The action node may have an anchor element within it, so
# we grab all the text within.
action = self.get_node(
action_node,
'./td[2]').text_content()
if 'Governor' in action:
actor = 'executive'
elif 'Speaker' in action:
actor = 'legislature'
else:
actor = 'legislature'
action_type = self.action_types(action)
bill.add_action(
action,
date.strftime('%Y-%m-%d'),
chamber=actor,
classification=action_type,
)
    # They're in reverse chronological order.
bill.actions.reverse()
# Grabs bill version documents.
version_links = self.get_nodes(
bill_page,
'//div[@class="main-content"]/div[3]/div[2]/'
'div[@class="hidden-xs"]/ul[1]/li/a')
for version_link in version_links:
version_name = version_link.text
version_url = version_link.attrib['href']
# replace Current w/ session number
version_url = version_url.replace('Current', session)
bill.add_version_link(version_name, version_url, media_type='application/pdf')
# Adds any documents related to amendments.
amendment_links = self.get_nodes(
bill_page,
'//div[@class="main-content"]/div[5]/div[2]/table/tr/td[1]/a')
for amendment_link in amendment_links:
amendment_name = amendment_link.text
amendment_url = amendment_link.attrib['href']
bill.add_document_link(amendment_name, amendment_url)
# Related transcripts.
transcript_links = self.get_nodes(
#......... portion of code omitted .........