本文整理汇总了Python中pupa.scrape.Bill.extras方法的典型用法代码示例。如果您正苦于以下问题:Python Bill.extras方法的具体用法?Python Bill.extras怎么用?Python Bill.extras使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pupa.scrape.Bill
的用法示例。
在下文中一共展示了Bill.extras方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: scrape
# 需要导入模块: from pupa.scrape import Bill [as 别名]
# 或者: from pupa.scrape.Bill import extras [as 别名]
#.........这里部分代码省略.........
if i == 0 :
primary = True
sponsorship_type = "Primary"
else :
primary = False
sponsorship_type = "Regular"
sponsor_name = sponsor['label']
# Does the Mayor/Clerk introduce legisislation as
# individuals role holders or as the OFfice of City
# Clerk and the Office of the Mayor?
entity_type = 'person'
if sponsor_name.startswith(('City Clerk',
'Mendoza, Susana')) :
sponsor_name = 'Office of the City Clerk'
entity_type = 'organization'
elif sponsor_name.startswith(('Emanuel, Rahm',)) :
sponsor_name = 'Office of the Mayor'
entity_type = 'organization'
if not sponsor_name.startswith(('Misc. Transmittal',
'No Sponsor',
'Dept./Agency')) :
bill.add_sponsorship(sponsor_name,
sponsorship_type,
entity_type,
primary,
entity_id = _make_pseudo_id(name=sponsor_name))
if 'Topic' in leg_details :
for subject in leg_details[u'Topic'].split(',') :
bill.add_subject(subject)
for attachment in leg_details.get('Attachments', []) :
if attachment['label'] :
bill.add_version_link(attachment['label'],
attachment['url'],
media_type="application/pdf")
for action in self.history(leg_summary['url']) :
action_description = action['Action']
try :
action_date = self.toTime(action['Date']).date().isoformat()
except AttributeError : # https://chicago.legistar.com/LegislationDetail.aspx?ID=1424866&GUID=CEC53337-B991-4268-AE8A-D4D174F8D492
continue
if action_description :
try :
responsible_org = action['Action\xa0By']['label']
except TypeError :
responsible_org = action['Action\xa0By']
if responsible_org == 'City Council' :
responsible_org = 'Chicago City Council'
act = bill.add_action(action_description,
action_date,
organization={'name': responsible_org},
classification=ACTION_CLASSIFICATION[action_description])
if action_description == 'Referred' :
try :
leg_details['Current Controlling Legislative Body']['label']
controlling_bodies = [leg_details['Current Controlling Legislative Body']]
except TypeError :
controlling_bodies = leg_details['Current Controlling Legislative Body']
if controlling_bodies :
for controlling_body in controlling_bodies :
body_name = controlling_body['label']
if body_name.startswith("Joint Committee") :
act.add_related_entity(body_name,
'organization')
else :
act.add_related_entity(body_name,
'organization',
entity_id = _make_pseudo_id(name=body_name))
if 'url' in action['Action\xa0Details'] :
action_detail_url = action['Action\xa0Details']['url']
result, votes = self.extractVotes(action_detail_url)
if votes and result : # see https://github.com/datamade/municipal-scrapers-us/issues/15
action_vote = VoteEvent(legislative_session=bill.legislative_session,
motion_text=action_description,
organization={'name': responsible_org},
classification=None,
start_date=action_date,
result=result,
bill=bill)
action_vote.add_source(action_detail_url)
for option, voter in votes :
action_vote.vote(option, voter)
yield action_vote
bill.extras = {'local_classification' : leg_summary['Type']}
yield bill
print(unreachable_urls)
示例2: scrape_chamber
# 需要导入模块: from pupa.scrape import Bill [as 别名]
# 或者: from pupa.scrape.Bill import extras [as 别名]
def scrape_chamber(self, chamber, session):
    """Scrape Kansas bills for one chamber from the ksapi bill_status feed.

    Yields any VoteEvents produced by ``scrape_html()`` and then each Bill.

    :param chamber: 'upper' (Senate) or 'lower' (House)
    :param session: legislative session identifier passed through to Bill
    """
    chamber_name = 'Senate' if chamber == 'upper' else 'House'
    chamber_letter = chamber_name[0]
    # perhaps we should save this data so we can make one request for both?
    bill_request = self.get(ksapi.url + 'bill_status/').text
    bill_request_json = json.loads(bill_request)
    bills = bill_request_json['content']

    for bill_data in bills:
        bill_id = bill_data['BILLNO']

        # filter other chambers
        if not bill_id.startswith(chamber_letter):
            continue

        # Classify from the bill-number prefix. An explicit fallback keeps
        # `btype` from being unbound (first iteration) or carried over stale
        # from a previous bill when the prefix matches none of CR/R/B.
        if 'CR' in bill_id:
            btype = 'concurrent resolution'
        elif 'R' in bill_id:
            btype = 'resolution'
        elif 'B' in bill_id:
            btype = 'bill'
        else:
            btype = 'bill'

        title = bill_data['SHORTTITLE'] or bill_data['LONGTITLE']

        # main
        bill = Bill(
            bill_id,
            session,
            title,
            chamber=chamber,
            classification=btype,
        )
        bill.extras = {'status': bill_data['STATUS']}

        bill.add_source(ksapi.url + 'bill_status/' + bill_id.lower())

        # LONGTITLE is only an alternate title when it differs from the
        # title already chosen above.
        if (bill_data['LONGTITLE'] and
                bill_data['LONGTITLE'] != bill.title):
            bill.add_title(bill_data['LONGTITLE'])

        for sponsor in bill_data['SPONSOR_NAMES']:
            # A lone sponsor is primary; multiple sponsors are cosponsors.
            stype = ('primary' if len(bill_data['SPONSOR_NAMES']) == 1
                     else 'cosponsor')
            if sponsor:
                bill.add_sponsorship(
                    name=sponsor,
                    entity_type='person',
                    primary=stype == 'primary',
                    classification=stype,
                )

        # history is backwards
        for event in reversed(bill_data['HISTORY']):
            actor = ('upper' if event['chamber'] == 'Senate'
                     else 'lower')

            date = datetime.datetime.strptime(event['occurred_datetime'], "%Y-%m-%dT%H:%M:%S")
            # append committee names if present
            if 'committee_names' in event:
                action = (event['status'] + ' ' +
                          ' and '.join(event['committee_names']))
            else:
                action = event['status']

            if event['action_code'] not in ksapi.action_codes:
                self.warning('unknown action code on %s: %s %s' %
                             (bill_id, event['action_code'],
                              event['status']))
                atype = None
            else:
                atype = ksapi.action_codes[event['action_code']]
            bill.add_action(
                action, date.strftime('%Y-%m-%d'), chamber=actor, classification=atype)

        try:
            yield from self.scrape_html(bill, session)
        except scrapelib.HTTPError:
            # BUGFIX: pupa Bill objects are not subscriptable — the original
            # `bill['bill_id']` raised TypeError inside this handler. Use the
            # local bill_id string instead.
            self.warning('unable to fetch HTML for bill {0}'.format(
                bill_id))

        yield bill
示例3: scrape_chamber
# 需要导入模块: from pupa.scrape import Bill [as 别名]
# 或者: from pupa.scrape.Bill import extras [as 别名]
def scrape_chamber(self, chamber, session):
    """Scrape Kansas bills for one chamber from the ksapi bill_status feed.

    Yields any VoteEvents produced by ``scrape_html()`` and then each Bill.
    Distinguishes "original sponsor" (the API's notion of primary sponsor)
    from cosponsors.

    :param chamber: 'upper' (Senate) or 'lower' (House)
    :param session: legislative session identifier passed through to Bill
    """
    chamber_name = 'Senate' if chamber == 'upper' else 'House'
    chamber_letter = chamber_name[0]
    # perhaps we should save this data so we can make one request for both?
    bill_request = self.get(ksapi.url + 'bill_status/').text
    bill_request_json = json.loads(bill_request)
    bills = bill_request_json['content']

    for bill_data in bills:
        bill_id = bill_data['BILLNO']

        # filter other chambers
        if not bill_id.startswith(chamber_letter):
            continue

        # Classify from the bill-number prefix. An explicit fallback keeps
        # `btype` from being unbound (first iteration) or carried over stale
        # from a previous bill when the prefix matches none of CR/R/B.
        if 'CR' in bill_id:
            btype = 'concurrent resolution'
        elif 'R' in bill_id:
            btype = 'resolution'
        elif 'B' in bill_id:
            btype = 'bill'
        else:
            btype = 'bill'

        title = bill_data['SHORTTITLE'] or bill_data['LONGTITLE']

        # main
        bill = Bill(
            bill_id,
            session,
            title,
            chamber=chamber,
            classification=btype,
        )
        bill.extras = {'status': bill_data['STATUS']}

        bill.add_source(ksapi.url + 'bill_status/' + bill_id.lower())

        # LONGTITLE is only an alternate title when it differs from the
        # title already chosen above.
        if (bill_data['LONGTITLE'] and
                bill_data['LONGTITLE'] != bill.title):
            bill.add_title(bill_data['LONGTITLE'])

        # An "original sponsor" is the API's expression of "primary sponsor"
        for primary_sponsor in bill_data['ORIGINAL_SPONSOR']:
            bill.add_sponsorship(
                name=primary_sponsor,
                entity_type='organization' if "committee" in primary_sponsor.lower()
                else 'person',
                primary=True,
                classification="original sponsor"
            )
        for sponsor in bill_data['SPONSOR_NAMES']:
            # Skip names already recorded as original sponsors above.
            if sponsor in bill_data['ORIGINAL_SPONSOR']:
                continue
            bill.add_sponsorship(
                name=sponsor,
                entity_type='organization' if "committee" in sponsor.lower() else 'person',
                primary=False,
                classification='cosponsor',
            )

        # history is backwards
        for event in reversed(bill_data['HISTORY']):
            actor = ('upper' if event['chamber'] == 'Senate'
                     else 'lower')

            date = event['session_date']
            # append committee names if present
            if 'committee_names' in event:
                action = (event['status'] + ' ' +
                          ' and '.join(event['committee_names']))
            else:
                action = event['status']

            if event['action_code'] not in ksapi.action_codes:
                self.warning('unknown action code on %s: %s %s' %
                             (bill_id, event['action_code'],
                              event['status']))
                atype = None
            else:
                atype = ksapi.action_codes[event['action_code']]
            bill.add_action(
                action, date, chamber=actor, classification=atype)

        # Versions are exposed in `bill_data['versions'],
        # but lack any descriptive text or identifiers;
        # continue to scrape these from the HTML
        yield from self.scrape_html(bill, session)
        yield bill
示例4: scrape
# 需要导入模块: from pupa.scrape import Bill [as 别名]
# 或者: from pupa.scrape.Bill import extras [as 别名]
#.........这里部分代码省略.........
history = [x for x in instrument['StatusHistory'][0]]
actions = reversed([{
'code': x['Code'],
'action': x['Description'],
'_guid': x['Id'],
'date': x['Date']
} for x in history])
guid = instrument['Id']
# A little bit hacky.
bill_prefix = instrument['DocumentType']
bill_chamber = chamber_map[bill_prefix[0]]
bill_type = bill_type_map[bill_prefix[1:]]
bill_id = '%s %s' % (
bill_prefix,
instrument['Number'],
)
if instrument['Suffix']:
bill_id += instrument['Suffix']
title = instrument['Caption']
description = instrument['Summary']
if title is None:
continue
bill = Bill(
bill_id, legislative_session=session, chamber=bill_chamber, title=title,
classification=bill_type)
bill.add_abstract(description, note='description')
bill.extras = {'guid': guid}
if instrument['Votes']:
for vote_ in instrument['Votes']:
_, vote_ = vote_
vote_ = backoff(self.vservice.GetVote, vote_[0]['VoteId'])
vote = VoteEvent(
start_date=vote_['Date'].strftime('%Y-%m-%d'),
motion_text=vote_['Caption'] or 'Vote on Bill',
chamber={'House': 'lower', 'Senate': 'upper'}[vote_['Branch']],
result='pass' if vote_['Yeas'] > vote_['Nays'] else 'fail',
classification='passage',
bill=bill,
)
vote.set_count('yes', vote_['Yeas'])
vote.set_count('no', vote_['Nays'])
vote.set_count('other', vote_['Excused'] + vote_['NotVoting'])
vote.add_source(self.vsource)
methods = {'Yea': 'yes', 'Nay': 'no'}
for vdetail in vote_['Votes'][0]:
whom = vdetail['Member']
how = vdetail['MemberVoted']
vote.vote(methods.get(how, 'other'), whom['Name'])
yield vote
ccommittees = defaultdict(list)
committees = instrument['Committees']
if committees:
示例5: scrape
# 需要导入模块: from pupa.scrape import Bill [as 别名]
# 或者: from pupa.scrape.Bill import extras [as 别名]
def scrape(self):
for leg_summary in self.legislation(created_after=datetime.datetime(2014, 1, 1)) :
leg_type = BILL_TYPES[leg_summary['Type']]
bill = Bill(identifier=leg_summary['File\xa0#'],
title=leg_summary['Title'],
legislative_session=None,
classification=leg_type,
from_organization={"name":"New York City Council"})
bill.add_source(leg_summary['url'])
leg_details = self.legDetails(leg_summary['url'])
history = self.history(leg_summary['url'])
bill.add_title(leg_details['Name'],
note='created by administrative staff')
if 'Summary' in leg_details :
bill.add_abstract(leg_details['Summary'], note='')
if leg_details['Law number'] :
bill.add_identifier(leg_details['Law number'],
note='law number')
for sponsorship in self._sponsors(leg_details.get('Sponsors', [])) :
sponsor, sponsorship_type, primary = sponsorship
bill.add_sponsorship(sponsor, sponsorship_type,
'person', primary,
entity_id = make_pseudo_id(name=sponsor))
for attachment in leg_details.get('Attachments', []) :
bill.add_document_link(attachment['label'],
attachment['url'],
media_type="application/pdf")
history = list(history)
if history :
earliest_action = min(self.toTime(action['Date'])
for action in history)
bill.legislative_session = self.sessions(earliest_action)
else :
bill.legislative_session = str(self.SESSION_STARTS[0])
for action in history :
action_description = action['Action']
if not action_description :
continue
action_class = ACTION_CLASSIFICATION[action_description]
action_date = self.toDate(action['Date'])
responsible_org = action['Action\xa0By']
if responsible_org == 'City Council' :
responsible_org = 'New York City Council'
elif responsible_org == 'Administration' :
responsible_org = 'Mayor'
if responsible_org == 'Town Hall Meeting' :
continue
else :
act = bill.add_action(action_description,
action_date,
organization={'name': responsible_org},
classification=action_class)
if 'url' in action['Action\xa0Details'] :
action_detail_url = action['Action\xa0Details']['url']
if action_class == 'committee-referral' :
action_details = self.actionDetails(action_detail_url)
referred_committee = action_details['Action text'].rsplit(' to the ', 1)[-1]
act.add_related_entity(referred_committee,
'organization',
entity_id = make_pseudo_id(name=referred_committee))
result, votes = self.extractVotes(action_detail_url)
if votes :
action_vote = VoteEvent(legislative_session=bill.legislative_session,
motion_text=action_description,
organization={'name': responsible_org},
classification=action_class,
start_date=action_date,
result=result,
bill=bill)
action_vote.add_source(action_detail_url)
for option, voter in votes :
action_vote.vote(option, voter)
yield action_vote
text = self.text(leg_summary['url'])
if text :
bill.extras = {'local_classification' : leg_summary['Type'],
'full_text' : text}
else :
bill.extras = {'local_classification' : leg_summary['Type']}
#.........这里部分代码省略.........
示例6: scrape
# 需要导入模块: from pupa.scrape import Bill [as 别名]
# 或者: from pupa.scrape.Bill import extras [as 别名]
def scrape(self):
    """Scrape Chicago Legistar matters introduced/updated recently.

    Walks every matter from the last three days, building a pupa Bill for
    each. Yields a VoteEvent for every recorded vote, then the Bill itself.
    """
    cutoff = datetime.datetime.now() - datetime.timedelta(3)

    for matter in self.matters(cutoff):
        matter_id = matter['MatterId']

        intro_date = matter['MatterIntroDate']
        matter_title = matter['MatterTitle']
        matter_file = matter['MatterFile']

        # Skip records missing any essential field.
        if not (intro_date and matter_title and matter_file):
            continue

        bill_session = self.session(self.toTime(intro_date))
        bill_type = BILL_TYPES[matter['MatterTypeName']]

        # A leading 'S' marks a substitute: keep the prefixed form as an
        # alternate identifier and strip it for the canonical one.
        alternate_identifiers = []
        identifier = matter_file
        if identifier.startswith('S'):
            alternate_identifiers.append(identifier)
            identifier = identifier[1:]

        bill = Bill(identifier=identifier,
                    legislative_session=bill_session,
                    title=matter_title,
                    classification=bill_type,
                    from_organization={"name": "Chicago City Council"})

        legistar_web = self.legislation_detail_url(matter_id)
        legistar_api = 'http://webapi.legistar.com/v1/chicago/matters/{0}'.format(matter_id)

        bill.add_source(legistar_web, note='web')
        bill.add_source(legistar_api, note='api')

        for alternate in alternate_identifiers:
            bill.add_identifier(alternate)

        for action, vote in self.actions(matter_id):
            act = bill.add_action(**action)

            # A referral points at the controlling committee, unless the
            # matter stayed with the full Council.
            if action['description'] == 'Referred':
                body_name = matter['MatterBodyName']
                if body_name != 'City Council':
                    act.add_related_entity(body_name,
                                           'organization',
                                           entity_id=_make_pseudo_id(name=body_name))

            result, votes = vote
            if result:
                vote_event = VoteEvent(legislative_session=bill.legislative_session,
                                       motion_text=action['description'],
                                       organization=action['organization'],
                                       classification=None,
                                       start_date=action['date'],
                                       result=result,
                                       bill=bill)

                vote_event.add_source(legistar_web)
                vote_event.add_source(legistar_api + '/histories')

                for ballot in votes:
                    raw_option = ballot['VoteValueName'].lower()
                    clean_option = self.VOTE_OPTIONS.get(raw_option, raw_option)
                    vote_event.vote(clean_option,
                                    ballot['VotePersonName'].strip())

                yield vote_event

        for sponsorship in self.sponsorships(matter_id):
            bill.add_sponsorship(**sponsorship)

        for topic in self.topics(matter_id):
            bill.add_subject(topic['MatterIndexName'].strip())

        for attachment in self.attachments(matter_id):
            if attachment['MatterAttachmentName']:
                bill.add_version_link(attachment['MatterAttachmentName'],
                                      attachment['MatterAttachmentHyperlink'],
                                      media_type="application/pdf")

        bill.extras = {'local_classification': matter['MatterTypeName']}

        text = self.text(matter_id)
        if text:
            if text['MatterTextPlain']:
                bill.extras['plain_text'] = text['MatterTextPlain']
            if text['MatterTextRtf']:
                # Strip NUL characters the RTF payloads sometimes contain.
                bill.extras['rtf_text'] = text['MatterTextRtf'].replace(u'\u0000', '')

        yield bill
示例7: scrape_events_range
# 需要导入模块: from pupa.scrape import Bill [as 别名]
# 或者: from pupa.scrape.Bill import extras [as 别名]
def scrape_events_range(self, start_date, end_date):
    """Scrape Toronto meetings for each day in [start_date, end_date).

    For every meeting, builds an Event; when an agenda is published, also
    yields a Bill for each agenda item not previously seen. Yields the
    Bills (if any) before each Event.

    :param start_date: inclusive datetime lower bound
    :param end_date: exclusive datetime upper bound
    """
    def daterange(start_date, end_date):
        # Every date from start_date (inclusive) to end_date (exclusive).
        number_of_days = int((end_date - start_date).days)
        for n in range(number_of_days):
            yield start_date + dt.timedelta(n)

    def normalize_wards(raw):
        # Empty/'All' -> 'all'; otherwise a list of ward names.
        if not raw:
            raw = 'All'
        if raw == 'All':
            return raw.lower()
        return raw.split(', ')

    # Hoisted loop invariants: one timezone object, one compiled pattern.
    tz = pytz.timezone("America/Toronto")
    # e.g. '2017.EX1.23' -> captures the item code 'EX1.23'
    identifier_regex = re.compile(r'^[0-9]{4}\.([A-Z]{2}[0-9]+\.[0-9]+)$')

    def is_agenda_available(event):
        return event['publishing_status'] in ['Agenda Published', 'Minutes Published']

    def is_council(event):
        return event['meeting'] == self.jurisdiction.name

    for date in daterange(start_date, end_date):
        events = self.extract_events_by_day(date)
        for event in events:
            meeting_time = dt.datetime.strptime(event['time'], '%I:%M %p')
            start = tz.localize(date.replace(hour=meeting_time.hour, minute=meeting_time.minute,
                                             second=0, microsecond=0))
            source_url = CALENDAR_DAY_TEMPLATE.format(start.year, start.month, start.day)
            org_name = event['meeting']
            e = Event(
                name=org_name,
                start_time=start,
                timezone=tz.zone,
                location_name=event['location'],
                status=STATUS_DICT.get(event['meeting_status'])
            )
            e.add_source(source_url)
            e.extras = {
                'meeting_number': event['no'],
                'tmmis_meeting_id': event['meeting_id'],
            }
            e.add_participant(
                name=org_name,
                type='organization',
            )

            if is_agenda_available(event):
                template = AGENDA_FULL_COUNCIL_TEMPLATE if is_council(event) else AGENDA_FULL_STANDARD_TEMPLATE
                agenda_url = template.format(event['meeting_id'])
                full_identifiers = list(self.full_identifiers(event['meeting_id'], is_council(event)))

                e.add_source(agenda_url)
                agenda_items = self.agenda_from_url(agenda_url)
                for i, item in enumerate(agenda_items):
                    a = e.add_agenda_item(item['title'])
                    a.add_classification(item['type'].lower())
                    a['order'] = str(i)

                    wards = normalize_wards(item['wards'])

                    # BUGFIX: the original called .group(1) on match() results
                    # unconditionally, raising AttributeError whenever an
                    # identifier didn't fit the pattern. Skip non-matching
                    # identifiers instead; still require exactly one match.
                    matching = []
                    for fid in full_identifiers:
                        m = identifier_regex.match(fid)
                        if m and m.group(1) == item['identifier']:
                            matching.append(fid)
                    [full_identifier] = matching

                    a.add_bill(full_identifier)
                    if full_identifier not in self.seen_agenda_items:
                        b = Bill(
                            # TODO: Fix this hardcode
                            legislative_session='2014-2018',
                            identifier=full_identifier,
                            title=item['title'],
                            from_organization={'name': self.jurisdiction.name},
                        )
                        b.add_source(agenda_url)
                        b.add_document_link(note='canonical', media_type='text/html',
                                            url=AGENDA_ITEM_TEMPLATE.format(full_identifier))
                        b.extras = {
                            'wards': wards,
                        }
                        self.seen_agenda_items.append(full_identifier)
                        yield b
            yield e