本文整理汇总了Python中pupa.scrape.Bill.add_identifier方法的典型用法代码示例。如果您正苦于以下问题:Python Bill.add_identifier方法的具体用法?Python Bill.add_identifier怎么用?Python Bill.add_identifier使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pupa.scrape.Bill
的用法示例。
在下文中一共展示了Bill.add_identifier方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: scrape
# 需要导入模块: from pupa.scrape import Bill [as 别名]
# 或者: from pupa.scrape.Bill import add_identifier [as 别名]
def scrape(self):
    """Scrape NYC Council legislation created since 2014-01-01 into pupa Bills.

    Yields VoteEvent objects for any action that carried a recorded vote.
    NOTE(review): this listing is truncated after the extras assignment; the
    bill itself is presumably yielded further down (not visible here).
    """
    for leg_summary in self.legislation(created_after=datetime.datetime(2014, 1, 1)) :
        # NOTE(review): raises KeyError on a legislation type missing from
        # BILL_TYPES -- presumably intentional so new types get noticed.
        leg_type = BILL_TYPES[leg_summary['Type']]
        # 'File\xa0#' -- the scraped column header contains a non-breaking space.
        bill = Bill(identifier=leg_summary['File\xa0#'],
                    title=leg_summary['Title'],
                    legislative_session=None,   # set below from the action history
                    classification=leg_type,
                    from_organization={"name":"New York City Council"})
        bill.add_source(leg_summary['url'])
        leg_details = self.legDetails(leg_summary['url'])
        history = self.history(leg_summary['url'])
        # The detail-page name is stored as an alternate title.
        bill.add_title(leg_details['Name'],
                       note='created by administrative staff')
        if 'Summary' in leg_details :
            bill.add_abstract(leg_details['Summary'], note='')
        if leg_details['Law number'] :
            bill.add_identifier(leg_details['Law number'],
                                note='law number')
        for sponsorship in self._sponsors(leg_details.get('Sponsors', [])) :
            sponsor, sponsorship_type, primary = sponsorship
            bill.add_sponsorship(sponsor, sponsorship_type,
                                 'person', primary,
                                 entity_id = make_pseudo_id(name=sponsor))
        for attachment in leg_details.get('Attachments', []) :
            bill.add_document_link(attachment['label'],
                                   attachment['url'],
                                   media_type="application/pdf")
        history = list(history)
        if history :
            # Pick the legislative session from the earliest action date.
            earliest_action = min(self.toTime(action['Date'])
                                  for action in history)
            bill.legislative_session = self.sessions(earliest_action)
        else :
            # No actions yet: fall back to the first configured session start.
            bill.legislative_session = str(self.SESSION_STARTS[0])
        for action in history :
            action_description = action['Action']
            if not action_description :
                continue
            # NOTE(review): KeyError on action text missing from
            # ACTION_CLASSIFICATION -- confirm that is the intended behavior.
            action_class = ACTION_CLASSIFICATION[action_description]
            action_date = self.toDate(action['Date'])
            responsible_org = action['Action\xa0By']
            # Normalize Legistar organization names to canonical ones.
            if responsible_org == 'City Council' :
                responsible_org = 'New York City Council'
            elif responsible_org == 'Administration' :
                responsible_org = 'Mayor'
            if responsible_org == 'Town Hall Meeting' :
                # Town-hall entries are skipped entirely.
                continue
            else :
                act = bill.add_action(action_description,
                                      action_date,
                                      organization={'name': responsible_org},
                                      classification=action_class)
            if 'url' in action['Action\xa0Details'] :
                action_detail_url = action['Action\xa0Details']['url']
                if action_class == 'committee-referral' :
                    # Detail text ends "... to the <committee>"; take the tail.
                    action_details = self.actionDetails(action_detail_url)
                    referred_committee = action_details['Action text'].rsplit(' to the ', 1)[-1]
                    act.add_related_entity(referred_committee,
                                           'organization',
                                           entity_id = make_pseudo_id(name=referred_committee))
                result, votes = self.extractVotes(action_detail_url)
                if votes :
                    action_vote = VoteEvent(legislative_session=bill.legislative_session,
                                            motion_text=action_description,
                                            organization={'name': responsible_org},
                                            classification=action_class,
                                            start_date=action_date,
                                            result=result,
                                            bill=bill)
                    action_vote.add_source(action_detail_url)
                    for option, voter in votes :
                        action_vote.vote(option, voter)
                    yield action_vote
        # Full text, when available, is stashed in extras rather than as a version.
        text = self.text(leg_summary['url'])
        if text :
            bill.extras = {'local_classification' : leg_summary['Type'],
                           'full_text' : text}
        else :
            bill.extras = {'local_classification' : leg_summary['Type']}
#.........这里部分代码省略.........
示例2: scrape_bill
# 需要导入模块: from pupa.scrape import Bill [as 别名]
# 或者: from pupa.scrape.Bill import add_identifier [as 别名]
#.........这里部分代码省略.........
# NOTE(review): truncated fragment -- both the start of this function and the
# enclosing loop over legacy `act` records are outside this listing, so the
# original indentation cannot be reconstructed; code lines kept verbatim.
# --- normalize the scraped `actor` string to a pupa chamber slug ---
actor = 'executive'
elif actor.lower() == 'house' or (actor.lower().startswith('lower (') and self.state == 'ca'):
actor = 'lower'
# NOTE(review): 'upper`' below contains a stray backtick -- presumably it
# matches a typo present in the legacy data; confirm before "fixing".
elif actor.lower() in ('senate', 'upper`') or (actor.lower().startswith('upper (') and self.state == 'ca'):
actor = 'upper'
elif actor in ('joint', 'other', 'Data Systems', 'Speaker', 'clerk',
'Office of the Legislative Fiscal Analyst', 'Became Law w',
'conference') or (actor.lower().startswith('legislature (') and self.state == 'ca'):
actor = 'legislature'
if actor in ('committee', 'sponsor') and self.state == 'pr':
actor = 'legislature'
# nebraska & DC
if actor in ('upper','council') and self.state in ('ne', 'dc'):
actor = 'legislature'
# --- replay the legacy action onto the new pupa bill ---
if act['action']:
newact = new.add_action(act['action'], act['date'][:10], chamber=actor,
classification=[action_types[c] for c in act['type'] if c != 'other'])
# NOTE(review): loop variable `re` shadows the stdlib re module if that is
# imported at file level.
for re in act.get('related_entities', []):
if re['type'] == 'committee':
re['type'] = 'organization'
elif re['type'] == 'legislator':
re['type'] = 'person'
newact.add_related_entity(re['name'], re['type'])
# Companion bills: relation type is only assigned for NJ/NY/MN.
# NOTE(review): `rtype` stays unbound (NameError) if a companion exists for
# any other state -- verify whether other states can have companions here.
for comp in old.pop('companions', []):
if self.state in ('nj', 'ny', 'mn'):
rtype = 'companion'
new.add_related_bill(comp['bill_id'], comp['session'], rtype)
# Alternate ids come from both the plain and the legacy '+'-prefixed key.
for abid in old.pop('alternate_bill_ids', []) + old.pop('+alternate_bill_ids', []):
new.add_identifier(abid)
# generic OpenStates stuff
for id in old.pop('all_ids'):
new.add_identifier(id, scheme='openstates')
for source in old.pop('sources'):
source.pop('retrieved', None)
new.add_source(**source)
ext_title = old.pop('+extended_title', None)
if ext_title:
new.add_title(ext_title, note='Extended Title')
official_title = old.pop('+official_title', None)
if official_title:
new.add_title(official_title, note='Official Title')
# Legacy one-off fields are carried over into extras with '+' stripped.
to_extras = ['+status', '+final_disposition', '+volume_chapter', '+ld_number', '+referral',
'+companion', '+description', '+fiscal_note_probable:',
'+preintroduction_required:', '+drafter', '+category:', '+chapter',
'+requester', '+transmittal_date:', '+by_request_of', '+bill_draft_number:',
'+bill_lr', '+bill_url', '+rcs_num', '+fiscal_note', '+impact_clause', '+fiscal_notes',
'+short_title', '+type_', '+conference_committee', 'conference_committee',
'+companion_bill_ids', '+additional_information']
for k in to_extras:
v = old.pop(k, None)
if v:
new.extras[k.replace('+', '')] = v
# votes
vote_no = 1
for vote in old.pop('votes'):
示例3: test_full_bill
# 需要导入模块: from pupa.scrape import Bill [as 别名]
# 或者: from pupa.scrape.Bill import add_identifier [as 别名]
def test_full_bill():
    """End-to-end import of a fully-populated scraped bill.

    Builds two scraped bills (one from a prior session), imports them together
    with their organizations, then verifies that every facet -- titles,
    identifiers, actions, related bills, sponsorships, links, sources --
    round-tripped to the database correctly.
    """
    create_jurisdiction()
    legislator = Person.objects.create(id='person-id', name='Adam Smith')

    # Scrape-side fixtures: a chamber, a committee inside it, and two bills.
    house = ScrapeOrganization(name='House', classification='lower')
    committee = ScrapeOrganization(name='Arbitrary Committee',
                                   classification='committee',
                                   parent_id=house._id)
    prior_bill = ScrapeBill('HB 99', '1899', 'Axe & Tack Tax Act',
                            classification='tax bill', from_organization=house._id)
    new_bill = ScrapeBill('HB 1', '1900', 'Axe & Tack Tax Act',
                          classification='tax bill', from_organization=house._id)
    new_bill.subject = ['taxes', 'axes']
    new_bill.add_identifier('SB 9')
    new_bill.add_title('Tack & Axe Tax Act')
    new_bill.add_action('introduced in house', '1900-04-01', chamber='lower')
    referral = new_bill.add_action('sent to arbitrary committee', '1900-04-04',
                                   chamber='lower')
    referral.add_related_entity('arbitrary committee', 'organization', committee._id)
    new_bill.add_related_bill("HB 99", legislative_session="1899",
                              relation_type="prior-session")
    new_bill.add_sponsorship('Adam Smith', classification='extra sponsor',
                             entity_type='person', primary=False,
                             entity_id=legislator.id)
    new_bill.add_sponsorship('Jane Smith', classification='lead sponsor',
                             entity_type='person', primary=True)
    new_bill.add_abstract('This is an act about axes and taxes and tacks.',
                          note="official")
    new_bill.add_document_link('Fiscal Note', 'http://example.com/fn.pdf',
                               media_type='application/pdf')
    new_bill.add_document_link('Fiscal Note', 'http://example.com/fn.html',
                               media_type='text/html')
    new_bill.add_version_link('Fiscal Note', 'http://example.com/v/1',
                              media_type='text/html')
    new_bill.add_source('http://example.com/source')

    # Import the organizations, then both bills.
    org_importer = OrganizationImporter('jid')
    org_importer.import_data([house.as_dict(), committee.as_dict()])
    person_importer = PersonImporter('jid')
    # Since we have to create this person behind the back of the import
    # transaction, we fake the json-id to db-id mapping (they match here).
    # This pokes at an implementation detail, but it is the cleanest way to
    # short-circuit the json id lookup.
    person_importer.json_to_db_id['person-id'] = 'person-id'
    BillImporter('jid', org_importer, person_importer).import_data(
        [prior_bill.as_dict(), new_bill.as_dict()])

    # Fetch the imported bill and check its scalar fields.
    stored = Bill.objects.get(identifier='HB 1')
    assert stored.from_organization.classification == 'lower'
    assert stored.identifier == new_bill.identifier
    assert stored.title == new_bill.title
    assert stored.classification == new_bill.classification
    assert stored.subject == ['taxes', 'axes']
    assert stored.abstracts.get().note == 'official'

    # Alternate title and identifier were stored.
    assert stored.other_titles.get().title == 'Tack & Axe Tax Act'
    assert stored.other_identifiers.get().identifier == 'SB 9'

    # Actions arrived in order (an ordering bug here would be intermittent).
    actions = list(stored.actions.all())
    assert len(actions) == 2
    assert actions[0].organization == Organization.objects.get(classification='lower')
    assert actions[0].description == "introduced in house"
    assert actions[1].description == "sent to arbitrary committee"
    assert (actions[1].related_entities.get().organization ==
            Organization.objects.get(classification='committee'))

    # The related bill was recorded and resolved to the prior-session bill.
    related = stored.related_bills.get()
    assert related.identifier == 'HB 99'
    assert related.related_bill.identifier == 'HB 99'

    # One unlinked (primary) sponsor and one linked to `legislator`.
    sponsorships = stored.sponsorships.all()
    assert len(sponsorships) == 2
    for sponsorship in sponsorships:
        if sponsorship.primary:
            assert sponsorship.person is None
            assert sponsorship.organization is None
        else:
            assert sponsorship.person == legislator

    # Versions and documents kept their links.
    versions = stored.versions.all()
    assert len(versions) == 1
    assert versions[0].links.count() == 1
    documents = stored.documents.all()
    assert len(documents) == 1
    assert documents[0].links.count() == 2

    # Source was stored.
    assert stored.sources.count() == 1
示例4: scrape
# 需要导入模块: from pupa.scrape import Bill [as 别名]
# 或者: from pupa.scrape.Bill import add_identifier [as 别名]
def scrape(self):
    """Scrape Chicago City Council matters introduced in the last three days.

    Yields a pupa Bill per matter, preceded by a VoteEvent for every action
    that has a recorded result.
    """
    window_start = datetime.datetime.now() - datetime.timedelta(3)
    for matter in self.matters(window_start):
        record_id = matter['MatterId']
        intro_date = matter['MatterIntroDate']
        title = matter['MatterTitle']
        identifier = matter['MatterFile']
        # Skip records missing any essential field.
        if not (intro_date and title and identifier):
            continue
        bill_session = self.session(self.toTime(intro_date))
        matter_type = matter['MatterTypeName']
        bill_type = BILL_TYPES[matter_type]
        # A leading 'S' is stripped from the identifier but preserved
        # verbatim as an alternate identifier on the bill.
        alternate_identifiers = []
        if identifier.startswith('S'):
            alternate_identifiers.append(identifier)
            identifier = identifier[1:]
        bill = Bill(identifier=identifier,
                    legislative_session=bill_session,
                    title=title,
                    classification=bill_type,
                    from_organization={"name": "Chicago City Council"})
        web_url = self.legislation_detail_url(record_id)
        api_url = 'http://webapi.legistar.com/v1/chicago/matters/{}'.format(record_id)
        bill.add_source(web_url, note='web')
        bill.add_source(api_url, note='api')
        for alternate in alternate_identifiers:
            bill.add_identifier(alternate)
        for action, vote in self.actions(record_id):
            act = bill.add_action(**action)
            if action['description'] == 'Referred':
                referred_to = matter['MatterBodyName']
                if referred_to != 'City Council':
                    act.add_related_entity(referred_to,
                                           'organization',
                                           entity_id=_make_pseudo_id(name=referred_to))
            result, votes = vote
            if result:
                vote_event = VoteEvent(legislative_session=bill.legislative_session,
                                       motion_text=action['description'],
                                       organization=action['organization'],
                                       classification=None,
                                       start_date=action['date'],
                                       result=result,
                                       bill=bill)
                vote_event.add_source(web_url)
                vote_event.add_source(api_url + '/histories')
                for ballot in votes:
                    option = ballot['VoteValueName'].lower()
                    # Map Legistar's option names to pupa's, falling back
                    # to the raw (lowercased) value.
                    vote_event.vote(self.VOTE_OPTIONS.get(option, option),
                                    ballot['VotePersonName'].strip())
                yield vote_event
        for sponsorship in self.sponsorships(record_id):
            bill.add_sponsorship(**sponsorship)
        for topic in self.topics(record_id):
            bill.add_subject(topic['MatterIndexName'].strip())
        for attachment in self.attachments(record_id):
            if attachment['MatterAttachmentName']:
                bill.add_version_link(attachment['MatterAttachmentName'],
                                      attachment['MatterAttachmentHyperlink'],
                                      media_type="application/pdf")
        bill.extras = {'local_classification': matter_type}
        # Attach full text (plain and RTF) when the API provides it; NUL
        # characters are stripped from the RTF payload.
        full_text = self.text(record_id)
        if full_text:
            if full_text['MatterTextPlain']:
                bill.extras['plain_text'] = full_text['MatterTextPlain']
            if full_text['MatterTextRtf']:
                bill.extras['rtf_text'] = full_text['MatterTextRtf'].replace(u'\u0000', '')
        yield bill
示例5: scrape_bill
# 需要导入模块: from pupa.scrape import Bill [as 别名]
# 或者: from pupa.scrape.Bill import add_identifier [as 别名]
def scrape_bill(self, session, bill_url):
    """Scrape a single Tennessee bill detail page into a pupa Bill.

    :param session: legislative session identifier assigned to the Bill
    :param bill_url: absolute URL of the bill detail page
    NOTE(review): this listing is truncated after the action-table parsing;
    the companion-bill handling that follows is not visible here.
    """
    page = self.get(bill_url).text
    page = lxml.html.fromstring(page)
    page.make_links_absolute(bill_url)
    try:
        bill_id = page.xpath('//span[@id="lblBillNumber"]/a[1]')[0].text
    except IndexError:
        self.logger.warning("Something is wrong with bill page, skipping.")
        return
    # lblCompNumber holds the matching (companion) bill number, if any.
    secondary_bill_id = page.xpath('//span[@id="lblCompNumber"]/a[1]')
    # checking if there is a matching bill
    if secondary_bill_id:
        secondary_bill_id = secondary_bill_id[0].text
        # swap ids if * is in secondary_bill_id
        if '*' in secondary_bill_id:
            bill_id, secondary_bill_id = secondary_bill_id, bill_id
        secondary_bill_id = secondary_bill_id.strip()
        # NOTE(review): replacing ' ' with ' ' is a no-op as written --
        # presumably this was meant to collapse doubled (or non-breaking)
        # spaces and got mangled in this listing; confirm against upstream.
        secondary_bill_id = secondary_bill_id.replace(' ', ' ')
    bill_id = bill_id.replace('*', '').replace(' ', ' ').strip()
    # Classify from the bill number (e.g. HB/SB, HJR/SJR, HR/SR).
    if 'B' in bill_id:
        bill_type = 'bill'
    elif 'JR' in bill_id:
        bill_type = 'joint resolution'
    elif 'R' in bill_id:
        bill_type = 'resolution'
    # NOTE(review): bill_type stays unbound if bill_id contains none of
    # B/JR/R, which would raise NameError at the Bill(...) call below.
    primary_chamber = 'lower' if 'H' in bill_id else 'upper'
    # secondary_chamber = 'upper' if primary_chamber == 'lower' else 'lower'
    title = page.xpath("//span[@id='lblAbstract']")[0].text
    if title is None:
        msg = '%s detail page was missing title info.'
        self.logger.warning(msg % bill_id)
        return
    # bill subject: comma-separated list preceding the first '-' in the title
    subject_pos = title.find('-')
    subjects = [s.strip() for s in title[:subject_pos - 1].split(',')]
    subjects = filter(None, subjects)   # drop empty entries
    bill = Bill(
        bill_id,
        legislative_session=session,
        chamber=primary_chamber,
        title=title,
        classification=bill_type,
    )
    for subject in subjects:
        bill.add_subject(subject)
    if secondary_bill_id:
        bill.add_identifier(secondary_bill_id)
    bill.add_source(bill_url)
    # Primary Sponsor: text after "by", with the primary-bill '*' stripped.
    sponsor = page.xpath("//span[@id='lblBillPrimeSponsor']")[0].text_content().split("by")[-1]
    sponsor = sponsor.replace('*', '').strip()
    if sponsor:
        bill.add_sponsorship(
            sponsor,
            classification='primary',
            entity_type='person',
            primary=True,
        )
    # bill text: the bill-number link points at the current version (PDF).
    btext = page.xpath("//span[@id='lblBillNumber']/a")[0]
    bill.add_version_link('Current Version', btext.get('href'),
                          media_type='application/pdf')
    # documents
    summary = page.xpath('//a[contains(@href, "BillSummaryArchive")]')
    if summary:
        bill.add_document_link('Summary', summary[0].get('href'))
    fiscal = page.xpath('//span[@id="lblFiscalNote"]//a')
    if fiscal:
        bill.add_document_link('Fiscal Note', fiscal[0].get('href'))
    amendments = page.xpath('//a[contains(@href, "/Amend/")]')
    for amendment in amendments:
        bill.add_document_link('Amendment ' + amendment.text, amendment.get('href'))
    # amendment notes in image with alt text describing doc inside <a>
    amend_fns = page.xpath('//img[contains(@alt, "Fiscal Memo")]')
    for afn in amend_fns:
        bill.add_document_link(
            afn.get('alt'),
            afn.getparent().get('href'),
            on_duplicate='ignore'
        )
    # actions
    atable = page.xpath("//table[@id='gvBillActionHistory']")[0]
    actions_from_table(bill, atable)
# if there is a matching bill
if secondary_bill_id:
#.........这里部分代码省略.........