本文整理汇总了Python中pupa.scrape.Organization.add_identifier方法的典型用法代码示例。如果您正苦于以下问题:Python Organization.add_identifier方法的具体用法?Python Organization.add_identifier怎么用?Python Organization.add_identifier使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pupa.scrape.Organization
的用法示例。
在下文中一共展示了Organization.add_identifier方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: scrape
# Required import: from pupa.scrape import Organization
# Method demonstrated: Organization.add_identifier
def scrape(self):
    """Yield one pupa Organization per committee code, merged across terms.

    Builds a per-code lookup of committee instances from all legislative
    sessions, creates an Organization from the canonical (first) instance,
    and records each term's decision-body id in ``extras``.
    """
    sessions = reversed(self.jurisdiction.legislative_sessions)
    committee_term_instances = committees_from_sessions(self, sessions)
    committees_by_code = build_lookup_dict(self, data_list=committee_term_instances, index_key='code')

    for code, instances in committees_by_code.items():
        # TODO: Figure out how to edit city council org.
        if code == 'CC':
            continue
        extras = {'tmmis_decision_body_ids': []}
        for i, inst in enumerate(instances):
            # TODO: Ensure this survives addition of new term (2017)
            # so specific year always creates
            canonical_i = 0
            if i == canonical_i:
                # Only the canonical instance creates the Organization.
                o = Organization(name=inst['name'], classification='committee')
                extras.update({'description': inst['info']})
                o.add_identifier(inst['code'], scheme=TWO_LETTER_ORG_CODE_SCHEME)
            # Every instance contributes its term -> decision-body mapping.
            extras['tmmis_decision_body_ids'].append({inst['term']: inst['decision_body_id']})
            o.extras = extras
            o.add_source(inst['source_url'])
            if instances[canonical_i]['name'] != inst['name']:
                # TODO: Add start_date and end_date
                o.add_name(inst['name'])
        yield o
示例2: scrape_committees
# Required import: from pupa.scrape import Organization
# Method demonstrated: Organization.add_identifier
def scrape_committees(self, repos):
    """Yield Organizations for congressional committees and subcommittees.

    Fetches the unitedstates/congress-legislators YAML files named in
    *repos* and converts each committee (and nested subcommittee) entry
    into a pupa Organization with links, contact details and identifiers.
    """
    for repo in repos:
        source = "https://raw.githubusercontent.com/unitedstates/congress-legislators/master/{0}".format(repo)
        committees = self.fetch_yaml(source)
        for committee in committees:
            org = Organization(committee["name"], classification="committee")
            org.add_source(source)
            for key in committee.keys() & {"url", "rss_url"}:
                org.add_link(committee[key])
            for key in committee.keys() & {"phone", "address"}:
                # Phone numbers are stored under the OCD "voice" type;
                # addresses keep their own key as the type.
                if key == "phone":
                    org.add_contact_detail(type="voice", value=committee[key])
                else:
                    org.add_contact_detail(type=key, value=committee[key])
            for key in committee.keys() & {"senate_committee_id", "house_committee_id", "thomas_id"}:
                org.add_identifier(committee[key], scheme=key)
            if "subcommittees" in committee:
                for subcommittee in committee["subcommittees"]:
                    sub_org = Organization(subcommittee["name"], classification="committee", parent_id=org._id)
                    sub_org.add_identifier(subcommittee["thomas_id"], scheme="thomas")
                    sub_org.add_source(source)
                    for key in subcommittee.keys() & {"phone", "address"}:
                        # BUG FIX: the original read committee[key] here,
                        # attaching the parent's contact info (or raising
                        # KeyError) instead of the subcommittee's own.
                        if key == "phone":
                            sub_org.add_contact_detail(type="voice", value=subcommittee[key])
                        else:
                            sub_org.add_contact_detail(type=key, value=subcommittee[key])
                    yield sub_org
            yield org
示例3: test_full_organization
# Required import: from pupa.scrape import Organization
# Method demonstrated: Organization.add_identifier
def test_full_organization():
    """Round-trip a fully-populated scraped Organization through the importer.

    Builds a ScrapeOrganization with an identifier, other name, contact
    detail, link and source, imports it, then asserts every field survived
    into the database model.
    """
    org = ScrapeOrganization('United Nations', classification='international')
    org.add_identifier('un')
    org.add_name('UN', start_date='1945')
    org.add_contact_detail(type='phone', value='555-555-1234', note='this is fake')
    org.add_link('http://example.com/link')
    org.add_source('http://example.com/source')

    # import org
    od = org.as_dict()
    OrganizationImporter('jurisdiction-id').import_data([od])

    # get person from db and assert it imported correctly
    o = Organization.objects.get()
    assert 'ocd-organization' in o.id
    assert o.name == org.name
    # add_identifier was called without a scheme, so scheme imports as ''.
    assert o.identifiers.all()[0].identifier == 'un'
    assert o.identifiers.all()[0].scheme == ''
    assert o.other_names.all()[0].name == 'UN'
    assert o.other_names.all()[0].start_date == '1945'
    assert o.contact_details.all()[0].type == 'phone'
    assert o.contact_details.all()[0].value == '555-555-1234'
    assert o.contact_details.all()[0].note == 'this is fake'
    assert o.links.all()[0].url == 'http://example.com/link'
    assert o.sources.all()[0].url == 'http://example.com/source'
示例4: scrape
# Required import: from pupa.scrape import Organization
# Method demonstrated: Organization.add_identifier
def scrape(self):
    """Yield committee Organizations, including councillor memberships.

    Like the simpler variant, but also skips instances whose committee
    code could not be deduced and, for the canonical instance, scrapes
    councillor members and creates one post per distinct role.
    """
    sessions = reversed(self.jurisdiction.legislative_sessions)
    committee_term_instances = committees_from_sessions(self, sessions)
    committees_by_code = build_lookup_dict(self, data_list=committee_term_instances, index_key='code')

    for code, instances in committees_by_code.items():
        # TODO: Figure out how to edit city council org.
        if code == 'CC':
            continue
        # When there are no meetings scheduled and was no way to deduce committee code.
        if not code:
            continue
        extras = {'tmmis_decision_body_ids': []}
        for i, inst in enumerate(instances):
            # TODO: Ensure this survives addition of new term (2017)
            # so specific year always creates
            canonical_i = 0
            if i == canonical_i:
                o = Organization(name=inst['name'], classification='committee')
                extras.update({'description': inst['info']})
                o.add_identifier(inst['code'], scheme=TWO_LETTER_ORG_CODE_SCHEME)

                # TODO: Scrape non-councillor members
                meeting_id = self.referenceMeetingId(inst['code'], inst['term'])
                if meeting_id:
                    seen_posts = []
                    membership_url = MEMBERSHIP_URL_TEMPLATE.format(meeting_id)
                    for councillor in self.councillorMembers(membership_url):
                        o.add_member(councillor['name'], councillor['role'])
                        # Create each distinct role as a post exactly once.
                        if councillor['role'] not in seen_posts:
                            o.add_post(
                                role=councillor['role'],
                                label=councillor['role'],
                                # TODO: More specific divisions for some committee?
                                division_id=self.jurisdiction.division_id,
                            )
                            seen_posts.append(councillor['role'])

            # Every instance contributes its term -> decision-body mapping.
            extras['tmmis_decision_body_ids'].append({inst['term']: inst['decision_body_id']})
            o.extras = extras
            o.add_source(inst['source_url'])
            if instances[canonical_i]['name'] != inst['name']:
                # TODO: Add start_date and end_date
                o.add_name(inst['name'])
        yield o
示例5: scrape
# Required import: from pupa.scrape import Organization
# Method demonstrated: Organization.add_identifier
def scrape(self, session=None):
    """Yield Wyoming committee Organizations from the LSO JSON API.

    Fetches the committee list for *session* (defaulting to the latest
    session), then the detail record for each committee, mapping members,
    extras and WY-specific identifiers onto a pupa Organization.
    """
    if session is None:
        session = self.latest_session()
        self.info('no session specified, using %s', session)

    # com_types = ['J', 'SE', 'O']
    # base_url = 'https://wyoleg.gov/LsoService/api/committeeList/2018/J'
    url = 'https://wyoleg.gov/LsoService/api/committees/{}'.format(session)
    response = self.get(url)
    coms_json = json.loads(response.content.decode('utf-8'))

    for row in coms_json:
        com_url = 'https://wyoleg.gov/LsoService/api/committeeDetail/{}/{}'.format(
            session, row['ownerID'])
        com_response = self.get(com_url)
        com = json.loads(com_response.content.decode('utf-8'))

        # WY doesn't seem to have any house/senate only committees that I can find
        committee = Organization(
            name=com['commName'], chamber='legislature', classification='committee')

        for member in com['commMembers']:
            role = 'chairman' if member['chairman'] == 'Chairman' else 'member'
            committee.add_member(member['name'], role)

        # some WY committees have non-legislators appointed to the member by the Governor
        # but the formatting is super inconsistent
        if com['otherMembers']:
            committee.extras['other_members'] = com['otherMembers']

        committee.extras['wy_id'] = com['commID']
        committee.extras['wy_code'] = com['ownerID']
        committee.extras['wy_type_code'] = com['type']
        committee.extras['budget'] = com['budget']
        if com['statAuthority']:
            committee.extras['statutory_authority'] = com['statAuthority']
        if com['number']:
            committee.extras['seat_distribution'] = com['number']

        committee.add_identifier(
            scheme='WY Committee ID', identifier=str(com['commID']))
        committee.add_identifier(
            scheme='WY Committee Code', identifier=str(com['ownerID']))
        if com['description']:
            committee.add_identifier(
                scheme='Common Name', identifier=com['description'])

        source_url = 'http://wyoleg.gov/Committees/{}/{}'.format(
            session, com['ownerID'])
        committee.add_source(source_url)
        yield committee
示例6: scrape_committee
# Required import: from pupa.scrape import Organization
# Method demonstrated: Organization.add_identifier
def scrape_committee(self, committee_id):
    """Convert one legacy Open States API committee into a pupa Organization.

    Pops every field off the legacy dict as it is consumed so the final
    ``assert not old`` guarantees no field was silently dropped. Also
    registers the new Organization under all of its legacy ids and queues
    member roles for later import.
    """
    old = self.api('committees/' + committee_id + '?')

    id = old.pop('id')
    old.pop('created_at')
    old.pop('updated_at')
    old.pop('country', None)
    old.pop('level', None)
    old.pop('state')
    old.pop('votesmart_id', None)
    old.pop('+short_name', None)
    old.pop('+session', None)
    old.pop('+az_committee_id', None)

    com = old.pop('committee')
    sub = old.pop('subcommittee')
    parent_id = old.pop('parent_id')

    chamber = old.pop('chamber')
    if chamber == 'joint':
        chamber = ''
    if self.state in ('ne', 'dc'):
        # Unicameral jurisdictions use the generic 'legislature' chamber.
        chamber = 'legislature'

    if sub:
        if parent_id:
            parent = self._committees[parent_id]._id
            new = Organization(sub, parent_id=parent, classification='committee')
        else:
            # Orphan subcommittee: fold the parent name into the title.
            new = Organization(com + ': ' + sub, chamber=chamber, classification='committee')
    else:
        new = Organization(com, chamber=chamber, classification='committee')
        assert parent_id is None

    # all_ids
    for id in old.pop('all_ids'):
        new.add_identifier(id, scheme='openstates')
        self._committees[id] = new
    # NOTE(review): the loop above rebinds ``id``, so the role tuples below
    # use the *last* entry of all_ids rather than the originally popped
    # 'id'. Preserved as-is — confirm this is intended.

    # sources
    for source in old.pop('sources'):
        new.add_source(**source)

    # members
    start, end = self.get_term_years()
    for role in old.pop('members'):
        # leg_id, com_id, role, start, end
        if role['leg_id']:
            self._roles.add((role['leg_id'], id, role['role'], start, end))

    to_extras = ['+twitter', '+description', '+code', '+secretary', '+office_hours',
                 '+office_phone', '+meetings_info', '+status', '+aide', '+contact_info',
                 '+comm_type', 'comm_type', 'aide', 'contact_info', '+town_represented',
                 '+action_code',
                 ]
    for k in to_extras:
        v = old.pop(k, None)
        if v:
            new.extras[k.replace('+', '')] = v

    # Anything left over is a field we forgot to handle — fail loudly.
    assert not old, old.keys()
    return new