本文整理汇总了Python中pupa.scrape.Organization类的典型用法代码示例。如果您正苦于以下问题:Python Organization类的具体用法?Python Organization怎么用?Python Organization使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Organization类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_fix_bill_id
def test_fix_bill_id():
    """An IMPORT_TRANSFORMERS entry for 'bill' should normalize identifiers
    (e.g. 'HB1' -> 'HB 1') so a vote event citing the raw form still resolves
    to the imported bill.
    """
    j = create_jurisdiction()
    j.legislative_sessions.create(name='1900', identifier='1900')
    org1 = ScrapeOrganization(name='House', classification='lower')
    bill = ScrapeBill('HB 1', '1900', 'Test Bill ID',
                      classification='bill', chamber='lower')
    oi = OrganizationImporter('jid')
    oi.import_data([org1.as_dict()])

    from pupa.settings import IMPORT_TRANSFORMERS
    # Insert a single space between the alpha prefix and the number,
    # dropping any zero padding.
    IMPORT_TRANSFORMERS['bill'] = {
        'identifier': lambda x: re.sub(r'([A-Z]*)\s*0*([-\d]+)', r'\1 \2', x, 1)
    }

    bi = BillImporter('jid', oi, DumbMockImporter())
    bi.import_data([bill.as_dict()])

    # The vote event references the un-normalized identifier 'HB1'.
    ve = ScrapeVoteEvent(legislative_session='1900', motion_text='passage',
                         start_date='1900-04-02', classification='passage:bill',
                         result='fail', bill_chamber='lower', bill='HB1',
                         identifier='4',
                         bill_action='passage',
                         organization=org1._id)
    VoteEventImporter('jid', DumbMockImporter(), oi, bi).import_data([
        ve.as_dict(),
    ])
    # Reset the transformer so later tests are unaffected.
    IMPORT_TRANSFORMERS['bill'] = {}

    ve = VoteEvent.objects.get()
    # BUG FIX: this was a bare comparison expression and never asserted anything.
    assert ve.bill.identifier == 'HB 1'
示例2: scrape_committees
def scrape_committees(self, session):
    """Yield an Organization (with members) for each committee in *session*.

    Committee and member data come from the OLIS API; member codes are
    resolved to names via index_legislators().
    """
    session_key = SESSION_KEYS[session]
    committees_response = self.api_client.get('committees', session=session_key)
    # Map legislator codes -> display names for member resolution.
    legislators = index_legislators(self, session_key)
    for committee in committees_response:
        org = Organization(
            chamber={'S': 'upper', 'H': 'lower',
                     'J': 'legislature'}[committee['HouseOfAction']],
            name=committee['CommitteeName'],
            classification='committee')
        org.add_source(
            'https://olis.leg.state.or.us/liz/{session}'
            '/Committees/{committee}/Overview'.format(session=session_key,
                                                      committee=committee['CommitteeName']))
        members_response = self.api_client.get('committee_members',
                                               session=session_key,
                                               committee=committee['CommitteeCode'])
        for member in members_response:
            try:
                member_name = legislators[member['LegislatorCode']]
            except KeyError:
                # FIX: logger.warn() is deprecated; warning() is the
                # supported spelling.
                logger.warning('Legislator {} not found in session {}'.format(
                    member['LegislatorCode'], session_key))
                # Fall back to the raw code so the membership is still recorded.
                member_name = member['LegislatorCode']
            org.add_member(member_name, role=member['Title'] if member['Title'] else '')
        yield org
示例3: get_organizations
def get_organizations(self):
    """Yield this jurisdiction's Organization, plus leader/member Posts."""
    organization = Organization(self.name, classification=self.classification)
    leader_role = styles_of_address[self.division_id]['Leader']
    # An explicit member_role on the scraper overrides the style-of-address default.
    member_role = self.member_role or styles_of_address[self.division_id]['Member']
    parent = Division.get(self.division_id)
    # Don't yield posts for premiers.
    if parent._type not in ('province', 'territory'):
        # Yield posts to allow ca_on_toronto to make changes.
        post = Post(role=leader_role, label=parent.name, division_id=parent.id, organization_id=organization._id)
        yield post
    children = [child for child in parent.children() if child._type != 'place' and child._type not in self.exclude_types]
    for child in children:
        # Accept a child with no validFrom only when skip_null_valid_from is falsy,
        # OR a dated child whose validFrom is not in the future (or matches
        # self.valid_from exactly). NOTE: `and` binds tighter than `or` here.
        if not self.skip_null_valid_from and not child.attrs.get('validFrom') or child.attrs.get('validFrom') and (child.attrs['validFrom'] <= datetime.now().strftime('%Y-%m-%d') or child.attrs['validFrom'] == self.valid_from):
            if self.use_type_id:
                # Build a label like "Ward 3" from the id's last path segment ("ward:3").
                label = child.id.rsplit('/', 1)[1].capitalize().replace(':', ' ')
            else:
                label = child.name
            # Yield posts to allow ca_on_toronto to make changes.
            post = Post(role=member_role, label=label, division_id=child.id, organization_id=organization._id)
            yield post
    # With no child divisions, fall back to numbered at-large seats.
    if not children and parent.attrs['posts_count']:
        for i in range(1, int(parent.attrs['posts_count'])):  # exclude Mayor
            organization.add_post(role=member_role, label='{} (seat {})'.format(parent.name, i), division_id=parent.id)
    yield organization
示例4: test_parent_id_resolution
def test_parent_id_resolution():
    """A child's scrape-time parent_id placeholder resolves to the parent org."""
    un = ScrapeOrganization('UN', classification='international')
    unesco = ScrapeOrganization('UNESCO', classification='unknown', parent_id=un._id)
    OrganizationImporter('jurisdiction-id').import_data([un.as_dict(), unesco.as_dict()])
    assert Organization.objects.count() == 2
    assert Organization.objects.get(name='UN').children.count() == 1
    assert Organization.objects.get(name='UNESCO').parent.name == 'UN'
示例5: test_deduplication_other_name_exists
def test_deduplication_other_name_exists():
    """Importing an org that matches an existing record dedupes to one row."""
    create_jurisdictions()
    create_org()
    incoming = ScrapeOrganization('UN', classification='international').as_dict()
    OrganizationImporter('jid1').import_data([incoming])
    assert Organization.objects.all().count() == 1
示例6: scrape_lower_committee
def scrape_lower_committee(self, name, url):
    """Return an Organization for the lower-chamber committee at *url*.

    The first member link on the page is treated as the chair; members are
    de-duplicated by name.
    """
    page = self.lxmlize(url)
    committee = Organization(chamber='lower', name=name,
                             classification="committee")
    committee.add_source(url)

    seen = set()
    member_links = self.get_nodes(
        page,
        '//div[@class="mod-inner"]//a[contains(@href, "mem")]')
    for member_link in member_links:
        member_name = member_link.text
        if member_name is None:
            continue
        # The page lists the chair first; everyone else is a plain member.
        member_role = 'chair' if member_link == member_links[0] else 'member'
        # BUG FIX: the original tested the committee *name* against `seen`
        # (which only ever holds member names), so the condition was always
        # true and duplicate member links were never filtered out.
        if member_name not in seen:
            committee.add_member(member_name, member_role)
            seen.add(member_name)
    return committee
示例7: _scrape_lower_special_committees
def _scrape_lower_special_committees(self):
    """Yield Organization objects for the House's special committees."""
    url = 'http://house.louisiana.gov/H_Cmtes/SpecialCommittees.aspx'
    page = self.lxmlize(url)
    accordion = page.xpath('//div[@class="accordion"]')[0]
    for header in accordion.xpath('./h3'):
        committee_name = self._normalize_committee_name(
            header.xpath('string()').strip())
        # Joint committees belong to the whole legislature, not just the House.
        chamber = 'legislature' if committee_name.startswith('Joint') else 'lower'
        committee = Organization(committee_name, chamber=chamber,
                                 classification='committee')
        committee.add_source(url)
        member_rows = header.xpath('./following-sibling::div[@class="pane"]'
                                   '//tr[@class="linkStyle2"]')
        for row in member_rows:
            member_name = self._normalize_member_name(
                row.xpath('normalize-space(string(./th[1]))'))
            member_role = self._normalize_member_role(
                row.xpath('normalize-space(string(./th[2]))'))
            committee.add_member(member_name, member_role)
        yield committee
示例8: get_organizations
def get_organizations(self):
    """Yield this jurisdiction's Organization plus leader/member Posts."""
    exclude_type_ids = getattr(self, 'exclude_type_ids', [])
    use_type_id = getattr(self, 'use_type_id', False)
    organization = Organization(self.name, classification=self.classification)
    parent = Division.get(self.division_id)
    # Province/territory leaders (premiers) get no post.
    if parent._type not in ('province', 'territory'):
        yield Post(role=styles_of_address[self.division_id]['Leader'],
                   label=parent.name, division_id=parent.id,
                   organization_id=organization._id)
    children = [c for c in parent.children()
                if c._type != 'place' and c._type not in exclude_type_ids]
    for child in children:
        if not child:
            continue
        if use_type_id:
            # Build a label like "Ward 3" from the id's last segment ("ward:3").
            label = child.id.rsplit('/', 1)[1].capitalize().replace(':', ' ')
        else:
            label = child.name
        yield Post(role=styles_of_address[self.division_id]['Member'],
                   label=label, division_id=child.id,
                   organization_id=organization._id)
    # With no child divisions, fall back to numbered at-large seats.
    if not children and parent.attrs['posts_count']:
        for seat in range(1, int(parent.attrs['posts_count'])):  # exclude Mayor
            organization.add_post(role=styles_of_address[self.division_id]['Member'],
                                  label='{} (seat {})'.format(parent.name, seat),
                                  division_id=parent.id)
    yield organization
示例9: add_committees
def add_committees(self, legislator_page, legislator, chamber, url):
    """Record this legislator's committee memberships.

    Both chambers publish committees in the same table layout, so one
    implementation serves both.
    """
    rows = self.get_nodes(
        legislator_page,
        '//div[@id="ContentPlaceHolder1_TabSenator_TabCommittees"]//table/'
        'tr')
    if len(rows) == 0:
        return
    # First row is the table header.
    for row in rows[1:]:
        committee_name = self.get_node(row, './td[2]').text_content().strip()
        if not committee_name:
            continue
        role = self.get_node(row, './td[3]').text_content().strip()
        # Create each committee once, on first sighting, then reuse it.
        if committee_name not in self.committees:
            org = Organization(
                name=committee_name, chamber=chamber, classification='committee')
            org.add_source(url)
            self.committees[committee_name] = org
        self.committees[committee_name].add_member(
            legislator.name,
            role=role,
        )
示例10: scrape_approp_subcommittees
def scrape_approp_subcommittees(self, url):
    """Yield Appropriations subcommittees parsed from *url*."""
    doc = lxml.html.fromstring(self.get(url).text)
    # A parenthesized suffix on a member's name encodes their role.
    role_suffixes = (
        ('(C)', 'chairman'),
        ('(VC)', 'vice chairman'),
        ('(MVC)', 'minority vice chairman'),
    )
    for strong in doc.xpath('//strong'):
        com = Organization(
            name=strong.text.strip(),
            parent_id={
                'name': 'Appropriations',
                'classification': 'committee',
            },
            classification='committee',
        )
        com.add_source(url)
        legislators = strong.getnext().tail.replace('Senators', '').strip()
        for leg in re.split(', | and ', legislators):
            role = 'member'
            for suffix, suffix_role in role_suffixes:
                if leg.endswith(suffix):
                    role = suffix_role
                    # Drop the suffix plus the space preceding it.
                    leg = leg[:-(len(suffix) + 1)]
                    break
            com.add_member(leg, role=role)
        yield com
示例11: scrape_committee
def scrape_committee(self, term, href, name):
    """Yield an Organization for the committee page at *href*."""
    page = lxml.html.fromstring(self.get(href).text)
    page.make_links_absolute(href)
    members = page.xpath("//div[@class='view-content']"
                         "//a[contains(@href, 'members')]")
    # Infer the chamber from the URL path.
    chamber = None
    for fragment, value in (('/joint/', 'legislature'),
                            ('/senate/', 'upper'),
                            ('/house/', 'lower')):
        if fragment in href:
            chamber = value
            break
    if chamber is None:
        # interim committees and others were causing duplicate committee issues, skipping
        self.warning('Failed to identify chamber for {}; skipping'.format(href))
        return
    cttie = Organization(name, chamber=chamber, classification='committee')
    role_by_heading = {"Legislative Members": "member",
                       "Chairman": "chair",
                       "Vice Chairman": "member"}
    for link in members:
        member = link.text
        heading = link.xpath("ancestor::div/h2[@class='pane-title']/text()")[0].strip()
        role = role_by_heading[heading]
        # Skip placeholder entries and district-only labels.
        if member is None or member.startswith("District"):
            continue
        member = member.replace('Senator ', '').replace('Representative ', '')
        cttie.add_member(member, role=role)
    cttie.add_source(href)
    yield cttie
示例12: test_committee_add_member_person
def test_committee_add_member_person():
    """add_member with a Person object links person id, org id, and role."""
    committee = Organization('Defense', classification='committee')
    adams = Person('John Adams')
    committee.add_member(adams, role='chairman')
    membership = committee._related[0]
    assert membership.person_id == adams._id
    assert membership.organization_id == committee._id
    assert membership.role == 'chairman'
示例13: scrape_chamber
def scrape_chamber(self, chamber, session):
    """Yield non-empty committees of *chamber* for the given biennium."""
    url = "%s/GetActiveCommittees?biennium=%s" % (self._base_url, session)
    tree = lxml.etree.fromstring(self.get(url).content)
    agency_to_chamber = {'House': 'lower', 'Senate': 'upper'}
    for node in xpath(tree, "//wa:Committee"):
        agency = xpath(node, "string(wa:Agency)")
        # Only emit committees belonging to the requested chamber.
        if agency_to_chamber[agency] != chamber:
            continue
        name = xpath(node, "string(wa:Name)")
        org = Organization(name, chamber=chamber, classification='committee')
        org.extras['phone'] = xpath(node, "string(wa:Phone)")
        self.scrape_members(org, agency)
        org.add_source(url)
        if org._related:
            yield org
        else:
            self.warning('empty committee: %s', name)
示例14: scrape
def scrape(self):
    """Yield the Temecula City Council and each of its members."""
    urls = Urls(dict(list=legislators_url), self)
    council = Organization(
        'Temecula City Council',
        classification='legislature')
    council.add_source(urls.list.url)
    yield council
    # Skip the roster table's header row.
    for row in urls.list.xpath('//table[2]//tr')[1:]:
        name, role = row.xpath('td/p[1]//font/text()')
        image = row.xpath('td/img/@src').pop()
        person = Person(name, image=image)
        membership = person.add_membership(council, role=role)
        email, detail_url = row.xpath('td//a/@href')
        # Drop the 7-character scheme prefix (presumably "mailto:").
        membership.contact_details.append(
            dict(type='email', value=email[7:], note='work'))
        person.add_source(urls.list.url)
        person.add_source(detail_url)
        yield person
示例15: scrape
def scrape(self, chamber=None):
    """Yield NC standing/select committees for one chamber, or both."""
    base_url = ('http://www.ncga.state.nc.us/gascripts/Committees/'
                'Committees.asp?bPrintable=true&sAction=ViewCommitteeType&sActionDetails=')
    chamber_slugs = {'upper': ['Senate%20Standing', 'Senate%20Select'],
                     'lower': ['House%20Standing', 'House%20Select']}
    chambers = [chamber] if chamber else ['upper', 'lower']
    for chamber in chambers:
        for ctype in chamber_slugs[chamber]:
            listing_url = base_url + ctype
            doc = lxml.html.fromstring(self.get(listing_url).text)
            doc.make_links_absolute(listing_url)
            for link in doc.xpath('//ul/li/a'):
                name = link.text
                # skip committee of whole Senate
                if 'Whole Senate' in name:
                    continue
                url = link.get('href')
                committee = Organization(name=name, chamber=chamber,
                                         classification="committee")
                self.scrape_committee(committee, url)
                committee.add_source(url)
                if committee._related:
                    yield committee
                else:
                    self.warning('empty committee: %s', name)