This article collects typical usage examples of the Python method pupa.scrape.Organization.add_member. If you are wondering what Organization.add_member does, how to call it, or what it looks like in real code, the curated examples here may help. You can also explore further usage examples of its containing class, pupa.scrape.Organization.
Below are 15 code examples of Organization.add_member, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
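Before the scraped examples, here is a minimal sketch of the method's basic shape. It is not taken from any project below; the committee name, member names, and source URL are illustrative. As Examples 2 and 6 demonstrate, add_member accepts either a plain name string or a Person object:

from pupa.scrape import Organization, Person

# Minimal sketch (illustrative names): build a committee, attach members
# either by name string or by Person object, and record a source URL.
committee = Organization(name='Finance', classification='committee')
committee.add_member('Jane Doe', role='chair')    # membership by name
member = Person('John Smith')
committee.add_member(member, role='member')       # membership by Person object
committee.add_source('http://example.com/committees/finance')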
Example 1: scrape_interim_committee
# Required import: from pupa.scrape import Organization [as alias]
# Or: from pupa.scrape.Organization import add_member [as alias]
def scrape_interim_committee(self, link, name):
    url = re.sub(r'\s+', '', link.attrib['href'])
    html = self.get(url).text
    doc = lxml.html.fromstring(html)
    doc.make_links_absolute(url)

    if 'Subcommittee' in name:
        # Check whether the parent committee is manually defined first
        # before attempting to automatically resolve it.
        parent = WVCommitteeScraper.subcommittee_parent_map.get(name, None)
        if parent is None:
            parent = name.partition('Subcommittee')[0].strip()

        comm = Organization(
            name=name,
            classification='committee',
            parent_id={'name': parent, 'classification': 'joint'}
        )
    else:
        comm = Organization(name=name, classification='committee', chamber='joint')
    comm.add_source(url)

    xpath = '//a[contains(@href, "?member=")]'
    for link in doc.xpath(xpath):
        name = link.text_content().strip()
        name = re.sub(r'^Delegate\s+', '', name)
        name = re.sub(r'^Senator\s+', '', name)
        role = link.getnext().text or 'member'
        comm.add_member(name, role.strip())

    return comm
Example 2: test_committee_add_member_person
# Required import: from pupa.scrape import Organization [as alias]
# Or: from pupa.scrape.Organization import add_member [as alias]
def test_committee_add_member_person():
    c = Organization('Defense', classification='committee')
    p = Person('John Adams')
    c.add_member(p, role='chairman')
    assert c._related[0].person_id == p._id
    assert c._related[0].organization_id == c._id
    assert c._related[0].role == 'chairman'
Example 3: scrape_committee
# Required import: from pupa.scrape import Organization [as alias]
# Or: from pupa.scrape.Organization import add_member [as alias]
def scrape_committee(self, chamber, name, url):
    page = self.get(url).text
    page = lxml.html.fromstring(page)

    if page.xpath("//h3[. = 'Joint Committee']"):
        chamber = 'joint'

    subcommittee = page.xpath("//h3[@align='center']/text()")[0]
    if "Subcommittee" not in subcommittee:
        comm = Organization(
            chamber=chamber, name=name, classification='committee')
    else:
        comm = Organization(
            name=subcommittee, classification='committee',
            parent_id={'classification': chamber, 'name': name})
    comm.add_source(url)

    for link in page.xpath("//a[contains(@href, 'member=')]"):
        member = link.text.strip()
        mtype = link.xpath("string(../preceding-sibling::td[1])")
        mtype = mtype.strip(": \r\n\t").lower()
        comm.add_member(member, mtype)

    if not comm._related:
        self.warning('not saving %s, appears to be empty' % name)
    else:
        yield comm
Example 4: scrape_chamber
# Required import: from pupa.scrape import Organization [as alias]
# Or: from pupa.scrape.Organization import add_member [as alias]
def scrape_chamber(self, chamber):
    session = self.latest_session()
    # since we are scraping only the latest session
    session_id = session_metadata.session_id_meta_data[session]

    client = AZClient()
    committees = client.list_committees(
        sessionId=session_id,
        includeOnlyCommitteesWithAgendas='false',
        legislativeBody='S' if chamber == 'upper' else 'H',
    )
    for committee in committees.json():
        c = Organization(name=committee['CommitteeName'],
                         chamber=chamber, classification='committee')
        details = client.get_standing_committee(
            sessionId=session_id,
            legislativeBody='S' if chamber == 'upper' else 'H',
            committeeId=committee['CommitteeId'],
            includeMembers='true',
        )
        for member in details.json()[0]['Members']:
            c.add_member(
                u'{} {}'.format(member['FirstName'], member['LastName']),
                role=parse_role(member),
            )

        c.add_source(details.url)
        c.add_source(committees.url)
        yield c
Example 5: scrape_committee
# Required import: from pupa.scrape import Organization [as alias]
# Or: from pupa.scrape.Organization import add_member [as alias]
def scrape_committee(self, term, href, name):
    page = self.get(href).text
    page = lxml.html.fromstring(page)
    page.make_links_absolute(href)

    members = page.xpath("//div[@class='view-content']"
                         "//a[contains(@href, 'members')]")

    if '/joint/' in href:
        chamber = 'legislature'
    elif '/senate/' in href:
        chamber = 'upper'
    elif '/house/' in href:
        chamber = 'lower'
    else:
        # interim committees and others were causing duplicate committee issues, skipping
        self.warning('Failed to identify chamber for {}; skipping'.format(href))
        return

    cttie = Organization(name, chamber=chamber, classification='committee')

    for a in members:
        member = a.text
        role = a.xpath("ancestor::div/h2[@class='pane-title']/text()")[0].strip()
        role = {"Legislative Members": "member",
                "Chairman": "chair",
                "Vice Chairman": "member"}[role]
        if member is None or member.startswith("District"):
            continue

        member = member.replace('Senator ', '').replace('Representative ', '')
        cttie.add_member(member, role=role)

    cttie.add_source(href)
    yield cttie
Example 6: scrape
# Required import: from pupa.scrape import Organization [as alias]
# Or: from pupa.scrape.Organization import add_member [as alias]
def scrape(self):
    url = 'http://www.mec.mo.gov/EthicsWeb/CampaignFinance/CF11_SearchComm.aspx'

    for letter in ['a', 'e', 'i', 'o', 'u', 'y']:
        print("Searching '{}'".format(letter))
        initial = self.get(url).text
        parsed = lxml.html.fromstring(initial)

        page_n = 0
        data = get_form_data(parsed, first_time=True)
        data['ctl00$ContentPlaceHolder$txtCandLast'] = letter

        while True:
            page_n += 1
            print("Page: {}".format(page_n))

            r = self.post(url, data=data, cookies=dict(PageIndex=str(1)))
            output = lxml.html.fromstring(r.text)

            rows = output.cssselect('#ctl00_ContentPlaceHolder_grvSearch tr')
            for r in rows:
                tds = r.cssselect('td')
                if len(tds) > 3:
                    name = tds[2].text_content().strip()
                    _registrant = Person(
                        name=name,
                        source_identified=True
                    )

                    committee_name = tds[1].text_content().strip()
                    _office = Organization(
                        name=committee_name,
                        classification='Committee',
                        # parent_id=self.jurisdiction._state,
                        source_identified=True
                    )
                    _office.add_member(
                        _registrant,
                        role='committee candidate',
                        label='candidate for {n}'.format(n=_office.name),
                    )

                    yield _registrant
                    yield _office

            if not output.xpath("//*[@id='ctl00_ContentPlaceHolder_grvSearch_ctl28_lbtnNextPage']"):
                print(output.xpath("//*[@id='ctl00_ContentPlaceHolder_grvSearch_ctl28_lbtnNextPage']"))
                break

            data = get_form_data(output)
Example 7: scrape_approp_subcommittees
# Required import: from pupa.scrape import Organization [as alias]
# Or: from pupa.scrape.Organization import add_member [as alias]
def scrape_approp_subcommittees(self):
    URL = 'http://www.senate.michigan.gov/committee/appropssubcommittee.html'
    html = self.get(URL).text
    doc = lxml.html.fromstring(html)

    for strong in doc.xpath('//strong'):
        com = Organization(
            name=strong.text.strip(),
            parent_id=self._senate_appropriations,
            classification='committee',
        )
        com.add_source(URL)

        legislators = strong.getnext().tail.replace('Senators', '').strip()
        for leg in re.split(', | and ', legislators):
            if leg.endswith('(C)'):
                role = 'chairman'
                leg = leg[:-4]
            elif leg.endswith('(VC)'):
                role = 'vice chairman'
                leg = leg[:-5]
            elif leg.endswith('(MVC)'):
                role = 'minority vice chairman'
                leg = leg[:-6]
            else:
                role = 'member'
            com.add_member(leg, role=role)

        yield com
Example 8: scrape_committees
# Required import: from pupa.scrape import Organization [as alias]
# Or: from pupa.scrape.Organization import add_member [as alias]
def scrape_committees(self, session):
    session_key = SESSION_KEYS[session]
    committees_response = self.api_client.get('committees', session=session_key)

    legislators = index_legislators(self, session_key)

    for committee in committees_response:
        org = Organization(
            chamber={'S': 'upper', 'H': 'lower',
                     'J': 'legislature'}[committee['HouseOfAction']],
            name=committee['CommitteeName'],
            classification='committee')
        org.add_source(
            'https://olis.leg.state.or.us/liz/{session}'
            '/Committees/{committee}/Overview'.format(session=session_key,
                                                      committee=committee['CommitteeName']))

        members_response = self.api_client.get('committee_members',
                                               session=session_key,
                                               committee=committee['CommitteeCode'])
        for member in members_response:
            try:
                member_name = legislators[member['LegislatorCode']]
            except KeyError:
                logger.warn('Legislator {} not found in session {}'.format(
                    member['LegislatorCode'], session_key))
                member_name = member['LegislatorCode']
            org.add_member(member_name, role=member['Title'] if member['Title'] else '')

        yield org
Example 9: scrape_reps_comm
# Required import: from pupa.scrape import Organization [as alias]
# Or: from pupa.scrape.Organization import add_member [as alias]
def scrape_reps_comm(self):
    # As of 1/27/15, the committee page has the wrong
    # session number (126th) at the top, but
    # has newly elected people, so we're rolling with it.
    url = 'http://legislature.maine.gov/house/hsecoms.htm'
    page = self.get(url).text
    root = lxml.html.fromstring(page)

    count = 0
    for n in range(1, 12, 2):
        path = 'string(//body/center[%s]/h1/a)' % (n)
        comm_name = root.xpath(path)
        committee = Organization(chamber='lower', name=comm_name,
                                 classification='committee')

        count = count + 1
        path2 = '/html/body/ul[%s]/li/a' % (count)
        for el in root.xpath(path2):
            rep = el.text
            if rep.find('(') != -1:
                mark = rep.find('(')
                rep = rep[15:mark].strip()
            if 'chair' in rep.lower():
                role = 'chair'
                rep = re.sub(r'(?i)[\s,]*chair\s*$', '', rep).strip()
            else:
                role = 'member'
            committee.add_member(rep, role)
        committee.add_source(url)

        yield committee
Example 10: scrape_lower_committee
# Required import: from pupa.scrape import Organization [as alias]
# Or: from pupa.scrape.Organization import add_member [as alias]
def scrape_lower_committee(self, name, url):
    page = self.lxmlize(url)

    committee = Organization(chamber='lower', name=name,
                             classification="committee")
    committee.add_source(url)

    seen = set()
    member_links = self.get_nodes(
        page,
        '//div[@class="mod-inner"]//a[contains(@href, "mem")]')
    for member_link in member_links:
        member_name = member_link.text
        if member_name is None:
            continue

        # Figure out if this person is the chair.
        if member_link == member_links[0]:
            member_role = 'chair'
        else:
            member_role = 'member'

        # Deduplicate on the member's name, not the committee name.
        if member_name not in seen:
            committee.add_member(member_name, member_role)
            seen.add(member_name)

    return committee
Example 11: _scrape_upper_committee
# Required import: from pupa.scrape import Organization [as alias]
# Or: from pupa.scrape.Organization import add_member [as alias]
def _scrape_upper_committee(self, name, url2):
    cat = "Assignments.asp"
    url3 = url2.replace("default.asp", cat)

    committee = Organization(name,
                             chamber="upper",
                             classification="committee")
    committee.add_source(url2)

    page = self.lxmlize(url3)

    members = page.xpath('//table[@id="table38"]//font/a/b')
    for link in members:
        role = "member"
        if link == members[0]:
            role = "Chairman"
        if link == members[1]:
            role = "Vice-Chairman"

        name = link.xpath('string()')
        name = name.replace('Senator ', '')
        name = re.sub(r'\s{2,}', ' ', name).strip()

        committee.add_member(name, role)

    yield committee
Example 12: scrape_approp_subcommittees
# Required import: from pupa.scrape import Organization [as alias]
# Or: from pupa.scrape.Organization import add_member [as alias]
def scrape_approp_subcommittees(self, url):
    html = self.get(url).text
    doc = lxml.html.fromstring(html)

    for strong in doc.xpath('//strong'):
        com = Organization(
            name=strong.text.strip(),
            parent_id={
                'name': 'Appropriations',
                'classification': 'committee',
            },
            classification='committee',
        )
        com.add_source(url)

        legislators = strong.getnext().tail.replace('Senators', '').strip()
        for leg in re.split(', | and ', legislators):
            if leg.endswith('(C)'):
                role = 'chairman'
                leg = leg[:-4]
            elif leg.endswith('(VC)'):
                role = 'vice chairman'
                leg = leg[:-5]
            elif leg.endswith('(MVC)'):
                role = 'minority vice chairman'
                leg = leg[:-6]
            else:
                role = 'member'
            com.add_member(leg, role=role)

        yield com
Example 13: scrape_senate_committee
# Required import: from pupa.scrape import Organization [as alias]
# Or: from pupa.scrape.Organization import add_member [as alias]
def scrape_senate_committee(self, url):
    html = self.get(url).text
    doc = lxml.html.fromstring(html)

    headers = doc.xpath('(//div[@class="row"])[2]//h1')
    assert len(headers) == 1
    name = ' '.join(headers[0].xpath('./text()'))
    name = re.sub(r'\s+Committee.*$', '', name)

    com = Organization(chamber='upper', name=name, classification='committee')

    for member in doc.xpath('(//div[@class="row"])[3]/div[1]/ul[1]/li'):
        text = member.text_content()
        member_name = member.xpath('./a/text()')[0].replace('Representative ', '')

        if 'Committee Chair' in text:
            role = 'chair'
        elif 'Minority Vice' in text:
            role = 'minority vice chair'
        elif 'Vice' in text:
            role = 'majority vice chair'
        else:
            role = 'member'

        com.add_member(member_name, role=role)

    com.add_source(url)
    yield com
Example 14: handle_page
# Required import: from pupa.scrape import Organization [as alias]
# Or: from pupa.scrape.Organization import add_member [as alias]
def handle_page(self):
    name = self.doc.xpath('//h2[@class="committeeName"]')[0].text
    if name.startswith('Appropriations Subcommittee'):
        name = name.replace('Appropriations ', '')
        parent = {'name': 'Appropriations', 'classification': 'upper'}
        chamber = None
    else:
        if name.startswith('Committee on'):
            name = name.replace('Committee on ', '')
        parent = None
        chamber = 'upper'

    comm = Organization(name=name, classification="committee",
                        chamber=chamber, parent_id=parent)

    for dt in self.doc.xpath('//div[@id="members"]/dl/dt'):
        role = dt.text.replace(': ', '').strip().lower()
        member = dt.xpath('./following-sibling::dd')[0].text_content()
        member = self.clean_name(member)
        comm.add_member(member, role=role)

    for ul in self.doc.xpath('//div[@id="members"]/ul/li'):
        member = self.clean_name(ul.text_content())
        comm.add_member(member)

    comm.add_source(self.url)
    yield comm
Example 15: scrape_page
# Required import: from pupa.scrape import Organization [as alias]
# Or: from pupa.scrape.Organization import add_member [as alias]
def scrape_page(self, link, chamber=None):
    page = self.lxmlize(link.attrib['href'])

    comName = link.text
    roles = {
        "Chair": "chair",
        "Vice Chair": "vice-chair",
        "Vice-Chair": "vice-chair",
    }

    committee = Organization(comName,
                             chamber=chamber,
                             classification='committee')
    committee.add_source(link.attrib['href'])

    for member in page.xpath('//div[@class="members"]/'
                             'div[@class="roster-item"]'):
        details = member.xpath('.//div[@class="member-details"]')[0]
        person = details.xpath('./h4')[0].text_content()

        # This page does random weird things with whitespace in names.
        person = ' '.join(person.strip().split())
        if not person:
            continue

        role = details.xpath('./span[@class="member-role"]')
        if role:
            role = roles[role[0].text]
        else:
            role = 'member'
        committee.add_member(person, role=role)

    yield committee