本文整理汇总了Python中fiftystates.scrape.legislators.Legislator.add_role方法的典型用法代码示例。如果您正苦于以下问题:Python Legislator.add_role方法的具体用法?Python Legislator.add_role怎么用?Python Legislator.add_role使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 fiftystates.scrape.legislators.Legislator 的用法示例。
在下文中一共展示了Legislator.add_role方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: fetch_member
# 需要导入模块: from fiftystates.scrape.legislators import Legislator [as 别名]
# 或者: from fiftystates.scrape.legislators.Legislator import add_role [as 别名]
def fetch_member(self, url, name, term, chamber):
    """Scrape one Virginia legislator's detail page and save the record.

    `url` is a site-relative path; `name` may carry a "-Resigned" or
    "-Member" suffix for members who left or joined mid-term.
    """
    parties = {'R': 'Republican', 'D': 'Democratic', 'I': 'Independent'}
    header_re = re.compile(
        r'\((R|D|I)\) - (?:House|Senate) District\s+(\d+)')

    url = 'http://leg6.state.va.us' + url

    # Handle resignations and special elections: strip the status suffix
    # from the name before saving.
    status = re.search(r'-(Resigned|Member) (\d{1,2}/\d{1,2})?', name)
    if status:
        action, date = status.groups()
        name = name.rsplit('-')[0]
        if action == 'Resigned':
            pass  # TODO: set end date
        elif action == 'Member':
            pass  # TODO: set start date

    with self.urlopen(url) as html:
        doc = lxml.html.fromstring(html)

        # Header looks like "(R) - House District 42".
        header = doc.xpath('//h3/font/text()')[0]
        party, district = header_re.match(header).groups()

        legislator = Legislator(term, chamber, district, name.strip(),
                                party=parties[party])
        legislator.add_source(url)

        # The first "linkSect" list on the page holds committee assignments.
        for committee in doc.xpath('//ul[@class="linkSect"][1]/li/a/text()'):
            legislator.add_role('committee member', term=term,
                                chamber=chamber, committee=committee)

        self.save_legislator(legislator)
示例2: scrape_legislator_data
# 需要导入模块: from fiftystates.scrape.legislators import Legislator [as 别名]
# 或者: from fiftystates.scrape.legislators.Legislator import add_role [as 别名]
def scrape_legislator_data(self, url, chamber):
    """Scrape every New Mexico legislator listed on the 2010 roster page.

    Follows each member's detail link to pick up district, party, and
    committee memberships before saving.
    """
    parties = {'R': 'Republican', 'D': 'Democrat'}
    with self.urlopen(url) as page:
        page = BeautifulSoup(page)
        roster = page.find('table', id='ctl00_mainCopy_DataList1')
        for cell in roster('td'):
            spans = cell('span')
            if not spans:
                self.debug('Found an empty cell in %s. Continuing' % url)
                continue

            full_name = ' '.join([s.string.strip() for s in spans])
            # First span holds "First [Middle]"; second span holds the
            # last name.
            first_parts = spans[0].string.strip().split()
            if len(first_parts) == 2:
                first_name, middle_name = first_parts
            else:
                first_name, middle_name = spans[0].string.strip(), ''
            last_name = spans[1].string.strip()

            details_url = get_abs_url(url, cell.find('a')['href'])
            with self.urlopen(details_url) as details:
                details = BeautifulSoup(details)
                district = details.find(
                    'a', id='ctl00_mainCopy_LegisInfo_DISTRICTLabel').string.strip()
                party = parties[details.find(
                    'span', id='ctl00_mainCopy_LegisInfo_PARTYLabel').string]

                legislator = Legislator('2010', chamber, district, full_name,
                                        first_name, last_name, middle_name,
                                        party)
                legislator.add_source(details_url)

                # Membership grid: first row is the header; each data row
                # is (role, committee).
                grid = details.find('table', id='ctl00_mainCopy_MembershipGrid')
                for membership in grid('tr')[1:]:
                    cols = membership('td')
                    role = cols[0].string.strip()
                    committee = cols[1]('a')[0].string.strip()
                    legislator.add_role(role, '2010', chamber=chamber,
                                        committee=committee)

                self.save_legislator(legislator)
示例3: scrape_member
# 需要导入模块: from fiftystates.scrape.legislators import Legislator [as 别名]
# 或者: from fiftystates.scrape.legislators.Legislator import add_role [as 别名]
def scrape_member(self, chamber, term, member_url):
    """Scrape a single legislator's bio page and save the record.

    Extracts name/photo from the bio image, district from the District
    widget, party from the bio description, and committee roles from the
    Column5 widget.
    """
    with self.urlopen(member_url) as page:
        root = lxml.html.fromstring(page)
        root.make_links_absolute(member_url)

        photo_url = root.xpath('//div[@class="bioPicContainer"]/img/@src')[0]
        full_name = root.xpath('//div[@class="bioPicContainer"]/img/@alt')[0]

        # Best-effort split of "First [Middle] Last"; extra trailing parts
        # (suffixes etc.) are ignored.
        name_parts = full_name.split(' ')
        first_name = last_name = middle_name = None
        if len(name_parts) == 2:
            first_name, last_name = name_parts
            middle_name = ''
        elif len(name_parts) == 3:
            first_name, middle_name, last_name = name_parts
        elif len(name_parts) > 3:
            first_name = name_parts[0]
            middle_name = name_parts[1]
            last_name = name_parts[2]

        district = root.xpath('//div[@id="District"]//div[@class="widgetContent"]')
        if len(district):
            district = district[0].text.strip()
            # District text comes in several formats; keep only the
            # leading district identifier.
            if len(district.split(' - ')) > 1:
                district = district.split(' - ')[0]
            elif len(district.split('. ')) > 1:
                district = district.split('. ')[0]
            else:
                district = district[0:32]
        else:
            district = 'NotFound'

        party = root.xpath('//div[@class="bioDescription"]/div')[0] \
            .text.strip().split(',')[0]
        if party == 'Democrat':
            party = 'Democratic'
        elif party == 'Republican':
            party = 'Republican'

        leg = Legislator(term, chamber, district, full_name,
                         party=party, photo_url=photo_url,
                         first_name=first_name, middle_name=middle_name,
                         last_name=last_name)
        leg.add_source(member_url)

        comm_div = root.xpath('//div[@id="Column5"]//div[@class="widgetContent"]')
        if len(comm_div):
            comm_div = comm_div[0]
            # BUG FIX: the original used absolute XPaths ('/ul/li',
            # '/a/text()'), which search from the document root and never
            # match inside an element context, and called .strip() on the
            # list returned by xpath(). Use relative paths and index the
            # result lists instead.
            for li in comm_div.xpath('.//ul/li'):
                role = li.xpath('text()')[0].strip()
                comm = li.xpath('a/text()')[0].strip().strip(',')
                if role == 'Member':
                    role = 'committee member'
                leg.add_role(role, term, chamber=chamber,
                             committee=comm)

        self.save_legislator(leg)
示例4: scrape_member
# 需要导入模块: from fiftystates.scrape.legislators import Legislator [as 别名]
# 或者: from fiftystates.scrape.legislators.Legislator import add_role [as 别名]
def scrape_member(self, chamber, year, member_url):
    """Scrape one legislator page (hard-coded to session '81') and save it."""
    with self.urlopen(member_url) as page:
        root = lxml.html.fromstring(page)
        root.make_links_absolute(member_url)

        subtitle = root.xpath('//div[@class="subtitle"]')[0]
        member_table = subtitle.getnext()

        # NOTE(review): these '//' XPaths search from the document root,
        # not from member_table/info_cell — presumably the ids/positions
        # are unique on the page; confirm against a live page.
        photo_url = member_table.xpath('//img[@id="ctl00_ContentPlaceHolder1'
                                       '_imgMember"]')[0].attrib['src']

        info_cell = member_table.xpath('//td[@valign="top"]')[0]
        full_name = info_cell.xpath('string(//div[2]/strong)').strip()

        district = info_cell.xpath('string(//div[3])').strip()
        district = district.replace('District ', '')

        # Party is identified by the first letter of the fourth div.
        party = info_cell.xpath('string(//div[4])').strip()[0]
        if party == 'D':
            party = 'Democrat'
        elif party == 'R':
            party = 'Republican'

        legislator = Legislator('81', chamber, district, full_name,
                                party=party, photo_url=photo_url)
        legislator.add_source(member_url)

        # Committee names are the text trailing each <br> in the div that
        # follows the "Committee Membership:" heading.
        memberships = root.xpath('//div[string() = "Committee Membership:"]'
                                 '/following-sibling::div'
                                 '[@class="rcwcontent"]')[0]
        for br in memberships.xpath('*/br'):
            if br.tail:
                legislator.add_role('committee member', '81', chamber=chamber,
                                    committee=br.tail.strip())

        self.save_legislator(legislator)
示例5: scrape
# 需要导入模块: from fiftystates.scrape.legislators import Legislator [as 别名]
# 或者: from fiftystates.scrape.legislators.Legislator import add_role [as 别名]
def scrape(self, chamber, term):
    """Emit two hard-coded example legislators for the requested chamber."""
    self.validate_term(term)

    smith = Legislator(term, chamber, '1st',
                       'Bob Smith', party='Democrat')
    # The chamber determines which leadership title Smith holds.
    leadership = ('President of the Senate' if chamber == 'upper'
                  else 'Speaker of the House')
    smith.add_role(leadership, term)
    smith.add_source('http://example.com/Bob_Smith.html')

    johnson = Legislator(term, chamber, '2nd',
                         'Sally Johnson', party='Republican')
    johnson.add_role('Minority Leader', term)
    johnson.add_source('http://example.com/Sally_Johnson.html')

    self.save_legislator(smith)
    self.save_legislator(johnson)
示例6: scrape
# 需要导入模块: from fiftystates.scrape.legislators import Legislator [as 别名]
# 或者: from fiftystates.scrape.legislators.Legislator import add_role [as 别名]
def scrape(self, chamber, year):
    """Emit two hard-coded example legislators for the 2009-2010 session.

    Raises NoDataForYear for any year other than '2009'.
    """
    if year != '2009':
        raise NoDataForYear

    smith = Legislator('2009-2010', chamber, '1st',
                       'Bob Smith', party='Democrat')
    # The chamber determines which leadership title Smith holds.
    leadership = ('President of the Senate' if chamber == 'upper'
                  else 'Speaker of the House')
    smith.add_role(leadership, '2009-2010')
    smith.add_source('http://example.com/Bob_Smith.html')

    johnson = Legislator('2009-2010', chamber, '2nd',
                         'Sally Johnson', party='Republican')
    johnson.add_role('Minority Leader', '2009-2010')
    johnson.add_source('http://example.com/Sally_Johnson.html')

    self.save_legislator(smith)
    self.save_legislator(johnson)
示例7: scrape
# 需要导入模块: from fiftystates.scrape.legislators import Legislator [as 别名]
# 或者: from fiftystates.scrape.legislators.Legislator import add_role [as 别名]
def scrape(self, chamber, term):
    """Scrape the Arizona member roster for one chamber and term.

    Raises NoDataForPeriod when no session id is known for the term's
    session.
    """
    self.validate_term(term)
    session = self.get_session_for_term(term)
    try:
        session_id = self.get_session_id(session)
    except KeyError:
        raise NoDataForPeriod(session)

    body = {'lower': 'H', 'upper': 'S'}[chamber]
    url = 'http://www.azleg.gov/MemberRoster.asp?Session_ID=%s&body=%s' % (
        session_id, body)
    with self.urlopen(url) as page:
        root = html.fromstring(page)

        # Skip the header row of the chamber's roster table.
        table_id = {'H': 'house', 'S': 'senate'}[body]
        rows = root.xpath('//table[@id="%s"]/tr' % table_id)[1:]
        for row in rows:
            position = ''
            vacated = ''
            (name, district, party,
             email, room, phone, fax) = row.getchildren()

            link = "http://www.azleg.gov" + name.xpath('string(a/@href)')

            if len(name) == 1:
                name = name.text_content().strip()
            else:
                # Extra children mean a leadership title trails the link.
                position = name.tail.strip()
                name = name[0].text_content().strip()

            district = district.text_content()
            party = self.get_party(party.text_content().strip())

            email = email.text_content().strip()
            # "Vacated mm/dd/yyyy" in the email column marks a departure.
            if re.match('Vacated', email):
                vacated = re.search('[0-9]*/[0-9]*/\d{4}', email).group()
                email = ''

            room = room.text_content().strip()
            phone = phone.text_content().strip()
            if not phone.startswith('602'):
                phone = "602-" + phone
            fax = fax.text_content().strip()
            if not fax.startswith('602'):
                fax = "602-" + fax

            if vacated:
                # Departed members keep only the basics plus an end date.
                end_date = datetime.datetime.strptime(vacated, '%m/%d/%Y')
                leg = Legislator(term, chamber, district, full_name=name,
                                 party=party, url=link)
                leg['roles'][0]['end_date'] = end_date
            else:
                leg = Legislator(term, chamber, district, full_name=name,
                                 party=party, phone=phone, fax=fax,
                                 room=room, email=email, url=link)

            if position:
                leg.add_role(position, term, chamber=chamber,
                             district=district, party=party)

            leg.add_source(url)
            #Probably just get this from the committee scraper
            #self.scrape_member_page(link, session, chamber, leg)
            self.save_legislator(leg)