本文整理汇总了Python中fiftystates.scrape.legislators.Legislator类的典型用法代码示例。如果您正苦于以下问题:Python Legislator类的具体用法?Python Legislator怎么用?Python Legislator使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Legislator类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: scrape_old_legislators
def scrape_old_legislators(self, chamber, session):
    """Scrape pre-2009 South Dakota legislators for one chamber."""
    chamber_name = "Senate" if chamber == "upper" else "House"
    # Sessions before 2008 use a different index filename.
    filename = "district.htm" if int(session) < 2008 else "MembersDistrict.htm"
    list_url = "http://legis.state.sd.us/sessions/%s/%s" % (session, filename)
    soup = self.soup_parser(self.urlopen(list_url))

    for heading in soup.findAll("h2"):
        # Heading text looks like "District 01"; drop the zero padding.
        district = heading.contents[0].split(" ")[1].lstrip("0")
        member_table = heading.findNext("table")
        for row in member_table.findAll("tr")[1:]:
            cells = row.findAll("td")
            # Second column holds the chamber; skip members of the other one.
            if cells[1].contents[0].strip() != chamber_name:
                continue
            full_name = row.td.a.contents[0].strip()
            party = cells[3].contents[0].strip()
            occupation = cells[4].contents[0].strip()
            leg = Legislator(session, chamber, district, full_name,
                             party=party, occupation=occupation)
            leg.add_source(list_url)
            self.save_legislator(leg)
示例2: scrape_new_legislators
def scrape_new_legislators(self, chamber, session):
    """Scrape 2009-and-later South Dakota legislators for one chamber."""
    search = "Senate Members" if chamber == "upper" else "House Members"
    list_url = "http://legis.state.sd.us/sessions/%s/" "MemberMenu.aspx" % (session)
    soup = self.soup_parser(self.urlopen(list_url))

    # The member links live in the div right after the chamber heading text.
    member_div = soup.find(text=search).findNext("div")
    for anchor in member_div.findAll("a"):
        full_name = anchor.contents[0].strip()
        member_url = "http://legis.state.sd.us/sessions/%s/%s" % (session, anchor["href"])
        member_page = self.soup_parser(self.urlopen(member_url))

        party = member_page.find(id="ctl00_contentMain_spanParty").contents[0].strip()
        district = member_page.find(id="ctl00_contentMain_spanDistrict").contents[0]
        district = district.strip().lstrip("0")

        # Occupation is sometimes missing entirely.
        occ_span = member_page.find(id="ctl00_contentMain_spanOccupation")
        occupation = occ_span.contents[0].strip() if occ_span.contents else None

        leg = Legislator(session, chamber, district, full_name,
                         party=party, occupation=occupation)
        leg.add_source(member_url)
        self.save_legislator(leg)
示例3: scrape_senators
def scrape_senators(self, chamber, term):
    """Scrape Ohio senators from the senate directory page."""
    sen_url = 'http://www.ohiosenate.gov/directory.html'
    with self.urlopen(sen_url) as page:
        root = lxml.etree.fromstring(page, lxml.etree.HTMLParser())
        for cell in root.xpath('//table[@class="fullWidth"]/tr/td'):
            link = cell.xpath('a[@class="senatorLN"]')[1]
            # Trim the trailing two characters appended after the name.
            full_name = link.text[:-2]
            if full_name == 'To Be Announced':
                # Vacant seat; nothing to record.
                continue
            district = cell.xpath('string(h3)').split()[1]
            party = cell.xpath('string(a[@class="senatorLN"]/span)')
            if party == "D":
                party = "Democrat"
            elif party == "R":
                party = "Republican"
            senator = Legislator(term, chamber, district, full_name,
                                 '', '', '', party)
            senator.add_source(sen_url)
            self.save_legislator(senator)
示例4: scrape_senate
def scrape_senate(self, term):
    """Scrape Minnesota senators from the member list table."""
    url = 'http://www.senate.leg.state.mn.us/members/member_list.php'
    with self.urlopen(url) as html:
        doc = lxml.html.fromstring(html)
        for row in doc.xpath('//tr'):
            cells = row.xpath('td')
            # Member rows have exactly 5 cells and a recognized party code.
            if len(cells) != 5 or cells[1].text_content() not in self._parties:
                continue
            district = cells[0].text_content()
            party_code = cells[1].text_content()
            name = cells[2].xpath('a')[0].text.strip()
            # Address and phone share a cell, separated by two nbsp chars.
            addr, phone = cells[3].text_content().split(u'\xa0\xa0')
            email = cells[4].text_content()

            senator = Legislator(term, 'upper', district, name,
                                 party=self._parties[party_code],
                                 office_address=addr, office_phone=phone)
            # Only record the email field when it looks like an address.
            if '@' in email:
                senator['email'] = email
            senator.add_source(url)
            self.save_legislator(senator)
示例5: scrape
def scrape(self, chamber, term):
    """Scrape DC council members (treated as the upper chamber)."""
    # This page is what the council site loads via AJAX.
    url = 'http://www.dccouncil.washington.dc.us/include/linkedpage.aspx?linkedpage=2&page=17'

    # DC has no lower chamber; bail out early.
    if chamber == 'lower':
        return

    with self.urlopen(url) as data:
        base_doc = lxml.html.fromstring(data)
        for link in base_doc.xpath('//a'):
            member_url = 'http://www.dccouncil.washington.dc.us/' + link.get('href')
            with self.urlopen(member_url) as member_html:
                doc = lxml.html.fromstring(member_html)
                name = link.text
                # Page title ends with ", <district>".
                district = doc.get_element_by_id('PageTitle').text.rsplit(', ')[-1]
                # Anything not explicitly Democratic is recorded Independent.
                affiliation = get_surrounding_block(doc, 'Political Affiliation')
                party = 'Democratic' if 'Democratic' in affiliation else 'Independent'

                member = Legislator(term, 'upper', district, name,
                                    party=party)
                member.add_source(member_url)
                self.save_legislator(member)
示例6: scrape_senators
def scrape_senators(self, chamber, year):
    """Scrape Ohio senators (128th General Assembly directory)."""
    sen_url = 'http://www.ohiosenate.gov/directory.html'
    with self.urlopen(sen_url) as page:
        root = lxml.etree.fromstring(page, lxml.etree.HTMLParser())
        for cell in root.xpath('//table[@class="fullWidth"]/tr/td'):
            link = cell.xpath('a[@class="senatorLN"]')[1]
            # Trim the trailing two characters appended after the name.
            full_name = link.text[:-2]
            district = cell.xpath('string(h3)').split()[1]
            party = cell.xpath('string(a[@class="senatorLN"]/span)')
            # NOTE(review): naive split — suffixes/multi-part surnames
            # will land in the wrong field; matches original behavior.
            name_parts = full_name.split()
            first_name, last_name = name_parts[0], name_parts[1]
            senator = Legislator('128', chamber, district, full_name,
                                 first_name, last_name, '', party)
            senator.add_source(sen_url)
            self.save_legislator(senator)
示例7: scrape_reps
def scrape_reps(self, chamber, session, term_name):
    """Scrape Ohio House members, one request per district page."""
    # Ohio has exactly 99 House districts.
    for district in range(1, 100):
        rep_url = ('http://www.house.state.oh.us/components/'
                   'com_displaymembers/page.php?district=' + str(district))
        with self.urlopen(rep_url) as page:
            root = lxml.etree.fromstring(page, lxml.etree.HTMLParser())
            for table in root.xpath('//table[@class="page"]'):
                title = table.xpath('tr/td/title')[0].text
                # Title text ends with "(P)" where P is the party letter.
                party = title[-2]
                full_name = title[:-3]
                if party == "D":
                    party = "Democrat"
                elif party == "R":
                    party = "Republican"
                # Any other letter is kept as-is.
                rep = Legislator(term_name, chamber, str(district),
                                 full_name, "", "", "", party)
                rep.add_source(rep_url)
                self.save_legislator(rep)
示例8: scrape_legislator_data
def scrape_legislator_data(self, url, chamber):
    """Scrape Washington legislators from a chamber member list.

    Visits each member's personal page to pick up the parsed name,
    party, and district.
    """
    with self.lxml_context(url) as page:
        legislator_table = page.get_element_by_id("ctl00_PlaceHolderMain_dlMembers")
        for anchor in legislator_table.cssselect('a'):
            name = anchor.text_content()
            full_name, first_name, middle_name, last_name = self.separate_name(name)
            # Member page URLs are keyed on the lowercased surname with
            # apostrophes removed (e.g. O'Brien -> obrien).
            name_for_url = last_name.lower().replace("'", "")
            if chamber == 'upper':
                legislator_page_url = ("http://www.leg.wa.gov/senate/senators/Pages/"
                                       + name_for_url + ".aspx")
            else:
                legislator_page_url = ("http://www.leg.wa.gov/house/representatives/Pages/"
                                       + name_for_url + ".aspx")
            with self.lxml_context(legislator_page_url) as legislator_page:
                try:
                    full_name, first_name, middle_name, last_name = \
                        self.scrape_legislator_name(legislator_page)
                except Exception:
                    # Bug fix: original used a bare `except: break`, which
                    # silently aborted ALL remaining legislators when a
                    # single member page failed to parse. Skip just this one.
                    continue
                party_element = legislator_page.get_element_by_id(
                    "ctl00_PlaceHolderMain_lblParty")
                # Only (R) is mapped explicitly; everything else is Democrat.
                if party_element.text_content() == '(R)':
                    party = 'Republican'
                else:
                    party = 'Democrat'
                district_element = legislator_page.get_element_by_id(
                    "ctl00_PlaceHolderMain_hlDistrict")
                district = district_element.text_content()

                legislator = Legislator('2009-2010', chamber, district,
                                        full_name, "", "", "", party)
                legislator.add_source(legislator_page_url)
                self.save_legislator(legislator)
示例9: scrape_legislator_data
def scrape_legislator_data(self, chamber, session):
    """Scrape Washington legislators for one chamber and session.

    Fetches the chamber member list, then each member's personal page
    for the parsed name, party, and district.
    """
    with self.urlopen(house_url(chamber)) as page_html:
        page = lxml.html.fromstring(page_html)
        legislator_table = page.get_element_by_id("ctl00_PlaceHolderMain_dlMembers")
        for anchor in legislator_table.cssselect('a'):
            name = anchor.text_content()
            full_name, first_name, middle_name, last_name = separate_name(name)
            # Member page URLs are keyed on the lowercased surname with
            # apostrophes removed (e.g. O'Brien -> obrien).
            name_for_url = last_name.lower().replace("'", "")
            legislator_page_url = legs_url(chamber, name_for_url)
            with self.urlopen(legislator_page_url) as legislator_page_html:
                legislator_page = lxml.html.fromstring(legislator_page_html)
                try:
                    full_name, first_name, middle_name, last_name = \
                        self.scrape_legislator_name(legislator_page)
                except Exception:
                    # Bug fix: original used a bare `except: break`, which
                    # silently aborted ALL remaining legislators when a
                    # single member page failed to parse. Skip just this one.
                    continue
                party_element = legislator_page.get_element_by_id(
                    "ctl00_PlaceHolderMain_lblParty")
                # Only (R) is mapped explicitly; everything else is Democrat.
                if party_element.text_content() == '(R)':
                    party = 'Republican'
                else:
                    party = 'Democrat'
                district_element = legislator_page.get_element_by_id(
                    "ctl00_PlaceHolderMain_hlDistrict")
                district = district_element.text_content()

                legislator = Legislator(session, chamber, district,
                                        full_name, "", "", "", party)
                legislator.add_source(legislator_page_url)
                self.save_legislator(legislator)
示例10: scrape_reps
def scrape_reps(self, chamber, session):
    """Scrape Maine House members from the district membership page."""
    rep_url = 'http://www.maine.gov/legis/house/dist_mem.htm'
    with self.urlopen(rep_url) as page:
        root = lxml.etree.fromstring(page, lxml.etree.HTMLParser())
        # Maine has 151 House districts.
        for district in range(1, 152):
            # Every tenth paragraph carries an extra anchor, shifting
            # the member link from a[2] to a[3].
            anchor_idx = 3 if district % 10 == 0 else 2
            path = 'string(/html/body/p[%s]/a[%s])' % (district + 4, anchor_idx)
            name = root.xpath(path)
            if not name:
                continue
            if name.split()[0] == 'District':
                continue
            # Name text looks like "Representative <Name> (<P>) ...".
            paren = name.find('(')
            party = name[paren + 1]
            name = name[15:paren]
            if party == "V":
                # A "(V)" entry marks a vacant seat.
                name = "Vacant"
            rep = Legislator(session, chamber, district, name,
                             "", "", "", party)
            rep.add_source(rep_url)
            self.save_legislator(rep)
示例11: scrape_details
def scrape_details(self, chamber, term, leg_name, leg_link, role):
    """Scrape one Mississippi member's detail page.

    Pulls party, district, phone numbers, organization info, and a
    constructed email address, then saves the Legislator. HTTP errors
    are logged as warnings rather than aborting the run.
    """
    try:
        url = 'http://billstatus.ls.state.ms.us/members/%s' % leg_link
        with self.urlopen(url) as details_page:
            details_page = details_page.decode('latin1').encode('utf8', 'ignore')
            root = lxml.etree.fromstring(details_page, lxml.etree.HTMLParser())
            party = root.xpath('string(//party)')
            district = root.xpath('string(//district)')
            first_name, middle_name, last_name = "", "", ""

            home_phone = root.xpath('string(//h_phone)')
            bis_phone = root.xpath('string(//b_phone)')
            capital_phone = root.xpath('string(//cap_phone)')
            other_phone = root.xpath('string(//oth_phone)')
            org_info = root.xpath('string(//org_info)')
            email_name = root.xpath('string(//email_address)')
            # Bug fix: the original format string was corrupted
            # ('%[email protected]%s.ms.gov' — '%[' is an invalid conversion and
            # raises ValueError). Reconstructed as local-part@chamber
            # domain. NOTE(review): verify domain pattern against the
            # live site.
            email = '%s@%s.ms.gov' % (email_name, chamber)

            # Only D is mapped explicitly; everything else is Republican.
            if party == 'D':
                party = 'Democratic'
            else:
                party = 'Republican'

            leg = Legislator(term, chamber, district, leg_name, first_name,
                             last_name, middle_name, party, role=role,
                             home_phone=home_phone, bis_phone=bis_phone,
                             capital_phone=capital_phone,
                             other_phone=other_phone, org_info=org_info,
                             email=email)
            leg.add_source(url)
            self.save_legislator(leg)
    except scrapelib.HTTPError as e:
        self.warning(str(e))
示例12: scrape
def scrape(self, chamber, year):
    """Scrape North Carolina legislators for one chamber and year."""
    session = "%d-%d" % (int(year), int(year) + 1)
    base = ("http://www.ncga.state.nc.us/gascripts/members/"
            "memberList.pl?sChamber=")
    url = base + ('House' if chamber == 'lower' else 'Senate')

    with self.urlopen(url) as (resp, data):
        soup = self.soup_parser(data)
        table = soup.find('div', id='mainBody').find('table')
        for row in table.findAll('tr')[1:]:
            party = row.td.contents[0].strip()
            if party == 'Dem':
                party = 'Democrat'
            elif party == 'Rep':
                party = 'Republican'
            cells = row.findAll('td')
            district = cells[1].contents[0].strip()
            # Replace non-breaking spaces before splitting the name.
            full_name = cells[2].a.contents[0].strip().replace(u'\u00a0', ' ')
            first_name, last_name, middle_name, suffix = split_name(full_name)

            member = Legislator(session, chamber, district, full_name,
                                first_name, last_name, middle_name,
                                party, suffix=suffix)
            member.add_source(url)
            self.save_legislator(member)
示例13: scrape_rep
def scrape_rep(self, name, term, url):
    """Scrape one Louisiana House member page.

    Normalizes names that confuse name_tools, extracts the district
    number from the district-map link, and infers party from page text.
    """
    if name == 'Franklin, A.B.':
        name = 'Franklin, A. B.'
    elif ', Jr., ' in name:
        # Bug fix: str.replace returns a new string; the original
        # discarded the result, so the suffix was never moved.
        name = name.replace(', Jr., ', ' ')
        name += ', Jr.'
    elif ', III, ' in name:
        # Bug fix: same discarded-return defect as above.
        name = name.replace(', III, ', ' ')
        name += ', III'

    with self.urlopen(url) as text:
        page = lxml.html.fromstring(text)
        # District number comes from the map link, e.g. "district42.pdf".
        district = page.xpath(
            "//a[contains(@href, 'Maps')]")[0].attrib['href']
        district = re.search(r"district(\d+).pdf", district).group(1)

        if "Democrat District" in text:
            party = "Democratic"
        elif "Republican District" in text:
            party = "Republican"
        elif "Independent District" in text:
            party = "Independent"
        else:
            party = "Other"

        rep = Legislator(term, 'lower', district, name, party=party)
        rep.add_source(url)
        self.save_legislator(rep)
示例14: scrape
def scrape(self, chamber, year):
    """Scrape Wisconsin legislators for one chamber and year.

    Raises NoDataForYear for even years that are not part of the
    current session window.
    """
    year = int(year)
    session = internal_sessions[year][0][1]

    # Bug fix: the original guard used `or`, which is a tautology
    # (year and year+1 can never both equal the current year), so it
    # raised NoDataForYear for EVERY even year. The intent is to allow
    # an even year only when it (or the following year) is current.
    today_year = dt.date.today().year
    if year % 2 == 0 and year != today_year and year + 1 != today_year:
        raise NoDataForYear(year)

    if chamber == 'upper':
        url = "http://legis.wi.gov/w3asp/contact/legislatorslist.aspx?house=senate"
    else:
        url = "http://legis.wi.gov/w3asp/contact/legislatorslist.aspx?house=assembly"

    with self.urlopen(url) as body:
        page = lxml.html.fromstring(body)
        for row in page.cssselect("#ctl00_C_dgLegData tr"):
            if not row.cssselect("td a"):
                continue
            rep_url = list(row)[0].cssselect("a[href]")[0].get("href")
            # Cell text looks like "Name, Jr. (P)"; capture name + party.
            legpart = re.findall(r'([\w\-\,\s\.]+)\s+\(([\w])\)',
                                 list(row)[0].text_content())
            if legpart:
                full_name, party = legpart[0]
                district = str(int(list(row)[2].text_content()))
                leg = Legislator(session, chamber, district, full_name,
                                 party)
                leg.add_source(rep_url)
                leg = self.add_committees(leg, rep_url, session)
                self.save_legislator(leg)
示例15: scrape_legislator_data
def scrape_legislator_data(self, url, chamber):
    """Scrape New Mexico legislators and their committee roles (2010)."""
    party_fulls = {'R': 'Republican', 'D': 'Democrat'}
    with self.urlopen(url) as page:
        page = BeautifulSoup(page)
        for cell in page.find('table', id='ctl00_mainCopy_DataList1')('td'):
            spans = cell('span')
            if not spans:
                self.debug('Found an empty cell in %s. Continuing' % url)
                continue

            full_name = ' '.join(span.string.strip() for span in spans)
            # First span holds "First [Middle]"; second holds the surname.
            given = spans[0].string.strip().split()
            if len(given) == 2:
                first_name, middle_name = given
            else:
                first_name, middle_name = spans[0].string.strip(), ''
            last_name = spans[1].string.strip()

            details_url = get_abs_url(url, cell.find('a')['href'])
            with self.urlopen(details_url) as details:
                details = BeautifulSoup(details)
                district = details.find(
                    'a', id='ctl00_mainCopy_LegisInfo_DISTRICTLabel').string.strip()
                party = party_fulls[details.find(
                    'span', id='ctl00_mainCopy_LegisInfo_PARTYLabel').string]

                member = Legislator('2010', chamber, district, full_name,
                                    first_name, last_name, middle_name, party)
                member.add_source(details_url)

                # Committee memberships: skip the header row.
                comms_table = details.find(
                    'table', id='ctl00_mainCopy_MembershipGrid')
                for comm_row in comms_table('tr')[1:]:
                    comm_cells = comm_row('td')
                    role = comm_cells[0].string.strip()
                    committee = comm_cells[1]('a')[0].string.strip()
                    member.add_role(role, '2010', chamber=chamber,
                                    committee=committee)
                self.save_legislator(member)