本文整理汇总了Python中fiftystates.scrape.committees.Committee类的典型用法代码示例。如果您正苦于以下问题:Python Committee类的具体用法?Python Committee怎么用?Python Committee使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Committee类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: scrape_reps_comm
def scrape_reps_comm(self, chamber, session):
    """Scrape Maine House committees and their members from the static
    committee listing page and save one Committee per heading."""
    url = 'http://www.maine.gov/legis/house/hsecoms.htm'
    with self.urlopen(url) as page:
        root = lxml.etree.fromstring(page, lxml.etree.HTMLParser())
        # Committee headings sit in odd-numbered <center> elements (1,3,..11);
        # the matching member list is the count-th <ul> on the page.
        for count, n in enumerate(range(1, 12, 2), start=1):
            comm_name = root.xpath('string(//body/center[%s]/h1/a)' % (n))
            committee = Committee(chamber, comm_name)
            for el in root.xpath('/html/body/ul[%s]/li/a' % (count)):
                rep = el.text
                if '(' in rep:
                    # Drop a fixed-width prefix and the trailing "(...)" note.
                    rep = rep[15:rep.find('(')]
                committee.add_member(rep)
            committee.add_source(url)
            self.save_committee(committee)
示例2: scrape_senate_committee
def scrape_senate_committee(self, term, link):
    """Scrape one Minnesota Senate committee page into a Committee.

    Fix: ``role`` was assigned only inside the "Position: Name" branch,
    so a plain member row appearing before any position row raised
    NameError. It now defaults to 'member' and still "switches" when a
    position row is seen (the sticky behavior the original comment
    describes).
    """
    with self.urlopen(link) as html:
        doc = lxml.html.fromstring(html)
        # Title is "Minnesota Senate Committees - XXXXX Committee":
        # strip the fixed 30-char prefix and 10-char suffix.
        committee_name = doc.xpath('//title/text()')[0][30:-10]
        com = Committee('upper', committee_name)
        role = 'member'  # default until a "Position:" row switches it
        # The first id=bio table holds one member per row.
        for row in doc.xpath('//table[@id="bio"]')[0].xpath('tr'):
            row = fix_whitespace(row.text_content())
            if ':' in row:
                # "Chair: Jane Doe" style row — switch the active role.
                position, name = row.split(': ')
                role = position.lower().strip()
            else:
                name = row
            com.add_member(name, role)
        com.add_source(link)
        self.save_committee(com)
示例3: scrape
def scrape(self, chamber, term):
    """Scrape Utah standing committees for one chamber (2011-2012 only).

    Fix: the member-role local was named ``type``, shadowing the builtin;
    renamed to ``member_type`` (purely local, interface unchanged).
    """
    if term != '2011-2012':
        raise NoDataForPeriod(term)
    chamber_abbr = {'upper': 's', 'lower': 'h'}[chamber]
    url = "http://le.utah.gov/asp/interim/standing.asp?house=%s" % chamber_abbr
    with self.urlopen(url) as page:
        page = lxml.html.fromstring(page)
        page.make_links_absolute(url)
        for comm_link in page.xpath("//a[contains(@href, 'Com=')]"):
            comm_name = comm_link.text.strip()
            # Drop leading "House" or "Senate" from name
            comm_name = re.sub(r"^(House|Senate) ", "", comm_name)
            comm = Committee(chamber, comm_name)
            for mbr_link in comm_link.xpath(
                    "../../../font[2]/a[not(contains(@href, 'mailto'))]"):
                name = mbr_link.text.strip()
                # An <i> element right after the link carries the role.
                next_el = mbr_link.getnext()
                if next_el is not None and next_el.tag == 'i':
                    member_type = next_el.text.strip()
                else:
                    member_type = 'member'
                comm.add_member(name, member_type)
            self.save_committee(comm)
示例4: scrape_reps_comm
def scrape_reps_comm(self, chamber, term):
    """Scrape Ohio committees by walking committee ids 87-123; ids below
    92 are joint committees regardless of the requested chamber."""
    original_chamber = chamber
    # id range for senate committees on their website
    for comm_id in range(87, 124):
        comm_url = (
            "http://www.house.state.oh.us/index.php?option="
            "com_displaycommittees&task=2&type=Regular&"
            "committeeId=%d" % comm_id
        )
        with self.urlopen(comm_url) as page:
            page = lxml.etree.fromstring(page, lxml.etree.HTMLParser())
            comm_name = page.xpath(
                'string(//table/tr[@class="committeeHeader"]/td)')
            comm_name = comm_name.replace("/", " ")
            chamber = "joint" if comm_id < 92 else original_chamber
            committee = Committee(chamber, comm_name)
            committee.add_source(comm_url)
            # Member names are the district-page links.
            for link in page.xpath("//a[contains(@href, 'district')]"):
                member = link.text
                if member and member.strip():
                    committee.add_member(member.strip())
            self.save_committee(committee)
示例5: scrape_house_committees
def scrape_house_committees(self, term):
    """Scrape Minnesota House committees and their member rosters."""
    url = 'http://www.house.leg.state.mn.us/comm/commemlist.asp'
    with self.urlopen(url) as html:
        doc = lxml.html.fromstring(html)
        for heading in doc.xpath('//h2[@class="commhighlight"]'):
            members_url = heading.xpath(
                'following-sibling::p[1]/a[text()="Members"]/@href')[0]
            committee = Committee('lower', heading.text)
            committee.add_source(members_url)
            with self.urlopen(members_url) as member_html:
                mdoc = lxml.html.fromstring(member_html)
                # Each legislator sits in its own table; the first row's
                # second column carries all the info.
                for cell in mdoc.xpath('//table/tr[1]/td[2]/p/b[1]'):
                    # Name is the full text content of the cell.
                    name = cell.text_content()
                    # Role, if any, is inside a nested <b> tag.
                    roles = cell.xpath('b/*/text()')
                    if roles:
                        role = roles[0]
                        # The role text is embedded in the name; remove it.
                        name = name.replace(role, '')
                    else:
                        role = 'member'
                    committee.add_member(name, role)
            self.save_committee(committee)
示例6: scrape_reps_comm
def scrape_reps_comm(self, chamber, year):
    """Scrape Ohio committees by id (87-123); ids below 92 are joint
    legislative committees regardless of the requested chamber."""
    default_chamber = chamber
    # id range for senate committees on their website
    for comm_id in range(87, 124):
        comm_url = ('http://www.house.state.oh.us/index.php?option='
                    'com_displaycommittees&task=2&type=Regular&committeeId='
                    + str(comm_id))
        with self.urlopen(comm_url) as page:
            root = lxml.etree.fromstring(page, lxml.etree.HTMLParser())
            comm_name = root.xpath(
                'string(//table/tr[@class="committeeHeader"]/td)')
            comm_name = comm_name.replace("/", " ")
            # joint legislative committees
            chamber = 'joint_legislation' if comm_id < 92 else default_chamber
            committee = Committee(chamber, comm_name)
            path = ('/html/body[@id="bd"]/div[@id="ja-wrapper"]'
                    '/div[@id="ja-containerwrap-f"]/div[@id="ja-container"]'
                    '/div[@id="ja-mainbody-f"]/div[@id="ja-contentwrap"]'
                    '/div[@id="ja-content"]/table/tr[position() >=3]')
            # Member names appear in the 1st and 4th columns of each row.
            for row in root.xpath(path):
                committee.add_member(row.xpath('string(td[1]/a)'))
                committee.add_member(row.xpath('string(td[4]/a)'))
            committee.add_source(comm_url)
            self.save_committee(committee)
示例7: scrape_house
def scrape_house(self):
    """Scrape Louisiana House members' committee assignments.

    Fix: ``br.tail`` committee names were appended without ``.strip()``,
    unlike every other name extracted from the page; the stray
    whitespace made the same committee map to multiple ``comm_cache``
    entries (duplicate committees with near-identical names).
    """
    url = "http://house.louisiana.gov/H_Reps/H_Reps_CmtesFull.asp"
    comm_cache = {}
    with self.urlopen(url) as text:
        page = lxml.html.fromstring(text)
        for row in page.xpath("//table[@bordercolorlight='#EAEAEA']/tr"):
            cells = row.xpath('td')
            name = cells[0].xpath('string()').strip()
            if name.startswith('Vacant'):
                continue
            # The second cell lists committees as text separated by <br>s.
            font = cells[1].xpath('font')[0]
            committees = []
            if font.text:
                committees.append(font.text.strip())
            for br in font.xpath('br'):
                if br.text:
                    committees.append(br.text.strip())
                if br.tail:
                    committees.append(br.tail.strip())
            for comm_name in committees:
                # Peel a trailing ", <Role>" suffix off the committee name.
                # NOTE(review): "Co-Chairmain" appears to mirror the site's
                # own typo — confirm before "fixing" either string.
                mtype = 'member'
                if comm_name.endswith(', Chairman'):
                    mtype = 'chairman'
                    comm_name = comm_name.replace(', Chairman', '')
                elif comm_name.endswith(', Co-Chairmain'):
                    mtype = 'co-chairmain'
                    comm_name = comm_name.replace(', Co-Chairmain', '')
                elif comm_name.endswith(', Vice Chair'):
                    mtype = 'vice chair'
                    comm_name = comm_name.replace(', Vice Chair', '')
                elif comm_name.endswith(', Ex Officio'):
                    mtype = 'ex officio'
                    comm_name = comm_name.replace(', Ex Officio', '')
                if comm_name.startswith('Joint'):
                    chamber = 'joint'
                else:
                    chamber = 'lower'
                # One Committee object per distinct committee name.
                try:
                    committee = comm_cache[comm_name]
                except KeyError:
                    committee = Committee(chamber, comm_name)
                    committee.add_source(url)
                    comm_cache[comm_name] = committee
                committee.add_member(name, mtype)
    for committee in comm_cache.values():
        self.save_committee(committee)
示例8: scrape_committee
def scrape_committee(self, chamber, term, name, url):
    """Scrape a single committee roster page: the member list after the
    "Members:" label plus the chair and vice chair entries."""
    with self.urlopen(url) as page:
        page = lxml.html.fromstring(page)
        # Member list is the text immediately following "Members:".
        member_text = page.xpath("//strong[contains(., 'Members:')]")[0].tail
        member_text = re.sub(r'\s+', ' ', member_text)
        committee = Committee(chamber, name)
        committee.add_source(url)
        for raw in member_text.split(','):
            # Strip trailing "R.M." / "R.M.M." designations from names.
            cleaned = re.sub(r'R\.M\.(M\.)?$', '', raw.strip())
            committee.add_member(cleaned.strip())
        chair_label = page.xpath("//strong[contains(., 'Chair:')]")[0]
        chair_name = chair_label.tail.strip()
        if chair_name:
            committee.add_member(chair_name, 'chair')
        vc_label = page.xpath("//strong[contains(., 'Vice Chair:')]")[0]
        vc_name = vc_label.tail.strip()
        if vc_name:
            committee.add_member(vc_name, 'vice chair')
        self.save_committee(committee)
示例9: scrape
def scrape(self, chamber, year):
    # Scrape Pennsylvania committee assignments (2009 only) from the
    # per-chamber "committee assignments" member listing pages.
    if year != '2009':
        raise NoDataForPeriod(year)
    if chamber == 'upper':
        url = ('http://www.legis.state.pa.us/cfdocs/legis/'
               'home/member_information/senators_ca.cfm')
    else:
        url = ('http://www.legis.state.pa.us/cfdocs/legis/'
               'home/member_information/representatives_ca.cfm')
    with self.urlopen(url) as page:
        page = lxml.html.fromstring(page)
        committees = {}
        # Each member's list item is two levels above their bio link.
        for li in page.xpath("//a[contains(@href, 'bio.cfm')]/../.."):
            name = li.xpath("string(b/a[contains(@href, 'bio.cfm')])")
            # Last 4 characters of the link text are dropped —
            # presumably a party/district suffix; TODO confirm.
            name = name[0:-4]
            # Remaining links in the item are committee assignments whose
            # tail text is the committee name.
            for link in li.xpath("a"):
                if not link.tail:
                    continue
                committee_name = link.tail.strip()
                committee_name = re.sub(r"\s+", " ", committee_name)
                subcommittee_name = None
                role = 'member'
                # Italic text after the link holds the subcommittee name
                # and/or the member's role, e.g. ", Subcommittee on X - Chair".
                rest = link.xpath('string(../i)')
                if rest:
                    match = re.match(r',\s+(Subcommittee on .*)\s+-',
                                     rest)
                    if match:
                        subcommittee_name = match.group(1)
                        role = rest.split('-')[1].strip()
                    else:
                        role = rest.replace(', ', '').strip()
                # One Committee per (chamber, committee, subcommittee) key.
                try:
                    committee = committees[(chamber, committee_name,
                                            subcommittee_name)]
                except KeyError:
                    committee = Committee(chamber, committee_name)
                    if subcommittee_name:
                        committee['subcommittee'] = subcommittee_name
                    committees[(chamber, committee_name,
                                subcommittee_name)] = committee
                committee.add_member(name, role)
        for committee in committees.values():
            self.save_committee(committee)
示例10: scrape_senate
def scrape_senate(self):
    """Scrape Senate Committees from the nyss_openlegislation models."""
    for raw_name, comm in nyss_openlegislation.models.committees.items():
        # Title-case the name but keep the conjunction lowercase.
        display_name = raw_name.title().replace('And', 'and')
        committee = Committee('upper', display_name)
        for member in comm.members:
            committee.add_member(member.fullname)
        self.save_committee(committee)
示例11: scrape
def scrape(self, chamber, year):
    """Scrape Maryland House committees (and subcommittees) from the
    MD Manual pages.

    Fix: ``committee_name`` persisted across loop iterations, so a page
    whose <h2>/<h3> headings all lacked text silently attached its
    members to the PREVIOUS page's committee (or raised NameError on the
    first page). Such pages are now skipped.
    """
    # TODO: scrape senate committees
    house_url = 'http://www.msa.md.gov/msa/mdmanual/06hse/html/hsecom.html'
    with self.urlopen(house_url) as html:
        doc = lxml.html.fromstring(html)
        # distinct URLs containing /com/
        committees = set([l.get('href') for l in doc.cssselect('li a')
                          if l.get('href', '').find('/com/') != -1])
        for com in committees:
            com_url = 'http://www.msa.md.gov' + com
            with self.urlopen(com_url) as chtml:
                cdoc = lxml.html.fromstring(chtml)
                # First non-empty h2/h3 is the committee name.
                committee_name = None
                for h in cdoc.cssselect('h2, h3'):
                    if h.text:
                        committee_name = h.text
                        break
                if committee_name is None:
                    continue
                cur_com = Committee('lower', committee_name)
                cur_com.add_source(com_url)
                for l in cdoc.cssselect('a[href]'):
                    # A " SUBCOMMITTEE" link starts a new subcommittee;
                    # save the one accumulated so far.
                    if ' SUBCOMMITTEE' in (l.text or ''):
                        self.save_committee(cur_com)
                        cur_com = Committee('lower', l.text, committee_name)
                        cur_com.add_source(com_url)
                    elif 'html/msa' in l.get('href'):
                        cur_com.add_member(l.text)
                self.save_committee(cur_com)
示例12: scrape_senate_committee
def scrape_senate_committee(self, name, url):
    """Scrape one Louisiana Senate committee's assignment page."""
    # The roster lives on the Assignments page, not the Default one.
    url = url.replace('Default.asp', 'Assignments.asp')
    committee = Committee('upper', name)
    with self.urlopen(url) as text:
        page = lxml.html.fromstring(text)
        member_links = page.xpath(
            '//table[@bordercolor="#EBEAEC"]/tr/td/font/a')
        for link in member_links:
            member = link.xpath('string()')
            member = member.replace('Senator ', '').strip()
            committee.add_member(member)
    self.save_committee(committee)
示例13: scrape_index
def scrape_index(self, chamber, session, session_id, committee_type):
    """Scrape the Arizona committee index XML for one chamber and
    committee type, saving one Committee per <committee> element."""
    url = base_url + 'xml/committees.asp?session=%s&type=%s' % (session_id,
                                                                committee_type)
    with self.urlopen(url) as page:
        root = etree.fromstring(page, etree.XMLParser(recover=True))
        selector = '//body[@Body="%s"]/committee' % {'upper': 'S',
                                                     'lower': 'H'}[chamber]
        # TODO need to and make sure to add sub committees
        for com in root.xpath(selector):
            # NOTE(review): relies on the feed's attribute order being
            # (id, name, short_name, sub) — confirm against the XML.
            c_id, name, short_name, sub = com.values()
            c = Committee(chamber, name, short_name=short_name,
                          session=session, az_committee_id=c_id)
            c.add_source(url)
            self.scrape_com_info(session, session_id, c_id, c)
            self.save_committee(c)
示例14: scrape
def scrape(self, chamber, term):
    """Scrape North Carolina standing and select committees for one
    chamber, delegating member scraping to ``scrape_committee``."""
    base_url = ('http://www.ncga.state.nc.us/gascripts/Committees/'
                'Committees.asp?bPrintable=true&sAction=ViewCommitteeType'
                '&sActionDetails=')
    committee_types = {'upper': ['Senate%20Standing', 'Senate%20Select'],
                       'lower': ['House%20Standing', 'House%20Select']}
    for ctype in committee_types[chamber]:
        listing_url = base_url + ctype
        with self.urlopen(listing_url) as data:
            doc = lxml.html.fromstring(data)
            doc.make_links_absolute(listing_url)
            for link in doc.xpath('//ul/li/a'):
                committee = Committee(chamber, link.text)
                url = link.get('href')
                self.scrape_committee(committee, url)
                committee.add_source(url)
                self.save_committee(committee)
示例15: scrape_senate
def scrape_senate(self):
    """Scrape NY Senate committees from nysenate.gov.

    Fix: ``committee_name`` was assigned only when a non-empty
    ``.committee_name`` heading was found, so a page without one either
    raised NameError or silently reused the previous page's name. Pages
    without a usable heading are now skipped.
    """
    senate_url = "http://www.nysenate.gov"
    senate_committees_url = senate_url + "/committees"
    with self.urlopen(senate_committees_url) as html:
        doc = lxml.html.fromstring(html)
        # Distinct committee detail pages (hrefs containing /committee/).
        committee_paths = set([l.get("href") for l in doc.cssselect("li a")
                               if l.get("href", "").find("/committee/") != -1])
        for committee_path in committee_paths:
            committee_url = senate_url + committee_path
            with self.urlopen(committee_url) as chtml:
                cdoc = lxml.html.fromstring(chtml)
                committee_name = None
                for h in cdoc.cssselect(".committee_name"):
                    if h.text:
                        committee_name = h.text
                        break
                if committee_name is None:
                    continue
                committee = Committee("upper", committee_name)
                committee.add_source(committee_url)
                # Chair: a senator link whose text starts with "Sen."
                for l in cdoc.cssselect(".committee-chair a[href]"):
                    if ("/senator/" in l.get("href") and l.text
                            and l.text.startswith("Sen.")):
                        committee.add_member(
                            l.text.split('Sen. ', 1)[1], "chair")
                for l in cdoc.cssselect(".committee-members a[href]"):
                    if "/senator/" in l.get("href"):
                        committee.add_member(l.text)
                self.save_committee(committee)