本文整理汇总了Python中fiftystates.scrape.committees.Committee.add_source方法的典型用法代码示例。如果您正苦于以下问题:Python Committee.add_source方法的具体用法?Python Committee.add_source怎么用?Python Committee.add_source使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类fiftystates.scrape.committees.Committee
的用法示例。
在下文中一共展示了Committee.add_source方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: scrape_committee
# 需要导入模块: from fiftystates.scrape.committees import Committee [as 别名]
# 或者: from fiftystates.scrape.committees.Committee import add_source [as 别名]
def scrape_committee(self, chamber, term, name, url):
    """Scrape a single committee page and save its membership.

    Members come from the tail text of the "Members:" label; the chair
    and vice chair are read from the tails of their own labels.
    """
    with self.urlopen(url) as page:
        page = lxml.html.fromstring(page)

        # Member list is the text immediately following the "Members:" label.
        raw_members = page.xpath("//strong[contains(., 'Members:')]")[0].tail
        raw_members = re.sub(r'\s+', ' ', raw_members)

        committee = Committee(chamber, name)
        committee.add_source(url)

        for entry in raw_members.split(','):
            # Drop a trailing "R.M." / "R.M.M." honorific, if present.
            cleaned = re.sub(r'R\.M\.(M\.)?$', '', entry.strip())
            committee.add_member(cleaned.strip())

        # NOTE(review): assumes the chair/vice-chair labels always exist and
        # have tail text — TODO confirm against the source pages.
        chair_label = page.xpath("//strong[contains(., 'Chair:')]")[0]
        chair_name = chair_label.tail.strip()
        if chair_name:
            committee.add_member(chair_name, 'chair')

        vice_label = page.xpath("//strong[contains(., 'Vice Chair:')]")[0]
        vice_name = vice_label.tail.strip()
        if vice_name:
            committee.add_member(vice_name, 'vice chair')

        self.save_committee(committee)
示例2: scrape_senate_committee
# 需要导入模块: from fiftystates.scrape.committees import Committee [as 别名]
# 或者: from fiftystates.scrape.committees.Committee import add_source [as 别名]
def scrape_senate_committee(self, term, link):
    """Scrape one Senate committee roster page and save it.

    Rows containing "Role: Name" switch the current role, which then
    applies to the following rows until switched again.
    """
    with self.urlopen(link) as html:
        doc = lxml.html.fromstring(html)

        # Title is "Minnesota Senate Committees - <name> Committee":
        # strip the fixed 30-char prefix and 10-char suffix.
        committee_name = doc.xpath('//title/text()')[0][30:-10]
        com = Committee('upper', committee_name)

        # Fix: role was previously unset until the first "X: Y" row, so a
        # leading plain-name row raised NameError. Default to 'member'.
        role = 'member'

        # first id=bio table is members, one per row
        for row in doc.xpath('//table[@id="bio"]')[0].xpath('tr'):
            row = fix_whitespace(row.text_content())
            # switch role when the row is "Position: Name"
            if ':' in row:
                position, name = row.split(': ')
                role = position.lower().strip()
            else:
                name = row
            # add the member under the currently active role
            com.add_member(name, role)

        com.add_source(link)
        self.save_committee(com)
示例3: scrape_senate
# 需要导入模块: from fiftystates.scrape.committees import Committee [as 别名]
# 或者: from fiftystates.scrape.committees.Committee import add_source [as 别名]
def scrape_senate(self):
    """Scrape NY Senate committees and their membership."""
    senate_url = "http://www.nysenate.gov"
    senate_committees_url = senate_url + "/committees"
    with self.urlopen(senate_committees_url) as html:
        doc = lxml.html.fromstring(html)
        # Distinct hrefs pointing at a /committee/ page.
        committee_paths = set(a.get("href") for a in doc.cssselect("li a")
                              if a.get("href", "").find("/committee/") != -1)
        for committee_path in committee_paths:
            committee_url = senate_url + committee_path
            with self.urlopen(committee_url) as chtml:
                cdoc = lxml.html.fromstring(chtml)
                # First non-empty .committee_name heading is the name.
                for heading in cdoc.cssselect(".committee_name"):
                    if heading.text:
                        committee_name = heading.text
                        break
                committee = Committee("upper", committee_name)
                committee.add_source(committee_url)
                # Chair links read "Sen. <name>"; strip the prefix.
                for anchor in cdoc.cssselect(".committee-chair a[href]"):
                    if ("/senator/" in anchor.get("href")
                            and anchor.text
                            and anchor.text.startswith("Sen.")):
                        committee.add_member(
                            anchor.text.split('Sen. ', 1)[1], "chair")
                for anchor in cdoc.cssselect(".committee-members a[href]"):
                    if "/senator/" in anchor.get("href"):
                        committee.add_member(anchor.text)
                self.save_committee(committee)
示例4: scrape_reps_comm
# 需要导入模块: from fiftystates.scrape.committees import Committee [as 别名]
# 或者: from fiftystates.scrape.committees.Committee import add_source [as 别名]
def scrape_reps_comm(self, chamber, session):
    """Scrape Maine House committees from the static listing page.

    Committee headings live in the odd-numbered <center> blocks; the
    n-th heading's member list is the n-th <ul> in the body.
    """
    url = 'http://www.maine.gov/legis/house/hsecoms.htm'
    with self.urlopen(url) as page:
        root = lxml.etree.fromstring(page, lxml.etree.HTMLParser())
        count = 0
        for n in range(1, 12, 2):
            comm_name = root.xpath('string(//body/center[%s]/h1/a)' % (n))
            committee = Committee(chamber, comm_name)
            count += 1
            for el in root.xpath('/html/body/ul[%s]/li/a' % (count)):
                rep = el.text
                if rep.find('(') != -1:
                    # Keep only the name: drop the fixed 15-char prefix
                    # and everything from the "(" (party/district) on.
                    mark = rep.find('(')
                    rep = rep[15:mark]
                committee.add_member(rep)
            committee.add_source(url)
            self.save_committee(committee)
示例5: scrape_reps_comm
# 需要导入模块: from fiftystates.scrape.committees import Committee [as 别名]
# 或者: from fiftystates.scrape.committees.Committee import add_source [as 别名]
def scrape_reps_comm(self, chamber, year):
    """Scrape Ohio House committees by walking their numeric page ids."""
    save_chamber = chamber
    # id range for committees on the House website
    for comm_id in range(87, 124):
        chamber = save_chamber
        comm_url = ('http://www.house.state.oh.us/index.php?option=com_displaycommittees&task=2&type=Regular&committeeId='
                    + str(comm_id))
        with self.urlopen(comm_url) as page:
            root = lxml.etree.fromstring(page, lxml.etree.HTMLParser())
            comm_name = root.xpath(
                'string(//table/tr[@class="committeeHeader"]/td)')
            comm_name = comm_name.replace("/", " ")
            # ids below 92 are joint legislative committees
            if comm_id < 92:
                chamber = "joint_legislation"
            committee = Committee(chamber, comm_name)
            path = '/html/body[@id="bd"]/div[@id="ja-wrapper"]/div[@id="ja-containerwrap-f"]/div[@id="ja-container"]/div[@id="ja-mainbody-f"]/div[@id="ja-contentwrap"]/div[@id="ja-content"]/table/tr[position() >=3]'
            for el in root.xpath(path):
                # members sit in the 1st and 4th cells of each row
                committee.add_member(el.xpath('string(td[1]/a)'))
                committee.add_member(el.xpath('string(td[4]/a)'))
            committee.add_source(comm_url)
            self.save_committee(committee)
示例6: scrape
# 需要导入模块: from fiftystates.scrape.committees import Committee [as 别名]
# 或者: from fiftystates.scrape.committees.Committee import add_source [as 别名]
def scrape(self, chamber, year):
    """Scrape Maryland House committees and their subcommittees."""
    # TODO: scrape senate committees
    house_url = 'http://www.msa.md.gov/msa/mdmanual/06hse/html/hsecom.html'
    with self.urlopen(house_url) as html:
        doc = lxml.html.fromstring(html)
        # distinct URLs containing /com/
        committees = set(a.get('href') for a in doc.cssselect('li a')
                         if a.get('href', '').find('/com/') != -1)
        for com in committees:
            com_url = 'http://www.msa.md.gov' + com
            with self.urlopen(com_url) as chtml:
                cdoc = lxml.html.fromstring(chtml)
                # first non-empty h2/h3 heading is the committee name
                for heading in cdoc.cssselect('h2, h3'):
                    if heading.text:
                        committee_name = heading.text
                        break
                cur_com = Committee('lower', committee_name)
                cur_com.add_source(com_url)
                # Member links and subcommittee headers are interleaved:
                # a " SUBCOMMITTEE" link flushes the current committee
                # and starts a subcommittee of the page's main committee.
                for link in cdoc.cssselect('a[href]'):
                    if ' SUBCOMMITTEE' in (link.text or ''):
                        self.save_committee(cur_com)
                        cur_com = Committee('lower', link.text,
                                            committee_name)
                        cur_com.add_source(com_url)
                    elif 'html/msa' in link.get('href'):
                        cur_com.add_member(link.text)
                self.save_committee(cur_com)
示例7: scrape_reps_comm
# 需要导入模块: from fiftystates.scrape.committees import Committee [as 别名]
# 或者: from fiftystates.scrape.committees.Committee import add_source [as 别名]
def scrape_reps_comm(self, chamber, term):
    """Scrape Ohio House committees by id; ids below 92 are joint."""
    save_chamber = chamber
    # id range for committees on the House website
    for comm_id in range(87, 124):
        chamber = save_chamber
        comm_url = ("http://www.house.state.oh.us/index.php?option="
                    "com_displaycommittees&task=2&type=Regular&"
                    "committeeId=%d" % comm_id)
        with self.urlopen(comm_url) as page:
            page = lxml.etree.fromstring(page, lxml.etree.HTMLParser())
            comm_name = page.xpath(
                'string(//table/tr[@class="committeeHeader"]/td)')
            comm_name = comm_name.replace("/", " ")
            if comm_id < 92:
                chamber = "joint"
            committee = Committee(chamber, comm_name)
            committee.add_source(comm_url)
            # member links all point at legislator district pages
            for link in page.xpath("//a[contains(@href, 'district')]"):
                member_name = link.text
                if member_name and member_name.strip():
                    committee.add_member(member_name.strip())
            self.save_committee(committee)
示例8: scrape_comm
# 需要导入模块: from fiftystates.scrape.committees import Committee [as 别名]
# 或者: from fiftystates.scrape.committees.Committee import add_source [as 别名]
def scrape_comm(self, chamber, term_name):
    """Scrape Mississippi committee membership from the per-chamber XML feed.

    `chamber` arrives as the feed code ("h"/"s") and is normalized to
    "lower"/"upper" before committees are created.
    """
    url = "http://billstatus.ls.state.ms.us/htms/%s_cmtememb.xml" % chamber
    with self.urlopen(url) as comm_page:
        root = lxml.etree.fromstring(comm_page, lxml.etree.HTMLParser())
        if chamber == "h":
            chamber = "lower"
        else:
            chamber = "upper"
        for mr in root.xpath("//committee"):
            name = mr.xpath("string(name)")
            comm = Committee(chamber, name)

            chair = mr.xpath("string(chair)")
            chair = chair.replace(", Chairman", "")
            if len(chair) > 0:
                comm.add_member(chair, role="Chairman")

            vice_chair = mr.xpath("string(vice_chair)")
            vice_chair = vice_chair.replace(", Vice-Chairman", "")
            if len(vice_chair) > 0:
                comm.add_member(vice_chair, role="Vice-Chairman")

            # Members are ";"-separated; entries may carry leading
            # whitespace, and a trailing ";" yields an empty entry.
            # Fix: the old code indexed leg[0], which raised IndexError
            # on an empty entry; strip and skip empties instead.
            for leg in mr.xpath("string(members)").split(";"):
                leg = leg.strip()
                if leg:
                    comm.add_member(leg)

            comm.add_source(url)
            self.save_committee(comm)
示例9: scrape_assembly
# 需要导入模块: from fiftystates.scrape.committees import Committee [as 别名]
# 或者: from fiftystates.scrape.committees.Committee import add_source [as 别名]
def scrape_assembly(self):
    """Scrape NY Assembly standing committees and their members."""
    assembly_committees_url = "http://assembly.state.ny.us/comm/"
    with self.urlopen(assembly_committees_url) as html:
        doc = lxml.html.fromstring(html)
        # four <ul> lists under #sitelinks; only standing committees scraped
        (standing_committees, subcommittees,
         legislative_commissions, task_forces) = doc.cssselect('#sitelinks ul')
        committee_paths = set(
            a.get('href')
            for a in standing_committees.cssselect("li a[href]")
            if a.get("href").startswith('?sec=mem'))
        for committee_path in committee_paths:
            committee_url = assembly_committees_url + committee_path
            with self.urlopen(committee_url) as chtml:
                cdoc = lxml.html.fromstring(chtml)
                # heading reads "<name> Committee Members"
                for heading in cdoc.cssselect("#content .pagehdg"):
                    if heading.text:
                        committee_name = heading.text.split(
                            'Committee Members')[0].strip()
                        break
                committee = Committee("lower", committee_name)
                committee.add_source(committee_url)
                members = cdoc.cssselect("#sitelinks")[0]
                # the first listed member is the chair
                is_first = True
                for span in members.iter('span'):
                    member = span.xpath('li/a')[0].text
                    if is_first:
                        committee.add_member(member, 'chair')
                        is_first = False
                    else:
                        committee.add_member(member)
                self.save_committee(committee)
示例10: scrape_house_committees
# 需要导入模块: from fiftystates.scrape.committees import Committee [as 别名]
# 或者: from fiftystates.scrape.committees.Committee import add_source [as 别名]
def scrape_house_committees(self, term):
    """Scrape MN House committees from the member-list index page."""
    url = 'http://www.house.leg.state.mn.us/comm/commemlist.asp'
    with self.urlopen(url) as html:
        doc = lxml.html.fromstring(html)
        for heading in doc.xpath('//h2[@class="commhighlight"]'):
            # the "Members" link immediately follows each committee heading
            members_url = heading.xpath(
                'following-sibling::p[1]/a[text()="Members"]/@href')[0]
            com = Committee('lower', heading.text)
            com.add_source(members_url)
            with self.urlopen(members_url) as member_html:
                mdoc = lxml.html.fromstring(member_html)
                # each legislator has their own table; the first row's
                # second column carries name and (optional) role
                for cell in mdoc.xpath('//table/tr[1]/td[2]/p/b[1]'):
                    member_name = cell.text_content()
                    # role, if present, sits inside a nested <b> tag
                    role = cell.xpath('b/*/text()')
                    if role:
                        # strip the role text out of the combined name
                        role = role[0]
                        member_name = member_name.replace(role, '')
                    else:
                        role = 'member'
                    com.add_member(member_name, role)
            self.save_committee(com)
示例11: scrape_house
# 需要导入模块: from fiftystates.scrape.committees import Committee [as 别名]
# 或者: from fiftystates.scrape.committees.Committee import add_source [as 别名]
def scrape_house(self):
    """Scrape LA House committee assignments (one table row per rep).

    Committees are cached by name so members from different rows
    accumulate onto the same Committee object; all are saved at the end.
    """
    url = "http://house.louisiana.gov/H_Reps/H_Reps_CmtesFull.asp"
    # role suffixes baked into committee names on the site
    # (", Co-Chairmain" is the site's own spelling — do not "fix")
    role_suffixes = (
        (', Chairman', 'chairman'),
        (', Co-Chairmain', 'co-chairmain'),
        (', Vice Chair', 'vice chair'),
        (', Ex Officio', 'ex officio'),
    )
    comm_cache = {}
    with self.urlopen(url) as text:
        page = lxml.html.fromstring(text)
        for row in page.xpath("//table[@bordercolorlight='#EAEAEA']/tr"):
            cells = row.xpath('td')
            name = cells[0].xpath('string()').strip()
            if name.startswith('Vacant'):
                continue
            # committee names: the <font> text plus each <br>'s text/tail
            font = cells[1].xpath('font')[0]
            committees = []
            if font.text:
                committees.append(font.text.strip())
            for br in font.xpath('br'):
                if br.text:
                    committees.append(br.text.strip())
                if br.tail:
                    committees.append(br.tail)
            for comm_name in committees:
                mtype = 'member'
                for suffix, role in role_suffixes:
                    if comm_name.endswith(suffix):
                        mtype = role
                        comm_name = comm_name.replace(suffix, '')
                        break
                if comm_name.startswith('Joint'):
                    chamber = 'joint'
                else:
                    chamber = 'lower'
                if comm_name not in comm_cache:
                    committee = Committee(chamber, comm_name)
                    committee.add_source(url)
                    comm_cache[comm_name] = committee
                comm_cache[comm_name].add_member(name, mtype)
    for committee in comm_cache.values():
        self.save_committee(committee)
示例12: scrape
# 需要导入模块: from fiftystates.scrape.committees import Committee [as 别名]
# 或者: from fiftystates.scrape.committees.Committee import add_source [as 别名]
def scrape(self, chamber, year):
    """Minimal example: build a committee and a subcommittee by hand."""
    com = Committee('lower', 'Committee on Finance')
    com.add_source('http://example.com')
    # can optionally specify role
    com.add_member('Lou Adams', 'chairman')
    com.add_member('Bill Smith')
    # can also specify subcommittees, giving the parent committee name
    # as the third argument
    subcom = Committee('lower', 'Finance Subcommittee on Banking',
                       'Committee on Finance')
    # Fix: the source and member were previously added to `com` instead
    # of `subcom`, leaving the subcommittee empty.
    subcom.add_source('http://example.com')
    subcom.add_member('Bill Smith')
示例13: scrape_joint_comm
# 需要导入模块: from fiftystates.scrape.committees import Committee [as 别名]
# 或者: from fiftystates.scrape.committees.Committee import add_source [as 别名]
def scrape_joint_comm(self, chamber, session):
    """Scrape Maine joint committees from the legislature's Excel roster.

    The spreadsheet lists one member per row; consecutive rows sharing a
    committee name belong to the same committee. A cell value of 1 in
    column 1 marks the row's member as that chamber's chair.
    """
    fileurl = 'http://www.maine.gov/legis/house/commlist.xls'
    joint = urllib.urlopen(fileurl).read()
    # Fix: .xls is binary data — the old text-mode 'w' could corrupt it.
    # Use a context manager so the handle is closed even on error.
    with open('me_joint.xls', 'wb') as f:
        f.write(joint)
    wb = xlrd.open_workbook('me_joint.xls')
    sh = wb.sheet_by_index(0)
    cur_comm_name = ''
    chamber = 'joint'
    for rownum in range(1, sh.nrows):
        comm_name = sh.cell(rownum, 0).value
        first_name = sh.cell(rownum, 3).value
        middle_name = sh.cell(rownum, 4).value
        last_name = sh.cell(rownum, 5).value
        jrsr = sh.cell(rownum, 6).value
        full_name = first_name + " " + middle_name + " " + last_name + " " + jrsr
        party = sh.cell(rownum, 7).value
        legalres = sh.cell(rownum, 8).value
        address1 = sh.cell(rownum, 9).value
        address2 = sh.cell(rownum, 10).value
        town = sh.cell(rownum, 11).value
        state = sh.cell(rownum, 12).value
        zipcode = int(sh.cell(rownum, 13).value)
        phone = str(sh.cell(rownum, 14).value)
        home_email = sh.cell(rownum, 15).value
        leg_email = sh.cell(rownum, 16).value
        leg_chamber = sh.cell(rownum, 2).value
        chair = sh.cell(rownum, 1).value
        role = "member"
        if chair == 1:
            role = leg_chamber + " " + "Chair"
        if comm_name != cur_comm_name:
            # new committee starts here
            cur_comm_name = comm_name
            committee = Committee(chamber, comm_name)
            committee.add_member(full_name, role=role, party=party,
                                 legalres=legalres, address1=address1,
                                 address2=address2, town=town, state=state,
                                 zipcode=zipcode, phone=phone,
                                 home_email=home_email, leg_email=leg_email)
            committee.add_source(fileurl)
        else:
            committee.add_member(full_name, role=role, party=party,
                                 legalres=legalres, address1=address1,
                                 address2=address2, town=town, state=state,
                                 zipcode=zipcode, phone=phone,
                                 home_email=home_email, leg_email=leg_email)
        # NOTE(review): saving once per row re-saves the committee as
        # members accumulate; indentation in the original source is
        # ambiguous — confirm save_committee upserts by committee name.
        self.save_committee(committee)
示例14: scrape_index
# 需要导入模块: from fiftystates.scrape.committees import Committee [as 别名]
# 或者: from fiftystates.scrape.committees.Committee import add_source [as 别名]
def scrape_index(self, chamber, session, session_id, committee_type):
    """Scrape the AZ committee index XML for one chamber/type combination."""
    url = base_url + 'xml/committees.asp?session=%s&type=%s' % (session_id,
                                                                committee_type)
    with self.urlopen(url) as page:
        # recover=True: the feed is not always well-formed XML
        root = etree.fromstring(page, etree.XMLParser(recover=True))
        body_code = {'upper': 'S', 'lower': 'H'}[chamber]
        body = '//body[@Body="%s"]/committee' % body_code
        # TODO need to and make sure to add sub committees
        for com in root.xpath(body):
            # attribute order on <committee> is (id, name, short, sub)
            c_id, name, short_name, sub = com.values()
            c = Committee(chamber, name, short_name=short_name,
                          session=session, az_committee_id=c_id)
            c.add_source(url)
            self.scrape_com_info(session, session_id, c_id, c)
            self.save_committee(c)
示例15: scrape
# 需要导入模块: from fiftystates.scrape.committees import Committee [as 别名]
# 或者: from fiftystates.scrape.committees.Committee import add_source [as 别名]
def scrape(self, chamber, term):
    """Scrape NC standing and select committees for one chamber."""
    base_url = 'http://www.ncga.state.nc.us/gascripts/Committees/Committees.asp?bPrintable=true&sAction=ViewCommitteeType&sActionDetails='
    # URL-encoded committee-type query values per chamber
    chamber_types = {'upper': ['Senate%20Standing', 'Senate%20Select'],
                     'lower': ['House%20Standing', 'House%20Select']}
    for ctype in chamber_types[chamber]:
        listing_url = base_url + ctype
        with self.urlopen(listing_url) as data:
            doc = lxml.html.fromstring(data)
            doc.make_links_absolute(listing_url)
            for anchor in doc.xpath('//ul/li/a'):
                committee_name = anchor.text
                url = anchor.get('href')
                committee = Committee(chamber, committee_name)
                # delegate member scraping to the per-committee helper
                self.scrape_committee(committee, url)
                committee.add_source(url)
                self.save_committee(committee)