当前位置: 首页>>代码示例>>Python>>正文


Python Committee.add_source方法代码示例

本文整理汇总了Python中fiftystates.scrape.committees.Committee.add_source方法的典型用法代码示例。如果您正苦于以下问题:Python Committee.add_source方法的具体用法?Python Committee.add_source怎么用?Python Committee.add_source使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在fiftystates.scrape.committees.Committee的用法示例。


在下文中一共展示了Committee.add_source方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: scrape_committee

# 需要导入模块: from fiftystates.scrape.committees import Committee [as 别名]
# 或者: from fiftystates.scrape.committees.Committee import add_source [as 别名]
    def scrape_committee(self, chamber, term, name, url):
        """Scrape one committee page: member roster, chair and vice chair."""
        with self.urlopen(url) as raw:
            doc = lxml.html.fromstring(raw)

            com = Committee(chamber, name)
            com.add_source(url)

            # comma-separated member names follow the "Members:" label
            member_text = doc.xpath("//strong[contains(., 'Members:')]")[0].tail
            member_text = re.sub(r'\s+', ' ', member_text)
            for raw_name in member_text.split(','):
                # drop a trailing "R.M." / "R.M.M." honorific, if present
                cleaned = re.sub(r'R\.M\.(M\.)?$', '', raw_name.strip())
                com.add_member(cleaned.strip())

            # the chair/vice-chair names trail their <strong> labels
            for label, role in (('Chair:', 'chair'),
                                ('Vice Chair:', 'vice chair')):
                tag = doc.xpath("//strong[contains(., '%s')]" % label)[0]
                leader = tag.tail.strip()
                if leader:
                    com.add_member(leader, role)

            self.save_committee(com)
开发者ID:ecocity,项目名称:openstates,代码行数:27,代码来源:committees.py

示例2: scrape_senate_committee

# 需要导入模块: from fiftystates.scrape.committees import Committee [as 别名]
# 或者: from fiftystates.scrape.committees.Committee import add_source [as 别名]
    def scrape_senate_committee(self, term, link):
        """Scrape one Minnesota Senate committee page and save it.

        Fix: ``role`` was previously unbound until the first
        "Position: Name" row appeared, so a roster beginning with a
        plain name raised NameError.  It now defaults to 'member'.
        """
        with self.urlopen(link) as html:
            doc = lxml.html.fromstring(html)

            # strip first 30 and last 10 characters of the title:
            # "Minnesota Senate Committees - __________ Committee"
            committee_name = doc.xpath('//title/text()')[0][30:-10]

            com = Committee('upper', committee_name)

            # default role; a "Position: Name" row switches it for that
            # row and every following row until the next switch
            role = 'member'

            # the first id=bio table holds the member roster
            for row in doc.xpath('//table[@id="bio"]')[0].xpath('tr'):
                row = fix_whitespace(row.text_content())

                # switch role
                if ':' in row:
                    position, name = row.split(': ')
                    role = position.lower().strip()
                else:
                    name = row

                # add the member
                com.add_member(name, role)

            com.add_source(link)
            self.save_committee(com)
开发者ID:ecocity,项目名称:openstates,代码行数:28,代码来源:committees.py

示例3: scrape_senate

# 需要导入模块: from fiftystates.scrape.committees import Committee [as 别名]
# 或者: from fiftystates.scrape.committees.Committee import add_source [as 别名]
    def scrape_senate(self):
        """Scrape New York Senate committees and their membership."""
        senate_url = "http://www.nysenate.gov"
        listing_url = senate_url + "/committees"

        with self.urlopen(listing_url) as html:
            doc = lxml.html.fromstring(html)
            # distinct committee page paths from the listing
            paths = set(a.get("href") for a in doc.cssselect("li a")
                        if "/committee/" in a.get("href", ""))

        for path in paths:
            committee_url = senate_url + path
            with self.urlopen(committee_url) as chtml:
                cdoc = lxml.html.fromstring(chtml)
                # first non-empty .committee_name heading is the name
                for header in cdoc.cssselect(".committee_name"):
                    if header.text:
                        committee_name = header.text
                        break

                committee = Committee("upper", committee_name)
                committee.add_source(committee_url)

                # chair links read "Sen. <name>"
                for anchor in cdoc.cssselect(".committee-chair a[href]"):
                    if ("/senator/" in anchor.get("href") and anchor.text
                            and anchor.text.startswith("Sen.")):
                        committee.add_member(
                            anchor.text.split('Sen. ', 1)[1], "chair")

                for anchor in cdoc.cssselect(".committee-members a[href]"):
                    if "/senator/" in anchor.get("href"):
                        committee.add_member(anchor.text)

                self.save_committee(committee)
开发者ID:Empact,项目名称:fiftystates,代码行数:32,代码来源:committees.py

示例4: scrape_reps_comm

# 需要导入模块: from fiftystates.scrape.committees import Committee [as 别名]
# 或者: from fiftystates.scrape.committees.Committee import add_source [as 别名]
    def scrape_reps_comm(self, chamber, session):
        """Scrape Maine House committees from the static roster page.

        Fix: the original body mixed 3- and 7-space indentation;
        normalized to the file's 4-space convention (behavior unchanged).
        """
        url = 'http://www.maine.gov/legis/house/hsecoms.htm'

        with self.urlopen(url) as page:
            root = lxml.etree.fromstring(page, lxml.etree.HTMLParser())

            count = 0

            # committee headings live in the odd-numbered <center> blocks
            for n in range(1, 12, 2):
                path = 'string(//body/center[%s]/h1/a)' % (n)
                comm_name = root.xpath(path)
                committee = Committee(chamber, comm_name)
                count = count + 1

                # the matching member list is the count-th <ul>
                path2 = '/html/body/ul[%s]/li/a' % (count)

                for el in root.xpath(path2):
                    rep = el.text
                    if rep.find('(') != -1:
                        # drops a fixed 15-char prefix up to the '(' --
                        # presumably a title/honorific; verify on the page
                        mark = rep.find('(')
                        rep = rep[15: mark]
                    committee.add_member(rep)
                committee.add_source(url)

                self.save_committee(committee)
开发者ID:marlonkeating,项目名称:fiftystates,代码行数:28,代码来源:committees.py

示例5: scrape_reps_comm

# 需要导入模块: from fiftystates.scrape.committees import Committee [as 别名]
# 或者: from fiftystates.scrape.committees.Committee import add_source [as 别名]
    def scrape_reps_comm(self, chamber, year):
        """Scrape Ohio House committee pages by numeric committee id."""
        original_chamber = chamber

        # committee ids 87-123 cover the committees on the House site
        for comm_id in range(87, 124):
            chamber = original_chamber

            comm_url = ('http://www.house.state.oh.us/index.php?option='
                        'com_displaycommittees&task=2&type=Regular'
                        '&committeeId=' + str(comm_id))
            with self.urlopen(comm_url) as page:
                root = lxml.etree.fromstring(page, lxml.etree.HTMLParser())

                comm_name = root.xpath(
                    'string(//table/tr[@class="committeeHeader"]/td)')
                comm_name = comm_name.replace("/", " ")

                # ids below 92 are joint legislative committees
                if comm_id < 92:
                    chamber = "joint_legislation"

                committee = Committee(chamber, comm_name)

                path = '/html/body[@id="bd"]/div[@id="ja-wrapper"]/div[@id="ja-containerwrap-f"]/div[@id="ja-container"]/div[@id="ja-mainbody-f"]/div[@id="ja-contentwrap"]/div[@id="ja-content"]/table/tr[position() >=3]'

                # columns 1 and 4 of each roster row hold member names
                for row in root.xpath(path):
                    committee.add_member(row.xpath('string(td[1]/a)'))
                    committee.add_member(row.xpath('string(td[4]/a)'))

                committee.add_source(comm_url)
                self.save_committee(committee)
开发者ID:Empact,项目名称:fiftystates,代码行数:34,代码来源:committees.py

示例6: scrape

# 需要导入模块: from fiftystates.scrape.committees import Committee [as 别名]
# 或者: from fiftystates.scrape.committees.Committee import add_source [as 别名]
    def scrape(self, chamber, year):
        """Scrape Maryland House committees (Senate still TODO)."""
        # TODO: scrape senate committees
        house_url = 'http://www.msa.md.gov/msa/mdmanual/06hse/html/hsecom.html'

        with self.urlopen(house_url) as html:
            doc = lxml.html.fromstring(html)
            # distinct committee URLs contain /com/
            com_links = set(a.get('href') for a in doc.cssselect('li a')
                            if '/com/' in a.get('href', ''))

        for href in com_links:
            com_url = 'http://www.msa.md.gov' + href
            with self.urlopen(com_url) as chtml:
                cdoc = lxml.html.fromstring(chtml)
                # first non-empty h2/h3 heading names the committee
                for heading in cdoc.cssselect('h2, h3'):
                    if heading.text:
                        committee_name = heading.text
                        break
                current = Committee('lower', committee_name)
                current.add_source(com_url)
                for anchor in cdoc.cssselect('a[href]'):
                    if ' SUBCOMMITTEE' in (anchor.text or ''):
                        # flush what was built so far, then start a
                        # subcommittee under the same parent name
                        self.save_committee(current)
                        current = Committee('lower', anchor.text,
                                            committee_name)
                        current.add_source(com_url)
                    elif 'html/msa' in anchor.get('href'):
                        current.add_member(anchor.text)
                self.save_committee(current)
开发者ID:marlonkeating,项目名称:fiftystates,代码行数:30,代码来源:committees.py

示例7: scrape_reps_comm

# 需要导入模块: from fiftystates.scrape.committees import Committee [as 别名]
# 或者: from fiftystates.scrape.committees.Committee import add_source [as 别名]
    def scrape_reps_comm(self, chamber, term):
        """Scrape Ohio House committees (term-based variant)."""
        original_chamber = chamber

        # id range for senate committees on their website
        for comm_id in range(87, 124):
            chamber = original_chamber
            comm_url = (
                "http://www.house.state.oh.us/index.php?option="
                "com_displaycommittees&task=2&type=Regular&"
                "committeeId=%d" % comm_id
            )

            with self.urlopen(comm_url) as raw:
                page = lxml.etree.fromstring(raw, lxml.etree.HTMLParser())

                name = page.xpath(
                    'string(//table/tr[@class="committeeHeader"]/td)'
                ).replace("/", " ")

                # ids below 92 are joint committees
                if comm_id < 92:
                    chamber = "joint"

                committee = Committee(chamber, name)
                committee.add_source(comm_url)

                # member links point at legislator district pages
                for link in page.xpath("//a[contains(@href, 'district')]"):
                    member = link.text
                    if member and member.strip():
                        committee.add_member(member.strip())

                self.save_committee(committee)
开发者ID:ecocity,项目名称:openstates,代码行数:32,代码来源:committees.py

示例8: scrape_comm

# 需要导入模块: from fiftystates.scrape.committees import Committee [as 别名]
# 或者: from fiftystates.scrape.committees.Committee import add_source [as 别名]
    def scrape_comm(self, chamber, term_name):
        """Scrape Mississippi committee membership from the XML feed.

        ``chamber`` is the feed's URL key ('h' or 's') and is mapped to
        'lower'/'upper' for the saved committees.

        Fix: empty entries in the ';'-separated member list used to
        raise IndexError on ``leg[0]``; they are now skipped, and the
        single-leading-space strip is replaced with a full strip.
        """
        url = "http://billstatus.ls.state.ms.us/htms/%s_cmtememb.xml" % chamber
        with self.urlopen(url) as comm_page:
            root = lxml.etree.fromstring(comm_page, lxml.etree.HTMLParser())
            chamber = "lower" if chamber == "h" else "upper"

            for mr in root.xpath("//committee"):
                name = mr.xpath("string(name)")
                comm = Committee(chamber, name)

                # the chair/vice-chair fields embed their own titles
                chair = mr.xpath("string(chair)").replace(", Chairman", "")
                if chair:
                    comm.add_member(chair, role="Chairman")

                vice_chair = mr.xpath("string(vice_chair)").replace(
                    ", Vice-Chairman", "")
                if vice_chair:
                    comm.add_member(vice_chair, role="Vice-Chairman")

                for leg in mr.xpath("string(members)").split(";"):
                    leg = leg.strip()
                    if leg:
                        comm.add_member(leg)

                comm.add_source(url)
                self.save_committee(comm)
开发者ID:runderwood,项目名称:fiftystates,代码行数:33,代码来源:committees.py

示例9: scrape_assembly

# 需要导入模块: from fiftystates.scrape.committees import Committee [as 别名]
# 或者: from fiftystates.scrape.committees.Committee import add_source [as 别名]
    def scrape_assembly(self):
        """Scrape New York Assembly standing committees."""
        assembly_committees_url = "http://assembly.state.ny.us/comm/"

        with self.urlopen(assembly_committees_url) as html:
            doc = lxml.html.fromstring(html)
            # four link groups under #sitelinks; only the first
            # (standing committees) is scraped here
            (standing_committees, subcommittees,
             legislative_commissions, task_forces) = doc.cssselect('#sitelinks ul')
            committee_paths = set(
                a.get('href')
                for a in standing_committees.cssselect("li a[href]")
                if a.get("href").startswith('?sec=mem'))

        for path in committee_paths:
            committee_url = assembly_committees_url + path
            with self.urlopen(committee_url) as chtml:
                cdoc = lxml.html.fromstring(chtml)
                # heading reads "<name> Committee Members"
                for heading in cdoc.cssselect("#content .pagehdg"):
                    if heading.text:
                        committee_name = heading.text.split(
                            'Committee Members')[0].strip()
                        break

                committee = Committee("lower", committee_name)
                committee.add_source(committee_url)
                roster = cdoc.cssselect("#sitelinks")[0]

                # the first listed member is the chair
                for idx, span in enumerate(roster.iter('span')):
                    member = span.xpath('li/a')[0].text
                    if idx == 0:
                        committee.add_member(member, 'chair')
                    else:
                        committee.add_member(member)

                self.save_committee(committee)
开发者ID:Empact,项目名称:fiftystates,代码行数:35,代码来源:committees.py

示例10: scrape_house_committees

# 需要导入模块: from fiftystates.scrape.committees import Committee [as 别名]
# 或者: from fiftystates.scrape.committees.Committee import add_source [as 别名]
    def scrape_house_committees(self, term):
        """Scrape Minnesota House committees and their member rosters."""
        url = 'http://www.house.leg.state.mn.us/comm/commemlist.asp'

        with self.urlopen(url) as html:
            doc = lxml.html.fromstring(html)

            for heading in doc.xpath('//h2[@class="commhighlight"]'):
                # "Members" link immediately follows the heading
                members_url = heading.xpath(
                    'following-sibling::p[1]/a[text()="Members"]/@href')[0]

                com = Committee('lower', heading.text)
                com.add_source(members_url)

                with self.urlopen(members_url) as member_html:
                    mdoc = lxml.html.fromstring(member_html)

                    # each legislator sits in their own table; the first
                    # row's second column contains all the info
                    for cell in mdoc.xpath('//table/tr[1]/td[2]/p/b[1]'):
                        name = cell.text_content()

                        # a nested <b> holds the role, if any
                        role = cell.xpath('b/*/text()')
                        if role:
                            role = role[0]
                            # strip the role text back out of the name
                            name = name.replace(role, '')
                        else:
                            role = 'member'
                        com.add_member(name, role)

                # save
                self.save_committee(com)
开发者ID:ecocity,项目名称:openstates,代码行数:36,代码来源:committees.py

示例11: scrape_house

# 需要导入模块: from fiftystates.scrape.committees import Committee [as 别名]
# 或者: from fiftystates.scrape.committees.Committee import add_source [as 别名]
    def scrape_house(self):
        """Scrape Louisiana House members' committee assignments.

        The roster page lists each representative together with the
        committees they sit on, so committees are accumulated in a
        cache keyed by name and saved once after all rows are processed.
        """
        url = "http://house.louisiana.gov/H_Reps/H_Reps_CmtesFull.asp"
        # committee name -> Committee, shared across all member rows
        comm_cache = {}
        with self.urlopen(url) as text:
            page = lxml.html.fromstring(text)

            for row in page.xpath("//table[@bordercolorlight='#EAEAEA']/tr"):
                cells = row.xpath('td')

                # first cell: the representative's name
                name = cells[0].xpath('string()').strip()

                if name.startswith('Vacant'):
                    continue

                # second cell: committee names separated by <br> tags
                font = cells[1].xpath('font')[0]
                committees = []

                if font.text:
                    committees.append(font.text.strip())
                for br in font.xpath('br'):
                    if br.text:
                        committees.append(br.text.strip())
                    if br.tail:
                        committees.append(br.tail)

                for comm_name in committees:
                    # a role suffix on the committee name marks this
                    # member's position; strip it before caching
                    mtype = 'member'
                    if comm_name.endswith(', Chairman'):
                        mtype = 'chairman'
                        comm_name = comm_name.replace(', Chairman', '')
                    elif comm_name.endswith(', Co-Chairmain'):
                        # NOTE(review): "Chairmain" looks misspelled but
                        # presumably mirrors the site's own text - verify
                        mtype = 'co-chairmain'
                        comm_name = comm_name.replace(', Co-Chairmain', '')
                    elif comm_name.endswith(', Vice Chair'):
                        mtype = 'vice chair'
                        comm_name = comm_name.replace(', Vice Chair', '')
                    elif comm_name.endswith(', Ex Officio'):
                        mtype = 'ex officio'
                        comm_name = comm_name.replace(', Ex Officio', '')

                    if comm_name.startswith('Joint'):
                        chamber = 'joint'
                    else:
                        chamber = 'lower'

                    # reuse the cached committee, creating it on first sight
                    try:
                        committee = comm_cache[comm_name]
                    except KeyError:
                        committee = Committee(chamber, comm_name)
                        committee.add_source(url)
                        comm_cache[comm_name] = committee

                    committee.add_member(name, mtype)

            # save each committee once, after every row has contributed
            for committee in comm_cache.values():
                self.save_committee(committee)
开发者ID:ecocity,项目名称:openstates,代码行数:58,代码来源:committees.py

示例12: scrape

# 需要导入模块: from fiftystates.scrape.committees import Committee [as 别名]
# 或者: from fiftystates.scrape.committees.Committee import add_source [as 别名]
    def scrape(self, chamber, year):
        """Example scraper: build a committee and a subcommittee.

        Fix: the source and member for the subcommittee were previously
        added to the parent ``com`` instead of ``subcom``, leaving the
        subcommittee empty.
        """
        com = Committee('lower', 'Committee on Finance')
        com.add_source('http://example.com')
        # can optionally specify role
        com.add_member('Lou Adams', 'chairman')
        com.add_member('Bill Smith')

        # can also specify subcommittees
        subcom = Committee('lower', 'Finance Subcommittee on Banking', 'Committee on Finance')
        subcom.add_source('http://example.com')
        subcom.add_member('Bill Smith')
开发者ID:Empact,项目名称:fiftystates,代码行数:13,代码来源:committees.py

示例13: scrape_joint_comm

# 需要导入模块: from fiftystates.scrape.committees import Committee [as 别名]
# 或者: from fiftystates.scrape.committees.Committee import add_source [as 别名]
    def scrape_joint_comm(self, chamber, session):
        """Scrape Maine joint committees from the published Excel roster.

        The sheet lists one legislator per row grouped by committee, so
        a new Committee is started whenever the committee-name column
        changes.

        Fixes: the downloaded .xls is now written in binary mode inside
        a context manager (the original text-mode open/write/close could
        corrupt the file on Windows and leaked the handle on error), and
        the add_member call duplicated across both branches is merged.
        """
        fileurl = 'http://www.maine.gov/legis/house/commlist.xls'

        joint = urllib.urlopen(fileurl).read()
        with open('me_joint.xls', 'wb') as xls_file:
            xls_file.write(joint)

        wb = xlrd.open_workbook('me_joint.xls')
        sh = wb.sheet_by_index(0)

        cur_comm_name = ''
        chamber = 'joint'

        for rownum in range(1, sh.nrows):
            comm_name = sh.cell(rownum, 0).value

            first_name = sh.cell(rownum, 3).value
            middle_name = sh.cell(rownum, 4).value
            last_name = sh.cell(rownum, 5).value
            jrsr = sh.cell(rownum, 6).value
            full_name = first_name + " " + middle_name + " " + last_name + " " + jrsr

            party = sh.cell(rownum, 7).value
            legalres = sh.cell(rownum, 8).value
            address1 = sh.cell(rownum, 9).value
            address2 = sh.cell(rownum, 10).value
            town = sh.cell(rownum, 11).value
            state = sh.cell(rownum, 12).value
            zipcode = int(sh.cell(rownum, 13).value)
            phone = str(sh.cell(rownum, 14).value)
            home_email = sh.cell(rownum, 15).value
            leg_email = sh.cell(rownum, 16).value

            leg_chamber = sh.cell(rownum, 2).value
            chair = sh.cell(rownum, 1).value
            # a 1 in the chair column marks this member as chair
            # for their own chamber
            role = "member"
            if chair == 1:
                role = leg_chamber + " " + "Chair"

            if comm_name != cur_comm_name:
                # a new committee block starts on this row
                cur_comm_name = comm_name
                committee = Committee(chamber, comm_name)
                committee.add_source(fileurl)

            committee.add_member(full_name, role=role, party=party,
                                 legalres=legalres, address1=address1,
                                 address2=address2, town=town, state=state,
                                 zipcode=zipcode, phone=phone,
                                 home_email=home_email, leg_email=leg_email)

            # NOTE(review): saving inside the loop re-saves the same
            # committee after each row, matching the original behavior
            self.save_committee(committee)
开发者ID:marlonkeating,项目名称:fiftystates,代码行数:54,代码来源:committees.py

示例14: scrape_index

# 需要导入模块: from fiftystates.scrape.committees import Committee [as 别名]
# 或者: from fiftystates.scrape.committees.Committee import add_source [as 别名]
 def scrape_index(self, chamber, session, session_id, committee_type):
     """Scrape the Arizona committee index XML for one chamber."""
     url = base_url + 'xml/committees.asp?session=%s&type=%s' % (session_id,
                                                                 committee_type)
     with self.urlopen(url) as page:
         root = etree.fromstring(page, etree.XMLParser(recover=True))

         # committees for this chamber live under body[@Body="S"|"H"]
         chamber_code = {'upper': 'S', 'lower': 'H'}[chamber]
         selector = '//body[@Body="%s"]/committee' % chamber_code
         # TODO need to and make sure to add sub committees
         for com in root.xpath(selector):
             c_id, name, short_name, sub = com.values()
             committee = Committee(chamber, name, short_name=short_name,
                                   session=session, az_committee_id=c_id)
             committee.add_source(url)
             self.scrape_com_info(session, session_id, c_id, committee)
             self.save_committee(committee)
开发者ID:ecocity,项目名称:openstates,代码行数:18,代码来源:committees.py

示例15: scrape

# 需要导入模块: from fiftystates.scrape.committees import Committee [as 别名]
# 或者: from fiftystates.scrape.committees.Committee import add_source [as 别名]
    def scrape(self, chamber, term):
        """Scrape North Carolina standing and select committees."""
        base_url = 'http://www.ncga.state.nc.us/gascripts/Committees/Committees.asp?bPrintable=true&sAction=ViewCommitteeType&sActionDetails='

        # each chamber has a standing and a select committee listing
        type_map = {'upper': ['Senate%20Standing', 'Senate%20Select'],
                    'lower': ['House%20Standing', 'House%20Select']}

        for ctype in type_map[chamber]:
            listing_url = base_url + ctype
            with self.urlopen(listing_url) as data:
                doc = lxml.html.fromstring(data)
                doc.make_links_absolute(listing_url)
                for link in doc.xpath('//ul/li/a'):
                    name = link.text
                    url = link.get('href')
                    committee = Committee(chamber, name)
                    self.scrape_committee(committee, url)
                    committee.add_source(url)
                    self.save_committee(committee)
开发者ID:acmewebservices,项目名称:openstates,代码行数:19,代码来源:committees.py


注:本文中的fiftystates.scrape.committees.Committee.add_source方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。