当前位置: 首页>>代码示例>>Python>>正文


Python committees.Committee类代码示例

本文整理汇总了Python中fiftystates.scrape.committees.Committee的典型用法代码示例。如果您正苦于以下问题：Python Committee类的具体用法？Python Committee怎么用？Python Committee使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了Committee类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: scrape_reps_comm

    def scrape_reps_comm(self, chamber, session):
        """Scrape the committees listed on the Maine House committee page.

        Committee names are read from the odd-numbered ``<center>`` elements
        of the page body (positions 1, 3, ..., 11).  The members of the k-th
        committee are the links inside the k-th ``<ul>`` of the body.  Each
        committee gets the page URL as its source and is saved.

        :param chamber: chamber value handed to ``Committee``.
        :param session: accepted for interface compatibility; not used here.
        """
        url = 'http://www.maine.gov/legis/house/hsecoms.htm'

        with self.urlopen(url) as page:
            root = lxml.etree.fromstring(page, lxml.etree.HTMLParser())

            for center_index in range(1, 12, 2):
                # <center> 1, 3, 5, ... pairs with <ul> 1, 2, 3, ...
                list_index = (center_index + 1) // 2

                comm_name = root.xpath(
                    'string(//body/center[%s]/h1/a)' % center_index)
                committee = Committee(chamber, comm_name)

                for el in root.xpath('/html/body/ul[%s]/li/a' % list_index):
                    rep = el.text
                    if rep is None:
                        # A link without direct text has no name to record;
                        # calling .find() on None would raise.
                        continue

                    mark = rep.find('(')
                    if mark != -1:
                        # Keep only the text between the first 15 characters
                        # and the opening parenthesis.
                        rep = rep[15:mark]
                    committee.add_member(rep)

                committee.add_source(url)
                self.save_committee(committee)
开发者ID:marlonkeating,项目名称:fiftystates,代码行数:26,代码来源:committees.py

示例2: scrape_senate_committee

    def scrape_senate_committee(self, term, link):
        """Scrape one Minnesota Senate committee page and save the committee.

        The committee name is cut out of the page ``<title>``; members are the
        rows of the first ``<table id="bio">``.  A row of the form
        ``"Position: Name"`` switches the current role, and that role stays in
        effect for following rows that carry no position of their own.

        :param term: accepted for interface compatibility; not used here.
        :param link: URL of the committee page; also recorded as the source.
        """
        with self.urlopen(link) as html:
            doc = lxml.html.fromstring(html)

            # strip first 30 and last 10
            # Minnesota Senate Committees - __________ Committee
            committee_name = doc.xpath('//title/text()')[0][30:-10]

            com = Committee('upper', committee_name)

            # Role used until the first "Position:" row is seen; without this
            # a leading row with no colon would hit an unbound variable.
            role = 'member'

            # first id=bio table is members
            for row in doc.xpath('//table[@id="bio"]')[0].xpath('tr'):
                row = fix_whitespace(row.text_content())

                if ':' in row:
                    # partition() splits on the first colon only, so a colon
                    # without a trailing space, or a second colon in the
                    # name, no longer breaks the two-way unpacking.
                    position, _, name = row.partition(':')
                    role = position.lower().strip()
                    name = name.strip()
                else:
                    name = row

                com.add_member(name, role)

            com.add_source(link)
            self.save_committee(com)
开发者ID:ecocity,项目名称:openstates,代码行数:26,代码来源:committees.py

示例3: scrape

    def scrape(self, chamber, term):
        """Scrape Utah standing committees for one chamber.

        Every link whose href contains ``Com=`` is treated as a committee.
        Its members are the non-mailto links found under
        ``../../../font[2]`` relative to the committee link; an ``<i>``
        element directly following a member link supplies that member's role.

        :param chamber: ``'upper'`` or ``'lower'``; any other value raises
            ``KeyError``.
        :param term: must be ``'2011-2012'``.
        :raises NoDataForPeriod: for any other term.
        """
        if term != '2011-2012':
            raise NoDataForPeriod(term)

        chamber_abbr = {'upper': 's', 'lower': 'h'}[chamber]

        url = "http://le.utah.gov/asp/interim/standing.asp?house=%s" % chamber_abbr
        with self.urlopen(url) as page:
            page = lxml.html.fromstring(page)
            page.make_links_absolute(url)

            for comm_link in page.xpath("//a[contains(@href, 'Com=')]"):
                # .text is None when the link has no direct text (e.g. it
                # wraps another element); skip it instead of crashing.
                comm_name = (comm_link.text or '').strip()
                if not comm_name:
                    continue

                # Drop leading "House" or "Senate" from name
                comm_name = re.sub(r"^(House|Senate) ", "", comm_name)

                comm = Committee(chamber, comm_name)
                # Record where the data came from, as the sibling scrapers do.
                comm.add_source(url)

                for mbr_link in comm_link.xpath(
                        "../../../font[2]/a[not(contains(@href, 'mailto'))]"):

                    name = (mbr_link.text or '').strip()
                    if not name:
                        continue

                    # "role" rather than "type" so the builtin is not shadowed.
                    role = 'member'
                    next_el = mbr_link.getnext()
                    if next_el is not None and next_el.tag == 'i':
                        italic_text = (next_el.text or '').strip()
                        if italic_text:
                            role = italic_text

                    comm.add_member(name, role)

                self.save_committee(comm)
开发者ID:ecocity,项目名称:openstates,代码行数:33,代码来源:committees.py

示例4: scrape_reps_comm

    def scrape_reps_comm(self, chamber, term):
        """Fetch committee pages 87 through 123 from the Ohio House site.

        For each id the committee name is read from the ``committeeHeader``
        table row (with ``/`` replaced by a space) and every link whose href
        contains ``district`` and has non-blank text is added as a member.
        Ids below 92 are saved with chamber ``"joint"``; all others use the
        ``chamber`` argument.

        :param chamber: chamber for the non-joint committees.
        :param term: accepted for interface compatibility; not used here.
        """
        for committee_id in range(87, 124):
            page_url = (
                "http://www.house.state.oh.us/index.php?option="
                "com_displaycommittees&task=2&type=Regular&"
                "committeeId=%d" % committee_id
            )

            # The low ids are filed under the joint chamber.
            if committee_id < 92:
                committee_chamber = "joint"
            else:
                committee_chamber = chamber

            with self.urlopen(page_url) as html:
                tree = lxml.etree.fromstring(html, lxml.etree.HTMLParser())

                header = tree.xpath(
                    'string(//table/tr[@class="committeeHeader"]/td)')
                committee = Committee(committee_chamber,
                                      header.replace("/", " "))
                committee.add_source(page_url)

                for anchor in tree.xpath("//a[contains(@href, 'district')]"):
                    member = anchor.text
                    if not member or not member.strip():
                        continue
                    committee.add_member(member.strip())

                self.save_committee(committee)
开发者ID:ecocity,项目名称:openstates,代码行数:30,代码来源:committees.py

示例5: scrape_house_committees

    def scrape_house_committees(self, term):
        """Scrape Minnesota House committees and their member lists.

        Each ``<h2 class="commhighlight">`` on the listing page names a
        committee; the "Members" link in the paragraph right after it points
        to the member page, which is recorded as the committee's source.

        :param term: accepted for interface compatibility; not used here.
        """
        url = 'http://www.house.leg.state.mn.us/comm/commemlist.asp'

        with self.urlopen(url) as html:
            doc = lxml.html.fromstring(html)

            for heading in doc.xpath('//h2[@class="commhighlight"]'):
                member_hrefs = heading.xpath(
                    'following-sibling::p[1]/a[text()="Members"]/@href')
                members_url = member_hrefs[0]

                committee = Committee('lower', heading.text)
                committee.add_source(members_url)

                with self.urlopen(members_url) as member_html:
                    mdoc = lxml.html.fromstring(member_html)

                    # One table per legislator; the first bold element in the
                    # first row's second column holds the name and any role.
                    for bold in mdoc.xpath('//table/tr[1]/td[2]/p/b[1]'):
                        name = bold.text_content()

                        # A role, when present, sits in a nested <b> tag.
                        nested = bold.xpath('b/*/text()')
                        if not nested:
                            role = 'member'
                        else:
                            role = nested[0]
                            # The role text is part of text_content(); cut it
                            # out so only the name remains.
                            name = name.replace(role, '')

                        committee.add_member(name, role)

                self.save_committee(committee)
开发者ID:ecocity,项目名称:openstates,代码行数:34,代码来源:committees.py

示例6: scrape_reps_comm

    def scrape_reps_comm(self, chamber, year):
        """Fetch committee pages 87 through 123 from the Ohio House site.

        The committee name comes from the ``committeeHeader`` table row (with
        ``/`` replaced by a space).  Members are read from the links in the
        first and fourth cells of every content-table row from the third row
        on.  Ids below 92 are saved with chamber ``"joint_legislation"``.

        :param chamber: chamber for the remaining committees.
        :param year: accepted for interface compatibility; not used here.
        """
        # Table rows holding the member links, starting at the third row.
        path = '/html/body[@id="bd"]/div[@id="ja-wrapper"]/div[@id="ja-containerwrap-f"]/div[@id="ja-container"]/div[@id="ja-mainbody-f"]/div[@id="ja-contentwrap"]/div[@id="ja-content"]/table/tr[position() >=3]'
        # The two member links of a row live in its first and fourth cells.
        member_cells = ('string(td[1]/a)', 'string(td[4]/a)')

        for comm_id in range(87, 124):
            comm_url = 'http://www.house.state.oh.us/index.php?option=com_displaycommittees&task=2&type=Regular&committeeId=' + str(comm_id)

            # joint legislative committees
            if comm_id < 92:
                comm_chamber = "joint_legislation"
            else:
                comm_chamber = chamber

            with self.urlopen(comm_url) as page:
                root = lxml.etree.fromstring(page, lxml.etree.HTMLParser())

                comm_name = root.xpath('string(//table/tr[@class="committeeHeader"]/td)')
                comm_name = comm_name.replace("/", " ")

                committee = Committee(comm_chamber, comm_name)

                for el in root.xpath(path):
                    for cell in member_cells:
                        # string() yields '' when the cell has no link (for
                        # example the last row of an odd-sized committee);
                        # such blanks must not become members.
                        rep = el.xpath(cell).strip()
                        if rep:
                            committee.add_member(rep)

                committee.add_source(comm_url)
                self.save_committee(committee)
开发者ID:Empact,项目名称:fiftystates,代码行数:32,代码来源:committees.py

示例7: scrape_house

    def scrape_house(self):
        """Scrape Louisiana House committee assignments.

        The page lists one representative per table row, with that person's
        committee assignments in the second cell separated by ``<br>`` tags.
        Assignments are regrouped by committee name, and each distinct
        committee is saved once with all of its members.
        """
        url = "http://house.louisiana.gov/H_Reps/H_Reps_CmtesFull.asp"
        comm_cache = {}

        # (suffix on the assignment text, membership type recorded for it).
        # NOTE(review): 'Co-Chairmain' is kept exactly as the original matched
        # it -- confirm against the live page whether that spelling is right.
        role_suffixes = (
            (', Chairman', 'chairman'),
            (', Co-Chairmain', 'co-chairmain'),
            (', Vice Chair', 'vice chair'),
            (', Ex Officio', 'ex officio'),
        )

        with self.urlopen(url) as text:
            page = lxml.html.fromstring(text)

            for row in page.xpath("//table[@bordercolorlight='#EAEAEA']/tr"):
                cells = row.xpath('td')

                name = cells[0].xpath('string()').strip()

                if name.startswith('Vacant'):
                    continue

                font = cells[1].xpath('font')[0]

                # Assignment text lives in the font's leading text and in the
                # text/tail of each <br>, in document order.
                pieces = [font.text]
                for br in font.xpath('br'):
                    pieces.append(br.text)
                    pieces.append(br.tail)

                # Strip every piece (tails were previously kept raw, which
                # defeated the endswith() checks below) and drop blanks so
                # whitespace-only tails do not become committees.
                committees = [piece.strip() for piece in pieces
                              if piece and piece.strip()]

                for comm_name in committees:
                    mtype = 'member'
                    for suffix, suffix_type in role_suffixes:
                        if comm_name.endswith(suffix):
                            mtype = suffix_type
                            comm_name = comm_name.replace(suffix, '')
                            break

                    if comm_name.startswith('Joint'):
                        chamber = 'joint'
                    else:
                        chamber = 'lower'

                    committee = comm_cache.get(comm_name)
                    if committee is None:
                        committee = Committee(chamber, comm_name)
                        committee.add_source(url)
                        comm_cache[comm_name] = committee

                    committee.add_member(name, mtype)

            for committee in comm_cache.values():
                self.save_committee(committee)
开发者ID:ecocity,项目名称:openstates,代码行数:56,代码来源:committees.py

示例8: scrape_committee

    def scrape_committee(self, chamber, term, name, url):
        """Scrape one committee page and save the committee.

        The page labels its sections with ``<strong>`` elements
        ("Members:", "Chair:", "Vice Chair:"); the text following each label
        holds the value.  Members are comma separated, and a trailing
        "R.M." / "R.M.M." marker is removed from each name.

        :param chamber: chamber value handed to ``Committee``.
        :param term: accepted for interface compatibility; not used here.
        :param name: committee name.
        :param url: page URL; also recorded as the source.
        """
        def tail_after(doc, xpath):
            # Text following the first element matched by xpath, or '' when
            # the label is absent or has nothing after it.
            hits = doc.xpath(xpath)
            if not hits or not hits[0].tail:
                return ''
            return hits[0].tail

        with self.urlopen(url) as page:
            page = lxml.html.fromstring(page)

            committee = Committee(chamber, name)
            committee.add_source(url)

            mlist = tail_after(page, "//strong[contains(., 'Members:')]")
            mlist = re.sub(r'\s+', ' ', mlist)

            for member in mlist.split(','):
                member = re.sub(r'R\.M\.(M\.)?$', '', member.strip()).strip()
                # A trailing comma or empty list leaves blank fragments.
                if member:
                    committee.add_member(member)

            # "Vice Chair:" also contains "Chair:", so exclude it explicitly;
            # otherwise the first match depends on document order.
            chair_name = tail_after(
                page,
                "//strong[contains(., 'Chair:')"
                " and not(contains(., 'Vice Chair:'))]").strip()
            if chair_name:
                committee.add_member(chair_name, 'chair')

            vc_name = tail_after(
                page, "//strong[contains(., 'Vice Chair:')]").strip()
            if vc_name:
                committee.add_member(vc_name, 'vice chair')

            self.save_committee(committee)
开发者ID:ecocity,项目名称:openstates,代码行数:25,代码来源:committees.py

示例9: scrape

    def scrape(self, chamber, year):
        """Scrape Pennsylvania committee assignments for one chamber.

        The member-information page lists each legislator with links to the
        committees they sit on.  Assignments are regrouped into one
        ``Committee`` per (chamber, committee, subcommittee) combination and
        each is saved once.

        :param chamber: ``'upper'`` selects the senators page; anything else
            selects the representatives page.
        :param year: must be ``'2009'``.
        :raises NoDataForPeriod: for any other year.
        """
        if year != '2009':
            raise NoDataForPeriod(year)

        member_info = ('http://www.legis.state.pa.us/cfdocs/legis/'
                       'home/member_information/')
        if chamber == 'upper':
            url = member_info + 'senators_ca.cfm'
        else:
            url = member_info + 'representatives_ca.cfm'

        with self.urlopen(url) as page:
            page = lxml.html.fromstring(page)

            committees = {}

            for item in page.xpath("//a[contains(@href, 'bio.cfm')]/../.."):
                legislator = item.xpath(
                    "string(b/a[contains(@href, 'bio.cfm')])")
                # Drop the last four characters of the link text.
                legislator = legislator[:-4]

                for link in item.xpath("a"):
                    tail = link.tail
                    if not tail:
                        continue

                    committee_name = re.sub(r"\s+", " ", tail.strip())
                    subcommittee_name = None
                    role = 'member'

                    # Italic text, when present, carries the subcommittee
                    # and/or the role.
                    rest = link.xpath('string(../i)')
                    if rest:
                        match = re.match(r',\s+(Subcommittee on .*)\s+-',
                                         rest)
                        if match is None:
                            role = rest.replace(', ', '').strip()
                        else:
                            subcommittee_name = match.group(1)
                            role = rest.split('-')[1].strip()

                    key = (chamber, committee_name, subcommittee_name)
                    if key not in committees:
                        committee = Committee(chamber, committee_name)
                        if subcommittee_name:
                            committee['subcommittee'] = subcommittee_name
                        committees[key] = committee

                    committees[key].add_member(legislator, role)

            for committee in committees.values():
                self.save_committee(committee)
开发者ID:runderwood,项目名称:fiftystates,代码行数:55,代码来源:committees.py

示例10: scrape_senate

    def scrape_senate(self):
        """Save one upper-chamber committee per openlegislation entry.

        Names are title-cased with ``And`` lowered back to ``and``; every
        member's ``fullname`` is added without a role.
        """
        all_committees = nyss_openlegislation.models.committees

        for raw_name, record in all_committees.items():
            display_name = raw_name.title().replace('And', 'and')
            committee = Committee('upper', display_name)

            for senator in record.members:
                committee.add_member(senator.fullname)

            self.save_committee(committee)
开发者ID:acmewebservices,项目名称:openstates,代码行数:11,代码来源:committees.py

示例11: scrape

    def scrape(self, chamber, year):
        """Scrape Maryland House committees and their subcommittees.

        The index page is searched for distinct links containing ``/com/``.
        On each committee page the first ``h2``/``h3`` with text gives the
        committee name.  Links are then walked in order: a link whose text
        contains `` SUBCOMMITTEE`` saves the committee built so far and starts
        a new one, and links whose href contains ``html/msa`` are members of
        the current one.

        :param chamber: not used; every committee is saved as ``'lower'``.
        :param year: accepted for interface compatibility; not used here.
        """
        # TODO: scrape senate committees
        house_url = 'http://www.msa.md.gov/msa/mdmanual/06hse/html/hsecom.html'

        with self.urlopen(house_url) as html:
            doc = lxml.html.fromstring(html)
            # distinct URLs containing /com/
            committees = set(l.get('href') for l in doc.cssselect('li a')
                             if '/com/' in l.get('href', ''))

        for com in committees:
            com_url = 'http://www.msa.md.gov'+com
            with self.urlopen(com_url) as chtml:
                cdoc = lxml.html.fromstring(chtml)

                # Reset per page: otherwise a page without a usable heading
                # would reuse the previous page's name, or hit an unbound
                # variable on the very first page.
                committee_name = None
                for h in cdoc.cssselect('h2, h3'):
                    if h.text:
                        committee_name = h.text
                        break
                if committee_name is None:
                    continue

                cur_com = Committee('lower', committee_name)
                cur_com.add_source(com_url)
                for l in cdoc.cssselect('a[href]'):
                    link_text = l.text or ''
                    if ' SUBCOMMITTEE' in link_text:
                        self.save_committee(cur_com)
                        # NOTE(review): the subcommittee title is passed as
                        # the second argument and the parent name as the
                        # third, as in the original -- confirm this matches
                        # the Committee constructor's argument order.
                        cur_com = Committee('lower', link_text, committee_name)
                        cur_com.add_source(com_url)
                    elif 'html/msa' in l.get('href') and link_text:
                        # Links with no direct text carry no member name.
                        cur_com.add_member(link_text)
                self.save_committee(cur_com)
开发者ID:marlonkeating,项目名称:fiftystates,代码行数:28,代码来源:committees.py

示例12: scrape_senate_committee

    def scrape_senate_committee(self, name, url):
        """Scrape the assignments page of one Senate committee and save it.

        ``Default.asp`` in the given URL is swapped for ``Assignments.asp``.
        Each link in the assignments table becomes a member, with the
        ``Senator `` prefix removed from its text.

        :param name: committee name.
        :param url: URL of the committee's default page.
        """
        assignments_url = url.replace('Default.asp', 'Assignments.asp')
        committee = Committee('upper', name)

        with self.urlopen(assignments_url) as text:
            page = lxml.html.fromstring(text)

            member_links = page.xpath(
                '//table[@bordercolor="#EBEAEC"]/tr/td/font/a')
            for link in member_links:
                senator = link.xpath('string()').replace('Senator ', '')
                committee.add_member(senator.strip())

        self.save_committee(committee)
开发者ID:tatsuhirosatou,项目名称:fiftystates,代码行数:16,代码来源:committees.py

示例13: scrape_index

 def scrape_index(self, chamber, session, session_id, committee_type):
     """Scrape the XML committee index for one chamber and save each entry.

     Every ``committee`` element under the ``body`` element for the chamber
     is unpacked into id, name, short name and a fourth value (unused here),
     in the order the element's attribute values are returned.

     :param chamber: ``'upper'`` or ``'lower'``; anything else raises
         ``KeyError``.
     :param session: session value stored on each committee.
     :param session_id: session id used in the index URL.
     :param committee_type: committee type used in the index URL.
     """
     query = 'xml/committees.asp?session=%s&type=%s' % (session_id,
                                                        committee_type)
     url = base_url + query

     with self.urlopen(url) as page:
         root = etree.fromstring(page, etree.XMLParser(recover=True))

         body_code = {'upper': 'S', 'lower': 'H'}[chamber]
         # TODO need to and make sure to add sub committees
         for node in root.xpath('//body[@Body="%s"]/committee' % body_code):
             # The unpacking requires exactly four attribute values.
             c_id, name, short_name, _sub = node.values()

             committee = Committee(chamber, name, short_name=short_name,
                                   session=session, az_committee_id=c_id)
             committee.add_source(url)
             self.scrape_com_info(session, session_id, c_id, committee)
             self.save_committee(committee)
开发者ID:ecocity,项目名称:openstates,代码行数:16,代码来源:committees.py

示例14: scrape

    def scrape(self, chamber, term):
        """Scrape North Carolina standing and select committees.

        For each committee-type listing of the chamber, every ``ul/li/a``
        link becomes a committee.  Its details are filled in by
        ``self.scrape_committee`` and the link target is recorded as the
        source.

        :param chamber: ``'upper'`` or ``'lower'``; anything else raises
            ``KeyError``.
        :param term: accepted for interface compatibility; not used here.
        """
        base_url = 'http://www.ncga.state.nc.us/gascripts/Committees/Committees.asp?bPrintable=true&sAction=ViewCommitteeType&sActionDetails='

        chambers = {'upper': ['Senate%20Standing', 'Senate%20Select'],
                    'lower': ['House%20Standing', 'House%20Select']}

        for ctype in chambers[chamber]:
            listing_url = base_url + ctype

            with self.urlopen(listing_url) as data:
                doc = lxml.html.fromstring(data)
                # Resolve relative hrefs against the listing page.
                doc.make_links_absolute(listing_url)

                for anchor in doc.xpath('//ul/li/a'):
                    committee_url = anchor.get('href')
                    committee = Committee(chamber, anchor.text)

                    self.scrape_committee(committee, committee_url)
                    committee.add_source(committee_url)
                    self.save_committee(committee)
开发者ID:acmewebservices,项目名称:openstates,代码行数:17,代码来源:committees.py

示例15: scrape_senate

    def scrape_senate(self):
        """Scrape New York Senate committees from nysenate.gov.

        The committee index is searched for distinct links containing
        ``/committee/``.  On each committee page the first
        ``.committee_name`` element with text supplies the name, links under
        ``.committee-chair`` supply the chair, and links under
        ``.committee-members`` supply the members.  Only links whose href
        contains ``/senator/`` are considered.
        """
        senate_url = "http://www.nysenate.gov"
        senate_committees_url = senate_url + "/committees"

        with self.urlopen(senate_committees_url) as html:
            doc = lxml.html.fromstring(html)
            committee_paths = set(l.get("href") for l in doc.cssselect("li a")
                                  if "/committee/" in l.get("href", ""))

        for committee_path in committee_paths:
            committee_url = senate_url+committee_path
            with self.urlopen(committee_url) as chtml:
                cdoc = lxml.html.fromstring(chtml)

                # Reset per page: otherwise a page without a usable name
                # would reuse the previous page's name, or hit an unbound
                # variable on the very first page.
                committee_name = None
                for h in cdoc.cssselect(".committee_name"):
                    if h.text:
                        committee_name = h.text
                        break
                if committee_name is None:
                    continue

                committee = Committee("upper", committee_name)
                committee.add_source(committee_url)

                for l in cdoc.cssselect(".committee-chair a[href]"):
                    if "/senator/" in l.get("href") and l.text and l.text.startswith("Sen."):
                        # Slice the prefix off instead of splitting on
                        # 'Sen. ', which fails when no space follows it.
                        chair_name = l.text[len("Sen."):].strip()
                        if chair_name:
                            committee.add_member(chair_name, "chair")

                for l in cdoc.cssselect(".committee-members a[href]"):
                    # Links with no direct text carry no member name.
                    if "/senator/" in l.get("href") and l.text:
                        committee.add_member(l.text)

                self.save_committee(committee)
开发者ID:Empact,项目名称:fiftystates,代码行数:30,代码来源:committees.py


注:本文中的fiftystates.scrape.committees.Committee类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。