当前位置: 首页>>代码示例>>Python>>正文


Python Legislator.add_source方法代码示例

本文整理汇总了Python中fiftystates.scrape.legislators.Legislator.add_source方法的典型用法代码示例。如果您正苦于以下问题：Python Legislator.add_source方法的具体用法是什么？Python Legislator.add_source怎么用？有哪些Python Legislator.add_source的使用例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类fiftystates.scrape.legislators.Legislator的用法示例。


在下文中一共展示了Legislator.add_source方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: scrape_old_legislators

# 需要导入模块: from fiftystates.scrape.legislators import Legislator [as 别名]
# 或者: from fiftystates.scrape.legislators.Legislator import add_source [as 别名]
    def scrape_old_legislators(self, chamber, session):
        """
        Scrape pre-2009 legislators from the per-session district listing.

        Fetches the district page for ``session`` on legis.state.sd.us, walks
        every ``<h2>`` district heading and saves one Legislator for each row
        of the following table whose second cell names the requested chamber.
        The listing URL is recorded as the source of every legislator.
        """
        chamber_name = "Senate" if chamber == "upper" else "House"

        # Sessions before 2008 publish the listing under a different file name.
        filename = "district.htm" if int(session) < 2008 else "MembersDistrict.htm"

        leg_list_url = "http://legis.state.sd.us/sessions/%s/%s" % (session, filename)
        leg_list = self.soup_parser(self.urlopen(leg_list_url))

        for heading in leg_list.findAll("h2"):
            # Second space-separated word of the heading, leading zeros removed.
            district = heading.contents[0].split(" ")[1].lstrip("0")

            # The first row of the table is skipped.
            member_rows = heading.findNext("table").findAll("tr")[1:]
            for row in member_rows:
                cells = row.findAll("td")
                if cells[1].contents[0].strip() != chamber_name:
                    continue

                full_name = row.td.a.contents[0].strip()
                party = cells[3].contents[0].strip()
                occupation = cells[4].contents[0].strip()

                legislator = Legislator(session, chamber, district, full_name,
                                        party=party, occupation=occupation)
                legislator.add_source(leg_list_url)
                self.save_legislator(legislator)
开发者ID:Empact,项目名称:fiftystates,代码行数:34,代码来源:legislators.py

示例2: scrape_new_legislators

# 需要导入模块: from fiftystates.scrape.legislators import Legislator [as 别名]
# 或者: from fiftystates.scrape.legislators.Legislator import add_source [as 别名]
    def scrape_new_legislators(self, chamber, session):
        """
        Scrape legislators from 2009 and later.

        Loads the session's MemberMenu.aspx page, finds the block that follows
        the "Senate Members" / "House Members" text for the requested chamber,
        and visits every member link in it. Party, district and (when present)
        occupation are read from the member page, which is also recorded as
        the legislator's source.
        """
        search = "Senate Members" if chamber == "upper" else "House Members"

        leg_list_url = "http://legis.state.sd.us/sessions/%s/MemberMenu.aspx" % session
        leg_list = self.soup_parser(self.urlopen(leg_list_url))

        # Member links live in the first <div> after the chamber label text.
        member_links = leg_list.find(text=search).findNext("div").findAll("a")

        for member_link in member_links:
            full_name = member_link.contents[0].strip()

            leg_page_url = "http://legis.state.sd.us/sessions/%s/%s" % (
                session, member_link["href"])
            leg_page = self.soup_parser(self.urlopen(leg_page_url))

            party = leg_page.find(id="ctl00_contentMain_spanParty").contents[0].strip()

            district_text = leg_page.find(id="ctl00_contentMain_spanDistrict").contents[0]
            district = district_text.strip().lstrip("0")

            # The occupation span may be empty; store None in that case.
            occ_span = leg_page.find(id="ctl00_contentMain_spanOccupation")
            occupation = occ_span.contents[0].strip() if occ_span.contents else None

            legislator = Legislator(session, chamber, district, full_name,
                                    party=party, occupation=occupation)
            legislator.add_source(leg_page_url)
            self.save_legislator(legislator)
开发者ID:Empact,项目名称:fiftystates,代码行数:37,代码来源:legislators.py

示例3: scrape_senators

# 需要导入模块: from fiftystates.scrape.legislators import Legislator [as 别名]
# 或者: from fiftystates.scrape.legislators.Legislator import add_source [as 别名]
    def scrape_senators(self, chamber, term):
        """
        Scrape the ohiosenate.gov directory and save one Legislator per cell.

        Each cell of the "fullWidth" table yields a name (link text minus its
        last two characters), a district (second word of the <h3> text) and a
        party code taken from the <span> inside the name link. Cells whose
        name is 'To Be Announced' are skipped.
        """
        # Single-letter codes are expanded; any other value is kept unchanged.
        party_names = {"D": "Democrat", "R": "Republican"}

        sen_url = 'http://www.ohiosenate.gov/directory.html'
        with self.urlopen(sen_url) as page:
            root = lxml.etree.fromstring(page, lxml.etree.HTMLParser())

            for cell in root.xpath('//table[@class="fullWidth"]/tr/td'):
                # The second "senatorLN" anchor is the one whose text is used.
                name_link = cell.xpath('a[@class="senatorLN"]')[1]

                full_name = name_link.text[:-2]
                if full_name == 'To Be Announced':
                    continue

                district = cell.xpath('string(h3)').split()[1]

                party_code = cell.xpath('string(a[@class="senatorLN"]/span)')
                party = party_names.get(party_code, party_code)

                # First, last and middle name are passed as empty strings.
                leg = Legislator(term, chamber, district, full_name,
                                 '', '', '', party)
                leg.add_source(sen_url)

                self.save_legislator(leg)
开发者ID:ecocity,项目名称:openstates,代码行数:29,代码来源:legislators.py

示例4: scrape_senate

# 需要导入模块: from fiftystates.scrape.legislators import Legislator [as 别名]
# 或者: from fiftystates.scrape.legislators.Legislator import add_source [as 别名]
    def scrape_senate(self, term):
        """
        Scrape the senate.leg.state.mn.us member list into 'upper' Legislators.

        Only table rows with exactly five cells whose second cell is a key of
        ``self._parties`` are treated as members. The fourth cell is expected
        to hold address and phone separated by two non-breaking spaces; the
        fifth is stored as the e-mail only when it contains '@'.
        """
        url = 'http://www.senate.leg.state.mn.us/members/member_list.php'

        with self.urlopen(url) as html:
            doc = lxml.html.fromstring(html)

            for row in doc.xpath('//tr'):
                cells = row.xpath('td')
                if len(cells) != 5:
                    continue

                party_code = cells[1].text_content()
                if party_code not in self._parties:
                    continue

                district = cells[0].text_content()
                name = cells[2].xpath('a')[0].text.strip()
                # Unpacking raises ValueError unless there are exactly two parts.
                addr, phone = cells[3].text_content().split(u'\xa0\xa0')
                email = cells[4].text_content()

                leg = Legislator(term, 'upper', district, name,
                                 party=self._parties[party_code],
                                 office_address=addr, office_phone=phone)

                if '@' in email:
                    leg['email'] = email

                leg.add_source(url)

                self.save_legislator(leg)
开发者ID:ecocity,项目名称:openstates,代码行数:28,代码来源:legislators.py

示例5: scrape

# 需要导入模块: from fiftystates.scrape.legislators import Legislator [as 别名]
# 或者: from fiftystates.scrape.legislators.Legislator import add_source [as 别名]
    def scrape(self, chamber, term):
        """
        Scrape council members from dccouncil.washington.dc.us.

        Does nothing for the 'lower' chamber. Otherwise every anchor on the
        member listing is followed; the district comes from the member page's
        title and the party from its 'Political Affiliation' block. Every
        member is saved with chamber 'upper'.
        """
        # do nothing if they're trying to get a lower chamber
        if chamber == 'lower':
            return

        # this page is loaded from the council page via AJAX
        url = 'http://www.dccouncil.washington.dc.us/include/linkedpage.aspx?linkedpage=2&page=17'

        with self.urlopen(url) as data:
            base_doc = lxml.html.fromstring(data)

            for link in base_doc.xpath('//a'):
                name = link.text
                leg_url = 'http://www.dccouncil.washington.dc.us/' + link.get('href')

                with self.urlopen(leg_url) as leg_html:
                    doc = lxml.html.fromstring(leg_html)

                    # Title is "Name, District": keep the part after the last ", "
                    title_text = doc.get_element_by_id('PageTitle').text
                    district = title_text.rsplit(', ')[-1]

                    # Anything not mentioning 'Democratic' is recorded as Independent.
                    affiliation = get_surrounding_block(doc, 'Political Affiliation')
                    party = 'Democratic' if 'Democratic' in affiliation else 'Independent'

                    legislator = Legislator(term, 'upper', district, name,
                                            party=party)
                    legislator.add_source(leg_url)
                self.save_legislator(legislator)
开发者ID:acmewebservices,项目名称:openstates,代码行数:34,代码来源:legislators.py

示例6: scrape_senators

# 需要导入模块: from fiftystates.scrape.legislators import Legislator [as 别名]
# 或者: from fiftystates.scrape.legislators.Legislator import add_source [as 别名]
    def scrape_senators(self, chamber, year):
        """
        Scrape the ohiosenate.gov directory and save one Legislator per cell.

        The name is the link text without its last two characters; its first
        and second words are used as first and last name. The session is the
        fixed string '128'; ``year`` is not used by this method.
        """
        sen_url = 'http://www.ohiosenate.gov/directory.html'
        with self.urlopen(sen_url) as page:
            root = lxml.etree.fromstring(page, lxml.etree.HTMLParser())

            for cell in root.xpath('//table[@class="fullWidth"]/tr/td'):
                # The second "senatorLN" anchor is the one whose text is used.
                name_link = cell.xpath('a[@class="senatorLN"]')[1]
                full_name = name_link.text[:-2]

                district = cell.xpath('string(h3)').split()[1]
                party = cell.xpath('string(a[@class="senatorLN"]/span)')

                name_parts = full_name.split()
                first_name = name_parts[0]
                last_name = name_parts[1]

                # The middle name is always passed as an empty string.
                leg = Legislator('128', chamber, district, full_name,
                                 first_name, last_name, '', party)
                leg.add_source(sen_url)

                self.save_legislator(leg)
开发者ID:Empact,项目名称:fiftystates,代码行数:28,代码来源:legislators.py

示例7: scrape_reps

# 需要导入模块: from fiftystates.scrape.legislators import Legislator [as 别名]
# 或者: from fiftystates.scrape.legislators.Legislator import add_source [as 别名]
    def scrape_reps(self, chamber, session, term_name):
        """
        Scrape house.state.oh.us members, one request per district.

        For each district 1..99 the member page is fetched and every
        ``table.page`` element is inspected. The text of its first
        ``tr/td/title`` element supplies the party (second-to-last character)
        and the full name (everything but the last three characters). The
        legislator built from the last matching table is saved; a district
        page with no matching table saves nothing.

        ``session`` is accepted for interface compatibility but not used;
        ``term_name`` is the term recorded on each Legislator.
        """
        # Single-letter codes are expanded; any other value is kept unchanged.
        party_names = {"D": "Democrat", "R": "Republican"}

        # There are only 99 districts
        for district in range(1, 100):
            rep_url = 'http://www.house.state.oh.us/components/com_displaymembers/page.php?district=' + str(district)
            with self.urlopen(rep_url) as page:
                root = lxml.etree.fromstring(page, lxml.etree.HTMLParser())

                # Reset for every district: without this, a page that has no
                # member table would raise NameError on the first district or
                # silently re-save the previous district's legislator.
                leg = None

                for el in root.xpath('//table[@class="page"]'):
                    title_text = el.xpath('tr/td/title')[0].text
                    party_code = title_text[-2]
                    full_name = title_text[0:len(title_text) - 3]
                    party = party_names.get(party_code, party_code)

                    # First, last and middle name are passed as empty strings.
                    leg = Legislator(term_name, chamber, str(district),
                                     full_name, "", "", "", party)
                    leg.add_source(rep_url)

                if leg is not None:
                    self.save_legislator(leg)
开发者ID:acmewebservices,项目名称:openstates,代码行数:29,代码来源:legislators.py

示例8: scrape_legislator_data

# 需要导入模块: from fiftystates.scrape.legislators import Legislator [as 别名]
# 或者: from fiftystates.scrape.legislators.Legislator import add_source [as 别名]
    def scrape_legislator_data(self, url, chamber):
        """
        Scrape leg.wa.gov legislators listed on ``url`` for one chamber.

        Every anchor inside the member list element is turned into a
        per-legislator page URL built from the lower-cased last name with
        apostrophes removed. Name, party and district are read from that
        page, and the page URL is recorded as the source. The term is the
        fixed string '2009-2010'.
        """
        # The per-member URL prefix depends only on the chamber.
        if chamber == 'upper':
            page_base = "http://www.leg.wa.gov/senate/senators/Pages/"
        else:
            page_base = "http://www.leg.wa.gov/house/representatives/Pages/"

        with self.lxml_context(url) as page:
            legislator_table = page.get_element_by_id("ctl00_PlaceHolderMain_dlMembers")

            for link in legislator_table.cssselect('a'):
                name = link.text_content()
                # Only the last name is needed here; the full name is re-read
                # from the member page below.
                _, _, _, last_name = self.separate_name(name)
                name_for_url = last_name.lower().replace("'", "")

                legislator_page_url = page_base + name_for_url + ".aspx"

                with self.lxml_context(legislator_page_url) as legislator_page:
                    try:
                        full_name, first_name, middle_name, last_name = \
                            self.scrape_legislator_name(legislator_page)
                    except Exception:
                        # Narrowed from a bare `except:` so KeyboardInterrupt
                        # and SystemExit are no longer swallowed.
                        # NOTE(review): `break` abandons every remaining
                        # legislator after the first page that fails to
                        # parse; `continue` may have been intended - confirm.
                        break

                    party_element = legislator_page.get_element_by_id("ctl00_PlaceHolderMain_lblParty")

                    # Anything other than '(R)' is recorded as Democrat.
                    if party_element.text_content() == '(R)':
                        party = 'Republican'
                    else:
                        party = 'Democrat'

                    district_element = legislator_page.get_element_by_id("ctl00_PlaceHolderMain_hlDistrict")
                    district = district_element.text_content()

                    # First, last and middle name are passed as empty strings.
                    legislator = Legislator('2009-2010', chamber, district, full_name, "", "", "", party)
                    legislator.add_source(legislator_page_url)
                    self.save_legislator(legislator)
开发者ID:marlonkeating,项目名称:fiftystates,代码行数:36,代码来源:legislators.py

示例9: scrape_legislator_data

# 需要导入模块: from fiftystates.scrape.legislators import Legislator [as 别名]
# 或者: from fiftystates.scrape.legislators.Legislator import add_source [as 别名]
    def scrape_legislator_data(self, chamber, session):
        """
        Scrape the member list for ``chamber`` and save one Legislator each.

        The list page URL comes from ``house_url(chamber)`` and each member
        page URL from ``legs_url(chamber, name_for_url)``, where
        ``name_for_url`` is the lower-cased last name with apostrophes
        removed. Both helpers are defined outside this method. Name, party
        and district are read from the member page, which is also recorded as
        the source.
        """
        with self.urlopen(house_url(chamber)) as page_html:
            page = lxml.html.fromstring(page_html)
            legislator_table = page.get_element_by_id("ctl00_PlaceHolderMain_dlMembers")

            for link in legislator_table.cssselect('a'):
                name = link.text_content()
                # Only the last name is needed here; the full name is re-read
                # from the member page below.
                _, _, _, last_name = separate_name(name)
                name_for_url = last_name.lower().replace("'", "")

                legislator_page_url = legs_url(chamber, name_for_url)

                with self.urlopen(legislator_page_url) as legislator_page_html:
                    legislator_page = lxml.html.fromstring(legislator_page_html)
                    try:
                        full_name, first_name, middle_name, last_name = \
                            self.scrape_legislator_name(legislator_page)
                    except Exception:
                        # Narrowed from a bare `except:` so KeyboardInterrupt
                        # and SystemExit are no longer swallowed.
                        # NOTE(review): `break` abandons every remaining
                        # legislator after the first page that fails to
                        # parse; `continue` may have been intended - confirm.
                        break

                    party_element = legislator_page.get_element_by_id("ctl00_PlaceHolderMain_lblParty")

                    # Anything other than '(R)' is recorded as Democrat.
                    if party_element.text_content() == '(R)':
                        party = 'Republican'
                    else:
                        party = 'Democrat'

                    district_element = legislator_page.get_element_by_id("ctl00_PlaceHolderMain_hlDistrict")
                    district = district_element.text_content()

                    # First, last and middle name are passed as empty strings.
                    legislator = Legislator(session, chamber, district, full_name, "", "", "", party)
                    legislator.add_source(legislator_page_url)
                    self.save_legislator(legislator)

示例10: scrape_reps

# 需要导入模块: from fiftystates.scrape.legislators import Legislator [as 别名]
# 或者: from fiftystates.scrape.legislators.Legislator import add_source [as 别名]
    def scrape_reps(self, chamber, session):
        """
        Scrape the maine.gov House district/member page.

        For each district 1..151 the member text is read from a paragraph
        located by position (district number + 4). Empty results and entries
        whose first word is 'District' are skipped. The party is the
        character right after '(' and the name is the text from offset 15 up
        to that '('; a party of "V" renames the entry to "Vacant".
        """
        rep_url = 'http://www.maine.gov/legis/house/dist_mem.htm'

        with self.urlopen(rep_url) as page:
            root = lxml.etree.fromstring(page, lxml.etree.HTMLParser())

            # There are 151 districts
            for district in range(1, 152):
                # Every tenth district reads the third anchor instead of the second.
                if district % 10:
                    path = 'string(/html/body/p[%s]/a[2])' % (district + 4)
                else:
                    path = 'string(/html/body/p[%s]/a[3])' % (district + 4)
                name = root.xpath(path)

                if not name or name.split()[0] == 'District':
                    continue

                paren = name.find('(')
                party = name[paren + 1]
                name = name[15:paren]

                if party == "V":
                    name = "Vacant"

                # First, last and middle name are passed as empty strings.
                leg = Legislator(session, chamber, district, name, "", "", "", party)
                leg.add_source(rep_url)
                self.save_legislator(leg)
开发者ID:marlonkeating,项目名称:fiftystates,代码行数:34,代码来源:legislators.py

示例11: scrape_details

# 需要导入模块: from fiftystates.scrape.legislators import Legislator [as 别名]
# 或者: from fiftystates.scrape.legislators.Legislator import add_source [as 别名]
    def scrape_details(self, chamber, term, leg_name, leg_link, role):
        """
        Fetch one member detail document from billstatus.ls.state.ms.us and
        save it as a Legislator.

        Args:
            chamber: chamber identifier; also used as the sub-domain of the
                generated e-mail address.
            term: term passed through to the Legislator.
            leg_name: full name to record (the detail document is not
                consulted for the name).
            leg_link: path appended to the members base URL.
            role: role passed through to the Legislator.

        A ``scrapelib.HTTPError`` is logged through ``self.warning`` and the
        legislator is skipped rather than aborting the scrape.
        """
        try:
            url = 'http://billstatus.ls.state.ms.us/members/%s' % leg_link
            with self.urlopen(url) as details_page:
                # Re-encode latin1 content as UTF-8 before parsing.
                details_page = details_page.decode('latin1').encode('utf8', 'ignore')
                root = lxml.etree.fromstring(details_page, lxml.etree.HTMLParser())
                party = root.xpath('string(//party)')
                district = root.xpath('string(//district)')
                first_name, middle_name, last_name = "", "", ""

                home_phone = root.xpath('string(//h_phone)')
                bis_phone = root.xpath('string(//b_phone)')
                capital_phone = root.xpath('string(//cap_phone)')
                other_phone = root.xpath('string(//oth_phone)')
                org_info = root.xpath('string(//org_info)')
                email_name = root.xpath('string(//email_address)')
                # Two placeholders: local part and chamber sub-domain. The
                # format string must contain only valid %-conversions or the
                # formatting raises ValueError.
                email = '%s@%s.ms.gov' % (email_name, chamber)

                # Any code other than 'D' is recorded as Republican.
                if party == 'D':
                    party = 'Democratic'
                else:
                    party = 'Republican'

                leg = Legislator(term, chamber, district, leg_name, first_name,
                                 last_name, middle_name, party, role=role,
                                 home_phone=home_phone, bis_phone=bis_phone,
                                 capital_phone=capital_phone,
                                 other_phone=other_phone, org_info=org_info,
                                 email=email)
                leg.add_source(url)
                self.save_legislator(leg)
        except scrapelib.HTTPError as e:
            self.warning(str(e))
开发者ID:ecocity,项目名称:openstates,代码行数:34,代码来源:legislators.py

示例12: scrape

# 需要导入模块: from fiftystates.scrape.legislators import Legislator [as 别名]
# 或者: from fiftystates.scrape.legislators.Legislator import add_source [as 别名]
    def scrape(self, chamber, year):
        """
        Scrape the ncga.state.nc.us member list for one chamber.

        The session is recorded as "<year>-<year+1>". The first row of the
        member table is skipped; for every other row the party abbreviation,
        district and linked full name are read, and the name is split into
        parts with ``split_name``.
        """
        start_year = int(year)
        session = "%d-%d" % (start_year, start_year + 1)

        url = ("http://www.ncga.state.nc.us/gascripts/members/"
               "memberList.pl?sChamber=")
        url += 'House' if chamber == 'lower' else 'Senate'

        # Abbreviations are expanded; any other value is kept unchanged.
        party_names = {'Dem': 'Democrat', 'Rep': 'Republican'}

        with self.urlopen(url) as (resp, data):
            leg_list = self.soup_parser(data)
            leg_table = leg_list.find('div', id='mainBody').find('table')

            for row in leg_table.findAll('tr')[1:]:
                party_code = row.td.contents[0].strip()
                party = party_names.get(party_code, party_code)

                cells = row.findAll('td')
                district = cells[1].contents[0].strip()
                # Non-breaking spaces in the name become ordinary spaces.
                full_name = cells[2].a.contents[0].strip().replace(u'\u00a0', ' ')
                first_name, last_name, middle_name, suffix = split_name(full_name)

                legislator = Legislator(session, chamber, district, full_name,
                                        first_name, last_name, middle_name,
                                        party, suffix=suffix)
                legislator.add_source(url)
                self.save_legislator(legislator)
开发者ID:Empact,项目名称:fiftystates,代码行数:35,代码来源:legislators.py

示例13: scrape_rep

# 需要导入模块: from fiftystates.scrape.legislators import Legislator [as 别名]
# 或者: from fiftystates.scrape.legislators.Legislator import add_source [as 别名]
    def scrape_rep(self, name, term, url):
        """
        Scrape one representative's page and save a 'lower' Legislator.

        Args:
            name: display name of the representative; a few known awkward
                forms are normalised before use.
            term: term passed through to the Legislator.
            url: address of the representative's page, also recorded as the
                source.

        The district number is taken from the first link whose href contains
        'Maps', and the party from a "<Party>&nbsp;District" marker in the raw
        page text ("Other" when none of the known markers is present).
        """
        # special case names that confuse name_tools
        if name == 'Franklin, A.B.':
            name = 'Franklin, A. B.'
        elif ', Jr., ' in name:
            # str.replace returns a new string, so the result must be
            # assigned back; the suffix is moved from the middle to the end.
            name = name.replace(', Jr., ', ' ') + ', Jr.'
        elif ', III, ' in name:
            name = name.replace(', III, ', ' ') + ', III'

        with self.urlopen(url) as text:
            page = lxml.html.fromstring(text)

            district = page.xpath(
                "//a[contains(@href, 'Maps')]")[0].attrib['href']
            # Raw string keeps the same pattern without an invalid "\d" escape.
            district = re.search(r"district(\d+).pdf", district).group(1)

            if "Democrat&nbsp;District" in text:
                party = "Democratic"
            elif "Republican&nbsp;District" in text:
                party = "Republican"
            elif "Independent&nbsp;District" in text:
                party = "Independent"
            else:
                party = "Other"

            leg = Legislator(term, 'lower', district, name, party=party)
            leg.add_source(url)
            self.save_legislator(leg)
开发者ID:acmewebservices,项目名称:openstates,代码行数:32,代码来源:legislators.py

示例14: scrape

# 需要导入模块: from fiftystates.scrape.legislators import Legislator [as 别名]
# 或者: from fiftystates.scrape.legislators.Legislator import add_source [as 别名]
    def scrape(self, chamber, year):
        """
        Scrape the legis.wi.gov legislator list for one chamber.

        Args:
            chamber: 'upper' selects the senate list, anything else the
                assembly list.
            year: year to scrape (anything ``int()`` accepts); it is used to
                look up the session name in ``internal_sessions``.

        Raises:
            NoDataForYear: for an even ``year`` that is neither the current
                calendar year nor the one before it.

        For every table row containing a link, the name and single-letter
        party are parsed from the first cell and the district from the third.
        Committees are attached through ``self.add_committees`` before the
        legislator is saved.
        """
        year = int(year)
        session = internal_sessions[year][0][1]
        # iterating through subsessions would be a better way to do this..
        this_year = dt.date.today().year
        # The two inequalities are joined with `and`: joined with `or` they
        # can never both be false, which rejected every even year.
        if year % 2 == 0 and year != this_year and year + 1 != this_year:
            raise NoDataForYear(year)

        if chamber == 'upper':
            url = "http://legis.wi.gov/w3asp/contact/legislatorslist.aspx?house=senate"
        else:
            url = "http://legis.wi.gov/w3asp/contact/legislatorslist.aspx?house=assembly"

        with self.urlopen(url) as body:
            page = lxml.html.fromstring(body)

            for row in page.cssselect("#ctl00_C_dgLegData tr"):
                if not row.cssselect("td a"):
                    continue

                cells = list(row)
                rep_url = cells[0].cssselect("a[href]")[0].get("href")

                # First cell looks like "<name> (<party letter>)".
                legpart = re.findall(r'([\w\-\,\s\.]+)\s+\(([\w])\)',
                                     cells[0].text_content())
                if not legpart:
                    continue
                full_name, party = legpart[0]

                # Normalise the district through int() to drop padding.
                district = str(int(cells[2].text_content()))

                # Party goes in the same positional slot used by the other
                # positional Legislator(...) calls (after the three name
                # parts); as the fifth argument it would be taken as the
                # first name.
                leg = Legislator(session, chamber, district, full_name,
                                 '', '', '', party)
                leg.add_source(rep_url)

                leg = self.add_committees(leg, rep_url, session)
                self.save_legislator(leg)
开发者ID:marlonkeating,项目名称:fiftystates,代码行数:34,代码来源:legislators.py

示例15: scrape_legislator_data

# 需要导入模块: from fiftystates.scrape.legislators import Legislator [as 别名]
# 或者: from fiftystates.scrape.legislators.Legislator import add_source [as 别名]
    def scrape_legislator_data(self, url, chamber):
        """
        Scrape the member table at ``url`` and each member's detail page.

        Every non-empty cell of the member table supplies the name from its
        <span> elements: the first span holds the first name (plus middle
        name when it has exactly two words), the second the last name. The
        linked detail page supplies district, party and committee roles. The
        term and the role term are the fixed string '2010'.
        """
        party_fulls = {'R': 'Republican', 'D': 'Democrat'}
        with self.urlopen(url) as page:
            page = BeautifulSoup(page)
            member_table = page.find('table', id='ctl00_mainCopy_DataList1')

            for data in member_table('td'):
                spans = data('span')
                if not spans:
                    self.debug('Found an empty cell in %s. Continuing' % url)
                    continue

                full_name = ' '.join(span.string.strip() for span in spans)

                given = spans[0].string.strip()
                given_parts = given.split()
                if len(given_parts) == 2:
                    first_name, middle_name = given_parts
                else:
                    # One word, or more than two: keep the text whole.
                    first_name, middle_name = given, ''
                last_name = spans[1].string.strip()

                details_url = get_abs_url(url, data.find('a')['href'])
                with self.urlopen(details_url) as details_html:
                    details = BeautifulSoup(details_html)

                    district_link = details.find(
                        'a', id='ctl00_mainCopy_LegisInfo_DISTRICTLabel')
                    district = district_link.string.strip()

                    party_span = details.find(
                        'span', id='ctl00_mainCopy_LegisInfo_PARTYLabel')
                    # Raises KeyError for any code other than 'R' or 'D'.
                    party = party_fulls[party_span.string]

                    leg = Legislator('2010', chamber, district, full_name,
                                     first_name, last_name, middle_name, party)
                    leg.add_source(details_url)

                    # The first row of the membership grid is skipped.
                    comms_table = details.find('table', id='ctl00_mainCopy_MembershipGrid')
                    for comm_row in comms_table('tr')[1:]:
                        comm_cells = comm_row('td')
                        comm_role_type = comm_cells[0].string.strip()
                        comm_name = comm_cells[1]('a')[0].string.strip()
                        leg.add_role(comm_role_type, '2010',
                                     chamber=chamber, committee=comm_name)

                    self.save_legislator(leg)
开发者ID:Empact,项目名称:fiftystates,代码行数:36,代码来源:legislators.py


注:本文中的fiftystates.scrape.legislators.Legislator.add_source方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。