當前位置: 首頁>>代碼示例>>Python>>正文


Python Bill.add_source方法代碼示例

本文整理匯總了Python中pyutils.legislation.Bill.add_source方法的典型用法代碼示例。如果您正苦於以下問題:Python Bill.add_source方法的具體用法?Python Bill.add_source怎麼用?Python Bill.add_source使用的例子?那麼, 這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在pyutils.legislation.Bill的用法示例。


在下文中一共展示了Bill.add_source方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: parse_senate_billpage

# 需要導入模塊: from pyutils.legislation import Bill [as 別名]
# 或者: from pyutils.legislation.Bill import add_source [as 別名]
    def parse_senate_billpage(self, bill_url, year):
        """Scrape one Senate bill page and save the resulting ``Bill``.

        Args:
            bill_url: URL of the bill page; it is fetched through
                ``self.soup_context`` and also recorded as the bill's source.
            year: Passed through as the first argument of ``Bill``.

        The sponsor, cosponsors, actions and versions are attached to the
        bill before it is handed to ``self.save_bill``.
        """
        with self.soup_context(bill_url) as bill_page:
            # get all the info needed to record the bill
            bill_id = bill_page.find(id="lblBillNum").b.font.contents[0]
            bill_title = bill_page.find(id="lblBillTitle").font.string
            bill_desc = bill_page.find(id="lblBriefDesc").font.contents[0]
            bill_lr = bill_page.find(id="lblLRNum").font.string

            bill = Bill(year, 'upper', bill_id, bill_desc, bill_url=bill_url,
                        bill_lr=bill_lr, official_title=bill_title)
            bill.add_source(bill_url)

            # Get the primary sponsor
            sponsor_tag = bill_page.find(id="hlSponsor")
            bill_sponsor = sponsor_tag.i.font.contents[0]
            # Attribute-style access (tag.href) searches for a child <href>
            # element and yields None; the link target is an HTML attribute.
            bill_sponsor_link = sponsor_tag.get('href')
            bill.add_sponsor('primary', bill_sponsor,
                             sponsor_link=bill_sponsor_link)

            # cosponsors show up on their own page, if they exist.
            # `'href' in tag` tests the tag's children, not its attributes,
            # so the attribute is looked up explicitly instead.
            cosponsor_tag = bill_page.find(id="hlCoSponsors")
            cosponsor_url = None
            if cosponsor_tag is not None:
                cosponsor_url = cosponsor_tag.get('href')
            if cosponsor_url:
                self.parse_senate_cosponsors(bill, cosponsor_url)

            # get the actions
            action_url = bill_page.find(id="hlAllActions")['href']
            self.parse_senate_actions(bill, action_url)

            # bill text versions are stored on a separate page
            versions_tag = bill_page.find(id="hlFullBillText")
            if versions_tag:
                self.parse_senate_bill_versions(bill, versions_tag['href'])

        self.save_bill(bill)
開發者ID:rcadby,項目名稱:fiftystates,代碼行數:35,代碼來源:get_legislation.py

示例2: scrape_bills

# 需要導入模塊: from pyutils.legislation import Bill [as 別名]
# 或者: from pyutils.legislation.Bill import add_source [as 別名]
    def scrape_bills(self, chamber, year):
        """Scrape every bill in the configured id range for one chamber.

        Args:
            chamber: ``'upper'`` or ``'lower'``; selects which of the
                ``self.*_min_id`` / ``self.*_max_id`` pairs bounds the scan.
            year: Year placed in the bill summary URL and used as the key
                into ``self._session_dict``.

        Raises:
            ValueError: if ``chamber`` is neither ``'upper'`` nor ``'lower'``.
        """
        self.log("Getting bill list for %s %s" % (chamber, year))

        if chamber == 'upper':
            min_id = self.upper_min_id
            max_id = self.upper_max_id
        elif chamber == 'lower':
            min_id = self.lower_min_id
            max_id = self.lower_max_id
        else:
            # Previously fell through and failed later with an unbound local.
            raise ValueError("unknown chamber: %r" % (chamber,))

        # NOTE(review): range() excludes max_id itself - confirm whether the
        # upper bound is meant to be inclusive.
        for bill_no in range(min_id, max_id):
            bill_info_url = 'http://dlr.leg.wa.gov/billsummary/default.aspx?year=%s&bill=%s' % (year, bill_no)
            with self.soup_context(bill_info_url) as soup:
                print('opened %s' % bill_no)
                bill_id = soup.find('span', id='ctl00_contentRegion_lblShortBillID').string
                bill_title = soup.find('span', id='ctl00_contentRegion_lblBriefDescription').string

                print('bill_id ', bill_id)
                print('bill_title ', bill_title)
                session_name = self._session_dict[year]

                bill = Bill(session_name, chamber, bill_id, bill_title)
                bill.add_source(bill_info_url)

                self._scrape_bill_docs(soup, bill)

                self._scrape_bill_sponsors(soup, bill)
                self._scrape_bill_votes(soup, bill, chamber)

                self.add_bill(bill)
開發者ID:katpet,項目名稱:fiftystates,代碼行數:32,代碼來源:get_legislation.py

示例3: parse_bill

# 需要導入模塊: from pyutils.legislation import Bill [as 別名]
# 或者: from pyutils.legislation.Bill import add_source [as 別名]
    def parse_bill(self, chamber, session, bill_id, bill_info_url):
        """Scrape a single bill's info page and register the bill.

        Args:
            chamber: Chamber the bill belongs to; also the default actor for
                actions that do not name a chamber.
            session: Session identifier; characters 2-3 are read as the
                two-digit year used to date the actions.
            bill_id: Bill identifier, used to locate the ``bill.doc`` link.
            bill_info_url: URL of the bill's info page (recorded as source).

        Returns nothing. A page without a version link is treated as a
        withdrawn bill and skipped.
        """
        with self.urlopen_context(bill_info_url) as bill_info_data:
            bill_info = self.soup_parser(bill_info_data)
            version_url = '%s/bill.doc' % bill_id
            version_link = bill_info.find(href=version_url)

            if not version_link:
                # This bill was withdrawn
                return

            bill_title = version_link.findNext('p').contents[0].strip()

            bill = Bill(session, chamber, bill_id, bill_title)
            bill.add_version("Most Recent Version",
                             session_url(session) + version_url)
            bill.add_source(bill_info_url)

            sponsor_links = bill_info.findAll(href=re.compile(
                    r'legislator/[SH]\d+\.htm'))

            for sponsor_link in sponsor_links:
                bill.add_sponsor('primary', sponsor_link.contents[0].strip())

            # The actions live in the last <p> following the version link.
            action_p = version_link.findAllNext('p')[-1]
            for action in action_p.findAll(text=True):
                action = action.strip()
                if (not action or action == 'last action' or
                    'Prefiled' in action):
                    continue

                # Each entry reads "<Mon> <day>-<description>".
                date_text, _, action = action.partition('-')

                # The page omits the year, so take it from the session name
                # and parse it together with the month/day. Parsing '%b %d'
                # alone defaults to 1900 and rejects "Feb 29".
                action_year = int('20' + session[2:4])
                action_date = dt.datetime.strptime(
                    '%s %d' % (date_text.strip(), action_year), '%b %d %Y')

                if action.endswith(('House', '(H)')):
                    actor = 'lower'
                elif action.endswith(('Senate', '(S)')):
                    actor = 'upper'
                else:
                    actor = chamber

                bill.add_action(actor, action, action_date)

            vote_link = bill_info.find(href=re.compile(r'.*/vote_history.pdf'))
            if vote_link:
                bill.add_document(
                    'vote_history.pdf',
                    bill_info_url.replace('.htm', '') + "/vote_history.pdf")

            self.add_bill(bill)
開發者ID:HughP,項目名稱:fiftystates,代碼行數:56,代碼來源:get_legislation.py

示例4: scrape_session

# 需要導入模塊: from pyutils.legislation import Bill [as 別名]
# 或者: from pyutils.legislation.Bill import add_source [as 別名]
    def scrape_session(self, chamber, session):
        """Scrape all bills of one chamber for one session.

        Args:
            chamber: ``"lower"`` selects House bills (``HB``); any other
                value selects Senate bills (``SB``).
            session: Session name; spaces are removed to build the URL.

        Returns nothing. If the session's bill list cannot be fetched or
        parsed the method returns silently.
        """
        if chamber == "lower":
            bill_abbr = "HB"
        else:
            bill_abbr = "SB"

        bill_list_url = "http://www.le.state.ut.us/~%s/bills.htm" % (
            session.replace(' ', ''))
        self.log("Getting bill list for %s, %s" % (session, chamber))

        try:
            base_bill_list = self.soup_parser(self.urlopen(bill_list_url))
        except Exception:
            # this session doesn't exist for this year
            # (KeyboardInterrupt / SystemExit are deliberately not swallowed)
            return

        # The patterns only depend on the chamber, so build them once.
        bill_list_link_re = re.compile(r'.*%s\d+ht.htm$' % bill_abbr)
        bill_link_re = re.compile(r'.*billhtm/%s.*.htm' % bill_abbr)
        status_re = re.compile(r'.*billsta/%s.*.htm' % bill_abbr.lower())
        text_link_re = re.compile(r'.*\.htm')

        for link in base_bill_list.findAll('a', href=bill_list_link_re):
            bill_list = self.soup_parser(self.urlopen(link['href']))

            for bill_link in bill_list.findAll('a', href=bill_link_re):
                bill_id = bill_link.find(text=True).strip()

                bill_info_url = bill_link['href']
                bill_info = self.soup_parser(self.urlopen(bill_info_url))

                # NOTE(review): the replace() below looks like a no-op as
                # written; it may originally have targeted a non-breaking
                # space / '&nbsp;' - confirm against the upstream source.
                heading = bill_info.h3.contents[2].replace(
                    ' ', ' ').strip()
                # The sponsor is the last " -- " separated field; splitting
                # from the right tolerates titles that contain " -- ".
                bill_title, primary_sponsor = heading.rsplit(' -- ', 1)

                bill = Bill(session, chamber, bill_id, bill_title)
                bill.add_source(bill_info_url)
                bill.add_sponsor('primary', primary_sponsor)

                status_link = bill_info.find('a', href=status_re)

                if status_link:
                    self.parse_status(bill, status_link['href'])

                text_find = bill_info.find(
                    text="Bill Text (If you are having trouble viewing")

                if text_find:
                    # The first matching link is skipped; each remaining link
                    # is named by the text node that precedes it.
                    for text_link in text_find.parent.parent.findAll(
                        'a', href=text_link_re)[1:]:
                        version_name = text_link.previous.strip()
                        bill.add_version(version_name, text_link['href'])

                self.add_bill(bill)
開發者ID:HughP,項目名稱:fiftystates,代碼行數:55,代碼來源:get_legislation.py

示例5: parse_bill

# 需要導入模塊: from pyutils.legislation import Bill [as 別名]
# 或者: from pyutils.legislation.Bill import add_source [as 別名]
    def parse_bill(self, chamber, session, bill_id, bill_info_url):
        """Scrape a single bill's info page and save the bill.

        Args:
            chamber: Chamber the bill belongs to; also the default actor for
                actions that do not name a chamber.
            session: Session identifier; characters 2-3 are read as the
                two-digit year used to date the actions.
            bill_id: Bill identifier, used to locate the ``bill.doc`` link.
            bill_info_url: URL of the bill's info page (recorded as source).

        Returns nothing. A page without a version link is treated as a
        withdrawn bill and skipped.
        """
        with self.urlopen_context(bill_info_url) as bill_info_data:
            bill_info = self.soup_parser(bill_info_data)
            version_url = "%s/bill.doc" % bill_id
            version_link = bill_info.find(href=version_url)

            if not version_link:
                # This bill was withdrawn
                return

            bill_title = version_link.findNext("p").contents[0].strip()

            bill = Bill(session, chamber, bill_id, bill_title)
            bill.add_version("Most Recent Version", session_url(session) + version_url)
            bill.add_source(bill_info_url)

            sponsor_links = bill_info.findAll(href=re.compile(r"legislator/[SH]\d+\.htm"))

            for sponsor_link in sponsor_links:
                bill.add_sponsor("primary", sponsor_link.contents[0].strip())

            # The actions live in the last <p> following the version link.
            action_p = version_link.findAllNext("p")[-1]
            for action in action_p.findAll(text=True):
                action = action.strip()
                if not action or action == "last action" or "Prefiled" in action:
                    continue

                # Each entry reads "<Mon> <day>-<description>".
                date_text, _, action = action.partition("-")

                # The year is not on the page; derive it from the session and
                # parse it with the month/day. A bare "%b %d" parse defaults
                # to 1900, which makes "Feb 29" an invalid date.
                action_year = int("20" + session[2:4])
                action_date = dt.datetime.strptime(
                    "%s %d" % (date_text.strip(), action_year), "%b %d %Y")

                if action.endswith(("House", "(H)")):
                    actor = "lower"
                elif action.endswith(("Senate", "(S)")):
                    actor = "upper"
                else:
                    actor = chamber

                bill.add_action(actor, action, action_date)

            vote_link = bill_info.find(href=re.compile(r".*/vote_history.pdf"))
            if vote_link:
                bill.add_document("vote_history.pdf", bill_info_url.replace(".htm", "") + "/vote_history.pdf")

            self.save_bill(bill)
開發者ID:rcadby,項目名稱:fiftystates,代碼行數:50,代碼來源:get_legislation.py

示例6: scrape_bills

# 需要導入模塊: from pyutils.legislation import Bill [as 別名]
# 或者: from pyutils.legislation.Bill import add_source [as 別名]
    def scrape_bills(self, chamber, year):
        """Scrape consecutive bills for one chamber until a page is missing.

        Args:
            chamber: ``'upper'`` starts at SB 1; anything else starts at
                HB 4001.
            year: Session start year (string or int).

        Raises:
            NoDataForYear: if ``year`` is even.
        """
        if int(year) % 2 == 0:
            raise NoDataForYear(year)

        year = int(year)
        oyear = year  # save off the original of the session
        if chamber == 'upper':
            bill_no = 1
            abbr = 'SB'
        else:
            bill_no = 4001
            abbr = 'HB'

        while True:
            # scrape_bill hands back the page plus a (possibly updated) year,
            # which is reused for the next request and for the source URL.
            (bill_page, year) = self.scrape_bill(year, abbr, bill_no)
            # if we can't find a page, we must be done. This is a healthy thing.
            if bill_page is None:
                return

            title = ''.join(self.flatten(bill_page.findAll(id='frg_billstatus_ObjectSubject')[0]))
            title = title.replace('\n', '').replace('\r', '')
            bill_id = "%s %d" % (abbr, bill_no)

            the_bill = Bill("Regular Session %d" % oyear, chamber, bill_id, title)

            # sponsors: the first listed name is primary, the rest cosponsors
            sponsor_links = bill_page.findAll(id='frg_billstatus_SponsorList')[0].findAll('a')
            for position, name in enumerate(sponsor_links):
                sponsor_type = 'primary' if position == 0 else 'cosponsor'
                the_bill.add_sponsor(sponsor_type, name.string)

            # versions
            for doc in bill_page.findAll(id='frg_billstatus_DocumentGridTable')[0].findAll('tr'):
                r = self.parse_doc(the_bill, doc)
                if r:
                    the_bill.add_version(*r)

            # documents: both tables are optional. Look the element up
            # directly rather than stringifying the whole page to test for
            # the id.
            for table_id in ('frg_billstatus_HlaTable', 'frg_billstatus_SfaSection'):
                doc_table = bill_page.find(id=table_id)
                if doc_table is None:
                    continue
                for doc in doc_table.findAll('tr'):
                    r = self.parse_doc(the_bill, doc)
                    if r:
                        the_bill.add_document(*r)

            the_bill.add_source('http://legislature.mi.gov/doc.aspx?%d-%s-%04d' % (year, abbr, bill_no))
            self.parse_actions(the_bill, bill_page.findAll(id='frg_billstatus_HistoriesGridView')[0])
            self.add_bill(the_bill)
            bill_no = bill_no + 1
開發者ID:HughP,項目名稱:fiftystates,代碼行數:50,代碼來源:get_legislation.py

示例7: scrape_bills

# 需要導入模塊: from pyutils.legislation import Bill [as 別名]
# 或者: from pyutils.legislation.Bill import add_source [as 別名]
    def scrape_bills(self, chamber, year):
        """Produce one hard-coded sample bill for the year "2009".

        Args:
            chamber: ``"upper"`` yields "SB 1"; any other value yields "HB 1".
            year: Must be the string ``"2009"``.

        Raises:
            NoDataForYear: for any other ``year``.
        """
        if year != "2009":
            # Pass the year along, matching how NoDataForYear is raised by
            # the other scraper in this listing.
            raise NoDataForYear(year)

        if chamber == "upper":
            other_chamber = "lower"
            bill_id = "SB 1"
        else:
            other_chamber = "upper"
            bill_id = "HB 1"

        b1 = Bill("2009-2010", chamber, bill_id, "A super bill")
        b1.add_source("http://example.com")
        b1.add_version("As Introduced", "http://example.com/SB1.html")
        b1.add_document("Google", "http://google.com")
        b1.add_sponsor("primary", "Bob Smith")
        b1.add_sponsor("secondary", "Johnson, Sally")

        # One passing vote in the upper chamber ...
        d1 = datetime.datetime.strptime("1/29/2010", "%m/%d/%Y")
        v1 = Vote("upper", d1, "Final passage", True, 2, 0, 0)
        v1.yes("Bob Smith")
        v1.yes("Sally Johnson")

        # ... and one failing vote in the lower chamber a day later.
        d2 = datetime.datetime.strptime("1/30/2010", "%m/%d/%Y")
        v2 = Vote("lower", d2, "Final passage", False, 0, 1, 1)
        v2.no("B. Smith")
        v2.other("Sally Johnson")

        b1.add_vote(v1)
        b1.add_vote(v2)

        b1.add_action(chamber, "introduced", d1)
        b1.add_action(chamber, "read first time", d1)
        b1.add_action(other_chamber, "introduced", d2)

        self.save_bill(b1)
開發者ID:rcadby,項目名稱:fiftystates,代碼行數:38,代碼來源:get_legislation.py

示例8: scrape_session

# 需要導入模塊: from pyutils.legislation import Bill [as 別名]
# 或者: from pyutils.legislation.Bill import add_source [as 別名]
    def scrape_session(self, chamber, year, prefix, session):
        """Scrape bill history pages one by one until a request fails.

        For i = 1, 2, ... a history URL is built from ``year``, ``prefix``,
        the chamber's bill abbreviation ("SB" for ``"upper"``, otherwise
        "AB") and ``i``. Each page's <pre> text is split into title,
        sponsors and dated actions, and the resulting Bill is saved. The
        loop ends when ``self.urlopen`` raises ``urllib2.HTTPError``.

        NOTE(review): ``unicode``, ``urllib2`` and slicing the result of
        ``filter`` imply this block targets Python 2.
        """
        def parse_sponsors(bill, line, chamber):
            """Split a sponsor line into names and add each to ``bill``."""
            sponsor_type = None
            # Primary sponsors are attributed to the bill's own chamber and
            # cosponsors to the opposite one.
            if chamber == "upper":
                leg_chamber = {"primary": "upper", "cosponsor": "lower"}
            else:
                leg_chamber = {"primary": "lower", "cosponsor": "upper"}
            for r in re.split(r"\sand\s|\,|;", line):
                r = r.strip()
                # The marker phrases switch the type for this and all
                # following names; the phrase plus one word is cut off.
                if r.find("Introduced by") != -1:
                    sponsor_type = "primary"
                    r = re.split(r"Introduced by \w+", r)[1]
                if r.find("cosponsored by") != -1:
                    sponsor_type = "cosponsor"
                    r = re.split(r"cosponsored by \w+", r)[1]
                # NOTE(review): if no marker phrase has been seen yet,
                # sponsor_type is still None and this lookup raises KeyError.
                bill.add_sponsor(sponsor_type, r.strip(), chamber=leg_chamber[sponsor_type])

        def parse_action(bill, line, actor, date):
            """Add one action (plus linked documents and any vote) to ``bill``."""
            line = lxml.html.fromstring(line)
            sane = line.text_content()
            # "06-18.  S. Received from Assembly  ................................... 220 "
            # "___________                      __________________________________________"
            #    11
            sane = sane.strip()[11:]  # take out the date and house
            if sane.find("..") != -1:
                sane = sane[0 : sane.find(" ..")]  # clear out bookkeeping
            bill.add_action(actor, sane, date)
            for doc in line.findall("a"):
                # have this treat amendments better, as they show up like "1" or "3" now..
                bill.add_document(doc.text_content(), doc.get("href"))

            # Actions mentioning "Ayes" are also handed to self.add_vote.
            if sane.find("Ayes") != -1:
                self.add_vote(bill, actor, date, sane)

        house = "SB" if (chamber == "upper") else "AB"
        # Maps the chamber letter found in an action line to a chamber name.
        chambers = {"S": "upper", "A": "lower"}
        i = 1
        while True:
            try:
                url = "http://www.legis.state.wi.us/%s/data/%s%s%dhst.html" % (year, prefix, house, i)
                body = unicode(self.urlopen(url), "latin-1")
            except urllib2.HTTPError as e:  # 404tastic
                # An HTTP error ends the scrape for this session.
                return

            page = lxml.html.fromstring(body).cssselect("pre")[0]
            # split the history into each line, excluding all blank lines; the
            # first two and the last remaining lines are dropped
            history = filter(lambda x: len(x.strip()) > 0, lxml.html.tostring(page).split("\n"))[2:-1]
            # Text accumulated for the entry currently being read.
            buffer = ""
            bill_id = page.find("a").text_content()
            bill_title = None
            bill_sponsors = False

            current_year = None
            action_date = None
            current_chamber = None

            for line in history:
                # Set when this line begins a new dated entry, which means
                # the previously buffered entry is complete.
                stop = False

                # the year changed
                if re.match(r"^(\d{4})[\s]{0,1}$", line):
                    current_year = int(line.strip())
                    continue

                # the action changed.
                if re.match(r"\s+(\d{2})-(\d{2}).\s\s([AS])\.\s", line):
                    # dm holds the two date numbers and the chamber letter.
                    dm = re.findall(r"\s+(\d{2})-(\d{2}).\s\s([AS])\.\s", line)[0]
                    workdata = buffer
                    buffer = ""
                    stop = True

                buffer = buffer + " " + line.strip()
                # The first completed entry is the bill title.
                if stop and not bill_title:
                    bill_title = workdata
                    bill = Bill(session, chamber, bill_id, bill_title)
                    continue

                # The second completed entry is the sponsor list; the date
                # and chamber of the entry that just started are remembered.
                if stop and not bill_sponsors:
                    parse_sponsors(bill, workdata, chamber)
                    bill_sponsors = True
                    current_chamber = chambers[dm[2]]
                    action_date = dt.datetime(current_year, int(dm[0]), int(dm[1]))
                    continue

                # Every later completed entry is an action, recorded with the
                # chamber/date remembered when that entry started.
                if stop:
                    parse_action(bill, workdata, current_chamber, action_date)
                    # now update the date
                    current_chamber = chambers[dm[2]]
                    action_date = dt.datetime(current_year, int(dm[0]), int(dm[1]))

            # Flush the entry still sitting in the buffer after the loop.
            # NOTE(review): dm and bill are unbound here if no line matched
            # the action pattern - confirm every page has at least one.
            current_chamber = chambers[dm[2]]
            action_date = dt.datetime(current_year, int(dm[0]), int(dm[1]))
            parse_action(bill, buffer, current_chamber, action_date)
            bill.add_source(url)
            self.save_bill(bill)
            i = i + 1
開發者ID:rcadby,項目名稱:fiftystates,代碼行數:98,代碼來源:get_legislation.py

示例9: scrape_session_new

# 需要導入模塊: from pyutils.legislation import Bill [as 別名]
# 或者: from pyutils.legislation.Bill import add_source [as 別名]
    def scrape_session_new(self, chamber, session):
        """Scrape and save every bill of ``chamber`` listed for ``session``.

        ``chamber`` equal to "lower" selects House bills ("H."); any other
        value selects Senate bills ("S."). The part of ``session`` after the
        first '-' is sent as the Session query parameter.
        """
        bill_abbr = "H." if chamber == "lower" else "S."
        body_letter = bill_abbr[0]

        bill_list_path = "docs/bills.cfm?Session=%s&Body=%s" % (
            session.split('-')[1], body_letter)
        bill_list_url = "http://www.leg.state.vt.us/" + bill_list_path
        bill_list = BeautifulSoup(self.urlopen(bill_list_url))

        bill_link_re = re.compile(r'.*?Bill=%s\.\d+.*' % body_letter)
        for bill_link in bill_list.findAll('a', href=bill_link_re):
            bill_id = bill_link.string
            bill_title = bill_link.parent.findNext('b').string
            bill_info_url = "http://www.leg.state.vt.us" + bill_link['href']

            bill = Bill(session, chamber, bill_id, bill_title)
            bill.add_source(bill_info_url)

            info_page = BeautifulSoup(self.urlopen(bill_info_url))
            blockquotes = info_page.findAll('blockquote')

            # Second blockquote: links to the bill's text versions.
            for version_link in blockquotes[1].findAll('a'):
                version_url = ("http://www.leg.state.vt.us" +
                               version_link['href'])
                bill.add_version(version_link.string, version_url)

            # Third blockquote: action table; its first row is skipped.
            for row in blockquotes[2].table.findAll('tr')[1:]:
                pieces = row.findAll('td')[1].findAll(text=True)
                action = " ".join(pieces).strip()

                governor = re.search('Governor on (.*)$', action)
                if governor:
                    act_date = parse_exec_date(governor.group(1).strip())
                    actor = 'Governor'
                else:
                    # The row colour tells the chambers apart.
                    actor = 'lower' if row['bgcolor'] == 'Salmon' else 'upper'

                    # The date is either linked or plain text in cell one.
                    date_holder = row.td.a if row.td.a else row.td
                    date_text = re.search(
                        r'\d{1,2}/\d{1,2}/\d{4,4}',
                        date_holder.string).group(0)
                    act_date = dt.datetime.strptime(date_text, '%m/%d/%Y')

                bill.add_action(actor, action, act_date)

                details = row.find('a', text='Details')
                if details:
                    self.parse_vote_new(bill, actor, details.parent['href'])

            # First bold name is the primary sponsor, the rest cosponsors.
            sponsor_label = info_page.find(text='Sponsor(s):')
            sponsors = sponsor_label.parent.parent.findAll('b')
            bill.add_sponsor('primary', sponsors[0].string)
            for extra in sponsors[1:]:
                bill.add_sponsor('cosponsor', extra.string)

            self.save_bill(bill)
開發者ID:rcadby,項目名稱:fiftystates,代碼行數:70,代碼來源:get_legislation.py

示例10: scrape_session_old

# 需要導入模塊: from pyutils.legislation import Bill [as 別名]
# 或者: from pyutils.legislation.Bill import add_source [as 別名]
    def scrape_session_old(self, chamber, session):
        """Scrape and save every bill of ``chamber`` for an older session.

        Args:
            chamber: ``"lower"`` selects House bills; any other value selects
                Senate bills.
            session: String of the form "<start>-<session id>"; the first
                part supplies the start year of the search and the second
                the Session form field.

        Action dates are stored as ``datetime`` objects for both chambers.
        """
        def add_actions(bill, act_table, act_chamber):
            # Record each dated row of one chamber's status table.
            # Rows 0-2 are skipped (presumably headers - TODO confirm).
            for row in act_table.findAll('tr')[3:]:
                action = row.td.string.replace(' ', '').strip(':')

                cells = row.findAll('td')
                act_date = cells[1].b.string.replace(' ', '')
                if act_date == "":
                    # Rows without a date describe steps not yet taken.
                    continue

                detail = cells[2].b
                if detail is not None and detail.string:
                    action += ": %s" % detail.string.replace(' ', '')

                # Previously the raw date string was stored (and, for the
                # other chamber, the parsed value was computed but unused).
                parsed_date = dt.datetime.strptime(act_date, '%m/%d/%Y')
                bill.add_action(act_chamber, action, parsed_date)

        if chamber == "lower":
            bill_abbr = "H."
            chamber_name = "House"
            other_chamber = "Senate"
        else:
            bill_abbr = "S."
            chamber_name = "Senate"
            other_chamber = "House"

        start_date = '1/1/%s' % session.split('-')[0]
        data = urllib.urlencode({'Date': start_date,
                                 'Body': bill_abbr[0],
                                 'Session': session.split('-')[1]})
        bill_list_url = "http://www.leg.state.vt.us/database/"\
            "rintro/results.cfm"
        bill_list = BeautifulSoup(urllib2.urlopen(bill_list_url, data))

        bill_link_re = re.compile(r'.*?Bill=%s.\d+.*' % bill_abbr[0])
        for bill_link in bill_list.findAll('a', href=bill_link_re):
            bill_id = bill_link.string
            bill_title = bill_link.parent.parent.findAll('td')[1].string
            bill_info_url = "http://www.leg.state.vt.us" + bill_link['href']

            bill = Bill(session, chamber, bill_id, bill_title)
            bill.add_source(bill_info_url)

            info_page = BeautifulSoup(self.urlopen(bill_info_url))

            text_links = info_page.findAll('blockquote')[-1].findAll('a')
            for text_link in text_links:
                bill.add_version(text_link.string,
                                 "http://www.leg.state.vt.us" +
                                 text_link['href'])

            sponsors = info_page.find(
                text='Sponsor(s):').parent.findNext('td').findAll('b')
            bill.add_sponsor('primary', sponsors[0].string)
            for sponsor in sponsors[1:]:
                bill.add_sponsor('cosponsor', sponsor.string)

            # Grab actions from the originating chamber
            act_table = info_page.find(
                text='%s Status:' % chamber_name).findNext('table')
            add_actions(bill, act_table, chamber)

            # Grab actions from the other chamber, if its status section is
            # on the page. The label is checked before calling findNext so a
            # missing section no longer raises AttributeError.
            other_label = info_page.find(text='%s Status:' % other_chamber)
            if other_label is not None:
                other_table = other_label.findNext('table')
                if other_table:
                    if chamber == 'upper':
                        act_chamber = 'lower'
                    else:
                        act_chamber = 'upper'
                    add_actions(bill, other_table, act_chamber)

            self.save_bill(bill)
開發者ID:rcadby,項目名稱:fiftystates,代碼行數:78,代碼來源:get_legislation.py

示例11: parse_house_bill

# 需要導入模塊: from pyutils.legislation import Bill [as 別名]
# 或者: from pyutils.legislation.Bill import add_source [as 別名]
    def parse_house_bill(self, url, session):
        """Scrape the print view of a House bill page and save the bill.

        Every "content" in ``url`` is replaced by "print" before fetching.
        The bill is created for the 'lower' chamber with the given
        ``session`` and receives its sponsor, cosponsors, actions and text
        versions before ``self.save_bill`` is called.
        """
        url = re.sub("content", "print", url)

        with self.urlopen_context(url) as bill_page_data:
            bill_page = self.soup_parser(bill_page_data)
            header_table = bill_page.table

            # Bill number and description come from the header table.
            bill_id = clean_text(header_table.b.contents[0])
            bill_desc = clean_text(header_table.findAll('td')[1].contents[0])

            lr_label = bill_page.find(text=re.compile("LR Number:"))
            bill_lr = lr_label.next.contents[0].strip()

            # The description is kept separate from the name for now.
            bill = Bill(session, 'lower', bill_id, bill_desc,
                        bill_url=url, bill_lr=bill_lr)
            bill.add_source(url)

            # Sponsor name: drop a trailing "(...)" group when present.
            raw_sponsor = bill_page.em.contents[0]
            sponsor_match = re.search(r"(.*)\(.*\)", raw_sponsor)
            bill_sponsor = sponsor_match.group(1) if sponsor_match else raw_sponsor

            # The details table sits two siblings after the header table.
            details_table = header_table.nextSibling.nextSibling

            if details_table.a:
                sponsor_href = details_table.a['href']
            else:
                sponsor_href = None

            bill.add_sponsor('primary', bill_sponsor,
                             sponsor_link=sponsor_href)

            # Cosponsors are parsed only when the cell holds a link.
            cosponsor_label = details_table.find(text=re.compile("CoSponsor"))
            cosponsor_cell = cosponsor_label.next
            if cosponsor_cell.a:
                self.parse_house_cosponsors(bill, cosponsor_cell)

            # Actions: the link tag is two nodes before the 'ACTIONS' text.
            actions_text = bill_page.find('a', text='ACTIONS')
            actions_tag = actions_text.previous.previous
            actions_link = re.sub("content", "print", actions_tag['href'])
            self.parse_house_actions(bill, actions_link)

            # Bill versions: only links carrying a bold label are used; the
            # PDF link is the element two siblings earlier.
            for version_tag in bill_page.findAll(href=re.compile("biltxt")):
                if not version_tag.b:
                    continue
                version = clean_text(version_tag.b.contents[0])
                text_url = version_tag['href']
                pdf_tag = version_tag.previousSibling.previousSibling
                bill.add_version(version, text_url, pdf_url=pdf_tag['href'])

        self.save_bill(bill)
開發者ID:rcadby,項目名稱:fiftystates,代碼行數:70,代碼來源:get_legislation.py

示例12: scrape_session

# 需要導入模塊: from pyutils.legislation import Bill [as 別名]
# 或者: from pyutils.legislation.Bill import add_source [as 別名]
    def scrape_session(self, chamber, year):
        """Scrape every bill of ``chamber`` for the session starting in ``year``.

        Args:
            chamber: ``"upper"`` (SB/SCR/SJR) or ``"lower"`` (HB/HCR/HJR).
            year: Four-digit year as a string; the session covers this year
                and the next.
        """
        if chamber == "upper":
            bill_abbr = "SB|SCR|SJR"
        elif chamber == "lower":
            bill_abbr = "HB|HCR|HJR"

        # Sessions last 2 years, 1993-1994 was the 18th.
        # Floor division keeps the session number an integer under true
        # division as well ("26", not "26.0").
        session = str(18 + ((int(year) - 1993) // 2))
        year2 = str(int(year) + 1)

        # Full calendar year
        date1 = "0101" + year[2:]
        date2 = "1231" + year2[2:]

        # Get bill list
        bill_list_url = (
            "http://www.legis.state.ak.us/"
            "basis/range_multi.asp?session=%s&date1=%s&date2=%s" % (
                session, date1, date2))
        self.log("Getting bill list for %s %s (this may take a long time)." % (chamber, session))
        bill_list = self.soup_parser(self.urlopen(bill_list_url))

        # Find bill links
        # NOTE(review): the alternation is not grouped, so this reads as
        # "bill=SB" | "SCR" | "SJR\d+" rather than "bill=(SB|SCR|SJR)\d+".
        # Left unchanged because the real href format is not visible here.
        re_str = r"bill=%s\d+" % bill_abbr
        links = bill_list.findAll(href=re.compile(re_str))

        # Patterns reused for every bill.
        sponsors_re = re.compile(r" (SENATOR|REPRESENTATIVE)\([Ss]\) ([^,]+(,[^,]+){0,})")
        vote_re = re.compile(r"\w+ Y(\d+) N(\d+)")
        subject_link_re = re.compile(r".*subject=\w+$")
        text_link_re = re.compile("^get_bill_text?")

        for link in links:
            bill_id = link.contents[0].replace(" ", "")
            bill_name = link.parent.parent.findNext("td").find("font").contents[0].strip()
            bill = Bill(session, chamber, bill_id, bill_name)

            # Get the bill info page
            info_url = "http://www.legis.state.ak.us/basis/%s" % link["href"]
            info_page = self.soup_parser(self.urlopen(info_url))
            bill.add_source(info_url)

            # Get sponsors
            spons_str = info_page.find(text="SPONSOR(s):").parent.parent.contents[1]
            sponsors_match = sponsors_re.match(spons_str)
            if sponsors_match:
                sponsors = sponsors_match.group(2).split(",")
                bill.add_sponsor("primary", sponsors[0].strip())

                for sponsor in sponsors[1:]:
                    bill.add_sponsor("cosponsor", sponsor.strip())
            else:
                # Committee sponsorship
                bill.add_sponsor("committee", spons_str.strip())

            # Get actions (second "myth" table, header row skipped)
            act_rows = info_page.findAll("table", "myth")[1].findAll("tr")[1:]
            for row in act_rows:
                cols = row.findAll("td")
                act_date = cols[0].font.contents[0]
                act_date = dt.datetime.strptime(act_date, "%m/%d/%y")

                chamber_marker = cols[2].font.string
                if chamber_marker == "(H)":
                    act_chamber = "lower"
                elif chamber_marker == "(S)":
                    act_chamber = "upper"
                else:
                    act_chamber = chamber

                action = cols[3].font.contents[0].strip()
                # Actions shaped like "<word> Y<n> N<n>" carry a vote tally.
                if vote_re.match(action):
                    vote = self.parse_vote(bill, action, act_chamber, act_date, cols[1].a["href"])
                    bill.add_vote(vote)

                bill.add_action(act_chamber, action, act_date)

            # Get subjects
            bill["subjects"] = []
            for subject_link in info_page.findAll("a", href=subject_link_re):
                subject = subject_link.contents[0].strip()
                bill["subjects"].append(subject)

            # Get versions
            text_list_url = (
                "http://www.legis.state.ak.us/"
                "basis/get_fulltext.asp?session=%s&bill=%s" % (
                    session, bill_id))
            text_list = self.soup_parser(self.urlopen(text_list_url))
            bill.add_source(text_list_url)

            for text_link in text_list.findAll("a", href=text_link_re):
                text_name = text_link.parent.previousSibling.contents[0]
                text_name = text_name.strip()

                text_url = "http://www.legis.state.ak.us/basis/%s" % (text_link["href"])

                bill.add_version(text_name, text_url)

            self.add_bill(bill)
開發者ID:katpet,項目名稱:fiftystates,代碼行數:98,代碼來源:get_legislation.py

示例13: get_bill_info

# 需要導入模塊: from pyutils.legislation import Bill [as 別名]
# 或者: from pyutils.legislation.Bill import add_source [as 別名]
    def get_bill_info(self, session, sub, bill_id):
        """Scrape one bill's detail page on ncga.state.nc.us and record it.

        Downloads the printable BillLookUp page for ``bill_id``, builds a
        ``Bill`` from it (title, HTML text versions, sponsors and actions),
        passes every roll-call transcript link to ``self.get_vote`` and
        finally hands the bill to ``self.add_bill``.

        Args:
            session: Session identifier. Only its first four characters are
                used in the lookup URL and in the version-link pattern; the
                full value plus ``sub`` is stored on the bill.
            sub: Suffix appended to the session in the URL and on the bill.
            bill_id: Bill identifier. A leading "H" means the lower chamber;
                anything else is treated as the upper chamber.

        Raises:
            IndexError: If the title div, the "Sponsors" table or the
                "Chamber" table is missing, or the sponsor table has fewer
                than three rows.
            ValueError: If an action date is not in ``%m/%d/%Y`` format.
        """
        session_key = session[0:4]
        bill_detail_url = (
            "http://www.ncga.state.nc.us/gascripts/"
            "BillLookUp/BillLookUp.pl?bPrintable=true"
            "&Session=%s&BillID=%s&votesToView=all" % (session_key + sub, bill_id)
        )

        chamber = "lower" if bill_id[0] == "H" else "upper"

        # parse the bill data page, finding the latest html text
        bill_soup = self.soup_parser(self.urlopen(bill_detail_url))

        # The title div carries no id/class, so it is located by its exact
        # inline style string.
        bill_title = bill_soup.findAll(
            "div", style="text-align: center; font: bold" " 20px Arial; margin-top: 15px;" " margin-bottom: 8px;"
        )[0].contents[0]

        bill = Bill(session + sub, chamber, bill_id, bill_title)
        bill.add_source(bill_detail_url)

        # get all versions (raw string: "\w" must reach the regex engine
        # untouched instead of being an invalid string escape)
        version_link_re = re.compile(r"/Sessions/%s/Bills/\w+/HTML" % session_key)
        for link in bill_soup.findAll("a", href=version_link_re):
            # The version label sits two siblings before the link's parent.
            name_cell = link.parent.previousSibling.previousSibling
            # Normalise non-breaking spaces to plain spaces.
            version_name = name_cell.contents[0].replace(u"\u00a0", " ")
            bill.add_version(version_name, "http://www.ncga.state.nc.us" + link["href"])

        # figure out which table has sponsor data
        sponsor_table = bill_soup.findAll("th", text="Sponsors", limit=1)[0].findParents("table", limit=1)[0]
        sponsor_rows = sponsor_table.findAll("tr")

        # Row 1 lists the primary sponsors, row 2 the cosponsors.
        for sponsor_type, row_index in (("primary", 1), ("cosponsor", 2)):
            for leg in sponsor_rows[row_index].td.findAll("a"):
                bill.add_sponsor(sponsor_type, leg.contents[0].replace(u"\u00a0", " "))

        action_table = bill_soup.findAll("th", text="Chamber", limit=1)[0].findParents("table", limit=1)[0]

        for row in action_table.findAll("tr"):
            cells = row.findAll("td")
            if len(cells) != 3:
                # Only three-cell rows (date, actor, action) are actions.
                continue

            # self.flatten is expected to reduce each cell to its text.
            act_date, actor, action = [self.flatten(cell) for cell in cells]
            act_date = dt.datetime.strptime(act_date, "%m/%d/%Y")

            if actor == "Senate":
                actor = "upper"
            elif actor == "House":
                actor = "lower"
            elif action.endswith("Gov."):
                actor = "Governor"
            # Any other actor text is passed through unchanged.

            bill.add_action(actor, action, act_date)

        for vote in bill_soup.findAll("a", href=re.compile("RollCallVoteTranscript")):
            self.get_vote(bill, vote["href"])

        self.add_bill(bill)
開發者ID:katpet,項目名稱:fiftystates,代碼行數:68,代碼來源:get_legislation.py

示例14: get_bill_info

# 需要導入模塊: from pyutils.legislation import Bill [as 別名]
# 或者: from pyutils.legislation.Bill import add_source [as 別名]
    def get_bill_info(self, session, sub, bill_id):
        """Scrape one bill's detail page on ncga.state.nc.us and save it.

        Downloads the printable BillLookUp page for ``bill_id``, builds a
        ``Bill`` from it (title, HTML text versions, sponsors and actions),
        passes every roll-call transcript link to ``self.get_vote`` and
        finally hands the bill to ``self.save_bill``.

        Args:
            session: Session identifier. Only its first four characters are
                used in the lookup URL and in the version-link pattern; the
                full value plus ``sub`` is stored on the bill.
            sub: Suffix appended to the session in the URL and on the bill.
            bill_id: Bill identifier. A leading 'H' means the lower chamber;
                anything else is treated as the upper chamber.

        Raises:
            IndexError: If the title div, the 'Sponsors' table or the
                'Chamber' table is missing, or the sponsor table has fewer
                than three rows.
            ValueError: If an action date is not in ``%m/%d/%Y`` format.
        """
        session_key = session[0:4]
        bill_detail_url = 'http://www.ncga.state.nc.us/gascripts/'\
            'BillLookUp/BillLookUp.pl?bPrintable=true'\
            '&Session=%s&BillID=%s&votesToView=all' % (
            session_key + sub, bill_id)

        chamber = 'lower' if bill_id[0] == 'H' else 'upper'

        # parse the bill data page, finding the latest html text
        bill_soup = self.soup_parser(self.urlopen(bill_detail_url))

        # The title div carries no id/class, so it is located by its exact
        # inline style string.
        title_style = ("text-align: center; font: bold"
                       " 20px Arial; margin-top: 15px;"
                       " margin-bottom: 8px;")
        bill_title = bill_soup.findAll('div', style=title_style)[0].contents[0]

        bill = Bill(session + sub, chamber, bill_id, bill_title)
        bill.add_source(bill_detail_url)

        # get all versions (raw string: '\w' must reach the regex engine
        # untouched instead of being an invalid string escape)
        version_link_re = re.compile(
            r'/Sessions/%s/Bills/\w+/HTML' % session_key)

        for link in bill_soup.findAll('a', href=version_link_re):
            # The version label sits two siblings before the link's parent.
            name_cell = link.parent.previousSibling.previousSibling
            # Normalise non-breaking spaces to plain spaces.
            version_name = name_cell.contents[0].replace(u'\u00a0', ' ')

            version_url = 'http://www.ncga.state.nc.us' + link['href']
            bill.add_version(version_name, version_url)

        # figure out which table has sponsor data
        sponsor_header = bill_soup.findAll('th', text='Sponsors', limit=1)[0]
        sponsor_table = sponsor_header.findParents('table', limit=1)[0]
        sponsor_rows = sponsor_table.findAll('tr')

        # Row 1 lists the primary sponsors, row 2 the cosponsors.
        for sponsor_type, row_index in (('primary', 1), ('cosponsor', 2)):
            for leg in sponsor_rows[row_index].td.findAll('a'):
                bill.add_sponsor(sponsor_type,
                                 leg.contents[0].replace(u'\u00a0', ' '))

        action_header = bill_soup.findAll('th', text='Chamber', limit=1)[0]
        action_table = action_header.findParents('table', limit=1)[0]

        for row in action_table.findAll('tr'):
            cells = row.findAll('td')
            if len(cells) != 3:
                # Only three-cell rows (date, actor, action) are actions.
                continue

            # self.flatten is expected to reduce each cell to its text.
            act_date, actor, action = [self.flatten(cell) for cell in cells]
            act_date = dt.datetime.strptime(act_date, '%m/%d/%Y')

            if actor == 'Senate':
                actor = 'upper'
            elif actor == 'House':
                actor = 'lower'
            elif action.endswith('Gov.'):
                actor = 'Governor'
            # Any other actor text is passed through unchanged.

            bill.add_action(actor, action, act_date)

        vote_link_re = re.compile('RollCallVoteTranscript')
        for vote in bill_soup.findAll('a', href=vote_link_re):
            self.get_vote(bill, vote['href'])

        self.save_bill(bill)
開發者ID:rcadby,項目名稱:fiftystates,代碼行數:76,代碼來源:get_legislation.py

示例15: scrape_new_session

# 需要導入模塊: from pyutils.legislation import Bill [as 別名]
# 或者: from pyutils.legislation.Bill import add_source [as 別名]
    def scrape_new_session(self, chamber, session):
        """
        Scrapes SD's bill data from 2009 on.

        Reads the session's BillList.aspx page and, for every link whose
        text is the chamber's bill abbreviation, a non-breaking space and a
        number, downloads the linked history page. Each bill gets its text
        versions, its dated actions and any votes found among the actions,
        and is then passed to ``self.save_bill``.

        Args:
            chamber: 'upper' (bills prefixed "SB") or 'lower' ("HB").
            session: Session identifier used in the site's URLs.

        Raises:
            ValueError: If ``chamber`` is neither 'upper' nor 'lower', or if
                a matched action date cannot be parsed as ``%m/%d/%Y``.
        """
        # Fail fast on an unknown chamber instead of hitting an unbound
        # local after the bill list has already been downloaded.
        bill_abbrs = {'upper': 'SB', 'lower': 'HB'}
        if chamber not in bill_abbrs:
            raise ValueError("chamber must be 'upper' or 'lower', got %r"
                             % (chamber,))
        bill_abbr = bill_abbrs[chamber]

        # Get bill list page
        session_url = 'http://legis.state.sd.us/sessions/%s/' % session
        bill_list_url = session_url + 'BillList.aspx'
        self.log('Getting bill list for %s %s' % (chamber, session))
        bill_list = self.soup_parser(self.urlopen(bill_list_url))

        # Format of bill link contents; the regex backslashes are escaped
        # or raw so they are not treated as (invalid) string escapes.
        bill_re = re.compile(u'%s\xa0(\\d+)' % bill_abbr)
        date_re = re.compile(r'\d{2}/\d{2}/\d{4}')

        for bill_link in bill_list.findAll('a'):
            if not bill_link.contents:
                # Empty link
                continue

            link_text = bill_link.contents[0]
            if not bill_re.search(link_text):
                continue

            # Parse bill ID and name
            bill_id = link_text.replace(u'\xa0', ' ')
            bill_name = bill_link.findNext().contents[0]

            # Download history page
            hist_url = session_url + bill_link['href']
            history = self.soup_parser(self.urlopen(hist_url))

            bill = Bill(session, chamber, bill_id, bill_name)
            bill.add_source(hist_url)

            # Get all bill versions: second table on the page, skipping its
            # first two rows; the link lives in each row's second cell.
            text_table = history.findAll('table')[1]
            for row in text_table.findAll('tr')[2:]:
                version_link = row.findAll('td')[1].a
                version_url = session_url + version_link['href']
                version_name = version_link.contents[0].strip()

                bill.add_version(version_name, version_url)

            # Get actions: first table on the page, skipping six rows.
            act_table = history.find('table')
            for act_row in act_table.findAll('tr')[6:]:
                if act_row.find(text='Action'):
                    continue

                # Get the date (if can't find one then this isn't an action)
                date_match = date_re.match(act_row.td.a.contents[0])
                if not date_match:
                    continue
                act_date = dt.datetime.strptime(date_match.group(0),
                                                "%m/%d/%Y")

                # Get the action string: plain text nodes are used as-is,
                # tags contribute their first child.
                action_parts = []
                for node in act_row.findAll('td')[1].contents:
                    if hasattr(node, 'contents'):
                        node_text = node.contents[0]
                        action_parts.append(node_text)

                        if node_text.startswith('YEAS'):
                            # This is a vote!
                            vote_url = session_url + node['href']

                            vote = self.scrape_new_vote(vote_url)
                            vote['date'] = act_date
                            bill.add_vote(vote)
                    else:
                        action_parts.append(node)
                action = ''.join(action_parts).strip()

                # Add action
                bill.add_action(chamber, action, act_date)

            self.save_bill(bill)
開發者ID:rcadby,項目名稱:fiftystates,代碼行數:90,代碼來源:get_legislation.py


注:本文中的pyutils.legislation.Bill.add_source方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。