当前位置: 首页>>代码示例>>Python>>正文


Python Bill.title方法代码示例

本文整理汇总了Python中pupa.scrape.Bill.title方法的典型用法代码示例。如果您正苦于以下问题:Python Bill.title方法的具体用法?Python Bill.title怎么用?Python Bill.title使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在pupa.scrape.Bill的用法示例。


在下文中一共展示了Bill.title方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: scrape_bill_list

# 需要导入模块: from pupa.scrape import Bill [as 别名]
# 或者: from pupa.scrape.Bill import title [as 别名]
    def scrape_bill_list(self, url):
        bill_list = self._get_bill_list(url)

        for bill_info in bill_list:

            (bill_id, ) = bill_info.xpath('td[1]/font/input/@value')
            (sponsor, ) = bill_info.xpath('td[2]/font/input/@value')
            (subject, ) = bill_info.xpath('td[3]//text()')
            subject = subject.strip()
            chamber = self.CHAMBERS[bill_id[0]]

            if 'B' in bill_id:
                bill_type = 'bill'
            elif 'JR' in bill_id:
                bill_type = 'joint resolution'
            elif 'R' in bill_id:
                bill_type = 'resolution'
            else:
                raise AssertionError(
                    "Unknown bill type for bill '{}'".format(bill_id))

            bill = Bill(
                bill_id,
                legislative_session=self.session,
                chamber=chamber,
                title='',
                classification=bill_type,
            )
            if subject:
                bill.subject = [subject]
            if sponsor:
                bill.add_sponsorship(
                    name=sponsor,
                    entity_type='person',
                    classification='primary',
                    primary=True,
                )
            bill.add_source(url)

            bill_url = ('http://alisondb.legislature.state.al.us/Alison/'
                        'SESSBillStatusResult.aspx?BILL={}'.format(bill_id))
            bill.add_source(bill_url)

            bill_html = self._get_bill_response(bill_url)
            if bill_html is None:
                self.warning("Bill {} has no webpage, and will be skipped".
                             format(bill_id))
                continue
            bill_doc = lxml.html.fromstring(bill_html)

            if (bill_doc.xpath('//span[@id="ContentPlaceHolder1_lblShotTitle"]')):
                title = bill_doc.xpath(
                    '//span[@id="ContentPlaceHolder1_lblShotTitle"]'
                )[0].text_content().strip()
            if not title:
                title = "[No title given by state]"
            bill.title = title

            version_url_base = (
                'http://alisondb.legislature.state.al.us/ALISON/'
                'SearchableInstruments/{0}/PrintFiles/{1}-'.
                format(self.session, bill_id))
            versions = bill_doc.xpath(
                '//table[@class="box_versions"]/tr/td[2]/font/text()')
            for version in versions:
                name = version
                if version == "Introduced":
                    version_url = version_url_base + 'int.pdf'
                elif version == "Engrossed":
                    version_url = version_url_base + 'eng.pdf'
                elif version == "Enrolled":
                    version_url = version_url_base + 'enr.pdf'
                else:
                    raise NotImplementedError(
                        "Unknown version type found: '{}'".format(name))

                bill.add_version_link(
                    name,
                    version_url,
                    media_type='application/pdf',
                    on_duplicate='ignore',
                )

            # Fiscal notes exist, but I can't figure out how to build their URL
            fiscal_notes = bill_doc.xpath(
                '//table[@class="box_fiscalnote"]')[1:]
            for fiscal_note in fiscal_notes:
                pass

            # Budget Isolation Resolutions are handled as extra actions/votes
            birs = bill_doc.xpath(
                '//div[@class="box_bir"]//table//table/tr')[1:]
            for bir in birs:
                bir_action = bir.xpath('td[1]')[0].text_content().strip()
                # Sometimes ALISON's database puts another bill's
                # actions into the BIR action list; ignore these
                if bill_id not in bir_action:
                    self.warning(
                        "BIR action found ({}) ".format(bir_action) +
                        "that doesn't match the bill ID ({})".format(bill_id))
#.........这里部分代码省略.........
开发者ID:sunlightlabs,项目名称:openstates,代码行数:103,代码来源:bills.py

示例2: scrape_bill_type

# 需要导入模块: from pupa.scrape import Bill [as 别名]
# 或者: from pupa.scrape.Bill import title [as 别名]
    def scrape_bill_type(self, chamber, session, bill_type, type_abbr,
                         committee_abbr_regex=get_committee_name_regex()):
        bills = self.session.query(CABill).filter_by(
            session_year=session).filter_by(
            measure_type=type_abbr)

        for bill in bills:
            bill_session = session
            if bill.session_num != '0':
                bill_session += ' Special Session %s' % bill.session_num

            bill_id = bill.short_bill_id

            fsbill = Bill(bill_id, session, title='', chamber=chamber)
            if ((bill_id.startswith('S') and chamber == 'lower') or
                    (bill_id.startswith('A') and chamber == 'upper')):
                print("!!!! BAD ID/CHAMBER PAIR !!!!", bill)
                continue

            # # Construct session for web query, going from '20092010' to '0910'
            # source_session = session[2:4] + session[6:8]

            # # Turn 'AB 10' into 'ab_10'
            # source_num = "%s_%s" % (bill.measure_type.lower(),
            #                         bill.measure_num)

            # Construct a fake source url
            source_url = ('http://leginfo.legislature.ca.gov/faces/'
                          'billNavClient.xhtml?bill_id=%s') % bill.bill_id

            fsbill.add_source(source_url)
            fsbill.add_version_link(bill_id, source_url, media_type='text/html')

            title = ''
            type_ = ['bill']
            subject = ''
            all_titles = set()

            # Get digest test (aka "summary") from latest version.
            if bill.versions:
                version = bill.versions[-1]
                nsmap = version.xml.nsmap
                xpath = '//caml:DigestText/xhtml:p'
                els = version.xml.xpath(xpath, namespaces=nsmap)
                chunks = []
                for el in els:
                    t = etree_text_content(el)
                    t = re.sub(r'\s+', ' ', t)
                    t = re.sub(r'\)(\S)', lambda m: ') %s' % m.group(1), t)
                    chunks.append(t)
                summary = '\n\n'.join(chunks)

            for version in bill.versions:
                if not version.bill_xml:
                    continue

                version_date = self._tz.localize(version.bill_version_action_date)

                # create a version name to match the state's format
                # 02/06/17 - Enrolled
                version_date_human = version_date.strftime(
                    '%m/%d/%y')
                version_name = "{} - {}".format(
                    version_date_human, version.bill_version_action)

                version_base = "https://leginfo.legislature.ca.gov/faces"

                version_url_pdf = "{}/billPdf.xhtml?bill_id={}&version={}".format(
                    version_base, version.bill_id, version.bill_version_id)

                fsbill.add_version_link(
                    version_name,
                    version_url_pdf,
                    media_type='application/pdf',
                    date=version_date.date())

                # CA is inconsistent in that some bills have a short title
                # that is longer, more descriptive than title.
                if bill.measure_type in ('AB', 'SB'):
                    impact_clause = clean_title(version.title)
                    title = clean_title(version.short_title)
                else:
                    impact_clause = None
                    if len(version.title) < len(version.short_title) and \
                            not version.title.lower().startswith('an act'):
                        title = clean_title(version.short_title)
                    else:
                        title = clean_title(version.title)

                if title:
                    all_titles.add(title)

                type_ = [bill_type]

                if version.appropriation == 'Yes':
                    type_.append('appropriation')

                tags = []
                if version.fiscal_committee == 'Yes':
                    tags.append('fiscal committee')
#.........这里部分代码省略.........
开发者ID:sunlightlabs,项目名称:openstates,代码行数:103,代码来源:bills.py

示例3: scrape

# 需要导入模块: from pupa.scrape import Bill [as 别名]
# 或者: from pupa.scrape.Bill import title [as 别名]
    def scrape(self, session=None, chambers=None):
        # Bills endpoint can sometimes take a very long time to load
        self.timeout = 300

        if not session:
            session = self.latest_session()
            self.info('no session, using %s', session)

        if int(session) < 128:
            raise AssertionError("No data for period {}".format(session))

        elif int(session) < 131:
            # they changed their data format starting in 131st and added
            # an undocumented API
            yield from self.old_scrape(session)

        else:
            chamber_dict = {"Senate": "upper", "House": "lower",
                            "House of Representatives": "lower",
                            "house": "lower", "senate": "upper"}

            # so presumanbly not everything passes, but we haven't
            # seen anything not pass yet, so we'll need to wait
            # till it fails and get the right language in here
            vote_results = {"approved": True,
                            "passed": True,
                            "adopted": True,
                            "true": True,
                            "false": False,
                            "failed": False,
                            True: True,
                            False: False}

            action_dict = {"ref_ctte_100": "referral-committee",
                           "intro_100": "introduction",
                           "intro_101": "introduction",
                           "pass_300": "passage",
                           "intro_110": "reading-1",
                           "refer_210": "referral-committee",
                           "crpt_301": None,
                           "crpt_317": None,
                           "concur_606": "passage",
                           "pass_301": "passage",
                           "refer_220": "referral-committee",
                           "intro_102": ["introduction", "passage"],
                           "intro_105": ["introduction", "passage"],
                           "intro_ref_ctte_100": "referral-committee",
                           "refer_209": None,
                           "intro_108": ["introduction", "passage"],
                           "intro_103": ["introduction", "passage"],
                           "msg_reso_503": "passage",
                           "intro_107": ["introduction", "passage"],
                           "imm_consid_360": "passage",
                           "refer_213": None,
                           "adopt_reso_100": "passage",
                           "adopt_reso_110": "passage",
                           "msg_507": "amendment-passage",
                           "confer_713": None,
                           "concur_603": None,
                           "confer_712": None,
                           "msg_506": "amendment-failure",
                           "receive_message_100": "passage",
                           "motion_920": None,
                           "concur_611": None,
                           "confer_735": None,
                           "third_429": None,
                           "final_501": None,
                           "concur_608": None,
                           }

            base_url = "http://search-prod.lis.state.oh.us"
            first_page = base_url
            first_page += "/solarapi/v1/general_assembly_{session}/".format(session=session)
            legislators = self.get_legislator_ids(first_page)
            all_amendments = self.get_other_data_source(first_page, base_url, "amendments")
            all_fiscals = self.get_other_data_source(first_page, base_url, "fiscals")
            all_synopsis = self.get_other_data_source(first_page, base_url, "synopsiss")
            all_analysis = self.get_other_data_source(first_page, base_url, "analysiss")

            for row in self.get_bill_rows(session):
                spacer, number_link, _ga, title, primary_sponsor, status, spacer = row.xpath('td')

                # S.R.No.1 -> SR1
                bill_id = number_link.text_content().replace('No.', '')
                bill_id = bill_id.replace('.', '').replace(' ', '')
                # put one space back in between type and number
                bill_id = re.sub(r'([a-zA-Z]+)(\d+)', r'\1 \2', bill_id)

                title = title.text_content().strip()
                title = re.sub(r'^Title', '', title)

                chamber = 'lower' if 'H' in bill_id else 'upper'
                classification = 'bill' if 'B' in bill_id else 'resolution'

                bill = Bill(bill_id, legislative_session=session, chamber=chamber,
                            title=title, classification=classification)
                bill.add_source(number_link.xpath('a/@href')[0])

                # get bill from API
                bill_api_url = ('http://search-prod.lis.state.oh.us/solarapi/v1/'
#.........这里部分代码省略.........
开发者ID:sunlightlabs,项目名称:openstates,代码行数:103,代码来源:bills.py


注:本文中的pupa.scrape.Bill.title方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。