

Python Bill.add_subject Method Code Examples

This article collects typical usage examples of the Python method pupa.scrape.Bill.add_subject. If you are unsure what Bill.add_subject does, how to call it, or what real-world usage looks like, the curated examples below should help. You can also explore further usage examples of the containing class, pupa.scrape.Bill.


The following shows 15 code examples of the Bill.add_subject method, sorted by popularity by default. A minimal usage sketch is given first, before the full scraper examples.
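The sketch below shows the basic calling pattern in isolation. It follows the current pupa API (identifier / legislative_session keyword arguments) seen in Example 2 and later; the bill identifier, session name, source URL, and subject strings are hypothetical placeholders, not taken from any of the scrapers below.

from pupa.scrape import Bill

# Minimal sketch with hypothetical values: create a bill and tag it with subjects.
bill = Bill(
    identifier='HB 123',
    legislative_session='2018',
    title='An example bill about education funding',
    chamber='lower',
    classification='bill',
)
bill.add_source('https://example.com/legislature/HB123')

# add_subject() appends a subject string to bill.subject, so check for
# duplicates first if the source data may repeat topics (see Example 10).
for subject in ['Education', 'Appropriations', 'Education']:
    if subject not in bill.subject:
        bill.add_subject(subject)

print(bill.subject)  # ['Education', 'Appropriations']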

Example 1: scrape

# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_subject [as alias]
    def scrape(self):
        self.session = '2011'

        for i, page in enumerate(self.searchLegislation()) :
            for legislation_summary in self.parseSearchResults(page) :
                title = legislation_summary['Title'].strip()
                if title == "":
                    continue

                bill = Bill(name=legislation_summary['Record #'],
                            session=self.session,
                            title=title,
                            type=[legislation_summary['Type'].lower()],
                            organization=self.jurisdiction.name)

                bill.add_source(legislation_summary['URL'])

                legislation_details = self.expandLegislationSummary(legislation_summary)

                for related_bill in legislation_details.get('Related files', []) :
                    bill.add_related_bill(name = related_bill,
                                          session = self.session,
                                          relation='other-session',
                                          chamber=None)

                for i, sponsor in enumerate(legislation_details.get('Sponsors', [])) :
                    if i == 0 :
                        primary = True
                        sponsorship_type = "Primary"
                    else :
                        primary = False
                        sponsorship_type = "Regular"

                    bill.add_sponsor(sponsor, sponsorship_type,
                                     'person', primary)

                for subject in legislation_details.get(u'Topics', []) :
                    bill.add_subject(subject)

                for attachment in legislation_details.get(u'Attachments', []) :
                    bill.add_version_link('PDF',
                                          attachment['url'],
                                          mimetype="application/pdf")


                yield bill
Developer ID: ChaelCodes, Project: scrapers-us-municipal, Lines of code: 48, Source file: bills.py

Example 2: scrape_bill

# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_subject [as alias]
    def scrape_bill(self, session, chamber, bill_type, url):
        bill_html = self.get(url).text
        bill_page = lxml.html.fromstring(bill_html)

        qs = dict(urlparse.parse_qsl(urlparse.urlparse(url).query))
        bill_id = '{}{}'.format(qs['billtype'], qs['billnumber'])
        versions = bill_page.xpath("//table[contains(@id, 'GridViewVersions')]")[0]

        metainf_table = bill_page.xpath('//div[contains(@id, "itemPlaceholder")]//table[1]')[0]
        action_table = bill_page.xpath('//div[contains(@id, "UpdatePanel1")]//table[1]')[0]

        meta = self.parse_bill_metainf_table(metainf_table)

        subs = [s.strip() for s in meta['Report Title'].split(";")]
        if "" in subs:
            subs.remove("")
        b = Bill(bill_id, session, meta['Measure Title'],
                 chamber=chamber,
                 classification=bill_type)
        if meta['Description']:
            b.add_abstract(meta['Description'], 'description')
        for subject in subs:
            b.add_subject(subject)
        if url:
            b.add_source(url)

        prior_session = '{} Regular Session'.format(str(int(session[:4]) - 1))
        companion = meta['Companion'].strip()
        if companion:
            b.add_related_bill(identifier=companion.replace(u'\xa0', ' '),
                               legislative_session=prior_session,
                               relation_type="companion")
        prior = bill_page.xpath(
            "//table[@id='ContentPlaceHolderCol1_GridViewStatus']/tr/td/font/text()")[-1]
        if 'carried over' in prior.lower():
            b.add_related_bill(identifier=bill_id.replace(u'\xa0', ' '),
                               legislative_session=prior_session,
                               relation_type="companion")
        for sponsor in meta['Introducer(s)']:
            b.add_sponsorship(sponsor, 'primary', 'person', True)
        versions = self.parse_bill_versions_table(b, versions)
        yield from self.parse_bill_actions_table(b, action_table, bill_id, session, url, chamber)
        yield b
Developer ID: sunlightlabs, Project: openstates, Lines of code: 45, Source file: bills.py

Example 3: scrape_bill

# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_subject [as alias]

# ......... part of the code is omitted here .........
                    title=title,
                    classification=btype
                    )
        bill.add_source(url)

        regex_ns = "http://exslt.org/regular-expressions"
        version_links = page.xpath(
            "//a[re:test(@href, 'Bill.aspx\?File=.*\.htm', 'i')]",
            namespaces={'re': regex_ns})
        for link in version_links:
            bill.add_version_link(
                                link.xpath('string()').strip(),
                                link.attrib['href'],
                                media_type='text/html',
                                on_duplicate='ignore'
                )

        sponsor_links = page.xpath(
            "//td[contains(@id, 'tdSponsors')]/a")
        for link in sponsor_links:
            bill.add_sponsorship(
                    link.text,
                    classification='primary',
                    primary=True,
                    entity_type='person'
                )

        actor = chamber
        use_row = False
        self.debug(bill_id)
        for row in page.xpath("//table[contains(@id, 'BillActions')]/tr"):

            if 'Date' in row.text_content() and 'Action' in row.text_content():
                use_row = True
                continue
            elif not use_row:
                continue

            action = row.xpath("string(td[2])").strip()

            atypes = []
            if action.startswith('First read'):
                atypes.append('introduction')
                atypes.append('reading-1')
            elif action.startswith('Signed by Governor'):
                atypes.append('executive-signature')
                actor = 'executive'

            match = re.match(r'(.*) Do Pass( Amended)?, (Passed|Failed)',
                             action)
            if match:
                if match.group(1) in ['Senate',
                                      'House of Representatives']:
                    first = ''
                else:
                    first = 'committee-'
                if match.group(3).lower() == 'passed':
                    second = 'passage'
                elif match.group(3).lower() == 'failed':
                    second = 'failure'
                atypes.append("%s%s" % (first, second))

            if 'referred to' in action.lower():
                atypes.append('referral-committee')

            if 'Motion to amend, Passed Amendment' in action:
                atypes.append('amendment-introduction')
                atypes.append('amendment-passage')

            if 'Veto override, Passed' in action:
                atypes.append('veto-override-passage')
            elif 'Veto override, Failed' in action:
                atypes.append('veto-override-failure')

            if 'Delivered to the Governor' in action:
                atypes.append('executive-receipt')

            match = re.match("First read in (Senate|House)", action)
            if match:
                if match.group(1) == 'Senate':
                    actor = 'upper'
                else:
                    actor = 'lower'

            date = row.xpath("string(td[1])").strip()
            match = re.match('\d{2}/\d{2}/\d{4}', date)
            if not match:
                self.warning("Bad date: %s" % date)
                continue
            date = datetime.datetime.strptime(date, "%m/%d/%Y").date()

            for link in row.xpath("td[2]/a[contains(@href, 'RollCall')]"):
                yield from self.scrape_vote(bill, date, link.attrib['href'])

            bill.add_action(action, date, chamber=actor, classification=atypes)

        for link in page.xpath("//a[contains(@href, 'Keyword')]"):
            bill.add_subject(link.text.strip())

        yield bill
Developer ID: neelneelpurk, Project: openstates, Lines of code: 104, Source file: bills.py

Example 4: scrape

# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_subject [as alias]

# ......... part of the code is omitted here .........
                classification = 'bill' if 'B' in bill_id else 'resolution'

                bill = Bill(bill_id, legislative_session=session, chamber=chamber,
                            title=title, classification=classification)
                bill.add_source(number_link.xpath('a/@href')[0])

                # get bill from API
                bill_api_url = ('http://search-prod.lis.state.oh.us/solarapi/v1/'
                                'general_assembly_{}/{}/{}/'.format(
                                    session,
                                    'bills' if 'B' in bill_id else 'resolutions',
                                    bill_id.lower().replace(' ', '')
                                ))
                data = self.get(bill_api_url).json()

                # add title if no short title
                if not bill.title:
                    bill.title = data['items'][0]['longtitle']
                bill.add_title(data['items'][0]['longtitle'], 'long title')

                # this stuff is version-specific
                for version in data['items']:
                    version_name = version["version"]
                    version_link = base_url+version["pdfDownloadLink"]
                    bill.add_version_link(version_name, version_link, media_type='application/pdf')

                # we'll use latest bill_version for everything else
                bill_version = data['items'][0]
                bill.add_source(bill_api_url)

                # subjects
                for subj in bill_version["subjectindexes"]:
                    try:
                        bill.add_subject(subj["primary"])
                    except KeyError:
                        pass
                    try:
                        secondary_subj = subj["secondary"]
                    except KeyError:
                        secondary_subj = ""
                    if secondary_subj:
                        bill.add_subject(secondary_subj)

                # sponsors
                sponsors = bill_version["sponsors"]
                for sponsor in sponsors:
                    sponsor_name = self.get_sponsor_name(sponsor)
                    bill.add_sponsorship(
                                        sponsor_name,
                                        classification='primary',
                                        entity_type='person',
                                        primary=True
                        )

                cosponsors = bill_version["cosponsors"]
                for sponsor in cosponsors:
                    sponsor_name = self.get_sponsor_name(sponsor)
                    bill.add_sponsorship(
                                         sponsor_name,
                                         classification='cosponsor',
                                         entity_type='person',
                                         primary=False,
                        )

                try:
                    action_doc = self.get(base_url+bill_version["action"][0]["link"])
Developer ID: sunlightlabs, Project: openstates, Lines of code: 70, Source file: bills.py

Example 5: get_bill_info

# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_subject [as alias]
    def get_bill_info(self, chamber, session, bill_detail_url, version_list_url):
        """
        Extracts all the requested info for a given bill.

        Calls the parent's methods to enter the results into JSON files.
        """
        chamber = 'lower' if chamber.lower() == 'house' else chamber
        chamber = 'upper' if chamber.lower() == 'senate' else chamber

        # Get html and parse
        doc = self.lxmlize(bill_detail_url)

        # Check if bill hasn't been transmitted to the other chamber yet
        transmit_check = self.get_node(
            doc,
            '//h1[text()[contains(.,"Bills")]]/following-sibling::ul/li/text()'
        )
        if (transmit_check is not None and
                'has not been transmitted' in transmit_check.strip()):
            self.logger.debug('Bill has not been transmitted to other chamber '
                              '... skipping {0}'.format(bill_detail_url))
            return

        # Get the basic parts of the bill
        bill_id = self.get_node(doc, '//h1[contains(@class,"card-title float-left mr-4")]/text()')
        self.logger.debug(bill_id)
        bill_title_text = self.get_node(
            doc,
            '//h2[text()[contains(.,"Description")]]/following-sibling::p/text()'
        )
        if bill_title_text is not None:
            bill_title = bill_title_text.strip()
        else:
            long_desc_url = self.get_node(
                doc,
                '//a[text()[contains(.,"Long Description")]]/@href'
            )
            long_desc_page = self.lxmlize(long_desc_url)
            long_desc_text = self.get_node(long_desc_page, '//h1/'
                                           'following-sibling::p/text()')
            if long_desc_text is not None:
                bill_title = long_desc_text.strip()
            else:
                bill_title = 'No title found.'
                self.logger.warning('No title found for {}.'.format(bill_id))
        self.logger.debug(bill_title)
        bill_type = {'F': 'bill', 'R': 'resolution',
                     'C': 'concurrent resolution'}[bill_id[1].upper()]
        bill = Bill(bill_id, legislative_session=session, chamber=chamber,
                    title=bill_title, classification=bill_type)

        # Add source
        bill.add_source(bill_detail_url)

        for subject in self._subject_mapping[bill_id]:
            bill.add_subject(subject)

        # Get companion bill.
        companion = doc.xpath('//table[@class="status_info"]//tr[1]/td[2]'
                              '/a[starts-with(@href, "?")]/text()')
        companion = self.make_bill_id(companion[0]) if len(companion) > 0 else None
        companion_chamber = self.chamber_from_bill(companion)
        if companion is not None:
            bill.add_companion(companion, chamber=companion_chamber)

        # Grab sponsors
        bill = self.extract_sponsors(bill, doc, chamber)

        # Add Actions performed on the bill.
        bill = self.extract_actions(bill, doc, chamber)

        # Get all versions of the bill.
        bill = self.extract_versions(bill, doc, chamber, version_list_url)

        yield bill
Developer ID: sunlightlabs, Project: openstates, Lines of code: 77, Source file: bills.py

Example 6: scrape_matter

# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_subject [as alias]
    def scrape_matter(self, matter_link, sess):
        matter_types = {
        "Additions":"other",
        "Administrative Order":"order",
        "Annual Evaluation":"other",
        "Bid Advertisement":"other",
        "Bid Awards":"other",
        "Bid Contract":"contract",
        "Bid Protest":"other",
        "Bid Rejection":"other",
        "Birthday Scroll":"commemoration",
        "Certificate of Appreciation":"commemoration",
        "Change Order":"order",
        "Citizen's Presentation":"other",
        "Commendation":"commemoration",
        "Conflict Waiver":"other",
        "Congratulatory Certificate":"commemoration",
        "Deferrals":"other",
        "Discussion Item":"other",
        "Distinguished Visitor":"other",
        "Joint Meeting/Workshop":"other",
        "Mayoral Veto":"other",
        "Miscellaneous":"other",
        "Nomination":"nomination",
        "Oath of Office":"other",
        "Omnibus Reserve":"bill",
        "Ordinance":"ordinance",
        "Plaque":"commemoration",
        "Presentation":"other",
        "Proclamation":"proclamation",
        "Professional Service Agreement":"contract",
        "Public Hearing":"other",
        "Report":"other",
        "Request for Proposals":"other",
        "Request for Qualifications":"other",
        "Request to Advertise":"other",
        "Resolution":"resolution",
        "Resolution of Sympathy":"resolution",
        "Service Awards":"commemoration",
        "Special Item":"other",
        "Special Presentation":"other",
        "Supplement":"other",
        "Swearing-In":"other",
        "Time Sensitive Items":"other",
        "Withdrawals":"other",
        "Workshop Item":"other",
        "Zoning":"other",
        "Zoning Resolution":"resolution"
        }
        matter_doc = self.lxmlize(matter_link)
        info_dict = self.matter_table_to_dict(matter_doc)
        #we're going to use the year of the intro date as the session
        #until/unless we come up with something better
        intro_date = datetime.strptime(info_dict["Introduced"],"%m/%d/%Y")
        session = sess["identifier"]
        category = matter_types[info_dict["File Type"]]
        if 'File Name' in info_dict:
            title = info_dict["File Name"]
        elif "Title" in info_dict and info_dict["Title"].strip():
            title = info_dict["Title"].strip()
        else:
            self.warning("bill has no title")
            return
        if category == 'other':
            bill = Bill(identifier=info_dict["File Number"],
                legislative_session=session,
                title=title
                )
        else:
            bill = Bill(identifier=info_dict["File Number"],
                legislative_session=session,
                title=title,
                classification=category
                )
        for spons in info_dict["Sponsors"]:
            if spons == "NONE":
                continue
            try:
                name,spons_type = spons.rsplit(",",1)
            except ValueError:
                name = spons
                spons_type = "Sponsor"
            primary = True if "Prime Sponsor" in spons_type else False
            entity = "person"
            if "committee" in name:
                entity = committee
            bill.add_sponsorship(name,spons_type,entity,primary)
        if "Indexes" in info_dict:
            for subj in info_dict["Indexes"]:
                if subj.strip() and subj.strip() != "NONE":
                    bill.add_subject(subj.strip())
        if "Title" in info_dict and info_dict["Title"].strip():
            note = "bill's long title'"
            if ("Note" in info_dict and info_dict["Note"].strip()):
                note = info_dict["Note"]
            bill.add_abstract(abstract=info_dict["Title"],note=note)
        self.process_action_table(matter_doc,bill)
        bill.add_source(matter_link, note='web')

        yield bill
Developer ID: patcon, Project: scrapers-us-municipal, Lines of code: 102, Source file: bills.py

Example 7: scrape

# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_subject [as alias]
    def scrape(self):
        unreachable_urls = []

        for leg_summary in self.legislation(created_after=datetime.datetime(2015, 5, 17)) :
            title = leg_summary['Title'].strip()

            if not title or not leg_summary['Intro\xa0Date'] :
                continue
                # https://chicago.legistar.com/LegislationDetail.aspx?ID=1800754&GUID=29575A7A-5489-4D8B-8347-4FC91808B201&Options=Advanced&Search=
                # doesn't have an intro date

            bill_type = BILL_TYPES[leg_summary['Type']]

            bill_session = self.session(self.toTime(leg_summary['Intro\xa0Date']))
            bill = Bill(identifier=leg_summary['Record #'],
                        legislative_session=bill_session,
                        title=title,
                        classification=bill_type,
                        from_organization={"name":"Chicago City Council"})

            bill.add_source(leg_summary['url'])

            try :
                leg_details = self.legDetails(leg_summary['url'])
            except IndexError :
                unreachable_urls.append(leg_summary['url'])
                yield bill
                continue

            for related_bill in leg_details.get('Related files', []) :
                lower_title = title.lower()
                if "sundry" in lower_title or "miscellaneous" in lower_title:  # these are omnibus
                    bill.add_related_bill(identifier = related_bill['label'],
                                          legislative_session = bill.legislative_session,
                                          relation_type='replaces')
                # for now we're skipping related bills if they
                # don't contain words that make us think they're
                # in an omnibus relationship with each other
                
            for i, sponsor in enumerate(leg_details.get('Sponsors', [])) :
                if i == 0 :
                    primary = True
                    sponsorship_type = "Primary"
                else :
                    primary = False
                    sponsorship_type = "Regular"

                sponsor_name = sponsor['label']

                # Does the Mayor/Clerk introduce legislation as
                # individual role holders or as the Office of the City
                # Clerk and the Office of the Mayor?
                entity_type = 'person'
                if sponsor_name.startswith(('City Clerk', 
                                            'Mendoza, Susana')) :
                    sponsor_name = 'Office of the City Clerk'
                    entity_type = 'organization'
                elif sponsor_name.startswith(('Emanuel, Rahm',)) :
                    sponsor_name = 'Office of the Mayor'
                    entity_type = 'organization'
                if not sponsor_name.startswith(('Misc. Transmittal',
                                                'No Sponsor',
                                                'Dept./Agency')) :
                    bill.add_sponsorship(sponsor_name, 
                                         sponsorship_type,
                                         entity_type,
                                         primary,
                                         entity_id = _make_pseudo_id(name=sponsor_name))

            if 'Topic' in leg_details :
                for subject in leg_details[u'Topic'].split(',') :
                    bill.add_subject(subject)

            for attachment in leg_details.get('Attachments', []) :
                if attachment['label'] :
                    bill.add_version_link(attachment['label'],
                                          attachment['url'],
                                          media_type="application/pdf")

            for action in self.history(leg_summary['url']) :
                action_description = action['Action']
                try :
                    action_date =  self.toTime(action['Date']).date().isoformat()
                except AttributeError : # https://chicago.legistar.com/LegislationDetail.aspx?ID=1424866&GUID=CEC53337-B991-4268-AE8A-D4D174F8D492
                    continue

                if action_description :
                    try :
                        responsible_org = action['Action\xa0By']['label']
                    except TypeError  :
                        responsible_org = action['Action\xa0By']
                    if responsible_org == 'City Council' :
                        responsible_org = 'Chicago City Council'

                    act = bill.add_action(action_description,
                                          action_date,
                                          organization={'name': responsible_org},
                                          classification=ACTION_CLASSIFICATION[action_description])

                    if action_description == 'Referred' :
# ......... the rest of the code is omitted here .........
Developer ID: rchrist, Project: scrapers-us-municipal, Lines of code: 103, Source file: bills.py

Example 8: scrape_details

# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_subject [as alias]
    def scrape_details(self, bill_detail_url, session, chamber, bill_id):
        """
        Create the Bill and add the information obtained from the provided bill_detail_url.
        and then yield the bill object.
        :param bill_detail_url:
        :param session:
        :param chamber:
        :param bill_id:
        :return:
        """
        page = self.get(bill_detail_url).text

        if 'INVALID BILL NUMBER' in page:
            self.warning('INVALID BILL %s' % bill_detail_url)
            return

        doc = lxml.html.fromstring(page)
        doc.make_links_absolute(bill_detail_url)

        bill_div = doc.xpath('//div[@style="margin:0 0 40px 0;"]')[0]

        bill_type = bill_div.xpath('span/text()')[0]

        if 'General Bill' in bill_type:
            bill_type = 'bill'
        elif 'Concurrent Resolution' in bill_type:
            bill_type = 'concurrent resolution'
        elif 'Joint Resolution' in bill_type:
            bill_type = 'joint resolution'
        elif 'Resolution' in bill_type:
            bill_type = 'resolution'
        else:
            raise ValueError('unknown bill type: %s' % bill_type)

        # this is fragile, but less fragile than it was
        b = bill_div.xpath('./b[text()="Summary:"]')[0]
        bill_summary = b.getnext().tail.strip()

        bill = Bill(
            bill_id,
            legislative_session=session,  # session name metadata's `legislative_sessions`
            chamber=chamber,  # 'upper' or 'lower'
            title=bill_summary,
            classification=bill_type
        )

        subjects = list(self._subjects[bill_id])

        for subject in subjects:
            bill.add_subject(subject)

        # sponsors
        for sponsor in doc.xpath('//a[contains(@href, "member.php")]/text()'):
            bill.add_sponsorship(
                name=sponsor,
                classification='primary',
                primary=True,
                entity_type='person'
            )
        for sponsor in doc.xpath('//a[contains(@href, "committee.php")]/text()'):
            sponsor = sponsor.replace(u'\xa0', ' ').strip()
            bill.add_sponsorship(
                name=sponsor,
                classification='primary',
                primary=True,
                entity_type='organization'
            )

        # find versions
        version_url = doc.xpath('//a[text()="View full text"]/@href')[0]
        version_html = self.get(version_url).text
        version_doc = lxml.html.fromstring(version_html)
        version_doc.make_links_absolute(version_url)
        for version in version_doc.xpath('//a[contains(@href, "/prever/")]'):
            # duplicate versions with same date, use first appearance

            bill.add_version_link(
                note=version.text,  # Description of the version from the state;
                                    #  eg, 'As introduced', 'Amended', etc.
                url=version.get('href'),
                on_duplicate='ignore',
                media_type='text/html'  # Still a MIME type
            )

        # actions
        for row in bill_div.xpath('table/tr'):
            date_td, chamber_td, action_td = row.xpath('td')

            date = datetime.datetime.strptime(date_td.text, "%m/%d/%y")
            action_chamber = {'Senate': 'upper',
                              'House': 'lower',
                              None: 'legislature'}[chamber_td.text]

            action = action_td.text_content()
            action = action.split('(House Journal')[0]
            action = action.split('(Senate Journal')[0].strip()

            atype = action_type(action)

            bill.add_action(
# ......... the rest of the code is omitted here .........
Developer ID: sunlightlabs, Project: openstates, Lines of code: 103, Source file: bills.py

Example 9: _scrape_bills

# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_subject [as alias]
    def _scrape_bills(self):
        """
        Does the following

        1) Scrapes bill data from unitedstates project and saves the data to path specified in UnitedStates module
        2) Iterates over bill data and converts each one to an OCD-compliant bill model.
        3) Yields the OCD-compliant bill model instance

        @return: generator for federal US bills in OCD-compliant format
        @rtype: generator
        """

        # run scraper first to pull in all the bill data
        self._run_unitedstates_bill_scraper()
        # iterate over all the files and build and yield Bill objects
        for filename in find_files(settings.SCRAPED_DATA_DIR, '.*/data/[0-9]+/bills/[^\/]+/[^\/]+/data.json'):
            try:
                with open(filename) as json_file:
                    json_data = json.load(json_file)

                    # Initialize Object
                    bill = Bill(constants.TYPE_MAP[json_data['bill_type']]['canonical'] + ' ' + json_data['number'],
                                json_data['congress'],
                                json_data['official_title'],
                                chamber=constants.TYPE_MAP[json_data['bill_type']]['chamber']
                    )

                    # add source of data
                    bill.add_source(json_data['url'], note='all')

                    # add subjects
                    for subject in json_data['subjects']:
                        bill.add_subject(subject)

                    # add summary
                    if 'summary' in json_data and json_data['summary'] is not None:
                        bill.add_abstract(json_data['summary']['text'],
                                          json_data['summary']['as'],
                                          json_data['summary']['date'])

                    # add titles
                    for item in json_data['titles']:
                        bill.add_title(item['title'], item['type'])

                    # add other/related Bills
                    for b in json_data['related_bills']:
                        if 'type' in b and b['type'] == 'bill':
                            split = b['bill_id'].split('-')
                            m = UnitedStatesBillScraper.BILL_SPLIT.match(split[0])

                            bill.add_related_bill(constants.TYPE_MAP[m.group(1)]['canonical'] + ' ' + m.group(2),
                                                  legislative_session=split[1],
                                                  relation_type='companion')

                    # add sponsor
                    bill.add_sponsorship_by_identifier(json_data['sponsor']['name'], 'person', 'person', True,
                                                       scheme='thomas_id', identifier=json_data['sponsor']['thomas_id'],
                                                       chamber=constants.TYPE_MAP[json_data['bill_type']]['chamber'])

                    # add cosponsors
                    for cs in json_data['cosponsors']:
                        bill.add_sponsorship_by_identifier(cs['name'], 'person', 'person', False,
                                                           scheme='thomas_id', identifier=cs['thomas_id'],
                                                           chamber=constants.TYPE_MAP[json_data['bill_type']]['chamber'])

                    # add introduced_at and actions
                    bill.add_action('date of introduction', datetime_to_date(json_data['introduced_at']),
                                    chamber=constants.TYPE_MAP[json_data['bill_type']]['chamber'],
                                    related_entities=[])

                    # add other actions
                    for action in json_data['actions']:
                        bill.actions.append({'date': datetime_to_date(action['acted_at']),
                                             'type': [action['type']],
                                             'description': action['text'],
                                             'actor': constants.TYPE_MAP[json_data['bill_type']]['chamber'],
                                             'related_entities': []
                                             })

                    # add bill versions
                    for version_path in find_files(os.path.join(settings.SCRAPED_DATA_DIR,
                                                   'data', bill.legislative_session, 'bills', json_data['bill_type'],
                                                   json_data['bill_type'] + json_data['number'],
                                                   'text-versions'), '/.*/*\.json'):
                        try:
                            with open(version_path) as version_file:
                                version_json_data = json.load(version_file)
                                for k, v in version_json_data['urls'].items():
                                    bill.versions.append({'date': datetime_to_date(version_json_data['issued_on']),
                                      'type': version_json_data['version_code'],
                                      'name': constants.VERSION_MAP[version_json_data['version_code']],
                                      'links': [{'mimetype': k, 'url': v}]})
                        except IOError:
                            print("Unable to open or parse file with path " + version_path)
                            continue

                    # finally yield bill object
                    yield bill

            except IOError:
# ......... the rest of the code is omitted here .........
Developer ID: crdunwel, Project: scrapers-us-federal, Lines of code: 103, Source file: bill.py

Example 10: scrape_bills

# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_subject [as alias]

# ......... part of the code is omitted here .........
                                      media_type='text/html')

            # amendments
            # ex: http://billstatus.ls.state.ms.us/2018/pdf/history/HB/HB1040.xml
            for amd in details_root.xpath('//AMENDMENTS/*'):
                if amd.tag == 'HAM':
                    name = amd.xpath('HAM_DESC[1]/text()')[0]
                    name = append_parens(amd, 'HAM_DISP', name)
                    name = append_parens(amd, 'HAM_VDESC', name)

                    pdf_url = amd.xpath('string(HAM_PDF'
                                        ')').replace("../", "")

                    html_url = amd.xpath('string(HAM_OTHER'
                                         ')').replace("../", "")
                elif amd.tag == 'SAM':
                    name = amd.xpath('SAM_DESC[1]/text()')[0]
                    name = append_parens(amd, 'SAM_DISP', name)
                    name = append_parens(amd, 'SAM_VDESC', name)

                    pdf_url = amd.xpath('string(SAM_PDF'
                                        ')').replace("../", "")

                    html_url = amd.xpath('string(SAM_OTHER'
                                         ')').replace("../", "")
                elif amd.tag == 'AMRPT':
                    name = amd.xpath('AMRPT_DESC[1]/text()')[0]
                    pdf_url = amd.xpath('string(AMRPT_PDF'
                                        ')').replace("../", "")

                    html_url = amd.xpath('string(AMRPT_OTHER'
                                         ')').replace("../", "")

                pdf_url = 'http://billstatus.ls.state.ms.us/' + pdf_url
                html_url = 'http://billstatus.ls.state.ms.us/' + html_url

                if 'adopted' in name.lower() or 'amendment report' in name.lower():
                    bill.add_version_link(name, pdf_url,
                                          on_duplicate='ignore',
                                          media_type='application/pdf')
                    bill.add_version_link(name, html_url,
                                          on_duplicate='ignore',
                                          media_type='text/html')

            # avoid duplicate votes
            seen_votes = set()

            # Actions
            for action in details_root.xpath('//HISTORY/ACTION'):
                # action_num  = action.xpath('string(ACT_NUMBER)').strip()
                # action_num = int(action_num)
                act_vote = action.xpath('string(ACT_VOTE)').replace("../../../..", "")
                action_desc = action.xpath('string(ACT_DESC)')
                date, action_desc = action_desc.split(" ", 1)
                date = date + "/" + session[0:4]
                date = datetime.strptime(date, "%m/%d/%Y")

                if action_desc.startswith("(H)"):
                    actor = "lower"
                    action = action_desc[4:]
                elif action_desc.startswith("(S)"):
                    actor = "upper"
                    action = action_desc[4:]
                else:
                    actor = "executive"
                    action = action_desc

                if "Veto" in action and actor == 'executive':
                    version_path = details_root.xpath("string(//VETO_OTHER)")
                    version_path = version_path.replace("../../../../", "")
                    version_url = "http://billstatus.ls.state.ms.us/" + version_path
                    bill.add_document_link("Veto", version_url)

                atype = 'other'
                for prefix, prefix_type in self._action_types:
                    if action.startswith(prefix):
                        atype = prefix_type
                        break

                bill.add_action(action, self._tz.localize(date),
                                chamber=actor,
                                classification=atype if atype != 'other' else None)

                # use committee names as scraped subjects
                subjects = details_root.xpath('//H_NAME/text()')
                subjects += details_root.xpath('//S_NAME/text()')

                for subject in subjects:
                    if subject not in bill.subject:
                        bill.add_subject(subject)

                if act_vote:
                    vote_url = 'http://billstatus.ls.state.ms.us%s' % act_vote
                    if vote_url not in seen_votes:
                        seen_votes.add(vote_url)
                        yield from self.scrape_votes(vote_url, action,
                                                     date, actor, bill)

            bill.add_source(bill_details_url)
            yield bill
Developer ID: sunlightlabs, Project: openstates, Lines of code: 104, Source file: bills.py

Example 11: get_bill

# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_subject [as alias]
    def get_bill(self, matter):
        '''Make Bill object from given matter.'''
        
        '''
        Currently, NYC Legistar does not have conventional "Types" for 
        three newly added committees: https://legistar.council.nyc.gov/Departments.aspx
        We communicated the issue to NYC, and until we learn more, we will
        skip the bills attached to those committees.
        '''
        orgs_without_type = ['Charter Revision Commission 2019',
                             'New York City Advisory Commission on Property Tax Reform',
                             'Democratic Conference of the Council of the City of New York']
        if matter['MatterBodyName'].strip() in orgs_without_type:
            return None

        matter_id = matter['MatterId']
        if matter_id in DUPLICATED_ACTIONS:
            return None

        date = matter['MatterIntroDate']
        title = matter['MatterName']
        identifier = matter['MatterFile']

        if not all((date, title, identifier)):
            return None

        leg_type = BILL_TYPES[matter['MatterTypeName']]

        bill_session = self.sessions(self.toTime(date))

        bill = Bill(identifier=identifier,
                    title=title,
                    classification=leg_type,
                    legislative_session=bill_session,
                    from_organization={"name": "New York City Council"})

        legistar_web = matter['legistar_url']
        legistar_api = self.BASE_URL + '/matters/{0}'.format(matter_id)

        bill.add_source(legistar_web, note='web')
        bill.add_source(legistar_api, note='api')

        if matter['MatterTitle']:
            bill.add_title(matter['MatterTitle'])

        if matter['MatterEXText5']:
            bill.add_abstract(matter['MatterEXText5'], note='')

        try:
            for sponsorship in self.sponsorships(matter_id):
                bill.add_sponsorship(**sponsorship)
        except KeyError:
            self.version_errors.append(legistar_web)
            return None

        for attachment in self.attachments(matter_id):

            if attachment['MatterAttachmentId'] == 103315:  # Duplicate
                return None

            if attachment['MatterAttachmentName']:
                bill.add_document_link(attachment['MatterAttachmentName'],
                                       attachment['MatterAttachmentHyperlink'],
                                       media_type='application/pdf')

        for topic in self.topics(matter_id) :
            bill.add_subject(topic['MatterIndexName'].strip())

        for relation in self.relations(matter_id):
            try:
                related_bill = self.endpoint('/matters/{0}', relation['MatterRelationMatterId'])
            except scrapelib.HTTPError:
                return None
            else:
                date = related_bill['MatterIntroDate']
                related_bill_session = self.session(self.toTime(date))
                identifier = related_bill['MatterFile']
                bill.add_related_bill(identifier=identifier,
                                      legislative_session=related_bill_session,
                                      relation_type='companion')

        try:
            text = self.text(matter_id)
        except KeyError:
            self.version_errors.append(legistar_web)
            return None

        bill.extras['local_classification'] = matter['MatterTypeName']

        if text:
            if text['MatterTextPlain']:
                bill.extras['plain_text'] = text['MatterTextPlain'].replace(u'\u0000', '')

            if text['MatterTextRtf']:
                bill.extras['rtf_text'] = text['MatterTextRtf'].replace(u'\u0000', '')

        return bill
Developer ID: datamade, Project: scrapers-us-municipal, Lines of code: 99, Source file: bills.py

Example 12: parse_bill_status_page

# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_subject [as alias]
    def parse_bill_status_page(self, status_url, bill_url, session, chamber):
        status_page = lxml.html.fromstring(self.get(status_url).text)
        # see 2007 HB 2... weird.
        bill_re = r'.*?/([A-Z]+)0*(\d+)\.pdf'
        bill_xpath = '//a[contains(@href, ".pdf") and contains(@href, "billpdf")]/@href'
        bill_id = re.search(bill_re, status_page.xpath(bill_xpath)[0],
                            re.IGNORECASE).groups()
        bill_id = "{0} {1}".format(bill_id[0], int(bill_id[1]))

        try:
            xp = '//b[text()="Short Title:"]/../following-sibling::td/text()'
            title = status_page.xpath(xp).pop()
        except IndexError:
            title = status_page.xpath('//tr[1]/td[2]')[0].text_content()

        # Add bill type.
        _bill_id = bill_id.lower()
        if 'b' in _bill_id:
            classification = 'bill'
        elif 'j' in _bill_id or 'jr' in _bill_id:
            classification = 'joint resolution'
        elif 'cr' in _bill_id:
            classification = 'concurrent resolution'
        elif 'r' in _bill_id:
            classification = 'resolution'

        bill = Bill(bill_id, legislative_session=session, chamber=chamber,
                    title=title, classification=classification)

        self.add_actions(bill, status_page)
        votes = self.add_votes(bill, status_page, status_url)

        tabledata = self._get_tabledata(status_page)

        # Add sponsor info.
        bill.add_sponsorship(tabledata['primary sponsor:'][0], classification='primary',
                             entity_type='person', primary=True)

        # A various plus fields MT provides.
        plus_fields = [
            'requester',
            ('chapter number:', 'chapter'),
            'transmittal date:',
            'drafter',
            'fiscal note probable:',
            'bill draft number:',
            'preintroduction required:',
            'by request of',
            'category:']

        for x in plus_fields:
            if isinstance(x, tuple):
                _key, key = x
            else:
                _key = key = x
                key = key.replace(' ', '_')

            try:
                val = tabledata[_key]
            except KeyError:
                continue

            if len(val) == 1:
                val = val[0]

            bill.extras[key] = val

        # Add bill subjects.
        xp = '//th[contains(., "Revenue/Approp.")]/ancestor::table/tr'
        subjects = []
        for tr in status_page.xpath(xp):
            try:
                subj = tr.xpath('td')[0].text_content()
            except IndexError:
                continue
            subjects.append(subj)

        for s in subjects:
            bill.add_subject(s)

        self.add_fiscal_notes(status_page, bill)

        return bill, list(votes)
Developer ID: jalbertbowden, Project: openstates, Lines of code: 85, Source file: bills.py

Example 13: parse_bill_status_page

# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_subject [as alias]
    def parse_bill_status_page(self, url, page, session, chamber):
        # see 2007 HB 2... weird.
        parsed_url = urllib.parse.urlparse(url)
        parsed_query = dict(urllib.parse.parse_qsl(parsed_url.query))
        bill_id = "{0} {1}".format(
            parsed_query['P_BLTP_BILL_TYP_CD'],
            parsed_query['P_BILL_NO1'])

        try:
            xp = '//b[text()="Short Title:"]/../following-sibling::td/text()'
            title = page.xpath(xp).pop()
        except IndexError:
            title = page.xpath('//tr[1]/td[2]')[0].text_content()

        # Add bill type.
        _bill_id = bill_id.lower()
        if 'b' in _bill_id:
            classification = 'bill'
        elif 'j' in _bill_id or 'jr' in _bill_id:
            classification = 'joint resolution'
        elif 'cr' in _bill_id:
            classification = 'concurrent resolution'
        elif 'r' in _bill_id:
            classification = 'resolution'

        bill = Bill(bill_id, legislative_session=session, chamber=chamber,
                    title=title, classification=classification)

        self.add_actions(bill, page)
        votes = self.add_votes(bill, page, url)

        tabledata = self._get_tabledata(page)

        # Add sponsor info.
        bill.add_sponsorship(tabledata['primary sponsor:'][0], classification='primary',
                             entity_type='person', primary=True)

        # A various plus fields MT provides.
        plus_fields = [
            'requester',
            ('chapter number:', 'chapter'),
            'transmittal date:',
            'drafter',
            'fiscal note probable:',
            'bill draft number:',
            'preintroduction required:',
            'by request of',
            'category:']

        for x in plus_fields:
            if isinstance(x, tuple):
                _key, key = x
            else:
                _key = key = x
                key = key.replace(' ', '_')

            try:
                val = tabledata[_key]
            except KeyError:
                continue

            if len(val) == 1:
                val = val[0]

            bill.extras[key] = val

        # Add bill subjects.
        xp = '//th[contains(., "Revenue/Approp.")]/ancestor::table/tr'
        subjects = []
        for tr in page.xpath(xp):
            try:
                subj = tr.xpath('td')[0].text_content()
            except IndexError:
                continue
            subjects.append(subj)

        for s in subjects:
            bill.add_subject(s)

        self.add_fiscal_notes(page, bill)

        return bill, list(votes)
Developer ID: sunlightlabs, Project: openstates, Lines of code: 84, Source file: bills.py

Example 14: scrape

# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_subject [as alias]
    def scrape(self) :
        three_days_ago = datetime.datetime.now() - datetime.timedelta(3)
        for matter in self.matters(three_days_ago) :
            matter_id = matter['MatterId']

            date = matter['MatterIntroDate']
            title = matter['MatterTitle']
            identifier = matter['MatterFile']

            if not all((date, title, identifier)) :
                continue

            bill_session = self.session(self.toTime(date))
            bill_type = BILL_TYPES[matter['MatterTypeName']]

            if identifier.startswith('S'):
                alternate_identifiers = [identifier]
                identifier = identifier[1:]
            else:
                alternate_identifiers = []

            bill = Bill(identifier=identifier,
                        legislative_session=bill_session,
                        title=title,
                        classification=bill_type,
                        from_organization={"name":"Chicago City Council"})

            legistar_web = self.legislation_detail_url(matter_id)
            legistar_api = 'http://webapi.legistar.com/v1/chicago/matters/{0}'.format(matter_id)

            bill.add_source(legistar_web, note='web')
            bill.add_source(legistar_api, note='api')

            for identifier in alternate_identifiers:
                bill.add_identifier(identifier)

            for action, vote in self.actions(matter_id) :
                act = bill.add_action(**action)

                if action['description'] == 'Referred' :
                    body_name = matter['MatterBodyName']
                    if body_name != 'City Council' :
                        act.add_related_entity(body_name,
                                               'organization',
                                               entity_id = _make_pseudo_id(name=body_name))

                result, votes = vote
                if result :
                    vote_event = VoteEvent(legislative_session=bill.legislative_session, 
                                           motion_text=action['description'],
                                           organization=action['organization'],
                                           classification=None,
                                           start_date=action['date'],
                                           result=result,
                                           bill=bill)

                    vote_event.add_source(legistar_web)
                    vote_event.add_source(legistar_api + '/histories')

                    for vote in votes :
                        raw_option = vote['VoteValueName'].lower()
                        clean_option = self.VOTE_OPTIONS.get(raw_option,
                                                             raw_option)
                        vote_event.vote(clean_option, 
                                        vote['VotePersonName'].strip())

                    yield vote_event


            for sponsorship in self.sponsorships(matter_id) :
                bill.add_sponsorship(**sponsorship)

            for topic in self.topics(matter_id) :
                bill.add_subject(topic['MatterIndexName'].strip())

            for attachment in self.attachments(matter_id) :
                if attachment['MatterAttachmentName'] :
                    bill.add_version_link(attachment['MatterAttachmentName'],
                                          attachment['MatterAttachmentHyperlink'],
                                          media_type="application/pdf")

            bill.extras = {'local_classification' : matter['MatterTypeName']}

            text = self.text(matter_id)

            if text :
                if text['MatterTextPlain'] :
                    bill.extras['plain_text'] = text['MatterTextPlain']

                if text['MatterTextRtf'] :
                    bill.extras['rtf_text'] = text['MatterTextRtf'].replace(u'\u0000', '')

            yield bill
Developer ID: ErnieAtLYD, Project: scrapers-us-municipal, Lines of code: 95, Source file: bills.py

Example 15: scrape_bill

# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_subject [as alias]
    def scrape_bill(self, bill_id):
        old = self.api('bills/' + bill_id + '?')

        # not needed
        old.pop('id')
        old.pop('state')
        old.pop('level', None)
        old.pop('country', None)
        old.pop('created_at')
        old.pop('updated_at')
        old.pop('action_dates')
        old.pop('+bill_type',None)
        old.pop('+subject', None)
        old.pop('+scraped_subjects', None)
        old.pop('subjects', [])

        classification = old.pop('type')

        # ca weirdness
        if 'fiscal committee' in classification:
            classification.remove('fiscal committee')
        if 'urgency' in classification:
            classification.remove('urgency')
        if 'local program' in classification:
            classification.remove('local program')
        if 'tax levy' in classification:
            classification.remove('tax levy')

        if classification[0] in ['miscellaneous', 'jres', 'cres']:
            return

        if classification == ['memorial resolution'] and self.state == 'ar':
            classification = ['memorial']
        if classification == ['concurrent memorial resolution'] and self.state == 'ar':
            classification = ['concurrent memorial']
        if classification == ['joint session resolution'] and self.state == 'il':
            classification = ['joint resolution']
        if classification == ['legislative resolution'] and self.state == 'ny':
            classification = ['resolution']
        if classification == ['address'] and self.state == 'nh':
            classification = ['resolution']

        if not old['title'] and self.state == 'me':
            old['title'] = '(unknown)'

        chamber = old.pop('chamber')
        if self.state in ('ne', 'dc'):
            chamber = 'legislature'
        elif chamber in ('joint', 'conference'):
            chamber = 'legislature'

        new = Bill(old.pop('bill_id'), old.pop('session'), old.pop('title'),
                   chamber=chamber, classification=classification)

        abstract = old.pop('summary', None)
        if abstract:
            new.add_abstract(abstract, note='')

        for title in old.pop('alternate_titles'):
            new.add_title(title)

        for doc in old.pop('documents'):
            new.add_document_link(doc['name'], doc['url'], on_duplicate='ignore')

        for doc in old.pop('versions'):
            new.add_version_link(doc['name'], doc['url'], media_type=doc.pop('mimetype', ''))

        for subj in old.pop('scraped_subjects', []):
            if subj:
                new.add_subject(subj)

        for spon in old.pop('sponsors'):
            if spon.get('committee_id') is not None:
                entity_type = 'organization'
            elif spon.get('leg_id') is not None:
                entity_type = 'person'
            else:
                entity_type = ''
            new.add_sponsorship(spon['name'], spon['type'], entity_type,
                                spon['type'] == 'primary')

        for act in old.pop('actions'):
            actor = act['actor']
            if actor.lower() in ('governor', 'mayor', 'secretary of state'):
                actor = 'executive'
            elif actor.lower() == 'house' or (actor.lower().startswith('lower (') and self.state == 'ca'):
                actor = 'lower'
            elif actor.lower() in ('senate', 'upper') or (actor.lower().startswith('upper (') and self.state == 'ca'):
                actor = 'upper'
            elif actor in ('joint', 'other', 'Data Systems', 'Speaker', 'clerk',
                           'Office of the Legislative Fiscal Analyst', 'Became Law w',
                           'conference') or (actor.lower().startswith('legislature (') and self.state == 'ca'):
                actor = 'legislature'

            if actor in ('committee', 'sponsor') and self.state == 'pr':
                actor = 'legislature'

            # nebraska & DC
            if actor in ('upper','council') and self.state in ('ne', 'dc'):
                actor = 'legislature'
# ......... the rest of the code is omitted here .........
Developer ID: opencivicdata, Project: scrapers-us-state, Lines of code: 103, Source file: bills.py


Note: The pupa.scrape.Bill.add_subject examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by various developers; copyright in the source code remains with the original authors. For redistribution and use, please refer to the corresponding project's license; do not repost without permission.