

Python Bill.add_action method code examples

This article collects typical usage examples of the Python method pupa.scrape.Bill.add_action. If you are unsure exactly how Bill.add_action works, how to call it, or what real-world uses of it look like, the curated code examples below should help. You can also explore further usage examples of the enclosing class, pupa.scrape.Bill.


Fifteen code examples of the Bill.add_action method are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code samples.
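
Before the numbered examples, here is a minimal, self-contained sketch of the call pattern they all share. It is not taken from any of the projects cited below: the bill identifier, session, URL, and action text are placeholder assumptions, and only the shape of the add_action call (description, date, chamber, classification, plus an optional related entity on the returned action) reflects the usage shown in the examples.

from pupa.scrape import Bill

# Illustrative bill; the identifier, session, and title are placeholders.
bill = Bill('HB 1', legislative_session='2018', chamber='lower',
            title='Example Act', classification='bill')
bill.add_source('https://example.com/HB1')  # hypothetical source URL

# add_action records one step of the bill's history: a description, a date,
# the acting chamber, and an optional classification. It returns the action
# object, which can be linked to related entities such as committees.
action = bill.add_action('Referred to Committee on Finance', '2018-01-15',
                         chamber='lower', classification='referral-committee')
action.add_related_entity('Committee on Finance', entity_type='organization')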

Example 1: scrape_bills

# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_action [as alias]
    def scrape_bills(self, session):
        session_key = SESSION_KEYS[session]
        measures_response = self.api_client.get('measures', page=500, session=session_key)

        legislators = index_legislators(self, session_key)

        for measure in measures_response:
            bid = '{} {}'.format(measure['MeasurePrefix'], measure['MeasureNumber'])

            chamber = self.chamber_code[bid[0]]
            bill = Bill(
                bid.replace(' ', ''),
                legislative_session=session,
                chamber=chamber,
                title=measure['RelatingTo'],
                classification=self.bill_types[measure['MeasurePrefix'][1:]]
            )
            bill.add_abstract(measure['MeasureSummary'].strip(), note='summary')

            for sponsor in measure['MeasureSponsors']:
                legislator_code = sponsor['LegislatoreCode']  # typo in API
                if legislator_code:
                    try:
                        legislator = legislators[legislator_code]
                    except KeyError:
                        logger.warn('Legislator {} not found in session {}'.format(
                            legislator_code, session))
                        legislator = legislator_code
                    bill.add_sponsorship(
                        name=legislator,
                        classification={'Chief': 'primary', 'Regular': 'cosponsor'}[
                            sponsor['SponsorLevel']],
                        entity_type='person',
                        primary=True if sponsor['SponsorLevel'] == 'Chief' else False
                    )

            bill.add_source(
                "https://olis.leg.state.or.us/liz/{session}/Measures/Overview/{bid}".format(
                    session=session_key, bid=bid.replace(' ', ''))
            )
            for document in measure['MeasureDocuments']:
                # TODO: probably mixing documents & versions here - should revisit
                try:
                    bill.add_version_link(document['VersionDescription'], document['DocumentUrl'],
                                          media_type='application/pdf')
                except ValueError:
                    logger.warn('Duplicate link found for {}'.format(document['DocumentUrl']))
            for action in measure['MeasureHistoryActions']:
                classifiers = self.determine_action_classifiers(action['ActionText'])
                when = datetime.datetime.strptime(action['ActionDate'], '%Y-%m-%dT%H:%M:%S')
                when = self.tz.localize(when)
                bill.add_action(action['ActionText'], when,
                                chamber=self.chamber_code[action['Chamber']],
                                classification=classifiers)

            yield bill
Developer: cliftonmcintosh, Project: openstates, Lines of code: 58, Source file: bills.py

Example 2: test_vote_event_bill_actions_two_stage

# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_action [as alias]
def test_vote_event_bill_actions_two_stage():
    # this test is very similar to what we're testing in test_vote_event_bill_actions w/
    # ve3 and ve4, that two bills that reference the same action won't conflict w/ the
    # OneToOneField, but in this case we do it in two stages so that the conflict is found
    # even if the votes weren't in the same scrape
    j = create_jurisdiction()
    j.legislative_sessions.create(name='1900', identifier='1900')
    org1 = ScrapeOrganization(name='House', classification='lower')
    bill = ScrapeBill('HB 1', '1900', 'Axe & Tack Tax Act', from_organization=org1._id)

    bill.add_action(description='passage', date='1900-04-02', chamber='lower')

    ve1 = ScrapeVoteEvent(legislative_session='1900', motion_text='passage',
                          start_date='1900-04-02', classification='passage:bill',
                          result='pass', bill_chamber='lower', bill='HB 1',
                          bill_action='passage',
                          organization=org1._id)
    ve2 = ScrapeVoteEvent(legislative_session='1900', motion_text='passage',
                          start_date='1900-04-02', classification='passage:bill',
                          result='pass', bill_chamber='lower', bill='HB 1',
                          bill_action='passage',
                          organization=org1._id)
    # disambiguate them
    ve1.pupa_id = 'one'
    ve2.pupa_id = 'two'

    oi = OrganizationImporter('jid')
    oi.import_data([org1.as_dict()])

    bi = BillImporter('jid', oi, DumbMockImporter())
    bi.import_data([bill.as_dict()])

    # first imports just fine
    VoteEventImporter('jid', DumbMockImporter(), oi, bi).import_data([
        ve1.as_dict(),
    ])
    votes = list(VoteEvent.objects.all())
    assert len(votes) == 1
    assert votes[0].bill_action is not None

    # when second is imported, ensure that action stays pinned to first just as it would
    # have if they were both in same import
    VoteEventImporter('jid', DumbMockImporter(), oi, bi).import_data([
        ve1.as_dict(),
        ve2.as_dict(),
    ])
    votes = list(VoteEvent.objects.all())
    assert len(votes) == 2
    assert votes[0].bill_action is not None
    assert votes[1].bill_action is None
Developer: opencivicdata, Project: pupa, Lines of code: 52, Source file: test_vote_event_importer.py

Example 3: scrape_bill

# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_action [as alias]

#......... part of the code omitted here .........
                new.add_subject(subj)

        for spon in old.pop('sponsors'):
            if spon.get('committee_id') is not None:
                entity_type = 'organization'
            elif spon.get('leg_id') is not None:
                entity_type = 'person'
            else:
                entity_type = ''
            new.add_sponsorship(spon['name'], spon['type'], entity_type,
                                spon['type'] == 'primary')

        for act in old.pop('actions'):
            actor = act['actor']
            if actor.lower() in ('governor', 'mayor', 'secretary of state'):
                actor = 'executive'
            elif actor.lower() == 'house' or (actor.lower().startswith('lower (') and self.state == 'ca'):
                actor = 'lower'
            elif actor.lower() in ('senate', 'upper') or (actor.lower().startswith('upper (') and self.state == 'ca'):
                actor = 'upper'
            elif actor in ('joint', 'other', 'Data Systems', 'Speaker', 'clerk',
                           'Office of the Legislative Fiscal Analyst', 'Became Law w',
                           'conference') or (actor.lower().startswith('legislature (') and self.state == 'ca'):
                actor = 'legislature'

            if actor in ('committee', 'sponsor') and self.state == 'pr':
                actor = 'legislature'

            # nebraska & DC
            if actor in ('upper','council') and self.state in ('ne', 'dc'):
                actor = 'legislature'

            if act['action']:
                newact = new.add_action(act['action'], act['date'][:10], chamber=actor,
                                        classification=[action_types[c] for c in act['type'] if c != 'other'])
                for re in act.get('related_entities', []):
                    if re['type'] == 'committee':
                        re['type'] = 'organization'
                    elif re['type'] == 'legislator':
                        re['type'] = 'person'
                    newact.add_related_entity(re['name'], re['type'])

        for comp in old.pop('companions', []):
            if self.state in ('nj', 'ny', 'mn'):
                rtype = 'companion'
            new.add_related_bill(comp['bill_id'], comp['session'], rtype)

        for abid in old.pop('alternate_bill_ids', []) + old.pop('+alternate_bill_ids', []):
            new.add_identifier(abid)


        # generic OpenStates stuff
        for id in old.pop('all_ids'):
            new.add_identifier(id, scheme='openstates')

        for source in old.pop('sources'):
            source.pop('retrieved', None)
            new.add_source(**source)

        ext_title = old.pop('+extended_title', None)
        if ext_title:
            new.add_title(ext_title, note='Extended Title')
        official_title = old.pop('+official_title', None)
        if official_title:
            new.add_title(official_title, note='Official Title')
Developer: opencivicdata, Project: scrapers-us-state, Lines of code: 69, Source file: bills.py

Example 4: scrape_bill

# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_action [as alias]
    def scrape_bill(self, chamber, session, bill_id):
        # try and get bill for the first year of the session biennium
        url = 'http://legislature.mi.gov/doc.aspx?%s-%s' % (
            session[:4], bill_id.replace(' ', '-'))
        html = self.get(url).text
        # Otherwise, try second year of the session biennium
        if ('Page Not Found' in html or
                'The bill you are looking for is not available yet' in html):
            url = 'http://legislature.mi.gov/doc.aspx?%s-%s' % (
                session[-4:], bill_id.replace(' ', '-'))
            html = self.get(url).text
            if ('Page Not Found' in html or
                    'The bill you are looking for is not available yet' in html):
                self.warning("Cannot open bill page for {}; skipping".format(bill_id))
                return

        doc = lxml.html.fromstring(html)
        doc.make_links_absolute('http://legislature.mi.gov')

        title = doc.xpath('//span[@id="frg_billstatus_ObjectSubject"]')[0].text_content()

        # get B/R/JR/CR part and look up bill type
        bill_type = bill_types[bill_id.split(' ')[0][1:]]

        bill = Bill(bill_id, session, title, chamber=chamber,
                    classification=bill_type)
        bill.add_source(url)

        # sponsors
        sponsors = doc.xpath('//span[@id="frg_billstatus_SponsorList"]/a')
        for sponsor in sponsors:
            name = sponsor.text.replace(u'\xa0', ' ')
            # sometimes district gets added as a link
            if name.isnumeric():
                continue

            if len(sponsors) > 1:
                classification = (
                    'primary'
                    if sponsor.tail and 'primary' in sponsor.tail
                    else 'cosponsor'
                )
            else:
                classification = 'primary'
            bill.add_sponsorship(
                name=name,
                chamber=chamber,
                entity_type='person',
                primary=classification == 'primary',
                classification=classification,
            )

        bill.subject = doc.xpath('//span[@id="frg_billstatus_CategoryList"]/a/text()')

        # actions (skip header)
        for row in doc.xpath('//table[@id="frg_billstatus_HistoriesGridView"]/tr')[1:]:
            tds = row.xpath('td')  # date, journal link, action
            date = tds[0].text_content()
            journal = tds[1].text_content()
            action = tds[2].text_content()
            date = TIMEZONE.localize(datetime.datetime.strptime(date, "%m/%d/%Y"))
            # instead of trusting upper/lower case, use journal for actor
            actor = 'upper' if 'SJ' in journal else 'lower'
            classification = categorize_action(action)
            bill.add_action(action, date, chamber=actor, classification=classification)

            # check if action mentions a sub
            submatch = re.search(r'WITH SUBSTITUTE\s+([\w\-\d]+)', action, re.IGNORECASE)
            if submatch and tds[2].xpath('a'):
                version_url = tds[2].xpath('a/@href')[0]
                version_name = tds[2].xpath('a/text()')[0].strip()
                version_name = 'Substitute {}'.format(version_name)
                self.info("Found Substitute {}".format(version_url))
                if version_url.lower().endswith('.pdf'):
                    mimetype = 'application/pdf'
                elif version_url.lower().endswith('.htm'):
                    mimetype = 'text/html'
                bill.add_version_link(version_name, version_url, media_type=mimetype)

            # check if action mentions a vote
            rcmatch = re.search(r'Roll Call # (\d+)', action, re.IGNORECASE)
            if rcmatch:
                rc_num = rcmatch.groups()[0]
                # in format mileg.aspx?page=getobject&objectname=2011-SJ-02-10-011
                journal_link = tds[1].xpath('a/@href')
                if journal_link:
                    objectname = journal_link[0].rsplit('=', 1)[-1]
                    chamber_name = {'upper': 'Senate', 'lower': 'House'}[actor]
                    vote_url = BASE_URL + '/documents/%s/Journal/%s/htm/%s.htm' % (
                        session, chamber_name, objectname)
                    results = self.parse_roll_call(vote_url, rc_num)
                    vote = VoteEvent(
                        start_date=date,
                        chamber=actor,
                        bill=bill,
                        motion_text=action,
                        result='pass' if len(results['yes']) > len(results['no']) else 'fail',
                        classification='passage',
                    )

#......... part of the code omitted here .........
Developer: sunlightlabs, Project: openstates, Lines of code: 103, Source file: bills.py

Example 5: scrape_chamber

# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_action [as alias]
    def scrape_chamber(self, chamber, session):
        chamber_name = 'Senate' if chamber == 'upper' else 'House'
        chamber_letter = chamber_name[0]
        # perhaps we should save this data so we can make one request for both?
        bill_request = self.get(ksapi.url + 'bill_status/').text
        bill_request_json = json.loads(bill_request)
        bills = bill_request_json['content']
        for bill_data in bills:

            bill_id = bill_data['BILLNO']

            # filter other chambers
            if not bill_id.startswith(chamber_letter):
                continue

            if 'CR' in bill_id:
                btype = 'concurrent resolution'
            elif 'R' in bill_id:
                btype = 'resolution'
            elif 'B' in bill_id:
                btype = 'bill'

            title = bill_data['SHORTTITLE'] or bill_data['LONGTITLE']

            # main
            bill = Bill(
                bill_id,
                session,
                title,
                chamber=chamber,
                classification=btype,
            )
            bill.extras = {'status': bill_data['STATUS']}

            bill.add_source(ksapi.url + 'bill_status/' + bill_id.lower())

            if (bill_data['LONGTITLE'] and
                    bill_data['LONGTITLE'] != bill.title):
                bill.add_title(bill_data['LONGTITLE'])

            # An "original sponsor" is the API's expression of "primary sponsor"
            for primary_sponsor in bill_data['ORIGINAL_SPONSOR']:
                bill.add_sponsorship(
                    name=primary_sponsor,
                    entity_type='organization' if "committee" in primary_sponsor.lower()
                                else 'person',
                    primary=True,
                    classification="original sponsor"
                )
            for sponsor in bill_data['SPONSOR_NAMES']:
                if sponsor in bill_data['ORIGINAL_SPONSOR']:
                    continue
                bill.add_sponsorship(
                    name=sponsor,
                    entity_type='organization' if "committee" in sponsor.lower() else 'person',
                    primary=False,
                    classification='cosponsor',
                )

            # history is backwards
            for event in reversed(bill_data['HISTORY']):
                actor = ('upper' if event['chamber'] == 'Senate'
                         else 'lower')

                date = event['session_date']
                # append committee names if present
                if 'committee_names' in event:
                    action = (event['status'] + ' ' +
                              ' and '.join(event['committee_names']))
                else:
                    action = event['status']

                if event['action_code'] not in ksapi.action_codes:
                    self.warning('unknown action code on %s: %s %s' %
                                 (bill_id, event['action_code'],
                                  event['status']))
                    atype = None
                else:
                    atype = ksapi.action_codes[event['action_code']]
                bill.add_action(
                    action, date, chamber=actor, classification=atype)

            # Versions are exposed in `bill_data['versions'],
            # but lack any descriptive text or identifiers;
            # continue to scrape these from the HTML
            yield from self.scrape_html(bill, session)

            yield bill
Developer: sunlightlabs, Project: openstates, Lines of code: 90, Source file: bills.py

Example 6: scrape_bill

# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_action [as alias]
	def scrape_bill(self, bill_url, bill_id, session_id):
		page = self.lxmlize(bill_url)
		# create bill
		title = page.xpath("//h1/text()")[0]
		bill = Bill(identifier=bill_id,
			        legislative_session=session_id,
			        title=title)
		bill.add_source(bill_url, note="detail")

		# add additional fields

		# abstract
		try:
			# abstract is directly above <h2>Legislative History</h2>
			leg_his = page.xpath("//h2[text()='Legislative History']")[0]
			abstract = leg_his.xpath("preceding-sibling::p/text()")[0]
			bill.add_abstract(abstract=abstract.strip(), note="summary")
			# TODO trim whitespace from summary
		except IndexError:
			print("No abstract for bill {} in session {}".format(bill_id, session_id))

		# the rest of the fields are found inside this <table>
		data_table = page.xpath("//table[contains(@class, 'data')]")[0]

		# sponsor
		sponsor_name = data_table.xpath(self.bill_table_query("Sponsor") + "/text()")[0]
		bill.add_sponsorship(name=sponsor_name,
				classification="Primary",
				entity_type="person",
				primary=True
				)

		# actions
		action_lines = data_table.xpath(self.bill_table_query("Actions") + "/text()")
		for line in action_lines:
			line = line.join('')
			try:
				for date_str, action_type in self.parse_actions(line):
					bill.add_action(date=date_str,
						description=action_type,	
						classification=action_type)
			except ValueError:
				print("failed to parse these actions: {}".format([line]))


		# co-sponsors
		co_sponsors = data_table.xpath(self.bill_table_query("Co-Sponsors") + "/text()")
		co_sponsors = [name.strip() for name in co_sponsors if name.strip()]
		for name in co_sponsors:
			bill.add_sponsorship(name=name,
						classification="co-sponsor",
						entity_type="person",
						primary=False)

		# committee (stored as another sponsorship in OCD)
		committees = data_table.xpath(self.bill_table_query("Committee") + "/a/text()")
		for comm in committees:
			bill.add_sponsorship(name=comm,
							classification="secondary", # classification ?
							entity_type="organization",
							primary=False)

		return bill
Developer: datamade, Project: scrapers-us-municipal, Lines of code: 65, Source file: bills.py

Example 7: scrape_chamber

# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_action [as alias]
    def scrape_chamber(self, chamber, session):
        chamber_name = 'Senate' if chamber == 'upper' else 'House'
        chamber_letter = chamber_name[0]
        # perhaps we should save this data so we can make one request for both?
        bill_request = self.get(ksapi.url + 'bill_status/').text
        bill_request_json = json.loads(bill_request)
        bills = bill_request_json['content']
        for bill_data in bills:

            bill_id = bill_data['BILLNO']

            # filter other chambers
            if not bill_id.startswith(chamber_letter):
                continue

            if 'CR' in bill_id:
                btype = 'concurrent resolution'
            elif 'R' in bill_id:
                btype = 'resolution'
            elif 'B' in bill_id:
                btype = 'bill'

            title = bill_data['SHORTTITLE'] or bill_data['LONGTITLE']

            # main
            bill = Bill(
                bill_id,
                session,
                title,
                chamber=chamber,
                classification=btype,
            )
            bill.extras = {'status': bill_data['STATUS']}

            bill.add_source(ksapi.url + 'bill_status/' + bill_id.lower())

            if (bill_data['LONGTITLE'] and
                    bill_data['LONGTITLE'] != bill.title):
                bill.add_title(bill_data['LONGTITLE'])

            for sponsor in bill_data['SPONSOR_NAMES']:
                stype = ('primary' if len(bill_data['SPONSOR_NAMES']) == 1
                         else 'cosponsor')
                if sponsor:
                    bill.add_sponsorship(
                        name=sponsor,
                        entity_type='person',
                        primary=stype == 'primary',
                        classification=stype,
                    )

            # history is backwards
            for event in reversed(bill_data['HISTORY']):
                actor = ('upper' if event['chamber'] == 'Senate'
                         else 'lower')

                date = datetime.datetime.strptime(event['occurred_datetime'], "%Y-%m-%dT%H:%M:%S")
                # append committee names if present
                if 'committee_names' in event:
                    action = (event['status'] + ' ' +
                              ' and '.join(event['committee_names']))
                else:
                    action = event['status']

                if event['action_code'] not in ksapi.action_codes:
                    self.warning('unknown action code on %s: %s %s' %
                                 (bill_id, event['action_code'],
                                  event['status']))
                    atype = None
                else:
                    atype = ksapi.action_codes[event['action_code']]
                bill.add_action(
                    action, date.strftime('%Y-%m-%d'), chamber=actor, classification=atype)

            try:
                yield from self.scrape_html(bill, session)
            except scrapelib.HTTPError as e:
                self.warning('unable to fetch HTML for bill {0}'.format(
                    bill['bill_id']))

            yield bill
Developer: cliftonmcintosh, Project: openstates, Lines of code: 83, Source file: bills.py

Example 8: _scrape_bill

# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_action [as alias]
    def _scrape_bill(self, session, bill_data):
        details = self._parse_bill_details(bill_data)

        (senate_url, assembly_url, bill_chamber, bill_type, bill_id,
         title, (prefix, number, active_version)) = details

        bill = Bill(
            bill_id,
            legislative_session=session,
            chamber=bill_chamber,
            title=title or bill_data['summary'],
            classification=bill_type,
        )

        if bill_data['summary']:
            bill.add_abstract(bill_data['summary'], note='')

        bill_active_version = bill_data['amendments']['items'][active_version]

        # Parse sponsors.
        if bill_data['sponsor'] is not None:
            if bill_data['sponsor']['rules'] is True:
                bill.add_sponsorship(
                    'Rules Committee',
                    entity_type='organization',
                    classification='primary',
                    primary=True,
                )
            elif not bill_data['sponsor']['budget']:
                primary_sponsor = bill_data['sponsor']['member']
                bill.add_sponsorship(
                    primary_sponsor['shortName'],
                    entity_type='person',
                    classification='primary',
                    primary=True,
                )

                # There *shouldn't* be cosponsors if there is no sponsor.
                cosponsors = bill_active_version['coSponsors']['items']
                for cosponsor in cosponsors:
                    bill.add_sponsorship(
                        cosponsor['shortName'],
                        entity_type='person',
                        classification='cosponsor',
                        primary=False,
                    )

        # List companion bill.
        same_as = bill_active_version.get('sameAs', {})
        # Check whether "sameAs" property is populated with at least one bill.
        if same_as['items']:
            # Get companion bill ID.
            companion_bill_id = same_as['items'][0]['basePrintNo']

            # Build companion bill session.
            start_year = same_as['items'][0]['session']
            end_year = start_year + 1
            companion_bill_session = '-'.join([str(start_year), str(end_year)])

            # Attach companion bill data.
            bill.add_related_bill(
                companion_bill_id,
                companion_bill_session,
                relation_type='companion',
            )

        # Parse actions.
        chamber_map = {
            'senate': 'upper',
            'assembly': 'lower',
        }

        for action in bill_data['actions']['items']:
            chamber = chamber_map[action['chamber'].lower()]
            action_datetime = datetime.datetime.strptime(action['date'], '%Y-%m-%d')
            action_date = action_datetime.date()
            types, _ = NYBillScraper.categorizer.categorize(action['text'])

            bill.add_action(
                action['text'],
                action_date.strftime('%Y-%m-%d'),
                chamber=chamber,
                classification=types,
            )

        # Handling of sources follows. Sources serving either chamber
        # maintain duplicate data, so we can see certain bill data
        # through either chamber's resources. However, we have to refer
        # to a specific chamber's resources if we want to grab certain
        # specific information such as vote data.
        #
        # As such, I'm placing all potential sources in the interest of
        # thoroughness. - Andy Lo

        # List Open Legislation API endpoint as a source.
        api_url = self.api_client.root + self.api_client.resources['bill'].format(
            session_year=session,
            bill_id=bill_id,
            summary='',
            detail='')
#......... part of the code omitted here .........
Developer: neelneelpurk, Project: openstates, Lines of code: 103, Source file: bills.py

Example 9: scrape

# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_action [as alias]

#......... part of the code omitted here .........

                if "Signed by Governor" in action['FullStatus']:
                    actor = 'executive'
                elif action['ChamberCode'] == 'H':
                    actor = 'lower'
                elif action['ChamberCode'] == 'S':
                    actor = 'upper'
                else:
                    raise AssertionError("Unknown actor for bill action")

                # Categorize action
                if "Signed by Governor" in action['FullStatus']:
                    # assert chambers_passed == set("HS")
                    action_type = 'executive-signature'
                elif "Vetoed by the Governor" in action['FullStatus']:
                    action_type = 'executive-veto'
                elif "Read first time" in action['FullStatus'] \
                        or "Read 1st time" in action['FullStatus']:
                    action_type = 'introduction'
                elif "Reported favorably" in action['FullStatus']:
                    action_type = 'committee-passage-favorable'
                elif actor == 'lower' and any(x.lower().startswith('aspassed')
                                              for x in action['keywords'].split(';')):
                    action_type = 'passage'
                    chambers_passed.add("H")
                elif actor == 'upper' and any(x.lower().startswith(' aspassed')
                                              or x.lower().startswith('aspassed')
                                              for x in action['keywords'].split(';')):
                    action_type = 'passage'
                    chambers_passed.add("S")
                else:
                    action_type = None

                bill.add_action(
                    description=re.sub(HTML_TAGS_RE, "", action['FullStatus']),
                    date=datetime.datetime.strftime(
                        datetime.datetime.strptime(action['StatusDate'], '%m/%d/%Y'),
                        '%Y-%m-%d'
                    ),
                    chamber=actor,
                    classification=action_type
                )

            # Capture votes
            votes_url = 'http://legislature.vermont.gov/bill/loadBillRollCalls/{0}/{1}'.format(
                year_slug, internal_bill_id)
            votes_json = self.get(votes_url).text
            votes = json.loads(votes_json)['data']
            bill.add_source(votes_url)

            for vote in votes:
                roll_call_id = vote['VoteHeaderID']
                roll_call_url = ('http://legislature.vermont.gov/bill/'
                                 'loadBillRollCallDetails/{0}/{1}'.format(
                                     year_slug, roll_call_id))
                roll_call_json = self.get(roll_call_url).text
                roll_call = json.loads(roll_call_json)['data']

                roll_call_yea = []
                roll_call_nay = []
                roll_call_not_voting = []
                for member in roll_call:
                    (member_name, _district) = member['MemberName'].split(" of ")
                    member_name = member_name.strip()

                    if member['MemberVote'] == "Yea":
Developer: sunlightlabs, Project: openstates, Lines of code: 70, Source file: bills.py

Example 10: scrape_bill_list

# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_action [as alias]

#......... part of the code omitted here .........
                bill.add_version_link(
                    name,
                    version_url,
                    media_type='application/pdf',
                    on_duplicate='ignore',
                )

            # Fiscal notes exist, but I can't figure out how to build their URL
            fiscal_notes = bill_doc.xpath(
                '//table[@class="box_fiscalnote"]')[1:]
            for fiscal_note in fiscal_notes:
                pass

            # Budget Isolation Resolutions are handled as extra actions/votes
            birs = bill_doc.xpath(
                '//div[@class="box_bir"]//table//table/tr')[1:]
            for bir in birs:
                bir_action = bir.xpath('td[1]')[0].text_content().strip()
                # Sometimes ALISON's database puts another bill's
                # actions into the BIR action list; ignore these
                if bill_id not in bir_action:
                    self.warning(
                        "BIR action found ({}) ".format(bir_action) +
                        "that doesn't match the bill ID ({})".format(bill_id))
                    continue

                bir_date = datetime.datetime.strptime(
                    bir.xpath('td[2]/font/text()')[0], self.DATE_FORMAT)
                bir_type = bir.xpath('td[1]/font/text()')[0].split(" ")[0]
                bir_chamber = self.CHAMBERS[bir_type[0]]
                bir_text = "{0}: {1}".format(
                    bir_type, bir.xpath('td[3]/font/text()')[0].strip())

                bill.add_action(
                    bir_text,
                    TIMEZONE.localize(bir_date),
                    chamber=bir_chamber,
                    classification='other',
                )

                try:
                    (bir_vote_id, ) = bir.xpath('td[4]/font/input/@value')
                except ValueError:
                    bir_vote_id = ''

                bir_vote_id = bir_vote_id.strip()
                if bir_vote_id.startswith("Roll "):
                    bir_vote_id = bir_vote_id.split(" ")[-1]

                    yield from self.scrape_vote(
                        bill=bill,
                        vote_chamber=bir_type[0],
                        bill_id="{0}%20for%20{1}".format(bir_type, bill_id),
                        vote_id=bir_vote_id,
                        vote_date=TIMEZONE.localize(bir_date),
                        action_text=bir_text
                    )

            actions = bill_doc.xpath('//table[@id="ContentPlaceHolder1_gvHistory"]/tr')[1:]
            action_date = None
            for action in actions:
                # If actions occur on the same day, only one date will exist
                if (action.xpath('td[1]/font/text()')[0].
                        encode('ascii', 'ignore').strip()):
                    action_date = datetime.datetime.strptime(
                        action.xpath('td[1]/font/text()')[0], self.DATE_FORMAT)
Developer: sunlightlabs, Project: openstates, Lines of code: 70, Source file: bills.py

Example 11: scrape_bill_type

# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_action [as alias]

#......... part of the code omitted here .........
                        act_str = act_str.replace(abbr, committee)
                        if not act_str.endswith('.'):
                            act_str = act_str + '.'

                # Determine which chamber the action originated from.
                changed = False
                for committee_chamber in ['upper', 'lower', 'legislature']:
                    if actor.startswith(committee_chamber):
                        actor = committee_chamber
                        changed = True
                        break
                if not changed:
                    actor = 'legislature'

                if actor != action.actor:
                    actor_info = kwargs.get('actor_info', {})
                    actor_info['details'] = action.actor
                    kwargs['actor_info'] = actor_info

                # Add strings for related legislators, if any.
                rgx = r'(?:senator|assembly[mwp][^ .,:;]+)\s+[^ .,:;]+'
                legislators = re.findall(rgx, action.action, re.I)
                if legislators:
                    kwargs['legislators'] = legislators

                date = action.action_date
                date = self._tz.localize(date)
                date = date.date()
                if (actor, act_str, date) in seen_actions:
                    continue

                kwargs.update(self.categorizer.categorize(act_str))

                action = fsbill.add_action(act_str, date.strftime('%Y-%m-%d'), chamber=actor,
                                           classification=kwargs['classification'])
                for committee in kwargs.get('committees', []):
                    action.add_related_entity(
                        committee, entity_type='organization')
                seen_actions.add((actor, act_str, date))

            for vote_num, vote in enumerate(bill.votes):
                if vote.vote_result == '(PASS)':
                    result = True
                else:
                    result = False

                if not vote.location:
                    continue

                full_loc = vote.location.description
                first_part = full_loc.split(' ')[0].lower()
                if first_part in ['asm', 'assembly']:
                    vote_chamber = 'lower'
                    # vote_location = ' '.join(full_loc.split(' ')[1:])
                elif first_part.startswith('sen'):
                    vote_chamber = 'upper'
                    # vote_location = ' '.join(full_loc.split(' ')[1:])
                else:
                    raise ScrapeError("Bad location: %s" % full_loc)

                if vote.motion:
                    motion = vote.motion.motion_text or ''
                else:
                    motion = ''

                if "Third Reading" in motion or "3rd Reading" in motion:
Developer: sunlightlabs, Project: openstates, Lines of code: 70, Source file: bills.py

Example 12: scrape

# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_action [as alias]
    def scrape(self) :
        three_days_ago = datetime.datetime.now() - datetime.timedelta(3)
        for matter in self.matters(three_days_ago) :
            matter_id = matter['MatterId']

            date = matter['MatterIntroDate']
            title = matter['MatterTitle']
            identifier = matter['MatterFile']

            if not all((date, title, identifier)) :
                continue

            bill_session = self.session(self.toTime(date))
            bill_type = BILL_TYPES[matter['MatterTypeName']]

            if identifier.startswith('S'):
                alternate_identifiers = [identifier]
                identifier = identifier[1:]
            else:
                alternate_identifiers = []

            bill = Bill(identifier=identifier,
                        legislative_session=bill_session,
                        title=title,
                        classification=bill_type,
                        from_organization={"name":"Chicago City Council"})

            legistar_web = self.legislation_detail_url(matter_id)
            legistar_api = 'http://webapi.legistar.com/v1/chicago/matters/{0}'.format(matter_id)

            bill.add_source(legistar_web, note='web')
            bill.add_source(legistar_api, note='api')

            for identifier in alternate_identifiers:
                bill.add_identifier(identifier)

            for action, vote in self.actions(matter_id) :
                act = bill.add_action(**action)

                if action['description'] == 'Referred' :
                    body_name = matter['MatterBodyName']
                    if body_name != 'City Council' :
                        act.add_related_entity(body_name,
                                               'organization',
                                               entity_id = _make_pseudo_id(name=body_name))

                result, votes = vote
                if result :
                    vote_event = VoteEvent(legislative_session=bill.legislative_session, 
                                           motion_text=action['description'],
                                           organization=action['organization'],
                                           classification=None,
                                           start_date=action['date'],
                                           result=result,
                                           bill=bill)

                    vote_event.add_source(legistar_web)
                    vote_event.add_source(legistar_api + '/histories')

                    for vote in votes :
                        raw_option = vote['VoteValueName'].lower()
                        clean_option = self.VOTE_OPTIONS.get(raw_option,
                                                             raw_option)
                        vote_event.vote(clean_option, 
                                        vote['VotePersonName'].strip())

                    yield vote_event


            for sponsorship in self.sponsorships(matter_id) :
                bill.add_sponsorship(**sponsorship)

            for topic in self.topics(matter_id) :
                bill.add_subject(topic['MatterIndexName'].strip())

            for attachment in self.attachments(matter_id) :
                if attachment['MatterAttachmentName'] :
                    bill.add_version_link(attachment['MatterAttachmentName'],
                                          attachment['MatterAttachmentHyperlink'],
                                          media_type="application/pdf")

            bill.extras = {'local_classification' : matter['MatterTypeName']}

            text = self.text(matter_id)

            if text :
                if text['MatterTextPlain'] :
                    bill.extras['plain_text'] = text['MatterTextPlain']

                if text['MatterTextRtf'] :
                    bill.extras['rtf_text'] = text['MatterTextRtf'].replace(u'\u0000', '')

            yield bill
Developer: ErnieAtLYD, Project: scrapers-us-municipal, Lines of code: 95, Source file: bills.py

Example 13: scrape_bill

# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_action [as alias]
    def scrape_bill(self, chamber, session, bill_id, url):
        try:
            page = lxml.html.fromstring(self.get(url).text)
        except scrapelib.HTTPError as e:
            self.warning('error (%s) fetching %s, skipping' % (e, url))
            return

        title = page.xpath(
            "string(//span[contains(@id, 'PlaceHolder1_txtST')])").strip()

        if 'JR' in bill_id:
            bill_type = ['joint resolution']
        elif 'CR' in bill_id:
            bill_type = ['concurrent resolution']
        elif 'R' in bill_id:
            bill_type = ['resolution']
        else:
            bill_type = ['bill']

        bill = Bill(bill_id,
                    legislative_session=session,
                    chamber=chamber,
                    title=title,
                    classification=bill_type)
        bill.add_source(url)
        bill.subject = self.subject_map[bill_id]

        for link in page.xpath("//a[contains(@id, 'Auth')]"):
            name = link.xpath("string()").strip()

            if ':' in name:
                raise Exception(name)
            if 'otherAuth' in link.attrib['id']:
                bill.add_sponsorship(name, classification='cosponsor',
                                     entity_type='person', primary=False)
            else:
                bill.add_sponsorship(name, classification='primary',
                                     entity_type='person', primary=True)

        act_table = page.xpath("//table[contains(@id, 'Actions')]")[0]
        for tr in act_table.xpath("tr")[2:]:
            action = tr.xpath("string(td[1])").strip()
            if not action or action == 'None':
                continue

            date = tr.xpath("string(td[3])").strip()
            date = datetime.datetime.strptime(date, "%m/%d/%Y").date()

            actor = tr.xpath("string(td[4])").strip()
            if actor == 'H':
                actor = 'lower'
            elif actor == 'S':
                actor = 'upper'

            attrs = self.categorizer.categorize(action)
            related_entities = []
            for item in attrs['committees']:
                related_entities.append({
                    'type': 'committee',
                    'name': item
                })
            for item in attrs['legislators']:
                related_entities.append({
                    'type': 'legislator',
                    'name': item
                })
            bill.add_action(description=action,
                            date=date.strftime('%Y-%m-%d'),
                            chamber=actor,
                            classification=attrs['classification'],
                            related_entities=related_entities)

        version_table = page.xpath("//table[contains(@id, 'Versions')]")[0]
        # Keep track of already seen versions to prevent processing duplicates.
        version_urls = []
        for link in version_table.xpath(".//a[contains(@href, '.PDF')]"):
            version_url = link.attrib['href']
            if version_url in version_urls:
                self.warning('Skipping duplicate version URL.')
                continue
            else:
                version_urls.append(version_url)
            name = link.text.strip()

            if re.search('COMMITTEE REPORTS|SCHEDULED CCR', version_url, re.IGNORECASE):
                bill.add_document_link(note=name, url=version_url,
                                       media_type='application/pdf')
                continue

            bill.add_version_link(note=name, url=version_url,
                                  media_type='application/pdf')

        for link in page.xpath(".//a[contains(@href, '_VOTES')]"):
            if 'HT_' not in link.attrib['href']:
                yield from self.scrape_votes(bill, self.urlescape(link.attrib['href']))

        # # If the bill has no actions and no versions, it's a bogus bill on
        # # their website, which appears to happen occasionally. Skip.
        has_no_title = (bill.title == "Short Title Not Found.")
        if has_no_title:
#......... part of the code omitted here .........
Developer: neelneelpurk, Project: openstates, Lines of code: 103, Source file: bills.py

Example 14: scrape_details

# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_action [as alias]
    def scrape_details(self, bill_detail_url, session, chamber, bill_id):
        """
        Create the Bill and add the information obtained from the provided bill_detail_url.
        and then yield the bill object.
        :param bill_detail_url:
        :param session:
        :param chamber:
        :param bill_id:
        :return:
        """
        page = self.get(bill_detail_url).text

        if 'INVALID BILL NUMBER' in page:
            self.warning('INVALID BILL %s' % bill_detail_url)
            return

        doc = lxml.html.fromstring(page)
        doc.make_links_absolute(bill_detail_url)

        bill_div = doc.xpath('//div[@style="margin:0 0 40px 0;"]')[0]

        bill_type = bill_div.xpath('span/text()')[0]

        if 'General Bill' in bill_type:
            bill_type = 'bill'
        elif 'Concurrent Resolution' in bill_type:
            bill_type = 'concurrent resolution'
        elif 'Joint Resolution' in bill_type:
            bill_type = 'joint resolution'
        elif 'Resolution' in bill_type:
            bill_type = 'resolution'
        else:
            raise ValueError('unknown bill type: %s' % bill_type)

        # this is fragile, but less fragile than it was
        b = bill_div.xpath('./b[text()="Summary:"]')[0]
        bill_summary = b.getnext().tail.strip()

        bill = Bill(
            bill_id,
            legislative_session=session,  # session name metadata's `legislative_sessions`
            chamber=chamber,  # 'upper' or 'lower'
            title=bill_summary,
            classification=bill_type
        )

        subjects = list(self._subjects[bill_id])

        for subject in subjects:
            bill.add_subject(subject)

        # sponsors
        for sponsor in doc.xpath('//a[contains(@href, "member.php")]/text()'):
            bill.add_sponsorship(
                name=sponsor,
                classification='primary',
                primary=True,
                entity_type='person'
            )
        for sponsor in doc.xpath('//a[contains(@href, "committee.php")]/text()'):
            sponsor = sponsor.replace(u'\xa0', ' ').strip()
            bill.add_sponsorship(
                name=sponsor,
                classification='primary',
                primary=True,
                entity_type='organization'
            )

        # find versions
        version_url = doc.xpath('//a[text()="View full text"]/@href')[0]
        version_html = self.get(version_url).text
        version_doc = lxml.html.fromstring(version_html)
        version_doc.make_links_absolute(version_url)
        for version in version_doc.xpath('//a[contains(@href, "/prever/")]'):
            # duplicate versions with same date, use first appearance

            bill.add_version_link(
                note=version.text,  # Description of the version from the state;
                                    #  eg, 'As introduced', 'Amended', etc.
                url=version.get('href'),
                on_duplicate='ignore',
                media_type='text/html'  # Still a MIME type
            )

        # actions
        for row in bill_div.xpath('table/tr'):
            date_td, chamber_td, action_td = row.xpath('td')

            date = datetime.datetime.strptime(date_td.text, "%m/%d/%y")
            action_chamber = {'Senate': 'upper',
                              'House': 'lower',
                              None: 'legislature'}[chamber_td.text]

            action = action_td.text_content()
            action = action.split('(House Journal')[0]
            action = action.split('(Senate Journal')[0].strip()

            atype = action_type(action)

            bill.add_action(
#......... part of the code omitted here .........
Developer: sunlightlabs, Project: openstates, Lines of code: 103, Source file: bills.py

Example 15: bill_info

# Required import: from pupa.scrape import Bill [as alias]
# Or: from pupa.scrape.Bill import add_action [as alias]
    def bill_info(self, bill_link, session, main_url):
        bill_page = self.lxmlize(bill_link)

        long_title = self.get_node(
            bill_page,
            '//div[@class="main-content"]/div[1]/div/h2').text.split()

        bill_number = long_title[0]
        title = ''
        for x in range(2, len(long_title)):
            title += long_title[x] + ' '
        title = title[0:-1]

        if not title:
            self.error('no title, skipping %s', bill_number)
            return

        bill_type = 'resolution' if 'LR' in bill_number else 'bill'

        bill = Bill(bill_number, session, title, classification=bill_type)

        bill.add_source(main_url)
        bill.add_source(bill_link)

        introduced_by = self.get_node(
            bill_page,
            '//div[@class="main-content"]/div[3]/div[1]/ul/li[1]/a[1]/text()')

        if not introduced_by:
            introduced_by = self.get_node(
                bill_page,
                '//div[@class="main-content"]/div[3]/div[1]/ul/li[1]/text()')
            introduced_by = introduced_by.split('Introduced By:')[1].strip()

        bill.add_sponsorship(
            name=introduced_by,
            entity_type='person',
            primary=True,
            classification='primary',
        )

        action_nodes = self.get_nodes(
            bill_page,
            '//div[@class="main-content"]/div[5]//table/tbody/tr')

        for action_node in action_nodes:
            date = self.get_node(
                action_node,
                './td[1]').text
            date = datetime.strptime(date, '%b %d, %Y')

            # The action node may have an anchor element within it, so
            # we grab all the text within.
            action = self.get_node(
                action_node,
                './td[2]').text_content()

            if 'Governor' in action:
                actor = 'executive'
            elif 'Speaker' in action:
                actor = 'legislature'
            else:
                actor = 'legislature'

            action_type = self.action_types(action)
            bill.add_action(
                action,
                date.strftime('%Y-%m-%d'),
                chamber=actor,
                classification=action_type,
            )

        # Were in reverse chronological order.
        bill.actions.reverse()

        # Grabs bill version documents.
        version_links = self.get_nodes(
            bill_page,
            '//div[@class="main-content"]/div[3]/div[2]/'
            'div[@class="hidden-xs"]/ul[1]/li/a')

        for version_link in version_links:
            version_name = version_link.text
            version_url = version_link.attrib['href']
            # replace Current w/ session number
            version_url = version_url.replace('Current', session)
            bill.add_version_link(version_name, version_url, media_type='application/pdf')

        # Adds any documents related to amendments.
        amendment_links = self.get_nodes(
            bill_page,
            '//div[@class="main-content"]/div[5]/div[2]/table/tr/td[1]/a')

        for amendment_link in amendment_links:
            amendment_name = amendment_link.text
            amendment_url = amendment_link.attrib['href']
            bill.add_document_link(amendment_name, amendment_url)

        # Related transcripts.
        transcript_links = self.get_nodes(
#......... part of the code omitted here .........
Developer: neelneelpurk, Project: openstates, Lines of code: 103, Source file: bills.py


Note: The pupa.scrape.Bill.add_action examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by many developers, and copyright of the source code remains with the original authors. Consult each project's License before redistributing or using the code; do not reproduce without permission.