本文整理汇总了Python中pupa.scrape.VoteEvent.pupa_id方法的典型用法代码示例。如果您正苦于以下问题:Python VoteEvent.pupa_id方法的具体用法?Python VoteEvent.pupa_id怎么用?Python VoteEvent.pupa_id使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pupa.scrape.VoteEvent的用法示例。
在下文中一共展示了VoteEvent.pupa_id方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_vote_event_bill_actions_two_stage
# 需要导入模块: from pupa.scrape import VoteEvent [as 别名]
# 或者: from pupa.scrape.VoteEvent import pupa_id [as 别名]
def test_vote_event_bill_actions_two_stage():
    """Two vote events claiming the same bill action, imported in two stages.

    Both vote events reference the single 'passage' action of HB 1.  The
    first import contains only the first vote event; the second import
    contains both.  The action must stay attached to the first vote event
    and the second one must end up without a bill action, so the conflict
    is handled even when the votes did not arrive in the same scrape.
    """
    jurisdiction = create_jurisdiction()
    jurisdiction.legislative_sessions.create(name='1900', identifier='1900')
    org1 = ScrapeOrganization(name='House', classification='lower')
    bill = ScrapeBill('HB 1', '1900', 'Axe & Tack Tax Act', from_organization=org1._id)
    bill.add_action(description='passage', date='1900-04-02', chamber='lower')

    # The two vote events are built from identical arguments ...
    shared_kwargs = dict(
        legislative_session='1900',
        motion_text='passage',
        start_date='1900-04-02',
        classification='passage:bill',
        result='pass',
        bill_chamber='lower',
        bill='HB 1',
        bill_action='passage',
        organization=org1._id,
    )
    ve1 = ScrapeVoteEvent(**shared_kwargs)
    ve2 = ScrapeVoteEvent(**shared_kwargs)
    # ... and are told apart only by their pupa_id.
    ve1.pupa_id = 'one'
    ve2.pupa_id = 'two'

    org_importer = OrganizationImporter('jid')
    org_importer.import_data([org1.as_dict()])
    bill_importer = BillImporter('jid', org_importer, DumbMockImporter())
    bill_importer.import_data([bill.as_dict()])

    # Stage one: only the first vote event; it gets the bill action.
    first_stage = VoteEventImporter('jid', DumbMockImporter(), org_importer, bill_importer)
    first_stage.import_data([ve1.as_dict()])
    stored = list(VoteEvent.objects.all())
    assert len(stored) == 1
    assert stored[0].bill_action is not None

    # Stage two: both vote events; the action must stay pinned to the first,
    # just as it would have if both had been in the same import.
    second_stage = VoteEventImporter('jid', DumbMockImporter(), org_importer, bill_importer)
    second_stage.import_data([ve1.as_dict(), ve2.as_dict()])
    stored = list(VoteEvent.objects.all())
    assert len(stored) == 2
    assert stored[0].bill_action is not None
    assert stored[1].bill_action is None
示例2: add_vote
# 需要导入模块: from pupa.scrape import VoteEvent [as 别名]
# 或者: from pupa.scrape.VoteEvent import pupa_id [as 别名]
def add_vote(self, bill, chamber, date, text, url):
    """Build a VoteEvent from an action's text and its roll-call URL.

    The yes/no totals are parsed out of ``text``, the motion is classified
    by the first pattern of ``motion_classifiers`` that matches the start of
    ``text`` (``'other'`` when none does) and, when ``url`` is given, the
    individual votes are loaded through ``add_house_votes`` or
    ``add_senate_votes`` depending on the URL.

    Returns:
        The populated VoteEvent.

    Raises:
        IndexError: if ``text`` contains no ayes/noes tally.
    """
    tallies = re.findall(r'Ayes,?[\s]?(\d+)[,;]\s+N(?:oes|ays),?[\s]?(\d+)', text)
    yes, no = int(tallies[0][0]), int(tallies[0][1])

    # First matching classifier wins; the loop variable no longer shadows
    # the ``type`` builtin.
    vtype = next(
        (kind for pattern, kind in motion_classifiers.items() if re.match(pattern, text)),
        'other',
    )

    v = VoteEvent(
        chamber=chamber,
        start_date=TIMEZONE.localize(date),
        motion_text=text,
        result='pass' if yes > no else 'fail',
        classification=vtype,
        bill=bill,
    )
    v.set_count('yes', yes)
    v.set_count('no', no)

    # Everything derived from the URL sits behind the same guard, so a
    # missing URL no longer fails on ``url.split`` before the check.
    if url:
        v.pupa_id = url.split('/')[-1]
        v.add_source(url)

        # fetch the vote itself
        if 'av' in url:
            self.add_house_votes(v, url)
        elif 'sv' in url:
            self.add_senate_votes(v, url)

    return v
示例3: build_vote
# 需要导入模块: from pupa.scrape import VoteEvent [as 别名]
# 或者: from pupa.scrape.VoteEvent import pupa_id [as 别名]
def build_vote(session, bill_id, url, vote_record, chamber, motion_text):
    """Turn a parsed roll-call record into a VoteEvent.

    ``vote_record`` must provide a ``'date'`` plus voter lists under the keys
    ``'yes'``, ``'no'``, ``'excused'``, ``'absent'`` and ``'other'``.  The
    vote passes when there are more yes voters than no voters.  ``url`` is
    used both as the pupa_id and as the source of the vote event.
    """
    options = ('yes', 'no', 'excused', 'absent', 'other')

    # When they vote in a substitute they mark it as XHB
    bill_id = bill_id.replace('XHB', 'HB')

    if len(vote_record['yes']) > len(vote_record['no']):
        outcome = 'pass'
    else:
        outcome = 'fail'
    vote_day = vote_record['date'].strftime('%Y-%m-%d')
    # Bill identifiers starting with "S" are attributed to the upper chamber.
    origin_chamber = 'upper' if bill_id[0] == 'S' else 'lower'

    vote_event = VoteEvent(
        result=outcome,
        chamber=chamber,
        start_date=vote_day,
        motion_text=motion_text,
        classification='passage',
        legislative_session=session,
        bill=bill_id,
        bill_chamber=origin_chamber,
    )
    vote_event.pupa_id = url

    # Totals first, then the individual voters, in the same option order.
    for option in options:
        vote_event.set_count(option, len(vote_record[option]))
    for option in options:
        for voter in vote_record[option]:
            vote_event.vote(option, voter)

    vote_event.add_source(url)
    return vote_event
示例4: test_vote_event_pupa_identifier_dedupe
# 需要导入模块: from pupa.scrape import VoteEvent [as 别名]
# 或者: from pupa.scrape.VoteEvent import pupa_id [as 别名]
def test_vote_event_pupa_identifier_dedupe():
    """Vote events are de-duplicated on pupa_id.

    Re-importing an unchanged vote event is a no-op, changing its fields is
    an update, and only a different pupa_id creates a second row.
    """
    j = create_jurisdiction()
    j.legislative_sessions.create(name='1900', identifier='1900')
    Organization.objects.create(id='org-id', name='Legislature',
                                classification='legislature',
                                jurisdiction=j)

    vote_event = ScrapeVoteEvent(legislative_session='1900', start_date='2013',
                                 classification='anything', result='passed',
                                 motion_text='a vote on something',
                                 identifier='Roll Call No. 1')
    vote_event.pupa_id = 'foo'

    dmi = DumbMockImporter()
    oi = OrganizationImporter('jid')
    # Organization importer first, then the person (mock) importer: the same
    # order the two-stage bill-action test uses for BillImporter.
    bi = BillImporter('jid', oi, dmi)

    def import_vote_event():
        # A fresh importer is built for every import of the current state.
        _, what = VoteEventImporter('jid', dmi, oi, bi).import_item(vote_event.as_dict())
        return what

    assert import_vote_event() == 'insert'
    assert VoteEvent.objects.count() == 1

    # same exact vote event, no changes
    assert import_vote_event() == 'noop'
    assert VoteEvent.objects.count() == 1

    # new info, update
    vote_event.result = 'failed'
    assert import_vote_event() == 'update'
    assert VoteEvent.objects.count() == 1

    # new vote event identifier (pupa_id unchanged), update
    vote_event.identifier = 'First Roll Call'
    assert import_vote_event() == 'update'
    assert VoteEvent.objects.count() == 1

    # new pupa identifier, insert
    vote_event.pupa_id = 'bar'
    assert import_vote_event() == 'insert'
    assert VoteEvent.objects.count() == 2
示例5: parse_vote
# 需要导入模块: from pupa.scrape import VoteEvent [as 别名]
# 或者: from pupa.scrape.VoteEvent import pupa_id [as 别名]
def parse_vote(self, bill, link):
    """Fetch a roll-call page and yield the VoteEvent it describes.

    The first ``<h3>`` under ``#main_content`` supplies the chamber (first
    word), the date (last word, ``%m/%d/%Y``) and the motion text; the
    ``<h3>`` headings carrying a parenthesised number supply the yea / nay /
    present / absent totals.  Voter names are read from the ``<a>`` elements
    in page order: the first link is skipped, the next ``yes_count`` names
    are recorded as yes, the following ``no_count`` as no, the rest as other.

    Yields nothing (after logging a warning) when the page has no ``<h3>``.
    """
    member_doc = lxml.html.fromstring(self.get(link).text)
    motion = member_doc.xpath("//div[@id='main_content']/h4/text()")
    opinions = member_doc.xpath("//div[@id='main_content']/h3/text()")
    if not opinions:
        self.warning("No Votes for: %s", link)
        return

    header_words = opinions[0].split()
    vote_date = datetime.datetime.strptime(header_words[-1], '%m/%d/%Y')
    vote_status = " ".join(header_words[2:-2])
    if not vote_status.strip():
        # No motion in the header line: fall back to the <h4> text.
        vote_status = motion[0]
    vote_chamber = 'upper' if header_words[0] == 'Senate' else 'lower'

    # Every total starts at zero so a page lacking one of the headings can
    # no longer hit an unbound variable.
    totals = {'yea': 0, 'nay': 0, 'present': 0, 'absent': 0}
    keywords = ('yea', 'nay', 'present', 'absent')
    for heading in opinions:
        try:
            count = int(heading[heading.find("(") + 1:heading.find(")")])
        except ValueError:
            # Not a "<label> (N)" heading; skip it instead of reusing the
            # count parsed from an earlier heading.
            continue
        lowered = heading.lower()
        for keyword in keywords:
            if keyword in lowered:
                totals[keyword] = count
                break
    yes_count = totals['yea']
    no_count = totals['nay']

    vote = VoteEvent(
        bill=bill,
        start_date=vote_date.strftime('%Y-%m-%d'),
        chamber=vote_chamber,
        motion_text=vote_status,
        result='pass' if yes_count > no_count else 'fail',
        classification='passage',
    )
    vote.pupa_id = link
    vote.set_count('yes', yes_count)
    vote.set_count('no', no_count)
    vote.set_count('abstain', totals['present'])
    vote.set_count('absent', totals['absent'])
    vote.add_source(link)

    a_links = member_doc.xpath("//div[@id='main_content']/a/text()")
    # The first link is skipped; positions are 1-based among the rest.
    for position, link_text in enumerate(a_links[1:], start=1):
        voter = re.sub(',', '', link_text).split()[0]
        if position <= yes_count:
            option = 'yes'
        elif no_count != 0 and position <= yes_count + no_count:
            option = 'no'
        else:
            option = 'other'
        vote.vote(option, voter)
    yield vote
示例6: scrape_vote
# 需要导入模块: from pupa.scrape import VoteEvent [as 别名]
# 或者: from pupa.scrape.VoteEvent import pupa_id [as 别名]
def scrape_vote(self, chamber, session, bill_id, vote_url):
    """Scrape one vote page and yield its VoteEvent.

    Nothing is yielded when the request was redirected to the "no vote
    found" page, when the page has no motion heading, or when none of its
    paragraphs parses as a ``%m/%d/%Y`` date.  The vote event's own chamber
    is always ``'lower'``; ``chamber`` is recorded as the bill's chamber.
    """
    NO_VOTE_URL = 'http://www.house.leg.state.mn.us/votes/novotefound.asp'

    resp = self.get(vote_url)
    html = resp.text
    if resp.url == NO_VOTE_URL:
        # sometimes the link is broken, will redirect to NO_VOTE_URL
        return

    doc = lxml.html.fromstring(html)

    motion_nodes = doc.xpath("//div[@id='leg_PageContent']/div/h2/text()")
    if not motion_nodes:
        self.logger.warning("Bill was missing a motion number, skipping")
        return
    motion = motion_nodes[0]

    # The second <h3> holds the tally; words 0 and 3 are the yea and nay totals.
    tally_words = doc.xpath(".//div[@id='leg_PageContent']/div/h3/text()")[1].split()
    yeas = int(tally_words[0])
    nays = int(tally_words[3])

    # Use the first paragraph that parses as a date.
    date = None
    for paragraph in doc.xpath(".//div[@id='leg_PageContent']/div/p/text()"):
        try:
            parsed = datetime.datetime.strptime(paragraph.strip(), '%m/%d/%Y')
        except ValueError:
            continue
        date = parsed.date()
        break
    if date is None:
        self.logger.warning("No date could be found for vote on %s" % motion)
        return

    outcome = 'pass' if yeas > nays else 'fail'
    vote = VoteEvent(
        chamber='lower',
        start_date=date,
        motion_text=motion,
        result=outcome,
        classification='passage',
        legislative_session=session,
        bill=bill_id,
        bill_chamber=chamber,
    )
    vote.set_count('yes', yeas)
    vote.set_count('no', nays)
    vote.add_source(vote_url)
    vote.pupa_id = vote_url

    # first table has YEAs
    for yea_name in doc.xpath('//table[1]/tr/td/font/text()'):
        vote.yes(yea_name.strip())

    # second table is nays
    for nay_name in doc.xpath('//table[2]/tr/td/font/text()'):
        vote.no(nay_name.strip())

    yield vote
示例7: parse_committee_votes
# 需要导入模块: from pupa.scrape import VoteEvent [as 别名]
# 或者: from pupa.scrape.VoteEvent import pupa_id [as 别名]
def parse_committee_votes(self, bill, url):
    """Yield a VoteEvent for every committee vote linked from ``url``.

    ``url`` is added to ``bill`` as a source.  Each link whose href contains
    ``listVoteSummary.cfm`` is one vote: its date comes from the first cell
    of the enclosing row, its motion from the link text, and its roll call
    from ``parse_upper_committee_vote_rollcall``.  A vote whose date matches
    neither supported format is skipped with a warning.
    """
    bill.add_source(url)
    html = self.get(url).text
    doc = lxml.html.fromstring(html)
    doc.make_links_absolute(url)

    chamber = ('upper' if 'Senate' in doc.xpath('string(//h1)') else 'lower')
    committee = tuple(doc.xpath('//h2')[0].itertext())[-2].strip()

    for link in doc.xpath("//a[contains(@href, 'listVoteSummary.cfm')]"):

        # Date: read the cell once, then try both accepted formats.
        date_text = link.xpath('../../td')[0].text_content()
        date = None
        for fmt in ("%m/%d/%Y", "%m-%d-%Y"):
            try:
                date = datetime.datetime.strptime(date_text, fmt)
            except ValueError:
                continue
            break
        if date is None:
            # Previously the unparsed string fell through to tz.localize().
            self.warning('Could not parse committee vote date %r on %s', date_text, url)
            continue

        # Motion
        motion = link.text_content().split(' - ')[-1].strip()
        motion = 'Committee vote (%s): %s' % (committee, motion)

        # Roll call
        vote_url = link.attrib['href']
        rollcall = self.parse_upper_committee_vote_rollcall(bill, vote_url)

        vote = VoteEvent(
            chamber=chamber,
            start_date=tz.localize(date),
            motion_text=motion,
            classification='other',
            result='pass' if rollcall['passed'] else 'fail',
            bill=bill,
        )
        vote.pupa_id = vote_url
        vote.set_count('yes', rollcall['yes_count'])
        vote.set_count('no', rollcall['no_count'])
        vote.set_count('other', rollcall['other_count'])

        for voteval in ('yes', 'no', 'other'):
            for name in rollcall.get(voteval + '_votes', []):
                vote.vote(voteval, name)

        vote.add_source(url)
        vote.add_source(vote_url)

        yield vote
示例8: scrape_votes
# 需要导入模块: from pupa.scrape import VoteEvent [as 别名]
# 或者: from pupa.scrape.VoteEvent import pupa_id [as 别名]
def scrape_votes(self, bill, page):
    """Yield a VoteEvent for each floor action returned for ``page``.

    For every entry of ``page['FloorHeaders']`` the floor-action API is
    queried (with votes included) and each returned action other than
    ``'No Action'`` becomes one vote event.  A vote passes when it was
    unanimously adopted or has more ayes than nays.  The pupa_id is the
    response URL followed by the action's ``ReferralNumber``.
    """
    base_url = 'https://apps.azleg.gov/api/BillStatusFloorAction'
    chamber_by_body = {
        'S': 'upper',
        'H': 'lower',
    }
    option_by_code = {
        'Y': 'yes',
        'N': 'no',
    }

    for header in page['FloorHeaders']:
        params = {
            'billStatusId': page['BillId'],
            'billStatusActionId': header['BillStatusActionId'],
            'includeVotes': 'true',
        }
        resp = self.get(base_url, params=params)
        actions = json.loads(resp.content.decode('utf-8'))

        for action in actions:
            if action['Action'] == 'No Action':
                continue
            action_date = datetime.datetime.strptime(action['ReportDate'], '%Y-%m-%dT%H:%M:%S')

            # Normalise the totals before comparing them: a missing total
            # would otherwise make the ``>`` comparison raise on Python 3.
            ayes = action['Ayes'] or 0
            nays = action['Nays'] or 0
            passed = action['UnanimouslyAdopted'] or ayes > nays

            vote = VoteEvent(
                chamber=chamber_by_body[header['LegislativeBody']],
                motion_text=action['Action'],
                classification='passage',
                result='pass' if passed else 'fail',
                start_date=action_date.strftime('%Y-%m-%d'),
                bill=bill,
            )
            vote.add_source(resp.url)
            vote.set_count('yes', ayes)
            vote.set_count('no', nays)
            vote.set_count('other', (action['Present'] or 0))
            vote.set_count('absent', (action['Absent'] or 0))
            vote.set_count('excused', (action['Excused'] or 0))
            vote.set_count('not voting', (action['NotVoting'] or 0))

            for v in action['Votes']:
                # Anything other than Y/N is recorded as 'other'.
                vote_type = option_by_code.get(v['Vote'], 'other')
                vote.vote(vote_type, v['Legislator']['FullName'])

            vote.pupa_id = resp.url + str(action['ReferralNumber'])
            yield vote
示例9: scrape_chamber_votes
# 需要导入模块: from pupa.scrape import VoteEvent [as 别名]
# 或者: from pupa.scrape.VoteEvent import pupa_id [as 别名]
def scrape_chamber_votes(self, chamber, session):
    """Yield a VoteEvent for every vote on the chamber's vote pages.

    The vote dates are obtained with ``get_vote_dates``; for each date the
    ``votes.asp`` form is posted and the response parsed with
    ``parse_vote_page``.  Votes whose bill is not present in
    ``self._bill_id_by_type`` are skipped with a warning.  Each vote's
    source URL doubles as its pupa_id.
    """
    vote_pages = {
        "upper": "SVotes",
        "lower": "HVotes",
    }
    chamber_by_letter = {
        "H": "lower",
        "S": "upper",
    }
    option_by_code = {
        'Y': 'yes',
        'N': 'no',
    }

    url = "%s/%s" % (RI_URL_BASE, vote_pages[chamber])
    action = "%s/%s" % (url, "votes.asp")

    for date in self.get_vote_dates(url, session):
        page = self.post_to(action, date)
        for vote_dict in self.parse_vote_page(page, url, session):
            for vote in vote_dict.values():
                count = vote['count']
                meta = vote['meta']
                # The chamber recorded on the vote itself is what is used
                # from here on, not the ``chamber`` argument.
                vote_chamber = chamber_by_letter[meta['chamber']]

                try:
                    bill_id = self._bill_id_by_type[(vote_chamber, meta['bill'])]
                except KeyError:
                    self.warning('no such bill_id %s %s', vote_chamber, meta['bill'])
                    continue

                outcome = 'pass' if count['passage'] else 'fail'
                v = VoteEvent(
                    chamber=vote_chamber,
                    start_date=vote['time'].strftime('%Y-%m-%d'),
                    motion_text=meta['extra']['motion'],
                    result=outcome,
                    classification='passage',
                    legislative_session=session,
                    bill=bill_id,
                    bill_chamber=vote_chamber,
                )
                v.set_count('yes', int(count['YEAS']))
                v.set_count('no', int(count['NAYS']))
                v.set_count('other', int(count['NOT VOTING']))
                v.add_source(vote['source'])
                v.pupa_id = vote['source']

                for ballot in vote['votes']:
                    # Anything other than Y/N is recorded as 'other'.
                    v.vote(option_by_code.get(ballot['vote'], 'other'), ballot['name'])

                yield v
示例10: asvote
# 需要导入模块: from pupa.scrape import VoteEvent [as 别名]
# 或者: from pupa.scrape.VoteEvent import pupa_id [as 别名]
def asvote(self):
    """Return this parsed vote as a VoteEvent.

    Chamber, date, motion, outcome, totals and voter lists all come from
    this object's accessor methods; ``self.url`` is both the pupa_id and
    the source of the resulting vote event.
    """
    chamber = self.chamber()
    start_date = self.date()
    motion_text = self.motion()
    if self.passed():
        outcome = 'pass'
    else:
        outcome = 'fail'

    event = VoteEvent(
        chamber=chamber,
        start_date=start_date,
        motion_text=motion_text,
        result=outcome,
        classification='passage',
        bill=self.bill,
    )
    # URL contains sequence number
    event.pupa_id = self.url

    # Totals
    event.set_count('yes', self.yes_count())
    event.set_count('no', self.no_count())
    event.set_count('other', self.other_count())

    # Individual voters
    for name in self.yes_votes():
        event.yes(name)
    for name in self.no_votes():
        event.no(name)
    for name in self.other_votes():
        event.vote('other', name)

    event.add_source(self.url)
    return event
示例11: scrape_votes
# 需要导入模块: from pupa.scrape import VoteEvent [as 别名]
# 或者: from pupa.scrape.VoteEvent import pupa_id [as 别名]
def scrape_votes(self, session):
    """Yield vote events built from the roll-call summary and history files.

    The pipe-delimited summary file creates one vote event per roll call
    whose session year equals ``session`` and whose bill is present in
    ``self.bills_by_id``.  The history file then attaches each legislator's
    individual vote, looked up through ``self.legislators``.  The 'other'
    count is derived from the number of non-Yea/Nay votes seen.
    """
    votes = {}
    other_counts = defaultdict(int)
    last_line = []
    vote_url = 'http://gencourt.state.nh.us/dynamicdatafiles/RollCallSummary.txt'
    eastern = pytz.timezone('America/New_York')
    lines = self.get(vote_url).content.decode('utf-8').splitlines()

    for line in lines:
        if len(line) < 2:
            continue
        if line.strip() == "":
            continue

        line = line.split('|')
        if len(line) < 14:
            # NOTE(review): the check looks at the merged length but the
            # assignment uses only the stashed fragment, and a short line is
            # still parsed below -- confirm the intended handling of rows
            # split across lines before changing it.
            if len(last_line + line[1:]) == 14:
                line = last_line
                self.warning('used bad vote line')
            else:
                last_line = line
                self.warning('bad vote line %s' % '|'.join(line))

        # The content has already been decoded, so a leading byte-order mark
        # shows up as U+FEFF; strip that as well as the legacy byte form.
        session_yr = line[0].replace('\xef\xbb\xbf', '').replace('\ufeff', '')
        body = line[1]
        vote_num = line[2]
        timestamp = line[3]
        bill_id = line[4].strip()
        yeas = int(line[5])
        nays = int(line[6])
        # present = int(line[7])
        # absent = int(line[8])
        motion = line[11].strip() or '[not available]'

        if session_yr == session and bill_id in self.bills_by_id:
            actor = 'lower' if body == 'H' else 'upper'
            naive_start = dt.datetime.strptime(timestamp, '%m/%d/%Y %I:%M:%S %p')
            start = eastern.localize(naive_start).isoformat()
            # TODO: stop faking passed somehow
            passed = yeas > nays
            vote = Vote(chamber=actor,
                        start_date=start,
                        motion_text=motion,
                        result='pass' if passed else 'fail',
                        classification='passage',
                        bill=self.bills_by_id[bill_id])
            vote.set_count('yes', yeas)
            vote.set_count('no', nays)
            vote.add_source(vote_url)
            vote.pupa_id = session_yr + body + vote_num  # unique ID for vote
            votes[body + vote_num] = vote

    history_url = 'http://gencourt.state.nh.us/dynamicdatafiles/RollCallHistory.txt'
    for line in self.get(history_url).content.decode('utf-8').splitlines():
        if len(line) < 2:
            continue

        # 2016|H|2|330795||Yea|
        # 2012    | H   | 2    | 330795  | 964 |  HB309  | Yea | 1/4/2012 8:27:03 PM
        session_yr, body, v_num, _, employee, bill_id, choice, _ = line.split('|')

        if not bill_id:
            continue

        if session_yr == session and bill_id.strip() in self.bills_by_id:
            try:
                leg = " ".join(self.legislators[employee]['name'].split())
            except KeyError:
                self.warning("Error, can't find person %s" % employee)
                continue

            choice = choice.strip()
            vote_key = body + v_num
            if vote_key not in votes:
                self.warning("Skipping processing this vote:")
                self.warning("Bad ID: %s" % vote_key)
                continue

            # code = self.legislators[employee]['seat']
            if choice == 'Yea':
                votes[vote_key].yes(leg)
            elif choice == 'Nay':
                votes[vote_key].no(leg)
            else:
                votes[vote_key].vote('other', leg)
                # hack-ish, but will keep the vote count sync'd
                other_counts[vote_key] += 1
                votes[vote_key].set_count('other', other_counts[vote_key])

    yield from votes.values()
示例12: scrape_bill_type
# 需要导入模块: from pupa.scrape import VoteEvent [as 别名]
# 或者: from pupa.scrape.VoteEvent import pupa_id [as 别名]
#.........这里部分代码省略.........
result = True
else:
result = False
if not vote.location:
continue
full_loc = vote.location.description
first_part = full_loc.split(' ')[0].lower()
if first_part in ['asm', 'assembly']:
vote_chamber = 'lower'
# vote_location = ' '.join(full_loc.split(' ')[1:])
elif first_part.startswith('sen'):
vote_chamber = 'upper'
# vote_location = ' '.join(full_loc.split(' ')[1:])
else:
raise ScrapeError("Bad location: %s" % full_loc)
if vote.motion:
motion = vote.motion.motion_text or ''
else:
motion = ''
if "Third Reading" in motion or "3rd Reading" in motion:
vtype = 'passage'
elif "Do Pass" in motion:
vtype = 'passage'
else:
vtype = 'other'
motion = motion.strip()
# Why did it take until 2.7 to get a flags argument on re.sub?
motion = re.compile(r'(\w+)( Extraordinary)? Session$',
re.IGNORECASE).sub('', motion)
motion = re.compile(r'^(Senate|Assembly) ',
re.IGNORECASE).sub('', motion)
motion = re.sub(r'^(SCR|SJR|SB|AB|AJR|ACR)\s?\d+ \w+\.? ',
'', motion)
motion = re.sub(r' \(\w+\)$', '', motion)
motion = re.sub(r'(SCR|SB|AB|AJR|ACR)\s?\d+ \w+\.?$',
'', motion)
motion = re.sub(r'(SCR|SJR|SB|AB|AJR|ACR)\s?\d+ \w+\.? '
r'Urgency Clause$',
'(Urgency Clause)', motion)
motion = re.sub(r'\s+', ' ', motion)
if not motion:
self.warning("Got blank motion on vote for %s" % bill_id)
continue
# XXX this is responsible for all the CA 'committee' votes, not
# sure if that's a feature or bug, so I'm leaving it as is...
# vote_classification = chamber if (vote_location == 'Floor') else 'committee'
# org = {
# 'name': vote_location,
# 'classification': vote_classification
# }
fsvote = VoteEvent(
motion_text=motion,
start_date=self._tz.localize(vote.vote_date_time),
result='pass' if result else 'fail',
classification=vtype,
# organization=org,
chamber=vote_chamber,
bill=fsbill,
)
fsvote.extras = {'threshold': vote.threshold}
source_url = (
'http://leginfo.legislature.ca.gov/faces'
'/billVotesClient.xhtml?bill_id={}'
).format(fsbill.identifier)
fsvote.add_source(source_url)
fsvote.pupa_id = source_url + '#' + str(vote_num)
rc = {'yes': [], 'no': [], 'other': []}
for record in vote.votes:
if record.vote_code == 'AYE':
rc['yes'].append(record.legislator_name)
elif record.vote_code.startswith('NO'):
rc['no'].append(record.legislator_name)
else:
rc['other'].append(record.legislator_name)
# Handle duplicate votes
for key in rc.keys():
rc[key] = list(set(rc[key]))
for key, voters in rc.items():
for voter in voters:
fsvote.vote(key, voter)
# Set counts by summed votes for accuracy
fsvote.set_count(key, len(voters))
yield fsvote
yield fsbill
self.session.expire_all()
示例13: _parse_votes
# 需要导入模块: from pupa.scrape import VoteEvent [as 别名]
# 或者: from pupa.scrape.VoteEvent import pupa_id [as 别名]
def _parse_votes(self, url, vote, bill):
'''Given a vote url and a vote object, extract the voters and
the vote counts from the vote page and update the vote object.
'''
if url.lower().endswith('.pdf'):
try:
resp = self.get(url)
except HTTPError:
# This vote document wasn't found.
msg = 'No document found at url %r' % url
self.logger.warning(msg)
return
try:
v = PDFCommitteeVote(url, resp.content, bill)
return v.asvote()
except PDFCommitteeVoteParseError:
# Warn and skip.
self.warning("Could't parse committee vote at %r" % url)
return
html = self.get(url).text
doc = lxml.html.fromstring(html)
doc.make_links_absolute(url)
# Yes, no, excused, absent.
try:
vals = doc.xpath('//table')[1].xpath('tr/td/text()')
except IndexError:
# Most likely was a bogus link lacking vote data.
return
yes_count, no_count, excused_count, absent_count = map(int, vals)
# Get the motion.
try:
motion = doc.xpath('//br')[-1].tail.strip()
except IndexError:
# Some of them mysteriously have no motion listed.
motion = vote['action']
if not motion:
motion = vote['action']
vote['motion'] = motion
action = vote['action']
vote_url = vote['vote_url']
vote = VoteEvent(
chamber=vote['chamber'],
start_date=vote['date'],
motion_text=vote['motion'],
result='fail', # placeholder
classification='passage',
bill=bill,
bill_action=vote['action'],
)
vote.pupa_id = vote_url # URL contains sequence number
vote.add_source(vote_url)
vote.set_count('yes', yes_count)
vote.set_count('no', no_count)
vote.set_count('excused', excused_count)
vote.set_count('absent', absent_count)
for text in doc.xpath('//table')[2].xpath('tr/td/text()'):
if not text.strip(u'\xa0'):
continue
v, name = filter(None, text.split(u'\xa0'))
# Considering Name is brackets as short name
regex = re.compile(r".*?\((.*?)\)")
short_name = re.findall(regex, name)
if len(short_name) > 0:
note = 'Short Name: ' + short_name[0]
else:
note = ''
# Name without brackets like 'Kary, Douglas'
name = re.sub(r"[\(\[].*?[\)\]]", "", name)
if v == 'Y':
vote.yes(name, note=note)
elif v == 'N':
vote.no(name, note=note)
elif v == 'E':
vote.vote('excused', name, note=note)
elif v == 'A':
vote.vote('absent', name, note=note)
# code to deterimine value of `passed`
passed = None
# some actions take a super majority, so we aren't just
# comparing the yeas and nays here.
for i in vote_passage_indicators:
if i in action:
passed = True
break
for i in vote_failure_indicators:
if i in action and passed:
# a quick explanation: originally an exception was
#.........这里部分代码省略.........
示例14: parse_vote
# 需要导入模块: from pupa.scrape import VoteEvent [as 别名]
# 或者: from pupa.scrape.VoteEvent import pupa_id [as 别名]
def parse_vote(self, bill, link):
    """Fetch a roll-call page with ``requests`` and yield its VoteEvent.

    Pages that report a rate-limit ('Varnish cache server'), 'Page Not
    Found' or 'Page Unavailable' are skipped with a warning.  The first
    ``<h3>`` under ``#main_content`` supplies chamber, motion and date; the
    remaining ``<h3>`` headings supply the yea / nay / present / absent
    totals.  Voter names are read from the ``<a>`` elements in page order:
    the first link is skipped, the next ``yes_count`` names are recorded as
    yes, the following ``no_count`` as no, the rest as other.
    """
    # Server sometimes sends proper error headers,
    # sometimes not
    try:
        self.info("Get {}".format(link))
        # NOTE(review): requests only raises HTTPError from
        # raise_for_status(), which is not called here, so error statuses
        # are caught by the text checks below rather than by this handler.
        text = requests.get(link).text
    except requests.exceptions.HTTPError as err:
        self.warning("{} fetching vote {}, skipping".format(err, link))
        return

    if 'Varnish cache server' in text:
        self.warning("Scrape rate is too high, try re-scraping with "
                     "The --rpm set to a lower number")
        return

    if 'Page Not Found' in text or 'Page Unavailable' in text:
        self.warning("missing vote, skipping")
        return

    member_doc = lxml.html.fromstring(text)
    motion = member_doc.xpath("//div[@id='main_content']/h4/text()")
    chamber_date_line = ''.join(member_doc.xpath("//div[@id='main_content']/h3[1]//text()"))
    chamber_date_line_words = chamber_date_line.split()
    vote_chamber = chamber_date_line_words[0]
    vote_date = datetime.datetime.strptime(chamber_date_line_words[-1], '%m/%d/%Y')
    vote_status = " ".join(chamber_date_line_words[2:-2])

    opinions = member_doc.xpath("//div[@id='main_content']/h3[position() > 1]/text()")
    if not opinions:
        self.warning("No Votes for: %s", link)
        return

    if not vote_status.strip():
        # No motion in the header line: fall back to the <h4> text.
        vote_status = motion[0]
    vote_chamber = 'upper' if vote_chamber == 'Senate' else 'lower'

    # Every total starts at zero so a page lacking one of the headings can
    # no longer hit an unbound variable.
    yes_count = no_count = p_count = a_count = 0
    for heading in opinions:
        try:
            count = int(heading[heading.find("(") + 1:heading.find(")")])
        except ValueError:
            # This is likely not a vote-count text chunk
            # It's probably '`On roll call the vote was:`
            continue
        lowered = heading.lower()
        if "yea" in lowered:
            yes_count = count
        elif "nay" in lowered:
            no_count = count
        elif "present" in lowered:
            p_count = count
        elif "absent" in lowered:
            a_count = count

    vote = VoteEvent(
        bill=bill,
        start_date=vote_date.strftime('%Y-%m-%d'),
        chamber=vote_chamber,
        motion_text=vote_status,
        result='pass' if yes_count > no_count else 'fail',
        classification='passage',
    )
    vote.pupa_id = link
    vote.set_count('yes', yes_count)
    vote.set_count('no', no_count)
    vote.set_count('abstain', p_count)
    vote.set_count('absent', a_count)
    vote.add_source(link)

    a_links = member_doc.xpath("//div[@id='main_content']/a/text()")
    # The first link is skipped; positions are 1-based among the rest.
    for position, link_text in enumerate(a_links[1:], start=1):
        voter = re.sub(',', '', link_text).split()[0]
        if position <= yes_count:
            option = 'yes'
        elif no_count != 0 and position <= yes_count + no_count:
            option = 'no'
        else:
            option = 'other'
        vote.vote(option, voter)
    yield vote
示例15: scrape_pdf_for_votes
# 需要导入模块: from pupa.scrape import VoteEvent [as 别名]
# 或者: from pupa.scrape.VoteEvent import pupa_id [as 别名]
#.........这里部分代码省略.........
# consider pass/fail as a document property instead of a result of the vote count
# extract the vote count from the document instead of just using counts of names
if not line.strip():
continue
elif line.strip() in PASS_FAIL_WORDS:
# Crash on duplicate pass/fail status that differs from previous status
if passed is not None and passed != PASS_FAIL_WORDS[line.strip()]:
raise Exception("Duplicate pass/fail matches in [%s]" % href)
passed = PASS_FAIL_WORDS[line.strip()]
elif COUNT_RE.match(line):
(yes_count, no_count, present_count,
not_voting_count) = COUNT_RE.match(line).groups()
yes_count = int(yes_count)
no_count = int(no_count)
present_count = int(present_count)
counts_found = True
elif counts_found:
for value in VOTE_VALUES:
if re.search(r'^\s*({})\s+\w'.format(value), line):
vote_lines.append(line)
break
votes = find_columns_and_parse(vote_lines)
for name, vcode in votes.items():
if name == 'Mr. Speaker':
name = session_details[session]['speaker']
elif name == 'Mr. President':
name = session_details[session]['president']
else:
# Converts "Davis,William" to "Davis, William".
name = re.sub(r'\,([a-zA-Z])', r', \1', name)
if vcode == 'Y':
yes_votes.append(name)
elif vcode == 'N':
no_votes.append(name)
elif vcode == 'P':
present_votes.append(name)
elif vcode == 'E':
excused_votes.append(name)
elif vcode == 'NV':
not_voting.append(name)
elif vcode == 'A':
absent_votes.append(name)
# fake the counts
if yes_count == 0 and no_count == 0 and present_count == 0:
yes_count = len(yes_votes)
no_count = len(no_votes)
else: # audit
if yes_count != len(yes_votes):
self.warning("Mismatched yes count [expect: %i] [have: %i]" %
(yes_count, len(yes_votes)))
warned = True
if no_count != len(no_votes):
self.warning("Mismatched no count [expect: %i] [have: %i]" %
(no_count, len(no_votes)))
warned = True
if passed is None:
if actor['classification'] == 'lower': # senate doesn't have these lines
self.warning("No pass/fail word found; fall back to comparing yes and no vote.")
warned = True
passed = 'pass' if yes_count > no_count else 'fail'
classification, _ = _categorize_action(motion)
vote_event = VoteEvent(legislative_session=session,
motion_text=motion,
classification=classification,
organization=actor,
start_date=date,
result=passed)
for name in yes_votes:
vote_event.yes(name)
for name in no_votes:
vote_event.no(name)
for name in present_votes:
vote_event.vote('other', name)
for name in excused_votes:
vote_event.vote('excused', name)
for name in not_voting:
vote_event.vote('not voting', name)
for name in absent_votes:
vote_event.vote('absent', name)
vote_event.set_count('yes', yes_count)
vote_event.set_count('no', no_count)
vote_event.set_count('other', present_count)
vote_event.set_count('excused', len(excused_votes))
vote_event.set_count('absent', len(absent_votes))
vote_event.set_count('not voting', len(not_voting))
vote_event.add_source(href)
# for distinguishing between votes with the same id and on same day
vote_event.pupa_id = href
if warned:
self.warning("Warnings were issued. Best to check %s" % href)
return vote_event