This page collects typical usage examples of the Event.add_participant method from Python's pupa.scrape module. If you are wondering what Event.add_participant does, how to use it, or want working examples, the curated code samples below should help. You can also explore further usage examples of the containing class, pupa.scrape.Event.
The following presents 15 code examples of Event.add_participant, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
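Before the scraped examples, here is a minimal, self-contained sketch of the call pattern they all share. The event name, date, timezone, and participant below are made-up illustration values; only the Event(...) constructor arguments and the add_participant(name, type=..., note=...) call mirror how the examples below use the pupa API.

from datetime import datetime

import pytz
from pupa.scrape import Event

# All values below are hypothetical, for illustration only.
tz = pytz.timezone('America/Chicago')
event = Event(
    name='Committee Meeting: Appropriations',
    start_date=tz.localize(datetime(2018, 3, 14, 9, 30)),
    location_name='Room 101, State Capitol',
)
event.add_source('https://example.com/hearing-schedule')
# Attach the hosting committee as an event participant.
event.add_participant('Appropriations', type='committee', note='host')

As in most of the examples below, the participant name is a committee name, the type is 'committee' (or 'organization'), and the note is 'host'.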
Example 1: parse_div
# Required import: from pupa.scrape import Event [as alias]
# Or: from pupa.scrape.Event import add_participant [as alias]
def parse_div(self, row, chamber, com):
    cal_link = row.xpath('.//a[.//span[@id="calendarmarker"]]/@href')[0]
    # event_date = row.xpath('string(.//div[contains(@class,"ItemDate")])').strip()
    title, location, start_date, end_date = self.parse_gcal(cal_link)

    event = Event(
        start_date=start_date,
        end_date=end_date,
        name=title,
        location_name=location,
    )

    event.add_source('http://mgaleg.maryland.gov/webmga/frmHearingSchedule.aspx')

    for item in row.xpath('.//div[@class="col-xs-12a Item"]'):
        description = item.xpath('string(.)').strip()
        agenda = event.add_agenda_item(description=description)

    for item in row.xpath('.//div[contains(@class,"ItemContainer")]/a'):
        description = item.xpath('string(.)').strip()
        agenda = event.add_agenda_item(description=description)

        event.add_document(
            description,
            item.xpath('@href')[0],
            media_type="application/pdf",
            on_duplicate="ignore"
        )

    for item in row.xpath('.//div[contains(@class,"ItemContainer")]'
                          '[./div[@class="col-xs-1 Item"]]'):
        description = item.xpath('string(.)').strip()
        agenda = event.add_agenda_item(description=description)

        bill = item.xpath('.//div[@class="col-xs-1 Item"]/a/text()')[0].strip()
        agenda.add_bill(bill)

    video = row.xpath('.//a[./span[@class="OnDemand"]]')
    if video:
        event.add_media_link(
            'Video of Hearing',
            video[0].xpath('@href')[0],
            'text/html'
        )

    if 'subcommittee' in title.lower():
        subcom = title.split('-')[0].strip()
        event.add_participant(
            subcom,
            type='committee',
            note='host',
        )
    else:
        event.add_participant(
            com,
            type='committee',
            note='host',
        )
    yield event
Example 2: scrape_house_weekly_schedule
# Required import: from pupa.scrape import Event [as alias]
# Or: from pupa.scrape.Event import add_participant [as alias]
def scrape_house_weekly_schedule(self):
    url = "http://house.louisiana.gov/H_Sched/Hse_MeetingSchedule.aspx"
    page = self.lxmlize(url)

    meeting_rows = page.xpath('//table[@id = "table229"]/tr')

    valid_meetings = [row for row in meeting_rows if row.xpath(
        './td[1]')[0].text_content().replace(u'\xa0', '') and row.xpath(
        './td/a/img[contains(@src, "PDF-AGENDA.png")]') and 'Not Meeting' not in row.xpath(
        './td[2]')[0].text_content()]

    for meeting in valid_meetings:
        try:
            guid = meeting.xpath('./td/a[descendant::img[contains(@src,'
                                 '"PDF-AGENDA.png")]]/@href')[0]
            self.logger.debug(guid)
        except IndexError:
            # Sometimes we have a dead link; this only happens on dead
            # entries. (An empty xpath result raises IndexError, not KeyError.)
            continue

        committee_name = meeting.xpath('./td[1]/text()')[0].strip()
        meeting_string = meeting.xpath('./td[2]')[0].text_content()

        if "@" in meeting_string:
            continue  # Contains no time data.

        date, time, location = ([s.strip() for s in meeting_string.split(
            ',') if s] + [None] * 3)[:3]

        # check for time in date because of missing comma
        time_srch = re.search(r'\d{2}:\d{2} (AM|PM)', date)
        if time_srch:
            location = time
            time = time_srch.group()
            date = date.replace(time, '')

        self.logger.debug(location)

        year = datetime.datetime.now().year
        datetime_string = ' '.join((date, str(year), time))
        when = datetime.datetime.strptime(datetime_string, '%b %d %Y %I:%M %p')
        when = self._tz.localize(when)

        description = 'Committee Meeting: {}'.format(committee_name)
        self.logger.debug(description)

        event = Event(name=description,
                      start_date=when,  # already localized above
                      location_name=location)
        event.add_source(url)
        event.add_participant(committee_name, type='committee', note='host')
        event.add_document(note='Agenda', url=guid, text='agenda',
                           media_type='application/pdf')

        yield event
Example 3: scrape_event_page
# Required import: from pupa.scrape import Event [as alias]
# Or: from pupa.scrape.Event import add_participant [as alias]
def scrape_event_page(self, url, event_type):
    page = self.lxmlize(url)
    page.make_links_absolute('https://malegislature.gov/')

    title = page.xpath('string(//div[contains(@class,"followable")]/h1)')
    title = title.replace('Hearing Details', '').strip()
    title = title.replace('Special Event Details', '')

    start_day = page.xpath('string(//dl[contains(@class,"eventInformation")]/dd[2])').strip()
    start_time = page.xpath('string(//dl[contains(@class,"eventInformation")]/dd[3])').strip()
    location = page.xpath('string(//dl[contains(@class,"eventInformation")]/dd[4]//a)').strip()
    description = page.xpath('string(//dl[contains(@class,"eventInformation")]/dd[5])').strip()

    start_date = self._TZ.localize(
        dateutil.parser.parse(
            '{} {}'.format(start_day, start_time),
        )
    )

    event = Event(
        start_date=start_date,
        name=title,
        location_name=location,
        description=description
    )

    event.add_source(url)

    agenda_rows = page.xpath(
        '//div[contains(@class,"col-sm-8") and .//h2[contains(@class,"agendaHeader")]]'
        '/div/div/div[contains(@class,"panel-default")]')

    for row in agenda_rows:
        # only select the text node, not the spans
        agenda_title = row.xpath('string(.//h4/a/text()[normalize-space()])').strip()

        if agenda_title == '':
            agenda_title = row.xpath('string(.//h4/text()[normalize-space()])').strip()

        agenda = event.add_agenda_item(description=agenda_title)

        bills = row.xpath('.//tbody/tr/td[1]/a/text()')
        for bill in bills:
            bill = bill.strip().replace('.', ' ')
            agenda.add_bill(bill)

    if event_type == 'Hearing':
        event.add_participant(
            title,
            type='committee',
            note='host',
        )

    yield event
Example 4: scrape
# Required import: from pupa.scrape import Event [as alias]
# Or: from pupa.scrape.Event import add_participant [as alias]
def scrape(self):
    calendar_url = "http://dccouncil.us/calendar"
    data = self.get(calendar_url).text
    doc = lxml.html.fromstring(data)

    committee_regex = re.compile("(Committee .*?)will")

    event_list = doc.xpath("//div[@class='event-description-dev']")
    for event in event_list:
        place_and_time = event.xpath(".//div[@class='event-description-dev-metabox']/p/text()")
        when = " ".join([place_and_time[0].strip(), place_and_time[1].strip()])
        if len(place_and_time) > 2:
            location = place_and_time[2]
        else:
            location = "unknown"
        # when is now of the following format:
        # Wednesday, 2/25/2015 9:30am
        when = datetime.datetime.strptime(when, "%A, %m/%d/%Y %I:%M%p")

        description_content = event.xpath(".//div[@class='event-description-content-dev']")[0]
        description_lines = description_content.xpath("./*")
        name = description_lines[0].text_content()
        desc_without_title = " ".join(d.text_content() for d in description_lines[1:])
        description = re.sub(r'\s+', " ", description_content.text_content()).strip()
        potential_bills = description_content.xpath(".//li")

        committee = committee_regex.search(desc_without_title)
        event_type = 'other'
        if committee is not None:
            committee = committee.group(1).strip()
            event_type = 'committee:meeting'

        e = Event(name=name,
                  description=description,
                  start_date=self._tz.localize(when),
                  location_name=location,
                  classification=event_type,
                  )

        for b in potential_bills:
            bill = b.xpath("./a/text()")
            if len(bill) == 0:
                continue
            bill = bill[0]
            bill_desc = b.text_content().replace(bill, "").strip(", ").strip()
            ses, num = bill.split("-")
            bill = ses.replace(" ", "") + "-" + num.zfill(4)
            item = e.add_agenda_item(bill_desc)
            item.add_bill(bill)

        e.add_source(calendar_url)

        if committee:
            e.add_participant(committee, type='organization', note='host')

        yield e
Example 5: scrape_upper
# Required import: from pupa.scrape import Event [as alias]
# Or: from pupa.scrape.Event import add_participant [as alias]
def scrape_upper(self):
    listing_url = 'https://www.senate.mo.gov/hearingsschedule/hrings.htm'

    html = self.get(listing_url).text

    # The HTML here isn't wrapped in a container per-event
    # which makes xpath a pain. So string split by <hr>
    # then parse each event's fragment for cleaner results
    for fragment in html.split('<hr />')[1:]:
        page = lxml.html.fromstring(fragment)

        when_date = self.row_content(page, 'Date:')
        when_time = self.row_content(page, 'Time:')
        location = self.row_content(page, 'Room:')

        location = '{}, {}'.format(
            location,
            '201 W Capitol Ave, Jefferson City, MO 65101'
        )

        # com = self.row_content(page, 'Committee:')
        com = page.xpath('//td[descendant::b[contains(text(),"Committee")]]/a/text()')[0]
        com = com.split(', Senator')[0].strip()

        start_date = self._TZ.localize(
            dateutil.parser.parse('{} {}'.format(when_date, when_time))
        )

        event = Event(
            start_date=start_date,
            name=com,
            location_name=location
        )

        event.add_source(listing_url)
        event.add_participant(
            com,
            type='committee',
            note='host',
        )

        for bill_table in page.xpath('//table[@width="85%" and @border="0"]'):
            bill_link = ''
            if bill_table.xpath(self.bill_link_xpath):
                agenda_line = bill_table.xpath('string(tr[2])').strip()
                agenda_item = event.add_agenda_item(description=agenda_line)

                bill_link = bill_table.xpath(self.bill_link_xpath)[0].strip()
                agenda_item.add_bill(bill_link)
            else:
                agenda_line = bill_table.xpath('string(tr[1])').strip()
                agenda_item = event.add_agenda_item(description=agenda_line)

        yield event
Example 6: scrape
# Required import: from pupa.scrape import Event [as alias]
# Or: from pupa.scrape.Event import add_participant [as alias]
def scrape(self):
    EVENTS_URL = 'http://www.akleg.gov/basis/Meeting/Find'
    events = self.lxmlize(EVENTS_URL).xpath('//ul[@id="meetingResults"]/li')

    for info in events:
        event_url = info.xpath('span[@class="col04"]/a/@href')[0]
        doc = self.lxmlize(event_url)

        # Skip events that are placeholders or tentative
        # Also skip whole-chamber events
        if any(x.strip().startswith("No Meeting") for x in
               doc.xpath('//div[@class="schedule"]//text()')) \
                or "session" in \
                info.xpath('span[@class="col01"]/text()')[0].lower():
            continue

        name = " ".join(
            x.strip()
            for x in doc.xpath('//div[@class="schedule"]//text()')
            if x.strip()
        )

        # Skip events with no name
        if not name:
            continue

        event = Event(
            start_date=self._TZ.localize(
                datetime.datetime.strptime(
                    info.xpath('span[@class="col02"]/text()')[0],
                    self._DATETIME_FORMAT,
                )
            ),
            name=name,
            location_name=doc.xpath(
                '//div[@class="heading-container"]/span/text()'
            )[0].title()
        )

        event.add_participant(
            info.xpath('span[@class="col01"]/text()')[0].title(),
            type='committee',
            note='host',
        )
        for document in doc.xpath('//td[@data-label="Document"]/a'):
            event.add_document(
                document.xpath('text()')[0],
                url=document.xpath('@href')[0]
            )

        event.add_source(EVENTS_URL)
        event.add_source(event_url.replace(" ", "%20"))

        yield event
Example 7: scrape_page
# Required import: from pupa.scrape import Event [as alias]
# Or: from pupa.scrape.Event import add_participant [as alias]
def scrape_page(self, url, session, chamber):
    html = self.get(url).text
    doc = lxml.html.fromstring(html)
    doc.make_links_absolute(url)

    ctty_name = doc.xpath("//span[@class='heading']")[0].text_content()

    tables = doc.xpath("//table[@cellpadding='3']")
    info = tables[0]
    rows = info.xpath(".//tr")
    metainf = {}
    for row in rows:
        tds = row.xpath(".//td")
        key = tds[0].text_content().strip()
        value = tds[1].text_content().strip()
        metainf[key] = value

    where = metainf['Location:']
    subject_matter = metainf['Subject Matter:']
    description = "{}, {}".format(ctty_name, subject_matter)

    datetime = metainf['Scheduled Date:']
    datetime = re.sub(r"\s+", " ", datetime)
    repl = {
        "AM": " AM",
        "PM": " PM"  # Space shim.
    }
    for r in repl:
        datetime = datetime.replace(r, repl[r])
    datetime = self.localize(dt.datetime.strptime(datetime, "%b %d, %Y %I:%M %p"))

    event = Event(description,
                  start_date=datetime,
                  location_name=where)
    event.add_source(url)

    if ctty_name.startswith('Hearing Notice For'):
        # str.replace returns a new string; assign it back
        ctty_name = ctty_name.replace('Hearing Notice For', '')
    event.add_participant(ctty_name, 'organization')

    bills = tables[1]
    for bill in bills.xpath(".//tr")[1:]:
        tds = bill.xpath(".//td")
        if len(tds) < 4:
            continue
        # First, let's get the bill ID:
        bill_id = tds[0].text_content()
        agenda_item = event.add_agenda_item(bill_id)
        agenda_item.add_bill(bill_id)

    return event
Example 8: parse_event
# Required import: from pupa.scrape import Event [as alias]
# Or: from pupa.scrape.Event import add_participant [as alias]
def parse_event(self, row, chamber):
    # sample event available at http://www.akleg.gov/apptester.html
    committee_code = row.xpath('string(Sponsor)').strip()

    committee_name = '{} {}'.format(
        self.COMMITTEES_PRETTY[chamber],
        self.COMMITTEES[chamber][committee_code]['name']
    )

    name = '{} {}'.format(
        self.COMMITTEES_PRETTY[chamber],
        row.xpath('string(Title)').strip()
    )

    # If name is missing, make it "<CHAMBER> <COMMITTEE NAME>"
    if name == '':
        name = committee_name

    location = row.xpath('string(Location)').strip()

    # events with no location all seem to be committee hearings
    if location == '':
        location = 'Alaska State Capitol, 120 4th St, Juneau, AK 99801'

    start_date = dateutil.parser.parse(row.xpath('string(Schedule)'))
    # todo: do i need to self._TZ.localize() ?

    event = Event(
        start_date=start_date,
        name=name,
        location_name=location
    )

    event.add_source('http://w3.akleg.gov/index.php#tab4')

    event.add_participant(
        committee_name,
        type='committee',
        note='host',
    )

    for item in row.xpath('Agenda/Item'):
        agenda_desc = item.xpath('string(Text)').strip()
        if agenda_desc != '':
            agenda_item = event.add_agenda_item(description=agenda_desc)
            if item.xpath('BillRoot'):
                bill_id = item.xpath('string(BillRoot)')
                # AK Bill ids have a bunch of extra spaces
                bill_id = re.sub(r'\s+', ' ', bill_id)
                agenda_item.add_bill(bill_id)

    yield event
Example 9: scrape_lower_item
# Required import: from pupa.scrape import Event [as alias]
# Or: from pupa.scrape.Event import add_participant [as alias]
def scrape_lower_item(self, page):
    # print(lxml.etree.tostring(page, pretty_print=True))
    com = self.table_row_content(page, 'Committee:')
    when_date = self.table_row_content(page, 'Date:')
    when_time = self.table_row_content(page, 'Time:')
    location = self.table_row_content(page, 'Location:')

    if 'house hearing room' in location.lower():
        location = '{}, {}'.format(
            location,
            '201 W Capitol Ave, Jefferson City, MO 65101'
        )

    # fix some broken times, e.g. '12 :00'
    when_time = when_time.replace(' :', ':')

    # some times have extra info after the AM/PM
    if 'upon' in when_time:
        when_time = when_time.split('AM', 1)[0]
        when_time = when_time.split('PM', 1)[0]

    start_date = self._TZ.localize(
        dateutil.parser.parse('{} {}'.format(when_date, when_time))
    )

    event = Event(
        start_date=start_date,
        name=com,
        location_name=location
    )

    event.add_source('https://house.mo.gov/HearingsTimeOrder.aspx')
    event.add_participant(
        com,
        type='committee',
        note='host',
    )

    # different from general MO link xpath due to the <b>
    house_link_xpath = './/a[contains(@href, "Bill.aspx") ' \
                       'or contains(@href, "bill.aspx")]/b/text()'

    for bill_title in page.xpath(house_link_xpath):
        bill_no = bill_title.split('--')[0].strip()
        bill_no = bill_no.replace('HCS', '').strip()

        agenda_item = event.add_agenda_item(description=bill_title)
        agenda_item.add_bill(bill_no)

    yield event
Example 10: scrape
# Required import: from pupa.scrape import Event [as alias]
# Or: from pupa.scrape.Event import add_participant [as alias]
def scrape(self, session=None, chamber=None):
    if not session:
        session = self.latest_session()
        self.info('no session specified, using %s', session)

    url = "ftp://www.arkleg.state.ar.us/dfadooas/ScheduledMeetings.txt"
    page = self.get(url)
    page = csv.reader(StringIO(page.text), delimiter='|')

    for row in page:
        # Deal with embedded newline characters, which cause fake new rows
        LINE_LENGTH = 11
        while len(row) < LINE_LENGTH:
            row += next(page)

        desc = row[7].strip()

        match = re.match(r'^(.*)- (HOUSE|SENATE)$', desc)
        if match:
            comm = match.group(1).strip()
            comm = re.sub(r'\s+', ' ', comm)
            location = row[5].strip() or 'Unknown'
            when = datetime.datetime.strptime(row[2], '%Y-%m-%d %H:%M:%S')
            when = self._tz.localize(when)

            # Only assign events to a session if they are in the same year
            # Given that session metadata have some overlap and
            # missing end dates, this is the best option available
            session_year = int(session[:4])
            if session_year != when.year:
                continue

            description = "%s MEETING" % comm
            event = Event(
                name=description,
                start_time=when,
                location_name=location,
                description=description,
                timezone=self._tz.zone
            )
            event.add_source(url)

            event.add_participant(comm, type='committee', note='host')

            # time = row[3].strip()
            # if time in TIMECODES:
            #     event['notes'] = TIMECODES[time]

            yield event
Example 11: scrape_chamber
# Required import: from pupa.scrape import Event [as alias]
# Or: from pupa.scrape.Event import add_participant [as alias]
def scrape_chamber(self, chamber, session, start, end):
    page = self.get_xml(start, end)

    for row in xpath(page, '//wa:CommitteeMeeting'):
        event_cancelled = xpath(row, 'string(wa:Cancelled)')
        if event_cancelled == 'true':
            continue

        event_chamber = xpath(row, 'string(wa:Agency)')
        if self.chambers[event_chamber] != chamber:
            continue

        event_date = datetime.datetime.strptime(
            xpath(row, 'string(wa:Date)'), "%Y-%m-%dT%H:%M:%S")
        event_date = self._tz.localize(event_date)
        event_com = xpath(row, 'string(wa:Committees/'
                               'wa:Committee/wa:LongName)')
        agenda_id = xpath(row, 'string(wa:AgendaId)')
        notes = xpath(row, 'string(wa:Notes)')
        room = xpath(row, 'string(wa:Room)')
        building = xpath(row, 'string(wa:Building)')
        # XML has a wa:Address but it seems useless
        city = xpath(row, 'string(wa:City)')
        state = xpath(row, 'string(wa:State)')

        location = '{}, {}, {} {}'.format(
            room,
            building,
            city,
            state
        )

        event = Event(name=event_com, start_date=event_date,
                      location_name=location,
                      description=notes)

        source_url = 'https://app.leg.wa.gov/committeeschedules/Home/Agenda/{}'.format(
            agenda_id)
        event.add_source(source_url)
        event.add_participant(event_com, type='committee', note='host')

        event.extras['agendaId'] = agenda_id

        self.scrape_agenda_items(agenda_id, event)

        yield event
Example 12: scrape_event_page
# Required import: from pupa.scrape import Event [as alias]
# Or: from pupa.scrape.Event import add_participant [as alias]
def scrape_event_page(self, session, chamber, url, datetime):
    page = self.lxmlize(url)
    info = page.xpath("//p")
    metainfo = {}
    plaintext = ""
    for p in info:
        content = re.sub(r"\s+", " ", p.text_content())
        plaintext += content + "\n"
        if ":" in content:
            key, val = content.split(":", 1)
            metainfo[key.strip()] = val.strip()

    committee = metainfo['COMMITTEE']
    where = metainfo['PLACE']
    if "CHAIR" in where:
        where, chair = where.split("CHAIR:")
        metainfo['PLACE'] = where.strip()
        metainfo['CHAIR'] = chair.strip()

    chair = None
    if "CHAIR" in metainfo:
        chair = metainfo['CHAIR']

    plaintext = re.sub(r"\s+", " ", plaintext).strip()
    regexp = r"(S|J|H)(B|M|R) (\d+)"
    bills = re.findall(regexp, plaintext)

    event = Event(
        name=committee,
        start_date=self._tz.localize(datetime),
        location_name=where
    )

    event.add_source(url)
    event.add_participant(committee, type='committee', note='host')
    if chair is not None:
        event.add_participant(chair, type='legislator', note='chair')

    for bill in bills:
        chamber, type, number = bill
        bill_id = "%s%s %s" % (chamber, type, number)
        item = event.add_agenda_item('Bill up for discussion')
        item.add_bill(bill_id)

    event.add_agenda_item(plaintext)

    yield event
Example 13: scrape_events
# Required import: from pupa.scrape import Event [as alias]
# Or: from pupa.scrape.Event import add_participant [as alias]
def scrape_events(self, chamber, event_id):
    url = '%s%s' % (self.upper_url, event_id)
    html = self.get(url).text
    doc = lxml.html.fromstring(html)
    doc.make_links_absolute(url)
    rows = doc.xpath("//div[@id='WebPartWPQ2']")
    # some ids are empty
    if len(rows):
        table_data = rows[0].find('table')[1]

        for link in table_data.iterchildren('td'):
            td = link.xpath('//td[@class="ms-formbody"]')

            description = td[18].text
            when = td[19].text
            where = td[25].text
            # type = td[27].text
            meeting_lead = td[28].text

            when = datetime.datetime.strptime(when, "%m/%d/%Y %H:%M %p")
            when = self._tz.localize(when)
            if where is None or where == "":
                where = 'State House'
            event = Event(name=description,
                          start_date=when,
                          location_name=where)
            if td[20].text is None:
                # wrap the single lead in a list so the loop below
                # iterates over names rather than characters
                participants = [meeting_lead]
            else:
                participants = td[20].text.split(';')
            if participants:
                for participant in participants:
                    name = participant.strip().replace('HON.', '', 1)
                    if name != "":
                        event.add_participant(name, type='committee',
                                              note='host')

            event.add_source(url)
            yield event
    else:
        # hack so we dont fail on the first id numbers where there are some gaps
        # between the numbers that work and not.
        if event_id > 1700:
            raise Exception("Parsing is done we are on future ids that are not used yet.")
Example 14: scrape_chamber
# Required import: from pupa.scrape import Event [as alias]
# Or: from pupa.scrape.Event import add_participant [as alias]
def scrape_chamber(self, chamber, session):
    cha = {"upper": "7", "lower": "3", "other": "4"}[chamber]

    print_format = "%m/%d/%Y"
    now = dt.datetime.now()
    start = now.strftime(print_format)
    end = (now + timedelta(days=30)).strftime(print_format)

    url = event_page % (cha, start, end)
    page = self.lxmlize(url)

    committees = page.xpath("//a[contains(@href,'Agendas?CommitteeId')]/@href")
    for comm in committees:
        comm_page = self.lxmlize(comm)
        meetings = comm_page.xpath("//li[contains(@class, 'partialagendaitems')]")
        for meeting in meetings:
            heading, content = meeting.xpath("./ul/li")
            who, when = heading.text.split(" - ")
            meeting_title = "Scheduled meeting of %s" % who.strip()
            where_lines = content.text_content().split("\r\n")
            where = "\r\n".join([l.strip() for l in where_lines[6:9]])

            when = dt.datetime.strptime(when.strip(), "%m/%d/%Y %I:%M:%S %p")

            location = (where or '').strip() or "unknown"

            event = Event(name=meeting_title, start_time=self._tz.localize(when),
                          timezone=self._tz.zone, location_name=location,
                          description=meeting_title)

            event.add_participant(who.strip(), type='committee', note='host')
            event.add_source(url)

            # only scraping public hearing bills for now.
            bills = meeting.xpath(".//div[text() = 'Public Hearing']/following-sibling::li"
                                  "[contains(@class, 'visible-lg')]")
            for bill in bills:
                bill_id, descr = bill.xpath("./a/text()")[0].split(" - ")
                item = event.add_agenda_item(descr.strip())
                item.add_bill(bill_id.strip())

            yield event
Example 15: scrape
# Required import: from pupa.scrape import Event [as alias]
# Or: from pupa.scrape.Event import add_participant [as alias]
def scrape(self, window=None):
    if window:
        n_days_ago = datetime.datetime.utcnow() - datetime.timedelta(float(window))
    else:
        n_days_ago = None

    events = self.events(n_days_ago)

    for event, web_event in self._merge_events(events):
        body_name = event["EventBodyName"]

        if 'Board of Directors -' in body_name:
            body_name, event_name = [part.strip()
                                     for part
                                     in body_name.split('-')]
        else:
            event_name = body_name

        # Events can have an EventAgendaStatusName of "Final", "Final Revised",
        # and "Final 2nd Revised."
        # We classify these events as "passed."
        status_name = event['EventAgendaStatusName']
        if status_name.startswith('Final'):
            status = 'passed'
        elif status_name == 'Draft':
            status = 'confirmed'
        elif status_name == 'Canceled':
            status = 'cancelled'
        else:
            status = 'tentative'

        location = event["EventLocation"]

        if not location:
            # We expect some events to have no location. LA Metro would
            # like these displayed in the Councilmatic interface. However,
            # OCD requires a value for this field. Add a sane default.
            location = 'Not available'

        e = Event(event_name,
                  start_date=event["start"],
                  description='',
                  location_name=location,
                  status=status)

        e.pupa_id = str(event['EventId'])

        # Metro requires the EventGuid to build out MediaPlayer links.
        # Add both the English event GUID, and the Spanish event GUID if
        # it exists, to the extras dict.
        e.extras = {'guid': event['EventGuid']}

        legistar_api_url = self.BASE_URL + '/events/{0}'.format(event['EventId'])
        e.add_source(legistar_api_url, note='api')

        if event.get('SAPEventGuid'):
            e.extras['sap_guid'] = event['SAPEventGuid']

        if 'event_details' in event:
            # if there is not a meeting detail page on legistar
            # don't capture the agenda data from the API
            for item in self.agenda(event):
                agenda_item = e.add_agenda_item(item["EventItemTitle"])
                if item["EventItemMatterFile"]:
                    identifier = item["EventItemMatterFile"]
                    agenda_item.add_bill(identifier)

                if item["EventItemAgendaNumber"]:
                    # To the notes field, add the item number as given in the agenda minutes
                    note = "Agenda number, {}".format(item["EventItemAgendaNumber"])
                    agenda_item['notes'].append(note)

                # The EventItemAgendaSequence provides
                # the line number of the Legistar agenda grid.
                agenda_item['extras']['item_agenda_sequence'] = item['EventItemAgendaSequence']

            # Historically, the Legistar system has duplicated the EventItemAgendaSequence,
            # resulting in data inaccuracies. The scrape should fail in such cases, until Metro
            # cleans the data.
            item_agenda_sequences = [item['extras']['item_agenda_sequence'] for item in e.agenda]
            if len(item_agenda_sequences) != len(set(item_agenda_sequences)):
                error_msg = 'An agenda has duplicate agenda items on the Legistar grid: \
                    {event_name} on {event_date} ({legistar_api_url}). \
                    Contact Metro, and ask them to remove the duplicate EventItemAgendaSequence.'
                raise ValueError(error_msg.format(event_name=e.name,
                                                  event_date=e.start_date.strftime("%B %d, %Y"),
                                                  legistar_api_url=legistar_api_url))

        e.add_participant(name=body_name,
                          type="organization")

        if event.get('SAPEventId'):
            e.add_source(self.BASE_URL + '/events/{0}'.format(event['SAPEventId']),
                         note='api (sap)')

        if event['EventAgendaFile']:
            e.add_document(note='Agenda',
                           url=event['EventAgendaFile'],
                           media_type="application/pdf")
# ... (the remainder of this example is omitted) ...