本文整理汇总了Python中pupa.scrape.Event.add_bill方法的典型用法代码示例。如果您正苦于以下问题：Python Event.add_bill方法的具体用法？Python Event.add_bill怎么用？Python Event.add_bill使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pupa.scrape.Event的用法示例。
在下文中一共展示了Event.add_bill方法的1个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: scrape
# 需要导入模块: from pupa.scrape import Event [as 别名]
# 或者: from pupa.scrape.Event import add_bill [as 别名]
#.........这里部分代码省略.........
:param chamber:
:param session:
:return: yielded Event objects
"""
chambers = {
'upper': {'name': 'Senate', 'title': 'Senator'},
'lower': {'name': 'House', 'title': 'Representative'},
}
if chamber == 'other':
return
if chamber is None:
self.info('no chamber specified, using Joint Committee Meeting Schedule')
events_url = 'http://www.scstatehouse.gov/meetings.php'
else:
events_url = 'http://www.scstatehouse.gov/meetings.php?chamber=%s' % (
chambers[chamber]['name'].upper()[0]
)
page = self.get_page_from_url(events_url)
meeting_year = page.xpath(
'//h2[@class="barheader"]/span')[0].text_content()
meeting_year = re.search(
r'Week of [A-Z][a-z]+\s+[0-9]{1,2}, ([0-9]{4})',
meeting_year).group(1)
dates = page.xpath("//div[@id='contentsection']/ul")
for date in dates:
date_string = date.xpath('span')
if len(date_string) == 1:
date_string = date_string[0].text_content()
else:
continue
# If an event is in the next calendar year, the date_string
# will have a year in it
if date_string.count(",") == 2:
event_year = date_string[-4:]
date_string = date_string[:-6]
elif date_string.count(",") == 1:
event_year = meeting_year
else:
raise AssertionError("This is not a valid date: '{}'"). \
format(date_string)
for meeting in date.xpath('li'):
time_string = meeting.xpath('span')[0].text_content()
if time_string == 'CANCELED' or len(
meeting.xpath(
'.//span[contains(text(), "CANCELED")]')) > 0:
continue
time_string = normalize_time(time_string)
date_time = datetime.datetime.strptime(
event_year + ' ' + date_string
+ ' ' + time_string, "%Y %A, %B %d %I:%M %p")
date_time = self._tz.localize(date_time)
meeting_info = meeting.xpath(
'br[1]/preceding-sibling::node()')[1]
location, description = re.search(
r'-- (.*?) -- (.*)', meeting_info).groups()
# if re.search(r'committee', description, re.I):
# meeting_type = 'committee:meeting'
# else:
# meeting_type = 'other:meeting'
event = Event(name=description, # Event Name
start_date=date_time, # When the event will take place
location_name=location) # Where the event will be
event.add_source(events_url)
agenda_url = meeting.xpath(".//a[contains(@href,'agendas')]")
if agenda_url:
agenda_url = agenda_url[0].attrib['href']
event.add_source(agenda_url)
event.add_document(note="Agenda",
url=agenda_url,
media_type="application/pdf")
agenda_page = self.get_page_from_url(agenda_url)
for bill in agenda_page.xpath(
".//a[contains(@href,'billsearch.php')]"):
# bill_url = bill.attrib['href']
bill_id = bill.text_content().replace(
'.', '').replace(' ', '')
# bill_description = self.get_bill_description(bill_url)
event.add_bill(bill_id)
yield event