This article collects typical usage examples of the Python method pupa.scrape.Event.add_committee. If you have been wondering what Event.add_committee does, how to call it, or what real-world usage looks like, the curated examples below should help. You can also browse further usage examples of the containing class, pupa.scrape.Event.
The following shows 6 code examples of the Event.add_committee method, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
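Before turning to the real-world scrapers, here is a minimal sketch of the basic call pattern. The timezone, event name, location, committee, and URL are illustrative placeholders, not values taken from any of the examples that follow.

import datetime as dt

import pytz
from pupa.scrape import Event

tz = pytz.timezone('US/Eastern')  # assumed timezone, for illustration only

event = Event(
    name='Joint Budget Hearing',              # hypothetical event name
    start_date=tz.localize(dt.datetime(2018, 3, 1, 9, 30)),
    location_name='Room 101, State Capitol',  # hypothetical location
)
event.add_source('https://example.com/calendar')  # placeholder source URL
# add_committee() records the committee as an organization participating
# in the event; the optional note (e.g. 'host') describes its role.
event.add_committee('Senate Finance Committee', note='host')

As the examples below show, add_committee accepts the committee name positionally or as name=, plus optional id= and note= keyword arguments.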
Example 1: scrape_meeting_notice
# Required import: from pupa.scrape import Event [as alias]
# Or: from pupa.scrape.Event import add_committee [as alias]
def scrape_meeting_notice(self, chamber, item, url):
    # An event name is not provided for all meetings, so use the committee name.
    event_name = str(item['CommitteeName'])
    # e.g. 04/25/2012 03:00:00 PM
    fmt = "%m/%d/%Y %I:%M:%S %p"
    start_time = dt.datetime.strptime(str(item['MeetingDateTime']), fmt)
    location_name = str(item['AddressAliasNickname'])
    event = Event(location_name=location_name,
                  start_date=self._tz.localize(start_time),
                  name=event_name,
                  description='Committee Meeting Status: {}'
                              .format(item['CommitteeMeetingStatusName']))
    event.add_source(url)
    event.add_committee(name=str(item['CommitteeName']), id=item['CommitteeId'])
    page_url = ("http://legis.delaware.gov/json/MeetingNotice/"
                "GetCommitteeMeetingItems?committeeMeetingId={}".format(
                    item['CommitteeMeetingId']))
    event.add_source(page_url)
    page_data = self.post(page_url).json()['Data']
    for item in page_data:
        event.add_agenda_item(description=str(item['ItemDescription']))
        event.add_person(name=str(item['PrimarySponsorShortName']),
                         id=str(item['PrimarySponsorPersonId']),
                         note='Sponsor')
    yield event
Example 2: scrape
# Required import: from pupa.scrape import Event [as alias]
# Or: from pupa.scrape.Event import add_committee [as alias]
def scrape(self):
    page = self.lxmlize(calurl)
    events = page.xpath("//table[@class='agenda-body']//tr")[1:]
    for event in events:
        comit_url = event.xpath(
            ".//a[contains(@href, '/Pages/comm-info.aspx?c=')]")
        if len(comit_url) != 1:
            raise Exception("Expected exactly one committee link per row")
        comit_url = comit_url[0]
        who = self.scrape_participants(comit_url.attrib['href'])

        tds = event.xpath("./*")
        date = tds[0].text_content().strip()
        cttie = tds[1].text_content().strip()
        _chamber, cttie = [x.strip() for x in cttie.split(" - ", 1)]
        info = tds[2]
        name = info.xpath("./a[contains(@href, 'raw')]")[0]
        notice = name.attrib['href']
        name = name.text
        time, where = info.xpath("./i/text()")
        what = tds[3].text_content()
        what = what.replace("Items: ", "")
        if "(None)" in what:
            continue
        what = [x.strip() for x in what.split(";")]

        when = ", ".join([date, str(dt.datetime.now().year), time])
        when = dt.datetime.strptime(when, "%a %b %d, %Y, %I:%M %p")

        event = Event(
            name=name,
            location_name=where,
            start_date=self._tz.localize(when),
        )
        event.add_source(calurl)
        event.add_committee(cttie, note='host')
        event.add_document("notice", notice, media_type='application/pdf')

        for entry in what:
            item = event.add_agenda_item(entry)
            if entry.startswith('AB') or entry.startswith('SB'):
                item.add_bill(entry)
        for thing in who:
            event.add_person(thing['name'])

        yield event
Example 3: scrape
# Required import: from pupa.scrape import Event [as alias]
# Or: from pupa.scrape.Event import add_committee [as alias]
def scrape(self):
    tz = pytz.timezone("US/Eastern")
    get_short_codes(self)
    page = self.lxmlize(URL)
    table = page.xpath(
        "//table[@id='ctl00_ContentPlaceHolderCol1_GridView1']")[0]

    for event in table.xpath(".//tr")[1:]:
        tds = event.xpath("./td")
        committee = tds[0].text_content().strip()
        descr = [x.text_content() for x in tds[1].xpath(".//span")]
        if len(descr) != 1:
            raise Exception("Expected exactly one description span")
        descr = descr[0].replace('.', '').strip()
        when = tds[2].text_content().strip()
        where = tds[3].text_content().strip()
        notice = tds[4].xpath(".//a")[0]
        notice_href = notice.attrib['href']
        notice_name = notice.text
        when = dt.datetime.strptime(when, "%m/%d/%Y %I:%M %p")
        when = pytz.utc.localize(when)
        event = Event(name=descr, start_time=when, classification='committee-meeting',
                      description=descr, location_name=where, timezone=tz.zone)

        if "/" in committee:
            committees = committee.split("/")
        else:
            committees = [committee]

        for committee in committees:
            if "INFO" not in committee:
                # Resolve the committee's short code; fall back to an unknown chamber.
                committee = self.short_ids.get(committee, {"chamber": "unknown",
                                                           "name": committee})
            else:
                committee = {
                    "chamber": "joint",
                    "name": committee,
                }
            event.add_committee(committee['name'], note='host')

        event.add_source(URL)
        event.add_document(notice_name,
                           notice_href,
                           media_type='text/html')
        for bill in self.get_related_bills(notice_href):
            a = event.add_agenda_item(description=bill['descr'])
            a.add_bill(
                bill['bill_id'],
                note=bill['type']
            )
        yield event
Example 4: _parse_house_floor_xml_legislative_activity
# Required import: from pupa.scrape import Event [as alias]
# Or: from pupa.scrape.Event import add_committee [as alias]
def _parse_house_floor_xml_legislative_activity(self, xml):
    """
    Parses an XML string of House floor updates and yields an Event for each floor action.

    @param xml: XML of a floor update
    @type xml: string
    @return: complete Event object
    @rtype: Event
    """
    tree = self._xml_parser(xml)
    congress = tree.xpath('.//legislative_congress')[0].get('congress')
    house_committees = self._get_current_house_committee_names()

    for fa in tree.xpath('.//floor_action'):
        fa_text = fa.xpath('.//action_description')[0].xpath('string()')

        eastern = pytz.timezone('US/Eastern')
        dt = datetime.datetime.strptime(fa.xpath('action_time')[0].get('for-search'), '%Y%m%dT%H:%M:%S')

        event = Event('House Floor Update on {0} at {1}.'.format(dt.strftime('%Y-%m-%d'), dt.strftime('%H:%M:%S')),
                      eastern.localize(dt).astimezone(pytz.utc),
                      'US/Eastern',
                      '',
                      description=fa_text,
                      classification='floor_update')
        event.set_location("East Capitol Street Northeast & First St SE, Washington, DC 20004",
                           note='House Floor', url='http://www.house.gov',
                           coordinates={'latitude': '38.889931', 'longitude': '-77.009003'})
        event.add_source(self._house_floor_src_url(date_str=tree.xpath('.//legislative_day')[0].get('date')),
                         note="Scraped from the Office of the Clerk, U.S. House of Representatives website.")
        event.extras['act-id'] = fa.get('act-id')
        event.extras['unique-id'] = fa.get('unique-id')

        # bills
        ai_b = event.add_agenda_item(description='Bills referenced by this update.')
        for bill in fa.xpath(".//a[@rel='bill']"):
            bill_name = bill.xpath('string()')
            ai_b.add_bill(bill_name,
                          id=make_pseudo_id(identifier=bill_code_to_id(bill_name), congress=congress),
                          note="Bill was referenced on the House floor.")

        # public laws
        ai_p = event.add_agenda_item(description='Public laws referenced by this update.')
        for law in fa.xpath(".//a[@rel='publaw']"):
            detail_url = '/'.join(law.get('href').split('/')[0:-2]) + '/content-detail.html'
            ai_p.add_bill(law.xpath('string()'),
                          id=make_pseudo_id(**self._public_law_detail_scraper(url=detail_url)),
                          note='Law was referenced on the House floor.')

        # votes
        ai_v = event.add_agenda_item(description='Votes referenced by this update.')
        for vote in fa.xpath(".//a[@rel='vote']"):
            vote_name = vote.xpath('string()')
            ai_v.add_vote(vote_name,
                          id=make_pseudo_id(identifier=vote_code_to_id(vote_name), congress=congress),
                          note='Vote was referenced on the House floor.')

        # reports
        for report in fa.xpath(".//a[@rel='report']"):
            event.add_document('Document referenced by this update.', report.get('href'), media_type='text/html')

        # committees
        for name in house_committees:
            if name.replace('House ', '') in fa_text:
                event.add_committee(name, id=make_pseudo_id(name=name))

        # TODO: identify legislators and add them as participants?
        yield event
Example 5: scrape
# Required import: from pupa.scrape import Event [as alias]
# Or: from pupa.scrape.Event import add_committee [as alias]
def scrape(self):
    last_events = deque(maxlen=10)
    for event, agenda in self.events(since=2011):
        other_orgs = ''
        extras = []

        if '--em--' in event[u'Meeting Location']:
            location_string, note = event[u'Meeting Location'].split('--em--')[:2]
            for each in note.split(' - '):
                if each.startswith('Join'):
                    other_orgs = each
                else:
                    extras.append(each)
        else:
            location_string = event[u'Meeting Location']

        location_list = location_string.split('-', 2)
        location = ', '.join([each.strip() for each in location_list[0:2]])
        if not location:
            continue

        when = self.toTime(event[u'Meeting Date'])
        event_time = event['iCalendar'].subcomponents[0]['DTSTART'].dt
        when = when.replace(hour=event_time.hour,
                            minute=event_time.minute)

        time_string = event['Meeting Time']
        if time_string in ('Deferred',):
            status = 'cancelled'
        elif self.now() < when:
            status = 'confirmed'
        else:
            status = 'passed'

        description = event['Meeting\xa0Topic']
        if any(each in description
               for each in ('Multiple meeting items',
                            'AGENDA TO BE ANNOUNCED')):
            description = ''

        event_name = event['Name']

        # Skip duplicates of recently seen events.
        event_id = (event_name, when)
        if event_id in last_events:
            continue
        else:
            last_events.append(event_id)

        e = Event(name=event_name,
                  start_time=when,
                  timezone=self.TIMEZONE,
                  description=description,
                  location_name=location,
                  status=status)

        if extras:
            e.extras = {'location note': ' '.join(extras)}

        if event['Multimedia'] != 'Not\xa0available':
            e.add_media_link(note='Recording',
                             url=event['Multimedia']['url'],
                             type="recording",
                             media_type='text/html')

        self.addDocs(e, event, 'Agenda')
        self.addDocs(e, event, 'Minutes')

        if event['Name'] == 'City Council Stated Meeting':
            participating_orgs = ['New York City Council']
        elif 'committee' in event['Name'].lower():
            participating_orgs = [event["Name"]]
        else:
            participating_orgs = []

        if other_orgs:
            other_orgs = re.sub('Jointl*y with the ', '', other_orgs)
            participating_orgs += re.split(' and the |, the ', other_orgs)

        for org in participating_orgs:
            e.add_committee(name=org)

        if agenda:
            e.add_source(event["Meeting Details"]['url'])
            for item, _, _ in agenda:
                if item["Name"]:
                    agenda_item = e.add_agenda_item(item["Name"])
                    if item["File\xa0#"]:
                        if item['Action']:
                            note = item['Action']
                        else:
                            note = 'consideration'
                        agenda_item.add_bill(item["File\xa0#"]['label'],
                                             note=note)
        else:
            e.add_source(self.EVENTSPAGE)
# ... (the rest of this example is omitted) ...
Example 6: scrape_chamber
# Required import: from pupa.scrape import Event [as alias]
# Or: from pupa.scrape.Event import add_committee [as alias]
def scrape_chamber(self, chamber=None):
    # If chamber is None, don't exclude any events from the results based on chamber.
    chmbr = cal_chamber_text.get(chamber)
    tables = url_xpath(cal_weekly_events,
                       "//table[@class='date-table']")
    for table in tables:
        date = table.xpath("../.")[0].getprevious().text_content()
        trs = table.xpath("./tr")
        for tr in trs:
            order = ["time", "chamber", "type", "agenda", "location",
                     "video"]
            tds = tr.xpath("./td")
            metainf = {}
            if not tds:
                continue
            for el in range(0, len(order)):
                metainf[order[el]] = tds[el]

            if chmbr and metainf['chamber'].text_content() != chmbr:
                self.info("Skipping event based on chamber.")
                continue

            time = metainf['time'].text_content()
            datetime_string = "%s %s" % \
                (date.strip(' \r\n'), time.strip(' \r\n'))
            location = metainf['location'].text_content()
            description = metainf['type'].text_content()

            dtfmt = "%A, %B %d, %Y %I:%M %p"
            dtfmt_no_time = "%A, %B %d, %Y"
            if time == 'Cancelled':
                self.log("Skipping cancelled event.")
                continue
            else:
                if "Immediately follows H-FLOOR" in datetime_string:
                    continue
                if ' Immediately follows' in datetime_string:
                    datetime_string, _ = datetime_string.split(
                        'Immediately follows')
                if "canceled" in datetime_string.lower():
                    continue
                if "TBA" in datetime_string:
                    continue
                datetime_string = datetime_string.strip()

            try:
                when = dt.datetime.strptime(datetime_string, dtfmt)
            except ValueError:
                when = dt.datetime.strptime(datetime_string, dtfmt_no_time)
            when = self._utc.localize(when)

            event = Event(
                name=description,
                start_date=when,
                location_name=location,
                description=description,
            )
            # The description is a committee name.
            event.add_committee(name=description)
            event.add_source(cal_weekly_events)

            for doc in metainf['agenda'].xpath(".//a"):
                if not doc.text_content():
                    continue
                agenda_url = doc.attrib['href']
                self.add_agenda(
                    agenda_url, doc.text_content(), event)
            yield event