This article collects representative code examples for the Python method pupa.scrape.Event.add_document. If you have been wondering what Event.add_document does and how to call it, the hand-picked examples here should help; you can also explore the containing class, pupa.scrape.Event, for more context.
The sections below show 11 code examples of Event.add_document, ordered by popularity by default.
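Before the individual examples, here is a minimal, self-contained sketch of a typical add_document call, distilled from the examples below. It is not taken from any one project: the event name, dates, and URLs are hypothetical placeholders, and the exact keyword arguments accepted may vary between pupa versions.
import datetime

import pytz
from pupa.scrape import Event

# All names, dates, and URLs below are hypothetical placeholders.
tz = pytz.timezone('US/Eastern')
event = Event(
    name="Example Committee Hearing",
    start_date=tz.localize(datetime.datetime(2018, 1, 18, 10, 0)),
    location_name="Room 101",
)
event.add_source('http://example.com/hearings')  # hypothetical source page

# add_document attaches a named document to the event: a note (display name),
# the document URL, an optional media type, and a policy for duplicate URLs.
event.add_document(
    'Agenda',
    'http://example.com/agenda.pdf',  # hypothetical document URL
    media_type='application/pdf',
    on_duplicate='ignore',
)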
Example 1: parse_div
# Required import: from pupa.scrape import Event [as alias]
# Or: from pupa.scrape.Event import add_document [as alias]
def parse_div(self, row, chamber, com):
    cal_link = row.xpath('.//a[.//span[@id="calendarmarker"]]/@href')[0]
    # event_date = row.xpath('string(.//div[contains(@class,"ItemDate")])').strip()
    title, location, start_date, end_date = self.parse_gcal(cal_link)

    event = Event(
        start_date=start_date,
        end_date=end_date,
        name=title,
        location_name=location,
    )

    event.add_source('http://mgaleg.maryland.gov/webmga/frmHearingSchedule.aspx')

    for item in row.xpath('.//div[@class="col-xs-12a Item"]'):
        description = item.xpath('string(.)').strip()
        agenda = event.add_agenda_item(description=description)

    for item in row.xpath('.//div[contains(@class,"ItemContainer")]/a'):
        description = item.xpath('string(.)').strip()
        agenda = event.add_agenda_item(description=description)

        event.add_document(
            description,
            item.xpath('@href')[0],
            media_type="application/pdf",
            on_duplicate="ignore"
        )

    for item in row.xpath('.//div[contains(@class,"ItemContainer")]'
                          '[./div[@class="col-xs-1 Item"]]'):
        description = item.xpath('string(.)').strip()
        agenda = event.add_agenda_item(description=description)

        bill = item.xpath('.//div[@class="col-xs-1 Item"]/a/text()')[0].strip()
        agenda.add_bill(bill)

    video = row.xpath('.//a[./span[@class="OnDemand"]]')
    if video:
        event.add_media_link(
            'Video of Hearing',
            video[0].xpath('@href')[0],
            'text/html'
        )

    if 'subcommittee' in title.lower():
        subcom = title.split('-')[0].strip()
        event.add_participant(
            subcom,
            type='committee',
            note='host',
        )
    else:
        event.add_participant(
            com,
            type='committee',
            note='host',
        )

    yield event
Example 2: scrape_house_weekly_schedule
# Required import: from pupa.scrape import Event [as alias]
# Or: from pupa.scrape.Event import add_document [as alias]
def scrape_house_weekly_schedule(self):
    url = "http://house.louisiana.gov/H_Sched/Hse_MeetingSchedule.aspx"
    page = self.lxmlize(url)

    meeting_rows = page.xpath('//table[@id = "table229"]/tr')

    valid_meetings = [row for row in meeting_rows if row.xpath(
        './td[1]')[0].text_content().replace(u'\xa0', '') and row.xpath(
        './td/a/img[contains(@src, "PDF-AGENDA.png")]') and 'Not Meeting' not in row.xpath(
        './td[2]')[0].text_content()]

    for meeting in valid_meetings:
        try:
            guid = meeting.xpath('./td/a[descendant::img[contains(@src,'
                                 '"PDF-AGENDA.png")]]/@href')[0]
            # self.logger.debug(guid)
            self.warning("logger.debug" + guid)
        except KeyError:
            continue  # Sometimes we have a dead link. This is only on
            # dead entries.

        committee_name = meeting.xpath('./td[1]/text()')[0].strip()
        meeting_string = meeting.xpath('./td[2]')[0].text_content()

        if "@" in meeting_string:
            continue  # Contains no time data.

        date, time, location = ([s.strip() for s in meeting_string.split(
            ',') if s] + [None]*3)[:3]

        # check for time in date because of missing comma
        time_srch = re.search(r'\d{2}:\d{2} (AM|PM)', date)
        if time_srch:
            location = time
            time = time_srch.group()
            date = date.replace(time, '')

        # self.logger.debug(location)
        self.warning("logger.debug" + location)

        year = datetime.datetime.now().year
        datetime_string = ' '.join((date, str(year), time))
        when = datetime.datetime.strptime(datetime_string, '%b %d %Y %I:%M %p')
        when = self._tz.localize(when)

        description = 'Committee Meeting: {}'.format(committee_name)
        # self.logger.debug(description)
        self.warning("logger.debug" + description)

        event = Event(name=description,
                      # 'when' is already timezone-aware; localizing it a
                      # second time would raise a ValueError
                      start_date=when,
                      location_name=location)
        event.add_source(url)
        event.add_participant(committee_name, type='committee', note='host')
        event.add_document(note='Agenda', url=guid, text='agenda',
                           media_type='application/pdf')

        yield event
Example 3: scrape
# Required import: from pupa.scrape import Event [as alias]
# Or: from pupa.scrape.Event import add_document [as alias]
def scrape(self):
    EVENTS_URL = 'http://www.akleg.gov/basis/Meeting/Find'
    events = self.lxmlize(EVENTS_URL).xpath('//ul[@id="meetingResults"]/li')
    for info in events:
        event_url = info.xpath('span[@class="col04"]/a/@href')[0]
        doc = self.lxmlize(event_url)

        # Skip events that are placeholders or tentative
        # Also skip whole-chamber events
        if any(x.strip().startswith("No Meeting") for x in
               doc.xpath('//div[@class="schedule"]//text()')) \
                or "session" in \
                info.xpath('span[@class="col01"]/text()')[0].lower():
            continue

        name = " ".join(
            x.strip()
            for x in doc.xpath('//div[@class="schedule"]//text()')
            if x.strip()
        )

        # Skip events with no name
        if not name:
            continue

        event = Event(
            start_date=self._TZ.localize(
                datetime.datetime.strptime(
                    info.xpath('span[@class="col02"]/text()')[0],
                    self._DATETIME_FORMAT,
                )
            ),
            name=name,
            location_name=doc.xpath(
                '//div[@class="heading-container"]/span/text()'
            )[0].title()
        )

        event.add_participant(
            info.xpath('span[@class="col01"]/text()')[0].title(),
            type='committee',
            note='host',
        )
        for document in doc.xpath('//td[@data-label="Document"]/a'):
            event.add_document(
                document.xpath('text()')[0],
                url=document.xpath('@href')[0]
            )

        event.add_source(EVENTS_URL)
        event.add_source(event_url.replace(" ", "%20"))

        yield event
Example 4: scrape
# Required import: from pupa.scrape import Event [as alias]
# Or: from pupa.scrape.Event import add_document [as alias]
def scrape(self):
    page = self.lxmlize(calurl)
    events = page.xpath("//table[@class='agenda-body']//tr")[1:]

    for event in events:
        comit_url = event.xpath(
            ".//a[contains(@href, '/Pages/comm-info.aspx?c=')]")

        if len(comit_url) != 1:
            raise Exception

        comit_url = comit_url[0]
        who = self.scrape_participants(comit_url.attrib['href'])

        tds = event.xpath("./*")
        date = tds[0].text_content().strip()
        cttie = tds[1].text_content().strip()
        _chamber, cttie = [x.strip() for x in cttie.split(" - ", 1)]
        info = tds[2]
        name = info.xpath("./a[contains(@href, 'raw')]")[0]
        notice = name.attrib['href']
        name = name.text
        time, where = info.xpath("./i/text()")
        what = tds[3].text_content()
        what = what.replace("Items: ", "")
        if "(None)" in what:
            continue
        what = [x.strip() for x in what.split(";")]

        when = ", ".join([date, str(dt.datetime.now().year), time])
        when = dt.datetime.strptime(when, "%a %b %d, %Y, %I:%M %p")

        event = Event(
            name=name,
            location_name=where,
            start_date=self._tz.localize(when),
        )

        event.add_source(calurl)
        event.add_committee(cttie, note='host')
        event.add_document("notice", notice, media_type='application/pdf')

        for entry in what:
            item = event.add_agenda_item(entry)
            if entry.startswith('AB') or entry.startswith('SB'):
                item.add_bill(entry)

        for thing in who:
            event.add_person(thing['name'])

        yield event
Example 5: scrape
# Required import: from pupa.scrape import Event [as alias]
# Or: from pupa.scrape.Event import add_document [as alias]
def scrape(self):
    tz = pytz.timezone("US/Eastern")
    get_short_codes(self)
    page = self.lxmlize(URL)

    table = page.xpath(
        "//table[@id='ctl00_ContentPlaceHolderCol1_GridView1']")[0]

    for event in table.xpath(".//tr")[1:]:
        tds = event.xpath("./td")
        committee = tds[0].text_content().strip()

        descr = [x.text_content() for x in tds[1].xpath(".//span")]
        if len(descr) != 1:
            raise Exception
        descr = descr[0].replace('.', '').strip()

        when = tds[2].text_content().strip()
        where = tds[3].text_content().strip()

        notice = tds[4].xpath(".//a")[0]
        notice_href = notice.attrib['href']
        notice_name = notice.text

        when = dt.datetime.strptime(when, "%m/%d/%Y %I:%M %p")
        when = pytz.utc.localize(when)

        event = Event(name=descr, start_time=when, classification='committee-meeting',
                      description=descr, location_name=where, timezone=tz.zone)

        if "/" in committee:
            committees = committee.split("/")
        else:
            committees = [committee]

        for committee in committees:
            if "INFO" not in committee:
                # Look up the committee by its short code; using the literal
                # string "committee" as the key would always fall through to
                # the default.
                committee = self.short_ids.get(committee, {"chamber": "unknown",
                                                           "name": committee})
            else:
                committee = {
                    "chamber": "joint",
                    "name": committee,
                }
            event.add_committee(committee['name'], note='host')

        event.add_source(URL)
        event.add_document(notice_name,
                           notice_href,
                           media_type='text/html')

        for bill in self.get_related_bills(notice_href):
            a = event.add_agenda_item(description=bill['descr'])
            a.add_bill(
                bill['bill_id'],
                note=bill['type']
            )

        yield event
Example 6: scrape_event
# Required import: from pupa.scrape import Event [as alias]
# Or: from pupa.scrape.Event import add_document [as alias]
def scrape_event(self, row):
    date_td = row.xpath('td[1]')[0]
    info_td = row.xpath('td[2]')[0]

    date = date_td.xpath('b')[0].text.strip()
    time = date_td.xpath('b/following-sibling::text()')[0].strip()

    date_and_time = "{} {}".format(date, time)
    start_date = datetime.datetime.strptime(
        date_and_time, '%m/%d/%y %I:%M %p')

    title = info_td.xpath('font[1]/strong')[0].text.strip()

    all_text = info_td.xpath('descendant-or-self::*/text()')
    notes = (line.strip() for line in all_text if line.strip())
    notes = list(notes)
    # Skip the first line, which is the title
    notes = notes[1:]
    # Split out the address
    address = notes[0]
    notes = notes[1:]
    # The rest just becomes the description
    notes = "\n".join(notes)

    event = Event(
        start_date=self._TZ.localize(start_date),
        name=title,
        location_name=address,
        description=notes
    )

    event.add_source(self.URL)

    if info_td.xpath('a[contains(font/text(),"agenda")]'):
        agenda_url = info_td.xpath('a/@href')[0]
        event.add_document(
            "Agenda",
            url=agenda_url
        )

    yield event
Example 7: scrape
# Required import: from pupa.scrape import Event [as alias]
# Or: from pupa.scrape.Event import add_document [as alias]
def scrape(self):
    method = 'events/?state={}&dtstart=1776-07-04'.format(self.state)
    self.events = self.api(method)

    seen = set()

    for event in self.events:
        begin = self._date_parse(event.pop('when'))
        end = self._date_parse(event.pop('end'))
        all_day = event.pop('all_day', False)

        e = Event(name=event.pop('description'),
                  classification=event.pop('type'),
                  location_name=event.pop('location'),
                  timezone=event.pop('timezone'),
                  start_time=begin,
                  end_time=end,
                  all_day=all_day,)

        if len(e.name) >= 300:
            e.name = e.name[:290]

        if len(e.location['name']) >= 100:
            e.location['name'] = e.location['name'][:90]

        composite_key = (e.name, e.description, e.start_time)
        if composite_key in seen:
            print("Duplicate found: %s/%s/%s" % (composite_key))
            continue

        seen.add(composite_key)

        for source in event.pop('sources'):
            if 'retrieved' in source:
                source.pop('retrieved')
            e.add_source(**source)

        if e.sources == []:
            continue

        ignore = ['country', 'level', 'state', 'created_at', 'updated_at',
                  'notes', '+location_url', 'session', 'id', '+chamber',
                  '+agenda', '+cancelled', '+media_contact', '+contact',
                  '+details']
        # +agenda:
        #   Agenda on old (very old) OpenStates data is actually a string
        #   and not any sort of structured data we can use in the items
        #   schema, and is only present for a handful of events.

        for i in ignore:
            if i in event:
                event.pop(i)

        for link in ['+link', 'link']:
            if link in event:
                e.add_source(url=event.pop(link))

        for p in event.pop('participants', []):
            type_ = {
                "committee": "organization",
                "legislator": "person",
                None: None,
            }[p.get('participant_type')]

            if type_ is None:
                # Garbage data.
                continue

            e.add_participant(name=p['participant'],
                              note=p['type'],
                              type=type_,)

        for b in event.pop('related_bills', []):
            item = e.add_agenda_item(
                b.pop('description', b.pop('+description', None)))

            item.add_bill(bill=b['bill_id'],
                          note=b.pop('type', b.pop('+type', None)))

        seen_documents = set([])
        for document in event.pop('documents', []):
            if document['url'] in seen_documents:
                print("XXX: Buggy data in: Duped Document URL: %s (%s)" % (
                    document['url'], document['name']
                ))
                continue

            seen_documents.add(document['url'])

            e.add_document(url=document['url'],
                           note=document['name'])

        assert event == {}, "Unknown fields: %s" % (
            ", ".join(event.keys())
        )

        yield e
Example 8: scrape
# Required import: from pupa.scrape import Event [as alias]
# Or: from pupa.scrape.Event import add_document [as alias]
def scrape(self, window=None):
    if window:
        n_days_ago = datetime.datetime.utcnow() - datetime.timedelta(float(window))
    else:
        n_days_ago = None

    events = self.events(n_days_ago)

    for event, web_event in self._merge_events(events):
        body_name = event["EventBodyName"]

        if 'Board of Directors -' in body_name:
            body_name, event_name = [part.strip()
                                     for part
                                     in body_name.split('-')]
        else:
            event_name = body_name

        # Events can have an EventAgendaStatusName of "Final", "Final Revised",
        # and "Final 2nd Revised."
        # We classify these events as "passed."
        status_name = event['EventAgendaStatusName']
        if status_name.startswith('Final'):
            status = 'passed'
        elif status_name == 'Draft':
            status = 'confirmed'
        elif status_name == 'Canceled':
            status = 'cancelled'
        else:
            status = 'tentative'

        location = event["EventLocation"]

        if not location:
            # We expect some events to have no location. LA Metro would
            # like these displayed in the Councilmatic interface. However,
            # OCD requires a value for this field. Add a sane default.
            location = 'Not available'

        e = Event(event_name,
                  start_date=event["start"],
                  description='',
                  location_name=location,
                  status=status)

        e.pupa_id = str(event['EventId'])

        # Metro requires the EventGuid to build out MediaPlayer links.
        # Add both the English event GUID, and the Spanish event GUID if
        # it exists, to the extras dict.
        e.extras = {'guid': event['EventGuid']}

        legistar_api_url = self.BASE_URL + '/events/{0}'.format(event['EventId'])
        e.add_source(legistar_api_url, note='api')

        if event.get('SAPEventGuid'):
            e.extras['sap_guid'] = event['SAPEventGuid']

        if 'event_details' in event:
            # if there is not a meeting detail page on legistar
            # don't capture the agenda data from the API
            for item in self.agenda(event):
                agenda_item = e.add_agenda_item(item["EventItemTitle"])
                if item["EventItemMatterFile"]:
                    identifier = item["EventItemMatterFile"]
                    agenda_item.add_bill(identifier)

                if item["EventItemAgendaNumber"]:
                    # To the notes field, add the item number as given in the agenda minutes
                    note = "Agenda number, {}".format(item["EventItemAgendaNumber"])
                    agenda_item['notes'].append(note)

                # The EventItemAgendaSequence provides
                # the line number of the Legistar agenda grid.
                agenda_item['extras']['item_agenda_sequence'] = item['EventItemAgendaSequence']

            # Historically, the Legistar system has duplicated the EventItemAgendaSequence,
            # resulting in data inaccuracies. The scrape should fail in such cases, until Metro
            # cleans the data.
            item_agenda_sequences = [item['extras']['item_agenda_sequence'] for item in e.agenda]
            if len(item_agenda_sequences) != len(set(item_agenda_sequences)):
                error_msg = 'An agenda has duplicate agenda items on the Legistar grid: \
                            {event_name} on {event_date} ({legistar_api_url}). \
                            Contact Metro, and ask them to remove the duplicate EventItemAgendaSequence.'

                raise ValueError(error_msg.format(event_name=e.name,
                                                  event_date=e.start_date.strftime("%B %d, %Y"),
                                                  legistar_api_url=legistar_api_url))

        e.add_participant(name=body_name,
                          type="organization")

        if event.get('SAPEventId'):
            e.add_source(self.BASE_URL + '/events/{0}'.format(event['SAPEventId']),
                         note='api (sap)')

        if event['EventAgendaFile']:
            e.add_document(note='Agenda',
                           url=event['EventAgendaFile'],
                           media_type="application/pdf")
#......... (remainder of this example omitted) .........
Example 9: _parse_house_floor_xml_legislative_activity
# Required import: from pupa.scrape import Event [as alias]
# Or: from pupa.scrape.Event import add_document [as alias]
def _parse_house_floor_xml_legislative_activity(self, xml):
    """
    Parses XML string of House floor updates and yields them in loop.

    @param xml: XML of field update
    @type xml: string
    @return: complete Event object
    @rtype: Event
    """
    tree = self._xml_parser(xml)

    congress = tree.xpath('.//legislative_congress')[0].get('congress')

    house_committees = self._get_current_house_committee_names()
    for fa in tree.xpath('.//floor_action'):
        fa_text = fa.xpath('.//action_description')[0].xpath('string()')

        eastern = pytz.timezone('US/Eastern')
        dt = datetime.datetime.strptime(fa.xpath('action_time')[0].get('for-search'), '%Y%m%dT%H:%M:%S')

        event = Event('House Floor Update on {0} at {1}.'.format(dt.strftime('%Y-%m-%d'), dt.strftime('%H:%M:%S')),
                      eastern.localize(dt).astimezone(pytz.utc),
                      'US/Eastern',
                      '',
                      description=fa_text,
                      classification='floor_update')

        event.set_location("East Capitol Street Northeast & First St SE, Washington, DC 20004",
                           note='House Floor', url='http://www.house.gov',
                           coordinates={'latitude': '38.889931', 'longitude': '-77.009003'})

        event.add_source(self._house_floor_src_url(date_str=tree.xpath('.//legislative_day')[0].get('date')),
                         note="Scraped from the Office of the Clerk, U.S. House of Representatives website.")
        event.extras['act-id'] = fa.get('act-id')
        event.extras['unique-id'] = fa.get('unique-id')

        # bills
        ai_b = event.add_agenda_item(description='Bills referenced by this update.')
        for bill in fa.xpath(".//a[@rel='bill']"):
            bill_name = bill.xpath('string()')
            ai_b.add_bill(bill_name, id=make_pseudo_id(identifier=bill_code_to_id(bill_name), congress=congress),
                          note="Bill was referenced on the House floor.")

        # publaws
        ai_p = event.add_agenda_item(description='Public laws referenced by this update.')
        for law in fa.xpath(".//a[@rel='publaw']"):
            detail_url = '/'.join(law.get('href').split('/')[0:-2]) + '/content-detail.html'
            ai_p.add_bill(law.xpath('string()'),
                          id=make_pseudo_id(**self._public_law_detail_scraper(url=detail_url)),
                          note='Law was referenced on the House floor.')

        # votes
        ai_v = event.add_agenda_item(description='Votes referenced by this update.')
        for vote in fa.xpath(".//a[@rel='vote']"):
            vote_name = vote.xpath('string()')
            ai_v.add_vote(vote_name,
                          id=make_pseudo_id(identifier=vote_code_to_id(vote_name), congress=congress),
                          note='Vote was referenced on the House floor.')

        # reports
        for report in fa.xpath(".//a[@rel='report']"):
            event.add_document('Document referenced by this update.', report.get('href'), media_type='text/html')

        for name in house_committees:
            if name.replace('House ', '') in fa_text:
                event.add_committee(name, id=make_pseudo_id(name=name))

        # TODO identify legislators and add them as participants?

        yield event
Example 10: scrape_agenda
# Required import: from pupa.scrape import Event [as alias]
# Or: from pupa.scrape.Event import add_document [as alias]
def scrape_agenda(self, url):
    page = self.lxmlize(url)

    # Get the date/time info:
    date_time = page.xpath("//table[@class='time_place']")
    if date_time == []:
        return

    date_time = date_time[0]
    lines = date_time.xpath("./tr")
    metainf = {}
    for line in lines:
        tds = line.xpath("./td")
        metainf[tds[0].text_content()] = tds[1].text_content()
    date = metainf['DATE:']
    time = metainf['TIME:']
    where = metainf['PLACE:']

    # check for duration in time
    if ' - ' in time:
        start, end = time.split(' - ')
        am_pm_srch = re.search('(?i)(am|pm)', end)
        if am_pm_srch:
            time = ' '.join([start, am_pm_srch.group().upper()])
        else:
            time = start

    fmts = [
        "%A, %B %d, %Y",
        "%A, %B %d, %Y %I:%M %p",
        "%A, %B %d, %Y %I:%M",
    ]

    event_desc = "Meeting Notice"
    if 'Rise' in time:
        datetime = date
        event_desc = "Meeting Notice: Starting at {}".format(time)
    else:
        datetime = "%s %s" % (date, time)
    if "CANCELLED" in datetime.upper():
        return

    transtable = {
        "P.M": "PM",
        "PM.": "PM",
        "P.M.": "PM",
        "A.M.": "AM",
        "POSTPONED": "",
        "RESCHEDULED": "",
        "and Rise of the Senate": "",
    }
    for trans in transtable:
        datetime = datetime.replace(trans, transtable[trans])
    datetime = datetime.strip()

    for fmt in fmts:
        try:
            datetime = dt.datetime.strptime(datetime, fmt)
            break
        except ValueError:
            continue

    event = Event(
        name=event_desc,
        start_date=self._tz.localize(datetime),
        location_name=where,
    )

    event.add_source(url)

    # aight. Let's get us some bills!
    bills = page.xpath("//b/a")
    for bill in bills:
        bill_ft = bill.attrib['href']
        event.add_document(
            bill.text_content(), bill_ft,
            media_type="application/pdf")

        root = bill.xpath('../../*')
        root = [x.text_content() for x in root]
        bill_id = "".join(root)

        if "SCHEDULED FOR" in bill_id:
            continue

        descr = bill.getparent().getparent().getparent().getnext().getnext().text_content()

        for thing in replace:
            bill_id = bill_id.replace(thing, replace[thing])

        item = event.add_agenda_item(descr)
        item.add_bill(bill.text_content())

    committee = page.xpath("//span[@id='lblSession']")[0].text_content()
    event.add_participant(committee, 'committee', note='host')

    yield event
Example 11: scrape
# Required import: from pupa.scrape import Event [as alias]
# Or: from pupa.scrape.Event import add_document [as alias]
def scrape(self):
    meetings_html = self.urlopen(self.ARLINGTON_MEETING_PAGE)
    meetings_lxml = lxml.html.fromstring(meetings_html)

    for meeting_type in ('archive', 'upcoming'):
        for meeting in meetings_lxml.cssselect('#%s tbody tr' % meeting_type):

            # attempt to map the cells across table types.
            # if the sizes mismatch, ignore this one (it's an "empty" message)
            try:
                cell_mapping = self._organize_cells(meeting_type, meeting.cssselect('td'))
            except:
                continue

            meeting_title = cell_mapping['title'].text
            meeting_date = datetime.datetime.fromtimestamp(int(cell_mapping['date'].cssselect('span')[0].text))

            e = Event(name=meeting_title, when=meeting_date, location='unknown')
            e.add_source(self.ARLINGTON_MEETING_PAGE)

            # detect agenda url, if present
            meeting_agenda_url = None
            if len(cell_mapping['agenda'].cssselect('a')) > 0:
                meeting_agenda_url = cell_mapping['agenda'].cssselect('a')[0].attrib.get('href')

            # follow the agenda URL and attempt to extract associated documents
            if meeting_agenda_url is not None:
                e.add_link(meeting_agenda_url)
                e.add_document(name='Agenda', url=meeting_agenda_url, mimetype='text/html')

                meeting_agenda_html = self.urlopen(meeting_agenda_url)
                meeting_agenda_lxml = lxml.html.fromstring(meeting_agenda_html)
                for link in meeting_agenda_lxml.cssselect('a'):
                    link_url = link.attrib.get('href', '')
                    if not len(link_url):
                        continue
                    if 'metaviewer.php' in link_url.lower():
                        # NOTE: application/pdf is a guess, may not always be correct
                        if link.text is not None:
                            e.add_document(name=link.text, url=link_url, mimetype='application/pdf')

            # skip everything below here for the 'upcoming' table
            if meeting_type == 'upcoming':
                continue

            # detect video
            # TODO: extract actual mp4 files
            video_cell = cell_mapping['video'].cssselect('a')
            if len(video_cell) > 0:
                video_url_match = re.search(r"http://(.*?)'", video_cell[0].attrib.get('onclick', ''))
                if video_url_match is not None:
                    e.add_media_link(name="Video", url=video_url_match.group(0), mimetype='text/html')

            # detect audio
            audio_cell = cell_mapping['audio'].cssselect('a')
            if len(audio_cell) > 0:
                e.add_media_link(name="Audio", url=audio_cell[0].attrib.get('href', ''), mimetype='audio/mpeg')

            # detect minutes
            minutes_cell = cell_mapping['minutes'].cssselect('a')
            if len(minutes_cell) > 0:
                e.add_media_link(name="Minutes", url=minutes_cell[0].attrib.get('href', ''), mimetype='text/html')

            yield e