本文整理匯總了Python中sgml.HTMLParser.HTMLParser類的典型用法代碼示例。如果您正苦於以下問題：Python HTMLParser類的具體用法？Python HTMLParser怎麼用？Python HTMLParser使用的例子？那麼，這裡精選的類代碼示例或許可以為您提供幫助。
在下文中一共展示了HTMLParser類的15個代碼示例，這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚，您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: __init__
def __init__(self, viewer, reload=0):
    """Initialize parser state for `viewer` and set up the HTMLParser base.

    viewer -- object providing a `context` attribute; it is also wrapped
              in a formatter.AbstractFormatter that the base class uses.
    reload -- when true, AppletLoader.set_reload() is consulted and this
              parser is attached to whatever true object it returns.
    """
    global _inited
    self.viewer, self.reload = viewer, reload
    context = viewer.context
    self.context = context
    self.app = context.app
    self.load_dingbat = self.app.load_dingbat
    self.loaded = []
    self.current_map = self.target = None
    self.formatter_stack = []
    base_formatter = formatter.AbstractFormatter(self.viewer)
    HTMLParser.__init__(self, base_formatter)
    self.push_formatter(base_formatter)
    # Module-level setup is run only by the first instance created.
    if not _inited:
        _inited = 1
        init_module(self.app.prefs)
    self._ids = {}
    # Hackery so reload status can be reset when all applets are loaded
    import AppletLoader
    if self.reload:
        self.reload1 = AppletLoader.set_reload(self.context)
    else:
        # Keep the caller's (false) value, exactly as `a and b` would.
        self.reload1 = self.reload
    if self.reload1:
        self.reload1.attach(self)
    strict = self.app.prefs.GetBoolean('parsing-html', 'strict')
    if strict:
        self.sgml_parser.restrict(0)
    # Information from <META ... CONTENT="..."> is collected here.
    # Entries are KEY --> [(NAME, HTTP-EQUIV, CONTENT), ...], where
    # KEY is (NAME or HTTP-EQUIV).
    self._metadata = {}
示例2: unknown_entityref
def unknown_entityref(self, entname, terminator):
    """Render entity `entname` as a dingbat, else defer to HTMLParser.

    self.load_dingbat(entname) may return a false value (the base class
    handles the entity), a (text, tag) tuple (the text is emitted,
    styled with `tag` when the tag is non-empty), or any other true
    object, which is used as the image of a Label embedded in the
    viewer's text widget.  Nothing happens while self.suppress_output
    is set.
    """
    if self.suppress_output:
        return
    dingbat = self.load_dingbat(entname)
    if not dingbat:
        # Could not load dingbat, allow parent class to handle:
        HTMLParser.unknown_entityref(self, entname, terminator)
        return
    if type(dingbat) is not TupleType:
        background = self.viewer.text['background']
        label = Label(self.viewer.text, image=dingbat,
                      background=background, borderwidth=0)
        self.add_subwindow(label)
        # this needs to be done *after* the add_subwindow()
        # call to get the right <Button-3> bindings.
        if self.anchor:
            IconicEntityLinker(self.viewer, self.anchor,
                               self.target, label)
        return
    text, fonttag = dingbat
    if not fonttag:
        # No style tag supplied: emit the replacement text as-is.
        self.handle_data(text)
        return
    if fonttag != "_ding":
        # Prefix with the writer's current font tag (if any).
        fonttag = (self.formatter.writer.fonttag or '') + fonttag
    self.viewer.configure_fonttag(fonttag)
    self.formatter.push_style(fonttag)
    self.viewer.text.tag_raise(fonttag)
    self.handle_data(text)
    self.formatter.pop_style()
示例3: __init__
def __init__(self, writer, settings, context):
    """Initialize the parser on top of an AbstractFormatter for `writer`.

    writer   -- wrapped in an AbstractFormatter and passed to HTMLParser.
    settings -- read for strict_parsing, imageflag and footnoteflag.
    context  -- supplies get_baseurl() and get_url().

    The first instance created also fills self.dingbats from
    self.fontdingbats and Greek.entitydefs, registering each entry under
    both the 'grey' and 'color' keys.
    """
    if not self._inited:
        shades = ('grey', 'color')
        for name, glyph in self.fontdingbats.items():
            for shade in shades:
                self.dingbats[(name, shade)] = glyph
        import Greek
        for name, glyph in Greek.entitydefs.items():
            symbol_entry = (glyph, 'Symbol')
            for shade in shades:
                self.dingbats[(name, shade)] = symbol_entry
        PrintingHTMLParser._inited = 1
    HTMLParser.__init__(self, AbstractFormatter(writer))
    if settings.strict_parsing:
        self.sgml_parser.restrict(0)
    self._baseurl = context.get_baseurl()
    self.context = context
    self.settings = settings
    if settings.imageflag:
        self._image_loader = utils.image_loader
    self._image_cache = {}
    self._anchors = {None: None}
    self._anchor_sequence = []
    self._anchor_xforms = []
    # Pick the single anchor transform installed at construction time.
    if settings.footnoteflag:
        transform = disallow_self_reference(context.get_url())
    else:
        transform = disallow_anchor_footnotes
    self.add_anchor_transform(transform)
    self.__fontsize = [3]
示例4: do_hr
def do_hr(self, attrs):
    """Handle an <HR> tag, including its src/noshade/color/size attributes.

    attrs -- mapping of the tag's attributes.

    With a 'src' attribute and image loading enabled, the rule is
    replaced by an image emitted through do_img(), aligned according to
    'align' (default 'center').  Otherwise the base class handler runs
    and the last entry of self.viewer.rules is restyled.  If that list
    is empty there is nothing to restyle and the method returns.
    """
    if attrs.has_key('src') and self.app.load_images:
        align = extract_keyword('align', attrs, default='center',
                conv=lambda s,gu=grailutil: gu.conv_enumeration(
                    gu.conv_normstring(s), ['left', 'center', 'right']))
        self.implied_end_p()
        self.formatter.push_alignment(align)
        self.do_img({'border': '0', 'src': attrs['src']})
        self.formatter.pop_alignment()
        self.formatter.add_line_break()
        return
    HTMLParser.do_hr(self, attrs)
    color = extract_keyword('color', attrs)
    rules = self.viewer.rules
    if not rules:
        # Check emptiness *before* indexing: the previous code evaluated
        # rules[-1] first, so an empty list raised IndexError and the
        # later "and self.viewer.rules" test could never help.
        return
    rule = rules[-1]
    if attrs.has_key('noshade'):
        if color:
            if not self.configcolor('background', color, widget=rule):
                # Requested color was not accepted; use the text foreground.
                self.configcolor('background',
                                 self.viewer.text['foreground'],
                                 widget=rule)
        else:
            # this color is known to work already
            rule.config(background=self.viewer.text['foreground'])
        rule.config(relief=FLAT)
        size = extract_keyword('size', attrs, 2,
                               conv=grailutil.conv_integer)
        if size == 1:
            # could not actually set it to 1 unless it was flat; do it now:
            width = string.atoi(rule.cget('width'))
            rule.config(borderwidth=0, height=1, width=width+2)
    elif color:
        self.configcolor('background', color, widget=rule)
示例5: make_format
def make_format(self, format, default='disc', listtype = None):
    """Return the list-item label for `format`, preferring a dingbat.

    format   -- requested bullet format; `default` is used when it is false.
    default  -- fallback format name.
    listtype -- kind of the enclosing list; dingbats are only looked up
                when it is 'ul'.

    For 'disc', 'circle' and 'square' in a 'ul' list the result of
    self.load_dingbat() is returned when it is true.  In every other
    case the base class decides, and `listtype` is always forwarded so
    the dingbat fallback sees the same list type as the other path.
    """
    fmt = format or default
    if fmt in ('disc', 'circle', 'square') and listtype == 'ul':
        img = self.load_dingbat(fmt)
        if img:
            return img
    # Previously the dingbat fallback dropped `listtype`, unlike the
    # non-dingbat path; forward it consistently.
    return HTMLParser.make_format(self, format, default,
                                  listtype = listtype)
示例6: header_bgn
def header_bgn(self, tag, level, attrs):
    """Start a header and emit its optional leading dingbat or image.

    After the base class handler runs, a 'dingbat' attribute is rendered
    through unknown_entityref(); failing that, a 'src' attribute is
    rendered through do_img().  Either one is followed by a single
    space of flowing data.
    """
    HTMLParser.header_bgn(self, tag, level, attrs)
    icon = extract_keyword('dingbat', attrs)
    if icon:
        self.unknown_entityref(icon, '')
    elif attrs.has_key('src'):
        self.do_img(attrs)
    else:
        # Neither decoration present: no separator needed.
        return
    self.formatter.add_flowing_data(' ')
示例7: unknown_entityref
def unknown_entityref(self, entname, terminator):
    """Emit entity `entname` as a dingbat, else defer to HTMLParser.

    A tuple from self.load_dingbat() is passed as the argument list of
    the writer's ps.push_font_string().  Any other true value is
    restricted to 0.9 times the current font size and to the page width,
    then sent via send_eps_data() with 'absmiddle'.  A false value hands
    the entity to the base class.
    """
    dingbat = self.load_dingbat(entname)
    if type(dingbat) is types.TupleType:
        push_font_string = self.formatter.writer.ps.push_font_string
        apply(push_font_string, dingbat)
        self.formatter.assert_line_data()
        return
    if not dingbat:
        HTMLParser.unknown_entityref(self, entname, terminator)
        return
    max_height = 0.9 * self.formatter.writer.ps.get_fontsize()
    max_width = self.formatter.writer.ps.get_pagewidth()
    dingbat.restrict(max_height, max_width)
    self.formatter.writer.send_eps_data(dingbat, 'absmiddle')
    self.formatter.assert_line_data()
示例8: close
def close(self):
    """Close the parser, drop the reload hook and arm any refresh.

    After the base class close(), the parser detaches from self.reload1
    (when true) and clears it.  A "refresh" value is then taken from the
    first self._metadata["refresh"] entry or, failing that, from the
    context's headers.  A true value creates a DynamicReloader.
    """
    HTMLParser.close(self)
    reloader = self.reload1
    if reloader:
        reloader.detach(self)
    self.reload1 = None
    if self._metadata.has_key("refresh"):
        # Entries are (name, http-equiv, content); only content is used.
        _name, _http_equiv, refresh = self._metadata["refresh"][0]
    elif self.context.get_headers().has_key("refresh"):
        refresh = self.context.get_headers()["refresh"]
    else:
        refresh = None
    if refresh:
        DynamicReloader(self.context, refresh)
示例9: start_pre
def start_pre(self, attrs):
    """Start a <PRE> block, shrinking the font so WIDTH columns fit.

    attrs -- mapping of the tag's attributes; 'width' is read as an
             integer with a default of 0.

    When `width` space characters in the current font would be wider
    than the page, the font size is scaled by pagewidth / required.
    Otherwise the size is left AS_IS.  The resulting font is pushed on
    the formatter in both cases.
    """
    HTMLParser.start_pre(self, attrs)
    new_size = AS_IS
    width = extract_keyword('width', attrs, 0, conv=conv_integer)
    if width > 0:
        ps = self.formatter.writer.ps
        space_width = ps._font.text_width(' ')
        pagewidth = ps.get_pagewidth()
        required = space_width * width
        if required > pagewidth:
            # Force true division: if both operands happen to be ints,
            # Python 2's "/" would floor the ratio to 0 and yield a
            # zero font size.  (No effect when they are already floats.)
            factor = float(pagewidth) / required
            new_size = ps.get_fontsize() * factor
    self.formatter.push_font((new_size, AS_IS, AS_IS, AS_IS))
示例10: make_format
def make_format(self, format, default='disc', listtype=None):
    """Return the list-item label for `format`, preferring a dingbat.

    The effective format (`format` or `default`) is lower-cased when it
    is a string.  For 'disc', 'circle' and 'square': inside a 'ul' list
    the loaded dingbat is returned (falling back to the base class when
    loading yields a false value); in any other list type the label is
    '1.'.  All other formats are delegated to the base class.
    """
    shape = format or default
    if type(shape) is StringType:
        shape = string.lower(shape)
    if shape not in ('disc', 'circle', 'square'):
        return HTMLParser.make_format(self, format, default,
                                      listtype = listtype)
    if listtype != 'ul':
        return '1.'
    dingbat = self.load_dingbat(shape)
    return dingbat or HTMLParser.make_format(self, format, default,
                                             listtype = listtype)
示例11: do_li
def do_li(self, attrs):
    """Handle <LI>, honoring a 'dingbat' or 'src' bullet attribute.

    The custom bullet handler runs when there is no enclosing list
    (self.list_stack is empty) or the innermost list is a 'ul'.
    'dingbat' takes precedence over 'src'.  The base class handler
    always runs afterwards.
    """
    if attrs.has_key('dingbat'):
        if not self.list_stack or self.list_stack[-1][0] == 'ul':
            self.list_handle_dingbat(attrs)
    elif attrs.has_key('src'):
        if not self.list_stack or self.list_stack[-1][0] == 'ul':
            self.list_handle_src(attrs)
    HTMLParser.do_li(self, attrs)
示例12: start_body
def start_body(self, attrs):
    """Handle <BODY>, applying document colors when the user allows it.

    Nothing beyond the base class handler happens unless the
    'parsing-html' / 'honor-colors' preference is true.  Otherwise
    'bgcolor' configures the background, and 'text', 'link', 'vlink'
    and 'alink' configure the foreground (the last three for the 'a',
    'ahist' and 'atemp' tags respectively).
    """
    HTMLParser.start_body(self, attrs)
    honor_colors = self.app.prefs.GetBoolean('parsing-html', 'honor-colors')
    if not honor_colors:
        return
    from grailutil import conv_normstring
    bgcolor = extract_keyword('bgcolor', attrs, conv=conv_normstring)
    applied = bgcolor and self.configcolor('background', bgcolor)
    if applied:
        # Normally not important, but ISINDEX would cause
        # these to be non-empty, as would all sorts of illegal stuff:
        for widget in self.viewer.rules + self.viewer.subwindows:
            widget.config(highlightbackground = applied)
    self.configcolor('foreground',
                     extract_keyword('text', attrs, conv=conv_normstring))
    # Link colors, in the same order the attributes were handled before.
    for attr_name, tag in (('link', 'a'),
                           ('vlink', 'ahist'),
                           ('alink', 'atemp')):
        self.configcolor('foreground',
                         extract_keyword(attr_name, attrs,
                                         conv=conv_normstring),
                         tag)
示例13: do_dt
def do_dt(self, attrs):
    """Handle <DT>, then call the writer's suppress_indentation()."""
    HTMLParser.do_dt(self, attrs)
    writer = self.formatter.writer
    writer.suppress_indentation()
示例14: do_li
def do_li(self, attrs):
    """Handle <LI>: check for a dingbat, run the base handler, then call
    the writer's suppress_indentation()."""
    self.list_check_dingbat(attrs)
    HTMLParser.do_li(self, attrs)
    writer = self.formatter.writer
    writer.suppress_indentation()
示例15: end_ol
def end_ol(self):
    """Handle </OL>, then call the writer's suppress_indentation(0)."""
    HTMLParser.end_ol(self)
    writer = self.formatter.writer
    writer.suppress_indentation(0)