本文整理汇总了Python中html5lib.filters.base.Filter类的典型用法代码示例。如果您正苦于以下问题：Python Filter类的具体用法？Python Filter怎么用？Python Filter使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Filter类的10个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __init__
def __init__(self, source):
    """Initialise the filter around ``source`` and reset its state.

    Args:
        source: Passed unchanged to ``html5lib_Filter.__init__``.
    """
    html5lib_Filter.__init__(self, source)

    # Level settings (presumably heading levels; the code that reads
    # them is not part of this snippet -- confirm against the class).
    self.level = 2
    self.max_level = 3

    # Tracking state; everything starts "outside" with no open elements.
    self.open_level = 0
    self.in_header = False
    self.in_hierarchy = False
示例2: __iter__
def __iter__(self):
    """Yield the wrapped tokens, omitting the section named by ``self.section_id``.

    A start tag carrying an ``id`` attribute equal to ``self.section_id``
    switches the filter into "in section" mode and records the nesting
    depth at that point. Tokens are suppressed until an end tag arrives
    while the nesting depth equals the recorded depth; that end tag is
    suppressed as well, and normal emission resumes with the next token.

    Reads and updates ``self.open_level``, ``self.in_section``,
    ``self.parent_level`` and ``self.skip``; these are expected to be
    initialised elsewhere (not shown in this snippet).

    Yields:
        dict: Each token from ``html5lib_Filter.__iter__`` that is not part
        of the removed section.
    """
    for token in html5lib_Filter.__iter__(self):
        token_type = token['type']

        if token_type == 'StartTag':
            # Track how deeply nested the current element is.
            self.open_level += 1
            for key in token['data']:
                # Attribute keys are (namespace, name) pairs; key[1] is the name.
                if key[1] == 'id' and token['data'][key] == self.section_id:
                    self.in_section = True
                    # Remember the depth at which the section was opened.
                    self.parent_level = self.open_level
        elif token_type == 'EndTag':
            # Compare depths by value: ``is`` only happens to work for the
            # small integers CPython caches and silently fails beyond them.
            if (self.parent_level is not None and
                    self.open_level == self.parent_level):
                self.in_section = False
                # Also drop this closing tag, which belongs to the section.
                self.skip = True
                self.parent_level = None
            self.open_level -= 1

        # Emit only when outside the removed section and not skipping.
        if not self.in_section and not self.skip:
            yield token
        else:
            # ``skip`` applies to a single token only.
            self.skip = False
示例3: __iter__
def __iter__(self, _title_attr=(None, 'title')):
    """Yield every token, adding an ``<aside>`` after titled HTML images.

    Each token from ``BaseFilter.__iter__`` is passed through unchanged.
    When the token is an ``EmptyTag`` named ``img`` in the HTML namespace
    and its ``data`` contains ``_title_attr``, three extra tokens follow it:
    an ``aside`` start tag, a ``Characters`` token holding the title text,
    and the matching ``aside`` end tag.

    Args:
        _title_attr: Attribute key looked up in the image's ``data``.
    """
    html_ns = namespaces['html']
    for token in BaseFilter.__iter__(self):
        yield token

        is_html_img = (
            token['type'] == 'EmptyTag'
            and token['name'] == 'img'
            and token['namespace'] == html_ns
        )
        if not is_html_img or 'data' not in token:
            continue

        img_attrs = token['data']
        if _title_attr not in img_attrs:
            continue

        yield {
            'type': 'StartTag',
            'namespace': html_ns,
            'name': 'aside',
            'data': OrderedDict(),  # TODO Some way to pass through special styling.
        }
        yield {
            'type': 'Characters',
            'data': img_attrs[_title_attr],
        }
        yield {
            'type': 'EndTag',
            'namespace': html_ns,
            'name': 'aside',
        }
示例4: __iter__
def __iter__(self):
    """Yield the wrapped tokens without ``html``/``head``/``body`` tags.

    Start, empty and end tag tokens named ``html``, ``head`` or ``body``
    are dropped; every other token is passed through unchanged.
    """
    tag_types = ('StartTag', 'EmptyTag', 'EndTag')
    wrapper_names = ('html', 'head', 'body')
    for token in HTML5LibFilterBase.__iter__(self):
        # ``name`` is only looked up on tag tokens, as in the type check order.
        if token['type'] in tag_types and token['name'] in wrapper_names:
            continue
        yield token
示例5: __iter__
def __iter__(self):
    """Yield the result of ``self.sanitize_token`` for each wrapped token.

    A falsy result drops the token, a list result is flattened into the
    output one item at a time, and any other result is yielded as is.
    """
    for raw_token in Filter.__iter__(self):
        cleaned = self.sanitize_token(raw_token)
        if cleaned:
            # Normalise to a list so both result shapes share one loop.
            pieces = cleaned if isinstance(cleaned, list) else [cleaned]
            for piece in pieces:
                yield piece
示例6: __iter__
def __iter__(self):
    """Yield the wrapped tokens, unwrapping anchors without a usable href.

    An ``a`` start tag is dropped unless one of its attributes is
    ``(None, 'href')`` with a non-empty value accepted by ``is_valid_url``.
    After a dropped start tag, the next ``a`` end tag is dropped too. All
    other tokens, including the anchor's content, pass through unchanged.
    """
    href_key = (None, 'href')
    drop_closing = False
    for token in Filter.__iter__(self):
        # Only anchor tokens get a tag type; everything else maps to None.
        is_anchor = 'name' in token and token['name'] == 'a'
        anchor_type = token['type'] if is_anchor else None

        if anchor_type == 'StartTag':
            has_valid_href = any(
                attr == href_key and value != '' and is_valid_url(value)
                for attr, value in token['data'].items()
            )
            drop_closing = not has_valid_href
            if drop_closing:
                continue
        elif anchor_type == 'EndTag' and drop_closing:
            drop_closing = False
            continue

        yield token
示例7: __iter__
def __iter__(self):
    """Yield the wrapped tokens, adding an edit link after section tags.

    Every token is passed through. After a start tag whose name is in
    ``SECTION_TAGS``, each attribute named ``id`` with a truthy value
    produces three extra tokens: an ``a`` start tag, a ``Characters``
    token with the translated text "Edit", and the ``a`` end tag. The
    link's attributes point at the ``wiki.document`` and ``wiki.edit``
    URLs for ``self.slug`` / ``self.locale``, with the section id in the
    query string.
    """
    for token in html5lib_Filter.__iter__(self):
        yield token

        if token['type'] != 'StartTag' or token['name'] not in SECTION_TAGS:
            continue

        for (_namespace, attr_name), section_id in dict(token['data']).items():
            if attr_name != 'id' or not section_id:
                continue

            # URL serving the raw source of just this section.
            section_src_url = u'%s?%s' % (
                reverse('wiki.document',
                        args=[self.slug],
                        locale=self.locale),
                urlencode({'section': section_id.encode('utf-8'),
                           'raw': 'true'}),
            )
            # URL opening the editor on just this section.
            section_edit_url = u'%s?%s' % (
                reverse('wiki.edit',
                        args=[self.slug],
                        locale=self.locale),
                urlencode({'section': section_id.encode('utf-8'),
                           'edit_links': 'true'}),
            )

            link_attrs = {
                (None, u'title'): ugettext('Edit section'),
                (None, u'class'): 'edit-section',
                (None, u'data-section-id'): section_id,
                (None, u'data-section-src-url'): section_src_url,
                (None, u'href'): section_edit_url,
            }
            link_tokens = (
                {'type': 'StartTag', 'name': 'a', 'data': link_attrs},
                {'type': 'Characters', 'data': ugettext(u'Edit')},
                {'type': 'EndTag', 'name': 'a'},
            )
            for link_token in link_tokens:
                yield link_token
示例8: __iter__
def __iter__(self):
    """Yield the wrapped tokens, dropping every ``SpaceCharacters`` token."""
    for token in html5lib_Filter.__iter__(self):
        if token['type'] != 'SpaceCharacters':
            yield token
示例9: __init__
def __init__(self, source, hosts):
    """Initialise the filter around ``source`` and remember ``hosts``.

    Args:
        source: Passed unchanged to ``html5lib_Filter.__init__``.
        hosts: Stored as ``self.hosts``; how it is used is not visible
            in this snippet.
    """
    # Let the base filter set itself up before adding our own state.
    html5lib_Filter.__init__(self, source)
    self.hosts = hosts
示例10: __iter__
def __iter__(self):
    """Yield the wrapped tokens with ``h1``-``h3`` tags renamed to ``h4``.

    Start and end tag tokens named ``h1``, ``h2`` or ``h3`` have their
    ``name`` rewritten in place; all tokens are then yielded.
    """
    for token in Filter.__iter__(self):
        is_tag = token['type'] in ['StartTag', 'EndTag']
        # ``name`` is only inspected on start/end tags.
        if is_tag and token['name'] in ['h1', 'h2', 'h3']:
            token['name'] = 'h4'
        yield token