本文整理汇总了Python中html5lib.constants.namespaces方法的典型用法代码示例。如果您正苦于以下问题:Python constants.namespaces方法的具体用法?Python constants.namespaces怎么用?Python constants.namespaces使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类html5lib.constants的用法示例。
在下文中一共展示了constants.namespaces方法的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: getNameTuple
# 需要导入模块: from html5lib import constants [as 别名]
# 或者: from html5lib.constants import namespaces [as 别名]
def getNameTuple(self):
    """Return the ``(namespace, name)`` pair identifying this node.

    Returns:
        A 2-tuple. When ``self.namespace`` is ``None`` the first item is
        ``namespaces["html"]``; otherwise it is ``self.namespace``. The
        second item is always ``self.name``.
    """
    # Identity test: ``== None`` would call a custom ``__eq__`` on the
    # namespace object and could misclassify a real namespace as missing.
    if self.namespace is None:
        return namespaces["html"], self.name
    return self.namespace, self.name
示例2: reparentChildren
# 需要导入模块: from html5lib import constants [as 别名]
# 或者: from html5lib.constants import namespaces [as 别名]
def reparentChildren(self, newParent):
    """Move every child of ``self.element`` under ``newParent``.

    Each child is taken from the front of ``self.element.contents``,
    detached with ``extract()``, wrapped, and handed to
    ``newParent.appendChild``. ``Tag`` children are wrapped as ``Element``
    in the HTML namespace; everything else is wrapped as ``TextNode``.

    Args:
        newParent: object exposing ``appendChild(node)``.
    """
    # NOTE(review): the loop only terminates if extract() removes the child
    # from self.element.contents -- presumably it does; confirm.
    while self.element.contents:
        first = self.element.contents[0]
        first.extract()
        if isinstance(first, Tag):
            wrapped = Element(first, self.soup, namespaces["html"])
        else:
            wrapped = TextNode(first, self.soup)
        newParent.appendChild(wrapped)
示例3: getNodeDetails
# 需要导入模块: from html5lib import constants [as 别名]
# 或者: from html5lib.constants import namespaces [as 别名]
def getNodeDetails(self, node):
    """Classify ``node`` and return a tuple describing it.

    The first item of the result is a ``_base`` node-type constant; the
    remaining items depend on the type:

    * ``(DOCUMENT,)`` for a ``BeautifulSoup`` instance
    * ``(DOCTYPE, name, publicId, systemId)`` for a ``Declaration``
    * ``(COMMENT, text)`` for a ``Comment``
    * ``(TEXT, node)`` for a ``unicode`` instance
    * ``(ELEMENT, namespace, name, attribute items, children)`` for a ``Tag``
    * ``(UNKNOWN, class name)`` for anything else

    The isinstance checks run in this exact order; keep it when editing.

    Raises:
        AssertionError: if a doctype string does not match
            ``self.doctype_regexp``.
    """
    if isinstance(node, BeautifulSoup):  # Document or DocumentFragment
        return (_base.DOCUMENT,)

    if isinstance(node, Declaration):  # DocumentType
        text = unicode(node.string)
        # Some BeautifulSoup/Python versions add "<!" ... ">" markup during
        # the unicode conversion; strip it when present.
        if text.startswith('<!') and text.endswith('>'):
            text = text[2:-1]
        # The doctype is stored as one string, so its separate parts are
        # recovered with a regexp. That is fragile: it should work for trees
        # built by html5lib itself but may fail on a modified tree. Feeding
        # the string to an html5lib tokenizer would be an alternative.
        match = self.doctype_regexp.match(text)
        assert match is not None, "DOCTYPE did not match expected format"
        name = match.group('name')
        publicId = match.group('publicId')
        # The system id lives in a different group depending on whether a
        # public id was matched.
        systemGroup = 'systemId1' if publicId is not None else 'systemId2'
        systemId = match.group(systemGroup)
        return _base.DOCTYPE, name, publicId or "", systemId or ""

    if isinstance(node, Comment):
        text = unicode(node.string)
        if text.startswith('<!--') and text.endswith('-->'):
            text = text[4:-3]
        return _base.COMMENT, text

    if isinstance(node, unicode):  # TextNode
        return _base.TEXT, node

    if isinstance(node, Tag):  # Element
        attributes = dict(node.attrs).items()
        return (_base.ELEMENT, namespaces["html"], node.name,
                attributes, node.contents)

    return _base.UNKNOWN, node.__class__.__name__
示例4: elementInScope
# 需要导入模块: from html5lib import constants [as 别名]
# 或者: from html5lib.constants import namespaces [as 别名]
def elementInScope(self, target, variant=None):
    """Report whether ``target`` is in scope on the open-element stack.

    ``self.openElements`` is walked from its last entry to its first. The
    walk returns ``True`` at the first matching node and ``False`` at the
    first boundary node that is not a match.

    Args:
        target: a node object (anything with a ``nameTuple`` attribute),
            matched by equality, or a tag name, matched against
            ``node.name``.
        variant: which boundary set to use: ``None``, ``"button"``,
            ``"list"``, ``"table"`` or ``"select"``. For ``"select"`` the
            set is inverted, so every element *not* in it is a boundary.

    Returns:
        ``True`` or ``False`` as described above.

    Raises:
        KeyError: if ``variant`` is not one of the supported values.
        AssertionError: if the stack is exhausted without a decision.
    """
    exactNode = hasattr(target, "nameTuple")
    html = namespaces["html"]
    # variant -> (set of (namespace, name) tuples, invert flag)
    listElementsMap = {
        None: (scopingElements, False),
        "button": (scopingElements | set([(html, "button")]), False),
        "list": (scopingElements | set([(html, "ol"), (html, "ul")]), False),
        "table": (set([(html, "html"), (html, "table")]), False),
        "select": (set([(html, "optgroup"), (html, "option")]), True),
    }
    listElements, invert = listElementsMap[variant]
    for node in reversed(self.openElements):
        if exactNode:
            found = node == target
        else:
            found = node.name == target
        if found:
            return True
        # Both operands are booleans, so != is the same as xor here.
        if invert != (node.nameTuple in listElements):
            return False
    assert False  # We should never reach this point
示例5: reparentChildren
# 需要导入模块: from html5lib import constants [as 别名]
# 或者: from html5lib.constants import namespaces [as 别名]
def reparentChildren(self, newParent):
    """Transfer all children of ``self.element`` to ``newParent``.

    While ``self.element.contents`` is non-empty, its first entry is
    detached via ``extract()`` and passed, wrapped, to
    ``newParent.appendChild``: ``Tag`` instances become ``Element`` objects
    in the HTML namespace, and any other node becomes a ``TextNode``.

    Args:
        newParent: object exposing ``appendChild(node)``.
    """
    # NOTE(review): relies on extract() shrinking self.element.contents;
    # otherwise this loop would never end -- confirm.
    while self.element.contents:
        node = self.element.contents[0]
        node.extract()
        wrapper = (Element(node, self.soup, namespaces["html"])
                   if isinstance(node, Tag)
                   else TextNode(node, self.soup))
        newParent.appendChild(wrapper)
示例6: wbr_serialize
# 需要导入模块: from html5lib import constants [as 别名]
# 或者: from html5lib.constants import namespaces [as 别名]
def wbr_serialize(self):
    """Returns concatenated HTML code with WBR tag. This is still experimental.

    A ``<span style="word-break: keep-all">`` is built from the chunks in
    ``self``. A chunk for which ``has_cjk()`` is true gets a ``<wbr>``
    element before it, provided the span already has leading text; every
    other chunk is appended to the text that precedes it. The span is then
    serialized, re-parsed with html5lib and rendered through the sanitizer
    filter, with ``wbr`` and the ``word-break`` CSS property added to the
    allowed sets.

    Returns:
      The organized HTML code. (str)
    """
    doc = ET.Element('span')
    doc.attrib['style'] = 'word-break: keep-all'
    for chunk in self:
        if chunk.has_cjk() and doc.text:
            # The new <wbr> is the last child, so set its tail directly.
            ele = ET.Element('wbr')
            doc.append(ele)
            ele.tail = chunk.word
        elif len(doc):
            # Element.getchildren() was removed in Python 3.9; index the
            # element itself to reach the last child instead.
            # Add word without span tag for non-CJK text (e.g. English)
            # by appending it after the last element.
            last = doc[-1]
            if last.tail is None:
                last.tail = chunk.word
            else:
                last.tail += chunk.word
        elif doc.text is None:
            doc.text = chunk.word
        else:
            doc.text += chunk.word
    content = ET.tostring(doc, encoding='utf-8').decode('utf-8')
    dom = html5lib.parseFragment(content)
    treewalker = getTreeWalker('etree')
    stream = treewalker(dom)
    serializer = html5lib.serializer.HTMLSerializer(
        quote_attr_values='always')
    # Extend copies of the sanitizer defaults so the <wbr> elements and the
    # word-break style are not filtered out.
    allowed_elements = set(sanitizer.allowed_elements)
    allowed_elements.add((namespaces['html'], 'wbr'))
    allowed_css_properties = set(sanitizer.allowed_css_properties)
    allowed_css_properties.add('word-break')
    result = serializer.render(sanitizer.Filter(
        stream, allowed_elements=allowed_elements,
        allowed_css_properties=allowed_css_properties,
    ))
    return result
示例7: test_namespace_html_elements_0_dom
# 需要导入模块: from html5lib import constants [as 别名]
# 或者: from html5lib.constants import namespaces [as 别名]
def test_namespace_html_elements_0_dom(self):
    """Check the DOM root is in the HTML namespace when namespacing is on.

    Parses ``<html></html>`` with ``self.dom_tree`` as the tree builder and
    ``namespaceHTMLElements=True``, then compares the first child's
    ``namespaceURI`` with ``namespaces["html"]``.
    """
    parser = html5parser.HTMLParser(tree=self.dom_tree, namespaceHTMLElements=True)
    doc = parser.parse("<html></html>")
    # assertEqual reports both values on failure; assertTrue(a == b) only
    # reports "False is not true".
    self.assertEqual(doc.childNodes[0].namespaceURI, namespaces["html"])
示例8: test_namespace_html_elements_0_etree
# 需要导入模块: from html5lib import constants [as 别名]
# 或者: from html5lib.constants import namespaces [as 别名]
def test_namespace_html_elements_0_etree(self):
    """Check the etree root tag is namespace-qualified when namespacing is on.

    Parses ``<html></html>`` with the parser's default tree builder and
    ``namespaceHTMLElements=True``, then compares the first item's ``tag``
    with ``"{<html namespace>}html"``.
    """
    parser = html5parser.HTMLParser(namespaceHTMLElements=True)
    doc = parser.parse("<html></html>")
    # assertEqual reports both values on failure; assertTrue(a == b) only
    # reports "False is not true".
    self.assertEqual(list(doc)[0].tag, "{%s}html" % (namespaces["html"],))
示例9: test_namespace_html_elements_0_dom
# 需要导入模块: from html5lib import constants [as 别名]
# 或者: from html5lib.constants import namespaces [as 别名]
def test_namespace_html_elements_0_dom():
    """Check the DOM root is in the HTML namespace when namespacing is on.

    Parses ``<html></html>`` with the ``"dom"`` tree builder and
    ``namespaceHTMLElements=True``; the first child node's ``namespaceURI``
    must equal ``namespaces["html"]``.
    """
    options = {"treebuilder": "dom", "namespaceHTMLElements": True}
    document = parse("<html></html>", **options)
    root = document.childNodes[0]
    assert root.namespaceURI == namespaces["html"]
示例10: test_namespace_html_elements_0_etree
# 需要导入模块: from html5lib import constants [as 别名]
# 或者: from html5lib.constants import namespaces [as 别名]
def test_namespace_html_elements_0_etree():
    """Check the etree root tag is namespace-qualified when namespacing is on.

    Parses ``<html></html>`` with the ``"etree"`` tree builder and
    ``namespaceHTMLElements=True``; the returned object's ``tag`` must be
    ``"{<html namespace>}html"``.
    """
    options = {"treebuilder": "etree", "namespaceHTMLElements": True}
    document = parse("<html></html>", **options)
    actual_tag = document.tag
    assert actual_tag == "{%s}html" % (namespaces["html"],)