本文整理汇总了Python中html.parser.HTMLParser.feed方法的典型用法代码示例。如果您正苦于以下问题:Python HTMLParser.feed方法的具体用法?Python HTMLParser.feed怎么用?Python HTMLParser.feed使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类html.parser.HTMLParser的用法示例。
在下文中一共展示了HTMLParser.feed方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: getFormattedHTML
# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
def getFormattedHTML(self, indent=' '):
    '''
        getFormattedHTML - Return this document's HTML run through AdvancedHTMLFormatter,
            i.e. a pretty-printed version in place of the original whitespace.

        @param indent - space/tab/newline of each level of indent, or integer for how many spaces per level

        @return - <str> Formatted html

        @see getHTML - Get HTML with original whitespace
        @see getMiniHTML - Get HTML with only functional whitespace remaining
    '''
    # Function-scope import, as in the original implementation.
    from .Formatter import AdvancedHTMLFormatter

    sourceHTML = self.getHTML()

    # Second argument is None so the formatter does not double-encode.
    prettyPrinter = AdvancedHTMLFormatter(indent, None)
    prettyPrinter.feed(sourceHTML)

    return prettyPrinter.getHTML()
示例2: getHTML
# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
def getHTML(self):
    '''
        getHTML - Get the full HTML contained within this tree, prefixed by the
            doctype declaration when one is set.

        @return - <str> '<!DOCTYPE...>' line (if self.doctype is set) followed by the root's HTML

        @raises ValueError - If getRoot() returns None (nothing has been loaded yet)
    '''
    # Look the root up once; it is used both for the emptiness check and for output.
    rootNode = self.getRoot()
    if rootNode is None:
        raise ValueError('Cannot format, use feed to load contents.')

    if self.doctype:
        doctypeStr = '<!%s>\n' %(self.doctype)
    else:
        doctypeStr = ''

    # 6.6.0: If we have a real root tag, print the outerHTML. If we have a fake root tag
    #   (for multiple root condition), print the innerHTML instead (skipping the outer
    #   root tag). Otherwise, untagged text between the multiple root nodes would be missed.
    if rootNode.tagName == INVISIBLE_ROOT_TAG:
        return doctypeStr + rootNode.innerHTML

    return doctypeStr + rootNode.outerHTML
示例3: remove
# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
def remove(self, item):
    """
    Delete item from this container, using self.index to find its position
    (the original author describes this as list.remove that works with id).

    Example:

    data = '<a><b></b><b></b></a>'
    html = Html()
    dom = html.feed(data)

    for root, ind in dom.sail_with_root():
        if ind.name == 'b':
            root.remove(ind)

    print dom

    It should print.

    <a ></a>
    """
    # Whatever self.index raises for a missing item propagates unchanged.
    del self[self.index(item)]
示例4: take
# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
def take(self, *args):
    """
    Return the first object produced by self.match(*args), or None when
    the match iterator is exhausted without producing anything.

    The arguments are (key, value) attribute pairs passed straight to match.

    Example:

    data = '<a><b id="foo" size="1"></b></a>'
    html = Html()
    dom = html.feed(data)

    print dom.take(('id', 'foo'))
    print dom.take(('id', 'foo'), ('size', '2'))
    """
    # next() with a default replaces the explicit StopIteration handling.
    return next(self.match(*args), None)
示例5: walk_with_root
# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
def walk_with_root(self):
    """
    Iterate over self.sail_with_root(), expanding each (root, ind) pair into
    a pair of (object, name, attr) triples. Like walk but carries root.

    Example:

    html = Html()
    data = '<body><em>alpha</em></body>'
    dom = html.feed(data)

    for (root, name, attr), (ind, name, attr) in dom.walk_with_root():
        print root, name, ind, name

    Output:

    <em >alpha</em> 1 alpha 1
    <body ><em >alpha</em></body> em <em >alpha</em> em
    <body ><em >alpha</em></body> body <body ><em >alpha</em></body> body
    """
    for parent, node in self.sail_with_root():
        parent_info = (parent, parent.name, parent.attr)
        node_info = (node, node.name, node.attr)
        yield (parent_info, node_info)
示例6: __init__
# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
def __init__(self, data):
    """
    Initialise the node through Root.__init__ with the DATA name and store
    the given characters on self.data.

    Example:

    html = Html()
    data = '<body><em>alpha</em></body>'
    dom = html.feed(data)
    x = dom.fst('em')
    x.append(Data('\\nbeta'))

    It outputs.

    <body ><em >alpha
    beta</em></body>
    """
    # Base initialisation runs first, then the payload is attached.
    Root.__init__(self, DATA)

    self.data = data
示例7: getMiniHTML
# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
def getMiniHTML(self):
    '''
        getMiniHTML - Return this document's HTML run through AdvancedHTMLMiniFormatter:
            no pretty formatting, and original whitespace beyond the functional is disregarded.

        @return <str> - HTML with only functional whitespace present
    '''
    # Function-scope import, as in the original implementation.
    from .Formatter import AdvancedHTMLMiniFormatter

    sourceHTML = self.getHTML()

    # Argument is None so the formatter does not double-encode.
    miniFormatter = AdvancedHTMLMiniFormatter(None)
    miniFormatter.feed(sourceHTML)

    return miniFormatter.getHTML()
示例8: feed
# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
def feed(self, contents):
    '''
        feed - Feed contents. Use parseStr or parseFile instead.

            The contents are passed through stripIEConditionals before parsing. If parsing
            raises MultipleRootNodeException, the parser is reset and the contents are
            parsed again wrapped in the invisible root tag.

        @param contents - Contents
    '''
    stripped = stripIEConditionals(contents)

    try:
        HTMLParser.feed(self, stripped)
    except MultipleRootNodeException:
        # Start over and retry with the contents enclosed in the invisible root tag.
        self.reset()
        opened = addStartTag(stripped, INVISIBLE_ROOT_TAG_START)
        wrapped = "%s%s" %(opened, INVISIBLE_ROOT_TAG_END)
        HTMLParser.feed(self, wrapped)
示例9: parseFile
# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
def parseFile(self, filename):
    '''
        parseFile - Parses a file and creates the DOM tree and indexes

        @param filename <str/file> - A string to a filename or a file object. Any object with a
            read() method is treated as a file object. If file object, it will not be closed,
            you must close.
    '''
    self.reset()

    if hasattr(filename, 'read'):
        # Duck-typed check: works for real files as well as in-memory streams,
        #   and does not rely on a "file" name being defined.
        contents = filename.read()
    else:
        with codecs.open(filename, 'r', encoding=self.encoding) as f:
            contents = f.read()

    # A handle opened in binary mode yields bytes; decode them the same way parseStr does.
    if isinstance(contents, bytes):
        contents = contents.decode(self.encoding)

    self.feed(contents)
示例10: parseStr
# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
def parseStr(self, html):
    '''
        parseStr - Parses a string and creates the DOM tree and indexes.

            The parser is reset first. Bytes input is decoded with self.encoding
            before being fed; any other input is fed unchanged.

        @param html <str> - valid HTML
    '''
    self.reset()

    text = html.decode(self.encoding) if isinstance(html, bytes) else html
    self.feed(text)
示例11: feed
# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
def feed(self, contents):
    '''
        feed - Load contents

            The contents are passed through stripIEConditionals before parsing. If parsing
            raises MultipleRootNodeException, the parser is reset and the contents are
            parsed again wrapped in the invisible root tag.

        @param contents - HTML contents
    '''
    stripped = stripIEConditionals(contents)

    try:
        HTMLParser.feed(self, stripped)
    except MultipleRootNodeException:
        # Start over and retry with the contents enclosed in the invisible root tag.
        self.reset()
        opened = addStartTag(stripped, INVISIBLE_ROOT_TAG_START)
        wrapped = "%s%s" %(opened, INVISIBLE_ROOT_TAG_END)
        HTMLParser.feed(self, wrapped)
示例12: parseFile
# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
def parseFile(self, filename):
    '''
        parseFile - Parses a file and creates the DOM tree and indexes

        @param filename <str/file> - A string to a filename or a file object. Any object with a
            read() method is treated as a file object. If file object, it will not be closed,
            you must close.
    '''
    self.reset()

    if hasattr(filename, 'read'):
        # Duck-typed check: works for real files as well as in-memory streams,
        #   and does not rely on a "file" name being defined.
        contents = filename.read()
    else:
        with codecs.open(filename, 'r', encoding=self.encoding) as f:
            contents = f.read()

    # A handle opened in binary mode yields bytes; decode them the same way parseStr does.
    if isinstance(contents, bytes):
        contents = contents.decode(self.encoding)

    self.feed(contents)
示例13: parseStr
# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
def parseStr(self, html):
    '''
        parseStr - Parses a string and creates the DOM tree and indexes.

            The parser is reset first. Bytes input is decoded with self.encoding
            before being fed; any other input is fed unchanged.

        @param html <str> - valid HTML
    '''
    self.reset()

    text = html.decode(self.encoding) if isinstance(html, bytes) else html
    self.feed(text)
示例14: sail
# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
def sail(self):
    """
    Return an iterator over every object nested inside this one, used to
    navigate through the xml/html document.

    Every xml/html object is represented by a python class instance that
    inherits from Root. Iteration runs over a copy of the direct children
    (self[:]); for each child, everything yielded by the child's own sail()
    comes first, followed by the child itself.

    Example:

    data = '<a> <b> </b> </a>'

    html = Html()
    dom = html.feed(data)

    for ind in dom.sail():
        print type(ind),',', ind.name
    """
    for child in self[:]:
        descendants = child.sail()
        for node in descendants:
            yield node
        yield child
示例15: index
# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
def index(self, item):
    """
    Return the position of item among this object's children, comparing by
    identity (is) rather than by equality. Raises ValueError when no child
    is that exact object.

    Example:

    data = '<a><b></b><b></b></a>'

    html = Html()
    dom = html.feed(data)

    for root, ind in dom.sail_with_root():
        print root.name, ind.name, root.index(ind)

    It would print.

    a b 0
    a b 1
     a 0

    The line where it appears ' a 0' corresponds to the
    outmost object. The outmost object is an instance of Root
    that contains all the other objects.
    """
    for position, candidate in enumerate(self):
        if candidate is item:
            return position

    raise ValueError