本文整理汇总了Python中sgmllib.SGMLParser.feed方法的典型用法代码示例。如果您正苦于以下问题:Python SGMLParser.feed方法的具体用法?Python SGMLParser.feed怎么用?Python SGMLParser.feed使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sgmllib.SGMLParser
的用法示例。
在下文中一共展示了SGMLParser.feed方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _feed
# 需要导入模块: from sgmllib import SGMLParser [as 别名]
# 或者: from sgmllib.SGMLParser import feed [as 别名]
def _feed(self, inDocumentEncoding=None, isHTML=False):
# Convert the document to Unicode.
markup = self.markup
if isinstance(markup, unicode):
if not hasattr(self, 'originalEncoding'):
self.originalEncoding = None
else:
dammit = UnicodeDammit\
(markup, [self.fromEncoding, inDocumentEncoding],
smartQuotesTo=self.smartQuotesTo, isHTML=isHTML)
markup = dammit.unicode
self.originalEncoding = dammit.originalEncoding
self.declaredHTMLEncoding = dammit.declaredHTMLEncoding
if markup:
if self.markupMassage:
if not hasattr(self.markupMassage, "__iter__"):
self.markupMassage = self.MARKUP_MASSAGE
for fix, m in self.markupMassage:
markup = fix.sub(m, markup)
# TODO: We get rid of markupMassage so that the
# soup object can be deepcopied later on. Some
# Python installations can't copy regexes. If anyone
# was relying on the existence of markupMassage, this
# might cause problems.
del(self.markupMassage)
self.reset()
SGMLParser.feed(self, markup)
# Close out any unfinished strings and close all the open tags.
self.endData()
while self.currentTag.name != self.ROOT_TAG_NAME:
self.popTag()
示例2: _feed
# 需要导入模块: from sgmllib import SGMLParser [as 别名]
# 或者: from sgmllib.SGMLParser import feed [as 别名]
def _feed(self, inDocumentEncoding=None, isHTML=False):
# Convert the document to Unicode.
markup = self.markup
if isinstance(markup, unicode):
if not hasattr(self, 'originalEncoding'):
self.originalEncoding = None
else:
dammit = UnicodeDammit\
(markup, [self.fromEncoding, inDocumentEncoding],
smartQuotesTo=self.smartQuotesTo, isHTML=isHTML)
markup = dammit.unicode
self.originalEncoding = dammit.originalEncoding
self.declaredHTMLEncoding = dammit.declaredHTMLEncoding
if markup:
if self.markupMassage:
if not isList(self.markupMassage):
self.markupMassage = self.MARKUP_MASSAGE
for fix, m in self.markupMassage:
markup = fix.sub(m, markup)
# TODO: We get rid of markupMassage so that the
# soup object can be deepcopied later on. Some
# Python installations can't copy regexes. If anyone
# was relying on the existence of markupMassage, this
# might cause problems.
del(self.markupMassage)
self.reset()
SGMLParser.feed(self, markup)
# Close out any unfinished strings and close all the open tags.
self.endData()
while self.currentTag.name != self.ROOT_TAG_NAME:
self.popTag()
示例3: _feed
# 需要导入模块: from sgmllib import SGMLParser [as 别名]
# 或者: from sgmllib.SGMLParser import feed [as 别名]
def _feed(self, inDocumentEncoding=None):
# Convert the document to Unicode.
markup = self.markup
if isinstance(markup, unicode):
if not hasattr(self, 'originalEncoding'):
self.originalEncoding = None
else:
dammit = UnicodeDammit\
(markup, [self.fromEncoding, inDocumentEncoding],
smartQuotesTo=self.smartQuotesTo)
markup = dammit.unicode
self.originalEncoding = dammit.originalEncoding
if markup:
if self.markupMassage:
if not isList(self.markupMassage):
self.markupMassage = self.MARKUP_MASSAGE
for fix, m in self.markupMassage:
markup = fix.sub(m, markup)
self.reset()
SGMLParser.feed(self, markup or "")
SGMLParser.close(self)
# Close out any unfinished strings and close all the open tags.
self.endData()
while self.currentTag.name != self.ROOT_TAG_NAME:
self.popTag()
示例4: _feed
# 需要导入模块: from sgmllib import SGMLParser [as 别名]
# 或者: from sgmllib.SGMLParser import feed [as 别名]
def _feed(self, inDocumentEncoding=None):
# Convert the document to Unicode.
markup = self.markup
if isinstance(markup, unicode):
if not hasattr(self, 'originalEncoding'):
self.originalEncoding = None
else:
dammit = UnicodeDammit\
(markup, [self.fromEncoding, inDocumentEncoding],
smartQuotesTo=self.smartQuotesTo)
markup = dammit.unicode
self.originalEncoding = dammit.originalEncoding
if markup:
if self.markupMassage:
if not isList(self.markupMassage):
self.markupMassage = self.MARKUP_MASSAGE
for fix, m in self.markupMassage:
markup = fix.sub(m, markup)
# TODO: We get rid of markupMassage so that the
# soup object can be deepcopied later on. Some
# Python installations can't copy regexes. If anyone
# was relying on the existence of markupMassage, this
# might cause problems.
del(self.markupMassage)
self.reset()
SGMLParser.feed(self, markup)
# Close out any unfinished strings and close all the open tags.
self.endData()
while self.currentTag.name != self.ROOT_TAG_NAME:
self.popTag()