本文整理汇总了Python中sgmllib.SGMLParser.reset方法的典型用法代码示例。如果您正苦于以下问题:Python SGMLParser.reset方法的具体用法?Python SGMLParser.reset怎么用?Python SGMLParser.reset使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sgmllib.SGMLParser的用法示例。
在下文中一共展示了SGMLParser.reset方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _extract_links
# 需要导入模块: from sgmllib import SGMLParser [as 别名]
# 或者: from sgmllib.SGMLParser import reset [as 别名]
def _extract_links(self, response_text, response_url, response_encoding, base_url=None):
    """Parse ``response_text`` and return the links found, with joined URLs.

    The parser is reset, fed ``response_text`` and closed; afterwards every
    entry of ``self.links`` is updated in place and collected into the
    returned list.

    Args:
        response_text: Markup handed to ``self.feed``.
        response_url: URL of the response; used as the base URL, or joined
            with ``self.base_url`` when that attribute is truthy.
        response_encoding: Encoding used to encode text URLs and passed to
            ``safe_url_string`` and ``to_unicode``.
        base_url: Optional explicit base URL. When ``None`` it is derived
            from ``response_url`` and ``self.base_url``.

    Returns:
        A list of the link objects from ``self.links`` whose URL could be
        joined with the base URL. Links for which ``urljoin`` raises
        ``ValueError`` are left out.
    """
    # Full parse cycle; self.links and self.base_url are read afterwards.
    self.reset()
    self.feed(response_text)
    self.close()

    if base_url is None:
        if self.base_url:
            base_url = urljoin(response_url, self.base_url)
        else:
            base_url = response_url

    resolved = []
    for link in self.links:
        # Text URLs are encoded with the response encoding before joining.
        if isinstance(link.url, six.text_type):
            link.url = link.url.encode(response_encoding)
        try:
            link.url = urljoin(base_url, link.url)
        except ValueError:
            # The URL could not be joined; drop this link from the result.
            continue
        link.url = safe_url_string(link.url, response_encoding)
        link.text = to_unicode(link.text, response_encoding, errors='replace').strip()
        resolved.append(link)
    return resolved
示例2: _feed
# 需要导入模块: from sgmllib import SGMLParser [as 别名]
# 或者: from sgmllib.SGMLParser import reset [as 别名]
def _feed(self, inDocumentEncoding=None, isHTML=False):
    """Convert ``self.markup`` to Unicode, massage it, and run the parser.

    Args:
        inDocumentEncoding: Encoding candidate passed to ``UnicodeDammit``
            after ``self.fromEncoding``.
        isHTML: Forwarded to ``UnicodeDammit`` as ``isHTML``.

    Side effects:
        Sets ``self.originalEncoding`` (and ``self.declaredHTMLEncoding``
        when a conversion happens), may replace and then delete
        ``self.markupMassage``, resets the parser, and pops tags until the
        current tag is the root tag.
    """
    # Convert the document to Unicode.
    source = self.markup
    if not isinstance(source, unicode):
        converter = UnicodeDammit(
            source, [self.fromEncoding, inDocumentEncoding],
            smartQuotesTo=self.smartQuotesTo, isHTML=isHTML)
        source = converter.unicode
        self.originalEncoding = converter.originalEncoding
        self.declaredHTMLEncoding = converter.declaredHTMLEncoding
    elif not hasattr(self, 'originalEncoding'):
        # Already Unicode: only make sure the attribute exists.
        self.originalEncoding = None

    if source and self.markupMassage:
        # A non-iterable truthy value selects the default massage list.
        if not hasattr(self.markupMassage, "__iter__"):
            self.markupMassage = self.MARKUP_MASSAGE
        for pattern, replacement in self.markupMassage:
            source = pattern.sub(replacement, source)
        # TODO: We get rid of markupMassage so that the
        # soup object can be deepcopied later on. Some
        # Python installations can't copy regexes. If anyone
        # was relying on the existence of markupMassage, this
        # might cause problems.
        del self.markupMassage

    self.reset()
    SGMLParser.feed(self, source)

    # Close out any unfinished strings and close all the open tags.
    self.endData()
    while self.currentTag.name != self.ROOT_TAG_NAME:
        self.popTag()
示例3: reset
# 需要导入模块: from sgmllib import SGMLParser [as 别名]
# 或者: from sgmllib.SGMLParser import reset [as 别名]
def reset(self):
    """Re-initialise this object as the hidden root tag and clear parse state.

    Runs ``Tag.__init__`` on ``self`` with ``ROOT_TAG_NAME``, marks the
    object hidden, resets the underlying ``SGMLParser``, empties the
    bookkeeping attributes, and finally pushes ``self`` as the first tag.
    """
    Tag.__init__(self, self, self.ROOT_TAG_NAME)
    self.hidden = 1
    SGMLParser.reset(self)

    # Fresh containers for every reset; assignment order matches the
    # sequence currentData, currentTag, tagStack, quoteStack.
    self.currentData, self.currentTag = [], None
    self.tagStack, self.quoteStack = [], []

    # Pushed last, after the stacks above have been recreated.
    self.pushTag(self)
示例4: _feed
# 需要导入模块: from sgmllib import SGMLParser [as 别名]
# 或者: from sgmllib.SGMLParser import reset [as 别名]
def _feed(self, inDocumentEncoding=None, isHTML=False):
    """Turn ``self.markup`` into Unicode, apply massage rules, and parse it.

    Args:
        inDocumentEncoding: Encoding candidate given to ``UnicodeDammit``
            together with ``self.fromEncoding``.
        isHTML: Passed through to ``UnicodeDammit`` as ``isHTML``.

    Side effects:
        Sets ``self.originalEncoding`` (plus ``self.declaredHTMLEncoding``
        when the markup had to be converted), may replace and then delete
        ``self.markupMassage``, resets the parser, and pops tags until the
        current tag is the root tag.
    """
    # Convert the document to Unicode.
    text = self.markup
    if not isinstance(text, unicode):
        detector = UnicodeDammit(
            text, [self.fromEncoding, inDocumentEncoding],
            smartQuotesTo=self.smartQuotesTo, isHTML=isHTML)
        text = detector.unicode
        self.originalEncoding = detector.originalEncoding
        self.declaredHTMLEncoding = detector.declaredHTMLEncoding
    elif not hasattr(self, 'originalEncoding'):
        # Markup was already Unicode: just guarantee the attribute exists.
        self.originalEncoding = None

    if text and self.markupMassage:
        # A truthy value that isList() rejects selects the default rules.
        if not isList(self.markupMassage):
            self.markupMassage = self.MARKUP_MASSAGE
        for regex, substitute in self.markupMassage:
            text = regex.sub(substitute, text)
        # TODO: We get rid of markupMassage so that the
        # soup object can be deepcopied later on. Some
        # Python installations can't copy regexes. If anyone
        # was relying on the existence of markupMassage, this
        # might cause problems.
        del self.markupMassage

    self.reset()
    SGMLParser.feed(self, text)

    # Close out any unfinished strings and close all the open tags.
    self.endData()
    while self.currentTag.name != self.ROOT_TAG_NAME:
        self.popTag()
示例5: reset
# 需要导入模块: from sgmllib import SGMLParser [as 别名]
# 或者: from sgmllib.SGMLParser import reset [as 别名]
def reset(self):
    """Reset the SGML parser and clear the link-collection attributes.

    After the call ``self.links`` is a new empty list and both
    ``self.base_url`` and ``self.current_link`` are ``None``.
    """
    SGMLParser.reset(self)
    self.links = list()
    self.base_url = self.current_link = None
示例6: reset
# 需要导入模块: from sgmllib import SGMLParser [as 别名]
# 或者: from sgmllib.SGMLParser import reset [as 别名]
def reset(self):
    """Give ``docs`` and ``data`` fresh empty lists, then reset the parser."""
    # Two distinct list objects, assigned before the base-class reset runs.
    self.docs, self.data = [], []
    SGMLParser.reset(self)
示例7: reset
# 需要导入模块: from sgmllib import SGMLParser [as 别名]
# 或者: from sgmllib.SGMLParser import reset [as 别名]
def reset(self):
    """Reset the SGML parser state and start with an empty ``urls`` list."""
    SGMLParser.reset(self)
    self.urls = list()