当前位置: 首页>>代码示例>>Python>>正文


Python SGMLParser.reset方法代码示例

本文整理汇总了Python中sgmllib.SGMLParser.reset方法的典型用法代码示例。如果您正苦于以下问题：Python SGMLParser.reset方法的具体用法是什么？Python SGMLParser.reset怎么用？有哪些Python SGMLParser.reset的使用例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sgmllib.SGMLParser的用法示例。


在下文中一共展示了SGMLParser.reset方法的7个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: _extract_links

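# Required import (Python 2 only; sgmllib was removed in Python 3): from sgmllib import SGMLParser [as alias]
# Note: reset is a method of the SGMLParser class, so it cannot be imported by itself; call it as SGMLParser.reset(self) or parser.reset().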
def _extract_links(self, response_text, response_url, response_encoding, base_url=None):
        """Parse ``response_text`` and return the links collected from it.

        The parser is reset, fed the whole document and closed. Every entry
        then present in ``self.links`` is normalised in place:

        * a text-type ``url`` is encoded with ``response_encoding``;
        * the ``url`` is joined onto the effective base URL and then passed
          through ``safe_url_string``;
        * ``text`` is converted with ``to_unicode`` (``errors='replace'``)
          and stripped.

        When ``base_url`` is ``None`` the effective base is ``self.base_url``
        resolved against ``response_url`` if ``self.base_url`` is truthy
        after parsing, otherwise ``response_url`` itself.

        Links for which ``urljoin`` raises ``ValueError`` are left out of
        the result.

        Returns the list of normalised link objects, in ``self.links`` order.
        """
        # One complete parse cycle over the document.
        self.reset()
        self.feed(response_text)
        self.close()

        # Work out the base URL only when the caller did not provide one.
        if base_url is None:
            if self.base_url:
                base_url = urljoin(response_url, self.base_url)
            else:
                base_url = response_url

        extracted = []
        for entry in self.links:
            if isinstance(entry.url, six.text_type):
                entry.url = entry.url.encode(response_encoding)
            try:
                entry.url = urljoin(base_url, entry.url)
            except ValueError:
                # The URL cannot be joined onto the base; drop this link.
                continue
            entry.url = safe_url_string(entry.url, response_encoding)
            entry.text = to_unicode(entry.text, response_encoding, errors='replace').strip()
            extracted.append(entry)

        return extracted
开发者ID:wistbean,项目名称:learn_python3_spider,代码行数:23,代码来源:sgml.py

示例2: _feed

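# Required import (Python 2 only; sgmllib was removed in Python 3): from sgmllib import SGMLParser [as alias]
# Note: reset is a method of the SGMLParser class, so it cannot be imported by itself; call it as SGMLParser.reset(self) or parser.reset().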
def _feed(self, inDocumentEncoding=None, isHTML=False):
        """Decode ``self.markup``, massage it, and parse it from scratch.

        Steps, in order:

        1. If ``self.markup`` is not already ``unicode`` it is converted by
           ``UnicodeDammit``, trying ``self.fromEncoding`` and
           ``inDocumentEncoding``; the detected ``originalEncoding`` and
           ``declaredHTMLEncoding`` are stored on ``self``.
        2. If the text is non-empty and ``self.markupMassage`` is truthy,
           each ``(pattern, replacement)`` pair is applied with
           ``pattern.sub`` and the attribute is then deleted.
        3. The parser is reset, the text is fed to ``SGMLParser.feed``,
           pending data is flushed with ``endData`` and tags are popped
           until the current tag is the root.

        ``isHTML`` is forwarded unchanged to ``UnicodeDammit``.
        """
        text = self.markup
        if not isinstance(text, unicode):
            converter = UnicodeDammit(
                text,
                [self.fromEncoding, inDocumentEncoding],
                smartQuotesTo=self.smartQuotesTo,
                isHTML=isHTML,
            )
            text = converter.unicode
            self.originalEncoding = converter.originalEncoding
            self.declaredHTMLEncoding = converter.declaredHTMLEncoding
        elif not hasattr(self, 'originalEncoding'):
            # Already unicode: no encoding was detected, so record None
            # unless a value is already present.
            self.originalEncoding = None

        if text and self.markupMassage:
            # A non-iterable truthy value selects self.MARKUP_MASSAGE.
            if not hasattr(self.markupMassage, "__iter__"):
                self.markupMassage = self.MARKUP_MASSAGE
            for pattern, replacement in self.markupMassage:
                text = pattern.sub(replacement, text)
            # TODO: markupMassage is removed so that the soup object can
            # be deepcopied later on; some Python installations can't copy
            # regexes. Code relying on the attribute still existing after
            # parsing might break.
            del self.markupMassage

        self.reset()
        SGMLParser.feed(self, text)

        # Flush any unfinished string, then close every tag still open.
        self.endData()
        while self.currentTag.name != self.ROOT_TAG_NAME:
            self.popTag()
开发者ID:Autodesk,项目名称:arnold-usd,代码行数:34,代码来源:__init__.py

示例3: reset

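# Required import (Python 2 only; sgmllib was removed in Python 3): from sgmllib import SGMLParser [as alias]
# Note: reset is a method of the SGMLParser class, so it cannot be imported by itself; call it as SGMLParser.reset(self) or parser.reset().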
def reset(self):
        """Re-initialise this object as the root tag and clear parse state.

        The object is (re)constructed as a ``Tag`` named
        ``self.ROOT_TAG_NAME`` with itself as the first argument, flagged
        hidden, and the underlying ``SGMLParser`` state is reset. The
        bookkeeping containers are then emptied and the object pushes
        itself as the first tag.
        """
        Tag.__init__(self, self, self.ROOT_TAG_NAME)
        self.hidden = 1
        SGMLParser.reset(self)

        # Fresh bookkeeping; currentTag must be cleared before pushTag runs.
        self.currentTag = None
        self.currentData, self.tagStack, self.quoteStack = [], [], []

        self.pushTag(self)
开发者ID:Autodesk,项目名称:arnold-usd,代码行数:11,代码来源:__init__.py

示例4: _feed

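# Required import (Python 2 only; sgmllib was removed in Python 3): from sgmllib import SGMLParser [as alias]
# Note: reset is a method of the SGMLParser class, so it cannot be imported by itself; call it as SGMLParser.reset(self) or parser.reset().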
def _feed(self, inDocumentEncoding=None, isHTML=False):
        """Decode ``self.markup``, massage it, and parse it from scratch.

        Non-``unicode`` markup is converted by ``UnicodeDammit`` (trying
        ``self.fromEncoding`` and ``inDocumentEncoding``, with ``isHTML``
        forwarded as given), and the detected ``originalEncoding`` and
        ``declaredHTMLEncoding`` are stored on ``self``. Non-empty text is
        then run through the ``(pattern, replacement)`` pairs in
        ``self.markupMassage`` when that attribute is truthy, after which
        the attribute is deleted. Finally the parser is reset, the text is
        fed to ``SGMLParser.feed``, pending data is flushed and tags are
        popped until the current tag is the root.
        """
        document = self.markup
        if not isinstance(document, unicode):
            detector = UnicodeDammit(
                document,
                [self.fromEncoding, inDocumentEncoding],
                smartQuotesTo=self.smartQuotesTo,
                isHTML=isHTML,
            )
            document = detector.unicode
            self.originalEncoding = detector.originalEncoding
            self.declaredHTMLEncoding = detector.declaredHTMLEncoding
        elif not hasattr(self, 'originalEncoding'):
            # Already unicode: record None unless a value is already present.
            self.originalEncoding = None

        if document and self.markupMassage:
            # A truthy value that isList() rejects selects self.MARKUP_MASSAGE.
            if not isList(self.markupMassage):
                self.markupMassage = self.MARKUP_MASSAGE
            for regex, substitute in self.markupMassage:
                document = regex.sub(substitute, document)
            # TODO: markupMassage is removed so that the soup object can
            # be deepcopied later on; some Python installations can't copy
            # regexes. Code relying on the attribute still existing after
            # parsing might break.
            del self.markupMassage

        self.reset()
        SGMLParser.feed(self, document)

        # Flush any unfinished string, then close every tag still open.
        self.endData()
        while self.currentTag.name != self.ROOT_TAG_NAME:
            self.popTag()
开发者ID:skarlekar,项目名称:faces,代码行数:34,代码来源:_bsoup.py

示例5: reset

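# Required import (Python 2 only; sgmllib was removed in Python 3): from sgmllib import SGMLParser [as alias]
# Note: reset is a method of the SGMLParser class, so it cannot be imported by itself; call it as SGMLParser.reset(self) or parser.reset().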
def reset(self):
        """Reset the underlying ``SGMLParser`` and clear link-collection state.

        After the call ``links`` is an empty list and both ``base_url`` and
        ``current_link`` are ``None``.
        """
        SGMLParser.reset(self)
        # The three attributes are independent of one another.
        self.current_link = None
        self.base_url = None
        self.links = []
开发者ID:wistbean,项目名称:learn_python3_spider,代码行数:7,代码来源:sgml.py

示例6: reset

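# Required import (Python 2 only; sgmllib was removed in Python 3): from sgmllib import SGMLParser [as alias]
# Note: reset is a method of the SGMLParser class, so it cannot be imported by itself; call it as SGMLParser.reset(self) or parser.reset().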
def reset(self):
        """Empty the ``docs`` and ``data`` lists, then reset ``SGMLParser``."""
        # Own accumulators are cleared before the base-class reset runs.
        self.docs, self.data = [], []
        SGMLParser.reset(self)
开发者ID:nusnlp,项目名称:m2scorer,代码行数:6,代码来源:nuclesgmlparser.py

示例7: reset

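# Required import (Python 2 only; sgmllib was removed in Python 3): from sgmllib import SGMLParser [as alias]
# Note: reset is a method of the SGMLParser class, so it cannot be imported by itself; call it as SGMLParser.reset(self) or parser.reset().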
def reset(self):
		"""Reset the underlying ``SGMLParser`` and start with an empty ``urls`` list."""
		SGMLParser.reset(self)
		# Start a fresh list for this parse.
		self.urls = []
开发者ID:knightmare2600,项目名称:d4rkc0de,代码行数:5,代码来源:links.py


注：本文中的sgmllib.SGMLParser.reset方法示例由纯净天空整理自GitHub/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。