当前位置: 首页>>代码示例>>Python>>正文


Python HTMLParser.feed方法代码示例

本文整理汇总了Python中html.parser.HTMLParser.feed方法的典型用法代码示例。如果您正苦于以下问题:Python HTMLParser.feed方法的具体用法?Python HTMLParser.feed怎么用?Python HTMLParser.feed使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类html.parser.HTMLParser的用法示例。


在下文中一共展示了HTMLParser.feed方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: getFormattedHTML

# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
def getFormattedHTML(self, indent='  '):
        '''
            getFormattedHTML - Return this document's HTML after passing it through AdvancedHTMLFormatter.

                The result of self.getHTML() is fed into a new AdvancedHTMLFormatter created with
                the given indent, and that formatter's getHTML() output is returned.

            @param indent - Forwarded unchanged to AdvancedHTMLFormatter. Described by the original author as
                the space/tab/newline used for each level of indent, or an integer for how many spaces per level.

            @return - <str> Formatted html

            @see getHTML - Get HTML with original whitespace

            @see getMiniHTML - Get HTML with only functional whitespace remaining
        '''
        from .Formatter import AdvancedHTMLFormatter

        originalHTML = self.getHTML()
        # Second argument is None per the original author's note: do not double-encode.
        prettyPrinter = AdvancedHTMLFormatter(indent, None)
        prettyPrinter.feed(originalHTML)
        return prettyPrinter.getHTML()
开发者ID:kata198,项目名称:AdvancedHTMLParser,代码行数:20,代码来源:Parser.py

示例2: getHTML

# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
def getHTML(self):
        '''
            getHTML - Get the full HTML as contained within this tree.

                If self.doctype is set, a "<!doctype>" line is prepended to the result.

                When the root is a real tag, its outerHTML is returned. When the root is the
                placeholder root (tagName equals INVISIBLE_ROOT_TAG, used for the multiple-root
                condition), its innerHTML is returned instead, so the placeholder tag itself is
                skipped while untagged text between the root nodes is kept.

            @return <str> - The doctype line (if any) followed by the root's HTML

            @raises ValueError - If getRoot() returns None (nothing has been fed yet)
        '''
        # Fetch the root once; it is needed for both the emptiness check and the output.
        root = self.getRoot()
        if root is None:
            raise ValueError('Cannot format, use feed to load contents.')

        if self.doctype:
            doctypeStr = '<!%s>\n' %(self.doctype)
        else:
            doctypeStr = ''

        if root.tagName == INVISIBLE_ROOT_TAG:
            return doctypeStr + root.innerHTML
        return doctypeStr + root.outerHTML
开发者ID:kata198,项目名称:AdvancedHTMLParser,代码行数:25,代码来源:Formatter.py

示例3: remove

# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
def remove(self, item):
        """
        Delete item from this container.

        The position is looked up with self.index(item) and the entry at
        that position is deleted, so whatever notion of equality
        self.index uses (the original author describes it as id-based)
        decides which entry goes away. Any exception raised by
        self.index for a missing item propagates to the caller.

        Example (from the original author):

        data = '<a><b></b><b></b></a>'
        html = Html()
        dom = html.feed(data)

        for root, ind in dom.sail_with_root():
            if ind.name == 'b':
                root.remove(ind)

        print(dom)

        It should print.

        <a ></a>
        """

        del self[self.index(item)]
开发者ID:iogf,项目名称:ehp,代码行数:23,代码来源:ehp.py

示例4: take

# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
def take(self, *args):
        """
        Return the first object produced by self.match(*args), or None
        when match produces nothing.

        The arguments are forwarded to match untouched; in the original
        author's words they are (key, value) attribute pairs such as
        (key0, value0), (key1, value1), ... .

        Example:

        data = '<a><b id="foo" size="1"></b></a>'
        html = Html()
        dom = html.feed(data)

        print(dom.take(('id', 'foo')))
        print(dom.take(('id', 'foo'), ('size', '2')))
        """

        # The default of None stands in for the exhausted-iterator case.
        return next(self.match(*args), None)
开发者ID:iogf,项目名称:ehp,代码行数:25,代码来源:ehp.py

示例5: walk_with_root

# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
def walk_with_root(self):
        """
        Iterate over self.sail_with_root(), expanding every object into
        an (object, name, attr) triple.

        For each (root, ind) pair coming from sail_with_root this yields

            ((root, root.name, root.attr), (ind, ind.name, ind.attr))

        Example:

        html = Html()
        data = '<body><em>alpha</em></body>'
        dom = html.feed(data)

        for (root, rname, rattr), (ind, iname, iattr) in dom.walk_with_root():
            print(root, rname, ind, iname)
        """

        for parent, child in self.sail_with_root():
            parentInfo = (parent, parent.name, parent.attr)
            childInfo = (child, child.name, child.attr)
            yield (parentInfo, childInfo)
开发者ID:iogf,项目名称:ehp,代码行数:25,代码来源:ehp.py

示例6: __init__

# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
def __init__(self, data):
        """
        Build a node that holds character data.

        The base Root initialiser is called with the DATA constant, and
        the given characters are then stored on self.data.

        data - the characters this node holds.

        Example (from the original author; the class is presumably
        named Data, as used below - confirm against the full file):

        html = Html()
        data = '<body><em>alpha</em></body>'
        dom = html.feed(data)
        x = dom.fst('em')
        x.append(Data('\nbeta'))

        It outputs.

        <body ><em >alpha
        beta</em></body>
        """

        Root.__init__(self, DATA)
        self.data = data 
开发者ID:iogf,项目名称:ehp,代码行数:22,代码来源:ehp.py

示例7: getMiniHTML

# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
def getMiniHTML(self):
        '''
            getMiniHTML - Return this document's HTML after passing it through AdvancedHTMLMiniFormatter.

                The result of self.getHTML() is fed into a new AdvancedHTMLMiniFormatter and that
                formatter's getHTML() output is returned. The original author describes the result as
                HTML without any pretty formatting, keeping only functional whitespace.

                @return <str> - HTML with only functional whitespace present
        '''
        from .Formatter import AdvancedHTMLMiniFormatter

        originalHTML = self.getHTML()
        # Argument is None per the original author's note: do not double-encode.
        miniFormatter = AdvancedHTMLMiniFormatter(None)
        miniFormatter.feed(originalHTML)
        return miniFormatter.getHTML()
开发者ID:kata198,项目名称:AdvancedHTMLParser,代码行数:14,代码来源:Parser.py

示例8: feed

# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
def feed(self, contents):
        '''
            feed - Feed contents. Use parseStr or parseFile instead.

                The contents are first passed through stripIEConditionals and then handed to
                HTMLParser.feed. If that raises MultipleRootNodeException, the parser is reset and
                the contents are fed again, this time as addStartTag(contents, INVISIBLE_ROOT_TAG_START)
                followed by INVISIBLE_ROOT_TAG_END.

            @param contents - Contents
        '''
        contents = stripIEConditionals(contents)
        try:
            HTMLParser.feed(self, contents)
        except MultipleRootNodeException:
            # Start over and retry with the contents wrapped in the invisible root tag.
            self.reset()
            openedContents = addStartTag(contents, INVISIBLE_ROOT_TAG_START)
            wrappedContents = "%s%s" %(openedContents, INVISIBLE_ROOT_TAG_END)
            HTMLParser.feed(self, wrappedContents)
开发者ID:kata198,项目名称:AdvancedHTMLParser,代码行数:14,代码来源:Parser.py

示例9: parseFile

# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
def parseFile(self, filename):
        '''
            parseFile - Resets the parser, reads the contents of a file, and passes them to feed()

                @param filename <str/file-like> - A string path to a file, or any object with a read() method
                    (an open file, io.StringIO, ...). A passed-in object will not be closed, you must close.
                    A path is opened with codecs.open using self.encoding and closed again before feeding.
        '''
        self.reset()

        # Duck-type on read() instead of isinstance(filename, file): the "file" builtin only
        #   exists on Python 2, and this also accepts any other file-like object.
        if hasattr(filename, 'read'):
            contents = filename.read()
        else:
            with codecs.open(filename, 'r', encoding=self.encoding) as f:
                contents = f.read()

        self.feed(contents)
开发者ID:kata198,项目名称:AdvancedHTMLParser,代码行数:17,代码来源:Parser.py

示例10: parseStr

# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
def parseStr(self, html):
        '''
            parseStr - Resets the parser and feeds it a string of HTML.

                A bytes value is decoded with self.encoding before being fed; anything else is
                passed to feed() unchanged.

                @param html <str> - valid HTML
        '''
        self.reset()

        text = html.decode(self.encoding) if isinstance(html, bytes) else html
        self.feed(text)
开发者ID:kata198,项目名称:AdvancedHTMLParser,代码行数:14,代码来源:Parser.py

示例11: feed

# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
def feed(self, contents):
        '''
            feed - Load contents

                The contents are first passed through stripIEConditionals and then handed to
                HTMLParser.feed. If that raises MultipleRootNodeException, the parser is reset and
                the contents are fed again, this time as addStartTag(contents, INVISIBLE_ROOT_TAG_START)
                followed by INVISIBLE_ROOT_TAG_END.

            @param contents - HTML contents
        '''
        contents = stripIEConditionals(contents)
        try:
            HTMLParser.feed(self, contents)
        except MultipleRootNodeException:
            # Start over and retry with the contents wrapped in the invisible root tag.
            self.reset()

            withStartTag = addStartTag(contents, INVISIBLE_ROOT_TAG_START)
            rewrapped = "%s%s" %(withStartTag, INVISIBLE_ROOT_TAG_END)
            HTMLParser.feed(self, rewrapped)
开发者ID:kata198,项目名称:AdvancedHTMLParser,代码行数:15,代码来源:Formatter.py

示例12: parseFile

# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
def parseFile(self, filename):
        '''
            parseFile - Resets the parser, reads the contents of a file, and passes them to feed()

                @param filename <str/file-like> - A string path to a file, or any object with a read() method
                    (an open file, io.StringIO, ...). A passed-in object will not be closed, you must close.
                    A path is opened with codecs.open using self.encoding and closed again before feeding.
        '''
        self.reset()

        # Duck-type on read() instead of isinstance(filename, file): the "file" builtin only
        #   exists on Python 2, and this also accepts any other file-like object.
        if hasattr(filename, 'read'):
            contents = filename.read()
        else:
            with codecs.open(filename, 'r', encoding=self.encoding) as f:
                contents = f.read()
        self.feed(contents)
开发者ID:kata198,项目名称:AdvancedHTMLParser,代码行数:16,代码来源:Formatter.py

示例13: parseStr

# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
def parseStr(self, html):
        '''
            parseStr - Resets the parser and feeds it a string of HTML.

                A bytes value is decoded with self.encoding before being fed; anything else is
                passed to feed() unchanged.

                @param html <str> - valid HTML
        '''
        self.reset()
        needsDecode = isinstance(html, bytes)
        self.feed(html.decode(self.encoding) if needsDecode else html)
开发者ID:kata198,项目名称:AdvancedHTMLParser,代码行数:13,代码来源:Formatter.py

示例14: sail

# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
def sail(self):
        """
        Iterate over every object nested below this one.

        Each direct child is visited in order; for every child, whatever
        its own sail() yields comes first and the child itself is
        yielded afterwards. This object is not part of the output.
        The loop runs over a slice copy (self[:]), not over self.

        According to the original author, this is used to navigate
        through the xml/html document, where every xml/html object is a
        python class instance that inherits from Root.

        Example:
        data = '<a> <b> </b> </a>'

        html = Html()
        dom = html.feed(data)

        for ind in dom.sail():
            print(type(ind), ',', ind.name)
        """

        children = self[:]
        for child in children:
            # Everything below the child first, then the child itself.
            for descendant in child.sail():
                yield descendant

            yield child
开发者ID:iogf,项目名称:ehp,代码行数:31,代码来源:ehp.py

示例15: index

# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
def index(self, item):
        """
        Return the position of item within this container, comparing
        by identity ("is") rather than by equality.

        Iteration stops at the first entry that is the very same object
        as item. ValueError is raised when no entry is that object.

        Example:

        data = '<a><b></b><b></b></a>'
        html = Html()
        dom = html.feed(data)

        for root, ind in dom.sail_with_root():
            print(root.name, ind.name, root.index(ind))


        It would print (per the original author).

        a b 0
        a b 1
         a 0

        The line where it appears ' a 0' corresponds to the
        outmost object. The outmost object is an instance of Root
        that contains all the other objects.
        """

        for position, candidate in enumerate(self):
            if candidate is item:
                return position

        raise ValueError
开发者ID:iogf,项目名称:ehp,代码行数:34,代码来源:ehp.py


注:本文中的html.parser.HTMLParser.feed方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。