当前位置: 首页>>代码示例>>Python>>正文


Python HTMLParser.feed方法代码示例

本文整理汇总了Python中html.parser.HTMLParser.feed方法的典型用法代码示例。如果您正苦于以下问题：Python HTMLParser.feed方法的具体用法？Python HTMLParser.feed怎么用？Python HTMLParser.feed使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类html.parser.HTMLParser的用法示例。


在下文中一共展示了HTMLParser.feed方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: _strip_tags

# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
 def _strip_tags(self, html):
     """Return the text content of ``html`` with the markup removed.

     A throwaway ``HTMLParser`` is created and its ``handle_data`` hook is
     replaced by ``list.append``, so every text run the parser reports is
     collected and the pieces are concatenated at the end.
     """
     pieces = []
     stripper = HTMLParser()
     # Route each text node straight into the accumulator.
     stripper.handle_data = pieces.append
     stripper.feed(html)
     # close() makes the parser process whatever it still has buffered.
     stripper.close()
     return ''.join(pieces)
开发者ID:SpiderDave,项目名称:spidey-supybot-plugins,代码行数:9,代码来源:plugin.py

示例2: feed

# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
 def feed(self, data):
     """
     Main method for purifying HTML (overrides ``HTMLParser.feed``).

     Calls ``reset_purified()`` first (presumably discarding the output of
     an earlier run -- confirm against that method), runs the base-class
     parser over ``data`` and returns the value of ``self.html()``.
     """
     self.reset_purified()
     HTMLParser.feed(self, data)
     return self.html()
开发者ID:ilyutoev,项目名称:python-html-purifier,代码行数:9,代码来源:purifier.py

示例3: parse_html_data

# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
def parse_html_data(rootParser, htmlData):
    """Feed ``htmlData`` to a fresh ``HTMLParser`` one line at a time.

    ``rootParser`` is invoked as ``rootParser(parser, None, None, None)``
    before any data is fed, and its result is returned once every line has
    been processed.  Lines are split on ``'\\n'`` and stripped of
    surrounding whitespace before being fed.  The parser is not closed
    here.
    """
    parser = HTMLParser()
    root = rootParser(parser, None, None, None)
    for raw_line in htmlData.split('\n'):
        parser.feed(raw_line.strip())
    return root
开发者ID:explosiveduck,项目名称:ed2d,代码行数:9,代码来源:markup.py

示例4: feed

# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
 def feed(self, bytesdata):
   """Decode ``bytesdata`` as latin1 and hand the text to the base parser.

   A falsy ``bytesdata`` (for example ``b''``) is not parsed; the parser
   is closed instead.
   """
   if not bytesdata:
     self.close()
     return
   # ``py3`` is a flag defined outside this method; it selects which
   # spelling of the base-class call is used.
   if py3:
     super().feed(bytesdata.decode('latin1'))
     return
   HTMLParser.feed(self, bytesdata.decode('latin1'))
开发者ID:coldnight,项目名称:fetchtitle,代码行数:10,代码来源:__init__.py

示例5: feed

# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
 def feed(self, data, noskip=False):
     """Reset the per-parse state on ``self`` and then parse ``data``.

     The four ``start_*`` flags are set to ``False``, ``tables``, ``table``
     and ``tr`` become new empty lists, ``data`` becomes ``''`` and
     ``noskip`` is stored, all before the base-class parser runs.
     """
     # Clear the "currently inside ..." markers.
     self.start_table = False
     self.start_thead = False
     self.start_td = False
     self.start_tr = False
     # Fresh containers for this run.
     self.tables, self.table, self.tr = [], [], []
     self.data = ''
     self.noskip = noskip
     HTMLParser.feed(self, data)
开发者ID:xuhongxu96,项目名称:BNU-Schoolwork-Assist,代码行数:10,代码来源:bnujwc.py

示例6: feed

# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
    def feed(self, data):
        """
        Parse ``data`` and return ``self.struct.outmost``.

        ``self.struct.clear()`` is called before parsing (presumably so the
        result of an earlier call is discarded -- confirm against the
        ``struct`` implementation), then the base-class parser runs.
        """

        self.struct.clear()
        HTMLParser.feed(self, data)

        return self.struct.outmost
开发者ID:iogf,项目名称:ehp,代码行数:11,代码来源:ehp.py

示例7: parse_html

# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
def parse_html(rootParser, htmlPath, encoding='utf-8'):
    """Feed the file at ``htmlPath`` to a fresh ``HTMLParser`` line by line.

    ``rootParser`` is invoked as ``rootParser(parser, None, None, None)``
    before any data is fed, and its result is returned once the whole file
    has been processed.  Each line is stripped of surrounding whitespace
    before being fed.  The parser is not closed here.

    :param rootParser: callable receiving the parser and three ``None``s.
    :param htmlPath: path of the HTML file to read.
    :param encoding: text encoding used to decode the file
        (defaults to UTF-8).
    :returns: whatever ``rootParser`` returned.
    """
    htmlParser = HTMLParser()
    root = rootParser(htmlParser, None, None, None)

    # Read as text: HTMLParser.feed() needs str on Python 3, and feeding
    # the bytes lines of a binary-mode file raises TypeError.
    with open(htmlPath, 'r', encoding=encoding) as htmlFile:
        for line in htmlFile:
            htmlParser.feed(line.strip())

    return root
开发者ID:explosiveduck,项目名称:ed2d,代码行数:11,代码来源:markup.py

示例8: feed

# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
    def feed(self, data: str) -> None:
        """
        Feed some data to the parser.

        Can be called multiple times and feeding must be terminated with a
        call to :meth:`.close`.

        :param data: A string containing HTML.
        """
        # Pure delegation: nothing is added on top of the base-class call.
        HTMLParser.feed(self, data)
开发者ID:fnl,项目名称:libfnl,代码行数:12,代码来源:extract.py

示例9: strip_tags

# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
def strip_tags(html):
    """Return the text of ``html`` with the markup removed.

    Falsy input (``None``, ``''``) yields ``''``.  Otherwise the input is
    trimmed, parsed with a throwaway ``HTMLParser`` whose ``handle_data``
    hook is ``list.append``, and the collected text runs are joined.
    """
    if not html:
        return ''

    trimmed = html.strip().strip("\n")
    chunks = []
    extractor = HTMLParser()
    # Every text node reported by the parser lands in ``chunks``.
    extractor.handle_data = chunks.append
    extractor.feed(trimmed)
    extractor.close()
    return "".join(chunks)
开发者ID:lansheng228,项目名称:collipa,代码行数:13,代码来源:helpers.py

示例10: remove_html

# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
def remove_html(text):
    """Strip tag-like markup from ``text`` and decode ``&lt;`` / ``&gt;``.

    Anything matching ``<...>`` (with no nested ``<``) is removed first;
    only afterwards are the two entities replaced, so escaped markup such
    as ``&lt;b&gt;`` survives as the literal text ``<b>``.

    :param text: string that may contain HTML tags.
    :returns: the string without tags.
    """
    text = re.sub(r'<[^<]+?>', '', text)
    text = text.replace('&lt;', '<')
    text = text.replace('&gt;', '>')
    return text
开发者ID:luksireiku,项目名称:polaris,代码行数:15,代码来源:utils.py

示例11: remove_html

# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
def remove_html(text):
    """Strip tag-like markup from ``text`` and decode ``&lt;`` / ``&gt;``.

    Anything matching ``<...>`` (with no nested ``<``) is removed first;
    only afterwards are the two entities replaced, so escaped markup such
    as ``&lt;b&gt;`` survives as the literal text ``<b>``.

    :param text: string that may contain HTML tags.
    :returns: the string without tags.
    """
    text = re.sub(r"<[^<]+?>", "", text)
    text = text.replace("&lt;", "<")
    text = text.replace("&gt;", ">")
    return text
开发者ID:zhantyzgz,项目名称:polaris,代码行数:15,代码来源:utils.py

示例12: feed

# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
 def feed(self, chunk: str) -> None:
     """Feed a given chunk to the parser.

     Nothing happens once ``self.ok`` is false.  If the message's first
     ``content-type`` value is not in ``self.link_parseable_types``,
     ``self.ok`` is switched off and the chunk is dropped.  Otherwise a
     non-``str`` chunk is decoded with the message's character encoding
     (undecodable bytes ignored) and passed to ``HTMLParser.feed``.
     """
     if not self.ok:
         return

     content_type = self.message.parsed_headers.get('content-type', [None])[0]
     if content_type not in self.link_parseable_types:
         self.ok = False
         return

     try:
         if not isinstance(chunk, str):
             try:
                 chunk = chunk.decode(self.message.character_encoding, 'ignore')
             except LookupError:
                 # Unknown codec name: the chunk is left undecoded.
                 pass
         HTMLParser.feed(self, chunk)
     except BadErrorIReallyMeanIt:
         pass
     except Exception as why: # oh, well...
         # Any other failure is reported (if a callback is set) and counted.
         if self.err:
             self.err("feed problem: %s" % why)
         self.errors += 1
开发者ID:optionalg,项目名称:redbot,代码行数:22,代码来源:link_parse.py

示例13: Verb_Conjugate

# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
def Verb_Conjugate(verb):
    """Download the verbix.com page for a German verb and extract its tables.

    The verb is trimmed, lower-cased and has spaces replaced by ``+`` before
    being placed in the URL.  The page is fetched and parsed, and the slice
    of ``parser.data`` between the markers "Nominal Forms" and
    "Verbs conjugated like" is handed to ``Reorder`` together with the
    indicative and conjunctive tense names.

    NOTE(review): ``parser.data`` is not an attribute of the standard
    library ``HTMLParser``; this presumably relies on a project subclass of
    that name which accumulates text -- confirm.

    :param verb: the verb to look up.
    :returns: the value returned by ``Reorder``.
    :raises ValueError: if either marker cannot be located in the parsed
        page.
    """
    verb = verb.strip().replace(" ", "+").lower()
    print(repr(verb))
    address = "http://www.verbix.com/webverbix/German/{}.html".format(verb)

    # Percent-encode only the path component of the URL.
    parts = list(urllib.parse.urlsplit(address))
    parts[2] = urllib.parse.quote(parts[2])
    address = urllib.parse.urlunsplit(parts)

    with urlopen(address) as website:
        html = deumlautify(website.read()).decode("utf8")

    parser = HTMLParser()
    try:
        parser.feed(html)
    except Exception:
        # Best effort: keep whatever was parsed before the failure.
        # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt and
        # SystemExit propagate.
        pass

    try:
        index = parser.data.index("Nominal Forms")
        index2 = parser.data.index("Verbs conjugated like")
    except Exception as err:
        raise ValueError("Could not connect to Verbix or an invalid verb was passed in") from err

    data = reumlautify(parser.data[index:index2])
    indtenses = ["Present", "Perfect", "Past", "Pluperfect", "Future I", "Future II"]
    contenses = ["Present", "Perfect"]

    verb_entry = Reorder(data, indtenses, contenses)
    return verb_entry
开发者ID:TheGoomy42,项目名称:German-Verb-Conjugation-Quiz,代码行数:43,代码来源:Conjugate.py

示例14: feed

# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
    def feed(self, data):
        """
        Parse a SAMI document.

        :param data: Raw SAMI unicode string
        :returns: tuple ``(self.sami, self.styles, self.langs)``
        :raises CaptionReadSyntaxError: if the input looks like an HTML
            page, contains the "no closed captioning available" notice, or
            the underlying parser reports an ``HTMLParseError``
        """
        no_cc = 'no closed captioning available'
        lowered = data.lower()

        if '<html' in lowered:
            raise CaptionReadSyntaxError(
                'SAMI File seems to be an HTML file.')
        if no_cc in lowered:
            raise CaptionReadSyntaxError('SAMI File contains "%s"' % no_cc)

        # Look for a <style> tag, searching only the part before </head>
        # (prevents a BS4 error with huge SAMI files with unclosed tags).
        try:
            head_end = lowered.find("</head>")
            style_tag = BeautifulSoup(data[:head_end]).find('style')
            self.styles = self._css_parse(style_tag.get_text())
        except AttributeError:
            # No <style> tag was found (find() returned None).
            self.styles = {}

        # Fix erroneous italics tags, then the awkward ';>' tag endings
        # found in some SAMIs.
        data = data.replace('<i/>', '<i>').replace(';>', '>')

        try:
            HTMLParser.feed(self, data)
        except HTMLParseError as e:
            raise CaptionReadSyntaxError(e)

        # Emit a closing tag for everything still left in the queue.
        while self.queue != deque():
            pending = self.queue.pop()
            self.sami += "</%s>" % pending

        return self.sami, self.styles, self.langs
开发者ID:burakbostancioglu,项目名称:pycaption,代码行数:41,代码来源:sami.py

示例15: _check_valid_html

# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
def _check_valid_html(text):
    """Run ``text`` through a fresh ``HTMLParser`` from feed to close.

    Nothing is returned; the only observable outcome is an exception
    raised by the parser while feeding or closing, if any.
    """
    checker = HTMLParser()
    checker.feed(text)
    checker.close()
开发者ID:foolswood,项目名称:questioneer,代码行数:6,代码来源:test_examples_load.py


注:本文中的html.parser.HTMLParser.feed方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。