本文整理汇总了Python中html.parser.HTMLParser.feed方法的典型用法代码示例。如果您正苦于以下问题:Python HTMLParser.feed方法的具体用法?Python HTMLParser.feed怎么用?Python HTMLParser.feed使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类html.parser.HTMLParser
的用法示例。
在下文中一共展示了HTMLParser.feed方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _strip_tags
# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
def _strip_tags(self, html):
result = []
parser = HTMLParser()
parser.handle_data = result.append
parser.feed(html)
parser.close()
return ''.join(result)
示例2: feed
# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
def feed(self, data):
    """
    Purify *data* and return the cleaned HTML (overrides HTMLParser.feed).

    :param data: HTML string to purify.
    :returns: the purified markup produced by ``self.html()``.
    """
    self.reset_purified()  # drop output accumulated by a previous feed
    HTMLParser.feed(self, data)  # run the inherited tokenizer over the input
    return self.html()
示例3: parse_html_data
# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
def parse_html_data(rootParser, htmlData):
    """Feed *htmlData* into an HTMLParser line by line and return the root.

    :param rootParser: factory invoked as ``rootParser(parser, None, None, None)``
        to build the root object bound to the parser.
    :param htmlData: the HTML document as a single string.
    :returns: the root object produced by *rootParser*.
    """
    parser = HTMLParser()
    root = rootParser(parser, None, None, None)
    for raw_line in htmlData.split('\n'):
        parser.feed(raw_line.strip())
    return root
示例4: feed
# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
def feed(self, bytesdata):
    """Decode *bytesdata* as latin1 and feed it to the parser.

    An empty/falsy chunk signals end of input and closes the parser.
    """
    if not bytesdata:
        self.close()
        return
    text = bytesdata.decode('latin1')
    # `py3` is a module-level flag (defined elsewhere in this file) selecting
    # between the Python 3 and Python 2 super-call spellings.
    if py3:
        super().feed(text)
    else:
        HTMLParser.feed(self, text)
示例5: feed
# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
def feed(self, data, noskip = False):
    """Reset per-document table state, then parse *data*.

    :param data: HTML string to parse.
    :param noskip: flag stored on the instance for handlers to consult.
    """
    # Clear every tag-tracking flag before a fresh parse.
    self.start_table = False
    self.start_thead = False
    self.start_td = False
    self.start_tr = False
    # Fresh accumulators for the document about to be parsed.
    self.tables = []
    self.table = []
    self.tr = []
    self.data = ''
    self.noskip = noskip
    HTMLParser.feed(self, data)
示例6: feed
# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
def feed(self, data):
    """
    Parse *data* into ``self.struct`` and return its outermost element.

    The structure accumulator is cleared first, so each call parses a
    fresh document instead of appending to the previous one.
    """
    self.struct.clear()
    HTMLParser.feed(self, data)
    return self.struct.outmost
示例7: parse_html
# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
def parse_html(rootParser, htmlPath):
    """Parse the HTML file at *htmlPath* line by line and return the root.

    :param rootParser: factory invoked as ``rootParser(htmlParser, None, None, None)``
        to build the root object bound to the parser.
    :param htmlPath: path of the HTML file to read.
    :returns: the root object produced by *rootParser*.
    """
    htmlParser = HTMLParser()
    root = rootParser(htmlParser, None, None, None)
    # Bug fix: the file was opened in binary mode ('rb'), so bytes lines were
    # handed to HTMLParser.feed(), which requires str and raises TypeError.
    # Open in text mode; errors='replace' keeps the parse best-effort for
    # files that are not valid UTF-8.
    with open(htmlPath, 'r', encoding='utf-8', errors='replace') as htmlFile:
        for line in htmlFile:
            htmlParser.feed(line.strip())
    return root
示例8: feed
# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
def feed(self, data: str) -> None:
    """
    Feed some data to the parser.

    Can be called multiple times and feeding must be terminated with a
    call to :meth:`.close`.

    :param data: A string containing HTML.
    """
    # Delegate straight to the base-class incremental parser.
    HTMLParser.feed(self, data)
示例9: strip_tags
# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
def strip_tags(html):
    """Return the text content of *html*; '' for empty/None input."""
    if not html:
        return ''
    # Trim surrounding whitespace before parsing.
    cleaned = html.strip().strip("\n")
    fragments = []
    extractor = HTMLParser()
    # Route every text node into `fragments` by replacing handle_data.
    extractor.handle_data = fragments.append
    extractor.feed(cleaned)
    extractor.close()
    return "".join(fragments)
示例10: remove_html
# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
def remove_html(text):
    """Strip HTML tags from *text* and decode the &lt;/&gt; entities.

    :param text: markup string (may contain tags and entity references).
    :returns: plain text with tags removed and angle-bracket entities decoded.
    """
    # '<[^<]+?>' deliberately refuses to cross a second '<', so a stray
    # unmatched '<' in the text is left alone.
    text = re.sub('<[^<]+?>', '', text)
    # Bug fix: the entity replacements had been HTML-unescaped into no-ops
    # ("replace('<', '<')"); restore the intended &lt;/&gt; decoding.
    text = text.replace('&lt;', '<')
    text = text.replace('&gt;', '>')
    return text
    # Dead code removed: an HTMLParser-based variant followed the return and
    # could never execute (and ''.join(s.fed) would always have yielded '',
    # since nothing ever appended to s.fed).
示例11: remove_html
# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
def remove_html(text):
    """Strip HTML tags from *text* and decode the &lt;/&gt; entities.

    :param text: markup string (may contain tags and entity references).
    :returns: plain text with tags removed and angle-bracket entities decoded.
    """
    # "<[^<]+?>" deliberately refuses to cross a second "<", so a stray
    # unmatched "<" in the text is left alone.
    text = re.sub("<[^<]+?>", "", text)
    # Bug fix: the entity replacements had been HTML-unescaped into no-ops
    # ('replace("<", "<")'); restore the intended &lt;/&gt; decoding.
    text = text.replace("&lt;", "<")
    text = text.replace("&gt;", ">")
    return text
    # Dead code removed: an HTMLParser-based variant followed the return and
    # could never execute (and "".join(s.fed) would always have yielded "",
    # since nothing ever appended to s.fed).
示例12: feed
# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
def feed(self, chunk: str) -> None:
    """Feed a chunk to the parser when the response is a link-parseable type.

    Bytes chunks are decoded with the message's declared character encoding
    before being handed to ``HTMLParser.feed``. Parsing errors are reported
    through ``self.err`` (if set) and counted in ``self.errors``.
    """
    if not self.ok:
        return  # a previous problem disabled parsing for this message
    # Only parse bodies whose media type is in link_parseable_types.
    if self.message.parsed_headers.get('content-type', [None])[0] in self.link_parseable_types:
        try:
            if not isinstance(chunk, str):
                try:
                    # Decode bytes with the declared encoding; drop undecodable bytes.
                    chunk = chunk.decode(self.message.character_encoding, 'ignore')
                except LookupError:
                    pass  # unknown codec name — feed the chunk as-is, best effort
            HTMLParser.feed(self, chunk)
        except BadErrorIReallyMeanIt:
            # NOTE(review): presumably a sentinel raised elsewhere to abort
            # parsing silently — confirm against its definition.
            pass
        except Exception as why:  # oh, well...
            # Report through the error callback when installed; the error is
            # counted either way.
            if self.err:
                self.err("feed problem: %s" % why)
            self.errors += 1
    else:
        # Body is not a parseable media type — stop parsing this message.
        self.ok = False
示例13: Verb_Conjugate
# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
def Verb_Conjugate(verb):
    """Scrape verbix.com for the German conjugation table of *verb*.

    :param verb: infinitive to look up; spaces become '+', lower-cased.
    :returns: the verb entry produced by ``Reorder`` from the scraped table.
    :raises ValueError: when the page cannot be parsed or does not contain
        the expected "Nominal Forms" ... "Verbs conjugated like" section.
    """
    verb = verb.strip().replace(" ", "+").lower()
    print(repr(verb))
    address = "http://www.verbix.com/webverbix/German/{}.html".format(verb)
    # Percent-encode only the path component so non-ASCII verbs form a valid URL.
    address = urllib.parse.urlsplit(address)
    address = list(address)
    address[2] = urllib.parse.quote(address[2])
    address = urllib.parse.urlunsplit(address)
    with urlopen(address) as website:
        html = deumlautify(website.read()).decode("utf8")
    parser = HTMLParser()
    try:
        parser.feed(html)
    except Exception:
        # Bug fix: was a bare `except:` that also swallowed SystemExit and
        # KeyboardInterrupt; best-effort parsing of a malformed page only
        # needs to ignore Exception.
        pass
    try:
        index = parser.data.index("Nominal Forms")
        index2 = parser.data.index("Verbs conjugated like")
    except Exception:
        # Bug fix: narrowed from a bare `except:` for the same reason.
        raise ValueError("Could not connect to Verbix or an invalid verb was passed in")
    data = reumlautify(parser.data[index:index2])
    indtenses = ["Present", "Perfect","Past","Pluperfect", "Future I","Future II"]
    contenses = ["Present", "Perfect"]
    verb_entry = Reorder(data, indtenses, contenses)
    return verb_entry
示例14: feed
# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
def feed(self, data):
    """
    Parse a raw SAMI document, building markup, styles and languages.

    :param data: Raw SAMI unicode string
    :returns: tuple (unicode, dict, set) — (self.sami, self.styles, self.langs)
    :raises CaptionReadSyntaxError: if the input looks like an HTML page,
        contains the "no closed captioning available" marker, or fails
        HTML parsing.
    """
    no_cc = 'no closed captioning available'
    # Reject inputs that are clearly not SAMI captions.
    if '<html' in data.lower():
        raise CaptionReadSyntaxError(
            'SAMI File seems to be an HTML file.')
    elif no_cc in data.lower():
        raise CaptionReadSyntaxError('SAMI File contains "%s"' % no_cc)
    # try to find style tag in SAMI
    try:
        # prevent BS4 error with huge SAMI files with unclosed tags:
        # only the portion up to </head> is handed to BeautifulSoup
        index = data.lower().find("</head>")
        self.styles = self._css_parse(
            BeautifulSoup(data[:index]).find('style').get_text())
    except AttributeError:
        # no <style> tag (find() returned None) — fall back to no styles
        self.styles = {}
    # fix erroneous italics tags
    data = data.replace('<i/>', '<i>')
    # fix awkward tags found in some SAMIs
    data = data.replace(';>', '>')
    try:
        HTMLParser.feed(self, data)
    except HTMLParseError as e:
        # surface parser failures as the caption-domain exception
        raise CaptionReadSyntaxError(e)
    # close any tags that remain in the queue
    while self.queue != deque([]):
        closing_tag = self.queue.pop()
        self.sami += "</%s>" % closing_tag
    return self.sami, self.styles, self.langs
示例15: _check_valid_html
# 需要导入模块: from html.parser import HTMLParser [as 别名]
# 或者: from html.parser.HTMLParser import feed [as 别名]
def _check_valid_html(text):
p = HTMLParser()
p.feed(text)
p.close()