本文整理汇总了Python中html.parser.HTMLParser类的典型用法代码示例。如果您正苦于以下问题:Python parser.HTMLParser的具体用法?Python parser.HTMLParser怎么用?Python parser.HTMLParser使用的例子?那么恭喜您,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块html.parser的用法示例。
在下文中一共展示了parser.HTMLParser类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __init__
# 需要导入模块: from html import parser [as 别名]
# 或者: from html.parser import HTMLParser [as 别名]
def __init__(self, html):
    """Initialise the parser state and parse ``html`` straight away.

    Every bookkeeping attribute is created before the base-class
    initialiser runs. The document is then fed to the parser and the
    parser is closed, so parsing has finished when construction returns.

    Args:
        html: the HTML text handed to ``feed``.
    """
    self._messages = []

    # Indentation tracking.
    self._last_indent = 0
    self._last_data_position = (0, 1)
    self._last_data = ''

    # State used to decide whether a charset tag should be required.
    self._has_charset = False
    self._first_meta_line_col = self._after_head_line_col = None

    # Extension of the HTMLParser feature set.
    self._endtag_text = None

    HTMLParser.HTMLParser.__init__(self)
    # Parsers that expose a ``strict`` attribute (some Python 3 versions)
    # are switched to non-strict mode.
    if hasattr(self, 'strict'):
        self.strict = False

    self.feed(html)
    self.close()
示例2: resolveParseResult
# 需要导入模块: from html import parser [as 别名]
# 或者: from html.parser import HTMLParser [as 别名]
def resolveParseResult(self, result, itemName):
    """Return the cleaned-up first value stored under ``itemName``.

    The value is taken from ``result[itemName][0]``, passed through
    ``util.html_unescape``, stripped of surrounding whitespace and finally
    HTML-unescaped once more with the standard library.

    Args:
        result: mapping from item name to a sequence of values.
        itemName: key to look up in ``result``.

    Returns:
        The cleaned value. If a step fails, the failure is logged and the
        value reached so far is returned ("" if the lookup itself failed).
    """
    # HTMLParser.unescape() was removed in Python 3.9; html.unescape() is
    # its documented replacement (available since Python 3.4).
    from html import unescape

    resultValue = ""
    try:
        resultValue = result[itemName][0]
        resultValue = util.html_unescape(resultValue)
        resultValue = resultValue.strip()
        # unescape ugly html encoding from websites
        resultValue = unescape(resultValue)
    except Exception as e:
        log.warn("Error while resolving item: {0} : {1} {2}".format(itemName, type(e), str(e)))

    try:
        log.debug("Result " + itemName + " = " + resultValue)
    except Exception:
        # Debug logging is best-effort (e.g. the value may not be a str);
        # it must never prevent the value from being returned.
        pass

    return resultValue
示例3: get_attribute_line_column
# 需要导入模块: from html import parser [as 别名]
# 或者: from html.parser import HTMLParser [as 别名]
def get_attribute_line_column(tag_definition, line, column, attribute):
    """Returns the line and column of the provided attribute.

    Args:
        tag_definition: str with the definition of the tag.
        line: line where the tag starts.
        column: column where the tag starts (1-based).
        attribute: str representing the attribute to find.

    Return:
        A (line, column) tuple representing the position of the attribute.

    Raises:
        AssertionError: if no attribute in tag_definition matches.
    """
    for match in HTMLParser.attrfind.finditer(tag_definition):
        # The name found in the tag is lower-cased before the comparison.
        if match.group(1).lower() == attribute:
            return get_line_column(tag_definition, line, column, match.start(1))
    # Raised explicitly instead of ``assert False`` so the failure is still
    # reported when Python runs with -O (which strips assert statements and
    # would otherwise let this function return None).
    raise AssertionError(
        'Could not find the requested attribute %s' % attribute)
示例4: note_msg
# 需要导入模块: from html import parser [as 别名]
# 或者: from html.parser import HTMLParser [as 别名]
def note_msg(msg):
    """Print a note message and forward the message it revoked, if any.

    The message's ``Content`` is HTML-unescaped and parsed as XML. When it
    contains a ``revokemsg`` element whose ``msgid`` is found in
    ``msg_store``, the stored message is sent to ``'filehelper'``.

    Args:
        msg: mapping with at least a ``'Content'`` entry.
    """
    # HTMLParser.unescape() was removed in Python 3.9; html.unescape() is
    # its documented replacement (available since Python 3.4).
    from html import unescape

    print_msg(get_whole_msg(msg))
    content = unescape(msg['Content'])
    try:
        content_tree = ETree.fromstring(content)
    except Exception:
        # Content that cannot be parsed as XML is ignored
        # (presumably chatroom invite/remove notices - TODO confirm).
        return
    if content_tree is None:
        return

    revoked = content_tree.find('revokemsg')
    if revoked is None:
        return

    # A revoke notice without a <msgid> child cannot be resolved; bail out
    # instead of failing on ``None.text``.
    msgid_node = revoked.find('msgid')
    if msgid_node is None:
        return

    old_msg = msg_store.get(msgid_node.text)
    if old_msg is None:
        return

    for m in get_whole_msg(old_msg, download=True):
        bot.send(m, toUserName='filehelper')
    clear_timeouted_message()
示例5: _scrape_tokens
# 需要导入模块: from html import parser [as 别名]
# 或者: from html.parser import HTMLParser [as 别名]
def _scrape_tokens(self):
    """Scrape JCDS upload URL and upload access token from the jamfcloud instance.

    Reads the packages page through ``self.connection['jss']`` and stores
    the results in ``self.connection`` under ``'jcds_base_url'``,
    ``'jcds_upload_token'`` and ``'url'``. Only the base URL is
    HTML-unescaped; the upload token is stored exactly as scraped.

    Raises:
        JSSError: if the base URL or the upload token is not on the page.
    """
    # HTMLParser.unescape() was removed in Python 3.9; html.unescape() is
    # its documented replacement (available since Python 3.4).
    from html import unescape

    jss = self.connection['jss']
    response = jss.scrape('legacy/packages.html?id=-1&o=c')
    # Decode once; both attributes are searched in the same text.
    page = response.content.decode("utf-8")

    matches = re.search(r'data-base-url="([^"]*)"', page)
    if matches is None:
        raise JSSError('Did not find the JCDS base URL on the packages page. Is this actually Jamfcloud?')
    jcds_base_url = unescape(matches.group(1))

    matches = re.search(r'data-upload-token="([^"]*)"', page)
    if matches is None:
        raise JSSError('Did not find the JCDS upload token on the packages page. Is this actually Jamfcloud?')
    jcds_upload_token = matches.group(1)

    self.connection['jcds_base_url'] = jcds_base_url
    self.connection['jcds_upload_token'] = jcds_upload_token
    self.connection["url"] = jcds_base_url  # This is to make JSSImporter happy because it accesses .connection
示例6: search_ep
# 需要导入模块: from html import parser [as 别名]
# 或者: from html.parser import HTMLParser [as 别名]
def search_ep(self, titles, season, episode, year):
    """Return the first title for which the remote search gives a result.

    Each title is posted to the search script together with the season
    and episode; the first non-empty (HTML-unescaped) response wins.

    Args:
        titles: iterable of candidate titles.
        season: season value sent as ``'sezon'``.
        episode: episode value sent as ``'odcinek'``.
        year: not used by this function.

    Returns:
        A ``(title, season, episode)`` tuple, or None when no title gives
        a result or any error occurs.
    """
    # HTMLParser.unescape() was removed in Python 3.9; html.unescape() is
    # its documented replacement (available since Python 3.4).
    from html import unescape

    try:
        for title in titles:
            data = {
                'fid_name': title,
                'sezon': season,
                'odcinek': episode,
                'title': title,
            }
            # NOTE(review): no timeout is set on this request.
            result = requests.post('http://178.19.110.218/forumserialeco/skrypt/szukaj3.php', data=data).content
            result = unescape(result.decode('utf-8'))
            if result:
                return title, season, episode
    except Exception:
        # Best-effort lookup: any failure means "not found". Unlike a bare
        # ``except:``, this lets KeyboardInterrupt/SystemExit propagate.
        return None
    return None
示例7: unescape
# 需要导入模块: from html import parser [as 别名]
# 或者: from html.parser import HTMLParser [as 别名]
def unescape(html_text):
    """Convert HTML character references in ``html_text`` to characters.

    ``html.unescape`` is used on Python 3.4 and newer; every older
    interpreter goes through ``HTMLParser().unescape``.
    """
    if sys.version_info < (3, 4):
        return HTMLParser().unescape(html_text)
    return html.unescape(html_text)
# ------------------------------------------------------------------------------- #
# Decode Brotli on older versions of urllib3 manually
# ------------------------------------------------------------------------------- #
示例8: parse_html
# 需要导入模块: from html import parser [as 别名]
# 或者: from html.parser import HTMLParser [as 别名]
def parse_html(html_value):
    """Parse HTML entities.

    Args:
        html_value: text that may contain HTML character references.

    Returns:
        ``html_value`` with the character references replaced.
    """
    try:  # Python 3.4+
        # HTMLParser.unescape() was removed in Python 3.9, so the
        # module-level function is preferred whenever it exists.
        from html import unescape
    except ImportError:
        try:  # Python 2
            from HTMLParser import HTMLParser
        except ImportError:  # Python 3.0 - 3.3
            from html.parser import HTMLParser
        unescape = HTMLParser().unescape
    return unescape(html_value)
示例9: get_steps
# 需要导入模块: from html import parser [as 别名]
# 或者: from html.parser import HTMLParser [as 别名]
def get_steps(protocol_id):
    """
    Get steps of a protocol

    If a file named after the step's software exists under
    ``<workspace>/<user_id>/bin``, that path replaces ``step.software``
    before the parameter string is built.

    :param protocol_id: int, protocol id
    :return: list, list of unresolved steps
    """
    # HTMLParser.unescape() was removed in Python 3.9; html.unescape() is
    # its documented replacement (available since Python 3.4).
    from html import unescape

    steps = Protocol.objects.filter(parent=protocol_id).order_by('step_order')
    workspace_path = settings['env']['workspace']

    step_list = []
    for index, step in enumerate(steps):
        # priority for self-compiled tool
        software_path = os.path.join(workspace_path, str(step.user_id), 'bin', str(step.software))
        # isfile() is already False for paths that do not exist.
        if os.path.isfile(software_path):
            step.software = software_path
        step_list.append({
            'id': index,
            'parameter': unescape(str(step.software).rstrip() + " " + str(step.parameter)),
            'specify_output': step.specify_output,
            'hash': step.hash,
            'env': step.env,
            'force_local': step.force_local,
        })
    return step_list
示例10: get_email_links
# 需要导入模块: from html import parser [as 别名]
# 或者: from html.parser import HTMLParser [as 别名]
def get_email_links(auth, message, link_regexp, download=False):
    """Yield the links found in the body parts of an email message.

    Links are collected from every part that carries body data
    (``text/plain`` as-is, ``text/html`` after HTML-unescaping), then
    de-duplicated and optionally filtered.

    Args:
        auth: not used by this function.
        message: mapping with a ``'payload'`` entry; the payload either
            has ``'parts'`` or is itself treated as the single part.
        link_regexp: optional pattern; only links it matches (from the
            start of the link) are kept.
        download: when true, yield ``(filename, BytesIO)`` pairs with the
            downloaded content instead of the link itself.

    Yields:
        Each link, or a ``(filename, BytesIO)`` pair when ``download`` is set.
    """
    # HTMLParser.unescape() was removed in Python 3.9; html.unescape() is
    # its documented replacement (available since Python 3.4).
    from html import unescape

    links = []
    link_filter = re.compile(r'%s' % link_regexp) if link_regexp else None

    try:
        for part in message['payload'].get('parts', []) or [message['payload']]:
            if 'data' in part['body']:
                data = part['body']['data']
                content = base64.urlsafe_b64decode(data).decode('utf-8')

                # plain text may be different than html
                if part['mimeType'] == 'text/plain':
                    links.extend(parse_url(content))

                # html needs to decode links
                elif part['mimeType'] == 'text/html':
                    links.extend(unescape(link) for link in parse_url(content))
    except HttpError as error:
        print('EMAIL LINK ERROR: %s' % error)

    # remove duplicates
    links = _list_unique(links)

    # filter links
    if link_filter:
        links = [link for link in links if link_filter.match(link)]

    # for downloads convert links into files and data
    for link in links:
        if not download:
            yield link
            continue
        # The download happens outside the ``yield`` so the handler cannot
        # swallow GeneratorExit, and the failure is actually printed (the
        # original evaluated the message as a bare tuple and dropped it).
        try:
            item = parse_filename(link, url=True), BytesIO(urlopen(link).read())
        except Exception:
            print('ERROR: Unable To Download', link)
            continue
        yield item
示例11: feed
# 需要导入模块: from html import parser [as 别名]
# 或者: from html.parser import HTMLParser [as 别名]
def feed(self, data):
    """Feed ``data`` to the base parser after rewriting one marker.

    Every occurrence of the literal text ``</' + 'script>`` is replaced
    with ``</ignore>`` before the base-class ``feed`` sees it.
    """
    cleaned = data.replace("</' + 'script>", "</ignore>")
    HTMLParser.HTMLParser.feed(self, cleaned)
示例12: close
# 需要导入模块: from html import parser [as 别名]
# 或者: from html.parser import HTMLParser [as 别名]
def close(self):
    """Finish parsing and return the accumulated output text.

    After the base parser is closed, pending output is flushed through
    ``pbr`` and ``o``, the collected pieces are joined with the current
    ``outtext`` as separator, and the placeholder text is replaced by the
    character chosen according to ``unicode_snob``.

    Returns:
        The final value of ``self.outtext``.
    """
    HTMLParser.HTMLParser.close(self)

    self.pbr()
    self.o('', 0, 'end')

    self.outtext = self.outtext.join(self.outtextlist)
    nbsp = unichr(name2cp('nbsp')) if self.unicode_snob else u' '
    self.outtext = self.outtext.replace(u' _place_holder;', nbsp)

    return self.outtext
示例13: unescape
# 需要导入模块: from html import parser [as 别名]
# 或者: from html.parser import HTMLParser [as 别名]
def unescape(code):
    """Utility function to unescape a string with HTML entities.

    Args:
        code: text that may contain HTML character references.

    Returns:
        ``code`` with the character references replaced.
    """
    try:
        # HTMLParser.unescape() was removed in Python 3.9; html.unescape()
        # is its documented replacement (available since Python 3.4).
        from html import unescape as _html_unescape
    except ImportError:
        # Older interpreters: keep the original parser-based behaviour.
        return HTMLParser.HTMLParser().unescape(code)
    return _html_unescape(code)
示例14: get_value_line_column
# 需要导入模块: from html import parser [as 别名]
# 或者: from html.parser import HTMLParser [as 别名]
def get_value_line_column(tag_definition, line, column, attribute):
    """Returns the line and column of the value of the provided attribute.

    Args:
        tag_definition: str with the definition of the tag.
        line: line where the tag starts.
        column: column where the tag starts (1-based).
        attribute: str representing the attribute for which we want its value.

    Return:
        A (line, column) tuple representing the position of the value.

    Raises:
        AssertionError: if no attribute in tag_definition matches.
    """
    for match in HTMLParser.attrfind.finditer(tag_definition):
        if match.group(1).lower() == attribute:
            if not match.group(3):
                # No value: use the position right after the attribute name.
                pos = match.end(1)
            elif match.group(3)[0] in '"\'':
                # Quoted value: skip the opening quote.
                pos = match.start(3) + 1
            else:
                pos = match.start(3)
            return get_line_column(tag_definition, line, column, pos)
    # Raised explicitly instead of ``assert False`` so the failure is still
    # reported when Python runs with -O (which strips assert statements and
    # would otherwise let this function return None).
    raise AssertionError(
        'Could not find the requested attribute %s' % attribute)
# pylint: disable=too-many-public-methods
示例15: parse_endtag
# 需要导入模块: from html import parser [as 别名]
# 或者: from html.parser import HTMLParser [as 别名]
def parse_endtag(self, i):
    """Record the raw end tag at position ``i``, then parse it as usual.

    ``self._endtag_text`` receives the exact ``</tag>`` text found at ``i``
    in ``self.rawdata``, or None when the end-tag pattern does not match
    there. The actual parsing is left to the base-class method.

    Returns:
        Whatever the base-class ``parse_endtag`` returns.
    """
    found = HTMLParser.endtagfind.match(self.rawdata, i)  # </ + tag + >
    self._endtag_text = found.group(0) if found else None
    return HTMLParser.HTMLParser.parse_endtag(self, i)