本文整理汇总了Python中six.moves.html_parser.HTMLParser类的典型用法代码示例。如果您正苦于以下问题：Python html_parser.HTMLParser的具体用法？Python html_parser.HTMLParser怎么用？Python html_parser.HTMLParser使用的例子？那么恭喜您，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块six.moves.html_parser的用法示例。
在下文中一共展示了html_parser.HTMLParser类的9个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: parse
# 需要导入模块: from six.moves import html_parser [as 别名]
# 或者: from six.moves.html_parser import HTMLParser [as 别名]
def parse(self, response):
    """Yield one ``dict(file_url=...)`` per JPEG image found in *response*.

    The response body is decoded as UTF-8 (undecodable bytes are ignored)
    and parsed with BeautifulSoup.  For every ``<div class="imgpt">`` the
    ``m`` attribute of its first anchor is HTML-unescaped and searched for
    a ``murl":"....jpg`` fragment; each match produces one result.

    Arguments:
        response -- object exposing the raw page bytes as ``.content``.

    Yields:
        dict with a single ``file_url`` key holding the image URL.
    """
    # HTMLParser.unescape() was removed in Python 3.9; html.unescape() is
    # its replacement.  Keep the old method as a fallback for Python 2.
    try:
        from html import unescape
    except ImportError:
        unescape = html_parser.HTMLParser().unescape

    soup = BeautifulSoup(
        response.content.decode('utf-8', 'ignore'), 'lxml')
    image_divs = soup.find_all('div', class_='imgpt')
    pattern = re.compile(r'murl\":\"(.*?)\.jpg')
    for div in image_divs:
        href_str = unescape(div.a['m'])
        match = pattern.search(href_str)
        if not match:
            continue
        # On Python 2 the captured group is unicode; encode it so the
        # formatted URL is a byte string there.
        name = (match.group(1)
                if six.PY3 else match.group(1).encode('utf-8'))
        img_url = '{}.jpg'.format(name)
        yield dict(file_url=img_url)
示例2: __init__
# 需要导入模块: from six.moves import html_parser [as 别名]
# 或者: from six.moves.html_parser import HTMLParser [as 别名]
def __init__(self):
    """Set up the base HTML parser and start with an empty capture state."""
    # Explicit base-class call (rather than super()) so this also works
    # where HTMLParser is an old-style class.
    html_parser.HTMLParser.__init__(self)
    # Text fragments collected so far.
    self.data = list()
    # Integer recording counter, initially zero.
    self.recording = 0
示例3: message
# 需要导入模块: from six.moves import html_parser [as 别名]
# 或者: from six.moves.html_parser import HTMLParser [as 别名]
def message(self, msg):
    """Process incoming message stanzas.

    Be aware that this also includes MUC messages and error messages. It is
    usually a good idea to check the message's type before processing or
    sending replies. If the message is the appropriate type, then the bot
    checks wikipedia to see if the message string exists as a page on the
    site. If so, it sends this link back to the sender in the reply.

    Arguments:
        msg -- The received message stanza. See the SleekXMPP documentation
               for stanza objects and the Message stanza to see how it may
               be used.
    """
    if msg['type'] not in ('chat', 'normal'):
        return

    # HTMLParser.unescape() was removed in Python 3.9; html.unescape() is
    # its replacement.  Keep the old method as a fallback for Python 2.
    try:
        from html import unescape
    except ImportError:
        unescape = html_parser.HTMLParser().unescape

    msg_body = msg['body']
    # NOTE(review): ``urllib.quote_plus`` only exists on Python 2's urllib
    # module; confirm how ``urllib`` is imported in this file before running
    # on Python 3.
    encoded_body = urllib.quote_plus(msg_body)
    response = requests.get(
        'https://en.wikipedia.org/w/api.php?'
        'action=query&list=search&format=json&srprop=snippet&'
        'srsearch={}'.format(encoded_body))
    doc = json.loads(response.content)
    results = doc.get('query', {}).get('search')
    if not results:
        msg.reply('I wasn\'t able to locate info on "{}" Sorry'.format(
            msg_body)).send()
        return

    # Only the top search hit is reported back.
    snippet = results[0]['snippet']
    title = urllib.quote_plus(results[0]['title'])
    # Strip out html tags first, then decode the remaining entities.
    snippet = unescape(re.sub(r'<[^>]*>', '', snippet))
    msg.reply(u'{}...\n(http://en.wikipedia.org/w/?title={})'.format(
        snippet, title)).send()
示例4: _string_data
# 需要导入模块: from six.moves import html_parser [as 别名]
# 或者: from six.moves.html_parser import HTMLParser [as 别名]
def _string_data(data, data_type):
    """Replace various object types with string representations.

    Arguments:
        data -- the value to render.
        data_type -- ``'json'``, ``'xml'`` or ``'yaml'``; any other value
            returns *data* unchanged.

    Returns:
        For ``'json'`` the ``json.dumps`` output, for ``'yaml'`` the
        ``yaml.dump`` output.  For ``'xml'`` a ``str`` is returned as-is,
        anything else is serialised with ``ElementTree.tostring`` and then
        HTML-unescaped.
    """
    if data_type == 'json':
        return json.dumps(data)
    if data_type == 'xml':
        if isinstance(data, str):
            return data
        # HTMLParser.unescape() was removed in Python 3.9; html.unescape()
        # is its replacement.  Keep the old method as a Python 2 fallback.
        try:
            from html import unescape
        except ImportError:
            unescape = html_parser.HTMLParser().unescape
        str_data = ElementTree.tostring(data)
        # No way to stop tostring from HTML escaping even if we wanted,
        # so undo the escaping afterwards.
        return unescape(str_data.decode())
    if data_type == 'yaml':
        return yaml.dump(data)
    return data
示例5: __init__
# 需要导入模块: from six.moves import html_parser [as 别名]
# 或者: from six.moves.html_parser import HTMLParser [as 别名]
def __init__(self):
    """Set up the base HTML parser with no collected data yet."""
    # Explicit base-class call (rather than super()) so this also works
    # where HTMLParser is an old-style class.
    html_parser.HTMLParser.__init__(self)
    # Collected values, initially empty.
    self.data = []
    # Boolean flag, initially False.
    self._in_td = False
示例6: get_saml_token
# 需要导入模块: from six.moves import html_parser [as 别名]
# 或者: from six.moves.html_parser import HTMLParser [as 别名]
def get_saml_token(session, username, password, saml_cfg_id):
    """
    Log into LastPass and retrieve a SAML token for a given
    SAML configuration.

    Arguments:
        session -- HTTP session used to fetch the IdP-initiated login page.
        username -- not used by this function's body.
        password -- not used by this function's body.
        saml_cfg_id -- integer id of the SAML configuration to launch.

    Returns:
        The base64-decoded ``SAMLResponse`` form field.

    Raises:
        ValueError -- if the returned form has no action; the message
            includes any ``<h2>`` error text scraped from the page.
    """
    logger.debug("Getting SAML token")

    # now logged in, grab the SAML token from the IdP-initiated login
    idp_login = '%s/saml/launch/cfg/%d' % (LASTPASS_SERVER, saml_cfg_id)
    r = session.get(idp_login, verify=should_verify())

    form = extract_form(r.text)
    if not form['action']:
        # HTMLParser.unescape() was removed in Python 3.9; html.unescape()
        # is its replacement.  Keep the old method as a Python 2 fallback.
        try:
            from html import unescape
        except ImportError:
            unescape = html_parser.HTMLParser().unescape

        # try to scrape the error message just to make it more user friendly
        error = ""
        for line in r.text.splitlines():
            match = re.search(r'<h2>(.*)</h2>', line)
            if match:
                msg = unescape(match.group(1))
                msg = msg.replace("<br/>", "\n")
                msg = msg.replace("<b>", "")
                msg = msg.replace("</b>", "")
                # If several lines match, the last one wins.
                error = "\n" + msg

        raise ValueError("Unable to find SAML ACS" + error)

    return b64decode(form['fields']['SAMLResponse'])
示例7: strip_html
# 需要导入模块: from six.moves import html_parser [as 别名]
# 或者: from six.moves.html_parser import HTMLParser [as 别名]
def strip_html(html):
    """Return the text content of *html* with all tags removed.

    Arguments:
        html -- markup string to strip.

    Returns:
        The concatenation of every text node the parser reports.  On
        Python 3.5+ the parser's default ``convert_charrefs=True`` means
        character references such as ``&amp;`` arrive already decoded.
    """

    class MLStripper(HTMLParser):
        """HTML parser that keeps text nodes and discards markup."""

        def __init__(self):
            # The base initialiser must run: on Python 3 it sets
            # ``convert_charrefs`` (read by ``feed``) before calling
            # ``reset()``.  Calling only ``reset()`` raises AttributeError
            # on the first ``feed``.  The explicit form is used instead of
            # super() so Python 2's old-style HTMLParser still works.
            HTMLParser.__init__(self)
            self.strict = False
            self.fed = []

        def handle_data(self, d):
            self.fed.append(d)

        def get_data(self):
            return ''.join(self.fed)

    p = MLStripper()
    p.feed(html)
    # Flush any text the parser is still buffering at end of input.
    p.close()
    return p.get_data()
示例8: _get_field
# 需要导入模块: from six.moves import html_parser [as 别名]
# 或者: from six.moves.html_parser import HTMLParser [as 别名]
def _get_field(self, field, default=''):
    """Return the HTML-unescaped value of *field* from ``self.params``.

    Arguments:
        field -- key to look up in ``self.params``.
        default -- value used when *field* is absent.

    Returns:
        The unescaped value.  When the stored value is a list, only its
        first element is used.
    """
    # HTMLParser.unescape() was removed in Python 3.9; html.unescape() is
    # its replacement.  Keep the old method as a fallback for Python 2.
    try:
        from html import unescape
    except ImportError:
        unescape = HTMLParser().unescape

    val = self.params.get(field, [default])
    val = val[0] if isinstance(val, list) else val
    return unescape(val)
示例9: _highlight
# 需要导入模块: from six.moves import html_parser [as 别名]
# 或者: from six.moves.html_parser import HTMLParser [as 别名]
def _highlight(html):
    """Syntax-highlights HTML-rendered Markdown.

    Plucks sections to highlight that conform to the GitHub fenced code info
    string as defined at https://github.github.com/gfm/#info-string.

    Args:
        html (str): The rendered HTML.

    Returns:
        str: The HTML with Pygments syntax highlighting applied to all code
            blocks.
    """
    # HTMLParser.unescape() was removed in Python 3.9; html.unescape() is
    # its replacement.  Imported under an alias because the ``html``
    # parameter shadows the module name.  Python 2 keeps the old method.
    try:
        from html import unescape as _unescape
    except ImportError:
        _unescape = html_parser.HTMLParser().unescape

    formatter = pygments.formatters.HtmlFormatter(nowrap=True)

    code_expr = re.compile(
        r'<pre><code class="language-(?P<lang>.+?)">(?P<code>.+?)'
        r'</code></pre>', re.DOTALL)

    def replacer(match):
        try:
            lang = match.group('lang')
            lang = _LANG_ALIASES.get(lang, lang)
            lexer = pygments.lexers.get_lexer_by_name(lang)
        except ValueError:
            # Unknown language: fall back to plain-text lexing.
            lexer = pygments.lexers.TextLexer()

        code = match.group('code')

        # Decode html entities in the code. cmark tries to be helpful and
        # translate '"' to '&quot;', but it confuses pygments. Pygments will
        # escape any html entities when re-writing the code, and we run
        # everything through bleach after.
        code = _unescape(code)

        highlighted = pygments.highlight(code, lexer, formatter)

        return '<pre>{}</pre>'.format(highlighted)

    result = code_expr.sub(replacer, html)

    return result