当前位置: 首页>>代码示例>>Python>>正文


Python parser.HTMLParser方法代码示例

本文整理汇总了Python中html.parser.HTMLParser方法的典型用法代码示例。如果您正苦于以下问题：Python parser.HTMLParser方法的具体用法？Python parser.HTMLParser怎么用？Python parser.HTMLParser使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块html.parser的用法示例。


在下文中一共展示了parser.HTMLParser方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: __init__

# 需要导入模块: from html import parser [as 别名]
# 或者: from html.parser import HTMLParser [as 别名]
def __init__(self, html):
        """Initialise the linter state, then parse ``html`` in one go.

        All bookkeeping attributes are created before the base parser is
        initialised; afterwards ``html`` is fed to the parser and the parser
        is closed.

        Args:
            html: the markup handed to ``feed`` before ``close`` is called.
        """
        # State added on top of what HTMLParser itself keeps track of.
        self._endtag_text = None

        # State consulted to decide whether a charset tag should be required.
        self._has_charset = False
        self._first_meta_line_col = self._after_head_line_col = None

        # State used to work out the indentation.
        self._last_indent = 0
        self._last_data_position = (0, 1)
        self._last_data = ''

        self._messages = []

        HTMLParser.HTMLParser.__init__(self)

        # Some Python 3 parsers expose ``strict``; when it is there, make the
        # parser non-strict.
        if hasattr(self, 'strict'):
            self.strict = False

        self.feed(html)
        self.close()
开发者ID:deezer,项目名称:html-linter,代码行数:26,代码来源:html_linter.py

示例2: resolveParseResult

# 需要导入模块: from html import parser [as 别名]
# 或者: from html.parser import HTMLParser [as 别名]
def resolveParseResult(self, result, itemName):
        """Return the first value stored under ``itemName``, unescaped and stripped.

        Exists because the parse result keeps its values in lists (per the
        original author's note), so only ``result[itemName][0]`` is read.
        The value is passed through ``util.html_unescape``, stripped, and
        HTML-unescaped once more with the standard library.  Any failure is
        logged and whatever had been resolved up to that point is returned.

        Args:
            result: mapping indexed by item name whose values are indexable.
            itemName: key to look up in ``result``.

        Returns:
            The cleaned value, or ``""`` when the lookup itself failed.
        """
        # HTMLParser.unescape() was removed in Python 3.9; html.unescape is
        # its replacement.  Older interpreters keep using the parser method.
        try:
            from html import unescape
        except ImportError:
            unescape = HTMLParser().unescape

        resultValue = ""

        try:
            resultValue = result[itemName][0]
            resultValue = util.html_unescape(resultValue)
            resultValue = resultValue.strip()
            # unescape ugly html encoding from websites
            resultValue = unescape(resultValue)

        except Exception as e:
            log.warn("Error while resolving item: {0} : {1} {2}".format(itemName, type(e), str(e)))

        try:
            log.debug("Result " + itemName + " = " + resultValue)
        except Exception:
            # Best effort only: a value that cannot be concatenated into the
            # debug line must not prevent it from being returned.
            pass

        return resultValue
开发者ID:maloep,项目名称:romcollectionbrowser,代码行数:24,代码来源:matcher.py

示例3: get_attribute_line_column

# 需要导入模块: from html import parser [as 别名]
# 或者: from html.parser import HTMLParser [as 别名]
def get_attribute_line_column(tag_definition, line, column, attribute):
    """Returns the line and column of the provided attribute.

    Attribute names found in the tag are lower-cased before being compared,
    so ``attribute`` has to be given in lower case to match.

    Args:
        tag_definition: str with the definition of the tag.
        line: line where the tag starts.
        column: column where the tag starts (1-based).
        attribute: str representing the attribute to find.

    Return:
       A (line, column) tuple representing the position of the attribute.

    Raises:
        AssertionError: if the attribute is not present in tag_definition.
    """
    for match in HTMLParser.attrfind.finditer(tag_definition):
        if match.group(1).lower() == attribute:
            return get_line_column(tag_definition, line, column, match.start(1))

    # Raised explicitly instead of ``assert False`` so the failure is still
    # reported when Python runs with -O (which strips assert statements and
    # would otherwise let the function fall through and return None).
    raise AssertionError(
        'Could not find the requested attribute %s' % attribute)
开发者ID:deezer,项目名称:html-linter,代码行数:19,代码来源:html_linter.py

示例4: note_msg

# 需要导入模块: from html import parser [as 别名]
# 或者: from html.parser import HTMLParser [as 别名]
def note_msg(msg):
    """Print a note message and, if it revokes an earlier message, resend that one.

    The message is printed first.  Its ``Content`` is then HTML-unescaped and
    parsed as XML; when it contains a ``revokemsg`` element whose ``msgid``
    is still in ``msg_store``, the stored message is rebuilt with
    ``get_whole_msg(..., download=True)`` and every resulting piece is sent
    to ``'filehelper'``.  Expired stored messages are cleared afterwards.

    Args:
        msg: message mapping; ``msg['Content']`` is read here.

    Returns:
        None.  The function returns early whenever the content is not a
        revoke notice or the revoked message is no longer stored.
    """
    print_msg(get_whole_msg(msg))

    # HTMLParser.unescape() was removed in Python 3.9; html.unescape is its
    # replacement.  Older interpreters keep using the parser method.
    try:
        from html import unescape
    except ImportError:
        unescape = HTMLParser().unescape

    content = unescape(msg['Content'])
    try:
        content_tree = ETree.fromstring(content)
    except Exception:
        # Content is not XML (presumably chatroom invite/remove notices).
        return
    if content_tree is None:
        return
    revoked = content_tree.find('revokemsg')
    if revoked is None:
        return
    msgid_node = revoked.find('msgid')
    if msgid_node is None:
        # A revoke notice without a message id cannot be matched to anything.
        return
    old_msg = msg_store.get(msgid_node.text)
    if old_msg is None:
        return
    msg_send = get_whole_msg(old_msg, download=True)
    for m in msg_send:
        bot.send(m, toUserName='filehelper')
    clear_timeouted_message()
开发者ID:lb2281075105,项目名称:Python-Spider,代码行数:23,代码来源:29 PythonCeHui.py

示例5: _scrape_tokens

# 需要导入模块: from html import parser [as 别名]
# 或者: from html.parser import HTMLParser [as 别名]
def _scrape_tokens(self):
        """Scrape JCDS upload URL and upload access token from the jamfcloud instance.

        Fetches the legacy packages page through ``self.connection['jss']``,
        extracts the ``data-base-url`` and ``data-upload-token`` attribute
        values, and stores them in ``self.connection`` under
        ``'jcds_base_url'``, ``'jcds_upload_token'`` and ``'url'``.  Only the
        base URL is HTML-unescaped.

        Raises:
            JSSError: if either attribute cannot be found on the page.
        """
        jss = self.connection['jss']
        response = jss.scrape('legacy/packages.html?id=-1&o=c')
        # Decode the body once and run both searches on the same text.
        page = response.content.decode("utf-8")

        matches = re.search(r'data-base-url="([^"]*)"', page)
        if matches is None:
            raise JSSError('Did not find the JCDS base URL on the packages page. Is this actually Jamfcloud?')

        jcds_base_url = matches.group(1)

        matches = re.search(r'data-upload-token="([^"]*)"', page)
        if matches is None:
            raise JSSError('Did not find the JCDS upload token on the packages page. Is this actually Jamfcloud?')

        jcds_upload_token = matches.group(1)

        # HTMLParser.unescape() was removed in Python 3.9; html.unescape is
        # its replacement.  Older interpreters keep using the parser method.
        try:
            from html import unescape
        except ImportError:
            unescape = HTMLParser().unescape

        jcds_base_url = unescape(jcds_base_url)
        self.connection['jcds_base_url'] = jcds_base_url
        self.connection['jcds_upload_token'] = jcds_upload_token
        self.connection["url"] = jcds_base_url  # This is to make JSSImporter happy because it accesses .connection
开发者ID:jssimporter,项目名称:python-jss,代码行数:23,代码来源:distribution_point.py

示例6: search_ep

# 需要导入模块: from html import parser [as 别名]
# 或者: from html.parser import HTMLParser [as 别名]
def search_ep(self, titles, season, episode, year):
        """Return the first title for which the remote search gives a non-empty answer.

        Each title is posted to the search script together with the season
        and episode; the response body is decoded as UTF-8 and HTML-unescaped.

        Args:
            titles: iterable of candidate titles, tried in order.
            season: season value sent as ``sezon``.
            episode: episode value sent as ``odcinek``.
            year: not used by this method.

        Returns:
            ``(title, season, episode)`` for the first title with a non-empty
            response, or ``None`` when no title matched or any error occurred.
        """
        try:
            # HTMLParser.unescape() was removed in Python 3.9; html.unescape
            # is its replacement.  Older interpreters keep the parser method.
            try:
                from html import unescape
            except ImportError:
                unescape = HTMLParser().unescape

            for title in titles:
                data = {
                    'fid_name': title,
                    'sezon': season,
                    'odcinek': episode,
                    'title': title,
                }

                result = requests.post('http://178.19.110.218/forumserialeco/skrypt/szukaj3.php', data=data).content
                result = unescape(result.decode('utf-8'))
                if result:
                    return title, season, episode
        except Exception:
            # Best effort: any failure means "nothing found".  Narrowed from
            # a bare ``except`` so KeyboardInterrupt/SystemExit still propagate.
            return
开发者ID:a4k-openproject,项目名称:script.module.openscrapers,代码行数:20,代码来源:serialeco.py

示例7: unescape

# 需要导入模块: from html import parser [as 别名]
# 或者: from html.parser import HTMLParser [as 别名]
def unescape(html_text):
        """Return ``html_text`` with its HTML character references decoded.

        ``html.unescape`` does the work on Python 3.4 and newer; every older
        interpreter goes through ``HTMLParser().unescape`` instead.
        """
        if sys.version_info < (3, 4):
            return HTMLParser().unescape(html_text)

        return html.unescape(html_text)

    # ------------------------------------------------------------------------------- #
    # Decode Brotli on older versions of urllib3 manually
    # ------------------------------------------------------------------------------- # 
开发者ID:a4k-openproject,项目名称:a4kScrapers,代码行数:14,代码来源:cloudscraper.py

示例8: parse_html

# 需要导入模块: from html import parser [as 别名]
# 或者: from html.parser import HTMLParser [as 别名]
def parse_html(html_value):
    """Parse HTML entities

    Args:
        html_value: text that may contain HTML character references.

    Returns:
        The text with the character references replaced by the characters
        they stand for.
    """
    try:  # Python 3.4+ (HTMLParser.unescape was removed in Python 3.9)
        from html import unescape
    except ImportError:
        try:  # Python 2
            from HTMLParser import HTMLParser
        except ImportError:  # Python 3.0 - 3.3
            from html.parser import HTMLParser
        unescape = HTMLParser().unescape
    return unescape(html_value)
开发者ID:CastagnaIT,项目名称:plugin.video.netflix,代码行数:9,代码来源:website.py

示例9: get_steps

# 需要导入模块: from html import parser [as 别名]
# 或者: from html.parser import HTMLParser [as 别名]
def get_steps(protocol_id):
    """
    Get steps of a protocol

    Steps are loaded in ``step_order``.  When a file named after the step's
    software exists in ``<workspace>/<user_id>/bin``, that path replaces the
    software name (self-compiled tools take priority).  The software and the
    parameter string are joined with a space and HTML-unescaped.

    :param protocol_id: int, protocol id
    :return: list, list of unresolved steps (one dict per step with the keys
        ``id``, ``parameter``, ``specify_output``, ``hash``, ``env`` and
        ``force_local``)
    """
    # HTMLParser.unescape() was removed in Python 3.9; html.unescape is its
    # replacement.  Older interpreters keep using the parser method.
    try:
        from html import unescape
    except ImportError:
        unescape = HTMLParser().unescape

    step_list = []

    steps = Protocol.objects.filter(parent=protocol_id).order_by('step_order')
    workspace_path = settings['env']['workspace']
    for index, step in enumerate(steps):
        # priority for self-compiled tool
        software_path = os.path.join(workspace_path, str(step.user_id), 'bin',
                                     str(step.software))
        # isfile() is already False for paths that do not exist.
        if os.path.isfile(software_path):
            step.software = software_path
        step_list.append({
            'id': index,
            'parameter': unescape(str(step.software).rstrip() + " " + str(step.parameter)),
            'specify_output': step.specify_output,
            'hash': step.hash,
            'env': step.env,
            'force_local': step.force_local,
        })
    return step_list
开发者ID:liyao001,项目名称:BioQueue,代码行数:28,代码来源:bioqueue.py

示例10: get_email_links

# 需要导入模块: from html import parser [as 别名]
# 或者: from html.parser import HTMLParser [as 别名]
def get_email_links(auth, message, link_regexp, download=False):
  """Yield the links found in an email message, optionally downloading them.

  Every part of the message payload that carries body data is base64-decoded
  as UTF-8 and scanned with parse_url; links taken from text/html parts are
  HTML-unescaped.  Duplicates are removed, then the optional filter is applied.

  Args:
    auth: not used by this function.
    message: message dict; message['payload'] is read, and its 'parts' when
      present (otherwise the payload itself is treated as the only part).
    link_regexp: optional pattern; when given, only links it matches from the
      start (re.match) are kept.
    download: when True, fetch each link and yield its file name and content
      instead of the link itself.

  Yields:
    The link string, or when download is True a
    (parse_filename(link, url=True), BytesIO) pair.  Links that fail to
    download are reported on stdout and skipped.
  """
  # HTMLParser.unescape() was removed in Python 3.9; html.unescape is its
  # replacement.  Older interpreters keep using the parser method.
  try:
    from html import unescape
  except ImportError:
    unescape = HTMLParser().unescape

  links = []
  link_filter = re.compile(r'%s' % link_regexp) if link_regexp else None

  try:
    for part in message['payload'].get('parts', []) or [message['payload']]:
      if 'data' in part['body']:
        data = part['body']['data']
        content = base64.urlsafe_b64decode(data).decode('utf-8')
        # plain text may be different than html
        if part['mimeType'] == 'text/plain':
          links.extend(parse_url(content))
        # html needs to decode links
        elif part['mimeType'] == 'text/html':
          links.extend(unescape(link) for link in parse_url(content))

  except HttpError as error:
    print('EMAIL LINK ERROR: %s' % error)

  # remove duplicates
  links = _list_unique(links)

  # filter links
  if link_filter:
    links = [link for link in links if link_filter.match(link)]

  # for downloads convert links into files and data
  for link in links:
    if not download:
      yield link
      continue

    # Only the download is guarded.  The yield stays outside the try block so
    # that closing the generator (GeneratorExit) or an exception thrown in by
    # the consumer is never swallowed by the handler.
    try:
      downloaded = parse_filename(link, url=True), BytesIO(urlopen(link).read())
    except Exception:
      print('ERROR: Unable To Download %s' % link)
      continue
    yield downloaded
开发者ID:google,项目名称:starthinker,代码行数:35,代码来源:__init__.py

示例11: feed

# 需要导入模块: from html import parser [as 别名]
# 或者: from html.parser import HTMLParser [as 别名]
def feed(self, data):
        """Feed ``data`` to the base parser after rewriting split script end tags.

        Every literal ``</' + 'script>`` sequence in ``data`` is replaced by
        ``</ignore>`` before the text reaches ``HTMLParser.feed``.
        """
        split_script_close = "</' + 'script>"
        placeholder_close = "</ignore>"
        cleaned = data.replace(split_script_close, placeholder_close)
        HTMLParser.HTMLParser.feed(self, cleaned)
开发者ID:schollz,项目名称:extract_recipe,代码行数:5,代码来源:extract_recipe.py

示例12: close

# 需要导入模块: from html import parser [as 别名]
# 或者: from html.parser import HTMLParser [as 别名]
def close(self):
        """Close the base parser and return the finished output text.

        After ``pbr()`` and a final ``o('', 0, 'end')`` call, the pieces in
        ``self.outtextlist`` are joined using the current ``self.outtext`` as
        separator, and every ``&nbsp_place_holder;`` marker is replaced: by
        the ``nbsp`` code point when ``self.unicode_snob`` is set, otherwise
        by a plain space.

        Returns:
            The resulting text, which is also stored in ``self.outtext``.
        """
        HTMLParser.HTMLParser.close(self)

        self.pbr()
        self.o('', 0, 'end')

        self.outtext = self.outtext.join(self.outtextlist)
        nbsp = unichr(name2cp('nbsp')) if self.unicode_snob else u' '
        self.outtext = self.outtext.replace(u'&nbsp_place_holder;', nbsp)

        return self.outtext
开发者ID:schollz,项目名称:extract_recipe,代码行数:16,代码来源:extract_recipe.py

示例13: unescape

# 需要导入模块: from html import parser [as 别名]
# 或者: from html.parser import HTMLParser [as 别名]
def unescape(code):
    """Utility function to unescape a string with HTML entities.

    Args:
        code: str that may contain HTML character references.

    Returns:
        The string with the character references replaced by the characters
        they stand for.
    """
    # HTMLParser.unescape() was removed in Python 3.9; html.unescape is its
    # replacement.  Older interpreters keep using the parser method.
    try:
        from html import unescape as _html_unescape
    except ImportError:
        return HTMLParser.HTMLParser().unescape(code)
    return _html_unescape(code)
开发者ID:deezer,项目名称:html-linter,代码行数:6,代码来源:html_linter.py

示例14: get_value_line_column

# 需要导入模块: from html import parser [as 别名]
# 或者: from html.parser import HTMLParser [as 别名]
def get_value_line_column(tag_definition, line, column, attribute):
    """Returns the line and column of the value of the provided attribute.

    Attribute names found in the tag are lower-cased before being compared,
    so ``attribute`` has to be given in lower case to match.

    Args:
        tag_definition: str with the definition of the tag.
        line: line where the tag starts.
        column: column where the tag starts (1-based).
        attribute: str representing the attribute for which we want its value.

    Return:
       A (line, column) tuple representing the position of the value.

    Raises:
        AssertionError: if the attribute is not present in tag_definition.
    """
    for match in HTMLParser.attrfind.finditer(tag_definition):
        if match.group(1).lower() == attribute:
            if not match.group(3):
                # No value captured: point right after the attribute name.
                pos = match.end(1)
            elif match.group(3)[0] in '"\'':
                # Quoted value: skip the opening quote.
                pos = match.start(3) + 1
            else:
                pos = match.start(3)
            return get_line_column(tag_definition, line, column, pos)

    # Raised explicitly instead of ``assert False`` so the failure is still
    # reported when Python runs with -O (which strips assert statements and
    # would otherwise let the function fall through and return None).
    raise AssertionError(
        'Could not find the requested attribute %s' % attribute)


# pylint: disable=too-many-public-methods 
开发者ID:deezer,项目名称:html-linter,代码行数:28,代码来源:html_linter.py

示例15: parse_endtag

# 需要导入模块: from html import parser [as 别名]
# 或者: from html.parser import HTMLParser [as 别名]
def parse_endtag(self, i):
        """Remember the raw end tag at ``i`` and defer to the base parser.

        ``self._endtag_text`` is set to the full ``</tag>`` text when the end
        tag pattern matches ``self.rawdata`` at position ``i`` and to ``None``
        otherwise; the return value is whatever the original method returns.
        """
        found = HTMLParser.endtagfind.match(self.rawdata, i)  # </ + tag + >
        self._endtag_text = found.group(0) if found else None

        return HTMLParser.HTMLParser.parse_endtag(self, i)
开发者ID:deezer,项目名称:html-linter,代码行数:10,代码来源:html_linter.py


注:本文中的html.parser.HTMLParser方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。