當前位置: 首頁>>代碼示例>>Python>>正文


Python parser.HTMLParser方法代碼示例

本文整理匯總了Python中html.parser.HTMLParser方法的典型用法代碼示例。如果您正苦於以下問題:Python parser.HTMLParser方法的具體用法?Python parser.HTMLParser怎麼用?Python parser.HTMLParser使用的例子?那麼, 這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在html.parser的用法示例。


在下文中一共展示了parser.HTMLParser方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: __init__

# 需要導入模塊: from html import parser [as 別名]
# 或者: from html.parser import HTMLParser [as 別名]
def __init__(self, html):
        """Initialise the parser state and parse ``html`` straight away.

        Args:
            html: markup handed to ``feed()``; ``close()`` is called before
                the constructor returns.
        """
        self._messages = []

        # State used to work out the indentation.
        self._last_data, self._last_data_position, self._last_indent = (
            '', (0, 1), 0)

        # State used to decide whether a charset tag should be required.
        self._first_meta_line_col = self._after_head_line_col = None
        self._has_charset = False

        # State that extends the feature set of HTMLParser.
        self._endtag_text = None

        HTMLParser.HTMLParser.__init__(self)

        # Parsers that expose a ``strict`` attribute (some Python 3 versions)
        # are switched to non-strict mode.
        if hasattr(self, 'strict'):
            self.strict = False

        self.feed(html)
        self.close()
開發者ID:deezer,項目名稱:html-linter,代碼行數:26,代碼來源:html_linter.py

示例2: resolveParseResult

# 需要導入模塊: from html import parser [as 別名]
# 或者: from html.parser import HTMLParser [as 別名]
def resolveParseResult(self, result, itemName):
        """Return the first value stored under ``itemName``, cleaned up.

        The value ``result[itemName][0]`` is passed through
        ``util.html_unescape``, stripped of surrounding whitespace and then
        has any remaining HTML character references decoded.

        Args:
            result: parse result; ``result[itemName]`` must be indexable.
            itemName: key of the item to resolve.

        Returns:
            The cleaned value. If the lookup or a cleaning step raises, a
            warning is logged and the value produced so far is returned
            ("" when the lookup itself failed).
        """
        # HTMLParser.unescape() was removed in Python 3.9; html.unescape() is
        # its replacement. Fall back to the old method on interpreters that
        # lack it.
        try:
            from html import unescape as unescape_entities
        except ImportError:
            unescape_entities = HTMLParser().unescape

        resultValue = ""

        try:
            resultValue = result[itemName][0]
            resultValue = util.html_unescape(resultValue)
            resultValue = resultValue.strip()
            # unescape ugly html encoding from websites
            resultValue = unescape_entities(resultValue)

        except Exception as e:
            log.warn("Error while resolving item: {0} : {1} {2}".format(itemName, type(e), str(e)))

        # Debug logging is best effort: a value that cannot be concatenated
        # must not prevent the result from being returned.
        try:
            log.debug("Result " + itemName + " = " + resultValue)
        except Exception:
            pass

        return resultValue
開發者ID:maloep,項目名稱:romcollectionbrowser,代碼行數:24,代碼來源:matcher.py

示例3: get_attribute_line_column

# 需要導入模塊: from html import parser [as 別名]
# 或者: from html.parser import HTMLParser [as 別名]
def get_attribute_line_column(tag_definition, line, column, attribute):
    """Returns the line and column of the provided attribute.

    Args:
        tag_definition: str with the definition of the tag.
        line: line where the tag starts.
        column: column where the tag starts (1-based).
        attribute: str representing the attribute to find; it is compared
            against the lower-cased attribute names found in the tag.

    Return:
       A (line, column) tuple representing the position of the attribute.

    Raises:
        AssertionError: if the attribute is not present in the tag.
    """
    for match in HTMLParser.attrfind.finditer(tag_definition):
        if match.group(1).lower() == attribute:
            return get_line_column(tag_definition, line, column, match.start(1))

    # Raised explicitly instead of ``assert False`` so the failure is still
    # reported when assertions are stripped (python -O).
    raise AssertionError(
        'Could not find the requested attribute %s' % attribute)
開發者ID:deezer,項目名稱:html-linter,代碼行數:19,代碼來源:html_linter.py

示例4: note_msg

# 需要導入模塊: from html import parser [as 別名]
# 或者: from html.parser import HTMLParser [as 別名]
def note_msg(msg):
    """Print a note message and forward the original of a revoked message.

    The message content is HTML-unescaped and parsed as XML. When it contains
    a ``revokemsg`` element, the message whose id is given by its ``msgid``
    child is looked up in ``msg_store`` and every piece returned by
    ``get_whole_msg(..., download=True)`` is sent to ``filehelper``.

    Args:
        msg: message mapping; ``msg['Content']`` holds the escaped payload.
    """
    # HTMLParser.unescape() was removed in Python 3.9; prefer html.unescape()
    # and only fall back to the old method where it is unavailable.
    try:
        from html import unescape
    except ImportError:
        unescape = HTMLParser().unescape

    print_msg(get_whole_msg(msg))
    content = unescape(msg['Content'])
    try:
        content_tree = ETree.fromstring(content)
    except Exception:
        # Content is not parseable XML (presumably chatroom invite/remove
        # notices, going by the original comment) - nothing to forward.
        return
    if content_tree is None:
        return
    revoked = content_tree.find('revokemsg')
    if revoked is None:
        return
    old_msg_id = revoked.find('msgid').text
    old_msg = msg_store.get(old_msg_id)
    if old_msg is None:
        return
    msg_send = get_whole_msg(old_msg, download=True)
    for m in msg_send:
        bot.send(m, toUserName='filehelper')
    clear_timeouted_message()
開發者ID:lb2281075105,項目名稱:Python-Spider,代碼行數:23,代碼來源:29 PythonCeHui.py

示例5: _scrape_tokens

# 需要導入模塊: from html import parser [as 別名]
# 或者: from html.parser import HTMLParser [as 別名]
def _scrape_tokens(self):
        """Scrape JCDS upload URL and upload access token from the jamfcloud instance.

        The packages page is fetched through ``self.connection['jss']`` and
        searched for the ``data-base-url`` and ``data-upload-token``
        attributes. The base URL is HTML-unescaped; the results are stored in
        ``self.connection`` under ``jcds_base_url``, ``jcds_upload_token`` and
        ``url``.

        Raises:
            JSSError: if either attribute is missing from the page.
        """
        # HTMLParser.unescape() was removed in Python 3.9; prefer
        # html.unescape() and keep the old method only as a fallback.
        try:
            from html import unescape
        except ImportError:
            unescape = HTMLParser().unescape

        jss = self.connection['jss']
        response = jss.scrape('legacy/packages.html?id=-1&o=c')
        # Decode once and reuse the text for both searches.
        page = response.content.decode("utf-8")

        matches = re.search(r'data-base-url="([^"]*)"', page)
        if matches is None:
            raise JSSError('Did not find the JCDS base URL on the packages page. Is this actually Jamfcloud?')

        jcds_base_url = unescape(matches.group(1))

        matches = re.search(r'data-upload-token="([^"]*)"', page)
        if matches is None:
            raise JSSError('Did not find the JCDS upload token on the packages page. Is this actually Jamfcloud?')

        jcds_upload_token = matches.group(1)

        self.connection['jcds_base_url'] = jcds_base_url
        self.connection['jcds_upload_token'] = jcds_upload_token
        self.connection["url"] = jcds_base_url  # This is to make JSSImporter happy because it accesses .connection
開發者ID:jssimporter,項目名稱:python-jss,代碼行數:23,代碼來源:distribution_point.py

示例6: search_ep

# 需要導入模塊: from html import parser [as 別名]
# 或者: from html.parser import HTMLParser [as 別名]
def search_ep(self, titles, season, episode, year):
		"""Return the first title for which the remote search yields content.

		Each title is posted, with the season and episode, to the search
		script; the response body is decoded as UTF-8 and HTML-unescaped.

		Args:
			titles: iterable of candidate titles.
			season: season value sent as ``sezon``.
			episode: episode value sent as ``odcinek``.
			year: not used by this method.

		Returns:
			A ``(title, season, episode)`` tuple for the first title with a
			non-empty response, or None when nothing matched or a request
			failed.
		"""
		# HTMLParser.unescape() was removed in Python 3.9. With the previous
		# bare ``except`` the resulting AttributeError was swallowed, so the
		# search always returned None there. Prefer html.unescape().
		try:
			from html import unescape
		except ImportError:
			unescape = HTMLParser().unescape

		try:
			for title in titles:
				data = {
					'fid_name': title,
					'sezon': season,
					'odcinek': episode,
					'title': title
				}

				result = requests.post('http://178.19.110.218/forumserialeco/skrypt/szukaj3.php', data=data).content
				result = unescape(result.decode('utf-8'))
				if result:
					return title, season, episode
		except Exception:
			# Best effort: any request or decoding failure means "not found".
			return
開發者ID:a4k-openproject,項目名稱:script.module.openscrapers,代碼行數:20,代碼來源:serialeco.py

示例7: unescape

# 需要導入模塊: from html import parser [as 別名]
# 或者: from html.parser import HTMLParser [as 別名]
def unescape(html_text):
        """Decode HTML character references in ``html_text``.

        Interpreters older than 3.4 go through ``HTMLParser().unescape``;
        everything else uses ``html.unescape``.
        """
        if sys.version_info < (3, 4):
            return HTMLParser().unescape(html_text)

        return html.unescape(html_text)

    # ------------------------------------------------------------------------------- #
    # Decode Brotli on older versions of urllib3 manually
    # ------------------------------------------------------------------------------- # 
開發者ID:a4k-openproject,項目名稱:a4kScrapers,代碼行數:14,代碼來源:cloudscraper.py

示例8: parse_html

# 需要導入模塊: from html import parser [as 別名]
# 或者: from html.parser import HTMLParser [as 別名]
def parse_html(html_value):
    """Parse HTML entities

    Args:
        html_value: text that may contain HTML character references.

    Returns:
        The text with character references replaced by their characters.
    """
    # HTMLParser.unescape() was removed in Python 3.9, so html.unescape()
    # (available since 3.4) is tried first; older interpreters keep using the
    # parser method.
    try:  # Python 3.4+
        from html import unescape
    except ImportError:
        try:  # Python 2
            from HTMLParser import HTMLParser
        except ImportError:  # Python 3.0 - 3.3
            from html.parser import HTMLParser
        unescape = HTMLParser().unescape
    return unescape(html_value)
開發者ID:CastagnaIT,項目名稱:plugin.video.netflix,代碼行數:9,代碼來源:website.py

示例9: get_steps

# 需要導入模塊: from html import parser [as 別名]
# 或者: from html.parser import HTMLParser [as 別名]
def get_steps(protocol_id):
    """
    Get steps of a protocol

    Steps are read in ``step_order``. When a file named after the step's
    software exists under ``<workspace>/<user_id>/bin``, that path replaces
    ``step.software`` on the in-memory object before the command is built.

    :param protocol_id: int, protocol id
    :return: list, list of unresolved steps (one dict per step)
    """
    # HTMLParser.unescape() was removed in Python 3.9; prefer html.unescape()
    # and keep the old method only as a fallback.
    try:
        from html import unescape
    except ImportError:
        unescape = HTMLParser().unescape

    step_list = []

    steps = Protocol.objects.filter(parent=protocol_id).order_by('step_order')
    workspace_path = settings['env']['workspace']
    for index, step in enumerate(steps):
        # priority for self-compiled tool
        software_path = os.path.join(workspace_path, str(step.user_id), 'bin',
                                     str(step.software))
        # isfile() is already False for paths that do not exist.
        if os.path.isfile(software_path):
            step.software = software_path
        step_list.append({
            'id': index,
            'parameter': unescape(str(step.software).rstrip() + " " + str(step.parameter)),
            'specify_output': step.specify_output,
            'hash': step.hash,
            'env': step.env,
            'force_local': step.force_local,
        })
    return step_list
開發者ID:liyao001,項目名稱:BioQueue,代碼行數:28,代碼來源:bioqueue.py

示例10: get_email_links

# 需要導入模塊: from html import parser [as 別名]
# 或者: from html.parser import HTMLParser [as 別名]
def get_email_links(auth, message, link_regexp, download=False):
  """Yield the links found in an email message, optionally downloading them.

  Every payload part that carries body data is base64url-decoded as UTF-8 and
  scanned with ``parse_url``. Links from ``text/html`` parts are
  HTML-unescaped. Duplicates are removed and, when ``link_regexp`` is given,
  only links matching it (``re.match``) are kept.

  Args:
    auth: not used by this function.
    message: message mapping with a ``payload`` entry.
    link_regexp: regular expression used to filter links, or a falsy value
      to keep all of them.
    download: when true, yield ``(filename, BytesIO)`` pairs instead of links.

  Yields:
    Each link, or for downloads a ``(filename, data)`` tuple. Links that fail
    to download are reported on stdout and skipped.
  """
  # HTMLParser.unescape() was removed in Python 3.9; prefer html.unescape()
  # and keep the old method only as a fallback.
  try:
    from html import unescape
  except ImportError:
    unescape = HTMLParser().unescape

  links = []
  link_filter = re.compile(link_regexp) if link_regexp else None

  try:
    for part in message['payload'].get('parts', []) or [message['payload']]:
      if 'data' in part['body']:
        data = part['body']['data']
        content = base64.urlsafe_b64decode(data).decode('utf-8')
        # plain text may be different than html
        if part['mimeType'] == 'text/plain':
          links.extend(parse_url(content))
        # html needs to decode links
        elif part['mimeType'] == 'text/html':
          links.extend(unescape(link) for link in parse_url(content))

  except HttpError as error:
    print('EMAIL LINK ERROR: %s' % error)

  # remove duplicates
  links = _list_unique(links)

  # filter links
  if link_filter:
    links = [link for link in links if link_filter.match(link)]

  # for downloads convert links into files and data
  for link in links:
    if download:
      # ``except Exception`` rather than a bare ``except``: the latter would
      # also trap GeneratorExit raised at the yield when the consumer closes
      # the generator.
      try:
        yield parse_filename(link, url=True), BytesIO(urlopen(link).read())
      except Exception:
        # The original handler built this tuple but never printed it.
        print('ERROR: Unable To Download', link)
    else:
      yield link
開發者ID:google,項目名稱:starthinker,代碼行數:35,代碼來源:__init__.py

示例11: feed

# 需要導入模塊: from html import parser [as 別名]
# 或者: from html.parser import HTMLParser [as 別名]
def feed(self, data):
        """Feed ``data`` to the base parser after neutralising split script tags.

        The literal text ``</' + 'script>`` is rewritten to ``</ignore>``
        before the data is handed to ``HTMLParser.HTMLParser.feed``.
        """
        sanitized = data.replace("</' + 'script>", "</ignore>")
        HTMLParser.HTMLParser.feed(self, sanitized)
開發者ID:schollz,項目名稱:extract_recipe,代碼行數:5,代碼來源:extract_recipe.py

示例12: close

# 需要導入模塊: from html import parser [as 別名]
# 或者: from html.parser import HTMLParser [as 別名]
def close(self):
        """Finish parsing and return the assembled output text.

        After the base parser is closed, ``pbr()`` and a final ``o('', 0,
        'end')`` are issued, the collected pieces in ``outtextlist`` are
        joined using the current ``outtext`` as separator, and every
        ``&nbsp_place_holder;`` marker is replaced by either the nbsp
        character (``unicode_snob`` set) or a plain space.
        """
        HTMLParser.HTMLParser.close(self)

        self.pbr()
        self.o('', 0, 'end')

        joined = self.outtext.join(self.outtextlist)
        self.outtext = joined

        nbsp = unichr(name2cp('nbsp')) if self.unicode_snob else u' '
        self.outtext = joined.replace(u'&nbsp_place_holder;', nbsp)

        return self.outtext
開發者ID:schollz,項目名稱:extract_recipe,代碼行數:16,代碼來源:extract_recipe.py

示例13: unescape

# 需要導入模塊: from html import parser [as 別名]
# 或者: from html.parser import HTMLParser [as 別名]
def unescape(code):
    """Utility function to unescape a string with HTML entities.

    Args:
        code: str that may contain HTML character references.

    Returns:
        The string with character references replaced by their characters.
    """
    # HTMLParser.unescape() was removed in Python 3.9; html.unescape() is the
    # replacement. Interpreters without it keep using the parser method.
    try:
        from html import unescape as html_unescape
    except ImportError:
        return HTMLParser.HTMLParser().unescape(code)
    return html_unescape(code)
開發者ID:deezer,項目名稱:html-linter,代碼行數:6,代碼來源:html_linter.py

示例14: get_value_line_column

# 需要導入模塊: from html import parser [as 別名]
# 或者: from html.parser import HTMLParser [as 別名]
def get_value_line_column(tag_definition, line, column, attribute):
    """Returns the line and column of the value of the provided attribute.

    Args:
        tag_definition: str with the definition of the tag.
        line: line where the tag starts.
        column: column where the tag starts (1-based).
        attribute: str representing the attribute for which we want its value;
            it is compared against the lower-cased attribute names found.

    Return:
       A (line, column) tuple representing the position of the value.

    Raises:
        AssertionError: if the attribute is not present in the tag.
    """
    for match in HTMLParser.attrfind.finditer(tag_definition):
        if match.group(1).lower() == attribute:
            if not match.group(3):
                # No value: point just past the attribute name.
                pos = match.end(1)
            elif match.group(3)[0] in '"\'':
                # Quoted value: skip the opening quote.
                pos = match.start(3) + 1
            else:
                pos = match.start(3)
            return get_line_column(tag_definition, line, column, pos)

    # Raised explicitly instead of ``assert False`` so the failure is still
    # reported when assertions are stripped (python -O).
    raise AssertionError(
        'Could not find the requested attribute %s' % attribute)


# pylint: disable=too-many-public-methods 
開發者ID:deezer,項目名稱:html-linter,代碼行數:28,代碼來源:html_linter.py

示例15: parse_endtag

# 需要導入模塊: from html import parser [as 別名]
# 或者: from html.parser import HTMLParser [as 別名]
def parse_endtag(self, i):
        """Record the raw end tag text, then defer to the base implementation.

        ``_endtag_text`` is set to the text matched by
        ``HTMLParser.endtagfind`` at position ``i`` of ``rawdata``, or to
        None when there is no match.
        """
        found = HTMLParser.endtagfind.match(self.rawdata, i)  # </ + tag + >
        self._endtag_text = found.group(0) if found else None

        return HTMLParser.HTMLParser.parse_endtag(self, i)
開發者ID:deezer,項目名稱:html-linter,代碼行數:10,代碼來源:html_linter.py


注:本文中的html.parser.HTMLParser方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。