当前位置: 首页>>代码示例>>Python>>正文


Python HTMLParser.unescape方法代码示例

本文整理汇总了Python中six.moves.html_parser.HTMLParser.unescape方法的典型用法代码示例。如果您正苦于以下问题:Python HTMLParser.unescape方法的具体用法?Python HTMLParser.unescape怎么用?Python HTMLParser.unescape使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在six.moves.html_parser.HTMLParser的用法示例。


在下文中一共展示了HTMLParser.unescape方法的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: original_unescape

# 需要导入模块: from six.moves.html_parser import HTMLParser [as 别名]
# 或者: from six.moves.html_parser.HTMLParser import unescape [as 别名]
 def original_unescape(self, s):
     """Run the stock HTMLParser unescape on a string or a list of strings.

     A string comes back as one unescaped unicode string, a list comes back
     as a new list with every item unescaped, and any other value is
     returned unchanged.
     """
     def _plain_unescape(text):
         # Call the base-class implementation explicitly and coerce the
         # result to unicode.
         return unicode(HTMLParser.unescape(self, text))

     if isinstance(s, basestring):
         return _plain_unescape(s)
     if isinstance(s, list):
         return [_plain_unescape(entry) for entry in s]
     return s
开发者ID:Parsely,项目名称:schemato,代码行数:10,代码来源:parselypage.py

示例2: __init__

# 需要导入模块: from six.moves.html_parser import HTMLParser [as 别名]
# 或者: from six.moves.html_parser.HTMLParser import unescape [as 别名]
class Feed:
    """Wrap a parsed feed document and expose its title, link and items."""

    def __init__(self, data, markup):
        """Parse `data` with BeautifulSoup using the `markup` parser name."""
        self.obj = BeautifulSoup(data, markup)
        self.html_parser = HTMLParser()

    def getFeeds(self):
        """Return the feed as a dict with 'title', 'link' and 'items' keys."""
        return {
            'title': self.getTitle(),
            'link': self.getLink(),
            'items': self.setupItems(),
        }

    def getTitle(self):
        """Return the string of the document's title element."""
        title_node = self.obj.title
        return title_node.string

    def getLink(self):
        """Return the string of the first 'link' element in the document."""
        link_node = self.obj.find('link')
        return link_node.string

    def getItems(self):
        """Return every 'item' element found in the document."""
        return self.obj.find_all('item')

    def setupItems(self):
        """Build one dict per feed item.

        The title and author have their HTML entities unescaped; the
        comments entry is whatever `find("comments")` returns, not its text.
        """
        return [
            {
                'title': self.html_parser.unescape(entry.title.string),
                'link': entry.find("link").string,
                'comments_link': entry.find("comments"),
                'publication_date': entry.find('pubDate').text,
                'author': self.html_parser.unescape(entry.find('creator').text),
            }
            for entry in self.getItems()
        ]
开发者ID:hect1c,项目名称:rss-crawler,代码行数:45,代码来源:run.py

示例3: add_set

# 需要导入模块: from six.moves.html_parser import HTMLParser [as 别名]
# 或者: from six.moves.html_parser.HTMLParser import unescape [as 别名]
 def add_set(self, title, description, index=-1):
     """Create a check box for a set and add it to the sets widget layout.

     `title` becomes the check box label and a non-empty `description` is
     unescaped and used as the tooltip. A non-negative `index` inserts the
     check box at that layout position; otherwise it is appended. The new
     check box is returned.
     """
     # Ampersands are doubled in the label -- presumably so Qt displays a
     # literal '&' instead of treating it as a shortcut marker (confirm).
     checkbox = QtWidgets.QCheckBox(title.replace('&', '&&'))
     if description:
         checkbox.setToolTip(HTMLParser().unescape(description))
     layout = self.sets_widget.layout()
     if index >= 0:
         layout.insertWidget(index, checkbox)
     else:
         layout.addWidget(checkbox)
     return checkbox
开发者ID:jim-easterbrook,项目名称:Photini,代码行数:12,代码来源:flickr.py

示例4: logged_in

# 需要导入模块: from six.moves.html_parser import HTMLParser [as 别名]
# 或者: from six.moves.html_parser.HTMLParser import unescape [as 别名]
 def logged_in(self, y):
     """Report whether the session looks logged in, refreshing the show list.

     :param y: page markup to inspect, or None to check the cookies only
     :return: True when a cookie whose name contains 'remember_web_' exists
         and `y` is either None or contains 'logout'; otherwise False

     When `y` is given and the check passes, `self.shows` is rebuilt from
     the `<option value="...">` entries found in `y`, mapping each numeric
     value to its cleaned-up name.
     """
     page_ok = None is y or 'logout' in y
     # any() instead of bool(filter(...)): on Python 3 filter() returns an
     # iterator, which is truthy even when nothing matches.
     has_login_cookie = any('remember_web_' in c for c in self.session.cookies.keys())
     if not (page_ok and has_login_cookie):
         return False

     if None is not y:
         h = HTMLParser()
         shows = {}
         for show_id, name in re.findall(r'<option value="(\d+)">(.*?)</option>', y):
             # Only byte strings need decoding; text captured from a text
             # page is already decoded (and has no .decode() on Python 3).
             if isinstance(name, bytes):
                 name = name.decode('utf-8')
             shows[show_id] = sanitizeSceneName(h.unescape(unidecode(name)))
         self.shows = shows
     return True
开发者ID:JackDandy,项目名称:SickGear,代码行数:12,代码来源:showrss.py

示例5: get_formatted_value

# 需要导入模块: from six.moves.html_parser import HTMLParser [as 别名]
# 或者: from six.moves.html_parser.HTMLParser import unescape [as 别名]
def get_formatted_value(value, field):
	'''Prepare a field's raw value as a "<label> : <text>" string.

	For fields whose `fieldtype` is "Text" or "Text Editor" the value is
	HTML-unescaped, `<script>`/`<style>` elements are removed together
	with their content, and runs of whitespace collapse to single spaces.
	Every value is then passed through `strip_html_tags`.

	:param value: raw field value
	:param field: field description providing `label` and, optionally,
		`fieldtype`
	:return: `field.label`, the separator " : " and the cleaned value
	'''

	if getattr(field, 'fieldtype', None) in ("Text", "Text Editor"):
		try:
			# HTMLParser.unescape was removed in Python 3.9; html.unescape
			# is its documented replacement.
			from html import unescape
		except ImportError:
			# No html.unescape available (Python 2): use the parser method.
			from six.moves.html_parser import HTMLParser
			unescape = HTMLParser().unescape

		value = unescape(value)
		# The (?s) flag has to lead the pattern: a global inline flag in
		# any other position is an error on current Python versions.
		value = re.sub(r'(?s)<[\s]*(script|style).*?</\1>', '', text_type(value))
		value = ' '.join(value.split())
	return field.label + " : " + strip_html_tags(text_type(value))
开发者ID:vhrspvl,项目名称:vhrs-frappe,代码行数:13,代码来源:global_search.py

示例6: check_bz_bug

# 需要导入模块: from six.moves.html_parser import HTMLParser [as 别名]
# 或者: from six.moves.html_parser.HTMLParser import unescape [as 别名]
def check_bz_bug(b):
    '''Fetch the page for bug `b` and return `(title, None)`.

    The title is the first group of the TITLE pattern in the UTF-8 decoded
    page, with HTML entities unescaped. It is an empty string when no page
    was fetched or the pattern does not match.
    '''
    response = get_html(b)
    if not response:
        return '', None

    page = response.content.decode('utf-8')
    found = TITLE.search(page)
    title = found.group(1) if found else ''
    return HTMLParser().unescape(title), None
开发者ID:sshnaidm,项目名称:various,代码行数:13,代码来源:check_tests.py

示例7: get_email_subject

# 需要导入模块: from six.moves.html_parser import HTMLParser [as 别名]
# 或者: from six.moves.html_parser.HTMLParser import unescape [as 别名]
 def get_email_subject(self):
     """
     Return the email subject as plain text.

     WARNING: overriding this method is MANDATORY if you are going to send
     email with the `send_notification_email` method. Your class must
     define an `email_subject_tmpl` attribute holding the template path of
     a file that contains your email subject.
     """
     # The subject is rendered through a template, so it is unescaped
     # afterwards to turn html ampersand character codes back into text.
     return HTMLParser().unescape(
         self._get_email_field('email_subject_tmpl', 'get_email_subject'))
开发者ID:Nomadblue,项目名称:django-nomad-notifier,代码行数:14,代码来源:models.py

示例8: try_unescape

# 需要导入模块: from six.moves.html_parser import HTMLParser [as 别名]
# 或者: from six.moves.html_parser.HTMLParser import unescape [as 别名]
    def try_unescape(self, value):
        """Unescape `value` when it starts with 'http', on a best-effort basis.

        Other values, and values whose unescaping raises, are returned
        unchanged. `orig_unescape` is used when it is truthy, otherwise the
        base HTMLParser method.
        """
        if not value.startswith('http'):
            return value

        try:
            if orig_unescape:
                return orig_unescape(value)
            return HTMLParser.unescape(self, value)
        except Exception:
            # Deliberate best effort: keep the original text on any failure.
            return value
开发者ID:ikreymer,项目名称:pywb,代码行数:15,代码来源:html_rewriter.py

示例9: try_unescape

# 需要导入模块: from six.moves.html_parser import HTMLParser [as 别名]
# 或者: from six.moves.html_parser.HTMLParser import unescape [as 别名]
    def try_unescape(self, value):
        """Unescape `value` when it starts with 'http', on a best-effort basis.

        Values that do not start with 'http', and values whose unescaping
        raises, are returned unchanged. When unescaping changed the value
        and the result is text, it is returned encoded as UTF-8.
        """
        if not value.startswith('http'):
            return value

        try:
            new_value = HTMLParser.unescape(self, value)
        except Exception:
            # Best effort, but only for ordinary errors: a bare except would
            # also swallow KeyboardInterrupt and SystemExit.
            return value

        # ensure utf-8 encoded to avoid %-encoding query here
        if value != new_value and isinstance(new_value, text_type):
            new_value = new_value.encode('utf-8')

        return new_value
开发者ID:daleathan,项目名称:pywb,代码行数:17,代码来源:html_rewriter.py

示例10: replace_html_entities

# 需要导入模块: from six.moves.html_parser import HTMLParser [as 别名]
# 或者: from six.moves.html_parser.HTMLParser import unescape [as 别名]
def replace_html_entities(xml_bin_str):
    """Swap HTML entities found in the retrieved XML for their characters.

    XML has no entity references for many HTML entities, yet the Federal
    Register XML sometimes contains them; replacing them here keeps them
    from throwing off XML parsing. Each HTML_RE match is decoded as UTF-8,
    unescaped, and re-encoded before being substituted, so `xml_bin_str`
    is presumably a byte string (confirm against callers). The search
    restarts after every substitution until nothing matches.
    """
    unescaper = HTMLParser()
    while True:
        found = HTML_RE.search(xml_bin_str)
        if not found:
            return xml_bin_str
        entity_bytes = found.group(0)
        entity_text = entity_bytes.decode('utf-8')
        substitute = unescaper.unescape(entity_text).encode('UTF-8')
        logger.debug("Replacing %s with %s in retrieved XML",
                     entity_text, substitute)
        # str.replace swaps every occurrence of this entity, not just the
        # one that was matched.
        xml_bin_str = xml_bin_str.replace(entity_bytes, substitute)
开发者ID:eregs,项目名称:regulations-parser,代码行数:17,代码来源:preprocessors.py

示例11: html_unescape

# 需要导入模块: from six.moves.html_parser import HTMLParser [as 别名]
# 或者: from six.moves.html_parser.HTMLParser import unescape [as 别名]
def html_unescape(s):
    """Return `s` with HTML character references converted to characters.

    :param s: text that may contain references such as ``&amp;`` or ``&#39;``
    :return: the unescaped text
    """
    try:
        # HTMLParser.unescape was removed in Python 3.9; html.unescape is
        # its documented replacement.
        from html import unescape
    except ImportError:
        # No html.unescape available (Python 2): use the parser method.
        return HTMLParser().unescape(s)
    return unescape(s)
开发者ID:janderse,项目名称:juriscraper,代码行数:5,代码来源:html_utils.py

示例12: filter

# 需要导入模块: from six.moves.html_parser import HTMLParser [as 别名]
# 或者: from six.moves.html_parser.HTMLParser import unescape [as 别名]
 def filter(self, data):
     """Apply the parent filter to `data`, then unescape HTML entities."""
     unescaper = HTMLParser()
     return unescaper.unescape(super(Entities, self).filter(data))
开发者ID:P4ncake,项目名称:weboob,代码行数:6,代码来源:pages.py

示例13: notify_about_activity_log

# 需要导入模块: from six.moves.html_parser import HTMLParser [as 别名]
# 或者: from six.moves.html_parser.HTMLParser import unescape [as 别名]
def notify_about_activity_log(addon, version, note, perm_setting=None,
                              send_to_reviewers=True, send_to_staff=True):
    """Notify relevant users about an ActivityLog note.

    The add-on authors (minus the note's user) always get a mail. When
    `send_to_reviewers` is true a reviewer copy is built and sent, and when
    `send_to_staff` is true a staff copy starts being prepared.

    `addon`, `version` and `note` supply the names, version number, action
    and comments used in the subject and template context; `perm_setting`
    is passed straight through to `send_activity_mail`.
    """
    # note.details may be falsy, so fall back to an empty dict for .get().
    comments = (note.details or {}).get('comments')
    if not comments:
        # Just use the name of the action if no comments provided.  Alas we
        # can't know the locale of recipient, and our templates are English
        # only so prevent language jumble by forcing into en-US.
        # NOTE(review): the override below uses settings.LANGUAGE_CODE, while
        # the subject block further down hard-codes 'en-US' -- confirm these
        # are meant to be the same locale.
        with translation.override(settings.LANGUAGE_CODE):
            comments = '%s' % amo.LOG_BY_ID[note.action].short
    else:
        # Comments were provided: convert HTML entities back to characters.
        htmlparser = HTMLParser()
        comments = htmlparser.unescape(comments)

    # Collect add-on authors (excl. the person who sent the email.) and build
    # the context for them.
    addon_authors = set(addon.authors.all()) - {note.user}

    # This dict is also the base that the reviewer context is copied from.
    author_context_dict = {
        'name': addon.name,
        'number': version.version,
        'author': note.author_name,
        'comments': comments,
        'url': absolutify(addon.get_dev_url('versions')),
        'SITE_URL': settings.SITE_URL,
        'email_reason': 'you are listed as an author of this add-on',
        'is_info_request': note.action == amo.LOG.REQUEST_INFORMATION.id,
    }

    # Not being localised because we don't know the recipients locale.
    with translation.override('en-US'):
        if note.action == amo.LOG.REQUEST_INFORMATION.id:
            # 'number_of_days_left' is only added to the context when the
            # add-on has a pending info request.
            if addon.pending_info_request:
                days_left = (
                    # We pad the time left with an extra hour so that the email
                    # does not end up saying "6 days left" because a few
                    # seconds or minutes passed between the datetime was saved
                    # and the email was sent.
                    addon.pending_info_request + timedelta(hours=1) -
                    datetime.now()
                ).days
                # More than nine days: digits only. Two to nine days: the
                # apnumber() form followed by the digits (presumably the
                # spelled-out number -- confirm). Otherwise a fixed string.
                if days_left > 9:
                    author_context_dict['number_of_days_left'] = (
                        '%d days' % days_left)
                elif days_left > 1:
                    author_context_dict['number_of_days_left'] = (
                        '%s (%d) days' % (apnumber(days_left), days_left))
                else:
                    author_context_dict['number_of_days_left'] = 'one (1) day'
            # Authors get an "Action Required" subject for info requests;
            # reviewers get the plain one.
            subject = u'Mozilla Add-ons: Action Required for %s %s' % (
                addon.name, version.version)
            reviewer_subject = u'Mozilla Add-ons: %s %s' % (
                addon.name, version.version)
        else:
            subject = reviewer_subject = u'Mozilla Add-ons: %s %s' % (
                addon.name, version.version)
    # Build and send the mail for authors.
    template = template_from_user(note.user, version)
    from_email = formataddr((note.author_name, NOTIFICATIONS_FROM_EMAIL))
    send_activity_mail(
        subject, template.render(author_context_dict),
        version, addon_authors, from_email, note.id, perm_setting)

    if send_to_reviewers or send_to_staff:
        # If task_user doesn't exist that's no big issue (i.e. in tests)
        try:
            task_user = {get_task_user()}
        except UserProfile.DoesNotExist:
            task_user = set()

    if send_to_reviewers:
        # Collect reviewers on the thread (excl. the email sender and task user
        # for automated messages), build the context for them and send them
        # their copy.
        log_users = {
            alog.user for alog in ActivityLog.objects.for_version(version) if
            acl.is_user_any_kind_of_reviewer(alog.user)}
        reviewers = log_users - addon_authors - task_user - {note.user}
        reviewer_context_dict = author_context_dict.copy()
        reviewer_context_dict['url'] = absolutify(
            reverse('reviewers.review',
                    kwargs={
                        'addon_id': version.addon.pk,
                        'channel': amo.CHANNEL_CHOICES_API[version.channel]
                    }, add_prefix=False))
        reviewer_context_dict['email_reason'] = 'you reviewed this add-on'
        send_activity_mail(
            reviewer_subject, template.render(reviewer_context_dict),
            version, reviewers, from_email, note.id, perm_setting)

    if send_to_staff:
        # Collect staff that want a copy of the email, build the context for
        # them and send them their copy.
        # NOTE(review): `reviewers` and `reviewer_context_dict` are assigned
        # only inside the `if send_to_reviewers:` block above, so calling with
        # send_to_reviewers=False and send_to_staff=True looks like it would
        # raise NameError here -- confirm.
        staff = set(
            UserProfile.objects.filter(groups__name=ACTIVITY_MAIL_GROUP))
        staff_cc = (
            staff - reviewers - addon_authors - task_user - {note.user})
        staff_cc_context_dict = reviewer_context_dict.copy()
        staff_cc_context_dict['email_reason'] = (
            'you are member of the activity email cc group')
#.........这里部分代码省略.........
开发者ID:diox,项目名称:olympia,代码行数:103,代码来源:utils.py

示例14: decode_html

# 需要导入模块: from six.moves.html_parser import HTMLParser [as 别名]
# 或者: from six.moves.html_parser.HTMLParser import unescape [as 别名]
def decode_html(data):
    """Decode HTML character references in `data`.

    :param data: text that may contain references such as ``&amp;``
    :return: the text with those references replaced by their characters
    """
    try:
        # HTMLParser.unescape was removed in Python 3.9; html.unescape is
        # its documented replacement.
        from html import unescape
    except ImportError:
        # No html.unescape available (Python 2): use the parser method.
        return HTMLParser().unescape(data)
    return unescape(data)
开发者ID:cvium,项目名称:irc_bot,代码行数:6,代码来源:utils.py


注:本文中的six.moves.html_parser.HTMLParser.unescape方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。