当前位置: 首页>>代码示例>>Python>>正文


Python textlib.replaceExcept函数代码示例

本文整理汇总了Python中pywikibot.textlib.replaceExcept函数的典型用法代码示例。如果您正苦于以下问题:Python replaceExcept函数的具体用法?Python replaceExcept怎么用?Python replaceExcept使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了replaceExcept函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: translateAndCapitalizeNamespaces

    def translateAndCapitalizeNamespaces(self, text):
        """
        Use localized namespace names.

        For every non-main namespace of ``self.site``, wikilinks whose
        prefix is one of the namespace's alternative names are rewritten
        so that they use the name that ends up first in the name list
        (``final_ns``). Alternative names are matched with either an
        upper- or lower-case first letter and with a space or underscore
        between words. Text inside nowiki, comment, math and pre sections
        is left alone.

        :param text: wikitext to process
        :return: the processed wikitext; ``text`` is returned unchanged
            on wikipedia:arz
        """
        # arz uses English-style codes, so leave the text untouched
        if self.site.sitename == 'wikipedia:arz':
            return text
        # wiki links aren't parsed here.
        exceptions = ['nowiki', 'comment', 'math', 'pre']

        for namespace in self.site.namespaces.values():
            if namespace == 0:
                # skip main (article) namespace
                continue
            # Work on a copy of the names so the site's namespace dict
            # is not modified by the remove/insert/pop calls below.
            # NOTE(review): presumably iterating a namespace yields its
            # names with the preferred one first -- confirm.
            namespaces = list(namespace)
            if namespace == 6 and self.site.family.name == 'wikipedia':
                if self.site.code in ('en', 'fr') and MediaWikiVersion(
                        self.site.version()) >= MediaWikiVersion('1.14'):
                    # do not change "Image" on en-wiki and fr-wiki
                    assert u'Image' in namespaces
                    namespaces.remove(u'Image')
                # the hu/pt checks are independent of the en/fr check above
                if self.site.code == 'hu':
                    # do not change "Kép" on hu-wiki
                    assert u'Kép' in namespaces
                    namespaces.remove(u'Kép')
                elif self.site.code == 'pt':
                    # use "Imagem" by default on pt-wiki (per T57242):
                    # move it to the front so it becomes final_ns below
                    assert 'Imagem' in namespaces
                    namespaces.insert(
                        0, namespaces.pop(namespaces.index('Imagem')))
            # final namespace variant: the name every alias is rewritten to
            final_ns = namespaces.pop(0)
            if namespace in (2, 3):
                # skip localized user namespace, maybe gender is used;
                # only the English names are treated as aliases here
                namespaces = ['User' if namespace == 2 else 'User talk']
            # Turn each remaining alias into a regex fragment that accepts
            # an upper- or lower-case first letter and either a space or
            # an underscore between words.
            for i, item in enumerate(namespaces):
                item = item.replace(' ', '[ _]')
                item = u'[%s%s]' % (item[0], item[0].lower()) + item[1:]
                namespaces[i] = item
            # Also match the target name itself as returned by
            # first_lower() (presumably with a lower-case first letter;
            # the helper is defined outside this block). It is appended
            # as-is, without the space/underscore handling above.
            namespaces.append(first_lower(final_ns))
            if final_ns and namespaces:
                if self.site.sitename == 'wikipedia:pt' and namespace == 6:
                    # only change on these file extensions (per T57242)
                    extensions = ('png', 'gif', 'jpg', 'jpeg', 'svg', 'tiff',
                                  'tif')
                    text = textlib.replaceExcept(
                        text,
                        r'\[\[\s*({}) *:(?P<name>[^\|\]]*?\.({}))'
                        r'(?P<label>.*?)\]\]'
                        .format('|'.join(namespaces), '|'.join(extensions)),
                        r'[[{}:\g<name>\g<label>]]'.format(final_ns),
                        exceptions)
                else:
                    # generic case: keep everything after the colon as is
                    text = textlib.replaceExcept(
                        text,
                        r'\[\[\s*(%s) *:(?P<nameAndLabel>.*?)\]\]'
                        % '|'.join(namespaces),
                        r'[[%s:\g<nameAndLabel>]]' % final_ns,
                        exceptions)
        return text
开发者ID:Zeffar,项目名称:Elobot,代码行数:60,代码来源:cosmetic_changes.py

示例2: fixSyntaxSave

    def fixSyntaxSave(self, text):
        """
        Repair link syntax errors that are safe to fix automatically.

        Bracketed URLs pointing at the wiki being worked on are turned
        into wikilinks, external links wrapped in double brackets get
        single brackets, and a pipe between an external URL and its
        label is replaced by a space. Text inside nowiki, comment, math,
        pre, source and startspace sections is not touched.

        :param text: wikitext to process
        :return: the processed wikitext
        """
        def replace_link(match):
            """Build ``[[link]]`` or ``[[link|title]]`` from the match."""
            replacement = '[[' + match.group('link')
            if match.group('title'):
                replacement += '|' + match.group('title')
            return replacement + ']]'

        exceptions = ['nowiki', 'comment', 'math', 'pre', 'source',
                      'startspace']
        # link to the wiki working on
        # Only use suffixes for article paths
        for suffix in self.site._interwiki_urls(True):
            http_url = self.site.base_url(suffix, 'http')
            if self.site.protocol() == 'http':
                https_url = None
            else:
                https_url = self.site.base_url(suffix, 'https')
            # Compare the strings without their first 4 resp. 5 characters
            # (presumably the 'http' / 'https' scheme names). If the
            # remainders are equal, one pattern covers both protocols and
            # also the protocol-relative form (//en.wikipedia.org/…).
            if https_url is not None and http_url[4:] == https_url[5:]:
                urls = ['(?:https?:)?' + re.escape(http_url[5:])]
            else:
                urls = [re.escape(url) for url in (http_url, https_url)
                        if url is not None]
            for url in urls:
                # Only include links which don't include the separator as
                # the wikilink won't support additional parameters.
                # '&' is excluded too when the suffix already contains '?'.
                separator = '?'
                if '?' in suffix:
                    separator += '&'
                # Match first a non space in the title to prevent that multiple
                # spaces at the end without title will be matched by it
                text = textlib.replaceExcept(
                    text,
                    r'\[\[?' + url + r'(?P<link>[^' + separator + r']+?)'
                    r'(\s+(?P<title>[^\s].*?))?\s*\]\]?',
                    replace_link, exceptions, site=self.site)
        # external link in/starting with double brackets
        text = textlib.replaceExcept(
            text,
            r'\[\[(?P<url>https?://[^\]]+?)\]\]?',
            r'[\g<url>]', exceptions, site=self.site)
        # external link and description separated by a pipe, with
        # whitespace in front of the pipe, so that it is clear that
        # the pipe is not a legitimate part of the URL.
        text = textlib.replaceExcept(
            text,
            r'\[(?P<url>https?://[^\|\] \r\n]+?) +\| *(?P<label>[^\|\]]+?)\]',
            r'[\g<url> \g<label>]', exceptions)
        # pipe in external link, where the correct end of the URL can
        # be detected from the file extension. It is very unlikely that
        # this will cause mistakes.
        extensions = [r'\.{0}'.format(ext)
                      for ext in ['pdf', 'html?', 'php', 'aspx?', 'jsp']]
        text = textlib.replaceExcept(
            text,
            r'\[(?P<url>https?://[^\|\] ]+?(' + '|'.join(extensions) + r')) *'
            r'\| *(?P<label>[^\|\]]+?)\]',
            r'[\g<url> \g<label>]', exceptions)
        return text
开发者ID:Nivgov,项目名称:AvodatGemer,代码行数:60,代码来源:cosmetic_changes.py

示例3: commonsfiledesc

    def commonsfiledesc(self, text):
        """
        Clean up file descriptions on the Wikimedia Commons.

        It is working according to [1] and is documented to work only on
        pages in the file namespace on the Wikimedia Commons.

        [1]: https://commons.wikimedia.org/wiki/Commons:Tools/pywiki_file_description_cleanup

        :param text: wikitext of the page
        :return: the cleaned-up wikitext, or ``text`` unchanged when the
            page is skipped by the guard below
        """
        # NOTE(review): the docstring says only file-namespace pages are
        # handled, yet this guard skips pages whose namespace equals 6
        # (the File namespace in MediaWiki). The condition is kept as it
        # was; confirm whether ``!= 6`` was intended.
        if self.site.sitename != 'commons:commons' or self.namespace == 6:
            # Hand the text back untouched. A bare ``return`` would give
            # the caller None instead of the page text.
            return text
        # section headers to {{int:}} versions
        exceptions = ['comment', 'includeonly', 'math', 'noinclude', 'nowiki',
                      'pre', 'source', 'ref', 'timeline']
        text = textlib.replaceExcept(text,
                                     r"([\r\n]|^)\=\= *Summary *\=\=",
                                     r"\1== {{int:filedesc}} ==",
                                     exceptions, True)
        text = textlib.replaceExcept(
            text,
            r"([\r\n])\=\= *\[\[Commons:Copyright tags\|Licensing\]\]: *\=\=",
            r"\1== {{int:license-header}} ==", exceptions, True)
        text = textlib.replaceExcept(
            text,
            r"([\r\n])\=\= *(Licensing|License information|{{int:license}}) *\=\=",
            r"\1== {{int:license-header}} ==", exceptions, True)

        # frequent field values to {{int:}} versions
        text = textlib.replaceExcept(
            text,
            r'([\r\n]\|[Ss]ource *\= *)'
            r'(?:[Oo]wn work by uploader|[Oo]wn work|[Ee]igene [Aa]rbeit) *([\r\n])',
            r'\1{{own}}\2', exceptions, True)
        # drop "see below" permission values, keeping the field itself
        text = textlib.replaceExcept(
            text,
            r'(\| *Permission *\=) *(?:[Ss]ee below|[Ss]iehe unten) *([\r\n])',
            r'\1\2', exceptions, True)

        # added to transwikied pages
        text = textlib.replaceExcept(text, r'__NOTOC__', '', exceptions, True)

        # tracker element for js upload form; 'comment' (first entry) is
        # dropped from the exceptions because the target is itself an
        # HTML comment
        text = textlib.replaceExcept(
            text,
            r'<!-- *{{ImageUpload\|(?:full|basic)}} *-->',
            '', exceptions[1:], True)
        text = textlib.replaceExcept(text, r'{{ImageUpload\|(?:basic|full)}}',
                                     '', exceptions, True)

        # duplicated section headers
        text = textlib.replaceExcept(
            text,
            r'([\r\n]|^)\=\= *{{int:filedesc}} *\=\=(?:[\r\n ]*)\=\= *{{int:filedesc}} *\=\=',
            r'\1== {{int:filedesc}} ==', exceptions, True)
        text = textlib.replaceExcept(
            text,
            r'([\r\n]|^)\=\= *{{int:license-header}} *\=\=(?:[\r\n ]*)'
            r'\=\= *{{int:license-header}} *\=\=',
            r'\1== {{int:license-header}} ==', exceptions, True)
        return text
开发者ID:PersianWikipedia,项目名称:pywikibot-core,代码行数:60,代码来源:cosmetic_changes.py

示例4: fixHtml

    def fixHtml(self, text):
        """
        Replace html markups with wikitext markups.

        Bold/italic tags become wiki quotes, bare ``<hr>`` lines become
        ``----``, ``<hr>`` tags with attributes are closed XHTML-style and
        ``<h1>``..``<h6>`` headers standing alone on a line become
        ``=``-delimited wiki headers. Text inside nowiki, comment, math,
        pre, source and startspace sections is not touched.

        :param text: wikitext to process
        :return: the processed wikitext
        """
        def replace_header(match):
            """Create a header string for replacing."""
            depth = int(match.group(1))
            return r'{0} {1} {0}'.format('=' * depth, match.group(2))

        # Everything case-insensitive (?i)
        # Keep in mind that MediaWiki automatically converts <br> to <br />
        exceptions = ['nowiki', 'comment', 'math', 'pre', 'source',
                      'startspace']
        text = textlib.replaceExcept(text, r'(?i)<(b|strong)>(.*?)</\1>',
                                     r"'''\2'''", exceptions, site=self.site)
        text = textlib.replaceExcept(text, r'(?i)<(i|em)>(.*?)</\1>',
                                     r"''\2''", exceptions, site=self.site)
        # horizontal line without attributes in a single line
        text = textlib.replaceExcept(text, r'(?i)([\r\n])<hr[ /]*>([\r\n])',
                                     r'\1----\2', exceptions)
        # horizontal line with attributes; can't be done with wiki syntax
        # so we only make it XHTML compliant
        text = textlib.replaceExcept(text, r'(?i)<hr ([^>/]+?)>',
                                     r'<hr \1 />',
                                     exceptions)
        # a header where only spaces are in the same line; only levels
        # 1-6 exist in HTML and wikitext, so <h7> is deliberately left
        # alone. Lookbehind/lookahead keep the line breaks out of the
        # match so that headers on consecutive lines are all converted.
        text = textlib.replaceExcept(
            text,
            r'(?i)(?<=[\r\n]) *<h([1-6])> *([^<]+?) *</h\1> *(?=[\r\n])',
            replace_header,
            exceptions)
        # TODO: maybe we can make the bot replace <p> tags with \r\n's.
        return text
开发者ID:PersianWikipedia,项目名称:pywikibot-core,代码行数:31,代码来源:cosmetic_changes.py

示例5: fixSyntaxSave

    def fixSyntaxSave(self, text):
        """
        Repair external-link syntax errors that are safe to fix.

        Each (pattern, replacement) pair below is applied in order to the
        whole text, skipping nowiki, comment, math, pre, source and
        startspace sections.

        :param text: wikitext to process
        :return: the processed wikitext
        """
        skipped = ['nowiki', 'comment', 'math', 'pre', 'source',
                   'startspace']
        # link to the wiki working on
        # TODO: disable this for difflinks and titled links,
        # to prevent edits like this:
        # https://de.wikipedia.org/w/index.php?title=Wikipedia%3aVandalismusmeldung&diff=103109563&oldid=103109271
        # text = textlib.replaceExcept(text,
        #                              r'\[https?://%s\.%s\.org/wiki/(?P<link>\S+)\s+(?P<title>.+?)\s?\]'
        #                              % (self.site.code, self.site.family.name),
        #                              r'[[\g<link>|\g<title>]]', exceptions)
        fixes = (
            # external link in double brackets
            (r'\[\[(?P<url>https?://[^\]]+?)\]\]',
             r'[\g<url>]'),
            # external link starting with double bracket
            (r'\[\[(?P<url>https?://.+?)\]',
             r'[\g<url>]'),
            # external link and description separated by a pipe, with
            # whitespace in front of the pipe, so that it is clear that
            # the pipe is not a legitimate part of the URL.
            (r'\[(?P<url>https?://[^\|\] \r\n]+?) +\| *(?P<label>[^\|\]]+?)\]',
             r'[\g<url> \g<label>]'),
            # pipe in external link, where the correct end of the URL can
            # be detected from the file extension. It is very unlikely
            # that this will cause mistakes.
            (r'\[(?P<url>https?://[^\|\] ]+?'
             r'(\.pdf|\.html|\.htm|\.php|\.asp|\.aspx|\.jsp))'
             r' *\| *(?P<label>[^\|\]]+?)\]',
             r'[\g<url> \g<label>]'),
        )
        for pattern, replacement in fixes:
            text = textlib.replaceExcept(text, pattern, replacement, skipped)
        return text
开发者ID:skamithi,项目名称:pywikibot-core,代码行数:35,代码来源:cosmetic_changes.py

示例6: fixHtml

 def fixHtml(self, text):
     """
     Replace html markups with wikitext markups.

     Bold/italic tags become wiki quotes, bare ``<hr>`` lines become
     ``----``, ``<hr>`` tags with attributes are closed XHTML-style and
     ``<h1>``..``<h6>`` headers standing alone on a line become
     ``=``-delimited wiki headers. Text inside nowiki, comment, math,
     pre, source and startspace sections is not touched.

     :param text: wikitext to process
     :return: the processed wikitext
     """
     # Everything case-insensitive (?i)
     # Keep in mind that MediaWiki automatically converts <br> to <br />
     exceptions = ['nowiki', 'comment', 'math', 'pre', 'source',
                   'startspace']
     text = textlib.replaceExcept(text, r'(?i)<(b|strong)>(.*?)</\1>',
                                  r"'''\2'''", exceptions, site=self.site)
     text = textlib.replaceExcept(text, r'(?i)<(i|em)>(.*?)</\1>',
                                  r"''\2''", exceptions, site=self.site)
     # horizontal line without attributes in a single line
     text = textlib.replaceExcept(text, r'(?i)([\r\n])<hr[ /]*>([\r\n])',
                                  r'\1----\2', exceptions)
     # horizontal line with attributes; can't be done with wiki syntax
     # so we only make it XHTML compliant
     text = textlib.replaceExcept(text, r'(?i)<hr ([^>/]+?)>',
                                  r'<hr \1 />',
                                  exceptions)
     # a header where only spaces are in the same line.
     # The surrounding line breaks are asserted with lookbehind/lookahead
     # instead of being consumed: when the match swallows the trailing
     # newline, a header on the directly following line has no leading
     # newline left to match and is silently skipped.
     for level in range(1, 7):
         marks = '=' * level
         text = textlib.replaceExcept(
             text,
             r'(?i)(?<=[\r\n]) *<h%d> *([^<]+?) *</h%d> *(?=[\r\n])'
             % (level, level),
             '%s \\1 %s' % (marks, marks),
             exceptions)
     # TODO: maybe we can make the bot replace <p> tags with \r\n's.
     return text
开发者ID:edgarskos,项目名称:wbots,代码行数:28,代码来源:cosmetic_changes.py

示例7: test_replace_template

    def test_replace_template(self):
        """Check that nothing inside a template is replaced."""
        nested_links = (r'a {{templatename '
                        r'    | accessdate={{Fecha|1993}} '
                        r'    |atitle=The [[real title]] }}')
        nested_templates = (r'a {{templatename '
                            r'    | 1={{a}}2{{a}} '
                            r'    | 2={{a}}1{{a}} }}')
        nested_parameters = (r'a {{templatename '
                             r'    | 1={{{a}}}2{{{a}}} '
                             r'    | 2={{{a}}}1{{{a}}} }}')
        samples = [nested_links, nested_templates, nested_parameters]
        # sf.net bug 1575: unclosed template
        samples.append(nested_parameters[:-2])

        for sample in samples:
            # only the leading 'a' sits outside the template
            result = textlib.replaceExcept(sample, 'a', 'X',
                                           ['template'], site=self.site)
            self.assertEqual(result, 'X' + sample[1:])
开发者ID:metakgp,项目名称:batman,代码行数:28,代码来源:textlib_tests.py

示例8: test_replace_source_reference

 def test_replace_source_reference(self):
     """Check that reference-like text in the source is copied verbatim."""
     # The source strings deliberately use a group number / group name
     # that does not exist in the pattern, in case the replacement
     # machinery would try to resolve it as a real back reference.
     numbered = textlib.replaceExcept(
         r"\42", r"^(.*)$", r"X\1X", [], site=self.site)
     self.assertEqual(numbered, r"X\42X")

     named = textlib.replaceExcept(
         r"\g<bar>", r"^(?P<foo>.*)$", r"X\g<foo>X", [], site=self.site)
     self.assertEqual(named, r"X\g<bar>X")
开发者ID:hasteur,项目名称:pywikibot_scripts,代码行数:8,代码来源:textlib_tests.py

示例9: removeUselessSpaces

 def removeUselessSpaces(self, text):
     """
     Cleanup multiple or trailing spaces.

     Runs of two or more spaces are collapsed to one and a single space
     at the end of a line is removed. Text inside comment, math, nowiki,
     pre, startspace, table and template sections is not touched.

     :param text: wikitext to process
     :return: the processed wikitext
     """
     multipleSpacesR = re.compile('  +')
     # MULTILINE makes '$' match at the end of every line; without it
     # only a space at the very end of the whole text was removed.
     spaceAtLineEndR = re.compile(' $', re.MULTILINE)
     exceptions = ['comment', 'math', 'nowiki', 'pre', 'startspace', 'table',
                   'template']
     text = textlib.replaceExcept(text, multipleSpacesR, ' ', exceptions)
     text = textlib.replaceExcept(text, spaceAtLineEndR, '', exceptions)
     return text
开发者ID:skamithi,项目名称:pywikibot-core,代码行数:8,代码来源:cosmetic_changes.py

示例10: test_replace_exception

 def test_replace_exception(self):
     """Check replacing with and without a regex exception."""
     # no exceptions: both occurrences are replaced
     unrestricted = textlib.replaceExcept('123x123', '123', '000', [],
                                          site=self.site)
     self.assertEqual(unrestricted, '000x000')

     # the occurrence covered by the exception regex is left alone
     guard = re.compile(r'\w123')
     restricted = textlib.replaceExcept('123x123', '123', '000', [guard],
                                        site=self.site)
     self.assertEqual(restricted, '000x123')
开发者ID:xZise,项目名称:pywikibot-core,代码行数:8,代码来源:textlib_tests.py

示例11: test_replace_with_marker

 def test_replace_with_marker(self):
     """Check where the marker ends up with and without a match."""
     cases = (
         # marker follows the last replacement
         ('x', 'Ayyy.B'),
         # nothing matched: marker is expected at the end of the text
         ('1', 'AxyxB.'),
     )
     for old, expected in cases:
         result = textlib.replaceExcept('AxyxB', old, 'y', [],
                                        marker='.', site=self.site)
         self.assertEqual(result, expected)
开发者ID:xZise,项目名称:pywikibot-core,代码行数:9,代码来源:textlib_tests.py

示例12: test_replace_exception

 def test_replace_exception(self):
     """Check that matches inside a given exception regex are skipped."""
     cases = (
         # (exceptions, expected result)
         ([], '000x000'),
         # 'x123' is covered by the exception, so only the first
         # occurrence is replaced
         ([re.compile(r'\w123')], '000x123'),
     )
     for skipped, expected in cases:
         result = textlib.replaceExcept('123x123', '123', '000', skipped,
                                        site=self.site)
         self.assertEqual(result, expected)
开发者ID:metakgp,项目名称:batman,代码行数:9,代码来源:textlib_tests.py

示例13: test_overlapping_replace

 def test_overlapping_replace(self):
     """Check replacing with and without overlapping matches."""
     expectations = (
         # (allowoverlap, expected result)
         (False, '2121'),
         (True, '2221'),
     )
     for overlap, expected in expectations:
         result = textlib.replaceExcept('1111', '11', '21', [],
                                        allowoverlap=overlap,
                                        site=self.site)
         self.assertEqual(result, expected)
开发者ID:xZise,项目名称:pywikibot-core,代码行数:9,代码来源:textlib_tests.py

示例14: test_simple_replace

 def test_simple_replace(self):
     """Check plain replacements when no exceptions are given."""
     cases = (
         # (source text, expected result)
         ('AxB', 'AyB'),
         ('AxxB', 'AyyB'),
         ('AxyxB', 'AyyyB'),
     )
     for source, expected in cases:
         result = textlib.replaceExcept(source, 'x', 'y', [],
                                        site=self.site)
         self.assertEqual(result, expected)
开发者ID:xZise,项目名称:pywikibot-core,代码行数:10,代码来源:textlib_tests.py

示例15: removeUselessSpaces

 def removeUselessSpaces(self, text):
     """
     Cleanup multiple or trailing spaces.

     Runs of two or more spaces are collapsed to one and a single space
     at the end of a line is removed. Text inside comment, math, nowiki,
     pre, startspace and table sections is not touched; templates are
     skipped as well, except on wikipedia:cs.

     :param text: wikitext to process
     :return: the processed wikitext
     """
     multipleSpacesR = re.compile('  +')
     # MULTILINE makes '$' match at the end of every line; without it
     # only a space at the very end of the whole text was removed.
     spaceAtLineEndR = re.compile(' $', re.MULTILINE)
     exceptions = ['comment', 'math', 'nowiki', 'pre', 'startspace', 'table']
     if self.site.sitename != 'wikipedia:cs':
         exceptions.append('template')
     text = textlib.replaceExcept(text, multipleSpacesR, ' ', exceptions)
     text = textlib.replaceExcept(text, spaceAtLineEndR, '', exceptions)
     return text
开发者ID:metakgp,项目名称:batman,代码行数:10,代码来源:cosmetic_changes.py


注:本文中的pywikibot.textlib.replaceExcept函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。