当前位置: 首页>>代码示例>>Python>>正文


Python mwparserfromhell.parse方法代码示例

本文整理汇总了Python中mwparserfromhell.parse方法的典型用法代码示例。如果您正苦于以下问题:Python mwparserfromhell.parse方法的具体用法?Python mwparserfromhell.parse怎么用?Python mwparserfromhell.parse使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在mwparserfromhell的用法示例。


在下文中一共展示了mwparserfromhell.parse方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: make_new_wikicode

# 需要导入模块: import mwparserfromhell [as 别名]
# 或者: from mwparserfromhell import parse [as 别名]
def make_new_wikicode(text, form_data, page_name):
    """
    Apply the template edits approved in a submitted form to a page's wikitext.

    Every template found in ``text`` is wrapped in a ``main.TemplateEdit``.
    Edits whose ``classification`` is ``'ignored'`` or ``'rejected'`` are
    skipped. For the remaining ones, ``form_data`` is queried under
    ``edit.orig_hash`` (the proposed addition) and
    ``edit.orig_hash + '-addlink'`` (must equal ``'checked'``); when both are
    present, each ``|``-separated suggestion is passed to
    ``edit.update_template``.

    :param text: wikitext of the page
    :param form_data: object with a dict-like ``get`` method holding the
        submitted form fields
    :param page_name: page title, passed to ``TemplateEdit`` and used in the
        log message when an update fails
    :returns: a ``(wikitext, change_made)`` tuple; ``change_made`` is ``True``
        when at least one ``update_template`` call completed without raising
        :py:exc:`ValueError`
    """
    # ``unicode`` exists only on Python 2; on Python 3 ``str`` is the text type.
    try:
        text_type = unicode
    except NameError:
        text_type = str

    wikicode = mwparserfromhell.parse(text)
    change_made = False
    for template in wikicode.filter_templates():
        edit = main.TemplateEdit(template, page_name)
        if edit.classification in ('ignored', 'rejected'):
            continue
        proposed_addition = form_data.get(edit.orig_hash)
        user_checked = form_data.get(edit.orig_hash + '-addlink')
        if not proposed_addition or user_checked != 'checked':
            continue
        # Go through one or more suggestions separated by pipe
        for proposed_parameter in proposed_addition.split("|"):
            try:
                # presumably rewrites the wrapped template in place with this
                # parameter added -- verify against TemplateEdit
                edit.update_template(proposed_parameter)
            except ValueError:
                # TODO report to the user
                app.logger.exception('update_template failed on {}'.format(page_name))
            else:
                change_made = True
    return text_type(wikicode), change_made
开发者ID:dissemin,项目名称:oabot,代码行数:22,代码来源:app.py

示例2: bot_is_allowed

# 需要导入模块: import mwparserfromhell [as 别名]
# 或者: from mwparserfromhell import parse [as 别名]
def bot_is_allowed(text, user):
    """
    Tell whether the bot ``user`` may edit a page (bot exclusion compliance).

    Adapted from https://en.wikipedia.org/wiki/Template:Bots

    Only the first ``{{bots}}`` / ``{{nobots}}`` template found in ``text`` is
    considered. Its parameters are evaluated in order:

    - ``allow=none`` denies, ``allow=<list>`` allows when the list contains
      ``user`` or ``all``;
    - ``deny=none`` allows, ``deny=<list>`` denies when the list contains
      ``user`` or ``all``.

    A ``{{nobots}}`` template without any parameter denies every bot. In all
    other cases (including pages without either template) the bot is allowed.

    :param text: wikitext of the page
    :param str user: name of the bot account; compared case-insensitively
    :returns: ``True`` if the bot may edit the page, ``False`` otherwise
    """
    user = user.lower().strip()
    wikicode = mwparserfromhell.parse(text)
    for tl in wikicode.filter_templates():
        # matches() normalizes surrounding whitespace and first-letter case,
        # so "{{Bots}}" and "{{ nobots }}" are recognized as well
        if tl.name.matches('bots') or tl.name.matches('nobots'):
            break
    else:
        # neither template is present on the page
        return True

    for param in tl.params:
        bots = [x.lower().strip() for x in param.value.split(",")]
        # tolerate whitespace around the parameter name ("| allow = ...")
        param_name = param.name.strip()
        if param_name == 'allow':
            if ''.join(bots) == 'none':
                return False
            for bot in bots:
                if bot in (user, 'all'):
                    return True
        elif param_name == 'deny':
            if ''.join(bots) == 'none':
                return True
            for bot in bots:
                if bot in (user, 'all'):
                    return False

    # a bare {{nobots}} bans all bots
    if tl.name.matches('nobots') and not tl.params:
        return False
    return True
开发者ID:dissemin,项目名称:oabot,代码行数:27,代码来源:main.py

示例3: check_relative

# 需要导入模块: import mwparserfromhell [as 别名]
# 或者: from mwparserfromhell import parse [as 别名]
def check_relative(self, src_title, wikilink, title):
        """
        Replace a section link pointing at the current page with a relative link.

        A link such as `[[Foo#Bar]]` found on the page `src_title` becomes
        `[[#Bar]]` when `Foo` is `src_title` itself or is listed in the
        redirects map with a target equal to it.

        :param str src_title: the title of the page being checked
        :param wikilink: the link to be checked
        :type wikilink: :py:class:`mwparserfromhell.nodes.wikilink.Wikilink`
        :param title: the parsed :py:attr:`wikilink.title`
        :type title: :py:class:`mw.parser_helpers.title.Title`
        """
        # interwiki links and links without a section cannot be made relative
        if title.iwprefix or not title.sectionname:
            return

        # follow the redirect, if any, keeping the original section
        redirect_target = self.api.redirects.map.get(title.fullpagename)
        if not redirect_target:
            resolved = title
        else:
            resolved = self.api.Title(redirect_target)
            resolved.sectionname = title.sectionname

        if canonicalize(src_title) != resolved.fullpagename:
            return

        wikilink.title = "#" + resolved.sectionname
        # keep the parsed title in sync with the modified link
        title.parse(wikilink.title)
开发者ID:lahwaacz,项目名称:wiki-scripts,代码行数:27,代码来源:link-checker.py

示例4: check_redirect_capitalization

# 需要导入模块: import mwparserfromhell [as 别名]
# 或者: from mwparserfromhell import parse [as 别名]
def check_redirect_capitalization(self, wikilink, title):
        """
        Bypass a redirect when it differs from its target only in letter case.

        :param wikilink: the link to be checked
        :type wikilink: :py:class:`mwparserfromhell.nodes.wikilink.Wikilink`
        :param title: the parsed :py:attr:`wikilink.title`
        :type title: :py:class:`mw.parser_helpers.title.Title`
        """
        # run only in interactive mode
        if self.interactive is False:
            return

        # FIXME: very common false positive
        if title.pagename.lower().startswith("wpa supplicant"):
            return

        # might be only a section, e.g. [[#foo]]
        if not title.fullpagename:
            return

        target = self.api.redirects.map.get(title.fullpagename)
        if target is None or target.lower() != title.fullpagename.lower():
            return

        # carry the section over to the redirect target
        if title.sectionname:
            target = "#".join((target, title.sectionname))
        wikilink.title = target
        # keep the parsed title in sync with the modified link
        title.parse(wikilink.title)
开发者ID:lahwaacz,项目名称:wiki-scripts,代码行数:27,代码来源:link-checker.py

示例5: recategorize_page

# 需要导入模块: import mwparserfromhell [as 别名]
# 或者: from mwparserfromhell import parse [as 别名]
def recategorize_page(self, page, source, target):
        """
        Retarget the category links of one page from ``source`` to ``target``.

        :param page: page data holding ``"title"``, ``"pageid"`` and a
            ``"revisions"`` list whose first entry provides the main slot
            content and the ``"timestamp"``
        :param source: title of the category being replaced; must be in the
            ``Category`` namespace
        :param target: new title assigned to the matching wikilinks

        The page is saved through ``self.api.edit`` only when its text changed.
        """
        title = page["title"]
        revision = page["revisions"][0]
        text_old = revision["slots"]["main"]["*"]
        timestamp = revision["timestamp"]

        source = self.api.Title(source)
        assert(source.namespace == "Category")

        logger.info("Parsing page [[{}]] ...".format(title))
        wikicode = mwparserfromhell.parse(text_old)
        for wikilink in wikicode.ifilter_wikilinks(recursive=True):
            wl_title = self.api.Title(wikilink.title)
            if wl_title.namespace != "Category" or wl_title.pagename != source.pagename:
                continue
            wikilink.title = target
        text_new = str(wikicode)

        if text_old == text_new:
            return
        # an interactive variant of this call (edit_interactive) used to be
        # available here and is currently disabled
        self.api.edit(title, page["pageid"], text_new, timestamp, self.edit_summary, bot="")
开发者ID:lahwaacz,项目名称:wiki-scripts,代码行数:21,代码来源:recategorize-over-redirect.py

示例6: extract_labels

# 需要导入模块: import mwparserfromhell [as 别名]
# 或者: from mwparserfromhell import parse [as 别名]
def extract_labels(self, text):
        """
        Yield the labels found in a version of text by parsing its templates.

        :Parameters:
            text : `str`
                Wikitext markup to extract labels from

        :Returns:
            An iterator over whatever ``self.from_template`` yields for each
            template (documented upstream as (project, label) pairs)
        """
        # Optional cheap pre-check: when the instance provides filter_text and
        # it rejects the text, skip the parser entirely (e.g. because the
        # template name never appears).
        if hasattr(self, 'filter_text') and not self.filter_text(text):
            return

        for template in mwp.parse(text).filter_templates():
            yield from self.from_template(template)
开发者ID:wikimedia,项目名称:articlequality,代码行数:25,代码来源:extractor.py

示例7: add_oa_links_in_references

# 需要导入模块: import mwparserfromhell [as 别名]
# 或者: from mwparserfromhell import parse [as 别名]
def add_oa_links_in_references(text, page, only_doi=False):
    """
    Main function of the bot.

    This is a generator: for every template in ``text`` it builds a
    ``TemplateEdit``, records the template's position in ``edit.index``,
    calls ``edit.propose_change(only_doi)`` and yields the edit.

    :param text: the wikicode of the page to edit
    :param page: passed as second argument to ``TemplateEdit``
    :param only_doi: forwarded unchanged to ``TemplateEdit.propose_change``
    :returns: an iterator over ``TemplateEdit`` objects, one per template
    """
    templates = mwparserfromhell.parse(text).filter_templates()
    for position, template in enumerate(templates):
        edit = TemplateEdit(template, page)
        edit.index = position
        edit.propose_change(only_doi)
        yield edit
开发者ID:dissemin,项目名称:oabot,代码行数:17,代码来源:main.py

示例8: _process_wikicode

# 需要导入模块: import mwparserfromhell [as 别名]
# 或者: from mwparserfromhell import parse [as 别名]
def _process_wikicode(text):
    """Parse ``text`` with ``mwparserfromhell.parse`` and return the result."""
    wikicode = mwparserfromhell.parse(text)
    return wikicode
开发者ID:wikimedia,项目名称:revscoring,代码行数:4,代码来源:parsed.py

示例9: set_title

# 需要导入模块: import mwparserfromhell [as 别名]
# 或者: from mwparserfromhell import parse [as 别名]
def set_title(self, title):
        """
        Make ``title`` the current page and parse its stored content.

        Unsaved changes to the previous page will be lost. The content must
        already be present in ``self.contents`` (it should have been fetched
        by :py:meth:`fetch_pages`).

        :param str title: the page title
        :raises ValueError: if ``title`` is not a key of ``self.contents``
        """
        if title in self.contents:
            self.title = title
            self.wikicode = mwparserfromhell.parse(self.contents[self.title])
        else:
            raise ValueError("Content of page [[{}]] is not fetched.".format(title))
开发者ID:lahwaacz,项目名称:wiki-scripts,代码行数:15,代码来源:autopage.py

示例10: ensure_flagged_by_template

# 需要导入模块: import mwparserfromhell [as 别名]
# 或者: from mwparserfromhell import parse [as 别名]
def ensure_flagged_by_template(wikicode, node, template_name, *template_parameters, overwrite_parameters=True):
    """
    Makes sure that ``node`` in ``wikicode`` is immediately (except for
    whitespace) followed by a template with ``template_name`` and optional
    ``template_parameters``.

    :param wikicode: a :py:class:`mwparserfromhell.wikicode.Wikicode` object
    :param node: a :py:class:`mwparserfromhell.nodes.Node` object
    :param str template_name: the name of the template flag
    :param template_parameters: optional template parameters
    :param overwrite_parameters: when ``True`` (the default), an already
        present flag is replaced by a freshly built one; otherwise the
        existing flag is kept untouched and returned
    :returns: the template flag, as a
        :py:class:`mwparserfromhell.nodes.template.Template` object
    """
    parent = get_parent_wikicode(wikicode, node)
    adjacent = get_adjacent_node(parent, node, ignore_whitespace=True)

    # build the flag from its textual form, "{{name|param1|param2...}}"
    if template_parameters:
        flag_content = "|".join([template_name, *template_parameters])
    else:
        flag_content = template_name
    flag = mwparserfromhell.parse("{{%s}}" % flag_content).nodes[0]
    assert(isinstance(flag, mwparserfromhell.nodes.Template))

    already_flagged = (
        isinstance(adjacent, mwparserfromhell.nodes.Template)
        and adjacent.name.matches(template_name)
    )
    if not already_flagged:
        wikicode.insert_after(node, flag)
    elif overwrite_parameters is True:
        wikicode.replace(adjacent, flag)
    else:
        # in case of {{Dead link}} we want to preserve the original parameters
        flag = adjacent

    assert(get_parent_wikicode(wikicode, flag) is parent)
    return flag
开发者ID:lahwaacz,项目名称:wiki-scripts,代码行数:36,代码来源:wikicode.py

示例11: update_page

# 需要导入模块: import mwparserfromhell [as 别名]
# 或者: from mwparserfromhell import parse [as 别名]
def update_page(title, text, langlinks, weak_update=True):
        """
        Rebuild the header of a page with the given interlanguage links.

        :param str title: title of the page
        :param str text: wikitext of the page
        :param langlinks: a sorted list of ``(tag, title)`` tuples as obtained
                          from :py:meth:`self.get_langlinks`
        :param weak_update:
            When ``True``, the langlinks present on the page are mixed with those
            suggested by ``family_titles``. This is necessary only when there are
            multiple "intersecting" families, in which case the intersection should
            be preserved and solved manually. This is reported in _merge_families.
        :returns: updated wikicode, or ``text`` unchanged when the page is
            skipped because it contains a behavior switch
        """
        # temporarily skip main pages until the behavior switches
        # (__NOTOC__ etc.) can be parsed by mwparserfromhell
        # NOTE: handling whitespace right will be hard: https://wiki.archlinux.org/index.php?title=Main_page&diff=383144&oldid=382787
        if re.search("__NOTOC__|__NOEDITSECTION__", text):
            logger.warning("Skipping page '{}' (contains behavior switch(es))".format(title))
            return text

        # format langlinks, in the prefix form
        # (e.g. "cs:Some title" for title="Some title" and tag="cs")
        formatted = ["[[{}:{}]]".format(tag, linked_title) for tag, linked_title in langlinks]

        logger.info("Parsing page [[{}]] ...".format(title))
        wikicode = mwparserfromhell.parse(text)

        merge_existing = weak_update is True
        options = {"remove_from_parent": True}
        if merge_existing:
            options["langlinks"] = formatted
        parent, magics, cats, extracted = header.get_header_parts(wikicode, **options)
        if merge_existing:
            # use the list returned by get_header_parts; otherwise the
            # extracted langlinks are dropped in favour of the formatted ones
            formatted = extracted

        header.build_header(wikicode, parent, magics, cats, formatted)
        return wikicode
开发者ID:lahwaacz,项目名称:wiki-scripts,代码行数:35,代码来源:InterlanguageLinks.py

示例12: fix_page

# 需要导入模块: import mwparserfromhell [as 别名]
# 或者: from mwparserfromhell import parse [as 别名]
def fix_page(title, text_old):
        """
        Make the language of a page's categories agree with the page's language.

        The language name is taken from ``lang.detect_language(title)``; every
        category link in the header whose detected language differs gets its
        title rewritten with ``lang.format_title``. The header is then rebuilt.

        :param title: title of the page
        :param text_old: current wikitext of the page
        :returns: the modified wikicode object
        """
        langname = lang.detect_language(title)[1]
        wikicode = mwparserfromhell.parse(text_old)
        parent, magics, cats, langlinks = get_header_parts(wikicode, remove_from_parent=True)

        # get_header_parts returns list of wikicode objects, each with one node
        for category_link in (cat.nodes[0] for cat in cats):
            pure, detected = lang.detect_language(str(category_link.title))
            if detected != langname:
                category_link.title = lang.format_title(pure, langname)

        build_header(wikicode, parent, magics, cats, langlinks)
        return wikicode
开发者ID:lahwaacz,项目名称:wiki-scripts,代码行数:17,代码来源:Categorization.py

示例13: decategorize

# 需要导入模块: import mwparserfromhell [as 别名]
# 或者: from mwparserfromhell import parse [as 别名]
def decategorize(title, text_old):
        """
        Rebuild the header of a page without any category.

        :param title: title of the page (not used by this function)
        :param text_old: current wikitext of the page
        :returns: the modified wikicode object
        """
        wikicode = mwparserfromhell.parse(text_old)
        header_parts = get_header_parts(wikicode, remove_from_parent=True)
        # the extracted categories are intentionally discarded
        parent, magics, _cats, langlinks = header_parts
        no_categories = []
        build_header(wikicode, parent, magics, no_categories, langlinks)
        return wikicode
开发者ID:lahwaacz,项目名称:wiki-scripts,代码行数:7,代码来源:Decategorization.py

示例14: get_normalized_extlinks

# 需要导入模块: import mwparserfromhell [as 别名]
# 或者: from mwparserfromhell import parse [as 别名]
def get_normalized_extlinks(wikicode):
    """
    Return the external links of ``wikicode`` with normalized, parseable URLs.

    The links are re-parsed and normalized in place (whitespace stripped,
    percent-encoding decoded where possible); links whose URL cannot be
    parsed or has an empty host (except ``mailto:``) are left out of the result.

    :param wikicode: a :py:class:`mwparserfromhell.wikicode.Wikicode` object
    :returns: a list of external link nodes
    """
    # Pass 1: re-parse all external links, because "http://example.com/{{Dead link}}" was initially
    # parsed as one big URL, but the template transcludes tags which should terminate the URL.
    # The straightforward wikicode.replace(el, str(el)) over filter_external_links() is avoided as a
    # performance optimization, see https://github.com/earwig/mwparserfromhell/issues/195
    link_type = mwparserfromhell.nodes.external_link.ExternalLink
    for parent, link in parented_ifilter(wikicode, forcetype=link_type, recursive=True):
        parent.replace(link, str(link), recursive=False)

    extlinks = wikicode.filter_external_links(recursive=True)

    # Pass 2: normalize the URLs
    for link in extlinks:
        # strip whitespace like "\t"
        link.url = str(link.url).strip()
        # decode percent-encoding
        # MW incompatibility: MediaWiki decodes only some characters, spaces and some unicode characters with accents are encoded
        try:
            link.url = urldecode(str(link.url))
        except UnicodeDecodeError:
            # keep the stripped but still encoded URL
            pass

    # Pass 3: skip invalid URLs
    filtered_extlinks = []
    for link in extlinks:
        try:
            # try to parse the URL - fails e.g. if port is not a number
            # reference: https://urllib3.readthedocs.io/en/latest/reference/urllib3.util.html#urllib3.util.parse_url
            url = urllib3.util.url.parse_url(str(link.url))
        except urllib3.exceptions.LocationParseError:
            continue
        # keep only URLs with a non-empty host; "http://" or "http://git@" or "http:///var/run" are dropped
        # (partial workaround for https://github.com/earwig/mwparserfromhell/issues/196 )
        # GOTCHA: mailto:user@host is scheme + path only; auth, host and port are recognized only after //
        if url.scheme == "mailto" or url.host:
            filtered_extlinks.append(link)

    return filtered_extlinks
开发者ID:lahwaacz,项目名称:wiki-scripts,代码行数:41,代码来源:parser_cache.py

示例15: update_page

# 需要导入模块: import mwparserfromhell [as 别名]
# 或者: from mwparserfromhell import parse [as 别名]
async def update_page(self, src_title, text):
        """
        Parse the content of the page and call various methods to update the links.

        This is a coroutine (the body awaits :py:func:`asyncio.gather`), so it
        has to be awaited or scheduled on an event loop by the caller.

        :param str src_title: title of the page
        :param str text: content of the page
        :returns: a (text, edit_summary) tuple, where text is the updated content
            and edit_summary is the description of performed changes
        """
        logger.info("Parsing page [[{}]] ...".format(src_title))
        # FIXME: skip_style_tags=True is a partial workaround for https://github.com/earwig/mwparserfromhell/issues/40
        wikicode = mwparserfromhell.parse(text, skip_style_tags=True)

        # We could use the default single-threaded executor with basically the same performance
        # (because of Python's GIL), but the ThreadPoolExecutor allows to limit the maximum number
        # of workers and thus the maximum number of concurrent connections.
        with ThreadPoolExecutor(max_workers=10) as executor:
            # NOTE(review): the session is opened but never passed to
            # check_extlink_status here -- confirm whether it is still needed
            with requests.Session() as session:
                loop = asyncio.get_event_loop()
                # run_in_executor forwards extra positional arguments to the callable
                tasks = [
                    loop.run_in_executor(executor, self.check_extlink_status, wikicode, extlink)
                    for extlink in wikicode.ifilter_external_links(recursive=True)
                ]
                # wait for all checks; their return values are not used
                await asyncio.gather(*tasks)

        edit_summary = "update status of external links (interactive)"
        return str(wikicode), edit_summary
开发者ID:lahwaacz,项目名称:wiki-scripts,代码行数:35,代码来源:extlink-checker.py


注:本文中的mwparserfromhell.parse方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。