本文整理汇总了Python中mwparserfromhell.parse方法的典型用法代码示例。如果您正苦于以下问题：Python mwparserfromhell.parse方法的具体用法？Python mwparserfromhell.parse怎么用？Python mwparserfromhell.parse使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块mwparserfromhell的用法示例。
在下文中一共展示了mwparserfromhell.parse方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: make_new_wikicode
# 需要导入模块: import mwparserfromhell [as 别名]
# 或者: from mwparserfromhell import parse [as 别名]
def make_new_wikicode(text, form_data, page_name):
    """
    Apply the template additions approved in ``form_data`` to ``text``.

    Every template of the parsed text is wrapped in ``main.TemplateEdit``.
    Edits classified as 'ignored' or 'rejected' are left untouched.  For the
    remaining ones ``form_data`` is read under the edit's ``orig_hash`` (the
    proposed addition) and under ``orig_hash + '-addlink'`` (the checkbox
    state); the addition is applied only when it is non-empty and the
    checkbox value equals 'checked'.

    :param text: wikitext handed to ``mwparserfromhell.parse``
    :param form_data: object queried with ``.get()`` as described above
    :param page_name: passed to ``main.TemplateEdit`` and used in the log message
    :returns: a ``(wikitext, change_made)`` tuple; ``change_made`` is True as
        soon as one ``update_template`` call completed without ``ValueError``
    """
    parsed = mwparserfromhell.parse(text)
    any_applied = False
    for tpl in parsed.filter_templates():
        edit = main.TemplateEdit(tpl, page_name)
        if edit.classification in ('ignored', 'rejected'):
            continue

        addition = form_data.get(edit.orig_hash)
        checkbox = form_data.get(edit.orig_hash + '-addlink')
        if not addition or checkbox != 'checked':
            continue

        # The addition may carry several suggestions separated by pipes.
        for parameter in addition.split("|"):
            try:
                # Update the template with this parameter added.
                edit.update_template(parameter)
            except ValueError:
                # TODO report to the user
                app.logger.exception('update_template failed on {}'.format(page_name))
            else:
                any_applied = True
    return unicode(parsed), any_applied
示例2: bot_is_allowed
# 需要导入模块: import mwparserfromhell [as 别名]
# 或者: from mwparserfromhell import parse [as 别名]
def bot_is_allowed(text, user):
    """
    Tell whether the bot ``user`` may edit a page, for bot exclusion compliance.

    Adapted from https://en.wikipedia.org/wiki/Template:Bots.  Only the first
    {{bots}} or {{nobots}} template found in ``text`` is examined.

    :param text: wikitext of the page
    :param str user: name of the bot; compared lower-cased and stripped
    :returns: ``False`` when the template excludes the bot, ``True`` otherwise
        (including when the page carries no such template)
    """
    user = user.lower().strip()
    wikicode = mwparserfromhell.parse(text)
    for tl in wikicode.filter_templates():
        # matches() compares page names loosely (first-letter case and
        # surrounding whitespace), so {{Bots}} and {{ nobots }} are found too.
        if tl.name.matches(('bots', 'nobots')):
            break
    else:
        # no exclusion template on the page
        return True

    for param in tl.params:
        # tolerate whitespace around the parameter name, e.g. "| allow = X"
        name = param.name.strip()
        bots = [x.lower().strip() for x in param.value.split(",")]
        if name == 'allow':
            if ''.join(bots) == 'none':
                return False
            if any(bot in (user, 'all') for bot in bots):
                return True
        elif name == 'deny':
            if ''.join(bots) == 'none':
                return True
            if any(bot in (user, 'all') for bot in bots):
                return False

    # A bare {{nobots}} (no parameters) bans every bot.
    if tl.name.matches('nobots') and not tl.params:
        return False
    return True
示例3: check_relative
# 需要导入模块: import mwparserfromhell [as 别名]
# 或者: from mwparserfromhell import parse [as 别名]
def check_relative(self, src_title, wikilink, title):
    """
    Turn section links pointing at the current page into relative links.

    A link such as `[[Foo#Bar]]` found on the page `src_title` becomes
    `[[#Bar]]` when `Foo` is `src_title` itself or is listed in the redirects
    map with a target equal to it.  Links with an interwiki prefix or without
    a section part are left alone.

    :param str src_title: the title of the page being checked
    :param wikilink: the link to be checked
    :type wikilink: :py:class:`mwparserfromhell.nodes.wikilink.Wikilink`
    :param title: the parsed :py:attr:`wikilink.title`
    :type title: :py:class:`mw.parser_helpers.title.Title`
    """
    if title.iwprefix or not title.sectionname:
        return

    # follow the redirect (if any), carrying the section name over
    redirect_target = self.api.redirects.map.get(title.fullpagename)
    if not redirect_target:
        resolved = title
    else:
        resolved = self.api.Title(redirect_target)
        resolved.sectionname = title.sectionname

    links_to_same_page = canonicalize(src_title) == resolved.fullpagename
    if links_to_same_page:
        wikilink.title = "#" + resolved.sectionname
        # keep the parsed title in sync with the rewritten link
        title.parse(wikilink.title)
示例4: check_redirect_capitalization
# 需要导入模块: import mwparserfromhell [as 别名]
# 或者: from mwparserfromhell import parse [as 别名]
def check_redirect_capitalization(self, wikilink, title):
    """
    Bypass a redirect iff link and redirect target differ only in capitalization.

    Does nothing when ``self.interactive`` is ``False``.

    :param wikilink: the link to be checked
    :type wikilink: :py:class:`mwparserfromhell.nodes.wikilink.Wikilink`
    :param title: the parsed :py:attr:`wikilink.title`
    :type title: :py:class:`mw.parser_helpers.title.Title`
    """
    # run only in interactive mode
    if self.interactive is False:
        return

    # FIXME: very common false positive
    if title.pagename.lower().startswith("wpa supplicant"):
        return

    # the link might be only a section, e.g. [[#foo]]
    page = title.fullpagename
    if not page:
        return

    target = self.api.redirects.map.get(page)
    if target is None or target.lower() != page.lower():
        return

    section = title.sectionname
    wikilink.title = target + "#" + section if section else target
    # keep the parsed title in sync with the rewritten link
    title.parse(wikilink.title)
示例5: recategorize_page
# 需要导入模块: import mwparserfromhell [as 别名]
# 或者: from mwparserfromhell import parse [as 别名]
def recategorize_page(self, page, source, target):
    """
    Re-target the category links of one page from ``source`` to ``target``.

    The page is saved through ``self.api.edit`` only when the text changed.

    :param page: page data; the keys read are ``title``, ``pageid`` and
        ``revisions[0]`` (its ``timestamp`` and the text under
        ``slots -> main -> *``)
    :param source: title of the category to replace; asserted to be in the
        "Category" namespace
    :param target: value assigned to the ``title`` of every matching wikilink
    """
    title = page["title"]
    revision = page["revisions"][0]
    text_old = revision["slots"]["main"]["*"]
    timestamp = revision["timestamp"]

    source = self.api.Title(source)
    assert source.namespace == "Category"

    logger.info("Parsing page [[{}]] ...".format(title))
    wikicode = mwparserfromhell.parse(text_old)
    for link in wikicode.ifilter_wikilinks(recursive=True):
        link_title = self.api.Title(link.title)
        if link_title.namespace != "Category":
            continue
        if link_title.pagename == source.pagename:
            link.title = target

    text_new = str(wikicode)
    if text_old == text_new:
        return
    # An interactive alternative existed here:
    # edit_interactive(self.api, title, page["pageid"], text_old, text_new, timestamp, self.edit_summary, bot="")
    self.api.edit(title, page["pageid"], text_new, timestamp, self.edit_summary, bot="")
示例6: extract_labels
# 需要导入模块: import mwparserfromhell [as 别名]
# 或者: from mwparserfromhell import parse [as 别名]
def extract_labels(self, text):
    """
    Extracts a set of labels for a version of text by parsing templates.

    :Parameters:
        text : `str`
            Wikitext markup to extract labels from

    :Returns:
        An iterator over whatever ``self.from_template`` yields for each
        template (documented upstream as (project, label) pairs)
    """
    # Optional fast pre-check: when the instance provides filter_text and it
    # rejects the text (e.g. the template name never appears), skip parsing.
    if hasattr(self, 'filter_text') and not self.filter_text(text):
        return

    for template in mwp.parse(text).filter_templates():
        yield from self.from_template(template)
示例7: add_oa_links_in_references
# 需要导入模块: import mwparserfromhell [as 别名]
# 或者: from mwparserfromhell import parse [as 别名]
def add_oa_links_in_references(text, page, only_doi=False):
    """
    Main function of the bot.

    Generator: for every template found in ``text`` a ``TemplateEdit`` is
    created, numbered with its position, asked to propose a change, and then
    yielded.

    :param text: the wikicode of the page to edit
    :param page: passed through to ``TemplateEdit``
    :param only_doi: passed through to ``TemplateEdit.propose_change``
    :returns: an iterator over the ``TemplateEdit`` objects
    """
    templates = mwparserfromhell.parse(text).filter_templates()
    for position, tpl in enumerate(templates):
        proposal = TemplateEdit(tpl, page)
        proposal.index = position
        proposal.propose_change(only_doi)
        yield proposal
示例8: _process_wikicode
# 需要导入模块: import mwparserfromhell [as 别名]
# 或者: from mwparserfromhell import parse [as 别名]
def _process_wikicode(text):
    """Parse ``text`` with ``mwparserfromhell.parse`` and return the result."""
    parsed = mwparserfromhell.parse(text)
    return parsed
示例9: set_title
# 需要导入模块: import mwparserfromhell [as 别名]
# 或者: from mwparserfromhell import parse [as 别名]
def set_title(self, title):
    """
    Make ``title`` the current page and parse its content.

    The parsed content replaces ``self.wikicode``, so unsaved changes to the
    previous page are lost.  The page content must already be present in
    ``self.contents`` (fetched by :py:meth:`fetch_pages`).

    :param str title: the page title
    :raises ValueError: if ``title`` is not a key of ``self.contents``
    """
    is_fetched = title in self.contents.keys()
    if not is_fetched:
        message = "Content of page [[{}]] is not fetched.".format(title)
        raise ValueError(message)

    self.title = title
    page_text = self.contents[self.title]
    self.wikicode = mwparserfromhell.parse(page_text)
示例10: ensure_flagged_by_template
# 需要导入模块: import mwparserfromhell [as 别名]
# 或者: from mwparserfromhell import parse [as 别名]
def ensure_flagged_by_template(wikicode, node, template_name, *template_parameters, overwrite_parameters=True):
    """
    Ensure that ``node`` in ``wikicode`` is immediately (ignoring whitespace)
    followed by a template named ``template_name``, optionally carrying
    ``template_parameters``.

    If such a template already follows the node, it is either replaced by a
    freshly built one (``overwrite_parameters`` is ``True``) or kept as it is;
    otherwise the new template is inserted right after ``node``.

    :param wikicode: a :py:class:`mwparserfromhell.wikicode.Wikicode` object
    :param node: a :py:class:`mwparserfromhell.nodes.Node` object
    :param str template_name: the name of the template flag
    :param template_parameters: optional template parameters
    :param overwrite_parameters: whether an already present flag is replaced
    :returns: the template flag, as a
        :py:class:`mwparserfromhell.nodes.template.Template` object
    """
    parent = get_parent_wikicode(wikicode, node)
    neighbour = get_adjacent_node(parent, node, ignore_whitespace=True)

    # build the wikitext of the flag and parse it into a Template node
    inner = "|".join([template_name, *template_parameters]) if template_parameters else template_name
    flag = mwparserfromhell.parse("{{%s}}" % inner).nodes[0]
    assert isinstance(flag, mwparserfromhell.nodes.Template)

    already_flagged = (
        isinstance(neighbour, mwparserfromhell.nodes.Template)
        and neighbour.name.matches(template_name)
    )
    if not already_flagged:
        wikicode.insert_after(node, flag)
    elif overwrite_parameters is True:
        wikicode.replace(neighbour, flag)
    else:
        # in case of {{Dead link}} we want to preserve the original parameters
        flag = neighbour

    assert get_parent_wikicode(wikicode, flag) is parent
    return flag
示例11: update_page
# 需要导入模块: import mwparserfromhell [as 别名]
# 或者: from mwparserfromhell import parse [as 别名]
def update_page(title, text, langlinks, weak_update=True):
    """
    Rebuild the header of a page with the given language links.

    :param str title: title of the page
    :param str text: wikitext of the page
    :param langlinks: a sorted list of ``(tag, title)`` tuples as obtained
        from :py:meth:`self.get_langlinks`
    :param weak_update:
        When ``True``, the langlinks present on the page are mixed with those
        suggested by ``family_titles``. This is necessary only when there are
        multiple "intersecting" families, in which case the intersection should
        be preserved and solved manually. This is reported in _merge_families.
    :returns: updated wikicode, or ``text`` unchanged when the page is skipped
        because it contains ``__NOTOC__`` or ``__NOEDITSECTION__``
    """
    # Main pages are skipped for now, until mwparserfromhell can parse the
    # behavior switches (__NOTOC__ etc.).
    # NOTE: handling whitespace right will be hard: https://wiki.archlinux.org/index.php?title=Main_page&diff=383144&oldid=382787
    has_behavior_switch = re.search("__NOTOC__|__NOEDITSECTION__", text)
    if has_behavior_switch:
        logger.warning("Skipping page '{}' (contains behavior switch(es))".format(title))
        return text

    # render the langlinks in the prefix form
    # (e.g. "cs:Some title" for title="Some title" and tag="cs")
    formatted = ["[[{}:{}]]".format(lang_tag, lang_title) for lang_tag, lang_title in langlinks]

    logger.info("Parsing page [[{}]] ...".format(title))
    wikicode = mwparserfromhell.parse(text)
    if weak_update is True:
        parts = header.get_header_parts(wikicode, langlinks=formatted, remove_from_parent=True)
        parent, magics, cats, formatted = parts
    else:
        # the langlinks extracted from the page are dropped
        parts = header.get_header_parts(wikicode, remove_from_parent=True)
        parent, magics, cats, _ = parts
    header.build_header(wikicode, parent, magics, cats, formatted)
    return wikicode
示例12: fix_page
# 需要导入模块: import mwparserfromhell [as 别名]
# 或者: from mwparserfromhell import parse [as 别名]
def fix_page(title, text_old):
    """
    Re-title the categories in the page header to the language of the page.

    The language name is taken from ``lang.detect_language(title)``; every
    header category whose detected language differs gets a new title from
    ``lang.format_title``.

    :param title: title of the page
    :param text_old: wikitext of the page
    :returns: the parsed wikicode with the header rebuilt
    """
    _, page_language = lang.detect_language(title)[:2]
    wikicode = mwparserfromhell.parse(text_old)
    parent, magics, cats, langlinks = get_header_parts(wikicode, remove_from_parent=True)

    # get_header_parts returns a list of wikicode objects, each with one node
    for category_link in (cat.nodes[0] for cat in cats):
        pure_title, category_language = lang.detect_language(str(category_link.title))
        if category_language != page_language:
            category_link.title = lang.format_title(pure_title, page_language)

    build_header(wikicode, parent, magics, cats, langlinks)
    return wikicode
示例13: decategorize
# 需要导入模块: import mwparserfromhell [as 别名]
# 或者: from mwparserfromhell import parse [as 别名]
def decategorize(title, text_old):
    """
    Rebuild the page header with an empty list of categories.

    :param title: not used by this function
    :param text_old: wikitext of the page
    :returns: the parsed wikicode with the header rebuilt
    """
    wikicode = mwparserfromhell.parse(text_old)
    header_parts = get_header_parts(wikicode, remove_from_parent=True)
    # the extracted categories are intentionally discarded
    parent, magics, _cats, langlinks = header_parts
    build_header(wikicode, parent, magics, [], langlinks)
    return wikicode
示例14: get_normalized_extlinks
# 需要导入模块: import mwparserfromhell [as 别名]
# 或者: from mwparserfromhell import parse [as 别名]
def get_normalized_extlinks(wikicode):
    """
    Return the external links of ``wikicode`` with normalized URLs.

    The links are first re-parsed in place, then their URLs are stripped and
    percent-decoded, and finally links whose URL cannot be parsed or has an
    empty host (``mailto:`` excepted) are left out of the result.

    :param wikicode: the wikicode to process; it is modified in place
    :returns: list of the external link nodes that passed the validity filter
    """
    # Pass 1: re-parse all external links, because "http://example.com/{{Dead link}}" was initially
    # parsed as one big URL, but the template transcludes tags which should terminate the URL.
    # The straightforward form would be
    #     for el in wikicode.filter_external_links(recursive=True):
    #         wikicode.replace(el, str(el))
    # the parented variant below is a performance optimization,
    # see https://github.com/earwig/mwparserfromhell/issues/195
    link_type = mwparserfromhell.nodes.external_link.ExternalLink
    for parent, link in parented_ifilter(wikicode, forcetype=link_type, recursive=True):
        parent.replace(link, str(link), recursive=False)

    links = wikicode.filter_external_links(recursive=True)

    # Pass 2: normalize the URLs
    for link in links:
        # strip whitespace like "\t"
        link.url = str(link.url).strip()
        # decode percent-encoding
        # MW incompatibility: MediaWiki decodes only some characters, spaces and some unicode characters with accents are encoded
        try:
            decoded = urldecode(str(link.url))
        except UnicodeDecodeError:
            continue
        link.url = decoded

    # Pass 3: skip invalid URLs
    valid_links = []
    for link in links:
        try:
            # try to parse the URL - fails e.g. if port is not a number
            # reference: https://urllib3.readthedocs.io/en/latest/reference/urllib3.util.html#urllib3.util.parse_url
            parsed = urllib3.util.url.parse_url(str(link.url))
        except urllib3.exceptions.LocationParseError:
            continue
        # skip URLs with empty host, e.g. "http://" or "http://git@" or "http:///var/run"
        # (partial workaround for https://github.com/earwig/mwparserfromhell/issues/196 )
        # GOTCHA: mailto:user@host is scheme + path only; auth, host and port are recognized only after //
        if parsed.scheme == "mailto" or parsed.host:
            valid_links.append(link)
    return valid_links
示例15: update_page
# 需要导入模块: import mwparserfromhell [as 别名]
# 或者: from mwparserfromhell import parse [as 别名]
async def update_page(self, src_title, text):
    """
    Parse the content of the page and check the status of its external links.

    This is a coroutine (the body awaits the link checks), so it has to be
    awaited or run by an event loop.  ``self.check_extlink_status`` is called
    once per external link, in worker threads, with ``(wikicode, extlink)``.

    :param str src_title: title of the page
    :param str text: content of the page
    :returns: a (text, edit_summary) tuple, where text is the updated content
              and edit_summary is the description of performed changes
    """
    logger.info("Parsing page [[{}]] ...".format(src_title))
    # FIXME: skip_style_tags=True is a partial workaround for https://github.com/earwig/mwparserfromhell/issues/40
    wikicode = mwparserfromhell.parse(text, skip_style_tags=True)

    # We could use the default single-threaded executor with basically the same performance
    # (because of Python's GIL), but the ThreadPoolExecutor allows to limit the maximum number
    # of workers and thus the maximum number of concurrent connections.
    with ThreadPoolExecutor(max_workers=10) as executor:
        # NOTE(review): the session is opened but not passed to the checks
        # within this block - confirm whether check_extlink_status needs it.
        with requests.Session() as session:
            loop = asyncio.get_event_loop()
            tasks = [
                loop.run_in_executor(executor, self.check_extlink_status, wikicode, extlink)
                for extlink in wikicode.ifilter_external_links(recursive=True)
            ]
            # The results are not used; gather() is awaited only to wait for
            # all checks and to propagate the first exception, if any.
            await asyncio.gather(*tasks)

    edit_summary = "update status of external links (interactive)"
    return str(wikicode), edit_summary