This article collects typical usage examples of the mwparserfromhell.parse function in Python. If you have been wondering exactly what parse does and how to use it, the curated examples below should help.
Shown below are 15 code examples of the parse function, sorted by popularity by default.
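Before the examples, here is a minimal sketch of what parse returns and the accessors the examples below rely on (the wikitext string is invented for illustration):

import mwparserfromhell

code = mwparserfromhell.parse("{{Infobox person|name=Ada}} She was a [[mathematician]].")
infobox = code.filter_templates()[0]   # first Template node
print(infobox.name)                    # Infobox person
print(infobox.get('name').value)       # Ada
print(code.strip_code())               # plain text with the markup stripped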
Example 1: get_captions
def get_captions(title):
    params = {
        'action': 'query',
        'list': 'allpages',
        'apfrom': title.split(':', 1)[1],
        'aplimit': '100',
        'apnamespace': '10'
    }
    data = api(**params)
    langs = {}
    prefix = title + ' '
    for item in data['query']['allpages']:
        if item['title'].startswith(prefix):
            lang = item['title'].split('(')[1].split(')')[0]
            langs[lang] = item['title']
    text = ''
    for lang in sorted(langs):
        lang_name = get_language_name(lang)
        content = page_content(langs[lang])
        if content.strip().startswith('#REDIRECT'):
            # skip redirects
            continue
        code = mwparserfromhell.parse(content)
        try:
            temp = code.filter_templates()[0]
        except IndexError:
            continue
        caption_code = temp.get(1).value
        # We want templates like {{w|FooBar}} to render, so expand them
        expanded = expand_templates(unicode(caption_code))
        caption = unicode(mwparserfromhell.parse(expanded).strip_code())
        text += '%s: %s\n' % (lang_name, caption)
    return text
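Example 1 reads the template's first positional parameter with temp.get(1).value. A short sketch of how positional and named parameter lookups behave (the template text is invented):

import mwparserfromhell

tmpl = mwparserfromhell.parse('{{photo caption|A caption|lang=en}}').filter_templates()[0]
print(tmpl.get(1).value)       # A caption -- positional parameters use integer keys
print(tmpl.get('lang').value)  # en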
Example 2: update
def update(self, push=True):
    self.fetch_info()
    self.parse_info()
    print self.LOCATION
    print self.CATEGORY
    print self.ABOUT
    print self.MOVEMENT
    print self.PRESSURE
    print self.WINDS
    #print self.UTC_TIMESTAMP
    # now actually update the wikitext
    text = self.wikipage.get()
    code = mwparserfromhell.parse(text)
    main = pywikibot.Page(self.wikipage.site, '2012 Atlantic hurricane season')
    main_text = main.get()
    main_code = mwparserfromhell.parse(main_text)
    for template in code.filter_templates():
        name = template.name.lower().strip()
        if name == 'infobox hurricane current':
            if template.get('name').value.strip() == 'Hurricane Sandy':
                template.get('time').value = self.UTC_TIMESTAMP
                template.get('category').value = self.CATEGORY
                template.get('gusts').value = self.format_wind(self.WINDS)
                template.get('lat').value = self.LOCATION['latc']
                template.get(1).value = self.LOCATION['latd']
                template.get('lon').value = self.LOCATION['lonc']
                template.get(2).value = self.LOCATION['lond']
                template.get('movement').value = self.format_movement(self.MOVEMENT)
                template.get('pressure').value = self.format_pressure(self.PRESSURE)
    pywikibot.showDiff(text, unicode(code))
    if push:
        self.wikipage.put(unicode(code), 'Bot: Updating hurricane infobox. Errors? [[User talk:Legoktm|report them!]]')
Example 3: parse
def parse():
    text = rfd.get()
    code = mwparserfromhell.parse(text)
    requests = []
    for section in code.get_sections()[1:]:
        data = {'section': section}
        header = unicode(section.filter_headings()[0])
        data['header'] = header
        text = mwparserfromhell.parse(unicode(section).replace(header + '\n', ''))
        data['text'] = text
        item = None
        for template in text.filter_templates():
            if unicode(template.name).startswith('Rfd group'):
                data['type'] = 'bulk'
                break
            elif template.name == 'rfd links':
                data['type'] = 'single'
                item = template.get(1).value
                break
        if item:
            item = pywikibot.ItemPage(repo, item)
            data['item'] = item
        requests.append(data)
    return requests
Example 4: test_multiple_nodes_newlines
def test_multiple_nodes_newlines(self):
    snippet = "[[link1]]\n[[link2]]\n[[link3]]"
    wikicode = mwparserfromhell.parse(snippet)
    self._do_test(wikicode, "[[link1]]", "[[link2]]\n[[link3]]")
    wikicode = mwparserfromhell.parse(snippet)
    self._do_test(wikicode, "[[link2]]", "[[link1]]\n[[link3]]")
    wikicode = mwparserfromhell.parse(snippet)
    self._do_test(wikicode, "[[link3]]", "[[link1]]\n[[link2]]")
Example 5: test_multiple_nodes_spaces
def test_multiple_nodes_spaces(self):
    snippet = "foo [[link1]] [[link2]] [[link3]] bar"
    wikicode = mwparserfromhell.parse(snippet)
    self._do_test(wikicode, "[[link1]]", "foo [[link2]] [[link3]] bar")
    wikicode = mwparserfromhell.parse(snippet)
    self._do_test(wikicode, "[[link2]]", "foo [[link1]] [[link3]] bar")
    wikicode = mwparserfromhell.parse(snippet)
    self._do_test(wikicode, "[[link3]]", "foo [[link1]] [[link2]] bar")
Example 6: cleanup_sectionlink
def cleanup_sectionlink(self, section_title):
    code = mwparser.parse(section_title)
    templates = code.filter_templates()
    if len(templates) == 1 and templates[0].name.matches(('Erl', 'erl')):
        section_title = templates[0].get(1)
    title = mwparser.parse(unicode(section_title))
    clean_title = title.strip_code(normalize=True, collapse=True).strip()
    return clean_title
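Example 6 leans on strip_code's keyword arguments: per the library's documentation, normalize=True converts HTML entities to the characters they represent and collapse=True removes whitespace left stranded by stripped markup. A quick sketch:

import mwparserfromhell

code = mwparserfromhell.parse("'''Fish''' &amp; chips")
print(code.strip_code(normalize=True, collapse=True))  # Fish & chips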
Example 7: wikicode
def wikicode(self):
    """
    Return the parsed wikitext (mwparserfromhell.wikicode.Wikicode object)
    """
    if not self._wikicode:
        try:
            self._wikicode = mwparserfromhell.parse(self.wikitext)
        except SystemError:
            self._wikicode = mwparserfromhell.parse('')
    return self._wikicode
Example 8: _parse_revs_into_wcode
def _parse_revs_into_wcode(rev_text_dict):
    result = []
    for rev_id in rev_text_dict:
        try:
            result.append((rev_id, mwp.parse(rev_text_dict[rev_id])))
        except mwp.parser.ParserError as e:
            logger.warning(e)
            logger.warning('Error parsing {0}'.format(rev_id))
            result.append((rev_id, mwp.parse('')))
    return result
Example 9: test_contains
def test_contains(self):
    """test Wikicode.contains()"""
    code = parse("Here is {{aaa|{{bbb|xyz{{ccc}}}}}} and a [[page|link]]")
    tmpl1, tmpl2, tmpl3 = code.filter_templates()
    tmpl4 = parse("{{ccc}}").filter_templates()[0]
    self.assertTrue(code.contains(tmpl1))
    self.assertTrue(code.contains(tmpl3))
    self.assertFalse(code.contains(tmpl4))
    self.assertTrue(code.contains(str(tmpl4)))
    self.assertTrue(code.contains(tmpl2.params[0].value))
Example 10: __init__
def __init__(self, title=None, text=None):
    super(Article, self).__init__(text=text)
    self.title = title
    self.paragraphs = None
    self.readable_text = None
    self.lede_length = 1
    if title is not None:
        self.page = pwb.Page(site, title)
        self.text = mwp.parse(self.page.text)
        self.wikitext = mwp.parse(self.page.text)
Example 11: page_f
def page_f(pg):
    count = 0
    text = pg.get()
    code = mwparserfromhell.parse(text)
    for template in code.filter_templates(recursive=True):
        if template.name.lower().strip() in CITE_TEMPLATES:
            url = template.get('url').value.strip()
            if 'msnbc.com' in url:
                continue
            isup = is_up(url)
            if isup:
                continue
            if template.has_param('archiveurl'):
                #if template.has_param('deadurl'):
                #    if template.get('deadurl').value.strip() == 'no':
                #        template.remove('deadurl')
                #        template.add('deadurl', 'yes')
                #        continue
                continue
            # find the page on archive.org
            ai_url = archive_page(url)
            if not ai_url:
                print 'Not found. :('
                continue
            # 14-digit timestamp that follows 'http://web.archive.org/web/'
            raw_date = ai_url[27:27 + 14]
            year = int(raw_date[:4])
            month_num = int(raw_date[4:6])
            day = int(raw_date[6:8])
            month = MONTH_NAMES[month_num - 1]
            template.add('archiveurl', ai_url)
            template.add('deadurl', 'yes')
            template.add('archivedate', '%s %s %s' % (day, month, year))
            count += 1
    # now remove the {{dead link}} templates made redundant above
    code = unicode(code)
    for tag in re.finditer(r'<ref(.*?)>(.*?)</ref>', code):
        p = mwparserfromhell.parse(tag.group(2))
        found_archive = False
        for template in p.filter_templates():
            if template.name.lower().strip() in CITE_TEMPLATES:
                if template.has_param('archiveurl'):
                    found_archive = True
            elif template.name.lower().strip() in DEAD_LINK:
                if found_archive:
                    del p.nodes[p.nodes.index(unicode(template))]
        code = code.replace(tag.group(2), unicode(p))
    if text == code:
        print 'No changes made on %s' % pg.title(asLink=True)
        return
    pywikibot.showDiff(text, unicode(code))
    if raw_input('Save?').lower() == 'y':
        pg.put(unicode(code), 'Manually-assisted archive url fetching.')
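Example 11 extracts the Wayback Machine's 14-digit timestamp by a fixed character offset, which breaks quietly if the URL scheme or host differs. A more defensive sketch under the usual .../web/<timestamp>/<original-url> layout (the URL here is invented):

from datetime import datetime

ai_url = 'http://web.archive.org/web/20121030123456/http://example.com/story'
stamp = ai_url.split('/web/')[1].split('/')[0][:14]
when = datetime.strptime(stamp, '%Y%m%d%H%M%S')
print(when.strftime('%d %B %Y'))  # 30 October 2012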
Example 12: test_matches
def test_matches(self):
    """test Wikicode.matches()"""
    code1 = parse("Cleanup")
    code2 = parse("\nstub<!-- TODO: make more specific -->")
    self.assertTrue(code1.matches("Cleanup"))
    self.assertTrue(code1.matches("cleanup"))
    self.assertTrue(code1.matches(" cleanup\n"))
    self.assertFalse(code1.matches("CLEANup"))
    self.assertFalse(code1.matches("Blah"))
    self.assertTrue(code2.matches("stub"))
    self.assertTrue(code2.matches("Stub<!-- no, it's fine! -->"))
    self.assertFalse(code2.matches("StuB"))
Example 13: process_page
def process_page(page):
    text = page.get()
    text, blah = AWB.do_page(text, date=False)
    code = mwparserfromhell.parse(text)
    urls = []
    for m in urlregex.MATCH_URL.finditer(unicode(code)):
        u = m.group(0)
        if u.startswith(('http://ap.google', 'https://ap.google')):
            urls.append(u)
    # find the ref tags that contain the dead ap.google.* links
    for tag in re.finditer(r'<ref(.*?)>(.*?)</ref>', unicode(code)):
        for url in urls[:]:  # iterate over a copy; urls are removed as they are handled
            if url not in tag.group(2):
                continue
            archived = any(template.has_param('archiveurl')
                           for template in mwparserfromhell.parse(tag.group(2)).filter_templates())
            if archived or 'dead link' in tag.group(0).lower():
                # already archived or already tagged; nothing to do
                urls.remove(url)
            else:
                # append the {{dead link}} tag inside the ref
                code = unicode(code).replace(tag.group(0), '<ref' + tag.group(1) + '>' + tag.group(2) + TAG + '</ref>')
                urls.remove(url)
    if urls:
        print 'STILL HAVE THESE LEFT: ' + ', '.join(urls)
    pywikibot.showDiff(text, unicode(code))
    if text != unicode(code):
        page.put(unicode(code), 'Bot: Tagging ap.google.* links with {{dead link}}')
        return True
    else:
        return None
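Examples 11 and 13 both fall back to a regex for <ref> tags over the rendered string; mwparserfromhell can locate tag nodes itself, which avoids the parse/replace round-trip. A sketch (the wikitext is invented):

import mwparserfromhell

code = mwparserfromhell.parse('a<ref>{{cite web|url=http://x.example}}</ref>b')
for ref in code.filter_tags(matches=lambda node: node.tag == 'ref'):
    print(ref.contents)  # the Wikicode inside the tag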
Example 14: test_transform
def test_transform(self):
    wcode_list = [mwp.parse('{{Infobox something | thing}}'
                            '{{not-one else}}'
                            '{{infobox again}}'),
                  mwp.parse('{{Infobox num1 | thing}}'
                            '{{not-one else}}'
                            '{{infobox num2}}')]
    result = ifb._transform(wcode_list)
    self.assertEqual(len(result), 2)
    self.assertEqual(result[0], 'infobox-something infobox-again')
    self.assertEqual(result[1], 'infobox-num1 infobox-num2')
Example 15: load_stub_templates
def load_stub_templates(self):
    self.stub_templates = []
    st = pywikibot.Page(self.site, 'Wikipedia:WikiProject Stub sorting/Stub types')
    text = st.get()
    code = mwparserfromhell.parse(text)
    for template in code.filter_templates():
        if template.name.startswith('Wikipedia:WikiProject Stub sorting/Stub types/'):
            st_page = pywikibot.Page(self.site, unicode(template.name))
            sub_code = mwparserfromhell.parse(st_page.get())
            for sub_template in sub_code.filter_templates():
                if sub_template.name.lower() == 'tl':
                    self.stub_templates.append(unicode(sub_template.get(1).value).lower())
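Example 15 compares names with sub_template.name.lower() == 'tl', which misses names carrying stray whitespace such as {{tl |...}}. Wikicode.matches(), exercised in Example 12, does the looser page-name-style comparison and is usually the safer choice:

import mwparserfromhell

tmpl = mwparserfromhell.parse('{{Tl |cleanup}}').filter_templates()[0]
print(tmpl.name.matches('tl'))  # True: surrounding whitespace and first-letter case are ignored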