本文整理汇总了Python中pypandoc.convert_text函数的典型用法代码示例。如果您正苦于以下问题：Python convert_text函数的具体用法？Python convert_text怎么用？Python convert_text使用的例子？那么，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了convert_text函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: convert_ipynb_to_gallery
def convert_ipynb_to_gallery(file_name):
    """
    Convert a notebook JSON file into a gallery-style Python script.

    Blatantly stolen + adapted from
    https://gist.github.com/wuhuikai/4a7ceb8bc52454e17a4eb8327d538d85

    The first cell must be markdown; it is converted to reST through
    ``pdoc.convert_text`` and becomes the leading triple-quoted string of
    the script. Each later markdown cell is converted to reST, every line is
    prefixed with ``# `` and the block is preceded by a rule of 70 ``#``
    characters. Code cells are appended verbatim. Cells of any other type
    are skipped.

    The result is written to ``file_name`` with ``.ipynb`` replaced by
    ``.py``.

    Raises:
        AssertionError: if the first cell is not a markdown cell.
    """
    python_file = ""
    # Context managers close the handles deterministically, also when
    # parsing or conversion raises.
    with open(file_name) as notebook:
        nb_dict = json.load(notebook)
    cells = nb_dict['cells']
    for i, cell in enumerate(cells):
        if i == 0:
            assert cell['cell_type'] == 'markdown', \
                'First cell has to be markdown'
            md_source = ''.join(cell['source'])
            rst_source = pdoc.convert_text(md_source, 'rst', 'md')
            python_file = '"""\n' + rst_source + '\n"""'
        else:
            if cell['cell_type'] == 'markdown':
                md_source = ''.join(cell['source'])
                rst_source = pdoc.convert_text(md_source, 'rst', 'md')
                commented_source = '\n'.join([
                    '# ' + x for x in rst_source.split('\n')
                ])
                python_file = python_file + '\n\n\n' + '#' * 70 + '\n' + \
                    commented_source
            elif cell['cell_type'] == 'code':
                source = ''.join(cell['source'])
                python_file = python_file + '\n' * 2 + source
    with open(file_name.replace('.ipynb', '.py'), 'w') as script:
        script.write(python_file)
示例2: convert_ipynb_to_gallery
def convert_ipynb_to_gallery(file_name):
    """Write a ``.py`` script built from the cells of a notebook JSON file.

    The first cell has to be markdown: it is converted to reST with
    ``pdoc.convert_text`` and wrapped in triple quotes at the top of the
    script. Later markdown cells are converted to reST and emitted as
    ``# ``-prefixed comment lines below a rule of 70 ``#`` characters; code
    cells are appended unchanged. Other cell types are ignored.

    The output path is ``file_name`` with ``.ipynb`` replaced by ``.py``.

    Raises:
        AssertionError: if the first cell is not a markdown cell.
    """
    python_file = ""
    # Read inside a ``with`` block so the notebook handle is always closed.
    with open(file_name) as nb_handle:
        nb_dict = json.load(nb_handle)
    cells = nb_dict['cells']
    for i, cell in enumerate(cells):
        if i == 0:
            assert cell['cell_type'] == 'markdown', \
                'First cell has to be markdown'
            md_source = ''.join(cell['source'])
            rst_source = pdoc.convert_text(md_source, 'rst', 'md')
            python_file = '"""\n' + rst_source + '\n"""'
        elif cell['cell_type'] == 'markdown':
            md_source = ''.join(cell['source'])
            rst_source = pdoc.convert_text(md_source, 'rst', 'md')
            commented_source = '\n'.join(['# ' + x for x in
                                          rst_source.split('\n')])
            python_file = python_file + '\n\n\n' + '#' * 70 + '\n' + \
                commented_source
        elif cell['cell_type'] == 'code':
            source = ''.join(cell['source'])
            python_file = python_file + '\n' * 2 + source
    # Same for the output: flush and close before returning.
    with open(file_name.replace('.ipynb', '.py'), 'w') as py_handle:
        py_handle.write(python_file)
示例3: expand_description
def expand_description(self, exp):
    """Convert the markdown description nodes below ``exp`` to LaTeX.

    Returns a dict with two keys: ``"general"`` holds the converted text of
    ``./description/general``; ``"details"`` is a list with the converted,
    whitespace-stripped text of every ``./description/details/detail`` node.
    ``exp`` presumably is an ElementTree-like element (it must offer
    ``find``/``findall``) -- verify against the caller.
    """
    def to_latex(markdown):
        # Single place that fixes the pandoc source and target formats.
        return pypandoc.convert_text(markdown, "latex", format="md")

    general = to_latex(exp.find("./description/general").text)
    details = []
    for detail in exp.findall("./description/details/detail"):
        details.append(to_latex(detail.text.strip()))
    return {"general": general, "details": details}
示例4: main
def main():
    """Generate the documentation for every output path on the command line.

    The target format is chosen from each path's suffix:

    * ``.md``   -- the "pdf" flavour returned by ``common_md()`` is written
      unchanged.
    * ``.html`` -- the "html" flavour, preceded by a ``# VERSION_STR``
      heading, is converted to standalone HTML5 by pandoc.
    * ``.pdf`` / ``.tex`` -- the rendered LaTeX preamble template plus the
      "pdf" flavour is converted by pandoc using xelatex.

    Paths with any other suffix are announced but otherwise ignored. Exits
    with an error message when no path is given.
    """
    if len(sys.argv) <= 1:
        sys.exit("Please supply a filename")

    input_format = "markdown"
    outputs = common_md()
    html_output = outputs["html"]
    pdf_output = outputs["pdf"]

    print()

    for arg in sys.argv[1:]:
        p = Path(arg).resolve()
        print(f"Generating: {p}")
        ext = p.suffix
        if ext == ".md":
            p.write_text(pdf_output)
        elif ext == ".html":
            # Build the titled document in a fresh local: reassigning
            # html_output here would prepend the heading once more for
            # every additional .html target.
            titled_html = "# " + VERSION_STR + "\n\n" + html_output
            pypandoc.convert_text(
                titled_html,
                format=input_format,
                to="html5",
                outputfile=str(p),
                extra_args=["--standalone",
                            "--self-contained",
                            "--toc",
                            "--toc-depth=2",
                            "--css=" + str(TEMPLATE_DIR / "docs.css"),
                            "--template=" + str(TEMPLATE_DIR /
                                                "template.html")])
        elif ext == ".pdf" or ext == ".tex":
            latex_preamble = env.get_template("latex_preamble.jinja2.md")
            latex = latex_preamble \
                .render(title=VERSION_STR, fonts_dir=FONTS_DIR) + "\n\n"
            latex += pdf_output
            # Parse the whole major component; taking only the first
            # character would misread a two-digit major version.
            pandoc_version = int(
                pypandoc.get_pandoc_version().split(".")[0])
            # pandoc 2 renamed --latex-engine to --pdf-engine.
            engine = ("--pdf-engine=xelatex"
                      if pandoc_version >= 2
                      else "--latex-engine=xelatex")
            pypandoc.convert_text(
                latex,
                format=input_format,
                to=ext[1:],
                outputfile=str(p),
                extra_args=["--standalone",
                            "--column=80",
                            "--toc",
                            "--toc-depth=2",
                            engine,
                            "--variable=papersize:A4"])
示例5: render_to_format
def render_to_format(request, format, title, template_src, context):
    """Render a template and return it as an HTTP response in ``format``.

    Args:
        request: accepted for the caller's signature; not used here.
        format: target format; must be listed in ``settings.EXPORT_FORMATS``.
        title: base name used for the file name in Content-Disposition.
        template_src: template name handed to ``get_template``.
        context: context used to render the template.

    Returns:
        ``HttpResponse`` with the rendered HTML when ``format`` is
        ``'html'``; otherwise an ``HttpResponse`` carrying the pandoc
        conversion result with content type ``application/<format>``
        (PDF is served inline, every other format as an attachment).
        ``HttpResponseBadRequest`` when the format is not supported.
    """
    # for some weird reason we have to cast here explicitly
    format = str(format)
    title = str(title)

    if format not in settings.EXPORT_FORMATS:
        return HttpResponseBadRequest(_('This format is not supported.'))

    # render the template to a html string
    template = get_template(template_src)
    html = template.render(context)

    # remove empty lines
    html = os.linesep.join([line for line in html.splitlines() if line.strip()])

    if format == 'html':
        # create the response object
        return HttpResponse(html)

    if format == 'pdf':
        args = ['-V', 'geometry:margin=1in']
        content_disposition = 'filename=%s.%s' % (title, format)
    else:
        args = []
        content_disposition = 'attachment; filename=%s.%s' % (title, format)

    # create a temporary file
    (tmp_fd, tmp_filename) = mkstemp('.' + format)
    try:
        # Wrap the descriptor right away so it is closed even when the
        # conversion fails.
        with os.fdopen(tmp_fd, 'rb') as file_handler:
            # convert the file using pandoc
            pypandoc.convert_text(html, format, format='html', outputfile=tmp_filename, extra_args=args)
            # read the temporary file
            file_content = file_handler.read()
    finally:
        # delete the temporary file, whether or not pandoc succeeded
        os.remove(tmp_filename)

    # create the response object
    response = HttpResponse(file_content, content_type='application/%s' % format)
    response['Content-Disposition'] = content_disposition
    return response
示例6: ChangeSpellDesc2MD
def ChangeSpellDesc2MD():
    """Convert the HTML text fields of every spell to markdown, in place.

    Loads the JSON file at ``json_file['spells']``, converts each spell's
    ``desc`` and, when present, its ``higher_level`` and ``material``
    fields from HTML to markdown (pandoc runs with ``--wrap=none``), then
    writes the list back to the same file.
    """
    spells_path = json_file['spells']

    def html_to_md(html):
        # All fields share the same pandoc invocation.
        return pypandoc.convert_text(html, 'md', format='html', extra_args=['--wrap=none'])

    with open(spells_path) as source:
        spells = json.load(source)

    for spell in spells:
        # 'desc' is mandatory: a spell without it raises KeyError.
        spell['desc'] = html_to_md(spell['desc'])
        for optional_key in ('higher_level', 'material'):
            if optional_key in spell:
                spell[optional_key] = html_to_md(spell[optional_key])

    with open(spells_path, 'w') as target:
        json.dump(spells, target)
示例7: parse
def parse(self, response):
    """Extract schedule data from ``response``.

    First yields a mapping from each ``data-link`` attribute value found
    inside a schedule-day block to that day's date text. Then yields one
    dict per session card with its title, parsed datetime, description and
    speaker names.

    The description is converted HTML -> markdown -> reST on a best-effort
    basis; when the conversion fails the value extracted from the page is
    kept unchanged.
    """
    talk_ids = collections.defaultdict(list)
    for day in response.css('div.schedule__day.iframe_schedule_day'):
        curr_date = day.css('p.schedule__date::text').get()
        for r in day.css('div::attr(data-link)'):
            talk_ids[r.get()] = curr_date
    yield talk_ids
    for talk in response.css('div.details.uv-card__mask'):
        for session in talk.css('div.uv-card--session'):
            time_of_day = session.css(
                'span.session__time:nth-child(1)').xpath(
                    'normalize-space()').get()
            talk_id = talk.xpath('@id').get()
            desc = session.css('div.safe-description').get()
            try:
                desc_md = html2text(desc)
                desc = pypandoc.convert_text(desc_md, 'rst', format='md')
            except Exception:
                # Deliberate fallback: keep the unconverted description.
                # Catching Exception instead of using a bare ``except``
                # lets KeyboardInterrupt/SystemExit propagate.
                pass
            # NOTE(review): the key 'spearkers' looks like a typo for
            # 'speakers'; kept because item consumers may rely on it.
            yield {'title': session.xpath('string(.//h2)').get(),
                   'datetime': dateparser.parse('{date} {year} {tod}'.format(
                       date=talk_ids[talk_id],
                       year=2016,
                       tod=time_of_day)),
                   'description': desc,
                   'spearkers': session.css(
                       '\ndiv.session__speakers-box'
                       '\ndiv.uv-shortcard__title::text').extract()}
示例8: md2rst
def md2rst(comment):
    """Turn a protobuf markdown comment into restructuredtext.

    Steps performed:
    - proto links are replaced via ``_replace_proto_link``
      (e.g. [Foo][bar.baz.Foo] -> `Foo`)
    - relative URLs are rewritten via ``_replace_relative_link``
    - pandoc converts the result from commonmark to restructuredtext,
      but only when the text contains markdown special characters
    """
    comment = _replace_relative_link(_replace_proto_link(comment))

    # pypandoc.convert_text is slow, so plain comments without any markdown
    # special character skip the conversion entirely. They already carry
    # their leading space and must be returned untouched.
    has_markup = any(ch in comment for ch in '`[]*_')
    if not has_markup:
        return comment

    rst = pypandoc.convert_text(comment, 'rst', format='commonmark')

    # The converted text is valid restructuredtext, but it goes back into a
    # descriptor set where every comment line is expected to start with a
    # space separating it from the leading '//'. Downstream parsing in
    # gapic-generator strips that space, which would damage the indentation
    # of lines that really start with one, so the extra space is added here.
    return _insert_spaces(rst)
示例9: render_markdown
def render_markdown(value):
    """Render Markdown to HTML5 (MathJax enabled) through pandoc.

    When the conversion raises ``RuntimeError`` the input is returned
    unchanged.
    """
    try:
        return pypandoc.convert_text(value, to='html5', format='md', extra_args=['--mathjax'])
    except RuntimeError:
        return value
示例10: convert
def convert(self, text):
    """Convert textile wiki text to strict markdown.

    Blocks separated by blank lines are first run through
    ``self.regexCodeBlock`` (matches are wrapped in ``<pre>``), then pandoc
    converts textile to ``markdown_strict``. The remaining substitutions
    patch constructs pandoc leaves untouched: wiki links, nested list
    markers, blockquote markers and the tip/note/warning/important macros.

    Returns:
        The converted markdown text.
    """
    text = '\n\n'.join([re.sub(self.regexCodeBlock, r'<pre>\1</pre>', block) for block in text.split('\n\n')])

    # convert from textile to markdown
    text = pypandoc.convert_text(text, 'markdown_strict', format='textile')

    # NOTE: the fourth positional parameter of re.sub is ``count``, not
    # ``flags``. Passing ``re.MULTILINE | re.DOTALL`` there never set any
    # flag; it only capped each call at 24 replacements. The argument is
    # dropped so that every occurrence is replaced. If the patterns need
    # MULTILINE/DOTALL they have to be compiled with those flags -- TODO
    # confirm where the ``self.regex*`` attributes are defined.

    # pandoc does not convert everything, notably the [[link|text]] syntax
    # is not handled. So let's fix that.

    # [[ wikipage | link_text ]] -> [link_text](wikipage)
    text = re.sub(self.regexWikiLinkWithText, self.wiki_link, text)

    # [[ link_url ]] -> [link_url](link_url)
    text = re.sub(self.regexWikiLinkWithoutText, self.wiki_link, text)

    # nested lists, fix at least the common issues
    text = text.replace(" \\#\\*", " -")
    text = text.replace(" \\*\\#", " 1.")

    # Redmine is using '>' for blockquote, which is not textile
    text = text.replace("> ", ">")

    # wiki note macros
    text = re.sub(self.regexTipMacro, r'---\n**TIP**: \1\n---\n', text)
    text = re.sub(self.regexNoteMacro, r'---\n**NOTE**: \1\n---\n', text)
    text = re.sub(self.regexWarningMacro, r'---\n**WARNING**: \1\n---\n', text)
    text = re.sub(self.regexImportantMacro, r'---\n**IMPORTANT**: \1\n---\n', text)

    # all other macros
    text = re.sub(self.regexAnyMacro, r'\1', text)

    return text
示例11: tokenize_block
def tokenize_block(source: str, pandoc_extra_args: list=None) -> list:
    """
    Parse markdown text with pandoc and return the blocks of its JSON AST.

    ``pandoc_extra_args`` is forwarded to pandoc as extra command line
    arguments; ``None`` means no extra arguments.
    """
    extra_args = [] if pandoc_extra_args is None else pandoc_extra_args
    ast = json.loads(
        pypandoc.convert_text(source, to='json', format='markdown', extra_args=extra_args)
    )
    return ast['blocks']
示例12: text_decode
def text_decode(text):
    """Normalise ``text`` depending on what it contains.

    - text containing a literal ``\\u`` sequence goes through
      ``fix_arnaud_post``;
    - otherwise, text recognised by ``is_html`` is escaped and converted
      from HTML to strict markdown with pandoc;
    - anything else is returned unchanged.
    """
    if re.search(r'\\u', text):
        return fix_arnaud_post(text)
    if not is_html(text):
        return text
    escaped = escape_special_characters(text)
    return pypandoc.convert_text(escaped, 'markdown_strict', format='html')
示例13: save_url
def save_url(chapter, title, url):
    """Fetch ``url`` and store its content as a ``.tex`` file for a chapter.

    The page is parsed with ``mercury.parse``; its content is passed with
    the ``content/<chapter>/images`` directory (created on demand) to
    ``save_images``, and the returned HTML is converted to TeX by pandoc.
    The result is handed to ``write_content`` with the target path
    ``content/<chapter>/<title>.tex``, where ``/`` in the title becomes a
    backslash and ``:`` becomes `` -``.
    """
    safe_title = title.replace('/', '\\').replace(':', ' -')
    file_name = '{}.tex'.format(safe_title)

    images_dir = pathlib.Path(os.path.join('content', chapter, 'images'))
    images_dir.mkdir(parents=True, exist_ok=True)

    parsed = mercury.parse(url)
    html = save_images(parsed.content, images_dir)
    tex = pypandoc.convert_text(html, 'tex', format='html')

    # The .tex file lives next to the images directory, not inside it.
    target = images_dir.parent.joinpath(file_name)
    write_content(target, tex)
示例14: create
def create(self, variables, md_output, pdf_output):
    """Render the report template and write it as markdown and/or PDF.

    Args:
        variables: values passed to the template's ``render``.
        md_output: path of the markdown file to write, or ``None`` to skip
            writing markdown.
        pdf_output: path handed to pandoc as the PDF output file.

    The PDF step is best effort: any exception raised while importing or
    running pypandoc is logged as a warning and the PDF is omitted.
    """
    env = Environment(loader=PackageLoader('qanta', 'reporting/templates'))
    template = env.get_template(self.template)
    markdown = template.render(variables)
    if md_output is not None:
        with open(md_output, 'w') as f:
            f.write(markdown)
    try:
        # Imported lazily so a missing pypandoc only disables the PDF step.
        import pypandoc
        pypandoc.convert_text(
            markdown,
            'pdf',
            format='md',
            outputfile=pdf_output,
            extra_args=['-V', 'geometry:margin=.75in']
        )
    except Exception as e:
        # ``warn`` is the deprecated alias of ``warning`` on stdlib loggers.
        # NOTE(review): assumes ``log`` is a logging.Logger-compatible
        # object -- confirm.
        log.warning('Pandoc was not installed or there was an error calling it, omitting PDF report')
        log.warning(str(e))
示例15: main
def main():
    """Generate the documentation for every output path on the command line.

    The target format is chosen from each path's suffix:

    * ``.md``   -- the markdown returned by ``common_md()`` is written
      unchanged.
    * ``.html`` -- the markdown is converted to standalone HTML5 by pandoc.
    * ``.pdf`` / ``.tex`` -- the contents of ``latex_preamble.md`` plus the
      markdown are converted by pandoc using xelatex.

    Paths with any other suffix are announced but otherwise ignored. Exits
    with an error message when no path is given.
    """
    if len(sys.argv) <= 1:
        sys.exit("Please supply a filename")

    input_format = "markdown"
    output = common_md()

    print()

    for arg in sys.argv[1:]:
        p = Path(arg).resolve()
        print(f"Generating: {p}")
        ext = p.suffix
        if ext == ".md":
            p.write_text(output)
        elif ext == ".html":
            pypandoc.convert_text(
                output,
                format=input_format,
                to="html5",
                outputfile=str(p),
                extra_args=["--standalone",
                            "--self-contained",
                            "--toc",
                            "--toc-depth=2",
                            "--css=" + str(TEMPLATE_DIR / "docs.css")])
        elif ext == ".pdf" or ext == ".tex":
            latex = Path(TEMPLATE_DIR / "latex_preamble.md").read_text()
            latex += output
            # pandoc 2 renamed --latex-engine to --pdf-engine; choose the
            # spelling the installed pandoc understands.
            pandoc_major = int(pypandoc.get_pandoc_version().split(".")[0])
            engine = ("--pdf-engine=xelatex"
                      if pandoc_major >= 2
                      else "--latex-engine=xelatex")
            pypandoc.convert_text(
                latex,
                format=input_format,
                to=ext[1:],
                outputfile=str(p),
                extra_args=["--standalone",
                            "--column=80",
                            "--toc",
                            "--toc-depth=2",
                            engine,
                            "--variable=papersize:A4"])