本文整理汇总了Python中pypandoc.convert_file函数的典型用法代码示例。如果您正苦于以下问题：Python convert_file函数的具体用法？Python convert_file怎么用？Python convert_file使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了convert_file函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: docx2txt
def docx2txt(filename):
    """Convert a ``.docx`` document into a text file stored next to it.

    On Windows (``os.name == 'nt'``) the document is opened through the Word
    automation object ``wc``, saved with format code 4, and the resulting
    file is then re-encoded from GBK to UTF-8 in place.  On every other
    platform the conversion is delegated to ``pypandoc.convert_file``
    (docx -> markdown, written straight to the output file).

    Args:
        filename: Path of the ``.docx`` file to convert.

    The output path is ``filename`` with its extension replaced by ``.txt``;
    it is printed once the conversion has finished.
    """
    import io

    # Swap only the extension.  A plain ``replace('docx', 'txt')`` would also
    # rewrite any "docx" that appears in a directory or base name.
    newfilename = os.path.splitext(filename)[0] + '.txt'

    if os.name == 'nt':
        print('nt')
        word = wc.Dispatch('Word.Application')
        try:
            doc = word.Documents.Open(filename)
            try:
                # Save as a text file; per the original author the result is
                # GBK encoded.
                doc.SaveAs(newfilename, 4)
            finally:
                doc.Close()
        finally:
            # Always shut Word down, even when opening or saving failed.
            word.Quit()

        # Read the text back as GBK and store it as UTF-8.
        with io.open(newfilename, 'r', encoding='gbk') as input_file:
            text = input_file.read()
        with io.open(newfilename, 'w', encoding='utf-8') as output_file:
            output_file.write(text)
    else:
        # Let pandoc do the conversion and write the output file itself.
        pypandoc.convert_file(filename, 'markdown', 'docx', outputfile=newfilename)

    print(newfilename)
示例2: main
def main():
    """Render each remote Markdown page of the DeepLearning.ai summary as a PDF.

    Prints the pandoc version and path reported by pypandoc, then converts
    every URL in the table below with ``pypandoc.convert_file``.  Each PDF is
    written to the current directory and named ``<title>.pdf``.
    """
    home_link = "https://raw.githubusercontent.com/mbadry1/DeepLearning.ai-Summary/master/"
    marks_down_links = {
        "Deeplearning.ai summary Homepage":
            home_link + "Readme.md",
        "01- Neural Networks and Deep Learning":
            home_link + "1-%20Neural%20Networks%20and%20Deep%20Learning/Readme.md",
        "02- Improving Deep Neural Networks Hyperparameter tuning, Regularization and Optimization":
            home_link + "2-%20Improving%20Deep%20Neural%20Networks/Readme.md",
        "03- Structuring Machine Learning Projects":
            home_link + "3-%20Structuring%20Machine%20Learning%20Projects/Readme.md",
        "04- Convolutional Neural Networks":
            home_link + "4-%20Convolutional%20Neural%20Networks/Readme.md",
        "05- Sequence Models":
            home_link + "5-%20Sequence%20Models/Readme.md",
    }

    # Extracting pandoc version
    pandoc_version = pypandoc.get_pandoc_version()
    print("pandoc_version:", pandoc_version)
    print("pandoc_path:", pypandoc.get_pandoc_path())
    print("\n")

    # pandoc 2.x renamed ``--latex-engine`` to ``--pdf-engine``; pick the
    # spelling the installed pandoc understands.  If the version string cannot
    # be parsed, keep the flag this script has always used.
    try:
        major_version = int(str(pandoc_version).split(".")[0])
    except ValueError:
        major_version = 1
    engine_option = "--pdf-engine" if major_version >= 2 else "--latex-engine"
    extra_args = [engine_option + "=xelatex", "-V", "geometry:margin=1.5cm"]

    # Starting downloading and converting
    for key, value in marks_down_links.items():
        print("Converting", key)
        pypandoc.convert_file(
            value,
            'pdf',
            extra_args=extra_args,
            outputfile=(key + ".pdf")
        )
        print("Converting", key, "completed")
示例3: convert_index_to_html
def convert_index_to_html(directory):
    """Convert ``<directory>/index.rst`` into ``<directory>/index.html``.

    The conversion is performed by ``convert_file``, which writes the HTML
    output file itself.

    Args:
        directory: Folder that contains ``index.rst``; the HTML file is
            written into the same folder.
    """
    source_path = '{0}/index.rst'.format(directory)
    target_path = '{0}/index.html'.format(directory)
    convert_file(source_path, 'html', outputfile=target_path)
示例4: convert_readme
def convert_readme():
    """Convert ``README.md`` to reStructuredText and save it as ``README.rst``.

    Progress messages are printed before and after the conversion.  The size
    reported in the final message is ``len()`` of the converted text.
    """
    print("[*] Converting Markdown README to reStructuredText")

    # Imported lazily, after the first progress message has been printed.
    import pypandoc

    converted = pypandoc.convert_file('README.md', 'rst')
    with open('README.rst', 'w+', encoding='utf-8') as readme_out:
        readme_out.write(converted)

    size = len(converted)
    print("[*] Finished converting to README.rst ({} bytes)".format(size))
示例5: get_long_description
def get_long_description(self, filename='README.md'):
    """Return the long description, converted to reStructuredText if possible.

    The Markdown file is converted with pypandoc.  When pypandoc cannot be
    imported, or the conversion raises ``IOError``/``OSError``, the raw file
    content is returned instead.

    Args:
        filename: Path of the Markdown file to use.

    Returns:
        The converted reStructuredText, or the unmodified file content when
        the conversion is unavailable.
    """
    try:
        import pypandoc
        # Honour the ``filename`` argument instead of a hard-coded README.md.
        description = pypandoc.convert_file(filename, 'rst', 'md')
    except (IOError, OSError, ImportError):
        # Fall back to the raw Markdown; the handle is closed deterministically.
        with open(filename) as readme:
            description = readme.read()
    return description
示例6: convert_readme
def convert_readme():
    """Return the README as reStructuredText, regenerating ``README.rst``.

    When pypandoc can be imported, ``README.md`` is converted, the result is
    written to ``README.rst`` and returned.  Otherwise the value of
    ``read_rst()`` is returned.
    """
    try:
        import pypandoc
    except ImportError:
        return read_rst()
    else:
        rst_text = pypandoc.convert_file('README.md', 'rst')
        with open('README.rst', 'w') as rst_file:
            rst_file.write(rst_text)
        return rst_text
示例7: long_description
def long_description(filename="README.md"):
    """Return the content of a Markdown file as reStructuredText if possible.

    Environment variables in ``filename`` are expanded once and the expanded
    path is used for the existence check, the conversion and the fallback
    read alike.

    Args:
        filename: Path of the Markdown file; may contain environment
            variables such as ``$HOME``.

    Returns:
        An empty string when the file does not exist; otherwise the text
        converted by pypandoc, or the raw file content when pypandoc cannot
        be imported or raises ``OSError``.
    """
    path = os.path.expandvars(filename)
    if not os.path.isfile(path):
        return ""
    try:
        import pypandoc
        return pypandoc.convert_file(path, "rst")
    except (ImportError, OSError):
        # No usable pandoc: hand back the Markdown unchanged.
        with open(path) as readme:
            return readme.read()
示例8: finalize
def finalize(args):
    """Run the final steps of a release and report the outcome as an exit code.

    After ``distclean()`` the function calls ``check_pypirc()`` and then
    verifies, in order, that a release PR exists, that it is mergeable, that
    the release images are present, that a local release branch exists and
    that a GitHub release draft exists.  The first failed check raises
    ``ScriptError``.  It then checks out the release branch, regenerates
    ``README.rst`` from ``README.md``, runs ``setup.py sdist bdist_wheel``,
    merges the PR, calls ``pypi_upload``, pushes the images and publishes the
    release.

    Args:
        args: Options object; ``repo``, ``release`` and ``finalize_resume``
            are read here, and the whole object is passed to ``pypi_upload``.

    Returns:
        0 on success, 1 when a ``ScriptError`` was raised (its message is
        printed).
    """
    distclean()
    try:
        check_pypirc()
        repo = Repository(REPO_ROOT, args.repo)
        images = ImageManager(args.release)

        release_pr = repo.find_release_pr(args.release)
        if not release_pr:
            raise ScriptError('No PR found for {}'.format(args.release))
        if not check_pr_mergeable(release_pr):
            raise ScriptError('Can not finalize release with an unmergeable PR')
        if not images.check_images():
            raise ScriptError('Missing release image')

        release_branch = branch_name(args.release)
        if not repo.branch_exists(release_branch):
            raise ScriptError('No local branch exists for this release.')

        draft_release = repo.find_release(args.release)
        if not draft_release:
            raise ScriptError('No Github release draft for this version')

        repo.checkout_branch(release_branch)

        # Regenerate the reStructuredText README before building the packages.
        readme_md = os.path.join(REPO_ROOT, 'README.md')
        readme_rst = os.path.join(REPO_ROOT, 'README.rst')
        pypandoc.convert_file(readme_md, 'rst', outputfile=readme_rst)

        setup_script = os.path.join(REPO_ROOT, 'setup.py')
        run_setup(setup_script, script_args=['sdist', 'bdist_wheel'])

        merge_result = release_pr.merge()
        # A failed merge is tolerated only when resuming a finalize run.
        if not (merge_result.merged or args.finalize_resume):
            raise ScriptError(
                'Unable to merge PR #{}: {}'.format(release_pr.number, merge_result.message)
            )

        pypi_upload(args)
        images.push_images()
        repo.publish_release(draft_release)
    except ScriptError as error:
        print(error)
        return 1
    else:
        return 0
示例9: read_metadata
def read_metadata(self, path, format=None):
    """Read a document's metadata through the ``META_TEMPLATE`` template.

    The file is converted with ``convert_file`` using ``META_TEMPLATE`` as
    the output template, and the result is parsed as YAML.  Every key is
    lower-cased and every value is turned into a string before being passed
    to ``self.process_metadata``.

    Args:
        path: Path of the document to read.
        format: Input format handed to ``convert_file``; ``None`` leaves the
            choice to ``convert_file``.

    Returns:
        A dict mapping lower-cased metadata names to processed values.  It
        is empty when the document yields no metadata.
    """
    metadata_yaml = convert_file(path, to='markdown', format=format,
                                 extra_args=['--template', META_TEMPLATE])
    raw_metadata = yaml.safe_load(metadata_yaml)
    logger.debug(str(raw_metadata))

    # ``safe_load`` returns None for an empty document; treat that as "no
    # metadata" rather than failing on ``None.items()``.
    if not raw_metadata:
        return {}

    metadata = {}
    for name, value in raw_metadata.items():
        key = name.lower()
        metadata[key] = self.process_metadata(key, str(value))
    return metadata
示例10: doc_convert
def doc_convert(project, logger):
    """Fill the project's description and summary from its README.

    The file named by ``$distutils_readme_file`` is converted to
    reStructuredText with pypandoc.  ``project.description`` is set to the
    converted text and ``project.summary`` to its first line (stripped).
    Each attribute is only set when it is missing, is ``None``, or the
    ``distutils_description_overwrite`` property is truthy.

    Args:
        project: Project object providing ``expand_path`` and ``get_property``.
        logger: Logger used for the debug message.
    """
    import pypandoc

    def _should_set(attribute):
        # Same short-circuit order as a chained ``or``: the overwrite
        # property is consulted only when the attribute already has a value.
        if not hasattr(project, attribute):
            return True
        if getattr(project, attribute) is None:
            return True
        return project.get_property("distutils_description_overwrite")

    readme_file = project.expand_path("$distutils_readme_file")
    logger.debug("Converting %s into RST format for PyPi documentation...", readme_file)
    description = pypandoc.convert_file(readme_file, "rst")

    if _should_set("description"):
        project.description = description

    if _should_set("summary"):
        project.summary = description.splitlines()[0].strip()
示例11: read_md
def read_md(path):
    """Return the content of a Markdown file, as reStructuredText if possible.

    This is a best-effort helper: it never raises for a missing file, a
    missing pypandoc or a failed conversion.

    Args:
        path: Path of the Markdown file.

    Returns:
        The text converted by pypandoc; the raw file content when the
        conversion is not possible; an empty string when the file does not
        exist or cannot be read.
    """
    if not os.path.exists(path):
        return ""

    try:
        from pypandoc import convert_file
        return convert_file(path, 'rst')
    except Exception:
        # Deliberately broad (pypandoc missing, pandoc missing, conversion
        # error), but unlike a bare ``except`` it lets KeyboardInterrupt and
        # SystemExit through.
        pass

    try:
        with open(path, 'r') as readme:
            return readme.read()
    except Exception:
        return ""
示例12: read_input
def read_input(infilename):
    """Read text from a file.

    Supported formats:
    * plain text
    * pdf
    * all formats from pandoc

    Args:
        infilename: Path of the file to read.

    Returns:
        The result of ``extract_from_pdf`` for files whose extension is
        ``.pdf`` (case-insensitive); otherwise the pandoc conversion to
        Markdown, or the raw file content when that conversion fails.
    """
    import os

    # Look at the real extension only: a substring test would also send
    # "notes.pdf.txt" or "docs.pdf/readme.md" to the PDF extractor.
    extension = os.path.splitext(infilename)[1]
    if extension.lower() == ".pdf":
        return extract_from_pdf(infilename)

    try:
        return pypandoc.convert_file(infilename, 'md')
    except Exception:
        # The input format is not available through pypandoc, so read the
        # file as plain text.
        with open(infilename, "r") as infile:
            return infile.read()
示例13: open
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from setuptools import setup, find_packages
# The README is written in Markdown, while PyPI expects reStructuredText.
# Convert it with pypandoc when available; if pypandoc cannot be imported or
# the conversion raises IOError, use the raw Markdown instead.
# NOTE(review): the fallback still raises if README.md itself is missing.
try:
    import pypandoc
    long_description = pypandoc.convert_file('README.md', 'rst')
except (IOError, ImportError):
    # Close the handle deterministically instead of leaking it.
    with open('README.md') as readme:
        long_description = readme.read()

# Execute __about__.py into a dict so the package metadata can be read
# without importing the package itself.
about = {}
with open('src/clikraken/__about__.py') as f:
    exec(f.read(), about)
# ``about`` now holds the names defined in __about__.py (e.g. about['__version__']).
# NOTE(review): this setup() call is cut off in the excerpt; the remaining
# arguments and the closing parenthesis are not visible here.
setup(
name=about['__title__'],
version=about['__version__'],
packages=find_packages('src'),
package_dir={'': 'src'},
author=about['__author__'],
author_email=about['__email__'],
license=about['__license__'],
description=about['__summary__'],
long_description=long_description,
include_package_data=True,
示例14: convert_file
from setuptools import setup
from setuptools import find_packages
# Use a reStructuredText rendering of the README as the long description.
# Fall back to the raw Markdown when pypandoc is not installed (ImportError)
# or when it raises OSError, e.g. because no pandoc binary is available.
try:
    from pypandoc import convert_file
    long_description = convert_file('README.md', 'rst')
except (ImportError, OSError):
    with open('README.md') as readme:
        long_description = readme.read()
# Package metadata for talkzoho; ``long_description`` is computed above.
setup(
    name="talkzoho",
    version="3.0.3",
    description="Asynchronous wrapper for Zoho's numerous APIs",
    long_description=long_description,
    url="https://github.com/A2Z-Cloud/Talk-Zoho",
    packages=find_packages(exclude=("tests", "tests.*")),
    author="James Stidard",
    author_email="[email protected]",
    keywords=["talkzoho", "Zoho", "async", "tornado"],
    install_requires=[
        "fuzzywuzzy",
        "python-Levenshtein",
        "inflect",
        "tornado",
    ],
)
示例15: open
#!/usr/bin/env python
import pypandoc
import os
import yara
import fnmatch
from codecs import open
# Where the Yara rule sources live and where the compiled bundle is written.
rules_dir = 'apkid/rules/'
compiled_rules_path = rules_dir + "rules.yarc"

# Step 1: regenerate README.rst from the Markdown README.
print("[*] Converting Markdown README to reStructuredText ...")
rst = pypandoc.convert_file('README.md', 'rst')
with open('README.rst', 'w+', encoding='utf-8') as f:
    f.write(rst)
print("[*] Finished converting to README.rst (%s bytes)" % len(rst))

# Step 2: collect every *.yar file below rules_dir.  Each path is used as
# both key and value of the mapping handed to yara.compile().
yara_files = {}
for root, dirnames, filenames in os.walk(rules_dir):
    for filename in fnmatch.filter(filenames, '*.yar'):
        path = os.path.join(root, filename)
        yara_files.update({path: path})

# Step 3: compile the collected rules and store the compiled bundle.
rules = yara.compile(filepaths=yara_files)
print("[*] Compiling %d Yara rules ..." % len(yara_files))
rules.save(compiled_rules_path)
print("[*] Saved rules to %s" % compiled_rules_path)
#print "[*] Registering ..."
#os.system("python setup.py register")