本文整理汇总了Python中docx.Document方法的典型用法代码示例。如果您正苦于以下问题：Python docx.Document方法的具体用法？Python docx.Document怎么用？Python docx.Document使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块docx的用法示例。
在下文中一共展示了docx.Document方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_finds_run_nodes_in_complex_field_without_separate_correctly
# 需要导入模块: import docx [as 别名]
# 或者: from docx import Document [as 别名]
def test_finds_run_nodes_in_complex_field_without_separate_correctly(self):
    """Check run detection for a complex field that has no separate run.

    The "User.FullName" docproperty is expected to be the field without a
    separate run; it consists of the runs begin, docprop and end.
    """
    doc = Document(docx_path('complex_field_without_separate.docx'))
    found = CustomProperties(doc).find_docprops_in_document()
    assert len(found) == 2, \
        'input should contain two complex field docproperties'

    full_name_fields = [
        candidate for candidate in found
        if candidate.name == 'User.FullName'
    ]
    assert len(full_name_fields) == 1, \
        "There should be only one User.FullName docproperty"
    field = full_name_fields[0]

    assert field.get_separate_run() is None, \
        "This complex field should not have a separate run."
    assert [] == field.get_runs_for_update(), \
        "As there is no separate run, there should be no run to update"

    # With no separate run, dissolving the property has to remove every
    # run of the field.
    replaced = field.get_runs_to_replace_field_with_value()
    assert len(replaced) == 3
    assert replaced[0] == field.begin_run
    assert replaced[1] == field.w_r
    assert replaced[2] == field.end_run
示例2: generate_document
# 需要导入模块: import docx [as 别名]
# 或者: from docx import Document [as 别名]
def generate_document(employee_data, agenda):
    """Write one orientation letter for every employee who is due.

    Args:
        employee_data: iterable of dicts providing the keys 'isDue',
            'name', 'department' and 'id'.
        agenda: mapping from a department to an iterable of session names.

    Each letter is saved as ``orientation_<id>.docx`` (a relative path).
    Nothing is created for employees whose 'isDue' value is falsy.
    """
    for emp in employee_data:
        if not emp['isDue']:
            continue

        # Start from a fresh document for each employee: sharing a single
        # Document would make every letter also contain the content written
        # for the employees processed before it.
        document = Document()
        document.add_heading('Your New Hire Orientation\n', level=1)
        document.add_paragraph('Dear %s,' % emp['name'])
        document.add_paragraph('Welcome to Google Inc. You have been selected for our new hire orientation.')
        document.add_paragraph('Based on your department you will go through below sessions:')
        for session in agenda[emp['department']]:
            document.add_paragraph(session, style='ListBullet')
        document.add_paragraph('Thanks,\n HR Manager')

        # Save inside the loop so the file name always belongs to the
        # employee whose letter was just built.
        document.save('orientation_%s.docx' % emp['id'])
示例3: iter_block_items
# 需要导入模块: import docx [as 别名]
# 或者: from docx import Document [as 别名]
def iter_block_items(self, parent):
    """
    Yield each paragraph and table child within *parent*, in document order.

    Each returned value is an instance of either Table or Paragraph. *parent*
    would most commonly be a reference to a main Document object, but
    also works for a _Cell object, which itself can contain paragraphs and
    tables.

    Raises ValueError when *parent* is neither of those. Because this is a
    generator, the error surfaces when iteration starts, not at call time.
    """
    # ``docx.Document`` is a factory function rather than a class, so using
    # it as the second argument of isinstance() raises TypeError. Check
    # against the class that the factory actually returns.
    try:
        from docx.document import Document as document_class
    except ImportError:
        # Presumably an older python-docx where ``Document`` itself is the
        # class -- fall back to the name already in scope.
        document_class = Document

    if isinstance(parent, document_class):
        parent_elm = parent.element.body
    elif isinstance(parent, _Cell):
        parent_elm = parent._tc
    else:
        raise ValueError(
            "parent must be a Document or a _Cell, got %r" % (type(parent),)
        )

    for child in parent_elm.iterchildren():
        if isinstance(child, CT_P):
            yield Paragraph(child, parent)
        elif isinstance(child, CT_Tbl):
            yield Table(child, parent)
示例4: translate_doc
# 需要导入模块: import docx [as 别名]
# 或者: from docx import Document [as 别名]
def translate_doc(filename, destination='zh-CN', mix=True):
    """
    Translate a Word document and save the result as a new document.

    The text of every body paragraph and of every cell of the document's
    tables is translated.

    :param filename: path of the Word file to translate
    :param destination: target language code handed to the translator
    :param mix: if True, keep the original text and append the translation
        on a new line; if False, replace the text with the translation
    :return: None; the result is saved next to the source file, with the
        lower-cased *destination* inserted in front of the file extension
    """
    import os

    # One translator instance is enough for the whole document.
    translator = Translator()

    def render(original):
        translated = translator.translate(original, dest=destination).text
        return original + '\n' + translated if mix else translated

    doc = Document(filename)

    # Blank paragraphs/cells are left untouched: there is nothing to
    # translate, and rewriting their text would only disturb their content.
    for p in doc.paragraphs:
        if p.text.strip():
            p.text = render(p.text)

    for table in doc.tables:
        for row in table.rows:
            for cell in row.cells:
                if cell.text.strip():
                    # Write the result back to the cell itself (the original
                    # assigned to the last body paragraph here).
                    cell.text = render(cell.text)

    # Split off only the real extension; str.replace('.doc', ...) would also
    # rewrite any other '.doc' occurring in the path.
    root, ext = os.path.splitext(filename)
    doc.save(root + destination.lower() + ext)
示例5: test_complex_docprop_fields_with_multiple_textnodes_are_updated
# 需要导入模块: import docx [as 别名]
# 或者: from docx import Document [as 别名]
def test_complex_docprop_fields_with_multiple_textnodes_are_updated(self):
    """A cached value scattered over several parts is reset to one part."""
    doc = Document(docx_path('spellchecked_docproperty.docx'))

    paragraph_nodes = xpath(doc.element.body, '//w:p')
    assert len(paragraph_nodes) == 1, 'input file contains one paragraph'
    assert len(xpath(doc.element.body, '//w:instrText')) == 1, \
        'input contains one complex field docproperty'

    # Before the update the cached value is split across four parts.
    parts_before = cached_complex_field_values(paragraph_nodes[0])
    assert len(parts_before) == 4, \
        'doc property value is scattered over 4 parts'
    assert ''.join(parts_before) == 'i will be spllchecked!'

    CustomProperties(doc).update_all()

    # After the update the same text is held in a single cached value.
    refreshed_node = xpath(doc.element.body, '//w:p')[0]
    parts_after = cached_complex_field_values(refreshed_node)
    assert len(parts_after) == 1, \
        'doc property value has been reset to one cached value'
    assert parts_after[0] == 'i will be spllchecked!'
示例6: test_complex_field_gets_updated
# 需要导入模块: import docx [as 别名]
# 或者: from docx import Document [as 别名]
def test_complex_field_gets_updated(self):
    """Updating "Number Property" changes the paragraph that displays it."""
    doc = Document(docx_path('docproperties.docx'))

    def paragraph_texts():
        return [para.text for para in doc.paragraphs]

    assert len(doc.paragraphs) == 6, 'input file should contain 6 paragraphs'
    instr_nodes = xpath(doc.element.body, './/w:instrText')
    assert len(instr_nodes) == 5,\
        'input should contain five complex field docproperties'

    expected = [
        u'Custom Doc Properties',
        u'Text: Foo Bar',
        u'Number: 123',
        u'Boolean: Y',
        u'Date: 11.06.2019',
        u'Float: 1.1',
    ]
    assert paragraph_texts() == expected

    CustomProperties(doc).update("Number Property", 423)

    # Only the third paragraph is expected to differ afterwards.
    expected[2] = u'Number: 423'
    assert paragraph_texts() == expected
示例7: test_removes_simple_field_but_keeps_value
# 需要导入模块: import docx [as 别名]
# 或者: from docx import Document [as 别名]
def test_removes_simple_field_but_keeps_value(self):
    """Dissolving a simple field removes the field but keeps its value."""
    doc = Document(docx_path('outdated_docproperty_with_umlauts.docx'))

    def matching_fields():
        return xpath(doc.element.body, simple_field_expression(u"F\xfc\xfc"))

    assert len(doc.paragraphs) == 1, 'input file should contain 1 paragraph'

    fields_before = matching_fields()
    assert len(fields_before) == 1, 'should contain one simple field docproperty'
    assert doc.paragraphs[0].text == u'Hie chund ds property: '
    assert fields_before[0].text == u'xxx'

    CustomProperties(doc).dissolve_fields(u"F\xfc\xfc")

    fields_after = matching_fields()
    assert len(fields_after) == 0, 'should not contain any docproperties anymore'
    # Removing the simple field moves its value one level up in the
    # hierarchy, so it now shows up in the paragraph text.
    assert doc.paragraphs[0].text == u'Hie chund ds property: xxx'
示例8: test_dissolves_all_instances_of_given_field
# 需要导入模块: import docx [as 别名]
# 或者: from docx import Document [as 别名]
def test_dissolves_all_instances_of_given_field(self):
    """Every occurrence of the named field is dissolved; values remain."""
    doc = Document(docx_path('multiple_identical_properties.docx'))

    assert len(doc.paragraphs) == 3, 'input file should contain 3 paragraphs'
    assert len(xpath(doc.element.body, './/w:instrText')) == 3, \
        'document should contain three complex field docproperties'
    for para in doc.paragraphs:
        assert para.text == u'Foo'

    CustomProperties(doc).dissolve_fields("Text Property")

    assert len(doc.paragraphs) == 3
    assert len(xpath(doc.element.body, './/w:instrText')) == 0, \
        'document should not contain any complex field anymore'
    for para in doc.paragraphs:
        assert para.text == u'Foo', "value should have been kept in document"
示例9: test_dissolving_field_when_three_complex_docprop_in_same_paragraph
# 需要导入模块: import docx [as 别名]
# 或者: from docx import Document [as 别名]
def test_dissolving_field_when_three_complex_docprop_in_same_paragraph(self):
    """Dissolving one property leaves the paragraph text unchanged."""
    doc = Document(docx_path('three_props_in_same_paragraph.docx'))
    assert len(doc.paragraphs) == 1, 'input file should contains one paragraph'
    only_paragraph = doc.paragraphs[0]

    props = CustomProperties(doc)
    assert len(props.find_docprops_in_document()) == 3, \
        'input should contain three complex field docproperties'

    template = u'{text} / {num} mor between the fields {text} and some afte the three fields'
    assert only_paragraph.text == template.format(text="I was spellcecked", num=0)

    props.dissolve_fields("Text Property")

    assert len(doc.paragraphs) == 1
    assert len(props.find_docprops_in_document()) == 1, \
        'document should contain one complex field after removal'
    assert only_paragraph.text == template.format(text="I was spellcecked", num=0)
示例10: Analyze_Metadata_pdf
# 需要导入模块: import docx [as 别名]
# 或者: from docx import Document [as 别名]
def Analyze_Metadata_pdf(filename):
    """Print the metadata of a PDF and collect selected values.

    Args:
        filename: path of the PDF file to inspect.

    Side effects: every metadata entry is printed. Values found under
    "/Author", "/Producer" and "/Creator" that are not yet known are
    appended to the module-level lists ``meta_author_array``,
    ``meta_producer_array`` and ``meta_creator_array``. Those three lists
    are then appended to ``metadata_files``.
    """
    collectors = {
        "/Author": meta_author_array,
        "/Producer": meta_producer_array,
        "/Creator": meta_creator_array,
    }

    # open() exists on Python 2 and 3 (file() is Python 2 only), and the
    # with-block closes the handle even if parsing fails. The metadata is
    # consumed inside the block because the reader works on the open stream.
    with open(filename, 'rb') as pdf_handle:
        pdfFile = PdfFileReader(pdf_handle)
        # A PDF without an information dictionary yields None.
        metadata = pdfFile.getDocumentInfo() or {}
        print(' - Document: ' + str(filename))
        for meta in metadata:
            value = metadata[meta]
            # %-formatting also copes with values that are not strings.
            print(' - %s:%s' % (meta, value))
            known_values = collectors.get(meta)
            if known_values is not None and value not in known_values:
                known_values.append(value)

    # Group the different arrays in one with all metadata
    metadata_files.append(meta_author_array)
    metadata_files.append(meta_producer_array)
    metadata_files.append(meta_creator_array)
####### FUNCTION AnalyzeMetadata doc ######
示例11: Analyze_Metadata_doc
# 需要导入模块: import docx [as 别名]
# 或者: from docx import Document [as 别名]
def Analyze_Metadata_doc(fileName):
    """Print the populated core properties of a .docx file.

    Args:
        fileName: path of the .docx file to inspect.

    Only properties whose value is non-empty text or a datetime are
    printed; values of any other type are skipped.
    """
    # Pass the path itself so no file handle is left open by this function
    # (the original opened one with the Python 2-only file() and never
    # closed it).
    docxFile = docx.Document(fileName)
    # Get the structure
    docxInfo = docxFile.core_properties
    # Metadata attributes that should be displayed
    attribute = ["author", "category", "comments", "content_status",
                 "created", "identifier", "keywords", "language",
                 "last_modified_by", "last_printed", "modified",
                 "revision", "subject", "title", "version"]
    # ``unicode`` on Python 2, ``str`` on Python 3.
    text_type = type(u'')

    print(' - Document: ' + str(fileName))
    for meta in attribute:
        metadata = getattr(docxInfo, meta)
        if metadata and isinstance(metadata, (text_type, datetime.datetime)):
            # A unicode template avoids str() on the value, which fails for
            # non-ASCII text on Python 2.
            print(u" \n\t%s: %s" % (meta, metadata))
示例12: Analyze_Metadata_pdf
# 需要导入模块: import docx [as 别名]
# 或者: from docx import Document [as 别名]
def Analyze_Metadata_pdf(filename):
    """Print the metadata of a PDF and collect selected values.

    Args:
        filename: path of the PDF file to inspect.

    Side effects: every metadata entry is printed. Values found under
    "/Author", "/Producer" and "/Creator" that are not yet known are
    appended to the module-level lists ``meta_author_array``,
    ``meta_producer_array`` and ``meta_creator_array``. Those three lists
    are then appended to ``metadata_files``.
    """
    collectors = {
        "/Author": meta_author_array,
        "/Producer": meta_producer_array,
        "/Creator": meta_creator_array,
    }

    # open() exists on Python 2 and 3 (file() is Python 2 only), and the
    # with-block closes the handle even if parsing fails. The metadata is
    # consumed inside the block because the reader works on the open stream.
    with open(filename, 'rb') as pdf_handle:
        pdfFile = PdfFileReader(pdf_handle)
        # A PDF without an information dictionary yields None.
        metadata = pdfFile.getDocumentInfo() or {}
        print(' - Document: ' + str(filename))
        for meta in metadata:
            value = metadata[meta]
            # %-formatting also copes with values that are not strings.
            print(' - %s:%s' % (meta, value))
            known_values = collectors.get(meta)
            if known_values is not None and value not in known_values:
                known_values.append(value)

    # Group the different arrays in one with all metadata
    metadata_files.append(meta_author_array)
    metadata_files.append(meta_producer_array)
    metadata_files.append(meta_creator_array)
####### FUNCTION AnalyzeMetadata doc ######
示例13: _docx2text
# 需要导入模块: import docx [as 别名]
# 或者: from docx import Document [as 别名]
def _docx2text(file_path=None):
    '''
    Collect the text of every non-empty paragraph of a .docx document.

    :param file_path: path of the file, passed to ``docx.Document``
    :return: list of the paragraph texts that are not '', in document order
    '''
    document = docx.Document(file_path)
    # Walk the paragraphs once instead of re-reading document.paragraphs
    # for every index, and read each paragraph's text only once.
    texts = (paragraph.text for paragraph in document.paragraphs)
    return [text for text in texts if text != '']
示例14: insetImgToDocx
# 需要导入模块: import docx [as 别名]
# 或者: from docx import Document [as 别名]
def insetImgToDocx(image):
    """Append *image* plus a caption to the document at ``docx_file``.

    The document is opened from ``docx_file`` and saved back to the same
    location.
    """
    doc = Document(docx_file)
    run = doc.add_paragraph().add_run()
    # Width in inches, matching what is shown on the ruler (translated from
    # the original note).
    picture_width = Inches(5.5)
    run.add_picture(image, width=picture_width)
    run.add_text('Ini picture: {}'.format(image))
    doc.save(docx_file)
示例15: __init__
# 需要导入模块: import docx [as 别名]
# 或者: from docx import Document [as 别名]
def __init__(self, project_root, lines_to_extract=3000, output_file='extracted_code.docx', output=True):
    """Set up project information and, optionally, the output document.

    :param project_root: forwarded to ``ProjectInfo``
    :param lines_to_extract: forwarded to ``ProjectInfo``
    :param output_file: name used to resolve the output path
    :param output: when truthy, resolve the output path and load the
        bundled ``data/template.docx`` as the output document
    """
    self.info = ProjectInfo(project_root, lines_to_extract)
    self.output = output

    # Define every attribute up front so the instance has the same shape
    # whether or not output is enabled; they stay None when it is disabled.
    self.output_path = None
    self.output_file = None
    self.paragraph = None

    if output:
        self.output_path = self.info.get_output_file_path(output_file)
        # The template is located relative to this module's directory.
        template_path = os.path.join(
            os.path.dirname(__file__), 'data/template.docx')
        self.output_file = Document(template_path)