本文整理汇总了Python中calibre.ebooks.docx.container.DOCX.read方法的典型用法代码示例。如果您正苦于以下问题:Python DOCX.read方法的具体用法?Python DOCX.read怎么用?Python DOCX.read使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类calibre.ebooks.docx.container.DOCX的用法示例。
在下文中一共展示了DOCX.read方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: set_metadata
# 需要导入模块: from calibre.ebooks.docx.container import DOCX [as 别名]
# 或者: from calibre.ebooks.docx.container.DOCX import read [as 别名]
def set_metadata(stream, mi):
    """Rewrite the document-property parts of the DOCX in ``stream`` from ``mi``.

    Two part names are obtained from ``get_document_properties_names()``.
    The first part is parsed, passed through ``update_doc_props`` and always
    written back.  The second part is optional: if reading it raises any
    ``Exception`` it is left untouched; otherwise its ``Company`` element is
    replaced by a single one whose text is ``mi.publisher``.

    :param stream: seekable file-like object holding the DOCX; modified in
        place through ``safe_replace``.
    :param mi: metadata object; this function itself only reads
        ``mi.publisher`` (the rest is consumed by ``update_doc_props``).
    """
    from calibre.utils.zipfile import safe_replace

    container = DOCX(stream, extract=False)
    dp_name, ap_name = container.get_document_properties_names()

    core_raw = container.read(dp_name)
    try:
        app_raw = container.read(ap_name)
    except Exception:
        # Best effort: a missing/unreadable second part is simply skipped.
        app_raw = None

    core_props = etree.fromstring(core_raw)
    update_doc_props(core_props, mi)

    extra = {}
    if app_raw is not None:
        app_props = etree.fromstring(app_raw)
        company = app_props.makeelement('{%s}Company' % namespaces['ep'])
        # Collect first, then remove, so the tree is not mutated while it is
        # being iterated.
        stale = [node for node in app_props if node.tag == company.tag]
        for node in stale:
            app_props.remove(node)
        company.text = mi.publisher
        app_props.append(company)
        extra[ap_name] = BytesIO(xml2str(app_props))

    # Rewind before handing the stream over for in-place replacement.
    stream.seek(0)
    safe_replace(
        stream,
        dp_name,
        BytesIO(xml2str(core_props)),
        extra_replacements=extra,
    )
示例2: Convert
# 需要导入模块: from calibre.ebooks.docx.container import DOCX [as 别名]
# 或者: from calibre.ebooks.docx.container.DOCX import read [as 别名]
class Convert(object):
def __init__(self, path_or_stream, dest_dir=None, log=None, notes_text=None):
    """Open the DOCX input and prepare an empty HTML skeleton.

    :param path_or_stream: forwarded unchanged to ``DOCX``.
    :param dest_dir: output directory; ``os.getcwdu()`` is used when falsy.
    :param log: forwarded to ``DOCX``; ``self.log`` is then taken from the
        resulting container.
    :param notes_text: heading text for the notes section; ``_("Notes")``
        is used when falsy.
    """
    self.docx = DOCX(path_or_stream, log=log)
    self.log = self.docx.log
    self.notes_text = notes_text if notes_text else _("Notes")
    self.dest_dir = dest_dir if dest_dir else os.getcwdu()
    self.mi = self.docx.metadata

    # Helper objects used during conversion.
    self.body = BODY()
    self.styles = Styles()
    self.images = Images()
    self.tables = Tables()
    self.object_map = OrderedDict()

    head = HEAD(
        META(charset="utf-8"),
        TITLE(self.mi.title or _("Unknown")),
        LINK(rel="stylesheet", type="text/css", href="docx.css"),
    )
    self.html = HTML(head, self.body)

    # Set text/tail whitespace on <html>, <head> and <body> so the
    # skeleton carries newlines and tab indentation.
    self.html.text = "\n\t"
    head_el = self.html[0]
    head_el.text = "\n\t\t"
    head_el.tail = "\n"
    for node in head_el:
        node.tail = "\n\t\t"
    head_el[-1].tail = "\n\t"
    body_el = self.html[1]
    body_el.text = "\n"
    body_el.tail = "\n"

    # Only set a lang attribute when the metadata language is known
    # (not "und") and lang_as_iso639_1 returns a truthy code for it.
    lang = canonicalize_lang(self.mi.language)
    if lang and lang != "und":
        iso_code = lang_as_iso639_1(lang)
        if iso_code:
            self.html.set("lang", iso_code)
def __call__(self):
doc = self.docx.document
relationships_by_id, relationships_by_type = self.docx.document_relationships
self.read_styles(relationships_by_type)
self.images(relationships_by_id)
self.layers = OrderedDict()
self.framed = [[]]
self.framed_map = {}
self.anchor_map = {}
self.link_map = defaultdict(list)
self.read_page_properties(doc)
for wp, page_properties in self.page_map.iteritems():
self.current_page = page_properties
p = self.convert_p(wp)
self.body.append(p)
notes_header = None
if self.footnotes.has_notes:
dl = DL()
dl.set("class", "notes")
self.body.append(H1(self.notes_text))
notes_header = self.body[-1]
notes_header.set("class", "notes-header")
self.body.append(dl)
for anchor, text, note in self.footnotes:
dl.append(DT("[", A("←" + text, href="#back_%s" % anchor, title=text), id=anchor))
dl[-1][0].tail = "]"
dl.append(DD())
in_table = False
for wp in note:
if wp.tag.endswith("}tbl"):
self.tables.register(wp)
in_table = True
continue
if in_table:
if ancestor(wp, "w:tbl") is not None:
self.tables.add(wp)
else:
in_table = False
p = self.convert_p(wp)
dl[-1].append(p)
self.resolve_links(relationships_by_id)
self.styles.cascade(self.layers)
self.tables.apply_markup(self.object_map)
numbered = []
for html_obj, obj in self.object_map.iteritems():
raw = obj.get("calibre_num_id", None)
if raw is not None:
lvl, num_id = raw.partition(":")[0::2]
try:
lvl = int(lvl)
except (TypeError, ValueError):
lvl = 0
numbered.append((html_obj, num_id, lvl))
self.numbering.apply_markup(numbered, self.body, self.styles, self.object_map)
self.apply_frames()
if len(self.body) > 0:
self.body.text = "\n\t"
for child in self.body:
child.tail = "\n\t"
self.body[-1].tail = "\n"
#.........这里部分代码省略.........
示例3: Convert
# 需要导入模块: from calibre.ebooks.docx.container import DOCX [as 别名]
# 或者: from calibre.ebooks.docx.container.DOCX import read [as 别名]
class Convert(object):
def __init__(self, path_or_stream, dest_dir=None, log=None, detect_cover=True, notes_text=None):
    """Open the DOCX input, create the conversion helpers and an empty HTML tree.

    :param path_or_stream: forwarded unchanged to ``DOCX``.
    :param dest_dir: output directory; ``os.getcwdu()`` is used when falsy.
    :param log: forwarded to ``DOCX``; ``self.log`` is then taken from the
        resulting container.
    :param detect_cover: stored as ``self.detect_cover``.
    :param notes_text: heading text for the notes section; ``_('Notes')``
        is used when falsy.
    """
    self.docx = DOCX(path_or_stream, log=log)
    # ms_pat: runs of two or more whitespace characters.
    # ws_pat: a single newline, carriage return or tab.
    self.ms_pat = re.compile(r'\s{2,}')
    self.ws_pat = re.compile(r'[\n\r\t]')
    self.log = self.docx.log
    self.detect_cover = detect_cover
    self.notes_text = notes_text if notes_text else _('Notes')
    self.dest_dir = dest_dir if dest_dir else os.getcwdu()
    self.mi = self.docx.metadata

    # Helper objects used during conversion.  Styles is handed the Tables
    # instance and Images the log, so those must exist first.
    self.body = BODY()
    self.theme = Theme()
    self.settings = Settings()
    self.tables = Tables()
    self.fields = Fields()
    self.styles = Styles(self.tables)
    self.images = Images(self.log)
    self.object_map = OrderedDict()

    head = HEAD(
        META(charset='utf-8'),
        TITLE(self.mi.title or _('Unknown')),
        LINK(rel='stylesheet', type='text/css', href='docx.css'),
    )
    self.html = HTML(head, self.body)

    # Set text/tail whitespace on <html>, <head> and <body> so the
    # skeleton carries newlines and tab indentation.
    self.html.text = '\n\t'
    head_el = self.html[0]
    head_el.text = '\n\t\t'
    head_el.tail = '\n'
    for node in head_el:
        node.tail = '\n\t\t'
    head_el[-1].tail = '\n\t'
    body_el = self.html[1]
    body_el.text = '\n'
    body_el.tail = '\n'

    # Only set a lang attribute when the metadata language is known
    # (not 'und') and lang_as_iso639_1 returns a truthy code for it.
    lang = canonicalize_lang(self.mi.language)
    if lang and lang != 'und':
        iso_code = lang_as_iso639_1(lang)
        if iso_code:
            self.html.set('lang', iso_code)
def __call__(self):
doc = self.docx.document
relationships_by_id, relationships_by_type = self.docx.document_relationships
self.fields(doc, self.log)
self.read_styles(relationships_by_type)
self.images(relationships_by_id)
self.layers = OrderedDict()
self.framed = [[]]
self.framed_map = {}
self.anchor_map = {}
self.link_map = defaultdict(list)
self.link_source_map = {}
paras = []
self.log.debug('Converting Word markup to HTML')
self.read_page_properties(doc)
self.current_rels = relationships_by_id
for wp, page_properties in self.page_map.iteritems():
self.current_page = page_properties
if wp.tag.endswith('}p'):
p = self.convert_p(wp)
self.body.append(p)
paras.append(wp)
self.read_block_anchors(doc)
self.styles.apply_contextual_spacing(paras)
# Apply page breaks at the start of every section, except the first
# section (since that will be the start of the file)
self.styles.apply_section_page_breaks(self.section_starts[1:])
notes_header = None
orig_rid_map = self.images.rid_map
if self.footnotes.has_notes:
dl = DL()
dl.set('class', 'notes')
self.body.append(H1(self.notes_text))
notes_header = self.body[-1]
notes_header.set('class', 'notes-header')
self.body.append(dl)
for anchor, text, note in self.footnotes:
dl.append(DT('[', A('←' + text, href='#back_%s' % anchor, title=text), id=anchor))
dl[-1][0].tail = ']'
dl.append(DD())
paras = []
self.images.rid_map = self.current_rels = note.rels[0]
for wp in note:
if wp.tag.endswith('}tbl'):
self.tables.register(wp, self.styles)
self.page_map[wp] = self.current_page
else:
p = self.convert_p(wp)
dl[-1].append(p)
paras.append(wp)
self.styles.apply_contextual_spacing(paras)
for p, wp in self.object_map.iteritems():
if len(p) > 0 and not p.text and len(p[0]) > 0 and not p[0].text and p[0][0].get('class', None) == 'tab':
# Paragraph uses tabs for indentation, convert to text-indent
parent = p[0]
#.........这里部分代码省略.........
示例4: Convert
# 需要导入模块: from calibre.ebooks.docx.container import DOCX [as 别名]
# 或者: from calibre.ebooks.docx.container.DOCX import read [as 别名]
class Convert(object):
def __init__(self, path_or_stream, dest_dir=None, log=None, notes_text=None):
    """Open the DOCX input, create the conversion helpers and an empty HTML tree.

    :param path_or_stream: forwarded unchanged to ``DOCX``.
    :param dest_dir: output directory; ``os.getcwdu()`` is used when falsy.
    :param log: forwarded to ``DOCX``; ``self.log`` is then taken from the
        resulting container.
    :param notes_text: heading text for the notes section; ``_('Notes')``
        is used when falsy.
    """
    self.docx = DOCX(path_or_stream, log=log)
    # ms_pat: runs of two or more whitespace characters.
    # ws_pat: a single newline, carriage return or tab.
    self.ms_pat = re.compile(r'\s{2,}')
    self.ws_pat = re.compile(r'[\n\r\t]')
    self.log = self.docx.log
    self.notes_text = notes_text if notes_text else _('Notes')
    self.dest_dir = dest_dir if dest_dir else os.getcwdu()
    self.mi = self.docx.metadata

    # Helper objects used during conversion.  Styles is handed the Tables
    # instance, so Tables must be created first.
    self.body = BODY()
    self.tables = Tables()
    self.styles = Styles(self.tables)
    self.images = Images()
    self.object_map = OrderedDict()

    head = HEAD(
        META(charset='utf-8'),
        TITLE(self.mi.title or _('Unknown')),
        LINK(rel='stylesheet', type='text/css', href='docx.css'),
    )
    self.html = HTML(head, self.body)

    # Set text/tail whitespace on <html>, <head> and <body> so the
    # skeleton carries newlines and tab indentation.
    self.html.text = '\n\t'
    head_el = self.html[0]
    head_el.text = '\n\t\t'
    head_el.tail = '\n'
    for node in head_el:
        node.tail = '\n\t\t'
    head_el[-1].tail = '\n\t'
    body_el = self.html[1]
    body_el.text = '\n'
    body_el.tail = '\n'

    # Only set a lang attribute when the metadata language is known
    # (not 'und') and lang_as_iso639_1 returns a truthy code for it.
    lang = canonicalize_lang(self.mi.language)
    if lang and lang != 'und':
        iso_code = lang_as_iso639_1(lang)
        if iso_code:
            self.html.set('lang', iso_code)
def __call__(self):
doc = self.docx.document
relationships_by_id, relationships_by_type = self.docx.document_relationships
self.read_styles(relationships_by_type)
self.images(relationships_by_id)
self.layers = OrderedDict()
self.framed = [[]]
self.framed_map = {}
self.anchor_map = {}
self.link_map = defaultdict(list)
self.read_page_properties(doc)
for wp, page_properties in self.page_map.iteritems():
self.current_page = page_properties
if wp.tag.endswith('}p'):
p = self.convert_p(wp)
self.body.append(p)
notes_header = None
if self.footnotes.has_notes:
dl = DL()
dl.set('class', 'notes')
self.body.append(H1(self.notes_text))
notes_header = self.body[-1]
notes_header.set('class', 'notes-header')
self.body.append(dl)
for anchor, text, note in self.footnotes:
dl.append(DT('[', A('←' + text, href='#back_%s' % anchor, title=text), id=anchor))
dl[-1][0].tail = ']'
dl.append(DD())
for wp in note:
if wp.tag.endswith('}tbl'):
self.tables.register(wp, self.styles)
self.page_map[wp] = self.current_page
p = self.convert_p(wp)
dl[-1].append(p)
self.resolve_links(relationships_by_id)
self.styles.cascade(self.layers)
self.tables.apply_markup(self.object_map, self.page_map)
numbered = []
for html_obj, obj in self.object_map.iteritems():
raw = obj.get('calibre_num_id', None)
if raw is not None:
lvl, num_id = raw.partition(':')[0::2]
try:
lvl = int(lvl)
except (TypeError, ValueError):
lvl = 0
numbered.append((html_obj, num_id, lvl))
self.numbering.apply_markup(numbered, self.body, self.styles, self.object_map)
self.apply_frames()
if len(self.body) > 0:
self.body.text = '\n\t'
for child in self.body:
child.tail = '\n\t'
self.body[-1].tail = '\n'
self.styles.generate_classes()
for html_obj, obj in self.object_map.iteritems():
#.........这里部分代码省略.........