本文整理汇总了Python中pdfminer.layout.LTTextBox类的典型用法代码示例。如果您正苦于以下问题:Python layout.LTTextBox类的具体用法?Python layout.LTTextBox怎么用?Python layout.LTTextBox使用的例子?那么恭喜您,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块pdfminer.layout的用法示例。
在下文中一共展示了layout.LTTextBox类的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: convert_pdf_to_txt
# 需要导入模块: from pdfminer import layout [as 别名]
# 或者: from pdfminer.layout import LTTextBox [as 别名]
def convert_pdf_to_txt(path):
    """Extract the text of every page of a PDF file.

    Args:
        path: Filesystem path of the PDF file; it is opened in binary mode.

    Returns:
        A single string made of the ``get_text()`` output of every
        top-level ``LTTextBox`` / ``LTTextLine`` object, in the order the
        pages and their layout objects are iterated.
    """
    chunks = []
    # The context manager guarantees the file is closed even when parsing
    # raises. All page processing stays inside the block because the
    # parser reads from the open file object.
    with open(path, 'rb') as fp:
        parser = PDFParser(fp)
        doc = PDFDocument()
        # Parser and document are linked in both directions before the
        # document is initialised.
        parser.set_document(doc)
        doc.set_parser(parser)
        # NOTE(review): '' is presumably an empty document password --
        # confirm against the pdfminer version in use.
        doc.initialize('')

        rsrcmgr = PDFResourceManager()
        laparams = LAParams()
        device = PDFPageAggregator(rsrcmgr, laparams=laparams)
        interpreter = PDFPageInterpreter(rsrcmgr, device)

        # Process each page contained in the document.
        for page in doc.get_pages():
            interpreter.process_page(page)
            page_layout = device.get_result()
            for lt_obj in page_layout:
                if isinstance(lt_obj, (LTTextBox, LTTextLine)):
                    chunks.append(lt_obj.get_text())

    # Join once at the end instead of growing a string with +=.
    return ''.join(chunks)
示例2: parse_layout
# 需要导入模块: from pdfminer import layout [as 别名]
# 或者: from pdfminer.layout import LTTextBox [as 别名]
def parse_layout(self, layout):
    """Walk a layout tree and sort its objects into per-kind lists.

    Container objects (figures, text boxes, text lines) are expanded so
    their children are visited too. Horizontal text lines are appended
    to ``self.texts``, images to ``self.images``, and thin rectangles to
    ``self.verticals`` (width < 1.0) or ``self.horizontals``
    (height < 1.0) after being passed to ``self._adjust_to_close``.
    Curves, characters and lines are ignored.

    Args:
        layout: Iterable of layout objects to traverse.

    Raises:
        AssertionError: If an object of an unhandled type is encountered.
    """
    # Explicit stack instead of recursion. The items are reversed so that
    # pop() hands them back in their original iteration order.
    pending = list(reversed(list(layout)))
    while pending:
        obj = pending.pop()
        # Exact-type comparison is intentional: isinstance() would also
        # match subclasses and could route an object to another branch.
        kind = type(obj)
        if kind in (LTFigure, LTTextBox, LTTextLine, LTTextBoxHorizontal):
            pending.extend(reversed(list(obj)))
        elif kind is LTTextLineHorizontal:
            self.texts.append(obj)
        elif kind is LTRect:
            # Only rectangles thinner than 1.0 in one dimension are kept;
            # anything larger is dropped.
            if obj.width < 1.0:
                self._adjust_to_close(obj, self.verticals, 'x0')
                self.verticals.append(obj)
            elif obj.height < 1.0:
                self._adjust_to_close(obj, self.horizontals, 'y0')
                self.horizontals.append(obj)
        elif kind is LTImage:
            self.images.append(obj)
        elif kind in (LTCurve, LTChar, LTLine):
            pass
        else:
            # Raised explicitly rather than via ``assert False`` so the
            # check is not stripped when Python runs with -O.
            raise AssertionError("Unrecognized type: %s" % kind)
示例3: count_rows
# 需要导入模块: from pdfminer import layout [as 别名]
# 或者: from pdfminer.layout import LTTextBox [as 别名]
def count_rows(cls, t, o, offset=0):
    """Count the text lines under ``o`` that match the name column of ``t``.

    A text box contributes the sum of the counts of its children. A text
    line counts as one row when ``o.bbox[0] + offset`` lies within 0.2 of
    ``info.l`` and ``info.regex`` matches the line's entry text, where
    ``info`` is ``t.col_info[TITable.NAME]``. Any other object counts as
    zero.

    Args:
        t: Table description exposing ``col_info``.
        o: Layout object to inspect.
        offset: Value added to ``o.bbox[0]`` before the comparison
            (presumably a horizontal shift -- confirm with callers).

    Returns:
        The number of matching text lines.
    """
    info = t.col_info[TITable.NAME]

    if isinstance(o, layout.LTTextBox):
        return sum(cls.count_rows(t, child, offset) for child in o)

    if not isinstance(o, layout.LTTextLine):
        return 0

    text = cls.get_entry_text(o)
    aligned = abs(info.l - (o.bbox[0] + offset)) < 0.2
    if aligned and info.regex.match(text):
        return 1
    return 0
示例4: _process_layout
# 需要导入模块: from pdfminer import layout [as 别名]
# 或者: from pdfminer.layout import LTTextBox [as 别名]
def _process_layout(self, layout):
    """Convert a layout container into a flat list of ``Paragraph`` objects.

    Each text box or text line becomes one ``Paragraph`` built from its
    stripped text. Figures are descended into recursively and their
    paragraphs are spliced into the result in place. Every other object
    is skipped.

    Args:
        layout: Iterable of layout objects (a page or a figure).

    Returns:
        The list of paragraphs, in iteration order.
    """
    paragraphs = []
    for item in layout:
        if isinstance(item, (LTTextBox, LTTextLine)):
            stripped = item.get_text().strip()
            paragraphs.append(Paragraph(stripped))
            continue
        if isinstance(item, LTFigure):
            # Figures nest further layout objects; flatten them in place.
            paragraphs += self._process_layout(item)
    return paragraphs
示例5: get_text_obj
# 需要导入模块: from pdfminer import layout [as 别名]
# 或者: from pdfminer.layout import LTTextBox [as 别名]
def get_text_obj(cls, obj, index, regexp, text):
    """Find the first layout object whose entry text equals ``text``.

    ``obj`` itself is checked first. If it does not match and it is a
    text box, its children are searched depth-first.

    Args:
        obj: Layout object at which the search starts.
        index: Not used by this function; forwarded unchanged to the
            recursive calls.
        regexp: Not used by this function; forwarded unchanged to the
            recursive calls.
        text: Exact text to look for, compared with
            ``cls.get_entry_text(obj)``.

    Returns:
        The matching object, or ``None`` when nothing matches.
    """
    if cls.get_entry_text(obj) == text:
        return obj

    if isinstance(obj, layout.LTTextBox):
        for child in obj:
            # Forward every parameter: the original recursive call passed
            # only two arguments and raised TypeError.
            found = cls.get_text_obj(child, index, regexp, text)
            # Compare with None so a match that happens to be falsy
            # (e.g. an empty container) is still returned.
            if found is not None:
                return found
    return None
示例6: try_add_field
# 需要导入模块: from pdfminer import layout [as 别名]
# 或者: from pdfminer.layout import LTTextBox [as 别名]
def try_add_field(cls, t, obj, results, nrows, nameoffset=0):
    """Try to add ``obj`` as a field, descending into text boxes if needed.

    A text line is handed to ``cls._try_add`` once. A text box is first
    tried as a whole; only when ``cls._try_add`` returns a falsy value
    are its children tried individually, recursively. Objects of any
    other type are ignored.

    Args:
        t: Forwarded unchanged to ``cls._try_add``.
        obj: Layout object to try.
        results: Forwarded unchanged to ``cls._try_add``.
        nrows: Forwarded unchanged to ``cls._try_add``.
        nameoffset: Forwarded unchanged to ``cls._try_add``.
    """
    if isinstance(obj, layout.LTTextLine):
        cls._try_add(t, obj, results, nrows, nameoffset)
        return

    if not isinstance(obj, layout.LTTextBox):
        return

    # Recurse only if adding the whole box fails.
    if cls._try_add(t, obj, results, nrows, nameoffset):
        return
    for child in obj:
        cls.try_add_field(t, child, results, nrows, nameoffset)