当前位置: 首页>>代码示例>>Python>>正文


Python layout.LTTextBox方法代码示例

本文整理汇总了Python中pdfminer.layout.LTTextBox类的典型用法代码示例。如果您正苦于以下问题:Python layout.LTTextBox类的具体用法?Python layout.LTTextBox怎么用?Python layout.LTTextBox使用的例子?那么恭喜您, 这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块pdfminer.layout的用法示例。


在下文中一共展示了layout.LTTextBox方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: convert_pdf_to_txt

# 需要导入模块: from pdfminer import layout [as 别名]
# 或者: from pdfminer.layout import LTTextBox [as 别名]
def convert_pdf_to_txt(path):
    """Return the concatenated text of every page of the PDF at *path*.

    Each page is run through a ``PDFPageAggregator`` and the text of every
    top-level ``LTTextBox`` / ``LTTextLine`` in the resulting layout is
    collected in page order.

    Args:
        path: Filesystem path of the PDF file; opened in binary mode.

    Returns:
        A single string with the text of all matching layout objects joined
        together without any extra separator.
    """
    chunks = []
    # The context manager guarantees the file handle is released even when
    # parsing or page interpretation raises.
    with open(path, 'rb') as fp:
        parser = PDFParser(fp)
        doc = PDFDocument()
        # Parser and document must be linked to each other before
        # the document is initialised.
        parser.set_document(doc)
        doc.set_parser(parser)
        # NOTE(review): the empty string is presumably the document
        # password -- confirm against the pdfminer version in use.
        doc.initialize('')
        rsrcmgr = PDFResourceManager()
        laparams = LAParams()
        device = PDFPageAggregator(rsrcmgr, laparams=laparams)
        interpreter = PDFPageInterpreter(rsrcmgr, device)
        # Process each page contained in the document.
        for page in doc.get_pages():
            interpreter.process_page(page)
            layout = device.get_result()
            for lt_obj in layout:
                if isinstance(lt_obj, (LTTextBox, LTTextLine)):
                    chunks.append(lt_obj.get_text())
    # Joining once avoids repeated string re-allocation inside the loops.
    return ''.join(chunks)
开发者ID:opensourcesec,项目名称:Forager,代码行数:22,代码来源:pdfConverter.py

示例2: parse_layout

# 需要导入模块: from pdfminer import layout [as 别名]
# 或者: from pdfminer.layout import LTTextBox [as 别名]
def parse_layout(self, layout):
        """Walk *layout* depth-first and sort its objects into buckets.

        Container objects (figures, text boxes, text lines) are expanded in
        place so that their children are visited in document order.  The
        remaining objects are dispatched on their *exact* type:

        * ``LTTextLineHorizontal`` is appended to ``self.texts``.
        * ``LTRect`` narrower than 1.0 is appended to ``self.verticals``;
          otherwise, if shorter than 1.0, to ``self.horizontals``.  In both
          cases ``self._adjust_to_close`` is called first with the target
          list and the relevant coordinate name.  Other rectangles are
          dropped.
        * ``LTImage`` is appended to ``self.images``.
        * ``LTCurve``, ``LTChar`` and ``LTLine`` are ignored.

        Any other type trips an assertion.
        """
        # Explicit stack; reversed so that pop() yields items in order.
        pending = list(layout)
        pending.reverse()
        while pending:
            node = pending.pop()
            # Exact type comparison is deliberate: subclasses must not
            # fall into a parent's branch.
            kind = type(node)

            if kind in [LTFigure, LTTextBox, LTTextLine, LTTextBoxHorizontal]:
                children = list(node)
                children.reverse()
                pending.extend(children)
                continue

            if kind == LTTextLineHorizontal:
                self.texts.append(node)
                continue

            if kind == LTRect:
                if node.width < 1.0:
                    self._adjust_to_close(node, self.verticals, 'x0')
                    self.verticals.append(node)
                elif node.height < 1.0:
                    self._adjust_to_close(node, self.horizontals, 'y0')
                    self.horizontals.append(node)
                continue

            if kind == LTImage:
                self.images.append(node)
                continue

            if kind == LTCurve or kind == LTChar or kind == LTLine:
                # Recognised but intentionally ignored.
                continue

            assert False, "Unrecognized type: %s" % kind
开发者ID:johnlinp,项目名称:pdf-to-markdown,代码行数:27,代码来源:pile.py

示例3: count_rows

# 需要导入模块: from pdfminer import layout [as 别名]
# 或者: from pdfminer.layout import LTTextBox [as 别名]
def count_rows(cls, t, o, offset=0):
        """Count the text lines under *o* that look like a name-column entry.

        A text box contributes the sum of the counts of its children.  A text
        line counts as 1 when its left edge (``o.bbox[0] + offset``) lies
        within 0.2 of the name column's ``l`` value and its entry text
        matches the column's regex; anything else counts as 0.

        Args:
            t: Object exposing ``col_info``, indexed by ``TITable.NAME``.
            o: Layout object to inspect.
            offset: Value added to the line's left coordinate before it is
                compared with the column position.

        Returns:
            The number of matching text lines.
        """
        name_col = t.col_info[TITable.NAME]

        if isinstance(o, layout.LTTextBox):
            return sum(cls.count_rows(t, child, offset) for child in o)

        if not isinstance(o, layout.LTTextLine):
            return 0

        entry = cls.get_entry_text(o)
        left_edge = o.bbox[0] + offset
        if abs(name_col.l - left_edge) < 0.2 and name_col.regex.match(entry):
            return 1
        return 0
开发者ID:bx,项目名称:bootloader_instrumentation_suite,代码行数:15,代码来源:parse_am37x_register_tables.py

示例4: _process_layout

# 需要导入模块: from pdfminer import layout [as 别名]
# 或者: from pdfminer.layout import LTTextBox [as 别名]
def _process_layout(self, layout):
        """Process an LTPage layout and return a list of elements."""
        # Here we just group text into paragraphs
        elements = []
        for lt_obj in layout:
            if isinstance(lt_obj, LTTextBox) or isinstance(lt_obj, LTTextLine):
                elements.append(Paragraph(lt_obj.get_text().strip()))
            elif isinstance(lt_obj, LTFigure):
                # Recursive...
                elements.extend(self._process_layout(lt_obj))
        return elements 
开发者ID:mcs07,项目名称:ChemDataExtractor,代码行数:13,代码来源:pdf.py

示例5: get_text_obj

# 需要导入模块: from pdfminer import layout [as 别名]
# 或者: from pdfminer.layout import LTTextBox [as 别名]
def get_text_obj(cls, obj, index, regexp, text):
        otext = cls.get_entry_text(obj)
        if otext == text:
            return obj
        else:
            if isinstance(obj, layout.LTTextBox):
                i = 0
                for l in obj:
                    ret = cls.get_text_obj(l, text)
                    if ret:
                        return ret
            return None 
开发者ID:bx,项目名称:bootloader_instrumentation_suite,代码行数:14,代码来源:parse_am37x_register_tables.py

示例6: try_add_field

# 需要导入模块: from pdfminer import layout [as 别名]
# 或者: from pdfminer.layout import LTTextBox [as 别名]
def try_add_field(cls, t, obj, results, nrows, nameoffset=0):
        """Offer *obj* to ``cls._try_add``, descending into text boxes.

        A text line is handed to ``cls._try_add`` once.  A text box is first
        tried as a whole; only when that attempt returns a falsy value is
        each of its children tried recursively.  Objects of any other type
        are ignored.  All arguments are forwarded unchanged to
        ``cls._try_add``.
        """
        if isinstance(obj, layout.LTTextLine):
            cls._try_add(t, obj, results, nrows, nameoffset)
            return

        if not isinstance(obj, layout.LTTextBox):
            return

        if cls._try_add(t, obj, results, nrows, nameoffset):
            # The whole box was accepted; no need to look at its lines.
            return

        for child in obj:
            cls.try_add_field(t, child, results, nrows, nameoffset)
开发者ID:bx,项目名称:bootloader_instrumentation_suite,代码行数:9,代码来源:parse_am37x_register_tables.py


注:本文中的pdfminer.layout.LTTextBox方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。