当前位置: 首页>>代码示例>>Python>>正文


Python PDFDocument._parse_everything方法代码示例

本文整理汇总了Python中pdfminer.pdfparser.PDFDocument._parse_everything方法的典型用法代码示例。如果您正苦于以下问题:Python PDFDocument._parse_everything方法的具体用法?Python PDFDocument._parse_everything怎么用?Python PDFDocument._parse_everything使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在pdfminer.pdfparser.PDFDocument的用法示例。


在下文中一共展示了PDFDocument._parse_everything方法的1个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: PDFExploreCmd

# Required import: from pdfminer.pdfparser import PDFDocument [as alias]
# Note: _parse_everything is a method of PDFDocument, so it cannot be imported on its own; call it on a PDFDocument instance.

#.........这里部分代码省略.........
    @intarg(1)
    def do_rtok(self, arg):
        """Read the next X tokens, X being the supplied argument.

        Each token is converted with ``str()``; anything longer than 20
        characters is cut to its first 20 characters followed by a marker
        giving the number of characters dropped. The tokens are printed on
        one line separated by spaces, and a notice is printed when fewer
        than ``arg`` tokens could be read.
        """
        collected = []
        try:
            for _ in range(arg):
                _, raw = self.parser.nexttoken()
                text = str(raw)
                overflow = len(text) - 20
                if overflow > 0:
                    text = "%s[...(%d)]" % (text[:20], overflow)
                collected.append(text)
        except PSEOF:
            # Running out of input simply ends the read early.
            pass
        print(' '.join(collected))
        if len(collected) != arg:
            print("End of file reached")
    
    @intarg(1)
    def do_ptok(self, arg):
        """Peek the next X tokens, X being the supplied argument.

        Your current position will not change: the lexer position is
        recorded before reading and handed back to ``do_setpos``
        afterwards, even if reading the tokens raises.
        """
        pos = self.parser.lex.lexpos
        try:
            self.do_rtok(arg)
        finally:
            # Always rewind, so a failed read cannot leave the parser
            # somewhere other than where the peek started.
            self.do_setpos(pos)
    
    def do_robj(self, arg):
        """Read the next object and make it the 'current' object.

        The (object id, generation number, object) triple returned by
        ``self.doc.readobj()`` is stored in ``self.current_obj``, then
        ``do_st`` is invoked with an empty argument.
        """
        next_id, next_gen, next_obj = self.doc.readobj()
        self.current_obj = (next_id, next_gen, next_obj)
        self.do_st('')
    
    @intarg()
    def do_sobj(self, arg):
        """Select object with ID X. The object has to have been read already.

        The id is looked up first in ``self.doc._cached_objs`` and then in
        ``self.doc._parsed_objs``. On a hit the object becomes the current
        object (stored with generation number 0) and ``do_st`` is invoked.
        On a miss a notice is printed, along with whatever
        ``self.doc.find_obj_ref`` reports about the id.
        """
        found = None
        # Look the caches up lazily so the second one is only touched when
        # the first one misses.
        for cache_name in ('_cached_objs', '_parsed_objs'):
            cache = getattr(self.doc, cache_name)
            if arg in cache:
                found = cache[arg]
                break
        else:
            print("Object hasn't been read yet.")
            strmid, index = self.doc.find_obj_ref(arg)
            if index is not None:
                print("However, our object id is in a xref")
                if strmid:
                    print("Stream ID: %d" % strmid)
                print("Position: %d" % index)
        if found is None:
            return
        self.current_obj = (arg, 0, found)
        self.do_st('')
    
    def do_dbgobj(self, arg):
        """Enter in debug mode with current obj as 'obj' in the local scope.

        Prints "No current obj." and returns when ``self.current_obj`` is
        falsy. Otherwise the current object triple is unpacked into the
        locals ``objid``, ``genno`` and ``obj`` and ``pdb.set_trace()`` is
        called. ``arg`` is not used.
        """
        if not self.current_obj:
            print("No current obj.")
            return
        # These locals are not used by the code itself; they exist so they
        # can be inspected by name from the debugger prompt.
        objid, genno, obj = self.current_obj
        import pdb; pdb.set_trace()
    
    def do_readall(self, arg):
        """Read all objects in the document.

        Calls ``self.doc._parse_everything()``, prints the size of
        ``self.doc._cached_objs``, then lists the read object ids through
        ``do_whatisread``.
        """
        self.doc._parse_everything()
        total = len(self.doc._cached_objs)
        print("Read %d objects:" % total)
        self.do_whatisread('')
    
    def do_dumpdata(self, arg):
        """For each read stream, print out the decoded data it contains.

        Every (id, object) pair from ``self._cached_objects()`` is printed
        as an id line followed by the object's ``repr``. Objects exposing a
        ``get_data`` attribute additionally have the ``repr`` of
        ``get_data()`` printed.
        """
        for objid, obj in self._cached_objects():
            print("Dumping obj id: %d" % objid)
            print(repr(obj))
            if not hasattr(obj, 'get_data'):
                continue
            print(repr(obj.get_data()))
    
    def do_whatisread(self, arg):
        """Print a list of all read object ids.

        The ids are the first element of each pair yielded by
        ``self._cached_objects()``, printed in iteration order.
        """
        read_ids = []
        for objid, _ in self._cached_objects():
            read_ids.append(objid)
        print(repr(read_ids))
    
    def do_refs(self, arg):
        """Look in all read objects and find those that reference the current object.

        Prints "No current obj." when nothing is selected. Otherwise prints
        the list of parent ids from ``self._get_refs()`` whose reference
        carries the current object's id.
        """
        if not self.current_obj:
            print("No current obj.")
            return

        wanted_id, _genno, _obj = self.current_obj
        referrers = []
        for parent_id, ref in self._get_refs():
            if ref.objid == wanted_id:
                referrers.append(parent_id)
        print(repr(referrers))
    
    def do_deadrefs(self, arg):
        """Print (dead_id, host_id) for all dead references in the document.

        A reference from ``self._get_refs()`` counts as dead when its
        ``objid`` is not among the ids yielded by
        ``self._cached_objects()``.
        """
        known_ids = set()
        for objid, _ in self._cached_objects():
            known_ids.add(objid)
        dead = []
        for parent_id, ref in self._get_refs():
            if ref.objid not in known_ids:
                dead.append((ref.objid, parent_id))
        print(repr(dead))
    
    def do_quit(self, arg):
        """Quit PDFExplore.

        Closes ``self.fp`` and then terminates by raising ``SystemExit``
        with status 0.
        """
        self.fp.close()
        # Equivalent to sys.exit(0), which itself raises SystemExit(0).
        raise SystemExit(0)
    do_q = do_quit
开发者ID:doarthon,项目名称:pdfminer3k,代码行数:104,代码来源:pdfexplore.py


注:本文中的pdfminer.pdfparser.PDFDocument._parse_everything方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。