当前位置: 首页>>代码示例>>Python>>正文


Python PdfFileReader.getNumPages方法代码示例

本文整理汇总了Python中pyPdf.PdfFileReader.getNumPages方法的典型用法代码示例。如果您正苦于以下问题:Python PdfFileReader.getNumPages方法的具体用法?Python PdfFileReader.getNumPages怎么用?Python PdfFileReader.getNumPages使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在pyPdf.PdfFileReader的用法示例。


在下文中一共展示了PdfFileReader.getNumPages方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: _dl_ctrl_list

# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# Note: getNumPages is an instance method and cannot be imported on its own; call it on a reader object, e.g. PdfFileReader(stream).getNumPages()
def _dl_ctrl_list():
    """Download the Control List PDF and sanity-check its page count.

    The relative URL comes from ``_get_ctrl_list_url()`` and is prefixed
    with ``config.sia_base``. The response body is written to
    ``config.pdf_dst``, the saved file is parsed with ``PdfFileReader`` and
    its page count must satisfy
    ``config.pdf_pages[0] <= pages <= config.pdf_pages[1]``.

    Returns:
        True when the file was downloaded, could be parsed and has an
        acceptable page count; False otherwise (the reason is logged).
    """
    url = _get_ctrl_list_url()
    if not url:
        logging.error("Could not extract Control List PDF URL")
        return False

    url = config.sia_base + url

    logging.info("Downloading Control List PDF")
    try:
        ret = urllib2.urlopen(url)
    except urllib2.URLError:
        logging.error("An error occurred when downloading Control List PDF %s" % (url))
        return False

    # Close both the response and the destination file even if the
    # transfer fails half-way.
    try:
        with open(config.pdf_dst, 'wb') as dst:
            dst.write(ret.read())
    finally:
        ret.close()

    try:
        # A PDF is binary data, so open it with 'rb'. The handle is kept
        # open while the pages are counted because the reader may still
        # need the stream at that point.
        with open(config.pdf_dst, 'rb') as src:
            num_pages = PdfFileReader(src).getNumPages()
    except (IOError, PdfReadError):
        logging.error("An error occurred when attempting to open the PDF")
        return False

    min_pages, max_pages = config.pdf_pages[0], config.pdf_pages[1]
    if not min_pages <= num_pages <= max_pages:
        logging.error("PDF page number %d is out of range" % (num_pages))
        return False

    return True
开发者ID:MartinWetterwald,项目名称:Mellovvac,代码行数:32,代码来源:sia.py

示例2: test_concat_pdf_files

# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# Note: getNumPages is an instance method and cannot be imported on its own; call it on a reader object, e.g. PdfFileReader(stream).getNumPages()
    def test_concat_pdf_files( self ):
        """Concatenate docs/a.pdf and docs/b.pdf and verify the page count.

        Writes the combined document to docs/c.pdf, re-reads it and asserts
        that it has as many pages as both inputs together. The output file
        is removed again once the assertion has passed.
        """
        out_path = r"docs/c.pdf"

        # Remove a stale output from an earlier run; a missing file is fine,
        # anything other than an OS-level error should surface.
        try:
            os.unlink( out_path )
        except OSError:
            pass

        with open( r"docs/a.pdf", 'rb' ) as file_a:
            with open( r"docs/b.pdf", 'rb' ) as file_b:
                input_a = PdfFileReader( file_a )
                input_b = PdfFileReader( file_b )

                output = PdfFileWriter()
                for reader in ( input_a, input_b ):
                    for x in range( reader.getNumPages() ):
                        output.addPage( reader.getPage( x ) )

                # The inputs stay open until the writer has serialised the
                # pages, since the page objects may still read from them.
                with open( out_path, 'wb' ) as outputStream:
                    output.write( outputStream )

                count = input_a.getNumPages() + input_b.getNumPages()

        with open( out_path, 'rb' ) as check_file:
            check = PdfFileReader( check_file )
            self.assertEqual( count, check.getNumPages() )
        os.unlink( out_path )
开发者ID:bkulyk,项目名称:pyMailMergeService,代码行数:27,代码来源:testPyPDF.py

示例3: duplicated_pdf

# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# Note: getNumPages is an instance method and cannot be imported on its own; call it on a reader object, e.g. PdfFileReader(stream).getNumPages()
def duplicated_pdf(stream):
    """Render an HTML stream twice and return both renderings as one PDF.

    The stream is converted once with an "Original" banner and, after being
    rewound, once more with a "Duplicado" banner; each banner is passed as
    both extra arguments of ``convert``. The pages of the first rendering
    are followed by the pages of the second.

    Returns the bytes of the combined PDF.
    """
    converter = html_to_pdf.HTMLToPDFConverter()
    original_banner = "<center><h3>-- Original --</h3></center>"
    duplicate_banner = "<center><h3>-- Duplicado --</h3></center>"

    first_rendering = converter.convert(stream, original_banner, original_banner)
    original_reader = PdfFileReader(StringIO(first_rendering))

    # Rewind so the second conversion sees the HTML from the start again.
    stream.seek(0)
    second_rendering = converter.convert(stream, duplicate_banner, duplicate_banner)
    duplicate_reader = PdfFileReader(StringIO(second_rendering))

    writer = PdfFileWriter()
    for reader in (original_reader, duplicate_reader):
        for page_index in xrange(reader.getNumPages()):
            writer.addPage(reader.getPage(page_index))

    combined = StringIO()
    writer.write(combined)
    return combined.getvalue()
开发者ID:kailIII,项目名称:RecibosCoop,代码行数:28,代码来源:__init__.py

示例4: main

# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# Note: getNumPages is an instance method and cannot be imported on its own; call it on a reader object, e.g. PdfFileReader(stream).getNumPages()
def main(output_file, input_files):
    print "****** \"" + output_file + "\" への書き込み開始 ******"

    output = PdfFileWriter()
    total_pages = 0

    for f in input_files:
        # expect filename as "*.pdf"
        if f[-4:] != ".pdf":
            print "skipped file: ", f
            continue
        else:
            input = PdfFileReader(file(f, 'rb'))
            num_pages = input.getNumPages()
            total_pages += num_pages
            print f, "->", str(num_pages) + " ページ"
            for i in xrange(0, num_pages):
                output.addPage(input.getPage(i))

    outputStream = file(output_file, 'wb')
    output.write(outputStream)
    print total_pages, "ページ 書き込み"
    outputStream.close()

    print
    print "### チェック ###"

    resultFile = PdfFileReader(file(output_file, 'rb'))
    num_pages = resultFile.getNumPages()
    print output_file, "->", str(num_pages), "ページあります"

    print "****** \"" + output_file + "\" への書き込み完了 ******"
开发者ID:tinypiece,项目名称:tools,代码行数:34,代码来源:concat_pdf.py

示例5: test2

# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# Note: getNumPages is an instance method and cannot be imported on its own; call it on a reader object, e.g. PdfFileReader(stream).getNumPages()
    def test2(self):
        """Generate several 'n-up' documents from a 'legal' sample and check them.

        For each layout the output must have ceil(pages / n) pages, and the
        words extracted from every output page must equal the matching
        group of source page numbers.
        """
        # minipages are squeezed, i.e. they lose their original page ratio...
        # needs to be addressed later...

        for src_path in ("samples/test-legal-p.pdf",):
            stem = os.path.splitext(src_path)[0]
            for per_sheet in (2, 4, 8, 9):
                out_name = stem + "-%dup.pdf" % per_sheet
                out_path = os.path.join(".", out_name)
                generateNup(src_path, per_sheet, out_path, verbose=False) # , dirs="UL")

                # assert output has correct number of pages
                src_pages = PdfFileReader(file(src_path, "rb")).getNumPages()
                out_reader = PdfFileReader(file(out_path, "rb"))
                out_pages = out_reader.getNumPages()
                expected_pages = math.ceil(src_pages / float(per_sheet))
                self.assertEqual(out_pages, expected_pages)

                # assert output page(s) has/have correct text content
                for page_no in range(out_pages):
                    words = out_reader.getPage(page_no).extractText().split()
                    labels = [str(num) for num in range(src_pages)]
                    self.assertEqual(words, group(labels, per_sheet)[page_no])
开发者ID:nunb,项目名称:pdfnup,代码行数:27,代码来源:test_pdfnup.py

示例6: merge

# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# Note: getNumPages is an instance method and cannot be imported on its own; call it on a reader object, e.g. PdfFileReader(stream).getNumPages()
def merge(fppath, bppath, outputpath, no_delete, fed_backwards):
  """Interleave the pages of two PDFs (front sides, back sides) into one.

  Args:
    fppath: path of the PDF holding the front pages.
    bppath: path of the PDF holding the back pages.
    outputpath: destination path; a leading "~" is expanded.
    no_delete: when false, pages whose extracted text is empty are dropped
      from the result.
    fed_backwards: when true, back pages are taken from the end of the back
      file (last back page pairs with the first front page).

  Every front page is emitted, followed by its back page when one exists.
  """
  with open(fppath, 'rb') as fp_handle, open(bppath, 'rb') as bp_handle:
    fpfile = PdfFileReader(fp_handle)
    bpfile = PdfFileReader(bp_handle)
    back_count = bpfile.getNumPages()

    outputpages = []
    for i in range(fpfile.getNumPages()):
      outputpages.append(fpfile.getPage(i))
      back_index = back_count - i - 1 if fed_backwards else i
      # Explicit bounds check: a negative index would not raise but wrap
      # around and silently re-use a back page from the other end.
      if 0 <= back_index < back_count:
        outputpages.append(bpfile.getPage(back_index))

    if not no_delete:
      outputpages = [page for page in outputpages if page.extractText() != '']

    outputfile = PdfFileWriter()
    for page in outputpages:
      outputfile.addPage(page)

    # Binary mode, and closed deterministically so the data is flushed. The
    # inputs are still open here because the pages may read from them.
    with open(os.path.expanduser(outputpath), 'wb') as out_handle:
      outputfile.write(out_handle)
开发者ID:mgarriott,项目名称:PDFMerger,代码行数:27,代码来源:merge.py

示例7: add_terms_and_conditions

# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# Note: getNumPages is an instance method and cannot be imported on its own; call it on a reader object, e.g. PdfFileReader(stream).getNumPages()
    def add_terms_and_conditions(self, ids, original_report_pdf,
                                 original_report):
        """Append the company's terms-and-conditions PDF to a report PDF.

        The document language is read from the record field named by
        ``original_report.terms_conditions_language_field``. Among the
        company's terms and conditions, the entry whose language equals the
        document language is used, falling back to the entry whose language
        is ``'default'``.

        Returns the combined PDF content, or ``original_report_pdf``
        unchanged when no usable terms and conditions are found.
        """
        matched_terms = False
        default_terms = False

        # Looked up but not used below; presumably kept for the todo that
        # follows.
        user = self.env['res.users'].browse(self._uid)

        # todo change user language to report language (client language)

        language_field = original_report.terms_conditions_language_field
        model = original_report.model

        record = self.env[model].browse(ids)
        scope = {'o': record}
        # NOTE(review): this evaluates a string built from the field name.
        # The mode/nocopy keywords mean `eval` is not the builtin here;
        # presumably a sandboxed evaluator - confirm against the imports.
        eval('document_language = o.%s' % language_field, scope,
             mode="exec", nocopy=True)
        document_language = scope.get('document_language',
                                      self._context.get('lang'))

        # todo check language
        for terms in record.company_id.terms_and_conditions:
            language = terms.language
            if language == document_language:
                matched_terms = base64.decodestring(terms.datas)
            if language == 'default':
                default_terms = base64.decodestring(terms.datas)

        chosen_terms = matched_terms or default_terms or False
        if not chosen_terms:
            return original_report_pdf

        writer = PdfFileWriter()
        report_reader = PdfFileReader(StringIO(original_report_pdf))
        terms_reader = PdfFileReader(StringIO(chosen_terms))

        # Report pages first, then the terms and conditions.
        for reader in (report_reader, terms_reader):
            for page_index in range(0, reader.getNumPages()):
                writer.addPage(reader.getPage(page_index))

        merged = StringIO()
        writer.write(merged)
        return merged.getvalue()
开发者ID:Niboo,项目名称:niboo-community,代码行数:60,代码来源:report.py

示例8: imain

# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# Note: getNumPages is an instance method and cannot be imported on its own; call it on a reader object, e.g. PdfFileReader(stream).getNumPages()
def imain(args):
    """Run table extraction for the pages selected in ``args``.

    When ``args.page`` is empty, every page of ``args.infile`` is selected
    (1-based page numbers, stored as strings). If any of the ``check*``
    options is set, each page is processed with
    ``outfilename=args.outfile`` and nothing is collected. Otherwise the
    cells returned by ``process_page`` are accumulated and handed to
    ``output``; ``args.outfile`` is replaced by ``sys.stdout`` when it is
    None.
    """
    cells = []
    if not args.page:
        print('p argument not passed. Converting all pages.')
        with open(args.infile, 'rb') as pdf_file:
            total_pages = PdfFileReader(pdf_file).getNumPages()
        print("Total Number of Pages in " + args.infile + " are " + str(total_pages))
        # One list entry per page. list.extend(str(pg)) would split a
        # multi-digit number such as "10" into "1" and "0".
        args.page = [str(pg) for pg in range(1, total_pages + 1)]

    # Keyword arguments shared by every process_page call.
    page_options = dict(
        bitmap=args.bitmap,
        checkcrop=args.checkcrop,
        checklines=args.checklines,
        checkdivs=args.checkdivs,
        checkcells=args.checkcells,
        whitespace=args.whitespace,
        boxes=args.boxes,
        greyscale_threshold=args.greyscale_threshold,
        page=args.page,
        crop=args.crop,
        line_length=args.line_length,
        bitmap_resolution=args.bitmap_resolution,
        name=args.name,
        pad=args.pad,
        white=args.white,
        black=args.black)

    if args.checkcrop or args.checklines or args.checkdivs or args.checkcells:
        for pgs in args.page:
            print("Processing Page #" + pgs)
            process_page(args.infile, pgs,
                         outfilename=args.outfile, **page_options)

    else:
        for pgs in args.page:
            print("Processing Page #" + pgs)
            cells.extend(process_page(args.infile, pgs, **page_options))

            # NOTE(review): output() runs once per page with all cells
            # collected so far. Presumably it was meant to run once after
            # the loop - confirm before moving it.
            filenames = dict()
            if args.outfile is None:
                args.outfile = sys.stdout
            filenames["{0}_filename".format(args.t)] = args.outfile
            output(cells, args.page, name=args.name, infile=args.infile, output_type=args.t, **filenames)
开发者ID:shry15harsh,项目名称:pdf-table-extract,代码行数:58,代码来源:scripts.py

示例9: makeOnePagersOld

# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# Note: getNumPages is an instance method and cannot be imported on its own; call it on a reader object, e.g. PdfFileReader(stream).getNumPages()
def makeOnePagersOld(filename='GPO-CONAN-REV-2014.pdf', path='pdf/'):
    """Split a PDF into one single-page PDF per page.

    Args:
        filename: path of the PDF to split.
        path: prefix prepended to every output name; it is concatenated
            as-is, so include the trailing separator.

    The page count is printed first. Output files are named
    ``pageindex-NN.pdf`` where NN is the zero-based page index.
    """
    # The source stays open for the whole loop because the pages are read
    # from it while each output file is written.
    with open(filename, 'rb') as source:
        infile = PdfFileReader(source)
        page_count = infile.getNumPages()
        print(page_count)
        for i in range(page_count):
            outfile = PdfFileWriter()
            outfile.addPage(infile.getPage(i))
            with open(path + 'pageindex-%02d.pdf' % i, 'wb') as outputStream:
                outfile.write(outputStream)
开发者ID:aih,项目名称:ConstitutionAnnotated,代码行数:12,代码来源:pdfconvert.py

示例10: testSplitPdfBasic

# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# Note: getNumPages is an instance method and cannot be imported on its own; call it on a reader object, e.g. PdfFileReader(stream).getNumPages()
 def testSplitPdfBasic(self):
     """Split beyond the last page: only the first result file is created.

     The split point is set five pages past the end of ``self.PDF1``. The
     first result must exist and have as many pages as the source; the
     second result must not exist.
     """
     pdfHelper = PdfHelper()
     with open(self.PDF1, "rb") as file1:
         pdfReader1 = PdfFileReader(file1)
         splitPoint = pdfReader1.getNumPages() + 5

         # the split point is greater than the number of pages
         pdfHelper.split_pdfs(file1, splitPoint, self.RESULT_FILE_SPLIT1, self.RESULT_FILE_SPLIT2)
         assert os.path.exists(self.RESULT_FILE_SPLIT1)
         assert not os.path.exists(self.RESULT_FILE_SPLIT2)

         # PDFs are binary: read the result with "rb", like the source.
         with open(self.RESULT_FILE_SPLIT1, "rb") as resultFile:
             pdfReaderResult = PdfFileReader(resultFile)
             assert pdfReader1.getNumPages() == pdfReaderResult.getNumPages()
开发者ID:icruces,项目名称:blog-PDFMerging,代码行数:15,代码来源:pdf_core_test.py

示例11: testSplitPdf

# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# Note: getNumPages is an instance method and cannot be imported on its own; call it on a reader object, e.g. PdfFileReader(stream).getNumPages()
 def testSplitPdf(self):
     """Split two pages before the end and check both result files.

     The first result must contain ``splitPoint`` pages and the second the
     remaining pages of ``self.PDF1``.
     """
     pdfHelper = PdfHelper()
     with open(self.PDF1, "rb") as file1:
         pdfReader1 = PdfFileReader(file1)
         # presumably the sample has more than two pages - verify fixture
         splitPoint = pdfReader1.getNumPages() - 2

         pdfHelper.split_pdfs(file1, splitPoint, self.RESULT_FILE_SPLIT1, self.RESULT_FILE_SPLIT2)
         assert os.path.exists(self.RESULT_FILE_SPLIT1)
         assert os.path.exists(self.RESULT_FILE_SPLIT2)

         # PDFs are binary: read the results with "rb", like the source.
         with open(self.RESULT_FILE_SPLIT1, "rb") as firstPart:
             with open(self.RESULT_FILE_SPLIT2, "rb") as secondPart:
                 splitFile1 = PdfFileReader(firstPart)
                 splitFile2 = PdfFileReader(secondPart)
                 assert splitFile1.getNumPages() == splitPoint
                 assert splitFile2.getNumPages() == pdfReader1.getNumPages() - splitPoint
开发者ID:icruces,项目名称:blog-PDFMerging,代码行数:17,代码来源:pdf_core_test.py

示例12: testMergin

# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# Note: getNumPages is an instance method and cannot be imported on its own; call it on a reader object, e.g. PdfFileReader(stream).getNumPages()
    def testMergin(self):
        """Merge two PDFs and check the result has the sum of their pages."""
        helper = PdfHelper()
        first_source = open(self.PDF1, "rb")
        second_source = open(self.PFD2, "rb")

        assert not os.path.exists(self.RESULT_FILE_MERGIN)
        # NOTE(review): the merge target is hard-coded while the assertions
        # use self.RESULT_FILE_MERGIN; presumably both name the same file.
        merged_path = os.path.join('data', 'result.pdf')
        helper.merge_pdfs((first_source, second_source), merged_path)
        assert os.path.exists(self.RESULT_FILE_MERGIN)

        first_reader = PdfFileReader(first_source)
        second_reader = PdfFileReader(second_source)
        merged_reader = PdfFileReader(file(self.RESULT_FILE_MERGIN, "rb"))

        expected_pages = first_reader.getNumPages() + second_reader.getNumPages()
        assert expected_pages == merged_reader.getNumPages()
开发者ID:icruces,项目名称:blog-PDFMerging,代码行数:17,代码来源:pdf_core_test.py

示例13: getPDFContents

# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# Note: getNumPages is an instance method and cannot be imported on its own; call it on a reader object, e.g. PdfFileReader(stream).getNumPages()
def getPDFContents(path):
    """Return the lower-cased, whitespace-normalised text of a PDF.

    The text of every page is extracted and lower-cased, non-breaking
    spaces (U+00A0) are replaced by plain spaces, and all runs of
    whitespace are collapsed to single spaces.

    This is best effort. An encrypted file is reported and yields an empty
    string. Any error is reported by printing the exception's ``args``, and
    whatever text was read before the failure is still returned in
    normalised form.
    """
    words = []
    try:
        with open(path, "rb") as pdf_file:
            pdf = PdfFileReader(pdf_file)
            if pdf.isEncrypted:
                print("%s is encrypted!" % path)
            else:
                # Splitting page by page keeps page boundaries as word
                # boundaries without building one large string first.
                for i in range(0, pdf.getNumPages()):
                    text = pdf.getPage(i).extractText().lower()
                    words.extend(text.replace(u"\xa0", u" ").split())
    except Exception as e:
        # Deliberate catch-all: a single unreadable file must not abort the
        # caller, so report it and fall through.
        print(e.args)
    return u" ".join(words)
开发者ID:mellowizz,项目名称:metastudy,代码行数:27,代码来源:pdfs.py

示例14: parse_file

# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# Note: getNumPages is an instance method and cannot be imported on its own; call it on a reader object, e.g. PdfFileReader(stream).getNumPages()
def parse_file(pdfFile,nameFile):
  """Split a PDF into per-person documents and e-mail each one.

  Args:
    pdfFile: path of the PDF to split.
    nameFile: path passed to ``get_names`` to obtain the name lookup; its
      keys are the names searched for in the page text.

  A page whose text contains one of the names (case-insensitive) starts a
  new document; following pages are added to it until the next match. Each
  finished document is passed to ``send_email``. Pages before the first
  match are not sent.
  """
  with open(pdfFile, "rb") as pdfHandle:
    pdfReader = PdfFileReader(pdfHandle)

    # read the names and emails from csv file
    names = get_names(nameFile)

    # create an instance in SMTP server
    smtp = smtplib.SMTP('localhost')
    try:
      # loop through the pages of the pdf
      # when a name is found, write pages to a new pdf until next name is found
      # then write the file and email as attachment
      pdfWriter = None   # no document started until the first name is found
      prevName = ""
      for i in range(pdfReader.getNumPages()):
        page = pdfReader.getPage(i)
        pageText = page.extractText().lower()   # extract the pdf text
        for name in names.keys():
          if name.lower() in pageText:
            if pdfWriter is not None:   # send the current pdf
              send_email(smtp,pdfWriter,prevName,names)

            pdfWriter = PdfFileWriter()   # start a new document with this page
            prevName = name               # remember whose document this is
            break
        if pdfWriter is not None:
          pdfWriter.addPage(page)

      # send the last file
      if pdfWriter is not None:
        send_email(smtp,pdfWriter,prevName,names)
    finally:
      # quit the smtp server, also when splitting or sending failed
      smtp.quit()
开发者ID:fyockm,项目名称:pdf_split_py,代码行数:37,代码来源:pdf_split.py

示例15: add_omr_marks

# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# Note: getNumPages is an instance method and cannot be imported on its own; call it on a reader object, e.g. PdfFileReader(stream).getNumPages()
    def add_omr_marks(self, pdf_data, is_latest_document):
        """Return a copy of the PDF with an OMR layer on even-indexed pages.

        Args:
            pdf_data: content of the source PDF.
            is_latest_document: passed on (combined with the page test
                below) to ``_compute_marks`` to flag the latest page.

        Every page with an even zero-based index gets the layer built by
        ``_build_omr_layer`` merged onto it; other pages are copied as-is.

        Returns the content of the resulting PDF.
        """
        # Documentation
        # http://meteorite.unm.edu/site_media/pdf/reportlab-userguide.pdf
        # https://pythonhosted.org/PyPDF2/PdfFileReader.html
        # https://stackoverflow.com/a/17538003
        # https://gist.github.com/kzim44/5023021
        # https://www.blog.pythonlibrary.org/2013/07/16/
        #   pypdf-how-to-write-a-pdf-to-memory/
        self.ensure_one()

        pdf_buffer = StringIO.StringIO()
        pdf_buffer.write(pdf_data)

        existing_pdf = PdfFileReader(pdf_buffer)
        output = PdfFileWriter()
        total_pages = existing_pdf.getNumPages()

        # print latest omr mark on latest even page (recto)
        # NOTE(review): total_pages // 2 is the index of the last even page
        # only for some page counts (e.g. 1 or 4 pages, but not 2, 3 or 6).
        # Confirm this is the intended page.
        latest_omr_page = total_pages // 2

        for page_number in range(total_pages):
            page = existing_pdf.getPage(page_number)
            # only print omr marks on even pages (recto); compare by value,
            # because identity comparison on ints is implementation-dependent
            if page_number % 2 == 0:
                is_latest_page = is_latest_document and \
                    page_number == latest_omr_page
                marks = self._compute_marks(is_latest_page)
                omr_layer = self._build_omr_layer(marks)
                page.mergePage(omr_layer)
            output.addPage(page)

        out_buffer = StringIO.StringIO()
        output.write(out_buffer)

        return out_buffer.getvalue()
开发者ID:maxime-beck,项目名称:compassion-modules,代码行数:37,代码来源:communication_job.py


注:本文中的pyPdf.PdfFileReader.getNumPages方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。