本文整理汇总了Python中pyPdf.PdfFileReader.getNumPages方法的典型用法代码示例。如果您正苦于以下问题：Python PdfFileReader.getNumPages方法的具体用法？Python PdfFileReader.getNumPages怎么用？Python PdfFileReader.getNumPages使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pyPdf.PdfFileReader的用法示例。
在下文中一共展示了PdfFileReader.getNumPages方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _dl_ctrl_list
# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import getNumPages [as 别名]
def _dl_ctrl_list():
    """Download the Control List PDF and sanity-check its page count.

    The relative URL comes from ``_get_ctrl_list_url()`` and is prefixed
    with ``config.sia_base``; the document is stored at ``config.pdf_dst``.

    Returns:
        True when the PDF was downloaded, could be parsed, and its page
        count lies in the inclusive range ``config.pdf_pages[0]`` ..
        ``config.pdf_pages[1]``. False otherwise; the reason is logged.
    """
    url = _get_ctrl_list_url()
    if not url:
        logging.error("Could not extract Control List PDF URL")
        return False

    url = config.sia_base + url
    logging.info("Downloading Control List PDF")
    try:
        ret = urllib2.urlopen(url)
    except urllib2.URLError:
        logging.info(
            "An error occurred when downloading Control List PDF %s", url)
        return False

    # The context manager guarantees the data is flushed and the handle is
    # closed before the file is opened again for parsing.
    with open(config.pdf_dst, 'wb') as pdf_out:
        pdf_out.write(ret.read())

    try:
        # PDFs are binary, so read them in 'rb' mode. The page count is
        # taken while the stream is still open because the reader pulls
        # objects from it on demand.
        with open(config.pdf_dst, 'rb') as pdf_in:
            num_pages = PdfFileReader(pdf_in).getNumPages()
    except (IOError, PdfReadError):
        logging.info("An error occurred when attempting to open the PDF")
        return False

    if not config.pdf_pages[0] <= num_pages <= config.pdf_pages[1]:
        logging.info("PDF page number %d is out of range", num_pages)
        return False
    return True
示例2: test_concat_pdf_files
# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import getNumPages [as 别名]
def test_concat_pdf_files(self):
    """Concatenate docs/a.pdf and docs/b.pdf and verify the page count.

    The merged document is written to docs/c.pdf, re-read to check that it
    holds as many pages as both inputs together, and removed afterwards.
    """
    out_path = r"docs/c.pdf"
    try:
        os.unlink(out_path)
    except OSError:
        # Nothing to clean up: no left-over file from an earlier run.
        pass

    # Both inputs stay open until the output is written, because the
    # writer reads page objects from the source streams at write time.
    with open(r"docs/a.pdf", 'rb') as stream_a, \
            open(r"docs/b.pdf", 'rb') as stream_b:
        input_a = PdfFileReader(stream_a)
        input_b = PdfFileReader(stream_b)
        output = PdfFileWriter()
        for reader in (input_a, input_b):
            for x in range(reader.getNumPages()):
                output.addPage(reader.getPage(x))
        with open(out_path, 'wb') as output_stream:
            output.write(output_stream)
        count = input_a.getNumPages() + input_b.getNumPages()

    with open(out_path, 'rb') as check_stream:
        check = PdfFileReader(check_stream)
        self.assertEqual(count, check.getNumPages())
    os.unlink(out_path)
示例3: duplicated_pdf
# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import getNumPages [as 别名]
def duplicated_pdf(stream):
    """Render an HTML stream twice and return both renderings as one PDF.

    The first rendering is labelled "Original" and the second "Duplicado";
    the pages of the second are appended after those of the first.

    Args:
        stream: Seekable file-like object (e.g. StringIO) holding the HTML.

    Returns:
        The raw content of the combined PDF.
    """
    converter = html_to_pdf.HTMLToPDFConverter()
    original_label = "<center><h3>-- Original --</h3></center>"
    duplicate_label = "<center><h3>-- Duplicado --</h3></center>"

    original_reader = PdfFileReader(
        StringIO(converter.convert(stream, original_label, original_label)))
    # Rewind so the second conversion sees the HTML from the beginning.
    stream.seek(0)
    duplicate_reader = PdfFileReader(
        StringIO(converter.convert(stream, duplicate_label, duplicate_label)))

    writer = PdfFileWriter()
    for reader in (original_reader, duplicate_reader):
        for page_index in xrange(0, reader.getNumPages()):
            writer.addPage(reader.getPage(page_index))

    result = StringIO()
    writer.write(result)
    return result.getvalue()
示例4: main
# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import getNumPages [as 别名]
def main(output_file, input_files):
    """Concatenate the given PDF files into ``output_file``.

    Entries of ``input_files`` whose name does not end in ".pdf" are
    skipped. After writing, the result is re-read and its page count is
    printed as a check.

    Args:
        output_file: Path of the PDF to create.
        input_files: Iterable of input file names, merged in order.
    """
    print("****** \"" + output_file + "\" への書き込み開始 ******")
    output = PdfFileWriter()
    total_pages = 0
    # Source streams must stay open until the output has been written,
    # because the writer reads page objects from them at write time.
    open_streams = []
    try:
        for f in input_files:
            # expect filename as "*.pdf"
            if f[-4:] != ".pdf":
                print("skipped file:  " + f)
                continue
            stream = open(f, 'rb')
            open_streams.append(stream)
            reader = PdfFileReader(stream)
            num_pages = reader.getNumPages()
            total_pages += num_pages
            print(f + " -> " + str(num_pages) + " ページ")
            for i in range(num_pages):
                output.addPage(reader.getPage(i))

        with open(output_file, 'wb') as output_stream:
            output.write(output_stream)
            print(str(total_pages) + " ページ 書き込み")
    finally:
        for stream in open_streams:
            stream.close()

    print("")
    print("### チェック ###")
    with open(output_file, 'rb') as result_stream:
        num_pages = PdfFileReader(result_stream).getNumPages()
    print(output_file + " -> " + str(num_pages) + " ページあります")
    print("****** \"" + output_file + "\" への書き込み完了 ******")
示例5: test2
# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import getNumPages [as 别名]
def test2(self):
    "Test generating several 'n-up' docs in 'legal' format."
    # minipages are squeezed, i.e. they lose their original page ratio...
    # needs to be addressed later...
    for path0 in ("samples/test-legal-p.pdf",):
        for n in (2, 4, 8, 9):
            outName = os.path.splitext(path0)[0] + "-%dup.pdf" % n
            path1 = os.path.join(".", outName)
            generateNup(path0, n, path1, verbose=False)  # , dirs="UL")

            # assert output has correct number of pages
            with open(path0, "rb") as source_stream:
                np0 = PdfFileReader(source_stream).getNumPages()
            with open(path1, "rb") as result_stream:
                result = PdfFileReader(result_stream)
                np1 = result.getNumPages()
                self.assertEqual(np1, math.ceil(np0 / float(n)))

                # assert output page(s) has/have correct text content;
                # the expected grouping is the same for every output page,
                # so it is computed once per document.
                expected = group([str(num) for num in range(np0)], n)
                for pn in range(np1):
                    text = result.getPage(pn).extractText().split()
                    self.assertEqual(text, expected[pn])
示例6: merge
# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import getNumPages [as 别名]
def merge(fppath, bppath, outputpath, no_delete, fed_backwards):
    """Interleave a front-pages PDF and a back-pages PDF into one file.

    Each front page is followed by its back page for as long as the back
    file has pages left; remaining front pages are appended on their own.

    Args:
        fppath: Path of the PDF containing the front pages.
        bppath: Path of the PDF containing the back pages.
        outputpath: Destination path; a leading ``~`` is expanded.
        no_delete: When false, pages whose extracted text is empty are
            dropped from the result.
        fed_backwards: When true, back pages are taken from the end of the
            back file towards its start.
    """
    # PDFs are binary, and both sources stay open until the output is
    # written because page objects are read from them on demand.
    with open(fppath, 'rb') as fp_stream, open(bppath, 'rb') as bp_stream:
        fpfile = PdfFileReader(fp_stream)
        bpfile = PdfFileReader(bp_stream)
        back_count = bpfile.getNumPages()

        outputpages = []
        for i in range(fpfile.getNumPages()):
            outputpages.append(fpfile.getPage(i))
            # Explicit bounds check: relying on IndexError does not work
            # for the reversed case, where a negative index would silently
            # wrap around and pick an unrelated page.
            if i < back_count:
                back_index = back_count - i - 1 if fed_backwards else i
                outputpages.append(bpfile.getPage(back_index))

        if not no_delete:
            outputpages = [page for page in outputpages
                           if page.extractText() != '']

        outputfile = PdfFileWriter()
        for page in outputpages:
            outputfile.addPage(page)
        with open(os.path.expanduser(outputpath), 'wb') as out_stream:
            outputfile.write(out_stream)
示例7: add_terms_and_conditions
# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import getNumPages [as 别名]
def add_terms_and_conditions(self, ids, original_report_pdf,
                             original_report):
    """Append the company's terms and conditions to a rendered report.

    The terms matching the document language are preferred; the entry
    whose language is ``'default'`` is used as a fallback.

    Args:
        ids: Ids of the records the report was rendered for.
        original_report_pdf: Content of the already rendered report PDF.
        original_report: Report definition; its ``model`` and
            ``terms_conditions_language_field`` attributes are read.

    Returns:
        The combined PDF content, or ``original_report_pdf`` unchanged when
        no applicable terms and conditions are found.
    """
    # todo change user language to report language (client language)
    language_field = original_report.terms_conditions_language_field
    record = self.env[original_report.model].browse(ids)
    localdict = {'o': record}
    # NOTE(review): ``eval`` is called with ``mode``/``nocopy`` keywords,
    # so it is presumably a safe_eval-style helper imported by this file
    # rather than the builtin - confirm. ``language_field`` is interpolated
    # into the evaluated code and must come from trusted configuration.
    eval('document_language = o.%s' % language_field, localdict,
         mode="exec", nocopy=True)
    document_language = localdict.get('document_language',
                                      self._context.get('lang'))

    # todo check language
    terms_decoded = False
    default_terms_decoded = False
    for terms in record.company_id.terms_and_conditions:
        if terms.language == document_language:
            terms_decoded = base64.decodestring(terms.datas)
        if terms.language == 'default':
            default_terms_decoded = base64.decodestring(terms.datas)
    terms_decoded = terms_decoded or default_terms_decoded

    if not terms_decoded:
        return original_report_pdf

    # Build both readers first so a malformed attachment fails before any
    # page is copied, then append the report pages followed by the terms.
    readers = [PdfFileReader(StringIO(content))
               for content in (original_report_pdf, terms_decoded)]
    writer = PdfFileWriter()
    for reader in readers:
        for page_number in range(0, reader.getNumPages()):
            writer.addPage(reader.getPage(page_number))
    stream_to_write = StringIO()
    writer.write(stream_to_write)
    return stream_to_write.getvalue()
示例8: imain
# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import getNumPages [as 别名]
def imain(args):
    """Process the requested pages of ``args.infile``.

    When ``args.page`` is empty, every page of the PDF is selected. If any
    of the ``check*`` flags is set, each page is processed in check mode
    and no table output is produced; otherwise the cells returned for all
    pages are collected and handed to ``output``.

    Args:
        args: Parsed command-line namespace. ``args.page`` and
            ``args.outfile`` may be filled in by this function.
    """
    if not args.page:
        print('p argument not passed. Converting all pages.')
        with open(args.infile, 'rb') as pdf_stream:
            num_pages = PdfFileReader(pdf_stream).getNumPages()
        print("Total Number of Pages in " + args.infile + " are "
              + str(num_pages))
        # One list entry per page number. Extending the list with the
        # string itself would split "10" into the pages "1" and "0".
        args.page = [str(pg) for pg in range(1, num_pages + 1)]

    # Keyword arguments shared by both processing modes.
    options = dict(
        bitmap=args.bitmap,
        checkcrop=args.checkcrop,
        checklines=args.checklines,
        checkdivs=args.checkdivs,
        checkcells=args.checkcells,
        whitespace=args.whitespace,
        boxes=args.boxes,
        greyscale_threshold=args.greyscale_threshold,
        page=args.page,
        crop=args.crop,
        line_length=args.line_length,
        bitmap_resolution=args.bitmap_resolution,
        name=args.name,
        pad=args.pad,
        white=args.white,
        black=args.black,
    )

    if args.checkcrop or args.checklines or args.checkdivs or args.checkcells:
        for pgs in args.page:
            print("Processing Page #" + pgs)
            process_page(args.infile, pgs, outfilename=args.outfile,
                         **options)
    else:
        cells = []
        for pgs in args.page:
            print("Processing Page #" + pgs)
            cells.extend(process_page(args.infile, pgs, **options))

        # NOTE(review): the table output is emitted only in extraction
        # mode, since check mode already uses args.outfile for its own
        # output - confirm against the original nesting.
        filenames = dict()
        if args.outfile is None:
            args.outfile = sys.stdout
        filenames["{0}_filename".format(args.t)] = args.outfile
        output(cells, args.page, name=args.name, infile=args.infile,
               output_type=args.t, **filenames)
示例9: makeOnePagersOld
# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import getNumPages [as 别名]
def makeOnePagersOld(filename='GPO-CONAN-REV-2014.pdf' ,path='pdf/'):
    """Split a PDF into one single-page PDF per page.

    The page count is printed first. Page ``i`` (zero-based) is written to
    ``path + 'pageindex-%02d.pdf' % i``.

    Args:
        filename: Path of the PDF to split.
        path: Prefix prepended verbatim to each output file name, so it
            should normally end with a path separator.
    """
    # Keep the source open for the whole loop: pages are read from it on
    # demand each time a single-page document is written.
    with open(filename, 'rb') as source:
        infile = PdfFileReader(source)
        page_count = infile.getNumPages()
        print(page_count)
        for i in range(page_count):
            outfile = PdfFileWriter()
            outfile.addPage(infile.getPage(i))
            with open(path + 'pageindex-%02d.pdf' % i, 'wb') as outputStream:
                outfile.write(outputStream)
示例10: testSplitPdfBasic
# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import getNumPages [as 别名]
def testSplitPdfBasic(self):
    """Splitting beyond the last page yields a single, complete result.

    With a split point past the end of the document only the first result
    file is expected to exist, holding every page of the source.
    """
    pdfHelper = PdfHelper()
    with open(self.PDF1, "rb") as file1:
        pdfReader1 = PdfFileReader(file1)
        # the split point is upper than the number of pages
        splitPoint = pdfReader1.getNumPages() + 5
        pdfHelper.split_pdfs(file1, splitPoint, self.RESULT_FILE_SPLIT1,
                             self.RESULT_FILE_SPLIT2)
        assert os.path.exists(self.RESULT_FILE_SPLIT1)
        assert not os.path.exists(self.RESULT_FILE_SPLIT2)
        # PDFs are binary: read the result in 'rb' mode.
        with open(self.RESULT_FILE_SPLIT1, "rb") as result_file:
            pdfReaderResult = PdfFileReader(result_file)
            assert pdfReader1.getNumPages() == pdfReaderResult.getNumPages()
示例11: testSplitPdf
# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import getNumPages [as 别名]
def testSplitPdf(self):
    """Splitting two pages before the end yields two result files.

    The first result must hold ``splitPoint`` pages and the second the
    remaining pages of the source document.
    """
    pdfHelper = PdfHelper()
    with open(self.PDF1, "rb") as file1:
        pdfReader1 = PdfFileReader(file1)
        splitPoint = pdfReader1.getNumPages() - 2
        pdfHelper.split_pdfs(file1, splitPoint, self.RESULT_FILE_SPLIT1,
                             self.RESULT_FILE_SPLIT2)
        assert os.path.exists(self.RESULT_FILE_SPLIT1)
        assert os.path.exists(self.RESULT_FILE_SPLIT2)
        # PDFs are binary: read both results in 'rb' mode.
        with open(self.RESULT_FILE_SPLIT1, "rb") as split_stream1, \
                open(self.RESULT_FILE_SPLIT2, "rb") as split_stream2:
            splitFile1 = PdfFileReader(split_stream1)
            splitFile2 = PdfFileReader(split_stream2)
            assert splitFile1.getNumPages() == splitPoint
            assert (splitFile2.getNumPages()
                    == pdfReader1.getNumPages() - splitPoint)
示例12: testMergin
# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import getNumPages [as 别名]
def testMergin(self):
    """Merging two PDFs yields a file holding the pages of both."""
    pdfHelper = PdfHelper()
    with open(self.PDF1, "rb") as file1, open(self.PFD2, "rb") as file2:
        assert not os.path.exists(self.RESULT_FILE_MERGIN)
        # NOTE(review): the merge target is spelled out here while the
        # assertions use self.RESULT_FILE_MERGIN - presumably both name
        # the same path; confirm.
        pdfHelper.merge_pdfs((file1, file2),
                             os.path.join('data', 'result.pdf'))
        assert os.path.exists(self.RESULT_FILE_MERGIN)
        pdfReader1 = PdfFileReader(file1)
        pdfReader2 = PdfFileReader(file2)
        with open(self.RESULT_FILE_MERGIN, "rb") as result_file:
            pdfReaderResult = PdfFileReader(result_file)
            assert (pdfReader1.getNumPages() + pdfReader2.getNumPages()
                    == pdfReaderResult.getNumPages())
示例13: getPDFContents
# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import getNumPages [as 别名]
def getPDFContents(path):
    """Return the lower-cased, whitespace-normalised text of a PDF.

    This is a best-effort helper: any error while opening or parsing the
    file is printed and whatever text was collected so far is returned.
    Encrypted documents are reported and yield an empty string.

    Args:
        path: Path of the PDF file to read.

    Returns:
        The text of all pages joined by single spaces, with non-breaking
        spaces treated as ordinary whitespace.
    """
    content = ""
    try:
        # The context manager closes the file even when parsing fails.
        with open(path, "rb") as stream:
            pdf = PdfFileReader(stream)
            if pdf.isEncrypted:
                print("%s is encrypted!" % path)
            else:
                # get all pages and put them in a string
                for i in range(0, pdf.getNumPages()):
                    content += pdf.getPage(i).extractText().lower() + " \n"
                content = u" ".join(
                    content.replace(u"\xa0", u" ").strip().split())
    except Exception as e:
        # Deliberately broad: one unreadable document is reported instead
        # of aborting the caller.
        print(e.args)
    return content
示例14: parse_file
# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import getNumPages [as 别名]
def parse_file(pdfFile,nameFile):
    """Split a PDF into per-person documents and e-mail each one.

    The pages are scanned in order. A page whose text contains one of the
    known names (case-insensitive) starts a new document; following pages
    are added to it until the next name is found. Every finished document
    is passed to ``send_email`` together with the name that started it.
    Pages before the first match are ignored.

    Args:
        pdfFile: Path of the PDF to split.
        nameFile: Path of the CSV file handed to ``get_names``.
    """
    # The PDF stays open until the last e-mail is sent, since page data is
    # read from the stream when a document is finally written out.
    with open(pdfFile, "rb") as pdf_stream:
        pdfReader = PdfFileReader(pdf_stream)
        # read the names and emails from csv file
        names = get_names(nameFile)
        # create an instance in SMTP server
        smtp = smtplib.SMTP('localhost')
        try:
            pdfWriter = None  # document currently being assembled, if any
            prevName = ""
            for i in range(pdfReader.getNumPages()):
                page = pdfReader.getPage(i)
                page_text = page.extractText().lower()
                for name in names:
                    if name.lower() in page_text:
                        if pdfWriter is not None:
                            # send the current pdf
                            send_email(smtp, pdfWriter, prevName, names)
                        # start a new document for this name
                        pdfWriter = PdfFileWriter()
                        prevName = name
                        break
                if pdfWriter is not None:
                    pdfWriter.addPage(page)
            # send the last file
            if pdfWriter is not None:
                send_email(smtp, pdfWriter, prevName, names)
        finally:
            # quit the smtp server, also when sending failed part-way
            smtp.quit()
示例15: add_omr_marks
# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import getNumPages [as 别名]
def add_omr_marks(self, pdf_data, is_latest_document):
    """Merge an OMR mark layer onto every recto page of a PDF.

    Pages at even zero-based indices are treated as recto pages. For each
    of them the marks are computed with ``_compute_marks`` and rendered
    with ``_build_omr_layer``; verso pages are copied unchanged.

    Args:
        pdf_data: Content of the PDF to mark.
        is_latest_document: Whether this is the last document; when true,
            the last recto page is flagged as the latest page.

    Returns:
        The content of the PDF with the marks merged in.
    """
    # Documentation
    # http://meteorite.unm.edu/site_media/pdf/reportlab-userguide.pdf
    # https://pythonhosted.org/PyPDF2/PdfFileReader.html
    # https://stackoverflow.com/a/17538003
    # https://gist.github.com/kzim44/5023021
    # https://www.blog.pythonlibrary.org/2013/07/16/
    #   pypdf-how-to-write-a-pdf-to-memory/
    self.ensure_one()

    pdf_buffer = StringIO.StringIO()
    pdf_buffer.write(pdf_data)
    existing_pdf = PdfFileReader(pdf_buffer)
    output = PdfFileWriter()
    total_pages = existing_pdf.getNumPages()

    # The latest OMR mark goes on the last recto page, i.e. the highest
    # even page index. ``total_pages // 2`` only gave that index for
    # documents of one or four pages.
    latest_omr_page = (total_pages - 1) // 2 * 2

    for page_number in range(total_pages):
        page = existing_pdf.getPage(page_number)
        # only print omr marks on pair pages (recto)
        if page_number % 2 == 0:
            is_latest_page = is_latest_document and \
                page_number == latest_omr_page
            marks = self._compute_marks(is_latest_page)
            omr_layer = self._build_omr_layer(marks)
            page.mergePage(omr_layer)
        output.addPage(page)

    out_buffer = StringIO.StringIO()
    output.write(out_buffer)
    return out_buffer.getvalue()