本文整理汇总了Python中pyPdf.PdfFileReader类的典型用法代码示例。如果您正苦于以下问题：Python PdfFileReader类的具体用法？Python PdfFileReader怎么用？Python PdfFileReader使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了PdfFileReader类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: setMetadata
def setMetadata(self, metadata):
    """Return the document's PDF content with new metadata applied.

    Keyword arguments:
    metadata -- expected a dictionary with metadata. A "ModificationDate"
                entry is stored under "ModDate", and a list of "Keywords"
                is joined into one space-separated string.

    The caller's dictionary is left untouched. Returns the bytes of the
    rewritten PDF.
    """
    # TODO: date as "D:20090401124817-04'00'" ASN.1 for ModDate and CreationDate
    # Work on a copy so renaming/rewriting keys does not leak to the caller.
    metadata = dict(metadata)
    modification_date = metadata.pop("ModificationDate", None)
    if modification_date:
        metadata['ModDate'] = modification_date
    keywords = metadata.get('Keywords')
    if isinstance(keywords, list):
        # join() is a method of the separator string, not of the list.
        metadata['Keywords'] = ' '.join(keywords)
    # NOTE(review): capitalize() lower-cases everything after the first
    # character, so "ModDate" is written as "/Moddate" -- confirm intended.
    args = {}
    for key, value in metadata.items():
        args[NameObject('/' + key.capitalize())] = createStringObject(value)
    output_pdf = PdfFileWriter()
    output_pdf._info.getObject().update(args)
    output_stream = io.BytesIO()
    with open(self.document.getUrl(), "rb") as source:
        input_pdf = PdfFileReader(source)
        for page_num in range(input_pdf.getNumPages()):
            output_pdf.addPage(input_pdf.getPage(page_num))
        # Written inside the "with": the reader may still need the source
        # stream while the copied pages are serialised.
        output_pdf.write(output_stream)
    return output_stream.getvalue()
示例2: createPDFHttpResponse
def createPDFHttpResponse(filepath, output_filename, user, access_time):
    """
    Creates a HttpResponse from a watermarked PDF file. Watermark contains the user who accessed the document
    and the time of access.
    :param filepath: Path to the file
    :param output_filename: File name sent to the user
    :param user: Value formatted into the watermark text as the accessing user
    :param access_time: Object whose isoformat(' ') is formatted into the watermark text
    :return: HttpResponse with the watermarked file content
    :raises IOError: propagated from open() when filepath cannot be read
    """
    # Build a one-page PDF that carries only the access watermark
    watermark_buffer = StringIO()
    p = canvas.Canvas(watermark_buffer)
    p.drawString(0,0, "Downloaded by %s at %s" %(user, access_time.isoformat(' ')))
    p.showPage()
    p.save()
    watermark_buffer.seek(0)
    watermark_page = PdfFileReader(watermark_buffer).getPage(0)
    output = PdfFileWriter()
    response = HttpResponse(mimetype='application/pdf')
    response['Content-Disposition'] = 'inline; filename=%s' % output_filename.encode('utf-8')
    # Read the PDF to be accessed; keep it open until the response is written
    # because the reader may still need the stream while pages are serialised.
    with open(filepath, 'rb') as attachment_file:
        attachment = PdfFileReader(attachment_file)
        # Attach watermark to each page
        for page in attachment.pages:
            page.mergePage(watermark_page)
            output.addPage(page)
        output.write(response)
    return response
示例3: rewrite
def rewrite(self, context, font=None):
    """Draw text onto the first page of self.path and save it to self.destination.

    context -- iterable of dicts, each providing 'x', 'y' and 'value' for one
               drawString() call on the overlay.
    font    -- dict with 'name' and 'size'; defaults to Times-Roman, size 11.

    Only the first page of the existing PDF is written to the destination.
    Returns True once the file has been written.
    """
    if font is None:
        # Built per call instead of sharing one mutable default dict.
        font = {'name': 'Times-Roman', 'size': 11}
    packet = StringIO.StringIO()
    # create a new PDF with Reportlab
    can = canvas.Canvas(packet, pagesize=letter)
    can.setFont(font['name'], font['size'])
    for item in context:
        can.drawString(item['x'], item['y'], item['value'])
    can.save()
    # move to the beginning of the StringIO buffer
    packet.seek(0)
    new_pdf = PdfFileReader(packet)
    output = PdfFileWriter()
    # read your existing PDF; it stays open until the result is written
    with open(self.path, "rb") as source:
        existing_pdf = PdfFileReader(source)
        # merge the new file with the existing
        page = existing_pdf.getPage(0)
        page.mergePage(new_pdf.getPage(0))
        output.addPage(page)
        # finally, write "output" to a real file
        with open(self.destination, "wb") as output_stream:
            output.write(output_stream)
    return True
示例4: add_omr_marks
def add_omr_marks(self, pdf_data, is_latest_document):
    """Return pdf_data with an OMR layer merged onto every even-indexed page.

    pdf_data           -- content of the PDF, written into an in-memory buffer.
    is_latest_document -- when true, the page whose index equals
                          total_pages // 2 is flagged as the latest page when
                          its marks are computed.

    Marks come from self._compute_marks() and the layer from
    self._build_omr_layer(). Returns the content of the rewritten PDF.
    """
    # Documentation
    # http://meteorite.unm.edu/site_media/pdf/reportlab-userguide.pdf
    # https://pythonhosted.org/PyPDF2/PdfFileReader.html
    # https://stackoverflow.com/a/17538003
    # https://gist.github.com/kzim44/5023021
    # https://www.blog.pythonlibrary.org/2013/07/16/
    # pypdf-how-to-write-a-pdf-to-memory/
    self.ensure_one()
    pdf_buffer = StringIO.StringIO()
    pdf_buffer.write(pdf_data)
    existing_pdf = PdfFileReader(pdf_buffer)
    output = PdfFileWriter()
    total_pages = existing_pdf.getNumPages()
    # print latest omr mark on latest pair page (recto)
    # NOTE(review): total_pages // 2 is odd for e.g. 2 or 6 pages, so it can
    # never match an even page index below -- confirm the intended formula.
    latest_omr_page = total_pages // 2
    for page_number in range(total_pages):
        page = existing_pdf.getPage(page_number)
        # only print omr marks on pair pages (recto)
        # "==" rather than "is": identity of ints is an implementation detail.
        if page_number % 2 == 0:
            is_latest_page = (
                is_latest_document and page_number == latest_omr_page
            )
            marks = self._compute_marks(is_latest_page)
            omr_layer = self._build_omr_layer(marks)
            page.mergePage(omr_layer)
        output.addPage(page)
    out_buffer = StringIO.StringIO()
    output.write(out_buffer)
    return out_buffer.getvalue()
示例5: split_chapters
def split_chapters(*t_args):
    """
    Split a large pdf into chunks (i.e. chapters)

    The first positional argument is a sequence whose first item is the path
    of a JSON config file. Keys read from the config: "source",
    "first_chapter_index", "chapters_ends", "chapter_fmt", "outputdir",
    "chapter_prefix" and "firstpage".

    Chapter i covers the pages from the previous chapter's end (or
    "firstpage" for the first chapter) up to, but not including,
    chapters_ends[i]. Prints a usage line and returns when no config file
    is given.
    """
    # Calling with no arguments at all used to leave "args" unbound.
    args = t_args[0] if t_args else ()
    if len(args) < 1:
        print("usage: utils_pdf split_chapters configfile")
        return
    from pyPdf import PdfFileWriter, PdfFileReader
    with open(args[0]) as config_file:
        P = json.load(config_file)
    first_chapter = P["first_chapter_index"]
    ends = P["chapters_ends"]
    # The output directory does not change between chapters: create it once.
    if not os.path.exists(P["outputdir"]):
        os.mkdir(P["outputdir"])
    with open(P["source"], "rb") as source:
        reader = PdfFileReader(source)
        start = P["firstpage"]
        for offset, end in enumerate(ends):
            fmt = P["chapter_fmt"] % (first_chapter + offset, )
            fn_out = "%s/%s%s" % (P["outputdir"], P["chapter_prefix"], fmt)
            output = PdfFileWriter()
            for page_index in range(start, end):
                output.addPage(reader.getPage(page_index))
            with open(fn_out, "wb") as output_stream:
                output.write(output_stream)
            print("wrote %s" % (fn_out,))
            # The next chapter starts where this one ended.
            start = end
示例6: __init__
class cleanpdf:
    """Copy a PDF next to the original, overwriting the writer's info entries.

    Constructing an instance does all the work: the pages of pathFile are
    copied into a new file named after the original with "5.pdf" replacing
    the extension, and every entry of the writer's info dictionary is set to
    a fixed HTML test string.
    """

    def __init__(self,pathFile):
        """Open pathFile, rewrite the info entries and write the copy."""
        self.pathFile = pathFile
        # Left open on purpose: self.pdfInput stays exposed on the instance
        # and may still read from this stream.
        self.inputFile = open(self.pathFile,"rb")
        self.pdfInput = PdfFileReader(self.inputFile)
        self.pyPdfOutput = PdfFileWriter()
        self.dataToUpdate = self.pyPdfOutput._info.getObject()
        self.__modifyData()
        self.__copyPDF()

    def __modifyData(self):
        """Replace every value of the writer's info dictionary."""
        # Iterate over a snapshot of the keys while assigning into the dict.
        for key in list(self.dataToUpdate):
            self.dataToUpdate[key] = createStringObject(('<h1 onmouseover=alert(1)>').encode('ascii'))

    def __copyPDF(self):
        """Copy all input pages to the writer and save the result."""
        for page in range(0,self.pdfInput.getNumPages()):
            self.pyPdfOutput.addPage(self.pdfInput.getPage(page))
        # "with" guarantees the copy is flushed and closed.
        with open(self.__changeName(),"wb") as outputFile:
            self.pyPdfOutput.write(outputFile)

    def __changeName(self):
        """Return the output path: pathFile with its extension replaced by "5.pdf"."""
        import os
        # splitext() copes with names that have no extension; rfind(".")
        # returned -1 there and chopped the last character of the name.
        root, _extension = os.path.splitext(self.pathFile)
        return root + "5.pdf"
示例7: save
def save(self, to):
    """Render every page class over the origin's first page and write the PDF.

    to -- a file path (string) or an object with write()/close(); it is
          closed after writing in both cases, as before.

    For each class in self.pages, page_class(self.instance).save() is merged
    onto a copy of page 0 of the origin PDF and appended to the output.

    Raises RuntimeError when no origin is available or it cannot be opened.
    """
    import copy
    origin = self.get_origin()
    if not origin:
        raise RuntimeError("Please implement get_origin method or origin attribute")
    origin_file = None
    try:
        origin_file = open(origin, "rb")
        existing_pdf = PdfFileReader(origin_file)
    except IOError:
        if origin_file is not None:
            origin_file.close()
        raise RuntimeError(u"Failed to open origin file")
    try:
        output = PdfFileWriter()
        for page_class in self.pages:
            new_page = page_class(self.instance).save()
            # Merge into a copy: merging into the reader's own page object
            # would stack every overlay onto one page shared by all outputs.
            base_page = copy.copy(existing_pdf.getPage(0))
            base_page.mergePage(new_page)
            output.addPage(base_page)
        if isinstance(to, basestring):
            outputStream = open(to, "wb")
        else:
            outputStream = to
        try:
            output.write(outputStream)
        finally:
            outputStream.close()
    finally:
        origin_file.close()
示例8: choose_file
def choose_file(self,widget,data=None):
    """Let the user pick a PDF and load its text into the global textbuffer.

    Shows a GTK file chooser restricted to "*.pdf". On OK, the text of every
    page of the selected file is extracted, whitespace (including
    non-breaking spaces) is collapsed to single spaces, and the result is
    put into textbuffer. The dialog is destroyed in every case.
    """
    global textbuffer
    dialog = gtk.FileChooserDialog("Open..",
                                   None,
                                   gtk.FILE_CHOOSER_ACTION_OPEN,
                                   (gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,
                                    gtk.STOCK_OPEN, gtk.RESPONSE_OK))
    try:
        dialog.set_default_response(gtk.RESPONSE_OK)
        pdf_filter = gtk.FileFilter()
        pdf_filter.set_name("PDF files")
        pdf_filter.add_pattern("*.pdf")
        dialog.add_filter(pdf_filter)
        response = dialog.run()
        if response == gtk.RESPONSE_OK:
            selected = dialog.get_filename()
            print("%s selected" % (selected,))
            from pyPdf import PdfFileReader
            # Read the file the user chose, not a hard-coded "kpeng.pdf".
            with open(selected, "rb") as pdf_file:
                pdf = PdfFileReader(pdf_file)
                # Extract text from each page
                pages_text = [pdf.getPage(i).extractText()
                              for i in range(pdf.getNumPages())]
            # "\n" and u"\xa0" are real escapes; the original "/n" and
            # "/xa0" were literal slash sequences that ended up in the text.
            content = "\n".join(pages_text)
            # Collapse whitespace
            content = " ".join(content.replace(u"\xa0", " ").strip().split())
            textbuffer.set_text(content)
        elif response == gtk.RESPONSE_CANCEL:
            print('Closed, no files selected')
    finally:
        # Destroy the dialog even when reading the PDF fails.
        dialog.destroy()
示例9: watermark
def watermark( self, pdfStr, watermarkFile, spec ):
    """Merge document pages onto the first page of a watermark PDF.

    pdfStr        -- source passed straight to PdfFileReader for the document.
    watermarkFile -- path of the PDF whose first page is the watermark.
    spec          -- Mark.FIRST_PAGE marks only the first page,
                     Mark.ALL_PAGES marks every page; any other value copies
                     the pages unchanged.

    Returns self.outputFile after writing to it when it is set, otherwise
    the content of the generated PDF.
    """
    outputPdf = PdfFileWriter()
    # Read the watermark- and document pdf file
    with open( watermarkFile, "rb" ) as watermarkStream:
        watermarkPage = PdfFileReader( watermarkStream ).getPage(0)
        generatedPdf = PdfFileReader( pdfStr )
        # Loop over source document pages and merge with the first page of the
        # watermark file.
        for index, page in enumerate( generatedPdf.pages ):
            markThisPage = spec == Mark.ALL_PAGES or \
                (spec == Mark.FIRST_PAGE and index == 0)
            if markThisPage:
                # copy (shallow) the watermark page here, otherwise the
                # watermark page gets merged over and over because p would
                # only be a reference
                p = copy.copy( watermarkPage )
                p.mergePage( page )
                outputPdf.addPage( p )
            else:
                outputPdf.addPage( page )
        # Write while the watermark file is still open: the writer may
        # still need to read from it.
        if self.outputFile:
            # Write to outputfile
            with open( self.outputFile, "wb" ) as outputStream:
                outputPdf.write( outputStream )
            return self.outputFile
        memoryStream = StringIO.StringIO()
        outputPdf.write( memoryStream )
        return memoryStream.getvalue()
示例10: output
def output(self):
    """Export the pages listed in the line edit to a PDF chosen by the user.

    Asks for a destination via a save dialog, parses the page selection
    from self.pages_line_edit against the page count of self.in_filename
    and writes those pages to the destination. Does nothing when no file
    name is returned by the dialog.
    """
    # get the output filename using the file dialog
    (out_filename, _selected_filter) = \
        QFileDialog.getSaveFileName(parent = self,
                                    caption = self.tr(u'Export'),
                                    dir = '',
                                    filter = self.tr('pdf (*.pdf)'))
    # presumably empty when the dialog is cancelled -- nothing to export then
    if not out_filename:
        return
    # extract input
    pages_string = self.pages_line_edit.text()
    with open(self.in_filename, 'rb') as in_file:
        in_reader = PdfFileReader(in_file)
        # Get the indices of pages to extract
        pages = pages_parser(in_reader.getNumPages()).parse(pages_string)
        # append pages to output writer
        out_writer = PdfFileWriter()
        for page_index in pages:
            out_writer.addPage(in_reader.getPage(page_index))
        # Open the destination only now, so a failure above does not leave
        # a truncated output file behind.
        with open(out_filename, 'wb') as out_file:
            out_writer.write(out_file)
示例11: showpdf
def showpdf(request):
    """Return the first page of an extracted PDF with the signature image on it.

    GET parameters:
    f -- name of a file inside MEDIA_ROOT/pdffiles/extracted; directory
         components are discarded.
    o -- 'l' draws the signature at x=529, anything else (or nothing)
         at x=70.

    Returns an HttpResponse carrying the PDF as an attachment, or None when
    'f' is not supplied.
    """
    sign = os.path.join(settings.MEDIA_ROOT, "signature.png")
    mimetypes.init()
    response = None
    if 'f' in request.GET:
        # The name comes from the client: keep only its final component so
        # it cannot point outside the extracted-files directory.
        filename = os.path.basename(request.GET['f'])
        pdf_path = os.path.join(settings.MEDIA_ROOT, 'pdffiles', 'extracted', filename)
        imgTemp = StringIO()
        imgDoc = canvas.Canvas(imgTemp)
        # A missing 'o' falls back to the default position instead of raising.
        if request.GET.get('o') == 'l':
            x_position = 529
        else:
            x_position = 70
        imgDoc.drawImage(sign, x_position, 40, 290/2, 154/2)
        imgDoc.save()
        overlay = PdfFileReader(StringIO(imgTemp.getvalue())).getPage(0)
        with open(pdf_path, "rb") as fr:
            page = PdfFileReader(fr).getPage(0)
            page.mergePage(overlay)
            pdf_out = PdfFileWriter()
            pdf_out.addPage(page)
            response = HttpResponse(mimetype='application/pdf')
            response['Content-Disposition'] = 'attachment; filename=%s' % filename
            # Written inside the "with": the reader may still need the file.
            pdf_out.write(response)
    return response
示例12: add_guides
def add_guides(self):
    """Copy sig.pdf to sigs.pdf with guide marks merged onto the first page.

    The guides page is generated with create_pdf(): long-arm guides when
    self.args.longarm is set, otherwise short-arm guides sized by
    self.args.a5 and self.args.signature.
    """
    pdf_out = PdfFileWriter()
    with open('sig.pdf', 'rb') as source:
        pdf_in = PdfFileReader(source)
        for i in range(pdf_in.getNumPages()):
            page = pdf_in.getPage(i)
            # Guides go on the first page only.
            if i == 0:
                guides = StringIO()
                if self.args.longarm:
                    create_pdf(
                        guides, a4lwidth_pt, a4lheight_pt, generate_longarm())
                else:
                    if self.args.a5:
                        w, h = a5width_pt, a5height_pt
                    else:
                        w, h = a4lwidth_pt, a4lheight_pt
                    create_pdf(guides, w, h, generate_shortarm(
                        self.args.a5, bool(self.args.signature)))
                pdf_guides = PdfFileReader(guides)
                page.mergePage(pdf_guides.getPage(0))
            pdf_out.addPage(page)
        # "with" closes (and flushes) the result; the source stays open
        # until writing is done because the reader may still need it.
        with open('sigs.pdf', 'wb') as target:
            pdf_out.write(target)
示例13: read_neb_enzyme_price_list
def read_neb_enzyme_price_list():
    """Download the NEB price list PDF and return its enzyme price entries.

    Returns a list of (name, small_cost, large_cost, small_unit, large_unit)
    tuples sorted by name, built from every NEB_PRICE_LINE_RE match in the
    extracted text of each page.
    """
    # throws URLError, IOError
    price_list = urllib2.urlopen(NEB_PRICE_LIST_URL)
    try:
        file_buffer = StringIO(price_list.read())
    finally:
        # The response is fully buffered above; release the connection.
        price_list.close()
    reader = PdfFileReader(file_buffer)
    enzymes = []
    for p in range(reader.getNumPages()):
        # fi/fl misread hacks-- little nasty in here-- poor PDF read
        page_text = reader.getPage(p).extractText().replace(u'\u02dc','fi').replace(u'˚','fl')
        for match in NEB_PRICE_LINE_RE.finditer(page_text):
            # format of the groups will be: name prefix, lastletter(+supplement)+small_cost, supplement, large_cost, small_unit, large_unit
            name_prefix, transition, supplement, large_cost, small_unit, large_unit = match.groups()
            if supplement:
                # The name runs through the end of the supplement; the rest
                # of "transition" is the small cost.
                carryover = transition.index(supplement)+len(supplement)
            else:
                # Without a supplement only the first character belongs to
                # the name.
                carryover = 1
            name = "%s%s" % (name_prefix, transition[:carryover])
            small_cost = int_comma(transition[carryover:])
            large_cost = int_comma(large_cost)
            small_unit = int_comma(small_unit)
            large_unit = int_comma(large_unit)
            enzymes.append((name, small_cost, large_cost, small_unit, large_unit))
    return sorted(enzymes, key=operator.itemgetter(0))
示例14: generate
def generate(donor):
    """Build output/<donor>.pdf from the two SVG page templates.

    For each of page1_svg and page2_svg, "/France/" is replaced by the
    URL-escaped donor name, inkscape updates and saves the SVG and exports
    it to PDF. The first pages of both PDFs are then combined. Nothing is
    done when the combined file already exists.
    """
    if not os.path.isdir('output'):
        os.makedirs('output')
    donor_url = donor.replace(' ','%20')
    slug = donor.replace(' ','-').lower()
    page1 = 'output/%s1' % (slug)
    page2 = 'output/%s2' % (slug)
    combined = 'output/%s.pdf' % (slug)
    if os.path.exists(combined):
        return
    # The original silenced stderr on every inkscape call except the export
    # of the second page; that difference is preserved via quiet_export.
    for template_svg, page, quiet_export in ((page1_svg, page1, True),
                                             (page2_svg, page2, False)):
        svg_path = '%s.svg' % (page)
        _write_donor_svg(template_svg, donor_url, svg_path)
        _run_inkscape(['--file=%s' % (svg_path),
                       '--verb=za.co.widgetlabs.update',
                       '--verb=FileSave',
                       '--verb=FileQuit'])
        _run_inkscape(['--file=%s' % (svg_path),
                       '--export-pdf=%s.pdf' % (page)],
                      quiet=quiet_export)
    # Merge pages
    output = PdfFileWriter()
    with open('%s.pdf' % (page1), 'rb') as first_pdf:
        with open('%s.pdf' % (page2), 'rb') as second_pdf:
            output.addPage(PdfFileReader(first_pdf).getPage(0))
            output.addPage(PdfFileReader(second_pdf).getPage(0))
            with open(combined, 'wb') as outputStream:
                output.write(outputStream)
    sleep(2)


def _write_donor_svg(template_svg, donor_url, svg_path):
    """Write template_svg to svg_path with "/France/" replaced by the donor."""
    replacement = '/%s/' % (donor_url)
    with open(template_svg, 'r') as template:
        # First occurrence per line, matching sed's "s|/France/|...|".
        lines = [line.replace('/France/', replacement, 1) for line in template]
    with open(svg_path, 'w') as target:
        target.writelines(lines)


def _run_inkscape(arguments, quiet=True):
    """Run inkscape with an argument list (no shell), ignoring its exit status."""
    import subprocess
    command = ['inkscape'] + list(arguments)
    if not quiet:
        subprocess.call(command)
        return
    with open(os.devnull, 'w') as devnull:
        subprocess.call(command, stderr=devnull)
示例15: split_pset
def split_pset():
    """Split a problem set's answers PDF into one PDF per question.

    Reads options.pset and options.probs. options.probs is a comma-separated
    list with one page specification per question; get_pages() turns each
    into 1-based page numbers of pset<N>/latex/pset<N>_answers.pdf. Question
    k is written to pset<N>/latex/pset<N>-<k>_answer.pdf.
    """
    if (not options.pset or not options.probs):
        print_err_and_die("You must enter both arguements! run with -h for help")
    path = "pset%s/latex/"%options.pset
    filename = "%spset%s_answers.pdf"%(path, options.pset)
    try:
        answers_file = open(filename, "rb")
        inp = PdfFileReader(answers_file)
    except IOError:
        print_err_and_die("Error! File, %s was not found." % filename)
    ##loop over user input and break up pdf
    # "with" closes the answers file once every question has been written.
    with answers_file:
        for questionNum, prob in enumerate(options.probs.split(","), 1):
            print("Processing question %s" % (questionNum,))
            prob = prob.strip() #kill whitespace
            out = PdfFileWriter()
            pages = get_pages(prob, inp.getNumPages())
            for page in pages:
                print("page num %s" % (page,))
                # page numbers are 1-based, getPage() is 0-based
                out.addPage(inp.getPage(int(page)-1))
            out_name = "%spset%s-%s_answer.pdf"%(path, options.pset, questionNum)
            with open(out_name, "wb") as outStream:
                out.write(outStream)
    print("Done!")