当前位置: 首页>>代码示例>>Python>>正文


Python pyPdf.PdfFileReader类代码示例

本文整理汇总了Python中pyPdf.PdfFileReader的典型用法代码示例。如果您正苦于以下问题:Python PdfFileReader类的具体用法?Python PdfFileReader怎么用?Python PdfFileReader使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了PdfFileReader类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: setMetadata

  def setMetadata(self, metadata):
    """Returns a document with new metadata.

    Every page of the source document (``self.document.getUrl()``) is copied
    into a new PDF whose Info dictionary is updated with the given entries,
    and the resulting PDF is returned as bytes.

    Keyword arguments:
    metadata -- expected a dictionary with metadata. A "ModificationDate"
                entry is stored under "ModDate", and a list of "Keywords" is
                joined into one space-separated string. The dictionary that
                is passed in is not modified.
    """
    # TODO: date as "D:20090401124817-04'00'" ASN.1 for ModDate and CreationDate
    # Work on a copy so the caller's dictionary is left untouched.
    metadata = dict(metadata)
    modification_date = metadata.pop("ModificationDate", None)
    if modification_date:
      metadata['ModDate'] = modification_date
    keywords = metadata.get('Keywords')
    if isinstance(keywords, list):
      # join() is a method of the separator string, not of the list.
      metadata['Keywords'] = ' '.join(keywords)

    # NOTE(review): capitalize() lower-cases everything after the first
    # character, so 'ModDate' ends up as '/Moddate' -- confirm this is wanted.
    args = {}
    for key, value in metadata.items():
      args[NameObject('/' + key.capitalize())] = createStringObject(value)

    output_pdf = PdfFileWriter()
    output_pdf._info.getObject().update(args)

    # The pages handed to the writer belong to the reader, so the source file
    # is kept open until the output has been written, then closed.
    with open(self.document.getUrl(), "rb") as input_file:
      input_pdf = PdfFileReader(input_file)
      for page_num in range(input_pdf.getNumPages()):
        output_pdf.addPage(input_pdf.getPage(page_num))

      output_stream = io.BytesIO()
      output_pdf.write(output_stream)
    return output_stream.getvalue()
开发者ID:Nexedi,项目名称:cloudooo,代码行数:26,代码来源:handler.py

示例2: createPDFHttpResponse

def createPDFHttpResponse(filepath, output_filename, user, access_time):
    """
    Creates a HttpResponse from a watermarked PDF file. Watermark contains the user who accessed the document
    and the time of access.

    :param filepath: Path to the file
    :param output_filename: File name sent to the user
    :param user: Who accessed the document; rendered into the watermark with ``%s``
    :param access_time: Time of access; must provide ``isoformat(' ')``
    :return: HttpResponse with the watermarked PDF as its content
    :raises IOError: if ``filepath`` cannot be opened (no "not found" response is built here)

    """
    #Add access watermark
    watermark_buffer = StringIO()
    p = canvas.Canvas(watermark_buffer)
    p.drawString(0,0, "Downloaded by %s at %s" %(user, access_time.isoformat(' ')))
    p.showPage()
    p.save()
    watermark_buffer.seek(0)
    watermark_page = PdfFileReader(watermark_buffer).getPage(0)

    #Read the PDF to be accessed; the file stays open until the response has
    #been written and is then closed, also when an error occurs.
    with open(filepath, 'rb') as attachment_file:
        attachment = PdfFileReader(attachment_file)
        output = PdfFileWriter()

        #Attach watermark to each page
        for page in attachment.pages:
            page.mergePage(watermark_page)
            output.addPage(page)

        response = HttpResponse(mimetype='application/pdf')
        response['Content-Disposition'] = 'inline; filename=%s' % output_filename.encode('utf-8')
        output.write(response)
    return response
开发者ID:wufulab,项目名称:Kavalan_Management_System,代码行数:34,代码来源:views.py

示例3: rewrite

    def rewrite(self, context, font=None):
        """Draw text over the first page of ``self.path`` and save it to ``self.destination``.

        Only the first page of the existing PDF is written to the destination.

        :param context: iterable of mappings, each providing ``'x'``, ``'y'``
            and ``'value'`` for one ``drawString`` call
        :param font: mapping with ``'name'`` and ``'size'``; defaults to
            Times-Roman at size 11
        :return: True
        """
        if font is None:
            # Built per call so the default is never shared between calls.
            font = {'name': 'Times-Roman', 'size': 11}

        packet = StringIO.StringIO()
        # create a new PDF with Reportlab
        can = canvas.Canvas(packet, pagesize=letter)
        can.setFont(font['name'], font['size'])
        for item in context:
            can.drawString(item['x'], item['y'], item['value'])
        can.save()

        # move to the beginning of the StringIO buffer
        packet.seek(0)
        new_pdf = PdfFileReader(packet)

        # read the existing PDF; it stays open until the merged page has been
        # written, and both files are closed even if an error occurs
        with open(self.path, "rb") as existing_file:
            existing_pdf = PdfFileReader(existing_file)
            # merge the new file with the existing
            page = existing_pdf.getPage(0)
            page.mergePage(new_pdf.getPage(0))
            output = PdfFileWriter()
            output.addPage(page)
            # finally, write "output" to a real file
            with open(self.destination, "wb") as output_stream:
                output.write(output_stream)

        return True
开发者ID:diegoloredo,项目名称:pyPdfFinder,代码行数:26,代码来源:writer.py

示例4: add_omr_marks

    def add_omr_marks(self, pdf_data, is_latest_document):
        """Return ``pdf_data`` with an OMR layer merged onto its even-indexed pages.

        :param pdf_data: content of the PDF to mark
        :param is_latest_document: when truthy, the page whose index equals
            ``total_pages // 2`` gets the marks computed for a "latest" page
        :return: content of the marked PDF
        """
        # Documentation
        # http://meteorite.unm.edu/site_media/pdf/reportlab-userguide.pdf
        # https://pythonhosted.org/PyPDF2/PdfFileReader.html
        # https://stackoverflow.com/a/17538003
        # https://gist.github.com/kzim44/5023021
        # https://www.blog.pythonlibrary.org/2013/07/16/
        #   pypdf-how-to-write-a-pdf-to-memory/
        self.ensure_one()

        pdf_buffer = StringIO.StringIO()
        pdf_buffer.write(pdf_data)

        existing_pdf = PdfFileReader(pdf_buffer)
        output = PdfFileWriter()
        total_pages = existing_pdf.getNumPages()

        # print latest omr mark on latest pair page (recto)
        # NOTE(review): this value is compared with even page indexes below,
        # so for e.g. 6 pages (value 3) no page is ever flagged as latest --
        # confirm the intended formula.
        latest_omr_page = total_pages // 2

        for page_number in range(total_pages):
            page = existing_pdf.getPage(page_number)
            # only print omr marks on pair pages (recto)
            # compare by value: "is 0" tests object identity, not equality
            if page_number % 2 == 0:
                is_latest_page = is_latest_document and \
                    page_number == latest_omr_page
                marks = self._compute_marks(is_latest_page)
                omr_layer = self._build_omr_layer(marks)
                page.mergePage(omr_layer)
            output.addPage(page)

        out_buffer = StringIO.StringIO()
        output.write(out_buffer)

        return out_buffer.getvalue()
开发者ID:maxime-beck,项目名称:compassion-modules,代码行数:35,代码来源:communication_job.py

示例5: split_chapters

def split_chapters(*t_args):
    """
    Split a large pdf into chunks (i.e. chapters)

    The first positional argument is an argument list whose first item is the
    path of a JSON config file. The config provides "source",
    "first_chapter_index", "chapters_ends", "chapter_fmt", "outputdir",
    "chapter_prefix" and "firstpage". Chapter i covers the pages from the
    previous chapter end (or "firstpage" for the first chapter) up to, but
    not including, its own entry in "chapters_ends".

    Prints a usage line and returns when no config file is given.
    """
    if not t_args or len(t_args[0]) < 1:
        print("usage: utils_pdf split_chapters configfile")
        return
    args = t_args[0]
    from pyPdf import PdfFileWriter, PdfFileReader
    with open(args[0]) as config_file:
        P = json.load(config_file)
    i0 = P["first_chapter_index"]
    ends = P["chapters_ends"]
    # The output directory does not change between chapters: check it once.
    if ends and not os.path.exists(P["outputdir"]):
        os.mkdir(P["outputdir"])
    # The source stays open while chapters are written and is closed after.
    with open(P["source"], "rb") as source_file:
        reader = PdfFileReader(source_file)
        for i, end in enumerate(ends):
            ch_num = i0 + i
            fmt = P["chapter_fmt"] % (ch_num, )
            fn_out = "%s/%s%s" % (P["outputdir"], P["chapter_prefix"], fmt)
            j0 = P["firstpage"] if i == 0 else ends[i - 1]
            output = PdfFileWriter()
            for j in range(j0, end):
                output.addPage(reader.getPage(j))
            with open(fn_out, "wb") as output_stream:
                output.write(output_stream)
            print("wrote %s" % (fn_out,))

示例6: __init__

class cleanpdf:
    """Copy a PDF through a fresh PdfFileWriter into a sibling "<name>5.pdf" file.

    Constructing an instance does all the work: the source is read, the
    entries of the new writer's info dictionary are overwritten, every page
    is copied and the result is written next to the source.
    """

    def __init__(self, pathFile):
        """Process ``pathFile`` immediately.

        :param pathFile: path of the source PDF

        The attributes set here are kept after construction, but the input
        file is closed once the copy has been written.
        """
        self.pathFile = pathFile
        self.inputFile = open(self.pathFile, "rb")
        try:
            self.pdfInput = PdfFileReader(self.inputFile)
            self.pyPdfOutput = PdfFileWriter()
            self.dataToUpdate = self.pyPdfOutput._info.getObject()
            self.__modifyData()
            self.__copyPDF()
        finally:
            # Release the handle whether or not the copy succeeded.
            self.inputFile.close()

    def __modifyData(self):
        """Replace every existing entry of the writer's info dictionary."""
        # NOTE(review): the replacement value looks like an HTML test
        # payload -- confirm it is intended.
        for data in list(self.dataToUpdate):
            self.dataToUpdate[data] = createStringObject(('<h1 onmouseover=alert(1)>').encode('ascii'))

    def __copyPDF(self):
        """Add every input page to the writer and write the output file."""
        for page in range(0, self.pdfInput.getNumPages()):
            self.pyPdfOutput.addPage(self.pdfInput.getPage(page))
        # Closing the output guarantees the written data reaches the disk.
        with open(self.__changeName(), "wb") as outputFile:
            self.pyPdfOutput.write(outputFile)

    def __changeName(self):
        """Return the output path: the source path without its extension plus "5.pdf"."""
        import os
        # splitext leaves a path without an extension intact; slicing at
        # rfind(".") would cut off its last character (rfind returns -1).
        root, _extension = os.path.splitext(self.pathFile)
        return root + "5.pdf"
开发者ID:overxfl0w,项目名称:Grampus-Forensic-Utils,代码行数:28,代码来源:cleanpdf.py

示例7: save

    def save(self, to):
        """Render each page class over the origin PDF's first page and write the result.

        For every entry of ``self.pages`` a page is produced by
        ``page_class(self.instance).save()`` and merged onto a copy of the
        first page of the origin PDF.

        :param to: destination path, or an already opened writable stream;
            the stream is closed after writing in both cases
        :raises RuntimeError: when ``get_origin()`` returns nothing or the
            origin file cannot be opened
        """
        import copy

        origin = self.get_origin()

        if not origin:
            raise RuntimeError("Please implement get_origin method or origin attribute")

        origin_file = None
        try:
            origin_file = open(origin, "rb")
            existing_pdf = PdfFileReader(origin_file)
        except IOError:
            if origin_file is not None:
                origin_file.close()
            raise RuntimeError(u"Failed to open origin file")

        try:
            output = PdfFileWriter()

            for page_class in self.pages:
                new_page = page_class(self.instance).save()

                # getPage(0) hands back the same page object on every call;
                # merging into it directly would stack every overlay on one
                # shared page, so each output page gets its own copy.
                base_page = copy.copy(existing_pdf.getPage(0))
                base_page.mergePage(new_page)
                output.addPage(base_page)

            if isinstance(to, basestring):
                outputStream = open(to, "wb")
            else:
                outputStream = to

            try:
                output.write(outputStream)
            finally:
                outputStream.close()
        finally:
            # The origin is needed until the output is written; close it after.
            origin_file.close()
开发者ID:wpjunior,项目名称:pdforms,代码行数:27,代码来源:document.py

示例8: choose_file

	def choose_file(self,widget,data=None):
		"""Let the user pick a PDF and load its extracted text into ``textbuffer``.

		``widget`` and ``data`` are not used by this method. The text of
		all pages is joined, non-breaking spaces are replaced and runs of
		whitespace are collapsed to single spaces before the text is set on
		the module-level ``textbuffer``.
		"""
		global textbuffer
		dialog = gtk.FileChooserDialog("Open..",
			None,
			gtk.FILE_CHOOSER_ACTION_OPEN,
			(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,
			gtk.STOCK_OPEN, gtk.RESPONSE_OK))
		dialog.set_default_response(gtk.RESPONSE_OK)
		pdf_filter = gtk.FileFilter()
		pdf_filter.set_name("PDF files")
		pdf_filter.add_pattern("*.pdf")
		dialog.add_filter(pdf_filter)

		try:
			response = dialog.run()

			if response == gtk.RESPONSE_OK:
				filename = dialog.get_filename()
				print("%s selected" % (filename,))
				from pyPdf import PdfFileReader
				# Read the file the user selected, not a fixed file name.
				with open(filename, "rb") as pdf_file:
					pdf = PdfFileReader(pdf_file)
					# Extract the text of every page while the file is open
					page_texts = [pdf.getPage(i).extractText()
						for i in range(pdf.getNumPages())]
				# Pages are separated by a real newline and non-breaking
				# spaces are replaced ("/n" and "/xa0" were mistyped escapes)
				content = "\n".join(page_texts).replace(u"\xa0", " ")
				# Collapse whitespace
				content = " ".join(content.split())
				textbuffer.set_text(content)

			elif response == gtk.RESPONSE_CANCEL:
				print('Closed, no files selected')
		finally:
			# The dialog is destroyed even when reading the PDF fails.
			dialog.destroy()
开发者ID:JayavasanthRamesh,项目名称:SpeakOut,代码行数:32,代码来源:main.py

示例9: watermark

    def watermark( self, pdfStr, watermarkFile, spec ):
        """Combine pages of a generated PDF with the first page of a watermark PDF.

        For every selected page, the document page is merged onto a copy of
        the watermark's first page; all other pages are passed through.

        :param pdfStr: handed straight to PdfFileReader -- presumably a
            file-like object holding the generated PDF
        :param watermarkFile: path of the watermark PDF
        :param spec: ``Mark.FIRST_PAGE`` marks only the first page,
            ``Mark.ALL_PAGES`` marks every page, any other value marks none
        :return: ``self.outputFile`` when it is set (the PDF is written
            there), otherwise the PDF data as returned by ``getvalue()``
        """
        # The watermark file is needed until the output has been written and
        # is closed afterwards, also when an error occurs.
        with open( watermarkFile, "rb" ) as watermarkStream:
            # Read the watermark- and document pdf file
            inputWatermark = PdfFileReader( watermarkStream )
            generatedPdf = PdfFileReader( pdfStr )
            outputPdf = PdfFileWriter()

            # flag for the first page of the source file
            firstPage = True

            # Loop over source document pages and merge with the first page
            # of the watermark file.
            watermarkPage = inputWatermark.getPage(0)
            for page in generatedPdf.pages:
                if (spec == Mark.FIRST_PAGE and firstPage) or spec == Mark.ALL_PAGES:
                    # copy the watermark page here (copy.copy, a shallow
                    # copy), otherwise the watermark page gets merged over
                    # and over because p would only be a reference
                    p = copy.copy( watermarkPage )
                    p.mergePage( page )
                    outputPdf.addPage( p )
                    firstPage = False
                else:
                    outputPdf.addPage(page)

            if self.outputFile:
                # Write to outputfile
                with open( self.outputFile, "wb" ) as outputStream:
                    outputPdf.write( outputStream )
                return self.outputFile

            stringIO = StringIO.StringIO()
            outputPdf.write( stringIO )
            return stringIO.getvalue()
开发者ID:KDE,项目名称:kraft,代码行数:33,代码来源:erml2pdf.py

示例10: output

 def output(self):
     """Ask for a destination file and export the selected pages to it.

     The page selection is read from ``self.pages_line_edit`` and parsed by
     ``pages_parser``; the pages are taken from ``self.in_filename``.
     Nothing is written when no destination file name is chosen.
     """
     # get the output filename using the file dialog
     (out_filename, _selected_filter) = \
         QFileDialog.getSaveFileName(parent = self,
                                     caption = self.tr(u'Export'),
                                     dir = '',
                                     filter = self.tr('pdf (*.pdf)'))

     # an empty file name (dialog dismissed) cannot be opened: nothing to do
     if not out_filename:
         return

     # file IO: both files are closed even when an error occurs
     with open(self.in_filename, 'rb') as in_file:
         in_reader = PdfFileReader(in_file)
         out_writer = PdfFileWriter()

         # extract input
         pages_string = self.pages_line_edit.text()

         # Get the indices of pages  to extract
         pages = pages_parser(in_reader.getNumPages()).parse(pages_string)

         # append pages to output writer
         for page_index in pages:
             out_writer.addPage(in_reader.getPage(page_index))

         # write to file; the destination is only opened (and truncated)
         # once the page selection has been parsed successfully
         with open(out_filename, 'wb') as out_file:
             out_writer.write(out_file)
开发者ID:lokcon,项目名称:py-pdfsplit,代码行数:30,代码来源:pdf-split.py

示例11: showpdf

def showpdf(request):
    """Return the first page of an extracted PDF with the signature image stamped on it.

    GET parameters:
    f -- name of the PDF below MEDIA_ROOT/pdffiles/extracted
    o -- 'l' places the signature at x=529, anything else (or nothing) at x=70

    Returns an HttpResponse with the stamped page as attachment, or None
    when 'f' is missing or points outside the extracted-files directory.
    """
    sign = os.path.join(settings.MEDIA_ROOT, "signature.png")
    mimetypes.init()
    response = None
    if 'f' in request.GET:
        # 'f' comes from the request: refuse anything that resolves outside
        # the directory of extracted files (absolute paths, "..").
        base_dir = os.path.abspath(
            os.path.join(settings.MEDIA_ROOT, 'pdffiles', 'extracted'))
        pdf_path = os.path.abspath(
            os.path.join(base_dir, '%s' % request.GET['f']))
        if not pdf_path.startswith(base_dir + os.sep):
            return response

        imgTemp = StringIO()
        imgDoc = canvas.Canvas(imgTemp)
        # a missing 'o' parameter falls back to the default position
        if request.GET.get('o') == 'l':
            imgDoc.drawImage(sign, 529, 40, 290/2, 154/2)
        else:
            imgDoc.drawImage(sign, 70, 40, 290/2, 154/2)

        imgDoc.save()
        overlay = PdfFileReader(StringIO(imgTemp.getvalue())).getPage(0)

        # the source stays open until the response is written, then closes
        with open(pdf_path, "rb") as fr:
            page = PdfFileReader(fr).getPage(0)

            page.mergePage(overlay)
            pdf_out = PdfFileWriter()
            pdf_out.addPage(page)
            response = HttpResponse(mimetype='application/pdf')
            response['Content-Disposition'] = 'attachment; filename=%s' % request.GET['f']

            pdf_out.write(response)

    return response
开发者ID:nisiotis,项目名称:dideman,代码行数:27,代码来源:views.py

示例12: add_guides

    def add_guides(self):
        """Copy ``sig.pdf`` to ``sigs.pdf`` with guides merged onto the first page.

        The guides PDF is produced by ``create_pdf``: long-arm guides when
        ``self.args.longarm`` is set, short-arm guides otherwise (sized by
        ``self.args.a5``). All other pages are copied unchanged.
        """
        # Both files are closed when done, also when an error occurs.
        with open('sig.pdf', 'rb') as sig_file:
            pdf_in = PdfFileReader(sig_file)
            pdf_out = PdfFileWriter()

            for i in range(pdf_in.getNumPages()):
                page = pdf_in.getPage(i)
                if i == 0:
                    # only the first page receives the guides
                    guides = StringIO()

                    if self.args.longarm:
                        create_pdf(
                            guides, a4lwidth_pt, a4lheight_pt, generate_longarm())
                    else:
                        if self.args.a5:
                            w, h = a5width_pt, a5height_pt
                        else:
                            w, h = a4lwidth_pt, a4lheight_pt
                        create_pdf(guides, w, h, generate_shortarm(
                            self.args.a5, bool(self.args.signature)))

                    pdf_guides = PdfFileReader(guides)
                    page.mergePage(pdf_guides.getPage(0))
                pdf_out.addPage(page)

            with open('sigs.pdf', 'wb') as out_file:
                pdf_out.write(out_file)
开发者ID:pb-,项目名称:mkbooklet,代码行数:25,代码来源:main.py

示例13: read_neb_enzyme_price_list

def read_neb_enzyme_price_list():
    """Download the price list PDF at NEB_PRICE_LIST_URL and parse its price lines.

    Returns a list of (name, small_cost, large_cost, small_unit, large_unit)
    tuples sorted by name; the numeric fields are converted with int_comma.
    """
    # throws URLError, IOError
    price_list = urllib2.urlopen(NEB_PRICE_LIST_URL)
    try:
        file_buffer = StringIO(price_list.read())
    finally:
        # the whole document is buffered, the connection is no longer needed
        price_list.close()

    reader = PdfFileReader(file_buffer)
    enzymes = []
    for p in range(reader.getNumPages()):
        # fi/fl misread hacks-- little nasty in here-- poor PDF read
        page_text = reader.getPage(p).extractText().replace(u'\u02dc','fi').replace(u'˚','fl')
        for match in NEB_PRICE_LINE_RE.finditer(page_text):
            # format of the groups will be: name prefix, lastletter(+supplement)+small_cost, supplement, large_cost, small_unit, large_unit
            name_prefix, transition, supplement, large_cost, small_unit, large_unit = match.groups()
            if supplement:
                # the name runs up to the end of the supplement, the small cost follows it
                carryover = transition.index(supplement)+len(supplement)
            else:
                # without supplement only the first character belongs to the name
                carryover = 1
            name = "%s%s" % (name_prefix, transition[:carryover])
            small_cost = int_comma(transition[carryover:])

            large_cost = int_comma(large_cost)
            small_unit = int_comma(small_unit)
            large_unit = int_comma(large_unit)

            enzymes.append((name, small_cost, large_cost, small_unit, large_unit))

    return sorted(enzymes, key=operator.itemgetter(0))
开发者ID:v-makarenko,项目名称:vtoolsmq,代码行数:27,代码来源:neb.py

示例14: generate

def generate(donor):
    """Build ``output/<donor-slug>.pdf`` from the two SVG page templates.

    For each template (``page1_svg``, ``page2_svg``) the first "/France/" on
    every line is replaced by the donor name (spaces as "%20"), the SVG is
    updated and saved through inkscape and then exported to PDF. The two
    first pages are combined into one PDF. Nothing is done when the combined
    PDF already exists.

    :param donor: donor name; spaces become "-" and the name is lower-cased
        for the file names
    """
    import subprocess

    if not os.path.isdir('output'):
        os.makedirs('output')
    donor_url = donor.replace(' ', '%20')
    slug = donor.replace(' ', '-').lower()

    combined = 'output/%s.pdf' % slug
    if os.path.exists(combined):
        return

    page_bases = ['output/%s1' % slug, 'output/%s2' % slug]
    # Commands are run from argument lists, without a shell, so characters
    # in the donor name cannot be interpreted as shell syntax.
    devnull = open(os.devnull, 'w')
    try:
        for template_svg, page_base in zip((page1_svg, page2_svg), page_bases):
            svg_path = '%s.svg' % page_base
            # sed writes the donor-specific copy of the template
            with open(svg_path, 'wb') as svg_file:
                subprocess.call(
                    ['sed', 's|/France/|/%s/|' % donor_url, template_svg],
                    stdout=svg_file)
            subprocess.call(
                ['inkscape', '--file=%s' % svg_path,
                 '--verb=za.co.widgetlabs.update', '--verb=FileSave',
                 '--verb=FileQuit'],
                stderr=devnull)
            subprocess.call(
                ['inkscape', '--file=%s' % svg_path,
                 '--export-pdf=%s.pdf' % page_base],
                stderr=devnull)
    finally:
        devnull.close()

    # Merge pages
    output = PdfFileWriter()
    with open('%s.pdf' % page_bases[0], 'rb') as first_pdf:
        with open('%s.pdf' % page_bases[1], 'rb') as second_pdf:
            output.addPage(PdfFileReader(first_pdf).getPage(0))
            output.addPage(PdfFileReader(second_pdf).getPage(0))
            with open(combined, 'wb') as outputStream:
                output.write(outputStream)
    sleep(2)
开发者ID:adieyal,项目名称:who-scorecards,代码行数:27,代码来源:generate.py

示例15: split_pset

def split_pset():
    """Split ``pset<N>/latex/pset<N>_answers.pdf`` into one PDF per question.

    ``options.pset`` selects the problem set and ``options.probs`` is a
    comma-separated list with one page specification per question, expanded
    by ``get_pages``. Question k is written to
    ``pset<N>/latex/pset<N>-<k>_answer.pdf``.

    Errors are reported through ``print_err_and_die`` (presumably it ends
    the program -- confirm).
    """
    if (not options.pset or not options.probs):
        print_err_and_die("You must enter both arguements! run with -h for help")

    path = "pset%s/latex/"%options.pset
    filename = "%spset%s_answers.pdf"%(path, options.pset)
    answers_file = None
    try:
        answers_file = open(filename, "rb")
        inp = PdfFileReader(answers_file)
    except IOError:
        if answers_file is not None:
            answers_file.close()
        print_err_and_die("Error! File, %s was not found." % filename)
        return

    try:
        ##loop over user input and break up pdf
        probs = options.probs.split(",")
        for questionNum, prob in enumerate(probs, 1):
            print("Processing question %s" % (questionNum,))

            prob = prob.strip() #kill whitespace

            out = PdfFileWriter()
            pages = get_pages(prob, inp.getNumPages())

            for page in pages:
                print("page num %s" % (page,))
                # page numbers are 1-based, getPage expects 0-based indexes
                out.addPage(inp.getPage(int(page)-1))

            out_name = "%spset%s-%s_answer.pdf"%(path, options.pset, questionNum)
            with open(out_name, "wb") as outStream:
                out.write(outStream)
    finally:
        # the answers file is needed while questions are written
        answers_file.close()

    print("Done!")
开发者ID:joshblum,项目名称:6.046-templating,代码行数:32,代码来源:split_pset.py


注:本文中的pyPdf.PdfFileReader类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。