当前位置: 首页>>代码示例>>Python>>正文


Python PdfFileReader.getDocumentInfo方法代码示例

本文整理汇总了Python中pyPdf.PdfFileReader.getDocumentInfo方法的典型用法代码示例。如果您正苦于以下问题:Python PdfFileReader.getDocumentInfo方法的具体用法?Python PdfFileReader.getDocumentInfo怎么用?Python PdfFileReader.getDocumentInfo使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在pyPdf.PdfFileReader的用法示例。


在下文中一共展示了PdfFileReader.getDocumentInfo方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: pre_save_handler

# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import getDocumentInfo [as 别名]
 def pre_save_handler(sender, instance, **kwargs):
     """Populate PDF-derived fields on ``instance`` before it is saved.

     Reads ``instance.pdf_file`` with ``PdfFileReader`` and copies the page
     count, title, author and the document-info object onto ``instance``.
     ``sender`` and ``**kwargs`` are accepted but not used here.
     """
     reader = PdfFileReader(instance.pdf_file)
     instance.num_pages = reader.numPages

     # Query the document info once and reuse it instead of asking the
     # reader again for every attribute.
     info = reader.getDocumentInfo()
     instance.title = info.title
     instance.author = info.author
     instance.info = info

     # Debug trace of the extracted title.
     print("title = %s" % (info.title))
开发者ID:assomy,项目名称:thesis,代码行数:9,代码来源:models (another copy).py

示例2: add

# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import getDocumentInfo [as 别名]
def add(request):
    """
    Upload a document.

    On a POST with a valid form, the document is built from the form
    (uncommitted), the requesting user is attached, and title/author are
    read from the uploaded PDF's document info. If reading the PDF fails,
    placeholder values are stored instead. The document is then saved and
    the client is redirected to its edit page.

    For any other request method, or an invalid form, ``add.html`` is
    rendered with the form in its context.
    """
    if request.method == "POST":
        form = AddDocumentForm(request.POST, request.FILES)
        if form.is_valid():
            document = form.save(commit=False)
            document.user = request.user

            try:
                from pyPdf import PdfFileReader
                info = PdfFileReader(document.file).getDocumentInfo()
                title, author = info.title, info.author
            except Exception:
                # Best effort: a missing pyPdf, an unreadable PDF or a PDF
                # without document info all fall back to placeholders.
                # ``Exception`` (not a bare ``except``) lets
                # KeyboardInterrupt/SystemExit propagate.
                title = "( Insert title )"
                author = "( Insert author )"

            document.title = title
            document.author = author
            document.save()
            return HttpResponseRedirect('/documents/edit/' + str(document.id))
    else:
        form = AddDocumentForm()

    context = {
        'form': form,
    }
    return render_to_response('add.html', context,
                              context_instance=RequestContext(request))
开发者ID:CNDLS,项目名称:eportfoliodemo,代码行数:35,代码来源:views.py

示例3: get_pdf_text

# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import getDocumentInfo [as 别名]
    def get_pdf_text(self, response):
        """ Peek inside PDF to check possible violations.

        @param response: object whose ``body`` attribute holds the raw PDF data
        @return: PDF content as searchable plain-text string (the document
                 title, if any, followed by the text of every page)
        @raise ImportError: if pyPdf is not installed
        """

        try:
            from pyPdf import PdfFileReader
        except ImportError:
            print("Needed: easy_install pyPdf")
            raise

        reader = PdfFileReader(StringIO.StringIO(response.body))

        parts = []

        info = reader.getDocumentInfo()
        # Both the info dictionary and the title inside it are optional:
        # getDocumentInfo() may return None and the title may be None.
        if info is not None and info.title:
            parts.append(info.title)

        # XXX: Does handle unicode properly?
        parts.extend(page.extractText() for page in reader.pages)

        # Join once instead of growing a string page by page.
        return u"".join(parts)
开发者ID:ponyboy226,项目名称:verbclub-spider,代码行数:28,代码来源:spiders.py

示例4: PDFInfo

# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import getDocumentInfo [as 别名]
def PDFInfo(inputfiles):
    """Print useful information about each PDF file, then overall totals.

    For every path in ``inputfiles`` this prints the title, size in KBytes,
    two timestamps from ``os.stat`` (formatted by ``_formatDate``), page
    count, author, creator and producer. Afterwards the summed page count
    and summed size are printed. Nothing is returned.
    """
    totalpagenum = 0
    totalfilesize = 0
    for inputfile in inputfiles:
        # Keep the file open only while the reader needs it, so handles are
        # released even when many files are processed.
        with open(inputfile, "rb") as handle:
            input1 = PdfFileReader(handle)
            fileinfo = os.stat(inputfile)
            # Stat tuple index 6 is st_size (bytes); floor division keeps the
            # size a whole number of KBytes.
            filesizekb = fileinfo[6] // 1024
            pagenum = input1.getNumPages()
            # One lookup of the document info serves all four fields below.
            docinfo = input1.getDocumentInfo()
            print("\n\n")
            print(inputfile)
            print("\n")
            print("\tTitel:\t\t %s" % (docinfo.title))
            print("\tGroesse (KBytes):\t\t %s" % (filesizekb))
            # Stat tuple index 8 is st_mtime, index 9 is st_ctime.
            print("\tzuletzt geaendert:\t\t %s" % (_formatDate(fileinfo[8])))
            print("\terstellt:\t\t %s" % (_formatDate(fileinfo[9])))
            print("\tSeiten :\t\t %s" % (pagenum))
            print("\tAutor :\t\t %s" % (docinfo.author))
            print("\tQuelldokument erstellt mit :\t\t %s" % (docinfo.creator))
            print("\tIn PDF konvertiert durch :\t\t %s" % (docinfo.producer))
        totalpagenum += pagenum
        totalfilesize += filesizekb
    print(80 * "-")
    print("GESAMTINFO:")
    print("Seiten insgesamt: %s" % (totalpagenum))
    print("Dateigroesse insgesamt (kb): %s" % (totalfilesize))

示例5: validate

# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import getDocumentInfo [as 别名]
 def validate(self, path, ext):
     """Check whether the file at ``path`` can be read as a PDF.

     Opens the file in binary mode, builds a ``PdfFileReader`` on it,
     requests the document info and iterates over every page. ``ext`` is
     accepted but not used.

     Returns ``(0, "")`` when nothing raised, otherwise ``(1, message)``
     where ``message`` is ``str()`` of the exception.
     """
     try:
         with open(path, "rb") as stream:
             reader = PdfFileReader(stream)
             reader.getDocumentInfo()
             # The loop body is empty: the pages are only iterated,
             # presumably so that broken page objects raise here.
             for _page in reader.pages:
                 pass
     except Exception as error:
         # AssertionError is a subclass of Exception, so this single handler
         # reports it in the same way as any other failure.
         return (1, str(error))
     else:
         return (0, "")
开发者ID:Dobatymo,项目名称:pyfilevalidate,代码行数:14,代码来源:PDF.py

示例6: printMeta

# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import getDocumentInfo [as 别名]
def printMeta(fileName):
    """Print the title and every document-info entry of a PDF file.

    ``fileName`` is the path of the PDF. ``get_toc(fileName)`` is also
    invoked; its result is not used here.
    """
    # The context manager closes the file once all metadata has been read.
    with open(fileName, "rb") as stream:
        pdfFile = PdfFileReader(stream)
        # getDocumentInfo() may return None when the PDF carries no info
        # dictionary; treat that as "no title, no entries".
        docInfo = pdfFile.getDocumentInfo()
        title = docInfo.title if docInfo is not None else None

        print("title = %s" % (title))
        print("[*] PDF MetaData For: " + str(fileName))

        # NOTE(review): the return value is unused; the call is kept in case
        # get_toc has side effects -- confirm and drop it if it has none.
        get_toc(fileName)

        for metaItem in (docInfo or {}):
            # %-formatting also copes with values that are not strings,
            # where "+" concatenation would raise TypeError.
            print("[+]%s:%s" % (metaItem, docInfo[metaItem]))
开发者ID:erexhepa,项目名称:IF_COLOC_ENGINE,代码行数:14,代码来源:pdf_metadata.py

示例7: printMeta

# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import getDocumentInfo [as 别名]
def printMeta(fileName):
    """Print every document-info entry of the PDF at path ``fileName``."""
    # Read inside a ``with`` block so the file handle is always released.
    with open(fileName, 'rb') as stream:
        docInfo = PdfFileReader(stream).getDocumentInfo()
        print('[*] PDF MetaData For:'  + str(fileName))
        # ``docInfo`` may be None for PDFs without an info dictionary; in
        # that case only the header line is printed.
        for metaItem in (docInfo or {}):
            # Formatting instead of "+" so non-string values do not raise.
            print('[+]%s:%s' % (metaItem, docInfo[metaItem]))
开发者ID:bosz,项目名称:python-class-projects,代码行数:9,代码来源:pdf-metadata-extracractor.py

示例8: print_meta

# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import getDocumentInfo [as 别名]
def print_meta(file_name):
    """Print a highlighted header and all document-info entries of a PDF.

    ``file_name`` is the path of the PDF file. The header is wrapped in
    ``Style.BRIGHT``/``Back.GREEN`` and terminated with ``Style.RESET_ALL``.
    """
    # The file stays open only for the duration of the metadata dump.
    with open(file_name, 'rb') as stream:
        pdf = PdfFileReader(stream)
        info = pdf.getDocumentInfo()
        print(Style.BRIGHT + Back.GREEN + 'PDF MetaData For: ' + str(file_name) + Style.RESET_ALL)

        if info is None:
            # No info dictionary in this PDF: nothing more to list.
            return

        for metaItem in info:
            # Formatting tolerates values that are not plain strings.
            print('[+] %s:%s' % (metaItem, info[metaItem]))
开发者ID:JackStouffer,项目名称:Violent-Python,代码行数:9,代码来源:pdf_metadata.py

示例9: main

# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import getDocumentInfo [as 别名]
def main():
    """Print the document-info metadata of the PDF named by the last CLI argument."""
    # The last command-line argument is taken as the PDF path.
    fileName = sys.argv[-1]
    # ``with`` guarantees the file is closed after the metadata is printed.
    with open(fileName, 'rb') as stream:
        pdfFile = PdfFileReader(stream)
        info = pdfFile.getDocumentInfo()
        print("The Metadata for the file" + fileName + " are: \n")
        # ``info`` may be None when the PDF has no info dictionary.
        for line in (info or {}):
            # Formatting avoids a TypeError for non-string values.
            print("%s : %s" % (line, info[line]))
开发者ID:roskyfrosky,项目名称:Hacking,代码行数:9,代码来源:PdfMetadata.py

示例10: _getPDFText

# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import getDocumentInfo [as 别名]
    def _getPDFText(self, filename, d):
        """Collect document-info entries and per-page text from a PDF.

        Each document-info entry is passed to ``d.addConceptKeyType`` with
        the first character of its key removed (presumably the leading "/"
        of PDF names such as "/Author" -- confirm against real data).

        Returns a list with one item per page: the page text, reduced to
        ASCII when it is not already a ``str``, followed by ". ". If any
        exception occurs it is logged and ``None`` is returned.
        """
        logger.debug(u"filename: %s" % filename)
        newparatextlist = list()

        try:
            # Keep the file open only while the reader is in use.
            with open(filename, u"rb") as stream:
                pdfDoc = PdfFileReader(stream)

                # The info dictionary is optional; without this guard a PDF
                # lacking it would abort text extraction altogether.
                pdfDict = pdfDoc.getDocumentInfo() or {}

                for key in pdfDict:
                    d.addConceptKeyType(key[1:], pdfDict[key])

                for page in pdfDoc.pages:
                    text = page.extractText()
                    if not isinstance(text, str):
                        # Keep the normalised result; computing it without
                        # assigning it back had no effect on ``text``.
                        text = unicodedata.normalize(u'NFKD', text).encode(u'ascii', u'ignore')

                    logger.debug(u"PDF : %s" % text)

                    newparatextlist.append(text + u". ")

            return newparatextlist

        except Exception as msg:
            logger.error(u"%s" % msg)
            return None
开发者ID:Darth-Neo,项目名称:DirCrawler,代码行数:29,代码来源:nl_phase_a_DirCrawl.py

示例11: get_metadata

# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import getDocumentInfo [as 别名]
def get_metadata(pdf):
    """Print the document-info dictionary of the PDF file at path ``pdf``.

    The information is only printed; the function returns ``None``.
    """
    # ``with`` closes the file handle as soon as the info has been printed.
    with open(pdf, "rb") as stream:
        pdf_info = PdfFileReader(stream).getDocumentInfo()
        print(str(pdf_info))
开发者ID:bosz,项目名称:python-class-projects,代码行数:9,代码来源:pdf.py

示例12: test_backlog_list

# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import getDocumentInfo [as 别名]
    def test_backlog_list(self):
        """Print a backlog's stories as PDF in two layouts and check the output.

        Creates a user, a sample backlog and ten sample stories (one story
        gets ``points = -1``), then submits the print form twice
        (long side / A4, short side / letter) and checks content type, page
        count, author and the first page's media box of each PDF.
        """
        user = factories.UserFactory.create(
            email='[email protected]', password='pass')
        backlog = factories.create_project_sample_backlog(user)
        for _ in range(10):
            factories.create_sample_story(user, backlog=backlog)

        # special printing of -1 points
        story = factories.UserStory.objects.all()[0]
        story.points = -1
        story.save()

        url_plus = "{0}?backlog_id={1}".format(
            reverse("print_stories"), backlog.pk)
        # A request without a user is expected to answer with status 302.
        self.app.get(url_plus, status=302)

        def render_pdf(side, paper):
            # Select every "story-" field, submit the print form with the
            # given options and return a reader on the resulting PDF after
            # the checks shared by both layouts.
            page = self.app.get(url_plus, user=user)
            print_form = page.forms['print_pdf_form']
            for name, _field in print_form.fields.items():
                if name and "story-" in name:
                    print_form[name] = True
            print_form['print-side'] = side
            print_form['print-format'] = paper
            result = print_form.submit()
            self.assertEqual(result['Content-Type'], "application/pdf")
            document = PdfFileReader(StringIO.StringIO(result.content))
            doc_info = document.getDocumentInfo()
            self.assertEqual(document.getNumPages(), 6)
            self.assertEqual("backlogman.com", doc_info['/Author'])
            return document

        a4_pdf = render_pdf("long", "a4")
        # A4 is not "round" in PDF unit format real value are
        # approximately : [0, 0, 841.88980, 595.27560]
        a4_box = [int(x) for x in a4_pdf.getPage(0)["/MediaBox"]]
        self.assertEqual([0, 0, 841, 595], a4_box)

        letter_pdf = render_pdf("short", "letter")
        self.assertEqual([0, 0, 792, 612], letter_pdf.getPage(0)["/MediaBox"])
开发者ID:mmsepyx,项目名称:facile_backlog,代码行数:49,代码来源:test_print_pdf.py

示例13: printMeta

# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import getDocumentInfo [as 别名]
def printMeta(filename):
    """Print a header line and each document-info entry of a PDF file.

    ``filename`` is the path of the PDF to inspect.
    """
    # Open via ``with`` so the handle is closed even if parsing fails.
    with open(filename, 'rb') as stream:
        pdfFile = PdfFileReader(stream)
        docInfo = pdfFile.getDocumentInfo()

        print('[+] PDF MetaData for : ' + str(filename))
        if docInfo is not None:
            # A PDF without an info dictionary yields None; skip the listing.
            for metaItem in docInfo:
                # %-formatting accepts non-string values as well.
                print('[+]%s:%s' % (metaItem, docInfo[metaItem]))
开发者ID:0x7ab00,项目名称:violent-python-exercises,代码行数:10,代码来源:pdfRead.py

示例14: extractTitle

# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import getDocumentInfo [as 别名]
def extractTitle(dirPath):
    """Return the PDF titles of the files listed for ``dirPath``.

    The file names come from ``listFileNames(dirPath)``. The result maps
    the 1-based position of each file in that listing to the title from its
    document info, or to ``None`` when the PDF has no document info.
    """
    fileTitles = {}
    for position, name in enumerate(listFileNames(dirPath), 1):
        # Each file is closed again before the next one is opened.
        with open("%s/%s" % (dirPath, name), 'rb') as stream:
            info = PdfFileReader(stream).getDocumentInfo()
            # getDocumentInfo() may return None; one such PDF must not abort
            # the whole directory scan.
            fileTitles[position] = info.title if info is not None else None
    return fileTitles
开发者ID:kzintun,项目名称:fyp-test,代码行数:10,代码来源:processTitles.py

示例15: get_name

# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import getDocumentInfo [as 别名]
def get_name(filename):
    '''Build a new PDF file name from the document's title and subject.

    Returns "<title>.pdf" when only a title is set, and
    "<title>_<subject>.pdf" when both are set (in the subject, "/" becomes
    "-" and " " becomes "_"). If the PDF has no title, or anything goes
    wrong while reading it, ``filename`` is returned unchanged; in the
    failure case "NO CHANGES!" is printed as well.
    '''
    # Default result, so the failure path below always has a value to return.
    new_name = filename
    try:
        # ``with`` closes the file even when parsing raises.
        with open(filename, "rb") as file_obj:
            info = PdfFileReader(file_obj).getDocumentInfo()
            title = info.title
            subject = info.subject
        if title:
            if not subject:
                new_name = "{0}.pdf".format(str(title))
            else:
                clean_subject = str(subject).replace("/", "-").replace(" ", "_")
                new_name = "{0}_{1}.pdf".format(str(title), clean_subject)
    except Exception:
        # Best effort: unreadable files, PDFs without document info and
        # conversion errors all leave the name as it was.
        print("NO CHANGES!")
        new_name = filename
    return new_name
开发者ID:zhilongjia,项目名称:untility,代码行数:20,代码来源:pdf_rename.py


注:本文中的pyPdf.PdfFileReader.getDocumentInfo方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。