本文整理汇总了Python中pyPdf.PdfFileReader.getDocumentInfo方法的典型用法代码示例。如果您正苦于以下问题:Python PdfFileReader.getDocumentInfo方法的具体用法?Python PdfFileReader.getDocumentInfo怎么用?Python PdfFileReader.getDocumentInfo使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pyPdf.PdfFileReader的用法示例。
在下文中一共展示了PdfFileReader.getDocumentInfo方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: pre_save_handler
# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import getDocumentInfo [as 别名]
def pre_save_handler(sender, instance, **kwargs):
    """Copy PDF-derived values onto ``instance`` before it is saved.

    Opens ``instance.pdf_file`` with ``PdfFileReader`` and stores the page
    count, the title, the author and the complete document-information
    object on ``instance``.  The title is also printed for debugging.

    ``sender`` and ``kwargs`` are accepted but unused (presumably this is
    registered as a Django ``pre_save`` receiver -- confirm at the
    ``connect`` call).
    """
    reader = PdfFileReader(instance.pdf_file)

    # Parse the metadata once instead of four times.  getDocumentInfo()
    # returns None for a PDF without an /Info dictionary, which used to
    # raise AttributeError on ``.title``.
    info = reader.getDocumentInfo()
    title = info.title if info is not None else None
    author = info.author if info is not None else None

    instance.num_pages = reader.numPages
    instance.title = title
    instance.author = author
    instance.info = info
    print("title = %s" % (title,))
示例2: add
# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import getDocumentInfo [as 别名]
def add(request):
    """
    Upload a document.

    On POST with a valid ``AddDocumentForm`` the document is attached to
    ``request.user``, its title and author are taken from the PDF metadata
    (placeholders are used when that fails), it is saved, and the client is
    redirected to the document's edit page.  In every other case the
    ``add.html`` template is rendered with the (possibly invalid) form.
    """
    if request.method == "POST":
        form = AddDocumentForm(request.POST, request.FILES)
        if form.is_valid():
            document = form.save(commit=False)
            document.user = request.user
            try:
                from pyPdf import PdfFileReader
                info = PdfFileReader(document.file).getDocumentInfo()
                # ``info`` is None for PDFs without metadata; the resulting
                # AttributeError deliberately falls through to the
                # placeholders below.
                document.title = info.title
                document.author = info.author
            except Exception:
                # Best effort: a missing pyPdf or an unreadable PDF must not
                # block the upload.  ``Exception`` (rather than a bare
                # ``except``) lets SystemExit/KeyboardInterrupt propagate.
                document.title = "( Insert title )"
                document.author = "( Insert author )"
            document.save()
            return HttpResponseRedirect('/documents/edit/' + str(document.id))
    else:
        form = AddDocumentForm()
    context = {
        'form': form,
    }
    return render_to_response('add.html', context,
                              context_instance=RequestContext(request))
示例3: get_pdf_text
# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import getDocumentInfo [as 别名]
def get_pdf_text(self, response):
    """ Peek inside PDF to check possible violations.

    @param response: object whose ``body`` attribute holds the raw PDF data
    @return: PDF content as searchable plain-text string: the document
        title (when there is one) followed by the text of every page
    @raise ImportError: re-raised after a hint is printed when pyPdf is
        not installed
    """
    try:
        from pyPdf import PdfFileReader
    except ImportError:
        print("Needed: easy_install pyPdf")
        raise

    reader = PdfFileReader(StringIO.StringIO(response.body))

    # Collect the pieces and join once instead of growing a string with +=.
    pieces = [u""]

    # Both the info dictionary and the title are optional: getDocumentInfo()
    # returns None when the PDF has no /Info entry.
    info = reader.getDocumentInfo()
    if info is not None and info.title:
        pieces.append(info.title)

    for page in reader.pages:
        # XXX: Does handle unicode properly?
        pieces.append(page.extractText())

    return u"".join(pieces)
示例4: PDFInfo
# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import getDocumentInfo [as 别名]
def PDFInfo(inputfiles):
    """Print useful information about each PDF file, then overall totals.

    For every path in ``inputfiles`` the title, size in KBytes, the two
    timestamps taken from ``os.stat``, page count, author, creator and
    producer are printed.  After the last file a separator line and the
    summed page count and size are printed.
    """
    totalpagenum = 0
    totalfilesize = 0
    for inputfile in inputfiles:
        # The reader pulls data from the stream on demand, so every pyPdf
        # call stays inside the ``with`` block; the handle used to leak.
        with open(inputfile, "rb") as stream:
            input1 = PdfFileReader(stream)
            fileinfo = os.stat(inputfile)
            # Index 6 of the stat tuple is the size in bytes; ``//`` keeps
            # the whole-kilobyte integer division of the original.
            filesizekb = fileinfo[6] // 1024
            pagenum = input1.getNumPages()

            # Parse the metadata once.  getDocumentInfo() returns None for
            # a PDF without an /Info dictionary; print "None" in that case
            # rather than failing with AttributeError.
            docinfo = input1.getDocumentInfo()
            title = docinfo.title if docinfo is not None else None
            author = docinfo.author if docinfo is not None else None
            creator = docinfo.creator if docinfo is not None else None
            producer = docinfo.producer if docinfo is not None else None

            print("\n\n")
            print(inputfile)
            print("\n")
            print("\tTitel:\t\t %s" % (title,))
            print("\tGroesse (KBytes):\t\t %s" % (filesizekb,))
            # Indices 8 and 9 of the stat tuple are st_mtime and st_ctime.
            print("\tzuletzt geaendert:\t\t %s" % (_formatDate(fileinfo[8]),))
            print("\terstellt:\t\t %s" % (_formatDate(fileinfo[9]),))
            print("\tSeiten :\t\t %s" % (pagenum,))
            print("\tAutor :\t\t %s" % (author,))
            print("\tQuelldokument erstellt mit :\t\t %s" % (creator,))
            print("\tIn PDF konvertiert durch :\t\t %s" % (producer,))

        totalpagenum += pagenum
        totalfilesize += filesizekb

    print(80 * "-")
    print("GESAMTINFO:")
    print("Seiten insgesamt: %s" % (totalpagenum,))
    print("Dateigroesse insgesamt (kb): %s" % (totalfilesize,))
示例5: validate
# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import getDocumentInfo [as 别名]
def validate(self, path, ext):
    """Check whether the file at ``path`` can be read as a PDF.

    The file is opened, its document information is requested and every
    page is visited once.

    @param path: location of the file to check
    @param ext: accepted but not used by this implementation
    @return: ``(0, "")`` when nothing raised, otherwise ``(1, message)``
    """
    try:
        with open(path, "rb") as stream:
            pdf = PdfFileReader(stream)
            pdf.getDocumentInfo()
            # Nothing is done with the pages; presumably the loop exists
            # only to make the reader parse each one.
            for _page in pdf.pages:
                pass
    except Exception as exc:
        # AssertionError is an Exception subclass and was handled exactly
        # like every other error, so one handler is enough.  Exceptions
        # raised without a message (e.g. a bare ``assert``) stringify to
        # "", so fall back to the class name to keep the report useful.
        return (1, str(exc) or type(exc).__name__)
    return (0, "")
示例6: printMeta
# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import getDocumentInfo [as 别名]
def printMeta(fileName):
    """Print the title and every document-information entry of a PDF.

    ``get_toc(fileName)`` is still called between the header and the
    entries, as before; its result is not used here.
    """
    # Keep the file open while pyPdf resolves values, close it afterwards
    # (the handle used to leak).
    with open(fileName, "rb") as stream:
        pdfFile = PdfFileReader(stream)

        # getDocumentInfo() returns None for a PDF without /Info.
        docInfo = pdfFile.getDocumentInfo()
        title = docInfo.title if docInfo is not None else None

        # The former ``pginput = pdfFile.getPage(1)`` was dropped: its
        # result was never used and index 1 is the *second* page, so it
        # failed on single-page documents.
        print("title = %s" % (title,))
        print("[*] PDF MetaData For: " + str(fileName))

        # NOTE(review): result was only bound to an unused local; the call
        # is kept in case get_toc has side effects -- confirm and remove.
        get_toc(fileName)

        for metaItem in (docInfo or {}):
            # %-formatting also copes with values that are not strings.
            print("[+]%s:%s" % (metaItem, docInfo[metaItem]))
示例7: printMeta
# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import getDocumentInfo [as 别名]
def printMeta(fileName):
    """Print every document-information entry of the PDF at ``fileName``.

    A header line naming the file is printed first, then one
    ``[+]key:value`` line per metadata entry.
    """
    # The reader pulls data from the stream on demand, so the entries are
    # printed inside the ``with`` block; the handle used to be left open.
    with open(fileName, 'rb') as stream:
        docInfo = PdfFileReader(stream).getDocumentInfo()
        print('[*] PDF MetaData For:' + str(fileName))
        # getDocumentInfo() returns None for a PDF without /Info; treat
        # that as "no entries" instead of failing on iteration.
        for metaItem in (docInfo or {}):
            # %-formatting also copes with values that are not strings.
            print('[+]%s:%s' % (metaItem, docInfo[metaItem]))
示例8: print_meta
# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import getDocumentInfo [as 别名]
def print_meta(file_name):
    """Print a highlighted header and all metadata entries of a PDF.

    The header is wrapped in ``Style.BRIGHT``/``Back.GREEN`` and reset with
    ``Style.RESET_ALL``; each entry is printed as ``[+] key:value``.
    """
    # Entries are resolved while the file is still open; the handle is
    # closed on exit instead of being leaked.
    with open(file_name, 'rb') as stream:
        info = PdfFileReader(stream).getDocumentInfo()
        print(Style.BRIGHT + Back.GREEN + 'PDF MetaData For: ' + str(file_name) + Style.RESET_ALL)
        # getDocumentInfo() returns None for a PDF without /Info; print
        # only the header in that case.
        for metaItem in (info or {}):
            # %-formatting also copes with values that are not strings.
            print('[+] %s:%s' % (metaItem, info[metaItem]))
示例9: main
# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import getDocumentInfo [as 别名]
def main():
    """Print the metadata of the PDF named by the last command-line argument.

    Note that with no arguments ``sys.argv[-1]`` is the script path itself.
    """
    fileName = sys.argv[-1]
    # Entries are resolved while the file is still open; the handle is
    # closed on exit instead of being leaked.
    with open(fileName, 'rb') as stream:
        info = PdfFileReader(stream).getDocumentInfo()
        print("The Metadata for the file" + fileName + " are: \n")
        # getDocumentInfo() returns None for a PDF without /Info; print
        # only the header in that case.
        for line in (info or {}):
            # %-formatting also copes with values that are not strings.
            print("%s : %s" % (line, info[line]))
示例10: _getPDFText
# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import getDocumentInfo [as 别名]
def _getPDFText(self, filename, d):
    """Extract the text of every page and record the PDF metadata on ``d``.

    Each document-information entry is passed to ``d.addConceptKeyType``
    with the first character of its key removed.  Page text that is not a
    ``str`` is NFKD-normalised and reduced to ASCII before it is logged and
    collected.

    @param filename: path of the PDF to read
    @param d: object providing ``addConceptKeyType(key, value)``
    @return: list with one entry per page (page text followed by ". "),
        or None when any exception occurred (the error is logged)
    """
    logger.debug(u"filename: %s" % filename)
    newparatextlist = list()
    try:
        # pyPdf reads from the stream on demand, so all extraction happens
        # inside the ``with`` block; the handle used to be left open.
        with open(filename, u"rb") as stream:
            pdfDoc = PdfFileReader(stream)

            # getDocumentInfo() returns None for a PDF without /Info.
            pdfDict = pdfDoc.getDocumentInfo()
            for key in (pdfDict or {}):
                d.addConceptKeyType(key[1:], pdfDict[key])

            for page in pdfDoc.pages:
                text = page.extractText()
                if not isinstance(text, str):
                    # The converted value used to be thrown away, making
                    # this statement a no-op; keep it.
                    text = unicodedata.normalize(u'NFKD', text).encode(u'ascii', u'ignore')
                logger.debug(u"PDF : %s" % text)
                newparatextlist.append(text + u". ")
        return newparatextlist
    except Exception as msg:
        logger.error(u"%s" % msg)
        # Failure is signalled by None, exactly as the implicit return did.
        return None
示例11: get_metadata
# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import getDocumentInfo [as 别名]
def get_metadata(pdf):
    """Print the document-information dictionary of the PDF at path ``pdf``.

    Nothing is returned.
    """
    # Close the file once the metadata has been printed; the handle used to
    # leak.  The unused local ``ret`` was dropped.
    with open(pdf, "rb") as stream:
        pdf_info = PdfFileReader(stream).getDocumentInfo()
        print(str(pdf_info))
示例12: test_backlog_list
# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import getDocumentInfo [as 别名]
def test_backlog_list(self):
    """Render a backlog's stories to PDF twice and inspect both documents.

    A backlog with ten sample stories is created (one story gets -1
    points).  The print form is then submitted with every ``story-`` field
    ticked, first as long-side A4 and then as short-side letter, and the
    page count, author and first-page media box are asserted each time.
    """
    user = factories.UserFactory.create(
        email='[email protected]', password='pass')
    backlog = factories.create_project_sample_backlog(user)
    for _ in range(10):
        factories.create_sample_story(user, backlog=backlog)

    # special printing of -1 points
    story = factories.UserStory.objects.all()[0]
    story.points = -1
    story.save()

    url_plus = "{0}?backlog_id={1}".format(reverse("print_stories"), backlog.pk)
    # A request without a user is expected to answer with a 302.
    self.app.get(url_plus, status=302)

    def render_pdf(side, paper):
        # Fetch the form, tick every story, submit, and parse the result.
        page = self.app.get(url_plus, user=user)
        form = page.forms['print_pdf_form']
        for key, _field in form.fields.items():
            if key and "story-" in key:
                form[key] = True
        form['print-side'] = side
        form['print-format'] = paper
        result = form.submit()
        self.assertEqual(result['Content-Type'], "application/pdf")
        return PdfFileReader(StringIO.StringIO(result.content))

    pdf = render_pdf("long", "a4")
    info = pdf.getDocumentInfo()
    self.assertEqual(pdf.getNumPages(), 6)
    self.assertEqual("backlogman.com", info['/Author'])
    # A4 is not "round" in PDF unit format real value are
    # approximately : [0, 0, 841.88980, 595.27560]
    first_box = [int(x) for x in pdf.getPage(0)["/MediaBox"]]
    self.assertEqual([0, 0, 841, 595], first_box)

    pdf = render_pdf("short", "letter")
    info = pdf.getDocumentInfo()
    self.assertEqual(pdf.getNumPages(), 6)
    self.assertEqual("backlogman.com", info['/Author'])
    self.assertEqual([0, 0, 792, 612], pdf.getPage(0)["/MediaBox"])
示例13: printMeta
# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import getDocumentInfo [as 别名]
def printMeta(filename):
    """Print every document-information entry of the PDF at ``filename``.

    A header line naming the file is printed first, then one
    ``[+]key:value`` line per metadata entry.
    """
    # Entries are resolved while the file is still open; the handle is
    # closed on exit instead of being leaked.
    with open(filename, 'rb') as stream:
        docInfo = PdfFileReader(stream).getDocumentInfo()
        print('[+] PDF MetaData for : ' + str(filename))
        # getDocumentInfo() returns None for a PDF without /Info; print
        # only the header in that case.
        for metaItem in (docInfo or {}):
            # %-formatting also copes with values that are not strings.
            print('[+]%s:%s' % (metaItem, docInfo[metaItem]))
示例14: extractTitle
# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import getDocumentInfo [as 别名]
def extractTitle(dirPath):
    """Collect the title of every PDF listed for ``dirPath``.

    The file names come from ``listFileNames(dirPath)``.

    Returns a dict mapping the 1-based position of each file name to the
    title stored in that PDF (None when the document has no title or no
    document-information dictionary at all).
    """
    fileTitles = {}
    for position, name in enumerate(listFileNames(dirPath), 1):
        # Open each file only for as long as its metadata is being read;
        # previously one handle per file was left open.
        with open("%s/%s" % (dirPath, name), 'rb') as stream:
            info = PdfFileReader(stream).getDocumentInfo()
            # getDocumentInfo() returns None for a PDF without /Info.
            fileTitles[position] = info.title if info is not None else None
    return fileTitles
示例15: get_name
# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import getDocumentInfo [as 别名]
def get_name(filename):
    '''Build a new file name for a PDF from its title and subject.

    Returns ``"<title>.pdf"`` when only a title is present, or
    ``"<title>_<subject>.pdf"`` (with "/" -> "-" and " " -> "_" applied to
    the subject) when both are present.  When the PDF has no title, or when
    anything goes wrong while reading it, ``filename`` is returned
    unchanged; in the error case "NO CHANGES!" is printed as well.
    '''
    # Default first: the old code left ``new_name`` unbound when an
    # exception occurred and then failed at ``return`` with
    # UnboundLocalError.
    new_name = filename
    try:
        # ``with`` closes the file on the error path too.
        with open(filename, "rb") as file_obj:
            info = PdfFileReader(file_obj).getDocumentInfo()
            # ``info`` is None for PDFs without metadata; the resulting
            # AttributeError is handled below like any other failure.
            title = info.title
            subject = info.subject
        if title:
            if subject:
                new_name = "{0}_{1}.pdf".format(
                    str(title), str(subject).replace("/", "-").replace(" ", "_"))
            else:
                new_name = "{0}.pdf".format(str(title))
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt/SystemExit are not swallowed.
        print("NO CHANGES!")
    return new_name