本文整理汇总了Python中pyPdf.PdfFileReader.decrypt方法的典型用法代码示例。如果您正苦于以下问题：Python PdfFileReader.decrypt方法的具体用法？Python PdfFileReader.decrypt怎么用？Python PdfFileReader.decrypt使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pyPdf.PdfFileReader的用法示例。
在下文中一共展示了PdfFileReader.decrypt方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: render
# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import decrypt [as 别名]
def render(self):
    """Assemble the output PDF and return it as a rewound temporary file.

    Page 0 of ``self.generate_page_one()`` is merged with page 0 of
    ``lib/kfza_base.pdf`` and added first, followed by every page of
    ``lib/<self.base_pdf>`` (decrypted with an empty password when it is
    encrypted).  When ``self.context.course.extra_questions`` is truthy,
    page 1 of a freshly generated document is merged with the same base
    page and appended.

    Returns:
        A ``TemporaryFile`` containing the written document, positioned at
        offset 0.
    """
    lib_dir = "%s/lib" % path.dirname(__file__)
    output = PdfFileWriter()
    # Both source files stay open until output.write() has run: the writer
    # serializes page objects that still belong to these readers.  The
    # ``with`` blocks also guarantee the handles are closed on any error.
    with open("%s/%s" % (lib_dir, "kfza_base.pdf"), 'rb') as base1:
        wm = PdfFileReader(base1).getPage(0)
        page1 = PdfFileReader(self.generate_page_one()).getPage(0)
        page1.mergePage(wm)
        output.addPage(page1)
        with open("%s/%s" % (lib_dir, self.base_pdf), 'rb') as pdf:
            pf = PdfFileReader(pdf)
            if pf.isEncrypted:
                pf.decrypt('')
            for page in range(pf.getNumPages()):
                output.addPage(pf.getPage(page))
            if self.context.course.extra_questions:
                # Re-read the base page and regenerate the document, exactly
                # as the first page was built, but take generated page 1.
                wm = PdfFileReader(base1).getPage(0)
                extra_page = PdfFileReader(self.generate_page_one()).getPage(1)
                extra_page.mergePage(wm)
                output.addPage(extra_page)
            ntf = TemporaryFile()
            try:
                output.write(ntf)
            except Exception:
                # Do not leak the temporary file when serialization fails.
                ntf.close()
                raise
    ntf.seek(0)
    return ntf
示例2: _get_images_from_pdf
# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import decrypt [as 别名]
def _get_images_from_pdf(pdf_filename, resolution, verbose, delete_files,
        temp_dir, make_thumbs, thumb_size, thumb_dir, thumb_prefix, pool_count=1):
    """Split a PDF into one-page PDFs and hand them to the converter workers.

    Each page of ``pdf_filename`` is written to
    ``<temp_dir>/document-page-<i>.pdf`` and its index is put on the
    module-level ``__pdf_queue``.  ``pool_count`` work items are then
    dispatched to ``_pdf_converter_worker`` and the function polls
    ``__pdf_texts`` until it holds one entry per page.

    Any exception is printed rather than propagated (best effort).

    Returns:
        True when every page was processed, False when an error occurred.
    """
    success = False
    try:
        if verbose:
            print("Splitting PDF into multiple pdf's for processing ...")
        # make sure there is a place to put our temporary pdfs
        if not os.path.exists(temp_dir):
            os.makedirs(temp_dir)
        # make sure the thumbnail folder exists if we are going to make thumbs
        if make_thumbs and not os.path.exists(thumb_dir):
            os.makedirs(thumb_dir)
        # read input pdf; the handle is closed as soon as the pages are split
        with open(pdf_filename, "rb") as pdf_stream:
            inputpdf = PdfFileReader(pdf_stream)
            if inputpdf.getIsEncrypted():
                inputpdf.decrypt('')
            page_count = inputpdf.numPages
            if verbose:
                print("Writing out %i pages ..." % page_count)
            # create all of the temporary pdfs
            for i in range(page_count):
                output = PdfFileWriter()
                output.addPage(inputpdf.getPage(i))
                filename = "{0}/document-page-{1}.pdf".format(temp_dir, i)
                with open(filename, "wb") as outputStream:
                    output.write(outputStream)
                __pdf_queue.put(i)
        if verbose:
            print("Dispatching pdf workers ...")
        # spin up our workers to convert the pdfs to images
        worker_args = [(x, resolution, verbose, delete_files,
                        temp_dir, make_thumbs, thumb_size,
                        thumb_dir, thumb_prefix) for x in range(pool_count)]
        pool = Pool()
        pool.map_async(_pdf_converter_worker, worker_args)
        # NOTE(review): this polls forever if a worker dies before producing
        # its result -- confirm the workers always report back.
        while __pdf_texts.qsize() != page_count:
            time.sleep(.25)
        if verbose:
            print("Done converting PDF.")
        success = True
    except Exception as e:
        print(str(e))
    # The flag was previously computed but never handed back to the caller.
    return success
示例3: merge_pdf
# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import decrypt [as 别名]
def merge_pdf(new_filename, pdfs, encryp=False, user_pw="", owner_pw=None, lvl=128):
    """
    Merges pdfs into one pdf called new_filename.

    pdfs: list of tuples (path=string, password=string)
    encryp: when true, the merged document is encrypted with user_pw and
        owner_pw; lvl == 128 selects the 128-bit flag of
        ``PdfFileWriter.encrypt``, any other value passes False.

    Raises PasswordError when an encrypted input rejects its password.
    """
    output = PdfFileWriter()
    sources = []
    try:
        for path, pw in pdfs:
            stream = open(path, "rb")
            # Inputs must stay open until output.write() has consumed them.
            sources.append(stream)
            pdf = PdfFileReader(stream)
            if pdf.isEncrypted and pdf.decrypt(pw) == 0:
                raise PasswordError
            for page_num in range(pdf.getNumPages()):
                output.addPage(pdf.getPage(page_num))
        if encryp:
            output.encrypt(user_pw, owner_pw, lvl == 128)
        # Open the destination exactly once; the original opened it a second
        # time inside the ``with`` and never closed that second handle.
        with open(new_filename, "wb") as outputStream:
            output.write(outputStream)
    finally:
        for stream in sources:
            stream.close()
示例4: OCR
# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import decrypt [as 别名]
def OCR(self, fn, resolution=300, verbose=False, part=''):
    """Run OCR over every page of the PDF at ``fn``.

    Each page is written to a temporary one-page PDF, rendered to JPEG with
    PythonMagick at ``resolution`` dpi, and passed to ``image_to_string``.

    Args:
        fn: path of the PDF to read.
        resolution: density used when rasterizing a page.
        verbose: when true, print the 1-based number of each page.
        part: tag embedded in the temporary file names
            (``tmp-<part>-<page>.pdf`` / ``.jpg``).

    Returns:
        An ``OCRResult`` built from the concatenated text, its cleaned-up
        form, the page count and the per-page ``OCRPage`` list; or ``False``
        when the file is encrypted and the empty password does not open it.
    """
    part = str(part)
    pagedata = []
    text = ''
    with open(fn, 'rb') as pdf_stream:
        pdf = PdfFileReader(pdf_stream)
        if pdf.getIsEncrypted() and not pdf.decrypt(''):
            # The original spelled this ``false``, which raised NameError.
            return False
        for i, p in enumerate(pdf.pages, 1):
            if verbose:
                print(' --- ' + str(i))
            # Temporary filenames for ImageMagick conversion
            pgfile = 'tmp-' + part + '-' + str(i) + '.pdf'
            pgfilejpg = 'tmp-' + part + '-' + str(i) + '.jpg'
            try:
                # Write this page out as a standalone PDF
                output = PdfFileWriter()
                output.addPage(p)
                with open(pgfile, 'wb') as outputStream:
                    output.write(outputStream)
                # Convert this page to a high-resolution JPEG
                img = PythonMagick.Image()
                img.density(str(resolution))
                img.read(pgfile)
                img.write(pgfilejpg)
                # OCR the converted JPG, dropping a fourth band if present
                im = Image.open(pgfilejpg)
                bands = im.split()
                if len(bands) == 4:
                    im = Image.merge('RGB', tuple(bands[:3]))
                t = image_to_string(im)
            finally:
                # Cleanup, also when conversion or OCR failed part-way
                for tmp_name in (pgfile, pgfilejpg):
                    if os.path.exists(tmp_name):
                        os.remove(tmp_name)
            # Add to data object
            pagedata.append(OCRPage(i, t, self.OCRCleanup(t)))
            text += t
    # Produce the output data object
    return OCRResult(text, self.OCRCleanup(text), len(pagedata), pagedata)
示例5: read_pdf
# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import decrypt [as 别名]
def read_pdf(filename):
    """Return a pyPdf reader for ``filename``.

    Raises ``CommandError`` when the path does not exist.  For an encrypted
    file, keeps asking ``prompt_for_pw`` for a password until ``decrypt``
    accepts one.
    """
    if os.path.exists(filename):
        reader = PdfFileReader(open(filename, "rb"))
        if reader.isEncrypted:
            # Every rejected password produces one message, then a new prompt.
            while not reader.decrypt(prompt_for_pw(filename)):
                print("The password did not match.")
        return reader
    raise CommandError("{} does not exist".format(filename))
示例6: merge_vac
# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import decrypt [as 别名]
def merge_vac(icao_code_list, directory, options):
    """Merge the per-airfield PDFs in ``directory`` into a single PDF file.

    For every code in ``icao_code_list`` the file ``<directory>/<code>.pdf``
    is read and its pages are appended to ``<directory>/<VAC_ALL_FILE>.pdf``.
    The first page of each input is skipped when ``options.merge_better`` is
    truthy.  When ``options.a5`` is truthy the merged file is additionally
    passed to ``pdfnup`` to produce ``<directory>/<VAC_ALL_FILE>.a5.pdf``;
    a non-zero exit status is reported through ``fail``.
    """
    vac_all_file = "%s/%s.pdf" % (directory, VAC_ALL_FILE)
    vac_a5_file = "%s/%s.a5.pdf" % (directory, VAC_ALL_FILE)
    start_page = 1 if options.merge_better else 0
    info("Merging all VAC charts into one pdf file (%s)..." % vac_all_file, options)
    from pyPdf import PdfFileWriter, PdfFileReader
    out_pdf = PdfFileWriter()
    in_files = []
    try:
        for icao_code in icao_code_list:
            in_file = open("%s/%s.pdf" % (directory, icao_code), "rb")
            # Inputs must stay open until the merged file has been written.
            in_files.append(in_file)
            in_pdf = PdfFileReader(in_file)
            # Only decrypt when needed: decrypt() on an unencrypted document
            # fails because there is no encryption dictionary to read.
            if in_pdf.isEncrypted:
                in_pdf.decrypt("")
            for i in range(start_page, in_pdf.numPages):
                out_pdf.addPage(in_pdf.getPage(i))
        with open(vac_all_file, "wb") as out_file:
            out_pdf.write(out_file)
    finally:
        for in_file in in_files:
            in_file.close()
    if options.a5:
        info("Converting merged file to 2x1 A5 in A4 format (%s)..." % vac_a5_file, options)
        # NOTE(review): the paths are interpolated into a shell command
        # unquoted; a directory containing spaces will break this call.
        (status, output) = commands.getstatusoutput("pdfnup %s --outfile %s"
                                                    % (vac_all_file, vac_a5_file))
        if status != 0:
            fail("Failed to convert merged file to 2x1 A5 in A4 format.\n%s" % output)
示例7: decrypt
# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import decrypt [as 别名]
def decrypt(pdf_path, out_path, password):
    """Write a decrypted copy of ``pdf_path`` to ``out_path``.

    Args:
        pdf_path: path of the encrypted source PDF.
        out_path: destination path for the decrypted copy.
        password: password, converted with ``str()`` before use.

    Raises:
        DecryptionError: when ``PdfFileReader.decrypt`` returns 0, i.e. the
            password was rejected.

    The title and author are carried over from the source document; a
    missing value (or a document without an info dictionary) is replaced by
    the translated string ``Unknown``.
    """
    with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
        pdf = PdfFileReader(pdf_file)
        if pdf.decrypt(str(password)) == 0:
            raise DecryptionError(pdf_path)
        # documentInfo is None when the PDF carries no info dictionary.
        doc_info = pdf.documentInfo
        title = (doc_info.title if doc_info else None) or _('Unknown')
        author = (doc_info.author if doc_info else None) or _('Unknown')
        out_pdf = PdfFileWriter(title=title, author=author)
        for page in pdf.pages:
            out_pdf.addPage(page)
        # Written while the source is still open; it is closed right after.
        with open(out_path, 'wb') as out_file:
            out_pdf.write(out_file)
示例8: PdfBox
# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import decrypt [as 别名]
class PdfBox(object):
    '''Wraps a pyPdf reader: opens a file, decrypts it if needed, exposes
    its metadata and caches extracted page text.'''

    # Class-level defaults; __init__ overrides the ones it can determine.
    pdfReader = None
    pdfInfo = None
    currentpage = 0
    # Kept for backward compatibility only; __init__ always installs a
    # per-instance dict so that instances never share this one.
    extractedPages = {}
    filepath = ""
    isencrypted = False
    password = ""
    author = ""
    title = ""
    subject = ""
    pages = 0
    initialized = False

    def __init__(self, filepath, password = None):
        """Open ``filepath`` and load its metadata.

        ``password`` (optional) is used to decrypt an encrypted file; without
        it the empty string is tried.  When decryption fails the instance is
        left with ``initialized == False`` and default metadata.
        """
        self.filepath = filepath
        # Per-instance cache, set unconditionally so that a failed
        # initialization does not fall back to the shared class-level dict.
        self.extractedPages = {}
        self.pdfReader = PdfFileReader(open(filepath, "rb"))
        if password:
            self.password = password
        if self.initializePdf(self.password):
            self.pdfInfo = self.pdfReader.getDocumentInfo()
            # getDocumentInfo() yields None for a PDF without an info
            # dictionary; keep the default metadata in that case.
            if self.pdfInfo is not None:
                self.author = self.pdfInfo.author
                self.title = self.pdfInfo.title
                self.subject = self.pdfInfo.subject
            self.pages = self.pdfReader.getNumPages()

    def initializePdf(self, password = None):
        """Decrypt the reader if necessary.

        ``password`` defaults to ``self.password`` when omitted.  Returns
        True when the document is readable (not encrypted, or decrypted
        successfully) and False when the password was rejected.
        """
        if password is None:
            password = self.password
        if not self.pdfReader.getIsEncrypted():
            self.initialized = True
            return True
        self.isencrypted = True
        if self.pdfReader.decrypt(password):
            self.initialized = True
            return True
        return False

    def getPage(self, pagenum):
        """Return the extracted text of page ``pagenum`` (0-based index as
        passed to the reader), caching it; also records it as the current
        page."""
        self.currentpage = pagenum
        if pagenum not in self.extractedPages:
            page = self.pdfReader.getPage(pagenum)
            self.extractedPages[pagenum] = page.extractText()
        return self.extractedPages[pagenum]
示例9: export_to_file
# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import decrypt [as 别名]
def export_to_file(self, file_out, only_selected=False):
    """Export the pages listed in ``self.model`` to the PDF ``file_out``.

    Every document in ``self.pdfqueue`` is opened from its ``copyname``.
    Each model row selects a source document (``row[2]``, 1-based) and page
    (``row[3]``, 1-based), a clockwise rotation (``row[6]``) and four crop
    fractions (``row[7]``..``row[10]``) that shrink the page's media box.

    Args:
        file_out: path of the PDF to write.
        only_selected: when true, rows whose ``path`` is not in the icon
            view's current selection are skipped.

    Raises:
        Exception: when a source document is encrypted and the empty
            password is not accepted as the user password.
    """
    selection = self.iconview.get_selected_items()
    pdf_output = PdfFileWriter()
    pdf_input = []
    streams = []
    try:
        for pdfdoc in self.pdfqueue:
            stream = open(pdfdoc.copyname, 'rb')
            # Inputs must stay open until the output has been written.
            streams.append(stream)
            pdfdoc_inp = PdfFileReader(stream)
            if pdfdoc_inp.getIsEncrypted():
                try:  # Workaround for lp:#355479
                    stat = pdfdoc_inp.decrypt('')
                except Exception:
                    stat = 0
                if stat != 1:
                    errmsg = _('File %s is encrypted.\n'
                               'Support for encrypted files has not been implemented yet.\n'
                               'File export failed.') % pdfdoc.filename
                    raise Exception(errmsg)
                # FIXME: otherwise ask for password and decrypt file
            pdf_input.append(pdfdoc_inp)

        for row in self.model:
            if only_selected and row.path not in selection:
                continue
            # add pages from input to output document
            nfile = row[2]
            npage = row[3]
            current_page = copy(pdf_input[nfile-1].getPage(npage-1))
            angle = row[6]
            angle0 = current_page.get("/Rotate", 0)
            crop = [row[7], row[8], row[9], row[10]]
            if angle != 0:
                current_page.rotateClockwise(angle)
            if crop != [0., 0., 0., 0.]:
                # Number of quarter turns of the displayed page; floor
                # division keeps this an integer for range() below.
                rotate_times = int(((angle + angle0) % 360 + 45) // 90) % 4
                crop_init = crop
                if rotate_times != 0:
                    # Permute the crop sides to follow the page rotation.
                    perm = [0, 2, 1, 3]
                    for it in range(rotate_times):
                        perm.append(perm.pop(0))
                    perm.insert(1, perm.pop(2))
                    crop = [crop_init[perm[side]] for side in range(4)]
                # NOTE: the crop is applied to mediaBox; an equivalent
                # cropBox variant was left commented out in the original.
                (x1, y1) = [float(xy) for xy in current_page.mediaBox.lowerLeft]
                (x2, y2) = [float(xy) for xy in current_page.mediaBox.upperRight]
                x1_new = int(x1 + (x2-x1) * crop[0])
                x2_new = int(x2 - (x2-x1) * crop[1])
                y1_new = int(y1 + (y2-y1) * crop[3])
                y2_new = int(y2 - (y2-y1) * crop[2])
                current_page.mediaBox.lowerLeft = (x1_new, y1_new)
                current_page.mediaBox.upperRight = (x2_new, y2_new)
            pdf_output.addPage(current_page)

        # finally, write "output" to file_out and close it deterministically
        with open(file_out, 'wb') as out_stream:
            pdf_output.write(out_stream)
    finally:
        for stream in streams:
            stream.close()
示例10: PdfFileReader
# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import decrypt [as 别名]
# 12.2 review exercises
# Decrypt Walrus.pdf, rotate every page, split each page into a left and a
# right half, and save the halves as consecutive pages of a new PDF.
import os
import copy
from pyPdf import PdfFileReader, PdfFileWriter

path = "C:/Real Python/Course materials/Chapter 8/Practice files"
inputFileName = os.path.join(path, "Walrus.pdf")
inputFile = PdfFileReader(open(inputFileName, "rb"))
outputPDF = PdfFileWriter()
inputFile.decrypt("IamtheWalrus")  # decrypt the input file

for pageNum in range(0, inputFile.getNumPages()):
    # rotate pages (call everything pageLeft for now; will make a copy)
    pageLeft = inputFile.getPage(pageNum)
    pageLeft.rotateCounterClockwise(90)
    # split each page in half
    # NOTE(review): copy.copy is shallow and is taken before mediaBox is
    # first accessed; keep this statement order -- presumably it is what
    # gives each half its own media box. Verify against pyPdf.
    pageRight = copy.copy(pageLeft)
    upperRight = pageLeft.mediaBox.upperRight  # get original page corner
    # crop and add left-side page
    pageLeft.mediaBox.upperRight = (upperRight[0]/2, upperRight[1])
    outputPDF.addPage(pageLeft)
    # crop and add right-side page
    pageRight.mediaBox.upperLeft = (upperRight[0]/2, upperRight[1])
    outputPDF.addPage(pageRight)

# save new pages to an output file
outputFileName = os.path.join(path, "Output/Updated Walrus.pdf")
with open(outputFileName, "wb") as outputFile:
    # The snippet ended on the bare ``with`` header; writing the collected
    # pages is the step the comment above announces.
    outputPDF.write(outputFile)
示例11: employer_resume_book_create
# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import decrypt [as 别名]
def employer_resume_book_create(request):
    """Build the resume book file for the requesting recruiter.

    When ``request.POST["resume_book_id"]`` is set, that existing book is
    redelivered (404 if it does not exist).  Otherwise the recruiter's
    undelivered book is fetched or created (surplus duplicates are deleted)
    and given a name of the form ``<user>_<timestamp>``.

    ``request.POST["delivery_format"] == "separate"`` produces a zip archive
    with one PDF per visible student; any other value produces a single PDF
    consisting of a contents listing followed by every student's resume.
    The resulting file is stored on ``resume_book.resume_book``.

    Returns:
        An empty ``HttpResponse``.
    """
    resume_book_id = request.POST.get("resume_book_id")
    redelivering = bool(resume_book_id)
    if redelivering:
        try:
            resume_book = ResumeBook.objects.get(id=resume_book_id)
        except ResumeBook.DoesNotExist:
            raise Http404("No resume book exists with id of %s" % resume_book_id)
        resume_book_name = resume_book.name
    else:
        try:
            resume_book, created = ResumeBook.objects.get_or_create(recruiter=request.user.recruiter, delivered=False)
        except ResumeBook.MultipleObjectsReturned:
            # Keep the first undelivered book and drop the duplicates.
            resume_books = ResumeBook.objects.filter(recruiter=request.user.recruiter, delivered=False)
            for i, rb in enumerate(resume_books):
                if i != 0:
                    rb.delete()
                else:
                    resume_book = rb
        now = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
        resume_book_name = "%s_%s" % (str(request.user), now)
        resume_book.name = resume_book_name
        resume_book.save()

    file_path = "%semployer/resumebook/" % (s.MEDIA_ROOT,)
    if not os.path.exists(file_path):
        os.makedirs(file_path)

    if request.POST["delivery_format"] == "separate":
        # Create the zip file
        file_name = "%s%s" % (file_path, resume_book_name)
        output = zipfile.ZipFile(file_name, "w")
        try:
            for student in resume_book.students.visible():
                with open("%s%s" % (s.MEDIA_ROOT, str(student.resume)), "rb") as resume_file:
                    name = "%s %s (%s, %s).pdf" % (
                        student.first_name,
                        student.last_name,
                        student.graduation_year,
                        student.degree_program,
                    )
                    output.write(resume_file.name, name, zipfile.ZIP_DEFLATED)
        finally:
            output.close()
    else:
        output = PdfFileWriter()
        file_name = "%s%s.pdf" % (file_path, resume_book_name)
        students = resume_book.students.visible().order_by("graduation_year", "first_name", "last_name")

        # Contents listing: header on the first page, one line per student.
        report_buffer = cStringIO.StringIO()
        c = Canvas(report_buffer)
        now = datetime.now()
        first_line = "Created on %s at %s" % (now.strftime("%m/%d/%Y"), now.strftime("%I:%M %p"))
        c.drawString(1 * cm, 28.5 * cm, first_line)
        c.drawString(1 * cm, 28 * cm, str(request.user.recruiter))
        c.drawString(1 * cm, 27.5 * cm, str(request.user.recruiter.employer))
        c.drawString(16 * cm, 28.5 * cm, "Created using Umeqo")
        c.drawString(8.5 * cm, 26.5 * cm, "Resume Book Contents")
        pad_from_top = 0
        for student in students:
            c.drawString(6.5 * cm, (25.5 - pad_from_top * 0.5) * cm, "%s %s" % (student.first_name, student.last_name))
            c.drawString(
                12 * cm, (25.5 - pad_from_top * 0.5) * cm, "%s, %s" % (student.graduation_year, student.degree_program)
            )
            pad_from_top += 1
            if pad_from_top > 50:
                # Page is full (51 lines): flush it and start a new one.
                # The original only did this once, at the 51st student, so
                # longer lists ran off the bottom of the second page.
                c.showPage()
                c.save()
                output.addPage(PdfFileReader(cStringIO.StringIO(report_buffer.getvalue())).getPage(0))
                report_buffer = cStringIO.StringIO()
                c = Canvas(report_buffer)
                pad_from_top = 0
        c.showPage()
        c.save()
        output.addPage(PdfFileReader(cStringIO.StringIO(report_buffer.getvalue())).getPage(0))

        # Append every resume.  The source files must stay open until the
        # book is written, then all of them are closed (the original closed
        # only the last one and failed when there were no students).
        resume_files = []
        try:
            for student in students:
                resume_file = open("%s%s" % (s.MEDIA_ROOT, str(student.resume)), "rb")
                resume_files.append(resume_file)
                resume = PdfFileReader(resume_file)
                if resume.getIsEncrypted():
                    resume.decrypt("")
                for page in range(resume.getNumPages()):
                    output.addPage(resume.getPage(page))
            with open(file_name, "wb") as outputStream:
                output.write(outputStream)
        finally:
            for resume_file in resume_files:
                resume_file.close()

    with open(file_name, "rb") as resume_book_contents:
        resume_book.resume_book.save(file_name, File(resume_book_contents))
    return HttpResponse()
示例12: process_file
# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import decrypt [as 别名]
def process_file(id_source):
from processing.tex_template import tex_header
from pyPdf import PdfFileReader
from numpy import array
logging.info("begin %s" % (id_source, ))
OUTPUT = []
S = {}
locations, comments = annotations.getPublicCommentsByFile(id_source)
repfile = "%s/%s/%s" % (settings.HTTPD_MEDIA,settings.REPOSITORY_DIR, id_source)
if not os.path.exists(repfile):
logging.warning("%s not found. Skipping..." % (repfile, ))
return
srcfile = "/tmp/orig_%s.pdf" % (id_source, )
if not os.path.exists(srcfile):
os.symlink(repfile, srcfile)
pdf = PdfFileReader(file(srcfile, "rb"))
if pdf.isEncrypted and pdf.decrypt("")==0:
print "PDF file encrypted with non-empty password: %s" % (srcfile,)
return False
trim_box = pdf.pages[0].trimBox # Sacha's coordinate system now uses this box
crop_box = pdf.pages[0].cropBox # ConTeXt's page inclusion uses this box
fudge = (int(trim_box[2])-int(trim_box[0]))/612.0 # for the assumption of 612bp width
bp_per_pixel = 72.0/150 * fudge
roots = {}
children_of = {}
for k in comments:
node = int(k)
parent = comments[k]['id_parent']
if parent:
if parent not in children_of:
children_of[parent] = []
children_of[parent].append(node)
else:
loc_id = comments[node]['ID_location']
loc = locations[loc_id]
if loc['page'] != 0:
loc['center_x'] = loc['left'] + loc['w']/2.0
loc['center_y'] = loc['top'] + loc['h']/2.0
else:
loc['center_x'] = None
loc['center_y'] = None
roots[node] = loc
def oneline(s):
return s.replace('\n', ' ')
def texify(s):
s = s.strip()
patterns = [(r'\\', r'\\\\'),
(r'%', r'\%'), (r'\$', r'\$'), ('_', r'\_'), (r'\&', r'\&'),
(r'\^', r'\^\\null{}'), (r'#', r'\#'), (r'\|', r'$|$')]
for p in patterns:
s = re.sub(p[0], p[1], s)
return s
def rect2array(rect):
return array(rect.lowerLeft+rect.upperRight, dtype=float)
def rectangle_height(rect):
return rect.upperRight[1]-rect.lowerLeft[1]
S["last_page"] = -1
def print_child(n, levels=0):
body = comments[n]['body']
loc_id = comments[n]['ID_location']
location = locations[loc_id]
page = int(location['page'])
if levels == 0 and page > S["last_page"]:
OUTPUT.append('\n%% Comments on page %d of %s [%s]' % (page,
"myfile",
os.path.basename(srcfile)))
if page == 0:
sectitle = 'Global comments'
else:
sectitle = 'Comments on page %d' % page
OUTPUT.append(r'\title{%s} \def\whatpage{%d}' % (sectitle, page))
S["last_page"] = page
if comments[n]['admin'] == 1:
me = 1
else:
me = 0
msg = '\n'+r'\comment{note-%s}{%d}{%s}{%d}{%d}' % (n, levels, texify(body), me, int(n))
OUTPUT.append(unicode(msg).encode("ascii", "ignore"))
if levels == 0 and page != 0: # a root comment not on page 0 needs callout
root = roots[n]
# Sacha's coords are from top left corner, relative to TrimBox
# but in pixels (not postscript points).
# evaluate comment_box_px, with this coord system, as [llx lly w h]
comment_box_px = array([root['left'],
root['top']+root['h'],
root['w'],
root['h']])
comment_box_bp = comment_box_px * bp_per_pixel
# convert y coordinate to use bottom edge of trim_box as y=0
comment_box_bp[1] = int(rectangle_height(trim_box))-int(comment_box_bp[1])
# convert to coordinates relative to CropBox
comment_box_bp[0:2] += (rect2array(trim_box)-rect2array(crop_box))[0:2]
OUTPUT.append('\setpospxywhd{note-%d-dest}{1}' % n)
#.........这里部分代码省略.........
示例13: len
# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import decrypt [as 别名]
import sys

helpmsg = "Simple PDF brute force script\n"
helpmsg += "Cracks pwds of the format <first 4 chars of email>0000-9999."
helpmsg += "Example: snow0653\n\n"
helpmsg += "Usage: pdfbrute.py <encrypted_pdf_file> <email_address>"


def candidate_passwords(email_address):
    """Yield each candidate password in order: the first four characters of
    ``email_address`` followed by a zero-padded number from 0000 to 9999
    inclusive."""
    prefix = email_address[:4]
    for number in range(10000):
        yield "%s%04d" % (prefix, number)


def main(argv):
    """Try every candidate password on the PDF named in ``argv[1]``.

    ``argv`` is the full argument vector (program name, PDF path, e-mail
    address).  Prints the usage text when an argument is missing, a notice
    when the file is not encrypted, and otherwise either the first password
    that ``decrypt`` accepts or a not-found message.
    """
    # Imported here so the candidate generator stays usable without pyPdf.
    from pyPdf import PdfFileReader

    # Both the file and the e-mail address are required (argv[2] is read).
    if len(argv) < 3:
        print(helpmsg)
        return
    pdffile = PdfFileReader(open(argv[1], "rb"))
    if not pdffile.isEncrypted:
        print("[!] The file is not protected with any password. Exiting.")
        # The original wrote a bare ``exit`` here, which did nothing.
        return
    print("[+] Attempting to Brute force. This could take some time...")
    for a in candidate_passwords(argv[2]):
        if pdffile.decrypt(a) > 0:
            print("[+] Password is: " + a)
            print("[...] Exiting..")
            return
    print("[-] Password not found.")


if __name__ == "__main__":
    main(sys.argv)
示例14: PdfFileReader
# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import decrypt [as 别名]
from pyPdf import PdfFileReader, PdfFileWriter

# Decrypt the practice PDF, rotate each page by 270 degrees clockwise and
# save the rotated pages as a new file next to the original.
my_path = 'C:/Users/Intel i3/Desktop/python/Real_Python/book1-exercises-master/Course materials/Chapter 12/Practice files/Walrus.pdf'
out_path = 'C:/Users/Intel i3/Desktop/python/Real_Python/book1-exercises-master/Course materials/Chapter 12/Practice files/new.pdf'

pdf_file = PdfFileReader(open(my_path, 'rb'))
yo = pdf_file.decrypt('IamtheWalrus')
page = pdf_file.getNumPages()
outfile = PdfFileWriter()
for page_no in range(page):
    # rotateClockwise hands back the page object that is then added
    text = pdf_file.getPage(page_no).rotateClockwise(270)
    outfile.addPage(text)

with open(out_path, 'wb') as out_file:
    outfile.write(out_file)
示例15: processFile
# 需要导入模块: from pyPdf import PdfFileReader [as 别名]
# 或者: from pyPdf.PdfFileReader import decrypt [as 别名]
def processFile(self, curr_file):
    """Extract metadata from a PDF and append one row to ``self.container``.

    Files whose name does not contain ``.pdf`` are ignored.  The appended
    row is ``[" | " + name, created, author, producer, modified,
    last_saved]``; a field that is missing from the document is ``'-'``.
    Nothing is appended when the document has no info dictionary or when
    any error occurs while reading it (best effort).

    Args:
        curr_file: path of the file to inspect.
    """
    global extractedFrom
    if ".pdf" not in curr_file:
        return

    def format_pdf_date(raw):
        # "D:YYYYMMDDHHMM..." -> "MM/DD/YYYY hh:MM AM/PM"
        data = raw.strip("D:|'")
        clock = time.strftime("%I:%M %p",
                              time.strptime(data[8:10] + ":" + data[10:12], "%H:%M"))
        return data[4:6] + "/" + data[6:8] + "/" + data[0:4] + " " + clock

    author = '-'
    created = '-'
    producer = '-'
    modded = '-'
    last_saved = '-'
    try:
        # The handle is closed as soon as the metadata has been read.
        with open(curr_file, 'rb') as pdf_stream:
            pdfFile = PdfFileReader(pdf_stream)
            if pdfFile.getIsEncrypted():
                pdfFile.decrypt('')
            docInfo = pdfFile.getDocumentInfo()
            if not docInfo:
                return
            # looks at the entire dictionary to parse for information
            if "/CreationDate" in docInfo:
                created = format_pdf_date(docInfo["/CreationDate"])
            if "/Author" in docInfo:
                author = docInfo["/Author"] + " "
                if len(author) <= 1:
                    author = "-"
            if "/Producer" in docInfo:
                # Remove the literal "(Windows)" tag; str.strip() treated the
                # argument as a character set and ate unrelated letters.
                producer = docInfo["/Producer"].replace("(Windows)", "")
                producer = re.sub(r'[^\w]', ' ', producer)
                # Collapse runs of spaces in one pass; the loop this replaces
                # never terminated once the value contained a space.
                producer = re.sub(r' {2,}', ' ', producer)
                if len(producer) == 0:
                    producer = "-"
            if "/ModDate" in docInfo:
                modded = format_pdf_date(docInfo["/ModDate"])
        # strips '/' off file name (if it includes directory name)
        if "/" in curr_file:
            curr_file = curr_file[curr_file.rfind("/")+1:]
        if "\\" in curr_file:
            curr_file = curr_file.replace("\\", "")
        # trim information if it's too long; a name is only shortened when
        # head (15) + "..." + tail (13) is actually shorter than the name
        if len(curr_file) > 31:
            curr_file = curr_file[:15] + "..." + curr_file[-13:]
        if len(producer) > 30:
            producer = producer[:20] + " [snipped] "
        if len(author) > 20:
            author = author[:20] + " [snipped] "
        # appends each piece of information. output will show ONLY if at
        # least ONE file has data in a column
        self.container.append([" | " + curr_file, created, author, producer, modded, last_saved])
    except Exception:
        # Deliberately best effort: an unreadable file is skipped silently.
        return