本文整理汇总了Python中InternetArchiveCommon类的典型用法代码示例。如果您正苦于以下问题：Python InternetArchiveCommon类的具体用法？Python InternetArchiveCommon怎么用？Python InternetArchiveCommon使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了InternetArchiveCommon类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: upload_docket
def upload_docket(docket, nonce):
    """Merge *docket* with the copy fetched from IA, then upload XML and HTML.

    The case should be locked by the caller before this is invoked.

    Returns a ``(success, message)`` pair:
      * ``(True, 'Unmodified')`` when the docket compares equal to the copy
        fetched from IA, in which case nothing is uploaded;
      * ``(False, msg)`` as soon as the XML or the HTML upload fails;
      * otherwise the result pair of the HTML upload.
    """
    remote_docket, _fetch_msg = _get_docket_from_IA(docket)
    if remote_docket:
        docket.merge_docket(remote_docket)

    # Nothing changed relative to the IA copy: skip the upload entirely.
    if docket == remote_docket:
        return True, 'Unmodified'

    docket.nonce = nonce

    # TK: Check that it's okay to always request a new bucket made
    xml_request = IACommon.make_docketxml_request(
        docket.to_xml(),
        docket.get_court(),
        docket.get_casenum(),
        docket.casemeta,
        makenew=True)
    uploaded, detail = _post_request(xml_request)
    if uploaded:
        logger.info('XML Docket upload for %s.%s succeeded',
                    docket.get_court(), docket.get_casenum())
    else:
        logger.error('XML Docket upload for %s.%s failed: %s',
                     docket.get_court(), docket.get_casenum(), detail)
        return False, detail

    # TK: Maybe handle this in a separate function that can deal with html?
    # The XML upload went through, so refresh the HTML page as well.
    html_request = IACommon.make_dockethtml_request(
        docket.to_html(),
        docket.get_court(),
        docket.get_casenum(),
        docket.casemeta)
    uploaded, detail = _post_request(html_request)
    if uploaded:
        logger.info('HTML Docket upload for %s.%s succeeded',
                    docket.get_court(), docket.get_casenum())
        return uploaded, detail

    logger.error('HTML Docket upload for %s.%s failed: %s',
                 docket.get_court(), docket.get_casenum(), detail)
    return False, detail
示例2: delete_docket_xml_from_IA
def delete_docket_xml_from_IA(court, casenum):
    """Issue the docket-XML delete request for ``court``/``casenum``.

    An ``HTTPError`` whose status is not 204 is reported on stdout; a 204
    is ignored silently.  Nothing is returned.
    """
    delete_request = IACommon.make_docketxml_delete_request(court, casenum)
    try:
        urllib2.urlopen(delete_request)
    except urllib2.HTTPError as http_err:
        status = http_err.code
        if status == 204:
            return
        print(" the response to the delete request was %s. This may not be an error" % status)
示例3: add_document_to_blacklist
def add_document_to_blacklist(document):
    """Append the PDF name of *document* to the blacklist file.

    The name comes from ``IACommon.get_pdfname`` using the document's
    ``court``, ``casenum``, ``docnum`` and ``subdocnum`` attributes, and is
    written as one line to ``../blacklist`` (relative to the current working
    directory).  A reminder is printed afterwards.
    """
    BLACKLIST_PATH = "../blacklist"
    # Build the entry before touching the file so a failure here cannot
    # leave an open handle behind.
    pdfname = IACommon.get_pdfname(document.court, document.casenum,
                                   document.docnum, document.subdocnum)
    # The context manager closes the file even if the write raises.
    with open(BLACKLIST_PATH, "a") as blacklist_file:
        blacklist_file.write(pdfname + "\n")
    print(" added document to %s, you may want to add a comment in that file" % BLACKLIST_PATH)
示例4: delete_document_from_IA
def delete_document_from_IA(document):
    """Issue the PDF delete request for *document*.

    The request is built from the document's ``court``, ``casenum``,
    ``docnum`` and ``subdocnum``.  An ``HTTPError`` with a status other than
    204 is reported on stdout; nothing is returned.
    """
    delete_request = IACommon.make_pdf_delete_request(
        document.court,
        document.casenum,
        document.docnum,
        document.subdocnum)
    try:
        urllib2.urlopen(delete_request)
    except urllib2.HTTPError as http_err:
        unexpected_status = http_err.code != 204
        if unexpected_status:
            print(" the response to the delete request was %s. This may not be an error" % http_err.code)
示例5: delete_documents_from_docket
def delete_documents_from_docket(court, casenum, documents):
    """Remove *documents* from the IA docket of a case and re-upload the XML.

    Steps:
      1. Fetch the docket string from IA and parse it; the process exits
         (via ``exit()``) if the docket is missing or cannot be parsed.
      2. Remove each document (by ``docnum``/``subdocnum``) from the docket.
      3. Upload the modified docket XML.

    Returns ``True`` when the upload answered with HTTP 200 or 201, else
    ``False``.  Earlier versions computed this flag but never returned it.
    """
    # Step 1: Get docket and convert into DocketXML
    docketstring, fetcherror = IADirect.get_docket_string(court, casenum)
    if not docketstring:
        print("Could not find docket on IA, exiting....")
        exit()

    ia_docket, message = DocketXML.parse_xml_string(docketstring)
    if not ia_docket:
        print("Docket parsing error: %s.%s, exiting...." % (court, casenum))
        exit()

    # Step 2: Remove documents from DocketXML object
    for document in documents:
        ia_docket.remove_document(document.docnum, document.subdocnum)

    # Step 3: upload modified xml
    docketbits = ia_docket.to_xml()
    request = IACommon.make_docketxml_request(docketbits, court, casenum,
                                              ia_docket.casemeta)

    # Old urllib2 releases raised HTTPError for every status other than
    # 200/206, so a "201 Created" arrived as an exception.  Newer releases
    # return 2xx responses normally, so read the status on both paths.
    try:
        response = urllib2.urlopen(request)
    except urllib2.HTTPError as e:
        status = e.code
    else:
        status = response.code
        response.close()

    success_status = status in (200, 201)  # 201 Created: Success!
    if success_status:
        print("Updated %s %s docket.xml" % (court, casenum))
    else:
        print("Failed to update %s %s docket.xml (HTTP %s)" % (court, casenum, status))
    return success_status
示例6: _cron_process_docketXML
def _cron_process_docketXML(docket, ppentry):
    ''' Put *docket* on IA, merging with the existing IA docket if present.

    Required to have the lock.

    The outcome of each branch is printed; nothing is returned.
    '''
    court = docket.casemeta["court"]
    casenum = docket.casemeta["pacer_case_num"]

    # Force '0' in the XML on docs that failed to upload.
    _update_docs_availability(docket)

    # The docket filename, used only for the printed messages below.
    docketname = IACommon.get_docketxml_name(court, casenum)

    # Step 1: Try to fetch the existing docket from IA
    docketstring, fetcherror = IADirect.get_docket_string(court, casenum)

    if docketstring:
        # Got the existing docket-- put merged docket file.
        ia_docket, parse_msg = DocketXML.parse_xml_string(docketstring)
        if not ia_docket:
            print(" %s docket parsing error: %s" % (docketname, parse_msg))
            return
        _merged_ok, outcome = _cron_me_up(ia_docket, docket, ppentry)
        print(" %s %s" % (docketname, outcome))
        return

    if fetcherror is IADirect.FETCH_NO_FILE:
        # Bucket exists but no docket-- put a new docket file.
        _put_ok, outcome = put_docket(docket, court, casenum, ppentry)
        print(" %s put into existing bucket: %s" % (docketname, outcome))
        return

    if fetcherror is IADirect.FETCH_NO_BUCKET:
        # Bucket doesn't exist-- make the bucket and put a new docket file.
        _put_ok, outcome = put_docket(docket, court, casenum, ppentry,
                                      newbucket=1)
        print(" %s put into new bucket: %s" % (docketname, outcome))
        return

    if fetcherror is IADirect.FETCH_URLERROR:
        # Couldn't get the IA docket.  Only a message is printed here;
        # ppentry is not modified (the original carried a disabled reset
        # of ppentry.processing and a "Drop Lock Here?" question).
        print(" %s timed out. wait for next cron." % docketname)
        return

    # Unknown fetch error.  As above, only a message is printed and
    # ppentry is not modified.
    print(" %s unknown fetch error. wait for next cron." % docketname)
示例7: archive_docket_xml_locally
def archive_docket_xml_locally(court, casenum, directory="archived_dockets"):
    """Download a case's docket XML into *directory* using ``wget``.

    The URL comes from ``IACommon.get_docketxml_url``.  If the download
    fails, or ``wget`` cannot be started, a message is printed and the
    process exits via ``exit()``.  On success a confirmation is printed.
    """
    import subprocess

    docket_url = IACommon.get_docketxml_url(court, casenum)
    # Pass an argument list instead of a shell string so that spaces or
    # shell metacharacters (e.g. '&', ';') in the URL or directory are
    # handed to wget verbatim rather than interpreted by a shell.
    wget_cmd = ["wget", "--quiet",
                "--directory-prefix=%s" % directory,
                docket_url]
    try:
        download_failed = subprocess.call(wget_cmd) != 0
    except OSError:
        # wget could not be launched; the shell-based call reported this
        # case as a non-zero status, so treat it as a failed download.
        download_failed = True

    if download_failed:
        print("Could not archive this docket, exiting without trying to delete...")
        exit()
    print(" saved docket %s.%s for analysis in %s directory" % (court, casenum, directory))
示例8: archive_document_locally
def archive_document_locally(document, directory="blacklisted_documents"):
    """Download the PDF of *document* into *directory* using ``wget``.

    The URL comes from ``IACommon.get_pdf_url`` using the document's
    ``court``, ``casenum``, ``docnum`` and ``subdocnum``.  If the download
    fails, or ``wget`` cannot be started, an error is printed and the
    process exits via ``exit()``.  On success a confirmation is printed.
    """
    import subprocess

    doc_url = IACommon.get_pdf_url(document.court, document.casenum,
                                   document.docnum, document.subdocnum)
    # Pass an argument list instead of a shell string so that spaces or
    # shell metacharacters in the URL or directory reach wget verbatim
    # rather than being interpreted by a shell.
    wget_cmd = ["wget", "--quiet",
                "--directory-prefix=%s" % directory,
                doc_url]
    try:
        download_failed = subprocess.call(wget_cmd) != 0
    except OSError:
        # wget could not be launched; the shell-based call reported this
        # case as a non-zero status, so treat it as a failed download.
        download_failed = True

    if download_failed:
        print("There was an error archiving document (%s.%s.%s.%s), it has been marked as unavailble, but has not been deleted from the Internet Archive" % (document.court, document.casenum, document.docnum, document.subdocnum))
        exit()
    print(" saved document %s.%s for analysis in %s directory" % (document.docnum, document.subdocnum, directory))
示例9: check_bucket_ready
def check_bucket_ready(court, casenum):
    """Probe the bucket-check URL for ``court``/``casenum``.

    Returns ``(False, <status code as int>)`` when opening the URL raises
    ``urllib2.HTTPError``.
    """
    bucket_request = urllib2.Request(
        IACommon.get_bucketcheck_url(court, casenum))
    try:
        opener.open(bucket_request)
    except urllib2.HTTPError as http_err:  # HTTP Error
        # No bucket exists, probably a 404 code.
        status = int(http_err.code)
        return False, status
    # NOTE(review): the code visible here has no explicit return on the
    # success path, so the function yields None when the open succeeds --
    # confirm against the full original before relying on a tuple result.
示例10: upload_document
def upload_document(pdfbits, court, casenum, docnum, subdocnum):
    """Upload one PDF to IA, requesting a new bucket (``makenew=True``).

    Returns the ``(success, msg)`` pair from ``_post_request`` on success
    and ``(False, msg)`` on failure; progress is logged either way.
    """
    doc_id = (court, casenum, docnum, subdocnum)
    logger.info(' Uploading document %s.%s.%s.%s' % doc_id)

    pdf_request = IACommon.make_pdf_request(
        pdfbits, court, casenum, docnum, subdocnum,
        metadict={}, makenew=True)
    uploaded, detail = _post_request(pdf_request)

    if uploaded:
        logger.info(' Uploaded document %s.%s.%s.%s' % doc_id)
        return uploaded, detail

    logger.error(' Failed to upload document %s.%s.%s.%s' % doc_id)
    return False, detail
示例11: put_file
def put_file(filebits, court, casenum, docnum, subdocnum, metadict=None):
    """ Schedule a PUT of the file into an Internet Archive bucket.

    The request is built with ``IACommon.make_pdf_request`` and pickled
    under the document's PDF name; a ``PickledPut`` row tracks it and is
    flagged ``ready`` once pickling succeeds.

    ``metadict`` defaults to a fresh empty dict on every call (a shared
    ``{}`` default would leak state between calls if it were ever mutated).

    Returns a message string: a fixed failure message when the same file is
    already scheduled, otherwise the message from ``pickle_object``.
    """
    if metadict is None:
        metadict = {}

    already_pickled_msg = ("IA PUT failed: "
                           "the same file is already in the pickle bucket.")

    request = IACommon.make_pdf_request(filebits, court, casenum,
                                        docnum, subdocnum, metadict)

    # If this file is already scheduled, drop this. # TK: what we want?
    filename = IACommon.get_pdfname(court, casenum, docnum, subdocnum)
    query = PickledPut.objects.filter(filename=filename)
    if query:
        logging.info("put_file: same file already pickled. %s", filename)
        return already_pickled_msg

    # Add a PickledPut DB entry to schedule the PUT, not yet ready
    ppentry = PickledPut(filename=filename)

    # An entry for this filename may have been created between the check
    # above and this save; IntegrityError is treated as "already scheduled".
    try:
        ppentry.save()
    except IntegrityError:
        logging.info("put_file: same file already pickled. %s", filename)
        return already_pickled_msg

    # Pickle the request object into the jar
    pickle_success, message = pickle_object(request, filename)

    if pickle_success:
        # PickledPut now ready for processing.
        ppentry.ready = 1
        ppentry.save()
        logging.info("put_file: ready. %s", filename)
    else:
        # Could not pickle object, so remove from DB
        logging.warning("put_file: could not pickle PDF. %s", filename)
        ppentry.delete()

    return message
示例12: delete_document_from_IA
def delete_document_from_IA(document):
    """Issue the PDF delete request for *document*, printing debug details.

    The request object, its full URL, its method and its attribute listing
    are printed before sending; the response object is printed on success.
    An ``HTTPError`` has its status code printed.
    """
    delete_request = IACommon.make_pdf_delete_request(
        document.court,
        document.casenum,
        document.docnum,
        document.subdocnum)

    # Debug output describing the outgoing request.
    print(delete_request)
    print(delete_request.get_full_url())
    print(delete_request.get_method())
    print(dir(delete_request))

    try:
        response = urllib2.urlopen(delete_request)
    except urllib2.HTTPError as http_err:
        if http_err.code != 204:
            print(" the response to the delete request was %s. This may not be an error" % http_err.code)
        # NOTE(review): the flattened source does not show whether this line
        # sat inside the `if` above or directly in the handler; it is kept at
        # handler level here -- confirm against the original file.
        print(" response: %s" % http_err.code)
    else:
        print(response)
示例13: put_docket
def put_docket(docket, court, casenum, casemeta_diff=1):
    """PUT the docket XML directly and clean up after a successful put.

    Returns the ``(put_result, put_msg)`` pair from ``_dispatch_direct_put``.
    ``casemeta_diff`` is forwarded to ``cleanup_docket_put`` as ``metadiff``.
    """
    xml_request = IACommon.make_docketxml_request(
        docket.to_xml(), court, casenum, docket.casemeta)

    put_ok, put_detail = _dispatch_direct_put(xml_request)
    if not put_ok:
        return put_ok, put_detail

    cleanup_docket_put(court, casenum, docket, metadiff=casemeta_diff)
    return put_ok, put_detail
示例14: _cron_process_PDF
def _cron_process_PDF(obj, ppentry):
    """Screen a pickled PDF put and either quarantine it or dispatch it.

    A PDF that is invalid, has an SSN hit, or is blacklisted gets a docket
    built with ``available=0`` passed to ``UploadHandler.do_me_up``; its
    ``ppentry`` is deleted and its pickle is quarantined.  Otherwise the put
    is dispatched; on success the document is marked available and a docket
    built with ``available=1`` is passed on.  The put message is printed.
    """
    filename = ppentry.filename

    meta = IACommon.get_meta_from_filename(filename)
    court, casenum = meta["court"], meta["casenum"]
    docnum, subdocnum = meta["docnum"], meta["subdocnum"]

    invalid_PDF = _is_invalid_pdf(obj, filename)

    # SSN privacy check -- only on valid PDFs, because PyPdf doesn't deal
    # well with bad input.
    has_ssn = False if invalid_PDF else _has_ssn(obj, filename)

    # Blacklist file check
    in_blacklist = _in_blacklist(filename)

    rejected = invalid_PDF or has_ssn or in_blacklist
    if not rejected:
        put_result, put_msg = _dispatch_put(obj, ppentry)
        if put_result:
            # Put success-- mark this document as available in the DB
            DocumentManager.mark_as_available(filename)
            available_docket = DocketXML.make_docket_for_pdf(
                "", court, casenum, docnum, subdocnum, available=1)
            UploadHandler.do_me_up(available_docket)
        print(" %s %s" % (filename, put_msg))
        return

    unavailable_docket = DocketXML.make_docket_for_pdf(
        "", court, casenum, docnum, subdocnum, available=0)
    UploadHandler.do_me_up(unavailable_docket)

    # Delete the entry from the DB
    ppentry.delete()

    # Quarantine the pickle file for analysis
    _quarantine_pickle(filename,
                       ssn=has_ssn,
                       blacklist_file=in_blacklist,
                       invalid_PDF=invalid_PDF)
示例15: mark_as_available
def mark_as_available(filename):
    """Set ``available = 1`` on the Document row matching *filename*.

    The court, case number, document number and sub-document number are
    taken from ``IACommon.get_meta_from_filename``.  A missing row or a
    failed save is logged as an error; nothing is returned or raised for
    those two cases.
    """
    meta = IACommon.get_meta_from_filename(filename)
    matches = Document.objects.filter(
        court=meta["court"],
        casenum=meta["casenum"],
        docnum=meta["docnum"],
        subdocnum=meta["subdocnum"],
    )

    try:
        docentry = matches[0]
    except IndexError:
        # Unexpected case. No Document entry
        logging.error("mark_as_available: no entry for %s." % (filename))
        return

    docentry.available = 1
    try:
        docentry.save()
    except IntegrityError:
        logging.error("mark_as_available: could not save %s." % (filename))