本文整理汇总了 Python 中 InternetArchiveCommon.get_pdf_url 方法的典型用法代码示例。如果您正苦于以下问题：Python InternetArchiveCommon.get_pdf_url 方法的具体用法？Python InternetArchiveCommon.get_pdf_url 怎么用？Python InternetArchiveCommon.get_pdf_url 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 InternetArchiveCommon 的用法示例。
在下文中一共展示了InternetArchiveCommon.get_pdf_url方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: archive_document_locally
# 需要导入模块: import InternetArchiveCommon [as 别名]
# 或者: from InternetArchiveCommon import get_pdf_url [as 别名]
def archive_document_locally(document, directory="blacklisted_documents"):
    """Download a document's PDF into a local directory using ``wget``.

    The URL is obtained from ``IACommon.get_pdf_url`` using the document's
    court, casenum, docnum and subdocnum, then fetched with
    ``wget --quiet --directory-prefix=<directory> <url>``.

    Args:
        document: object exposing ``court``, ``casenum``, ``docnum`` and
            ``subdocnum`` attributes.
        directory: value handed to wget's ``--directory-prefix`` option.

    Raises:
        SystemExit: via ``exit()`` when wget cannot be started or returns a
            non-zero status; an error message is printed first.
    """
    # Local import so the block stays self-contained regardless of what the
    # (unseen) module header imports.
    import subprocess

    doc_url = IACommon.get_pdf_url(document.court, document.casenum,
                                   document.docnum, document.subdocnum)

    # Pass an argument list instead of a shell string: characters such as
    # spaces, '&', ';' or '?' in the directory or URL are then delivered to
    # wget verbatim rather than being interpreted by a shell.
    command = ["wget", "--quiet", "--directory-prefix=%s" % directory, doc_url]
    try:
        status = subprocess.call(command)
    except OSError:
        # wget missing or not executable. The shell-based call reported this
        # as a non-zero status, so keep treating it as a failed download.
        status = 1

    if status != 0:
        print("There was an error archiving document (%s.%s.%s.%s), it has been marked as unavailble, but has not been deleted from the Internet Archive" % (document.court, document.casenum, document.docnum, document.subdocnum))
        exit()

    print(" saved document %s.%s for analysis in %s directory" % (document.docnum, document.subdocnum, directory))
示例2: _get_documents_dict
# 需要导入模块: import InternetArchiveCommon [as 别名]
# 或者: from InternetArchiveCommon import get_pdf_url [as 别名]
def _get_documents_dict(court, casenum):
    """Build a ``docid -> metadata`` dict for the documents of one case.

    Documents are looked up with ``Document.objects.filter`` on *court* and
    *casenum*. Rows with a falsy ``docid`` are left out. Every entry carries
    "casenum", "docnum" and "subdocnum"; entries whose document is flagged
    ``available`` additionally carry "filename" (the result of
    ``IACommon.get_pdf_url``) and "timestamp" (``lastdate`` formatted as
    ``%m/%d/%y``).

    Returns an empty dict when the lookup matches nothing.
    """
    result = {}

    matches = Document.objects.filter(court=court, casenum=casenum)
    if not matches:
        return result

    for doc in matches:
        # Rows without a docid cannot be keyed, so they are skipped.
        if not doc.docid:
            continue

        entry = {
            "casenum": doc.casenum,
            "docnum": doc.docnum,
            "subdocnum": doc.subdocnum,
        }

        if doc.available:
            pdf_url = IACommon.get_pdf_url(doc.court,
                                           doc.casenum,
                                           doc.docnum,
                                           doc.subdocnum)
            last_seen = doc.lastdate.strftime("%m/%d/%y")
            entry["filename"] = pdf_url
            entry["timestamp"] = last_seen

        result[doc.docid] = entry

    return result
示例3: query
# 需要导入模块: import InternetArchiveCommon [as 别名]
# 或者: from InternetArchiveCommon import get_pdf_url [as 别名]
def query(request):
""" Query the database to check which PDF documents we have.
The json input is {"court": <court>,
"urls": <list of PACER doc1 urls>}
The json output is a set of mappings:
{<pacer url>: { "filename": <public url>,
"timestamp": <last time seen> },
<pacer url>: ... }
"""
response = {}
if request.method != "POST":
message = "query: Not a POST request."
logging.error(message)
return HttpResponse(message)
try:
jsonin = simplejson.loads(request.POST["json"])
except KeyError:
message = "query: no 'json' POST argument"
logging.warning(message)
return HttpResponse(message)
except ValueError:
message = "query: malformed 'json' POST argument"
logging.warning(message)
return HttpResponse(message)
except IOError:
# Not something we can fix I don't think. Client fails to send data.
message = "query: Client read error (Timeout?)"
logging.warning(message)
return HttpResponse(message)
try:
court = jsonin["court"].strip()
except KeyError:
message = "query: missing json 'court' argument."
logging.warning(message)
return HttpResponse(message)
try:
urls = jsonin["urls"]
except KeyError:
message = "query: missing json 'urls' argument."
logging.warning(message)
return HttpResponse(message)
for url in urls:
# detect show_doc style document links
sdre = re.search("show_doc\.pl\?(.*)",url)
if sdre:
argsstring = sdre.group(1)
args = argsstring.split("&")
argsdict = {}
for arg in args:
(key, val) = arg.split("=")
argsdict[key] = val
# maybe need to add some checks for whether
# these vars exist in argsdict
query = Document.objects.filter(court=court) \
.filter(docnum=argsdict["doc_num"]) \
.filter(casenum=argsdict["caseid"]) \
.filter(dm_id=int(argsdict["dm_id"])) \
.filter(available=1)
else:
# otherwise, assume it's a normal doc1 style url
docid = UploadHandler.docid_from_url_name(url)
query = Document.objects.filter(docid=docid) \
.filter(available=1)
if query:
query = query[0]
real_casenum = query.casenum
response[url] = {
"filename": IACommon.get_pdf_url(court,
real_casenum,
query.docnum,
query.subdocnum),
"timestamp": query.lastdate.strftime("%m/%d/%y")}
if query.subdocnum == 0:
subquery = Document.objects.filter(court=court,
casenum=query.casenum,
docnum=query.docnum,
available=1).exclude(
subdocnum=0)
if len(subquery) > 0:
#.........这里部分代码省略.........