当前位置: 首页>>代码示例>>Python>>正文


Python InternetArchiveCommon.get_pdf_url方法代码示例

本文整理汇总了Python中InternetArchiveCommon.get_pdf_url方法的典型用法代码示例。如果您正苦于以下问题:Python InternetArchiveCommon.get_pdf_url方法的具体用法?Python InternetArchiveCommon.get_pdf_url怎么用?Python InternetArchiveCommon.get_pdf_url使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在InternetArchiveCommon的用法示例。


在下文中一共展示了InternetArchiveCommon.get_pdf_url方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: archive_document_locally

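# Required import (the code below refers to the module as IACommon): import InternetArchiveCommon as IACommon
# Alternatively: from InternetArchiveCommon import get_pdf_url (then call get_pdf_url(...) directly instead of IACommon.get_pdf_url(...))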
def archive_document_locally(document, directory="blacklisted_documents"):
    """Download a document's PDF into a local directory using wget.

    The URL to fetch is obtained from IACommon.get_pdf_url() using the
    document's court, casenum, docnum and subdocnum.

    Args:
        document: object exposing ``court``, ``casenum``, ``docnum`` and
            ``subdocnum`` attributes.
        directory: value handed to wget's ``--directory-prefix`` option.

    Raises:
        SystemExit: if wget cannot be started or returns a non-zero status.
    """
    # Local imports keep this block self-contained regardless of the
    # surrounding file's import list.
    import subprocess
    import sys

    doc_url = IACommon.get_pdf_url(document.court, document.casenum,
                                   document.docnum, document.subdocnum)

    # An argument list (no shell) means whitespace or shell metacharacters in
    # the directory or URL cannot split or inject into the command line.
    command = ["wget", "--quiet", "--directory-prefix=%s" % directory, doc_url]
    try:
        status = subprocess.call(command)
    except OSError:
        # wget is missing or not executable; handle it like a failed download.
        status = 1

    if status != 0:
        print("There was an error archiving document (%s.%s.%s.%s), it has been marked as unavailble, but has not been deleted from the Internet Archive" % (document.court, document.casenum, document.docnum, document.subdocnum))
        # Exit with a failure status so calling scripts can detect the error.
        sys.exit(1)

    print("    saved document %s.%s for analysis in %s directory" % (document.docnum, document.subdocnum, directory))
开发者ID:ajperrins,项目名称:recap-server,代码行数:11,代码来源:remove_document.py

示例2: _get_documents_dict

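# Required import (the code below refers to the module as IACommon): import InternetArchiveCommon as IACommon
# Alternatively: from InternetArchiveCommon import get_pdf_url (then call get_pdf_url(...) directly instead of IACommon.get_pdf_url(...))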
def _get_documents_dict(court, casenum):
    """ Build a dict of document metadata for one case, keyed by docid.

        Each value holds "casenum", "docnum" and "subdocnum"; documents
        flagged as available also get "filename" (from
        IACommon.get_pdf_url) and "timestamp" (lastdate as mm/dd/yy).
    """
    documents = {}

    matches = Document.objects.filter(court=court, casenum=casenum)
    if not matches:
        return documents

    for doc in matches:
        # Rows with a falsy docid are left out of the result.
        if not doc.docid:
            continue

        entry = {
            "casenum": doc.casenum,
            "docnum": doc.docnum,
            "subdocnum": doc.subdocnum,
        }

        if doc.available:
            entry["filename"] = IACommon.get_pdf_url(doc.court,
                                                     doc.casenum,
                                                     doc.docnum,
                                                     doc.subdocnum)
            entry["timestamp"] = doc.lastdate.strftime("%m/%d/%y")

        documents[doc.docid] = entry

    return documents
开发者ID:janderse,项目名称:recap-server,代码行数:22,代码来源:UploadHandler.py

示例3: query

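# Required import (the code below refers to the module as IACommon): import InternetArchiveCommon as IACommon
# Alternatively: from InternetArchiveCommon import get_pdf_url (then call get_pdf_url(...) directly instead of IACommon.get_pdf_url(...))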
def query(request):
    """  Query the database to check which PDF documents we have.

         The json input is {"court": <court>,
                            "urls": <list of PACER doc1 urls>}

         The json output is a set of mappings:
                           {<pacer url>: { "filename": <public url>,
                                           "timestamp": <last time seen> },
                            <pacer url>: ... }
    """

    response = {}

    if request.method != "POST":
        message = "query: Not a POST request."
        logging.error(message)
        return HttpResponse(message)

    try:
        jsonin = simplejson.loads(request.POST["json"])
    except KeyError:
        message = "query: no 'json' POST argument"
        logging.warning(message)
        return HttpResponse(message)
    except ValueError:
        message = "query: malformed 'json' POST argument"
        logging.warning(message)
        return HttpResponse(message)
    except IOError:
        # Not something we can fix I don't think.  Client fails to send data.
        message = "query: Client read error (Timeout?)"
        logging.warning(message)
        return HttpResponse(message)

    try:
        court = jsonin["court"].strip()
    except KeyError:
        message = "query: missing json 'court' argument."
        logging.warning(message)
        return HttpResponse(message)

    try:
        urls = jsonin["urls"]
    except KeyError:
        message = "query: missing json 'urls' argument."
        logging.warning(message)
        return HttpResponse(message)

    for url in urls:

        # detect show_doc style document links
        sdre = re.search("show_doc\.pl\?(.*)",url)

        if sdre:
            argsstring = sdre.group(1)
            args = argsstring.split("&")
            argsdict = {}

            for arg in args:
                (key, val) = arg.split("=")
                argsdict[key] = val

            # maybe need to add some checks for whether
            # these vars exist in argsdict

            query = Document.objects.filter(court=court) \
                .filter(docnum=argsdict["doc_num"]) \
                .filter(casenum=argsdict["caseid"]) \
                .filter(dm_id=int(argsdict["dm_id"])) \
                .filter(available=1)

        else:
            # otherwise, assume it's a normal doc1 style url
            docid = UploadHandler.docid_from_url_name(url)
            query = Document.objects.filter(docid=docid) \
                .filter(available=1)


        if query:
            query = query[0]
            real_casenum = query.casenum

            response[url] = {
                "filename": IACommon.get_pdf_url(court,
                                                 real_casenum,
                                                 query.docnum,
                                                 query.subdocnum),
                "timestamp": query.lastdate.strftime("%m/%d/%y")}


            if query.subdocnum == 0:

                subquery = Document.objects.filter(court=court,
                                                   casenum=query.casenum,
                                                   docnum=query.docnum,
                                                   available=1).exclude(
                                                   subdocnum=0)

                if len(subquery) > 0:
#.........这里部分代码省略.........
开发者ID:brianwc,项目名称:recap-server,代码行数:103,代码来源:views.py


注:本文中的InternetArchiveCommon.get_pdf_url方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。