本文整理汇总了Python中bill.models.BillType.by_value方法的典型用法代码示例。如果您正苦于以下问题：Python BillType.by_value方法的具体用法？Python BillType.by_value怎么用？Python BillType.by_value使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类bill.models.BillType的用法示例。
在下文中一共展示了BillType.by_value方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: was_bill_enacted
# 需要导入模块: from bill.models import BillType [as 别名]
# 或者: from bill.models.BillType import by_value [as 别名]
def was_bill_enacted(b, startdate, enddate, recurse=True):
    """Report whether bill ``b`` was enacted between ``startdate`` and ``enddate``.

    A bill counts as enacted when either its current status is one of
    ``BillStatus.final_status_passed_bill`` with a status date inside the
    window, or its ``data.json`` lists a "signed" action whose ``acted_at``
    value falls inside the window.

    Args:
        b: the bill; ``congress``, ``bill_type``, ``number``,
            ``current_status`` and ``current_status_date`` are read.
        startdate: inclusive start of the window; must be comparable with
            ``b.current_status_date`` and provide ``isoformat()``.
        enddate: inclusive end of the window, same requirements.
        recurse: currently unused -- the companion-bill check that would
            consume it is disabled (see the commented-out code below).

    Returns:
        True if the bill was enacted within the window, otherwise False.

    Raises:
        IOError: if the bill's ``data.json`` cannot be opened.
    """
    # Our status code is currently tied to the assignment of a slip
    # law number, which isn't what we mean exactly.
    #
    # (Additionally, we should count a bill as enacted if any identified
    # companion bill is enacted.)

    # If it *was* assigned a slip law number, which in the future might
    # be useful for veto overrides, then OK.
    if b.current_status in BillStatus.final_status_passed_bill and \
            startdate <= b.current_status_date <= enddate:
        return True

    # Otherwise, check the actions for a <signed> action.
    slug = BillType.by_value(b.bill_type).slug
    fn = "data/congress/%s/bills/%s/%s%d/data.json" % (b.congress, slug, slug, b.number)
    with open(fn) as f:  # close the handle promptly instead of leaking it
        bj = json.load(f)

    # NOTE(review): this is a lexicographic string comparison; if acted_at
    # carries a time component, actions on enddate itself would sort after
    # enddate.isoformat() -- confirm against the data.
    for axn in bj["actions"]:
        if axn["type"] == "signed" and startdate.isoformat() <= axn["acted_at"] <= enddate.isoformat():
            return True

    # Otherwise check companion bills.
    #if recurse:
    #    for rb in RelatedBill.objects.filter(bill=b, relation="identical").select_related("related_bill"):
    #        if was_bill_enacted(rb.related_bill, startdate, enddate, recurse=False):
    #            return True

    return False
示例2: was_bill_enacted_2013
# 需要导入模块: from bill.models import BillType [as 别名]
# 或者: from bill.models.BillType import by_value [as 别名]
def was_bill_enacted_2013(b, startdate, enddate):
    """Report whether bill ``b`` was enacted between ``startdate`` and ``enddate``.

    A bill counts as enacted when either its current status is one of
    ``BillStatus.final_status_passed_bill`` with a status date inside the
    window, or its ``data.json`` lists a "signed" action whose ``acted_at``
    value falls inside the window.

    Args:
        b: the bill; ``congress``, ``bill_type``, ``number``,
            ``current_status`` and ``current_status_date`` are read.
        startdate: inclusive start of the window; must be comparable with
            ``b.current_status_date`` and provide ``isoformat()``.
        enddate: inclusive end of the window, same requirements.

    Returns:
        True if the bill was enacted within the window, otherwise False.

    Raises:
        IOError: if the bill's ``data.json`` cannot be opened.
    """
    # Our status code is currently tied to the assignment of a slip
    # law number, which isn't what we mean exactly.
    #
    # (Additionally, we should count a bill as enacted if any identified
    # companion bill is enacted.)

    # TODO: See new function in the Bill model.

    # If it *was* assigned a slip law number, which in the future might
    # be useful for veto overrides, then OK.
    if b.current_status in BillStatus.final_status_passed_bill and \
            startdate <= b.current_status_date <= enddate:
        return True

    # Otherwise, check the actions for a <signed> action.
    slug = BillType.by_value(b.bill_type).slug
    fn = "data/congress/%s/bills/%s/%s%d/data.json" % (b.congress, slug, slug, b.number)
    with open(fn) as f:  # close the handle promptly instead of leaking it
        bj = json.load(f)

    for axn in bj["actions"]:
        if axn["type"] == "signed" and startdate.isoformat() <= axn["acted_at"] <= enddate.isoformat():
            return True

    return False
示例3: get_bill_text_metadata
# 需要导入模块: from bill.models import BillType [as 别名]
# 或者: from bill.models.BillType import by_value [as 别名]
def get_bill_text_metadata(bill, version):
    """Load metadata for one text version of a bill and record which renditions exist on disk.

    Args:
        bill: the bill; ``bill_type``, ``congress`` and ``number`` are read,
            and ``get_absolute_url()`` is called when a PDF exists.
        version: version code naming a sub-directory of the bill's
            ``text-versions`` directory, or None to select the version
            with the greatest ``issued_on`` value.

    Returns:
        The dict parsed from the version's ``data.json``, extended with
        whichever of these keys apply: ``mods_file``, ``text_file``,
        ``text_file_source``, ``html_file``, ``pdf_file``,
        ``has_thumbnail``, ``thumbnail_path``, ``xml_file``,
        ``xml_file_source`` and ``has_displayable_text``.
        None when ``version`` is None and no version metadata is found.

    Raises:
        IOError: if ``version`` is given but its ``data.json`` cannot be opened.
    """
    from bill.models import BillType  # has to be here and not module-level to avoid cyclic dependency
    import glob
    import json

    bill_type = BillType.by_value(bill.bill_type)
    bt = bill_type.slug
    basename = "data/congress/%d/bills/%s/%s%d/text-versions" % (bill.congress, bt, bt, bill.number)

    if version is None:
        # Cycle through files to find most recent version by date.
        dat = None
        for versionfile in glob.glob(basename + "/*/data.json"):
            with open(versionfile) as f:
                d = json.load(f)
            if not dat or d["issued_on"] > dat["issued_on"]:
                dat = d
        if not dat:
            return None
    else:
        with open(basename + "/%s/data.json" % version) as f:
            dat = json.load(f)

    # From here on basename points at the selected version's directory.
    basename += "/" + dat["version_code"]

    bt2 = bill_type.xml_code
    html_fn = "data/us/bills.text/%s/%s/%s%d%s.html" % (bill.congress, bt2, bt2, bill.number, dat["version_code"])

    if os.path.exists(basename + "/mods.xml"):
        dat["mods_file"] = basename + "/mods.xml"

    # get a plain text file if one exists
    if os.path.exists(basename + "/document.txt"):
        dat["text_file"] = basename + "/document.txt"
        dat["has_displayable_text"] = True
        for source in dat.get("sources", []):
            if source["source"] == "statutes":
                dat["text_file_source"] = "statutes"

    # get an HTML file if one exists
    if os.path.exists(html_fn):
        dat["html_file"] = html_fn
        dat["has_displayable_text"] = True

    # get a PDF file if one exists
    pdf_fn = "data/us/bills.text/%s/%s/%s%d%s.pdf" % (bill.congress, bt2, bt2, bill.number, dat["version_code"])
    if os.path.exists(pdf_fn):
        dat["pdf_file"] = pdf_fn
        dat["has_thumbnail"] = True
        dat["thumbnail_path"] = bill.get_absolute_url() + "/_text_image"

    # get an XML file if one exists; the Cato rendition is preferred
    if os.path.exists(basename + "/catoxml.xml"):
        dat["xml_file"] = basename + "/catoxml.xml"
        dat["has_displayable_text"] = True
        dat["xml_file_source"] = "cato-deepbills"
    elif os.path.exists(basename + "/document.xml"):
        dat["xml_file"] = basename + "/document.xml"
        dat["has_displayable_text"] = True

    return dat
示例4: get_transparency_stats
# 需要导入模块: from bill.models import BillType [as 别名]
# 或者: from bill.models.BillType import by_value [as 别名]
def get_transparency_stats(person, role, stats, congress, startdate, enddate):
    """Record how many "transparency bills" ``person`` sponsored or cosponsored.

    The bill list is read from ``analysis/transparency-bills.txt`` (the bill
    id is the first whitespace-delimited token of each line) and cached in
    the module-level ``transparency_bills``.

    Args:
        person: the legislator compared against ``bill.sponsor`` and used to
            filter ``Cosponsor`` rows.
        role: object whose ``role_type`` selects the chamber.
        stats: dict that is mutated in place; the result is stored under
            the key ``"transparency-bills"``.
        congress: only bills whose ``congress`` equals this are loaded.
        startdate: inclusive lower bound on introduction / cosponsorship date.
        enddate: inclusive upper bound on introduction / cosponsorship date.

    Returns:
        None. ``stats["transparency-bills"]`` receives a dict with the keys
        ``value`` (3 per sponsored bill plus 1 per cosponsored bill),
        ``sponsored``, ``cosponsored``, ``num_bills`` and ``chamber``.
    """
    global transparency_bills
    if not transparency_bills:
        # NOTE(review): the cache is filled only while empty and is filtered
        # by the `congress` of that call, so later calls with a different
        # congress reuse the first call's list -- confirm this is intended.
        transparency_bills = []
        with open("analysis/transparency-bills.txt") as f:
            for line in f:
                bill = Bill.from_congressproject_id(re.split(r"\s", line)[0])
                if bill.congress != congress:
                    continue
                transparency_bills.append(bill)

    chamber = RoleType.by_value(role.role_type).congress_chamber

    # which bills are in the right chamber?
    plausible_bills = [
        bill for bill in transparency_bills
        if BillType.by_value(bill.bill_type).chamber == chamber
    ]

    # did person sponsor any of these within this session?
    sponsored = [
        bill for bill in transparency_bills
        if startdate <= bill.introduced_date <= enddate and bill.sponsor == person
    ]

    # did person cosponsor any of these within this session?
    cosponsored = [
        cosp.bill
        for cosp in Cosponsor.objects.filter(person=person, bill__in=transparency_bills, joined__gte=startdate, joined__lte=enddate)
    ]

    stats["transparency-bills"] = {
        "value": len(sponsored)*3 + len(cosponsored),
        "sponsored": make_bill_entries(sponsored),
        "cosponsored": make_bill_entries(cosponsored),
        "num_bills": len(plausible_bills),
        "chamber": chamber,
    }
示例5: get_bill_number
# 需要导入模块: from bill.models import BillType [as 别名]
# 或者: from bill.models.BillType import by_value [as 别名]
def get_bill_number(bill, show_congress_number="ARCHIVAL"):
    """Build the display form of a bill's number: "<type label> <number>".

    The ordinal of the bill's congress is appended in parentheses when
    ``show_congress_number`` is "ALL", or when it is "ARCHIVAL" and the
    bill does not belong to ``settings.CURRENT_CONGRESS``.
    """
    from bill.models import BillType

    type_label = BillType.by_value(bill.bill_type).label
    display = '%s %s' % (type_label, bill.number)

    from_other_congress = bill.congress != settings.CURRENT_CONGRESS
    if show_congress_number == "ALL" or (from_other_congress and show_congress_number == "ARCHIVAL"):
        return display + ' (%s)' % ordinal(bill.congress)
    return display
示例6: load_bill_text
# 需要导入模块: from bill.models import BillType [as 别名]
# 或者: from bill.models.BillType import by_value [as 别名]
def load_bill_text(bill, version, plain_text=False, mods_only=False):
    """Load a bill's HTML text and the document metadata from its MODS file.

    Bills from before the 103rd Congress, and every plain-text request,
    are delegated to ``load_bill_text_alt``.

    Args:
        bill: the bill; ``congress``, ``bill_type``, ``number``, ``id`` and
            ``title`` are read.
        version: version code appended to the file base name, or None for
            the un-suffixed files.
        plain_text: when true, delegate to ``load_bill_text_alt``.
        mods_only: when true, skip reading the HTML file; ``text_html`` is
            then None.

    Returns:
        A dict with the keys ``bill_id``, ``bill_name``, ``basename``,
        ``text_html``, ``docdate`` (a ``datetime.date``), ``gpo_url``,
        ``gpo_pdf_url``, ``doc_version``, ``doc_version_name``,
        ``numpages``, ``has_html_text`` (always True) and ``citations``.

    Raises:
        IOError: if the HTML file cannot be opened (presumably also when the
            MODS file is missing -- that depends on lxml).
    """
    if bill.congress < 103 or plain_text:
        return load_bill_text_alt(bill, version, plain_text=plain_text, mods_only=mods_only)

    from bill.models import BillType  # has to be here and not module-level to avoid cyclic dependency

    bt = BillType.by_value(bill.bill_type).xml_code
    basename = "data/us/bills.text/%s/%s/%s%d%s" % (bill.congress, bt, bt, bill.number, version if version is not None else "")

    if mods_only:
        bill_text_content = None
    else:
        with open(basename + ".html") as f:  # close the handle instead of leaking it
            bill_text_content = f.read()

    mods = lxml.etree.parse(basename + ".mods.xml")
    ns = { "mods": "http://www.loc.gov/mods/v3" }
    docdate = mods.xpath("string(mods:originInfo/mods:dateIssued)", namespaces=ns)
    gpo_url = "http://www.gpo.gov/fdsys/search/pagedetails.action?packageId=" + mods.xpath("string(mods:recordInfo/mods:recordIdentifier[@source='DGPO'])", namespaces=ns)
    #gpo_url = mods.xpath("string(mods:identifier[@type='uri'])", namespaces=ns)
    gpo_pdf_url = mods.xpath("string(mods:location/mods:url[@displayLabel='PDF rendition'])", namespaces=ns)
    doc_version = mods.xpath("string(mods:extension/mods:billVersion)", namespaces=ns)
    numpages = mods.xpath("string(mods:physicalDescription/mods:extent)", namespaces=ns)
    if numpages:
        # Spell out a trailing " p." as " pages".
        numpages = re.sub(r" p\.$", " pages", numpages)

    # The issue date is dash-separated integers, passed positionally to date().
    docdate = datetime.date(*(int(d) for d in docdate.split("-")))
    doc_version_name = bill_gpo_status_codes[doc_version]

    # load a list of citations as marked up by GPO
    citations = []
    for cite in mods.xpath("//mods:identifier", namespaces=ns):
        cite_type = cite.get("type")
        if cite_type == "USC citation":
            citations.append( parse_usc_citation(cite) )
        elif cite_type == "Statute citation":
            citations.append({ "type": "statutes_at_large", "text": cite.text })
        elif cite_type == "public law citation":
            # An explicit match test replaces the former bare `except:`;
            # missing or unparseable text is still recorded as "unknown".
            m = re.match(r"Public Law (\d+)-(\d+)$", cite.text or "")
            if m:
                congress_cite, slip_law_num = m.groups()
                citations.append({ "type": "slip_law", "text": cite.text, "congress": int(congress_cite), "number": int(slip_law_num) })
            else:
                citations.append({ "type": "unknown", "text": cite.text })

    return {
        "bill_id": bill.id,
        "bill_name": bill.title,
        "basename": basename,
        "text_html": bill_text_content,
        "docdate": docdate,
        "gpo_url": gpo_url,
        "gpo_pdf_url": gpo_pdf_url,
        "doc_version": doc_version,
        "doc_version_name": doc_version_name,
        "numpages": numpages,
        "has_html_text": True,
        "citations": citations,
    }
示例7: get_bill_number
# 需要导入模块: from bill.models import BillType [as 别名]
# 或者: from bill.models.BillType import by_value [as 别名]
def get_bill_number(bill, show_congress_number="ARCHIVAL"):
    """Build the display form of a bill's number.

    For congresses up to and including the 42nd, the number is taken from
    the part of ``bill.title`` before the first colon; otherwise it is
    "<type label> <number>". The ordinal of the bill's congress is appended
    in parentheses when ``show_congress_number`` is "ALL", or when it is
    "ARCHIVAL" and the bill is not from ``settings.CURRENT_CONGRESS``.
    """
    if bill.congress <= 42:
        # This is an American Memory bill. Its number is stored in the title.
        title_prefix, _, _ = bill.title.partition(":")
        display = title_prefix
    else:
        from bill.models import BillType
        type_label = BillType.by_value(bill.bill_type).label
        display = '%s %s' % (type_label, bill.number)

    from_other_congress = bill.congress != settings.CURRENT_CONGRESS
    if show_congress_number == "ALL" or (from_other_congress and show_congress_number == "ARCHIVAL"):
        return display + ' (%s)' % ordinal(bill.congress)
    return display
示例8: bill_text
# 需要导入模块: from bill.models import BillType [as 别名]
# 或者: from bill.models.BillType import by_value [as 别名]
def bill_text(request, congress, type_slug, number, version=None):
    """Gather a bill's text, its alternate text versions and bills related to it.

    Args:
        request: the incoming request (not read here).
        congress: congress number used to look the bill up.
        type_slug: bill-type slug, resolved with ``BillType.by_slug``.
        number: bill number used to look the bill up.
        version: text version code; "" is treated like None.

    Returns:
        A dict with the keys ``bill``, ``congressdates``, ``textdata``
        (None when loading the text raised IOError), ``version``,
        ``alternates`` (None when there is no text, otherwise the MODS-only
        data of every version found on disk, sorted by ``docdate``) and
        ``related_bills`` (a list of ``(bill, version)`` pairs without
        duplicates).

    Raises:
        Http404: if the slug is not a known bill type, or (via
            ``get_object_or_404``) no such bill exists.
    """
    if version == "":
        version = None

    try:
        bill_type = BillType.by_slug(type_slug)
    except BillType.NotFound:
        raise Http404("Invalid bill type: " + type_slug)
    bill = get_object_or_404(Bill, congress=congress, bill_type=bill_type, number=number)

    from billtext import load_bill_text, bill_gpo_status_codes
    try:
        textdata = load_bill_text(bill, version)
    except IOError:
        textdata = None

    # Get a list of the alternate versions of this bill.
    alternates = None
    if textdata:
        # The type code is the same for every version, so look it up once.
        xml_code = BillType.by_value(bill.bill_type).xml_code
        alternates = []
        for v in bill_gpo_status_codes:
            fn = "data/us/bills.text/%s/%s/%s%d%s.mods.xml" % (bill.congress, xml_code, xml_code, bill.number, v)
            if os.path.exists(fn):
                alternates.append(load_bill_text(bill, v, mods_only=True))
        alternates.sort(key=lambda mods: mods["docdate"])

    # Get a list of related bills.
    from billtext import get_current_version
    related_bills = []
    for rb in list(bill.find_reintroductions()) + [r.related_bill for r in bill.get_related_bills()]:
        try:
            rbv = get_current_version(rb)
        except IOError:
            continue  # text not available
        if (rb, rbv) not in related_bills:
            related_bills.append((rb, rbv))

    # Add the other side of every stored text comparison, in either direction.
    for btc in BillTextComparison.objects.filter(bill1=bill).exclude(bill2=bill):
        if (btc.bill2, btc.ver2) not in related_bills:
            related_bills.append((btc.bill2, btc.ver2))
    for btc in BillTextComparison.objects.filter(bill2=bill).exclude(bill1=bill):
        if (btc.bill1, btc.ver1) not in related_bills:
            related_bills.append((btc.bill1, btc.ver1))

    return {
        'bill': bill,
        "congressdates": get_congress_dates(bill.congress),
        "textdata": textdata,
        "version": version,
        "alternates": alternates,
        "related_bills": related_bills,
    }
示例9: get_bill_text_metadata
# 需要导入模块: from bill.models import BillType [as 别名]
# 或者: from bill.models.BillType import by_value [as 别名]
def get_bill_text_metadata(bill, version):
    """Load the ``data.json`` metadata for one text version of a bill.

    Args:
        bill: the bill; ``bill_type``, ``congress`` and ``number`` are read.
        version: version code naming a sub-directory of the bill's
            ``text-versions`` directory, or None to select the version
            with the greatest ``issued_on`` value.

    Returns:
        The parsed metadata dict with an added ``plain_text_file`` key
        holding the path of that version's ``document.txt`` (the path is
        not checked for existence). None when ``version`` is None and no
        version metadata is found.

    Raises:
        IOError: if ``version`` is given but its ``data.json`` cannot be opened.
    """
    from bill.models import BillType  # has to be here and not module-level to avoid cyclic dependency
    import glob
    import json

    bt = BillType.by_value(bill.bill_type).slug
    basename = "data/congress/%d/bills/%s/%s%d/text-versions" % (bill.congress, bt, bt, bill.number)

    if version is None:
        # Cycle through files to find most recent version by date.
        dat = None
        for versionfile in glob.glob(basename + "/*/data.json"):
            with open(versionfile) as f:
                d = json.load(f)
            if not dat or d["issued_on"] > dat["issued_on"]:
                dat = d
        if not dat:
            return None
    else:
        with open(basename + "/%s/data.json" % version) as f:
            dat = json.load(f)

    dat["plain_text_file"] = basename + "/" + dat["version_code"] + "/document.txt"
    return dat
示例10: load_bill_text_alt
# 需要导入模块: from bill.models import BillType [as 别名]
# 或者: from bill.models.BillType import by_value [as 别名]
def load_bill_text_alt(bill, version, plain_text=False, mods_only=False):
    """Load bill text info from the Congress project JSON files.

    Only metadata is available through this path: unless ``mods_only`` is
    true, the function raises after the metadata file has been read.

    Args:
        bill: the bill; ``bill_type``, ``congress``, ``number``, ``id`` and
            ``title`` are read.
        version: version code naming ``<version>.json`` in the bill's
            ``text-versions`` directory, or None to select the file with
            the greatest ``issued_on`` value.
        plain_text: accepted for signature parity with ``load_bill_text``;
            not used here.
        mods_only: must be true for the call to succeed.

    Returns:
        A dict with the keys ``bill_id``, ``bill_name``, ``basename``,
        ``docdate`` (a ``datetime.date``), ``gpo_url``, ``gpo_pdf_url``,
        ``doc_version``, ``doc_version_name`` and ``has_html_text``
        (always False).

    Raises:
        Exception: "Bill text not available." whenever ``mods_only`` is false.
        IOError: if the requested version file cannot be opened, or if
            ``version`` is None and no version file exists.
    """
    from bill.models import BillType  # has to be here and not module-level to avoid cyclic dependency
    import glob
    import json

    bt = BillType.by_value(bill.bill_type).slug
    basename = "data/congress/%d/bills/%s/%s%d/text-versions" % (bill.congress, bt, bt, bill.number)

    if version is None:
        # Cycle through files to find most recent version by date.
        dat = None
        for versionfile in glob.glob(basename + "/*.json"):
            with open(versionfile) as f:
                d = json.load(f)
            if not dat or d["issued_on"] > dat["issued_on"]:
                dat = d
    else:
        with open(basename + "/%s.json" % version) as f:
            dat = json.load(f)

    if not mods_only:
        raise Exception("Bill text not available.")

    if dat is None:
        # No version file was found. Raise the same exception type a missing
        # explicit version produces, rather than the TypeError that
        # subscripting None below would cause.
        raise IOError("Bill text metadata not available.")

    gpo_url = dat["urls"]["pdf"]
    # Statutes-at-Large PDFs get a link to their content-detail page instead.
    m = re.match(r"http://www.gpo.gov/fdsys/pkg/(STATUTE-\d+)/pdf/(STATUTE-\d+-.*).pdf", gpo_url)
    if m:
        gpo_url = "http://www.gpo.gov/fdsys/granule/%s/%s/content-detail.html" % m.groups()

    return {
        "bill_id": bill.id,
        "bill_name": bill.title,
        "basename": basename,
        "docdate": datetime.date(*(int(d) for d in dat["issued_on"].split("-"))),
        "gpo_url": gpo_url,
        "gpo_pdf_url": dat["urls"]["pdf"],
        "doc_version": dat["version_code"],
        "doc_version_name": bill_gpo_status_codes[dat["version_code"]],
        "has_html_text": False,
    }
示例11: load_bill_text
# 需要导入模块: from bill.models import BillType [as 别名]
# 或者: from bill.models.BillType import by_value [as 别名]
def load_bill_text(bill, version, plain_text=False, mods_only=False):
    """Load a bill's text (HTML or plain) and the metadata from its MODS file.

    Bills from before the 103rd Congress are delegated to
    ``load_bill_text_alt``.

    Args:
        bill: the bill; ``congress``, ``bill_type``, ``number``, ``id`` and
            ``title`` are read.
        version: version code appended to the file base name, or None for
            the un-suffixed files.
        plain_text: when true (and ``mods_only`` is false), return only the
            decoded contents of the ``.txt`` file.
        mods_only: when true, skip reading any text; ``text_html`` is None.

    Returns:
        With ``plain_text``: the text decoded as UTF-8 with undecodable
        bytes dropped, or "" if the ``.txt`` file cannot be opened.
        Otherwise a dict with the keys ``bill_id``, ``bill_name``,
        ``basename``, ``text_html``, ``docdate`` (a ``datetime.date``),
        ``gpo_url``, ``gpo_pdf_url``, ``doc_version``, ``doc_version_name``,
        ``numpages`` and ``has_html_text`` (always True).

    Raises:
        IOError: if the HTML file cannot be opened (presumably also when the
            MODS file is missing -- that depends on lxml).
    """
    if bill.congress < 103:
        return load_bill_text_alt(bill, version, plain_text=plain_text, mods_only=mods_only)

    from bill.models import BillType  # has to be here and not module-level to avoid cyclic dependency

    bt = BillType.by_value(bill.bill_type).xml_code
    basename = "data/us/bills.text/%s/%s/%s%d%s" % (bill.congress, bt, bt, bill.number, version if version is not None else "")

    if mods_only:
        bill_text_content = None
    elif plain_text:
        try:
            # Read bytes and decode explicitly, which behaves the same on
            # Python 2 and 3. Otherwise we get 'Chuck failed' in the
            # xapian_backend apparently due to decoding issue.
            with open(basename + ".txt", "rb") as f:
                return f.read().decode("utf8", "ignore")
        except IOError:
            return ""
    elif os.path.exists(basename + ".xml") and False:
        # Deliberately disabled (`and False`): render the bill XML through
        # the billres XSLT instead of using the pre-rendered HTML.
        dom = lxml.etree.parse(basename + ".xml")
        transform = lxml.etree.parse(os.path.join(os.path.dirname(os.path.realpath(__file__)), "textxsl/billres.xsl"))
        transform = lxml.etree.XSLT(transform)
        result = transform(dom)

        # empty nodes cause HTML parsing problems, so remove them.
        # iterate in reverse document order so that we hit parents after
        # their children, since if we remove all of the children then we may
        # want to remove the parent too.
        for node in reversed(list(result.getiterator())):
            if node.xpath("string(.)") == "":
                node.getparent().remove(node)

        bill_text_content = lxml.etree.tostring(result.xpath("head/style")[0]) + lxml.etree.tostring(result.xpath("body")[0])
    else:
        with open(basename + ".html") as f:  # close the handle instead of leaking it
            bill_text_content = f.read()

    mods = lxml.etree.parse(basename + ".mods.xml")
    ns = { "mods": "http://www.loc.gov/mods/v3" }
    docdate = mods.xpath("string(mods:originInfo/mods:dateIssued)", namespaces=ns)
    gpo_url = "http://www.gpo.gov/fdsys/search/pagedetails.action?packageId=" + mods.xpath("string(mods:recordInfo/mods:recordIdentifier[@source='DGPO'])", namespaces=ns)
    #gpo_url = mods.xpath("string(mods:identifier[@type='uri'])", namespaces=ns)
    gpo_pdf_url = mods.xpath("string(mods:location/mods:url[@displayLabel='PDF rendition'])", namespaces=ns)
    doc_version = mods.xpath("string(mods:extension/mods:billVersion)", namespaces=ns)
    numpages = mods.xpath("string(mods:physicalDescription/mods:extent)", namespaces=ns)
    if numpages:
        # Spell out a trailing " p." as " pages".
        numpages = re.sub(r" p\.$", " pages", numpages)

    # The issue date is dash-separated integers, passed positionally to date().
    docdate = datetime.date(*(int(d) for d in docdate.split("-")))
    doc_version_name = bill_gpo_status_codes[doc_version]

    return {
        "bill_id": bill.id,
        "bill_name": bill.title,
        "basename": basename,
        "text_html": bill_text_content,
        "docdate": docdate,
        "gpo_url": gpo_url,
        "gpo_pdf_url": gpo_pdf_url,
        "doc_version": doc_version,
        "doc_version_name": doc_version_name,
        "numpages": numpages,
        "has_html_text": True,
    }
示例12: list
# 需要导入模块: from bill.models import BillType [as 别名]
# 或者: from bill.models.BillType import by_value [as 别名]
#!script
import os.path
from bill.models import Bill, BillType
# Materialise the primary key of every Bill row as a plain list up front,
# so that it has a length and can be sliced.
all_bill_ids = list(Bill.objects.all().values_list('id', flat=True))
def batch(iterable, n = 1):
    """Yield consecutive slices of ``iterable``, each at most ``n`` items long.

    ``iterable`` must support ``len()`` and slicing. The last slice is
    shorter when the length is not a multiple of ``n``; an empty input
    yields nothing.
    """
    total = len(iterable)
    for start in range(0, total, n):
        stop = start + n
        if stop > total:
            # Clamp the final chunk to the end of the sequence.
            stop = total
        yield iterable[start:stop]
# Report every bill whose data.json is missing on disk. The ids are
# processed 2000 at a time so only one batch of bills is loaded at once.
for idset in batch(all_bill_ids, n=2000):
    # Progress marker, printed once per batch. The call form of print works
    # on both Python 2 and Python 3.
    print("...")
    for bill in Bill.objects.only('congress', 'bill_type', 'number').in_bulk(idset).values():
        slug = BillType.by_value(bill.bill_type).slug
        fn = "data/congress/%s/bills/%s/%s%d/data.json" % (bill.congress, slug, slug, bill.number)
        if not os.path.exists(fn):
            # Same output as the former `print bill.id, bill`.
            print("%s %s" % (bill.id, bill))
示例13: load_bill_text
# 需要导入模块: from bill.models import BillType [as 别名]
# 或者: from bill.models.BillType import by_value [as 别名]
def load_bill_text(bill, version, plain_text=False, mods_only=False):
    """Load a bill's HTML text, MODS metadata and the citations marked up in it.

    Bills from before the 103rd Congress, and every plain-text request,
    are delegated to ``load_bill_text_alt``.

    Args:
        bill: the bill; ``congress``, ``bill_type``, ``number``, ``id`` and
            ``title`` are read.
        version: version code appended to the file base name, or None for
            the un-suffixed files.
        plain_text: when true, delegate to ``load_bill_text_alt``.
        mods_only: when true, skip reading the HTML file; ``text_html`` is
            then None.

    Returns:
        A dict with the keys ``bill_id``, ``bill_name``, ``basename``,
        ``text_html``, ``docdate`` (a ``datetime.date``), ``gpo_url``,
        ``gpo_pdf_url``, ``doc_version``, ``doc_version_name``,
        ``numpages``, ``has_html_text`` (always True) and ``citations``
        (one dict per recognised ``mods:identifier``; entries that cannot
        be parsed have type "unknown").

    Raises:
        IOError: if the HTML file cannot be opened (presumably also when the
            MODS file is missing -- that depends on lxml).
    """
    if bill.congress < 103 or plain_text:
        return load_bill_text_alt(bill, version, plain_text=plain_text, mods_only=mods_only)

    from bill.models import BillType  # has to be here and not module-level to avoid cyclic dependency

    bt = BillType.by_value(bill.bill_type).xml_code
    basename = "data/us/bills.text/%s/%s/%s%d%s" % (bill.congress, bt, bt, bill.number, version if version is not None else "")

    if mods_only:
        bill_text_content = None
    else:
        with open(basename + ".html") as f:  # close the handle instead of leaking it
            bill_text_content = f.read()

    mods = lxml.etree.parse(basename + ".mods.xml")
    ns = { "mods": "http://www.loc.gov/mods/v3" }
    docdate = mods.xpath("string(mods:originInfo/mods:dateIssued)", namespaces=ns)
    gpo_url = "http://www.gpo.gov/fdsys/search/pagedetails.action?packageId=" + mods.xpath("string(mods:recordInfo/mods:recordIdentifier[@source='DGPO'])", namespaces=ns)
    #gpo_url = mods.xpath("string(mods:identifier[@type='uri'])", namespaces=ns)
    gpo_pdf_url = mods.xpath("string(mods:location/mods:url[@displayLabel='PDF rendition'])", namespaces=ns)
    doc_version = mods.xpath("string(mods:extension/mods:billVersion)", namespaces=ns)
    numpages = mods.xpath("string(mods:physicalDescription/mods:extent)", namespaces=ns)
    if numpages:
        # Spell out a trailing " p." as " pages".
        numpages = re.sub(r" p\.$", " pages", numpages)

    # The issue date is dash-separated integers, passed positionally to date().
    docdate = datetime.date(*(int(d) for d in docdate.split("-")))
    doc_version_name = bill_gpo_status_codes[doc_version]

    # citations
    citations = []
    for cite in mods.xpath("//mods:identifier", namespaces=ns):
        cite_type = cite.get("type")
        if cite_type == "USC citation":
            try:
                citations.append(_parse_usc_citation_text(cite.text))
            except Exception:
                # Best effort: anything that fails to parse or look up is
                # kept as an unknown citation. Unlike the former bare
                # `except:`, KeyboardInterrupt/SystemExit now propagate.
                citations.append({ "type": "unknown", "text": cite.text })
        elif cite_type == "Statute citation":
            citations.append({ "type": "statutes_at_large", "text": cite.text })
        elif cite_type == "public law citation":
            m = re.match(r"Public Law (\d+)-(\d+)$", cite.text or "")
            if m:
                congress_cite, slip_law_num = m.groups()
                citations.append({ "type": "slip_law", "text": cite.text, "congress": int(congress_cite), "number": int(slip_law_num) })
            else:
                citations.append({ "type": "unknown", "text": cite.text })

    return {
        "bill_id": bill.id,
        "bill_name": bill.title,
        "basename": basename,
        "text_html": bill_text_content,
        "docdate": docdate,
        "gpo_url": gpo_url,
        "gpo_pdf_url": gpo_pdf_url,
        "doc_version": doc_version,
        "doc_version_name": doc_version_name,
        "numpages": numpages,
        "has_html_text": True,
        "citations": citations,
    }


def _parse_usc_citation_text(text):
    """Turn the text of a "USC citation" identifier into a citation dict.

    Raises (AttributeError / TypeError) when the text does not match the
    citation pattern or lacks a section where one is needed; the caller
    records such text as an "unknown" citation.
    """
    title_cite, title_app_cite, sec_cite, para_cite = re.match(r"(\d+\S*)\s*U.S.C.(\s*App.)?\s*([^\s(]+?)?\s*(\(.*|et ?seq\.?|note)?$", text).groups()
    if title_app_cite:
        # Appendix titles are identified by an "a" suffix on the title.
        title_cite += "a"
    if para_cite and para_cite.strip() == "":
        para_cite = None

    if not para_cite and "-" in sec_cite:
        # This dash may indicate a range of sections, or it may just be
        # a dash that occurs within section names. Be smart and try to
        # figure it out.
        sec_range = _find_usc_section_range(title_cite, sec_cite)
        if sec_range is not None:
            # Both parts exist. Treat as a USC citation range.
            return { "type": "usc", "text": text, "title": title_cite, "section": sec_range[0], "paragraph": None, "range_to_section": sec_range[1] }

    return { "type": "usc", "text": text, "title": title_cite, "section": sec_cite, "paragraph" : para_cite }


def _find_usc_section_range(title_cite, sec_cite):
    """Return ``[first, last]`` if a dashed section is a range of sections, else None."""
    from models import USCSection

    sec_dash_parts = sec_cite.split("-")
    for i in range(1, len(sec_dash_parts)):
        # Split the citation around each particular dash, and if both
        # halves are valid citations with the same parent then assume
        # this is a range. (A nice case is 16 U.S.C. 3839aa-8, where
        # both 3839aa and 8 are valid sections but are far apart.)
        sec_parts = ["-".join(sec_dash_parts[:i]),
                     "-".join(sec_dash_parts[i:])]
        sec_parent = None
        for sec_part in sec_parts:
            matched_sec = list(USCSection.objects.filter(citation="usc/" + title_cite + "/" + sec_part))
            if len(matched_sec) == 0:
                break  # part doesn't exist, so this split is not a range
            if sec_parent is None:
                sec_parent = matched_sec[0].parent_section_id
            elif sec_parent != matched_sec[0].parent_section_id:
                break  # likewise, parents don't match so not a range
        else:
            # Neither half broke out of the loop: both exist under one parent.
            return sec_parts
    return None
示例14: reference
# 需要导入模块: from bill.models import BillType [as 别名]
# 或者: from bill.models.BillType import by_value [as 别名]
def reference(bill):
    """Return the bill's reference string: type XML code, congress, "-", number."""
    xml_code = BillType.by_value(bill.bill_type).xml_code
    congress_part = str(bill.congress)
    number_part = str(bill.number)
    return xml_code + congress_part + "-" + number_part
示例15: load_bill_text
# 需要导入模块: from bill.models import BillType [as 别名]
# 或者: from bill.models.BillType import by_value [as 别名]
def load_bill_text(bill, version, plain_text=False, mods_only=False):
    """Load a bill's text (HTML or plain), MODS metadata and marked-up citations.

    Bills from before the 103rd Congress are delegated to
    ``load_bill_text_alt``.

    Args:
        bill: the bill; ``congress``, ``bill_type``, ``number``, ``id`` and
            ``title`` are read.
        version: version code appended to the file base name, or None for
            the un-suffixed files.
        plain_text: when true (and ``mods_only`` is false), return only the
            decoded contents of the ``.txt`` file.
        mods_only: when true, skip reading any text; ``text_html`` is None.

    Returns:
        With ``plain_text``: the text decoded as UTF-8 with undecodable
        bytes dropped, or "" if the ``.txt`` file cannot be opened.
        Otherwise a dict with the keys ``bill_id``, ``bill_name``,
        ``basename``, ``text_html``, ``docdate`` (a ``datetime.date``),
        ``gpo_url``, ``gpo_pdf_url``, ``doc_version``, ``doc_version_name``,
        ``numpages``, ``has_html_text`` (always True) and ``citations``.

    Raises:
        IOError: if the HTML file cannot be opened (presumably also when the
            MODS file is missing -- that depends on lxml).
    """
    if bill.congress < 103:
        return load_bill_text_alt(bill, version, plain_text=plain_text, mods_only=mods_only)

    from bill.models import BillType  # has to be here and not module-level to avoid cyclic dependency

    bt = BillType.by_value(bill.bill_type).xml_code
    basename = "data/us/bills.text/%s/%s/%s%d%s" % (bill.congress, bt, bt, bill.number, version if version is not None else "")

    if mods_only:
        bill_text_content = None
    elif plain_text:
        # plain_text never raises an IOError
        try:
            # Read bytes and decode explicitly, which behaves the same on
            # Python 2 and 3. Otherwise we get 'Chuck failed' in the
            # xapian_backend apparently due to decoding issue.
            with open(basename + ".txt", "rb") as f:
                return f.read().decode("utf8", "ignore")
        except IOError:
            return ""
    elif os.path.exists(basename + ".xml") and False:
        # Deliberately disabled (`and False`): render the bill XML through
        # the billres XSLT instead of using the pre-rendered HTML.
        dom = lxml.etree.parse(basename + ".xml")
        transform = lxml.etree.parse(os.path.join(os.path.dirname(os.path.realpath(__file__)), "textxsl/billres.xsl"))
        transform = lxml.etree.XSLT(transform)
        result = transform(dom)

        # empty nodes cause HTML parsing problems, so remove them.
        # iterate in reverse document order so that we hit parents after
        # their children, since if we remove all of the children then we may
        # want to remove the parent too.
        for node in reversed(list(result.getiterator())):
            if node.xpath("string(.)") == "":
                node.getparent().remove(node)

        bill_text_content = lxml.etree.tostring(result.xpath("head/style")[0]) + lxml.etree.tostring(result.xpath("body")[0])
    else:
        with open(basename + ".html") as f:  # close the handle instead of leaking it
            bill_text_content = f.read()

    mods = lxml.etree.parse(basename + ".mods.xml")
    ns = { "mods": "http://www.loc.gov/mods/v3" }
    docdate = mods.xpath("string(mods:originInfo/mods:dateIssued)", namespaces=ns)
    gpo_url = "http://www.gpo.gov/fdsys/search/pagedetails.action?packageId=" + mods.xpath("string(mods:recordInfo/mods:recordIdentifier[@source='DGPO'])", namespaces=ns)
    #gpo_url = mods.xpath("string(mods:identifier[@type='uri'])", namespaces=ns)
    gpo_pdf_url = mods.xpath("string(mods:location/mods:url[@displayLabel='PDF rendition'])", namespaces=ns)
    doc_version = mods.xpath("string(mods:extension/mods:billVersion)", namespaces=ns)
    numpages = mods.xpath("string(mods:physicalDescription/mods:extent)", namespaces=ns)
    if numpages:
        # Spell out a trailing " p." as " pages".
        numpages = re.sub(r" p\.$", " pages", numpages)

    # The issue date is dash-separated integers, passed positionally to date().
    docdate = datetime.date(*(int(d) for d in docdate.split("-")))
    doc_version_name = bill_gpo_status_codes[doc_version]

    # citations
    # Explicit match tests replace the former bare `except:` clauses; text
    # that is missing or does not match is still recorded as "unknown".
    citations = []
    for cite in mods.xpath("//mods:identifier", namespaces=ns):
        cite_type = cite.get("type")
        if cite_type == "USC citation":
            m = re.match(r"(\d+\S*)\s*U.S.C.(\s*App.)?\s*([^\s(]+?)?\s*(\(.*|et ?seq\.?|note)?$", cite.text or "")
            if m:
                title_cite, title_app_cite, sec_cite, para_cite = m.groups()
                if title_app_cite:
                    # Appendix titles are identified by an "a" suffix on the title.
                    title_cite += "a"
                citations.append({ "type": "usc", "text": cite.text, "title": title_cite, "section": sec_cite, "paragraph" : para_cite })
            else:
                citations.append({ "type": "unknown", "text": cite.text })
        elif cite_type == "Statute citation":
            citations.append({ "type": "statutes_at_large", "text": cite.text })
        elif cite_type == "public law citation":
            m = re.match(r"Public Law (\d+)-(\d+)$", cite.text or "")
            if m:
                congress_cite, slip_law_num = m.groups()
                citations.append({ "type": "slip_law", "text": cite.text, "congress": int(congress_cite), "number": int(slip_law_num) })
            else:
                citations.append({ "type": "unknown", "text": cite.text })

    return {
        "bill_id": bill.id,
        "bill_name": bill.title,
        "basename": basename,
        "text_html": bill_text_content,
        "docdate": docdate,
        "gpo_url": gpo_url,
        "gpo_pdf_url": gpo_pdf_url,
        "doc_version": doc_version,
        "doc_version_name": doc_version_name,
        "numpages": numpages,
        "has_html_text": True,
        "citations": citations,
    }