当前位置: 首页>>代码示例>>Python>>正文


Python ElementTree.xpath方法代码示例

本文整理汇总了Python中lxml.etree.ElementTree.xpath方法的典型用法代码示例。如果您正苦于以下问题:Python ElementTree.xpath方法的具体用法?Python ElementTree.xpath怎么用?Python ElementTree.xpath使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在lxml.etree.ElementTree的用法示例。


在下文中一共展示了ElementTree.xpath方法的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: scrape_pre_58_legislators

# 需要导入模块: from lxml.etree import ElementTree [as 别名]
# 或者: from lxml.etree.ElementTree import xpath [as 别名]
    def scrape_pre_58_legislators(self, chamber, term, suffix):
        """Scrape and save the legislator roster for the 56th or 57th term.

        Fetches the ``legname.asp`` roster page built from ``term`` and
        ``suffix``, locates the table whose ``name`` attribute matches the
        roster for ``chamber`` and saves one ``Legislator`` per non-empty
        row that follows the ``Name (Party)`` header row.

        :param chamber: ``'upper'`` selects the Senate roster; any other
            value selects the House roster.
        :param term: term identifier as a string; only ``'56'`` and ``'57'``
            are supported.
        :param suffix: string appended to ``term`` in the roster URL.
        :raises ValueError: if ``term`` is not ``'56'`` or ``'57'``.
        """
        # ``name`` attribute of the roster table per (term, is_upper_chamber).
        # These strings are compared verbatim against the page markup, so
        # they must not be "corrected".
        roster_tables = {
            ('57', True): '57th Legislatore Roster Senate (2001-2002)',
            ('57', False): '57th Legislator Roster (House)(2001-2002)',
            ('56', True): 'Members of the Senate',
            ('56', False): 'Members of the House',
        }
        try:
            table_name = roster_tables[(term, chamber == 'upper')]
        except KeyError:
            # Previously an unsupported term surfaced later as a NameError.
            raise ValueError('unsupported term for pre-58 roster: %r' % (term,))

        party_names = {'R': 'Republican', 'D': 'Democrat'}

        url = 'http://leg.mt.gov/css/Sessions/%s%s/legname.asp' % (term, suffix)
        legislator_page = ElementTree(lxml.html.fromstring(self.urlopen(url)))

        for table in legislator_page.xpath("//table"):
            if table.attrib.get('name') != table_name:
                continue

            parse_names = False
            for row in table.getchildren():
                if row.tag != 'tr':
                    continue
                cells = row.getchildren()
                celldata = cells[0].text_content().strip()
                if parse_names and celldata:
                    # Cell looks like "Last, First (R)".
                    name, party_letter = celldata.rsplit(' (', 1)
                    party_letter = party_letter[0]

                    name_parts = [part.strip() for part in name.split(',')]
                    assert len(name_parts) < 4
                    if len(name_parts) == 3:
                        # "Last, Suffix, First": keep the first two parts
                        # together as the last name.
                        last_name = ' '.join(name_parts[0:2])
                        first_name = name_parts[2]
                    elif len(name_parts) == 2:
                        last_name, first_name = name_parts
                    else:
                        # No comma at all: treat the whole text as the last
                        # name instead of leaving the names unbound (or
                        # stale from the previous row).
                        last_name = name_parts[0]
                        first_name = ''

                    district = cells[2].text_content().strip()
                    # Unknown party letters are passed through unchanged.
                    party = party_names.get(party_letter, party_letter)
                    full_name = ('%s %s' % (first_name, last_name)).strip()

                    legislator = Legislator(term, chamber, district, full_name,
                                            first_name, last_name, '', party)
                    legislator.add_source(url)
                    self.save_legislator(legislator)

                if celldata == "Name (Party)":
                    # The table headers seem to vary in size, but the last row
                    # always seems to start with 'Name (Party)' -- once we find
                    # that, start parsing legislator names
                    parse_names = True
开发者ID:timfreund,项目名称:fiftystates,代码行数:61,代码来源:legislators.py

示例2: parse_bill

# 需要导入模块: from lxml.etree import ElementTree [as 别名]
# 或者: from lxml.etree.ElementTree import xpath [as 别名]
    def parse_bill(self, bill_url, session, chamber):
        """Parse a bill's HTML page and return the populated bill.

        Follows the "status" link on the bill page (or falls back to the
        search URL template) to build the bill via
        ``parse_bill_status_page``, then attaches the HTML and PDF versions
        and the status page as a source.

        :param bill_url: URL of the bill's HTML text.
        :param session: session key used to look up ``session_details``.
        :param chamber: chamber passed through to the status-page parser.
        :returns: the bill, or ``None`` when the page is skipped, cannot be
            parsed, or no bill could be built from the status page.
        """
        # Temporarily skip the differently-formatted house budget bill.
        if "/2011/billhtml/hb0002.htm" in bill_url.lower():
            return None

        bill = None
        try:
            doc = lxml.html.fromstring(self.urlopen(bill_url))
        except XMLSyntaxError as e:
            self.logger.warning("Got %r while parsing %r" % (e, bill_url))
            return None
        bill_page = ElementTree(doc)

        status_link_prefixes = ("status of", "Detailed Information (status)")
        for anchor in bill_page.findall("//a"):
            if anchor.text_content().startswith(status_link_prefixes):
                status_url = anchor.attrib["href"].replace("\r", "").replace("\n", "")
                bill = self.parse_bill_status_page(status_url, bill_url, session, chamber)

        if bill is None:
            # No bill was found.  Maybe something like HB0790 in the 2005 session?
            # We can search for the bill metadata.
            page_name = bill_url.split("/")[-1].split(".")[0]
            bill_type = page_name[0:2]
            bill_number = page_name[2:]
            laws_year = self.metadata["session_details"][session]["years"][0] % 100

            status_url = self.search_url_template % (laws_year, bill_type, bill_number)
            bill = self.parse_bill_status_page(status_url, bill_url, session, chamber)

        if bill is None:
            # NOTE(review): guards against a status parser that returns
            # None; previously this crashed on ``bill["actions"]``.
            return None

        # Get versions on the detail page.
        versions = [a["action"] for a in bill["actions"]
                    if "Version Available" in a["action"]]
        if not versions:
            version_name = "Introduced"
        else:
            version = versions[-1]
            if "New Version" in version:
                version_name = "Amended"
            elif "Enrolled" in version:
                version_name = "Enrolled"
            else:
                # Unrecognised "Version Available" action: fall back to the
                # action text rather than leaving version_name unbound.
                version_name = version

        self.add_other_versions(bill)

        # Add html.
        bill.add_version(version_name, bill_url, mimetype="text/html")

        # Add pdf.  A page without a "BillPdf" link used to raise KeyError
        # from ``set().pop()``; now the PDF version is simply skipped.
        pdf_urls = bill_page.xpath('//a/@href[contains(., "BillPdf")]')
        if pdf_urls:
            bill.add_version(version_name, pdf_urls[0], mimetype="application/pdf")
        else:
            self.logger.warning("No PDF link found on %r" % (bill_url,))

        # Add status url as a source.
        bill.add_source(status_url)

        return bill
开发者ID:ranjithtenz,项目名称:openstates,代码行数:59,代码来源:bills.py

示例3: parse_bill_status_page

# 需要导入模块: from lxml.etree import ElementTree [as 别名]
# 或者: from lxml.etree.ElementTree import xpath [as 别名]
    def parse_bill_status_page(self, status_url, bill_url, session, chamber):
        """Build a bill from the status page at ``status_url``.

        The bill id and title are read from fixed table cells; each has a
        primary XPath and a fallback XPath that is tried when the primary
        one matches nothing. ``bill_url`` is recorded as a source, then
        sponsors and actions are added from the same page.

        :raises IndexError: if neither XPath for a field matches.
        """
        raw_html = self.urlopen(status_url)
        status_page = ElementTree(lxml.html.fromstring(raw_html))

        def cell_text(primary_path, fallback_path):
            # see 2007 HB 2... weird.
            found = status_page.xpath(primary_path)
            if not found:
                found = status_page.xpath(fallback_path)
            return found[0].text_content()

        bill_id = cell_text("/div/form[1]/table[2]/tr[2]/td[2]",
                            "/html/html[2]/tr[1]/td[2]")
        title = cell_text("/div/form[1]/table[2]/tr[3]/td[2]",
                          "/html/html[3]/tr[1]/td[2]")

        bill = Bill(session, chamber, bill_id, title)
        bill.add_source(bill_url)

        for populate in (self.add_sponsors, self.add_actions):
            populate(bill, status_page)

        return bill
开发者ID:Empact,项目名称:fiftystates,代码行数:22,代码来源:bills.py

示例4: parse_bill_status_page

# 需要导入模块: from lxml.etree import ElementTree [as 别名]
# 或者: from lxml.etree.ElementTree import xpath [as 别名]
    def parse_bill_status_page(self, status_url, bill_url, term, session, chamber):
        """Parse a bill status page in either the standard or special-session layout.

        The bill id is looked up with two fixed XPaths first; when one
        matches, the page is handed to ``parse_standard_bill_status_page``.
        Otherwise the bill table located by ``get_bill_table`` is used and
        the page is handed to ``parse_special_session_bill_status_page``.

        :param status_url: URL of the status page to fetch.
        :param bill_url: URL of the bill text, recorded as a source.
        :param term: accepted for interface compatibility; not used here.
        :param session: session passed through to the page parsers.
        :param chamber: chamber passed through to the page parsers.
        :returns: the parsed bill, or ``None`` (after logging an error) when
            no layout could be parsed.
        """
        bill = None
        bill_id = None
        sources = [bill_url, status_url]
        status_page = ElementTree(lxml.html.fromstring(self.urlopen(status_url)))

        # see 2007 HB 2... weird.
        standard_id_paths = ("/div/form[1]/table[2]/tr[2]/td[2]",
                             '/html/html[2]/tr[1]/td[2]')
        for id_path in standard_id_paths:
            cells = status_page.xpath(id_path)
            if cells:
                bill_id = cells[0].text_content()
                break

        if bill_id is None:
            # Special-session layout. This stays best-effort: an IndexError
            # anywhere in here means "not parseable" and is reported by the
            # error log below.
            try:
                bill_table = self.get_bill_table(status_page)
                # NOTE(review): get_bill_table appears to return None when
                # no table matches; guard so that case is not an
                # AttributeError.
                if bill_table is not None:
                    bill_id = bill_table.xpath('//tr[2]/td[2]')[0].text_content()
                    bill = self.parse_special_session_bill_status_page(bill_id,
                                                                       status_page,
                                                                       bill_table,
                                                                       session,
                                                                       chamber,
                                                                       sources)
            except IndexError:
                pass
        else:
            bill = self.parse_standard_bill_status_page(bill_id,
                                                        status_page,
                                                        session,
                                                        chamber,
                                                        sources)

        if bill is None:
            self.logger.error("No bill parsed for %s" % bill_url)
        return bill
开发者ID:timfreund,项目名称:fiftystates,代码行数:43,代码来源:bills.py

示例5: parse_bill

# 需要导入模块: from lxml.etree import ElementTree [as 别名]
# 或者: from lxml.etree.ElementTree import xpath [as 别名]
    def parse_bill(self, bill_url, session, chamber):
        """Parse a bill's HTML page and return the populated bill.

        Follows the "status" link on the bill page (or falls back to the
        search URL template) to build the bill via
        ``parse_bill_status_page``, then attaches the HTML and PDF versions
        and the status page as a source.

        :param bill_url: URL of the bill's HTML text.
        :param session: session key used to look up ``session_details``.
        :param chamber: chamber passed through to the status-page parser.
        :returns: the bill, or ``None`` when the page is skipped or no bill
            could be built from the status page.
        """
        # Temporarily skip the differently-formatted house budget bill.
        if '/2011/billhtml/hb0002.htm' in bill_url.lower():
            return None

        bill = None
        bill_page = ElementTree(lxml.html.fromstring(self.urlopen(bill_url)))

        status_link_prefixes = ('status of', 'Detailed Information (status)')
        for anchor in bill_page.findall('//a'):
            if anchor.text_content().startswith(status_link_prefixes):
                status_url = anchor.attrib['href'].replace("\r", "").replace("\n", "")
                bill = self.parse_bill_status_page(status_url, bill_url, session, chamber)

        if bill is None:
            # No bill was found.  Maybe something like HB0790 in the 2005 session?
            # We can search for the bill metadata.
            page_name = bill_url.split("/")[-1].split(".")[0]
            bill_type = page_name[0:2]
            bill_number = page_name[2:]
            laws_year = self.metadata['session_details'][session]['years'][0] % 100

            status_url = self.search_url_template % (laws_year, bill_type, bill_number)
            bill = self.parse_bill_status_page(status_url, bill_url, session, chamber)

        if bill is None:
            # NOTE(review): guards against a status parser that returns
            # None; previously this crashed on ``bill['actions']``.
            return None

        # Get versions on the detail page.
        versions = [a['action'] for a in bill['actions']
                    if 'Version Available' in a['action']]
        if not versions:
            version_name = 'Introduced'
        else:
            version = versions[-1]
            if 'New Version' in version:
                version_name = 'Amended'
            elif 'Enrolled' in version:
                version_name = 'Enrolled'
            else:
                # Unrecognised "Version Available" action: fall back to the
                # action text rather than leaving version_name unbound.
                version_name = version

        self.add_other_versions(bill)

        # Add html.
        bill.add_version(version_name, bill_url, mimetype='text/html')

        # Add pdf.  A page without a "BillPdf" link used to raise KeyError
        # from ``set().pop()``; now the PDF version is simply skipped.
        pdf_urls = bill_page.xpath('//a/@href[contains(., "BillPdf")]')
        if pdf_urls:
            bill.add_version(version_name, pdf_urls[0], mimetype='application/pdf')

        # Add status url as a source.
        bill.add_source(status_url)

        return bill
开发者ID:RCGTDev,项目名称:openstates,代码行数:53,代码来源:bills.py

示例6: __init__

# 需要导入模块: from lxml.etree import ElementTree [as 别名]
# 或者: from lxml.etree.ElementTree import xpath [as 别名]
class CDRLog:
	'''Parse a CDR log into an AST and render it as an XML document.

	Attributes:
	  str -- the XML text built by asXML() (empty until then)
	  ast -- parse result produced by parseString()/parseFile()
	  xml -- ElementTree built from ``str`` by asXML()
	'''
	def __init__(self):
		self.str = ''
		self.ast = None
		self.xml = None

	def parseString(self, string):
		'''parse CDR text held in a string and keep the resulting AST'''
		self.ast = CDR.parseString(string)

	def parseFile(self, file):
		'''parse a CDR file and keep the resulting AST'''
		self.ast = CDR.parseFile(file)

	def printList(self):
		'''for debug purpose only'''
		import pprint
		cdrs = self.ast.asList()
		pprint.pprint(cdrs)

	def dump(self):
		'''for debug purpose only'''
		# single-argument call form works as both a Python 2 print
		# statement and a Python 3 print function
		print(self.ast.dump())

	def asXML(self):
		'''generate the XML string by the AST (list/list)
		   then create the XML ElementTree representation
		'''
		records = self.ast.asList()
		self.str = '<CDR>\n'
		# first entry is the head, every following entry is one record
		self.__asHeadXML(records[0])
		for cdr in records[1:]:
			self.__asRecordXML(cdr)
		self.str += '</CDR>'
		self.xml = ElementTree(fromstring(self.str))

	def __asHeadXML(self, head):
		''' for the head part of CDR log
		'''
		self.str += ' <head>\n'
		for item in head:
			self.__asItemXML(item, '   ')
		self.str += ' </head>\n'

	def __asRecordXML(self, cdr):
		''' for the each record part of CDR log

		``cdr`` is a (record number, list of items) pair.
		'''
		from xml.sax.saxutils import escape, quoteattr
		num = cdr[0]
		items = cdr[1]
		# quoteattr() adds the surrounding quotes and escapes the value
		self.str += ' <Record n='+quoteattr(num)+'>\n'
		self.str += '   <n>'+escape(num)+'</n>\n'
		for item in items:
			self.__asItemXML(item, '   ')
		self.str += ' </Record>\n'

	def __asItemXML(self, item, indent='  '):
		''' for the each item(n/v pair) part of CDR log

		A list value is rendered as nested child items, anything else
		as stripped, XML-escaped text.
		'''
		from xml.sax.saxutils import escape
		name, value = item
		# some name start with 3GPP which is not a valid xml tag;
		# only the leading digit is dropped, a trailing 3 is kept
		name = name.lstrip('3')
		if isinstance(value, list):
			self.str += indent+'<'+name+'>\n'
			for child in value:
				self.__asItemXML(child, indent+'  ')
			self.str += indent+'</'+name+'>\n'
		else:
			# escape &, < and > so the value cannot break the document
			text = escape(value.strip())
			self.str += indent+'<'+name+'>'+text+'</'+name+'>\n'

	def xpath(self, path):
		''' API to locate the node(s) in the self.xml (ElementTree) by XPATH

		self.xml is only set by asXML(), so call that first.
		'''
		return self.xml.xpath(path)

	def abstract(self):
		'''return the XML text built so far'''
		return self.str

	def __str__(self):
		'''string representation, should be the XML rep
		'''
		return self.str
开发者ID:frankjiao,项目名称:script,代码行数:87,代码来源:cdrparser.py

示例7: ODTTemplate

# 需要导入模块: from lxml.etree import ElementTree [as 别名]
# 或者: from lxml.etree.ElementTree import xpath [as 别名]
class ODTTemplate(object):
	def __init__(self, filename):
		"""Open a working copy of the template and parse its content.xml.

		:param filename: path of the ODT template; it is copied with
			create_temporary_copy() and only the copy is opened.
		"""
		# first create a temporary copy of the template
		self.tempCopy = create_temporary_copy(filename)
		self.zipfile = zipfile.ZipFile(self.tempCopy,"a")

		# extract the content and parse xml tree; the member stream is
		# closed as soon as parsing is done instead of being leaked
		self.tree = ElementTree()
		with self.zipfile.open("content.xml", "r") as content:
			self.tree.parse(content)

		self.usedVariables = set()
		self.unknownVariables = set()
		self.rendered = False

	def render(self, context):
		"""Fill every template field from ``context`` and write the result.

		Afterwards ``self.render_warnings`` lists variables that occur in
		the template but not in ``context``, and context keys that were
		never used. Variable names are sorted so the messages are
		deterministic.

		:param context: mapping of variable name to replacement text.
		:raises RuntimeError: if the template has already been rendered.
		"""
		if self.rendered:
			raise RuntimeError("You may only render the template once")

		# render every field element found in the document
		for elem in self.tree.xpath(FIELD_XPATH, namespaces=OD_NSMAP):
			self._field_render(elem, context)

		self.render_warnings = []
		if self.unknownVariables:
			self.render_warnings.append("The following variables occurred in the template but were not provided in the context: {}".format(",".join(sorted(self.unknownVariables))))
		unused = set(context) - self.usedVariables
		if unused:
			self.render_warnings.append("The following variables were not used: {}".format(",".join(sorted(unused))))

		# write out result to temporary archive
		# NOTE(review): the archive is opened in append mode, so this
		# presumably adds a second "content.xml" entry rather than
		# replacing the first -- confirm consumers read the latest one.
		outbuf = StringIO()
		self.tree.write(outbuf, encoding="utf8", xml_declaration=True)
		self.zipfile.writestr("content.xml", outbuf.getvalue())
		self.zipfile.close()
		self.rendered = True

	def debugSave(self, target):
		"""Write the current XML tree, pretty-printed, to ``target``.

		The file is opened in binary mode because the tree is written
		with an explicit encoding, i.e. as encoded bytes.
		"""
		with open(target,"wb") as f:
			self.tree.write(f, pretty_print=True, encoding="utf8", xml_declaration=True)

	def saveODT(self, target):
		"""Copy the rendered working copy to ``target``.

		:raises RuntimeError: if render() has not been run yet.
		"""
		if self.rendered:
			shutil.copy2(self.tempCopy, target)
			return
		raise RuntimeError("You cannot save an unrendered template")

	def getTemporaryODT(self):
		"""Return the path of the rendered working copy.

		:raises RuntimeError: if render() has not been run yet.
		"""
		if self.rendered:
			return self.tempCopy
		raise RuntimeError("You cannot save an unrendered template")

	def getTemporaryPDF(self):
		"""Convert to PDF into a new named temporary file and return it.

		The open temporary file object is returned; savePDF() is given
		its name as the target path.
		"""
		pdf_file = tempfile.NamedTemporaryFile()
		target_path = pdf_file.name
		self.savePDF(target_path)
		return pdf_file

	def savePDF(self, target):
		"""Convert the working copy to PDF and copy the result to ``target``.

		Runs ODT_BINARY with ``--headless --convert-to pdf`` and the
		working copy's directory as output directory, discarding the
		converter's output. The generated PDF is copied to ``target``
		and then deleted.
		"""
		source = self.tempCopy
		outdir = os.path.dirname(source)
		with open(os.devnull,"w") as sink:
			command = [ODT_BINARY, "--headless", "--convert-to", "pdf", "--outdir", outdir, source]
			subprocess.call(command, stdout=sink, stderr=sink)
		# the converter names its output after the input, with a .pdf suffix
		stem = os.path.splitext(self.tempCopy)[0]
		pdfPath = stem + ".pdf"
		shutil.copy2(pdfPath, target)
		os.unlink(pdfPath)



	def _field_render(self, elem, context):
		# retrieve the variable name of this field
		name = elem.attrib["{{{}}}name".format(OD_TEXT_NS)]
		# check if this is in our dictionary, if not, we leave it as it is
		# but create a warning
		if not name in context.keys():
			self.unknownVariables.add(name)
			return
		# otherwise mark it as used
		self.usedVariables.add(name)
		replacement = context[name]
		parent = elem.getparent()
		idx = parent.index(elem)
		# now distinguish whether its a multi-line or a single-line content
		if not "\n" in replacement:
			# in single-line replacements we just replace this element by its replacement text
			self._replace_var_with_content(elem, parent, idx, replacement)
		else:
			replacementLines = replacement.split("\n")
			# check if we are in a listing to use listing formatter
			li = parent
			isList = False
			while li is not None:
				if li.tag == "{{{}}}list-item".format(OD_TEXT_NS):
					isList = True
					break
				li = li.getparent()
			if isList and self._test_listitem_empty(li):
				# we do list-style replacement
				nodes = []
				for line in replacementLines:
					cli = deepcopy(li)
					var = cli.xpath(FIELD_XPATH, namespaces=OD_NSMAP)
					if len(var) > 1:
#.........这里部分代码省略.........
开发者ID:InsanePrawn,项目名称:SaBoT,代码行数:103,代码来源:odtemplate.py

示例8: XMLRegistery

# 需要导入模块: from lxml.etree import ElementTree [as 别名]
# 或者: from lxml.etree.ElementTree import xpath [as 别名]
class XMLRegistery(object):
    """Represent a LifecycleManager in XML.

    This is a singleton.
    """
    _instance = None
    _xml_root_tree = None

    def __new__(cls, *args, **kwargs):
        if not cls._instance:
            cls._instance = super(XMLRegistery, cls).__new__(cls, *args, **kwargs)
        return cls._instance

    def _xml_register(self, ressource, parent=None):
        """Create the XML node for ``ressource`` and register its children.

        Without ``parent`` the node becomes the root of a new tree stored
        on ``self._xml_root_tree``; otherwise it is appended under
        ``parent``. The node's path is stored on the ressource as
        ``_xpath``, and as ``_xpath_relative`` with the leading root
        segment dropped when the path has more than one segment.

        :type ressource: XMLRessource
        :type parent: lxml.Element
        """
        node_attrs = {RESSOURCE_ATTR: ressource._xml_ressource_name()}
        node_attrs.update(ressource._xml_attributes())

        if parent is not None:
            node = SubElement(parent,
                              ressource._xml_tag(),
                              attrib=node_attrs)
        else:
            node = Element(ressource._xml_tag(), attrib=node_attrs)
            self._xml_root_tree = ElementTree(node)

        ressource._xpath = self._xml_root_tree.getpath(node)
        # "/root/a/b" -> ["", "root", "a/b"]; a bare "/root" has no third part
        path_parts = ressource._xpath.split("/", 2)
        if len(path_parts) > 2:
            ressource._xpath_relative = path_parts[2]
        else:
            ressource._xpath_relative = ressource._xpath

        has_properties = (ressource._xml_add_properties()
                          or ressource._xml_add_properties_tuple())
        if has_properties:
            props_node = SubElement(node, "properties")

            # (name, value) pairs become child elements with text content
            for (prop, value) in ressource._xml_add_properties_tuple():
                logger.trace("Add property '%s:%s' on node with tag %s" % (
                             prop, value, ressource._xml_tag()))

                prop_elt = SubElement(props_node, prop)
                prop_elt.text = value

            # ready-made elements are appended as they are
            for elt in ressource._xml_add_properties():
                logger.trace("Add property '%s' on node with tag %s" % (
                             elt.tag, ressource._xml_tag()))

                props_node.append(elt)

        self._xml_register_children(node, ressource)
        logger.trace("Registered %s in XML registery" % ressource.__repr__())
        ressource._xml_on_registration()

    def _xml_register_children(self, xml_elt, ressource):
        """Be careful, this removes children before adding them."""

        # Children are removed to avoid multiple adding if the
        # lifecycle is created several times.
        for c in xml_elt.iterchildren():
            if c.tag != "properties":
                xml_elt.remove(c)

        for c in ressource._xml_children():
            self._xml_register(c, parent=xml_elt)

    def to_string(self, xpath):
        """Serialize, pretty-printed, the element ``_find_one(xpath)`` returns."""
        element = self._find_one(xpath)
        return tostring(element, pretty_print=True)

    def xpath(self, xpath):
        """Evaluate ``xpath`` on the registry tree and return strings.

        A non-list result (number, boolean, string) is returned as a
        one-item list holding its ``str()``. In a list result, elements
        are serialized pretty-printed and every other item is converted
        with ``str()``.

        :rtype: [str]
        :raises XpathInvalidExpression: if the expression cannot be
            evaluated.
        """
        # Only the evaluation itself can raise XPathEvalError.
        try:
            request = self._xml_root_tree.xpath(xpath)
        except XPathEvalError:
            raise XpathInvalidExpression("xpath '%s' is not valid!" % xpath)

        if not isinstance(request, list):
            return [str(request)]

        acc = []
        for e in request:
            # isinstance also matches element subclasses, which the old
            # exact type comparison turned into an object repr.
            if isinstance(e, _Element):
                acc.append(tostring(e, pretty_print=True))
            else:
                acc.append(str(e))
        return acc

    def find_all_elts(self, xpath):
        """Return the tree path of every node matched by ``xpath``.

        :raises XpathInvalidExpression: if the expression cannot be
            evaluated.
        """
        paths = []
        try:
            for match in self._xml_root_tree.xpath(xpath):
                paths.append(self._xml_root_tree.getpath(match))
        except XPathEvalError:
            raise XpathInvalidExpression("xpath '%s' is not valid!" % xpath)
        return paths

    def _find_one(self, xpath):
        """Return the ressource uri. Raise exception if multiple match
#.........这里部分代码省略.........
开发者ID:armonic,项目名称:armonic,代码行数:103,代码来源:xml_register.py

示例9: get_sponsor_table

# 需要导入模块: from lxml.etree import ElementTree [as 别名]
# 或者: from lxml.etree.ElementTree import xpath [as 别名]
 def get_sponsor_table(self, status_page):
     """Return the first table with four ``th`` cells whose first starts with 'Sponsor,'.

     Each candidate is wrapped in its own ``ElementTree`` before it is
     queried, and that wrapper is what gets returned. Returns ``None``
     when no table matches.
     """
     for candidate in status_page.xpath('//table'):
         wrapped = ElementTree(candidate)
         headers = wrapped.xpath('//th')
         if len(headers) != 4:
             continue
         if headers[0].text_content().startswith('Sponsor,'):
             return wrapped
     return None
开发者ID:timfreund,项目名称:fiftystates,代码行数:8,代码来源:bills.py

示例10: get_action_table

# 需要导入模块: from lxml.etree import ElementTree [as 别名]
# 或者: from lxml.etree.ElementTree import xpath [as 别名]
 def get_action_table(self, status_page):
     """Return the first table with five ``th`` cells whose first starts with 'Action'.

     Each candidate is wrapped in its own ``ElementTree`` before it is
     queried, and that wrapper is what gets returned. Returns ``None``
     when no table matches.
     """
     for candidate in status_page.xpath('//table'):
         wrapped = ElementTree(candidate)
         headers = wrapped.xpath('//th')
         if len(headers) != 5:
             continue
         if headers[0].text_content().startswith('Action'):
             return wrapped
     return None
开发者ID:timfreund,项目名称:fiftystates,代码行数:8,代码来源:bills.py

示例11: get_bill_table

# 需要导入模块: from lxml.etree import ElementTree [as 别名]
# 或者: from lxml.etree.ElementTree import xpath [as 别名]
 def get_bill_table(self, status_page):
     """Return the four-row table whose first cell starts with 'Bill Draft Number:'.

     Each candidate is wrapped in its own ``ElementTree`` before it is
     queried, and that wrapper is what gets returned. Returns ``None``
     when no table matches.
     """
     for table in status_page.xpath('//table'):
         table = ElementTree(table)
         if len(table.xpath('//tr')) != 4:
             continue
         first_cells = table.xpath('//tr[1]/td[1]')
         # A four-row table with no matching ``td`` is skipped rather than
         # raising IndexError, so later tables are still examined.
         if not first_cells:
             continue
         if first_cells[0].text_content().strip().startswith('Bill Draft Number:'):
             return table
     return None
开发者ID:timfreund,项目名称:fiftystates,代码行数:8,代码来源:bills.py


注:本文中的lxml.etree.ElementTree.xpath方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。