当前位置: 首页>>代码示例>>Python>>正文


Python ElementTree.findall方法代码示例

本文整理汇总了 Python 中 lxml.etree.ElementTree.findall 方法的典型用法代码示例。如果您正苦于以下问题:Python ElementTree.findall 方法的具体用法是什么?Python ElementTree.findall 怎么用?有哪些 Python ElementTree.findall 的使用例子?那么这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 lxml.etree.ElementTree 的用法示例。


下文一共展示了 ElementTree.findall 方法的 15 个代码示例,这些示例默认按受欢迎程度排序。您可以为喜欢或者觉得有用的代码点赞,您的评价将有助于系统推荐出更好的 Python 代码示例。

示例1: parse

# 需要导入模块: from lxml.etree import ElementTree [as 别名]
# 或者: from lxml.etree.ElementTree import findall [as 别名]
    def parse(self, data):
        """ Parse a webdav reply. Retrieve any resources as objects
            and return them as a list.

            Every ``response`` element found in the reply becomes one
            ``Response`` object, which is appended to
            ``self.response_objects``. Each ``lockentry`` below a response
            adds one ``Lock`` to that response's ``locks`` list.

            :param data: The webdav reply to parse
            :type data: String


            :return: self.response_objects

        """
        # Properties copied verbatim from the reply; a missing element
        # leaves the attribute set to None.
        optional_properties = (
            'creationdate',
            'getcontentlength',
            'getlastmodified',
            'getetag',
            'getcontenttype',
            'status',
        )

        xml_etree = ElementTree(HTML(data))
        for response in xml_etree.findall("//response"):
            new_response = Response()
            resp_tree = ElementTree(response)
            new_response.href = resp_tree.find('//href').text
            if resp_tree.find('//collection') is not None:
                new_response.resourcetype = 'collection'
            else:
                new_response.resourcetype = 'resource'
                # ``executable`` is only looked up for plain resources
                new_response.executable = getattr(
                    resp_tree.find('//executable'), 'text', None)
            for prop in optional_properties:
                setattr(new_response, prop, getattr(
                    resp_tree.find('//' + prop), 'text', None))

            # Now we have the properties that are easy to get,
            # lets get the lock information
            for lock in resp_tree.findall('//lockentry'):
                # A separate name is used for the per-entry tree so the
                # list being iterated is never rebound inside the loop.
                entry_tree = ElementTree(lock)
                lock_obj = Lock()
                # The type/scope is encoded as the tag of the last child
                # element; index the element directly instead of calling
                # the deprecated getchildren().
                lock_obj.locktype = entry_tree.find('//locktype')[-1].tag
                lock_obj.lockscope = entry_tree.find('//lockscope')[-1].tag
                new_response.locks.append(lock_obj)

            self.response_objects.append(new_response)

        return self.response_objects
开发者ID:liyanchang,项目名称:python-webdav,代码行数:54,代码来源:parse.py

示例2: getdescendants

# 需要导入模块: from lxml.etree import ElementTree [as 别名]
# 或者: from lxml.etree.ElementTree import findall [as 别名]
def getdescendants(request, code):
    """Return the descendants of a DeCS tree node as a JSON HTTP response.

    The service at ``settings.DECS_SERVICE`` is queried once per language in
    ``DECS_LANGS``. The terms found are merged per ``tree_id`` into one
    multilingual description, and the resulting list is sorted by the
    description in the language of the request.

    :param request: request object; only ``LANGUAGE_CODE`` is read from it.
    :param code: tree id to query; a falsy value is sent as an empty string.
    :return: ``HttpResponse`` with content type ``application/json``.
    """
    language = request.LANGUAGE_CODE.lower()
    if language == 'pt-br':
        language = 'pt'

    # tree_id -> comma separated '"lang":"Term"' fragments, one per language
    results = {}

    for lang in DECS_LANGS:
        params = urllib.urlencode({
            'tree_id': code or '',
            'lang': lang,
            })

        resource = urllib.urlopen(settings.DECS_SERVICE, params)
        try:
            tree = ElementTree()
            tree.parse(resource)
        finally:
            # the reply is fully parsed at this point; release the connection
            resource.close()

        descendants = tree.findall('decsws_response/tree/descendants/term_list[@lang="%s"]/term' % lang)
        for term in descendants:
            tree_id = term.attrib['tree_id']
            fragment = '"%s":"%s"' % (lang, term.text.capitalize())
            if tree_id in results:
                results[tree_id] += ',' + fragment
            else:
                results[tree_id] = fragment

    # NOTE(review): the JSON text is assembled by string interpolation, so a
    # term containing a double quote would produce invalid JSON.
    # JSON_MULTILINGUAL_TERM is filled with (description, tree_id).
    payload = '[%s]' % ','.join(JSON_MULTILINGUAL_TERM % (description, tree_id)
                                for tree_id, description in results.items())
    terms = json_loads(payload)
    terms.sort(key=lambda x: x['fields']['description'][language])

    # ``content_type`` is accepted by old and new Django alike, whereas the
    # ``mimetype`` keyword was removed from HttpResponse.
    return HttpResponse(json_dumps(terms), content_type='application/json')
开发者ID:AnaBiel,项目名称:opentrials,代码行数:33,代码来源:views.py

示例3: scrape

# 需要导入模块: from lxml.etree import ElementTree [as 别名]
# 或者: from lxml.etree.ElementTree import findall [as 别名]
    def scrape(self, chamber, session):
        """Scrape and save the bills of ``chamber`` for ``session``.

        The start year of the term that lists ``session`` selects the bill
        index page. Every anchor on that page whose text starts with ``H``
        (``chamber == 'lower'``) or ``S`` (``chamber == 'upper'``) is taken
        as a bill page, parsed with ``parse_bill`` and saved when a bill is
        returned.

        :param chamber: ``'lower'`` or ``'upper'``; any other value selects
            no bills.
        :param session: session looked up in ``self.metadata['terms']``.
        :raises ValueError: if no term lists ``session``.
        """
        for term in self.metadata['terms']:
            if session in term['sessions']:
                year = term['start_year']
                break
        else:
            # Without this, ``year`` would be unbound further down.
            raise ValueError('no term found for session %r' % (session,))

        self.versions_dict = self._versions_dict(year)

        base_bill_url = 'http://leg.mt.gov/bills/%d/BillHtml/' % year
        index_page = ElementTree(lxml.html.fromstring(self.get(base_bill_url).text))

        # House bills start with H, Senate bills start with S
        prefix = {'lower': 'H', 'upper': 'S'}.get(chamber)

        bill_urls = []
        for bill_anchor in index_page.findall('//a'):
            name = bill_anchor.text
            # An anchor without leading text (for example one that only
            # wraps an image) has ``text`` set to None and names no bill.
            if name is None:
                continue
            # See 2009 HB 645
            if 'govlineveto' in name:
                continue
            if prefix is not None and name.startswith(prefix):
                bill_urls.append("%s%s" % (base_bill_url, name))

        for bill_url in bill_urls:
            bill = self.parse_bill(bill_url, session, chamber)
            if bill:
                self.save_bill(bill)
开发者ID:4bic,项目名称:open_county,代码行数:27,代码来源:bills.py

示例4: scrape

# 需要导入模块: from lxml.etree import ElementTree [as 别名]
# 或者: from lxml.etree.ElementTree import findall [as 别名]
    def scrape(self, chamber, year):
        """Scrape and save the bills of ``chamber`` for the term of ``year``.

        Every anchor on the bill index page whose text starts with ``H``
        (``chamber == "lower"``) or ``S`` (``chamber == "upper"``) is taken
        as a bill page, parsed with ``parse_bill`` and saved.

        :param chamber: ``"lower"`` or ``"upper"``; any other value selects
            no bills.
        :param year: year of the session; converted with ``int()``.
        :raises NoDataForPeriod: if ``year`` is earlier than 1999.
        """
        year = int(year)
        session = self.getSession(year)
        if year < 1999:
            raise NoDataForPeriod(year)
        # 2 year terms starting on odd year, so if even number, use the previous odd year
        if year % 2 == 0:
            year -= 1

        # The 1999 index has no year component in its URL.
        if year == 1999:
            base_bill_url = "http://data.opi.mt.gov/bills/BillHtml/"
        else:
            base_bill_url = "http://data.opi.mt.gov/bills/%d/BillHtml/" % year
        index_page = ElementTree(lxml.html.fromstring(self.urlopen(base_bill_url)))

        # House bills start with H, Senate bills start with S
        prefix = {"lower": "H", "upper": "S"}.get(chamber)

        bill_urls = []
        for bill_anchor in index_page.findall("//a"):
            name = bill_anchor.text
            # An anchor without leading text (for example one that only
            # wraps an image) has ``text`` set to None and names no bill.
            if name is None:
                continue
            # See 2009 HB 645
            if "govlineveto" in name:
                continue
            if prefix is not None and name.startswith(prefix):
                bill_urls.append("%s%s" % (base_bill_url, name))

        for bill_url in bill_urls:
            bill = self.parse_bill(bill_url, session, chamber)
            self.save_bill(bill)
开发者ID:Empact,项目名称:fiftystates,代码行数:30,代码来源:bills.py

示例5: print_predictions

# 需要导入模块: from lxml.etree import ElementTree [as 别名]
# 或者: from lxml.etree.ElementTree import findall [as 别名]
def print_predictions(agency, stops, label=""):
    """Print the NextBus predictions for ``stops`` as HTML on stdout.

    One ``<h2>`` heading and one ``<ol class='predictions'>`` list with at
    most three entries is printed per ``predictions`` element that holds at
    least one ``prediction``. Elements are ordered by the ``epochTime``
    attribute of their first prediction.

    :param agency: passed to ``build_url`` and used in the debug message.
    :param stops: passed to ``build_title_index`` and ``build_url``.
    :param label: prefix for headings taken from the feed's ``routeTitle``;
        it is not applied to titles found in the title index.
    """
    title_index = build_title_index(stops)
    url = build_url(agency, stops)

    debug("NextBus predictions for %s: %s" % (agency, url))
    f = urllib.urlopen(url)
    try:
        e = ElementTree(file=f)
    finally:
        # the document is fully parsed by the constructor
        f.close()

    # Keep only the elements that contain at least one prediction. A list is
    # built explicitly so the in-place sort does not rely on filter()
    # returning a list.
    predictions = [el for el in e.findall("//predictions")
                   if el.find(".//prediction") is not None]
    # NOTE(review): attribute values are strings, so this ordering is
    # lexicographic -- confirm epochTime values always have equal length.
    predictions.sort(key=lambda el: el.find(".//prediction").get("epochTime"))

    for p in predictions:
        routeTag = p.get("routeTag")
        stopTag = p.get("stopTag")
        title = title_index.get((routeTag, stopTag), False)
        if title:
            title = "<em>%s</em>" % (title.replace("\n", "<br>"), )
        else:
            title = p.get("routeTitle")
            title = re.sub(r'^Saferide ', '', title)
            title = label + title

        # Single-argument print(...) produces the same output whether print
        # is a statement or a function.
        print("<h2>"+title+"</h2>")
        print("<ol class='predictions'>")
        # only the first three predictions are shown
        for t in p.findall(".//prediction")[:3]:
            print('<li>%s</li>' % minutes(t.get("minutes")))
        print("</ol>")
开发者ID:nextmon,项目名称:display,代码行数:33,代码来源:predictions.py

示例6: parse_bill

# 需要导入模块: from lxml.etree import ElementTree [as 别名]
# 或者: from lxml.etree.ElementTree import findall [as 别名]
    def parse_bill(self, bill_url, session, chamber):
        """Build the bill described by the page at ``bill_url``.

        The page's anchors are scanned in document order. An anchor whose
        text starts with ``status of`` or ``Detailed Information (status)``
        gives the status page the bill is parsed from. An anchor starting
        with ``This bill in WP`` gives the version index handed to
        ``add_bill_versions``. When no status anchor produced a bill, the
        status page is located through ``self.search_url_template`` instead.

        :param bill_url: URL of the bill's HTML page.
        :param session: session the bill belongs to.
        :param chamber: chamber, passed through to the status page parser.
        :return: whatever ``parse_bill_status_page`` returned.
        """
        page = ElementTree(lxml.html.fromstring(self.urlopen(bill_url)))
        status_prefixes = ("status of", "Detailed Information (status)")

        bill = None
        for link in page.findall("//a"):
            label = link.text_content()
            if label.startswith(status_prefixes):
                status_url = link.attrib["href"].replace("\r", "").replace("\n", "")
                bill = self.parse_bill_status_page(status_url, bill_url, session, chamber)
                continue
            if not label.startswith("This bill in WP"):
                continue
            versions_url = link.attrib["href"]
            versions_url = versions_url[: versions_url.rindex("/")]
            # this looks weird.  See http://data.opi.mt.gov/bills/BillHtml/SB0002.htm for why
            versions_url = versions_url[versions_url.rindex("http://") :]
            self.add_bill_versions(bill, versions_url)

        if bill is not None:
            return bill

        # No bill was found.  Maybe something like HB0790 in the 2005 session?
        # We can search for the bill metadata.
        page_name = bill_url.split("/")[-1].split(".")[0]
        bill_type, bill_number = page_name[0:2], page_name[2:]
        # ``metadata`` is resolved outside this method, not on ``self``
        laws_year = metadata["session_details"][session]["years"][0] % 100

        status_url = self.search_url_template % (laws_year, bill_type, bill_number)
        return self.parse_bill_status_page(status_url, bill_url, session, chamber)
开发者ID:Empact,项目名称:fiftystates,代码行数:29,代码来源:bills.py

示例7: get_chapters

# 需要导入模块: from lxml.etree import ElementTree [as 别名]
# 或者: from lxml.etree.ElementTree import findall [as 别名]
def get_chapters(request):
    """Return the chapters served by the ICD-10 service as JSON.

    The service at ``settings.ICD10_SERVICE`` is queried with
    ``LI=CAPITULO``. For every ``cid10ws_response`` element found, one entry
    is produced whose ``label`` is the chapter's ``tree_id`` and whose
    ``description`` maps each language of ``ICD10_LANGS`` to
    ``"<chapter> - <descriptor text>"``. Languages whose descriptor has no
    text are left out.

    :param request: not used by this view.
    :return: ``HttpResponse`` with content type ``application/json``.
    :raises IndexError: if a response lacks the chapter term or a
        descriptor for one of the languages.
    """
    params = urllib.urlencode({"LI": "CAPITULO"})

    resource = urllib.urlopen(settings.ICD10_SERVICE, params)
    try:
        tree = ElementTree()
        tree.parse(resource)
    finally:
        # the reply is fully parsed at this point; release the connection
        resource.close()

    data = []
    for term in tree.findall("cid10ws_response"):
        chapter = term.findall("tree/self/term_list/term")[0]

        description = {}
        for lang in ICD10_LANGS:
            term_trans = term.findall('record_list/record/descriptor_list/descriptor[@lang="%s"]' % lang)[0]
            if term_trans.text:
                description[lang] = "%s - %s" % (chapter.attrib["chapter"], term_trans.text.strip().capitalize())

        data.append({"fields": {"description": description, "label": chapter.attrib["tree_id"]}})

    # ``content_type`` is accepted by old and new Django alike, whereas the
    # ``mimetype`` keyword was removed from HttpResponse.
    return HttpResponse(json.dumps(data), content_type="application/json")
开发者ID:rebec,项目名称:opentrials,代码行数:33,代码来源:views.py

示例8: scrape

# 需要导入模块: from lxml.etree import ElementTree [as 别名]
# 或者: from lxml.etree.ElementTree import findall [as 别名]
    def scrape(self, chamber, session):
        """Scrape and save the bills of ``chamber`` for ``session``.

        The start year of the term that lists ``session`` selects the bill
        index page. Every anchor on that page whose text starts with ``H``
        (``chamber == "lower"``) or ``S`` (``chamber == "upper"``) is taken
        as a bill page, parsed with ``parse_bill`` and saved when a bill is
        returned.

        :param chamber: ``"lower"`` or ``"upper"``; any other value selects
            no bills.
        :param session: session looked up in ``self.metadata["terms"]``.
        :raises ValueError: if no term lists ``session``.
        """
        for term in self.metadata["terms"]:
            if session in term["sessions"]:
                year = term["start_year"]
                break
        else:
            # Without this, ``year`` would be unbound further down.
            raise ValueError("no term found for session %r" % (session,))

        self.versions_dict = self._versions_dict(year)

        base_bill_url = "http://data.opi.mt.gov/bills/%d/BillHtml/" % year
        index_page = ElementTree(lxml.html.fromstring(self.urlopen(base_bill_url)))

        # House bills start with H, Senate bills start with S
        prefix = {"lower": "H", "upper": "S"}.get(chamber)

        bill_urls = []
        for bill_anchor in index_page.findall("//a"):
            name = bill_anchor.text
            # An anchor without leading text (for example one that only
            # wraps an image) has ``text`` set to None and names no bill.
            if name is None:
                continue
            # See 2009 HB 645
            if "govlineveto" in name:
                continue
            if prefix is not None and name.startswith(prefix):
                bill_urls.append("%s%s" % (base_bill_url, name))

        for bill_url in bill_urls:
            bill = self.parse_bill(bill_url, session, chamber)
            if bill:
                self.save_bill(bill)
开发者ID:BrandonLewis,项目名称:openstates,代码行数:27,代码来源:bills.py

示例9: parse_bill

# 需要导入模块: from lxml.etree import ElementTree [as 别名]
# 或者: from lxml.etree.ElementTree import findall [as 别名]
    def parse_bill(self, bill_url, session, chamber):
        """Build the bill described by the page at ``bill_url``.

        The bill is parsed from the status page linked on the bill page, or,
        when no such link is found, from the status page located through
        ``self.search_url_template``. The HTML page, and the first PDF link
        on it if there is one, are then added as versions, and the status
        URL is added as a source.

        :param bill_url: URL of the bill's HTML page.
        :param session: session the bill belongs to.
        :param chamber: chamber, passed through to the status page parser.
        :return: the bill, or ``None`` when the page is skipped or cannot
            be parsed.
        """
        # Temporarily skip the differently-formatted house budget bill.
        if "/2011/billhtml/hb0002.htm" in bill_url.lower():
            return

        bill = None
        try:
            doc = lxml.html.fromstring(self.urlopen(bill_url))
        except XMLSyntaxError as e:
            self.logger.warning("Got %r while parsing %r" % (e, bill_url))
            return
        bill_page = ElementTree(doc)

        status_prefixes = ("status of", "Detailed Information (status)")
        for anchor in bill_page.findall("//a"):
            if anchor.text_content().startswith(status_prefixes):
                status_url = anchor.attrib["href"].replace("\r", "").replace("\n", "")
                bill = self.parse_bill_status_page(status_url, bill_url, session, chamber)

        if bill is None:
            # No bill was found.  Maybe something like HB0790 in the 2005 session?
            # We can search for the bill metadata.
            page_name = bill_url.split("/")[-1].split(".")[0]
            bill_type = page_name[0:2]
            bill_number = page_name[2:]
            laws_year = self.metadata["session_details"][session]["years"][0] % 100

            status_url = self.search_url_template % (laws_year, bill_type, bill_number)
            bill = self.parse_bill_status_page(status_url, bill_url, session, chamber)

        # Get versions on the detail page.
        versions = [a["action"] for a in bill["actions"] if "Version Available" in a["action"]]
        if not versions:
            version_name = "Introduced"
        else:
            # the most recent matching action names the current version
            version = versions[-1]
            if "New Version" in version:
                version_name = "Amended"
            elif "Enrolled" in version:
                version_name = "Enrolled"
            else:
                # Unrecognised wording: use the action text itself so that
                # ``version_name`` is always bound.
                version_name = version

        self.add_other_versions(bill)

        # Add html.
        bill.add_version(version_name, bill_url, mimetype="text/html")

        # Add pdf.  The first matching link in document order is used, which
        # keeps the choice deterministic when several links are present.
        pdf_urls = bill_page.xpath('//a/@href[contains(., "BillPdf")]')
        if pdf_urls:
            bill.add_version(version_name, pdf_urls[0], mimetype="application/pdf")
        else:
            self.logger.warning("No PDF link found on %r" % (bill_url,))

        # Add status url as a source.
        bill.add_source(status_url)

        return bill
开发者ID:ranjithtenz,项目名称:openstates,代码行数:59,代码来源:bills.py

示例10: getterm

# 需要导入模块: from lxml.etree import ElementTree [as 别名]
# 或者: from lxml.etree.ElementTree import findall [as 别名]
def getterm(request, lang, code):
    """Return one DeCS term, or a list of terms, as a JSON HTTP response.

    The service at ``settings.DECS_SERVICE`` is queried for tree id ``code``
    in language ``lang``. When the reply holds a ``tree/self`` term, a
    single multilingual entry is built from all descriptors of the reply.
    Otherwise one entry is built per term of the ``lang`` term list.

    :param request: not used by this view.
    :param lang: language sent to the service and used to select the term
        list.
    :param code: tree id to query; a falsy value is sent as an empty string.
    :return: ``HttpResponse`` with content type ``application/json``.
    """
    params = urllib.urlencode({
        'tree_id': code or '',
        'lang': lang,
        })
    resource = urllib.urlopen(settings.DECS_SERVICE, params)
    try:
        tree = ElementTree()
        tree.parse(resource)
    finally:
        # the reply is fully parsed at this point; release the connection
        resource.close()

    # NOTE(review): both branches assemble JSON by string interpolation, so
    # a term containing a double quote would produce invalid JSON.
    result = tree.find("decsws_response/tree/self/term_list/term")
    if result is None:
        terms = tree.findall('decsws_response/tree/term_list[@lang="%s"]/term' % lang)
        payload = '[%s]' % ','.join(JSON_TERM % (t.text.capitalize(), t.attrib['tree_id']) for t in terms)
    else:
        descriptors = tree.findall('decsws_response/record_list/record/descriptor_list/descriptor')
        description = ','.join('"%s":"%s"' % (d.attrib['lang'], d.text) for d in descriptors)
        payload = '[%s]' % (JSON_MULTILINGUAL_TERM % (description, result.attrib['tree_id']))

    # ``content_type`` is accepted by old and new Django alike, whereas the
    # ``mimetype`` keyword was removed from HttpResponse.
    return HttpResponse(payload, content_type='application/json')
开发者ID:AnaBiel,项目名称:opentrials,代码行数:22,代码来源:views.py

示例11: parse_bill

# 需要导入模块: from lxml.etree import ElementTree [as 别名]
# 或者: from lxml.etree.ElementTree import findall [as 别名]
    def parse_bill(self, bill_url, session, chamber):
        """Build the bill described by the page at ``bill_url``.

        The bill is parsed from the status page linked on the bill page, or,
        when no such link is found, from the status page located through
        ``self.search_url_template``. The HTML page, and the first PDF link
        on it if there is one, are then added as versions, and the status
        URL is added as a source.

        :param bill_url: URL of the bill's HTML page.
        :param session: session the bill belongs to.
        :param chamber: chamber, passed through to the status page parser.
        :return: the bill, or ``None`` when the page is skipped.
        """
        # Temporarily skip the differently-formatted house budget bill.
        if '/2011/billhtml/hb0002.htm' in bill_url.lower():
            return

        bill = None
        bill_page = ElementTree(lxml.html.fromstring(self.urlopen(bill_url)))

        status_prefixes = ('status of', 'Detailed Information (status)')
        for anchor in bill_page.findall('//a'):
            if anchor.text_content().startswith(status_prefixes):
                status_url = anchor.attrib['href'].replace("\r", "").replace("\n", "")
                bill = self.parse_bill_status_page(status_url, bill_url, session, chamber)

        if bill is None:
            # No bill was found.  Maybe something like HB0790 in the 2005 session?
            # We can search for the bill metadata.
            page_name = bill_url.split("/")[-1].split(".")[0]
            bill_type = page_name[0:2]
            bill_number = page_name[2:]
            laws_year = self.metadata['session_details'][session]['years'][0] % 100

            status_url = self.search_url_template % (laws_year, bill_type, bill_number)
            bill = self.parse_bill_status_page(status_url, bill_url, session, chamber)

        # Get versions on the detail page.
        versions = [a['action'] for a in bill['actions'] if 'Version Available' in a['action']]
        if not versions:
            version_name = 'Introduced'
        else:
            # the most recent matching action names the current version
            version = versions[-1]
            if 'New Version' in version:
                version_name = 'Amended'
            elif 'Enrolled' in version:
                version_name = 'Enrolled'
            else:
                # Unrecognised wording: use the action text itself so that
                # ``version_name`` is always bound.
                version_name = version

        self.add_other_versions(bill)

        # Add html.
        bill.add_version(version_name, bill_url, mimetype='text/html')

        # Add pdf.  The first matching link in document order is used, which
        # keeps the choice deterministic when several links are present; a
        # page without a PDF link simply gets no PDF version.
        pdf_urls = bill_page.xpath('//a/@href[contains(., "BillPdf")]')
        if pdf_urls:
            bill.add_version(version_name, pdf_urls[0], mimetype='application/pdf')

        # Add status url as a source.
        bill.add_source(status_url)

        return bill
开发者ID:RCGTDev,项目名称:openstates,代码行数:53,代码来源:bills.py

示例12: fetch_text_from_url

# 需要导入模块: from lxml.etree import ElementTree [as 别名]
# 或者: from lxml.etree.ElementTree import findall [as 别名]
def fetch_text_from_url(url):
    """Simple helper to scrape the text content of a webpage

    The page is downloaded and the text of its ``h1``, ``h2``, ``h3``,
    ``h4`` and ``p`` elements is joined with blank lines. Elements are
    grouped by tag in that order (all ``h1`` first, then all ``h2``, and so
    on) rather than by their position in the document.

    :param url: address of the page to download.
    :return: the extracted text as a single string.
    """
    opener = urllib2.build_opener()
    request = urllib2.Request(url)
    # change the User Agent to avoid being blocked by Wikipedia
    # downloading a couple of articles should not be abusive
    request.add_header('User-Agent', 'pignlproc categorizer')
    response = opener.open(request)
    try:
        html_content = response.read()
    finally:
        # release the connection even when reading fails
        response.close()
    tree = ElementTree(lxml.html.document_fromstring(html_content))
    elements = [e.text_content()
                for tag in ('h1', 'h2', 'h3', 'h4', 'p')
                for e in tree.findall('//' + tag)]
    return "\n\n".join(elements)
开发者ID:Big-Data,项目名称:pignlproc,代码行数:16,代码来源:categorize.py

示例13: __init__

# 需要导入模块: from lxml.etree import ElementTree [as 别名]
# 或者: from lxml.etree.ElementTree import findall [as 别名]
 def __init__(self, name):
     """Load the XML file ``name`` and index its validation statuses.

     Sets ``name``, ``labels`` (child tags of the ``labels`` element, in
     order), ``descrs`` (tag -> text of those children) and ``date`` (the
     root element's ``date`` attribute). Every ``validation_status``
     element of the current generation whose status does not start with
     ``rsync_transfer_`` is then recorded under its URI in ``self``.
     """
     self.name = name
     doc = ElementTree(file = name)
     label_pairs = tuple(
         (node.tag.strip(), node.text.strip()) for node in doc.find("labels"))
     self.labels = tuple(tag for tag, _ in label_pairs)
     self.descrs = dict(label_pairs)
     self.date = doc.getroot().get("date")
     for node in doc.findall("validation_status"):
         status = node.get("status")
         uri = node.text.strip()
         if not status.startswith("rsync_transfer_") and node.get("generation") == "current":
             # first status seen for this URI creates its entry
             if uri not in self:
                 self[uri] = Object(self, uri)
             self[uri].add(status)
开发者ID:dragonresearch,项目名称:rpki.net,代码行数:17,代码来源:analyze-transition.py

示例14: get_bill_urls

# 需要导入模块: from lxml.etree import ElementTree [as 别名]
# 或者: from lxml.etree.ElementTree import findall [as 别名]
    def get_bill_urls(self, base_bill_url, chamber):
        """Collect the bill page URLs of ``chamber`` from a bill index page.

        Every anchor on the index page whose text starts with ``H``
        (``chamber == 'lower'``) or ``S`` (``chamber == 'upper'``) gives one
        URL, formed by appending the anchor text to ``base_bill_url``.
        Anchors whose text contains ``govlineveto`` are left out.

        :param base_bill_url: URL of the index page; ``None`` gives an empty
            list without any request being made.
        :param chamber: ``'lower'`` or ``'upper'``; any other value selects
            no bills.
        :return: list of bill page URLs in document order.
        """
        bill_urls = []

        if base_bill_url is None:
            return bill_urls

        # House bills start with H, Senate bills start with S
        prefix = {'lower': 'H', 'upper': 'S'}.get(chamber)

        index_page = ElementTree(lxml.html.fromstring(self.urlopen(base_bill_url)))
        for bill_anchor in index_page.findall('//a'):
            name = bill_anchor.text
            # An anchor without leading text (for example one that only
            # wraps an image) has ``text`` set to None and names no bill.
            if name is None:
                continue
            # See 2009 HB 645
            if 'govlineveto' in name:
                continue
            if prefix is not None and name.startswith(prefix):
                bill_urls.append("%s%s" % (base_bill_url, name))
        return bill_urls
开发者ID:timfreund,项目名称:fiftystates,代码行数:18,代码来源:bills.py

示例15: add_bill_versions

# 需要导入模块: from lxml.etree import ElementTree [as 别名]
# 或者: from lxml.etree.ElementTree import findall [as 别名]
    def add_bill_versions(self, bill, index_url):
        """Add the versions listed on the index page at ``index_url`` to ``bill``.

        An anchor counts as a version of the bill when its text matches the
        bill id (any number of zeros allowed before the number) followed by
        an underscore. The part of the text between the first underscore
        and the first dot names the version; ``x`` is reported as
        ``Final Version``.

        :param bill: bill to extend; ``bill["bill_id"]`` is expected to hold
            at least two space-separated tokens.
        :param index_url: URL of the page listing the version files.
        """
        # This method won't pick up bill versions where the bill is published
        # exclusively in PDF.  See 2009 HB 645 for a sample
        index_page = ElementTree(lxml.html.fromstring(self.urlopen(index_url)))
        tokens = bill["bill_id"].split(" ")
        # The id tokens are escaped so they are always matched literally, and
        # the underscore needs no backslash.
        bill_regex = re.compile("%s0*%s_" % (re.escape(tokens[0]), re.escape(tokens[1])))
        # Everything up to the character before "bills"; the same for every
        # anchor, so it is computed once.
        url_prefix = index_url[0 : index_url.find("bills") - 1]
        for anchor in index_page.findall("//a"):
            file_name = anchor.text_content()
            if bill_regex.match(file_name) is None:
                continue
            version = file_name[file_name.find("_") + 1 : file_name.find(".")]
            version_title = "Final Version"
            if version != "x":
                version_title = "Version %s" % version

            version_url = url_prefix + anchor.attrib["href"]
            bill.add_version(version_title, version_url)
开发者ID:Empact,项目名称:fiftystates,代码行数:18,代码来源:bills.py


注:本文中的 lxml.etree.ElementTree.findall 方法示例由纯净天空整理自 GitHub、MSDocs 等开源代码及文档管理平台,相关代码片段筛选自各位开发者贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的 License;未经允许,请勿转载。