Python ElementTree.iterparse函数代码示例

本文整理汇总了Python中xml.etree.ElementTree.iterparse函数的典型用法代码示例。如果您正苦于以下问题：Python iterparse函数的具体用法？Python iterparse怎么用？Python iterparse使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。

在下文中一共展示了iterparse函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: xlsx

def xlsx(fname):
    """Read the first worksheet of an .xlsx workbook into a list of row dicts.

    Args:
        fname: Path to the .xlsx archive, or a file-like object holding it
            (anything ``zipfile.ZipFile`` accepts).

    Returns:
        One dict per ``<row>`` element of ``xl/worksheets/sheet1.xml``, in
        document order, mapping column letters ("A", "AZ", ...) to the cell
        value. Cells with ``t="s"`` are resolved through the shared-string
        table; every other cell keeps the raw text of its ``<v>`` element.

    Raises:
        KeyError: if the archive has no ``xl/worksheets/sheet1.xml``.
    """
    import zipfile
    from xml.etree.ElementTree import iterparse

    # The context managers guarantee the archive and its members are closed
    # even when parsing fails half-way.
    with zipfile.ZipFile(fname) as zippy:
        try:
            with zippy.open("xl/sharedStrings.xml") as shared:
                words = [el.text for _, el in iterparse(shared) if el.tag.endswith("}t")]
        except KeyError:
            # ZipFile.open raises KeyError for a missing member: a workbook
            # without string cells has no shared-string table at all.
            words = []

        rows = []
        row = {}
        val = ""
        with zippy.open("xl/worksheets/sheet1.xml") as sheet:
            for _, el in iterparse(sheet):
                if el.tag.endswith("}v"):  # <v>84</v>
                    val = el.text
                if el.tag.endswith("}c"):  # <c r="A3" t="s"><v>84</v></c>
                    if el.attrib.get("t") == "s":
                        # For shared-string cells <v> holds an index.
                        val = words[int(val)]
                    # "AZ22" -> "AZ": keep only the column letters.
                    column = el.attrib["r"].rstrip("0123456789")
                    row[column] = val
                    val = ""
                if el.tag.endswith("}row"):
                    rows.append(row)
                    row = {}
    return rows

开发者ID:geon12，项目名称:spectrum-sim，代码行数:27，代码来源:databringer.py

示例2: xlsx

def xlsx(path):
    """ Returns a list of rows, where each row is a list of column values.

    Reads ``xl/worksheets/sheet1.xml`` of the workbook at *path*, resolving
    shared-string cells (``t="s"``) through ``xl/sharedStrings.xml``. Rows
    whose cells are all empty are skipped. Each remaining row is padded with
    "" for missing columns and returned as a list ordered by column letter.
    A sheet without any non-empty row yields ``[]``.

    NOTE(review): the padding step relies on the module-level name ``CELLS``
    defined outside this block -- presumably a string of column letters in
    sheet order; confirm. Column keys are compared as plain strings, so the
    widest-column detection presumably assumes single-letter columns.
    """
    import zipfile
    from xml.etree.ElementTree import iterparse
    a = []
    r = {}
    v = ""
    # Context managers close the archive and its members even on error.
    with zipfile.ZipFile(path) as z:
        with z.open("xl/sharedStrings.xml") as f:
            s = [e.text for x, e in iterparse(f) if e.tag.endswith("}t")]
        with z.open("xl/worksheets/sheet1.xml") as f:
            for x, e in iterparse(f):
                if e.tag.endswith("}v"):  # <v>84</v>
                    v = e.text
                if e.tag.endswith("}c"):  # <c r="A3" t="s"><v>84</v></c>
                    # Only t="s" marks a shared-string index; other cell
                    # types (e.g. "str", "b", "n") carry their value in <v>.
                    if e.attrib.get("t") == "s":
                        v = s[int(v)]
                    c = e.attrib["r"].rstrip("0123456789")  # AZ22 -> AZ
                    r[c], v = v, ""
                if e.tag.endswith("}row"):
                    if any(r.values()):  # skip empty rows
                        a.append(r)
                    r = {}
    if not a:
        # Nothing to pad; max() below would raise on an empty sequence.
        return []
    m = max(max(row) for row in a)
    for i, row in enumerate(a):  # fill empty cells
        for c in CELLS.split(m)[0] + m:
            row.setdefault(c, "")
        a[i] = [row[c] for c in sorted(row)]
    return a

开发者ID:OAlm，项目名称:the_stromberg_stories，代码行数:30，代码来源:noc.py

示例3: readXlsx

def readXlsx(fileName, **args):
    """Read one worksheet of an .xlsx workbook.

    Args:
        fileName: Path to the workbook (or a file-like object).
        **args: ``sheet`` -- 1-based worksheet number (default 1);
            ``header`` -- when ``True``, data rows are keyed by the text of
            the first row instead of by column letter (default ``False``).

    Returns:
        ``[header, rows]``. ``header`` maps column letters to the stripped
        text of the first row. ``rows`` holds one dict per ``<row>``; the
        entry for the first row is always an empty dict because its cells go
        into ``header``. Empty cells are omitted from the row dicts.
    """
    # from: Hooshmand zandi http://stackoverflow.com/a/16544219
    import zipfile
    from xml.etree.ElementTree import iterparse

    sheet = args.get("sheet", 1)
    isHeader = args.get("header", False)

    rows = []
    row = {}
    header = {}
    value = ""

    # The context managers close the archive even if parsing raises.
    with zipfile.ZipFile(fileName) as z:
        # Get shared strings
        with z.open("xl/sharedStrings.xml") as shared:
            strings = [el.text for e, el in iterparse(shared) if el.tag.endswith("}t")]

        # Open specified worksheet
        with z.open("xl/worksheets/sheet%d.xml" % (sheet)) as worksheet:
            for e, el in iterparse(worksheet):
                # get value or index to shared strings
                if el.tag.endswith("}v"):  # <v>84</v>
                    value = el.text
                if el.tag.endswith("}c"):  # <c r="A3" t="s"><v>84</v></c>
                    # If value is a shared string, use value as an index
                    if el.attrib.get("t") == "s":
                        value = strings[int(value)]

                    # split the row/col information so that the column
                    # letter(s) can be used on their own
                    letter = el.attrib["r"]  # AZ22
                    while letter[-1].isdigit():
                        letter = letter[:-1]

                    if not rows:
                        # Still inside the first row: record its cells as the
                        # names that COULD be used as keys later on.
                        header[letter] = value.strip()
                    elif value != "":
                        # if there is a header row, use the first row's names
                        # as the row hash index
                        if isHeader == True and letter in header:
                            row[header[letter]] = value
                        else:
                            row[letter] = value

                    value = ""
                if el.tag.endswith("}row"):
                    rows.append(row)
                    row = {}
    return [header, rows]

开发者ID:grtwall，项目名称:canmatrix，代码行数:58，代码来源:importxlsx.py

示例4: read_xlsx

def read_xlsx(file, **args):
    # type: (typing.Any, **typing.Any) -> typing.Tuple[typing.Dict[typing.Any, str], typing.List[typing.Dict[str, str]]]
    """Read one worksheet of an .xlsx workbook.

    Args:
        file: Path to the workbook or a file-like object.
        **args: ``sheet`` -- 1-based worksheet number (default 1);
            ``header`` -- when exactly ``True``, data rows are keyed by the
            first row's text instead of by column letter (default ``False``).

    Returns:
        ``(header, rows)``. ``header`` maps column letters to the stripped
        text of the first row. ``rows`` holds one dict per ``<row>``; the
        entry for the first row is always empty because its cells are stored
        in ``header``. Empty cells are omitted from the row dicts.
    """
    # from: Hooshmand zandi http://stackoverflow.com/a/16544219
    import zipfile
    from xml.etree.ElementTree import iterparse

    sheet = args.get("sheet", 1)
    is_header = args.get("header", False)

    rows = []  # type: typing.List[typing.Dict[str, str]]
    row = {}
    header = {}
    value = ''

    # Using the archive as a context manager closes it even when a member is
    # missing or the XML is malformed.
    with zipfile.ZipFile(file) as z:
        # Get shared strings
        with z.open('xl/sharedStrings.xml') as shared:
            strings = [el.text for e, el
                       in iterparse(shared)
                       if el.tag.endswith('}t')
                       ]  # type: typing.List[str]

        # Open specified worksheet
        with z.open('xl/worksheets/sheet%d.xml' % sheet) as worksheet:
            for e, el in iterparse(worksheet):
                # get value or index to shared strings
                if el.tag.endswith('}v'):  # <v>84</v>
                    value = el.text
                if el.tag.endswith('}c'):  # <c r="A3" t="s"><v>84</v></c>
                    # If value is a shared string, use value as an index
                    if el.attrib.get('t') == 's':
                        value = strings[int(value)]

                    # split the row/col information so that the column
                    # letter(s) can be used on their own
                    letter = el.attrib['r']  # type: str         # AZ22
                    while letter[-1].isdigit():
                        letter = letter[:-1]

                    if not rows:
                        # first row: create a header hash for the names that
                        # COULD be used
                        header[letter] = value.strip()
                    elif value != '':
                        # if there is a header row, use the first row's names
                        # as the row hash index
                        if is_header is True and letter in header:
                            row[header[letter]] = value
                        else:
                            row[letter] = value

                    value = ''
                if el.tag.endswith('}row'):
                    rows.append(row)
                    row = {}
    return header, rows

开发者ID:ebroecker，项目名称:canmatrix，代码行数:55，代码来源:xlsx.py

示例5: readXlsx

def readXlsx(fileName, **args):
    """Read one worksheet of an .xlsx workbook into a list of row dicts.

    Args:
        fileName: Path to the workbook (or a file-like object).
        **args: ``sheet`` -- 1-based worksheet number (default 1);
            ``header`` -- when ``True``, data rows are keyed by the text of
            the first row instead of by column letter (default ``False``).

    Returns:
        One dict per ``<row>`` element. The entry for the first row is always
        an empty dict: its cells are only used as candidate key names. Empty
        cells are omitted from the row dicts.
    """
    import zipfile
    from xml.etree.ElementTree import iterparse

    sheet = args.get("sheet", 1)
    isHeader = args.get("header", False)

    rows = []
    row = {}
    header = {}
    value = ''

    # The context managers close the archive even if parsing raises.
    with zipfile.ZipFile(fileName) as z:
        # Get shared strings
        with z.open('xl/sharedStrings.xml') as shared:
            strings = [el.text for e, el in iterparse(shared) if el.tag.endswith('}t')]

        # Open specified worksheet
        with z.open('xl/worksheets/sheet%d.xml' % (sheet)) as worksheet:
            for e, el in iterparse(worksheet):
                # get value or index to shared strings
                if el.tag.endswith('}v'):  # <v>84</v>
                    value = el.text
                if el.tag.endswith('}c'):  # <c r="A3" t="s"><v>84</v></c>
                    # If value is a shared string, use value as an index
                    if el.attrib.get('t') == 's':
                        value = strings[int(value)]
                    # split the row/col information so that the column
                    # letter(s) can be used on their own
                    letter = el.attrib['r']  # AZ22
                    while letter[-1].isdigit():
                        letter = letter[:-1]
                    if not rows:
                        # first row: create a header hash for the names that
                        # COULD be used
                        header[letter] = value
                    elif value != '':
                        # if there is a header row, use the first row's names
                        # as the row hash index
                        if isHeader == True and letter in header:
                            row[header[letter]] = value
                        else:
                            row[letter] = value

                    value = ''
                if el.tag.endswith('}row'):
                    rows.append(row)
                    row = {}
    return rows

开发者ID:shaunwbell，项目名称:AtSeaPrograms，代码行数:51，代码来源:IBCAO_Maps_excel_stations_2016.py

示例6: main

def main():
    """Load selected rows of Posts.xml and Users.xml into bigdata.db.

    Creates table ``SO`` from the posts that carry a ``Title`` and an
    ``OwnerUserId`` attribute and whose ``Tags`` attribute contains at least
    one of the tags of interest, then creates table ``USERS`` from the users
    that carry a ``Location`` attribute. Both files are streamed with
    ``iterparse`` and the root element is emptied after every completed
    element so that memory use stays flat.

    ``lite`` and ``iterparse`` are module-level names imported outside this
    block (``lite`` is used as a DB-API module -- presumably sqlite3).
    """
    limited_tags = ['jquery', 'javascript', 'python']
    con = lite.connect('bigdata.db')
    try:
        # Request "start" events as well so that the very first event hands
        # us the root element.
        context = iter(iterparse('Posts.xml', events=("start", "end")))
        event, root = next(context)

        with con:  # commits on success, rolls back on error
            cur = con.cursor()
            cur.execute("CREATE TABLE SO(Id INTEGER PRIMARY KEY ASC, Tags TEXT, CreationDate TEXT, UserID INTEGER)")
            sqlQuery = "INSERT INTO SO VALUES(?,?,?,?)"
            for event, child in context:
                if event != "end":
                    continue
                attrib = child.attrib
                # Rows without a Tags attribute simply do not match.
                tags = attrib.get('Tags', '')
                if ('Title' in attrib and 'OwnerUserId' in attrib
                        and any(tag in tags for tag in limited_tags)):
                    cur.execute(sqlQuery, (attrib['Id'], attrib['Tags'], attrib['CreationDate'], attrib['OwnerUserId']))
                # Drop everything parsed so far, matching row or not;
                # otherwise the whole dump accumulates under the root.
                root.clear()

        context = iter(iterparse('Users.xml', events=("start", "end")))
        event, root = next(context)

        with con:
            cur = con.cursor()
            cur.execute("CREATE TABLE USERS(UserID INTEGER PRIMARY KEY ASC, Location TEXT)")
            sqlQuery = "INSERT INTO USERS VALUES(?,?)"
            for event, child in context:
                if event != "end":
                    continue
                if 'Location' in child.attrib:
                    cur.execute(sqlQuery, (child.attrib['Id'], child.attrib['Location']))
                root.clear()
    finally:
        con.close()

开发者ID:alexeyza，项目名称:stackexchange-trends，代码行数:51，代码来源:create_se_db.py

示例7: parse_and_remove

def parse_and_remove(filename, out):
    """Group the text of qualifying <message> elements by category.

    A message qualifies when its text contains all of 'QID', 'TITLE', 'BODY'
    and 'CATEGORY' as well as the marker 'CATEGORY:'. Its category is
    whatever follows the last 'CATEGORY:' marker, stripped of whitespace.

    Args:
        filename: XML file (path or file object) to stream with iterparse.
        out: Path of the text file to write. For every category, in sorted
            order, a ``***category***`` line is written followed by one line
            per message text.

    Side effects:
        Prints the ``{category: message count}`` dict to stdout.
    """
    markers = ('QID', 'TITLE', 'BODY', 'CATEGORY')
    categories = {}
    questions = {}

    for event, elem in iterparse(filename, ('end',)):
        if elem.tag != 'message':
            continue
        text = elem.text
        # elem.text is None for an empty element such as <message/>.
        if text and all(marker in text for marker in markers):
            start_ind = text.rfind('CATEGORY:')
            if start_ind != -1:
                cat = text[start_ind + len('CATEGORY:'):].strip()
                categories[cat] = categories.get(cat, 0) + 1
                questions.setdefault(cat, []).append(text)
        # The text we need is already stored; release the element's content
        # so a large input does not stay in memory.
        elem.clear()

    print(categories)
    with open(out, 'w') as outfile:
        for cat, texts in sorted(questions.items(), key=lambda x: x[0]):
            outfile.write('***%s***\n' % cat)
            for q in texts:
                outfile.write('%s\n' % q)

开发者ID:sashavtyurina，项目名称:LiveQATrack，代码行数:26，代码来源:extract_questions.py

示例8: unpack

def unpack( xml ):
    """Recreate the directory tree described by *xml* below the current directory.

    The document is streamed with iterparse. For every element start the
    element's NAME attribute is printed, then:

    * ROOT   -- any existing directory of that name is removed, the
                directory is created and entered;
    * FOLDER -- the directory is created and entered (and left again when
                the element ends);
    * FILE   -- a file is appended with SIZE characters: as many whole copies
                of ``content`` as fit, then "X" characters for the remainder.

    FOLDER, FILE, ROOT, NAME, SIZE, ``content`` and ``contentLength`` are
    module-level names defined outside this block (``contentLength`` is
    presumably ``len(content)`` -- confirm).

    Returns:
        Always 0.

    Raises:
        KeyError: if a started element lacks the NAME attribute.
    """
    for (event, elem) in iterparse(xml, ['start', 'end', 'start-ns', 'end-ns']):
        if event == 'end':
            if elem.tag == FOLDER:
                os.chdir(os.pardir)
        if event == 'start':
            name = elem.attrib[NAME]
            # Single-argument form prints the same text on Python 2 and 3.
            print("working for ... %s" % (name,))
            if elem.tag == FILE:
                size = int(elem.attrib[SIZE])
                # Floor division keeps the block count an int on Python 3.
                block = size // contentLength
                remdr = size % contentLength
                with open(name, 'a') as out:
                    for _ in range(block):
                        out.write(content)
                    out.write("X" * remdr)
            if elem.tag == FOLDER:
                os.mkdir(name)
                os.chdir(name)
            if elem.tag == ROOT:
                shutil.rmtree(name, ignore_errors=True)
                os.mkdir(name)
                os.chdir(name)
    return 0

开发者ID:sumanchakraborty，项目名称:laboratory，代码行数:25，代码来源:setup.py

示例9: read_corpus

def read_corpus(corpus_file_path, sections=('text',)):
    """Yield ``(values, label)`` for every usable <item> of a corpus file.

    Args:
        corpus_file_path: XML file (path or file object) streamed with
            iterparse.
        sections: Names of the child elements whose text makes up ``values``.

    Yields:
        ``values`` -- list with the text of each requested section, and
        ``label`` -- 0 (negative), 1 (positive) or -1 (unknown). When the
        item has a <rating>, a rating below 3 gives 0 and anything else 1.
        Otherwise a <polarity> of 'N' gives 0 and any other polarity 1; with
        neither element the label is -1.

        Items with a missing or empty section are skipped.
    """
    for event, elem in iterparse(corpus_file_path):
        if elem.tag != 'item':
            continue

        # findtext returns None for a missing section, so such an item is
        # skipped exactly like one whose section is empty.
        values = [elem.findtext(section) for section in sections]
        if not all(values):
            continue

        rating = elem.find('rating')
        if rating is not None:
            label = 0 if float(rating.text.strip()) < 3 else 1
        else:
            polarity = elem.find('polarity')
            if polarity is None:
                label = -1
            elif polarity.text.strip() == 'N':
                label = 0
            else:
                label = 1

        yield values, label

开发者ID:siegfang，项目名称:CDSCL，代码行数:25，代码来源:transfer.py

示例10: loadScheme

 def loadScheme(self):
     """Populate the schema description from schemas/<scheme>/schema.xml.

     Does nothing when the feed has no output scheme. Otherwise the schema
     file is streamed with start/end events while a stack of open tag names
     is kept:

     * an element that has no children when its start event is seen is
       appended to ``self.schema`` with its xpath (root tag omitted), tag
       and text;
     * an element with children becomes ``self.schema_root`` at depth 1 and
       ``self.schema_container`` at depth 2; deeper ones are ignored.

     NOTE(review): whether children (and text) are already attached at the
     "start" event depends on how much of the file the parser has consumed,
     which the ElementTree documentation leaves unspecified -- confirm this
     only runs on small schema files.
     """
     open_tags = []
     scheme = self.feed.output_scheme
     # Evaluated as in the original; the value is not used in this method.
     map_file = self.feed.map_rules if self.feed.map_rules else ''

     if scheme == None:
         return

     schema_path = os.path.join("schemas", scheme, "schema.xml")
     for (event, node) in iterparse(schema_path, ['start', 'end']):
         if event == 'end':
             open_tags.pop()
             continue
         if event != 'start':
             continue

         open_tags.append(node.tag)
         depth = len(open_tags)

         if not list(node):
             # Leaf (as far as parsed so far): a field of the schema.
             entry = struct()
             entry.xpath = "/".join(open_tags[1:])
             entry.tag = node.tag
             entry.desc = node.text
             self.schema.append(entry)
             continue

         if depth == 1:
             entry = struct()
             entry.xpath = "/".join(open_tags)
             entry.tag = node.tag
             self.schema_root = entry
         elif depth == 2:
             entry = struct()
             entry.xpath = "/".join(open_tags)
             entry.tag = node.tag
             self.schema_container = entry

开发者ID:xjerab13，项目名称:M-Eco-WP3-package，代码行数:30，代码来源:mapping.py

示例11: wait_for_new_job

def wait_for_new_job(sasl_token):
    """Subscribe to the cloudprint.google.com push channel and wait for a stanza.

    Opens an SSL socket to talk.google.com:5223, performs the XMPP handshake
    (stream open, X-GOOGLE-TOKEN auth with *sasl_token*, stream restart,
    resource bind, push subscription) and finally blocks until the server
    sends one more complete top-level element, which is returned --
    presumably the notification of a new print job; see the linked protocol
    description.

    Raises:
        AssertionError: if any awaited element is a failure/error stanza or
            has type="error"; the message is the serialized element.

    NOTE(review): ``ssl.wrap_socket`` and writing a ``str`` to the socket
    look like Python 2 era usage -- confirm the target interpreter.
    """
    # https://developers.google.com/cloud-print/docs/rawxmpp
    import ssl, socket
    from xml.etree.ElementTree import iterparse, tostring

    xmpp = ssl.wrap_socket(socket.socket())
    xmpp.connect(("talk.google.com", 5223))
    # One incremental parser is shared by all msg() calls: the whole XMPP
    # session is a single, never-ending XML document read from the socket.
    parser = iterparse(xmpp, ("start", "end"))

    def msg(msg=" "):
        """Send *msg* and return the next complete top-level element.

        The default payload is a single space, i.e. "send nothing
        meaningful, just wait". (The parameter shadows the function name
        inside this body.)
        """
        xmpp.write(msg)
        # Nesting depth relative to the <stream> element.
        stack = 0
        for event, el in parser:
            # The stream wrapper itself stays open for the whole session and
            # must not count towards the depth.
            if event == "start" and el.tag.endswith("stream"):
                continue
            stack += 1 if event == "start" else -1
            if stack == 0:
                # Depth back at zero: a direct child of <stream> is complete.
                assert (
                    not el.tag.endswith("failure") and not el.tag.endswith("error") and not el.get("type") == "error"
                ), tostring(el)
                return el

    msg('<stream to="gmail.com" version="1.0" xmlns="http://etherx.jabber.org/streams">')
    msg('<auth xmlns="urn:ietf:params:xml:ns:xmpp-sasl" mechanism="X-GOOGLE-TOKEN">%s</auth>' % sasl_token)
    # The stream is opened again after authentication.
    msg('<s:stream to="gmail.com" version="1.0" xmlns:s="http://etherx.jabber.org/streams" xmlns="jabber:client">')
    iq = msg('<iq type="set"><bind xmlns="urn:ietf:params:xml:ns:xmpp-bind"><resource>Armooo</resource></bind></iq>')
    # The text of the first grandchild of the bind result is split at "/";
    # the part before it is used as the subscription address.
    bare_jid = iq[0][0].text.split("/")[0]
    msg(
        '<iq type="set" to="%s"><subscribe xmlns="google:push"><item channel="cloudprint.google.com" from="cloudprint.google.com"/></subscribe></iq>'
        % bare_jid
    )
    # Block until the server pushes the next element.
    return msg()

开发者ID:renebruns，项目名称:cloudprint，代码行数:32，代码来源:cloudprint.py

示例12: importXML

def importXML(path):
    """Extract the base URL and per-page word counts from an XML dump.

    The XML namespace is taken from the ``xmlns="..."`` attribute on the
    first line of the file (presumably a MediaWiki export -- confirm). If
    that line carries no ``xmlns=`` attribute the derived namespace is
    meaningless and no element will match.

    Args:
        path: Path of the XML file.

    Returns:
        ``(myBase, allInfo)``: the text of the last <base> element seen (''
        when there is none) and a list with one
        ``(title, token_counts, text)`` tuple per <page>. ``token_counts``
        maps lower-cased ``\\w+`` tokens of the page text to their number of
        occurrences. ``title`` and ``text`` are None when the page lacks the
        corresponding element.

    ``RegexpTokenizer`` and ``iterparse`` are imported outside this block.
    """
    # Read only the first line, and do not leak the file handle.
    with open(path) as f:
        header = f.readline()
    start = header.find('xmlns=') + 7  # skip past 'xmlns="'
    NS = "{%s}" % header[start: header.find('\"', start)]

    base_tag = NS + 'base'
    page_tag = NS + 'page'
    title_tag = NS + 'title'
    text_path = './/' + NS + 'text'

    allInfo = []  # one (title, token counts, text) tuple per page
    myBase = ''  # text of the <base> element
    # Build the tokenizer once; \w+ gets rid of punctuation.
    tokenizer = RegexpTokenizer(r'\w+')

    with open(path) as f:
        for event, elem in iterparse(f):
            if elem.tag == base_tag:
                myBase = str(elem.text)

            if elem.tag == page_tag:
                title = elem.find(title_tag)
                content = elem.find(text_path)
                text = content.text if content is not None else None

                token_dic = {}
                if text:
                    for eachword in tokenizer.tokenize(text):
                        word = eachword.lower()
                        token_dic[word] = token_dic.get(word, 0) + 1

                allInfo.append((title.text if title is not None else None, token_dic, text))
                # The page is fully processed; free its subtree.
                elem.clear()

    return myBase, allInfo

开发者ID:jfriend08，项目名称:Stock_Analysis_EngineII，代码行数:32，代码来源:indexer.py

示例13: show_all_event

def show_all_event():
    """Print every parse event of podcasts.opml (event-based parsing).

    One line is printed per event: dots indicating the nesting depth (two
    per level), the event name, padding that keeps the following columns
    aligned while the dots fit into ``prefix_width``, the element tag and
    the ``id()`` of the event payload. For namespace events, whose payload
    is not an element, the payload's string form is shown instead of a tag.
    """
    from xml.etree.ElementTree import iterparse

    depth = 0
    prefix_width = 8
    prefix_dots = '.' * prefix_width
    line_template = '{prefix:<0.{prefix_len}}{event:<8}{suffix:<{suffix_len}} {tag:<12} {node_id}'

    for (event, node) in iterparse('podcasts.opml', ['start', 'end', 'start-ns', 'end-ns']):
        if event == 'end':
            depth -= 1

        prefix_len = depth * 2

        # 'start-ns' delivers a (prefix, uri) tuple and 'end-ns' None;
        # neither has a .tag attribute.
        tag = getattr(node, 'tag', None)
        if tag is None:
            tag = str(node)

        # Pad with whatever the dots did not use so the columns line up;
        # never negative, which would be an invalid format width.
        suffix_len = max(prefix_width - prefix_len, 0)

        print(line_template.format(prefix=prefix_dots,
                                   prefix_len=prefix_len,
                                   suffix='',
                                   suffix_len=suffix_len,
                                   tag=tag,
                                   node_id=id(node),
                                   event=event))

        if event == 'start':
            depth += 1

开发者ID:vhnuuh，项目名称:pyutil，代码行数:25，代码来源:parsing.py

示例14: parse_and_remove

    def parse_and_remove(self, filename, path):
        """Incrementally yield the elements found at *path*, freeing each afterwards.

        Args:
            filename: XML file (path or file object) to stream.
            path: Slash-separated tag names, relative to the root element
                (the root's own tag is not part of the path).

        Yields:
            Every element whose chain of enclosing tags below the root
            equals *path*. Once the consumer asks for the next one, the
            element is removed from its parent so that memory use stays
            bounded.

        Debug output (a banner, the path parts, every event and element) is
        printed along the way.
        """
        print('********')
        from xml.etree.ElementTree import iterparse

        path_parts = path.split('/')
        doc = iterparse(filename, ('start', 'end'))
        print(path_parts)
        # Skip the root element's start event, but keep the element: it is
        # the parent of matches for a one-component path.
        _, root = next(doc)
        tag_stack = []
        elem_stack = []
        for event, elem in doc:
            print(event)
            print(elem)
            if event == 'start':
                tag_stack.append(elem.tag)
                elem_stack.append(elem)
            elif event == 'end':
                if tag_stack == path_parts:
                    yield elem
                    # The root is not on the stack, so a top-level match has
                    # no elem_stack[-2]; its parent is the root itself.
                    parent = elem_stack[-2] if len(elem_stack) > 1 else root
                    parent.remove(elem)
                try:
                    tag_stack.pop()
                    elem_stack.pop()
                except IndexError as e:
                    # Expected once: the end event of the skipped root
                    # element finds both stacks empty.
                    print(e)

开发者ID:yuantuo，项目名称:pysparkexample，代码行数:27，代码来源:test.py

示例15: parse_and_remove

def parse_and_remove(filename, path):
    """Incrementally yield the elements found at *path*, freeing each afterwards.

    Args:
        filename: XML file (path or file object) to stream.
        path: Slash-separated tag names, relative to the root element (the
            root's own tag is not part of the path).

    Yields:
        Every element whose chain of enclosing tags below the root equals
        *path*. Once the consumer asks for the next one, the element is
        removed from its parent so that memory use stays bounded.

    Debug output describing the stacks is printed along the way.
    """
    path_parts = path.split('/')
    doc = iterparse(filename, ('start', 'end'))
    # Skip the root element's start event, but keep the element: it is the
    # parent of matches for a one-component path.
    _, root = next(doc)

    tag_stack = []
    elem_stack = []
    for event, elem in doc:
        if event == 'start':
            tag_stack.append(elem.tag)
            elem_stack.append(elem)
            print("start.\n")
            print("tag_stack:", tag_stack, "\n")
            print("elem_stack", elem_stack, "\n")
        elif event == 'end':
            if tag_stack == path_parts:
                print("end.\n")
                print("elem:", elem)
                yield elem
                # The root is not on the stack, so a top-level match has no
                # elem_stack[-2]; its parent is the root itself.
                parent = elem_stack[-2] if len(elem_stack) > 1 else root
                print("elem_stack[-2]", parent)
                parent.remove(elem)
            try:
                tag_stack.pop()
                elem_stack.pop()
            except IndexError:
                # Expected once: the end event of the skipped root element
                # finds both stacks empty.
                pass

开发者ID:lancecopper，项目名称:python_cookbook_exercises，代码行数:27，代码来源:8.4.2.py

注：本文中的xml.etree.ElementTree.iterparse函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。