当前位置: 首页>>代码示例>>Python>>正文


Python ElementTreeUtils.toStr方法代码示例

本文整理汇总了Python中Utils.ElementTreeUtils.toStr方法的典型用法代码示例。如果您正苦于以下问题:Python ElementTreeUtils.toStr方法的具体用法?Python ElementTreeUtils.toStr怎么用?Python ElementTreeUtils.toStr使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在Utils.ElementTreeUtils的用法示例。


在下文中一共展示了ElementTreeUtils.toStr方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: loadEventXML

# 需要导入模块: from Utils import ElementTreeUtils [as 别名]
# 或者: from Utils.ElementTreeUtils import toStr [as 别名]
def loadEventXML(path, verbose=False):
    xml = ETUtils.ETFromObj(path)
    sentDict = {}
    for sentence in xml.getiterator("sentence"):
        sentenceText = getText(sentence).strip()
        if not sentDict.has_key(sentenceText):
            sentDict[sentenceText] = []

    for event in xml.getiterator("event"):
        sentenceText = getText(event).strip()
        if not sentDict.has_key(sentenceText):
            sentDict[sentenceText] = []
        events = sentDict[sentenceText]
        
        clue = event.find("clue")
        clueTuple = getClue(clue)
        eventType = event.find("type").get("class")
        if eventType == "Protein_amino_acid_phosphorylation":
            eventType = "Phosphorylation"
        if type(clueTuple) == types.StringType:
            if verbose: print "Event", eventType, "clue with no clueType:", ETUtils.toStr(clue)
        else:
            assert sentenceText[clueTuple[1]:clueTuple[2]+1] == clueTuple[0], (sentenceText, sentenceText[clueTuple[1]:clueTuple[2]+1], clueTuple)
            event = (clueTuple[1], clueTuple[2], eventType, clueTuple[0])
            if event not in events:
                events.append(event)
    return sentDict
开发者ID:ninjin,项目名称:TEES,代码行数:29,代码来源:GeniaEventsToSharedTask.py

示例2: processCorpus

# 需要导入模块: from Utils import ElementTreeUtils [as 别名]
# 或者: from Utils.ElementTreeUtils import toStr [as 别名]
def processCorpus(input, parserName):
    """Match corpus entities against syntactic phrases and print statistics.

    For each sentence with a parse by *parserName*, builds candidate phrases
    (via makePhrases) and reports, per phrase type, how many entities match,
    fail to match, or match multiple phrases.

    Parameters:
        input: corpus file path or object accepted by ETUtils.ETFromObj.
        parserName: name of the parse element to use for each sentence.

    NOTE(review): the body is truncated in this listing ("部分代码省略"); the
    tail of the function is not visible here.
    """
    print >> sys.stderr, "Loading corpus file", input
    corpusRoot = ETUtils.ETFromObj(input).getroot()
    documents = corpusRoot.findall("document")

    counts = defaultdict(int)
    # Per phrase type: [0] = number of phrases seen, [1] = number of entity matches.
    matchByType = defaultdict(lambda: [0, 0])
    filteredMatchByType = defaultdict(lambda: [0, 0])
    # Phrase types counted separately as "filtered" matches.
    # NOTE(review): this local shadows the builtin filter(); harmless here but
    # worth renaming if the function is ever edited.
    filter = set(["NP", "TOK-tIN", "WHADVP", "WHNP", "TOK-tWP$", "TOK-tPRP$", "NP-IN"])

    #    # fix spans
    #    for document in documents:
    #        for sentence in document.findall("sentence"):
    #            sentOffset = Range.charOffsetToSingleTuple(sentence.get("charOffset"))
    #            for entity in sentence.findall("entity"):
    #                altOffsetString = entity.get("altOffset")
    #                if altOffsetString == None:
    #                    continue
    #                #print altOffsetString
    #                altOffsets = Range.charOffsetToTuples(altOffsetString)
    #                assert len(altOffsets) == 1
    #                for i in range(len(altOffsets)):
    #                    altOffset = altOffsets[i]
    #                    altOffsets[i] = (altOffset[0] - sentOffset[0], altOffset[1] - sentOffset[0])
    #                entity.set("altOffset", Range.tuplesToCharOffset(altOffsets))

    # counter = ProgressCounter(len(documents), "Documents")
    for document in documents:
        for sentence in document.findall("sentence"):
            entities = sentence.findall("entity")
            # Sentences without a parse by the requested parser are skipped entirely.
            parse = ETUtils.getElementByAttrib(sentence.find("sentenceanalyses"), "parse", {"parser": parserName})
            if parse == None:
                continue
            # Use the tokenization the parse itself references.
            tokenization = ETUtils.getElementByAttrib(
                sentence.find("sentenceanalyses"), "tokenization", {"tokenizer": parse.get("tokenizer")}
            )
            phrases, phraseDict = makePhrases(parse, tokenization, entities)
            phraseOffsets = phraseDict.keys()
            # phraseOffsets.sort()
            phraseNECounts = getNECounts(phrases, entities)

            # Tally phrase statistics (index 0 of the matchByType pairs).
            for value in phraseDict.values():
                counts["phrases"] += len(value)
                for phrase in value:
                    matchByType[phrase.get("type")][0] += 1
                    if phrase.get("type") in filter:
                        filteredMatchByType[phrase.get("type")][0] += 1
                        counts["phrases-filtered"] += 1
                    if phrase.get("type").find("NP") != -1:
                        matchByType[phrase.get("type") + "_NE" + str(phraseNECounts[phrase])][0] += 1
            counts["tokens"] += len(tokenization.findall("token"))

            # Map entity id -> coreference role, from Coref interactions in this sentence.
            corefType = {}
            for interaction in sentence.findall("interaction"):
                if interaction.get("type") == "Coref":
                    corefType[interaction.get("e1")] = "Anaphora"
                    corefType[interaction.get("e2")] = "Antecedent"

            for entity in entities:
                # Skip "given" (pre-annotated/known) entities.
                if entity.get("given") == "True":
                    continue
                counts["entity"] += 1
                print "entity", entity.get("id")
                print ETUtils.toStr(entity)
                matches = getMatchingPhrases(entity, phraseOffsets, phraseDict)
                count = 0
                filteredCount = 0
                for phrase in matches:
                    # Coreference role of this entity, if any.
                    cType = "UNKNOWN"
                    if corefType.has_key(entity.get("id")):
                        cType = corefType[entity.get("id")]
                    print "  match", count, ETUtils.toStr(phrase), "NE" + str(
                        phraseNECounts[phrase]
                    ), "ctype:" + cType, "ent:" + ETUtils.toStr(entity)
                    count += 1
                    # Index 1 of the matchByType pairs counts entity matches.
                    matchByType[phrase.get("type")][1] += 1
                    matchByType[phrase.get("type") + "_" + cType][1] += 1
                    matchByType[phrase.get("type") + "_" + cType + "_NE" + str(phraseNECounts[phrase])][1] += 1
                    if phrase.get("type") in filter:
                        filteredCount += 1
                        filteredMatchByType[phrase.get("type")][1] += 1
                # Matching
                if count == 0:
                    print "  NO MATCH", ETUtils.toStr(entity)
                    counts["no-match"] += 1
                else:
                    counts["match"] += 1
                # Multimatching
                if len(matches) > 1:
                    # When several phrases match, report which one selectBestMatch picks.
                    bestMatch = selectBestMatch(entity, matches)
                    print "  MULTIMATCH(" + entity.get("charOffset") + "," + str(
                        entity.get("altOffset")
                    ) + ")", ", ".join(
                        [x.get("type") + "_" + x.get("charOffset") for x in matches]
                    ), "SEL(" + bestMatch.get(
                        "type"
                    ) + "_" + bestMatch.get(
                        "charOffset"
                    ) + ")"
                # Filtered matching
#.........这里部分代码省略.........
开发者ID:DUT-LiuYang,项目名称:TEES,代码行数:103,代码来源:MapPhrases.py

示例3: run

# 需要导入模块: from Utils import ElementTreeUtils [as 别名]
# 或者: from Utils.ElementTreeUtils import toStr [as 别名]

#.........这里部分代码省略.........
            # NOTE: this listing is the tail of run(); the omitted part above
            # defines mentionfile, menDict, menSet, corpusRoot, workdir and the
            # various counters (pathnerEntityCount, removedEntityCount, sCount,
            # totalEntities, splitEventCount, epCount usage) referenced below.
            #bannerId, offsets, word = line.strip().split("|", 2)
            # Each PathNER output line: tag, mention text, id, confidence (tab-separated).
            pathNerTag, mention, pathNerId, confidence = line.strip().split("\t")
            menDict[mention] = pathNerId
            menSet.add(mention)
        mentionfile.close()

        print menSet
        #count for pathway entities
        epCount = 0 
        for sentence in corpusRoot.getiterator(processElement):
            #infile.write("U" + str(idCount) + " " + sentence.get("text").replace("\n", " ").replace("\n", " ") + "\n")
            # NOTE(review): the second .replace("\n", " ") is a no-op duplicate —
            # presumably "\r" was intended; confirm against the original intent.
            sentText = sentence.get("text").replace("\n", " ").replace("\n", " ") + "\n"
            startOffsets = []
            endOffsets = []

            # Existing entities in this sentence (expected to come from BANNER).
            bannerEntities = sentence.findall("entity")
            bannerEntityCount = 0

            for bannerEntity in bannerEntities:
                source = bannerEntity.get('source')
                text = bannerEntity.get('text')

                # Flag unexpected non-BANNER entities for inspection.
                if not source == 'BANNER':
                    print source, text

                bannerEntityCount += 1

            startOffset = 0
            endOffset = 0
            # BANNER entities that overlap a PathNER mention and must be removed.
            bannerEntity2removed = set()

            for mention in menSet:
                # All occurrences of this mention in the sentence text (literal match).
                starts = [match.start() for match in re.finditer(re.escape(mention), sentText)]

                #print 'Finding PathNER mention:', mention, starts

                for startOffset in starts:
                    endOffset = startOffset + len(mention)

                    if  startOffset < 0:
                        continue

                    entities = makeEntityElements(int(startOffset), int(endOffset), sentence.get("text"), splitNewlines, elementName)

                    for ent in entities:
                        #Add processing for entities that are overlapped with the PathNER result
                        
                        entOffsets = ent.get("charOffset").split('-')
                        entStart = int(entOffsets[0])
                        entEnd = int(entOffsets[1])

                        for bannerEntity in bannerEntities:
                
                            bannerOffsets = bannerEntity.get('charOffset').split('-')
                            bannerStart = int(bannerOffsets[0])
                            bannerEnd = int(bannerOffsets[1])

                            if debug:
                                print 'PathNER entity:', entStart, entEnd, 'Banner entity:', bannerStart, bannerEnd

                            #Are offsets overlapped or not?
                            if entEnd <= bannerStart or bannerEnd <= entStart: #not overlapped
                                continue
                            else:#overlapped, show remove the banner entity
                                bannerEntity2removed.add(bannerEntity)

                        # Assign the new entity the next id slot after the existing ones.
                        bannerEntityCount += 1
                        ent.set("id", sentence.get("id") + ".e" + str(bannerEntityCount))
                        epCount += 1

                        sentence.append(ent)
                        pathnerEntityCount += 1
                        
                        if debug:
                            print 'Adding PathNER resutl:', mention
                            print ETUtils.toStr(sentence)
                        
            #Now really to delete the overlapped BANNER entities
            for bEntity in bannerEntity2removed:
                removedEntityCount += 1
                sentence.remove(bEntity)
                
                if debug:
                    # NOTE(review): bannerEntity here is the stale loop variable
                    # left over from the inner loop above; the removed entity is
                    # bEntity — this debug line likely reports the wrong entity.
                    print 'Removing entity ', bannerEntity.get('text'), bannerEntity.get('id')
                    print ETUtils.toStr(sentence)

        print >> sys.stderr, "PathNER found", pathnerEntityCount, "entities and remove ", removedEntityCount, " overlapping BANNER entities. "
        print >> sys.stderr, "(" + str(sCount) + " sentences processed)"
        print >> sys.stderr, "New", elementName + "-elements:", totalEntities, "(Split", splitEventCount, "PathNER entities with newlines)"
    
    # Remove work directory
    if not debug:
        shutil.rmtree(workdir)
    else:
        print >> sys.stderr, "PathNER working directory for debugging at", workdir
        
    if output != None:
        print >> sys.stderr, "Writing output to", output
        ETUtils.write(corpusRoot, output)
    return corpusTree
开发者ID:chengkun-wu,项目名称:PWTEES,代码行数:104,代码来源:PathNER.py


注:本文中的Utils.ElementTreeUtils.toStr方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。