当前位置: 首页>>代码示例>>Python>>正文


Python ElementTreeUtils.getElementByAttrib方法代码示例

本文整理汇总了Python中Utils.ElementTreeUtils.getElementByAttrib方法的典型用法代码示例。如果您正苦于以下问题:Python ElementTreeUtils.getElementByAttrib方法的具体用法?Python ElementTreeUtils.getElementByAttrib怎么用?Python ElementTreeUtils.getElementByAttrib使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在Utils.ElementTreeUtils的用法示例。


在下文中一共展示了ElementTreeUtils.getElementByAttrib方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: findHeadsSyntactic

# 需要导入模块: from Utils import ElementTreeUtils as ETUtils (下方示例代码通过别名 ETUtils 调用该模块)
# 或者: from Utils.ElementTreeUtils import getElementByAttrib (此时需将代码中的 ETUtils.getElementByAttrib 改为直接调用 getElementByAttrib)
def findHeadsSyntactic(corpus, parse, tokenization):
    """
    Assign a head token to every entity in the corpus that does not yet have a
    "headOffset" attribute. For each sentence, token head scores are computed from
    the dependencies of the selected parse, and the head token of each entity is
    chosen with getEntityHeadToken.

    Entity elements are modified in place: "headOffset", "headMethod" (always
    "Syntax") and "headString" are set on each entity that receives a head.
    Sentences lacking the requested parse or tokenization are reported on stderr
    and skipped.

    @param corpus: object whose "sentence" descendants are processed
    @param parse: value of the "parser" attribute identifying the parse element to use
    @param tokenization: value of the "tokenizer" attribute identifying the tokenization element to use
    @return: two-item list; item 0 is the number of entities for which a head was set,
             item 1 is never modified by this function and stays 0
    """
    counts = [0,0]
    sentences = [x for x in corpus.getiterator("sentence")]
    counter = ProgressCounter(len(sentences), "SYNTAX")
    for sentence in sentences:
        counter.update()
        tokElement = ETUtils.getElementByAttrib(sentence, "sentenceanalyses/tokenizations/tokenization", {"tokenizer":tokenization})
        parseElement = ETUtils.getElementByAttrib(sentence, "sentenceanalyses/parses/parse", {"parser":parse})
        # Identity test against None: an Element without children is falsy, so
        # truthiness must not be used to detect a missing element.
        if tokElement is None or parseElement is None:
            # sys.stderr.write emits the same text as the former print statement
            # and behaves identically under Python 2 and Python 3.
            sys.stderr.write("Warning, sentence %s missing parse or tokenization\n" % (sentence.get("id"),))
            # Without the analyses there is nothing to score; skipping avoids an
            # AttributeError from calling findall on None below.
            continue
        tokens = tokElement.findall("token")
        tokenHeadScores = getTokenHeadScores(tokens, parseElement.findall("dependency"), sentenceId=sentence.get("id"))
        for entity in sentence.findall("entity"):
            # Entities that already carry a headOffset are left untouched
            if entity.get("headOffset") is None:
                headToken = getEntityHeadToken(entity, tokens, tokenHeadScores)
                # The ElementTree entity-element is modified by setting the headOffset attribute
                entity.set("headOffset", headToken.get("charOffset"))
                entity.set("headMethod", "Syntax")
                entity.set("headString", headToken.get("text"))
                counts[0] += 1
    return counts
开发者ID:ninjin,项目名称:TEES,代码行数:32,代码来源:DetectHeads.py

示例2: processCorpus

# 需要导入模块: from Utils import ElementTreeUtils as ETUtils (下方示例代码通过别名 ETUtils 调用该模块)
# 或者: from Utils.ElementTreeUtils import getElementByAttrib (注意: 示例中还使用了 ETUtils.ETFromObj 和 ETUtils.toStr, 仅导入此函数不足以运行示例)
def processCorpus(input, parserName):
    print >> sys.stderr, "Loading corpus file", input
    corpusRoot = ETUtils.ETFromObj(input).getroot()
    documents = corpusRoot.findall("document")

    counts = defaultdict(int)
    matchByType = defaultdict(lambda: [0, 0])
    filteredMatchByType = defaultdict(lambda: [0, 0])
    filter = set(["NP", "TOK-tIN", "WHADVP", "WHNP", "TOK-tWP$", "TOK-tPRP$", "NP-IN"])

    #    # fix spans
    #    for document in documents:
    #        for sentence in document.findall("sentence"):
    #            sentOffset = Range.charOffsetToSingleTuple(sentence.get("charOffset"))
    #            for entity in sentence.findall("entity"):
    #                altOffsetString = entity.get("altOffset")
    #                if altOffsetString == None:
    #                    continue
    #                #print altOffsetString
    #                altOffsets = Range.charOffsetToTuples(altOffsetString)
    #                assert len(altOffsets) == 1
    #                for i in range(len(altOffsets)):
    #                    altOffset = altOffsets[i]
    #                    altOffsets[i] = (altOffset[0] - sentOffset[0], altOffset[1] - sentOffset[0])
    #                entity.set("altOffset", Range.tuplesToCharOffset(altOffsets))

    # counter = ProgressCounter(len(documents), "Documents")
    for document in documents:
        for sentence in document.findall("sentence"):
            entities = sentence.findall("entity")
            parse = ETUtils.getElementByAttrib(sentence.find("sentenceanalyses"), "parse", {"parser": parserName})
            if parse == None:
                continue
            tokenization = ETUtils.getElementByAttrib(
                sentence.find("sentenceanalyses"), "tokenization", {"tokenizer": parse.get("tokenizer")}
            )
            phrases, phraseDict = makePhrases(parse, tokenization, entities)
            phraseOffsets = phraseDict.keys()
            # phraseOffsets.sort()
            phraseNECounts = getNECounts(phrases, entities)

            for value in phraseDict.values():
                counts["phrases"] += len(value)
                for phrase in value:
                    matchByType[phrase.get("type")][0] += 1
                    if phrase.get("type") in filter:
                        filteredMatchByType[phrase.get("type")][0] += 1
                        counts["phrases-filtered"] += 1
                    if phrase.get("type").find("NP") != -1:
                        matchByType[phrase.get("type") + "_NE" + str(phraseNECounts[phrase])][0] += 1
            counts["tokens"] += len(tokenization.findall("token"))

            corefType = {}
            for interaction in sentence.findall("interaction"):
                if interaction.get("type") == "Coref":
                    corefType[interaction.get("e1")] = "Anaphora"
                    corefType[interaction.get("e2")] = "Antecedent"

            for entity in entities:
                if entity.get("given") == "True":
                    continue
                counts["entity"] += 1
                print "entity", entity.get("id")
                print ETUtils.toStr(entity)
                matches = getMatchingPhrases(entity, phraseOffsets, phraseDict)
                count = 0
                filteredCount = 0
                for phrase in matches:
                    cType = "UNKNOWN"
                    if corefType.has_key(entity.get("id")):
                        cType = corefType[entity.get("id")]
                    print "  match", count, ETUtils.toStr(phrase), "NE" + str(
                        phraseNECounts[phrase]
                    ), "ctype:" + cType, "ent:" + ETUtils.toStr(entity)
                    count += 1
                    matchByType[phrase.get("type")][1] += 1
                    matchByType[phrase.get("type") + "_" + cType][1] += 1
                    matchByType[phrase.get("type") + "_" + cType + "_NE" + str(phraseNECounts[phrase])][1] += 1
                    if phrase.get("type") in filter:
                        filteredCount += 1
                        filteredMatchByType[phrase.get("type")][1] += 1
                # Matching
                if count == 0:
                    print "  NO MATCH", ETUtils.toStr(entity)
                    counts["no-match"] += 1
                else:
                    counts["match"] += 1
                # Multimatching
                if len(matches) > 1:
                    bestMatch = selectBestMatch(entity, matches)
                    print "  MULTIMATCH(" + entity.get("charOffset") + "," + str(
                        entity.get("altOffset")
                    ) + ")", ", ".join(
                        [x.get("type") + "_" + x.get("charOffset") for x in matches]
                    ), "SEL(" + bestMatch.get(
                        "type"
                    ) + "_" + bestMatch.get(
                        "charOffset"
                    ) + ")"
                # Filtered matching
#.........这里部分代码省略.........
开发者ID:DUT-LiuYang,项目名称:TEES,代码行数:103,代码来源:MapPhrases.py


注:本文中的Utils.ElementTreeUtils.getElementByAttrib方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。