本文整理汇总了Python中Utils.ElementTreeUtils.getElementByAttrib方法的典型用法代码示例。如果您正苦于以下问题：Python ElementTreeUtils.getElementByAttrib方法的具体用法？Python ElementTreeUtils.getElementByAttrib怎么用？Python ElementTreeUtils.getElementByAttrib使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块Utils.ElementTreeUtils的用法示例。
在下文中一共展示了ElementTreeUtils.getElementByAttrib方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: findHeadsSyntactic
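# Required import: from Utils import ElementTreeUtils as ETUtils (the code below refers to the module as ETUtils)
# Alternatively: from Utils.ElementTreeUtils import getElementByAttrib [as alias] (then call the function directly instead of ETUtils.getElementByAttrib)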
def findHeadsSyntactic(corpus, parse, tokenization):
    """
    Assign a syntactic head token to every entity that does not have one yet.

    For each "sentence" element in the corpus, the tokenization and parse with
    the requested names are looked up, head scores are computed for the tokens
    with getTokenHeadScores, and each "entity" element lacking a "headOffset"
    attribute receives the token chosen by getEntityHeadToken. According to the
    original documentation, the head token is the token closest to the root of
    the dependency-parse subtree spanned by the text of the element.

    Entity elements are modified in place: "headOffset" is set to the head
    token's "charOffset", "headMethod" to "Syntax" and "headString" to the head
    token's "text".

    Sentences that lack the requested parse or tokenization are reported on
    stderr and skipped.

    @param corpus: object whose getiterator("sentence") yields the sentence
                   elements (presumably an ElementTree tree or element)
    @param parse: value of the "parser" attribute of the parse element to use
    @type parse: string
    @param tokenization: value of the "tokenizer" attribute of the tokenization
                         element to use
    @type tokenization: string
    @return: two-item list; item 0 is the number of entities that were given a
             head here, item 1 is never incremented by this function and stays 0
    """
    counts = [0, 0]
    # Materialize the iterator so the progress counter knows the total up front.
    sentences = list(corpus.getiterator("sentence"))
    counter = ProgressCounter(len(sentences), "SYNTAX")
    for sentence in sentences:
        counter.update()
        tokElement = ETUtils.getElementByAttrib(sentence, "sentenceanalyses/tokenizations/tokenization", {"tokenizer": tokenization})
        parseElement = ETUtils.getElementByAttrib(sentence, "sentenceanalyses/parses/parse", {"parser": parse})
        if tokElement is None or parseElement is None:
            print >> sys.stderr, "Warning, sentence", sentence.get("id"), "missing parse or tokenization"
            # Without both elements there is nothing to score; falling through
            # would call findall() on None and abort the whole run.
            continue
        tokens = tokElement.findall("token")
        tokenHeadScores = getTokenHeadScores(tokens, parseElement.findall("dependency"), sentenceId=sentence.get("id"))
        for entity in sentence.findall("entity"):
            # Entities that already carry a head offset are left untouched.
            if entity.get("headOffset") is not None:
                continue
            headToken = getEntityHeadToken(entity, tokens, tokenHeadScores)
            # The ElementTree entity-element is modified by setting the headOffset attribute
            entity.set("headOffset", headToken.get("charOffset"))
            entity.set("headMethod", "Syntax")
            entity.set("headString", headToken.get("text"))
            counts[0] += 1
    return counts
示例2: processCorpus
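# Required import: from Utils import ElementTreeUtils as ETUtils (the code below refers to the module as ETUtils)
# Alternatively: from Utils.ElementTreeUtils import getElementByAttrib [as alias] (the code below also calls ETUtils.ETFromObj and ETUtils.toStr, so the module import is still needed for those)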
def processCorpus(input, parserName):
print >> sys.stderr, "Loading corpus file", input
corpusRoot = ETUtils.ETFromObj(input).getroot()
documents = corpusRoot.findall("document")
counts = defaultdict(int)
matchByType = defaultdict(lambda: [0, 0])
filteredMatchByType = defaultdict(lambda: [0, 0])
filter = set(["NP", "TOK-tIN", "WHADVP", "WHNP", "TOK-tWP$", "TOK-tPRP$", "NP-IN"])
# # fix spans
# for document in documents:
# for sentence in document.findall("sentence"):
# sentOffset = Range.charOffsetToSingleTuple(sentence.get("charOffset"))
# for entity in sentence.findall("entity"):
# altOffsetString = entity.get("altOffset")
# if altOffsetString == None:
# continue
# #print altOffsetString
# altOffsets = Range.charOffsetToTuples(altOffsetString)
# assert len(altOffsets) == 1
# for i in range(len(altOffsets)):
# altOffset = altOffsets[i]
# altOffsets[i] = (altOffset[0] - sentOffset[0], altOffset[1] - sentOffset[0])
# entity.set("altOffset", Range.tuplesToCharOffset(altOffsets))
# counter = ProgressCounter(len(documents), "Documents")
for document in documents:
for sentence in document.findall("sentence"):
entities = sentence.findall("entity")
parse = ETUtils.getElementByAttrib(sentence.find("sentenceanalyses"), "parse", {"parser": parserName})
if parse == None:
continue
tokenization = ETUtils.getElementByAttrib(
sentence.find("sentenceanalyses"), "tokenization", {"tokenizer": parse.get("tokenizer")}
)
phrases, phraseDict = makePhrases(parse, tokenization, entities)
phraseOffsets = phraseDict.keys()
# phraseOffsets.sort()
phraseNECounts = getNECounts(phrases, entities)
for value in phraseDict.values():
counts["phrases"] += len(value)
for phrase in value:
matchByType[phrase.get("type")][0] += 1
if phrase.get("type") in filter:
filteredMatchByType[phrase.get("type")][0] += 1
counts["phrases-filtered"] += 1
if phrase.get("type").find("NP") != -1:
matchByType[phrase.get("type") + "_NE" + str(phraseNECounts[phrase])][0] += 1
counts["tokens"] += len(tokenization.findall("token"))
corefType = {}
for interaction in sentence.findall("interaction"):
if interaction.get("type") == "Coref":
corefType[interaction.get("e1")] = "Anaphora"
corefType[interaction.get("e2")] = "Antecedent"
for entity in entities:
if entity.get("given") == "True":
continue
counts["entity"] += 1
print "entity", entity.get("id")
print ETUtils.toStr(entity)
matches = getMatchingPhrases(entity, phraseOffsets, phraseDict)
count = 0
filteredCount = 0
for phrase in matches:
cType = "UNKNOWN"
if corefType.has_key(entity.get("id")):
cType = corefType[entity.get("id")]
print " match", count, ETUtils.toStr(phrase), "NE" + str(
phraseNECounts[phrase]
), "ctype:" + cType, "ent:" + ETUtils.toStr(entity)
count += 1
matchByType[phrase.get("type")][1] += 1
matchByType[phrase.get("type") + "_" + cType][1] += 1
matchByType[phrase.get("type") + "_" + cType + "_NE" + str(phraseNECounts[phrase])][1] += 1
if phrase.get("type") in filter:
filteredCount += 1
filteredMatchByType[phrase.get("type")][1] += 1
# Matching
if count == 0:
print " NO MATCH", ETUtils.toStr(entity)
counts["no-match"] += 1
else:
counts["match"] += 1
# Multimatching
if len(matches) > 1:
bestMatch = selectBestMatch(entity, matches)
print " MULTIMATCH(" + entity.get("charOffset") + "," + str(
entity.get("altOffset")
) + ")", ", ".join(
[x.get("type") + "_" + x.get("charOffset") for x in matches]
), "SEL(" + bestMatch.get(
"type"
) + "_" + bestMatch.get(
"charOffset"
) + ")"
# Filtered matching
#.........这里部分代码省略.........