This article collects and summarizes typical usage examples of the Utils.ElementTreeUtils.toStr method in Python. If you have been wondering what exactly ElementTreeUtils.toStr does, how to use it, and where to find examples of its use, then congratulations: the curated code examples here may help. You can also explore further usage examples from the containing module, Utils.ElementTreeUtils.
The following presents 3 code examples of the ElementTreeUtils.toStr method, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
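Before the full examples, here is the basic pattern in miniature: toStr serializes an ElementTree element into an XML string, which is why the examples below use it for debug printing. This is only a sketch, assuming TEES's Utils package is on the import path; the <sentence> element built here is hypothetical, for illustration only.

# Minimal sketch (not taken from the examples below), in Python 2 to match them
import xml.etree.ElementTree as ET
from Utils import ElementTreeUtils as ETUtils

sentence = ET.Element("sentence", {"id": "d0.s0", "text": "BMP-6 inhibits growth."})
# toStr is assumed here to return the element serialized as an XML string
print ETUtils.toStr(sentence)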
Example 1: loadEventXML
# Required import: from Utils import ElementTreeUtils [as alias]
# Or: from Utils.ElementTreeUtils import toStr [as alias]
import types  # needed for the types.StringType check below

# getText and getClue are helper functions defined elsewhere in the same module
def loadEventXML(path, verbose=False):
    xml = ETUtils.ETFromObj(path)
    sentDict = {}
    # Map each sentence's text to a list of event tuples
    for sentence in xml.getiterator("sentence"):
        sentenceText = getText(sentence).strip()
        if not sentDict.has_key(sentenceText):
            sentDict[sentenceText] = []
    for event in xml.getiterator("event"):
        sentenceText = getText(event).strip()
        if not sentDict.has_key(sentenceText):
            sentDict[sentenceText] = []
        events = sentDict[sentenceText]
        clue = event.find("clue")
        clueTuple = getClue(clue)
        eventType = event.find("type").get("class")
        if eventType == "Protein_amino_acid_phosphorylation":
            eventType = "Phosphorylation"
        if type(clueTuple) == types.StringType:
            # A plain string means the clue had no clueType; report it when verbose
            if verbose: print "Event", eventType, "clue with no clueType:", ETUtils.toStr(clue)
        else:
            assert sentenceText[clueTuple[1]:clueTuple[2]+1] == clueTuple[0], (sentenceText, sentenceText[clueTuple[1]:clueTuple[2]+1], clueTuple)
            event = (clueTuple[1], clueTuple[2], eventType, clueTuple[0])
            if event not in events:
                events.append(event)
    return sentDict
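A possible invocation, assuming an interaction-XML input file (the path below is a hypothetical placeholder): loadEventXML returns a dictionary mapping each sentence's text to its list of (start, end, type, text) event tuples.

# Hypothetical usage sketch for loadEventXML
sentDict = loadEventXML("events.xml", verbose=True)
for sentenceText, events in sentDict.items():
    print len(events), "event(s) in:", sentenceText[:40]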
Example 2: processCorpus
# Required import: from Utils import ElementTreeUtils [as alias]
# Or: from Utils.ElementTreeUtils import toStr [as alias]
import sys
from collections import defaultdict

# makePhrases, getNECounts, getMatchingPhrases and selectBestMatch are helpers
# defined elsewhere in the same module
def processCorpus(input, parserName):
    print >> sys.stderr, "Loading corpus file", input
    corpusRoot = ETUtils.ETFromObj(input).getroot()
    documents = corpusRoot.findall("document")
    counts = defaultdict(int)
    matchByType = defaultdict(lambda: [0, 0])
    filteredMatchByType = defaultdict(lambda: [0, 0])
    # Note: this local name shadows the builtin filter()
    filter = set(["NP", "TOK-tIN", "WHADVP", "WHNP", "TOK-tWP$", "TOK-tPRP$", "NP-IN"])
#    # fix spans
#    for document in documents:
#        for sentence in document.findall("sentence"):
#            sentOffset = Range.charOffsetToSingleTuple(sentence.get("charOffset"))
#            for entity in sentence.findall("entity"):
#                altOffsetString = entity.get("altOffset")
#                if altOffsetString == None:
#                    continue
#                #print altOffsetString
#                altOffsets = Range.charOffsetToTuples(altOffsetString)
#                assert len(altOffsets) == 1
#                for i in range(len(altOffsets)):
#                    altOffset = altOffsets[i]
#                    altOffsets[i] = (altOffset[0] - sentOffset[0], altOffset[1] - sentOffset[0])
#                entity.set("altOffset", Range.tuplesToCharOffset(altOffsets))
    #counter = ProgressCounter(len(documents), "Documents")
    for document in documents:
        for sentence in document.findall("sentence"):
            entities = sentence.findall("entity")
            parse = ETUtils.getElementByAttrib(sentence.find("sentenceanalyses"), "parse", {"parser": parserName})
            if parse == None:
                continue
            tokenization = ETUtils.getElementByAttrib(sentence.find("sentenceanalyses"), "tokenization", {"tokenizer": parse.get("tokenizer")})
            phrases, phraseDict = makePhrases(parse, tokenization, entities)
            phraseOffsets = phraseDict.keys()
            #phraseOffsets.sort()
            phraseNECounts = getNECounts(phrases, entities)
            for value in phraseDict.values():
                counts["phrases"] += len(value)
                for phrase in value:
                    matchByType[phrase.get("type")][0] += 1
                    if phrase.get("type") in filter:
                        filteredMatchByType[phrase.get("type")][0] += 1
                        counts["phrases-filtered"] += 1
                    if phrase.get("type").find("NP") != -1:
                        matchByType[phrase.get("type") + "_NE" + str(phraseNECounts[phrase])][0] += 1
            counts["tokens"] += len(tokenization.findall("token"))
            # Record which entities take part in coreference interactions
            corefType = {}
            for interaction in sentence.findall("interaction"):
                if interaction.get("type") == "Coref":
                    corefType[interaction.get("e1")] = "Anaphora"
                    corefType[interaction.get("e2")] = "Antecedent"
            for entity in entities:
                if entity.get("given") == "True":
                    continue
                counts["entity"] += 1
                print "entity", entity.get("id")
                print ETUtils.toStr(entity)
                matches = getMatchingPhrases(entity, phraseOffsets, phraseDict)
                count = 0
                filteredCount = 0
                for phrase in matches:
                    cType = "UNKNOWN"
                    if corefType.has_key(entity.get("id")):
                        cType = corefType[entity.get("id")]
                    print "  match", count, ETUtils.toStr(phrase), "NE" + str(phraseNECounts[phrase]), "ctype:" + cType, "ent:" + ETUtils.toStr(entity)
                    count += 1
                    matchByType[phrase.get("type")][1] += 1
                    matchByType[phrase.get("type") + "_" + cType][1] += 1
                    matchByType[phrase.get("type") + "_" + cType + "_NE" + str(phraseNECounts[phrase])][1] += 1
                    if phrase.get("type") in filter:
                        filteredCount += 1
                        filteredMatchByType[phrase.get("type")][1] += 1
                # Matching
                if count == 0:
                    print "  NO MATCH", ETUtils.toStr(entity)
                    counts["no-match"] += 1
                else:
                    counts["match"] += 1
                # Multimatching
                if len(matches) > 1:
                    bestMatch = selectBestMatch(entity, matches)
                    print "  MULTIMATCH(" + entity.get("charOffset") + "," + str(entity.get("altOffset")) + ")", ", ".join([x.get("type") + "_" + x.get("charOffset") for x in matches]), "SEL(" + bestMatch.get("type") + "_" + bestMatch.get("charOffset") + ")"
                # Filtered matching
#......... part of the code omitted here .........
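processCorpus could be invoked as below; the corpus path and parser name are hypothetical placeholders, assuming the usual TEES convention where a sentence's parse element is tagged with the name of the parser that produced it.

# Hypothetical usage sketch for processCorpus
processCorpus("corpus.xml", "McCC")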
Example 3: run
# Required import: from Utils import ElementTreeUtils [as alias]
# Or: from Utils.ElementTreeUtils import toStr [as alias]
# Also needed by this excerpt: import re, shutil, sys
#......... part of the code omitted here .........
        #bannerId, offsets, word = line.strip().split("|", 2)
        pathNerTag, mention, pathNerId, confidence = line.strip().split("\t")
        menDict[mention] = pathNerId
        menSet.add(mention)
    mentionfile.close()
    print menSet
    # count for pathway entities
    epCount = 0
    for sentence in corpusRoot.getiterator(processElement):
        #infile.write("U" + str(idCount) + " " + sentence.get("text").replace("\n", " ").replace("\r", " ") + "\n")
        sentText = sentence.get("text").replace("\n", " ").replace("\r", " ") + "\n"
        startOffsets = []
        endOffsets = []
        bannerEntities = sentence.findall("entity")
        bannerEntityCount = 0
        for bannerEntity in bannerEntities:
            source = bannerEntity.get('source')
            text = bannerEntity.get('text')
            if not source == 'BANNER':
                print source, text
            bannerEntityCount += 1
        startOffset = 0
        endOffset = 0
        bannerEntity2removed = set()
        for mention in menSet:
            starts = [match.start() for match in re.finditer(re.escape(mention), sentText)]
            #print 'Finding PathNER mention:', mention, starts
            for startOffset in starts:
                endOffset = startOffset + len(mention)
                if startOffset < 0:
                    continue
                entities = makeEntityElements(int(startOffset), int(endOffset), sentence.get("text"), splitNewlines, elementName)
                for ent in entities:
                    # Check whether this entity overlaps an existing BANNER entity
                    entOffsets = ent.get("charOffset").split('-')
                    entStart = int(entOffsets[0])
                    entEnd = int(entOffsets[1])
                    for bannerEntity in bannerEntities:
                        bannerOffsets = bannerEntity.get('charOffset').split('-')
                        bannerStart = int(bannerOffsets[0])
                        bannerEnd = int(bannerOffsets[1])
                        if debug:
                            print 'PathNER entity:', entStart, entEnd, 'Banner entity:', bannerStart, bannerEnd
                        # Do the offsets overlap?
                        if entEnd <= bannerStart or bannerEnd <= entStart: # not overlapped
                            continue
                        else: # overlapped, so the BANNER entity should be removed
                            bannerEntity2removed.add(bannerEntity)
                    bannerEntityCount += 1
                    ent.set("id", sentence.get("id") + ".e" + str(bannerEntityCount))
                    epCount += 1
                    sentence.append(ent)
                    pathnerEntityCount += 1
                    if debug:
                        print 'Adding PathNER result:', mention
                        print ETUtils.toStr(sentence)
        # Now actually delete the overlapping BANNER entities
        for bEntity in bannerEntity2removed:
            removedEntityCount += 1
            sentence.remove(bEntity)
            if debug:
                print 'Removing entity', bEntity.get('text'), bEntity.get('id')
                print ETUtils.toStr(sentence)
    print >> sys.stderr, "PathNER found", pathnerEntityCount, "entities and removed", removedEntityCount, "overlapping BANNER entities."
    print >> sys.stderr, "(" + str(sCount) + " sentences processed)"
    print >> sys.stderr, "New", elementName + "-elements:", totalEntities, "(Split", splitEventCount, "PathNER entities with newlines)"
    # Remove work directory
    if not debug:
        shutil.rmtree(workdir)
    else:
        print >> sys.stderr, "PathNER working directory for debugging at", workdir
    if output != None:
        print >> sys.stderr, "Writing output to", output
        ETUtils.write(corpusRoot, output)
    return corpusTree