本文整理汇总了Python中Utils.Range.charOffsetToSingleTuple方法的典型用法代码示例。如果您正苦于以下问题:Python Range.charOffsetToSingleTuple方法的具体用法?Python Range.charOffsetToSingleTuple怎么用?Python Range.charOffsetToSingleTuple使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Utils.Range
的用法示例。
在下文中一共展示了Range.charOffsetToSingleTuple方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _markNamedEntities
# 需要导入模块: from Utils import Range [as 别名]
# 或者: from Utils.Range import charOffsetToSingleTuple [as 别名]
def _markNamedEntities(self):
    """
    Define which tokens belong to _named_ entities.

    Named entities are sometimes masked when testing learning of interactions,
    to prevent the system making a trivial decision based on commonly
    interacting names.

    Fills three per-token maps:
      self.tokenIsName       - True if the token overlaps a named entity
      self.tokenIsEntity     - True if the token overlaps any entity
      self.tokenIsEntityHead - entities whose head offset overlaps the token
    """
    self.tokenIsName = {}
    self.tokenIsEntity = {}
    self.tokenIsEntityHead = {}
    # Initialize the dictionaries
    for token in self.tokens:
        self.tokenIsName[token] = False
        self.tokenIsEntity[token] = False
        self.tokenIsEntityHead[token] = []
    for entity in self.entities:
        entityOffsets = Range.charOffsetToTuples(entity.get("charOffset"))
        entityHeadOffset = Range.charOffsetToSingleTuple(entity.get("headOffset"))
        for token in self.tokens:
            tokenOffset = Range.charOffsetToSingleTuple(token.get("charOffset"))
            for entityOffset in entityOffsets:
                if Range.overlap(entityOffset, tokenOffset):
                    self.tokenIsEntity[token] = True
                    # BUGFIX (idiom): compare to None with "is", not "!="
                    if entity.get("isName") is not None:
                        if entity.get("isName") == "True":
                            self.tokenIsName[token] = True
                    else:
                        # No explicit isName attribute: default the entity to
                        # being a name and record that on the element itself
                        entity.set("isName", "True")
                        self.tokenIsName[token] = True
            # Head-offset check is independent of the entity's sub-offsets
            if Range.overlap(entityHeadOffset, tokenOffset):
                self.tokenIsEntityHead[token].append(entity)
示例2: getHeads
# 需要导入模块: from Utils import Range [as 别名]
# 或者: from Utils.Range import charOffsetToSingleTuple [as 别名]
def getHeads(corpus):
    """
    Count head-word frequencies per entity type in a corpus.

    Returns a nested dict {entity type: {head text: count}}; the special
    type key "None" counts tokens that are not the head of any entity.
    """
    corpus = ETUtils.ETFromObj(corpus)
    headDict = {}
    headDict["None"] = {}
    for sentence in corpus.getiterator("sentence"):
        headOffsetStrings = set()
        for entity in sentence.findall("entity"):
            eType = entity.get("type")
            if eType not in headDict:
                headDict[eType] = {}
            eText = entity.get("text")
            headOffset = entity.get("headOffset")
            headOffsetStrings.add(headOffset)
            headOffset = Range.charOffsetToSingleTuple(headOffset)
            charOffset = Range.charOffsetToSingleTuple(entity.get("charOffset"))
            if headOffset == charOffset:
                # Head spans the whole entity: count the entity text itself
                if eText not in headDict[eType]:
                    headDict[eType][eText] = 0
                headDict[eType][eText] += 1
            else:
                # BUGFIX: original sliced the undefined name 'sentenceText'
                # (NameError). The index arithmetic (head offset made relative
                # to the entity's start) addresses a sub-span of the entity
                # text, so slice eText -- TODO confirm against callers.
                headText = eText[headOffset[0]-charOffset[0]:headOffset[1]-charOffset[0]+1]
                if headText not in headDict[eType]:
                    headDict[eType][headText] = 0
                headDict[eType][headText] += 1
        # BUGFIX: original iterated the undefined name 'tokens' (NameError);
        # use the sentence's descendant token elements instead -- TODO confirm
        # this matches the corpus schema's tokenization layout.
        for token in sentence.getiterator("token"):
            if not token.get("charOffset") in headOffsetStrings: # token is not the head of any entity
                headText = token.get("text")
                if headText not in headDict["None"]:
                    headDict["None"][headText] = 0
                headDict["None"][headText] += 1
    return headDict
示例3: selectBestMatch
# 需要导入模块: from Utils import Range [as 别名]
# 或者: from Utils.Range import charOffsetToSingleTuple [as 别名]
def selectBestMatch(entity, phrases):
    """Return the phrase whose char offset mismatches the entity's least.

    The entity's altOffset is preferred over its charOffset when present.
    Ties keep the earlier phrase; returns None for an empty phrase list.
    """
    altOffset = entity.get("altOffset")
    if altOffset is not None:
        target = Range.charOffsetToSingleTuple(altOffset)
    else:
        target = Range.charOffsetToSingleTuple(entity.get("charOffset"))
    bestValue = sys.maxint
    bestPhrase = None
    for candidate in phrases:
        value = Range.mismatch(target, Range.charOffsetToSingleTuple(candidate.get("charOffset")))
        if value < bestValue:
            bestValue = value
            bestPhrase = candidate
    return bestPhrase
示例4: exportChemProtPredictions
# 需要导入模块: from Utils import Range [as 别名]
# 或者: from Utils.Range import charOffsetToSingleTuple [as 别名]
def exportChemProtPredictions(xml, outPath, fileTypes="predictions", setNames=None):
    """
    Export an interaction-XML corpus into ChemProt tab-separated files.

    xml       - interaction XML (tree, root element, or path)
    outPath   - output location passed to openOutFile
    fileTypes - comma-separated string or list; any of "predictions",
                "abstracts", "entities", "relations", or "all"
    setNames  - optional {set name: replacement name} remapping

    Returns the (possibly re-parsed) XML object. Files are opened lazily per
    (set, type) via openOutFile and all closed at the end.
    """
    # Normalize fileTypes to a list and validate it
    if fileTypes == "all":
        fileTypes = ["predictions", "abstracts", "entities", "relations"]
    elif isinstance(fileTypes, basestring):
        fileTypes = fileTypes.split(",")
    for fileType in fileTypes:
        if fileType not in ["predictions", "abstracts", "entities", "relations"]:
            raise Exception("Unknown ChemProt file type '" + str(fileType) + "'")
    xml = ETUtils.ETFromObj(xml)
    #with open(outPath, "wt") as f
    outFiles = {}   # {set name: {file type: handle}} bookkeeping for openOutFile
    openFiles = {}  # all handles actually opened, for closing at the end
    for document in xml.getiterator("document"):
        docId = document.get("origId")
        setName = document.get("set")
        if setNames != None:
            setName = setNames.get(setName, setName)
        if setName not in outFiles:
            outFiles[setName] = {}
        # Abstracts file: doc id, title, abstract body (tab-separated)
        outFile = openOutFile(setName, outPath, "abstracts", fileTypes, outFiles, openFiles)
        if outFile != None:
            docText = document.get("text")
            #assert docText.count("\t") == 1, (docText.count("\t"), document.attrib)
            #title, abstract = docText.split("\t")
            #titleLength = document.get("titleLength")
            titleOffset = Range.charOffsetToSingleTuple(document.get("titleOffset"))
            assert titleOffset[0] == 0
            # Split the document text at the title boundary (skip the separator char)
            outFile.write("\t".join([docId, docText[:titleOffset[1]], docText[titleOffset[1]+1:]]) + "\n")
        # Entities file; entityById is always built because relations need it
        entityById = {}
        for entity in document.getiterator("entity"):
            outFile = openOutFile(setName, outPath, "entities", fileTypes, outFiles, openFiles)
            if outFile != None:
                eType = entity.get("type")
                # GENE entities carry a -Y/-N suffix for their normalization flag
                if entity.get("normalized") != None and entity.get("type") == "GENE":
                    eType += "-Y" if entity.get("normalized") == "True" else "-N"
                offset = Range.charOffsetToSingleTuple(entity.get("charOffset"))
                outFile.write("\t".join([docId, entity.get("origId"), eType, str(offset[0]), str(offset[1]), entity.get("text")]) + "\n")
            assert entity.get("id") not in entityById
            entityById[entity.get("id")] = entity
        # Relations and predictions files, one row per interaction
        for interaction in document.getiterator("interaction"):
            e1 = entityById[interaction.get("e1")]
            e2 = entityById[interaction.get("e2")]
            outFile = openOutFile(setName, outPath, "relations", fileTypes, outFiles, openFiles)
            if outFile != None:
                # "evaluated" column: Y/N when known, X when absent
                evaluated = "X"
                if interaction.get("evaluated") != None:
                    evaluated = "Y " if interaction.get("evaluated") == "True" else "N "
                outFile.write("\t".join([docId, interaction.get("type"), evaluated, interaction.get("relType"), "Arg1:" + e1.get("origId"), "Arg2:" + e2.get("origId")]) + "\n")
            outFile = openOutFile(setName, outPath, "predictions", fileTypes, outFiles, openFiles)
            if outFile != None:
                outFile.write("\t".join([docId, interaction.get("type"), "Arg1:" + e1.get("origId"), "Arg2:" + e2.get("origId")]) + "\n")
    print >> sys.stderr, "Closing output files"
    for f in openFiles.values():
        f.close()
    return xml
示例5: getNECounts
# 需要导入模块: from Utils import Range [as 别名]
# 或者: from Utils.Range import charOffsetToSingleTuple [as 别名]
def getNECounts(phrases, entities):
    """Return {phrase: number of named entities fully contained in it}."""
    counts = {}
    for phrase in phrases:
        span = Range.charOffsetToSingleTuple(phrase.get("charOffset"))
        contained = 0
        for entity in entities:
            # only check names
            if entity.get("given") != "True":
                continue
            if Range.contains(span, Range.charOffsetToSingleTuple(entity.get("charOffset"))):
                contained += 1
        counts[phrase] = contained
    return counts
示例6: getMatchingPhrases
# 需要导入模块: from Utils import Range [as 别名]
# 或者: from Utils.Range import charOffsetToSingleTuple [as 别名]
def getMatchingPhrases(entity, phraseOffsets, phraseDict):
    """Collect phrases whose offset lies between the entity's alt (minimal)
    offset and its full charOffset. Named entities yield no matches."""
    if entity.get("isName") == "True":
        return []
    outer = Range.charOffsetToSingleTuple(entity.get("charOffset"))
    inner = entity.get("altOffset")
    # Without an altOffset the minimal span equals the full span
    inner = Range.charOffsetToSingleTuple(inner) if inner is not None else outer
    matches = []
    for candidate in phraseOffsets:
        if Range.contains(outer, candidate) and Range.contains(candidate, inner):
            matches += phraseDict[candidate]
    return matches
示例7: insertElements
# 需要导入模块: from Utils import Range [as 别名]
# 或者: from Utils.Range import charOffsetToSingleTuple [as 别名]
def insertElements(corpus, specAnn):
    """
    Move per-document species annotation spans into their sentences.

    corpus  - interaction-XML root element
    specAnn - {document origId: list of span elements}; matched spans are
              removed from the lists (consumed) as they are placed

    Each span's document-level "offset" (appears to hold a (start, end)
    pair -- set by the producer of specAnn; TODO confirm) is converted to a
    sentence-relative "charOffset" attribute, and the span is appended to
    the sentence's analyses/entities container. Spans crossing a sentence
    boundary are left unconsumed.
    """
    for document in corpus.iter('document'):
        docId = document.get("origId")
        assert docId in specAnn, docId
        for sentence in document.iter('sentence'):
            sentOffset = Range.charOffsetToSingleTuple(sentence.get("charOffset"))
            analyses = sentence.find("analyses")
            # BUGFIX: an Element with no children is falsy, so "if not analyses"
            # created a duplicate <analyses> even when one existed; test None.
            if analyses is None:
                analyses = ET.SubElement(sentence, "analyses")
            # Find the container
            container = analyses.find("entities")
            # BUGFIX: same falsy-Element pitfall as above
            if container is None:
                container = ET.SubElement(analyses, "entities")
            # Map the spans; iterate over a copy since we remove while looping
            for span in specAnn[docId][:]:
                offset = span.get("offset")
                if Range.overlap(offset, sentOffset):
                    if sentOffset[0] > offset[0] or sentOffset[1] < offset[1]:
                        continue  # span crosses the sentence boundary; skip it
                    specAnn[docId].remove(span)
                    charOffset = (offset[0] - sentOffset[0], offset[1] - sentOffset[0])
                    # Sanity check: the mapped span must cover the same text
                    matchingText = sentence.get("text")[charOffset[0]:charOffset[1]]
                    spanText = span.get("text")
                    assert matchingText == spanText, (matchingText, spanText, charOffset)
                    span.set("charOffset", "-".join([str(x) for x in charOffset]))
                    # A double dash would indicate a negative mapped offset
                    assert not "--" in span.get("charOffset"), [str(x) for x in charOffset]
                    del span.attrib["offset"]
                    container.append(span)
示例8: fixAltOffsets
# 需要导入模块: from Utils import Range [as 别名]
# 或者: from Utils.Range import charOffsetToSingleTuple [as 别名]
def fixAltOffsets(input, output=None):
    """Convert entity altOffsets from document-relative to sentence-relative
    coordinates, optionally writing the corpus back out.

    Returns the loaded corpus tree.
    """
    print >> sys.stderr, "Loading corpus", input
    corpusTree = ETUtils.ETFromObj(input)
    print >> sys.stderr, "Corpus file loaded"
    corpusRoot = corpusTree.getroot()
    docCount = 0
    sentencesCreated = 0
    sentences = [x for x in corpusRoot.getiterator("sentence")]
    counter = ProgressCounter(len(sentences), "FixAltOffsets")
    fixCount = 0
    # fix spans
    for sentence in sentences:
        counter.update(1, "Fixing AltOffsets for sentence ("+sentence.get("id")+"): ")
        sentOffset = Range.charOffsetToSingleTuple(sentence.get("charOffset"))
        for entity in sentence.findall("entity"):
            altOffsetString = entity.get("altOffset")
            if altOffsetString == None:
                continue
            altOffsets = Range.charOffsetToTuples(altOffsetString)
            assert len(altOffsets) == 1
            # Shift every span by the sentence's start position
            shifted = [(begin - sentOffset[0], end - sentOffset[0]) for (begin, end) in altOffsets]
            entity.set("altOffset", Range.tuplesToCharOffset(shifted))
            fixCount += 1
    print >> sys.stderr, "Fixed", fixCount, "altOffsets"
    if output != None:
        print >> sys.stderr, "Writing output to", output
        ETUtils.write(corpusRoot, output)
    return corpusTree
示例9: addSentence
# 需要导入模块: from Utils import Range [as 别名]
# 或者: from Utils.Range import charOffsetToSingleTuple [as 别名]
def addSentence(self, sentenceGraph):
    """
    Accumulate interaction- and event-span statistics for one sentence.

    The span of an interaction is the token-index distance between its two
    argument heads; an event's span is the index range covered by all of its
    arguments. Updates self.interactionSpans / self.eventSpans (histograms)
    and self.intSpan / self.eventSpan ("min"/"max" accumulators).
    """
    if sentenceGraph == None:
        return
    # Tokens sorted by character offset; index = linear position in sentence
    tokens = sorted([(Range.charOffsetToSingleTuple(x.get("charOffset")), x) for x in sentenceGraph.tokens])
    indexByTokenId = {tokens[i][1].get("id"):i for i in range(len(tokens))}
    assert len(indexByTokenId) == len(tokens) # check that there were no duplicate ids
    entityById = {x.get("id"):x for x in sentenceGraph.entities}
    events = {}  # e1 id -> {"min"/"max" argument token index seen so far}
    for interaction in sentenceGraph.interactions:
        e1Id = interaction.get("e1")
        e2Id = interaction.get("e2")
        e1 = entityById[e1Id]
        e2 = entityById[e2Id]
        t1 = sentenceGraph.entityHeadTokenByEntity[e1]
        t2 = sentenceGraph.entityHeadTokenByEntity[e2]
        index1 = indexByTokenId[t1.get("id")]
        index2 = indexByTokenId[t2.get("id")]
        intSpan = abs(index1 - index2)
        self.interactionSpans[intSpan] = self.interactionSpans.get(intSpan, 0) + 1
        # NOTE(review): .get() returns None for a missing key; min/max with
        # None only works under Python 2 ordering rules -- presumably
        # self.intSpan is pre-seeded elsewhere; confirm before porting.
        self.intSpan["min"] = min(self.intSpan.get("min"), intSpan)
        self.intSpan["max"] = max(self.intSpan.get("max"), intSpan)
        if interaction.get("event") == "True":
            # Event arguments grouped by the e1 (trigger) entity id
            if e1Id not in events:
                events[e1Id] = {"min":9999, "max":-9999}
            events[e1Id]["min"] = min(events[e1Id]["min"], index1, index2)
            events[e1Id]["max"] = max(events[e1Id]["max"], index1, index2)
    for eventId in sorted(events.keys()):
        eventSpan = events[eventId]["max"] - events[eventId]["min"]
        self.eventSpans[eventSpan] = self.eventSpans.get(eventSpan, 0) + 1
        # Same Python 2 None-comparison caveat as self.intSpan above
        self.eventSpan["min"] = min(self.eventSpan.get("min"), eventSpan)
        self.eventSpan["max"] = max(self.eventSpan.get("max"), eventSpan)
示例10: getPhraseDict
# 需要导入模块: from Utils import Range [as 别名]
# 或者: from Utils.Range import charOffsetToSingleTuple [as 别名]
def getPhraseDict(phrases):
    """Group phrases by their (start, end) character-offset tuple."""
    byOffset = {}
    for phrase in phrases:
        key = Range.charOffsetToSingleTuple(phrase.get("charOffset"))
        byOffset.setdefault(key, []).append(phrase)
    return byOffset
示例11: moveElements
# 需要导入模块: from Utils import Range [as 别名]
# 或者: from Utils.Range import charOffsetToSingleTuple [as 别名]
def moveElements(document):
    """
    Move document-level entity and interaction elements into their sentences.

    Entities are attached to the first sentence whose span overlaps theirs,
    re-identified under the sentence id, and their charOffset rebased to be
    sentence-relative (original kept in "origOffset"). Interactions follow
    the earlier of their two argument entities' sentences and have their
    e1/e2 references remapped to the new entity ids.
    """
    entMap = {}           # old entity id -> new sentence-scoped entity id
    entSentence = {}      # old entity id -> owning sentence element
    entSentenceIndex = {} # old entity id -> owning sentence's position
    sentences = document.findall("sentence")
    sentenceCount = 0
    for sentence in sentences:
        sentenceOffset = Range.charOffsetToSingleTuple(sentence.get("charOffset"))
        # Move entities
        entCount = 0
        for entity in document.findall("entity"):
            entityOffset = Range.charOffsetToSingleTuple(entity.get("charOffset"))
            if Range.overlap(sentenceOffset, entityOffset):
                document.remove(entity)
                sentence.append(entity)
                entityId = entity.get("id")
                entityIdLastPart = entityId.rsplit(".", 1)[-1]
                if entityIdLastPart.startswith("e"):
                    # Id already has an entity-style suffix: keep it, rescope it
                    entity.set("id", sentence.get("id") + "." + entityIdLastPart)
                    entMap[entityId] = sentence.get("id") + "." + entityIdLastPart
                else:
                    # Otherwise number the entity within this sentence and
                    # preserve the original id in "docId"
                    entity.set("docId", entityId)
                    entity.set("id", sentence.get("id") + ".e" + str(entCount))
                    entMap[entityId] = sentence.get("id") + ".e" + str(entCount)
                entSentence[entityId] = sentence
                entSentenceIndex[entityId] = sentenceCount
                # Rebase the char offset to sentence-relative coordinates
                newEntityOffset = (entityOffset[0] - sentenceOffset[0], entityOffset[1] - sentenceOffset[0])
                entity.set("origOffset", entity.get("charOffset"))
                entity.set("charOffset", str(newEntityOffset[0]) + "-" + str(newEntityOffset[1]))
                entCount += 1
        sentenceCount += 1
    # Move interactions
    intCount = 0
    for interaction in document.findall("interaction"):
        # Attach the interaction to whichever argument's sentence comes first
        if entSentenceIndex[interaction.get("e1")] < entSentenceIndex[interaction.get("e2")]:
            targetSentence = entSentence[interaction.get("e1")]
        else:
            targetSentence = entSentence[interaction.get("e2")]
        document.remove(interaction)
        targetSentence.append(interaction)
        interaction.set("id", targetSentence.get("id") + ".i" + str(intCount))
        interaction.set("e1", entMap[interaction.get("e1")])
        interaction.set("e2", entMap[interaction.get("e2")])
        intCount += 1
示例12: makeDETSubPhrases
# 需要导入模块: from Utils import Range [as 别名]
# 或者: from Utils.Range import charOffsetToSingleTuple [as 别名]
def makeDETSubPhrases(phrases, tokens, phraseDict, filter=None):
    """For each phrase preceded by a determiner (DT) token, build an extended
    "DT-<type>" phrase that also covers the determiner.

    New phrases are registered in phraseDict (offset -> [phrase]) and
    returned; offsets already present are not duplicated.
    """
    added = []
    for phrase in phrases:
        if filter != None and phrase.get("type") not in filter:
            continue
        span = Range.charOffsetToSingleTuple(phrase.get("charOffset"))
        begin = int(phrase.get("begin"))
        end = int(phrase.get("end"))
        if begin > 0 and tokens[begin - 1].get("POS") == "DT":
            # Extend the span left to the determiner's start
            detStart = Range.charOffsetToSingleTuple(tokens[begin - 1].get("charOffset"))[0]
            extendedSpan = (detStart, span[1])
            extended = makePhrase("DT-" + phrase.get("type"), extendedSpan, begin - 1, end)
            if extendedSpan not in phraseDict:
                added.append(extended)
                phraseDict[extendedSpan] = [extended]
    return added
示例13: getTokens
# 需要导入模块: from Utils import Range [as 别名]
# 或者: from Utils.Range import charOffsetToSingleTuple [as 别名]
def getTokens(self, entity, tokenTuples):
    """Return the texts of tokens overlapping the entity's charOffset.

    tokenTuples holds (offset tuple, token element) pairs; iteration stops
    at the first non-overlapping token after a match, which assumes the
    pairs are sorted by offset.
    """
    offsetString = entity.get("charOffset")
    assert offsetString != None
    span = Range.charOffsetToSingleTuple(offsetString)
    texts = []
    for pair in tokenTuples:
        if Range.overlap(span, pair[0]):
            texts.append(pair[1].get("text"))
        elif texts:
            # Past the entity's end: no further tokens can overlap
            break
    return texts
示例14: makeTokenSubPhrases
# 需要导入模块: from Utils import Range [as 别名]
# 或者: from Utils.Range import charOffsetToSingleTuple [as 别名]
def makeTokenSubPhrases(tokens, phraseDict, includePOS=None):
    """
    Create single-token phrases for tokens with selected POS tags.

    tokens     - token elements in sentence order
    phraseDict - offset tuple -> list of phrases; updated in place
    includePOS - POS tags to phrase-ify (default: ["PRP$", "IN", "WP$"])

    Returns the list of newly created phrases; offsets already present in
    phraseDict are skipped.
    """
    # BUGFIX (idiom): avoid a mutable default argument; same default tags.
    if includePOS is None:
        includePOS = ["PRP$", "IN", "WP$"]
    newPhrases = []
    for i, token in enumerate(tokens):
        tokPOS = token.get("POS")
        if tokPOS not in includePOS:
            continue
        tokOffset = Range.charOffsetToSingleTuple(token.get("charOffset"))
        if tokOffset not in phraseDict:
            newPhrase = makePhrase("TOK-t" + tokPOS, tokOffset, i, i)
            newPhrases.append(newPhrase)
            phraseDict[tokOffset] = [newPhrase]
    return newPhrases
示例15: getPatterns
# 需要导入模块: from Utils import Range [as 别名]
# 或者: from Utils.Range import charOffsetToSingleTuple [as 别名]
def getPatterns(self, e1, e2):
    """
    Build token n-gram pattern dictionaries for the text before, between,
    and after the entity pair (e1, e2).

    Tokens are classified relative to the pair via getRelativePosition
    ("Fore"/"Between"/"After"); each non-name token contributes (with up to
    two preceding tokens of the same region) to the matching pattern dicts.
    Returns (patternForeBetween, patternBetween, patternBetweenAfter).
    """
    e1Range = Range.charOffsetToSingleTuple(e1.get("charOffset"))
    e2Range = Range.charOffsetToSingleTuple(e2.get("charOffset"))
    tokenPositions = {}
    for token in self.sentenceGraph.tokens:
        tokenPositions[token.get("id")] = self.getRelativePosition(e1Range,e2Range,token)
    # Sliding window of the previous two token texts within a region
    prevTokenText = None
    prevToken2Text = None
    prevPosition = None
    patternForeBetween = {}
    patternBetween = {}
    patternBetweenAfter = {}
    for token in self.sentenceGraph.tokens:
        # Named-entity tokens are excluded from the patterns
        if self.sentenceGraph.tokenIsName[token]:
            continue
        id = token.get("id")
        text = token.get("text").lower()
        # Reset the context window at every region boundary
        if prevPosition != tokenPositions[id]:
            prevTokenText = None
            prevToken2Text = None
        if tokenPositions[id] == "Fore":
            self.addToPattern(patternForeBetween, text, prevTokenText, prevToken2Text)
        elif tokenPositions[id] == "Between":
            # "Between" tokens feed all three pattern dictionaries
            self.addToPattern(patternForeBetween, text, prevTokenText, prevToken2Text)
            self.addToPattern(patternBetween, text, prevTokenText, prevToken2Text)
            self.addToPattern(patternBetweenAfter, text, prevTokenText, prevToken2Text)
        elif tokenPositions[id] == "After":
            self.addToPattern(patternBetweenAfter, text, prevTokenText, prevToken2Text)
        prevPosition = tokenPositions[id]
        #if tokenPositions[id].find("Entity") != -1:
        # Advance the two-token context window
        prevToken2Text = prevTokenText
        prevTokenText = text
    return patternForeBetween, patternBetween, patternBetweenAfter