This article collects typical usage examples of the Python method Utils.Range.overlap. If you have been wondering what exactly Range.overlap does, how to call it, or what real code that uses Range.overlap looks like, the hand-picked examples below should help. You can also explore further usage examples of the containing class, Utils.Range.
The following presents 12 code examples of the Range.overlap method, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
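The Utils.Range module itself is not shown on this page. Judging from how the examples below call it, a minimal sketch of the helpers they rely on might look like the code below; the function names and signatures come from the examples, while the bodies and the exclusive-end offset convention (suggested by the string slicing in Example 3) are assumptions.
# Hypothetical minimal sketch of the Utils.Range helpers used in the examples below;
# the real module belongs to the original corpus-processing codebase.
def charOffsetToSingleTuple(charOffset):
    """'12-17' -> (12, 17)"""
    begin, end = charOffset.split("-")
    return (int(begin), int(end))

def charOffsetToTuples(charOffset):
    """'12-17,20-25' -> [(12, 17), (20, 25)], one tuple per continuous span"""
    return [charOffsetToSingleTuple(span) for span in charOffset.split(",")]

def tuplesToCharOffset(tuples):
    """[(12, 17), (20, 25)] -> '12-17,20-25'"""
    return ",".join("%d-%d" % (begin, end) for begin, end in tuples)

def overlap(range1, range2):
    """True if two (begin, end) character ranges share at least one position
    (assuming Python-slice-style, exclusive-end offsets)."""
    return range1[0] < range2[1] and range2[0] < range1[1]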
Example 1: _markNamedEntities
# Required import: from Utils import Range [as alias]
# Or: from Utils.Range import overlap [as alias]
def _markNamedEntities(self):
"""
This method is used to define which tokens belong to _named_ entities.
Named entities are sometimes masked when testing learning of interactions, to
prevent the system making a trivial decision based on commonly interacting names.
"""
self.tokenIsName = {}
self.tokenIsEntity = {}
self.tokenIsEntityHead = {}
# Initialize the dictionaries
for token in self.tokens:
self.tokenIsName[token] = False
self.tokenIsEntity[token] = False
self.tokenIsEntityHead[token] = []
for entity in self.entities:
entityOffsets = Range.charOffsetToTuples(entity.get("charOffset"))
entityHeadOffset = Range.charOffsetToSingleTuple(entity.get("headOffset"))
for token in self.tokens:
tokenOffset = Range.charOffsetToSingleTuple(token.get("charOffset"))
for entityOffset in entityOffsets:
if Range.overlap(entityOffset, tokenOffset):
self.tokenIsEntity[token] = True
if entity.get("isName") != None:
if entity.get("isName") == "True":
self.tokenIsName[token] = True
else:
entity.set("isName", "True")
self.tokenIsName[token] = True
if Range.overlap(entityHeadOffset, tokenOffset):
self.tokenIsEntityHead[token].append(entity)
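As a rough illustration (not part of the original code), the dictionaries filled in above could later be consulted when masking entity names before feature generation, for example:
# Illustrative only: mask the surface form of name tokens, assuming the
# dictionaries built by _markNamedEntities and a hypothetical masking scheme.
for token in self.tokens:
    if self.tokenIsName[token]:
        token.set("text", "NAMED_ENT")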
Example 2: getRelativePosition
# Required import: from Utils import Range [as alias]
# Or: from Utils.Range import overlap [as alias]
def getRelativePosition(self, entity1Range, entity2Range, token):
offset = Range.charOffsetToSingleTuple(token.get("charOffset"))
if Range.overlap(entity1Range, offset):
return "Entity1"
if Range.overlap(entity2Range, offset):
return "Entity2"
entitiesRange = (min(entity1Range[0],entity2Range[0]),max(entity1Range[1],entity2Range[1]))
if offset[1] < entitiesRange[0]:
return "Fore"
elif offset[1] > entitiesRange[1]:
return "After"
else:
return "Between"
Example 3: insertElements
# Required import: from Utils import Range [as alias]
# Or: from Utils.Range import overlap [as alias]
def insertElements(corpus, specAnn):
for document in corpus.iter('document'):
docId = document.get("origId")
assert docId in specAnn, docId
for sentence in document.iter('sentence'):
sentOffset = Range.charOffsetToSingleTuple(sentence.get("charOffset"))
analyses = sentence.find("analyses")
if analyses is None:
analyses = ET.SubElement(sentence, "analyses")
#entitiesElement = sentence.find("entities")
# Find the container
container = analyses.find("entities") #None
# for entitiesElement in entitiesElements:
# if entitiesElement.get("source") == "SPECIES":
# container = entitiesElement
# break
if container is None:
container = ET.SubElement(analyses, "entities")
#container.set("source", "SPECIES")
# Map the spans
for span in specAnn[docId][:]:
offset = span.get("offset")
if Range.overlap(offset, sentOffset):
if sentOffset[0] > offset[0] or sentOffset[1] < offset[1]:
continue
specAnn[docId].remove(span)
charOffset = (offset[0] - sentOffset[0], offset[1] - sentOffset[0])
matchingText = sentence.get("text")[charOffset[0]:charOffset[1]]
spanText = span.get("text")
#print matchingText, spanText
assert matchingText == spanText, (matchingText, spanText, charOffset)
span.set("charOffset", "-".join([str(x) for x in charOffset]))
assert not "--" in span.get("charOffset"), [str(x) for x in charOffset]
del span.attrib["offset"] #span.set("offset", "")
container.append(span)
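The offset arithmetic above maps a document-level span into sentence-local coordinates. With invented numbers:
# Illustrative only:
sentOffset = (100, 180)   # sentence span within the document
offset = (120, 128)       # span's document-level offset
charOffset = (offset[0] - sentOffset[0], offset[1] - sentOffset[0])
print(charOffset)         # (20, 28), stored on the element as "20-28"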
Example 4: getTokens
# Required import: from Utils import Range [as alias]
# Or: from Utils.Range import overlap [as alias]
def getTokens(self, entity, tokenTuples):
offset = entity.get("charOffset")
assert offset != None
offset = Range.charOffsetToSingleTuple(offset)
match = []
for tokenTuple in tokenTuples:
if Range.overlap(offset, tokenTuple[0]):
match.append(tokenTuple[1].get("text"))
elif len(match) > 0: # passed end
break
return match
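The early break ("passed end") assumes tokenTuples is sorted by position. A plausible way to build the argument, assuming token elements that carry a charOffset attribute, is:
# Hypothetical construction of tokenTuples: (offsetTuple, tokenElement) pairs
# sorted by offset so that getTokens can stop once it has passed the entity.
tokenTuples = sorted(
    [(Range.charOffsetToSingleTuple(t.get("charOffset")), t) for t in tokens],
    key=lambda pair: pair[0])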
Example 5: markNamedEntities
# Required import: from Utils import Range [as alias]
# Or: from Utils.Range import overlap [as alias]
def markNamedEntities(self, entityElements):
""" Marks tokens belonging to named entities
"""
namedEntityTokens = []
for entityElement in entityElements:
offsets = []
offsetStrings = entityElement.attrib["charOffset"].split(",")
for offsetString in offsetStrings:
charFrom, charTo = offsetString.split("-")
offset = (int(charFrom), int(charTo))
offsets.append(offset)
for k,v in self.tokensById.iteritems():
for offset in offsets:
if Range.overlap(offset, v.charOffset):
v.entities.append(entityElement.attrib["id"])
namedEntityTokens.append(v.id)
return namedEntityTokens
Example 6: moveElements
# Required import: from Utils import Range [as alias]
# Or: from Utils.Range import overlap [as alias]
def moveElements(document):
entMap = {}
entSentence = {}
entSentenceIndex = {}
sentences = document.findall("sentence")
sentenceCount = 0
for sentence in sentences:
sentenceOffset = Range.charOffsetToSingleTuple(sentence.get("charOffset"))
# Move entities
entCount = 0
for entity in document.findall("entity"):
entityOffset = Range.charOffsetToSingleTuple(entity.get("charOffset"))
if Range.overlap(sentenceOffset, entityOffset):
document.remove(entity)
sentence.append(entity)
entityId = entity.get("id")
entityIdLastPart = entityId.rsplit(".", 1)[-1]
if entityIdLastPart.startswith("e"):
entity.set("id", sentence.get("id") + "." + entityIdLastPart)
entMap[entityId] = sentence.get("id") + "." + entityIdLastPart
else:
entity.set("docId", entityId)
entity.set("id", sentence.get("id") + ".e" + str(entCount))
entMap[entityId] = sentence.get("id") + ".e" + str(entCount)
entSentence[entityId] = sentence
entSentenceIndex[entityId] = sentenceCount
newEntityOffset = (entityOffset[0] - sentenceOffset[0], entityOffset[1] - sentenceOffset[0])
entity.set("origOffset", entity.get("charOffset"))
entity.set("charOffset", str(newEntityOffset[0]) + "-" + str(newEntityOffset[1]))
entCount += 1
sentenceCount += 1
# Move interactions
intCount = 0
for interaction in document.findall("interaction"):
if entSentenceIndex[interaction.get("e1")] < entSentenceIndex[interaction.get("e2")]:
targetSentence = entSentence[interaction.get("e1")]
else:
targetSentence = entSentence[interaction.get("e2")]
document.remove(interaction)
targetSentence.append(interaction)
interaction.set("id", targetSentence.get("id") + ".i" + str(intCount))
interaction.set("e1", entMap[interaction.get("e1")])
interaction.set("e2", entMap[interaction.get("e2")])
intCount += 1
Example 7: getEntityHeadToken
# Required import: from Utils import Range [as alias]
# Or: from Utils.Range import overlap [as alias]
def getEntityHeadToken(entity, tokens, tokenHeadScores):
if entity.get("headOffset") != None:
charOffsets = Range.charOffsetToTuples(entity.get("headOffset"))
elif entity.get("charOffset") != "":
charOffsets = Range.charOffsetToTuples(entity.get("charOffset"))
else:
charOffsets = []
# Each entity can consist of multiple syntactic tokens, covered by its
# charOffset-range. One of these must be chosen as the head token.
headTokens = [] # potential head tokens
for token in tokens:
tokenOffset = Range.charOffsetToSingleTuple(token.get("charOffset"))
for offset in charOffsets:
if Range.overlap(offset, tokenOffset):
headTokens.append(token)
if len(headTokens)==1: # An unambiguous head token was found
selectedHeadToken = headTokens[0]
else: # One head token must be chosen from the candidates
selectedHeadToken = findHeadToken(headTokens, tokenHeadScores)
#if verbose:
# print >> sys.stderr, "Selected head:", token.attrib["id"], token.attrib["text"]
assert selectedHeadToken != None, entity.get("id")
return selectedHeadToken
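findHeadToken is not shown on this page. Based on how it is called, a minimal sketch could simply return the candidate with the highest head score; how tokenHeadScores is computed (presumably from the dependency parse) is outside this example:
# Hypothetical sketch of findHeadToken: pick the candidate token with the
# highest score in tokenHeadScores (assumed to map token elements to numbers).
def findHeadToken(candidateTokens, tokenHeadScores):
    if len(candidateTokens) == 0:
        return None
    return max(candidateTokens, key=lambda t: tokenHeadScores.get(t, 0))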
Example 8: getMetaMapFeatures
# Required import: from Utils import Range [as alias]
# Or: from Utils.Range import overlap [as alias]
def getMetaMapFeatures(self, token, sentenceGraph, features):
analyses = sentenceGraph.sentenceElement.find("analyses")
if analyses == None:
return
metamap = analyses.find("metamap")
if metamap == None:
return
tokenOffset = Range.charOffsetToSingleTuple(token.get("charOffset"))
skipAttr = set(["charOffset", "text"])
for phrase in metamap.findall("phrase"):
phraseOffset = Range.charOffsetToSingleTuple(phrase.get("charOffset"))
if Range.overlap(tokenOffset, phraseOffset):
attr = phrase.attrib
attrNames = sorted(attr.keys())
for attrName in attrNames:
if attrName in skipAttr:
continue
elif attrName == "score":
features["_metamap_score"] = 0.001 * abs(int(attr[attrName]))
else:
attrValues = attr[attrName].split(",")
for attrValue in attrValues:
features["_metamap_"+attrName+"_"+attrValue.replace(" ", "-")] = 1
Example 9: moveElements
# Required import: from Utils import Range [as alias]
# Or: from Utils.Range import overlap [as alias]
def moveElements(document):
entMap = {}
entSentence = {}
entSentenceIndex = {}
sentences = document.findall("sentence")
sentenceCount = 0
for sentence in sentences:
sentenceOffset = Range.charOffsetToSingleTuple(sentence.get("charOffset"))
# Move entities
entCount = 0
for entity in document.findall("entity"):
entityOffsets = Range.charOffsetToTuples(entity.get("charOffset"))
overlaps = False
for entityOffset in entityOffsets:
if Range.overlap(sentenceOffset, entityOffset):
overlaps = True
break
if overlaps:
document.remove(entity)
sentence.append(entity)
entityId = entity.get("id")
entityIdLastPart = entityId.rsplit(".", 1)[-1]
if entityIdLastPart.startswith("e"):
entity.set("id", sentence.get("id") + "." + entityIdLastPart)
entMap[entityId] = sentence.get("id") + "." + entityIdLastPart
else:
entity.set("docId", entityId)
entity.set("id", sentence.get("id") + ".e" + str(entCount))
entMap[entityId] = sentence.get("id") + ".e" + str(entCount)
entSentence[entityId] = sentence
entSentenceIndex[entityId] = sentenceCount
#newEntityOffset = (entityOffset[0] - sentenceOffset[0], entityOffset[1] - sentenceOffset[0])
newEntityOffsets = []
for entityOffset in entityOffsets:
newOffset = (entityOffset[0] - sentenceOffset[0], entityOffset[1] - sentenceOffset[0])
newOffset = (max(0, newOffset[0]), max(0, newOffset[1]))
if newOffset != (0, 0):
assert newOffset[1] > newOffset[0], (entity.attrib, entityOffsets, sentenceOffset)
newEntityOffsets.append( (entityOffset[0] - sentenceOffset[0], entityOffset[1] - sentenceOffset[0]) )
assert len(newEntityOffsets) > 0, (entity.attrib, entityOffsets, sentenceOffset)
entity.set("origOffset", entity.get("charOffset"))
#entity.set("charOffset", str(newEntityOffset[0]) + "-" + str(newEntityOffset[1]))
entity.set("charOffset", Range.tuplesToCharOffset(newEntityOffsets))
entCount += 1
sentenceCount += 1
if len([x for x in document.findall("entity")]) != 0:
raise Exception("Sentence splitting does not cover the entire document")
# Move interactions
intCount = 0
interactions = []
interactionOldToNewId = {}
for interaction in document.findall("interaction"):
interactions.append(interaction)
#if entSentenceIndex[interaction.get("e1")] < entSentenceIndex[interaction.get("e2")]:
# targetSentence = entSentence[interaction.get("e1")]
#else:
# targetSentence = entSentence[interaction.get("e2")]
# Interactions go to a sentence always by e1, as this is the event they are an argument of.
# If an intersentence interaction is a relation, this shouldn't matter.
targetSentence = entSentence[interaction.get("e1")]
document.remove(interaction)
targetSentence.append(interaction)
newId = targetSentence.get("id") + ".i" + str(intCount)
interactionOldToNewId[interaction.get("id")] = newId
interaction.set("id", newId)
interaction.set("e1", entMap[interaction.get("e1")])
interaction.set("e2", entMap[interaction.get("e2")])
intCount += 1
for interaction in interactions:
if interaction.get("siteOf") != None:
interaction.set("siteOf", interactionOldToNewId[interaction.get("siteOf")])
Example 10: extend
# Required import: from Utils import Range [as alias]
# Or: from Utils.Range import overlap [as alias]
def extend(input, output=None, entityTypes=["Bacterium"], verbose=False):
if not (ET.iselement(input) and input.tag == "sentence"):
print >> sys.stderr, "Loading corpus file", input
corpusTree = ETUtils.ETFromObj(input)
corpusRoot = corpusTree.getroot()
bacteriaTokens = ExampleBuilders.PhraseTriggerExampleBuilder.getBacteriaTokens()
if not (ET.iselement(input) and input.tag == "sentence"):
sentences = corpusRoot.getiterator("sentence")
else:
sentences = [input]
counts = defaultdict(int)
for sentence in sentences:
incorrectCount = 0
sentenceText = sentence.get("text")
tokens = tokenize(sentenceText)
for entity in sentence.findall("entity"):
counts["all-entities"] += 1
if entity.get("type") not in entityTypes:
continue
headOffset = entity.get("headOffset")
if headOffset == None:
if verbose: print "WARNING, no head offset for entity", entity.get("id")
headOffset = entity.get("charOffset")
headOffset = Range.charOffsetToTuples(headOffset)[0]
charOffset = entity.get("charOffset")
assert charOffset != None, "WARNING, no head offset for entity " + str(entity.get("id"))
charOffset = Range.charOffsetToTuples(charOffset)[0]
tokPos = [0,0]
tokIndex = None
# find main token
for i in range(len(tokens)):
token = tokens[i]
tokPos[1] = tokPos[0] + len(token) # - 1
if Range.overlap(headOffset, tokPos):
tokIndex = i
break
tokPos[0] += len(token)
assert tokIndex != None, (entity.get("id"), entity.get("text"), tokens)
skip = False
if tokPos[0] < headOffset[0]:
tokPos = headOffset
skip = True
if not skip:
# Extend before
beginIndex = tokIndex
for i in range(tokIndex-1, -1, -1):
token = tokens[i]
if token.isspace():
continue
if not isBacteriaToken(token, bacteriaTokens, i - tokIndex):
beginIndex = i + 1
break
if i == 0:
beginIndex = i
while tokens[beginIndex].isspace() or isExtraWord(tokens[beginIndex], toLower=False):
beginIndex += 1
if beginIndex >= tokIndex:
beginIndex = tokIndex
break
# Extend after
endIndex = tokIndex
if tokens[tokIndex][-1] != ",":
endIndex = tokIndex
for i in range(tokIndex+1, len(tokens)):
token = tokens[i]
if token.isspace():
continue
if not isBacteriaToken(token, bacteriaTokens, i - tokIndex):
endIndex = i - 1
break
if i == len(tokens) - 1:
endIndex = i
while tokens[endIndex].isspace():
endIndex -= 1
# Modify range
if tokIndex > beginIndex:
for token in reversed(tokens[beginIndex:tokIndex]):
tokPos[0] -= len(token)
if tokIndex < endIndex:
for token in tokens[tokIndex+1:endIndex+1]:
tokPos[1] += len(token)
# Attempt to remove trailing periods and commas
while not sentenceText[tokPos[1] - 1].isalnum():
tokPos[1] -= 1
if tokPos[1] < tokPos[0] + 1:
tokPos[1] = tokPos[0] + 1
break
while not sentenceText[tokPos[0]].isalnum():
tokPos[0] += 1
if tokPos[0] >= tokPos[1]:
tokPos[0] = tokPos[1] - 1
break
# Split merged names
#newPos = [tokPos[0], tokPos[1]]
#for split in sentenceText[tokPos[0]:tokPos[1]+1].split("/"):
# newPos[0] += len(split)
# if
# Insert changed charOffset
#......... the rest of this function's code has been omitted .........
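The example above is truncated. Based on the signature, a call over a corpus file might look like the line below; since the output handling is cut off, only the input side is shown and the file name is invented:
# Hypothetical call: widen Bacterium entity spans in a corpus file.
extend("corpus.xml", entityTypes=["Bacterium"], verbose=True)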
Example 11: mapEntity
# Required import: from Utils import Range [as alias]
# Or: from Utils.Range import overlap [as alias]
def mapEntity(self, entityElement, verbose=False):
"""
Determine the head token for a named entity or trigger. The head token is the token closest
to the root for the subtree of the dependency parse spanned by the text of the element.
@param entityElement: a semantic node (trigger or named entity)
@type entityElement: cElementTree.Element
@param verbose: Print selected head tokens on screen
@param verbose: boolean
"""
headOffset = None
if entityElement.get("headOffset") != None:
headOffset = Range.charOffsetToSingleTuple(entityElement.get("headOffset"))
if entityElement.get("charOffset") != "":
charOffsets = Range.charOffsetToTuples(entityElement.get("charOffset"))
else:
charOffsets = []
# Each entity can consist of multiple syntactic tokens, covered by its
# charOffset-range. One of these must be chosen as the head token.
headTokens = [] # potential head tokens
for token in self.tokens:
#print token.attrib["id"], token.attrib["charOffset"]
tokenOffset = Range.charOffsetToSingleTuple(token.get("charOffset"))
if headOffset != None and entityElement.get("type") != "Binding":
# A head token can already be defined in the headOffset-attribute.
# However, depending on the tokenization, even this range may
# contain multiple tokens. Still, it can always be assumed that
# if headOffset is defined, the correct head token is in this range.
if Range.overlap(headOffset,tokenOffset):
headTokens.append(token)
else:
for offset in charOffsets:
if Range.overlap(offset,tokenOffset):
headTokens.append(token)
if len(headTokens)==1: # An unambiguous head token was found
token = headTokens[0]
else: # One head token must be chosen from the candidates
selHead = None
if entityElement.get("type") == "Binding":
for t in headTokens:
compText = t.get("text").lower()
if compText.find("bind") != -1 or compText.find("complex") != -1:
selHead = t
#print "Head:", selHead.get("text"), "/", entityElement.get("text"), entityElement.get("headOffset"), selHead.get("charOffset")
entityElement.set("headOffset", selHead.get("charOffset"))
break
if selHead == None:
token = self.findHeadToken(headTokens)
else:
token = selHead
if verbose:
print >> sys.stderr, "Selected head:", token.get("id"), token.get("text")
#assert token != None, entityElement.get("id")
if token != None:
# The ElementTree entity-element is modified by setting the headOffset attribute
if entityElement.get("headOffset") == None or entityElement.get("headOffset") != token.get("charOffset"):
entityElement.set("headOffset", token.get("charOffset"))
if not self.entitiesByToken.has_key(token):
self.entitiesByToken[token] = []
self.entitiesByToken[token].append(entityElement)
else:
print >> sys.stderr, "Warning, no tokens for entity", entityElement.get("id")
return token
Example 12: mapInteractions
# Required import: from Utils import Range [as alias]
# Or: from Utils.Range import overlap [as alias]
def mapInteractions(self, entityElements, interactionElements, verbose=False):
"""
Maps the semantic interactions to the syntactic graph.
Syntactic dependencies are defined between tokens. Semantic edges (interactions)
are defined between annotated entities. To utilize the correlation of the dependency
parse with the semantic interactions, the graphs must be aligned by mapping the
interaction graph's nodes (entities) to the syntactic graph's nodes (tokens). This
is done by determining the head tokens of the entities.
@param entityElements: the semantic nodes (triggers and named entities)
@type entityElements: list of cElementTree.Element objects
@param interactionElements: the semantic edges (e.g. Cause and Theme for GENIA)
@type interactionElements: list of cElementTree.Element objects
@param verbose: Print selected head tokens on screen
@param verbose: boolean
"""
self.interactions = interactionElements
self.entities = entityElements
# Entities that have no text binding cannot be mapped and are therefore removed
for entity in self.entities[:]:
if entity.get("charOffset") == "":
self.entities.remove(entity)
#self.interactionGraph = NX.XDiGraph(multiedges = multiedges)
#if multiedges:
# self.interactionGraph = NX10.MultiDiGraph()
#else:
# self.interactionGraph = NX10.DiGraph()
self.interactionGraph = Graph()
self.interactionGraph.addNodes(self.tokens)
#for token in self.tokens:
# self.interactionGraph.add_node(token)
self.entitiesByToken = {} # a mapping for fast access
self.entitiesById = {}
self.entityHeadTokenByEntity = {}
sentenceSpan = (0, len(self.sentenceElement.get("text"))) # for validating the entity offsets
for entity in self.entities[:]:
headToken = self.mapEntity(entity, verbose)
if headToken != None:
self.entityHeadTokenByEntity[entity] = headToken
self.entitiesById[entity.get("id")] = entity
else:
# Check that the entity is within the sentence
if not Range.overlap(Range.charOffsetToSingleTuple(entity.get("charOffset")), sentenceSpan):
raise Exception("Entity " + entity.get("id") + ", charOffset " + entity.get("charOffset") + ", does not overlap with sentence " + self.sentenceElement.get("id") + ", length " + str(sentenceSpan[1]) )
# Assume there simply is no token corresponding to the entity
self.entities.remove(entity)
self._markNamedEntities()
for interaction in self.interactions:
if not self.entitiesById.has_key(interaction.get("e1")):
continue # e1 is outside of this sentence
if not self.entitiesById.has_key(interaction.get("e2")):
continue # e2 is outside of this sentence
token1 = self.entityHeadTokenByEntity[self.entitiesById[interaction.get("e1")]]
token2 = self.entityHeadTokenByEntity[self.entitiesById[interaction.get("e2")]]
# found = False
# if multiedges:
# edges = self.interactionGraph.get_edge_data(token1, token2, default={})
# for i in range(len(edges)):
# edge = edges[i]["element"]
# if edge.attrib["type"] == interaction.attrib["type"]:
# found = True
# break
# if not found:
# self.interactionGraph.add_edge(token1, token2, element=interaction)
# else:
# self.duplicateInteractionEdgesRemoved += 1
found = False
edges = self.interactionGraph.getEdges(token1, token2)
for edge in edges:
if edge[2].get("type") == interaction.get("type"):
found = True
break
if not found:
self.interactionGraph.addEdge(token1, token2, interaction)
else:
# TODO: "skipped" would be better than "removed"
self.duplicateInteractionEdgesRemoved += 1
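The Graph class used here replaces the older NetworkX-based code left commented out above. From the calls in this example, a minimal sketch of the interface mapInteractions relies on might be:
# Hypothetical sketch of the small Graph interface used above; the real class
# belongs to the original codebase and is certainly richer than this.
class Graph:
    def __init__(self):
        self.nodes = []
        self.edges = []  # (node1, node2, element) triples

    def addNodes(self, nodes):
        self.nodes.extend(nodes)

    def addEdge(self, node1, node2, element):
        self.edges.append((node1, node2, element))

    def getEdges(self, node1, node2):
        # Matches the edge[2].get("type") access pattern in the example above.
        return [e for e in self.edges if e[0] == node1 and e[1] == node2]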