当前位置: 首页>>代码示例>>Python>>正文


Python Range.charOffsetToSingleTuple方法代码示例

本文整理汇总了 Python 中 Utils.Range.charOffsetToSingleTuple 方法的典型用法代码示例。如果您正在寻找以下问题的答案:Range.charOffsetToSingleTuple 方法的具体用法是什么?Range.charOffsetToSingleTuple 怎么用?有哪些 Range.charOffsetToSingleTuple 的使用例子?那么这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块 Utils.Range 的用法示例。


在下文中一共展示了Range.charOffsetToSingleTuple方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: _markNamedEntities

# 需要导入模块: from Utils import Range [as 别名]
# 或者: from Utils.Range import charOffsetToSingleTuple [as 别名]
 def _markNamedEntities(self):
     """
     Flag which tokens belong to entities, to _named_ entities and to entity heads.

     Named entities are sometimes masked when testing learning of interactions, to
     prevent the system making a trivial decision based on commonly interacting names.

     Rebuilds three dictionaries keyed by token:
       self.tokenIsEntity     -- True if the token overlaps any span of any entity
       self.tokenIsName       -- True if the token overlaps an entity that counts as a name
       self.tokenIsEntityHead -- list of the entities whose head offset overlaps the token

     Side effect: an entity that has no "isName" attribute is treated as a name and
     gets isName="True" written to it when one of its spans overlaps a token.
     """
     self.tokenIsName = {}
     self.tokenIsEntity = {}
     self.tokenIsEntityHead = {}
     # Start every token from the "not part of anything" state
     for tok in self.tokens:
         self.tokenIsName[tok] = False
         self.tokenIsEntity[tok] = False
         self.tokenIsEntityHead[tok] = []
     for ent in self.entities:
         entSpans = Range.charOffsetToTuples(ent.get("charOffset"))
         headSpan = Range.charOffsetToSingleTuple(ent.get("headOffset"))
         for tok in self.tokens:
             tokSpan = Range.charOffsetToSingleTuple(tok.get("charOffset"))
             for entSpan in entSpans:
                 if not Range.overlap(entSpan, tokSpan):
                     continue
                 self.tokenIsEntity[tok] = True
                 nameFlag = ent.get("isName")
                 if nameFlag is None:
                     # Unmarked entities default to being names
                     ent.set("isName", "True")
                     self.tokenIsName[tok] = True
                 elif nameFlag == "True":
                     self.tokenIsName[tok] = True
             if Range.overlap(headSpan, tokSpan):
                 self.tokenIsEntityHead[tok].append(ent)
开发者ID:ninjin,项目名称:TEES,代码行数:32,代码来源:SentenceGraph.py

示例2: getHeads

# 需要导入模块: from Utils import Range [as 别名]
# 或者: from Utils.Range import charOffsetToSingleTuple [as 别名]
def getHeads(corpus):
    """
    Count how often each text string occurs as the head of each entity type.

    corpus -- anything accepted by ETUtils.ETFromObj

    Returns a dict of dicts: headDict[entityType][headText] -> count. The extra
    "None" key collects the texts of tokens whose "charOffset" string is not the
    "headOffset" string of any entity in the same sentence.
    """
    corpus = ETUtils.ETFromObj(corpus)
    headDict = {"None": {}}
    for sentence in corpus.getiterator("sentence"):
        headOffsetStrings = set()
        for entity in sentence.findall("entity"):
            typeCounts = headDict.setdefault(entity.get("type"), {})
            eText = entity.get("text")
            headOffsetString = entity.get("headOffset")
            headOffsetStrings.add(headOffsetString)
            headOffset = Range.charOffsetToSingleTuple(headOffsetString)
            charOffset = Range.charOffsetToSingleTuple(entity.get("charOffset"))
            if headOffset == charOffset:
                headText = eText
            else:
                # The indices are relative to the entity's own start, so they must
                # slice the entity text (the original sliced an undefined
                # `sentenceText`, which raised NameError).
                # Assumes headOffset and charOffset use the same coordinate system
                # with an inclusive end -- TODO confirm.
                headText = eText[headOffset[0]-charOffset[0]:headOffset[1]-charOffset[0]+1]
            typeCounts[headText] = typeCounts.get(headText, 0) + 1
        # NOTE(review): the original iterated over an undefined `tokens` name.
        # This assumes the relevant tokens are the "token" elements nested under
        # the sentence; if a sentence holds several tokenizations they will all
        # be counted -- confirm against the corpus format.
        noneCounts = headDict["None"]
        for token in sentence.getiterator("token"):
            if token.get("charOffset") not in headOffsetStrings: # token is not the head of any entity
                headText = token.get("text")
                noneCounts[headText] = noneCounts.get(headText, 0) + 1
    return headDict
开发者ID:ninjin,项目名称:TEES,代码行数:31,代码来源:DetectHeads.py

示例3: selectBestMatch

# 需要导入模块: from Utils import Range [as 别名]
# 或者: from Utils.Range import charOffsetToSingleTuple [as 别名]
def selectBestMatch(entity, phrases):
    """
    Return the phrase whose span mismatches the entity's span the least.

    The entity's "altOffset" is used as the reference span when present,
    otherwise its "charOffset". On ties the earliest phrase wins. Returns None
    when no phrase scores below sys.maxint (including when phrases is empty).
    """
    targetSpan = Range.charOffsetToSingleTuple(entity.get("charOffset"))
    if entity.get("altOffset") != None:
        targetSpan = Range.charOffsetToSingleTuple(entity.get("altOffset"))
    lowestMismatch = sys.maxint
    winner = None
    for candidate in phrases:
        candidateSpan = Range.charOffsetToSingleTuple(candidate.get("charOffset"))
        score = Range.mismatch(targetSpan, candidateSpan)
        # Strict comparison keeps the first of several equally good phrases
        if score < lowestMismatch:
            lowestMismatch = score
            winner = candidate
    return winner
开发者ID:DUT-LiuYang,项目名称:TEES,代码行数:12,代码来源:MapPhrases.py

示例4: exportChemProtPredictions

# 需要导入模块: from Utils import Range [as 别名]
# 或者: from Utils.Range import charOffsetToSingleTuple [as 别名]
def exportChemProtPredictions(xml, outPath, fileTypes="predictions", setNames=None):
    """
    Write the documents of an interaction XML corpus out as tab-separated ChemProt files.

    xml       -- anything accepted by ETUtils.ETFromObj
    outPath   -- passed through to openOutFile for every requested file
    fileTypes -- "all", a comma-separated string, or a list; allowed values are
                 "predictions", "abstracts", "entities" and "relations"
    setNames  -- optional dict used to rename a document's "set" attribute
                 (sets missing from the dict keep their own name)

    Returns the parsed XML object. Raises Exception for an unknown file type.

    openOutFile is defined elsewhere; this function relies on it returning either
    a writable file object or None (presumably None when the file type was not
    requested -- confirm). Every file object found in openFiles is closed when the
    export ends, including when it ends with an exception.
    """
    knownTypes = ["predictions", "abstracts", "entities", "relations"]
    if fileTypes == "all":
        fileTypes = list(knownTypes)
    elif isinstance(fileTypes, basestring):
        fileTypes = fileTypes.split(",")
    for fileType in fileTypes:
        if fileType not in knownTypes:
            raise Exception("Unknown ChemProt file type '" + str(fileType) + "'")
    xml = ETUtils.ETFromObj(xml)
    outFiles = {}
    openFiles = {}
    try:
        for document in xml.getiterator("document"):
            docId = document.get("origId")
            setName = document.get("set")
            if setNames is not None:
                setName = setNames.get(setName, setName)
            if setName not in outFiles:
                outFiles[setName] = {}
            # Abstracts: the document text is split into title and abstract at the
            # end of the title offset, skipping the single character between them
            outFile = openOutFile(setName, outPath, "abstracts", fileTypes, outFiles, openFiles)
            if outFile is not None:
                docText = document.get("text")
                titleOffset = Range.charOffsetToSingleTuple(document.get("titleOffset"))
                assert titleOffset[0] == 0
                outFile.write("\t".join([docId, docText[:titleOffset[1]], docText[titleOffset[1]+1:]]) + "\n")
            # Entities: written when requested, and always indexed by id so that
            # the interactions below can resolve their arguments
            entityById = {}
            for entity in document.getiterator("entity"):
                outFile = openOutFile(setName, outPath, "entities", fileTypes, outFiles, openFiles)
                if outFile is not None:
                    eType = entity.get("type")
                    if entity.get("normalized") is not None and entity.get("type") == "GENE":
                        eType += "-Y" if entity.get("normalized") == "True" else "-N"
                    offset = Range.charOffsetToSingleTuple(entity.get("charOffset"))
                    outFile.write("\t".join([docId, entity.get("origId"), eType, str(offset[0]), str(offset[1]), entity.get("text")]) + "\n")
                assert entity.get("id") not in entityById
                entityById[entity.get("id")] = entity
            # Interactions: one line in the relations file and one in the predictions file
            for interaction in document.getiterator("interaction"):
                e1 = entityById[interaction.get("e1")]
                e2 = entityById[interaction.get("e2")]
                outFile = openOutFile(setName, outPath, "relations", fileTypes, outFiles, openFiles)
                if outFile is not None:
                    evaluated = "X"
                    if interaction.get("evaluated") is not None:
                        evaluated = "Y " if interaction.get("evaluated") == "True" else "N "
                    outFile.write("\t".join([docId, interaction.get("type"), evaluated, interaction.get("relType"), "Arg1:" + e1.get("origId"), "Arg2:" + e2.get("origId")]) + "\n")
                outFile = openOutFile(setName, outPath, "predictions", fileTypes, outFiles, openFiles)
                if outFile is not None:
                    outFile.write("\t".join([docId, interaction.get("type"), "Arg1:" + e1.get("origId"), "Arg2:" + e2.get("origId")]) + "\n")
    finally:
        # Close in a finally block so a failure part-way through the corpus
        # does not leak the already opened output files
        print >> sys.stderr, "Closing output files"
        for f in openFiles.values():
            f.close()
    return xml
开发者ID:jbjorne,项目名称:TEES,代码行数:57,代码来源:convertChemProt.py

示例5: getNECounts

# 需要导入模块: from Utils import Range [as 别名]
# 或者: from Utils.Range import charOffsetToSingleTuple [as 别名]
def getNECounts(phrases, entities):
    """
    Count, for every phrase, the given entities that the phrase's span contains.

    Only entities whose "given" attribute is "True" are considered. Returns a
    dict mapping each phrase to its count (0 when nothing matches).
    """
    counts = {}
    for phrase in phrases:
        phraseSpan = Range.charOffsetToSingleTuple(phrase.get("charOffset"))
        hits = 0
        for entity in entities:
            # only check names
            if entity.get("given") == "True":
                entitySpan = Range.charOffsetToSingleTuple(entity.get("charOffset"))
                if Range.contains(phraseSpan, entitySpan):
                    hits += 1
        counts[phrase] = hits
    return counts
开发者ID:DUT-LiuYang,项目名称:TEES,代码行数:13,代码来源:MapPhrases.py

示例6: getMatchingPhrases

# 需要导入模块: from Utils import Range [as 别名]
# 或者: from Utils.Range import charOffsetToSingleTuple [as 别名]
def getMatchingPhrases(entity, phraseOffsets, phraseDict):
    """
    Collect the phrases whose span lies between the entity's inner and outer span.

    The outer span is the entity's "charOffset"; the inner span is its
    "altOffset" when present, otherwise the same as the outer span. A phrase
    offset matches when the outer span contains it and it contains the inner
    span. Entities with isName == "True" never match anything.

    Returns a list with all phrases stored in phraseDict under matching offsets.
    """
    if entity.get("isName") == "True":
        return []
    outerSpan = Range.charOffsetToSingleTuple(entity.get("charOffset"))
    altString = entity.get("altOffset")
    if altString == None:
        innerSpan = outerSpan
    else:
        innerSpan = Range.charOffsetToSingleTuple(altString)
    found = []
    for candidate in phraseOffsets:
        if not Range.contains(outerSpan, candidate):
            continue
        if not Range.contains(candidate, innerSpan):
            continue
        found.extend(phraseDict[candidate])
    return found
开发者ID:DUT-LiuYang,项目名称:TEES,代码行数:16,代码来源:MapPhrases.py

示例7: insertElements

# 需要导入模块: from Utils import Range [as 别名]
# 或者: from Utils.Range import charOffsetToSingleTuple [as 别名]
def insertElements(corpus, specAnn):
    """
    Move document-level annotation spans into the sentences that contain them.

    corpus  -- element tree whose "document" elements contain "sentence" elements
    specAnn -- dict mapping a document's "origId" to a list of span elements; each
               span carries a document-level "offset" (an indexable begin/end
               pair) and a "text" attribute

    A span lying completely inside a sentence is removed from specAnn, gets a
    sentence-relative "charOffset" attribute, loses its "offset" attribute and is
    appended to the sentence's analyses/entities element, which is created when
    missing. Spans that only partly overlap a sentence are left in specAnn.

    Raises AssertionError if a document has no entry in specAnn or if the sentence
    text at the computed offset differs from the span's "text".
    """
    for document in corpus.iter('document'):
        docId = document.get("origId")
        assert docId in specAnn, docId
        for sentence in document.iter('sentence'):
            sentOffset = Range.charOffsetToSingleTuple(sentence.get("charOffset"))
            # An Element with no children is falsy, so existence must be tested
            # with "is None"; a truthiness test would add a second, duplicate
            # element next to an existing empty one.
            analyses = sentence.find("analyses")
            if analyses is None:
                analyses = ET.SubElement(sentence, "analyses")
            # Find the container
            container = analyses.find("entities")
            if container is None:
                container = ET.SubElement(analyses, "entities")
            # Map the spans; iterate over a copy because matched spans are removed
            for span in specAnn[docId][:]:
                offset = span.get("offset")
                if Range.overlap(offset, sentOffset):
                    # Skip spans that cross a sentence boundary
                    if sentOffset[0] > offset[0] or sentOffset[1] < offset[1]:
                        continue
                    specAnn[docId].remove(span)
                    charOffset = (offset[0] - sentOffset[0], offset[1] - sentOffset[0])
                    matchingText = sentence.get("text")[charOffset[0]:charOffset[1]]
                    spanText = span.get("text")
                    assert matchingText == spanText, (matchingText, spanText, charOffset)
                    span.set("charOffset", "-".join([str(x) for x in charOffset]))
                    # A "--" would mean one of the relative offsets came out negative
                    assert not "--" in span.get("charOffset"), [str(x) for x in charOffset]
                    del span.attrib["offset"]
                    container.append(span)
开发者ID:jbjorne,项目名称:TEES,代码行数:37,代码来源:insertResources.py

示例8: fixAltOffsets

# 需要导入模块: from Utils import Range [as 别名]
# 或者: from Utils.Range import charOffsetToSingleTuple [as 别名]
def fixAltOffsets(input, output=None):
    """
    Shift every entity "altOffset" by the start offset of its sentence.

    input  -- corpus object or path accepted by ETUtils.ETFromObj
    output -- if not None, the corpus root is written there with ETUtils.write

    Each entity with an "altOffset" must have exactly one span in it (asserted).
    Progress and a final count are reported on stderr. Returns the corpus tree.
    """
    print >> sys.stderr, "Loading corpus", input
    corpusTree = ETUtils.ETFromObj(input)
    print >> sys.stderr, "Corpus file loaded"
    corpusRoot = corpusTree.getroot()

    docCount = 0
    sentencesCreated = 0
    sentences = list(corpusRoot.getiterator("sentence"))
    counter = ProgressCounter(len(sentences), "FixAltOffsets")
    fixCount = 0
    # fix spans
    for sentence in sentences:
        counter.update(1, "Fixing AltOffsets for sentence ("+sentence.get("id")+"): ")
        sentOffset = Range.charOffsetToSingleTuple(sentence.get("charOffset"))
        for entity in sentence.findall("entity"):
            altOffsetString = entity.get("altOffset")
            if altOffsetString != None:
                altOffsets = Range.charOffsetToTuples(altOffsetString)
                assert len(altOffsets) == 1
                # Subtract the sentence start from both ends of each span, in place
                for index, span in enumerate(altOffsets):
                    altOffsets[index] = (span[0] - sentOffset[0], span[1] - sentOffset[0])
                entity.set("altOffset", Range.tuplesToCharOffset(altOffsets))
                fixCount += 1

    print >> sys.stderr, "Fixed", fixCount, "altOffsets"

    if output != None:
        print >> sys.stderr, "Writing output to", output
        ETUtils.write(corpusRoot, output)
    return corpusTree
开发者ID:DUT-LiuYang,项目名称:TEES,代码行数:36,代码来源:FixAltOffsets.py

示例9: addSentence

# 需要导入模块: from Utils import Range [as 别名]
# 或者: from Utils.Range import charOffsetToSingleTuple [as 别名]
 def addSentence(self, sentenceGraph):
     """
     Add one sentence's interaction and event token distances to the running statistics.

     Tokens are ordered by character offset and an interaction's span is the
     absolute difference between the indices of its two entities' head tokens.
     For interactions with event == "True", the span of the event (keyed by the
     interaction's e1 id) is the distance between the smallest and largest head
     token index over all of that event's interactions.

     Updates self.interactionSpans / self.eventSpans (span -> count) and the
     "min"/"max" entries of self.intSpan / self.eventSpan. Does nothing when
     sentenceGraph is None.
     """
     if sentenceGraph == None:
         return
     orderedTokens = sorted([(Range.charOffsetToSingleTuple(x.get("charOffset")), x) for x in sentenceGraph.tokens])
     indexByTokenId = {}
     for position, offsetAndToken in enumerate(orderedTokens):
         indexByTokenId[offsetAndToken[1].get("id")] = position
     assert len(indexByTokenId) == len(orderedTokens) # check that there were no duplicate ids
     entityById = {}
     for entity in sentenceGraph.entities:
         entityById[entity.get("id")] = entity
     eventBounds = {}
     for interaction in sentenceGraph.interactions:
         e1Id = interaction.get("e1")
         e2Id = interaction.get("e2")
         e1 = entityById[e1Id]
         e2 = entityById[e2Id]
         headToken1 = sentenceGraph.entityHeadTokenByEntity[e1]
         headToken2 = sentenceGraph.entityHeadTokenByEntity[e2]
         index1 = indexByTokenId[headToken1.get("id")]
         index2 = indexByTokenId[headToken2.get("id")]
         distance = abs(index1 - index2)
         self.interactionSpans[distance] = self.interactionSpans.get(distance, 0) + 1
         self.intSpan["min"] = min(self.intSpan.get("min"), distance)
         self.intSpan["max"] = max(self.intSpan.get("max"), distance)
         if interaction.get("event") != "True":
             continue
         # Widen the event's token range to cover both ends of this interaction
         bounds = eventBounds.setdefault(e1Id, {"min":9999, "max":-9999})
         bounds["min"] = min(bounds["min"], index1, index2)
         bounds["max"] = max(bounds["max"], index1, index2)
     for eventId in sorted(eventBounds):
         bounds = eventBounds[eventId]
         eventSpan = bounds["max"] - bounds["min"]
         self.eventSpans[eventSpan] = self.eventSpans.get(eventSpan, 0) + 1
         self.eventSpan["min"] = min(self.eventSpan.get("min"), eventSpan)
         self.eventSpan["max"] = max(self.eventSpan.get("max"), eventSpan)
开发者ID:jbjorne,项目名称:TEES,代码行数:33,代码来源:DistanceAnalyzer.py

示例10: getPhraseDict

# 需要导入模块: from Utils import Range [as 别名]
# 或者: from Utils.Range import charOffsetToSingleTuple [as 别名]
def getPhraseDict(phrases):
    """
    Group phrases by their character offset.

    Returns a dict mapping each parsed "charOffset" value to the list of phrases
    that have that offset, in the order they appear in phrases.
    """
    phraseDict = {}
    for phrase in phrases:
        phraseOffset = Range.charOffsetToSingleTuple(phrase.get("charOffset"))
        # setdefault replaces the deprecated dict.has_key check-then-insert
        phraseDict.setdefault(phraseOffset, []).append(phrase)
    return phraseDict
开发者ID:DUT-LiuYang,项目名称:TEES,代码行数:11,代码来源:MapPhrases.py

示例11: moveElements

# 需要导入模块: from Utils import Range [as 别名]
# 或者: from Utils.Range import charOffsetToSingleTuple [as 别名]
def moveElements(document):
    """
    Move a document's direct entity and interaction children into its sentences.

    An entity goes to the first sentence whose span overlaps it. Its id becomes
    "<sentence id>.<last id part>" when the last dot-separated part of the old id
    starts with "e"; otherwise the old id is kept in "docId" and the new id is
    "<sentence id>.e<n>". The entity's "charOffset" is made sentence-relative and
    the previous value is kept in "origOffset".

    An interaction goes to the sentence of whichever argument entity sits in the
    earlier sentence (e2's sentence on a tie), is renumbered "<sentence id>.i<n>"
    and has e1/e2 rewritten to the new entity ids. A KeyError is raised if an
    interaction refers to an entity that was not moved.
    """
    entMap = {}
    entSentence = {}
    entSentenceIndex = {}
    for sentenceCount, sentence in enumerate(document.findall("sentence")):
        sentenceOffset = Range.charOffsetToSingleTuple(sentence.get("charOffset"))
        # Move entities
        entCount = 0
        for entity in document.findall("entity"):
            entityOffset = Range.charOffsetToSingleTuple(entity.get("charOffset"))
            if not Range.overlap(sentenceOffset, entityOffset):
                continue
            document.remove(entity)
            sentence.append(entity)
            entityId = entity.get("id")
            entityIdLastPart = entityId.rsplit(".", 1)[-1]
            if entityIdLastPart.startswith("e"):
                newEntityId = sentence.get("id") + "." + entityIdLastPart
            else:
                # Remember the original id before replacing it with a generated one
                entity.set("docId", entityId)
                newEntityId = sentence.get("id") + ".e" + str(entCount)
            entity.set("id", newEntityId)
            entMap[entityId] = newEntityId
            entSentence[entityId] = sentence
            entSentenceIndex[entityId] = sentenceCount
            relativeBegin = entityOffset[0] - sentenceOffset[0]
            relativeEnd = entityOffset[1] - sentenceOffset[0]
            entity.set("origOffset", entity.get("charOffset"))
            entity.set("charOffset", str(relativeBegin) + "-" + str(relativeEnd))
            entCount += 1
    # Move interactions
    for intCount, interaction in enumerate(document.findall("interaction")):
        e1Id = interaction.get("e1")
        e2Id = interaction.get("e2")
        if entSentenceIndex[e1Id] < entSentenceIndex[e2Id]:
            targetSentence = entSentence[e1Id]
        else:
            targetSentence = entSentence[e2Id]
        document.remove(interaction)
        targetSentence.append(interaction)
        interaction.set("id", targetSentence.get("id") + ".i" + str(intCount))
        interaction.set("e1", entMap[e1Id])
        interaction.set("e2", entMap[e2Id])
开发者ID:ninjin,项目名称:TEES,代码行数:46,代码来源:GeniaSentenceSplitter.py

示例12: makeDETSubPhrases

# 需要导入模块: from Utils import Range [as 别名]
# 或者: from Utils.Range import charOffsetToSingleTuple [as 别名]
def makeDETSubPhrases(phrases, tokens, phraseDict, filter=None):
    """
    Create "DT-" phrases that extend a phrase to include a preceding determiner.

    phrases    -- phrase elements with "type", "charOffset", "begin" and "end"
    tokens     -- token sequence indexed by the phrases' "begin" values
    phraseDict -- offset -> list of phrases; updated with every phrase created here
    filter     -- optional collection of phrase types to process (None = all)

    For each phrase whose preceding token has POS "DT", a phrase spanning from
    that token's start to the phrase's end is built with makePhrase. It is kept
    only when phraseDict has no phrase at that offset yet. Returns the list of
    new phrases.
    """
    newPhrases = []
    for phrase in phrases:
        if filter is not None and phrase.get("type") not in filter:
            continue
        phraseOffset = Range.charOffsetToSingleTuple(phrase.get("charOffset"))
        phraseBegin = int(phrase.get("begin"))
        phraseEnd = int(phrase.get("end"))
        if phraseBegin > 0 and tokens[phraseBegin - 1].get("POS") == "DT":
            determiner = tokens[phraseBegin - 1]
            newPhraseOffset = (
                Range.charOffsetToSingleTuple(determiner.get("charOffset"))[0],
                phraseOffset[1],
            )
            newPhrase = makePhrase("DT-" + phrase.get("type"), newPhraseOffset, phraseBegin - 1, phraseEnd)
            # "in" replaces the deprecated dict.has_key
            if newPhraseOffset not in phraseDict:
                newPhrases.append(newPhrase)
                phraseDict[newPhraseOffset] = [newPhrase]
    return newPhrases
开发者ID:DUT-LiuYang,项目名称:TEES,代码行数:21,代码来源:MapPhrases.py

示例13: getTokens

# 需要导入模块: from Utils import Range [as 别名]
# 或者: from Utils.Range import charOffsetToSingleTuple [as 别名]
 def getTokens(self, entity, tokenTuples):
     """
     Return the texts of the first run of consecutive tokens overlapping the entity.

     tokenTuples -- sequence of (offset, token) pairs

     Scanning stops at the first non-overlapping token that follows a match.
     The entity must have a "charOffset" attribute (asserted).
     """
     offsetString = entity.get("charOffset")
     assert offsetString != None
     entitySpan = Range.charOffsetToSingleTuple(offsetString)
     texts = []
     for tokenTuple in tokenTuples:
         if Range.overlap(entitySpan, tokenTuple[0]):
             texts.append(tokenTuple[1].get("text"))
             continue
         if texts:  # passed end
             break
     return texts
开发者ID:ninjin,项目名称:TEES,代码行数:13,代码来源:NameGazetteer.py

示例14: makeTokenSubPhrases

# 需要导入模块: from Utils import Range [as 别名]
# 或者: from Utils.Range import charOffsetToSingleTuple [as 别名]
def makeTokenSubPhrases(tokens, phraseDict, includePOS=["PRP$", "IN", "WP$"]):
    """
    Create single-token phrases for tokens with one of the given POS tags.

    tokens     -- token elements with "POS" and "charOffset" attributes
    phraseDict -- offset -> list of phrases; updated with every phrase created here
    includePOS -- POS tags to turn into phrases (only read, never modified)

    A phrase of type "TOK-t<POS>" is built with makePhrase for each matching
    token whose offset has no entry in phraseDict yet. Returns the new phrases.
    """
    newPhrases = []
    for index, token in enumerate(tokens):
        tokPOS = token.get("POS")
        if tokPOS not in includePOS:
            continue
        tokOffset = Range.charOffsetToSingleTuple(token.get("charOffset"))
        # "in" replaces the deprecated dict.has_key
        if tokOffset in phraseDict:
            continue
        newPhrase = makePhrase("TOK-t" + tokPOS, tokOffset, index, index)
        newPhrases.append(newPhrase)
        phraseDict[tokOffset] = [newPhrase]
    return newPhrases
开发者ID:DUT-LiuYang,项目名称:TEES,代码行数:14,代码来源:MapPhrases.py

示例15: getPatterns

# 需要导入模块: from Utils import Range [as 别名]
# 或者: from Utils.Range import charOffsetToSingleTuple [as 别名]
 def getPatterns(self, e1, e2):
     """
     Build the fore-between, between and between-after patterns for an entity pair.

     Each token of the sentence graph is positioned relative to the two entities
     with self.getRelativePosition. Tokens flagged in tokenIsName are skipped.
     The lowercased text of every other token, together with the texts of up to
     two preceding tokens, is passed to self.addToPattern for:
       "Fore"    -> fore-between pattern
       "Between" -> fore-between, between and between-after patterns
       "After"   -> between-after pattern
     The remembered preceding texts are cleared whenever the position changes.

     Returns the tuple (patternForeBetween, patternBetween, patternBetweenAfter).
     """
     e1Range = Range.charOffsetToSingleTuple(e1.get("charOffset"))
     e2Range = Range.charOffsetToSingleTuple(e2.get("charOffset"))

     tokenPositions = {}
     for token in self.sentenceGraph.tokens:
         tokenPositions[token.get("id")] = self.getRelativePosition(e1Range, e2Range, token)

     patternForeBetween = {}
     patternBetween = {}
     patternBetweenAfter = {}
     # Which patterns a token feeds, by its position relative to the entity pair
     patternsByPosition = {
         "Fore": (patternForeBetween,),
         "Between": (patternForeBetween, patternBetween, patternBetweenAfter),
         "After": (patternBetweenAfter,),
     }

     previousText = None
     beforePreviousText = None
     previousPosition = None
     for token in self.sentenceGraph.tokens:
         if self.sentenceGraph.tokenIsName[token]:
             continue

         tokenId = token.get("id")
         text = token.get("text").lower()
         position = tokenPositions[tokenId]

         # Context does not carry over from one region to the next
         if position != previousPosition:
             previousText = None
             beforePreviousText = None

         for pattern in patternsByPosition.get(position, ()):
             self.addToPattern(pattern, text, previousText, beforePreviousText)

         previousPosition = position
         beforePreviousText = previousText
         previousText = text

     return patternForeBetween, patternBetween, patternBetweenAfter
开发者ID:jbjorne,项目名称:TEES,代码行数:42,代码来源:GiulianoFeatureBuilder.py


注:本文中的Utils.Range.charOffsetToSingleTuple方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。