当前位置: 首页>>代码示例>>Python>>正文


Python Utils.Range类代码示例

本文整理汇总了Python中Utils.Range的典型用法代码示例。如果您正苦于以下问题:Python Range类的具体用法?Python Range怎么用?Python Range使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了Range类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: _markNamedEntities

 def _markNamedEntities(self):
     """
     Flag which tokens belong to _named_ entities.

     Named entities are sometimes masked when testing learning of
     interactions, to prevent the system making a trivial decision based
     on commonly interacting names.
     """
     # Every token starts out unflagged
     self.tokenIsName = {}
     self.tokenIsEntity = {}
     self.tokenIsEntityHead = {}
     for token in self.tokens:
         self.tokenIsName[token] = False
         self.tokenIsEntity[token] = False
         self.tokenIsEntityHead[token] = []
     for entity in self.entities:
         spanOffsets = Range.charOffsetToTuples(entity.get("charOffset"))
         headOffset = Range.charOffsetToSingleTuple(entity.get("headOffset"))
         for token in self.tokens:
             tokOffset = Range.charOffsetToSingleTuple(token.get("charOffset"))
             for spanOffset in spanOffsets:
                 if not Range.overlap(spanOffset, tokOffset):
                     continue
                 self.tokenIsEntity[token] = True
                 isName = entity.get("isName")
                 if isName == None:
                     # A missing isName attribute defaults the entity to a name
                     entity.set("isName", "True")
                     self.tokenIsName[token] = True
                 elif isName == "True":
                     self.tokenIsName[token] = True
             if Range.overlap(headOffset, tokOffset):
                 self.tokenIsEntityHead[token].append(entity)
开发者ID:ninjin,项目名称:TEES,代码行数:30,代码来源:SentenceGraph.py

示例2: fixAltOffsets

def fixAltOffsets(input, output=None):
    """
    Recalculate entity altOffset attributes relative to their sentence.

    The altOffsets are stored in document coordinates; each one is shifted
    by its sentence's starting character offset. If output is given the
    corpus is also written there. Returns the corpus ElementTree.
    """
    print >> sys.stderr, "Loading corpus", input
    corpusTree = ETUtils.ETFromObj(input)
    print >> sys.stderr, "Corpus file loaded"
    corpusRoot = corpusTree.getroot()

    sentences = [x for x in corpusRoot.getiterator("sentence")]
    counter = ProgressCounter(len(sentences), "FixAltOffsets")
    fixCount = 0
    # Shift every altOffset span into sentence-relative coordinates
    for sentence in sentences:
        counter.update(1, "Fixing AltOffsets for sentence ("+sentence.get("id")+"): ")
        sentStart = Range.charOffsetToSingleTuple(sentence.get("charOffset"))[0]
        for entity in sentence.findall("entity"):
            altOffsetString = entity.get("altOffset")
            if altOffsetString == None:
                continue
            altOffsets = Range.charOffsetToTuples(altOffsetString)
            assert len(altOffsets) == 1
            shifted = [(begin - sentStart, end - sentStart) for (begin, end) in altOffsets]
            entity.set("altOffset", Range.tuplesToCharOffset(shifted))
            fixCount += 1

    print >> sys.stderr, "Fixed", fixCount, "altOffsets"

    if output != None:
        print >> sys.stderr, "Writing output to", output
        ETUtils.write(corpusRoot, output)
    return corpusTree
开发者ID:DUT-LiuYang,项目名称:TEES,代码行数:34,代码来源:FixAltOffsets.py

示例3: insertElements

def insertElements(corpus, specAnn):
    """
    Move pre-loaded annotation spans into the corpus XML.

    For each sentence, the spans from specAnn (a dict keyed by document
    origId) that fall completely inside the sentence are appended to the
    sentence's analyses/entities container, with their offsets converted
    from document to sentence coordinates. Consumed spans are removed
    from specAnn.
    """
    for document in corpus.iter('document'):
        docId = document.get("origId")
        assert docId in specAnn, docId
        for sentence in document.iter('sentence'):
            sentOffset = Range.charOffsetToSingleTuple(sentence.get("charOffset"))
            analyses = sentence.find("analyses")
            # BUGFIX: ElementTree elements are falsy when they have no
            # children, so 'if not analyses' would create a duplicate
            # <analyses> next to an existing empty one. Compare with None.
            if analyses is None:
                analyses = ET.SubElement(sentence, "analyses")
            # Find (or create) the container for the inserted spans
            container = analyses.find("entities")
            if container is None:
                container = ET.SubElement(analyses, "entities")
            # Map the spans; iterate over a copy because matched spans are
            # removed from the list while looping.
            for span in specAnn[docId][:]:
                offset = span.get("offset")
                if Range.overlap(offset, sentOffset):
                    # Skip spans that only partially overlap this sentence
                    if sentOffset[0] > offset[0] or sentOffset[1] < offset[1]:
                        continue
                    specAnn[docId].remove(span)
                    charOffset = (offset[0] - sentOffset[0], offset[1] - sentOffset[0])
                    # Sanity check: the mapped span must reproduce the text
                    matchingText = sentence.get("text")[charOffset[0]:charOffset[1]]
                    spanText = span.get("text")
                    assert matchingText == spanText, (matchingText, spanText, charOffset)
                    span.set("charOffset", "-".join([str(x) for x in charOffset]))
                    assert not "--" in span.get("charOffset"), [str(x) for x in charOffset]
                    del span.attrib["offset"] # document-level offset no longer applies
                    container.append(span)
开发者ID:jbjorne,项目名称:TEES,代码行数:35,代码来源:insertResources.py

示例4: getHeads

def getHeads(corpus):
    """
    Count head-word strings per entity type over a corpus.

    Returns a dict mapping entity type -> {head text: count}; the "None"
    key collects texts of tokens that are not the head of any entity.
    """
    corpus = ETUtils.ETFromObj(corpus)
    headDict = {}
    headDict["None"] = {}
    for sentence in corpus.getiterator("sentence"):
        headOffsetStrings = set()
        for entity in sentence.findall("entity"):
            eType = entity.get("type")
            if eType not in headDict:
                headDict[eType] = {}
            eText = entity.get("text")
            headOffset = entity.get("headOffset")
            headOffsetStrings.add(headOffset)
            headOffset = Range.charOffsetToSingleTuple(headOffset)
            charOffset = Range.charOffsetToSingleTuple(entity.get("charOffset"))
            if headOffset == charOffset:
                # The whole entity span is its own head
                if eText not in headDict[eType]: headDict[eType][eText] = 0
                headDict[eType][eText] += 1
            else:
                # BUGFIX: the original indexed an undefined 'sentenceText'.
                # The indices are relative to the entity start, so slice the
                # entity's own text -- TODO confirm against callers.
                headText = eText[headOffset[0]-charOffset[0]:headOffset[1]-charOffset[0]+1]
                if headText not in headDict[eType]: headDict[eType][headText] = 0
                headDict[eType][headText] += 1
        # BUGFIX: the original iterated an undefined 'tokens'; use the
        # sentence's token elements -- TODO confirm the tokenization layout.
        for token in sentence.getiterator("token"):
            if not token.get("charOffset") in headOffsetStrings: # token is not the head of any entity
                headText = token.get("text")
                if headText not in headDict["None"]: headDict["None"][headText] = 0
                headDict["None"][headText] += 1

    return headDict
开发者ID:ninjin,项目名称:TEES,代码行数:29,代码来源:DetectHeads.py

示例5: selectBestMatch

def selectBestMatch(entity, phrases):
    """
    Return the phrase whose character span best matches the entity's.

    The altOffset attribute, when present, takes precedence over charOffset.
    Returns None when phrases is empty.
    """
    entOffset = Range.charOffsetToSingleTuple(entity.get("charOffset"))
    if entity.get("altOffset") != None:
        entOffset = Range.charOffsetToSingleTuple(entity.get("altOffset"))
    bestValue = sys.maxint
    bestPhrase = None
    for phrase in phrases:
        mismatch = Range.mismatch(entOffset, Range.charOffsetToSingleTuple(phrase.get("charOffset")))
        if mismatch < bestValue:
            bestValue = mismatch
            bestPhrase = phrase
    return bestPhrase
开发者ID:DUT-LiuYang,项目名称:TEES,代码行数:10,代码来源:MapPhrases.py

示例6: exportChemProtPredictions

def exportChemProtPredictions(xml, outPath, fileTypes="predictions", setNames=None):
    """
    Export an interaction-XML corpus into ChemProt tab-separated files.

    xml -- the corpus (anything ETUtils.ETFromObj accepts)
    outPath -- path prefix for the per-set output files (opened lazily
               through openOutFile)
    fileTypes -- "all", a list, or a comma-separated string naming a subset
                 of "predictions", "abstracts", "entities", "relations";
                 only the named file types are written
    setNames -- optional dict used to rename document set names

    Returns the corpus ElementTree object.
    """
    if fileTypes == "all":
        fileTypes = ["predictions", "abstracts", "entities", "relations"]
    elif isinstance(fileTypes, basestring):
        fileTypes = fileTypes.split(",")
    for fileType in fileTypes:
        if fileType not in ["predictions", "abstracts", "entities", "relations"]:
            raise Exception("Unknown ChemProt file type '" + str(fileType) + "'")
    xml = ETUtils.ETFromObj(xml)
    #with open(outPath, "wt") as f
    outFiles = {}   # {set name: {file type: open file}}, managed by openOutFile
    openFiles = {}  # every file opened, so they can all be closed at the end
    for document in xml.getiterator("document"):
        docId = document.get("origId")
        setName = document.get("set")
        if setNames != None:
            setName = setNames.get(setName, setName)
        if setName not in outFiles:
            outFiles[setName] = {}
        # Abstracts file: docId, title and abstract tab-separated. The title
        # span comes from the titleOffset attribute; the character after it
        # (the separator) is skipped.
        outFile = openOutFile(setName, outPath, "abstracts", fileTypes, outFiles, openFiles)
        if outFile != None:
            docText = document.get("text")
            #assert docText.count("\t") == 1, (docText.count("\t"), document.attrib)
            #title, abstract = docText.split("\t")
            #titleLength = document.get("titleLength")
            titleOffset = Range.charOffsetToSingleTuple(document.get("titleOffset"))
            assert titleOffset[0] == 0
            outFile.write("\t".join([docId, docText[:titleOffset[1]], docText[titleOffset[1]+1:]]) + "\n")  
        entityById = {}
        # Entities file: one line per entity; normalized GENE entities get a
        # -Y/-N type suffix. The id map is built for the relation loop below.
        for entity in document.getiterator("entity"):
            outFile = openOutFile(setName, outPath, "entities", fileTypes, outFiles, openFiles)
            if outFile != None:
                eType = entity.get("type")
                if entity.get("normalized") != None and entity.get("type") == "GENE":
                    eType += "-Y" if entity.get("normalized") == "True" else "-N"
                offset = Range.charOffsetToSingleTuple(entity.get("charOffset"))
                outFile.write("\t".join([docId, entity.get("origId"), eType, str(offset[0]), str(offset[1]), entity.get("text")]) + "\n")
            assert entity.get("id") not in entityById
            entityById[entity.get("id")] = entity
        # Relations and predictions files: one line per interaction, arguments
        # referenced through the entities' original ids.
        for interaction in document.getiterator("interaction"):
            e1 = entityById[interaction.get("e1")]
            e2 = entityById[interaction.get("e2")]
            outFile = openOutFile(setName, outPath, "relations", fileTypes, outFiles, openFiles)
            if outFile != None:
                evaluated = "X"
                if interaction.get("evaluated") != None:
                    evaluated = "Y " if interaction.get("evaluated") == "True" else "N "
                outFile.write("\t".join([docId, interaction.get("type"), evaluated, interaction.get("relType"), "Arg1:" + e1.get("origId"), "Arg2:" + e2.get("origId")]) + "\n")
            outFile = openOutFile(setName, outPath, "predictions", fileTypes, outFiles, openFiles)
            if outFile != None:
                outFile.write("\t".join([docId, interaction.get("type"), "Arg1:" + e1.get("origId"), "Arg2:" + e2.get("origId")]) + "\n")
    print >> sys.stderr, "Closing output files"
    for f in openFiles.values():
        f.close()
    return xml 
开发者ID:jbjorne,项目名称:TEES,代码行数:55,代码来源:convertChemProt.py

示例7: getNECounts

def getNECounts(phrases, entities):
    """
    For each phrase, count the given (named) entities it fully contains.

    Returns a dict {phrase element: count}.
    """
    counts = {}
    for phrase in phrases:
        phraseSpan = Range.charOffsetToSingleTuple(phrase.get("charOffset"))
        # Only entities marked as given (names) are counted
        counts[phrase] = sum(1 for entity in entities
                             if entity.get("given") == "True" and
                                Range.contains(phraseSpan, Range.charOffsetToSingleTuple(entity.get("charOffset"))))
    return counts
开发者ID:DUT-LiuYang,项目名称:TEES,代码行数:11,代码来源:MapPhrases.py

示例8: processElements

def processElements(xml):
    """
    Rename ddi elements to interaction and normalize entity offsets.

    Each entity is marked as given, and its (possibly disjoint,
    ';'-separated) inclusive-end character offsets are rewritten in the
    exclusive-end format used by TEES 2.0+.
    """
    for ddi in xml.getiterator("ddi"):
        ddi.tag = "interaction"
    for entity in xml.getiterator("entity"):
        entity.set("given", "True")
        spans = Range.charOffsetToTuples(entity.get("charOffset"), rangeSep=";")
        # Inclusive end -> exclusive end
        exclusiveSpans = [(begin, end + 1) for (begin, end) in spans]
        entity.set("charOffset", Range.tuplesToCharOffset(exclusiveSpans))
开发者ID:DUT-LiuYang,项目名称:TEES,代码行数:11,代码来源:convertDDI13.py

示例9: getTokens

 def getTokens(self, entity, tokenTuples):
     """
     Return the texts of the tokens overlapping the entity's span.

     tokenTuples is a sequence of (offset tuple, token element) pairs,
     assumed ordered by offset: collection stops at the first
     non-overlapping token after a match has been found.
     """
     offset = entity.get("charOffset")
     assert offset != None
     offset = Range.charOffsetToSingleTuple(offset)
     texts = []
     for tokOffset, token in tokenTuples:
         if Range.overlap(offset, tokOffset):
             texts.append(token.get("text"))
         elif len(texts) > 0:
             break # tokens are ordered, so the entity span has been passed
     return texts
开发者ID:ninjin,项目名称:TEES,代码行数:11,代码来源:NameGazetteer.py

示例10: fixEntities

def fixEntities(xml):
    counts = defaultdict(int)
    for sentence in xml.getiterator("sentence"):
        sText = sentence.get("text")
        for entity in sentence.findall("entity"):
            charOffset = entity.get("charOffset")
            if charOffset == "-":
                assert False, str(entity)
                sentence.remove(entity)
                counts["removed-invalid"] += 1
            else:
                charOffset = Range.charOffsetToSingleTuple(charOffset)
                # fix length
                realLength = len(entity.get("text"))
                lenDiff = (charOffset[1] - charOffset[0] + 1) - realLength
                if lenDiff != realLength:
                    counts["incorrect-ent-offset"] += 1
                    counts["incorrect-ent-offset-diff"+str(lenDiff)] += 1
                    if abs(lenDiff) > 2:
                        print "Warning, lenDiff:", (lenDiff, charOffset, sText, entity.get("text"), entity.get("id"))
                charOffset = (charOffset[0], charOffset[0] + realLength)
                # find starting position
                entIndex = sText.find(entity.get("text"), charOffset[0])
                if entIndex == -1:
                    for i in [-1,-2,-3]:
                        entIndex = sText.find(entity.get("text"), charOffset[0]+i)
                        if entIndex != -1:
                            break
                if entIndex != 0: # could be lowercase
                    sTextLower = sText.lower()
                    for i in [0,-1,-2,-3]:
                        lowerEntIndex = sTextLower.find(entity.get("text"), charOffset[0]+i)
                        if lowerEntIndex != -1:
                            break
                    if lowerEntIndex != -1 and abs(lowerEntIndex - charOffset[0]) < abs(entIndex - charOffset[0]):
                        entIndex = lowerEntIndex
                assert entIndex != -1, (charOffset, sText, entity.get("text"), entity.get("id"))
                indexDiff = entIndex - charOffset[0]
                if indexDiff != 0:
                    counts["incorrect-ent-index"] += 1
                    counts["incorrect-ent-index-diff"+str(indexDiff)] += 1
                    print "Warning, indexDiff:", (indexDiff, charOffset, sText, entity.get("text"), entity.get("id"))
                # move offset       
                charOffset = (charOffset[0]+indexDiff, charOffset[1]+indexDiff)
                # validate new offset
                sEntity = sText[charOffset[0]:charOffset[1]]
                assert sEntity == entity.get("text") or sEntity.lower() == entity.get("text"), (charOffset, sText, entity.get("text"), entity.get("id"))
                entity.set("charOffset", Range.tuplesToCharOffset( (charOffset[0], charOffset[1])))
                entity.set("given", "True")
        for interaction in sentence.findall("interaction"):
            interaction.set("type", "DDI")
    print "Fix counts:", counts
开发者ID:DUT-LiuYang,项目名称:TEES,代码行数:52,代码来源:convertDDI.py

示例11: getRelativePosition

 def getRelativePosition(self, entity1Range, entity2Range, token):
     """
     Classify a token's position relative to an entity pair.

     Returns "Entity1"/"Entity2" when the token overlaps an entity's
     range, otherwise "Fore", "After" or "Between" depending on where the
     token ends relative to the span covering both entities.
     """
     tokOffset = Range.charOffsetToSingleTuple(token.get("charOffset"))
     if Range.overlap(entity1Range, tokOffset):
         return "Entity1"
     if Range.overlap(entity2Range, tokOffset):
         return "Entity2"
     pairBegin = min(entity1Range[0], entity2Range[0])
     pairEnd = max(entity1Range[1], entity2Range[1])
     if tokOffset[1] < pairBegin:
         return "Fore"
     if tokOffset[1] > pairEnd:
         return "After"
     return "Between"
开发者ID:jbjorne,项目名称:TEES,代码行数:13,代码来源:GiulianoFeatureBuilder.py

示例12: getMatchingPhrases

def getMatchingPhrases(entity, phraseOffsets, phraseDict):
    """
    Collect phrases covering at least the entity's minimal span and at most
    its full span.

    The minimal span is the altOffset when present, otherwise the full
    charOffset. Named entities never match; returns a (possibly empty)
    list of phrase elements.
    """
    if entity.get("isName") == "True":
        return []
    maxOffset = Range.charOffsetToSingleTuple(entity.get("charOffset"))
    altOffsetString = entity.get("altOffset")
    if altOffsetString != None:
        minOffset = Range.charOffsetToSingleTuple(altOffsetString)
    else:
        minOffset = maxOffset
    matches = []
    for phraseOffset in phraseOffsets:
        # Phrase must fit inside the full span and contain the minimal span
        if Range.contains(maxOffset, phraseOffset) and Range.contains(phraseOffset, minOffset):
            matches.extend(phraseDict[phraseOffset])
    return matches
开发者ID:DUT-LiuYang,项目名称:TEES,代码行数:14,代码来源:MapPhrases.py

示例13: makeEntityElement

def makeEntityElement(ann, idCount, docEl):
    """
    Build an interaction-XML entity element from a standoff annotation.

    The element id is derived from the parent document id and idCount;
    annotations from a1 files are marked as given.
    """
    entEl = ET.Element("entity")
    entEl.set("type", ann.type)
    entEl.set("text", ann.text)
    # identifiers
    entEl.set("id", docEl.get("id") + ".e" + str(idCount))
    if ann.id != None:
        entEl.set("origId", docEl.get("origId") + "." + str(ann.id))
    # offsets (altOffsets are written with inclusive ends)
    entEl.set("charOffset", Range.tuplesToCharOffset(ann.charOffsets))
    if len(ann.alternativeOffsets) > 0:
        altStrings = [str(ao[0]) + "-" + str(ao[1]-1) for ao in ann.alternativeOffsets]
        entEl.set("altOffset", ",".join(altStrings))
    if ann.normalization != None:
        entEl.set("normalization", ann.normalization)
    addExtraToElement(entEl, ann.extra)
    # determine if given data: only a1 (name) annotations are given
    assert ann.fileType in ["a1", "a2", "rel"], ann.fileType
    if ann.fileType == "a1":
        entEl.set("given", "True")
    return entEl
开发者ID:ayoshiaki,项目名称:TEES,代码行数:26,代码来源:ConvertXML.py

示例14: addSentence

 def addSentence(self, sentenceGraph):
     """
     Accumulate token-distance statistics for one sentence graph.

     The interaction span is the token-index distance between the two
     argument head tokens; the event span is the index range covered by
     all arguments of one event (interactions with event="True" grouped
     by their e1 entity).
     """
     if sentenceGraph == None:
         return
     # Order tokens by character offset and map token id -> position
     orderedTokens = sorted([(Range.charOffsetToSingleTuple(x.get("charOffset")), x) for x in sentenceGraph.tokens])
     tokenIndices = {}
     for position, (tokOffset, token) in enumerate(orderedTokens):
         tokenIndices[token.get("id")] = position
     assert len(tokenIndices) == len(orderedTokens) # token ids must be unique
     entityById = {}
     for entity in sentenceGraph.entities:
         entityById[entity.get("id")] = entity
     events = {}
     for interaction in sentenceGraph.interactions:
         e1Id = interaction.get("e1")
         e2Id = interaction.get("e2")
         headToken1 = sentenceGraph.entityHeadTokenByEntity[entityById[e1Id]]
         headToken2 = sentenceGraph.entityHeadTokenByEntity[entityById[e2Id]]
         index1 = tokenIndices[headToken1.get("id")]
         index2 = tokenIndices[headToken2.get("id")]
         intSpan = abs(index1 - index2)
         self.interactionSpans[intSpan] = self.interactionSpans.get(intSpan, 0) + 1
         self.intSpan["min"] = min(self.intSpan.get("min"), intSpan)
         self.intSpan["max"] = max(self.intSpan.get("max"), intSpan)
         if interaction.get("event") == "True":
             # Track the index range covered by this event's arguments
             if e1Id not in events:
                 events[e1Id] = {"min":9999, "max":-9999}
             events[e1Id]["min"] = min(events[e1Id]["min"], index1, index2)
             events[e1Id]["max"] = max(events[e1Id]["max"], index1, index2)
     for eventId in sorted(events.keys()):
         eventSpan = events[eventId]["max"] - events[eventId]["min"]
         self.eventSpans[eventSpan] = self.eventSpans.get(eventSpan, 0) + 1
         self.eventSpan["min"] = min(self.eventSpan.get("min"), eventSpan)
         self.eventSpan["max"] = max(self.eventSpan.get("max"), eventSpan)
开发者ID:jbjorne,项目名称:TEES,代码行数:31,代码来源:DistanceAnalyzer.py

示例15: addParseElements

def addParseElements(doc, docEl):
    """
    Attach gold tokenization and dependency parse elements to a sentence.

    Does nothing for non-sentence elements. Token POS tags default to
    "None" and are recovered from typed dependency names of the form
    '...:pos1-pos2(...)' when present.
    """
    if docEl.tag != "sentence":
        return
    analysesEl = ET.SubElement(docEl, "analyses")
    parseEl = ET.SubElement(analysesEl, "parse")
    tokenizationEl = ET.SubElement(analysesEl, "tokenization")
    parseEl.set("parser", "gold")
    parseEl.set("tokenizer", "gold")
    tokenizationEl.set("tokenizer", "gold")
    tokenElements = {}
    # One token element per word; every word must have exactly one span
    for word in doc.words:
        tokEl = ET.SubElement(tokenizationEl, "token")
        tokEl.set("id", word.id)
        tokEl.set("text", word.text)
        tokEl.set("POS", "None")
        assert len(word.charOffsets) == 1, (word, word.charOffsets)
        tokEl.set("charOffset", Range.tuplesToCharOffset(word.charOffsets))
        tokenElements[word.id] = tokEl
    # One binary dependency element per parsed dependency
    for dep in doc.dependencies:
        depEl = ET.SubElement(parseEl, "dependency")
        depEl.set("id", dep.id)
        depEl.set("type", dep.type)
        assert len(dep.arguments) == 2
        depEl.set("t1", dep.arguments[0].target.id)
        depEl.set("t2", dep.arguments[1].target.id)
        if ":" in dep.type:
            # e.g. "type:POS1-POS2(...)" carries the argument POS tags
            pos1, pos2 = dep.type.split("(")[0].split(":")[-1].split("-")
            tokenElements[dep.arguments[0].target.id].set("POS", pos1)
            tokenElements[dep.arguments[1].target.id].set("POS", pos2)
开发者ID:ayoshiaki,项目名称:TEES,代码行数:31,代码来源:ConvertXML.py


注:本文中的Utils.Range类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。