本文整理汇总了 Python 中 Core.ExampleUtils.loadPredictions 方法的典型用法代码示例。如果您正苦于以下问题：Python ExampleUtils.loadPredictions 方法的具体用法？Python ExampleUtils.loadPredictions 怎么用？Python ExampleUtils.loadPredictions 使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 Core.ExampleUtils 的用法示例。
示例1: classifyToXML
# 需要导入模块: from Core import ExampleUtils [as 别名]
# 或者: from Core.ExampleUtils import loadPredictions [as 别名]
def classifyToXML(self, data, model, exampleFileName=None, tag="", classifierModel=None, goldData=None, parse=None, recallAdjust=None, compressExamples=True):
    """
    Build examples for `data`, classify them and write the predictions as XML.

    Arguments:
    data -- input corpus; handed to self.buildExamples and self.exampleWriter.write
    model -- model reference, opened read-only with self.openModel
    exampleFileName -- file for the generated examples; when None it defaults
        to tag + self.tag + "examples"
    tag -- prefix for the file names generated by this method
    classifierModel -- classifier model file; when None it is taken from the
        model member self.tag + "classifier-model"
    goldData -- handed to self.buildExamples
    parse -- parse name; when None it is read from the model (self.tag + "parse")
    recallAdjust -- handed to ExampleUtils.loadPredictions
    compressExamples -- add a ".gz" suffix to the default example file name

    Returns the result of self.exampleWriter.write for the output file
    tag + self.tag + "pred.xml.gz".
    """
    model = self.openModel(model, "r")
    if parse is None:
        parse = self.getStr(self.tag+"parse", model)
    if exampleFileName is None:
        exampleFileName = tag+self.tag+"examples"
        # NOTE(review): the source text had lost its indentation; the ".gz"
        # suffix is assumed to apply only to the default file name -- confirm.
        if compressExamples:
            exampleFileName += ".gz"
    self.buildExamples(model, [data], [exampleFileName], [goldData], parse=parse)
    if classifierModel is None:
        classifierModel = model.get(self.tag+"classifier-model")
    # NOTE(review): with the indentation lost it is unclear whether this check
    # was meant for every case or only for a caller-supplied classifierModel;
    # it is applied in every case here -- confirm against the original project.
    assert os.path.exists(classifierModel), classifierModel
    classifier = self.Classifier()
    classificationsFileName = tag+self.tag+"classifications"
    classifier.classify(exampleFileName, classificationsFileName, classifierModel, finishBeforeReturn=True)
    predictions = ExampleUtils.loadPredictions(classificationsFileName, recallAdjust)
    # The evaluation result is not used below; the call itself is kept as in
    # the original flow.
    self.evaluator.evaluate(exampleFileName, predictions, model.get(self.tag+"ids.classes"))
    # A disabled earlier variant wrote the XML only when the evaluator reported
    # TP + FP > 0 and otherwise copied the input unchanged to the output file.
    # TODO (carried over from that variant): e.g. interactions must be removed
    # if task does unmerging.
    return self.exampleWriter.write(exampleFileName, predictions, data, tag+self.tag+"pred.xml.gz", model.get(self.tag+"ids.classes"), parse)
示例2: __init__
# 需要导入模块: from Core import ExampleUtils [as 别名]
# 或者: from Core.ExampleUtils import loadPredictions [as 别名]
# Constructor of an evaluator that keeps one EvaluationData object per class id.
# classSet, predictions and examples may each be passed as a file name (plain str),
# in which case they are loaded with IdSet(filename=...), ExampleUtils.loadPredictions
# and ExampleUtils.readExamples respectively.
# NOTE(review): the indentation of this block was lost in extraction and some lines
# appear to be missing (flagged below); confirm against the original project.
def __init__(self, examples, predictions=None, classSet=None):
if type(classSet) == types.StringType: # class names are in file
classSet = IdSet(filename=classSet)
if type(predictions) == types.StringType: # predictions are in file
predictions = ExampleUtils.loadPredictions(predictions)
if type(examples) == types.StringType: # examples are in file
examples = ExampleUtils.readExamples(examples, False)
self.classSet = classSet
# define class ids in alphabetical order
# NOTE(review): self.classSet is assigned a second time here; the repeat is redundant.
self.classSet = classSet
if classSet != None:
classNames = sorted(classSet.Ids.keys())
# NOTE(review): as written, classNames is reset to [] right after being computed;
# an `else:` line was presumably lost before the next statement -- confirm.
classNames = []
# make an ordered list of class ids
self.classes = []
# NOTE(review): the body of this loop is missing from the extracted text;
# presumably it fills self.classes from classNames -- confirm.
for className in classNames:
# create data structures for per-class evaluation
self.dataByClass = {}
for cls in self.classes:
self.dataByClass[cls] = EvaluationData()
# hack for unnamed classes
# When no class received an entry above, ids 1 and 2 each get an EvaluationData.
if len(self.dataByClass) == 0:
self.dataByClass[1] = EvaluationData()
self.dataByClass[2] = EvaluationData()
#self.untypedUndirected = None
self.untypedCurrentMajorId = None
self.untypedPredictionQueue = []
self.untypedUndirected = EvaluationData()
#self.AUC = None
# self._calculate is invoked from the constructor only when predictions were supplied.
if predictions != None:
self._calculate(examples, predictions)
示例3: classifyToXML
# 需要导入模块: from Core import ExampleUtils [as 别名]
# 或者: from Core.ExampleUtils import loadPredictions [as 别名]
def classifyToXML(self, data, model, exampleFileName=None, tag="", classifierModel=None, goldData=None, parse=None, recallAdjust=None, compressExamples=True, exampleStyle=None):
    """
    Build examples for `data`, classify them and write the predictions as XML.

    Arguments:
    data -- input corpus; handed to self.buildExamples and self.exampleWriter.write
    model -- model reference, opened read-only with self.openModel
    exampleFileName -- file for the generated examples; when None it defaults
        to tag + self.tag + "examples"
    tag -- prefix for the file names generated by this method
    classifierModel -- classifier model file; when None it is looked up in the
        model (self.tag + "classifier-model", with defaultIfNotExist=None)
    goldData -- handed to self.buildExamples
    parse -- parse name; when None it is read from the model (self.tag + "parse")
    recallAdjust -- handed to ExampleUtils.loadPredictions
    compressExamples -- add a ".gz" suffix to the default example file name
    exampleStyle -- handed to self.buildExamples as given; when None, the
        writer receives the style parsed from the model's
        self.tag + "example-style" string instead

    Returns the result of self.exampleWriter.write for the output file
    tag + self.tag + "pred.xml.gz".
    """
    model = self.openModel(model, "r")
    if parse is None:
        parse = self.getStr(self.tag+"parse", model)
    if exampleFileName is None:
        exampleFileName = tag+self.tag+"examples"
        # NOTE(review): the source text had lost its indentation; the ".gz"
        # suffix is assumed to apply only to the default file name -- confirm.
        if compressExamples:
            exampleFileName += ".gz"
    self.buildExamples(model, [data], [exampleFileName], [goldData], parse=parse, exampleStyle=exampleStyle)
    if classifierModel is None:
        classifierModel = model.get(self.tag+"classifier-model", defaultIfNotExist=None)
    # The existence check on classifierModel was disabled in the original:
    # assert os.path.exists(classifierModel), classifierModel
    classifierClass = self.getClassifier(model.getStr(self.tag+"classifier-parameter", defaultIfNotExist=None))
    classifier = classifierClass()
    classificationsFileName = tag+self.tag+"classifications"
    classifier.classify(exampleFileName, classificationsFileName, classifierModel, finishBeforeReturn=True)
    threshold = model.getStr(self.tag+"threshold", defaultIfNotExist=None, asType=float)
    predictions = ExampleUtils.loadPredictions(classificationsFileName, recallAdjust, threshold=threshold)
    # The evaluation result is not used below; the call itself is kept as in
    # the original flow.
    self.evaluator.evaluate(exampleFileName, predictions, model.get(self.tag+"ids.classes"))
    if exampleStyle is None:
        # no checking, but these should already have passed the ExampleBuilder
        exampleStyle = Parameters.get(model.getStr(self.tag+"example-style"))
    # A disabled earlier variant wrote the XML only when the evaluator reported
    # TP + FP > 0 and otherwise copied the input unchanged to the output file.
    # TODO (carried over from that variant): e.g. interactions must be removed
    # if task does unmerging.
    return self.exampleWriter.write(exampleFileName, predictions, data, tag+self.tag+"pred.xml.gz", model.get(self.tag+"ids.classes"), parse, exampleStyle=exampleStyle, structureAnalyzer=self.structureAnalyzer)
示例4: loadExamples
# 需要导入模块: from Core import ExampleUtils [as 别名]
# 或者: from Core.ExampleUtils import loadPredictions [as 别名]
def loadExamples(self, examples, predictions):
    """
    Load examples and predictions from files when they are given as file names.

    An argument that is a string (str or unicode) is treated as a file name:
    predictions are read with ExampleUtils.loadPredictions and examples with
    ExampleUtils.readExamples(examples, False). Any other value is passed
    through unchanged.

    Returns the tuple (examples, predictions).
    """
    # isinstance with types.StringTypes also accepts unicode file names, which
    # an exact type comparison against types.StringType would pass through as
    # if they were already-loaded data.
    if isinstance(predictions, types.StringTypes):
        print >> sys.stderr, "Reading predictions from", predictions
        predictions = ExampleUtils.loadPredictions(predictions)
    if isinstance(examples, types.StringTypes):
        print >> sys.stderr, "Reading examples from", examples
        examples = ExampleUtils.readExamples(examples, False)
    return examples, predictions
示例5: optimize
# 需要导入模块: from Core import ExampleUtils [as 别名]
# 或者: from Core.ExampleUtils import loadPredictions [as 别名]
# Train one classifier per parameter combination, evaluate every finished job and
# return a copy of the best-scoring trained classifier with its threshold attached.
# - step must be one of "BOTH", "SUBMIT", "RESULTS"; with "SUBMIT" a copy of this
#   object is returned without waiting for results, and with "RESULTS" the train
#   calls are made with dummy=True.
# - evaluator defaults to self.defaultEvaluator.
# - Raises Exception("No results for any parameter combination") when no
#   combination produced a result.
# NOTE(review): the indentation of this block was lost in extraction and some lines
# appear to be missing (flagged below); confirm against the original project.
def optimize(self, examples, outDir, parameters, classifyExamples, classIds, step="BOTH", evaluator=None, determineThreshold=False, timeout=None, downloadAllModels=False):
assert step in ["BOTH", "SUBMIT", "RESULTS"], step
outDir = os.path.abspath(outDir)
# Initialize training (or reconnect to existing jobs)
combinations = Parameters.getCombinations(Parameters.get(parameters, valueListKey="c")) #Core.OptimizeParameters.getParameterCombinations(parameters)
trained = []
for combination in combinations:
# replaceRemoteExamples is True only for the first combination (trained is still empty).
trained.append( self.train(examples, outDir, combination, classifyExamples, replaceRemoteExamples=(len(trained) == 0), dummy=(step == "RESULTS")) )
if step == "SUBMIT": # Return already
classifier = copy.copy(self)
return classifier
# Wait for the training to finish
finalJobStatus = self.connection.waitForJobs([x.getJob() for x in trained])
# Evaluate the results
print >> sys.stderr, "Evaluating results"
#Stream.setIndent(" ")
# bestResult holds [evaluation, trained classifier, combination, threshold] of the best run so far.
bestResult = None
if evaluator == None:
evaluator = self.defaultEvaluator
for i in range(len(combinations)):
# NOTE(review): `id` shadows the builtin of the same name within this function.
id = trained[i].parameterIdStr
#Stream.setIndent(" ")
# Get predictions
predictions = None
if trained[i].getStatus() == "FINISHED":
predictions = trained[i].downloadPredictions()
# NOTE(review): the message below presumably belonged to an `else:` branch (followed by
# skipping this combination) that was lost in extraction -- confirm.
print >> sys.stderr, "No results for combination" + id
# NOTE(review): the body of this `if` is missing from the extracted text -- confirm.
if downloadAllModels:
# Compare to other results
print >> sys.stderr, "*** Evaluating results for combination" + id + " ***"
threshold = None
if determineThreshold:
# The trailing comma keeps the following print on the same output line.
print >> sys.stderr, "Thresholding, original micro =",
evaluation = evaluator.evaluate(classifyExamples, predictions, classIds, os.path.join(outDir, "evaluation-before-threshold" + id + ".csv"), verbose=False)
print >> sys.stderr, evaluation.microF.toStringConcise()
threshold, bestF = evaluator.threshold(classifyExamples, predictions)
print >> sys.stderr, "threshold =", threshold, "at binary fscore", str(bestF)[0:6]
# Predictions are reloaded with the chosen threshold (None when no thresholding was requested).
evaluation = evaluator.evaluate(classifyExamples, ExampleUtils.loadPredictions(predictions, threshold=threshold), classIds, os.path.join(outDir, "evaluation" + id + ".csv"))
if bestResult == None or evaluation.compare(bestResult[0]) > 0: #: averageResult.fScore > bestResult[1].fScore:
bestResult = [evaluation, trained[i], combinations[i], threshold]
if not self.connection.isLocal():
os.remove(predictions) # remove predictions to save space
if bestResult == None:
raise Exception("No results for any parameter combination")
print >> sys.stderr, "*** Evaluation complete", finalJobStatus, "***"
print >> sys.stderr, "Selected parameters", bestResult[2]
# The returned object is a shallow copy of the winning trained classifier.
classifier = copy.copy(bestResult[1])
classifier.threshold = bestResult[3]
return classifier
示例6: __init__
# 需要导入模块: from Core import ExampleUtils [as 别名]
# 或者: from Core.ExampleUtils import loadPredictions [as 别名]
def __init__(self, examples, predictions=None, classSet=None):
    """
    Evaluate predictions with the GENIA shared task evaluation tool.

    Arguments:
    examples -- examples, or the name of a file to read them from
    predictions -- predictions, or the name of a file to load them from
    classSet -- class id set, or the name of a file to load it from

    The corpus, parse, tokenization, id file prefix, output directory and
    task are all read from SharedTaskEvaluator class attributes. The result
    of evaluateSharedTask is stored in self.results.
    """
    # isinstance with types.StringTypes also accepts unicode file names.
    if isinstance(classSet, types.StringTypes): # class names are in file
        # The loaded class set is not used further in this constructor.
        classSet = IdSet(filename=classSet)
    if isinstance(predictions, types.StringTypes): # predictions are in file
        predictions = ExampleUtils.loadPredictions(predictions)
    if isinstance(examples, types.StringTypes): # examples are in file
        examples = ExampleUtils.readExamples(examples, False)
    # The loaded corpus is stored on the class, not on the instance.
    SharedTaskEvaluator.corpusElements = Core.SentenceGraph.loadCorpus(SharedTaskEvaluator.corpusFilename, SharedTaskEvaluator.parse, SharedTaskEvaluator.tokenization)
    # Build interaction xml
    xml = BioTextExampleWriter.write(examples, predictions, SharedTaskEvaluator.corpusElements, None, SharedTaskEvaluator.ids+".class_names", SharedTaskEvaluator.parse, SharedTaskEvaluator.tokenization)
    # Convert to GENIA format
    gifxmlToGenia(xml, SharedTaskEvaluator.geniaDir, task=SharedTaskEvaluator.task, verbose=False)
    # Use GENIA evaluation tool
    self.results = evaluateSharedTask(SharedTaskEvaluator.geniaDir, task=SharedTaskEvaluator.task, evaluations=["approximate"], verbose=False)
示例7: __init__
# 需要导入模块: from Core import ExampleUtils [as 别名]
# 或者: from Core.ExampleUtils import loadPredictions [as 别名]
def __init__(self, examples, predictions=None, classSet=None):
    """
    Set up per-class evaluation data and evaluate when predictions are given.

    Arguments:
    examples -- examples, or the name of a file to read them from
    predictions -- predictions, or the name of a file to load them from;
        when None, no evaluation is run by the constructor
    classSet -- class id set, or the name of a file to load it from
    """
    # isinstance with types.StringTypes also accepts unicode file names.
    if isinstance(classSet, types.StringTypes): # class names are in file
        classSet = IdSet(filename=classSet)
    if isinstance(predictions, types.StringTypes): # predictions are in file
        predictions = ExampleUtils.loadPredictions(predictions)
    if isinstance(examples, types.StringTypes): # examples are in file
        examples = ExampleUtils.readExamples(examples, False)
    self.classSet = classSet
    # An EvaluationData is created on first access for any class key.
    self.dataByClass = defaultdict(EvaluationData)
    self.untypedCurrentMajorId = None
    self.untypedPredictionQueue = []
    self.untypedUndirected = EvaluationData()
    if predictions is not None:
        self._calculate(examples, predictions)
示例8: __init__
# 需要导入模块: from Core import ExampleUtils [as 别名]
# 或者: from Core.ExampleUtils import loadPredictions [as 别名]
def __init__(self, examples, predictions=None, classSet=None):
    """
    Evaluate predictions with BioNLP11GeniaTools.evaluateBX.

    Arguments:
    examples -- examples, or the name of a file to read them from
    predictions -- predictions, or the name of a file to load them from
    classSet -- class id set, or the name of a file to load it from

    The corpus, parse, tokenization, id file prefix, output directory and
    corpus tag are all read from BXEvaluator class attributes. The result of
    the evaluation is stored in self.results.
    """
    # isinstance with types.StringTypes also accepts unicode file names.
    if isinstance(classSet, types.StringTypes): # class names are in file
        # The loaded class set is not used further in this constructor.
        classSet = IdSet(filename=classSet)
    if isinstance(predictions, types.StringTypes): # predictions are in file
        predictions = ExampleUtils.loadPredictions(predictions)
    if isinstance(examples, types.StringTypes): # examples are in file
        examples = ExampleUtils.readExamples(examples, False)
    corpusElements = Core.SentenceGraph.loadCorpus(BXEvaluator.corpusFilename, BXEvaluator.parse, BXEvaluator.tokenization)
    # Build interaction xml
    xml = BioTextExampleWriter.write(examples, predictions, corpusElements, None, BXEvaluator.ids+".class_names", BXEvaluator.parse, BXEvaluator.tokenization)
    xml = ix.splitMergedElements(xml, None)
    xml = ix.recalculateIds(xml, None, True)
    # Convert to GENIA format
    STFormat.ConvertXML.toSTFormat(xml, BXEvaluator.geniaDir, outputTag="a2")
    # Use GENIA evaluation tool
    self.results = BioNLP11GeniaTools.evaluateBX(BXEvaluator.geniaDir, corpusName=BXEvaluator.corpusTag)
    # Drop the local reference to the loaded corpus.
    corpusElements = None
示例9: __init__
# 需要导入模块: from Core import ExampleUtils [as 别名]
# 或者: from Core.ExampleUtils import loadPredictions [as 别名]
def __init__(self, examples=None, predictions=None, classSet=None):
    """
    Initialise binary evaluation counters and evaluate when predictions are given.

    Arguments:
    examples -- examples, or the name of a file to read them from
    predictions -- predictions, or the name of a file to load them from;
        when None, no evaluation is run by the constructor
    classSet -- class id set, or the name of a file to load it from
    """
    # isinstance with types.StringTypes also accepts unicode file names.
    if isinstance(classSet, types.StringTypes): # class names are in file
        # The loaded class set is not stored or used further in this constructor.
        classSet = IdSet(filename=classSet)
    if isinstance(predictions, types.StringTypes): # predictions are in file
        predictions = ExampleUtils.loadPredictions(predictions)
    if isinstance(examples, types.StringTypes): # examples are in file
        examples = ExampleUtils.readExamples(examples, False)
    # Confusion counts start at zero; derived scores start out unset.
    self.truePositives = 0
    self.falsePositives = 0
    self.trueNegatives = 0
    self.falseNegatives = 0
    self.precision = None
    self.recall = None
    self.fScore = None
    self.AUC = None
    self.type = "binary"
    if predictions is not None:
        self._calculate(examples, predictions)
示例10: threshold
# 需要导入模块: from Core import ExampleUtils [as 别名]
# 或者: from Core.ExampleUtils import loadPredictions [as 别名]
def threshold(cls, examples, predictions):
    """
    Search for the negative-class score threshold with the best binary F-score.

    Arguments:
    cls -- not used in the body (presumably this is a classmethod whose
        decorator lies outside this block -- confirm)
    examples -- examples, or the name of a file to read them from; the true
        class is read from example[1]
    predictions -- predictions, or the name of a file to load them from; the
        negative class score is read from prediction[1]

    Returns the tuple (threshold, fscore).
    Raises IndexError when there are no example/prediction pairs, because the
    initial threshold is derived from the first pair.
    """
    # Make negative confidence score / true class pairs
    if isinstance(examples, types.StringTypes):
        examples = ExampleUtils.readExamples(examples, False)
    if isinstance(predictions, types.StringTypes):
        predictions = ExampleUtils.loadPredictions(predictions)
    pairs = []
    realPositives = 0
    for example, prediction in itertools.izip(examples, predictions):
        trueClass = example[1]
        assert(trueClass > 0) # multiclass classification uses non-negative integers
        if trueClass > 1:
            realPositives += 1
        negClassValue = prediction[1]
        pairs.append( (negClassValue, trueClass) )
    # NOTE(review): the loop below walks `pairs` in input order; no sorting by
    # score is visible in this block -- confirm whether one is expected here.
    realNegatives = len(pairs) - realPositives
    # When starting thresholding, all examples are considered positive
    binaryF = EvaluationData()
    binaryF._tp = realPositives
    binaryF._fp = realNegatives
    binaryF._fn = 0
    # NOTE(review): assumes EvaluationData.fscore reflects the current
    # _tp/_fp/_fn values; no explicit recalculation is visible -- confirm.
    fscore = binaryF.fscore
    threshold = pairs[0][0]-1.
    # Turn one example negative at a time
    for pair in pairs:
        if pair[1] == 1: # the real class is negative
            binaryF._fp -= 1 # false positive -> true negative
        else: # the real class is a positive class
            binaryF._tp -= 1 # true positive -> ...
            binaryF._fn += 1 # ... false negative
        if binaryF.fscore > fscore:
            fscore = binaryF.fscore
            # The small offset places the threshold just above this pair's score.
            threshold = pair[0]+0.00000001
    return threshold, fscore
示例11: __init__
# 需要导入模块: from Core import ExampleUtils [as 别名]
# 或者: from Core.ExampleUtils import loadPredictions [as 别名]
# Constructor of an evaluator that runs a ChemProt-specific evaluation
# (self._calculateExamples) and an AveragingMultiClassEvaluator on the same data.
# classSet, predictions and examples may each be passed as a file name (plain str).
# NOTE(review): the indentation of this block was lost in extraction, so the nesting
# of the statements after `if predictions != None:` cannot be read off the text, and
# an `else:` line appears to be missing (flagged below); confirm against the
# original project.
def __init__(self, examples=None, predictions=None, classSet=None):
if type(classSet) == types.StringType: # class names are in file
classSet = IdSet(filename=classSet)
if type(predictions) == types.StringType: # predictions are in file
predictions = ExampleUtils.loadPredictions(predictions)
if type(examples) == types.StringType: # examples are in file
examples = ExampleUtils.readExamples(examples, False)
# Class names stored in self.keep; not used elsewhere in this constructor.
self.keep = set(["CPR:3", "CPR:4", "CPR:5", "CPR:6", "CPR:9"])
self.classSet = classSet
self.results = None
self.internal = None
if predictions != None:
for example in examples:
# example[3] is tested against None before the ChemProt evaluation is run.
if example[3] != None:
print >> sys.stderr, "ChemProt Evaluator:"
self._calculateExamples(examples, predictions)
# NOTE(review): the "skipping" message below presumably belonged to an `else:` branch
# that was lost in extraction -- confirm.
print >> sys.stderr, "No example extra info, skipping ChemProt evaluation"
self.internal = AveragingMultiClassEvaluator(examples, predictions, classSet)
print >> sys.stderr, "AveragingMultiClassEvaluator:"
print >> sys.stderr, self.internal.toStringConcise()
示例12: threshold
# 需要导入模块: from Core import ExampleUtils [as 别名]
# 或者: from Core.ExampleUtils import loadPredictions [as 别名]
# Experimental thresholding over a directory of prediction files: every file in
# predictionsDir whose name contains "predictions" is loaded, a baseline
# AveragingMultiClassEvaluator is computed, thresholdClass is applied and the
# baseline / best results are printed to standard output.
# NOTE(review): the indentation of this block was lost in extraction and several
# lines appear to be missing (loop bodies, `else:` branches, the definition of
# `pairs`, any return statement); the notes below flag the visible gaps. Confirm
# against the original project before relying on this code.
def threshold(examples, predictionsDir=None, classSet=None):
if type(classSet) == types.StringType: # class names are in file
classSet = IdSet(filename=classSet)
classIds = set()
if type(examples) == types.StringType: # examples are in file
examplesTemp = ExampleUtils.readExamples(examples, False)
examples = []
# NOTE(review): the body of this loop is missing from the extracted text; as written
# nothing is added to examples or classIds -- confirm.
for example in examplesTemp:
classIds = list(classIds)
#multilabel = MultiLabelMultiClassEvaluator(None, None, classSet)
#multilabel._calculate(examples, predictions)
#print multilabel.toStringConcise(title="multilabel")
bestThrF = [0]
bestBaseF = [0]
predFileNames = []
for filename in os.listdir(predictionsDir):
if "predictions" in filename:
# Stores (integer parsed from the text after the last "_", file name).
predFileNames.append( (int(filename.rsplit("_")[-1]), filename) )
for predFileName in predFileNames:
predictionsTemp = ExampleUtils.loadPredictions(os.path.join(predictionsDir, predFileName[1]))
predictions = []
# NOTE(review): the body of this loop is missing from the extracted text; as written
# predictions stays empty -- confirm.
for prediction in predictionsTemp:
baseEv = AveragingMultiClassEvaluator(None, None, classSet)
baseEv._calculate(examples, predictions)
print "============================"
print predFileName[1]
print "============================"
#print baseEv.toStringConcise(title="baseline")
baseLineF = baseEv.microF.fscore
# Only a single step and only class id 1 are processed (see the commented-out classIds).
for step in [0]:
for classId in [1]: #classIds:
cls = None
if classSet != None:
cls = classSet.getName(classId)
# NOTE(review): as written, cls is always overwritten with str(classId); an `else:`
# line was presumably lost before the next statement -- confirm.
cls = str(classId)
bestF = thresholdClass(examples, predictions, classId, baseLineF)
# Shift the score of classId in every prediction by bestF[2][0] plus a small offset.
for prediction in predictions:
prediction[classId] -= bestF[2][0] + 0.00000001
changed = 0
# Re-derive the predicted class (prediction[0]) as the index of the highest score
# among prediction[1:], counting how many predictions change.
for prediction in predictions:
maxVal = -999999
maxClass = None
for i in range(1, len(prediction)):
if prediction[i] > maxVal:
maxVal = prediction[i]
maxClass = i
if maxClass != prediction[0]:
prediction[0] = maxClass
changed += 1
print step, cls, "changed", changed, bestF[0]
baseLineF = bestF[0]
# Track the best thresholded and the best baseline result over the prediction files.
if bestF[0] > bestThrF[0]:
bestThrF = (bestF[0], predFileName[1], bestF[1], bestF[2], bestF[3])
if baseEv.microF.fscore > bestBaseF[0]:
bestBaseF = (baseEv.microF.fscore, predFileName[1], baseEv.microF.toStringConcise())
print "-------- Baseline ------------"
print baseEv.toStringConcise()
print "-------- Best ------------"
print bestF[0], bestF[1], bestF[2]
print bestF[3]
thEv = AveragingMultiClassEvaluator(None, None, classSet)
thEv._calculate(examples, predictions)
print thEv.toStringConcise()
print "=============== All Best ==============="
print "Threshold", bestThrF
print "Base", bestBaseF
memPredictions = []
bestEv = baseEv
bestPair = [None, None, None]
# NOTE(review): the body of this loop is missing from the extracted text, and `pairs`
# used below is not defined anywhere in the visible block -- confirm.
for p in predictions:
for pair in pairs:
modifier = pair[0] + 0.00000001
changedClass = 0
# For each stored prediction, lower the class 1 score by the modifier and pick the
# class with the highest resulting score.
for pred in memPredictions:
negPred = pred[1] - modifier
maxVal = negPred
maxClass = 1
for i in range(2, len(pred)):
if pred[i] > maxVal:
maxVal = pred[i]
maxClass = i
if pred[0] != maxClass:
changedClass += 1
pred[0] = maxClass
# NOTE(review): the visible text ends here without a return statement.
示例13: determineThreshold
# 需要导入模块: from Core import ExampleUtils [as 别名]
# 或者: from Core.ExampleUtils import loadPredictions [as 别名]
# Determine one score threshold per class id in self.classes by scanning the
# per-class (score, true class, predicted class) tuples and keeping the point with
# the best EvaluationData.fscore. The result is stored in self.thresholds and
# returned as a dict keyed by class id.
# predictions and examples may each be passed as a file name (plain str).
# NOTE(review): the indentation of this block was lost in extraction and several
# lines appear to be missing (`else:` branches, a loop body); the notes below flag
# the visible gaps. Confirm against the original project before relying on this code.
def determineThreshold(self, examples, predictions):
if type(predictions) == types.StringType: # predictions are in file
predictions = ExampleUtils.loadPredictions(predictions)
if type(examples) == types.StringType: # examples are in file
examples = ExampleUtils.readExamples(examples, False)
examplesByClass = {}
for cls in self.classes:
examplesByClass[cls] = []
# prepare examples
for example, prediction in itertools.izip(examples, predictions):
# Check true class for multilabel
trueClass = example[1]
trueClassName = self.classSet.getName(trueClass)
assert(trueClass > 0) # multiclass classification uses non-negative integers
# A class name containing "---" is split on that separator.
if "---" in trueClassName:
trueClass = set()
# NOTE(review): the body of this loop is missing from the extracted text, and the
# assignment after it presumably belonged to an `else:` branch -- confirm.
for name in trueClassName.split("---"):
trueClass = [trueClass]
# Check prediction for multilabel
predictedClasses = prediction[0]
if type(predictedClasses) == types.IntType:
predictedClasses = [predictedClasses]
# Tuples appended below are (score for the class, 1 or 2 for false/true class
# membership, 2 if the class was predicted else 1).
for predType in predictedClasses:
if predType != 1:
exTrueClass = 1
if predType in trueClass:
exTrueClass = 2
examplesByClass[predType].append( (prediction[predType], exTrueClass, 2) )
# positives are negatives for other classes
for cls in self.classes:
if cls not in predictedClasses:
exTrueClass = 1
if cls in trueClass:
exTrueClass = 2
examplesByClass[cls].append( (prediction[cls], exTrueClass, 1) )
# do the thresholding
thresholdByClass = {}
for cls in self.classes:
# NOTE(review): class 1 gets a fixed threshold of 0.0; a statement skipping the rest
# of the loop for this class may have been lost here -- confirm.
if cls == 1:
thresholdByClass[cls] = 0.0
# Start with all below zero being negative, and all above it being what is predicted
ev = EvaluationData()
for example in examplesByClass[cls]:
#print example
# NOTE(review): the two updateF calls below presumably were the two branches of an
# if/else whose `else:` line was lost -- confirm. updateF is defined outside this block.
if example[0] < 0.0:
updateF(ev, example[1], 2, 1) # always negative
updateF(ev, example[1], example[2], 1) # what is predicted
count = 0
# bestF is (fscore, count, example tuple, concise string); it starts from the
# f-score already stored for this class.
bestF = [self.dataByClass[cls].fscore, None, (0.0, None), None]
for example in examplesByClass[cls]:
# NOTE(review): the two remove/add groups below presumably were the two branches of
# an if/else whose `else:` line was lost -- confirm.
if example[0] < 0.0:
# Remove original example
updateF(ev, example[1], 2, -1)
# Add new example
updateF(ev, example[1], example[2], 1)
# Calculate F for this point
# Remove original example
updateF(ev, example[1], example[2], -1)
# Add new example
updateF(ev, example[1], 1, 1)
# Calculate F for this point
#print example, ev.toStringConcise()
count += 1
#if self.classSet.getName(cls) == "Binding":
# print count, example, ev.toStringConcise()
if ev.fscore > bestF[0]:
bestF = (ev.fscore, count, example, ev.toStringConcise())
# A copy of the best evaluation state replaces the stored data for this class.
self.dataByClass[cls] = copy.copy(ev)
print >> sys.stderr, "Threshold", self.classSet.getName(cls), bestF
# NOTE(review): as written, the threshold is always reset to 0.0 by the second
# assignment; an `else:` line was presumably lost between them -- confirm.
if bestF[2][0] != 0.0:
thresholdByClass[cls] = bestF[2][0] + 0.00000001
thresholdByClass[cls] = 0.0
#print thresholdByClass
self.thresholds = thresholdByClass
#self._calculate(examples, predictions, thresholdByClass)
#print >> sys.stderr, "Optimal", self.toStringConcise()
return thresholdByClass