This article collects typical usage examples of the Core.ExampleUtils class in Python. If you are wondering what the ExampleUtils class is for, how it is used, or where to find examples of it, the curated class examples below should help.
The following presents 15 code examples of the ExampleUtils class, drawn from real projects and ordered by popularity by default.
Example 1: __init__
def __init__(self, examples, predictions=None, classSet=None):
    if type(classSet) == types.StringType: # class names are in file
        classSet = IdSet(filename=classSet)
    if type(predictions) == types.StringType: # predictions are in file
        predictions = ExampleUtils.loadPredictions(predictions)
    if type(examples) == types.StringType: # examples are in file
        examples = ExampleUtils.readExamples(examples, False)
    self.classSet = classSet
    # define class ids in alphabetical order
    if classSet != None:
        classNames = sorted(classSet.Ids.keys())
    else:
        classNames = []
    # make an ordered list of class ids
    self.classes = []
    for className in classNames:
        self.classes.append(classSet.getId(className))
    # create data structures for per-class evaluation
    self.dataByClass = {}
    for cls in self.classes:
        self.dataByClass[cls] = EvaluationData()
    # hack for unnamed classes
    if len(self.dataByClass) == 0:
        self.dataByClass[1] = EvaluationData()
        self.dataByClass[2] = EvaluationData()
    #self.untypedUndirected = None
    self.untypedCurrentMajorId = None
    self.untypedPredictionQueue = []
    self.untypedUndirected = EvaluationData()
    #self.AUC = None
    if predictions != None:
        self._calculate(examples, predictions)
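For reference, the later examples index into what these two ExampleUtils calls return: example[0] is an identifier string (Example 13), example[1] the gold class id (Examples 14 and 15), and example[2] the feature dictionary (Examples 5 and 13), while a prediction appears to start with the predicted class id (compare the prediction file format in Example 14). A minimal sketch of iterating over both, with placeholder file names:

import sys
from Core import ExampleUtils

# Placeholder file names; the constructor above accepts either these loaded
# objects or the file paths themselves.
examples = ExampleUtils.readExamples("test.examples", False)
predictions = ExampleUtils.loadPredictions("test.classifications")
for example, prediction in zip(examples, predictions):
    exampleId, goldClass = example[0], example[1]
    predictedClass = prediction[0]  # assumption: first element is the predicted class id
    print >> sys.stderr, exampleId, goldClass, predictedClass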
Example 2: buildExamplesForSentences
def buildExamplesForSentences(self, sentences, goldSentences, output, idFileTag=None, append=False):
    examples = []
    counter = ProgressCounter(len(sentences), "Build examples")
    if append:
        outfile = open(output, "at")
    else:
        outfile = open(output, "wt")
    exampleCount = 0
    for i in range(len(sentences)):
        sentence = sentences[i]
        goldSentence = [None]
        if goldSentences != None:
            goldSentence = goldSentences[i]
        counter.update(1, "Building examples (" + sentence[0].getSentenceId() + "): ")
        examples = self.buildExamples(sentence[0], goldSentence[0], append=append)
        exampleCount += len(examples)
        examples = self.preProcessExamples(examples)
        ExampleUtils.appendExamples(examples, outfile)
    outfile.close()
    print >>sys.stderr, "Examples built:", exampleCount
    print >>sys.stderr, "Features:", len(self.featureSet.getNames())
    # IF LOCAL
    if self.exampleStats.getExampleCount() > 0:
        self.exampleStats.printStats()
    # ENDIF
    # Save Ids
    if idFileTag != None:
        print >>sys.stderr, "Saving class names to", idFileTag + ".class_names"
        self.classSet.write(idFileTag + ".class_names")
        print >>sys.stderr, "Saving feature names to", idFileTag + ".feature_names"
        self.featureSet.write(idFileTag + ".feature_names")
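The streaming pattern above keeps memory use low: examples are built one sentence at a time and flushed to an already open file handle with ExampleUtils.appendExamples. A stripped-down sketch of the same call, using made-up examples in the (id, class, features, extra) layout that the other snippets index into (the layout itself is an assumption based on those snippets):

from Core import ExampleUtils

# Hypothetical examples: an id string, an integer class id, a feature
# dictionary mapping feature ids to values, and an extra slot.
examples = [
    ("d1.s0.x0", 1, {1: 1.0, 5: 0.5}, None),
    ("d1.s0.x1", 2, {2: 1.0, 7: 1.0}, None),
]
outfile = open("sentence.examples", "at")  # append mode, as in the snippet above
ExampleUtils.appendExamples(examples, outfile)
outfile.close()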
Example 3: loadExamples
def loadExamples(self, examples, predictions):
    if type(predictions) == types.StringType:
        print >> sys.stderr, "Reading predictions from", predictions
        predictions = ExampleUtils.loadPredictions(predictions)
    if type(examples) == types.StringType:
        print >> sys.stderr, "Reading examples from", examples
        examples = ExampleUtils.readExamples(examples, False)
    return examples, predictions
Example 4: preProcessExamples
def preProcessExamples(self, allExamples):
    # Duplicates cannot be removed here, as they should only be removed from the training set. This is done
    # in the classifier.
    # if "no_duplicates" in self.styles:
    #     count = len(allExamples)
    #     print >> sys.stderr, " Removing duplicates,",
    #     allExamples = ExampleUtils.removeDuplicates(allExamples)
    #     print >> sys.stderr, "removed", count - len(allExamples)
    if "normalize" in self.styles:
        print >> sys.stderr, " Normalizing feature vectors"
        ExampleUtils.normalizeFeatureVectors(allExamples)
    return allExamples
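Here ExampleUtils.normalizeFeatureVectors rescales the feature dictionaries in place when the "normalize" style is active; the snippet does not show which norm is used, so the sketch below only demonstrates the call pattern on made-up examples:

from Core import ExampleUtils

examples = [
    ("d1.s0.x0", 1, {1: 3.0, 2: 4.0}, None),  # hypothetical example
    ("d1.s0.x1", 2, {1: 1.0}, None),
]
ExampleUtils.normalizeFeatureVectors(examples)  # modifies example[2] in place
for example in examples:
    print example[0], example[2]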
Example 5: classify
def classify(self, examples, parameters=None):
    examples, predictions = self.filterClassificationSet(examples, self.isBinary)
    ExampleUtils.writeExamples(examples, self.tempDir+"/test.dat")
    for i in range(len(examples)):
        if self.isBinary:
            predictedClass = self.model.predict(examples[i][2])
            predictions.append( (examples[i],predictedClass,"binary") )
        else:
            predictedClass = self.model.predict(examples[i][2])
            predictions.append( (examples[i],predictedClass,"multiclass") )
    return predictions
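Unlike appendExamples in Example 2, ExampleUtils.writeExamples takes a target path rather than an open file handle, which makes it convenient for one-shot dumps like the temporary test set above. A minimal sketch with a placeholder example and a temporary directory:

import os
import tempfile
from Core import ExampleUtils

tempDir = tempfile.mkdtemp()
examples = [("d1.s0.x0", 1, {1: 1.0, 3: 0.5}, None)]  # hypothetical example
ExampleUtils.writeExamples(examples, os.path.join(tempDir, "test.dat"))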
Example 6: classifyToXML
def classifyToXML(self, data, model, exampleFileName=None, tag="", classifierModel=None, goldData=None, parse=None, recallAdjust=None, compressExamples=True):
    model = self.openModel(model, "r")
    if parse == None:
        parse = self.getStr(self.tag+"parse", model)
    if exampleFileName == None:
        exampleFileName = tag+self.tag+"examples"
        if compressExamples:
            exampleFileName += ".gz"
    self.buildExamples(model, [data], [exampleFileName], [goldData], parse=parse)
    if classifierModel == None:
        classifierModel = model.get(self.tag+"classifier-model")
    else:
        assert os.path.exists(classifierModel), classifierModel
    classifier = self.Classifier()
    classifier.classify(exampleFileName, tag+self.tag+"classifications", classifierModel, finishBeforeReturn=True)
    predictions = ExampleUtils.loadPredictions(tag+self.tag+"classifications", recallAdjust)
    evaluator = self.evaluator.evaluate(exampleFileName, predictions, model.get(self.tag+"ids.classes"))
    #outputFileName = tag+"-"+self.tag+"pred.xml.gz"
    return self.exampleWriter.write(exampleFileName, predictions, data, tag+self.tag+"pred.xml.gz", model.get(self.tag+"ids.classes"), parse)
    # if evaluator.getData().getTP() + evaluator.getData().getFP() > 0:
    #     return self.exampleWriter.write(exampleFileName, predictions, data, outputFileName, model.get(self.tag+"ids.classes"), parse)
    # else:
    #     # TODO: e.g. interactions must be removed if task does unmerging
    #     print >> sys.stderr, "No positive", self.tag + "predictions, XML file", outputFileName, "unchanged from input"
    #     if type(data) in types.StringTypes: # assume its a file
    #         shutil.copy(data, outputFileName)
    #     else: # assume its an elementtree
    #         ETUtils.write(data, outputFileName)
    #     #print >> sys.stderr, "No positive predictions, XML file", tag+self.tag+"pred.xml", "not written"
    #     return data #None
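This is the first example that passes a second argument, recallAdjust, to ExampleUtils.loadPredictions; Example 8 additionally passes threshold=..., and Example 10 passes only threshold=... Presumably both adjust the raw classifier confidences before the predictions are evaluated and written out. The call variants that appear in this article, with a placeholder file name and made-up values:

from Core import ExampleUtils

# Plain load, as in Examples 1, 7, 9, 11 and 12.
predictions = ExampleUtils.loadPredictions("test.classifications")
# With a recall-adjustment factor as the second positional argument, as in Example 6.
predictions = ExampleUtils.loadPredictions("test.classifications", 0.8)  # 0.8 is a made-up value
# With both a recall adjustment and a confidence threshold, as in Example 8.
predictions = ExampleUtils.loadPredictions("test.classifications", None, threshold=0.1)  # 0.1 is a made-up value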
Example 7: __init__
def __init__(self, examples, predictions=None, classSet=None):
    if type(classSet) == types.StringType: # class names are in file
        classSet = IdSet(filename=classSet)
    if type(predictions) == types.StringType: # predictions are in file
        predictions = ExampleUtils.loadPredictions(predictions)
    if type(examples) == types.StringType: # examples are in file
        examples = ExampleUtils.readExamples(examples, False)
    SharedTaskEvaluator.corpusElements = Core.SentenceGraph.loadCorpus(SharedTaskEvaluator.corpusFilename, SharedTaskEvaluator.parse, SharedTaskEvaluator.tokenization)
    # Build interaction xml
    xml = BioTextExampleWriter.write(examples, predictions, SharedTaskEvaluator.corpusElements, None, SharedTaskEvaluator.ids+".class_names", SharedTaskEvaluator.parse, SharedTaskEvaluator.tokenization)
    #xml = ExampleUtils.writeToInteractionXML(examples, predictions, SharedTaskEvaluator.corpusElements, None, "genia-direct-event-ids.class_names", SharedTaskEvaluator.parse, SharedTaskEvaluator.tokenization)
    # Convert to GENIA format
    gifxmlToGenia(xml, SharedTaskEvaluator.geniaDir, task=SharedTaskEvaluator.task, verbose=False)
    # Use GENIA evaluation tool
    self.results = evaluateSharedTask(SharedTaskEvaluator.geniaDir, task=SharedTaskEvaluator.task, evaluations=["approximate"], verbose=False)
Example 8: classifyToXML
def classifyToXML(self, data, model, exampleFileName=None, tag="", classifierModel=None, goldData=None, parse=None, recallAdjust=None, compressExamples=True, exampleStyle=None):
    model = self.openModel(model, "r")
    if parse == None:
        parse = self.getStr(self.tag+"parse", model)
    if exampleFileName == None:
        exampleFileName = tag+self.tag+"examples"
        if compressExamples:
            exampleFileName += ".gz"
    self.buildExamples(model, [data], [exampleFileName], [goldData], parse=parse, exampleStyle=exampleStyle)
    if classifierModel == None:
        classifierModel = model.get(self.tag+"classifier-model", defaultIfNotExist=None)
    #else:
    #    assert os.path.exists(classifierModel), classifierModel
    classifier = self.getClassifier(model.getStr(self.tag+"classifier-parameter", defaultIfNotExist=None))()
    classifier.classify(exampleFileName, tag+self.tag+"classifications", classifierModel, finishBeforeReturn=True)
    threshold = model.getStr(self.tag+"threshold", defaultIfNotExist=None, asType=float)
    predictions = ExampleUtils.loadPredictions(tag+self.tag+"classifications", recallAdjust, threshold=threshold)
    evaluator = self.evaluator.evaluate(exampleFileName, predictions, model.get(self.tag+"ids.classes"))
    #outputFileName = tag+"-"+self.tag+"pred.xml.gz"
    #exampleStyle = self.exampleBuilder.getParameters(model.getStr(self.tag+"example-style"))
    if exampleStyle == None:
        exampleStyle = Parameters.get(model.getStr(self.tag+"example-style")) # no checking, but these should already have passed the ExampleBuilder
    self.structureAnalyzer.load(model)
    return self.exampleWriter.write(exampleFileName, predictions, data, tag+self.tag+"pred.xml.gz", model.get(self.tag+"ids.classes"), parse, exampleStyle=exampleStyle, structureAnalyzer=self.structureAnalyzer)
    # if evaluator.getData().getTP() + evaluator.getData().getFP() > 0:
    #     return self.exampleWriter.write(exampleFileName, predictions, data, outputFileName, model.get(self.tag+"ids.classes"), parse)
    # else:
    #     # TODO: e.g. interactions must be removed if task does unmerging
    #     print >> sys.stderr, "No positive", self.tag + "predictions, XML file", outputFileName, "unchanged from input"
    #     if type(data) in types.StringTypes: # assume its a file
    #         shutil.copy(data, outputFileName)
    #     else: # assume its an elementtree
    #         ETUtils.write(data, outputFileName)
    #     #print >> sys.stderr, "No positive predictions, XML file", tag+self.tag+"pred.xml", "not written"
    #     return data #None
Example 9: __init__
def __init__(self, examples, predictions=None, classSet=None):
    if type(classSet) == types.StringType: # class names are in file
        classSet = IdSet(filename=classSet)
    if type(predictions) == types.StringType: # predictions are in file
        predictions = ExampleUtils.loadPredictions(predictions)
    if type(examples) == types.StringType: # examples are in file
        examples = ExampleUtils.readExamples(examples, False)
    self.classSet = classSet
    self.dataByClass = defaultdict(EvaluationData)
    #self.untypedUndirected = None
    self.untypedCurrentMajorId = None
    self.untypedPredictionQueue = []
    self.untypedUndirected = EvaluationData()
    #self.AUC = None
    if predictions != None:
        self._calculate(examples, predictions)
Example 10: optimize
def optimize(self, examples, outDir, parameters, classifyExamples, classIds, step="BOTH", evaluator=None, determineThreshold=False, timeout=None, downloadAllModels=False):
    assert step in ["BOTH", "SUBMIT", "RESULTS"], step
    outDir = os.path.abspath(outDir)
    # Initialize training (or reconnect to existing jobs)
    combinations = Parameters.getCombinations(Parameters.get(parameters, valueListKey="c")) #Core.OptimizeParameters.getParameterCombinations(parameters)
    trained = []
    for combination in combinations:
        trained.append( self.train(examples, outDir, combination, classifyExamples, replaceRemoteExamples=(len(trained) == 0), dummy=(step == "RESULTS")) )
    if step == "SUBMIT": # Return already
        classifier = copy.copy(self)
        classifier.setState("OPTIMIZE")
        return classifier
    # Wait for the training to finish
    finalJobStatus = self.connection.waitForJobs([x.getJob() for x in trained])
    # Evaluate the results
    print >> sys.stderr, "Evaluating results"
    #Stream.setIndent(" ")
    bestResult = None
    if evaluator == None:
        evaluator = self.defaultEvaluator
    for i in range(len(combinations)):
        id = trained[i].parameterIdStr
        #Stream.setIndent(" ")
        # Get predictions
        predictions = None
        if trained[i].getStatus() == "FINISHED":
            predictions = trained[i].downloadPredictions()
        else:
            print >> sys.stderr, "No results for combination" + id
            continue
        if downloadAllModels:
            trained[i].downloadModel()
        # Compare to other results
        print >> sys.stderr, "*** Evaluating results for combination" + id + " ***"
        threshold = None
        if determineThreshold:
            print >> sys.stderr, "Thresholding, original micro =",
            evaluation = evaluator.evaluate(classifyExamples, predictions, classIds, os.path.join(outDir, "evaluation-before-threshold" + id + ".csv"), verbose=False)
            print >> sys.stderr, evaluation.microF.toStringConcise()
            threshold, bestF = evaluator.threshold(classifyExamples, predictions)
            print >> sys.stderr, "threshold =", threshold, "at binary fscore", str(bestF)[0:6]
        evaluation = evaluator.evaluate(classifyExamples, ExampleUtils.loadPredictions(predictions, threshold=threshold), classIds, os.path.join(outDir, "evaluation" + id + ".csv"))
        if bestResult == None or evaluation.compare(bestResult[0]) > 0: #: averageResult.fScore > bestResult[1].fScore:
            bestResult = [evaluation, trained[i], combinations[i], threshold]
        if not self.connection.isLocal():
            os.remove(predictions) # remove predictions to save space
    #Stream.setIndent()
    if bestResult == None:
        raise Exception("No results for any parameter combination")
    print >> sys.stderr, "*** Evaluation complete", finalJobStatus, "***"
    print >> sys.stderr, "Selected parameters", bestResult[2]
    classifier = copy.copy(bestResult[1])
    classifier.threshold = bestResult[3]
    classifier.downloadModel()
    return classifier
Example 11: __init__
def __init__(self, examples, predictions=None, classSet=None):
    if type(classSet) == types.StringType: # class names are in file
        classSet = IdSet(filename=classSet)
    if type(predictions) == types.StringType: # predictions are in file
        predictions = ExampleUtils.loadPredictions(predictions)
    if type(examples) == types.StringType: # examples are in file
        examples = ExampleUtils.readExamples(examples, False)
    corpusElements = Core.SentenceGraph.loadCorpus(BXEvaluator.corpusFilename, BXEvaluator.parse, BXEvaluator.tokenization)
    # Build interaction xml
    xml = BioTextExampleWriter.write(examples, predictions, corpusElements, None, BXEvaluator.ids+".class_names", BXEvaluator.parse, BXEvaluator.tokenization)
    xml = ix.splitMergedElements(xml, None)
    xml = ix.recalculateIds(xml, None, True)
    #xml = ExampleUtils.writeToInteractionXML(examples, predictions, SharedTaskEvaluator.corpusElements, None, "genia-direct-event-ids.class_names", SharedTaskEvaluator.parse, SharedTaskEvaluator.tokenization)
    # Convert to GENIA format
    STFormat.ConvertXML.toSTFormat(xml, BXEvaluator.geniaDir, outputTag="a2")
    #gifxmlToGenia(xml, BXEvaluator.geniaDir, task=SharedTaskEvaluator.task, verbose=False)
    # Use GENIA evaluation tool
    self.results = BioNLP11GeniaTools.evaluateBX(BXEvaluator.geniaDir, corpusName=BXEvaluator.corpusTag)
    corpusElements = None
Example 12: __init__
def __init__(self, examples=None, predictions=None, classSet=None):
    if type(classSet) == types.StringType: # class names are in file
        classSet = IdSet(filename=classSet)
    if type(predictions) == types.StringType: # predictions are in file
        predictions = ExampleUtils.loadPredictions(predictions)
    if type(examples) == types.StringType: # examples are in file
        examples = ExampleUtils.readExamples(examples, False)
    # self.examples = examples
    # self.predictions = predictions
    self.truePositives = 0
    self.falsePositives = 0
    self.trueNegatives = 0
    self.falseNegatives = 0
    self.precision = None
    self.recall = None
    self.fScore = None
    self.AUC = None
    self.type = "binary"
    if predictions != None:
        self._calculate(examples, predictions)
Example 13: polynomizeExamples
def polynomizeExamples(exampleFile, outFile, weightFeatures, idSet):
    outFile = open(outFile, "wt")
    addCount = 0
    f = open(exampleFile)
    numExamples = sum([1 for line in f])
    f.close()
    counter = ProgressCounter(numExamples, "Polynomize examples", step=0)
    weightFeatureIds = {}
    for weightFeature in weightFeatures:
        wId = idSet.getId(weightFeature, False)
        if wId == None:
            sys.exit("Weight vector feature " + weightFeature + " not in id file")
        weightFeatureIds[weightFeature] = wId
    print "Polynomizing", exampleFile
    exampleCache = []
    for example in ExampleUtils.readExamples(exampleFile):
        counter.update(1, "Processing example ("+example[0]+"): ")
        features = example[2]
        for i in range(len(weightFeatures)-1):
            wI = weightFeatures[i]
            wIid = weightFeatureIds[wI]
            if not features.has_key(wIid):
                continue
            for j in range(i + 1, len(weightFeatures)):
                wJ = weightFeatures[j]
                wJid = weightFeatureIds[wJ]
                if not features.has_key(wJid):
                    continue
                # Make polynomial feature
                features[idSet.getId(wI + "_AND_" + wJ)] = 1
                addCount += 1
        exampleCache.append(example)
        if len(exampleCache) > 50:
            ExampleUtils.appendExamples(exampleCache, outFile)
            exampleCache = []
    ExampleUtils.appendExamples(exampleCache, outFile)
    outFile.close()
    print "Added", addCount, "polynomial features"
Example 14: addExamples
def addExamples(exampleFile, predictionFile, classFile, matrix):
    classSet = IdSet(filename=classFile)
    f = open(predictionFile, "rt")
    for example in ExampleUtils.readExamples(exampleFile, False):
        pred = int(f.readline().split()[0])
        predClasses = classSet.getName(pred)
        goldClasses = classSet.getName(example[1])
        for predClass in predClasses.split("---"):
            for goldClass in goldClasses.split("---"):
                matrix[predClass][goldClass]
                matrix[goldClass][predClass] += 1
    f.close()
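addExamples indexes matrix[pred][gold] and matrix[gold][pred] without initializing either cell, so the matrix is expected to tolerate missing keys; a two-level defaultdict of ints does exactly that. A sketch of preparing such a matrix and printing the counts, with placeholder file names:

from collections import defaultdict

# Two-level defaultdict so any matrix[a][b] lookup creates the cell with count 0.
matrix = defaultdict(lambda: defaultdict(int))
addExamples("test.examples", "test.predictions", "test.class_names", matrix)
for gold in sorted(matrix):
    for pred in sorted(matrix[gold]):
        if matrix[gold][pred] > 0:
            print gold, "->", pred, matrix[gold][pred]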
Example 15: threshold
def threshold(cls, examples, predictions):
    # Make negative confidence score / true class pairs
    if type(examples) in types.StringTypes:
        examples = ExampleUtils.readExamples(examples, False)
    if type(predictions) in types.StringTypes:
        predictions = ExampleUtils.loadPredictions(predictions)
    pairs = []
    realPositives = 0
    for example, prediction in itertools.izip(examples, predictions):
        trueClass = example[1]
        assert(trueClass > 0) # multiclass classification uses non-negative integers
        if trueClass > 1:
            realPositives += 1
        negClassValue = prediction[1]
        pairs.append( (negClassValue, trueClass) )
    pairs.sort(reverse=True)
    realNegatives = len(pairs) - realPositives
    # When starting thresholding, all examples are considered positive
    binaryF = EvaluationData()
    binaryF._tp = realPositives
    binaryF._fp = realNegatives
    binaryF._fn = 0
    binaryF.calculateFScore()
    fscore = binaryF.fscore
    threshold = pairs[0][0]-1.
    # Turn one example negative at a time
    for pair in pairs:
        if pair[1] == 1: # the real class is negative
            binaryF._fp -= 1 # false positive -> true negative
        else: # the real class is a positive class
            binaryF._tp -= 1 # true positive -> ...
            binaryF._fn += 1 # ... false negative
        binaryF.calculateFScore()
        if binaryF.fscore > fscore:
            fscore = binaryF.fscore
            threshold = pair[0]+0.00000001
    return threshold, fscore
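threshold is written as a classmethod and, like the constructors above, accepts either loaded objects or file paths. Example 10 shows the returned cutoff being fed straight back into ExampleUtils.loadPredictions. A standalone sketch, where SomeEvaluator is only a placeholder for whichever evaluator class hosts this method and the file names are placeholders too:

from Core import ExampleUtils

# SomeEvaluator stands in for the evaluator class defining threshold() above.
threshold, bestF = SomeEvaluator.threshold("test.examples", "test.classifications")
print "threshold =", threshold, "binary F-score =", bestF
# Re-load the predictions with the cutoff applied, as in Example 10.
predictions = ExampleUtils.loadPredictions("test.classifications", threshold=threshold)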