本文整理汇总了Python中Core.ExampleUtils.readExamples方法的典型用法代码示例。如果您正苦于以下问题：Python ExampleUtils.readExamples方法的具体用法？Python ExampleUtils.readExamples怎么用？Python ExampleUtils.readExamples使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Core.ExampleUtils的用法示例。
在下文中一共展示了ExampleUtils.readExamples方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test
# 需要导入模块: from Core import ExampleUtils [as 别名]
# 或者: from Core.ExampleUtils import readExamples [as 别名]
def test(cls, examples, modelPath, output=None, parameters=None, timeout=None):
    """
    Baseline "All-True" classifier: predict class 2 for every example.

    No model is consulted; modelPath, parameters and timeout are accepted
    but not used by this implementation.

    @type examples: string (filename) or list of examples
    @param examples: the examples to classify; a filename is read with
                     Example.readExamples
    @type output: string
    @param output: predictions file to write (defaults to "predictions")
    @return: a list holding one [2] entry per example
    """
    if isinstance(examples, list):
        print >> sys.stderr, "Classifying", len(examples), "with All-True Classifier"
        # The receiver of this method is named 'cls'; the original code
        # referred to an undefined 'self' on the following lines.
        examples = cls.filterClassificationSet(examples, False)[0]
        Example.writeExamples(examples, cls.tempDir+"/test.dat")
    else:
        print >> sys.stderr, "Classifying file", examples, "with All-True Classifier"
        examples = Example.readExamples(examples,False)
    print >> sys.stderr, "Note! Classification must be binary"
    # Every example gets the same prediction, regardless of its content
    predictions = [[2] for example in examples]
    if output is None:
        output = "predictions"
    # 'with' guarantees the file is closed even if a write fails
    with open(output, "wt") as f:
        for p in predictions:
            f.write(str(p[0])+"\n")
    return predictions
示例2: __init__
# 需要导入模块: from Core import ExampleUtils [as 别名]
# 或者: from Core.ExampleUtils import readExamples [as 别名]
def __init__(self, examples, predictions=None, classSet=None):
    """
    Prepare per-class evaluation data and, if predictions are given,
    evaluate them immediately.

    Each argument may be passed as a filename (string), in which case it
    is loaded before use.

    @param examples: examples or the name of an example file
    @param predictions: predictions or the name of a predictions file
    @param classSet: an IdSet or the name of a class id file
    """
    stringType = types.StringType
    if type(classSet) == stringType: # class names are in file
        classSet = IdSet(filename=classSet)
    if type(predictions) == stringType: # predictions are in file
        predictions = ExampleUtils.loadPredictions(predictions)
    if type(examples) == stringType: # examples are in file
        examples = ExampleUtils.readExamples(examples, False)
    self.classSet = classSet

    # Class ids, ordered alphabetically by class name
    if classSet != None:
        orderedNames = sorted(classSet.Ids.keys())
    else:
        orderedNames = []
    self.classes = [classSet.getId(orderedName) for orderedName in orderedNames]

    # One EvaluationData per known class
    self.dataByClass = {}
    for classId in self.classes:
        self.dataByClass[classId] = EvaluationData()
    # hack for unnamed classes: fall back to class ids 1 and 2
    if not self.dataByClass:
        self.dataByClass[1] = EvaluationData()
        self.dataByClass[2] = EvaluationData()

    self.untypedCurrentMajorId = None
    self.untypedPredictionQueue = []
    self.untypedUndirected = EvaluationData()
    if predictions != None:
        self._calculate(examples, predictions)
示例3: loadExamples
# 需要导入模块: from Core import ExampleUtils [as 别名]
# 或者: from Core.ExampleUtils import readExamples [as 别名]
def loadExamples(self, examples, predictions):
    """
    Load examples and predictions from files when filenames are given.

    An argument that is a string is treated as a filename and replaced
    with the loaded data; any other value is passed through unchanged.

    @param examples: examples or the name of an example file
    @param predictions: predictions or the name of a predictions file
    @return: the tuple (examples, predictions)
    """
    # isinstance also accepts str subclasses, unlike an exact type comparison
    if isinstance(predictions, str):
        print >> sys.stderr, "Reading predictions from", predictions
        predictions = ExampleUtils.loadPredictions(predictions)
    if isinstance(examples, str):
        print >> sys.stderr, "Reading examples from", examples
        examples = ExampleUtils.readExamples(examples, False)
    return examples, predictions
示例4: test
# 需要导入模块: from Core import ExampleUtils [as 别名]
# 或者: from Core.ExampleUtils import readExamples [as 别名]
def test(cls, examples, modelPath, output=None, parameters=None, forceInternal=False): # , timeout=None):
    """
    Classify examples with a pre-trained model.

    @type examples: string (filename) or list (or iterator) of examples
    @param examples: a list or file containing examples in SVM-format
    @type modelPath: string
    @param modelPath: filename of the pre-trained model file
    @type parameters: a dictionary or string
    @param parameters: parameters for the classifier
    @type output: string
    @param output: the name of the predictions file to be written
    @type forceInternal: Boolean
    @param forceInternal: Use python classifier even if SVM Multiclass binary is defined in Settings.py
    @return: one entry per line of the predictions file, each of the form
             [predicted class as int] + the remaining fields as strings
    """
    if forceInternal or Settings.SVMMultiClassDir is None:
        return cls.testInternal(examples, modelPath, output)
    timer = Timer()
    if isinstance(examples, list):
        print >> sys.stderr, "Classifying", len(examples), "with SVM-MultiClass model", modelPath
        # The receiver of this method is named 'cls'; the original code
        # referred to an undefined 'self' here and further below.
        examples = cls.filterClassificationSet(examples, False)[0]
        testPath = cls.tempDir+"/test.dat"
        Example.writeExamples(examples, testPath)
    else:
        print >> sys.stderr, "Classifying file", examples, "with SVM-MultiClass model", modelPath
        testPath = cls.stripComments(examples)
        examples = Example.readExamples(examples,False)
    # NOTE(review): hard-coded, machine-specific path to the external predict
    # binary; it should presumably come from Settings -- confirm before relying on it.
    args = ["/home/jari/Programs/liblinear-1.5-poly2/predict"]
    if modelPath is None:
        modelPath = "model"
    if parameters is not None:
        # Work on a copy so the caller's parameters are not modified
        parameters = copy.copy(parameters)
        if "c" in parameters:
            del parameters["c"]
        if "predefined" in parameters:
            modelPath = os.path.join(parameters["predefined"][0],"classifier/model")
            del parameters["predefined"]
        cls.__addParametersToSubprocessCall(args, parameters)
    if output is None:
        output = "predictions"
        logFile = open("svmmulticlass.log","at")
    else:
        logFile = open(output+".log","wt")
    args += [testPath, modelPath, output]
    try:
        subprocess.call(args, stdout = logFile, stderr = logFile)
    finally:
        # The original never closed the log file
        logFile.close()
    predictions = []
    with open(output, "rt") as predictionsFile:
        for line in predictionsFile:
            fields = line.split()
            predictions.append( [int(fields[0])] + fields[1:] )
    print >> sys.stderr, timer.toString()
    return predictions
示例5: addExamples
# 需要导入模块: from Core import ExampleUtils [as 别名]
# 或者: from Core.ExampleUtils import readExamples [as 别名]
def addExamples(exampleFile, predictionFile, classFile, matrix):
    """
    Add the gold/predicted class pairs of one example set to matrix.

    Examples and prediction lines are consumed in parallel. Class names
    containing "---" are split into their parts and every combination of
    a predicted part and a gold part is counted in matrix[gold][predicted].

    @param exampleFile: name of the example file
    @param predictionFile: name of the predictions file, one prediction per
                           line with the predicted class id as first field
    @param classFile: name of the class id file, loaded into an IdSet
    @param matrix: nested mapping of counts, updated in place
    """
    classSet = IdSet(filename=classFile)
    # 'with' closes the predictions file even if parsing fails midway
    with open(predictionFile, "rt") as f:
        for example in ExampleUtils.readExamples(exampleFile, False):
            pred = int(f.readline().split()[0])
            predClasses = classSet.getName(pred).split("---")
            goldClasses = classSet.getName(example[1]).split("---")
            for predClass in predClasses:
                for goldClass in goldClasses:
                    # NOTE(review): bare lookup kept from the original; presumably
                    # matrix is a nested defaultdict and this creates the
                    # [predicted][gold] cell -- confirm against the caller.
                    matrix[predClass][goldClass]
                    matrix[goldClass][predClass] += 1
示例6: write
# 需要导入模块: from Core import ExampleUtils [as 别名]
# 或者: from Core.ExampleUtils import readExamples [as 别名]
def write(
    cls,
    examples,
    predictions,
    corpus,
    outputFile,
    classSet=None,
    parse=None,
    tokenization=None,
    goldCorpus=None,
    insertWeights=False,
):
    """
    Write examples and predictions as XML using the writer that matches
    the example type.

    The writer is selected from the "xtype" entry in the extra information
    (index 3) of the first example.

    @param examples: examples or the name of an example file
    @param insertWeights: when true, sets insertWeights on the writer used
                          for "token" examples; not used for other types
    @return: whatever the selected writer's writeXML returns
    """
    if type(examples) == types.StringType:
        print >>sys.stderr, "Reading examples from", examples
        examples = ExampleUtils.readExamples(examples, False)

    # Peek at the first example only. This looks a bit strange, but should
    # work with the re-iterable generators that readExamples returns.
    xType = None
    for firstExample in examples:
        extra = firstExample[3]
        assert extra.has_key("xtype")
        xType = extra["xtype"]
        break

    if xType == "token":
        writer = EntityExampleWriter()
        if insertWeights:
            writer.insertWeights = True
    elif xType == "edge":
        writer = EdgeExampleWriter()
    elif xType == "task3":
        writer = ModifierExampleWriter()
    elif xType == "entRel":
        writer = EntityRelationExampleWriter()
    elif xType == "phrase":
        writer = PhraseTriggerExampleWriter()
    # IF LOCAL
    elif xType == "um":
        writer = UnmergingExampleWriter()
    # ENDIF
    else:
        assert False, ("Unknown entity type", xType)

    return writer.writeXML(
        examples,
        predictions,
        corpus,
        outputFile,
        classSet,
        parse,
        tokenization,
        goldCorpus=goldCorpus,
    )
示例7: __init__
# 需要导入模块: from Core import ExampleUtils [as 别名]
# 或者: from Core.ExampleUtils import readExamples [as 别名]
def __init__(self, examples, predictions=None, classSet=None):
    """
    Evaluate predictions with the GENIA shared task evaluation tool.

    The examples and predictions are written as interaction XML, converted
    to the GENIA format and evaluated; the outcome is stored in
    self.results. Corpus file, parse, tokenization, id file prefix, output
    directory and task are read from SharedTaskEvaluator class attributes.

    @param examples: examples or the name of an example file
    @param predictions: predictions or the name of a predictions file
    @param classSet: an IdSet or the name of a class id file; loaded when
                     given as a filename but not otherwise used here
    """
    stringType = types.StringType
    if type(classSet) == stringType: # class names are in file
        classSet = IdSet(filename=classSet)
    if type(predictions) == stringType: # predictions are in file
        predictions = ExampleUtils.loadPredictions(predictions)
    if type(examples) == stringType: # examples are in file
        examples = ExampleUtils.readExamples(examples, False)

    settings = SharedTaskEvaluator
    settings.corpusElements = Core.SentenceGraph.loadCorpus(
        settings.corpusFilename, settings.parse, settings.tokenization)
    # Build interaction xml
    xml = BioTextExampleWriter.write(
        examples, predictions, settings.corpusElements, None,
        settings.ids+".class_names", settings.parse, settings.tokenization)
    # Convert to GENIA format
    gifxmlToGenia(xml, settings.geniaDir, task=settings.task, verbose=False)
    # Use GENIA evaluation tool
    self.results = evaluateSharedTask(
        settings.geniaDir, task=settings.task, evaluations=["approximate"], verbose=False)
示例8: __init__
# 需要导入模块: from Core import ExampleUtils [as 别名]
# 或者: from Core.ExampleUtils import readExamples [as 别名]
def __init__(self, examples, predictions=None, classSet=None):
    """
    Set up an evaluator whose per-class data is created on demand and,
    if predictions are given, evaluate them immediately.

    Each argument may be passed as a filename (string), in which case it
    is loaded before use.

    @param examples: examples or the name of an example file
    @param predictions: predictions or the name of a predictions file
    @param classSet: an IdSet or the name of a class id file
    """
    stringType = types.StringType
    if type(classSet) == stringType: # class names are in file
        classSet = IdSet(filename=classSet)
    if type(predictions) == stringType: # predictions are in file
        predictions = ExampleUtils.loadPredictions(predictions)
    if type(examples) == stringType: # examples are in file
        examples = ExampleUtils.readExamples(examples, False)

    self.classSet = classSet
    # An EvaluationData is created the first time a class id is looked up
    self.dataByClass = defaultdict(EvaluationData)
    self.untypedCurrentMajorId = None
    self.untypedPredictionQueue = []
    self.untypedUndirected = EvaluationData()

    if predictions != None:
        self._calculate(examples, predictions)
示例9: classify
# 需要导入模块: from Core import ExampleUtils [as 别名]
# 或者: from Core.ExampleUtils import readExamples [as 别名]
def classify(self, examples, output, model=None, finishBeforeReturn=False, replaceRemoteFiles=True):
    """
    "All-Correct" classifier: write each example's own class as its prediction.

    model, finishBeforeReturn and replaceRemoteFiles are accepted but not
    used by this implementation.

    @type examples: string (filename) or list of examples
    @param examples: the examples to classify
    @type output: string
    @param output: predictions file to write; converted to an absolute path
    @return: a shallow copy of this classifier whose predictions attribute
             is the absolute path of the written file
    """
    output = os.path.abspath(output)
    # Get examples
    if isinstance(examples, list):
        print >> sys.stderr, "Classifying", len(examples), "with All-Correct Classifier"
    else:
        print >> sys.stderr, "Classifying file", examples, "with All-Correct Classifier"
        # NOTE(review): the flattened source does not show whether these two
        # lines belong to this branch; only a file name needs loading, so
        # they are placed here -- confirm against the project source.
        examples = self.getExampleFile(examples, upload=False, replaceRemote=False, dummy=False)
        examples = Example.readExamples(examples, False)
    # Return a new classifier instance for following the training process and using the model
    classifier = copy.copy(self)
    # Classify; 'with' closes the file even if writing fails
    with open(output, "wt") as f:
        for example in examples:
            f.write(str(example[1]) + "\n")
    classifier.predictions = output
    return classifier
示例10: __init__
# 需要导入模块: from Core import ExampleUtils [as 别名]
# 或者: from Core.ExampleUtils import readExamples [as 别名]
def __init__(self, examples, predictions=None, classSet=None):
    """
    Evaluate predictions with BioNLP11GeniaTools.evaluateBX.

    The examples and predictions are written as interaction XML,
    post-processed, converted to the shared task format and evaluated; the
    outcome is stored in self.results. Corpus file, parse, tokenization,
    id file prefix, output directory and corpus tag are read from
    BXEvaluator class attributes.

    @param examples: examples or the name of an example file
    @param predictions: predictions or the name of a predictions file
    @param classSet: an IdSet or the name of a class id file; loaded when
                     given as a filename but not otherwise used here
    """
    stringType = types.StringType
    if type(classSet) == stringType: # class names are in file
        classSet = IdSet(filename=classSet)
    if type(predictions) == stringType: # predictions are in file
        predictions = ExampleUtils.loadPredictions(predictions)
    if type(examples) == stringType: # examples are in file
        examples = ExampleUtils.readExamples(examples, False)

    settings = BXEvaluator
    corpusElements = Core.SentenceGraph.loadCorpus(
        settings.corpusFilename, settings.parse, settings.tokenization)
    # Build interaction xml
    xml = BioTextExampleWriter.write(
        examples, predictions, corpusElements, None,
        settings.ids+".class_names", settings.parse, settings.tokenization)
    xml = ix.splitMergedElements(xml, None)
    xml = ix.recalculateIds(xml, None, True)
    # Convert to GENIA format
    STFormat.ConvertXML.toSTFormat(xml, settings.geniaDir, outputTag="a2")
    # Use GENIA evaluation tool
    self.results = BioNLP11GeniaTools.evaluateBX(
        settings.geniaDir, corpusName=settings.corpusTag)
    # The original clears the local reference to the loaded corpus
    corpusElements = None
示例11: __init__
# 需要导入模块: from Core import ExampleUtils [as 别名]
# 或者: from Core.ExampleUtils import readExamples [as 别名]
def __init__(self, examples=None, predictions=None, classSet=None):
    """
    Initialise the counters of a binary evaluator and, if predictions are
    given, evaluate them immediately.

    Each argument may be passed as a filename (string), in which case it
    is loaded before use.

    @param examples: examples or the name of an example file
    @param predictions: predictions or the name of a predictions file
    @param classSet: an IdSet or the name of a class id file; loaded when
                     given as a filename but not stored by this method
    """
    stringType = types.StringType
    if type(classSet) == stringType: # class names are in file
        classSet = IdSet(filename=classSet)
    if type(predictions) == stringType: # predictions are in file
        predictions = ExampleUtils.loadPredictions(predictions)
    if type(examples) == stringType: # examples are in file
        examples = ExampleUtils.readExamples(examples, False)

    # Confusion counts start from zero
    self.truePositives = 0
    self.falsePositives = 0
    self.trueNegatives = 0
    self.falseNegatives = 0
    # Derived scores are unset until calculated
    self.precision = None
    self.recall = None
    self.fScore = None
    self.AUC = None
    self.type = "binary"

    if predictions != None:
        self._calculate(examples, predictions)
示例12: polynomizeExamples
# 需要导入模块: from Core import ExampleUtils [as 别名]
# 或者: from Core.ExampleUtils import readExamples [as 别名]
def polynomizeExamples(exampleFile, outFile, weightFeatures, idSet):
    """
    Add pairwise "A_AND_B" features to examples and write them to a new file.

    For every example read from exampleFile, each pair of weight features
    (in list order) whose ids are both present in the example's feature
    dictionary adds a feature named "<first>_AND_<second>" with value 1.
    The modified examples are appended to outFile in batches.

    @param exampleFile: name of the example file to read
    @param outFile: name of the example file to write
    @param weightFeatures: list of feature names to combine pairwise
    @param idSet: maps feature names to ids through its getId method
    @raise SystemExit: if a weight feature has no id in idSet
    """
    addCount = 0
    # 'with' closes both files on every exit path, including the early exit below
    with open(outFile, "wt") as out:
        with open(exampleFile) as f:
            numExamples = sum(1 for line in f)
        counter = ProgressCounter(numExamples, "Polynomize examples", step=0)
        weightFeatureIds = {}
        for weightFeature in weightFeatures:
            # NOTE(review): the False argument presumably prevents getId from
            # creating a new id -- confirm against IdSet.
            wId = idSet.getId(weightFeature, False)
            if wId is None:
                # sys.exit takes a single argument; the original passed three,
                # which raised TypeError instead of exiting with the message.
                sys.exit("Weight vector feature " + str(weightFeature) + " not in id file")
            weightFeatureIds[weightFeature] = wId
        # Single-argument print(...) outputs the same text as the original
        # print statement and is also valid syntax on Python 3
        print("Polynomizing %s" % (exampleFile,))
        exampleCache = []
        for example in ExampleUtils.readExamples(exampleFile):
            counter.update(1, "Processing example ("+example[0]+"): ")
            features = example[2]
            for i in range(len(weightFeatures)-1):
                wI = weightFeatures[i]
                if weightFeatureIds[wI] not in features:
                    continue
                for j in range(i + 1, len(weightFeatures)):
                    wJ = weightFeatures[j]
                    if weightFeatureIds[wJ] not in features:
                        continue
                    # Make polynomial feature
                    features[idSet.getId(wI + "_AND_" + wJ)] = 1
                    addCount += 1
            exampleCache.append(example)
            # Flush in batches to keep memory use bounded
            if len(exampleCache) > 50:
                ExampleUtils.appendExamples(exampleCache, out)
                exampleCache = []
        ExampleUtils.appendExamples(exampleCache, out)
    print("Added %s polynomial features" % (addCount,))
示例13: threshold
# 需要导入模块: from Core import ExampleUtils [as 别名]
# 或者: from Core.ExampleUtils import readExamples [as 别名]
def threshold(cls, examples, predictions):
    """
    Search for the score cut-off that gives the best binary F-score.

    Every example is paired with the value at index 1 of its prediction.
    Starting with all examples counted as positive, the examples are turned
    negative one at a time in descending score order and the F-score is
    recalculated after each step.

    @param examples: examples or the name of an example file
    @param predictions: predictions or the name of a predictions file
    @return: the tuple (threshold, fscore) of the best point found
    """
    if type(examples) in types.StringTypes:
        examples = ExampleUtils.readExamples(examples, False)
    if type(predictions) in types.StringTypes:
        predictions = ExampleUtils.loadPredictions(predictions)

    # Make negative confidence score / true class pairs
    scored = []
    positiveCount = 0
    for example, prediction in itertools.izip(examples, predictions):
        goldClass = example[1]
        assert(goldClass > 0) # multiclass classification uses non-negative integers
        if goldClass > 1:
            positiveCount += 1
        scored.append( (prediction[1], goldClass) )
    scored.sort(reverse=True)
    negativeCount = len(scored) - positiveCount

    # When starting thresholding, all examples are considered positive
    binaryF = EvaluationData()
    binaryF._tp = positiveCount
    binaryF._fp = negativeCount
    binaryF._fn = 0
    binaryF.calculateFScore()
    bestFscore = binaryF.fscore
    bestThreshold = scored[0][0]-1.

    # Turn one example negative at a time
    for score, goldClass in scored:
        if goldClass == 1: # the real class is negative
            binaryF._fp -= 1 # false positive -> true negative
        else: # the real class is a positive class
            binaryF._tp -= 1 # true positive -> ...
            binaryF._fn += 1 # ... false negative
        binaryF.calculateFScore()
        if binaryF.fscore > bestFscore:
            bestFscore = binaryF.fscore
            bestThreshold = score+0.00000001
    return bestThreshold, bestFscore
示例14: __init__
# 需要导入模块: from Core import ExampleUtils [as 别名]
# 或者: from Core.ExampleUtils import readExamples [as 别名]
def __init__(self, examples=None, predictions=None, classSet=None):
    """
    Set up the ChemProt evaluator and, if predictions are given, run both
    the ChemProt evaluation and an AveragingMultiClassEvaluator.

    The ChemProt evaluation is run only when the first example carries
    extra information (index 3); the AveragingMultiClassEvaluator is always
    run and stored in self.internal.

    @param examples: examples or the name of an example file
    @param predictions: predictions or the name of a predictions file
    @param classSet: an IdSet or the name of a class id file
    """
    stringType = types.StringType
    if type(classSet) == stringType: # class names are in file
        classSet = IdSet(filename=classSet)
    if type(predictions) == stringType: # predictions are in file
        predictions = ExampleUtils.loadPredictions(predictions)
    if type(examples) == stringType: # examples are in file
        examples = ExampleUtils.readExamples(examples, False)

    self.keep = set(["CPR:3", "CPR:4", "CPR:5", "CPR:6", "CPR:9"])
    self.classSet = classSet
    self.results = None
    self.internal = None

    if predictions != None:
        # Only the first example is inspected; the loop ends after one pass
        for firstExample in examples:
            if firstExample[3] != None:
                print >> sys.stderr, "ChemProt Evaluator:"
                self._calculateExamples(examples, predictions)
            else:
                print >> sys.stderr, "No example extra info, skipping ChemProt evaluation"
            break
        self.internal = AveragingMultiClassEvaluator(examples, predictions, classSet)
        print >> sys.stderr, "AveragingMultiClassEvaluator:"
        print >> sys.stderr, self.internal.toStringConcise()
示例15: determineThreshold
# 需要导入模块: from Core import ExampleUtils [as 别名]
# 或者: from Core.ExampleUtils import readExamples [as 别名]
def determineThreshold(self, examples, predictions):
    """
    Determine a per-class score threshold that gives the best F-score.

    For every class in self.classes except class 1, the collected
    (score, true label, predicted label) tuples are sorted by score and
    swept once; the point with the highest F-score defines the threshold.
    As a side effect self.thresholds is set to the returned dictionary and
    self.dataByClass[cls] is replaced with a copy of the evaluation data at
    the best point whenever an improvement is found.

    NOTE(review): the block nesting was reconstructed from a source without
    indentation -- confirm against the project source.

    @param examples: examples or the name of an example file
    @param predictions: predictions or the name of a predictions file
    @return: a dictionary mapping class id to threshold
    """
    if type(predictions) == types.StringType: # predictions are in file
        predictions = ExampleUtils.loadPredictions(predictions)
    if type(examples) == types.StringType: # examples are in file
        examples = ExampleUtils.readExamples(examples, False)
    # One list of (score, true label, predicted label) tuples per class
    examplesByClass = {}
    for cls in self.classes:
        examplesByClass[cls] = []
    # prepare examples
    for example, prediction in itertools.izip(examples, predictions):
        # Check true class for multilabel
        trueClass = example[1]
        trueClassName = self.classSet.getName(trueClass)
        assert(trueClass > 0) # multiclass classification uses non-negative integers
        # A "---" separated name stands for several classes at once
        if "---" in trueClassName:
            trueClass = set()
            for name in trueClassName.split("---"):
                trueClass.add(self.classSet.getId(name))
        else:
            trueClass = [trueClass]
        # Check prediction for multilabel
        predictedClasses = prediction[0]
        if type(predictedClasses) == types.IntType:
            predictedClasses = [predictedClasses]
        # Predicted classes other than 1 are recorded with predicted label 2;
        # presumably prediction[classId] is the score for that class -- TODO confirm
        for predType in predictedClasses:
            if predType != 1:
                exTrueClass = 1
                if predType in trueClass:
                    exTrueClass = 2
                examplesByClass[predType].append( (prediction[predType], exTrueClass, 2) )
        # positives are negatives for other classes
        for cls in self.classes:
            if cls not in predictedClasses:
                exTrueClass = 1
                if cls in trueClass:
                    exTrueClass = 2
                examplesByClass[cls].append( (prediction[cls], exTrueClass, 1) )
    # do the thresholding
    thresholdByClass = {}
    for cls in self.classes:
        # Class 1 gets no threshold (presumably the negative class -- TODO confirm)
        if cls == 1:
            continue
        thresholdByClass[cls] = 0.0
        # Ascending sort: tuples compare by score first
        examplesByClass[cls].sort()
        # Start with all below zero being negative, and all above it being what is predicted
        ev = EvaluationData()
        for example in examplesByClass[cls]:
            #print example
            if example[0] < 0.0:
                updateF(ev, example[1], 2, 1) # always negative
            else:
                updateF(ev, example[1], example[2], 1) # what is predicted
        count = 0
        # bestF holds (fscore, count, example tuple, summary string); the
        # initial example tuple has score 0.0 so the threshold stays 0.0
        # when no improvement is found
        bestF = [self.dataByClass[cls].fscore, None, (0.0, None), None]
        for example in examplesByClass[cls]:
            if example[0] < 0.0:
                # Remove original example
                updateF(ev, example[1], 2, -1)
                # Add new example
                updateF(ev, example[1], example[2], 1)
                # Calculate F for this point
            else:
                # Remove original example
                updateF(ev, example[1], example[2], -1)
                # Add new example
                updateF(ev, example[1], 1, 1)
                # Calculate F for this point
            ev.calculateFScore()
            #print example, ev.toStringConcise()
            count += 1
            #if self.classSet.getName(cls) == "Binding":
            #    print count, example, ev.toStringConcise()
            if ev.fscore > bestF[0]:
                bestF = (ev.fscore, count, example, ev.toStringConcise())
                # Keep a snapshot of the evaluation data at the best point
                self.dataByClass[cls] = copy.copy(ev)
        print >> sys.stderr, "Threshold", self.classSet.getName(cls), bestF
        # The threshold is placed just above the score of the best example
        if bestF[2][0] != 0.0:
            thresholdByClass[cls] = bestF[2][0] + 0.00000001
        else:
            thresholdByClass[cls] = 0.0
    #print thresholdByClass
    self.thresholds = thresholdByClass
    #self._calculate(examples, predictions, thresholdByClass)
    #print >> sys.stderr, "Optimal", self.toStringConcise()
    return thresholdByClass