本文整理汇总了Python中Core.ExampleUtils.writeExamples方法的典型用法代码示例。如果您正苦于以下问题：Python ExampleUtils.writeExamples方法的具体用法？Python ExampleUtils.writeExamples怎么用？Python ExampleUtils.writeExamples使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Core.ExampleUtils的用法示例。
在下文中一共展示了ExampleUtils.writeExamples方法的13个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: train
# 需要导入模块: from Core import ExampleUtils [as 别名]
# 或者: from Core.ExampleUtils import writeExamples [as 别名]
def train(cls, examples, parameters, outputFile=None): #, timeout=None):
    """
    Train a classifier on a set of examples by running the external
    liblinear ``train`` binary as a subprocess.

    @type examples: string (filename) or list of examples
    @param examples: a list of examples, or the name of a file containing
        examples in SVM-format. Anything that is not a list is treated as
        a filename.
    @type parameters: a dictionary or string
    @param parameters: parameters for the classifier (normalized with
        cls.getParams before use)
    @type outputFile: string
    @param outputFile: the name of the model file to be written. When None,
        the model is written to "model" and the log is appended to
        "svmmulticlass.log"; otherwise the log goes to outputFile + ".log".
    @rtype: int
    @return: the value returned by subprocess.call for the trainer
    """
    timer = Timer()
    parameters = cls.getParams(parameters)
    # If examples are in a list, they will be written to a file for the trainer
    if type(examples) == types.ListType:
        print >> sys.stderr, "Training SVM-MultiClass on", len(examples), "examples"
        # The receiver of this method is named cls; the original code referred
        # to an undefined name "self" here, which raised NameError for list input.
        # NOTE(review): assumes tempDir and filterTrainingSet are reachable
        # through cls -- confirm against the class definition.
        trainPath = cls.tempDir+"/train.dat"
        examples = cls.filterTrainingSet(examples)
        Example.writeExamples(examples, trainPath)
    else:
        print >> sys.stderr, "Training SVM-MultiClass on file", examples
        trainPath = cls.stripComments(examples)
    # NOTE(review): the trainer location is a hard-coded, machine-specific path.
    args = ["/home/jari/Programs/liblinear-1.5-poly2/train"]
    cls.__addParametersToSubprocessCall(args, parameters)
    if outputFile is None:
        args += [trainPath, "model"]
        logFile = open("svmmulticlass.log","at")
    else:
        args += [trainPath, outputFile]
        logFile = open(outputFile+".log","wt")
    # Close the log even if launching the trainer fails
    try:
        rv = subprocess.call(args, stdout = logFile)
    finally:
        logFile.close()
    print >> sys.stderr, timer.toString()
    return rv
示例2: classify
# 需要导入模块: from Core import ExampleUtils [as 别名]
# 或者: from Core.ExampleUtils import writeExamples [as 别名]
def classify(self, examples, parameters=None):
    """
    Classify examples by running the external binary self.classifyBin and
    reading back the predictions file it writes into self.tempDir.

    @type examples: string (filename) or list of examples
    @param examples: a file of examples (the first space-separated field of
        each line is read as the integer class), or a list of examples that
        is filtered and written to tempDir/test.dat
    @type parameters: passed on to __addParametersToSubprocessCall
    @param parameters: optional extra arguments for the classifier binary
    @return: a list of prediction tuples. For list input each tuple is
        (example, value, self.type, raw prediction line); for file input it
        is ([None, class], value, self.type).
    """
    if type(examples) == types.StringType:
        testFilePath = examples
        predictions = []
        exampleFile = open(examples,"rt")
        # The class is the text before the first space on every line
        realClasses = [int(row.split(" ",1)[0].strip()) for row in exampleFile.readlines()]
        exampleFile.close()
    elif type(examples) == types.ListType:
        examples, predictions = self.filterClassificationSet(examples, True)
        testFilePath = self.tempDir+"/test.dat"
        Example.writeExamples(examples, testFilePath)
    command = [self.classifyBin]
    if parameters != None:
        self.__addParametersToSubprocessCall(command, parameters)
    command.extend([testFilePath, self.tempDir+"/model", self.tempDir+"/predictions"])
    subprocess.call(command, stdout = self.debugFile)
    # The model file is deleted once it has been used for classification
    os.remove(self.tempDir+"/model")
    resultFile = open(self.tempDir+"/predictions", "rt")
    rawLines = resultFile.readlines()
    resultFile.close()
    examplesAreList = type(examples) == types.ListType
    for index, rawLine in enumerate(rawLines):
        if examplesAreList:
            predictions.append( (examples[index], float(rawLine), self.type, rawLine) )
        else:
            predictions.append( ([None, realClasses[index]], float(rawLine), self.type) )
    return predictions
示例3: test
# 需要导入模块: from Core import ExampleUtils [as 别名]
# 或者: from Core.ExampleUtils import writeExamples [as 别名]
def test(cls, examples, modelPath, output=None, parameters=None, timeout=None):
    """
    "Classify" examples with the All-True classifier: every example gets the
    constant prediction 2, and the predictions are written one per line.

    @type examples: string (filename) or list of examples
    @param examples: a list of examples, or the name of an example file that
        is read with Example.readExamples
    @param modelPath: not used by this classifier
    @type output: string
    @param output: name of the predictions file to write; "predictions"
        when None
    @param parameters: not used by this classifier
    @param timeout: not used by this classifier
    @return: a list with one [2] entry per example
    """
    if type(examples) == types.ListType:
        print >> sys.stderr, "Classifying", len(examples), "with All-True Classifier"
        # The receiver is named cls; the original code used an undefined
        # "self" here and raised NameError for list input.
        # NOTE(review): assumes tempDir and filterClassificationSet are
        # reachable through cls -- confirm against the class definition.
        examples, filteredOut = cls.filterClassificationSet(examples, False)
        testPath = cls.tempDir+"/test.dat"
        Example.writeExamples(examples, testPath)
    else:
        print >> sys.stderr, "Classifying file", examples, "with All-True Classifier"
        testPath = examples
        examples = Example.readExamples(examples,False)
    print >> sys.stderr, "Note! Classification must be binary"
    # One constant prediction per example
    predictions = [[2] for example in examples]
    if output is None:
        output = "predictions"
    f = open(output, "wt")
    # Close the output file even if a write fails
    try:
        for p in predictions:
            f.write(str(p[0])+"\n")
    finally:
        f.close()
    return predictions
示例4: test
# 需要导入模块: from Core import ExampleUtils [as 别名]
# 或者: from Core.ExampleUtils import writeExamples [as 别名]
def test(cls, examples, modelPath, output=None, parameters=None, forceInternal=False): # , timeout=None):
    """
    Classify examples with a pre-trained model by running the external
    liblinear ``predict`` binary and parsing the predictions file it writes.

    @type examples: string (filename) or list of examples
    @param examples: a list of examples, or the name of a file containing
        examples in SVM-format
    @type modelPath: string
    @param modelPath: filename of the pre-trained model file ("model" when
        None; replaced by <predefined dir>/classifier/model when parameters
        contains "predefined")
    @type parameters: a dictionary
    @param parameters: parameters for the classifier; the keys "c" and
        "predefined" are removed from a copy before it is passed on
    @type output: string
    @param output: the name of the predictions file to be written
        ("predictions" when None)
    @type forceInternal: Boolean
    @param forceInternal: Use python classifier even if SVM Multiclass binary is defined in Settings.py
    @return: one list per prediction line: the first field as an int,
        followed by the remaining fields as strings
    """
    if forceInternal or Settings.SVMMultiClassDir is None:
        return cls.testInternal(examples, modelPath, output)
    timer = Timer()
    if type(examples) == types.ListType:
        print >> sys.stderr, "Classifying", len(examples), "with SVM-MultiClass model", modelPath
        # The receiver is named cls; the original code used an undefined
        # "self" here and raised NameError for list input.
        # NOTE(review): assumes tempDir and filterClassificationSet are
        # reachable through cls -- confirm against the class definition.
        examples, filteredOut = cls.filterClassificationSet(examples, False)
        testPath = cls.tempDir+"/test.dat"
        Example.writeExamples(examples, testPath)
    else:
        print >> sys.stderr, "Classifying file", examples, "with SVM-MultiClass model", modelPath
        testPath = cls.stripComments(examples)
        examples = Example.readExamples(examples,False)
    # NOTE(review): the classifier location is a hard-coded, machine-specific path.
    args = ["/home/jari/Programs/liblinear-1.5-poly2/predict"]
    if modelPath is None:
        modelPath = "model"
    if parameters is not None:
        # Work on a copy so the caller's dictionary is left untouched
        parameters = copy.copy(parameters)
        if "c" in parameters:
            del parameters["c"]
        if "predefined" in parameters:
            modelPath = os.path.join(parameters["predefined"][0],"classifier/model")
            del parameters["predefined"]
        cls.__addParametersToSubprocessCall(args, parameters)
    if output is None:
        output = "predictions"
        logFile = open("svmmulticlass.log","at")
    else:
        logFile = open(output+".log","wt")
    args += [testPath, modelPath, output]
    # Close the log even if launching the classifier fails
    try:
        subprocess.call(args, stdout = logFile, stderr = logFile)
    finally:
        logFile.close()
    predictionsFile = open(output, "rt")
    try:
        lines = predictionsFile.readlines()
    finally:
        predictionsFile.close()
    predictions = []
    for line in lines:
        fields = line.split()
        predictions.append( [int(fields[0])] + fields[1:] )
    print >> sys.stderr, timer.toString()
    return predictions
示例5: classify
# 需要导入模块: from Core import ExampleUtils [as 别名]
# 或者: from Core.ExampleUtils import writeExamples [as 别名]
def classify(self, examples, parameters=None):
    """
    Predict a class for every example with the in-memory model self.model.

    The filtered examples are also written to tempDir/test.dat.

    @param examples: the examples to classify; they are first passed through
        self.filterClassificationSet together with self.isBinary
    @param parameters: not used by this method
    @return: the predictions list returned by filterClassificationSet,
        extended with one (example, predicted class, "binary" or
        "multiclass") tuple per remaining example
    """
    examples, predictions = self.filterClassificationSet(examples, self.isBinary)
    ExampleUtils.writeExamples(examples, self.tempDir+"/test.dat")
    for example in examples:
        # Only the tag differs between the binary and the multiclass case
        if self.isBinary:
            problemType = "binary"
        else:
            problemType = "multiclass"
        # example[2] is what the model predicts from -- presumably the
        # feature vector; verify against the example format
        predictions.append( (example, self.model.predict(example[2]), problemType) )
    return predictions
示例6: train
# 需要导入模块: from Core import ExampleUtils [as 别名]
# 或者: from Core.ExampleUtils import writeExamples [as 别名]
def train(self, examples, parameters=None, outputDir=None):
    """
    Train a model by running the external binary self.trainBin; the model
    is written to tempDir/model.

    @type examples: string (filename) or list of examples
    @param examples: the name of a training file, or a list of examples that
        is filtered and written to tempDir/train.dat
    @type parameters: a dictionary or None
    @param parameters: parameters for the trainer. The keys "style" (list
        input only) and "timeout" are consumed here and not passed to the
        binary. The caller's dictionary is never modified.
    @param outputDir: not used by this method
    @return: the value returned by killableprocess.call
    """
    timeout = -1
    # Always work on a private copy: the original code deleted keys from the
    # caller's dictionary for filename input and crashed on the default None.
    if parameters is not None:
        parameters = copy.copy(parameters)
    if type(examples) == types.StringType:
        trainFilePath = examples
    elif type(examples) == types.ListType:
        examples = self.filterTrainingSet(examples)
        if parameters is not None and "style" in parameters:
            if "no_duplicates" in parameters["style"]:
                examples = Example.removeDuplicates(examples)
            del parameters["style"]
        trainFilePath = self.tempDir+"/train.dat"
        Example.writeExamples(examples, trainFilePath)
    if parameters is not None and "timeout" in parameters:
        timeout = parameters["timeout"]
        del parameters["timeout"]
    args = [self.trainBin]
    if parameters is not None:
        self.__addParametersToSubprocessCall(args, parameters)
    args += [trainFilePath, self.tempDir+"/model"]
    return killableprocess.call(args, stdout = self.debugFile, timeout = timeout)
示例7: buildGraphKernelFeatures
# 需要导入模块: from Core import ExampleUtils [as 别名]
# 或者: from Core.ExampleUtils import writeExamples [as 别名]
# Build graph-kernel features for the given path in a sentence graph.
# Steps visible below: collect dependency edges along the path, build an
# adjacency matrix, transform it with self._prepareMatrix and convert the
# result to features with self._matrixToFeatures. For the sentence with id
# "LLL.d0.s0" the matrices are additionally dumped as HTML files.
# NOTE(review): the indentation of this snippet has been lost. The last four
# lines (commentLines ... writeExamples) most likely belong inside the second
# 'if ... == "LLL.d0.s0"' debug branch, since they write
# "LLL.d0.s0_example.txt" -- confirm against the original source.
def buildGraphKernelFeatures(self, sentenceGraph, path):
edgeList = []
depGraph = sentenceGraph.dependencyGraph
pt = path
# For each pair of consecutive path elements, collect the edges in both directions
for i in range(1, len(path)):
edgeList.extend(depGraph.getEdges(pt[i], pt[i-1]))
edgeList.extend(depGraph.getEdges(pt[i-1], pt[i]))
edges = edgeList
adjacencyMatrix, labels = self._buildAdjacencyMatrix(sentenceGraph, path, edges)
# Two nodes per token plus one node per dependency
node_count = 2*len(sentenceGraph.tokens) + len(sentenceGraph.dependencies)
# Debug output: dump the adjacency matrix of one specific sentence
if sentenceGraph.sentenceElement.attrib["id"] == "LLL.d0.s0":
adjacencyMatrixToHtml(adjacencyMatrix, labels, "LLL.d0.s0_adjacency_matrix.html")
allPathsMatrix = self._prepareMatrix(adjacencyMatrix, node_count)
# The return value is not used here; presumably the features end up in
# self.features, which is read below -- verify against _matrixToFeatures
self._matrixToFeatures(allPathsMatrix, labels)
# Debug output: dump the transformed matrix of the same sentence
if sentenceGraph.sentenceElement.attrib["id"] == "LLL.d0.s0":
adjacencyMatrixToHtml(allPathsMatrix, labels, "LLL.d0.s0_all_paths_matrix.html")
# The feature set's string form is passed to writeExamples as comment lines
commentLines = []
commentLines.extend(self.featureSet.toStrings())
# An example is a list of [id, class, features]; the class is "unknown" here
example = ["example_"+self.entity1.attrib["id"]+"_"+self.entity2.attrib["id"],"unknown",self.features]
ExampleUtils.writeExamples([example],"LLL.d0.s0_example.txt",commentLines)
示例8: train
# 需要导入模块: from Core import ExampleUtils [as 别名]
# 或者: from Core.ExampleUtils import writeExamples [as 别名]
def train(self, examples, parameters=None):
    """
    Train an in-memory model with the svm module and store it in self.model.

    The filtered training examples are also written to tempDir/train.dat.
    Each class is weighted by 1 - (class count / total count), using the
    counts in self.classes.

    @param examples: the training examples; each example is indexed as
        example[1] (label) and example[2] (sample)
    @type parameters: a dictionary or None
    @param parameters: keyword arguments for svm.svm_parameter. A lowercase
        "c" key is renamed to "C". The caller's dictionary is never modified.
    """
    self.isBinary = self.isBinaryProblem(examples)
    examples = self.filterTrainingSet(examples)
    ExampleUtils.writeExamples(examples, self.tempDir+"/train.dat")
    #prepare parameters:
    # Work on a private copy; the original code crashed on the default None
    # and renamed the key inside the caller's dictionary.
    if parameters is None:
        parameters = {}
    else:
        parameters = dict(parameters)
    if "c" in parameters:
        assert "C" not in parameters
        parameters["C"] = parameters["c"]
        del parameters["c"]
    # NOTE(review): self.classes is not set in this method; presumably it is
    # filled with per-class example counts by filterTrainingSet -- confirm.
    totalExamples = float(sum(self.classes.values()))
    weight_label = sorted(self.classes.keys())
    weight = [1.0-self.classes[k]/totalExamples for k in weight_label]
    libSVMparam = svm.svm_parameter(nr_weight = len(self.classes), weight_label=weight_label, weight=weight, **parameters)
    labels = [example[1] for example in examples]
    samples = [example[2] for example in examples]
    problem = svm.svm_problem(labels, samples)
    self.model = svm.svm_model(problem, libSVMparam)
示例9: OptionParser
# 需要导入模块: from Core import ExampleUtils [as 别名]
# 或者: from Core.ExampleUtils import writeExamples [as 别名]
# Re-map the class and feature ids of the "variant" example file so that they
# use the id numbering of the "invariant" directory, then write the result to
# <variant>/realignedExamples.txt.
defaultAnalysisFilename = "/usr/share/biotext/ComplexPPI/BioInferForComplexPPIVisible.xml"
optparser = OptionParser(usage="%prog [options]\nCreate an html visualization for a corpus.")
optparser.add_option("-i", "--invariant", default=None, dest="invariant", help="Corpus in analysis format", metavar="FILE")
optparser.add_option("-v", "--variant", default=None, dest="variant", help="Corpus in analysis format", metavar="FILE")
(options, args) = optparser.parse_args()
# Both directories are joined into paths below; fail with a usage message
# instead of a TypeError from os.path.join(None, ...)
if options.invariant is None or options.variant is None:
    optparser.error("both --invariant and --variant must be given")
variantExamples = ExampleUtils.readExamples(os.path.join(options.variant, "test-triggers.examples"))
# Id sets of the target (invariant) numbering
invariantFeatureSet = IdSet()
invariantFeatureSet.load(os.path.join(options.invariant, "feature_names.txt"))
invariantClassSet = IdSet()
invariantClassSet.load(os.path.join(options.invariant, "class_names.txt"))
# Id sets of the source (variant) numbering
variantFeatureSet = IdSet()
variantFeatureSet.load(os.path.join(options.variant, "test-triggers.examples.feature_names"))
variantClassSet = IdSet()
variantClassSet.load(os.path.join(options.variant, "test-triggers.examples.class_names"))
counter = ProgressCounter(len(variantExamples))
for example in variantExamples:
    counter.update()
    # Translate id -> name in the variant set, then name -> id in the invariant set
    example[1] = invariantClassSet.getId(variantClassSet.getName(example[1]))
    newFeatures = {}
    for k,v in example[2].iteritems():
        newFeatures[ invariantFeatureSet.getId(variantFeatureSet.getName(k)) ] = v
    example[2] = newFeatures
ExampleUtils.writeExamples(variantExamples, os.path.join(options.variant, "realignedExamples.txt"))
示例10: test
# 需要导入模块: from Core import ExampleUtils [as 别名]
# 或者: from Core.ExampleUtils import writeExamples [as 别名]
def test(cls, examples, modelPath, output=None, parameters=None, forceInternal=False, classIds=None): # , timeout=None):
"""
Classify examples with a pre-trained model.
@type examples: string (filename) or list (or iterator) of examples
@param examples: a list or file containing examples in SVM-format
@type modelPath: string
@param modelPath: filename of the pre-trained model file
@type parameters: a dictionary or string
@param parameters: parameters for the classifier
@type output: string
@param output: the name of the predictions file to be written
@type forceInternal: Boolean
@param forceInternal: Use python classifier even if SVM Multiclass binary is defined in Settings.py
"""
if type(parameters) == types.StringType:
parameters = splitParameters(parameters)
timer = Timer()
if type(examples) == types.ListType:
print >> sys.stderr, "Classifying", len(examples), "with SVM-MultiClass model", modelPath
examples, predictions = self.filterClassificationSet(examples, False)
testPath = self.tempDir+"/test.dat"
Example.writeExamples(examples, testPath)
else:
print >> sys.stderr, "Classifying file", examples, "with SVM-MultiClass model", modelPath
testPath = examples
examples = Example.readExamples(examples,False)
if parameters != None:
parameters = copy.copy(parameters)
if parameters.has_key("c"):
del parameters["c"]
if parameters.has_key("predefined"):
parameters = copy.copy(parameters)
modelPath = os.path.join(parameters["predefined"][0],"classifier/model")
del parameters["predefined"]
# Read model
if modelPath == None:
modelPath = "model-multilabel"
classModels = {}
if modelPath.endswith(".gz"):
f = gzip.open(modelPath, "rt")
else:
f = open(modelPath, "rt")
thresholds = {}
for line in f:
key, value, threshold = line.split()
classModels[key] = value
if threshold != "None":
thresholds[key] = float(threshold)
else:
thresholds[key] = 0.0
f.close()
mergedPredictions = []
if type(classIds) == types.StringType:
classIds = IdSet(filename=classIds)
#print classModels
print "Thresholds", thresholds
classifierBin = Settings.SVMMultiClassDir+"/svm_multiclass_classify"
print parameters
if "classifier" in parameters and "svmperf" in parameters["classifier"]:
classifierBin = Settings.SVMPerfDir+"/svm_perf_classify"
parameters = copy.copy(parameters)
del parameters["classifier"]
for className in classIds.getNames():
if className != "neg" and not "---" in className:
classId = classIds.getId(className)
if thresholds[str(className)] != 0.0:
print >> sys.stderr, "Classifying", className, "with threshold", thresholds[str(className)]
else:
print >> sys.stderr, "Classifying", className
args = [classifierBin]
#self.__addParametersToSubprocessCall(args, parameters)
classOutput = "predictions" + ".cls-" + className
logFile = open("svmmulticlass" + ".cls-" + className + ".log","at")
args += [testPath, classModels[str(className)], classOutput]
print args
subprocess.call(args, stdout = logFile, stderr = logFile)
cls.addPredictions(classOutput, mergedPredictions, classId, len(classIds.Ids), threshold=thresholds[str(className)])
print >> sys.stderr, timer.toString()
predFileName = output
f = open(predFileName, "wt")
for mergedPred in mergedPredictions:
if len(mergedPred[0]) > 1 and "1" in mergedPred[0]:
mergedPred[0].remove("1")
mergedPred[1] = str(mergedPred[1])
mergedPred[0] = ",".join(sorted(list(mergedPred[0])))
f.write(" ".join(mergedPred) + "\n")
f.close()
return mergedPredictions
示例11: crossValidate
# 需要导入模块: from Core import ExampleUtils [as 别名]
# 或者: from Core.ExampleUtils import writeExamples [as 别名]
#.........这里部分代码省略.........
parameterOptimizationSet = exampleSets[parameterOptimizationSetKey]
trainSet = []
for key2 in keys:
if key2 != key and key2 != parameterOptimizationSetKey:
trainSet.extend(exampleSets[key2])
if parameterOptimizationSet != None: # constant external parameter optimization set
evaluationArgs = {"classSet":exampleBuilder.classSet}
if options.parameters != None:
paramDict = splitParameters(options.parameters)
bestResults = classifier.optimize([trainSet], [parameterOptimizationSet], paramDict, Evaluation, evaluationArgs, combinationsThatTimedOut=discardedParameterCombinations)
else:
bestResults = classifier.optimize([trainSet], [parameterOptimizationSet], evaluationClass=Evaluation, evaluationArgs=evaluationArgs, combinationsThatTimedOut=discardedParameterCombinations)
else: # nested x-fold parameter optimization
assert (options.folds[1] >= 2)
optimizationFolds = Example.makeExampleFolds(trainSet, options.folds[1])
optimizationSets = Example.divideExamples(trainSet, optimizationFolds)
optimizationSetList = []
optSetKeys = optimizationSets.keys()
optSetKeys.sort()
for optSetKey in optSetKeys:
optimizationSetList.append(optimizationSets[optSetKey])
evaluationArgs = {"classSet":exampleBuilder.classSet}
if options.parameters != None:
paramDict = splitParameters(options.parameters)
bestResults = classifier.optimize(optimizationSetList, optimizationSetList, paramDict, Evaluation, evaluationArgs, combinationsThatTimedOut=discardedParameterCombinations)
else:
bestResults = classifier.optimize(optimizationSetList, optimizationSetList, evaluationClass=Evaluation, evaluationArgs=evaluationArgs, combinationsThatTimedOut=discardedParameterCombinations)
# Classify
print >> sys.stderr, "Classifying test data"
bestParams = bestResults[2]
if bestParams.has_key("timeout"):
del bestParams["timeout"]
print >> sys.stderr, "Parameters:", bestParams
print >> sys.stderr, "Training",
startTime = time.time()
classifier.train(trainSet, bestParams)
print >> sys.stderr, "(Time spent:", time.time() - startTime, "s)"
print >> sys.stderr, "Testing",
startTime = time.time()
predictions = classifier.classify(testSet)
if options.output != None:
pdict = []
fieldnames = ["class","prediction","id","fold"]
for p in predictions:
if "typed" in exampleBuilder.styles:
pdict.append( {"class":exampleBuilder.classSet.getName(p[0][1]), "prediction":exampleBuilder.classSet.getName(p[1]), "id":p[0][0], "fold":key} )
else:
pdict.append( {"class":p[0][1], "prediction":p[1], "id":p[0][0], "fold":key} )
TableUtils.addToCSV(pdict, options.output +"/predictions.csv", fieldnames)
print >> sys.stderr, "(Time spent:", time.time() - startTime, "s)"
# Calculate statistics
evaluation = Evaluation(predictions, classSet=exampleBuilder.classSet)
print >> sys.stderr, evaluation.toStringConcise()
print >> sys.stderr, timer.toString()
evaluations.append(evaluation)
# Save example sets
if options.output != None:
print >> sys.stderr, "Saving example sets to", options.output
Example.writeExamples(exampleSets[0], options.output +"/fold"+str(key+1) + "/examplesTest.txt")
Example.writeExamples(exampleSets[1], options.output +"/fold"+str(key+1) + "/examplesTrain.txt")
if parameterOptimizationSet == None:
for k,v in optimizationSets.iteritems():
Example.writeExamples(v, options.output +"/fold"+str(key+1) + "/examplesOptimizationSet" + str(k) + ".txt")
else:
Example.writeExamples(parameterOptimizationSet, options.output +"/fold"+str(key+1) + "/examplesOptimizationSetPredefined.txt")
TableUtils.writeCSV(bestResults[2], options.output +"/fold"+str(key+1) + "/parameters.csv")
evaluation.saveCSV(options.output +"/fold"+str(key+1) + "/results.csv")
print >> sys.stderr, "Compressing folder"
zipTree(options.output, "fold"+str(key+1))
parameterOptimizationSet = constantParameterOptimizationSet
print >> sys.stderr, "Cross-validation Results"
for i in range(len(evaluations)):
print >> sys.stderr, evaluations[i].toStringConcise(" Fold "+str(i)+": ")
averageResult = Evaluation.average(evaluations)
print >> sys.stderr, averageResult.toStringConcise(" Avg: ")
pooledResult = Evaluation.pool(evaluations)
print >> sys.stderr, pooledResult.toStringConcise(" Pool: ")
if options.output != None:
for i in range(len(evaluations)):
evaluations[i].saveCSV(options.output+"/results.csv", i)
averageResult.saveCSV(options.output+"/results.csv", "Avg")
pooledResult.saveCSV(options.output+"/results.csv", "Pool")
averageResult.saveCSV(options.output+"/resultsAverage.csv")
pooledResult.saveCSV(options.output+"/resultsPooled.csv")
# Visualize
if options.visualization != None:
visualize(sentences, pooledResult.classifications, options, exampleBuilder)
# Save interactionXML
if options.resultsToXML != None:
classSet = None
if "typed" in exampleBuilder.styles:
classSet = exampleBuilder.classSet
Example.writeToInteractionXML(pooledResult.classifications, corpusElements, options.resultsToXML, classSet)
示例12: OptionParser
# 需要导入模块: from Core import ExampleUtils [as 别名]
# 或者: from Core.ExampleUtils import writeExamples [as 别名]
return examples
if __name__=="__main__":
    # Command-line entry point: read an ARFF file with readARFF and write the
    # examples with ExampleUtils.writeExamples.
    # Import Psyco if available
    try:
        import psyco
    except ImportError:
        print >> sys.stderr, "Psyco not installed"
    else:
        psyco.full()
        print >> sys.stderr, "Found Psyco, using"
    from optparse import OptionParser
    import os
    optparser = OptionParser(usage="%prog [options]\nCreate an html visualization for a corpus.")
    optparser.add_option("-i", "--input", default=None, dest="input", help="Corpus in analysis format", metavar="FILE")
    optparser.add_option("-o", "--output", default=None, dest="output", help="Output directory, useful for debugging")
    (options, args) = optparser.parse_args()
    # The input is required; fail with a usage message instead of a
    # TypeError from concatenating a string with None
    if options.input is None:
        optparser.error("no input file given (use -i/--input)")

    print >> sys.stderr, "Reading input from " + options.input
    examples = readARFF(options.input)
    if options.output is None:
        # Default output name: replace a trailing ".arff" with ".examples",
        # otherwise append ".examples"
        if options.input.endswith(".arff"):
            options.output = options.input[:-len(".arff")] + ".examples"
        else:
            options.output = options.input + ".examples"
    print >> sys.stderr, "Writing output to " + options.output
    ExampleUtils.writeExamples(examples, options.output)
示例13: IdSet
# 需要导入模块: from Core import ExampleUtils [as 别名]
# 或者: from Core.ExampleUtils import writeExamples [as 别名]
else:
print >> sys.stderr, "Using predefined model"
bestResults = [None,None,{}]
for k,v in classifierParamDict.iteritems():
bestResults[2][k] = v
featureSet = IdSet()
featureSet.load(os.path.join(classifierParamDict["predefined"][0], "feature_names.txt"))
classSet = None
if os.path.exists(os.path.join(classifierParamDict["predefined"][0], "class_names.txt")):
classSet = IdSet()
classSet.load(os.path.join(classifierParamDict["predefined"][0], "class_names.txt"))
exampleBuilder = ExampleBuilder(featureSet=featureSet, classSet=classSet, **splitParameters(options.exampleBuilderParameters))
# Save training sets
if options.output != None:
print >> sys.stderr, "Saving example sets to", options.output
Example.writeExamples(exampleSets[0], options.output + "/examplesTrain.txt")
if not classifierParamDict.has_key("predefined"):
Example.writeExamples(optimizationSets[0], options.output + "/examplesOptimizationTest.txt")
Example.writeExamples(optimizationSets[1], options.output + "/examplesOptimizationTrain.txt")
TableUtils.writeCSV(bestResults[2], options.output +"/best_parameters.csv")
# Optimize and train
if options.output != None:
classifier = Classifier(workDir = options.output + "/classifier")
else:
classifier = Classifier()
classifier.featureSet = exampleBuilder.featureSet
if hasattr(exampleBuilder,"classSet"):
classifier.classSet = exampleBuilder.classSet
print >> sys.stderr, "Classifying test data"
if bestResults[2].has_key("timeout"):