本文整理汇总了Python中Core.ExampleUtils.divideExamples方法的典型用法代码示例。如果您正苦于以下问题：Python ExampleUtils.divideExamples方法的具体用法？Python ExampleUtils.divideExamples怎么用？Python ExampleUtils.divideExamples使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块Core.ExampleUtils的用法示例。
在下文中一共展示了ExampleUtils.divideExamples方法的2个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: crossValidate
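# Required import: from Core import ExampleUtils [as alias]
# Or: from Core.ExampleUtils import divideExamples [as alias]
# NOTE: the code below calls the module as `Example` (e.g. Example.divideExamples),
# so it presumably relies on an alias such as `import Core.ExampleUtils as Example`.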
# crossValidate: run a cross-validation over `examples`, training and testing
# one classifier per fold.
#
# NOTE: this listing is Python 2 code (print >> stream, dict.iteritems,
# dict.has_key, sorting the list returned by dict.keys()). Its indentation was
# lost when the snippet was extracted and the end of the function is omitted,
# so it is not runnable as shown.
#
# Parameters, as used in the visible part of the body:
#   exampleBuilder - supplies featureSet, classSet and styles.
#   corpusElements - passed to Example.makeCorpusDivision / makeCorpusFolds;
#                    entries of its documentsById mapping may be deleted here.
#   examples       - the examples to divide into folds; each one is indexed
#                    with [3] to reach a dict of extra attributes.
#   options        - reads paramOptData, folds (indexed [0] and [1]), output
#                    and parameters.
#   timer          - not used in the visible part of the function.
def crossValidate(exampleBuilder, corpusElements, examples, options, timer):
parameterOptimizationSet = None
constantParameterOptimizationSet = None
# Optionally split off a fixed parameter optimization set before making folds.
if options.paramOptData != None:
print >> sys.stderr, "Separating parameter optimization set"
parameterOptimizationDivision = Example.makeCorpusDivision(corpusElements, float(options.paramOptData))
exampleSets = Example.divideExamples(examples, parameterOptimizationDivision)
# Set 0 of the division is used as the optimization set.
constantParameterOptimizationSet = exampleSets[0]
parameterOptimizationSet = constantParameterOptimizationSet
optDocs = 0
# Delete the documents assigned to division 0 from corpusElements, so the
# fold division made below no longer sees them. This mutates the caller's
# corpusElements object.
for k,v in parameterOptimizationDivision.iteritems():
if v == 0:
del corpusElements.documentsById[k]
optDocs += 1
print >> sys.stderr, " Documents for parameter optimization:", optDocs
# One list for the whole run: it is handed to every classifier.optimize call
# below as combinationsThatTimedOut.
discardedParameterCombinations = []
print >> sys.stderr, "Dividing data into folds"
corpusFolds = Example.makeCorpusFolds(corpusElements, options.folds[0])
exampleSets = Example.divideExamples(examples, corpusFolds)
# Process the folds in sorted key order.
keys = exampleSets.keys()
keys.sort()
evaluations = []
for key in keys:
# The current fold is the test set.
testSet = exampleSets[key]
# Tag each test example with its 1-based fold number.
for example in testSet:
example[3]["visualizationSet"] = key + 1
# Train on all the other folds.
trainSet = []
for key2 in keys:
if key != key2:
trainSet.extend(exampleSets[key2])
print >> sys.stderr, "Fold", str(key + 1)
# Create classifier object
# With an output directory the classifier works in <output>/fold<N>, which is
# created if it does not exist yet.
if options.output != None:
if not os.path.exists(options.output+"/fold"+str(key+1)):
os.mkdir(options.output+"/fold"+str(key+1))
# if not os.path.exists(options.output+"/fold"+str(key+1)+"/classifier"):
# os.mkdir(options.output+"/fold"+str(key+1)+"/classifier")
classifier = Classifier(workDir = options.output + "/fold"+str(key + 1))
else:
classifier = Classifier()
classifier.featureSet = exampleBuilder.featureSet
# Optimize ####################
# Check whether there is need for included param opt set
# No external optimization set and no nested folds requested: use the fold
# preceding the test fold (wrapping around to the last one) as the optimization
# set and rebuild the training set without it.
if parameterOptimizationSet == None and options.folds[1] == 0: # 8-1-1 folds
assert(len(keys) > 1)
if keys.index(key) == 0:
parameterOptimizationSetKey = keys[-1]
else:
parameterOptimizationSetKey = keys[keys.index(key)-1]
# NOTE(review): this overwrites parameterOptimizationSet, so the check above
# can only succeed again for a later fold if the omitted end of the loop
# resets it (presumably to constantParameterOptimizationSet) - confirm.
parameterOptimizationSet = exampleSets[parameterOptimizationSetKey]
trainSet = []
for key2 in keys:
if key2 != key and key2 != parameterOptimizationSetKey:
trainSet.extend(exampleSets[key2])
# Optimize against a single held-out set: training data is [trainSet] and the
# evaluation data is [parameterOptimizationSet].
if parameterOptimizationSet != None: # constant external parameter optimization set
evaluationArgs = {"classSet":exampleBuilder.classSet}
if options.parameters != None:
paramDict = splitParameters(options.parameters)
bestResults = classifier.optimize([trainSet], [parameterOptimizationSet], paramDict, Evaluation, evaluationArgs, combinationsThatTimedOut=discardedParameterCombinations)
else:
bestResults = classifier.optimize([trainSet], [parameterOptimizationSet], evaluationClass=Evaluation, evaluationArgs=evaluationArgs, combinationsThatTimedOut=discardedParameterCombinations)
else: # nested x-fold parameter optimization
assert (options.folds[1] >= 2)
# Divide the training set itself into options.folds[1] folds.
optimizationFolds = Example.makeExampleFolds(trainSet, options.folds[1])
optimizationSets = Example.divideExamples(trainSet, optimizationFolds)
# Collect the inner folds into a list, in sorted key order.
optimizationSetList = []
optSetKeys = optimizationSets.keys()
optSetKeys.sort()
for optSetKey in optSetKeys:
optimizationSetList.append(optimizationSets[optSetKey])
evaluationArgs = {"classSet":exampleBuilder.classSet}
# The same list of inner folds is passed as both the first and the second
# argument of classifier.optimize.
if options.parameters != None:
paramDict = splitParameters(options.parameters)
bestResults = classifier.optimize(optimizationSetList, optimizationSetList, paramDict, Evaluation, evaluationArgs, combinationsThatTimedOut=discardedParameterCombinations)
else:
bestResults = classifier.optimize(optimizationSetList, optimizationSetList, evaluationClass=Evaluation, evaluationArgs=evaluationArgs, combinationsThatTimedOut=discardedParameterCombinations)
# Classify
print >> sys.stderr, "Classifying test data"
# Index 2 of the optimize result holds the selected parameters; a "timeout"
# entry, if present, is removed before they are used for training.
bestParams = bestResults[2]
if bestParams.has_key("timeout"):
del bestParams["timeout"]
print >> sys.stderr, "Parameters:", bestParams
# The trailing comma keeps the timing message printed below on the same line.
print >> sys.stderr, "Training",
startTime = time.time()
classifier.train(trainSet, bestParams)
print >> sys.stderr, "(Time spent:", time.time() - startTime, "s)"
print >> sys.stderr, "Testing",
startTime = time.time()
predictions = classifier.classify(testSet)
# With an output directory, collect one record per prediction. Despite its
# name, pdict is a list of dicts.
if options.output != None:
pdict = []
fieldnames = ["class","prediction","id","fold"]
for p in predictions:
# For the "typed" style the class ids are converted to names through
# classSet.getName; p[0] is indexed for the id ([0]) and the class ([1]),
# p[1] is the prediction.
if "typed" in exampleBuilder.styles:
pdict.append( {"class":exampleBuilder.classSet.getName(p[0][1]), "prediction":exampleBuilder.classSet.getName(p[1]), "id":p[0][0], "fold":key} )
else:
#......... part of the code is omitted here .........
示例2: buildExamples
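# Required import: from Core import ExampleUtils [as alias]
# Or: from Core.ExampleUtils import divideExamples [as alias]
# NOTE: the code below calls the module as `Example` (e.g. Example.divideExamples),
# so it presumably relies on an alias such as `import Core.ExampleUtils as Example`.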
# NOTE: this is a fragment from the middle of a larger routine; its beginning
# and end are not part of the listing and the indentation was lost in
# extraction. It is Python 2 code (print >> stream, dict.iteritems).
# Build examples
trainExamples = buildExamples(exampleBuilder, sentences, options)
exampleSets[0] = trainExamples
# Create classifier object
classifier = Classifier()
#if options.output != None:
# classifier = Classifier(workDir = options.output + "/classifier")
#else:
# classifier = Classifier()
classifier.featureSet = exampleBuilder.featureSet
# NOTE(review): when exampleBuilder has a classSet attribute, the classifier's
# classSet is set to None rather than to exampleBuilder.classSet - confirm that
# this is intended.
if hasattr(exampleBuilder,"classSet"):
classifier.classSet = None
# Optimize
# divideExamples is called with only the example list here; items 0 and 1 of
# its result are used below as the first and second data sets for optimize.
optimizationSets = Example.divideExamples(exampleSets[0])
evaluationArgs = {"classSet":exampleBuilder.classSet}
# Pass an explicit parameter dictionary only when options.parameters is given.
if options.parameters != None:
paramDict = splitParameters(options.parameters)
bestResults = classifier.optimize([optimizationSets[0]], [optimizationSets[1]], paramDict, Evaluation, evaluationArgs)
else:
bestResults = classifier.optimize([optimizationSets[0]], [optimizationSets[1]], evaluationClass=Evaluation, evaluationArgs=evaluationArgs)
# NOTE(review): the `if` that this `else` belongs to is outside the visible
# fragment.
else:
print >> sys.stderr, "Using predefined model"
# Build a stand-in for the optimize result: index 2 holds the parameters, which
# are copied from classifierParamDict.
bestResults = [None,None,{}]
for k,v in classifierParamDict.iteritems():
bestResults[2][k] = v
# Load the feature names from the directory given as the first value of the
# "predefined" parameter.
featureSet = IdSet()
featureSet.load(os.path.join(classifierParamDict["predefined"][0], "feature_names.txt"))
classSet = None
# What happens when class_names.txt exists is outside the visible fragment.
if os.path.exists(os.path.join(classifierParamDict["predefined"][0], "class_names.txt")):