本文整理汇总了Python中weka.classifiers.Evaluation.pctCorrect方法的典型用法代码示例。如果您正苦于以下问题:Python Evaluation.pctCorrect方法的具体用法?Python Evaluation.pctCorrect怎么用?Python Evaluation.pctCorrect使用的例子?那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类weka.classifiers.Evaluation的用法示例。
在下文中一共展示了Evaluation.pctCorrect方法的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: myGridSearch
# 需要导入模块: from weka.classifiers import Evaluation [as 别名]
# 或者: from weka.classifiers.Evaluation import pctCorrect [as 别名]
def myGridSearch(data,RBound,MBound):
bestlogistic = None
best_acc = -float('inf')
class bestValues(object):
m = float('nan')
r = float('nan')
for r in range(RBound[0],RBound[1]+RBound[2],RBound[2]):
for m in range(MBound[0],MBound[1]+MBound[2],MBound[2]):
logistic = Logistic()
logistic.setMaxIts(int(m))
logistic.setRidge(pow(10,r))
evaluation = Evaluation(data)
output = util.get_buffer_for_predictions()[0]
attRange = Range() # no additional attributes output
outputDistribution = Boolean(False) # we don't want distribution
random = Random(1)
numFolds = min(10,data.numInstances())
evaluation.crossValidateModel(logistic,data,numFolds,random,[output, attRange, outputDistribution])
acc = evaluation.pctCorrect()
if (acc>best_acc):
bestlogistic = logistic
best_acc = acc
bestValues.m = int(m)
bestValues.r = pow(10,r)
print "Best accuracy: ", best_acc
print "Best values: M = ", bestValues.m, ", Ridge = ", bestValues.r
print "-----------------------------------------"
return bestlogistic, bestValues.r, bestValues.m, best_acc
示例2: myGridSearch
# 需要导入模块: from weka.classifiers import Evaluation [as 别名]
# 或者: from weka.classifiers.Evaluation import pctCorrect [as 别名]
def myGridSearch(data,NTreeBounds,NFeaturesBounds):
best_acc = -float('inf')
bestrandomforest = None
class bestValues(object):
t = float('nan')
f = float('nan')
for t in range(NTreeBounds[0],NTreeBounds[1]+NTreeBounds[2],NTreeBounds[2]):
for f in range(NFeaturesBounds[0],NFeaturesBounds[1]+NFeaturesBounds[2],NFeaturesBounds[2]):
randomforest = RandomForest()
randomforest.setNumTrees(int(t))
randomforest.setNumFeatures(int(f))
evaluation = Evaluation(data)
output = output = util.get_buffer_for_predictions()[0]
attRange = Range() # no additional attributes output
outputDistribution = Boolean(False) # we don't want distribution
random = Random(1)
numFolds = min(10,data.numInstances())
evaluation.crossValidateModel(randomforest,data,numFolds,random,[output, attRange, outputDistribution])
acc = evaluation.pctCorrect()
if (acc>best_acc):
bestrandomforest = randomforest
best_acc = acc
bestValues.t = t
bestValues.f = f
print "Best accuracy:", best_acc
print "Best values: NTreeBounds = ", bestValues.t, ", NFeaturesBounds = ", bestValues.f
print "-----------------------------------------"
return bestrandomforest, bestValues.t, bestValues.f, best_acc
示例3: Logistic_ParamFinder
# 需要导入模块: from weka.classifiers import Evaluation [as 别名]
# 或者: from weka.classifiers.Evaluation import pctCorrect [as 别名]
def Logistic_ParamFinder(data):
# Possible set for Ridge-value
RBounds = [-10,2,1]
# possible set for maximum Iteration
MBounds = [-1,10,1]
if (data.numInstances()>10): # grid search does 10-fold cross validation; hence number of samples must be more than 10
gridsearch = GridSearch()
acctag = gridsearch.getEvaluation()
acctag = SelectedTag('ACC',acctag.getTags())
gridsearch.setEvaluation(acctag)
allfilters = AllFilters()
gridsearch.setFilter(allfilters)
gridsearch.setGridIsExtendable(Boolean(True))
logistic = Logistic()
gridsearch.setClassifier(logistic)
gridsearch.setXProperty(String('classifier.maxIts'))
gridsearch.setYProperty(String('classifier.ridge'))
gridsearch.setXExpression(String('I'))
gridsearch.setYExpression(String('pow(BASE,I)'))
gridsearch.setXMin(MBounds[0])
gridsearch.setXMax(MBounds[1])
gridsearch.setXStep(MBounds[2])
gridsearch.setYMin(RBounds[0])
gridsearch.setYMax(RBounds[1])
gridsearch.setYStep(RBounds[2])
gridsearch.setYBase(10)
print "searching for logistic lcassifier Max Iteration = [", MBounds[0], ",", MBounds[1], "], Ridge = [ 10E", RBounds[0], ",10E", RBounds[1], "] ...."
gridsearch.buildClassifier(data)
bestValues = gridsearch.getValues()
# ----------------------- Evaluation
bestlogistic = Logistic()
bestlogistic.setMaxIts(int(bestValues.x))
bestlogistic.setRidge(pow(10,bestValues.y))
evaluation = Evaluation(data)
output = util.get_buffer_for_predictions()[0]
attRange = Range() # no additional attributes output
outputDistribution = Boolean(False) # we don't want distribution
random = Random(1)
numFolds = min(10,data.numInstances())
evaluation.crossValidateModel(bestlogistic,data,numFolds,random,[output, attRange, outputDistribution])
acc = evaluation.pctCorrect()
print "best accuracy: ", acc
print "best logistic classifier with Ridge = ", bestlogistic.getRidge(), " Max Iteration = ", bestlogistic.getMaxIts()
OptLog = bestlogistic
OptLogp1 = bestlogistic.getRidge()
OptLogp2 = bestlogistic.getMaxIts()
OptLogAcc = acc
else:
OptLog, OptLogp1, OptLogp2, OptLogAcc = myGridSearch(data,RBounds,MBounds)
Description = 'Logistic classifier OptRidge = ' + str(OptLogp1) + \
', OptMaxIts = ' + str(OptLogp2) + ', OptAcc = ' + str(OptLogAcc)
print "-----------------------------------------"
return OptLog, OptLogp1, OptLogp2, OptLogAcc, Description
示例4: RandomForest_ParamFinder
# 需要导入模块: from weka.classifiers import Evaluation [as 别名]
# 或者: from weka.classifiers.Evaluation import pctCorrect [as 别名]
def RandomForest_ParamFinder(data):
# possible set for Number of trees
NTreeBounds = [1,20,1]
# possible set for number of features
NFeaturesBounds = [0,20,1]
if (data.numInstances()>10): # grid search does 10-fold cross validation; hence number of samples must be more than 10
gridsearch = GridSearch()
acctag = gridsearch.getEvaluation()
acctag = SelectedTag('ACC',acctag.getTags())
gridsearch.setEvaluation(acctag)
allfilters = AllFilters()
gridsearch.setFilter(allfilters)
gridsearch.setGridIsExtendable(Boolean(True))
randomforest = RandomForest()
gridsearch.setClassifier(randomforest)
gridsearch.setXProperty(String('classifier.numTrees'))
gridsearch.setYProperty(String('classifier.numFeatures'))
gridsearch.setXExpression(String('I'))
gridsearch.setYExpression(String('I'))
gridsearch.setXMin(NTreeBounds[0])
gridsearch.setXMax(NTreeBounds[1])
gridsearch.setXStep(NTreeBounds[2])
gridsearch.setYMin(NFeaturesBounds[0])
gridsearch.setYMax(NFeaturesBounds[1])
gridsearch.setYStep(NFeaturesBounds[2])
gridsearch.setYBase(10)
print "searching for random-forest NumTrees = [", NTreeBounds[0], ",", NTreeBounds[1], "], NumFeatures = [ ", NFeaturesBounds[0], ",", NFeaturesBounds[1], "] ...."
gridsearch.buildClassifier(data)
bestValues = gridsearch.getValues()
# ----------------------- Evaluation
bestrandomforest = RandomForest()
bestrandomforest.setNumTrees(int(bestValues.x))
bestrandomforest.setNumFeatures(int(bestValues.y))
evaluation = Evaluation(data)
output = output = util.get_buffer_for_predictions()[0]
attRange = Range() # no additional attributes output
outputDistribution = Boolean(False) # we don't want distribution
random = Random(1)
numFolds = min(10,data.numInstances())
evaluation.crossValidateModel(bestrandomforest,data,numFolds,random,[output, attRange, outputDistribution])
acc = evaluation.pctCorrect()
print "best accuracy: ", acc
print "best random-forest classifier with NumTrees=",bestValues.x , ", NumFeatures = ", bestValues.y
OptRndFrst = bestrandomforest
OptRndFrstp1 = bestValues.x
OptRndFrstp2 = bestValues.y
OptRndFrstAcc = acc
else:
OptRndFrst, OptRndFrstp1, OptRndFrstp2, OptRndFrstAcc = myGridSearch(data,NTreeBounds,NFeaturesBounds)
Description = 'Random-Forest classifier: OptNumTrees = ' + str(OptRndFrstp1) + \
', OptNumFeatures = ' + str(OptRndFrstp2) + ', OptAcc = ' + str(OptRndFrstAcc)
print "-----------------------------------------"
return OptRndFrst, OptRndFrstp1, OptRndFrstp2, OptRndFrstAcc, Description
示例5: myGridSearch
# 需要导入模块: from weka.classifiers import Evaluation [as 别名]
# 或者: from weka.classifiers.Evaluation import pctCorrect [as 别名]
def myGridSearch(data,cBounds,GBound,eBounds):
IsBestRBFKernel = False
best_acc_poly = -float('inf')
best_acc_rbf = -float('inf')
# Poly Kernel
class bestValues_poly(object):
x = float('nan')
y = float('nan')
for Cbnd in cBounds:
for c in range(Cbnd[0],Cbnd[1]+Cbnd[2],Cbnd[2]):
for e in range(eBounds[0],eBounds[1]+eBounds[2],eBounds[2]):
smo = SMO()
kernel = PolyKernel()
kernel.setExponent(e)
smo.setC(c)
smo.setKernel(kernel)
evaluation = Evaluation(data)
output = util.get_buffer_for_predictions()[0]
attRange = Range() # no additional attributes output
outputDistribution = Boolean(False) # we don't want distribution
random = Random(1)
numFolds = min(10,data.numInstances())
evaluation.crossValidateModel(smo,data,numFolds,random,[output, attRange, outputDistribution])
acc = evaluation.pctCorrect()
if (acc>best_acc_poly):
best_smo_poly = smo
best_acc_poly = acc
bestValues_poly.x = c
bestValues_poly.y = e
print "Best accuracy (Poly Kernel): ", best_acc_poly
print "Best values (Poly Kernel): C = ", bestValues_poly.x, ", exponent = ", bestValues_poly.y
print "-----------------------------------------"
# RBF Kernel
class bestValues_rbf(object):
x = float('nan')
y = float('nan')
for Cbnd in cBounds:
for c in range(Cbnd[0],Cbnd[1]+Cbnd[2],Cbnd[2]):
for g in range(GBound[0],GBound[1]+GBound[2],GBound[2]):
smo = SMO()
kernel = RBFKernel()
kernel.setGamma(pow(10,g))
smo.setC(c)
smo.setKernel(kernel)
evaluation = Evaluation(data)
output = util.get_buffer_for_predictions()[0]
attRange = Range() # no additional attributes output
outputDistribution = Boolean(False) # we don't want distribution
random = Random(1)
numFolds = min(10,data.numInstances())
evaluation.crossValidateModel(smo,data,numFolds,random,[output, attRange, outputDistribution])
acc = evaluation.pctCorrect()
if (acc>best_acc_rbf):
best_smo_rbf = smo
best_acc_rbf = acc
bestValues_rbf.x = c
bestValues_rbf.y = g
print "Best accuracy (RBF Kernel): ", best_acc_rbf
print "Best values (RBF Kernel): C = ", bestValues_rbf.x, ", gamma = ", bestValues_rbf.y
if (best_acc_rbf > best_acc_poly):
IsBestRBFKernel = True
print "best smo classifier is RBF kernel with C = ", bestValues_rbf.x," and gamma = ", pow(10,bestValues_rbf.y)
best_smo = best_smo_rbf
OptSMOp1 = bestValues_rbf.x
OptSMOp2 = pow(10,bestValues_rbf.y)
OptSMOAcc = best_acc_rbf
OptSMOIsRBF = IsBestRBFKernel
else:
IsBestRBFKernel = False
print "best smo classifier is Poly kernel with C = ", bestValues_poly.x," and exponent = ", bestValues_poly.y
best_smo = best_smo_poly
OptSMOp1 = bestValues_poly.x
OptSMOp2 = bestValues_poly.y
OptSMOAcc = best_acc_poly
OptSMOIsRBF = IsBestRBFKernel
return IsBestRBFKernel, best_smo, OptSMOp1, OptSMOp2, OptSMOAcc
示例6: SMO_ParamFinder
# 需要导入模块: from weka.classifiers import Evaluation [as 别名]
# 或者: from weka.classifiers.Evaluation import pctCorrect [as 别名]
def SMO_ParamFinder(data):
# Possible set for C-value
cBounds = [[1,10,1],[10,100,10],[100,300,20]]
# possible set for exponents
eBounds = [1,3,1]
# possible set for Gamma
GBound = [-5,2,1]
if (data.numInstances()>10): # grid search does 10-fold cross validation; hence number of samples must be more than 10
# Polynomials Kernel
gridsearch = GridSearch()
acctag = gridsearch.getEvaluation()
acctag = SelectedTag('ACC',acctag.getTags())
gridsearch.setEvaluation(acctag)
allfilters = AllFilters()
gridsearch.setFilter(allfilters)
gridsearch.setGridIsExtendable(Boolean(True))
smo = SMO()
kernel = PolyKernel()
smo.setKernel(kernel)
gridsearch.setClassifier(smo)
gridsearch.setXProperty(String('classifier.c'))
gridsearch.setYProperty(String('classifier.kernel.Exponent'))
gridsearch.setXExpression(String('I'))
gridsearch.setYExpression(String('I'))
best_acc_poly = -float('inf')
for cnt in range(0,len(cBounds)):
cbound = cBounds[cnt]
cmin = cbound[0]
cmax = cbound[1]
cstep = cbound[2]
gridsearch.setXMin(cmin)
gridsearch.setXMax(cmax)
gridsearch.setXStep(cstep)
gridsearch.setYMin(eBounds[0])
gridsearch.setYMax(eBounds[1])
gridsearch.setYStep(eBounds[2])
print "searching for Polykernel C = [", cmin, ",", cmax, "], exponent = [", eBounds[0], ",", eBounds[1], "] ...."
gridsearch.buildClassifier(data)
bestValues = gridsearch.getValues()
# --------------------------------- Evaluation
bestsmo = SMO()
kernel = PolyKernel()
kernel.setExponent(bestValues.y)
bestsmo.setC(bestValues.x)
bestsmo.setKernel(kernel)
evaluation = Evaluation(data)
output = util.get_buffer_for_predictions()[0]
attRange = Range() # no additional attributes output
outputDistribution = Boolean(False) # we don't want distribution
random = Random(1)
numFolds = min(10,data.numInstances())
print "numFolds : ", numFolds
evaluation.crossValidateModel(bestsmo,data,numFolds,random,[output, attRange, outputDistribution])
acc = evaluation.pctCorrect()
if (acc>best_acc_poly):
best_smo_poly = bestsmo
best_acc_poly = acc
bestValues_poly = bestValues
print "Best accuracy so far: ",best_acc_poly
print "Best values so far: ",bestValues_poly
print "Best accuracy (Poly Kernel): ", best_acc_poly
print "Best values (Poly Kernel): ", bestValues_poly
print "-----------------------------------------"
# RBF Kernel
smo = SMO()
kernel = RBFKernel()
smo.setKernel(kernel)
gridsearch.setClassifier(smo)
gridsearch.setXProperty(String('classifier.c'))
gridsearch.setYProperty(String('classifier.kernel.gamma'))
gridsearch.setXExpression(String('I'))
gridsearch.setYExpression(String('pow(BASE,I)'))
gridsearch.setYBase(10)
best_acc_rbf = -float('inf')
for cnt in range(0,len(cBounds)):
cbound = cBounds[cnt]
cmin = cbound[0]
cmax = cbound[1]
cstep = cbound[2]
gridsearch.setXMin(cmin)
gridsearch.setXMax(cmax)
gridsearch.setXStep(cstep)
gridsearch.setYMin(GBound[0])
gridsearch.setYMax(GBound[1])
gridsearch.setYStep(GBound[2])
gridsearch.setYBase(10)
print "searching for RBF Kernel C = [", cmin, ",", cmax, "], gamma = [10^", GBound[0], ",10^", GBound[1], "] ...."
gridsearch.buildClassifier(data)
bestValues = gridsearch.getValues()
# ----------------------------------- Evaluation
bestsmo = SMO()
kernel = RBFKernel()
kernel.setGamma(pow(10,bestValues.y))
bestsmo.setC(bestValues.x)
bestsmo.setKernel(kernel)
evaluation = Evaluation(data)
output = util.get_buffer_for_predictions()[0]
attRange = Range() # no additional attributes output
outputDistribution = Boolean(False) # we don't want distribution
random = Random(1)
#.........这里部分代码省略.........
示例7: runClassifierAlgo
# 需要导入模块: from weka.classifiers import Evaluation [as 别名]
# 或者: from weka.classifiers.Evaluation import pctCorrect [as 别名]
def runClassifierAlgo(algo, class_index, training_filename, test_filename, do_model, do_eval, do_predict):
""" If <test_filename>
Run classifier algorithm <algo> on training data in <training_filename> to build a model
then test on data in <test_filename> (equivalent of Weka "Supplied test set")
else
do 10 fold CV lassifier algorithm <algo> on data in <training_filename>
<class_index> is the column containing the dependent variable
http://weka.wikispaces.com/Generating+classifier+evaluation+output+manually
http://weka.sourceforge.net/doc.dev/weka/classifiers/Evaluation.html
"""
print ' runClassifierAlgo: training_filename= ', training_filename, ', test_filename=', test_filename
misc.checkExists(training_filename)
training_file = FileReader(training_filename)
training_data = Instances(training_file)
if test_filename:
test_file = FileReader(test_filename)
test_data = Instances(test_file)
else:
test_data = training_data
# set the class Index - the index of the dependent variable
training_data.setClassIndex(class_index)
test_data.setClassIndex(class_index)
# create the model
if test_filename:
algo.buildClassifier(training_data)
evaluation = None
# only a trained classifier can be evaluated
if do_eval or do_predict:
evaluation = Evaluation(test_data)
buffer = StringBuffer() # buffer for the predictions
attRange = Range() # no additional attributes output
outputDistribution = Boolean(False) # we don't want distribution
if test_filename:
evaluation.evaluateModel(algo, test_data, [buffer, attRange, outputDistribution])
else:
# evaluation.evaluateModel(algo, [String('-t ' + training_filename), String('-c 1')])
# print evaluation.toSummaryString()
rand = Random(1)
evaluation.crossValidateModel(algo, training_data, 4, rand)
if False:
print 'percentage correct =', evaluation.pctCorrect()
print 'area under ROC =', evaluation.areaUnderROC(class_index)
confusion_matrix = evaluation.confusionMatrix()
for l in confusion_matrix:
print '** ', ','.join('%2d'%int(x) for x in l)
if verbose:
if do_model:
print '--> Generated model:\n'
print algo.toString()
if do_eval:
print '--> Evaluation:\n'
print evaluation.toSummaryString()
if do_predict:
print '--> Predictions:\n'
print buffer
return {'model':str(algo), 'eval':str(evaluation.toSummaryString()), 'predict':str(buffer) }
示例8: Bayes_ParamFinder
# 需要导入模块: from weka.classifiers import Evaluation [as 别名]
# 或者: from weka.classifiers.Evaluation import pctCorrect [as 别名]
def Bayes_ParamFinder(data):
# ----------------------- Evaluation of Naive Bayes without kernel estimation
naivebayes = NaiveBayes()
evaluation = Evaluation(data)
output = util.get_buffer_for_predictions()[0]
attRange = Range() # no additional attributes output
outputDistribution = Boolean(False) # we don't want distribution
random = Random(1)
numFolds = min(10,data.numInstances())
evaluation.crossValidateModel(naivebayes,data,numFolds,random,[output, attRange, outputDistribution])
acc_naivebayes = evaluation.pctCorrect()
print "Naive Bayesisn accuracy (without kernel density estimation): ", acc_naivebayes
# ----------------------- Evaluation of Naive Bayes with kernel estimation
naivebayes = NaiveBayes()
naivebayes.setUseKernelEstimator(Boolean(True)) # use kernel density estimation
evaluation = Evaluation(data)
attRange = Range() # no additional attributes output
outputDistribution = Boolean(False) # we don't want distribution
random = Random(1)
numFolds = min(10,data.numInstances())
evaluation.crossValidateModel(naivebayes,data,numFolds,random,[output, attRange, outputDistribution])
acc_naivebayes_withkernel = evaluation.pctCorrect()
print "Naive Bayesisn accuracy (with kernel density estimation): ", acc_naivebayes_withkernel
# ----------------------- Evaluation of Naive bayes multinomial
naivebayesmultinomial = NaiveBayesMultinomial()
evaluation = Evaluation(data)
attRange = Range() # no additional attributes output
outputDistribution = Boolean(False) # we don't want distribution
random = Random(1)
if (allAttributesPositive(data)): # multinomial bayes classifier only work on positive attributes
numFolds = min(10,data.numInstances())
evaluation.crossValidateModel(naivebayesmultinomial,data,numFolds,random,[output, attRange, outputDistribution])
acc_naivemultinomialbayes = evaluation.pctCorrect()
else:
acc_naivemultinomialbayes = 0
print "Naive Multinomial Bayesisn accuracy : ", acc_naivemultinomialbayes
# ------------------------- Comparision
if (acc_naivemultinomialbayes > acc_naivebayes):
if (acc_naivemultinomialbayes > acc_naivebayes_withkernel):
IsOptMultinomialBayes = True
IsOptNaiveKernelDensity = False
acc = acc_naivemultinomialbayes
else:
IsOptMultinomialBayes = False
IsOptNaiveKernelDensity = True
acc = acc_naivebayes_withkernel
else:
if (acc_naivebayes > acc_naivebayes_withkernel):
IsOptMultinomialBayes = False
IsOptNaiveKernelDensity = False
acc = acc_naivebayes
else:
IsOptMultinomialBayes = False
IsOptNaiveKernelDensity = True
acc = acc_naivebayes_withkernel
print "-----------------------------------------"
OptBayesAcc = acc
if IsOptMultinomialBayes:
Description = 'Optimal Bayes classifier is Multinomial Bayes: OptAcc = ' + str(OptBayesAcc)
elif IsOptNaiveKernelDensity:
Description = 'Optimal Bayes classifier is Naive Bayes with kernel density estimation: OptAcc = ' +\
str(OptBayesAcc)
else:
Description = 'Optimal Bayes classifier is Naive Bayes: OptAcc = ' + str(OptBayesAcc)
return IsOptMultinomialBayes, IsOptNaiveKernelDensity, OptBayesAcc, Description
示例9: AdaBoostedSimpleLogistic_ParamFinder
# 需要导入模块: from weka.classifiers import Evaluation [as 别名]
# 或者: from weka.classifiers.Evaluation import pctCorrect [as 别名]
def AdaBoostedSimpleLogistic_ParamFinder(data, param1, param2):
# Adaboost params: Possible set for Weight Threshold
WeightThresholdBounds = [99,100,1]
# Adaboost params: possible set for NumIteration
NumItrBound = [5,50,5]
# Simple Logisitic params: Possible set for num of boosting
NumBoostIterationBounds = [0,200,10]
# This section tries to boost the best simple logistic
print "searching for the best parameters to boosting on the optimal simple Logistic ...."
gridsearch = GridSearch()
acctag = gridsearch.getEvaluation()
acctag = SelectedTag('ACC',acctag.getTags())
gridsearch.setEvaluation(acctag)
allfilters = AllFilters()
gridsearch.setFilter(allfilters)
gridsearch.setGridIsExtendable(Boolean(True))
simplelogistic = SimpleLogistic()
adaboostm = AdaBoostM1()
simplelogistic.setHeuristicStop(param1)
simplelogistic.setNumBoostingIterations(param2)
adaboostm.setClassifier(simplelogistic)
gridsearch.setClassifier(adaboostm)
gridsearch.setXProperty(String('classifier.weightThreshold'))
gridsearch.setYProperty(String('classifier.numIterations'))
gridsearch.setXExpression(String('I'))
gridsearch.setYExpression(String('I'))
gridsearch.setXMin(WeightThresholdBounds[0])
gridsearch.setXMax(WeightThresholdBounds[1])
gridsearch.setXStep(WeightThresholdBounds[2])
gridsearch.setYMin(NumItrBound[0])
gridsearch.setYMax(NumItrBound[1])
gridsearch.setYStep(NumItrBound[2])
print "searching for best parameters for boosting simple Logistic weightThreshold = [", WeightThresholdBounds[0], ",", WeightThresholdBounds[1], "], # Iterations = [", NumItrBound[0], ",", NumItrBound[1], "] ...."
gridsearch.buildClassifier(data)
bestValues1 = gridsearch.getValues()
# ------------------------------ Evaluation
simplelogistic = SimpleLogistic()
bestadaboostm1 = AdaBoostM1()
simplelogistic.setHeuristicStop(param1)
simplelogistic.setNumBoostingIterations(param2)
bestadaboostm1.setWeightThreshold(int(bestValues1.x))
bestadaboostm1.setNumIterations(int(bestValues1.y))
bestadaboostm1.setClassifier(simplelogistic)
evaluation = Evaluation(data)
output = util.get_buffer_for_predictions()[0]
attRange = Range() # no additional attributes output
outputDistribution = Boolean(False) # we don't want distribution
random = Random(1)
numFolds = min(10,data.numInstances())
evaluation.crossValidateModel(bestadaboostm1,data,numFolds,random,[output, attRange, outputDistribution])
best_acc1 = evaluation.pctCorrect()
print "best accuracy by boosting the optimal simple Logistic classifier: ", best_acc1
print "Optimal weight Threshold Percent : ", bestValues1.x , "Optimal number of Iterations : ", bestValues1.y
print "-----------------------------------------"
# -------------------------------------------------------------------------------------------------------------------------
# in this section we set the weak classifier to the linear SMO and optimize over c-value of the SMO and number of iteration
simplelogistic = SimpleLogistic()
adaboostm = AdaBoostM1()
adaboostm.setClassifier(simplelogistic)
gridsearch.setClassifier(adaboostm)
gridsearch.setXProperty(String('classifier.classifier.numBoostingIterations'))
gridsearch.setYProperty(String('classifier.numIterations'))
gridsearch.setXExpression(String('I'))
gridsearch.setYExpression(String('I'))
gridsearch.setXBase(10)
gridsearch.setXMin(NumBoostIterationBounds[0])
gridsearch.setXMax(NumBoostIterationBounds[1])
gridsearch.setXStep(NumBoostIterationBounds[2])
gridsearch.setYMin(NumItrBound[0])
gridsearch.setYMax(NumItrBound[1])
gridsearch.setYStep(NumItrBound[2])
print "searching for number of boosting Iterations bound = [", NumBoostIterationBounds[0], ",", NumBoostIterationBounds[1], "], # Iteration = [", NumItrBound[0], ",", NumItrBound[1], "] ...."
gridsearch.buildClassifier(data)
bestValues2 = gridsearch.getValues()
# ------------------ Evaluation
simplelogistic = SimpleLogistic()
bestadaboostm2 = AdaBoostM1()
simplelogistic.setNumBoostingIterations(int(bestValues2.x))
bestadaboostm2.setNumIterations(int(bestValues2.y))
bestadaboostm2.setClassifier(simplelogistic)
evaluation = Evaluation(data)
output = util.get_buffer_for_predictions()[0]
attRange = Range() # no additional attributes output
outputDistribution = Boolean(False) # we don't want distribution
random = Random(1)
numFolds = min(10,data.numInstances())
evaluation.crossValidateModel(bestadaboostm2,data,numFolds,random,[output, attRange, outputDistribution])
best_acc2 = evaluation.pctCorrect()
print "best accuracy by boosting the Simple Logistic classifier (with optimization over ridge): ", best_acc2
print "Optimal number of boosting Iteration : ", bestValues2.x , "Optimal number of Iteration : ", bestValues2.y
print "-----------------------------------------"
print "Final optimal boosting classifier:"
if (best_acc2 > best_acc1):
print " Best boosting is based on simple logistic with optimal numBoostingIterations :",\
bestValues2.x, " optimal numIteration :", bestValues2.y
print " optimal accuracy: ", best_acc2
IsOptimalBoostingOnOptSimpleLogistic = False # is optimal boosting based on optimal simple Logistic ?
IsOptBoostOnOptSimpLog = IsOptimalBoostingOnOptSimpleLogistic
OptBoostSimpLog = bestadaboostm2
OptBoostSimpLogp1 = bestValues2.x
#.........这里部分代码省略.........
示例10: BaggingSMO_ParamFinder
# 需要导入模块: from weka.classifiers import Evaluation [as 别名]
# 或者: from weka.classifiers.Evaluation import pctCorrect [as 别名]
def BaggingSMO_ParamFinder(data, BestSMOIsRBFKernel, param1, param2):
# Possible set for C-value
cBounds = [[1,10,1],[10,100,10],[100,300,20]]
# possible set bag size percent
BagSizePercentBound = [ max(10, int(float(1)/float(data.numInstances())*100)+1 ) ,100,10] # max operation is to make sure that least number of samples are provided to the classifier
# possible set for Iteration
ItrBound = [5,50,5]
# This section tries to boost the best smo
print "searching for the best parameters to Bag the best SMO ...."
gridsearch = GridSearch()
acctag = gridsearch.getEvaluation()
acctag = SelectedTag('ACC',acctag.getTags())
gridsearch.setEvaluation(acctag)
allfilters = AllFilters()
gridsearch.setFilter(allfilters)
gridsearch.setGridIsExtendable(Boolean(False))
smo = SMO()
bagging = Bagging()
if BestSMOIsRBFKernel:
kernel = RBFKernel()
kernel.setGamma(param2)
smo.setKernel(kernel)
smo.setC(param1)
else:
kernel = PolyKernel()
kernel.setExponent(param2)
smo.setKernel(kernel)
smo.setC(param1)
bagging.setClassifier(smo)
gridsearch.setClassifier(bagging)
gridsearch.setXProperty(String('classifier.bagSizePercent'))
gridsearch.setYProperty(String('classifier.numIterations'))
gridsearch.setXExpression(String('I'))
gridsearch.setYExpression(String('I'))
gridsearch.setXMin(BagSizePercentBound[0])
gridsearch.setXMax(BagSizePercentBound[1])
gridsearch.setXStep(BagSizePercentBound[2])
gridsearch.setYMin(ItrBound[0])
gridsearch.setYMax(ItrBound[1])
gridsearch.setYStep(ItrBound[2])
print "searching for best parameters for bagging SMO bagSizePercent = [", BagSizePercentBound[0], ",", BagSizePercentBound[1], "], # Iteration = [", ItrBound[0], ",", ItrBound[1], "] ...."
gridsearch.buildClassifier(data)
#bestbagging1 = gridsearch.getBestClassifier()
bestValues1 = gridsearch.getValues()
# ------------------ Evaluation
smo = SMO()
bestbagging1 = Bagging()
smo.setKernel(kernel)
smo.setC(param1)
bestbagging1.setBagSizePercent(int(bestValues1.x))
bestbagging1.setNumIterations(int(bestValues1.y))
bestbagging1.setClassifier(smo)
evaluation = Evaluation(data)
output = util.get_buffer_for_predictions()[0]
attRange = Range() # no additional attributes output
outputDistribution = Boolean(False) # we don't want distribution
random = Random(1)
numFolds = min(10,data.numInstances())
evaluation.crossValidateModel(bestbagging1,data,numFolds,random,[output, attRange, outputDistribution])
best_acc1 = evaluation.pctCorrect()
bestValues1 = gridsearch.getValues()
print "best accuracy by bagging the optimal SMO classifier: ", best_acc1
print "Optimal Bag size Percent : ", bestValues1.x , "Optimal number of Iteration : ", bestValues1.y
print "-----------------------------------------"
# ------------------------------------------------------------------------------------------------------------------------
# in this section we set the weak classifier to the linear SMO and optimize over c-value of the SMO and number of iteration
smo = SMO()
kernel = PolyKernel()
smo.setKernel(kernel)
bagging.setClassifier(smo)
gridsearch.setClassifier(bagging)
gridsearch.setXProperty(String('classifier.classifier.c'))
gridsearch.setYProperty(String('classifier.numIterations'))
gridsearch.setXExpression(String('I'))
gridsearch.setYExpression(String('I'))
gridsearch.setGridIsExtendable(Boolean(True))
best_acc2 = -float('inf')
for cnt in range(0,len(cBounds)):
cbound = cBounds[cnt]
cmin = cbound[0]
cmax = cbound[1]
cstep = cbound[2]
gridsearch.setXMin(cmin)
gridsearch.setXMax(cmax)
gridsearch.setXStep(cstep)
gridsearch.setYMin(ItrBound[0])
gridsearch.setYMax(ItrBound[1])
gridsearch.setYStep(ItrBound[2])
print "searching for RBF Kernel C = [", cmin, ",", cmax, "], # Iteration = [", ItrBound[0], ",", ItrBound[1], "] ...."
gridsearch.buildClassifier(data)
bestValues = gridsearch.getValues()
# ------------ Evaluation
smo = SMO()
bestbagging = Bagging()
kernel = PolyKernel()
smo.setKernel(kernel)
smo.setC(bestValues.x)
bestbagging.setNumIterations(int(bestValues.y))
bestbagging.setClassifier(smo)
evaluation = Evaluation(data)
#.........这里部分代码省略.........
示例11: BaggingLogistic_ParamFinder
# 需要导入模块: from weka.classifiers import Evaluation [as 别名]
# 或者: from weka.classifiers.Evaluation import pctCorrect [as 别名]
def BaggingLogistic_ParamFinder(data, param1, param2):
# Possible set for Ridge-value
RBounds = [-10,2,1]
# possible set bag size percent
BagSizePercentBound = [ max(10, int(float(1)/float(data.numInstances())*100)+1 ) ,100,10] # max operation is to make sure that least number of samples are provided to the classifier
# possible set for Iteration
ItrBound = [5,50,5]
# This section tries to boost the best logistic
print "searching for the best parameters to Bag the optimal Logistic ...."
gridsearch = GridSearch()
acctag = gridsearch.getEvaluation()
acctag = SelectedTag('ACC',acctag.getTags())
gridsearch.setEvaluation(acctag)
allfilters = AllFilters()
gridsearch.setFilter(allfilters)
gridsearch.setGridIsExtendable(Boolean(False))
logistic = Logistic()
bagging = Bagging()
logistic.setRidge(param1)
logistic.setMaxIts(param2)
bagging.setClassifier(logistic)
gridsearch.setClassifier(bagging)
gridsearch.setXProperty(String('classifier.bagSizePercent'))
gridsearch.setYProperty(String('classifier.numIterations'))
gridsearch.setXExpression(String('I'))
gridsearch.setYExpression(String('I'))
gridsearch.setXMin(BagSizePercentBound[0])
gridsearch.setXMax(BagSizePercentBound[1])
gridsearch.setXStep(BagSizePercentBound[2])
gridsearch.setYMin(ItrBound[0])
gridsearch.setYMax(ItrBound[1])
gridsearch.setYStep(ItrBound[2])
print "searching for best parameters for bagging Logistic bagSizePercent = [", BagSizePercentBound[0], ",", BagSizePercentBound[1], "], # Iteration = [", ItrBound[0], ",", ItrBound[1], "] ...."
gridsearch.buildClassifier(data)
#bestbagging1 = gridsearch.getBestClassifier()
bestValues1 = gridsearch.getValues()
# ------------------------------ Evaluation
logistic = Logistic()
bestbagging1 = Bagging()
logistic.setRidge(param1)
logistic.setMaxIts(param2)
bestbagging1.setBagSizePercent(int(bestValues1.x))
bestbagging1.setNumIterations(int(bestValues1.y))
bestbagging1.setClassifier(logistic)
evaluation = Evaluation(data)
output = output = util.get_buffer_for_predictions()[0]
attRange = Range() # no additional attributes output
outputDistribution = Boolean(False) # we don't want distribution
random = Random(1)
numFolds = min(10,data.numInstances())
evaluation.crossValidateModel(bestbagging1,data,numFolds,random,[output, attRange, outputDistribution])
best_acc1 = evaluation.pctCorrect()
print "best accuracy by bagging the optimal Logistic classifier: ", best_acc1
print "Optimal Bag size Percent: ", bestValues1.x, " Optimal number of Iterations: ", bestValues1.y
print "-----------------------------------------"
# -------------------------------------------------------------------------------------------------------------------------
# in this section we set the weak classifier to the linear SMO and optimize over c-value of the SMO and number of iteration
logistic = Logistic()
bagging = Bagging()
bagging.setClassifier(logistic)
gridsearch.setClassifier(bagging)
gridsearch.setXProperty(String('classifier.classifier.ridge'))
gridsearch.setYProperty(String('classifier.numIterations'))
gridsearch.setXExpression(String('pow(BASE,I)'))
gridsearch.setYExpression(String('I'))
gridsearch.setXBase(10)
gridsearch.setGridIsExtendable(Boolean(True))
gridsearch.setXMin(RBounds[0])
gridsearch.setXMax(RBounds[1])
gridsearch.setXStep(RBounds[2])
gridsearch.setYMin(ItrBound[0])
gridsearch.setYMax(ItrBound[1])
gridsearch.setYStep(ItrBound[2])
print "searching for ridge bound = [10^", RBounds[0], ",10^", RBounds[1], "], # Iteration = [", ItrBound[0], ",", ItrBound[1], "] ...."
gridsearch.buildClassifier(data)
#bestbagging = gridsearch.getBestClassifier()
bestValues2 = gridsearch.getValues()
# ------------------ Evaluation
logistic = Logistic()
bestbagging2 = Bagging()
logistic.setRidge(pow(10,bestValues2.x))
bestbagging2.setNumIterations(int(bestValues2.y))
bestbagging2.setClassifier(logistic)
evaluation = Evaluation(data)
output = output = util.get_buffer_for_predictions()[0]
attRange = Range() # no additional attributes output
outputDistribution = Boolean(False) # we don't want distribution
random = Random(1)
numFolds = min(10,data.numInstances())
evaluation.crossValidateModel(bestbagging2,data,numFolds,random,[output, attRange, outputDistribution])
best_acc2 = evaluation.pctCorrect()
print "best accuracy by bagging the Logistic classifier (with optimization over ridge): ", best_acc2
print "Optimal Ridge value : ", bestValues2.x , "Optimal number of Iteration : ", bestValues2.y
print "-----------------------------------------"
print "Final optimal bagging classifier:"
if (best_acc2 > best_acc1):
print " Best bagging is based on logistic with optimal ridge-value :", bestValues2.x, " optimal numIteration :", bestValues2.y
print " optimal accuracy: ", best_acc2
IsOptimalBaggingIsOptLogistic = False # is optimal bagging based on optimal Logistic ?
IsOptBagOnOptLog = IsOptimalBaggingIsOptLogistic
#.........这里部分代码省略.........