本文整理汇总了Python中weka.classifiers.Evaluation.crossValidateModel方法的典型用法代码示例。如果您正苦于以下问题:Python Evaluation.crossValidateModel方法的具体用法?Python Evaluation.crossValidateModel怎么用?Python Evaluation.crossValidateModel使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类weka.classifiers.Evaluation
的用法示例。
在下文中一共展示了Evaluation.crossValidateModel方法的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: myGridSearch
# 需要导入模块: from weka.classifiers import Evaluation [as 别名]
# 或者: from weka.classifiers.Evaluation import crossValidateModel [as 别名]
def myGridSearch(data,RBound,MBound):
bestlogistic = None
best_acc = -float('inf')
class bestValues(object):
m = float('nan')
r = float('nan')
for r in range(RBound[0],RBound[1]+RBound[2],RBound[2]):
for m in range(MBound[0],MBound[1]+MBound[2],MBound[2]):
logistic = Logistic()
logistic.setMaxIts(int(m))
logistic.setRidge(pow(10,r))
evaluation = Evaluation(data)
output = util.get_buffer_for_predictions()[0]
attRange = Range() # no additional attributes output
outputDistribution = Boolean(False) # we don't want distribution
random = Random(1)
numFolds = min(10,data.numInstances())
evaluation.crossValidateModel(logistic,data,numFolds,random,[output, attRange, outputDistribution])
acc = evaluation.pctCorrect()
if (acc>best_acc):
bestlogistic = logistic
best_acc = acc
bestValues.m = int(m)
bestValues.r = pow(10,r)
print "Best accuracy: ", best_acc
print "Best values: M = ", bestValues.m, ", Ridge = ", bestValues.r
print "-----------------------------------------"
return bestlogistic, bestValues.r, bestValues.m, best_acc
示例2: myGridSearch
# 需要导入模块: from weka.classifiers import Evaluation [as 别名]
# 或者: from weka.classifiers.Evaluation import crossValidateModel [as 别名]
def myGridSearch(data,NTreeBounds,NFeaturesBounds):
best_acc = -float('inf')
bestrandomforest = None
class bestValues(object):
t = float('nan')
f = float('nan')
for t in range(NTreeBounds[0],NTreeBounds[1]+NTreeBounds[2],NTreeBounds[2]):
for f in range(NFeaturesBounds[0],NFeaturesBounds[1]+NFeaturesBounds[2],NFeaturesBounds[2]):
randomforest = RandomForest()
randomforest.setNumTrees(int(t))
randomforest.setNumFeatures(int(f))
evaluation = Evaluation(data)
output = output = util.get_buffer_for_predictions()[0]
attRange = Range() # no additional attributes output
outputDistribution = Boolean(False) # we don't want distribution
random = Random(1)
numFolds = min(10,data.numInstances())
evaluation.crossValidateModel(randomforest,data,numFolds,random,[output, attRange, outputDistribution])
acc = evaluation.pctCorrect()
if (acc>best_acc):
bestrandomforest = randomforest
best_acc = acc
bestValues.t = t
bestValues.f = f
print "Best accuracy:", best_acc
print "Best values: NTreeBounds = ", bestValues.t, ", NFeaturesBounds = ", bestValues.f
print "-----------------------------------------"
return bestrandomforest, bestValues.t, bestValues.f, best_acc
示例3: readCross
# 需要导入模块: from weka.classifiers import Evaluation [as 别名]
# 或者: from weka.classifiers.Evaluation import crossValidateModel [as 别名]
def readCross(num,type,numtrees):
filename=resultFile+'_'+type+'_'+num+'_all.csv'
loader=CSVLoader()
loader.setSource(File(filename))
data=loader.getDataSet()
#print data.numAttributes()
data.setClassIndex(data.numAttributes()-1)
rf=RF()
rf.setNumTrees(numtrees)
#pred_output = PredictionOutput( classname="weka.classifiers.evaluation.output.prediction.PlainText", options=["-distribution"])
buffer = StringBuffer() # buffer for the predictions
output=PlainText()
output.setHeader(data)
output.setBuffer(buffer)
output.setOutputDistribution(True)
attRange = Range() # attributes to output
outputDistributions = Boolean(True)
evaluator=Evaluation(data)
evaluator.crossValidateModel(rf,data,10, Random(1),[output,attRange,outputDistributions])
print evaluator.toSummaryString()
print evaluator.toClassDetailsString()
print evaluator.toMatrixString()
return [evaluator.weightedPrecision(),evaluator.weightedRecall(),evaluator.weightedFMeasure(),evaluator.weightedMatthewsCorrelation(),evaluator.weightedFalseNegativeRate(),evaluator.weightedFalsePositiveRate(),evaluator.weightedTruePositiveRate(),evaluator.weightedTrueNegativeRate(),evaluator.weightedAreaUnderROC()]
示例4: Logistic_ParamFinder
# 需要导入模块: from weka.classifiers import Evaluation [as 别名]
# 或者: from weka.classifiers.Evaluation import crossValidateModel [as 别名]
def Logistic_ParamFinder(data):
# Possible set for Ridge-value
RBounds = [-10,2,1]
# possible set for maximum Iteration
MBounds = [-1,10,1]
if (data.numInstances()>10): # grid search does 10-fold cross validation; hence number of samples must be more than 10
gridsearch = GridSearch()
acctag = gridsearch.getEvaluation()
acctag = SelectedTag('ACC',acctag.getTags())
gridsearch.setEvaluation(acctag)
allfilters = AllFilters()
gridsearch.setFilter(allfilters)
gridsearch.setGridIsExtendable(Boolean(True))
logistic = Logistic()
gridsearch.setClassifier(logistic)
gridsearch.setXProperty(String('classifier.maxIts'))
gridsearch.setYProperty(String('classifier.ridge'))
gridsearch.setXExpression(String('I'))
gridsearch.setYExpression(String('pow(BASE,I)'))
gridsearch.setXMin(MBounds[0])
gridsearch.setXMax(MBounds[1])
gridsearch.setXStep(MBounds[2])
gridsearch.setYMin(RBounds[0])
gridsearch.setYMax(RBounds[1])
gridsearch.setYStep(RBounds[2])
gridsearch.setYBase(10)
print "searching for logistic lcassifier Max Iteration = [", MBounds[0], ",", MBounds[1], "], Ridge = [ 10E", RBounds[0], ",10E", RBounds[1], "] ...."
gridsearch.buildClassifier(data)
bestValues = gridsearch.getValues()
# ----------------------- Evaluation
bestlogistic = Logistic()
bestlogistic.setMaxIts(int(bestValues.x))
bestlogistic.setRidge(pow(10,bestValues.y))
evaluation = Evaluation(data)
output = util.get_buffer_for_predictions()[0]
attRange = Range() # no additional attributes output
outputDistribution = Boolean(False) # we don't want distribution
random = Random(1)
numFolds = min(10,data.numInstances())
evaluation.crossValidateModel(bestlogistic,data,numFolds,random,[output, attRange, outputDistribution])
acc = evaluation.pctCorrect()
print "best accuracy: ", acc
print "best logistic classifier with Ridge = ", bestlogistic.getRidge(), " Max Iteration = ", bestlogistic.getMaxIts()
OptLog = bestlogistic
OptLogp1 = bestlogistic.getRidge()
OptLogp2 = bestlogistic.getMaxIts()
OptLogAcc = acc
else:
OptLog, OptLogp1, OptLogp2, OptLogAcc = myGridSearch(data,RBounds,MBounds)
Description = 'Logistic classifier OptRidge = ' + str(OptLogp1) + \
', OptMaxIts = ' + str(OptLogp2) + ', OptAcc = ' + str(OptLogAcc)
print "-----------------------------------------"
return OptLog, OptLogp1, OptLogp2, OptLogAcc, Description
示例5: RandomForest_ParamFinder
# 需要导入模块: from weka.classifiers import Evaluation [as 别名]
# 或者: from weka.classifiers.Evaluation import crossValidateModel [as 别名]
def RandomForest_ParamFinder(data):
# possible set for Number of trees
NTreeBounds = [1,20,1]
# possible set for number of features
NFeaturesBounds = [0,20,1]
if (data.numInstances()>10): # grid search does 10-fold cross validation; hence number of samples must be more than 10
gridsearch = GridSearch()
acctag = gridsearch.getEvaluation()
acctag = SelectedTag('ACC',acctag.getTags())
gridsearch.setEvaluation(acctag)
allfilters = AllFilters()
gridsearch.setFilter(allfilters)
gridsearch.setGridIsExtendable(Boolean(True))
randomforest = RandomForest()
gridsearch.setClassifier(randomforest)
gridsearch.setXProperty(String('classifier.numTrees'))
gridsearch.setYProperty(String('classifier.numFeatures'))
gridsearch.setXExpression(String('I'))
gridsearch.setYExpression(String('I'))
gridsearch.setXMin(NTreeBounds[0])
gridsearch.setXMax(NTreeBounds[1])
gridsearch.setXStep(NTreeBounds[2])
gridsearch.setYMin(NFeaturesBounds[0])
gridsearch.setYMax(NFeaturesBounds[1])
gridsearch.setYStep(NFeaturesBounds[2])
gridsearch.setYBase(10)
print "searching for random-forest NumTrees = [", NTreeBounds[0], ",", NTreeBounds[1], "], NumFeatures = [ ", NFeaturesBounds[0], ",", NFeaturesBounds[1], "] ...."
gridsearch.buildClassifier(data)
bestValues = gridsearch.getValues()
# ----------------------- Evaluation
bestrandomforest = RandomForest()
bestrandomforest.setNumTrees(int(bestValues.x))
bestrandomforest.setNumFeatures(int(bestValues.y))
evaluation = Evaluation(data)
output = output = util.get_buffer_for_predictions()[0]
attRange = Range() # no additional attributes output
outputDistribution = Boolean(False) # we don't want distribution
random = Random(1)
numFolds = min(10,data.numInstances())
evaluation.crossValidateModel(bestrandomforest,data,numFolds,random,[output, attRange, outputDistribution])
acc = evaluation.pctCorrect()
print "best accuracy: ", acc
print "best random-forest classifier with NumTrees=",bestValues.x , ", NumFeatures = ", bestValues.y
OptRndFrst = bestrandomforest
OptRndFrstp1 = bestValues.x
OptRndFrstp2 = bestValues.y
OptRndFrstAcc = acc
else:
OptRndFrst, OptRndFrstp1, OptRndFrstp2, OptRndFrstAcc = myGridSearch(data,NTreeBounds,NFeaturesBounds)
Description = 'Random-Forest classifier: OptNumTrees = ' + str(OptRndFrstp1) + \
', OptNumFeatures = ' + str(OptRndFrstp2) + ', OptAcc = ' + str(OptRndFrstAcc)
print "-----------------------------------------"
return OptRndFrst, OptRndFrstp1, OptRndFrstp2, OptRndFrstAcc, Description
示例6: CoverTree
# 需要导入模块: from weka.classifiers import Evaluation [as 别名]
# 或者: from weka.classifiers.Evaluation import crossValidateModel [as 别名]
# Add a CoverTree searcher (Euclidean distance only) to the pool of NN search
# algorithms, then for each odd K benchmark IBk build/evaluation time and
# write one CSV row per K (RMSE column per search algorithm) to `file`,
# with timing details going to `log`.
# NOTE(review): `tree_algorithms`, `data`, `file`, `log` and `rand` are
# defined earlier in the full example (outside this excerpt).
cover = CoverTree()
cover.setDistanceFunction(EuclideanDistance())  # only Euclidean Distance function
tree_algorithms.append(cover)
data.setClassIndex(data.numAttributes() - 1)
for k in range(1, 30, 2):
    file.write(str(k))
    for search_algo in tree_algorithms:
        log.write("---------------------------------\nK: " + str(k) + ", Search Algorithm: " + search_algo.__class__.__name__ + "\n")
        classifier = IBk()
        classifier.setNearestNeighbourSearchAlgorithm(search_algo)
        classifier.setKNN(k)
        stamp = time.time()
        classifier.buildClassifier(data)
        log.write("Time to build classifier: " + str(time.time() - stamp) + "\n")
        evaluator = Evaluation(data)
        plain_out = PlainText()       # plain text output for predictions
        plain_out.setHeader(data)
        pred_buffer = StringBuffer()  # buffer to use
        plain_out.setBuffer(pred_buffer)
        att_range = Range()           # no additional attributes output
        dist_flag = Boolean(False)    # we don't want distribution
        stamp = time.time()
        evaluator.crossValidateModel(classifier, data, 10, rand, [plain_out, att_range, dist_flag])
        log.write("Time to evaluate model: " + str(time.time() - stamp) + "\n")
        log.write(evaluator.toSummaryString())
        file.write("," + str(evaluator.rootMeanSquaredError()))
    file.write("\n")
file.close()
log.close()
示例7: myGridSearch
# 需要导入模块: from weka.classifiers import Evaluation [as 别名]
# 或者: from weka.classifiers.Evaluation import crossValidateModel [as 别名]
def myGridSearch(data,cBounds,GBound,eBounds):
IsBestRBFKernel = False
best_acc_poly = -float('inf')
best_acc_rbf = -float('inf')
# Poly Kernel
class bestValues_poly(object):
x = float('nan')
y = float('nan')
for Cbnd in cBounds:
for c in range(Cbnd[0],Cbnd[1]+Cbnd[2],Cbnd[2]):
for e in range(eBounds[0],eBounds[1]+eBounds[2],eBounds[2]):
smo = SMO()
kernel = PolyKernel()
kernel.setExponent(e)
smo.setC(c)
smo.setKernel(kernel)
evaluation = Evaluation(data)
output = util.get_buffer_for_predictions()[0]
attRange = Range() # no additional attributes output
outputDistribution = Boolean(False) # we don't want distribution
random = Random(1)
numFolds = min(10,data.numInstances())
evaluation.crossValidateModel(smo,data,numFolds,random,[output, attRange, outputDistribution])
acc = evaluation.pctCorrect()
if (acc>best_acc_poly):
best_smo_poly = smo
best_acc_poly = acc
bestValues_poly.x = c
bestValues_poly.y = e
print "Best accuracy (Poly Kernel): ", best_acc_poly
print "Best values (Poly Kernel): C = ", bestValues_poly.x, ", exponent = ", bestValues_poly.y
print "-----------------------------------------"
# RBF Kernel
class bestValues_rbf(object):
x = float('nan')
y = float('nan')
for Cbnd in cBounds:
for c in range(Cbnd[0],Cbnd[1]+Cbnd[2],Cbnd[2]):
for g in range(GBound[0],GBound[1]+GBound[2],GBound[2]):
smo = SMO()
kernel = RBFKernel()
kernel.setGamma(pow(10,g))
smo.setC(c)
smo.setKernel(kernel)
evaluation = Evaluation(data)
output = util.get_buffer_for_predictions()[0]
attRange = Range() # no additional attributes output
outputDistribution = Boolean(False) # we don't want distribution
random = Random(1)
numFolds = min(10,data.numInstances())
evaluation.crossValidateModel(smo,data,numFolds,random,[output, attRange, outputDistribution])
acc = evaluation.pctCorrect()
if (acc>best_acc_rbf):
best_smo_rbf = smo
best_acc_rbf = acc
bestValues_rbf.x = c
bestValues_rbf.y = g
print "Best accuracy (RBF Kernel): ", best_acc_rbf
print "Best values (RBF Kernel): C = ", bestValues_rbf.x, ", gamma = ", bestValues_rbf.y
if (best_acc_rbf > best_acc_poly):
IsBestRBFKernel = True
print "best smo classifier is RBF kernel with C = ", bestValues_rbf.x," and gamma = ", pow(10,bestValues_rbf.y)
best_smo = best_smo_rbf
OptSMOp1 = bestValues_rbf.x
OptSMOp2 = pow(10,bestValues_rbf.y)
OptSMOAcc = best_acc_rbf
OptSMOIsRBF = IsBestRBFKernel
else:
IsBestRBFKernel = False
print "best smo classifier is Poly kernel with C = ", bestValues_poly.x," and exponent = ", bestValues_poly.y
best_smo = best_smo_poly
OptSMOp1 = bestValues_poly.x
OptSMOp2 = bestValues_poly.y
OptSMOAcc = best_acc_poly
OptSMOIsRBF = IsBestRBFKernel
return IsBestRBFKernel, best_smo, OptSMOp1, OptSMOp2, OptSMOAcc
示例8: SMO_ParamFinder
# 需要导入模块: from weka.classifiers import Evaluation [as 别名]
# 或者: from weka.classifiers.Evaluation import crossValidateModel [as 别名]
def SMO_ParamFinder(data):
# Possible set for C-value
cBounds = [[1,10,1],[10,100,10],[100,300,20]]
# possible set for exponents
eBounds = [1,3,1]
# possible set for Gamma
GBound = [-5,2,1]
if (data.numInstances()>10): # grid search does 10-fold cross validation; hence number of samples must be more than 10
# Polynomials Kernel
gridsearch = GridSearch()
acctag = gridsearch.getEvaluation()
acctag = SelectedTag('ACC',acctag.getTags())
gridsearch.setEvaluation(acctag)
allfilters = AllFilters()
gridsearch.setFilter(allfilters)
gridsearch.setGridIsExtendable(Boolean(True))
smo = SMO()
kernel = PolyKernel()
smo.setKernel(kernel)
gridsearch.setClassifier(smo)
gridsearch.setXProperty(String('classifier.c'))
gridsearch.setYProperty(String('classifier.kernel.Exponent'))
gridsearch.setXExpression(String('I'))
gridsearch.setYExpression(String('I'))
best_acc_poly = -float('inf')
for cnt in range(0,len(cBounds)):
cbound = cBounds[cnt]
cmin = cbound[0]
cmax = cbound[1]
cstep = cbound[2]
gridsearch.setXMin(cmin)
gridsearch.setXMax(cmax)
gridsearch.setXStep(cstep)
gridsearch.setYMin(eBounds[0])
gridsearch.setYMax(eBounds[1])
gridsearch.setYStep(eBounds[2])
print "searching for Polykernel C = [", cmin, ",", cmax, "], exponent = [", eBounds[0], ",", eBounds[1], "] ...."
gridsearch.buildClassifier(data)
bestValues = gridsearch.getValues()
# --------------------------------- Evaluation
bestsmo = SMO()
kernel = PolyKernel()
kernel.setExponent(bestValues.y)
bestsmo.setC(bestValues.x)
bestsmo.setKernel(kernel)
evaluation = Evaluation(data)
output = util.get_buffer_for_predictions()[0]
attRange = Range() # no additional attributes output
outputDistribution = Boolean(False) # we don't want distribution
random = Random(1)
numFolds = min(10,data.numInstances())
print "numFolds : ", numFolds
evaluation.crossValidateModel(bestsmo,data,numFolds,random,[output, attRange, outputDistribution])
acc = evaluation.pctCorrect()
if (acc>best_acc_poly):
best_smo_poly = bestsmo
best_acc_poly = acc
bestValues_poly = bestValues
print "Best accuracy so far: ",best_acc_poly
print "Best values so far: ",bestValues_poly
print "Best accuracy (Poly Kernel): ", best_acc_poly
print "Best values (Poly Kernel): ", bestValues_poly
print "-----------------------------------------"
# RBF Kernel
smo = SMO()
kernel = RBFKernel()
smo.setKernel(kernel)
gridsearch.setClassifier(smo)
gridsearch.setXProperty(String('classifier.c'))
gridsearch.setYProperty(String('classifier.kernel.gamma'))
gridsearch.setXExpression(String('I'))
gridsearch.setYExpression(String('pow(BASE,I)'))
gridsearch.setYBase(10)
best_acc_rbf = -float('inf')
for cnt in range(0,len(cBounds)):
cbound = cBounds[cnt]
cmin = cbound[0]
cmax = cbound[1]
cstep = cbound[2]
gridsearch.setXMin(cmin)
gridsearch.setXMax(cmax)
gridsearch.setXStep(cstep)
gridsearch.setYMin(GBound[0])
gridsearch.setYMax(GBound[1])
gridsearch.setYStep(GBound[2])
gridsearch.setYBase(10)
print "searching for RBF Kernel C = [", cmin, ",", cmax, "], gamma = [10^", GBound[0], ",10^", GBound[1], "] ...."
gridsearch.buildClassifier(data)
bestValues = gridsearch.getValues()
# ----------------------------------- Evaluation
bestsmo = SMO()
kernel = RBFKernel()
kernel.setGamma(pow(10,bestValues.y))
bestsmo.setC(bestValues.x)
bestsmo.setKernel(kernel)
evaluation = Evaluation(data)
output = util.get_buffer_for_predictions()[0]
attRange = Range() # no additional attributes output
outputDistribution = Boolean(False) # we don't want distribution
random = Random(1)
#.........这里部分代码省略.........
示例9: runClassifierAlgo
# 需要导入模块: from weka.classifiers import Evaluation [as 别名]
# 或者: from weka.classifiers.Evaluation import crossValidateModel [as 别名]
def runClassifierAlgo(algo, class_index, training_filename, test_filename, do_model, do_eval, do_predict):
""" If <test_filename> is given:
build a model from <training_filename> with classifier <algo>, then test on
<test_filename> (equivalent of Weka "Supplied test set");
otherwise:
cross-validate classifier <algo> on <training_filename> (4 folds below,
despite the original comment saying 10-fold).
<class_index> is the column containing the dependent variable.
Returns a dict with 'model', 'eval' and 'predict' string entries.
http://weka.wikispaces.com/Generating+classifier+evaluation+output+manually
http://weka.sourceforge.net/doc.dev/weka/classifiers/Evaluation.html
NOTE(review): indentation was flattened when this listing was scraped; the
code below is left byte-identical, so the exact nesting is ambiguous.
"""
print ' runClassifierAlgo: training_filename= ', training_filename, ', test_filename=', test_filename
misc.checkExists(training_filename)
training_file = FileReader(training_filename)
training_data = Instances(training_file)
# Without a test file, evaluate on the training data itself.
if test_filename:
test_file = FileReader(test_filename)
test_data = Instances(test_file)
else:
test_data = training_data
# set the class Index - the index of the dependent variable
training_data.setClassIndex(class_index)
test_data.setClassIndex(class_index)
# create the model
if test_filename:
algo.buildClassifier(training_data)
evaluation = None
# only a trained classifier can be evaluated
# NOTE(review): `buffer` appears to be bound only on the eval/predict path,
# yet the return statement reads it unconditionally — calling with
# do_eval=do_predict=False would presumably raise NameError; confirm callers.
if do_eval or do_predict:
evaluation = Evaluation(test_data)
buffer = StringBuffer() # buffer for the predictions
attRange = Range() # no additional attributes output
outputDistribution = Boolean(False) # we don't want distribution
if test_filename:
evaluation.evaluateModel(algo, test_data, [buffer, attRange, outputDistribution])
else:
# evaluation.evaluateModel(algo, [String('-t ' + training_filename), String('-c 1')])
# print evaluation.toSummaryString()
rand = Random(1)
# 4-fold CV; predictions are NOT captured into `buffer` on this path
# (no forPredictionsPrinting arguments are passed).
evaluation.crossValidateModel(algo, training_data, 4, rand)
# Disabled debug dump of headline metrics and the confusion matrix.
if False:
print 'percentage correct =', evaluation.pctCorrect()
print 'area under ROC =', evaluation.areaUnderROC(class_index)
confusion_matrix = evaluation.confusionMatrix()
for l in confusion_matrix:
print '** ', ','.join('%2d'%int(x) for x in l)
if verbose:
if do_model:
print '--> Generated model:\n'
print algo.toString()
if do_eval:
print '--> Evaluation:\n'
print evaluation.toSummaryString()
if do_predict:
print '--> Predictions:\n'
print buffer
return {'model':str(algo), 'eval':str(evaluation.toSummaryString()), 'predict':str(buffer) }
示例10: Bayes_ParamFinder
# 需要导入模块: from weka.classifiers import Evaluation [as 别名]
# 或者: from weka.classifiers.Evaluation import crossValidateModel [as 别名]
def Bayes_ParamFinder(data):
# ----------------------- Evaluation of Naive Bayes without kernel estimation
naivebayes = NaiveBayes()
evaluation = Evaluation(data)
output = util.get_buffer_for_predictions()[0]
attRange = Range() # no additional attributes output
outputDistribution = Boolean(False) # we don't want distribution
random = Random(1)
numFolds = min(10,data.numInstances())
evaluation.crossValidateModel(naivebayes,data,numFolds,random,[output, attRange, outputDistribution])
acc_naivebayes = evaluation.pctCorrect()
print "Naive Bayesisn accuracy (without kernel density estimation): ", acc_naivebayes
# ----------------------- Evaluation of Naive Bayes with kernel estimation
naivebayes = NaiveBayes()
naivebayes.setUseKernelEstimator(Boolean(True)) # use kernel density estimation
evaluation = Evaluation(data)
attRange = Range() # no additional attributes output
outputDistribution = Boolean(False) # we don't want distribution
random = Random(1)
numFolds = min(10,data.numInstances())
evaluation.crossValidateModel(naivebayes,data,numFolds,random,[output, attRange, outputDistribution])
acc_naivebayes_withkernel = evaluation.pctCorrect()
print "Naive Bayesisn accuracy (with kernel density estimation): ", acc_naivebayes_withkernel
# ----------------------- Evaluation of Naive bayes multinomial
naivebayesmultinomial = NaiveBayesMultinomial()
evaluation = Evaluation(data)
attRange = Range() # no additional attributes output
outputDistribution = Boolean(False) # we don't want distribution
random = Random(1)
if (allAttributesPositive(data)): # multinomial bayes classifier only work on positive attributes
numFolds = min(10,data.numInstances())
evaluation.crossValidateModel(naivebayesmultinomial,data,numFolds,random,[output, attRange, outputDistribution])
acc_naivemultinomialbayes = evaluation.pctCorrect()
else:
acc_naivemultinomialbayes = 0
print "Naive Multinomial Bayesisn accuracy : ", acc_naivemultinomialbayes
# ------------------------- Comparision
if (acc_naivemultinomialbayes > acc_naivebayes):
if (acc_naivemultinomialbayes > acc_naivebayes_withkernel):
IsOptMultinomialBayes = True
IsOptNaiveKernelDensity = False
acc = acc_naivemultinomialbayes
else:
IsOptMultinomialBayes = False
IsOptNaiveKernelDensity = True
acc = acc_naivebayes_withkernel
else:
if (acc_naivebayes > acc_naivebayes_withkernel):
IsOptMultinomialBayes = False
IsOptNaiveKernelDensity = False
acc = acc_naivebayes
else:
IsOptMultinomialBayes = False
IsOptNaiveKernelDensity = True
acc = acc_naivebayes_withkernel
print "-----------------------------------------"
OptBayesAcc = acc
if IsOptMultinomialBayes:
Description = 'Optimal Bayes classifier is Multinomial Bayes: OptAcc = ' + str(OptBayesAcc)
elif IsOptNaiveKernelDensity:
Description = 'Optimal Bayes classifier is Naive Bayes with kernel density estimation: OptAcc = ' +\
str(OptBayesAcc)
else:
Description = 'Optimal Bayes classifier is Naive Bayes: OptAcc = ' + str(OptBayesAcc)
return IsOptMultinomialBayes, IsOptNaiveKernelDensity, OptBayesAcc, Description
示例11: feat_trimming
# 需要导入模块: from weka.classifiers import Evaluation [as 别名]
# 或者: from weka.classifiers.Evaluation import crossValidateModel [as 别名]
def feat_trimming(cl_list, config, f, fe, min_feat, new_instances, num_feat, pos_class_weight, progress,
progress_per_iteration, result_list, split_ratio, iterative = False):
"""Trim the feature set from num_feat toward min_feat, re-selecting features
and cross-validating each configured classifier at every step.

cl_list selects classifiers by number: 0 = Liblinear (nominal-to-binary,
class weight pos_class_weight), 1 = BayesNet with K2 search (max 1 parent),
2 = J48, 3 = JRip; anything else raises ValueError.  Results are appended
via report_results into result_list; progress is advanced per iteration.

Returns (new_instances, progress) — or (new_instances, 30) when the
requested feature count exceeds the available attributes.

NOTE(review): indentation was flattened when this listing was scraped; the
exact nesting of the branches/loops below is ambiguous, so the code is left
byte-identical and only comments were added.
"""
# print "num_feat:%s"%num_feat
# print "min_feat:%s"%min_feat
min_feat = int(min_feat)
num_feat = int(num_feat)
if debug:
print "num_feat:%s"%num_feat
print "min_feat:%s"%min_feat
print "In feat_trimming"
# cut_amount = how many features to drop per iteration.
if split_ratio == 0:
cut_amount = 1
else:
cut_amount = compute_cut_amount(min_feat, num_feat, split_ratio)
# Non-iterative mode: jump straight to the minimum feature count.
if not iterative:
num_feat = min_feat
if num_feat > new_instances.numAttributes():
return new_instances, 30
# else:
# num_feat -= cut_amount
# Build the pool of classifiers to evaluate at each trim step.
classifier_list = []
for cl in cl_list:
if cl == 0:
liblinear = Liblinear()
liblinear.setConvertNominalToBinary(True)
liblinear.setWeights(str(pos_class_weight) + " 1")
classifier_list.append(liblinear)
elif cl == 1:
k2 = weka.classifiers.bayes.net.search.local.K2()
k2. setMaxNrOfParents(1)
bayesNet = BayesNet()
bayesNet.setSearchAlgorithm(k2)
classifier_list.append(bayesNet)
elif cl == 2:
j48 = J48()
classifier_list.append(j48)
elif cl == 3:
jRip = JRip()
classifier_list.append(jRip)
else:
raise ValueError('Unknown Classifier number -- %d given' % cl)
# Main trimming loop: select num_feat features, evaluate, then cut.
while num_feat >= min_feat:
if debug:
print "Num_feat:%d, min_feat:%d"%(num_feat, min_feat)
start = time.time()
for classifier in classifier_list:
if debug:
print "Before selecting Features"
# Assigns to t_selector the classifier
if config.optimize:
master_map, header_rows, new_instances, t_selector = select_features(classifier, config.fmeasure, fe,
new_instances, num_feat,
config.optimize)
else:
master_map, header_rows, new_instances, t_selector = select_features(classifier, config.fmeasure, fe,
new_instances, num_feat)
if debug:
print "After selecting Features"
print "Num_selected_features:%d"%(new_instances.numAttributes()-1)
# t_selector may wrap the classifier (tSelector) or be the bare classifier.
if isinstance(t_selector, tSelector):
classifier_name = t_selector.getClassifier().getClass().__name__
else:
classifier_name = t_selector.getClass().__name__
evaluation = Evaluation(new_instances)
variance_analysis(config, evaluation, new_instances, t_selector)
# Either temporal folds or standard k-fold CV, per config.
if config.temporal_folds:
do_temporal_cv(t_selector, new_instances, config.folds)
else:
evaluation.crossValidateModel(t_selector, new_instances, config.folds, Random(1), [])
# Add to candidate feature list only if its in the iterative stage
report_results(classifier_name, config, evaluation, f, fe, new_instances, num_feat, result_list,
t_selector, add_to_list=not iterative)
progress = update_progress(progress, progress_per_iteration)
cut_amount = compute_cut_amount(min_feat, num_feat, split_ratio)
num_feat -= cut_amount
# Break slow feature selection after first iteration
if is_slow_fs(fe):
break
if debug:
elapsed = (time.time() - start)
print "Time elapsed:%d ms for num_feat=%d, min_feat=%d"%(elapsed, num_feat, min_feat)
return new_instances, progress
示例12: AdaBoostedSimpleLogistic_ParamFinder
# 需要导入模块: from weka.classifiers import Evaluation [as 别名]
# 或者: from weka.classifiers.Evaluation import crossValidateModel [as 别名]
def AdaBoostedSimpleLogistic_ParamFinder(data, param1, param2):
# Adaboost params: Possible set for Weight Threshold
WeightThresholdBounds = [99,100,1]
# Adaboost params: possible set for NumIteration
NumItrBound = [5,50,5]
# Simple Logisitic params: Possible set for num of boosting
NumBoostIterationBounds = [0,200,10]
# This section tries to boost the best simple logistic
print "searching for the best parameters to boosting on the optimal simple Logistic ...."
gridsearch = GridSearch()
acctag = gridsearch.getEvaluation()
acctag = SelectedTag('ACC',acctag.getTags())
gridsearch.setEvaluation(acctag)
allfilters = AllFilters()
gridsearch.setFilter(allfilters)
gridsearch.setGridIsExtendable(Boolean(True))
simplelogistic = SimpleLogistic()
adaboostm = AdaBoostM1()
simplelogistic.setHeuristicStop(param1)
simplelogistic.setNumBoostingIterations(param2)
adaboostm.setClassifier(simplelogistic)
gridsearch.setClassifier(adaboostm)
gridsearch.setXProperty(String('classifier.weightThreshold'))
gridsearch.setYProperty(String('classifier.numIterations'))
gridsearch.setXExpression(String('I'))
gridsearch.setYExpression(String('I'))
gridsearch.setXMin(WeightThresholdBounds[0])
gridsearch.setXMax(WeightThresholdBounds[1])
gridsearch.setXStep(WeightThresholdBounds[2])
gridsearch.setYMin(NumItrBound[0])
gridsearch.setYMax(NumItrBound[1])
gridsearch.setYStep(NumItrBound[2])
print "searching for best parameters for boosting simple Logistic weightThreshold = [", WeightThresholdBounds[0], ",", WeightThresholdBounds[1], "], # Iterations = [", NumItrBound[0], ",", NumItrBound[1], "] ...."
gridsearch.buildClassifier(data)
bestValues1 = gridsearch.getValues()
# ------------------------------ Evaluation
simplelogistic = SimpleLogistic()
bestadaboostm1 = AdaBoostM1()
simplelogistic.setHeuristicStop(param1)
simplelogistic.setNumBoostingIterations(param2)
bestadaboostm1.setWeightThreshold(int(bestValues1.x))
bestadaboostm1.setNumIterations(int(bestValues1.y))
bestadaboostm1.setClassifier(simplelogistic)
evaluation = Evaluation(data)
output = util.get_buffer_for_predictions()[0]
attRange = Range() # no additional attributes output
outputDistribution = Boolean(False) # we don't want distribution
random = Random(1)
numFolds = min(10,data.numInstances())
evaluation.crossValidateModel(bestadaboostm1,data,numFolds,random,[output, attRange, outputDistribution])
best_acc1 = evaluation.pctCorrect()
print "best accuracy by boosting the optimal simple Logistic classifier: ", best_acc1
print "Optimal weight Threshold Percent : ", bestValues1.x , "Optimal number of Iterations : ", bestValues1.y
print "-----------------------------------------"
# -------------------------------------------------------------------------------------------------------------------------
# in this section we set the weak classifier to the linear SMO and optimize over c-value of the SMO and number of iteration
simplelogistic = SimpleLogistic()
adaboostm = AdaBoostM1()
adaboostm.setClassifier(simplelogistic)
gridsearch.setClassifier(adaboostm)
gridsearch.setXProperty(String('classifier.classifier.numBoostingIterations'))
gridsearch.setYProperty(String('classifier.numIterations'))
gridsearch.setXExpression(String('I'))
gridsearch.setYExpression(String('I'))
gridsearch.setXBase(10)
gridsearch.setXMin(NumBoostIterationBounds[0])
gridsearch.setXMax(NumBoostIterationBounds[1])
gridsearch.setXStep(NumBoostIterationBounds[2])
gridsearch.setYMin(NumItrBound[0])
gridsearch.setYMax(NumItrBound[1])
gridsearch.setYStep(NumItrBound[2])
print "searching for number of boosting Iterations bound = [", NumBoostIterationBounds[0], ",", NumBoostIterationBounds[1], "], # Iteration = [", NumItrBound[0], ",", NumItrBound[1], "] ...."
gridsearch.buildClassifier(data)
bestValues2 = gridsearch.getValues()
# ------------------ Evaluation
simplelogistic = SimpleLogistic()
bestadaboostm2 = AdaBoostM1()
simplelogistic.setNumBoostingIterations(int(bestValues2.x))
bestadaboostm2.setNumIterations(int(bestValues2.y))
bestadaboostm2.setClassifier(simplelogistic)
evaluation = Evaluation(data)
output = util.get_buffer_for_predictions()[0]
attRange = Range() # no additional attributes output
outputDistribution = Boolean(False) # we don't want distribution
random = Random(1)
numFolds = min(10,data.numInstances())
evaluation.crossValidateModel(bestadaboostm2,data,numFolds,random,[output, attRange, outputDistribution])
best_acc2 = evaluation.pctCorrect()
print "best accuracy by boosting the Simple Logistic classifier (with optimization over ridge): ", best_acc2
print "Optimal number of boosting Iteration : ", bestValues2.x , "Optimal number of Iteration : ", bestValues2.y
print "-----------------------------------------"
print "Final optimal boosting classifier:"
if (best_acc2 > best_acc1):
print " Best boosting is based on simple logistic with optimal numBoostingIterations :",\
bestValues2.x, " optimal numIteration :", bestValues2.y
print " optimal accuracy: ", best_acc2
IsOptimalBoostingOnOptSimpleLogistic = False # is optimal boosting based on optimal simple Logistic ?
IsOptBoostOnOptSimpLog = IsOptimalBoostingOnOptSimpleLogistic
OptBoostSimpLog = bestadaboostm2
OptBoostSimpLogp1 = bestValues2.x
#.........这里部分代码省略.........
示例13: BaggingSMO_ParamFinder
# 需要导入模块: from weka.classifiers import Evaluation [as 别名]
# 或者: from weka.classifiers.Evaluation import crossValidateModel [as 别名]
def BaggingSMO_ParamFinder(data, BestSMOIsRBFKernel, param1, param2):
    """Search for good Bagging meta-parameters around an already-tuned SMO base classifier.

    Runs Weka's GridSearch twice, using classification accuracy (ACC) as the
    selection criterion:
      1) base SMO fixed to the previously found best kernel/parameters,
         searching over bagSizePercent (X axis) and numIterations (Y axis);
      2) base SMO reset to a default (PolyKernel) SMO, searching over the SMO
         C value (X axis) and numIterations (Y axis) across several C ranges.
    Each winning grid point is re-scored with (up to) 10-fold cross-validation.

    data               -- weka.core.Instances training set (class attribute already set)
    BestSMOIsRBFKernel -- True if the best stand-alone SMO used an RBF kernel
    param1             -- best C value found for the stand-alone SMO
    param2             -- best gamma (RBF) or exponent (poly) for its kernel

    NOTE(review): the tail of this example (comparison of best_acc1/best_acc2
    and the return statement) is omitted in the scraped source.
    """
    # Candidate C ranges as [min, max, step] triples, tried from fine to coarse.
    cBounds = [[1,10,1],[10,100,10],[100,300,20]]
    # Bag-size-percent range [min, max, step]; lower bound clamped so each bag
    # holds at least one training instance.
    BagSizePercentBound = [ max(10, int(float(1)/float(data.numInstances())*100)+1 ) ,100,10] # max operation is to make sure that least number of samples are provided to the classifier
    # Range for the number of bagging iterations [min, max, step].
    ItrBound = [5,50,5]
    # This section tries to bag the best SMO found earlier.
    print "searching for the best parameters to Bag the best SMO ...."
    gridsearch = GridSearch()
    # Select classification accuracy (ACC) as the grid-search evaluation metric.
    acctag = gridsearch.getEvaluation()
    acctag = SelectedTag('ACC',acctag.getTags())
    gridsearch.setEvaluation(acctag)
    allfilters = AllFilters()
    gridsearch.setFilter(allfilters)
    gridsearch.setGridIsExtendable(Boolean(False))
    smo = SMO()
    bagging = Bagging()
    # Recreate the best stand-alone SMO configuration as the bagging base learner.
    if BestSMOIsRBFKernel:
        kernel = RBFKernel()
        kernel.setGamma(param2)
        smo.setKernel(kernel)
        smo.setC(param1)
    else:
        kernel = PolyKernel()
        kernel.setExponent(param2)
        smo.setKernel(kernel)
        smo.setC(param1)
    bagging.setClassifier(smo)
    gridsearch.setClassifier(bagging)
    # X axis: Bagging.bagSizePercent, Y axis: Bagging.numIterations
    # ('I' = use the raw grid value, no transformation).
    gridsearch.setXProperty(String('classifier.bagSizePercent'))
    gridsearch.setYProperty(String('classifier.numIterations'))
    gridsearch.setXExpression(String('I'))
    gridsearch.setYExpression(String('I'))
    gridsearch.setXMin(BagSizePercentBound[0])
    gridsearch.setXMax(BagSizePercentBound[1])
    gridsearch.setXStep(BagSizePercentBound[2])
    gridsearch.setYMin(ItrBound[0])
    gridsearch.setYMax(ItrBound[1])
    gridsearch.setYStep(ItrBound[2])
    print "searching for best parameters for bagging SMO bagSizePercent = [", BagSizePercentBound[0], ",", BagSizePercentBound[1], "], # Iteration = [", ItrBound[0], ",", ItrBound[1], "] ...."
    gridsearch.buildClassifier(data)
    #bestbagging1 = gridsearch.getBestClassifier()
    bestValues1 = gridsearch.getValues()
    # ------------------ Evaluation
    # Rebuild the winning configuration explicitly and cross-validate it.
    # `kernel` is reused from the if/else above, so the base SMO keeps the
    # tuned kernel parameters.
    smo = SMO()
    bestbagging1 = Bagging()
    smo.setKernel(kernel)
    smo.setC(param1)
    bestbagging1.setBagSizePercent(int(bestValues1.x))
    bestbagging1.setNumIterations(int(bestValues1.y))
    bestbagging1.setClassifier(smo)
    evaluation = Evaluation(data)
    output = util.get_buffer_for_predictions()[0]
    attRange = Range() # no additional attributes output
    outputDistribution = Boolean(False) # we don't want distribution
    random = Random(1)
    # Cap the fold count for very small datasets.
    numFolds = min(10,data.numInstances())
    evaluation.crossValidateModel(bestbagging1,data,numFolds,random,[output, attRange, outputDistribution])
    best_acc1 = evaluation.pctCorrect()
    bestValues1 = gridsearch.getValues()
    print "best accuracy by bagging the optimal SMO classifier: ", best_acc1
    print "Optimal Bag size Percent : ", bestValues1.x , "Optimal number of Iteration : ", bestValues1.y
    print "-----------------------------------------"
    # ------------------------------------------------------------------------------------------------------------------------
    # In this section the base classifier is a default (PolyKernel) SMO and the
    # search is over the SMO C value and the number of bagging iterations.
    smo = SMO()
    kernel = PolyKernel()
    smo.setKernel(kernel)
    bagging.setClassifier(smo)
    gridsearch.setClassifier(bagging)
    # X property reaches through Bagging into the nested SMO: classifier.classifier.c
    gridsearch.setXProperty(String('classifier.classifier.c'))
    gridsearch.setYProperty(String('classifier.numIterations'))
    gridsearch.setXExpression(String('I'))
    gridsearch.setYExpression(String('I'))
    gridsearch.setGridIsExtendable(Boolean(True))
    best_acc2 = -float('inf')
    for cnt in range(0,len(cBounds)):
        cbound = cBounds[cnt]
        cmin = cbound[0]
        cmax = cbound[1]
        cstep = cbound[2]
        gridsearch.setXMin(cmin)
        gridsearch.setXMax(cmax)
        gridsearch.setXStep(cstep)
        gridsearch.setYMin(ItrBound[0])
        gridsearch.setYMax(ItrBound[1])
        gridsearch.setYStep(ItrBound[2])
        # NOTE(review): the message says "RBF Kernel" but this search uses a
        # PolyKernel base SMO — looks like a copy-paste artifact in the original.
        print "searching for RBF Kernel C = [", cmin, ",", cmax, "], # Iteration = [", ItrBound[0], ",", ItrBound[1], "] ...."
        gridsearch.buildClassifier(data)
        bestValues = gridsearch.getValues()
        # ------------ Evaluation
        smo = SMO()
        bestbagging = Bagging()
        kernel = PolyKernel()
        smo.setKernel(kernel)
        smo.setC(bestValues.x)
        bestbagging.setNumIterations(int(bestValues.y))
        bestbagging.setClassifier(smo)
        evaluation = Evaluation(data)
        #......... remainder of this example omitted in the scraped source .........
示例14: BaggingLogistic_ParamFinder
# 需要导入模块: from weka.classifiers import Evaluation [as 别名]
# 或者: from weka.classifiers.Evaluation import crossValidateModel [as 别名]
def BaggingLogistic_ParamFinder(data, param1, param2):
    """Search for good Bagging meta-parameters around an already-tuned Logistic classifier.

    Runs Weka's GridSearch twice, using classification accuracy (ACC) as the
    selection criterion:
      1) base Logistic fixed to the previously found best ridge/maxIts,
         searching over bagSizePercent (X axis) and numIterations (Y axis);
      2) base Logistic with default settings, searching over the ridge value
         (X axis, on a log10 grid via pow(BASE, I)) and numIterations (Y axis).
    Each winning grid point is re-scored with (up to) 10-fold cross-validation.

    data   -- weka.core.Instances training set (class attribute already set)
    param1 -- best ridge value found for the stand-alone Logistic
    param2 -- best maximum number of iterations (maxIts) for the stand-alone Logistic

    NOTE(review): the tail of this example (the else branch / return statement)
    is omitted in the scraped source.
    """
    # Ridge exponent range [min, max, step]; actual ridge = 10^exponent.
    RBounds = [-10,2,1]
    # Bag-size-percent range [min, max, step]; lower bound clamped so each bag
    # holds at least one training instance.
    BagSizePercentBound = [ max(10, int(float(1)/float(data.numInstances())*100)+1 ) ,100,10] # max operation is to make sure that least number of samples are provided to the classifier
    # Range for the number of bagging iterations [min, max, step].
    ItrBound = [5,50,5]
    # This section tries to bag the best Logistic found earlier.
    print "searching for the best parameters to Bag the optimal Logistic ...."
    gridsearch = GridSearch()
    # Select classification accuracy (ACC) as the grid-search evaluation metric.
    acctag = gridsearch.getEvaluation()
    acctag = SelectedTag('ACC',acctag.getTags())
    gridsearch.setEvaluation(acctag)
    allfilters = AllFilters()
    gridsearch.setFilter(allfilters)
    gridsearch.setGridIsExtendable(Boolean(False))
    logistic = Logistic()
    bagging = Bagging()
    # Recreate the best stand-alone Logistic configuration as the base learner.
    logistic.setRidge(param1)
    logistic.setMaxIts(param2)
    bagging.setClassifier(logistic)
    gridsearch.setClassifier(bagging)
    # X axis: Bagging.bagSizePercent, Y axis: Bagging.numIterations
    # ('I' = use the raw grid value, no transformation).
    gridsearch.setXProperty(String('classifier.bagSizePercent'))
    gridsearch.setYProperty(String('classifier.numIterations'))
    gridsearch.setXExpression(String('I'))
    gridsearch.setYExpression(String('I'))
    gridsearch.setXMin(BagSizePercentBound[0])
    gridsearch.setXMax(BagSizePercentBound[1])
    gridsearch.setXStep(BagSizePercentBound[2])
    gridsearch.setYMin(ItrBound[0])
    gridsearch.setYMax(ItrBound[1])
    gridsearch.setYStep(ItrBound[2])
    print "searching for best parameters for bagging Logistic bagSizePercent = [", BagSizePercentBound[0], ",", BagSizePercentBound[1], "], # Iteration = [", ItrBound[0], ",", ItrBound[1], "] ...."
    gridsearch.buildClassifier(data)
    #bestbagging1 = gridsearch.getBestClassifier()
    bestValues1 = gridsearch.getValues()
    # ------------------------------ Evaluation
    # Rebuild the winning configuration explicitly and cross-validate it.
    logistic = Logistic()
    bestbagging1 = Bagging()
    logistic.setRidge(param1)
    logistic.setMaxIts(param2)
    bestbagging1.setBagSizePercent(int(bestValues1.x))
    bestbagging1.setNumIterations(int(bestValues1.y))
    bestbagging1.setClassifier(logistic)
    evaluation = Evaluation(data)
    # NOTE(review): duplicated "output =" is a harmless chained assignment.
    output = output = util.get_buffer_for_predictions()[0]
    attRange = Range() # no additional attributes output
    outputDistribution = Boolean(False) # we don't want distribution
    random = Random(1)
    # Cap the fold count for very small datasets.
    numFolds = min(10,data.numInstances())
    evaluation.crossValidateModel(bestbagging1,data,numFolds,random,[output, attRange, outputDistribution])
    best_acc1 = evaluation.pctCorrect()
    print "best accuracy by bagging the optimal Logistic classifier: ", best_acc1
    print "Optimal Bag size Percent: ", bestValues1.x, " Optimal number of Iterations: ", bestValues1.y
    print "-----------------------------------------"
    # -------------------------------------------------------------------------------------------------------------------------
    # In this section the base classifier is a default Logistic and the search
    # is over the ridge value and the number of bagging iterations.
    logistic = Logistic()
    bagging = Bagging()
    bagging.setClassifier(logistic)
    gridsearch.setClassifier(bagging)
    # X property reaches through Bagging into the nested Logistic:
    # classifier.classifier.ridge, evaluated as 10^I (log-scale grid).
    gridsearch.setXProperty(String('classifier.classifier.ridge'))
    gridsearch.setYProperty(String('classifier.numIterations'))
    gridsearch.setXExpression(String('pow(BASE,I)'))
    gridsearch.setYExpression(String('I'))
    gridsearch.setXBase(10)
    gridsearch.setGridIsExtendable(Boolean(True))
    gridsearch.setXMin(RBounds[0])
    gridsearch.setXMax(RBounds[1])
    gridsearch.setXStep(RBounds[2])
    gridsearch.setYMin(ItrBound[0])
    gridsearch.setYMax(ItrBound[1])
    gridsearch.setYStep(ItrBound[2])
    print "searching for ridge bound = [10^", RBounds[0], ",10^", RBounds[1], "], # Iteration = [", ItrBound[0], ",", ItrBound[1], "] ...."
    gridsearch.buildClassifier(data)
    #bestbagging = gridsearch.getBestClassifier()
    bestValues2 = gridsearch.getValues()
    # ------------------ Evaluation
    # Rebuild the second winning configuration (bestValues2.x is the ridge
    # exponent, so the actual ridge is 10^x) and cross-validate it.
    logistic = Logistic()
    bestbagging2 = Bagging()
    logistic.setRidge(pow(10,bestValues2.x))
    bestbagging2.setNumIterations(int(bestValues2.y))
    bestbagging2.setClassifier(logistic)
    evaluation = Evaluation(data)
    # NOTE(review): duplicated "output =" is a harmless chained assignment.
    output = output = util.get_buffer_for_predictions()[0]
    attRange = Range() # no additional attributes output
    outputDistribution = Boolean(False) # we don't want distribution
    random = Random(1)
    numFolds = min(10,data.numInstances())
    evaluation.crossValidateModel(bestbagging2,data,numFolds,random,[output, attRange, outputDistribution])
    best_acc2 = evaluation.pctCorrect()
    print "best accuracy by bagging the Logistic classifier (with optimization over ridge): ", best_acc2
    print "Optimal Ridge value : ", bestValues2.x , "Optimal number of Iteration : ", bestValues2.y
    print "-----------------------------------------"
    # Report whichever of the two bagging strategies scored higher.
    print "Final optimal bagging classifier:"
    if (best_acc2 > best_acc1):
        print " Best bagging is based on logistic with optimal ridge-value :", bestValues2.x, " optimal numIteration :", bestValues2.y
        print " optimal accuracy: ", best_acc2
        IsOptimalBaggingIsOptLogistic = False # is optimal bagging based on optimal Logistic ?
        IsOptBagOnOptLog = IsOptimalBaggingIsOptLogistic
    #......... remainder of this example omitted in the scraped source .........