本文整理汇总了Python中weka.classifiers.Evaluation类的典型用法代码示例。如果您正苦于以下问题:Python Evaluation类的具体用法?Python Evaluation怎么用?Python Evaluation使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Evaluation类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: use_classifier
def use_classifier(data, cli, args):
    """
    Builds a classifier from a command-line template and cross-validates it.

    :param data: dataset used both to build the classifier and to cross-validate it
    :param cli: command-line template; placeholders are filled in via ``str.format``
    :param args: keyword values substituted into the template
    :type args: dict
    :return: tuple (classifier built on ``data``, Evaluation after
             ``crossvalidate_model`` with 10 folds and ``Random(1)``)
    """
    # NOTE(review): the template is also passed to format() as positional
    # argument 0, so a "{0}" / "{}" placeholder would expand to the template
    # text itself -- confirm whether that is intended.
    commandline = cli.format(cli, **args)
    classifier = from_commandline(commandline, classname="weka.classifiers.Classifier")
    classifier.build_classifier(data)
    cv_result = Evaluation(data)
    cv_result.crossvalidate_model(classifier, data, 10, Random(1))
    return classifier, cv_result
示例2: myGridSearch
def myGridSearch(data,RBound,MBound):
    """
    Brute-force search for the Logistic settings with the best cross-validated accuracy.

    Every ridge exponent r in range(RBound[0], RBound[1] + RBound[2], RBound[2])
    is paired with every m in the equivalent range built from MBound. For each
    pair a fresh Logistic is configured (maxIts = m, ridge = 10**r) and scored
    with Evaluation.crossValidateModel; the first pair reaching the strictly
    highest pctCorrect() is kept.

    :param data: dataset passed to Evaluation and crossValidateModel
    :param RBound: [start, stop, step] of the ridge-exponent grid
    :param MBound: [start, stop, step] of the maxIts grid
    :return: (winning Logistic instance, its ridge, its maxIts, its accuracy);
             (None, nan, nan, -inf) if no pair scored above -inf. This function
             itself never calls buildClassifier on the returned instance.
    """
    best_model = None
    best_acc = -float('inf')
    best_m = float('nan')
    best_r = float('nan')

    for r in range(RBound[0], RBound[1] + RBound[2], RBound[2]):
        ridge = pow(10, r)
        for m in range(MBound[0], MBound[1] + MBound[2], MBound[2]):
            max_its = int(m)
            candidate = Logistic()
            candidate.setMaxIts(max_its)
            candidate.setRidge(ridge)

            evaluation = Evaluation(data)
            pred_output = util.get_buffer_for_predictions()[0]
            no_extra_attributes = Range()            # no additional attributes output
            without_distribution = Boolean(False)    # we don't want distribution
            rng = Random(1)
            folds = min(10, data.numInstances())
            evaluation.crossValidateModel(
                candidate, data, folds, rng,
                [pred_output, no_extra_attributes, without_distribution])

            acc = evaluation.pctCorrect()
            if acc > best_acc:
                best_model, best_acc = candidate, acc
                best_m, best_r = max_its, ridge

    # NOTE(review): indentation was lost in the source; the summary is assumed
    # to be printed once, after the whole grid has been visited -- confirm.
    # Items are joined with a single space, as the print statement did.
    print(" ".join(["Best accuracy: ", str(best_acc)]))
    print(" ".join(["Best values: M = ", str(best_m), ", Ridge = ", str(best_r)]))
    print("-----------------------------------------")
    return best_model, best_r, best_m, best_acc
示例3: main
def main(args):
    """
    Trains J48 on one part of a dataset and evaluates it on the other part.

    The dataset is loaded from an ARFF file, its last attribute is made the
    class, and ``train_test_split(66.0, Random(1))`` supplies the train/test
    pair. The built model and the evaluation summary are printed.

    :param args: the commandline arguments; index 1, when present, is the
                 dataset filename (otherwise "vote.arff" from the helper data dir)
    :type args: list
    """
    # pick the dataset
    if len(args) > 1:
        dataset_path = args[1]
    else:
        dataset_path = helper.get_data_dir() + os.sep + "vote.arff"
    helper.print_info("Loading dataset: " + dataset_path)
    arff_loader = Loader(classname="weka.core.converters.ArffLoader")
    dataset = arff_loader.load_file(dataset_path)
    dataset.class_is_last()

    # split
    train_set, test_set = dataset.train_test_split(66.0, Random(1))

    # train
    tree = Classifier(classname="weka.classifiers.trees.J48")
    tree.build_classifier(train_set)
    print(tree)

    # score on the held-out part
    result = Evaluation(train_set)
    result.test_model(tree, test_set)
    print(result.summary())
示例4: main
def main():
    """
    Demonstrates the CostSensitiveClassifier.

    Loads "diabetes.arff" from the helper data directory, wraps a J48 base
    classifier in a CostSensitiveClassifier, runs crossvalidate_model with
    10 folds and Random(1), then prints the setup and the evaluation summary.
    """
    dataset_path = helper.get_data_dir() + os.sep + "diabetes.arff"
    helper.print_info("Loading dataset: " + dataset_path)
    dataset = Loader("weka.core.converters.ArffLoader").load_file(dataset_path)
    dataset.class_is_last()

    # meta classifier, configured through its option strings
    meta_options = ["-cost-matrix", "[0 1; 2 0]", "-S", "2"]
    meta = SingleClassifierEnhancer(
        classname="weka.classifiers.meta.CostSensitiveClassifier",
        options=meta_options)
    meta.classifier = Classifier(classname="weka.classifiers.trees.J48", options=["-C", "0.3"])

    folds = 10
    result = Evaluation(dataset)
    result.crossvalidate_model(meta, dataset, folds, Random(1))

    print("")
    print("=== Setup ===")
    print("Classifier: " + meta.to_commandline())
    print("Dataset: " + dataset.relationname)
    print("")
    title = "=== " + str(folds) + " -fold Cross-Validation ==="
    print(result.summary(title))
示例5: myGridSearch
def myGridSearch(data,NTreeBounds,NFeaturesBounds):
    """
    Brute-force search for the RandomForest settings with the best cross-validated accuracy.

    Every t in range(NTreeBounds[0], NTreeBounds[1] + NTreeBounds[2], NTreeBounds[2])
    is paired with every f in the equivalent range built from NFeaturesBounds.
    For each pair a fresh RandomForest is configured (numTrees = t,
    numFeatures = f) and scored with Evaluation.crossValidateModel; the first
    pair reaching the strictly highest pctCorrect() is kept.

    :param data: dataset passed to Evaluation and crossValidateModel
    :param NTreeBounds: [start, stop, step] of the numTrees grid
    :param NFeaturesBounds: [start, stop, step] of the numFeatures grid
    :return: (winning RandomForest instance, its t, its f, its accuracy);
             (None, nan, nan, -inf) if no pair scored above -inf. This function
             itself never calls buildClassifier on the returned instance.
    """
    best_acc = -float('inf')
    best_forest = None
    best_t = float('nan')
    best_f = float('nan')

    for t in range(NTreeBounds[0], NTreeBounds[1] + NTreeBounds[2], NTreeBounds[2]):
        for f in range(NFeaturesBounds[0], NFeaturesBounds[1] + NFeaturesBounds[2], NFeaturesBounds[2]):
            candidate = RandomForest()
            candidate.setNumTrees(int(t))
            candidate.setNumFeatures(int(f))

            evaluation = Evaluation(data)
            pred_output = util.get_buffer_for_predictions()[0]
            no_extra_attributes = Range()            # no additional attributes output
            without_distribution = Boolean(False)    # we don't want distribution
            rng = Random(1)
            folds = min(10, data.numInstances())
            evaluation.crossValidateModel(
                candidate, data, folds, rng,
                [pred_output, no_extra_attributes, without_distribution])

            acc = evaluation.pctCorrect()
            if acc > best_acc:
                best_forest, best_acc = candidate, acc
                best_t, best_f = t, f

    # NOTE(review): indentation was lost in the source; the summary is assumed
    # to be printed once, after the whole grid has been visited -- confirm.
    # Items are joined with a single space, as the print statement did.
    print(" ".join(["Best accuracy:", str(best_acc)]))
    print(" ".join(["Best values: NTreeBounds = ", str(best_t), ", NFeaturesBounds = ", str(best_f)]))
    print("-----------------------------------------")
    return best_forest, best_t, best_f, best_acc
示例6: use_classifier
def use_classifier(data_filename, cli):
    """
    Loads an ARFF file, builds the classifier described by ``cli`` on it and
    cross-validates that classifier on the same data.

    :param data_filename: path handed to the ArffLoader
    :param cli: classifier command-line passed to ``from_commandline``
    :return: tuple (classifier built on the full dataset, Evaluation after
             ``crossvalidate_model`` with 10 folds and ``Random(1)``)
    """
    dataset = Loader(classname="weka.core.converters.ArffLoader").load_file(data_filename)
    dataset.class_is_last()

    classifier = from_commandline(cli, classname="weka.classifiers.Classifier")
    classifier.build_classifier(dataset)

    cv_result = Evaluation(dataset)
    cv_result.crossvalidate_model(classifier, dataset, 10, Random(1))
    return classifier, cv_result
示例7: RandomForest_ParamFinder
def RandomForest_ParamFinder(data):
    """
    Picks numTrees / numFeatures for a RandomForest and reports the accuracy reached.

    With more than 10 instances the search is delegated to GridSearch and the
    winning pair is re-scored through Evaluation.crossValidateModel; otherwise
    myGridSearch is used (original author's note: grid search does 10-fold
    cross validation, hence the number of samples must be more than 10).

    :param data: dataset to tune on
    :return: (RandomForest configured with the chosen values, numTrees value,
              numFeatures value, accuracy, description string)
    """
    tree_bounds = [1, 20, 1]       # possible set for number of trees: [min, max, step]
    feature_bounds = [0, 20, 1]    # possible set for number of features: [min, max, step]

    if data.numInstances() <= 10:
        forest, trees, features, accuracy = myGridSearch(data, tree_bounds, feature_bounds)
    else:
        search = GridSearch()
        # reuse the tag set GridSearch already exposes, selecting the 'ACC' entry
        search.setEvaluation(SelectedTag('ACC', search.getEvaluation().getTags()))
        search.setFilter(AllFilters())
        search.setGridIsExtendable(Boolean(True))
        search.setClassifier(RandomForest())
        # X axis -> classifier.numTrees, Y axis -> classifier.numFeatures
        search.setXProperty(String('classifier.numTrees'))
        search.setYProperty(String('classifier.numFeatures'))
        search.setXExpression(String('I'))
        search.setYExpression(String('I'))
        search.setXMin(tree_bounds[0])
        search.setXMax(tree_bounds[1])
        search.setXStep(tree_bounds[2])
        search.setYMin(feature_bounds[0])
        search.setYMax(feature_bounds[1])
        search.setYStep(feature_bounds[2])
        search.setYBase(10)
        banner = ("searching for random-forest NumTrees = [", tree_bounds[0], ",", tree_bounds[1],
                  "], NumFeatures = [ ", feature_bounds[0], ",", feature_bounds[1], "] ....")
        # items are joined with a single space, as the print statement did
        print(" ".join(str(piece) for piece in banner))
        search.buildClassifier(data)
        winner = search.getValues()

        # ----------------------- Evaluation
        forest = RandomForest()
        forest.setNumTrees(int(winner.x))
        forest.setNumFeatures(int(winner.y))
        evaluation = Evaluation(data)
        pred_output = util.get_buffer_for_predictions()[0]
        no_extra_attributes = Range()            # no additional attributes output
        without_distribution = Boolean(False)    # we don't want distribution
        rng = Random(1)
        folds = min(10, data.numInstances())
        evaluation.crossValidateModel(
            forest, data, folds, rng,
            [pred_output, no_extra_attributes, without_distribution])
        accuracy = evaluation.pctCorrect()
        print(" ".join(["best accuracy: ", str(accuracy)]))
        print(" ".join(["best random-forest classifier with NumTrees=", str(winner.x),
                        ", NumFeatures = ", str(winner.y)]))
        trees = winner.x
        features = winner.y

    Description = 'Random-Forest classifier: OptNumTrees = ' + str(trees) + \
        ', OptNumFeatures = ' + str(features) + ', OptAcc = ' + str(accuracy)
    print("-----------------------------------------")
    return forest, trees, features, accuracy, Description
示例8: Logistic_ParamFinder
def Logistic_ParamFinder(data):
    """
    Picks ridge / maxIts for a Logistic classifier and reports the accuracy reached.

    With more than 10 instances the search is delegated to GridSearch and the
    winning pair is re-scored through Evaluation.crossValidateModel; otherwise
    myGridSearch is used (original author's note: grid search does 10-fold
    cross validation, hence the number of samples must be more than 10).

    :param data: dataset to tune on
    :return: (Logistic configured with the chosen values, ridge, maxIts,
              accuracy, description string)
    """
    RBounds = [-10, 2, 1]    # possible set for the ridge exponent: [min, max, step]
    MBounds = [-1, 10, 1]    # possible set for maximum iteration: [min, max, step]

    if data.numInstances() <= 10:
        OptLog, OptLogp1, OptLogp2, OptLogAcc = myGridSearch(data, RBounds, MBounds)
    else:
        gridsearch = GridSearch()
        # reuse the tag set GridSearch already exposes, selecting the 'ACC' entry
        gridsearch.setEvaluation(SelectedTag('ACC', gridsearch.getEvaluation().getTags()))
        gridsearch.setFilter(AllFilters())
        gridsearch.setGridIsExtendable(Boolean(True))
        gridsearch.setClassifier(Logistic())
        # X axis -> classifier.maxIts, Y axis -> classifier.ridge
        gridsearch.setXProperty(String('classifier.maxIts'))
        gridsearch.setYProperty(String('classifier.ridge'))
        gridsearch.setXExpression(String('I'))
        gridsearch.setYExpression(String('pow(BASE,I)'))
        gridsearch.setXMin(MBounds[0])
        gridsearch.setXMax(MBounds[1])
        gridsearch.setXStep(MBounds[2])
        gridsearch.setYMin(RBounds[0])
        gridsearch.setYMax(RBounds[1])
        gridsearch.setYStep(RBounds[2])
        gridsearch.setYBase(10)
        # progress message; "classifier" was misspelled "lcassifier" before
        banner = ("searching for logistic classifier Max Iteration = [", MBounds[0], ",", MBounds[1],
                  "], Ridge = [ 10E", RBounds[0], ",10E", RBounds[1], "] ....")
        # items are joined with a single space, as the print statement did
        print(" ".join(str(piece) for piece in banner))
        gridsearch.buildClassifier(data)
        bestValues = gridsearch.getValues()

        # ----------------------- Evaluation
        OptLog = Logistic()
        OptLog.setMaxIts(int(bestValues.x))
        # NOTE(review): assumes getValues() reports the raw grid exponent for
        # the Y axis, so the power of ten is applied here -- confirm against
        # the GridSearch documentation.
        OptLog.setRidge(pow(10, bestValues.y))
        evaluation = Evaluation(data)
        output = util.get_buffer_for_predictions()[0]
        attRange = Range()                     # no additional attributes output
        outputDistribution = Boolean(False)    # we don't want distribution
        random = Random(1)
        numFolds = min(10, data.numInstances())
        evaluation.crossValidateModel(
            OptLog, data, numFolds, random,
            [output, attRange, outputDistribution])
        OptLogAcc = evaluation.pctCorrect()
        # read the effective settings back from the classifier once
        OptLogp1 = OptLog.getRidge()
        OptLogp2 = OptLog.getMaxIts()
        print(" ".join(["best accuracy: ", str(OptLogAcc)]))
        print(" ".join(["best logistic classifier with Ridge = ", str(OptLogp1),
                        " Max Iteration = ", str(OptLogp2)]))

    Description = 'Logistic classifier OptRidge = ' + str(OptLogp1) + \
        ', OptMaxIts = ' + str(OptLogp2) + ', OptAcc = ' + str(OptLogAcc)
    print("-----------------------------------------")
    return OptLog, OptLogp1, OptLogp2, OptLogAcc, Description
示例9: test_model
def test_model(self, test_data, empty_solution, evaluate = False):
    """
    Writes this model's predictions and, optionally, its evaluation summary.

    Nothing is recomputed when the target file already exists: predictions
    are skipped if ``self.prediction_file`` is present, the evaluation is
    skipped if ``self.evaluation_file`` is present. If neither the prediction
    file nor ``self.model_file`` exists, a message is printed and the method
    returns without doing anything.

    :param test_data: dataset the serialized classifier is evaluated on
    :param empty_solution: file name passed to ``read_sheet``; each row must
                           provide 'userid' and 'tweetid'
    :param evaluate: when true, also save the evaluation summary
    :return: None
    """
    def run_evaluation():
        # Load the serialized classifier and evaluate it on test_data.
        classifier = Classifier(jobject = serialization.read(self.model_file))
        result = Evaluation(data = test_data)
        result.test_model(classifier = classifier, data = test_data)
        return result

    evaluation = None
    if os.path.isfile(self.prediction_file):
        print('Model ' + self.name + ' already tested.')
    elif not os.path.isfile(self.model_file):
        print('Impossible testing this model. It should be trained first.')
        return
    else:
        print('Starting to test_model model ' + self.name + '.')
        evaluation = run_evaluation()
        predictions = evaluation.predictions()
        rows = read_sheet(file_name = empty_solution)
        solutions = []
        # Pair row i with prediction i by index: no quadratic pop(0), the
        # predictions list is left intact, and a shortage of predictions
        # still raises IndexError as before.
        for index, row in enumerate(rows):
            solutions.append([row['userid'], row['tweetid'], predictions[index].predicted()])
        write_the_solution_file(solutions, self.prediction_file)
        print('Model ' + self.name + ' tested.')

    if not evaluate:
        return
    if os.path.isfile(self.evaluation_file):
        print('Model ' + self.name + ' already evaluated.')
        return
    if evaluation is None:
        # predictions were already on disk, so the model was not loaded above
        evaluation = run_evaluation()
    save_file(file_name = self.evaluation_file, content = evaluation.to_summary())
    print('Model ' + self.name + ' evaluated.')
示例10: smo
def smo(trainData,testData,params,exparams):
    """
    Trains an SMO classifier and evaluates it on the training and the test data.

    :param trainData: dataset the classifier is built on
    :param testData: dataset for the second evaluation
    :param params: [kernel flag (true -> RBFKernel, false -> PolyKernel),
                    C value, kernel parameter (gamma resp. exponent)]
    :param exparams: not used by this function
    :return: (prediction buffer for the training data, prediction buffer for
              the test data, result of makeTrainEvalSummary on the training evaluation)
    """
    use_rbf = str2bool(params[0])
    c_value = float(params[1])
    kernel_param = float(params[2])
    if not use_rbf:
        # Polynomial kernel
        kernel = PolyKernel()
        kernel.setExponent(kernel_param)
    else:
        # RBF kernel
        kernel = RBFKernel()
        kernel.setGamma(kernel_param)

    classifier = SMO()
    classifier.setKernel(kernel)
    classifier.setC(c_value)
    classifier.buildClassifier(trainData)  # only a trained classifier can be evaluated

    def evaluate_on(dataset):
        # Evaluate the trained classifier on dataset, collecting predictions
        # without additional attributes and without class distributions.
        evaluation = Evaluation(dataset)
        (pred_output, pred_buffer) = util.get_buffer_for_predictions(dataset)
        evaluation.evaluateModel(classifier, dataset, [pred_output, Range(), Boolean(False)])
        return evaluation, pred_buffer

    # evaluate it on the training
    train_eval, trainBuffer = evaluate_on(trainData)
    print("--> Evaluation:\n")
    print(train_eval.toSummaryString())
    trainSummary = makeTrainEvalSummary(train_eval)

    # evaluate it on testing
    _, testBuffer = evaluate_on(testData)
    return trainBuffer, testBuffer, trainSummary
示例11: simple_logistic
def simple_logistic(trainData,testData,params,exparams):
    """
    Trains a SimpleLogistic classifier and evaluates it on the training and the test data.

    :param trainData: dataset the classifier is built on
    :param testData: dataset for the second evaluation
    :param params: [heuristicStop, numBoostingIterations]; each is converted
                   with int(float(...))
    :param exparams: not used by this function
    :return: (prediction buffer for the training data, prediction buffer for
              the test data, result of makeTrainEvalSummary on the training evaluation)
    """
    stop_after = int(float(params[0]))
    boosting_rounds = int(float(params[1]))

    classifier = SimpleLogistic()
    classifier.setHeuristicStop(stop_after)
    classifier.setNumBoostingIterations(boosting_rounds)
    if trainData.numInstances() < 5:
        # special case for small sample size
        classifier.setUseCrossValidation(False)
    classifier.buildClassifier(trainData)  # only a trained classifier can be evaluated

    def evaluate_on(dataset):
        # Evaluate the trained classifier on dataset, collecting predictions
        # without additional attributes and without class distributions.
        evaluation = Evaluation(dataset)
        (pred_output, pred_buffer) = util.get_buffer_for_predictions(dataset)
        evaluation.evaluateModel(classifier, dataset, [pred_output, Range(), Boolean(False)])
        return evaluation, pred_buffer

    # evaluate it on the training
    train_eval, trainBuffer = evaluate_on(trainData)
    print("--> Evaluation:\n")
    print(train_eval.toSummaryString())
    trainSummary = makeTrainEvalSummary(train_eval)

    # evaluate it on testing
    _, testBuffer = evaluate_on(testData)
    return trainBuffer, testBuffer, trainSummary
示例12: bayesian
def bayesian(trainData,testData,params,exparams):
    """
    Trains a Bayesian classifier and evaluates it on the training and the test data.

    :param trainData: dataset the classifier is built on
    :param testData: dataset for the second evaluation
    :param params: [flag selecting NaiveBayesMultinomial over NaiveBayes,
                    flag enabling the kernel estimator on NaiveBayes]
    :param exparams: not used by this function
    :return: (prediction buffer for the training data, prediction buffer for
              the test data, result of makeTrainEvalSummary on the training evaluation)
    """
    want_multinomial = str2bool(params[0])
    want_kernel_density = str2bool(params[1])

    # NOTE(review): indentation was lost in the source; the kernel-estimator
    # switch is assumed to apply only to the plain NaiveBayes branch -- confirm.
    if not want_multinomial:
        classifier = NaiveBayes()
        if want_kernel_density:
            # use kernel density estimation
            classifier.setUseKernelEstimator(Boolean(True))
    else:
        # optimal bayesian classifier is multinomial
        classifier = NaiveBayesMultinomial()
    classifier.buildClassifier(trainData)  # only a trained classifier can be evaluated

    def evaluate_on(dataset):
        # Evaluate the trained classifier on dataset, collecting predictions
        # without additional attributes and without class distributions.
        evaluation = Evaluation(dataset)
        (pred_output, pred_buffer) = util.get_buffer_for_predictions(dataset)
        evaluation.evaluateModel(classifier, dataset, [pred_output, Range(), Boolean(False)])
        return evaluation, pred_buffer

    # evaluate it on the training
    train_eval, trainBuffer = evaluate_on(trainData)
    print("--> Evaluation:\n")
    print(train_eval.toSummaryString())
    trainSummary = makeTrainEvalSummary(train_eval)

    # evaluate it on testing
    _, testBuffer = evaluate_on(testData)
    return trainBuffer, testBuffer, trainSummary
示例13: bagging_logistic
def bagging_logistic(trainData,testData,params,exparams):
# Trains a Bagging ensemble over a Logistic base classifier on trainData,
# evaluates it on trainData and on testData, and returns
# (trainBuffer, testBuffer, trainSummary).
#
# params[0] is a boolean flag (parsed with str2bool):
#   true  -> ridge and maxIts come from exparams[0] / exparams[1] and
#            params[1] is the bag size percent;
#   false -> ridge comes from params[1].
# params[2] is read as the number of bagging iterations.
#
# NOTE(review): indentation was lost in this copy of the source, so it cannot
# be told from here whether the two numIterations lines belong to the else
# branch only or run for both branches -- confirm against the original file.
IsOptBagOnOptLog = str2bool(params[0])
logistic = Logistic()
bagging = Bagging()
if IsOptBagOnOptLog: # optimal bagging is based on optimal logistic
ridge = float(exparams[0])
maxIt = int(float(exparams[1]))
logistic.setMaxIts(maxIt)
bagSizePercent = int(float(params[1]))
bagging.setBagSizePercent(bagSizePercent)
else: # ridge parameter is also optimized in the process
ridge = float(params[1])
numIterations = int(float(params[2]))
bagging.setNumIterations(numIterations)
# ridge is assigned in both branches above, so it is always defined here
logistic.setRidge(ridge)
bagging.setClassifier(logistic)
bagging.buildClassifier(trainData) # only a trained classifier can be evaluated
# evaluate it on the training
evaluation = Evaluation(trainData)
# trainOutput is handed to evaluateModel; trainBuffer is what gets returned
(trainOutput, trainBuffer) = util.get_buffer_for_predictions(trainData)
attRange = Range() # no additional attributes output
outputDistribution = Boolean(False) # we don't want distribution
evaluation.evaluateModel(bagging, trainData, [trainOutput, attRange, outputDistribution])
print "--> Evaluation:\n"
print evaluation.toSummaryString()
# only the training evaluation is summarized; the name is rebound below
trainSummary = makeTrainEvalSummary(evaluation)
# evaluate it on testing
evaluation = Evaluation(testData)
(testOutput, testBuffer) = util.get_buffer_for_predictions(testData)
attRange = Range() # no additional attributes output
outputDistribution = Boolean(False) # we don't want distribution
evaluation.evaluateModel(bagging, testData, [testOutput, attRange, outputDistribution])
return trainBuffer, testBuffer, trainSummary
示例14: build_and_classify
def build_and_classify(classifier, classifier_name, approach_name, infile, percentage='10'):
    """
    Creates model and classifies against input data. Returns accuracy statistics.

    The CSV file is loaded, every attribute is converted with NumericToNominal,
    the instances are shuffled with a fixed seed, and a RemovePercentage filter
    produces the training set the classifier is built on.

    :param classifier: classifier object providing ``build_classifier``
    :param classifier_name: label copied into the result row
    :param approach_name: label copied into the result row
    :param infile: path of the CSV file to load
    :param percentage: value for the RemovePercentage ``-P`` option; a string
                       or a number (it is converted with ``str`` for the filter)
    :return: [approach_name, classifier_name, percentage, percent correct,
              weighted F-measure]
    """
    def apply_filter(classname, options, dataset):
        # Configure one Weka filter for dataset and return the filtered copy.
        weka_filter = Filter(classname=classname, options=options)
        weka_filter.inputformat(dataset)
        return weka_filter.filter(dataset)

    # set seed so results are consistent (seeds Python's own random module)
    random.seed('iot')

    # load data
    loader = Loader(classname='weka.core.converters.CSVLoader')
    data = loader.load_file(infile)
    data.class_is_last()

    # convert all numeric attributes to nominal
    data = apply_filter('weka.filters.unsupervised.attribute.NumericToNominal',
                        ['-R', 'first-last'], data)

    # randomize data with constant seed
    data = apply_filter('weka.filters.unsupervised.instance.Randomize',
                        ['-S', '42'], data)

    # create training set and testing set
    # presumably '-V' inverts the selection so that `percentage` percent of the
    # instances are kept for training -- TODO confirm against the Weka docs
    train = apply_filter('weka.filters.unsupervised.instance.RemovePercentage',
                         ['-P', str(percentage), '-V'], data)
    # NOTE(review): the test set is the complete randomized dataset, so it
    # contains the training instances as well -- confirm this is intended.
    test = data

    # build and test classifier
    classifier.build_classifier(train)
    evaluation = Evaluation(train)
    evaluation.test_model(classifier, test)

    # return results as array
    return [
        approach_name,
        classifier_name,
        percentage,
        evaluation.percent_correct,
        evaluation.weighted_f_measure,
    ]
示例15: crossValidate
def crossValidate(self, arrfFile = None, classname="weka.classifiers.trees.J48", options=None):
    """
    Cross-validates a classifier on ``self.data`` and prints the results.

    :param arrfFile: optional file passed to ``self.initData`` before evaluating
    :param classname: classname handed to ``Classifier``
    :param options: list of option strings for the classifier;
                    defaults to ``["-C", "0.3"]`` when omitted
    :return: None (returns early, printing nothing, when ``self.data`` is None)
    """
    # A fresh list per call: a list literal in the signature would be one
    # shared object across all calls.
    if options is None:
        options = ["-C", "0.3"]

    if arrfFile is not None:
        self.initData( arrfFile )

    if self.data is None:
        return

    print('Classificador ' + str(classname) + ' ' + ' '.join(options))
    cls = Classifier(classname=classname, options=options)

    evl = Evaluation(self.data)
    evl.crossvalidate_model(cls, self.data, 10, Random(1))

    print(evl.percent_correct)
    print(evl.summary())
    print(evl.class_details())