本文整理汇总了Python中weka.classifiers.Evaluation.crossvalidate_model方法的典型用法代码示例。如果您正苦于以下问题:Python Evaluation.crossvalidate_model方法的具体用法?Python Evaluation.crossvalidate_model怎么用?Python Evaluation.crossvalidate_model使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类weka.classifiers.Evaluation
的用法示例。
在下文中一共展示了Evaluation.crossvalidate_model方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: main
# 需要导入模块: from weka.classifiers import Evaluation [as 别名]
# 或者: from weka.classifiers.Evaluation import crossvalidate_model [as 别名]
def main():
    """
    Demonstrates the CostSensitiveClassifier meta scheme: a J48 base
    learner is wrapped with a 2x2 cost matrix and evaluated via 10-fold
    cross-validation on the diabetes dataset.
    """
    # load the diabetes ARFF file and mark the last attribute as the class
    dataset_path = helper.get_data_dir() + os.sep + "diabetes.arff"
    helper.print_info("Loading dataset: " + dataset_path)
    arff_loader = Loader("weka.core.converters.ArffLoader")
    dataset = arff_loader.load_file(dataset_path)
    dataset.class_is_last()

    # wrap a J48 tree inside the cost-sensitive meta classifier
    meta = SingleClassifierEnhancer(
        classname="weka.classifiers.meta.CostSensitiveClassifier",
        options=["-cost-matrix", "[0 1; 2 0]", "-S", "2"])
    meta.classifier = Classifier(classname="weka.classifiers.trees.J48", options=["-C", "0.3"])

    # 10-fold cross-validation with a fixed seed for reproducibility
    num_folds = 10
    evl = Evaluation(dataset)
    evl.crossvalidate_model(meta, dataset, num_folds, Random(1))

    # report configuration and results
    print("")
    print("=== Setup ===")
    print("Classifier: " + meta.to_commandline())
    print("Dataset: " + dataset.relationname)
    print("")
    print(evl.summary("=== " + str(num_folds) + " -fold Cross-Validation ==="))
示例2: use_classifier
# 需要导入模块: from weka.classifiers import Evaluation [as 别名]
# 或者: from weka.classifiers.Evaluation import crossvalidate_model [as 别名]
def use_classifier(data, cli, args):
    """
    Instantiates a classifier from a command-line template, builds it on
    the data and cross-validates it (10-fold, seed 1).

    :param data: the dataset to evaluate on
    :param cli: command-line template with named placeholders
    :param args: dict of values substituted into the template
    :return: tuple of the built classifier and the Evaluation object
    """
    # BUG FIX: the original called cli.format(cli, **args), which passes the
    # template itself as positional argument 0; only the keyword
    # substitutions are intended here.
    cli = cli.format(**args)
    cls = from_commandline(cli, classname="weka.classifiers.Classifier")
    cls.build_classifier(data)
    evaluation = Evaluation(data)
    evaluation.crossvalidate_model(cls, data, 10, Random(1))
    return cls, evaluation
示例3: evaluation
# 需要导入模块: from weka.classifiers import Evaluation [as 别名]
# 或者: from weka.classifiers.Evaluation import crossvalidate_model [as 别名]
def evaluation(self, classifier, trainingData, testingData=None):
    """
    Evaluates the classifier: 10-fold cross-validation on the training data
    when no test set is given, otherwise builds the classifier on the
    training data and scores it on the separate test set.

    Note: prints use the single-argument function-call form so the method is
    valid (with identical output) on both Python 2 and Python 3.

    :param classifier: the (untrained) classifier to evaluate
    :param trainingData: training Instances; last attribute becomes the class
    :param testingData: optional test Instances with a matching attribute set
    :return: the Evaluation object, or None when the attribute sets differ
    """
    trainingData.set_class_index(trainingData.num_attributes() - 1)
    if testingData is None:  # was '== None'; identity test is the correct idiom
        # initialize with priors, then 10-fold CV with a fixed seed
        evaluation = Evaluation(trainingData)
        evaluation.crossvalidate_model(classifier, trainingData, 10, Random(42))
        return evaluation
    print("testing data exists")
    if testingData.num_attributes() == trainingData.num_attributes():
        testingData.set_class_index(testingData.num_attributes() - 1)
        evaluation = Evaluation(trainingData)
        classifier.build_classifier(trainingData)
        evaluation.test_model(classifier, testingData)
        return evaluation
    # attribute sets differ: report the mismatch and return None explicitly
    print("testing Data doesn't have same attribute with training data")
    for attribute in trainingData.attributes():
        print("train:" + str(attribute))
    for attribute in testingData.attributes():
        print("test:" + str(attribute))
    return None
示例4: use_classifier
# 需要导入模块: from weka.classifiers import Evaluation [as 别名]
# 或者: from weka.classifiers.Evaluation import crossvalidate_model [as 别名]
def use_classifier(data_filename, cli):
    """
    Loads an ARFF file, instantiates a classifier from the given command
    line, builds it on the data and cross-validates it (10-fold, seed 1).

    :param data_filename: path of the ARFF file to load
    :param cli: classifier command line to instantiate
    :return: tuple of the built classifier and the Evaluation object
    """
    arff_loader = Loader(classname="weka.core.converters.ArffLoader")
    dataset = arff_loader.load_file(data_filename)
    dataset.class_is_last()
    classifier = from_commandline(cli, classname="weka.classifiers.Classifier")
    classifier.build_classifier(dataset)
    evl = Evaluation(dataset)
    evl.crossvalidate_model(classifier, dataset, 10, Random(1))
    return classifier, evl
示例5: crossValidate
# 需要导入模块: from weka.classifiers import Evaluation [as 别名]
# 或者: from weka.classifiers.Evaluation import crossvalidate_model [as 别名]
def crossValidate(self, arrfFile=None, classname="weka.classifiers.trees.J48", options=None):
    """
    Runs 10-fold cross-validation of the given classifier on self.data and
    prints accuracy, summary and per-class details.

    :param arrfFile: optional ARFF file to (re)initialize self.data from
    :param classname: class name of the classifier to instantiate
    :param options: classifier options; defaults to ["-C", "0.3"]
    """
    # avoid a mutable default argument; the effective default is unchanged
    if options is None:
        options = ["-C", "0.3"]
    if arrfFile is not None:
        self.initData(arrfFile)
    if self.data is None:
        return
    # print-as-function keeps this line valid on both Python 2 and 3
    print('Classificador ' + str(classname) + ' ' + ' '.join(options))
    cls = Classifier(classname=classname, options=options)
    evl = Evaluation(self.data)
    evl.crossvalidate_model(cls, self.data, 10, Random(1))
    print(evl.percent_correct)
    print(evl.summary())
    print(evl.class_details())
示例6: use_classifier
# 需要导入模块: from weka.classifiers import Evaluation [as 别名]
# 或者: from weka.classifiers.Evaluation import crossvalidate_model [as 别名]
def use_classifier(data):
    """
    Uses the meta-classifier AttributeSelectedClassifier for attribute selection.
    :param data: the dataset to use
    :type data: Instances
    """
    print("\n1. Meta-classifier")
    # assemble the meta classifier: CFS subset evaluator plus backward
    # greedy stepwise search around a J48 base learner
    meta = Classifier(classname="weka.classifiers.meta.AttributeSelectedClassifier")
    evaluator = ASEvaluation(classname="weka.attributeSelection.CfsSubsetEval")
    search = ASSearch(classname="weka.attributeSelection.GreedyStepwise", options=["-B"])
    base_learner = Classifier(classname="weka.classifiers.trees.J48")
    # setting nested options via escaped double quotes is error prone;
    # assigning the underlying Java objects through bean properties is simpler
    meta.set_property("classifier", base_learner.jobject)
    meta.set_property("evaluator", evaluator.jobject)
    meta.set_property("search", search.jobject)
    # 10-fold cross-validation with a fixed seed, then print the summary
    evl = Evaluation(data)
    evl.crossvalidate_model(meta, data, 10, Random(1))
    print(evl.summary())
示例7: print
# 需要导入模块: from weka.classifiers import Evaluation [as 别名]
# 或者: from weka.classifiers.Evaluation import crossvalidate_model [as 别名]
# --- Example 7 (fragment): learning-curve setup for the glass dataset ---
# NOTE(review): indentation was lost when this snippet was scraped; the
# matplotlib import below belongs inside the "if" guard, and the loop
# bodies below belong inside their "for" statements. The snippet is also
# cut off before the loop that accumulates the percent-correct values.
import weka.plot as plot
if plot.matplotlib_available:
import matplotlib.pyplot as plt
jvm.start()
# load glass
fname = data_dir + os.sep + "glass.arff"
print("\nLoading dataset: " + fname + "\n")
loader = Loader(classname="weka.core.converters.ArffLoader")
data = loader.load_file(fname)
data.set_class_index(data.num_attributes() - 1)
# compute baseline
evl = Evaluation(data)
evl.crossvalidate_model(Classifier("weka.classifiers.rules.ZeroR"), data, 10, Random(1))
# NOTE(review): percent_correct is invoked as a method here; in newer
# python-weka-wrapper releases it is a property — confirm the wrapper version.
baseline = evl.percent_correct()
# generate learning curves
percentages = [1, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
repetitions = [1, 10, 100]
curves = {}
for repetition in repetitions:
# progress info
sys.stdout.write("Repetitions=" + str(repetition))
# initialize curve
curve = {}
for percentage in percentages:
curve[percentage] = 0
curves[repetition] = curve
# run and add up percentage correct from repetition
示例8: Loader
# 需要导入模块: from weka.classifiers import Evaluation [as 别名]
# 或者: from weka.classifiers.Evaluation import crossvalidate_model [as 别名]
# --- Example 8 (fragment): compare classifiers on the diabetes data ---
# NOTE(review): indentation was lost in scraping; the loop bodies below
# belong inside their "for" statements. 'data_dir', 'numpy', 'jvm' and
# 'sys' are set up in an omitted part of the script.
# load diabetes
loader = Loader(classname="weka.core.converters.ArffLoader")
fname = data_dir + os.sep + "diabetes.arff"
print("\nLoading dataset: " + fname + "\n")
data = loader.load_file(fname)
data.set_class_index(data.num_attributes() - 1)
for classifier in ["weka.classifiers.bayes.NaiveBayes", "weka.classifiers.rules.ZeroR", "weka.classifiers.trees.J48"]:
# train/test split 90% using classifier
cls = Classifier(classname=classifier)
evl = Evaluation(data)
evl.evaluate_train_test_split(cls, data, 90.0, Random(1))
print("\n" + classifier + " train/test split (90%):\n" + evl.to_summary())
cls.build_classifier(data)
print(classifier + " model:\n\n" + str(cls))
# calculate mean/stdev over 10 cross-validations
for classifier in [
"weka.classifiers.meta.ClassificationViaRegression", "weka.classifiers.bayes.NaiveBayes",
"weka.classifiers.rules.ZeroR", "weka.classifiers.trees.J48", "weka.classifiers.functions.Logistic"]:
accuracy = []
# xrange: this snippet targets Python 2
for i in xrange(1,11):
cls = Classifier(classname=classifier)
evl = Evaluation(data)
# a different seed per repetition yields independent 10-fold CV runs
evl.crossvalidate_model(cls, data, 10, Random(i))
accuracy.append(evl.percent_correct())
nacc = numpy.array(accuracy)
print("%s: %0.2f +/-%0.2f" % (classifier, numpy.mean(nacc), numpy.std(nacc)))
jvm.stop()
示例9: main
# 需要导入模块: from weka.classifiers import Evaluation [as 别名]
# 或者: from weka.classifiers.Evaluation import crossvalidate_model [as 别名]
#.........这里部分代码省略.........
# --- Example 9 (truncated function interior): meta-classifier construction
# and a detailed cross-validation statistics dump. The beginning of the
# enclosing function is omitted ("部分代码省略" = "part of the code omitted"),
# and indentation was lost in scraping.
flter = Filter("weka.filters.unsupervised.attribute.Remove")
flter.options = ["-R", "first"]
meta.set_property("filter", flter.jobject)
print(meta.to_commandline())
# direct FilteredClassifier instantiation
print("direct FilteredClassifier instantiation")
meta = FilteredClassifier()
meta.classifier = Classifier(classname="weka.classifiers.functions.LinearRegression")
flter = Filter("weka.filters.unsupervised.attribute.Remove")
flter.options = ["-R", "first"]
meta.filter = flter
print(meta.to_commandline())
# generic Vote
print("generic Vote instantiation")
meta = MultipleClassifiersCombiner(classname="weka.classifiers.meta.Vote")
classifiers = [
Classifier(classname="weka.classifiers.functions.SMO"),
Classifier(classname="weka.classifiers.trees.J48")
]
meta.classifiers = classifiers
print(meta.to_commandline())
# cross-validate nominal classifier
helper.print_title("Cross-validating NaiveBayes on diabetes")
diabetes_file = helper.get_data_dir() + os.sep + "diabetes.arff"
helper.print_info("Loading dataset: " + diabetes_file)
loader = Loader("weka.core.converters.ArffLoader")
diabetes_data = loader.load_file(diabetes_file)
diabetes_data.class_is_last()
classifier = Classifier(classname="weka.classifiers.bayes.NaiveBayes")
pred_output = PredictionOutput(
classname="weka.classifiers.evaluation.output.prediction.PlainText", options=["-distribution"])
evaluation = Evaluation(diabetes_data)
# 10-fold CV, fixed seed, per-instance predictions captured in pred_output
evaluation.crossvalidate_model(classifier, diabetes_data, 10, Random(42), output=pred_output)
print(evaluation.summary())
print(evaluation.class_details())
print(evaluation.matrix())
# Exhaustive dump of the Evaluation statistic accessors; per-class metrics
# take the 0-based class index as argument, "weighted..." ones are properties.
print("areaUnderPRC/0: " + str(evaluation.area_under_prc(0)))
print("weightedAreaUnderPRC: " + str(evaluation.weighted_area_under_prc))
print("areaUnderROC/1: " + str(evaluation.area_under_roc(1)))
print("weightedAreaUnderROC: " + str(evaluation.weighted_area_under_roc))
print("avgCost: " + str(evaluation.avg_cost))
print("totalCost: " + str(evaluation.total_cost))
print("confusionMatrix: " + str(evaluation.confusion_matrix))
print("correct: " + str(evaluation.correct))
print("pctCorrect: " + str(evaluation.percent_correct))
print("incorrect: " + str(evaluation.incorrect))
print("pctIncorrect: " + str(evaluation.percent_incorrect))
print("unclassified: " + str(evaluation.unclassified))
print("pctUnclassified: " + str(evaluation.percent_unclassified))
print("coverageOfTestCasesByPredictedRegions: " + str(evaluation.coverage_of_test_cases_by_predicted_regions))
print("sizeOfPredictedRegions: " + str(evaluation.size_of_predicted_regions))
print("falseNegativeRate: " + str(evaluation.false_negative_rate(1)))
print("weightedFalseNegativeRate: " + str(evaluation.weighted_false_negative_rate))
print("numFalseNegatives: " + str(evaluation.num_false_negatives(1)))
print("trueNegativeRate: " + str(evaluation.true_negative_rate(1)))
print("weightedTrueNegativeRate: " + str(evaluation.weighted_true_negative_rate))
print("numTrueNegatives: " + str(evaluation.num_true_negatives(1)))
print("falsePositiveRate: " + str(evaluation.false_positive_rate(1)))
print("weightedFalsePositiveRate: " + str(evaluation.weighted_false_positive_rate))
print("numFalsePositives: " + str(evaluation.num_false_positives(1)))
print("truePositiveRate: " + str(evaluation.true_positive_rate(1)))
print("weightedTruePositiveRate: " + str(evaluation.weighted_true_positive_rate))
print("numTruePositives: " + str(evaluation.num_true_positives(1)))
print("fMeasure: " + str(evaluation.f_measure(1)))
print("weightedFMeasure: " + str(evaluation.weighted_f_measure))
示例10: print
# 需要导入模块: from weka.classifiers import Evaluation [as 别名]
# 或者: from weka.classifiers.Evaluation import crossvalidate_model [as 别名]
# --- Example 10 (fragment): LinearRegression on binarized diabetes data ---
# NOTE(review): 'data_dir' and 'loader' come from an omitted part of the
# script; indentation was lost in scraping.
fname = data_dir + os.sep + "diabetes.arff"
print("\nLoading dataset: " + fname + "\n")
data = loader.load_file(fname)
# we'll set the class attribute after filtering
# apply NominalToBinary filter and set class attribute
fltr = Filter("weka.filters.unsupervised.attribute.NominalToBinary")
fltr.inputformat(data)
filtered = fltr.filter(data)
filtered.class_is_last()
# cross-validate LinearRegression on filtered data, display model
cls = Classifier(classname="weka.classifiers.functions.LinearRegression")
pout = PredictionOutput(classname="weka.classifiers.evaluation.output.prediction.PlainText")
evl = Evaluation(filtered)
# 10-fold CV; per-instance predictions are collected in pout
evl.crossvalidate_model(cls, filtered, 10, Random(1), pout)
print("10-fold cross-validation:\n" + evl.summary())
print("Predictions:\n\n" + str(pout))
cls.build_classifier(filtered)
print("Model:\n\n" + str(cls))
# use AddClassification filter with LinearRegression on filtered data
print("Applying AddClassification to filtered data:\n")
fltr = Filter(
classname="weka.filters.supervised.attribute.AddClassification",
options=["-W", "weka.classifiers.functions.LinearRegression", "-classification"])
fltr.inputformat(filtered)
classified = fltr.filter(filtered)
print(classified)
# convert class back to nominal
示例11: print
# 需要导入模块: from weka.classifiers import Evaluation [as 别名]
# 或者: from weka.classifiers.Evaluation import crossvalidate_model [as 别名]
# --- Example 11 (fragment): build and cross-validate a classifier on a CSV
# dataset. NOTE(review): the classname "weka.test.Regression" looks like a
# test artifact rather than a real classifier; the commented-out J48 line is
# presumably the intended one — confirm before reuse. The snippet also mixes
# property access (num_attributes) with method calls (num_attributes()),
# which suggests mixed wrapper API versions.
# load a dataset
iris_file = "HairEyeColor.csv"
print("Loading dataset: " + iris_file)
loader = Loader(classname="weka.core.converters.CSVLoader")
iris_data = loader.load_file(iris_file)
print (iris_data.num_attributes)
iris_data.set_class_index(iris_data.num_attributes() - 1)
# build a classifier and output model
print ("Training J48 classifier on iris")
classifier = Classifier(classname="weka.test.Regression")
#classifier = Classifier(classname="weka.classifiers.trees.J48", options=["-C", "0.5"])
# Instead of using 'options=["-C", "0.3"]' in the constructor, we can also set the "confidenceFactor"
# property of the J48 classifier itself. However, being of type float rather than double, we need
# to convert it to the correct type first using the double_to_float function:
#classifier.set_property("confidenceFactor", types.double_to_float(0.3))
classifier.build_classifier(iris_data)
print(classifier)
print(classifier.graph())
#plot_graph.plot_dot_graph(classifier.graph())
evaluation = Evaluation(iris_data) # initialize with priors
evaluation.crossvalidate_model(classifier, iris_data, 10, Random(42)) # 10-fold CV
print(evaluation.to_summary())
print("pctCorrect: " + str(evaluation.percent_correct()))
print("incorrect: " + str(evaluation.incorrect()))
jvm.stop()
示例12: Classifier
# 需要导入模块: from weka.classifiers import Evaluation [as 别名]
# 或者: from weka.classifiers.Evaluation import crossvalidate_model [as 别名]
# --- Example 12 (fragment): NaiveBayes trained on 'train', then evaluated.
# 'train' and 'test' are loaded in an omitted part of the script.
from weka.classifiers import Classifier
cls = Classifier(classname= "weka.classifiers.bayes.NaiveBayes" )
# No options of interest to adjust
# Build classifier on training data
cls.build_classifier(train)
# print(cls)
#import weka.plot.graph as graph
#graph.plot_dot_graph(cls.graph)
from weka.classifiers import Evaluation
from weka.core.classes import Random
evl = Evaluation(train)
# 10-fold cross-validation on the training data
evl.crossvalidate_model(cls, train, 10, Random(1))
print ("Kappa Score")
print (evl.kappa) # 0.50 - Not bad
print ("Evaluation Summary")
print (evl.summary()) # Accuracy: 83%
## Test model on new data ##
evl = Evaluation(test)
from weka.classifiers import PredictionOutput
pred_output = PredictionOutput(
classname="weka.classifiers.evaluation.output.prediction.PlainText", options=["-distribution"])
# NOTE(review): crossvalidate_model re-trains the classifier within each
# fold of 'test'; the model built on 'train' above is NOT what gets scored
# here. To evaluate the trained model on new data, evl.test_model(cls, test)
# would be the appropriate call — confirm the intent.
evl.crossvalidate_model(cls, test, 10, Random(1), pred_output)
示例13: Loader
# 需要导入模块: from weka.classifiers import Evaluation [as 别名]
# 或者: from weka.classifiers.Evaluation import crossvalidate_model [as 别名]
# --- Example 13 (fragment): effect of injected missing values on OneR/J48.
# NOTE(review): indentation was lost in scraping, and the snippet is cut off
# inside the final loop (the CV of the modified dataset is incomplete).
# 'data_dir' comes from an omitted part of the script; xrange => Python 2.
# load weather.nominal
loader = Loader(classname="weka.core.converters.ArffLoader")
fname = data_dir + os.sep + "weather.nominal.arff"
print("\nLoading dataset: " + fname + "\n")
data = loader.load_file(fname)
data.class_is_last()
# define classifiers
classifiers = ["weka.classifiers.rules.OneR", "weka.classifiers.trees.J48"]
# cross-validate original dataset
for classifier in classifiers:
cls = Classifier(classname=classifier)
evl = Evaluation(data)
evl.crossvalidate_model(cls, data, 10, Random(1))
print("%s (original): %0.0f%%" % (classifier, evl.percent_correct))
# replace 'outlook' in first 4 'no' instances with 'missing'
# (works on a copy so the original dataset stays intact)
modified = Instances.copy_instances(data)
count = 0
for i in xrange(modified.num_instances):
if modified.get_instance(i).get_string_value(modified.class_index) == "no":
count += 1
modified.get_instance(i).set_missing(0)
if count == 4:
break
# cross-validate modified dataset
for classifier in classifiers:
cls = Classifier(classname=classifier)
示例14: process_classifier
# 需要导入模块: from weka.classifiers import Evaluation [as 别名]
# 或者: from weka.classifiers.Evaluation import crossvalidate_model [as 别名]
#.........这里部分代码省略.........
# --- Example 14 (truncated interior of process_classifier): fetches device
# vectors from a database, writes ARFF training files and cross-validates a
# classifier per label / device pair. The function header and much of its
# start are omitted; indentation was lost in scraping.
# NOTE(review): the SQL below is built by string concatenation; 'table',
# 'startDev' and 'changeToDev' appear to come from internal lists, but if
# any can be externally influenced this is an injection risk — prefer
# parameterized queries.
aws_c.execute('select * from ' + table + ' ' \
'where duty!=0 and deviceMAC not in (select * from vector_reject) ' \
'and deviceMAC in (select * from id_fewcats_mac);')
else:
aws_c.execute('select * from ' + table + ' ' \
'where duty!=0 and deviceMAC not in (select * from vector_reject);')
results = aws_c.fetchall()
devCount += 1
# rough ETA: elapsed time scaled by remaining fraction of devices
remaining = chop_microseconds(((datetime.utcnow() - item_start)*totalDevs/devCount)-(datetime.utcnow() - item_start))
sys.stdout.write('Running ' + runType + ' classifier for \'' + label + '\' - ' + \
str(round(100*float(devCount)/totalDevs,2)) + ' pct complete (' + str(remaining) + ' remaining) \r')
sys.stdout.flush()
# Generate type list
# builds an ARFF nominal spec like {"a","b",...} from the last result column
total_types = ['{']
for data in results:
if(data[-1] not in total_types):
total_types.append('\"')
total_types.append(data[-1])
total_types.append('\"')
total_types.append(',')
total_types[-1] = '}'
typeStr = ''.join(total_types)
arff_file = label + '_train'
gen_arff(arff_file, typeStr, results, occ, arff_idcol)
train = loader.load_file(arff_file + '.arff')
train.class_is_last()
mv(arff_file + '.arff', master_saveDir)
cls.build_classifier(train)
# 10-fold cross-validation; results printed and appended to total_conf
evl = Evaluation(train)
evl.crossvalidate_model(cls, train, 10, Random(1))
print('\n')
#print(evl.percent_correct)
#print(evl.class_details())
print(evl.matrix())
total_conf.write('\n' + evl.matrix())
print(evl.summary())
total_conf.write(evl.summary() + '\n')
final_result = round(evl.percent_correct, 2)
else:
# pairwise branch: cross-validate every ordered device pair separately
success = []
for startDev in devList:
for changeToDev in devList:
if startDev != changeToDev:
devCount += 1
remaining = chop_microseconds(((datetime.utcnow() - item_start)*totalDevs/devCount)-(datetime.utcnow() - item_start))
sys.stdout.write('Running ' + runType + ' classifier for \'' + label + '\' - ' + \
str(round(100*float(devCount)/totalDevs,2)) + ' pct complete (' + str(remaining) + ' remaining) \r')
sys.stdout.flush()
aws_c.execute('select * from temp_dat_occ_vector_2 ' \
'where duty!=0 and deviceMAC in (\'' + startDev + '\',\'' + changeToDev + '\');')
results = [x[:-1] + (x[1],) for x in aws_c.fetchall()] # Class label is just the deviceMAC
if len(results) > 10:
# Generate type list
typeStr = '{' + startDev + ',' + changeToDev + '}'
arff_file = label + '_' + startDev + '_' + changeToDev + '_train'
gen_arff(arff_file, typeStr, results, occ, arff_idcol)
train = loader.load_file(arff_file + '.arff')
train.class_is_last()
mv(arff_file + '.arff', master_saveDir)
cls.build_classifier(train)
evl = Evaluation(train)
evl.crossvalidate_model(cls, train, 10, Random(1))
print('\n')
#print(evl.percent_correct)
#print(evl.class_details())
print(evl.matrix())
total_conf.write('\n' + evl.matrix())
print(evl.summary())
total_conf.write(evl.summary() + '\n')
success.append(evl.percent_correct)
# aggregate pairwise accuracies into mean plus 5th/10th/95th percentiles
if len(success) > 0:
final_result = [sum(success)/len(success), percentile(success, 5), percentile(success, 10), percentile(success, 95)]
else:
final_result = False
if label in total_results:
print('Warning label ' + label + ' exists twice, overwriting...')
if final_result != False:
total_results[label] = final_result
示例15: print
# 需要导入模块: from weka.classifiers import Evaluation [as 别名]
# 或者: from weka.classifiers.Evaluation import crossvalidate_model [as 别名]
# --- Example 15 (fragment): effect of Discretize bin count on J48 accuracy
# and tree size. NOTE(review): indentation was lost in scraping; the loop
# and if bodies below belong inside their headers. 'fname', 'data_dir' and
# 'jvm' are defined in an omitted part of the script.
print("\nLoading dataset: " + fname + "\n")
loader = Loader(classname="weka.core.converters.ArffLoader")
data = loader.load_file(fname)
data.set_class_index(data.num_attributes() - 1)
# equal-width ("") vs equal-frequency ("-F") binning
for equal in ["", "-F"]:
print("\nEqual frequency binning? " + str(equal == "-F") + "\n")
# bins == 0 means: use the raw, undiscretized data as baseline
for bins in [0, 40, 10, 5, 2]:
if bins > 0:
fltr = Filter(classname="weka.filters.unsupervised.attribute.Discretize", options=["-B", str(bins), equal])
fltr.set_inputformat(data)
filtered = fltr.filter(data)
else:
filtered = data
cls = Classifier(classname="weka.classifiers.trees.J48")
# cross-validate
evl = Evaluation(filtered)
evl.crossvalidate_model(cls, filtered, 10, Random(1))
# build classifier on full dataset
cls.build_classifier(filtered)
# get size of tree from model strings
# (scrapes the "Size of the tree :" line out of J48's text output)
lines = str(cls).split("\n")
nodes = "N/A"
for line in lines:
if line.find("Size of the tree :") > -1:
nodes = line.replace("Size of the tree :", "").strip()
# output stats
print("bins=%i accuracy=%0.1f nodes=%s" % (bins, evl.percent_correct(), nodes))
jvm.stop()