本文整理汇总了Python中weka.classifiers.Classifier.build_classifier方法的典型用法代码示例。如果您正苦于以下问题:Python Classifier.build_classifier方法的具体用法?Python Classifier.build_classifier怎么用?Python Classifier.build_classifier使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类weka.classifiers.Classifier
的用法示例。
在下文中一共展示了Classifier.build_classifier方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: main
# 需要导入模块: from weka.classifiers import Classifier [as 别名]
# 或者: from weka.classifiers.Classifier import build_classifier [as 别名]
def main(args):
    """
    Train a NaiveBayesUpdateable classifier incrementally on a dataset.

    The dataset file can be supplied as the first commandline argument;
    otherwise the bundled "vote.arff" is used.

    :param args: the commandline arguments
    :type args: list
    """
    # determine which dataset to load
    data_file = args[1] if len(args) > 1 else helper.get_data_dir() + os.sep + "vote.arff"
    helper.print_info("Loading dataset: " + data_file)
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(data_file, incremental=True)
    data.class_is_last()

    # initialize the incremental classifier with the dataset structure
    nb = Classifier(classname="weka.classifiers.bayes.NaiveBayesUpdateable")
    nb.build_classifier(data)

    # feed the remaining instances one at a time
    for inst in loader:
        nb.update_classifier(inst)
    print(nb)
示例2: main
# 需要导入模块: from weka.classifiers import Classifier [as 别名]
# 或者: from weka.classifiers.Classifier import build_classifier [as 别名]
def main():
    """
    Serialization demo: trains J48 on the iris data, writes the model
    (alone, then together with the dataset header) to disk and reads
    both back again.
    """
    # load a dataset
    iris_file = helper.get_data_dir() + os.sep + "iris.arff"
    helper.print_info("Loading dataset: " + iris_file)
    loader = Loader("weka.core.converters.ArffLoader")
    iris_data = loader.load_file(iris_file)
    iris_data.class_is_last()

    # train classifier
    classifier = Classifier("weka.classifiers.trees.J48")
    classifier.build_classifier(iris_data)

    # save and read a single object
    helper.print_title("I/O: single object")
    outfile = tempfile.gettempdir() + os.sep + "j48.model"
    serialization.write(outfile, classifier)
    model = Classifier(jobject=serialization.read(outfile))
    print(model)

    # save classifier and dataset header (multiple objects);
    # the title used to repeat "single object" — fixed to match the section
    helper.print_title("I/O: multiple objects")
    serialization.write_all(outfile, [classifier, Instances.template_instances(iris_data)])
    objects = serialization.read_all(outfile)
    for i, obj in enumerate(objects):
        helper.print_info("Object #" + str(i + 1) + ":")
        # wrap the raw JNI object in the matching python-weka-wrapper class
        if javabridge.get_env().is_instance_of(obj, javabridge.get_env().find_class("weka/core/Instances")):
            obj = Instances(jobject=obj)
        elif javabridge.get_env().is_instance_of(obj, javabridge.get_env().find_class("weka/classifiers/Classifier")):
            obj = Classifier(jobject=obj)
        print(obj)
示例3: main
# 需要导入模块: from weka.classifiers import Classifier [as 别名]
# 或者: from weka.classifiers.Classifier import build_classifier [as 别名]
def main(args):
    """
    Loads a dataset, randomizes it and splits it into train/test sets,
    trains J48 on the training set and evaluates the built model on the
    test set.

    :param args: the commandline arguments (optional, can be dataset filename)
    :type args: list
    """
    # pick the dataset: commandline argument or bundled default
    if len(args) > 1:
        data_file = args[1]
    else:
        data_file = helper.get_data_dir() + os.sep + "vote.arff"
    helper.print_info("Loading dataset: " + data_file)
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(data_file)
    data.class_is_last()

    # 66% train / 34% test split of the randomized data (seed 1)
    train, test = data.train_test_split(66.0, Random(1))

    # build and display the classifier
    cls = Classifier(classname="weka.classifiers.trees.J48")
    cls.build_classifier(train)
    print(cls)

    # evaluate on the held-out test set
    evl = Evaluation(train)
    evl.test_model(cls, test)
    print(evl.summary())
示例4: main
# 需要导入模块: from weka.classifiers import Classifier [as 别名]
# 或者: from weka.classifiers.Classifier import build_classifier [as 别名]
def main(args):
    """
    Trains J48 on a training set and prints, for every test instance, the
    actual class, predicted class, an error flag and the class distribution.
    The class attribute is assumed to be the last attribute.

    :param args: the commandline arguments (train and test datasets)
    :type args: list
    """
    # load train and test sets with the same loader
    helper.print_info("Loading train: " + args[1])
    loader = Loader(classname="weka.core.converters.ArffLoader")
    train = loader.load_file(args[1])
    train.class_index = train.num_attributes - 1
    helper.print_info("Loading test: " + args[2])
    test = loader.load_file(args[2])
    test.class_is_last()

    # build the classifier on the training data
    cls = Classifier(classname="weka.classifiers.trees.J48")
    cls.build_classifier(train)

    # output one prediction line per test instance
    print("# - actual - predicted - error - distribution")
    for index, inst in enumerate(test):
        pred = cls.classify_instance(inst)
        dist = cls.distribution_for_instance(inst)
        actual = inst.get_string_value(inst.class_index)
        predicted = inst.class_attribute.value(int(pred))
        error = "yes" if pred != inst.get_value(inst.class_index) else "no"
        print(
            "%d - %s - %s - %s - %s" %
            (index + 1, actual, predicted, error, str(dist.tolist())))
示例5: python_weka
# 需要导入模块: from weka.classifiers import Classifier [as 别名]
# 或者: from weka.classifiers.Classifier import build_classifier [as 别名]
# NOTE(review): thin wrapper around python-weka-wrapper that round-trips data
# through temporary ARFF files to train and apply a Bagging meta-classifier.
class python_weka(object):
    def __init__(self, input_x, input_y, labels):
        # feature rows
        self.input_x = input_x
        # target values, parallel to input_x
        self.input_y = input_y
        # presumably a mapping of attribute name -> ARFF type declaration,
        # iterated for the @attribute header lines -- TODO confirm
        self.labels = labels

    def write_arff(self, filename, relation, train_or_predict, input_x, input_y=None):
        """
        Write *input_x* (and optionally *input_y*) to *filename* in ARFF format.

        :param relation: name for the @relation header
        :param train_or_predict: 0 for training data, non-zero for prediction
            data (callers in this class always pass 0)
        """
        # NOTE(review): `train_or_predict` doubles as the attribute-loop
        # counter below, so after the loop it is no longer 0 even for training
        # data; the `train_or_predict == 0` test in the data loop therefore
        # looks unreachable for non-empty label sets and every row gets the
        # placeholder class value -- verify against the intended behavior.
        f = open(filename, "w")
        f.write("@relation " + relation + "\n")
        for i in self.labels:
            train_or_predict += 1
            # stops before emitting the last label -- presumably the class
            # attribute is declared elsewhere; TODO confirm
            if train_or_predict == len(self.labels):
                break
            f.write("@attribute " + i + " " + self.labels[i] + "\n")
        f.write("\n")
        f.write("@data" + "\n")
        for i in range(len(input_x)):
            for j in input_x[i]:
                f.write(str(j) + " ")
            if train_or_predict == 0:
                # training row: real label
                f.write(str(input_y[i]))
            else:
                # prediction row: placeholder class value
                f.write(str(0))
            f.write("\n")
        f.close()

    def train(self):
        """Train a Bagging classifier (seed 5) on the stored training data."""
        filename = "train.arff"
        self.write_arff(filename, "train", 0, self.input_x, self.input_y)
        loader = Loader(classname="weka.core.converters.ArffLoader")
        data = loader.load_file(filename)
        data.class_is_last()
        # trained model is kept on the instance for predict()
        self.cls = Classifier(classname="weka.classifiers.meta.Bagging", options=["-S", "5"])
        self.cls.build_classifier(data)
        os.remove(filename)

    def predict(self, test_data):
        """
        Classify *test_data* and return, per instance, the distribution value
        of the first class label.
        """
        filename = "test.arff"
        self.write_arff(filename, "test", 0, test_data)
        loader = Loader(classname="weka.core.converters.ArffLoader")
        data = loader.load_file(filename)
        data.class_is_last()
        # evl = Evaluation(data)
        # evl.evaluate_model(self.cls,data)
        # data.set_class_label(data.numAttributes() - 1)
        # data.setClassIndex(data.numAttributes() - 1)
        result = []
        for index, inst in enumerate(data):
            pred = self.cls.classify_instance(inst)
            dist = self.cls.distribution_for_instance(inst)
            result.append(dist[0])
            # print(str(index+1) + ": label index=" + str(pred) + ", class distribution=" + str(dist))
            # print str(index+1) + 'dist:'+ str(dist)
        os.remove(filename)
        return result
示例6: playback_speed_checker
# 需要导入模块: from weka.classifiers import Classifier [as 别名]
# 或者: from weka.classifiers.Classifier import build_classifier [as 别名]
def playback_speed_checker(inputFile, dirRef):
    """
    Classifies the playback speed of *inputFile*: finds a matching reference
    file in *dirRef*, computes the DTW distance against it and feeds that
    distance to a J48 classifier trained on 'dataset_playback.arff'.

    :param inputFile: path of the file to check
    :param dirRef: directory holding the reference files
    :return: 'nominal', 'down_speed' or 'up_speed'
    """
    TRAINING_ARFF = 'dataset_playback.arff'
    inputRef = ""
    # Start JVM once with all options (the original called jvm.start() three
    # times; only the first call has any effect, so the options were ignored).
    jvm.start(system_cp=True, packages=True, max_heap_size="512m")
    # Find reference file whose name contains the input file's basename
    for file in os.listdir(dirRef):
        if str(file).find(str(os.path.basename(inputFile))) != -1:
            inputRef = os.path.join(dirRef, file)
            break
    # Distance calculation via dynamic time warping
    (result, distance) = dtw_checker(inputFile, inputRef)
    # Loading training data
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(TRAINING_ARFF)
    data.class_is_last()  # set class attribute
    # Train the classifier
    # cls = Classifier(classname="weka.classifiers.functions.SMO")
    cls = Classifier(classname="weka.classifiers.trees.J48", options=["-C", "0.3", "-M", "10"])
    cls.build_classifier(data)
    # Build the instance to classify. The original passed numpy.ndarray(distance),
    # which interprets the distance as an array *shape*, not a value; a
    # one-element value vector is what create_instance expects.
    speed_instance = Instance.create_instance([distance], classname='weka.core.DenseInstance', weight=1.0)
    speed_instance.dataset = data
    # Classify instance; the original tested `speed_flag == 0` twice, which
    # made 'down_speed' unreachable as a final value -- presumably a non-zero
    # label means sped-up playback.
    speed_flag = cls.classify_instance(speed_instance)
    if distance == 0:
        speed_class = 'nominal'
    elif speed_flag == 0:
        speed_class = 'down_speed'
    else:
        speed_class = 'up_speed'
    # print os.path.basename(inputFile) + ' --- ' + speed_class
    # Stop JVM
    jvm.stop()
    print("SPEED IS: " + speed_class)
    return speed_class
示例7: main
# 需要导入模块: from weka.classifiers import Classifier [as 别名]
# 或者: from weka.classifiers.Classifier import build_classifier [as 别名]
def main():
    """
    Just runs some example code.
    """
    # load the bodyfat regression dataset
    bodyfat_file = helper.get_data_dir() + os.sep + "bodyfat.arff"
    helper.print_info("Loading dataset: " + bodyfat_file)
    arff_loader = Loader("weka.core.converters.ArffLoader")
    bodyfat_data = arff_loader.load_file(bodyfat_file)
    bodyfat_data.class_is_last()

    # build an M5P model tree on it and print the model
    helper.print_title("Creating help string")
    m5p = Classifier(classname="weka.classifiers.trees.M5P")
    m5p.build_classifier(bodyfat_data)
    print(m5p)
示例8: train_model
# 需要导入模块: from weka.classifiers import Classifier [as 别名]
# 或者: from weka.classifiers.Classifier import build_classifier [as 别名]
def train_model(self, training_data):
    """
    Trains the weka classifier for this model (unless a serialized model
    already exists) and persists both the model and its string
    representation (the "parameters" file).

    :param training_data: the weka Instances to train on
    """
    model_weka = None
    if os.path.isfile(self.model_file):
        print('Model ' + self.name + ' already trained.')
    else:
        print('Starting to train_model model ' + self.name + '.')
        model_weka = Classifier(classname=self.classname, options=self.options)
        model_weka.build_classifier(data=training_data)
        serialization.write(filename=self.model_file, jobject=model_weka)
        print('Model ' + self.name + ' trained and saved.')
    if os.path.isfile(self.parameter_file):
        print('Parameters of the model ' + self.name + ' already saved.')
    else:
        # reload the model from disk if it was not trained in this call
        # (was `== None`; identity comparison is the correct idiom)
        if model_weka is None:
            model_weka = Classifier(jobject=serialization.read(self.model_file))
        save_file(file_name=self.parameter_file, content=str(model_weka))
        print('Parameters of the model ' + self.name + ' saved.')
示例9: riaa_checker
# 需要导入模块: from weka.classifiers import Classifier [as 别名]
# 或者: from weka.classifiers.Classifier import build_classifier [as 别名]
def riaa_checker(inputFile):
    """
    Decides whether *inputFile* has been RIAA-filtered by classifying its
    bark-band ratios with an SMO classifier trained on a reference ARFF file.

    :param inputFile: path of the file to check
    :return: 'riaa_ok' or 'riaa_ko'
    """
    # raw string: the original plain literal contained "\U", an invalid
    # unicode escape under Python 3 (same byte content either way)
    TRAINING_ARFF = r'C:\Users\ASUS\Desktop\IGNASI\SMC\Workspace\dataset_riaa.arff'
    # Start JVM once with all options (the original called jvm.start() three
    # times; only the first call has any effect, so the options were ignored).
    jvm.start(system_cp=True, packages=True, max_heap_size="512m")
    # Calculation of bark bands information
    (absolute_bark, relative_bark, bark_ratios) = compute_bark_spectrum(inputFile)
    # Loading data
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(TRAINING_ARFF)
    data.class_is_last()  # set class attribute
    # Train the classifier
    cls = Classifier(classname="weka.classifiers.functions.SMO")
    # cls = Classifier(classname="weka.classifiers.trees.J48", options = ["-C", "0.3", "-M", "10"])
    cls.build_classifier(data)
    # Build and classify the instance
    bark_instance = Instance.create_instance(bark_ratios, classname='weka.core.DenseInstance', weight=1.0)
    bark_instance.dataset = data
    riaa_flag = cls.classify_instance(bark_instance)
    riaa_class = 'riaa_ok' if riaa_flag == 0 else 'riaa_ko'
    # print os.path.basename(inputFile) + ' --- ' + riaa_class
    # Stop JVM
    jvm.stop()
    print("RIAA FILTERING?: " + riaa_class)
    return riaa_class
示例10: getDecisionTree
# 需要导入模块: from weka.classifiers import Classifier [as 别名]
# 或者: from weka.classifiers.Classifier import build_classifier [as 别名]
def getDecisionTree(self, inputPath):
    """
    Builds a J48 decision tree from the ARFF file at *inputPath* and returns
    the tree's graph representation.

    :param inputPath: path to the ARFF file
    :return: the classifier's graph
    """
    # load arff
    data = self.load_Arff(inputPath)
    # last attribute is the class (the original set this twice; once suffices)
    data.set_class_index(data.num_attributes() - 1)
    classifier = Classifier(classname="weka.classifiers.trees.J48", options=["-C", "0.3"])
    classifier.build_classifier(data)
    # score every instance; results are discarded (kept from the original)
    for index in range(0, data.num_instances()):
        instance = data.get_instance(index)
        # print instance
        result = classifier.distribution_for_instance(instance)
        # print result
    graph = classifier.graph()
    return graph
示例11: classify
# 需要导入模块: from weka.classifiers import Classifier [as 别名]
# 或者: from weka.classifiers.Classifier import build_classifier [as 别名]
def classify(train, test, name="RF", tuning=False):
    """
    Trains the named classifier on *train* and returns the predictions plus
    the second-class distribution value for every instance in *test*.

    Accepts three input forms: python lists (converted to weka instance
    files), existing file paths, or CSV data loaded as ndarrays.

    :param train: training data (list, file path, or CSV)
    :param test: test data (list, file path, or CSV)
    :param name: key into the module-level `classifiers` mapping
    :param tuning: whether to tune options via `tune` before training
    :return: tuple of (predictions, distributions)
    """
    jvm.start()
    # normalize the three supported input forms into weka Instances;
    # note that `train`/`test` are rebound in the list case so that a later
    # tune(train) sees the converted file
    if isinstance(train, list) and isinstance(test, list):
        train = weka_instance(train)
        test = weka_instance(test)
        trn_data = converters.load_any_file(train)
        tst_data = converters.load_any_file(test)
    elif os.path.isfile(train) and os.path.isfile(test):
        trn_data = converters.load_any_file(train)
        tst_data = converters.load_any_file(test)
    else:
        trn_data = converters.ndarray_to_instances(csv_as_ndarray(train), relation="Train")
        tst_data = converters.ndarray_to_instances(csv_as_ndarray(test), relation="Test")
    trn_data.class_is_last()
    tst_data.class_is_last()

    # t = time()
    opt = tune(train) if tuning else default_opt
    # print("Time to tune: {} seconds".format(time() - t))

    model = Classifier(classname=classifiers[name.lower()], options=opt)
    model.build_classifier(trn_data)
    distr = [model.distribution_for_instance(inst)[1] for inst in tst_data]
    preds = [model.classify_instance(inst) for inst in tst_data]
    jvm.stop()
    return preds, distr
示例12: Loader
# 需要导入模块: from weka.classifiers import Classifier [as 别名]
# 或者: from weka.classifiers.Classifier import build_classifier [as 别名]
# Script fragment: 10-fold cross-validation of OneR on weather.nominal,
# then a model built on the full dataset.
# Imports moved above first use: the original read `os.sep` before `import os`.
import os
import weka.core.jvm as jvm
from weka.core.converters import Loader
from weka.core.classes import Random
from weka.classifiers import Classifier, Evaluation
from weka.filters import Filter

# default data directory if none was configured earlier in the script
if data_dir is None:
    data_dir = "." + os.sep + "data"

jvm.start()
# load weather.nominal
loader = Loader(classname="weka.core.converters.ArffLoader")
fname = data_dir + os.sep + "weather.nominal.arff"
print("\nLoading dataset: " + fname + "\n")
data = loader.load_file(fname)
data.set_class_index(data.num_attributes() - 1)
# perform 10-fold cross-validation
cls = Classifier(classname="weka.classifiers.rules.OneR")
evl = Evaluation(data)
evl.crossvalidate_model(cls, data, 10, Random(1))
print("10-fold cross-validation:\n" + evl.to_summary())
# build model on full dataset and output it
cls.build_classifier(data)
print("Model:\n\n" + str(cls))
jvm.stop()
示例13: __init__
# 需要导入模块: from weka.classifiers import Classifier [as 别名]
# 或者: from weka.classifiers.Classifier import build_classifier [as 别名]
#.........这里部分代码省略.........
print "Model Params"
print self.modelParams
# Get data for testing and learning
learnerData = self.retrieveData(self.questionID, "learner")
testData = self.retrieveData(self.questionID, 'test')
masterData = self.retrieveData(self.questionID, 'all')
masterData = self.addNominals(masterData)
# Check if there is enough correct data to run
if (learnerData.num_instances < 1 or testData.num_instances < 1):
self.status = self.config.NOT_ENOUGH_DATA
return False
# If this is a prediction and there is a valid patient, change masterData header
patientObj = self.buildPatientObject()
patientInstance = None
if ((patientObj is not None) and (self.predict == 1)):
masterData = self.addPatientNominals(patientObj, masterData)
patientInstance = self.createPatientInstance(patientObj, masterData)
masterData.add_instance(patientInstance)
elif (patientObj is None) and (self.predict == 1):
print 'No patient defined for prediction. Exiting'
return True
# Fix dataset headers up to match and fix instances to match headers
masterData.delete()
learner = masterData.copy_instances(masterData, 0, 0)
test = masterData.copy_instances(masterData, 0, 0)
self.addInstancesToDataset(learnerData, learner)
self.addInstancesToDataset(testData, test)
# Comparison of data for testing purposes
# print 'learnerData'
# print learnerData
# print 'learner'
# print learner
# print 'testData'
# print testData
# print 'test'
# print test
# pdb.set_trace()
# Instantiate classifier
self.cls = Classifier(classname=self.classifier, options=self.parameters)
# Run classifier
self.cls.build_classifier(learner)
# for index, inst in enumerate(learnerData):
# prediction = self.cls.classify_instance(inst)
# distribution = self.cls.distribution_for_instance(inst)
# Test classifier
evl = Evaluation(learner)
evl.test_model(self.cls, test)
# Store information about matrix
self.acc = evl.percent_correct
self.val = None
# Convert numpy array into simple array
confusionMatrix = []
confusionMatrix.append([evl.confusion_matrix[0][0], evl.confusion_matrix[0][1]])
confusionMatrix.append([evl.confusion_matrix[1][0], evl.confusion_matrix[1][1]])
# Convert matrix into json format
self.matrix = json.dumps(confusionMatrix)
# print 'Classifier: ', self.classifier
# print 'ID: ', self.questionID
# print 'ACC: ', self.acc
# print(evl.summary())
# If this is a prediction... make the prediction
if ((patientObj is not None) and (self.predict == 1)):
masterData.add_instance(patientInstance)
print "Running prediction on patient: "
print masterData.get_instance(0)
self.prediction = self.cls.classify_instance(masterData.get_instance(0))
#self.uploadPrediction()
# Temporarily store file to serialize to
fileName = str(self.questionID) + self.algorithm + ".model"
serialization.write(fileName, self.cls)
# Open that file and store it
self.model = None
with open(fileName, 'rb') as f:
self.model = f.read()
# Remove temporary file
os.remove(fileName)
# Set status to awaiting feedback
self.status = self.config.AWAITING_FEEDBACK_STATUS
return True
示例14: Filter
# 需要导入模块: from weka.classifiers import Classifier [as 别名]
# 或者: from weka.classifiers.Classifier import build_classifier [as 别名]
# Script fragment: `data` is the dataset loaded earlier in the original
# script -- it is not defined in this excerpt.
# we'll set the class attribute after filtering
# apply NominalToBinary filter and set class attribute
fltr = Filter("weka.filters.unsupervised.attribute.NominalToBinary")
fltr.inputformat(data)
filtered = fltr.filter(data)
filtered.class_is_last()
# cross-validate LinearRegression on filtered data, display model
cls = Classifier(classname="weka.classifiers.functions.LinearRegression")
pout = PredictionOutput(classname="weka.classifiers.evaluation.output.prediction.PlainText")
evl = Evaluation(filtered)
evl.crossvalidate_model(cls, filtered, 10, Random(1), pout)
print("10-fold cross-validation:\n" + evl.summary())
print("Predictions:\n\n" + str(pout))
# train on the full filtered dataset and show the resulting model
cls.build_classifier(filtered)
print("Model:\n\n" + str(cls))
# use AddClassification filter with LinearRegression on filtered data
print("Applying AddClassification to filtered data:\n")
fltr = Filter(
    classname="weka.filters.supervised.attribute.AddClassification",
    options=["-W", "weka.classifiers.functions.LinearRegression", "-classification"])
fltr.inputformat(filtered)
classified = fltr.filter(filtered)
print(classified)
# convert class back to nominal (attribute index 9 -- presumably the class
# column of the upstream dataset; TODO confirm)
fltr = Filter(classname="weka.filters.unsupervised.attribute.NumericToNominal", options=["-R", "9"])
fltr.inputformat(classified)
nominal = fltr.filter(classified)
示例15: Loader
# 需要导入模块: from weka.classifiers import Classifier [as 别名]
# 或者: from weka.classifiers.Classifier import build_classifier [as 别名]
# Script fragment: trains a default C4.5 (J48) tree on segment-challenge and
# evaluates it on both the training set and a separate test set.
# (The original script may continue beyond this excerpt.)
jvm.logger.setLevel(jvm.logging.WARNING)  # silence verbose JVM logging
jvm.start(packages=True, max_heap_size="512m")
# Each instance has nominal class and numeric attributes
loader = Loader(classname="weka.core.converters.ArffLoader")
trainData = loader.load_file('segment-challenge.arff')
trainData.class_is_last()
testData = loader.load_file('segment-test.arff')
testData.class_is_last()
# Default C4.5 tree
classifier = Classifier(classname="weka.classifiers.trees.J48")
# Search for the best parameters and build a classifier with them
# NOTE(review): no parameter search actually happens here -- the classifier
# is built with default options; the comment above is kept from the original.
classifier.build_classifier(trainData)
print("\n\n=========== Classifier information ================\n\n")
print(classifier.options)
print(classifier)
print("\n\n=========== Train results ================\n\n")
evaluation = Evaluation(trainData)
evaluation.test_model(classifier, trainData)
print(classifier.to_commandline())
print(evaluation.matrix())
print("Train recognition: %0.2f%%" % evaluation.percent_correct)
print("\n\n=========== Test results ================\n\n")
evaluation = Evaluation(testData)
evaluation.test_model(classifier, testData)