本文整理汇总了Python中weka.filters.Filter.inputformat方法的典型用法代码示例。如果您正苦于以下问题:Python Filter.inputformat方法的具体用法?Python Filter.inputformat怎么用?Python Filter.inputformat使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类weka.filters.Filter
的用法示例。
在下文中一共展示了Filter.inputformat方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: main
# 需要导入模块: from weka.filters import Filter [as 别名]
# 或者: from weka.filters.Filter import inputformat [as 别名]
def main():
    """
    Just runs some example code: clusters the iris dataset three ways --
    batch SimpleKMeans with evaluation/plotting, a FilteredClusterer, and
    an incremental Cobweb clusterer fed one filtered instance at a time.
    """
    # load a dataset
    iris_file = helper.get_data_dir() + os.sep + "iris.arff"
    helper.print_info("Loading dataset: " + iris_file)
    loader = Loader("weka.core.converters.ArffLoader")
    data = loader.load_file(iris_file)
    # remove class attribute so the clusterer only sees the features
    data.delete_last_attribute()

    # build a clusterer and output model
    helper.print_title("Training SimpleKMeans clusterer")
    clusterer = Clusterer(classname="weka.clusterers.SimpleKMeans", options=["-N", "3"])
    clusterer.build_clusterer(data)
    print(clusterer)
    helper.print_info("Evaluating on data")
    evaluation = ClusterEvaluation()
    evaluation.set_model(clusterer)
    evaluation.test_model(data)
    print("# clusters: " + str(evaluation.num_clusters))
    print("log likelihood: " + str(evaluation.log_likelihood))
    print("cluster assignments:\n" + str(evaluation.cluster_assignments))
    plc.plot_cluster_assignments(evaluation, data, inst_no=True)

    # using a filtered clusterer: the Remove filter strips the class
    # attribute inside the FilteredClusterer instead of up front
    helper.print_title("Filtered clusterer")
    loader = Loader("weka.core.converters.ArffLoader")
    data = loader.load_file(iris_file)
    clusterer = Clusterer(classname="weka.clusterers.SimpleKMeans", options=["-N", "3"])
    remove = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "last"])
    fclusterer = FilteredClusterer()
    fclusterer.clusterer = clusterer
    fclusterer.filter = remove
    fclusterer.build_clusterer(data)
    print(fclusterer)

    # load a dataset incrementally and build clusterer incrementally
    helper.print_title("Incremental clusterer")
    loader = Loader("weka.core.converters.ArffLoader")
    iris_inc = loader.load_file(iris_file, incremental=True)
    clusterer = Clusterer("weka.clusterers.Cobweb")
    remove = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "last"])
    remove.inputformat(iris_inc)
    # outputformat() yields the header structure the filter will produce
    iris_filtered = remove.outputformat()
    clusterer.build_clusterer(iris_filtered)
    for inst in loader:
        # pass each instance through the filter before updating the model
        remove.input(inst)
        inst_filtered = remove.output()
        clusterer.update_clusterer(inst_filtered)
    clusterer.update_finished()
    print(clusterer.to_commandline())
    print(clusterer)
    print(clusterer.graph)
    plg.plot_dot_graph(clusterer.graph)
示例2: remove_correct_classified
# 需要导入模块: from weka.filters import Filter [as 别名]
# 或者: from weka.filters.Filter import inputformat [as 别名]
def remove_correct_classified(self, invert=False):
    """
    Filters self.data with Weka's RemoveMisclassified filter, configured
    with self.classifier, and stores the result back in self.data.

    :param invert: when False (default), the -V flag is passed so the
                   selection is inverted; when True it is omitted
    :type invert: bool
    """
    options = [
        '-W', self.classifier.to_commandline(),
        '-C', str(self.class_index),  # class index
        # '-F', '0',    # folds
        # '-T', '0.1',  # threshold for numeric classes
        '-I', '0',  # max iterations
        # NOTE(review): when invert=True this appends an empty-string
        # option; confirm Weka tolerates '' in the option array
        '-V' if not invert else ''
    ]
    classname = "weka.filters.unsupervised.instance.RemoveMisclassified"
    remove = Filter(classname=classname, options=options)
    remove.inputformat(self.data)
    self.data = remove.filter(self.data)
示例3: use_filter
# 需要导入模块: from weka.filters import Filter [as 别名]
# 或者: from weka.filters.Filter import inputformat [as 别名]
def use_filter(data):
    """
    Uses the AttributeSelection filter for attribute selection and prints
    the filtered dataset.

    :param data: the dataset to use
    :type data: Instances
    """
    print("\n2. Filter")
    flter = Filter(classname="weka.filters.supervised.attribute.AttributeSelection")
    aseval = ASEvaluation(classname="weka.attributeSelection.CfsSubsetEval")
    assearch = ASSearch(classname="weka.attributeSelection.GreedyStepwise", options=["-B"])
    # attach evaluator and search strategy via the underlying Java properties
    flter.set_property("evaluator", aseval.jobject)
    flter.set_property("search", assearch.jobject)
    flter.inputformat(data)
    filtered = flter.filter(data)
    print(str(filtered))
示例4: load
# 需要导入模块: from weka.filters import Filter [as 别名]
# 或者: from weka.filters.Filter import inputformat [as 别名]
def load(path, db):
    """
    Loads a dataset from a CSV file, forcing known device columns to be
    nominal, converts remaining string attributes to nominal, and attaches
    a database connection.

    :param path: path to the CSV file
    :param db: database connection to attach to the returned dataset
    :return: the prepared Dataset (class attribute set to the last column)
    """
    # ONE-based column indices that must be parsed as nominal attributes
    nominals = [
        49,   # dev_double_fp_config
        50,   # dev_endian_little
        51,   # dev_execution_capabilities
        52,   # dev_extensions
        54,   # dev_global_mem_cache_type
        57,   # dev_host_unified_memory
        63,   # dev_image_support
        65,   # dev_local_mem_type
        96,   # dev_queue_properties
        97,   # dev_single_fp_config
        98,   # dev_type
        100,  # dev_vendor_id
    ]
    nominal_indices = ",".join(str(index) for index in nominals)
    force_nominal = ["-N", nominal_indices]
    # Load data from CSV.
    dataset = Dataset.load_csv(path, options=force_nominal)
    dataset.__class__ = Dataset
    # Set class index and database connection.
    dataset.class_index = -1
    dataset.db = db
    # Create string->nominal type attribute filter, ignoring the first
    # attribute (scenario ID), since we're not classifying with it.
    string_to_nominal = WekaFilter(classname=("weka.filters.unsupervised."
                                              "attribute.StringToNominal"),
                                   options=["-R", "2-last"])
    string_to_nominal.inputformat(dataset.instances)
    # Create filtered dataset, and swap data around.
    dataset.instances = string_to_nominal.filter(dataset.instances)
    return dataset
示例5: select_missclassified
# 需要导入模块: from weka.filters import Filter [as 别名]
# 或者: from weka.filters.Filter import inputformat [as 别名]
def select_missclassified(self):
    """
    Reduces self.data to the instances that self.base_classifier
    misclassifies: appends classification/error attributes, keeps the
    flagged instances, then drops the two helper attributes again.
    """
    # append predicted class and error flag as extra attributes
    remove = Filter(classname="weka.filters.supervised.attribute.AddClassification",
                    options=['-classification', '-error', '-W', self.base_classifier.to_commandline()])
    remove.inputformat(self.data)
    self.data = remove.filter(self.data)
    # keep only the instances flagged in the (last) error attribute
    remove = Filter(classname="weka.filters.unsupervised.instance.RemoveWithValues",
                    options=['-S', '0.0', '-C', 'last', '-L', 'last', '-V'])
    remove.inputformat(self.data)
    # BUG FIX: the original configured this filter but never applied it,
    # so the misclassified-instance selection was silently skipped.
    self.data = remove.filter(self.data)
    # drop the two attributes added by AddClassification
    remove = Filter(classname="weka.filters.unsupervised.attribute.Remove",
                    options=['-R', str(self.data.num_attributes - 2) + ',last'])
    remove.inputformat(self.data)
    self.data = remove.filter(self.data)
示例6: build_and_classify
# 需要导入模块: from weka.filters import Filter [as 别名]
# 或者: from weka.filters.Filter import inputformat [as 别名]
def build_and_classify(classifier, classifier_name, approach_name, infile, percentage='10'):
    """
    Creates model and classifies against input data. Returns accuracy statistics.

    :param classifier: the (untrained) Weka classifier to build
    :param classifier_name: label for the classifier in the result row
    :param approach_name: label for the approach in the result row
    :param infile: path to the CSV input file
    :param percentage: training-set size as a percentage string
    :return: [approach_name, classifier_name, percentage,
              percent_correct, weighted_f_measure]
    """
    # set seed so results are consistent
    random.seed('iot')
    # load data
    loader = Loader(classname='weka.core.converters.CSVLoader')
    data = loader.load_file(infile)
    data.class_is_last()
    # convert all numeric attributes to nominal
    to_nominal = Filter(classname='weka.filters.unsupervised.attribute.NumericToNominal',
                        options=['-R', 'first-last'])
    to_nominal.inputformat(data)
    data = to_nominal.filter(data)
    # randomize data with constant seed
    randomize = Filter(classname='weka.filters.unsupervised.instance.Randomize',
                       options=['-S', '42'])
    randomize.inputformat(data)
    data = randomize.filter(data)
    # create training set; -V inverts RemovePercentage so the given
    # percentage is KEPT as the training data
    train_percent_filter = Filter(classname='weka.filters.unsupervised.instance.RemovePercentage',
                                  options=['-P', percentage, '-V'])
    train_percent_filter.inputformat(data)
    train = train_percent_filter.filter(data)
    # NOTE(review): the test set is the FULL dataset, so it overlaps the
    # training data -- kept as in the original
    test = data
    # build and test classifier
    classifier.build_classifier(train)
    evaluation = Evaluation(train)
    evaluation.test_model(classifier, test)
    # return results as array
    return [
        approach_name,
        classifier_name,
        percentage,
        evaluation.percent_correct,
        evaluation.weighted_f_measure,
    ]
示例7: runner
# 需要导入模块: from weka.filters import Filter [as 别名]
# 或者: from weka.filters.Filter import inputformat [as 别名]
def runner(self, cdat, heap_size = 16384, seed = None, verbose = True):
self.set_status(Pipeline.RUNNING)
self.logs.append('Initializing Pipeline')
para = self.config
self.logs.append('Reading Pipeline Configuration')
head = ''
name = get_rand_uuid_str()
self.logs.append('Reading Input File')
for i, stage in enumerate(self.stages):
if stage.code in ('dat.fle', 'prp.bgc', 'prp.nrm', 'prp.pmc', 'prp.sum'):
self.stages[i].status = Pipeline.RUNNING
if stage.code == 'dat.fle':
head = os.path.abspath(stage.value.path)
name, _ = os.path.splitext(stage.value.name)
self.logs.append('Parsing to ARFF')
path = os.path.join(head, '{name}.arff'.format(name = name))
# This bug, I don't know why, using Config.schema instead.
# cdat.toARFF(path, express_config = para.Preprocess.schema, verbose = verbose)
for i, stage in enumerate(self.stages):
if stage.code in ('dat.fle', 'prp.bgc', 'prp.nrm', 'prp.pmc', 'prp.sum'):
self.stages[i].status = Pipeline.COMPLETE
self.logs.append('Saved ARFF at {path}'.format(path = path))
self.logs.append('Splitting to Training and Testing Sets')
JVM.start(max_heap_size = '{size}m'.format(size = heap_size))
load = Loader(classname = 'weka.core.converters.ArffLoader')
# data = load.load_file(path)
# save = Saver(classname = 'weka.core.converters.ArffSaver')
data = load.load_file(os.path.join(head, 'iris.arff')) # For Debugging Purposes Only
data.class_is_last() # For Debugging Purposes Only
# data.class_index = cdat.iclss
for i, stage in enumerate(self.stages):
if stage.code == 'prp.kcv':
self.stages[i].status = Pipeline.RUNNING
self.logs.append('Splitting Training Set')
# TODO - Check if this seed is worth it.
seed = assign_if_none(seed, random.randint(0, 1000))
opts = ['-S', str(seed), '-N', str(para.Preprocess.FOLDS)]
wobj = Filter(classname = 'weka.filters.supervised.instance.StratifiedRemoveFolds', options = opts + ['-V'])
wobj.inputformat(data)
tran = wobj.filter(data)
self.logs.append('Splitting Testing Set')
wobj.options = opts
test = wobj.filter(data)
for i, stage in enumerate(self.stages):
if stage.code == 'prp.kcv':
self.stages[i].status = Pipeline.COMPLETE
self.logs.append('Performing Feature Selection')
feat = [ ]
for comb in para.FEATURE_SELECTION:
if comb.USE:
for i, stage in enumerate(self.stages):
if stage.code == 'ats':
search = stage.value.search.name
evaluator = stage.value.evaluator.name
if search == comb.Search.NAME and evaluator == comb.Evaluator.NAME:
self.stages[i].status = Pipeline.RUNNING
srch = ASSearch(classname = 'weka.attributeSelection.{classname}'.format(
classname = comb.Search.NAME,
options = assign_if_none(comb.Search.OPTIONS, [ ])
))
ewal = ASEvaluation(classname = 'weka.attributeSelection.{classname}'.format(
classname = comb.Evaluator.NAME,
options = assign_if_none(comb.Evaluator.OPTIONS, [ ])
))
attr = AttributeSelection()
attr.search(srch)
attr.evaluator(ewal)
attr.select_attributes(tran)
meta = addict.Dict()
meta.search = comb.Search.NAME
meta.evaluator = comb.Evaluator.NAME
meta.features = [tran.attribute(index).name for index in attr.selected_attributes]
feat.append(meta)
#.........这里部分代码省略.........
示例8: print
# 需要导入模块: from weka.filters import Filter [as 别名]
# 或者: from weka.filters.Filter import inputformat [as 别名]
from weka.core.converters import Loader
from weka.clusterers import Clusterer, ClusterEvaluation
from weka.filters import Filter
import weka.plot.clusterers as plc
# NOTE(review): this excerpt relies on `jvm`, `data_dir` and `os` being
# defined earlier in the full script (e.g. `import weka.core.jvm as jvm`).
jvm.start()
# load iris
fname = data_dir + os.sep + "iris.arff"
print("\nLoading dataset: " + fname + "\n")
loader = Loader(classname="weka.core.converters.ArffLoader")
data = loader.load_file(fname)
# remove class attribute (last column) so clustering ignores the label
flt = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "last"])
flt.inputformat(data)
filtered = flt.filter(data)
# build KMeans
print("\n--> SimpleKMeans\n")
cl = Clusterer(classname="weka.clusterers.SimpleKMeans", options=["-N", "3"])
cl.build_clusterer(filtered)
evl = ClusterEvaluation()
evl.set_model(cl)
evl.test_model(filtered)
print(evl.cluster_results)
plc.plot_cluster_assignments(evl, data, atts=[], inst_no=True, wait=True)
# use AddCluster filter
print("\n--> AddCluster filter\n")
# NOTE(review): this statement continues beyond the excerpt (options truncated)
flt = Filter(classname="weka.filters.unsupervised.attribute.AddCluster",
示例9: merge_nominal_attributes
# 需要导入模块: from weka.filters import Filter [as 别名]
# 或者: from weka.filters.Filter import inputformat [as 别名]
def merge_nominal_attributes(self, significance=0.01):
    """
    Merges nominal attribute values in self.data that do not differ
    significantly, using Weka's supervised MergeNominalValues filter.

    :param significance: significance level passed via -L
    :type significance: float
    """
    merge = Filter(classname="weka.filters.supervised.attribute.MergeNominalValues",
                   options=['-L', str(significance), '-R', 'first-last'])
    merge.inputformat(self.data)
    self.data = merge.filter(self.data)
示例10: run_classifier
# 需要导入模块: from weka.filters import Filter [as 别名]
# 或者: from weka.filters.Filter import inputformat [as 别名]
def run_classifier(path, prot, sel, cols, prot_vals, beta):
    """
    Trains NaiveBayes on progressively reduced attribute sets and computes
    a disparate-impact (DI) score for each run.

    :param path: path to the CSV dataset
    :param prot: protected attribute info; prot[1] is the positive value,
                 prot[2] its index -- presumably; confirm against caller
    :param sel: sel[2] is the ZERO-based index of the attribute to remove
    :param cols: column names; run i == 0 uses all attributes ("all")
    :param prot_vals: protected-attribute value per instance, aligned with rows
    :param beta: smoothing constant in the DI formula
    :return: dict mapping attribute name (or "all") to its DI value
    """
    DIs = dict()
    jvm.start()
    for i in range(len(cols) - 1):
        loader = Loader(classname="weka.core.converters.CSVLoader")
        data = loader.load_file(path)
        # remove selected attribute from the data
        # NOTE: options are ONE indexed, not ZERO indexed
        remove = Filter(classname="weka.filters.unsupervised.attribute.Remove",
                        options=["-R", str(sel[2] + 1)])
        remove.inputformat(data)
        data = remove.filter(data)
        # if running for only one attribute, remove all others (except protected)
        if i > 0:
            for j in range(1, prot[2] + 1):
                if i != j:
                    remove = Filter(classname="weka.filters.unsupervised.attribute.Remove",
                                    options=["-R", ("1" if i > j else "2")])
                    remove.inputformat(data)
                    data = remove.filter(data)
        # set prot attribute as Class attribute
        data.class_is_last()
        # run classifier
        cls = Classifier(classname="weka.classifiers.bayes.NaiveBayes")
        cls.build_classifier(data)
        # count each (protected value, prediction) combination
        pos_and_pred = 0.0
        pos_and_not_pred = 0.0
        neg_and_pred = 0.0
        neg_and_not_pred = 0.0
        for ind, inst in enumerate(data):
            if cls.classify_instance(inst):
                if prot_vals[ind] == prot[1]:
                    pos_and_pred += 1
                else:
                    neg_and_pred += 1
            else:
                if prot_vals[ind] == prot[1]:
                    pos_and_not_pred += 1
                else:
                    neg_and_not_pred += 1
        # calculate DI from the balanced error rate (BER)
        # NOTE(review): raises ZeroDivisionError if either protected group
        # is empty -- kept as in the original
        BER = ((pos_and_not_pred / (pos_and_pred + pos_and_not_pred)) +
               (neg_and_pred / (neg_and_pred + neg_and_not_pred))) * 0.5
        if BER > 0.5:
            BER = 1 - BER
        DI = 1 - ((1 - 2 * BER) / (beta + 1 - 2 * BER))
        if i == 0:  # consider changing this to a 'code word' instead of 'all'
            DIs["all"] = DI
        else:
            DIs[cols[i - 1]] = DI
    jvm.stop()
    return DIs
示例11: print
# 需要导入模块: from weka.filters import Filter [as 别名]
# 或者: from weka.filters.Filter import inputformat [as 别名]
from weka.core.converters import Loader, Saver
from weka.core.dataset import Instances
from weka.filters import Filter

jvm.start()

# load weather.nominal
arff_file = data_dir + os.sep + "weather.nominal.arff"
print("\nLoading dataset: " + arff_file + "\n")
arff_loader = Loader(classname="weka.core.converters.ArffLoader")
dataset = arff_loader.load_file(arff_file)

# show the header of the raw dataset
print(Instances.template_instances(dataset))

# drop attribute number 3 (Weka indices are 1-based)
print("\nRemove attribute no 3")
remove_filter = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "3"])
remove_filter.inputformat(dataset)
reduced = remove_filter.filter(dataset)

# show the header after filtering
print(Instances.template_instances(reduced))

# write the filtered dataset back to disk
arff_saver = Saver(classname="weka.core.converters.ArffSaver")
arff_saver.save_file(reduced, data_dir + os.sep + "weather.nominal-filtered.arff")

jvm.stop()
示例12: Loader
# 需要导入模块: from weka.filters import Filter [as 别名]
# 或者: from weka.filters.Filter import inputformat [as 别名]
from weka.core.classes import Random
from weka.classifiers import Classifier, Evaluation, PredictionOutput
from weka.filters import Filter

jvm.start()

# load diabetes
arff_file = data_dir + os.sep + "diabetes.arff"
print("\nLoading dataset: " + arff_file + "\n")
arff_loader = Loader(classname="weka.core.converters.ArffLoader")
dataset = arff_loader.load_file(arff_file)

# the class attribute is assigned only after filtering, below
# turn nominal attributes into binary indicator attributes
nom_to_bin = Filter("weka.filters.unsupervised.attribute.NominalToBinary")
nom_to_bin.inputformat(dataset)
binarized = nom_to_bin.filter(dataset)
binarized.class_is_last()

# cross-validate LinearRegression on the binarized data, display model
regressor = Classifier(classname="weka.classifiers.functions.LinearRegression")
pred_output = PredictionOutput(classname="weka.classifiers.evaluation.output.prediction.PlainText")
evaluation = Evaluation(binarized)
evaluation.crossvalidate_model(regressor, binarized, 10, Random(1), pred_output)
print("10-fold cross-validation:\n" + evaluation.summary())
print("Predictions:\n\n" + str(pred_output))
regressor.build_classifier(binarized)
print("Model:\n\n" + str(regressor))

# use AddClassification filter with LinearRegression on filtered data
print("Applying AddClassification to filtered data:\n")
示例13: main
# 需要导入模块: from weka.filters import Filter [as 别名]
# 或者: from weka.filters.Filter import inputformat [as 别名]
def main():
    """
    Just runs some example code: cross-validates J48 on the vote dataset
    and collects per-instance predictions via the AddClassification filter.
    """
    # load a dataset
    data_file = helper.get_data_dir() + os.sep + "vote.arff"
    helper.print_info("Loading dataset: " + data_file)
    loader = Loader("weka.core.converters.ArffLoader")
    data = loader.load_file(data_file)
    data.class_is_last()
    # classifier
    classifier = Classifier(classname="weka.classifiers.trees.J48")
    # randomize data
    folds = 10
    seed = 1
    rnd = Random(seed)
    rand_data = Instances.copy_instances(data)
    rand_data.randomize(rnd)
    if rand_data.class_attribute.is_nominal:
        rand_data.stratify(folds)
    # perform cross-validation and add predictions
    predicted_data = None
    evaluation = Evaluation(rand_data)
    for i in range(folds):  # was xrange (Python 2 only); range works on 2 and 3
        train = rand_data.train_cv(folds, i)
        # the above code is used by the StratifiedRemoveFolds filter,
        # the following code is used by the Explorer/Experimenter
        # train = rand_data.train_cv(folds, i, rnd)
        test = rand_data.test_cv(folds, i)
        # build and evaluate classifier
        cls = Classifier.make_copy(classifier)
        cls.build_classifier(train)
        evaluation.test_model(cls, test)
        # add predictions
        addcls = Filter(
            classname="weka.filters.supervised.attribute.AddClassification",
            options=["-classification", "-distribution", "-error"])
        # setting the java object directly avoids issues with correct quoting in option array
        addcls.set_property("classifier", Classifier.make_copy(classifier))
        addcls.inputformat(train)
        addcls.filter(train)  # trains the classifier
        pred = addcls.filter(test)
        if predicted_data is None:
            predicted_data = Instances.template_instances(pred, 0)
        for n in range(pred.num_instances):
            predicted_data.add_instance(pred.get_instance(n))
    print("")
    print("=== Setup ===")
    print("Classifier: " + classifier.to_commandline())
    print("Dataset: " + data.relationname)
    print("Folds: " + str(folds))
    print("Seed: " + str(seed))
    print("")
    print(evaluation.summary("=== " + str(folds) + " -fold Cross-Validation ==="))
    print("")
    print(predicted_data)
示例14: Loader
# 需要导入模块: from weka.filters import Filter [as 别名]
# 或者: from weka.filters.Filter import inputformat [as 别名]
# load diabetes
loader = Loader(classname="weka.core.converters.ArffLoader")
fname = data_dir + os.sep + "diabetes.arff"
print("\nLoading dataset: " + fname + "\n")
data = loader.load_file(fname)
data.class_is_last()
# simulate the 10 train/test pairs of cross-validation
evl = Evaluation(data)
for i in xrange(1, 11):
# create train set
remove = Filter(
classname="weka.filters.supervised.instance.StratifiedRemoveFolds",
options=["-N", "10", "-F", str(i), "-S", "1", "-V"])
remove.inputformat(data)
train = remove.filter(data)
# create test set
remove = Filter(
classname="weka.filters.supervised.instance.StratifiedRemoveFolds",
options=["-N", "10", "-F", str(i), "-S", "1"])
remove.inputformat(data)
test = remove.filter(data)
cls = Classifier(classname="weka.classifiers.trees.J48")
cls.build_classifier(train)
evl.test_model(cls, test)
print("Simulated CV accuracy: %0.1f%%" % evl.percent_correct)
示例15: remove_attributes
# 需要导入模块: from weka.filters import Filter [as 别名]
# 或者: from weka.filters.Filter import inputformat [as 别名]
def remove_attributes(self, *attributes):
    """
    Removes the named attributes from self.instances in place.

    :param attributes: names of the attributes to remove
    """
    indices = [self.attribute_index(x) for x in attributes]
    # Weka's Remove filter expects ONE-based indices, hence the +1
    remove = Filter(classname="weka.filters.unsupervised.attribute.Remove",
                    options=["-R", ','.join(str(x + 1) for x in indices)])
    remove.inputformat(self.instances)
    self.instances = remove.filter(self.instances)