This article collects typical usage examples of the Python weka.filters.Filter class. If you have been wondering what the Filter class does, how to use it, or want to see it in real code, the curated class examples below should help.
The following 15 code examples of the Filter class are listed, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code samples.
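Most of the examples share one skeleton: start the JVM, load a dataset, configure a Filter by classname and options, prime it with inputformat(), then call filter(). A minimal sketch of that pattern using the python-weka-wrapper3 API (the ARFF path is a placeholder):

import weka.core.jvm as jvm
from weka.core.converters import Loader
from weka.filters import Filter

jvm.start()
loader = Loader(classname="weka.core.converters.ArffLoader")
data = loader.load_file("/path/to/mydata.arff")          # placeholder path
remove = Filter(classname="weka.filters.unsupervised.attribute.Remove",
                options=["-R", "first"])                 # drop the first attribute
remove.inputformat(data)                                 # prime the filter with the input structure
filtered = remove.filter(data)
print(filtered.num_attributes)                           # one less than before
jvm.stop()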
Example 1: filterUnusedFeatureFromList
def filterUnusedFeatureFromList(self, data, unusedFuncitonList):
    filteredData = data
    for attribute in unusedFuncitonList:
        remove = Filter(classname="weka.filters.unsupervised.attribute.RemoveByName",
                        options=["-E", "^" + attribute + ".*$"])
        # set_inputformat is the older python-weka-wrapper spelling; current releases use inputformat
        remove.set_inputformat(filteredData)
        filteredData = remove.filter(filteredData)
    return filteredData
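A hypothetical call, assuming extractor is an instance of the surrounding class: each list entry is wrapped into the anchored pattern ^name.*$, so every attribute whose name starts with that entry is removed.

slim = extractor.filterUnusedFeatureFromList(data, ["getDeviceId", "sendTextMessage"])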
Example 2: attributeSelector
def attributeSelector(self, data, selectNum):
    # Rank attributes by information gain and keep the top selectNum;
    # the -T threshold of -1.7976931348623157E308 (-Double.MAX_VALUE) means no cutoff.
    attributeSelector = Filter(classname="weka.filters.supervised.attribute.AttributeSelection",
                               options=["-S", "weka.attributeSelection.Ranker -T -1.7976931348623157E308 -N " + str(selectNum),
                                        "-E", "weka.attributeSelection.InfoGainAttributeEval"])
    attributeSelector.set_inputformat(data)
    data = attributeSelector.filter(data)
    return data
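Note that InfoGainAttributeEval is supervised, so the class attribute must be set before the filter runs. A hypothetical call, assuming helper is an instance of the surrounding class:

data.class_is_last()                        # python-weka-wrapper3 spelling; older releases use set_class_index(...)
top20 = helper.attributeSelector(data, 20)  # keep the 20 highest-ranked attributes plus the class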
Example 3: getSetDataBySetIndex
def getSetDataBySetIndex(self, data, index):
    # cut the feature set out
    featureTable = FeatureTable()
    startIndexList = featureTable.getEachSetStartIndex()
    start = startIndexList[index]
    end = startIndexList[index + 1] - 1
    # -V inverts the selection: keep attributes start..end plus the class attribute
    remove = Filter(classname="weka.filters.unsupervised.attribute.Remove",
                    options=["-V", "-R", str(start) + "-" + str(end) + ",last"])
    remove.set_inputformat(data)
    filteredData = remove.filter(data)
    return filteredData
Example 4: remove_correct_classified
def remove_correct_classified(self, invert=False):
    options = [
        '-W', self.classifier.to_commandline(),
        '-C', str(self.class_index),  # class index
        # '-F', '0',    # folds
        # '-T', '0.1',  # threshold for numeric classes
        '-I', '0',      # max iterations
        '-V' if not invert else ''    # -V inverts the matching sense
    ]
    classname = "weka.filters.unsupervised.instance.RemoveMisclassified"
    remove = Filter(classname=classname, options=options)
    remove.inputformat(self.data)
    self.data = remove.filter(self.data)
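For reference, RemoveMisclassified builds the classifier passed via -W on the dataset and removes every instance that classifier misclassifies (with -V, it keeps only those). A standalone sketch, assuming a started JVM and a loaded dataset data with its class attribute already set:

from weka.classifiers import Classifier
from weka.filters import Filter

cls = Classifier(classname="weka.classifiers.trees.J48")   # any classifier command line works here
mis = Filter(classname="weka.filters.unsupervised.instance.RemoveMisclassified",
             options=["-W", cls.to_commandline()])          # class index and folds left at their defaults
mis.inputformat(data)
cleaned = mis.filter(data)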
Example 5: emlimitateUnusedFeature
def emlimitateUnusedFeature(self, trainData, testData=None):
    trainData.set_class_index(trainData.num_attributes() - 1)  # set class attribute
    filteredTrainData = trainData
    filteredTestData = testData
    attribute_index = 0
    while attribute_index < filteredTrainData.num_attributes() - 1:
        sampleCoverage = 0
        # check the value of the current feature in each instance
        for instance_index in range(0, filteredTrainData.num_instances()):
            instance = filteredTrainData.get_instance(instance_index)
            value = instance.get_value(attribute_index)
            if value > 0:
                sampleCoverage += 1
        if sampleCoverage == 0:
            # the indices in the -R option are 1-based
            remove = Filter(classname="weka.filters.unsupervised.attribute.Remove",
                            options=["-R", str(attribute_index + 1)])
            remove.set_inputformat(filteredTrainData)
            filteredTrainData = remove.filter(filteredTrainData)
            if filteredTestData:
                remove = Filter(classname="weka.filters.unsupervised.attribute.Remove",
                                options=["-R", str(attribute_index + 1)])
                remove.set_inputformat(filteredTestData)
                filteredTestData = remove.filter(filteredTestData)
        else:
            attribute_index += 1
    return [filteredTrainData, filteredTestData]
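A possible shortcut for this specific case: an all-zero attribute is constant, and Weka's RemoveUseless filter drops constant attributes in a single pass. A sketch under the same train/test assumptions (note that RemoveUseless also prunes nominal attributes with too many distinct values, so it is not a drop-in replacement everywhere):

from weka.filters import Filter

useless = Filter(classname="weka.filters.unsupervised.attribute.RemoveUseless")
useless.inputformat(trainData)               # decide which attributes to drop from the training set
filteredTrainData = useless.filter(trainData)
filteredTestData = useless.filter(testData)  # the initialized filter drops the same attributes from the test set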
Example 6: main
def main():
    """
    Just runs some example code.
    """
    # load a dataset
    iris_file = helper.get_data_dir() + os.sep + "iris.arff"
    helper.print_info("Loading dataset: " + iris_file)
    loader = Loader("weka.core.converters.ArffLoader")
    data = loader.load_file(iris_file)
    # remove class attribute
    data.delete_last_attribute()

    # build a clusterer and output model
    helper.print_title("Training SimpleKMeans clusterer")
    clusterer = Clusterer(classname="weka.clusterers.SimpleKMeans", options=["-N", "3"])
    clusterer.build_clusterer(data)
    print(clusterer)
    helper.print_info("Evaluating on data")
    evaluation = ClusterEvaluation()
    evaluation.set_model(clusterer)
    evaluation.test_model(data)
    print("# clusters: " + str(evaluation.num_clusters))
    print("log likelihood: " + str(evaluation.log_likelihood))
    print("cluster assignments:\n" + str(evaluation.cluster_assignments))
    plc.plot_cluster_assignments(evaluation, data, inst_no=True)

    # using a filtered clusterer
    helper.print_title("Filtered clusterer")
    loader = Loader("weka.core.converters.ArffLoader")
    data = loader.load_file(iris_file)
    clusterer = Clusterer(classname="weka.clusterers.SimpleKMeans", options=["-N", "3"])
    remove = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "last"])
    fclusterer = FilteredClusterer()
    fclusterer.clusterer = clusterer
    fclusterer.filter = remove
    fclusterer.build_clusterer(data)
    print(fclusterer)

    # load a dataset incrementally and build clusterer incrementally
    helper.print_title("Incremental clusterer")
    loader = Loader("weka.core.converters.ArffLoader")
    iris_inc = loader.load_file(iris_file, incremental=True)
    clusterer = Clusterer("weka.clusterers.Cobweb")
    remove = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "last"])
    remove.inputformat(iris_inc)
    iris_filtered = remove.outputformat()
    clusterer.build_clusterer(iris_filtered)
    for inst in loader:
        remove.input(inst)
        inst_filtered = remove.output()
        clusterer.update_clusterer(inst_filtered)
    clusterer.update_finished()
    print(clusterer.to_commandline())
    print(clusterer)
    print(clusterer.graph)
    plg.plot_dot_graph(clusterer.graph)
Example 7: filterOutUnnecessaryAPIAndEvaluateOurApproach
def filterOutUnnecessaryAPIAndEvaluateOurApproach(self, ourApproahFile, apiFile, indexInTable, methodName, databaseTable, csvFilePath):
    outputStr = methodName + ","
    resultList = []
    # Get the whole feature set of our approach
    filteredData = self.load_Arff(ourApproahFile)
    # Get the selected API features; the unselected APIs end up in a list
    filterOutList = self.attribueSelectionBasedOnRankingInDatabase(apiFile, indexInTable, databaseTable, "")[1]
    # Remove the unselected APIs (escape regex metacharacters for RemoveByName)
    for functionName in filterOutList:
        functionName = functionName.split("(")[0] + r"\(\)"
        functionName = functionName.replace('$', r'\$')
        remove = Filter(classname="weka.filters.unsupervised.attribute.RemoveByName",
                        options=["-E", "^" + functionName + ".*$"])
        remove.set_inputformat(filteredData)
        filteredData = remove.filter(filteredData)
    featureNum = filteredData.num_attributes() - 1
    print("featureNum: " + str(featureNum))
    if csvFilePath != "":
        self.writeTenScaledTitleManual(featureNum, csvFilePath)
    # print("i:" + str(i))
    # print("functionName:" + functionName)
    # print("featureNum: " + str(filteredData.num_attributes() - 1))
    for attributeStr in filteredData.attributes():
        print(attributeStr)
    # Run ten-scaled generation and evaluation
    step = 10
    while step < featureNum:
        roundData = self.attributeSelector(filteredData, step)
        classifier = self.algorithmPicker(roundData, indexInTable)
        evaluation = self.evaluation(classifier, roundData)
        # print(self.algorithmTable[indexInTable] + ": " + "{:.2f}".format(evaluation.percent_correct()) + ", Feature select number:" + str(roundData.num_attributes() - 1) + "/" + str(featureNum))
        resultList.append("{:.2f}".format(evaluation.percent_correct()))
        # csvFile.write("{:.2f}".format(evaluation.percent_correct()) + ",")
        step += 10
    classifier = self.algorithmPicker(filteredData, indexInTable)
    evaluation = self.evaluation(classifier, filteredData)
    # print(self.algorithmTable[indexInTable] + ": " + "{:.2f}".format(evaluation.percent_correct()) + ", Feature select number:" + str(filteredData.num_attributes() - 1) + "/" + str(featureNum))
    resultList.append("{:.2f}".format(evaluation.percent_correct()))
    # Write out to the CSV file
    for item in resultList:
        outputStr += item + ","
    outputStr = outputStr[0:-1] + "\n"
    self.writeToPath(csvFilePath, outputStr)
Example 8: use_filter
def use_filter(data):
    """
    Uses the AttributeSelection filter for attribute selection.
    :param data: the dataset to use
    :type data: Instances
    """
    print("\n2. Filter")
    flter = Filter(classname="weka.filters.supervised.attribute.AttributeSelection")
    aseval = ASEvaluation(classname="weka.attributeSelection.CfsSubsetEval")
    assearch = ASSearch(classname="weka.attributeSelection.GreedyStepwise", options=["-B"])
    flter.set_property("evaluator", aseval.jobject)
    flter.set_property("search", assearch.jobject)
    flter.inputformat(data)
    filtered = flter.filter(data)
    print(str(filtered))
Example 9: _pre_process_to_classification
def _pre_process_to_classification(self, dataset):
    filter_data = Filter(classname='weka.filters.unsupervised.attribute.MathExpression',
                         options=['-unset-class-temporarily', '-E', "ifelse ( A>0, 1, 0 )",
                                  '-V', '-R', 'last'])
    filter_data.set_inputformat(dataset)
    filtered = filter_data.filter(dataset)

    discretize_data = Filter(classname='weka.filters.unsupervised.attribute.NumericToNominal',
                             options=['-R', 'last'])
    discretize_data.set_inputformat(filtered)
    discretized = discretize_data.filter(filtered)
    return discretized
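The two passes above (threshold the numeric target, then make it nominal) can also be chained into a single MultiFilter, which is handy when the conversion should travel with a classifier inside a FilteredClassifier. A sketch under the same assumptions (a dataset whose last attribute is the numeric target):

from weka.filters import Filter, MultiFilter

binarize = Filter(classname='weka.filters.unsupervised.attribute.MathExpression',
                  options=['-unset-class-temporarily', '-E', "ifelse ( A>0, 1, 0 )",
                           '-V', '-R', 'last'])
to_nominal = Filter(classname='weka.filters.unsupervised.attribute.NumericToNominal',
                    options=['-R', 'last'])
combo = MultiFilter()
combo.filters = [binarize, to_nominal]
combo.inputformat(dataset)
discretized = combo.filter(dataset)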
Example 10: load
def load(path, db):
    nominals = [
        49,   # dev_double_fp_config
        50,   # dev_endian_little
        51,   # dev_execution_capabilities
        52,   # dev_extensions
        54,   # dev_global_mem_cache_type
        57,   # dev_host_unified_memory
        63,   # dev_image_support
        65,   # dev_local_mem_type
        96,   # dev_queue_properties
        97,   # dev_single_fp_config
        98,   # dev_type
        100,  # dev_vendor_id
    ]
    nominal_indices = ",".join([str(index) for index in nominals])
    force_nominal = ["-N", nominal_indices]

    # Load data from CSV.
    dataset = Dataset.load_csv(path, options=force_nominal)
    dataset.__class__ = Dataset

    # Set class index and database connection.
    dataset.class_index = -1
    dataset.db = db

    # Create string->nominal type attribute filter, ignoring the first
    # attribute (scenario ID), since we're not classifying with it.
    string_to_nominal = WekaFilter(classname=("weka.filters.unsupervised."
                                              "attribute.StringToNominal"),
                                   options=["-R", "2-last"])
    string_to_nominal.inputformat(dataset.instances)

    # Create filtered dataset, and swap data around.
    filtered = string_to_nominal.filter(dataset.instances)
    dataset.instances = filtered
    return dataset
Example 11: Loader
from weka.core.converters import Loader
from weka.core.classes import Random
from weka.classifiers import Classifier, Evaluation, PredictionOutput
from weka.filters import Filter
jvm.start()
# load diabetes
loader = Loader(classname="weka.core.converters.ArffLoader")
fname = data_dir + os.sep + "diabetes.arff"
print("\nLoading dataset: " + fname + "\n")
data = loader.load_file(fname)
# we'll set the class attribute after filtering
# apply NominalToBinary filter and set class attribute
fltr = Filter("weka.filters.unsupervised.attribute.NominalToBinary")
fltr.inputformat(data)
filtered = fltr.filter(data)
filtered.class_is_last()
# cross-validate LinearRegression on filtered data, display model
cls = Classifier(classname="weka.classifiers.functions.LinearRegression")
pout = PredictionOutput(classname="weka.classifiers.evaluation.output.prediction.PlainText")
evl = Evaluation(filtered)
evl.crossvalidate_model(cls, filtered, 10, Random(1), pout)
print("10-fold cross-validation:\n" + evl.summary())
print("Predictions:\n\n" + str(pout))
cls.build_classifier(filtered)
print("Model:\n\n" + str(cls))
# use AddClassification filter with LinearRegression on filtered data
Example 12: remove_attributes
def remove_attributes(self, *attributes):
    indices = [self.attribute_index(x) for x in attributes]
    remove = Filter(classname="weka.filters.unsupervised.attribute.Remove",
                    options=["-R", ','.join(str(x + 1) for x in indices)])
    remove.inputformat(self.instances)
    self.instances = remove.filter(self.instances)
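A hypothetical call, assuming ds is an instance of the surrounding wrapper class and the named attributes exist; attribute_index returns 0-based positions, and the + 1 converts them to Weka's 1-based -R option:

ds.remove_attributes("timestamp", "session_id")  # drops both columns from ds.instances in place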
Example 13: Loader
    if (len_email > 0) and (len_content > 0):
        writer.writerow(row)
# close csvfile
csvfile.close()

# start JVM
jvm.start()

# load CSV file
loader = Loader(classname="weka.core.converters.CSVLoader", options=["-E", '"', "-F", ","])
data = loader.load_file(csvfilename)
#print(data)

# convert class to nominal
wfilter = Filter(classname="weka.filters.unsupervised.attribute.StringToNominal", options=["-R", "last"])
wfilter.set_inputformat(data)
data = wfilter.filter(data)

# convert content to string
wfilter = Filter(classname="weka.filters.unsupervised.attribute.NominalToString", options=["-C", "first"])
wfilter.set_inputformat(data)
data = wfilter.filter(data)

# set class attribute
data.set_class_index(data.num_attributes() - 1)

# generate baseline
zeror = Classifier(classname="weka.classifiers.rules.ZeroR")
evaluation = Evaluation(data)
evaluation.crossvalidate_model(zeror, data, 10, Random(1))
Example 14: main
def main():
    """
    Just runs some example code.
    """
    # load a dataset
    iris_file = helper.get_data_dir() + os.sep + "iris.arff"
    helper.print_info("Loading dataset: " + iris_file)
    loader = Loader("weka.core.converters.ArffLoader")
    iris_data = loader.load_file(iris_file)
    iris_data.class_is_last()

    # classifier help
    helper.print_title("Creating help string")
    classifier = Classifier(classname="weka.classifiers.trees.J48")
    print(classifier.to_help())

    # partial classname
    helper.print_title("Creating classifier from partial classname")
    clsname = ".J48"
    classifier = Classifier(classname=clsname)
    print(clsname + " --> " + classifier.classname)

    # classifier from commandline
    helper.print_title("Creating SMO from command-line string")
    cmdline = 'weka.classifiers.functions.SMO -K "weka.classifiers.functions.supportVector.NormalizedPolyKernel -E 3.0"'
    classifier = from_commandline(cmdline, classname="weka.classifiers.Classifier")
    classifier.build_classifier(iris_data)
    print("input: " + cmdline)
    print("output: " + classifier.to_commandline())
    print("model:\n" + str(classifier))

    # kernel classifier
    helper.print_title("Creating SMO as KernelClassifier")
    kernel = Kernel(classname="weka.classifiers.functions.supportVector.RBFKernel", options=["-G", "0.001"])
    classifier = KernelClassifier(classname="weka.classifiers.functions.SMO", options=["-M"])
    classifier.kernel = kernel
    classifier.build_classifier(iris_data)
    print("classifier: " + classifier.to_commandline())
    print("model:\n" + str(classifier))

    # build a classifier and output model
    helper.print_title("Training J48 classifier on iris")
    classifier = Classifier(classname="weka.classifiers.trees.J48")
    # Instead of using 'options=["-C", "0.3"]' in the constructor, we can also set the "confidenceFactor"
    # property of the J48 classifier itself. However, being of type float rather than double, we need
    # to convert it to the correct type first using the double_to_float function:
    classifier.set_property("confidenceFactor", typeconv.double_to_float(0.3))
    classifier.build_classifier(iris_data)
    print(classifier)
    print(classifier.graph)
    print(classifier.to_source("MyJ48"))
    plot_graph.plot_dot_graph(classifier.graph)

    # evaluate model on test set
    helper.print_title("Evaluating J48 classifier on iris")
    evaluation = Evaluation(iris_data)
    evl = evaluation.test_model(classifier, iris_data)
    print(evl)
    print(evaluation.summary())

    # evaluate model on train/test split
    helper.print_title("Evaluating J48 classifier on iris (random split 66%)")
    classifier = Classifier(classname="weka.classifiers.trees.J48", options=["-C", "0.3"])
    evaluation = Evaluation(iris_data)
    evaluation.evaluate_train_test_split(classifier, iris_data, 66.0, Random(1))
    print(evaluation.summary())

    # load a dataset incrementally and build classifier incrementally
    helper.print_title("Build classifier incrementally on iris")
    helper.print_info("Loading dataset: " + iris_file)
    loader = Loader("weka.core.converters.ArffLoader")
    iris_inc = loader.load_file(iris_file, incremental=True)
    iris_inc.class_is_last()
    classifier = Classifier(classname="weka.classifiers.bayes.NaiveBayesUpdateable")
    classifier.build_classifier(iris_inc)
    for inst in loader:
        classifier.update_classifier(inst)
    print(classifier)

    # construct meta-classifiers
    helper.print_title("Meta classifiers")
    # generic FilteredClassifier instantiation
    print("generic FilteredClassifier instantiation")
    meta = SingleClassifierEnhancer(classname="weka.classifiers.meta.FilteredClassifier")
    meta.classifier = Classifier(classname="weka.classifiers.functions.LinearRegression")
    flter = Filter("weka.filters.unsupervised.attribute.Remove")
    flter.options = ["-R", "first"]
    meta.set_property("filter", flter.jobject)
    print(meta.to_commandline())

    # direct FilteredClassifier instantiation
    print("direct FilteredClassifier instantiation")
    meta = FilteredClassifier()
    meta.classifier = Classifier(classname="weka.classifiers.functions.LinearRegression")
    flter = Filter("weka.filters.unsupervised.attribute.Remove")
    flter.options = ["-R", "first"]
    meta.filter = flter
    print(meta.to_commandline())

    # generic Vote
    print("generic Vote instantiation")
#......... rest of the code omitted .........
Example 15: run_classifier
def run_classifier(path, prot, sel, cols, prot_vals, beta):
    DIs = dict()
    jvm.start()
    for i in range(len(cols) - 1):
        loader = Loader(classname="weka.core.converters.CSVLoader")
        data = loader.load_file(path)

        # remove the selected attribute from the data
        # NOTE: options are ONE indexed, not ZERO indexed
        remove = Filter(classname="weka.filters.unsupervised.attribute.Remove",
                        options=["-R", str(sel[2] + 1)])
        remove.inputformat(data)
        data = remove.filter(data)

        # if running for only one attribute, remove all others (except protected)
        if i > 0:
            for j in range(1, prot[2] + 1):
                if i != j:
                    remove = Filter(classname="weka.filters.unsupervised.attribute.Remove",
                                    options=["-R", ("1" if i > j else "2")])
                    remove.inputformat(data)
                    data = remove.filter(data)

        # set the protected attribute as the class attribute
        data.class_is_last()

        # run classifier
        cls = Classifier(classname="weka.classifiers.bayes.NaiveBayes")
        cls.build_classifier(data)

        # count the number of each combination
        pos_and_pred = 0.0
        pos_and_not_pred = 0.0
        neg_and_pred = 0.0
        neg_and_not_pred = 0.0
        for ind, inst in enumerate(data):
            # classify_instance returns the 0-based label index; non-zero counts as a positive prediction
            if cls.classify_instance(inst):
                if prot_vals[ind] == prot[1]:
                    pos_and_pred += 1
                else:
                    neg_and_pred += 1
            else:
                if prot_vals[ind] == prot[1]:
                    pos_and_not_pred += 1
                else:
                    neg_and_not_pred += 1

        # calculate DI
        BER = ((pos_and_not_pred / (pos_and_pred + pos_and_not_pred)) +
               (neg_and_pred / (neg_and_pred + neg_and_not_pred))) * 0.5
        if BER > 0.5:
            BER = 1 - BER
        DI = 1 - ((1 - 2 * BER) / (beta + 1 - 2 * BER))
        if i == 0:  # consider changing this to a 'code word' instead of 'all'
            DIs["all"] = DI
        else:
            DIs[cols[i - 1]] = DI
    jvm.stop()
    return DIs