本文整理匯總了Python中weka.filters.Filter.filter方法的典型用法代碼示例。如果您正苦於以下問題:Python Filter.filter方法的具體用法?Python Filter.filter怎麼用?Python Filter.filter使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類weka.filters.Filter的用法示例。
在下文中一共展示了Filter.filter方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點贊,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: emlimitateUnusedFeature
# 需要導入模塊: from weka.filters import Filter [as 別名]
# 或者: from weka.filters.Filter import filter [as 別名]
def emlimitateUnusedFeature(self, trainData, testData = None):
    """Remove every feature that is never positive in the training data.

    The last attribute of ``trainData`` is marked as the class attribute and
    is never considered for removal. Each other attribute is dropped when no
    training instance has a value greater than zero for it; the attribute at
    the same position is dropped from ``testData`` as well so both sets keep
    matching columns.

    (The misspelled method name is kept so existing callers keep working.)

    :param trainData: training dataset that decides which features are unused
    :param testData: optional dataset filtered in lock-step with ``trainData``
    :return: ``[filteredTrainData, filteredTestData]``; the second entry is
        ``None`` when no test data was given
    """
    def removeAttribute(dataset, zeroBasedIndex):
        # The Remove filter takes 1-based attribute indices.
        remove = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", str(zeroBasedIndex + 1)])
        remove.set_inputformat(dataset)
        return remove.filter(dataset)

    trainData.set_class_index(trainData.num_attributes() - 1)  # set class attribute
    filteredTrainData = trainData
    filteredTestData = testData
    attribute_index = 0
    while attribute_index < filteredTrainData.num_attributes() - 1:
        # Stop scanning as soon as one instance uses the feature.
        isUsed = any(
            filteredTrainData.get_instance(instance_index).get_value(attribute_index) > 0
            for instance_index in range(filteredTrainData.num_instances())
        )
        if isUsed:
            attribute_index += 1
            continue
        # After a removal the next attribute slides into this position, so
        # the index is deliberately not advanced.
        filteredTrainData = removeAttribute(filteredTrainData, attribute_index)
        # Compare against None rather than truthiness so that a test set that
        # is present but empty still loses the column and stays aligned.
        if filteredTestData is not None:
            filteredTestData = removeAttribute(filteredTestData, attribute_index)
    return [filteredTrainData, filteredTestData]
示例2: select_missclassified
# 需要導入模塊: from weka.filters import Filter [as 別名]
# 或者: from weka.filters.Filter import filter [as 別名]
def select_missclassified(self):
    """Run three Weka filters over ``self.data`` to isolate classification errors.

    Steps, each replacing ``self.data`` with the filter output:

    1. ``AddClassification`` with ``-classification -error`` and the command
       line of ``self.base_classifier`` as ``-W``.
    2. ``RemoveWithValues`` on the last attribute (``-C last -L last -V``).
       Presumably this keeps only the rows flagged as errors -- confirm
       against the Weka documentation.
    3. ``Remove`` for the attribute at 1-based position
       ``num_attributes - 2`` and the last attribute.
    """
    add_classification = Filter(classname="weka.filters.supervised.attribute.AddClassification", options=['-classification' ,'-error' ,'-W' ,self.base_classifier.to_commandline()])
    add_classification.inputformat(self.data)
    self.data = add_classification.filter(self.data)

    # This filter used to be configured and then discarded without ever being
    # applied, so no instance selection took place.
    keep_errors = Filter(classname="weka.filters.unsupervised.instance.RemoveWithValues", options=['-S','0.0','-C','last','-L','last','-V'])
    keep_errors.inputformat(self.data)
    self.data = keep_errors.filter(self.data)

    # NOTE(review): the range is taken from the attribute count after the
    # columns were added above -- confirm that `num_attributes - 2` targets
    # the intended column.
    drop_columns = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=['-R',str(self.data.num_attributes-2)+',last'])
    drop_columns.inputformat(self.data)
    self.data = drop_columns.filter(self.data)
示例3: _pre_process_to_classification
# 需要導入模塊: from weka.filters import Filter [as 別名]
# 或者: from weka.filters.Filter import filter [as 別名]
def _pre_process_to_classification(self, dataset):
    """Turn the last attribute into a 0/1 nominal attribute.

    A ``MathExpression`` filter evaluates ``ifelse ( A>0, 1, 0 )`` (options
    ``-V -R last``, with the class temporarily unset), then a
    ``NumericToNominal`` filter converts the last attribute to nominal.

    :param dataset: dataset to convert
    :return: the dataset produced by the second filter
    """
    binarize_options = ['-unset-class-temporarily', '-E', "ifelse ( A>0, 1, 0 )", '-V', '-R', 'last']
    binarizer = Filter(classname = 'weka.filters.unsupervised.attribute.MathExpression',
                       options = binarize_options)
    binarizer.set_inputformat(dataset)
    binarized = binarizer.filter(dataset)

    to_nominal = Filter(classname = 'weka.filters.unsupervised.attribute.NumericToNominal',
                        options = ['-R', 'last'])
    to_nominal.set_inputformat(binarized)
    return to_nominal.filter(binarized)
示例4: build_and_classify
# 需要導入模塊: from weka.filters import Filter [as 別名]
# 或者: from weka.filters.Filter import filter [as 別名]
def build_and_classify(classifier, classifier_name, approach_name, infile, percentage='10'):
    """
    Creates model and classifies against input data. Returns accuracy statistics.

    :param classifier: classifier object to train and evaluate
    :param classifier_name: label copied into the result row
    :param approach_name: label copied into the result row
    :param infile: path of the CSV file to load
    :param percentage: value for the ``-P`` option of RemovePercentage; may be
        a string or a number (it is converted with ``str`` for the filter)
    :return: ``[approach_name, classifier_name, percentage, percent_correct,
        weighted_f_measure]`` with ``percentage`` exactly as passed in
    """
    # set seed so results are consistent
    random.seed('iot')

    # load data, using the last column as the class
    loader = Loader(classname='weka.core.converters.CSVLoader')
    data = loader.load_file(infile)
    data.class_is_last()

    # convert all numeric attributes to nominal
    to_nominal = Filter(classname='weka.filters.unsupervised.attribute.NumericToNominal',
                        options=['-R', 'first-last'])
    to_nominal.inputformat(data)
    data = to_nominal.filter(data)

    # randomize data with constant seed
    randomize = Filter(classname='weka.filters.unsupervised.instance.Randomize',
                       options=['-S', '42'])
    randomize.inputformat(data)
    data = randomize.filter(data)

    # create training set and testing set; str() lets callers pass the
    # percentage as a number as well as a string
    train_percent_filter = Filter(classname='weka.filters.unsupervised.instance.RemovePercentage',
                                  options=['-P', str(percentage), '-V'])
    train_percent_filter.inputformat(data)
    train = train_percent_filter.filter(data)
    # NOTE(review): the test set is the complete randomized dataset, so it
    # also contains the training rows -- confirm this overlap is intended.
    test = data

    # build and test classifier
    classifier.build_classifier(train)
    evaluation = Evaluation(train)
    evaluation.test_model(classifier, test)

    # return results as array
    results = [
        approach_name,
        classifier_name,
        percentage,
        evaluation.percent_correct,
        evaluation.weighted_f_measure,
    ]
    return results
示例5: filterUnusedFeatureFromList
# 需要導入模塊: from weka.filters import Filter [as 別名]
# 或者: from weka.filters.Filter import filter [as 別名]
def filterUnusedFeatureFromList(self, data, unusedFuncitonList):
    """Apply one ``RemoveByName`` filter per entry of ``unusedFuncitonList``.

    Each entry is placed unescaped into the expression ``^<entry>.*$`` that
    is handed to the filter, and the filters are chained one after another.

    :param data: dataset to start from
    :param unusedFuncitonList: iterable of name prefixes / expressions
    :return: the dataset after the last filter, or ``data`` itself when the
        list is empty
    """
    def dropByPrefix(current, prefix):
        byName = Filter(classname="weka.filters.unsupervised.attribute.RemoveByName", options=["-E", "^" + prefix + ".*$"])
        byName.set_inputformat(current)
        return byName.filter(current)

    remaining = data
    for prefix in unusedFuncitonList:
        remaining = dropByPrefix(remaining, prefix)
    return remaining
示例6: attributeSelector
# 需要導入模塊: from weka.filters import Filter [as 別名]
# 或者: from weka.filters.Filter import filter [as 別名]
def attributeSelector(self, data, selectNum):
    """Filter ``data`` through Weka's supervised ``AttributeSelection`` filter.

    The filter is configured with ``InfoGainAttributeEval`` as evaluator and
    a ``Ranker`` search whose ``-N`` option is set to ``selectNum``.

    :param data: dataset to filter
    :param selectNum: value passed to the Ranker ``-N`` option
    :return: the filtered dataset
    """
    rankerSpec = "weka.attributeSelection.Ranker -T -1.7976931348623157E308 -N " + str(selectNum)
    selectionOptions = ["-S", rankerSpec, "-E", "weka.attributeSelection.InfoGainAttributeEval"]
    selection = Filter(classname="weka.filters.supervised.attribute.AttributeSelection", options=selectionOptions)
    selection.set_inputformat(data)
    return selection.filter(data)
示例7: createTwoDatasets
# 需要導入模塊: from weka.filters import Filter [as 別名]
# 或者: from weka.filters.Filter import filter [as 別名]
def createTwoDatasets(self, wholeDataPath, trainingDataPercentage, trainingPath, testingPath, shuffleSeed = 43):
    """Shuffle a dataset and save it as two ARFF files.

    The data loaded from ``wholeDataPath`` is randomized with ``shuffleSeed``
    and then passed twice through ``RemovePercentage`` with
    ``-P trainingDataPercentage``: once with ``-V`` (saved to
    ``trainingPath``) and once without (saved to ``testingPath``). The
    instance count of each part is printed.

    :param wholeDataPath: path of the ARFF file to split
    :param trainingDataPercentage: percentage handed to RemovePercentage
    :param trainingPath: output path for the ``-V`` part
    :param testingPath: output path for the other part
    :param shuffleSeed: seed for the Randomize filter
    """
    wholeData = self.load_Arff(wholeDataPath)

    randomize = Filter(classname="weka.filters.unsupervised.instance.Randomize", options=["-S", str(shuffleSeed)])
    randomize.set_inputformat(wholeData)
    wholeData = randomize.filter(wholeData)

    removePercentage = Filter(classname="weka.filters.unsupervised.instance.RemovePercentage", options=["-P", str(trainingDataPercentage), "-V"])
    removePercentage.set_inputformat(wholeData)
    trainingData = removePercentage.filter(wholeData)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("instances:" + str(trainingData.num_instances()))

    removePercentage = Filter(classname="weka.filters.unsupervised.instance.RemovePercentage", options=["-P", str(trainingDataPercentage)])
    removePercentage.set_inputformat(wholeData)
    testingData = removePercentage.filter(wholeData)
    print("instances:" + str(testingData.num_instances()))

    self.save_Arff(trainingData, trainingPath)
    self.save_Arff(testingData, testingPath)
示例8: getSetDataBySetIndex
# 需要導入模塊: from weka.filters import Filter [as 別名]
# 或者: from weka.filters.Filter import filter [as 別名]
def getSetDataBySetIndex(self, data, index):
    """Cut one feature set (plus the last attribute) out of ``data``.

    The column range comes from ``FeatureTable().getEachSetStartIndex()``:
    it runs from the start index of set ``index`` up to one before the start
    index of set ``index + 1``. The range and ``last`` are given to the
    ``Remove`` filter together with ``-V`` (inverted selection).

    :param data: dataset to cut from
    :param index: position of the feature set in the start-index list
    :return: the filtered dataset
    """
    # cut feature set out
    setStarts = FeatureTable().getEachSetStartIndex()
    firstColumn = setStarts[index]
    lastColumn = setStarts[index+1] - 1
    keepRange = str(firstColumn) + "-" + str(lastColumn) + ",last"
    keepOnly = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-V", "-R", keepRange])
    keepOnly.set_inputformat(data)
    return keepOnly.filter(data)
示例9: remove_correct_classified
# 需要導入模塊: from weka.filters import Filter [as 別名]
# 或者: from weka.filters.Filter import filter [as 別名]
def remove_correct_classified(self, invert = False):
    """Replace ``self.data`` with the output of a ``RemoveMisclassified`` filter.

    The filter receives the command line of ``self.classifier`` (``-W``),
    ``self.class_index`` (``-C``) and ``-I 0`` (max iterations). ``-V`` is
    added unless ``invert`` is true, in which case an empty string takes its
    place. The folds (``-F``) and numeric-class threshold (``-T``) options
    are not passed.

    :param invert: when true, the ``-V`` flag is left out
    """
    invert_flag = '' if invert else '-V'
    filter_options = [
        '-W', self.classifier.to_commandline(),
        '-C', str(self.class_index),  # class index
        '-I', '0',  # max iterations
        invert_flag,  # invert
    ]
    misclassified = Filter(classname="weka.filters.unsupervised.instance.RemoveMisclassified", options=filter_options)
    misclassified.inputformat(self.data)
    self.data = misclassified.filter(self.data)
示例10: load
# 需要導入模塊: from weka.filters import Filter [as 別名]
# 或者: from weka.filters.Filter import filter [as 別名]
def load(path, db):
    """Load a CSV file into a ``Dataset`` and convert its attribute types.

    The CSV is loaded with ``-N`` listing the columns to force to nominal,
    the result is re-classed as ``Dataset``, and its ``class_index`` and
    ``db`` attributes are set. The instances are then run through
    ``StringToNominal`` and ``NominalToBinary``, both restricted to
    ``2-last`` so the first attribute (scenario ID) is left alone.

    :param path: path of the CSV file
    :param db: database connection stored on the dataset
    :return: the dataset with its ``instances`` replaced by the filtered ones
    """
    nominal_columns = (
        49,  # dev_global_mem_cache_type
        52,  # dev_host_unified_memory
        54,  # dev_local_mem_type
        56,  # dev_type
        57,  # dev_vendor
    )
    csv_options = ["-N", ",".join(str(column) for column in nominal_columns)]

    # Load data from CSV.
    dataset = Dataset.load_csv(path, options=csv_options)
    dataset.__class__ = Dataset

    # Set class index and database connection.
    dataset.class_index = -1
    dataset.db = db

    # String -> nominal, skipping the first attribute (scenario ID) because
    # it is not used for classification.
    to_nominal = WekaFilter(classname=("weka.filters.unsupervised."
                                       "attribute.StringToNominal"),
                            options=["-R", "2-last"])
    to_nominal.inputformat(dataset.instances)
    nominal_instances = to_nominal.filter(dataset.instances)

    # Nominal -> binary, again skipping the first attribute.
    to_binary = WekaFilter(classname="weka.filters.unsupervised.attribute.NominalToBinary",
                           options=["-R", "2-last"])
    to_binary.inputformat(nominal_instances)
    dataset.instances = to_binary.filter(nominal_instances)
    return dataset
示例11: use_filter
# 需要導入模塊: from weka.filters import Filter [as 別名]
# 或者: from weka.filters.Filter import filter [as 別名]
def use_filter(data):
    """
    Runs the AttributeSelection filter over the dataset and prints the result.

    The filter is given a CfsSubsetEval evaluator and a GreedyStepwise search
    (option ``-B``) through its ``evaluator`` and ``search`` properties.

    :param data: the dataset to use
    :type data: Instances
    """
    print("\n2. Filter")
    selection = Filter(classname="weka.filters.supervised.attribute.AttributeSelection")
    evaluator = ASEvaluation(classname="weka.attributeSelection.CfsSubsetEval")
    search = ASSearch(classname="weka.attributeSelection.GreedyStepwise", options=["-B"])
    # Both properties are set before the input format is fixed.
    for property_name, component in (("evaluator", evaluator), ("search", search)):
        selection.set_property(property_name, component.jobject)
    selection.inputformat(data)
    print(str(selection.filter(data)))
示例12: filterOutUnnecessaryAPIAndEvaluateOurApproach
# 需要導入模塊: from weka.filters import Filter [as 別名]
# 或者: from weka.filters.Filter import filter [as 別名]
def filterOutUnnecessaryAPIAndEvaluateOurApproach(self, ourApproahFile, apiFile, indexInTable, methodName, databaseTable, csvFilePath):
    """Remove unselected API features, then evaluate in steps of ten features.

    The dataset from ``ourApproahFile`` is stripped of every attribute that
    matches a name in the "filter out" list returned by
    ``attribueSelectionBasedOnRankingInDatabase``. The remaining data is then
    evaluated for 10, 20, ... selected features (while below the feature
    count) and once with all features. One CSV row
    ``methodName,<accuracy>,...`` is passed to ``writeToPath``.

    :param ourApproahFile: ARFF file with the full feature set
    :param apiFile: passed through to ``attribueSelectionBasedOnRankingInDatabase``
    :param indexInTable: passed to the ranking lookup and to ``algorithmPicker``
    :param methodName: first cell of the CSV row
    :param databaseTable: passed through to the ranking lookup
    :param csvFilePath: CSV output path; the title row is only written when
        it is not the empty string
    """
    resultList = []
    # Get whole feature set of our approach
    filteredData = self.load_Arff(ourApproahFile)
    # Use this function to get selected API feature and save the unselected api in a list
    filterOutList = self.attribueSelectionBasedOnRankingInDatabase(apiFile, indexInTable, databaseTable, "")[1]

    # Remove unselected API
    for functionName in filterOutList:
        # Keep only the part before the first "(" and append a literal "()",
        # escaping "(", ")" and "$" for the filter's regular expression.
        # Raw strings give the same bytes without invalid-escape warnings.
        functionName = functionName.split("(")[0] + r"\(\)"
        functionName = functionName.replace('$', r'\$')
        remove = Filter(classname="weka.filters.unsupervised.attribute.RemoveByName", options=["-E", "^" + functionName + ".*$"])
        remove.set_inputformat(filteredData)
        filteredData = remove.filter(filteredData)

    featureNum = filteredData.num_attributes() - 1
    print("featureNum: " + str(featureNum))
    if csvFilePath != "":
        self.writeTenScaledTitleManual(featureNum, csvFilePath)
    for attributeStr in filteredData.attributes():
        print(attributeStr)

    # Run ten scaled generation and evaluation
    step = 10
    while step < featureNum:
        roundData = self.attributeSelector(filteredData, step)
        classifier = self.algorithmPicker(roundData, indexInTable)
        evaluation = self.evaluation(classifier, roundData)
        resultList.append("{:.2f}".format(evaluation.percent_correct()))
        step += 10

    # Final round with every remaining feature
    classifier = self.algorithmPicker(filteredData, indexInTable)
    evaluation = self.evaluation(classifier, filteredData)
    resultList.append("{:.2f}".format(evaluation.percent_correct()))

    # Write out to CSV file; resultList always holds at least the final
    # round, so this equals the old "append then trim trailing comma" form.
    outputStr = ",".join([methodName] + resultList) + "\n"
    self.writeToPath(csvFilePath, outputStr)
示例13: main
# 需要導入模塊: from weka.filters import Filter [as 別名]
# 或者: from weka.filters.Filter import filter [as 別名]
def main():
    """
    Runs a series of filter examples on the bundled datasets.

    Covers a plain Remove filter, a MultiFilter, StringToWordVector,
    creating a filter from a partial classname, and source code generation.
    """
    data_dir = helper.get_data_dir()

    # load a dataset
    iris_path = data_dir + os.sep + "iris.arff"
    helper.print_info("Loading dataset: " + iris_path)
    iris_loader = Loader("weka.core.converters.ArffLoader")
    iris_data = iris_loader.load_file(iris_path)

    # remove class attribute
    helper.print_info("Removing class attribute")
    drop_last = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "last"])
    drop_last.inputformat(iris_data)
    without_class = drop_last.filter(iris_data)

    # use MultiFilter
    helper.print_info("Use MultiFilter")
    drop_first = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "first"])
    standardize = Filter(classname="weka.filters.unsupervised.attribute.Standardize")
    multi = MultiFilter()
    multi.filters = [drop_first, standardize]
    multi.inputformat(iris_data)
    multi_filtered = multi.filter(iris_data)

    # output datasets
    helper.print_title("Input")
    print(iris_data)
    helper.print_title("Output")
    print(without_class)
    helper.print_title("Output (MultiFilter)")
    print(multi_filtered)

    # load text dataset
    text_path = helper.get_data_dir() + os.sep + "reutersTop10Randomized_1perc_shortened.arff"
    helper.print_info("Loading dataset: " + text_path)
    text_loader = Loader("weka.core.converters.ArffLoader")
    text_data = text_loader.load_file(text_path)
    text_data.class_is_last()

    # apply StringToWordVector
    stemmer = Stemmer(classname="weka.core.stemmers.IteratedLovinsStemmer")
    stopwords = Stopwords(classname="weka.core.stopwords.Rainbow")
    tokenizer = Tokenizer(classname="weka.core.tokenizers.WordTokenizer")
    word_vector = StringToWordVector(options=["-W", "10", "-L", "-C"])
    word_vector.stemmer = stemmer
    word_vector.stopwords = stopwords
    word_vector.tokenizer = tokenizer
    word_vector.inputformat(text_data)
    vectorized = word_vector.filter(text_data)
    helper.print_title("Input (StringToWordVector)")
    print(text_data)
    helper.print_title("Output (StringToWordVector)")
    print(vectorized)

    # partial classname
    helper.print_title("Creating filter from partial classname")
    partial_name = ".Standardize"
    resolved = Filter(classname=partial_name)
    print(partial_name + " --> " + resolved.classname)

    # source code
    helper.print_info("Generate source code")
    labor_path = helper.get_data_dir() + os.sep + "labor.arff"
    helper.print_info("Loading dataset: " + labor_path)
    labor_loader = Loader("weka.core.converters.ArffLoader")
    labor_data = labor_loader.load_file(labor_path)
    replace_missing = Filter(classname="weka.filters.unsupervised.attribute.ReplaceMissingValues")
    replace_missing.inputformat(labor_data)
    # The filtered result is not needed; the filter is run before its
    # source code is generated.
    replace_missing.filter(labor_data)
    print(replace_missing.to_source("MyReplaceMissingValues", labor_data))
示例14: merge_nominal_attributes
# 需要導入模塊: from weka.filters import Filter [as 別名]
# 或者: from weka.filters.Filter import filter [as 別名]
def merge_nominal_attributes(self, significance=0.01):
remove = Filter(classname="weka.filters.supervised.attribute.MergeNominalValues", options=['-L',str(significance),'-R','first-last'])
remove.inputformat(self.data)
self.data = remove.filter(self.data)
示例15: print
# 需要導入模塊: from weka.filters import Filter [as 別名]
# 或者: from weka.filters.Filter import filter [as 別名]
from weka.clusterers import Clusterer, ClusterEvaluation
from weka.filters import Filter
import weka.plot.clusterers as plc
# Clustering example script: start the JVM, cluster iris with SimpleKMeans,
# then configure an AddCluster filter.
jvm.start()
# Load the iris dataset from data_dir (data_dir is defined outside this excerpt).
fname = data_dir + os.sep + "iris.arff"
print("\nLoading dataset: " + fname + "\n")
loader = Loader(classname="weka.core.converters.ArffLoader")
data = loader.load_file(fname)
# Remove the last attribute (the class) so the clusterer does not see it.
flt = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "last"])
flt.set_inputformat(data)
filtered = flt.filter(data)
# Build SimpleKMeans with -N 3 on the filtered data and evaluate it on the same data.
print("\n--> SimpleKMeans\n")
cl = Clusterer(classname="weka.clusterers.SimpleKMeans", options=["-N", "3"])
cl.build_clusterer(filtered)
evl = ClusterEvaluation()
evl.set_model(cl)
evl.test_model(filtered)
print(evl.get_cluster_results())
# The plot is given the original (unfiltered) dataset together with the evaluation.
plc.plot_cluster_assignments(evl, data, atts=[], inst_no=True, wait=True)
# Configure an AddCluster filter that wraps SimpleKMeans -N 3 (flt is rebound here).
print("\n--> AddCluster filter\n")
flt = Filter(classname="weka.filters.unsupervised.attribute.AddCluster",
options=["-W", "weka.clusterers.SimpleKMeans -N 3"])