本文整理匯總了Python中weka.filters.Filter.set_inputformat方法的典型用法代碼示例。如果您正苦於以下問題:Python Filter.set_inputformat方法的具體用法?Python Filter.set_inputformat怎麽用?Python Filter.set_inputformat使用的例子?那麽,這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類weka.filters.Filter的用法示例。
在下文中一共展示了Filter.set_inputformat方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。
示例1: emlimitateUnusedFeature
# 需要導入模塊: from weka.filters import Filter [as 別名]
# 或者: from weka.filters.Filter import set_inputformat [as 別名]
def emlimitateUnusedFeature(self, trainData, testData=None):
    """Remove every non-class attribute that is never positive in the training data.

    The last attribute of ``trainData`` is set as the class attribute and is
    never considered for removal.  An attribute is treated as unused when no
    training instance has a value greater than 0 for it.  Whenever such an
    attribute is removed from the training data, the attribute at the same
    position is also removed from ``testData`` (when one is given), so both
    datasets keep the same column layout.

    :param trainData: training dataset; its class index is set as a side effect
    :param testData: optional test dataset, filtered in step with ``trainData``
    :return: ``[filteredTrainData, filteredTestData]``; the second element is
        whatever was passed as ``testData`` when nothing had to be removed
    """
    # The last attribute is the class attribute.
    trainData.set_class_index(trainData.num_attributes() - 1)
    filteredTrainData = trainData
    filteredTestData = testData
    attribute_index = 0
    while attribute_index < filteredTrainData.num_attributes() - 1:
        # Stop scanning at the first instance that uses this attribute.
        is_used = any(
            filteredTrainData.get_instance(instance_index).get_value(attribute_index) > 0
            for instance_index in range(filteredTrainData.num_instances())
        )
        if is_used:
            attribute_index += 1
            continue
        # The attribute range of the Remove filter is 1-based.
        remove_options = ["-R", str(attribute_index + 1)]
        remove = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=remove_options)
        remove.set_inputformat(filteredTrainData)
        filteredTrainData = remove.filter(filteredTrainData)
        if filteredTestData:
            remove = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=list(remove_options))
            remove.set_inputformat(filteredTestData)
            filteredTestData = remove.filter(filteredTestData)
        # attribute_index is deliberately not advanced: after the removal the
        # next attribute has moved into this position.
    return [filteredTrainData, filteredTestData]
示例2: filterUnusedFeatureFromList
# 需要導入模塊: from weka.filters import Filter [as 別名]
# 或者: from weka.filters.Filter import set_inputformat [as 別名]
def filterUnusedFeatureFromList(self, data, unusedFuncitonList):
    """Remove from ``data`` the attributes whose names match the given prefixes.

    Each entry of ``unusedFuncitonList`` is inserted verbatim into the regular
    expression ``^<entry>.*$`` that is handed to Weka's ``RemoveByName``
    filter, so any regex metacharacters in an entry are interpreted as such
    (callers are expected to escape them beforehand).

    :param data: dataset to filter
    :param unusedFuncitonList: iterable of attribute-name prefixes (regex fragments)
    :return: the filtered dataset; ``data`` itself when the list is empty
    """
    filteredData = data
    for attribute in unusedFuncitonList:
        # One filter per entry, applied to the result of the previous one.
        remove = Filter(classname="weka.filters.unsupervised.attribute.RemoveByName", options=["-E", "^" + attribute + ".*$"])
        remove.set_inputformat(filteredData)
        filteredData = remove.filter(filteredData)
    return filteredData
示例3: attributeSelector
# 需要導入模塊: from weka.filters import Filter [as 別名]
# 或者: from weka.filters.Filter import set_inputformat [as 別名]
def attributeSelector(self, data, selectNum):
    """Run Weka's supervised ``AttributeSelection`` filter on ``data``.

    The filter is configured with the ``InfoGainAttributeEval`` evaluator and
    a ``Ranker`` search whose ``-N`` option is ``selectNum`` and whose ``-T``
    threshold is the most negative double value.

    :param data: dataset to filter
    :param selectNum: value passed to the Ranker's ``-N`` option
    :return: the dataset produced by the filter
    """
    ranker_spec = "weka.attributeSelection.Ranker -T -1.7976931348623157E308 -N " + str(selectNum)
    # Named differently from the method to avoid shadowing it inside the body.
    selection_filter = Filter(
        classname="weka.filters.supervised.attribute.AttributeSelection",
        options=["-S", ranker_spec,
                 "-E", "weka.attributeSelection.InfoGainAttributeEval"])
    selection_filter.set_inputformat(data)
    return selection_filter.filter(data)
示例4: getSetDataBySetIndex
# 需要導入模塊: from weka.filters import Filter [as 別名]
# 或者: from weka.filters.Filter import set_inputformat [as 別名]
def getSetDataBySetIndex(self, data, index):
    """Cut one feature set (plus the last attribute) out of ``data``.

    The attribute range of the set is taken from
    ``FeatureTable().getEachSetStartIndex()``: it runs from the start index of
    set ``index`` up to one before the start index of set ``index + 1``.  The
    Remove filter is run with ``-V`` and the range ``"<start>-<end>,last"``.

    :param data: dataset to cut the feature set from
    :param index: position of the wanted set in the start-index list; the list
        must also contain an entry at ``index + 1``
    :return: the dataset produced by the filter
    """
    # cut feature set out
    startIndexList = FeatureTable().getEachSetStartIndex()
    start = startIndexList[index]
    end = startIndexList[index + 1] - 1
    attribute_range = str(start) + "-" + str(end) + ",last"
    remove = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-V", "-R", attribute_range])
    remove.set_inputformat(data)
    return remove.filter(data)
示例5: _pre_process_to_classification
# 需要導入模塊: from weka.filters import Filter [as 別名]
# 或者: from weka.filters.Filter import set_inputformat [as 別名]
def _pre_process_to_classification(self, dataset):
    """Prepare ``dataset`` for classification with two chained Weka filters.

    First a ``MathExpression`` filter with the expression
    ``ifelse ( A>0, 1, 0 )`` is applied (options ``-unset-class-temporarily``,
    ``-V``, ``-R last``), then a ``NumericToNominal`` filter with ``-R last``
    is applied to that result.

    :param dataset: dataset to transform
    :return: the dataset produced by the second filter
    """
    binarize = Filter(classname='weka.filters.unsupervised.attribute.MathExpression',
                      options=['-unset-class-temporarily', '-E', "ifelse ( A>0, 1, 0 )",
                               '-V', '-R', 'last'])
    binarize.set_inputformat(dataset)
    binarized = binarize.filter(dataset)

    # The second filter consumes the output of the first one.
    to_nominal = Filter(classname='weka.filters.unsupervised.attribute.NumericToNominal',
                        options=['-R', 'last'])
    to_nominal.set_inputformat(binarized)
    return to_nominal.filter(binarized)
示例6: filterOutUnnecessaryAPIAndEvaluateOurApproach
# 需要導入模塊: from weka.filters import Filter [as 別名]
# 或者: from weka.filters.Filter import set_inputformat [as 別名]
def filterOutUnnecessaryAPIAndEvaluateOurApproach(self, ourApproahFile, apiFile, indexInTable, methodName, databaseTable, csvFilePath):
    """Drop unselected API attributes, then evaluate on growing feature subsets.

    Steps, as performed by the code:

    1. Load the dataset from ``ourApproahFile`` with ``self.load_Arff``.
    2. Take element ``[1]`` of
       ``self.attribueSelectionBasedOnRankingInDatabase(apiFile, indexInTable,
       databaseTable, "")`` as the list of API names to remove, and remove the
       matching attributes by name.
    3. For ``step`` = 10, 20, ... (while ``step`` < number of remaining
       features) select attributes with ``self.attributeSelector``, build a
       classifier with ``self.algorithmPicker`` and record
       ``percent_correct()`` from ``self.evaluation``.
    4. Do the same once more on the full remaining feature set.
    5. Write ``methodName`` followed by all recorded accuracies as one
       comma-separated line via ``self.writeToPath``.

    :param ourApproahFile: path handed to ``self.load_Arff``
    :param apiFile: forwarded to ``attribueSelectionBasedOnRankingInDatabase``
    :param indexInTable: forwarded to the ranking helper and ``algorithmPicker``
    :param methodName: first column of the CSV line that is written
    :param databaseTable: forwarded to the ranking helper
    :param csvFilePath: target of the result line; when non-empty a title row
        is written first through ``self.writeTenScaledTitleManual``
    """
    # Get whole feature set of our approach
    filteredData = self.load_Arff(ourApproahFile)
    # Use this function to get selected API feature and save the unselected api in a list
    filterOutList = self.attribueSelectionBasedOnRankingInDatabase(apiFile, indexInTable, databaseTable, "")[1]
    # Remove unselected API
    for functionName in filterOutList:
        # Keep the part before "(" and append an escaped "()"; raw strings
        # give the same characters as the former "\(\)" / '\$' literals
        # without relying on invalid escape sequences.
        pattern = functionName.split("(")[0] + r"\(\)"
        pattern = pattern.replace('$', r'\$')
        remove = Filter(classname="weka.filters.unsupervised.attribute.RemoveByName", options=["-E", "^" + pattern + ".*$"])
        remove.set_inputformat(filteredData)
        filteredData = remove.filter(filteredData)
    featureNum = filteredData.num_attributes() - 1
    print("featureNum: " + str(featureNum))
    if csvFilePath != "":
        self.writeTenScaledTitleManual(featureNum, csvFilePath)
    for attributeStr in filteredData.attributes():
        print(attributeStr)

    # Run ten scaled generation and evaluation
    resultList = []
    step = 10
    while step < featureNum:
        roundData = self.attributeSelector(filteredData, step)
        classifier = self.algorithmPicker(roundData, indexInTable)
        evaluation = self.evaluation(classifier, roundData)
        resultList.append("{:.2f}".format(evaluation.percent_correct()))
        step += 10

    # Final round on every remaining feature.
    classifier = self.algorithmPicker(filteredData, indexInTable)
    evaluation = self.evaluation(classifier, filteredData)
    resultList.append("{:.2f}".format(evaluation.percent_correct()))

    # Write out to CSV file
    outputStr = methodName + "," + ",".join(resultList) + "\n"
    self.writeToPath(csvFilePath, outputStr)
示例7: createTwoDatasets
# 需要導入模塊: from weka.filters import Filter [as 別名]
# 或者: from weka.filters.Filter import set_inputformat [as 別名]
def createTwoDatasets(self, wholeDataPath, trainingDataPercentage, trainingPath, testingPath, shuffleSeed=43):
    """Shuffle a dataset and split it into a training file and a testing file.

    The dataset is loaded with ``self.load_Arff``, shuffled by Weka's
    ``Randomize`` filter using ``shuffleSeed``, and split with two
    ``RemovePercentage`` filters that share the same ``-P`` value: the one
    with ``-V`` (inverted selection) produces the training data, the one
    without it produces the testing data.  Both parts are saved with
    ``self.save_Arff`` and their instance counts are printed.

    :param wholeDataPath: path of the dataset to split
    :param trainingDataPercentage: value for the ``-P`` option of both split filters
    :param trainingPath: where the training part is saved
    :param testingPath: where the testing part is saved
    :param shuffleSeed: seed for the Randomize filter
    """
    wholeData = self.load_Arff(wholeDataPath)

    randomize = Filter(classname="weka.filters.unsupervised.instance.Randomize", options=["-S", str(shuffleSeed)])
    randomize.set_inputformat(wholeData)
    wholeData = randomize.filter(wholeData)

    percentage = str(trainingDataPercentage)
    removePercentage = Filter(classname="weka.filters.unsupervised.instance.RemovePercentage", options=["-P", percentage, "-V"])
    removePercentage.set_inputformat(wholeData)
    trainingData = removePercentage.filter(wholeData)
    print("instances:" + str(trainingData.num_instances()))

    removePercentage = Filter(classname="weka.filters.unsupervised.instance.RemovePercentage", options=["-P", percentage])
    removePercentage.set_inputformat(wholeData)
    testingData = removePercentage.filter(wholeData)
    print("instances:" + str(testingData.num_instances()))

    self.save_Arff(trainingData, trainingPath)
    self.save_Arff(testingData, testingPath)
示例8: print
# 需要導入模塊: from weka.filters import Filter [as 別名]
# 或者: from weka.filters.Filter import set_inputformat [as 別名]
# Load the ARFF file named by fname and use its last attribute as the class.
print("\nLoading dataset: " + fname + "\n")
loader = Loader(classname="weka.core.converters.ArffLoader")
data = loader.load_file(fname)
data.set_class_index(data.num_attributes() - 1)

# Scatter plot of the "petalwidth" attribute against "petallength"; wait=False
# is passed so the call is not asked to wait.
petalwidth_index = data.get_attribute_by_name("petalwidth").get_index()
petallength_index = data.get_attribute_by_name("petallength").get_index()
pld.scatter_plot(data, petalwidth_index, petallength_index, wait=False)

# Run the AddClassification filter (J48 as base classifier, with the
# "-classification" and "-error" flags) and print the resulting dataset.
addcls_options = ["-W", "weka.classifiers.trees.J48", "-classification", "-error"]
addcls = Filter(
    classname="weka.filters.supervised.attribute.AddClassification",
    options=addcls_options)
addcls.set_inputformat(data)
filtered = addcls.filter(data)
print(filtered)

# Build J48 on the unfiltered data and test it on that same data.
cls = Classifier(classname="weka.classifiers.trees.J48")
cls.build_classifier(data)
evl = Evaluation(data)
evl.test_model(cls, data)

# Plot the classifier errors from the evaluation's predictions, then stop the JVM.
predictions = evl.predictions()
plc.plot_classifier_errors(predictions, wait=True)
jvm.stop()
示例9: print
# 需要導入模塊: from weka.filters import Filter [as 別名]
# 或者: from weka.filters.Filter import set_inputformat [as 別名]
from weka.core.converters import Loader, Saver
from weka.core.dataset import Instances
from weka.filters import Filter
jvm.start()

# Load weather.nominal from data_dir.
fname = data_dir + os.sep + "weather.nominal.arff"
print("\nLoading dataset: " + fname + "\n")
loader = Loader(classname="weka.core.converters.ArffLoader")
data = loader.load_file(fname)

# Print what Instances.template_instances returns for the loaded data.
original_header = Instances.template_instances(data)
print(original_header)

# Run the Remove filter with "-R 3" on the data.
print("\nRemove attribute no 3")
remove_options = ["-R", "3"]
fltr = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=remove_options)
fltr.set_inputformat(data)
filtered = fltr.filter(data)

# Print the same header view for the filtered data.
filtered_header = Instances.template_instances(filtered)
print(filtered_header)

# Save the filtered dataset next to the input file, then stop the JVM.
saver = Saver(classname="weka.core.converters.ArffSaver")
filtered_fname = data_dir + os.sep + "weather.nominal-filtered.arff"
saver.save_file(filtered, filtered_fname)
jvm.stop()
示例10: print
# 需要導入模塊: from weka.filters import Filter [as 別名]
# 或者: from weka.filters.Filter import set_inputformat [as 別名]
from weka.core.converters import Loader
from weka.clusterers import Clusterer, ClusterEvaluation
from weka.filters import Filter
import weka.plot.clusterers as plc
jvm.start()

# Load iris from data_dir.
fname = data_dir + os.sep + "iris.arff"
print("\nLoading dataset: " + fname + "\n")
loader = Loader(classname="weka.core.converters.ArffLoader")
data = loader.load_file(fname)

# Remove the last attribute (the class attribute, per the original comment)
# before clustering.
remove_last = ["-R", "last"]
flt = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=remove_last)
flt.set_inputformat(data)
filtered = flt.filter(data)

# Build SimpleKMeans with "-N 3" on the filtered data and evaluate it there.
print("\n--> SimpleKMeans\n")
kmeans_options = ["-N", "3"]
cl = Clusterer(classname="weka.clusterers.SimpleKMeans", options=kmeans_options)
cl.build_clusterer(filtered)
evl = ClusterEvaluation()
evl.set_model(cl)
evl.test_model(filtered)
cluster_results = evl.get_cluster_results()
print(cluster_results)
# The plot is given the unfiltered data together with the evaluation.
plc.plot_cluster_assignments(evl, data, atts=[], inst_no=True, wait=True)

# use AddCluster filter
print("\n--> AddCluster filter\n")
flt = Filter(classname="weka.filters.unsupervised.attribute.AddCluster",
示例11: Loader
# 需要導入模塊: from weka.filters import Filter [as 別名]
# 或者: from weka.filters.Filter import set_inputformat [as 別名]
# NOTE(review): this excerpt starts mid-script. `writer`, `row`, `csvfile`,
# `csvfilename`, `jvm`, `Loader`, `Classifier`, `Evaluation` and `Random` are
# defined above the visible part, and the original nesting of the next line
# (for example inside a loop) cannot be determined from here.
writer.writerow(row)
# close csvfile
csvfile.close()
# start JVM
jvm.start()
# load CSV file
# The loader receives "-E" with a double-quote character and "-F" with a comma
# (presumably the enclosure and field-separator options of CSVLoader; confirm
# against the Weka documentation).
loader = Loader(classname="weka.core.converters.CSVLoader", options=["-E", '"', "-F", ","])
data = loader.load_file(csvfilename)
#print(data)
# convert class to nominal
# StringToNominal is applied with "-R last", i.e. to the last attribute.
wfilter = Filter(classname="weka.filters.unsupervised.attribute.StringToNominal", options=["-R", "last"])
wfilter.set_inputformat(data)
data = wfilter.filter(data)
# convert content to string
# NominalToString is applied with "-C first", i.e. to the first attribute.
wfilter = Filter(classname="weka.filters.unsupervised.attribute.NominalToString", options=["-C", "first"])
wfilter.set_inputformat(data)
data = wfilter.filter(data)
# set class attribute
data.set_class_index(data.num_attributes() - 1)
# generate baseline
# ZeroR is cross-validated with 10 folds and Random(1); only the summary text
# of the evaluation is printed.
zeror = Classifier(classname="weka.classifiers.rules.ZeroR")
evaluation = Evaluation(data)
evaluation.crossvalidate_model(zeror, data, 10, Random(1))
print("\nBaseline:\n" + evaluation.to_summary())
示例12: getTenScaledResultsRankedByInfo
# 需要導入模塊: from weka.filters import Filter [as 別名]
# 或者: from weka.filters.Filter import set_inputformat [as 別名]
# Evaluate a classifier on growing subsets of the best-ranked features.
#
# For step = 10, 20, ... (while step < number of features) the visible code
# selects attributes with self.attributeSelector, removes every other
# attribute by name from the training data (and from the testing data when one
# is given), builds a classifier with self.algorithmPicker, and records
# percent_correct() from self.evaluation. It then evaluates once more on the
# unfiltered data and keeps track of the best accuracy seen.
#
# Parameters as used below:
#   trainingData  - dataset the features are ranked and trained on
#   indexInTable  - index into self.algorithmTable / passed to algorithmPicker
#   csvFilePath   - file opened in append mode for the results
#   testingData   - optional dataset filtered in step with trainingData
#
# NOTE(review): leading indentation was lost in this copy and the end of the
# method is omitted, so the exact nesting of the statements and the return
# value cannot be determined from here.
def getTenScaledResultsRankedByInfo(self, trainingData, indexInTable, csvFilePath, testingData = None):
# NOTE(review): dbmgr is not used in the visible part of the method.
dbmgr = permissionMappingManager(databasePath)
featureNum = trainingData.num_attributes() - 1
attributeIn = trainingData.attributes()
attributeList = []
# Build a regex fragment for every attribute of the full training data.
for item in attributeIn:
# Second space-separated token of the attribute's string form (presumably the
# attribute name - confirm against the Attribute string format).
functionName = str(item).split(" ")[1]
# Keep the text before "(" and append the characters \(\) ; "$" becomes \$ .
functionName = functionName.split("(")[0] + "\(\)"
functionName = functionName.replace('$','\$')
#print functionName
attributeList.append(functionName)
outputStr = ""
outputStr += "InfomationGain" + ","
resultList = []
# Numeric 0 is used as the "nothing recorded yet" placeholder for the best data.
bestAccuracy = 0
bestTrainData = 0
bestTestData = 0
#for index in range(0, len(attributeList)-1):
# attributeList[index] = attributeList[index].split(" ")[1]
# print attributeList[index]
# NOTE(review): no matching close() is visible in this excerpt - confirm the
# omitted tail closes the file.
csvFile = open(csvFilePath, "a")
csvFile.write(self.algorithmTable[indexInTable]+",")
step = 10
while step < featureNum:
# pick top features
filteredTrainData = self.attributeSelector(trainingData, step)
# check top feature informations
# NOTE(review): APIList and numberOfInstance are not used in the visible part.
APIList = []
for item in filteredTrainData.attributes():
#print str(item)
functionName = str(item).split(" ")[1]
#functionName = functionName.split("_")[0][1:]
APIList.append(functionName)
numberOfInstance = self.getNumOfInstance(trainingData)
# Get those features that it doesn't pick
filteredList = []
attributeIn = filteredTrainData.attributes()
for item in attributeIn:
functionName = str(item).split(" ")[1]
functionName = functionName.split("(")[0] + "\(\)"
functionName = functionName.replace('$','\$')
filteredList.append(functionName)
# items is computed from the full attribute list and the selected list by
# self.getItemsNotInTheList; each entry is used as a removal pattern below.
items = self.getItemsNotInTheList(attributeList, filteredList)
#print len(items)
#for item in items:
# print item
# Re-process training data and make testing Data synchronized
# Start again from the unfiltered datasets and remove the attributes by name.
filteredTrainData = trainingData
filterTestingData = testingData
for attribute in items:
remove = Filter(classname="weka.filters.unsupervised.attribute.RemoveByName", options=["-E", "^" + attribute + ".*$"])
remove.set_inputformat(filteredTrainData)
filteredTrainData = remove.filter(filteredTrainData)
# The same filter object is re-initialised with the testing data's format.
if filterTestingData:
remove.set_inputformat(filterTestingData)
filterTestingData = remove.filter(filterTestingData)
#print attribute
#print str(filteredTrainData.num_attributes() - 1)
# Build classifier and evaluate it
classifier = self.algorithmPicker(filteredTrainData, indexInTable)
evaluation = self.evaluation(classifier, filteredTrainData, filterTestingData)
#print(self.algorithmTable[indexInTable] + ": " + "{:.2f}".format(evaluation.percent_correct()) + ", Feature select number:" + str(filteredTrainData.num_attributes() - 1) + "/" + str(featureNum))
resultList.append("{:.2f}".format(evaluation.percent_correct()))
#Save best data and accuracy
# Strict ">" means an equal accuracy does not replace the stored best.
if evaluation.percent_correct() > bestAccuracy:
bestAccuracy = evaluation.percent_correct()
bestTrainData = filteredTrainData
if testingData:
bestTestData = filterTestingData
#bestEvaluation = evaluation
step += 10
# Evaluation on the unfiltered training/testing data.
classifier = self.algorithmPicker(trainingData, indexInTable)
evaluation = self.evaluation(classifier, trainingData, testingData)
#print(self.algorithmTable[indexInTable] + ": " + "{:.2f}".format(evaluation.percent_correct()) + ", Feature select number:" + str(trainingData.num_attributes() - 1) + "/" + str(featureNum))
resultList.append("{:.2f}".format(evaluation.percent_correct()))
#Save best data and accuracy
if evaluation.percent_correct() > bestAccuracy:
bestAccuracy = evaluation.percent_correct()
#......... part of the code is omitted here .........
示例13: attribueSelectionBasedOnRankingInDatabase
# 需要導入模塊: from weka.filters import Filter [as 別名]
# 或者: from weka.filters.Filter import set_inputformat [as 別名]
# Evaluate a classifier while removing features in the order given by a
# database table.
#
# The first column of every row of `databaseTable` is read into a list. The
# classifier is evaluated on the unmodified data first; then features popped
# from the END of that list are removed by name from the training data (and
# from the testing data when one is given): first len(list) % 10 of them, then
# groups of 10 while the training data has more than 10 non-class attributes.
# After removals the classifier is rebuilt and evaluated, and the best
# accuracy, datasets, remaining feature list and evaluation are remembered.
#
# NOTE(review): leading indentation was lost in this copy, so whether each
# evaluation runs once per removed feature or once per group cannot be
# determined from here; the end of the method (including its return value) is
# omitted as well.
def attribueSelectionBasedOnRankingInDatabase(self, trainingData, indexInTable, databaseTable, csvFilePath, testingData = None):
featureNum = trainingData.num_attributes() - 1
outputStr = ""
outputStr += databaseTable+","
# select from database vector difference
featureList3 = []
wholefeatureList = []
dbmgr = permissionMappingManager(databasePath)
# NOTE(review): the table name is concatenated into the SQL text; confirm that
# databaseTable never comes from untrusted input.
for row in dbmgr.query("select * from " + databaseTable):
featureList3.append(row[0])
wholefeatureList.append(row[0])
#featureList3.reverse()
bestRemainFilterList = []
resultList = []
# Number of features left over after grouping the list into tens.
digit = len(featureList3) % 10
bestAccuracy = 0
bestTrainingData = None
bestTestingData = None
bestEvaluation = None
# Baseline evaluation on the data as passed in.
classifier = self.algorithmPicker(trainingData, indexInTable)
evaluation = self.evaluation(classifier, trainingData, testingData)
# ">=" means a later result with equal accuracy replaces the stored best.
if evaluation.percent_correct() >= bestAccuracy:
bestAccuracy = evaluation.percent_correct()
bestTrainingData = trainingData
bestTestingData = testingData
bestRemainFilterList = list(featureList3)
bestEvaluation = evaluation
print(self.algorithmTable[indexInTable] + ": " + "{:.2f}".format(evaluation.percent_correct()) + ", Feature select number:" + str(trainingData.num_attributes() - 1) + "/" + str(featureNum))
resultList.append("{:.2f}".format(evaluation.percent_correct()))
# Remove the leftover features first.
if digit > 0:
for i in range(0, digit):
# Pop the last list entry, keep the text before "(" and append the
# characters \(\) ; "$" becomes \$ . The result is used as a regex fragment.
functionName = featureList3.pop().split("(")[0] + "\(\)"
functionName = functionName.replace('$','\$')
#print "functionName:" + functionName
remove = Filter(classname="weka.filters.unsupervised.attribute.RemoveByName", options=["-E", "^" + functionName + ".*$"])
remove.set_inputformat(trainingData)
trainingData = remove.filter(trainingData)
# The same filter object is re-initialised with the testing data's format.
if testingData:
remove.set_inputformat(testingData)
testingData = remove.filter(testingData)
#print "i:" + str(i)
#print "functionName:" + functionName
#print "featureNum: " + str(filteredData.num_attributes() - 1)
#for attributeStr in trainingData.attributes():
# print(attributeStr)
#self.printFunctionInfo(trainingData, trainingData.num_instances())
classifier = self.algorithmPicker(trainingData, indexInTable)
evaluation = self.evaluation(classifier, trainingData, testingData)
if evaluation.percent_correct() >= bestAccuracy:
bestAccuracy = evaluation.percent_correct()
bestTrainingData = trainingData
bestTestingData = testingData
bestRemainFilterList = list(featureList3)
bestEvaluation = evaluation
print(self.algorithmTable[indexInTable] + ": " + "{:.2f}".format(evaluation.percent_correct()) + ", Feature select number:" + str(trainingData.num_attributes() - 1) + "/" + str(featureNum))
resultList.append("{:.2f}".format(evaluation.percent_correct()))
# Keep removing groups of 10 while more than 10 non-class attributes remain.
# NOTE(review): featureList3.pop() raises IndexError once the list is empty;
# confirm the table always lists at least as many features as are removed.
while trainingData.num_attributes() - 1 > 10:
for i in range(0,10):
functionName = featureList3.pop().split("(")[0] + "\(\)"
functionName = functionName.replace('$','\$')
#print "functionName:" + functionName
remove = Filter(classname="weka.filters.unsupervised.attribute.RemoveByName", options=["-E", "^" + functionName + ".*$"])
remove.set_inputformat(trainingData)
trainingData = remove.filter(trainingData)
if testingData:
remove.set_inputformat(testingData)
testingData = remove.filter(testingData)
#print functionName
#print "featureNum: " + str(filteredData.num_attributes() - 1)
#for attributeStr in trainingData.attributes():
# print(attributeStr)
classifier = self.algorithmPicker(trainingData, indexInTable)
evaluation = self.evaluation(classifier, trainingData, testingData)
if evaluation.percent_correct() >= bestAccuracy:
bestAccuracy = evaluation.percent_correct()
bestTrainingData = trainingData
bestTestingData = testingData
bestRemainFilterList = list(featureList3)
bestEvaluation = evaluation
#print "update feature number:" + str(len(bestRemainFilterList))
print(self.algorithmTable[indexInTable] + ": " + "{:.2f}".format(evaluation.percent_correct()) + ", Feature select number:" + str(trainingData.num_attributes() - 1) + "/" + str(featureNum))
resultList.append("{:.2f}".format(evaluation.percent_correct()))
# Results were appended from the largest feature set to the smallest; reverse
# the list in place.
resultList.reverse()
#......... part of the code is omitted here .........
示例14: Loader
# 需要導入模塊: from weka.filters import Filter [as 別名]
# 或者: from weka.filters.Filter import set_inputformat [as 別名]
# load diabetes
loader = Loader(classname="weka.core.converters.ArffLoader")
fname = data_dir + os.sep + "diabetes.arff"
print("\nLoading dataset: " + fname + "\n")
data = loader.load_file(fname)
data.set_class_index(data.num_attributes() - 1)

# simulate the 10 train/test pairs of cross-validation
# A single Evaluation object accumulates the results of all ten test folds.
evl = Evaluation(data)
# range() instead of xrange() so the loop runs on Python 2 and Python 3 alike.
for i in range(1, 11):
    # create train set: same fold options as the test set plus "-V"
    remove = Filter(
        classname="weka.filters.supervised.instance.StratifiedRemoveFolds",
        options=["-N", "10", "-F", str(i), "-S", "1", "-V"])
    remove.set_inputformat(data)
    train = remove.filter(data)

    # create test set
    remove = Filter(
        classname="weka.filters.supervised.instance.StratifiedRemoveFolds",
        options=["-N", "10", "-F", str(i), "-S", "1"])
    remove.set_inputformat(data)
    test = remove.filter(data)

    # A fresh J48 is trained on the train part and tested on the test part.
    cls = Classifier(classname="weka.classifiers.trees.J48")
    cls.build_classifier(train)
    evl.test_model(cls, test)

print("Simulated CV accuracy: %0.1f%%" % (evl.percent_correct()))
示例15: _normalize_dataset
# 需要導入模塊: from weka.filters import Filter [as 別名]
# 或者: from weka.filters.Filter import set_inputformat [as 別名]
def _normalize_dataset(self, dataset):
    """Run Weka's unsupervised ``Normalize`` filter with default options.

    :param dataset: dataset to normalize
    :return: the dataset produced by the filter
    """
    normalize_data = Filter(classname='weka.filters.unsupervised.attribute.Normalize',
                            options=[])
    normalize_data.set_inputformat(dataset)
    return normalize_data.filter(dataset)