当前位置: 首页>>代码示例>>Python>>正文


Python Filter.filter方法代码示例

本文整理汇总了Python中weka.filters.Filter.filter方法的典型用法代码示例。如果您正苦于以下问题:Python Filter.filter方法的具体用法是什么?Python Filter.filter怎么用?有哪些Python Filter.filter的使用例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类weka.filters.Filter的用法示例。


在下文中一共展示了Filter.filter方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: emlimitateUnusedFeature

# 需要导入模块: from weka.filters import Filter [as 别名]
# 或者: from weka.filters.Filter import filter [as 别名]
    def emlimitateUnusedFeature(self, trainData, testData = None):
        """Drop every feature attribute that is never positive in the training data.

        The last attribute of ``trainData`` is marked as the class attribute
        and is never examined or removed.  A feature attribute counts as
        unused when no training instance holds a value greater than zero for
        it; such an attribute is removed from the training data and, when
        test data is supplied, from the test data at the same position.

        :param trainData: training dataset; its class index is set to the
            last attribute as a side effect.
        :param testData: optional dataset filtered alongside the training
            data (assumed to share the training data's attribute layout --
            TODO confirm against callers).
        :return: ``[filteredTrainData, filteredTestData]``; the second entry
            is ``None`` when no test data was given.
        """
        trainData.set_class_index(trainData.num_attributes() - 1)   # set class attribute
        filteredTrainData = trainData
        filteredTestData = testData

        attribute_index = 0
        # The attribute count shrinks after every removal, so it is re-read on
        # each pass instead of being cached.
        while attribute_index < filteredTrainData.num_attributes() - 1:
            # One positive value is enough to keep the attribute, so stop
            # scanning at the first hit.
            is_used = any(
                filteredTrainData.get_instance(instance_index).get_value(attribute_index) > 0
                for instance_index in range(filteredTrainData.num_instances())
            )
            if is_used:
                attribute_index += 1
                continue

            # Remove's -R option counts attributes from 1.  The index is NOT
            # advanced after a removal: the next attribute slides into the
            # current position.
            position = str(attribute_index + 1)
            remove = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", position])
            remove.set_inputformat(filteredTrainData)
            filteredTrainData = remove.filter(filteredTrainData)
            if filteredTestData is not None:
                # A separate filter instance is initialised on the test data's own format.
                remove = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", position])
                remove.set_inputformat(filteredTestData)
                filteredTestData = remove.filter(filteredTestData)

        return [filteredTrainData, filteredTestData]
开发者ID:zhaohengyang,项目名称:Android-malware-detection,代码行数:34,代码来源:weka_interface.py

示例2: select_missclassified

# 需要导入模块: from weka.filters import Filter [as 别名]
# 或者: from weka.filters.Filter import filter [as 别名]
	def select_missclassified(self):
		"""Run the misclassification-selection filter chain on ``self.data``.

		Judging by the method name, the goal is to keep only the instances
		that ``self.base_classifier`` gets wrong.  Three Weka filters run in
		sequence, each replacing ``self.data`` with its output:

		1. ``AddClassification`` with ``-classification -error``, wrapping
		   ``self.base_classifier``.
		2. ``RemoveWithValues`` on the last attribute with inverted matching
		   (``-V``) -- presumably keeps only the rows flagged as errors;
		   verify against the Weka option documentation.
		3. ``Remove`` for the attribute at position ``num_attributes - 2``
		   and for the last attribute.

		Previously the second filter was configured but never applied, so no
		instance was ever removed; it is now applied like the other two.
		"""
		add_classification = Filter(classname="weka.filters.supervised.attribute.AddClassification", options=['-classification' ,'-error' ,'-W' ,self.base_classifier.to_commandline()])
		add_classification.inputformat(self.data)
		self.data = add_classification.filter(self.data)

		select_rows = Filter(classname="weka.filters.unsupervised.instance.RemoveWithValues", options=['-S','0.0','-C','last','-L','last','-V'])
		select_rows.inputformat(self.data)
		self.data = select_rows.filter(self.data)

		# NOTE(review): the first index is computed as num_attributes - 2 and
		# handed to -R unchanged; confirm this targets the intended attribute.
		drop_columns = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=['-R',str(self.data.num_attributes-2)+',last'])
		drop_columns.inputformat(self.data)
		self.data = drop_columns.filter(self.data)
开发者ID:sbiastoch,项目名称:thesis,代码行数:13,代码来源:evaluate.py

示例3: _pre_process_to_classification

# 需要导入模块: from weka.filters import Filter [as 别名]
# 或者: from weka.filters.Filter import filter [as 别名]
 def _pre_process_to_classification(self, dataset):
     """Pass ``dataset`` through a MathExpression and a NumericToNominal filter.

     Stage one applies the expression ``ifelse ( A>0, 1, 0 )`` with the
     options ``-unset-class-temporarily -V -R last``; stage two runs
     ``NumericToNominal`` with ``-R last`` on the result of stage one.

     :param dataset: the dataset to transform.
     :return: the output of the second filter.
     """
     stages = (
         ('weka.filters.unsupervised.attribute.MathExpression',
          ['-unset-class-temporarily', '-E', "ifelse ( A>0, 1, 0 )",
           '-V', '-R', 'last']),
         ('weka.filters.unsupervised.attribute.NumericToNominal',
          ['-R', 'last']),
     )

     current = dataset
     for stage_class, stage_options in stages:
         # Each stage is initialised on the format produced by the previous one.
         stage = Filter(classname = stage_class, options = stage_options)
         stage.set_inputformat(current)
         current = stage.filter(current)

     return current
开发者ID:jonmagal,项目名称:recsys_challenge,代码行数:17,代码来源:dataset.py

示例4: build_and_classify

# 需要导入模块: from weka.filters import Filter [as 别名]
# 或者: from weka.filters.Filter import filter [as 别名]
def build_and_classify(classifier, classifier_name, approach_name, infile, percentage='10'):
    """
    Train ``classifier`` on part of a CSV dataset and evaluate it.

    The CSV file is loaded with its last attribute as the class, every
    attribute is passed through NumericToNominal, and the rows are shuffled
    with the fixed seed 42.  The training set is produced by
    ``RemovePercentage -P <percentage> -V``.

    Returns a list: ``[approach_name, classifier_name, percentage,
    evaluation.percent_correct, evaluation.weighted_f_measure]``.
    """
    # Seed Python's RNG so repeated runs are consistent.
    random.seed('iot')

    data = Loader(classname='weka.core.converters.CSVLoader').load_file(infile)
    data.class_is_last()

    # Whole-dataset preprocessing: nominal conversion, then a seeded shuffle.
    preprocessing = (
        ('weka.filters.unsupervised.attribute.NumericToNominal', ['-R', 'first-last']),
        ('weka.filters.unsupervised.instance.Randomize', ['-S', '42']),
    )
    for filter_class, filter_options in preprocessing:
        stage = Filter(classname=filter_class, options=filter_options)
        stage.inputformat(data)
        data = stage.filter(data)

    # Carve the training portion out of the shuffled data.
    splitter = Filter(classname='weka.filters.unsupervised.instance.RemovePercentage',
                      options=['-P', percentage, '-V'])
    splitter.inputformat(data)
    train = splitter.filter(data)
    # NOTE(review): the test set is the complete dataset, so it also contains
    # the training rows -- confirm this is intended.
    test = data

    classifier.build_classifier(train)
    evaluation = Evaluation(train)
    evaluation.test_model(classifier, test)

    return [
        approach_name,
        classifier_name,
        percentage,
        evaluation.percent_correct,
        evaluation.weighted_f_measure,
    ]
开发者ID:kapil1garg,项目名称:nursing-home-analytics,代码行数:49,代码来源:weka_learning-curve_generator.py

示例5: filterUnusedFeatureFromList

# 需要导入模块: from weka.filters import Filter [as 别名]
# 或者: from weka.filters.Filter import filter [as 别名]
    def filterUnusedFeatureFromList(self, data, unusedFuncitonList):
        """Remove attributes by name, one RemoveByName pass per list entry.

        Each entry is wrapped as ``^<entry>.*$`` and handed to RemoveByName
        via ``-E``; entries are inserted into the expression verbatim.

        :param data: the dataset to filter.
        :param unusedFuncitonList: iterable of name prefixes / expressions.
        :return: the dataset after all passes; ``data`` itself when the list
            is empty.
        """
        current = data
        for name_prefix in unusedFuncitonList:
            expression = "^" + name_prefix + ".*$"
            by_name = Filter(
                classname="weka.filters.unsupervised.attribute.RemoveByName",
                options=["-E", expression],
            )
            by_name.set_inputformat(current)
            current = by_name.filter(current)
        return current
开发者ID:zhaohengyang,项目名称:Android-malware-detection,代码行数:11,代码来源:weka_interface.py

示例6: attributeSelector

# 需要导入模块: from weka.filters import Filter [as 别名]
# 或者: from weka.filters.Filter import filter [as 别名]
    def attributeSelector(self, data, selectNum):
        """Run Weka's supervised AttributeSelection filter on ``data``.

        The filter is configured with ``InfoGainAttributeEval`` as evaluator
        and a ``Ranker`` search whose ``-N`` argument is ``selectNum``.

        :param data: the dataset to filter.
        :param selectNum: value passed to the Ranker's ``-N`` option.
        :return: the filtered dataset.
        """
        ranker_spec = "weka.attributeSelection.Ranker -T -1.7976931348623157E308 -N " + str(selectNum)
        selection_options = [
            "-S", ranker_spec,
            "-E", "weka.attributeSelection.InfoGainAttributeEval",
        ]
        selection = Filter(classname="weka.filters.supervised.attribute.AttributeSelection",
                           options=selection_options)
        selection.set_inputformat(data)
        return selection.filter(data)
开发者ID:zhaohengyang,项目名称:Android-malware-detection,代码行数:12,代码来源:weka_interface.py

示例7: createTwoDatasets

# 需要导入模块: from weka.filters import Filter [as 别名]
# 或者: from weka.filters.Filter import filter [as 别名]
 def createTwoDatasets(self, wholeDataPath, trainingDataPercentage, trainingPath, testingPath, shuffleSeed = 43):
     """Shuffle an ARFF dataset and save it as a training and a testing file.

     The dataset is loaded through ``self.load_Arff``, shuffled with
     ``Randomize -S <shuffleSeed>`` and then run twice through
     ``RemovePercentage -P <trainingDataPercentage>``: once with ``-V`` to
     obtain the training part and once without it to obtain the testing
     part.  The instance count of each part is printed, and both parts are
     written with ``self.save_Arff``.

     :param wholeDataPath: path of the ARFF file to split.
     :param trainingDataPercentage: percentage passed to ``-P``.
     :param trainingPath: output path for the training part.
     :param testingPath: output path for the testing part.
     :param shuffleSeed: seed for the Randomize filter.
     """
     wholeData = self.load_Arff(wholeDataPath)
     randomize = Filter(classname="weka.filters.unsupervised.instance.Randomize", options=["-S", str(shuffleSeed)])
     randomize.set_inputformat(wholeData)
     wholeData = randomize.filter(wholeData)

     percentage = str(trainingDataPercentage)

     # With -V the selection is inverted; this pass yields the training part.
     keepPercentage = Filter(classname="weka.filters.unsupervised.instance.RemovePercentage", options=["-P", percentage, "-V"])
     keepPercentage.set_inputformat(wholeData)
     trainingData = keepPercentage.filter(wholeData)
     # Parenthesised print gives the same output on Python 2 and Python 3.
     print("instances:" + str(trainingData.num_instances()))

     # Same percentage without -V yields the complementary testing part.
     dropPercentage = Filter(classname="weka.filters.unsupervised.instance.RemovePercentage", options=["-P", percentage])
     dropPercentage.set_inputformat(wholeData)
     testingData = dropPercentage.filter(wholeData)
     print("instances:" + str(testingData.num_instances()))

     self.save_Arff(trainingData, trainingPath)
     self.save_Arff(testingData, testingPath)
开发者ID:zhaohengyang,项目名称:Android-malware-detection,代码行数:21,代码来源:weka_interface.py

示例8: getSetDataBySetIndex

# 需要导入模块: from weka.filters import Filter [as 别名]
# 或者: from weka.filters.Filter import filter [as 别名]
 def getSetDataBySetIndex(self, data, index):
     """Keep only one feature set's attribute range plus the last attribute.

     The range runs from the start index of set ``index`` up to the position
     just before the start index of set ``index + 1``, both taken from
     ``FeatureTable.getEachSetStartIndex()``.  ``Remove -V`` inverts the
     selection, so the listed attributes are the ones that survive.

     :param data: the dataset to cut down.
     :param index: position of the wanted feature set in the start-index list.
     :return: the filtered dataset.
     """
     set_starts = FeatureTable().getEachSetStartIndex()
     first = set_starts[index]
     last = set_starts[index+1] - 1

     kept_range = "{0}-{1},last".format(first, last)
     keep_only = Filter(classname="weka.filters.unsupervised.attribute.Remove",
                        options=["-V", "-R", kept_range])
     keep_only.set_inputformat(data)
     return keep_only.filter(data)
开发者ID:zhaohengyang,项目名称:Android-malware-detection,代码行数:13,代码来源:weka_interface.py

示例9: remove_correct_classified

# 需要导入模块: from weka.filters import Filter [as 别名]
# 或者: from weka.filters.Filter import filter [as 别名]
	def remove_correct_classified(self, invert = False):
		"""Filter ``self.data`` with Weka's RemoveMisclassified filter.

		The filter wraps ``self.classifier`` and uses ``self.class_index`` as
		the class index, with max iterations set to 0.  By default the
		filter's ``-V`` (invert) flag is passed; with ``invert=True`` the
		flag is left out.  ``self.data`` is replaced by the filter output.

		:param invert: when true, omit the ``-V`` flag.
		"""
		options = [
			'-W', self.classifier.to_commandline(),
			'-C', str(self.class_index),  # class index
			'-I', '0',  # max iterations
		]
		# Options the original left disabled, for reference:
		# '-F' (folds) and '-T' (threshold for numeric classes).
		if not invert:
			# Append the flag only when wanted, so no empty-string
			# pseudo-option is passed to the filter.
			options.append('-V')

		classname = "weka.filters.unsupervised.instance.RemoveMisclassified"
		remove = Filter(classname=classname, options=options)
		remove.inputformat(self.data)
		self.data = remove.filter(self.data)
开发者ID:sbiastoch,项目名称:thesis,代码行数:15,代码来源:evaluate.py

示例10: load

# 需要导入模块: from weka.filters import Filter [as 别名]
# 或者: from weka.filters.Filter import filter [as 别名]
    def load(path, db):
        """Load a CSV file into a ``Dataset`` and convert its attributes.

        Five columns are forced to nominal at load time (``-N``).  The loaded
        object is re-classed to ``Dataset``, its class index is set to -1 and
        ``db`` is attached to it.  Its instances then pass through
        StringToNominal and NominalToBinary, both limited to ``2-last`` so
        the first attribute (scenario ID) is left alone.

        :param path: path of the CSV file.
        :param db: database connection stored on the dataset.
        :return: the prepared ``Dataset``.
        """
        forced_nominal_columns = [
            49,  # dev_global_mem_cache_type
            52,  # dev_host_unified_memory
            54,  # dev_local_mem_type
            56,  # dev_type
            57,  # dev_vendor
        ]
        csv_options = ["-N", ",".join(map(str, forced_nominal_columns))]

        # Read the CSV, then coerce the result to this project's Dataset type.
        dataset = Dataset.load_csv(path, options=csv_options)
        dataset.__class__ = Dataset

        dataset.class_index = -1
        dataset.db = db

        # The scenario ID in column 1 is not used for classification, so both
        # conversions skip it.
        to_nominal = WekaFilter(
            classname="weka.filters.unsupervised." "attribute.StringToNominal",
            options=["-R", "2-last"],
        )
        to_nominal.inputformat(dataset.instances)
        nominal_instances = to_nominal.filter(dataset.instances)

        to_binary = WekaFilter(
            classname="weka.filters.unsupervised.attribute.NominalToBinary",
            options=["-R", "2-last"],
        )
        to_binary.inputformat(nominal_instances)

        # Store the fully converted instances back on the dataset.
        dataset.instances = to_binary.filter(nominal_instances)

        return dataset
开发者ID:vianziro,项目名称:msc-thesis,代码行数:40,代码来源:dataset.py

示例11: use_filter

# 需要导入模块: from weka.filters import Filter [as 别名]
# 或者: from weka.filters.Filter import filter [as 别名]
def use_filter(data):
    """
    Performs attribute selection through the AttributeSelection filter
    (CfsSubsetEval evaluator, GreedyStepwise search with "-B") and prints
    the filtered dataset.

    :param data: the dataset to use
    :type data: Instances
    """
    print("\n2. Filter")

    selection = Filter(classname="weka.filters.supervised.attribute.AttributeSelection")
    evaluator = ASEvaluation(classname="weka.attributeSelection.CfsSubsetEval")
    search = ASSearch(classname="weka.attributeSelection.GreedyStepwise", options=["-B"])

    # The filter takes the underlying Java objects, not the Python wrappers.
    selection.set_property("evaluator", evaluator.jobject)
    selection.set_property("search", search.jobject)

    selection.inputformat(data)
    reduced = selection.filter(data)
    print(str(reduced))
开发者ID:keypointt,项目名称:python-weka-wrapper-examples,代码行数:17,代码来源:attribute_selection_test.py

示例12: filterOutUnnecessaryAPIAndEvaluateOurApproach

# 需要导入模块: from weka.filters import Filter [as 别名]
# 或者: from weka.filters.Filter import filter [as 别名]
 def filterOutUnnecessaryAPIAndEvaluateOurApproach(self, ourApproahFile, apiFile, indexInTable, methodName, databaseTable, csvFilePath):
     """Remove unselected API features, then evaluate at growing feature counts.

     Steps:

     1. Load the full feature set from ``ourApproahFile``.
     2. Take element ``[1]`` of
        ``self.attribueSelectionBasedOnRankingInDatabase(...)`` as the list
        of API names to filter out, and remove the matching attributes with
        RemoveByName.
     3. Print the remaining feature count and every remaining attribute;
        when ``csvFilePath`` is non-empty, write the title row with
        ``self.writeTenScaledTitleManual``.
     4. For 10, 20, ... features (strictly below the feature count), select
        that many attributes, pick a classifier, evaluate, and record the
        accuracy; finish with one run on the complete filtered data.
     5. Write ``<methodName>,<acc>,<acc>,...`` plus a newline through
        ``self.writeToPath``.

     :param ourApproahFile: ARFF file holding the whole feature set.
     :param apiFile: passed through to the ranking-based selection helper.
     :param indexInTable: algorithm index passed to the selection helper and
         to ``self.algorithmPicker``.
     :param methodName: label written as the first CSV column.
     :param databaseTable: passed through to the selection helper.
     :param csvFilePath: destination for the result row.
     """
     # Whole feature set of our approach.
     filteredData = self.load_Arff(ourApproahFile)
     # Element [1] of the helper's result is the list of unselected APIs.
     filterOutList = self.attribueSelectionBasedOnRankingInDatabase(apiFile, indexInTable, databaseTable, "")[1]

     for functionName in filterOutList:
         # Keep the name up to "(" and append an escaped "()"; "$" is escaped
         # too.  Raw strings give the same text without relying on Python
         # passing unknown escape sequences through.
         escapedName = functionName.split("(")[0] + r"\(\)"
         escapedName = escapedName.replace('$', r'\$')
         remove = Filter(classname="weka.filters.unsupervised.attribute.RemoveByName", options=["-E", "^" + escapedName + ".*$"])
         remove.set_inputformat(filteredData)
         filteredData = remove.filter(filteredData)

     # The class attribute is not counted as a feature.
     featureNum = filteredData.num_attributes() - 1
     # Parenthesised print gives the same output on Python 2 and Python 3.
     print("featureNum: " + str(featureNum))
     if csvFilePath != "":
         self.writeTenScaledTitleManual(featureNum, csvFilePath)
     for attributeStr in filteredData.attributes():
         print(attributeStr)

     # Evaluate at every multiple of ten below the total feature count.
     resultList = []
     for step in range(10, featureNum, 10):
         roundData = self.attributeSelector(filteredData, step)
         classifier = self.algorithmPicker(roundData, indexInTable)
         evaluation = self.evaluation(classifier, roundData)
         resultList.append("{:.2f}".format(evaluation.percent_correct()))

     # Final run on the complete filtered feature set.
     classifier = self.algorithmPicker(filteredData, indexInTable)
     evaluation = self.evaluation(classifier, filteredData)
     resultList.append("{:.2f}".format(evaluation.percent_correct()))

     # One CSV row: method name followed by the recorded accuracies.
     outputStr = methodName + "," + ",".join(resultList) + "\n"
     self.writeToPath(csvFilePath, outputStr)
开发者ID:zhaohengyang,项目名称:Android-malware-detection,代码行数:47,代码来源:weka_interface.py

示例13: main

# 需要导入模块: from weka.filters import Filter [as 别名]
# 或者: from weka.filters.Filter import filter [as 别名]
def main():
    """
    Runs a series of filter demonstrations: a plain Remove filter, a
    MultiFilter, StringToWordVector, filter lookup by partial classname and
    source code generation.
    """

    def load_arff(filename):
        """Announce and load an ARFF file from the helper's data directory."""
        path = helper.get_data_dir() + os.sep + filename
        helper.print_info("Loading dataset: " + path)
        return Loader("weka.core.converters.ArffLoader").load_file(path)

    # --- iris: single filter vs. MultiFilter ---------------------------
    iris_data = load_arff("iris.arff")

    helper.print_info("Removing class attribute")
    drop_last = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "last"])
    drop_last.inputformat(iris_data)
    without_class = drop_last.filter(iris_data)

    helper.print_info("Use MultiFilter")
    drop_first = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "first"])
    standardize = Filter(classname="weka.filters.unsupervised.attribute.Standardize")
    chain = MultiFilter()
    chain.filters = [drop_first, standardize]
    chain.inputformat(iris_data)
    chained_output = chain.filter(iris_data)

    helper.print_title("Input")
    print(iris_data)
    helper.print_title("Output")
    print(without_class)
    helper.print_title("Output (MultiFilter)")
    print(chained_output)

    # --- text data: StringToWordVector ----------------------------------
    text_data = load_arff("reutersTop10Randomized_1perc_shortened.arff")
    text_data.class_is_last()

    lovins = Stemmer(classname="weka.core.stemmers.IteratedLovinsStemmer")
    rainbow = Stopwords(classname="weka.core.stopwords.Rainbow")
    words = Tokenizer(classname="weka.core.tokenizers.WordTokenizer")
    to_vector = StringToWordVector(options=["-W", "10", "-L", "-C"])
    to_vector.stemmer = lovins
    to_vector.stopwords = rainbow
    to_vector.tokenizer = words
    to_vector.inputformat(text_data)
    vectorized = to_vector.filter(text_data)

    helper.print_title("Input (StringToWordVector)")
    print(text_data)
    helper.print_title("Output (StringToWordVector)")
    print(vectorized)

    # --- filter lookup from a partial classname -------------------------
    helper.print_title("Creating filter from partial classname")
    partial_name = ".Standardize"
    resolved = Filter(classname=partial_name)
    print(partial_name + " --> " + resolved.classname)

    # --- source code generation -----------------------------------------
    helper.print_info("Generate source code")
    labor_data = load_arff("labor.arff")
    fill_missing = Filter(classname="weka.filters.unsupervised.attribute.ReplaceMissingValues")
    fill_missing.inputformat(labor_data)
    # The filtered result is not kept; the filter is run before to_source()
    # is called on it.
    fill_missing.filter(labor_data)
    print(fill_missing.to_source("MyReplaceMissingValues", labor_data))
开发者ID:fracpete,项目名称:python-weka-wrapper-examples,代码行数:75,代码来源:filters.py

示例14: merge_nominal_attributes

# 需要导入模块: from weka.filters import Filter [as 别名]
# 或者: from weka.filters.Filter import filter [as 别名]
	def merge_nominal_attributes(self, significance=0.01):
		"""Apply Weka's supervised MergeNominalValues filter to ``self.data``.

		The filter runs over the attribute range ``first-last`` with
		``significance`` as its ``-L`` argument; ``self.data`` is replaced by
		the filtered result.

		:param significance: value passed to the filter's ``-L`` option.
		"""
		merge_options = ['-L', str(significance), '-R', 'first-last']
		merger = Filter(classname="weka.filters.supervised.attribute.MergeNominalValues", options=merge_options)
		merger.inputformat(self.data)
		self.data = merger.filter(self.data)
开发者ID:sbiastoch,项目名称:thesis,代码行数:6,代码来源:evaluate.py

示例15: print

# 需要导入模块: from weka.filters import Filter [as 别名]
# 或者: from weka.filters.Filter import filter [as 别名]
from weka.clusterers import Clusterer, ClusterEvaluation
from weka.filters import Filter
import weka.plot.clusterers as plc

# Start the JVM before any Weka object is created.
# NOTE(review): `jvm`, `os`, `data_dir` and `Loader` are not defined in the
# visible part of this script; presumably they are imported/defined above.
jvm.start()

# Load the iris dataset from the data directory.
fname = data_dir + os.sep + "iris.arff"
print("\nLoading dataset: " + fname + "\n")
loader = Loader(classname="weka.core.converters.ArffLoader")
data = loader.load_file(fname)

# Remove the last attribute (the class) so clustering runs on features only.
flt = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "last"])
flt.set_inputformat(data)
filtered = flt.filter(data)

# Build SimpleKMeans with -N 3 on the class-less data, evaluate it on the
# same data and print the cluster results.
print("\n--> SimpleKMeans\n")
cl = Clusterer(classname="weka.clusterers.SimpleKMeans", options=["-N", "3"])
cl.build_clusterer(filtered)
evl = ClusterEvaluation()
evl.set_model(cl)
evl.test_model(filtered)
print(evl.get_cluster_results())
# The plot receives the original `data` (class attribute included), not `filtered`.
plc.plot_cluster_assignments(evl, data, atts=[], inst_no=True, wait=True)

# Create an AddCluster filter wrapping SimpleKMeans -N 3.  The filter is only
# constructed here; the snippet ends before it is applied.
print("\n--> AddCluster filter\n")
flt = Filter(classname="weka.filters.unsupervised.attribute.AddCluster",
             options=["-W", "weka.clusterers.SimpleKMeans -N 3"])
开发者ID:echavarria,项目名称:wekamooc,代码行数:33,代码来源:class-3.6.py


注:本文中的weka.filters.Filter.filter方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。