

Python filters.Filter Class Code Examples

This page collects typical usage examples of the weka.filters.Filter class in Python. If you are wondering what the Python Filter class does, how to use it, or want to see concrete examples of it in real code, the curated examples below may help.


Fifteen code examples of the Filter class are shown below, ordered by popularity by default.
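
Most of the examples follow the same basic pattern: construct a Filter with a Weka class name and an option list, feed it the input format of the data, then call filter() to obtain the transformed dataset. The following minimal sketch illustrates that pattern; the dataset path is only a placeholder for illustration, and the method names follow recent python-weka-wrapper releases, while some of the older examples below still use the deprecated set_inputformat spelling.

import weka.core.jvm as jvm
from weka.core.converters import Loader
from weka.filters import Filter

jvm.start()  # the JVM must be running before any Weka class is instantiated

# load a dataset (the file name is only an example)
loader = Loader(classname="weka.core.converters.ArffLoader")
data = loader.load_file("iris.arff")

# configure a filter that removes the first attribute
remove = Filter(classname="weka.filters.unsupervised.attribute.Remove",
                options=["-R", "first"])
remove.inputformat(data)        # let the filter determine its output format
filtered = remove.filter(data)  # apply the filter, producing a new dataset
print(filtered)

jvm.stop()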

Example 1: filterUnusedFeatureFromList

    def filterUnusedFeatureFromList(self, data, unusedFuncitonList):
        filteredData = data

        for attribute in unusedFuncitonList:                
            remove = Filter(classname="weka.filters.unsupervised.attribute.RemoveByName", options=["-E", "^" + attribute + ".*$"])
            remove.set_inputformat(filteredData)
            filteredData = remove.filter(filteredData)

        return filteredData      
Author: zhaohengyang, Project: Android-malware-detection, Lines of code: 9, Source: weka_interface.py

Example 2: attributeSelector

    def attributeSelector(self, data, selectNum):
        attributeSelector = Filter(classname="weka.filters.supervised.attribute.AttributeSelection",\
                         options=["-S", "weka.attributeSelection.Ranker -T -1.7976931348623157E308 -N " + str(selectNum),\
                                   "-E", "weka.attributeSelection.InfoGainAttributeEval"])

        attributeSelector.set_inputformat(data)
        data = attributeSelector.filter(data)

            
        return data
Author: zhaohengyang, Project: Android-malware-detection, Lines of code: 10, Source: weka_interface.py

Example 3: getSetDataBySetIndex

 def getSetDataBySetIndex(self, data, index):
     # cut feature set out
     featureTable = FeatureTable()
     startIndexList = featureTable.getEachSetStartIndex()
     
     start = startIndexList[index]
     end = startIndexList[index+1] - 1
     remove = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-V", "-R", str(start) + "-" + str(end) + ",last"])
     remove.set_inputformat(data)
     filteredData = remove.filter(data)
     return filteredData
Author: zhaohengyang, Project: Android-malware-detection, Lines of code: 11, Source: weka_interface.py

Example 4: remove_correct_classified

	def remove_correct_classified(self, invert = False):
		options=[
			'-W', self.classifier.to_commandline(), 
			'-C', str(self.class_index), #classindex
	#		'-F','0', # folds
	#		'-T','0.1', #threshold by numeric classes
			'-I','0', # max iterations
			'-V' if not invert else '' 
		] # invert
		classname = "weka.filters.unsupervised.instance.RemoveMisclassified"
		remove = Filter(classname=classname, options=options)
		remove.inputformat(self.data)
		self.data = remove.filter(self.data)
Author: sbiastoch, Project: thesis, Lines of code: 13, Source: evaluate.py

Example 5: emlimitateUnusedFeature

    def emlimitateUnusedFeature(self, trainData, testData = None):
        trainData.set_class_index(trainData.num_attributes() - 1)   # set class attribute
        featureIndex = -1       
        filteredTrainData = trainData
        filteredTestData = testData
        

        attribute_index = 0

        while attribute_index < filteredTrainData.num_attributes() - 1:
            sampleCoverage = 0
            #print attribute_index
            # check value for current feature in each instance
            for instance_index in range(0, filteredTrainData.num_instances()):
                instance = filteredTrainData.get_instance(instance_index)
                value = instance.get_value(attribute_index)
                
                if value > 0:
                    sampleCoverage += 1
            if sampleCoverage == 0:
                #print "found"
                remove = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", str(attribute_index+1)]) #The index in this function start from 1
                remove.set_inputformat(filteredTrainData)
                filteredTrainData = remove.filter(filteredTrainData)  
                if filteredTestData:
                    remove = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", str(attribute_index+1)]) #The index in this function start from 1
                    remove.set_inputformat(filteredTestData)
                    filteredTestData = remove.filter(filteredTestData)  
            else:
                attribute_index += 1

        return [filteredTrainData, filteredTestData]
Author: zhaohengyang, Project: Android-malware-detection, Lines of code: 32, Source: weka_interface.py

Example 6: main

def main():
    """
    Just runs some example code.
    """

    # load a dataset
    iris_file = helper.get_data_dir() + os.sep + "iris.arff"
    helper.print_info("Loading dataset: " + iris_file)
    loader = Loader("weka.core.converters.ArffLoader")
    data = loader.load_file(iris_file)

    # remove class attribute
    data.delete_last_attribute()

    # build a clusterer and output model
    helper.print_title("Training SimpleKMeans clusterer")
    clusterer = Clusterer(classname="weka.clusterers.SimpleKMeans", options=["-N", "3"])
    clusterer.build_clusterer(data)
    print(clusterer)
    helper.print_info("Evaluating on data")
    evaluation = ClusterEvaluation()
    evaluation.set_model(clusterer)
    evaluation.test_model(data)
    print("# clusters: " + str(evaluation.num_clusters))
    print("log likelihood: " + str(evaluation.log_likelihood))
    print("cluster assignments:\n" + str(evaluation.cluster_assignments))
    plc.plot_cluster_assignments(evaluation, data, inst_no=True)

    # using a filtered clusterer
    helper.print_title("Filtered clusterer")
    loader = Loader("weka.core.converters.ArffLoader")
    data = loader.load_file(iris_file)
    clusterer = Clusterer(classname="weka.clusterers.SimpleKMeans", options=["-N", "3"])
    remove = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "last"])
    fclusterer = FilteredClusterer()
    fclusterer.clusterer = clusterer
    fclusterer.filter = remove
    fclusterer.build_clusterer(data)
    print(fclusterer)

    # load a dataset incrementally and build clusterer incrementally
    helper.print_title("Incremental clusterer")
    loader = Loader("weka.core.converters.ArffLoader")
    iris_inc = loader.load_file(iris_file, incremental=True)
    clusterer = Clusterer("weka.clusterers.Cobweb")
    remove = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "last"])
    remove.inputformat(iris_inc)
    iris_filtered = remove.outputformat()
    clusterer.build_clusterer(iris_filtered)
    for inst in loader:
        remove.input(inst)
        inst_filtered = remove.output()
        clusterer.update_clusterer(inst_filtered)
    clusterer.update_finished()
    print(clusterer.to_commandline())
    print(clusterer)
    print(clusterer.graph)
    plg.plot_dot_graph(clusterer.graph)
Author: keypointt, Project: python-weka-wrapper-examples, Lines of code: 58, Source: clusterers.py

Example 7: filterOutUnnecessaryAPIAndEvaluateOurApproach

 def filterOutUnnecessaryAPIAndEvaluateOurApproach(self, ourApproahFile, apiFile, indexInTable, methodName, databaseTable, csvFilePath):
     outputStr = methodName+","
     resultList = []
     # Get whole feature set of our approach
     filteredData = self.load_Arff(ourApproahFile)
     # Use this function to get selected API feature and save the unselected api in a list
     filterOutList = self.attribueSelectionBasedOnRankingInDatabase(apiFile, indexInTable, databaseTable, "")[1]
     
     # Remove unselected API
     for functionName in filterOutList:
         functionName = functionName.split("(")[0] + "\(\)"
         functionName = functionName.replace('$','\$')
         remove = Filter(classname="weka.filters.unsupervised.attribute.RemoveByName", options=["-E", "^" + functionName + ".*$"])
         remove.set_inputformat(filteredData)
         filteredData = remove.filter(filteredData)
     featureNum = filteredData.num_attributes() - 1
     print "featureNum: " + str(featureNum)
     if csvFilePath != "":
         self.writeTenScaledTitleManual(featureNum, csvFilePath)
         #print "i:" + str(i)
         #print "functionName:" + functionName
         #print "featureNum: " + str(filteredData.num_attributes() - 1)
     for attributeStr in filteredData.attributes():
         print(attributeStr)
     # Run ten scaled generation and evaluation 
     step = 10 
     while step < featureNum:
         roundData = self.attributeSelector(filteredData, step)
         classifier = self.algorithmPicker(roundData, indexInTable)
         evaluation = self.evaluation(classifier, roundData)
         #print(self.algorithmTable[indexInTable] + ": " + "{:.2f}".format(evaluation.percent_correct()) + ", Feature select number:" + str(roundData.num_attributes() - 1) + "/" + str(featureNum))
         resultList.append("{:.2f}".format(evaluation.percent_correct()))
         #csvFile.write("{:.2f}".format(evaluation.percent_correct()) +",")
         step += 10
     
     classifier = self.algorithmPicker(filteredData, indexInTable)
     evaluation = self.evaluation(classifier, filteredData)
     #print(self.algorithmTable[indexInTable] + ": " + "{:.2f}".format(evaluation.percent_correct()) + ", Feature select number:" + str(filteredData.num_attributes() - 1) + "/" + str(featureNum))
     resultList.append("{:.2f}".format(evaluation.percent_correct()))
     
     # Write out to CSV file
     for item in resultList:
         outputStr += item +","
     outputStr = outputStr[0:-1] + "\n"
     self.writeToPath(csvFilePath, outputStr)
Author: zhaohengyang, Project: Android-malware-detection, Lines of code: 45, Source: weka_interface.py

Example 8: use_filter

def use_filter(data):
    """
    Uses the AttributeSelection filter for attribute selection.
    :param data: the dataset to use
    :type data: Instances
    """
    print("\n2. Filter")
    flter = Filter(classname="weka.filters.supervised.attribute.AttributeSelection")
    aseval = ASEvaluation(classname="weka.attributeSelection.CfsSubsetEval")
    assearch = ASSearch(classname="weka.attributeSelection.GreedyStepwise", options=["-B"])
    flter.set_property("evaluator", aseval.jobject)
    flter.set_property("search", assearch.jobject)
    flter.inputformat(data)
    filtered = flter.filter(data)
    print(str(filtered))
Author: keypointt, Project: python-weka-wrapper-examples, Lines of code: 15, Source: attribute_selection_test.py
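
A minimal driver for the use_filter function above might look like the sketch below. The dataset file name is a placeholder, and use_filter is assumed to be in scope together with its Filter, ASEvaluation and ASSearch imports (from weka.filters and weka.attribute_selection). The class attribute must be set first, since AttributeSelection with CfsSubsetEval is a supervised filter.

import weka.core.jvm as jvm
from weka.core.converters import Loader

jvm.start()

# load a dataset with a nominal class (file name is only a placeholder)
loader = Loader(classname="weka.core.converters.ArffLoader")
data = loader.load_file("anneal.arff")
data.class_is_last()  # the supervised AttributeSelection filter needs a class attribute

use_filter(data)      # prints the dataset reduced by CfsSubsetEval + GreedyStepwise

jvm.stop()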

Example 9: _pre_process_to_classification

 def _pre_process_to_classification(self, dataset):   
     filter_data = Filter(classname = 'weka.filters.unsupervised.attribute.MathExpression', 
                          options = ['-unset-class-temporarily', '-E', "ifelse ( A>0, 1, 0 )", 
                                     '-V', '-R', 'last'])
     
     filter_data.set_inputformat(dataset)
     filtered = filter_data.filter(dataset)
     
     discretize_data = Filter(classname = 'weka.filters.unsupervised.attribute.NumericToNominal', 
                          options = ['-R', 'last'])
     
     discretize_data.set_inputformat(filtered)
     discretized = discretize_data.filter(filtered)
     
     return discretized
Author: jonmagal, Project: recsys_challenge, Lines of code: 15, Source: dataset.py

Example 10: load

    def load(path, db):
        nominals = [
            49,  # dev_double_fp_config
            50,  # dev_endian_little
            51,  # dev_execution_capabilities
            52,  # dev_extensions
            54,  # dev_global_mem_cache_type
            57,  # dev_host_unified_memory
            63,  # dev_image_support
            65,  # dev_local_mem_type
            96,  # dev_queue_properties
            97,  # dev_single_fp_config
            98,  # dev_type
            100, # dev_vendor_id
        ]
        nominal_indices = ",".join([str(index) for index in nominals])
        force_nominal = ["-N", nominal_indices]

        # Load data from CSV.
        dataset = Dataset.load_csv(path, options=force_nominal)
        dataset.__class__ = Dataset

        # Set class index and database connection.
        dataset.class_index = -1
        dataset.db = db

        # Create string->nominal type attribute filter, ignoring the first
        # attribute (scenario ID), since we're not classifying with it.
        string_to_nominal = WekaFilter(classname=("weka.filters.unsupervised."
                                                  "attribute.StringToNominal"),
                                       options=["-R", "2-last"])
        string_to_nominal.inputformat(dataset.instances)

        # Create filtered dataset, and swap data around.
        filtered = string_to_nominal.filter(dataset.instances)
        dataset.instances = filtered

        return dataset
Author: vianziro, Project: msc-thesis, Lines of code: 38, Source: dataset.py

Example 11: Loader

from weka.core.converters import Loader
from weka.core.classes import Random
from weka.classifiers import Classifier, Evaluation, PredictionOutput
from weka.filters import Filter

jvm.start()

# load diabetes
loader = Loader(classname="weka.core.converters.ArffLoader")
fname = data_dir + os.sep + "diabetes.arff"
print("\nLoading dataset: " + fname + "\n")
data = loader.load_file(fname)
# we'll set the class attribute after filtering

# apply NominalToBinary filter and set class attribute
fltr = Filter("weka.filters.unsupervised.attribute.NominalToBinary")
fltr.inputformat(data)
filtered = fltr.filter(data)
filtered.class_is_last()

# cross-validate LinearRegression on filtered data, display model
cls = Classifier(classname="weka.classifiers.functions.LinearRegression")
pout = PredictionOutput(classname="weka.classifiers.evaluation.output.prediction.PlainText")
evl = Evaluation(filtered)
evl.crossvalidate_model(cls, filtered, 10, Random(1), pout)
print("10-fold cross-validation:\n" + evl.summary())
print("Predictions:\n\n" + str(pout))
cls.build_classifier(filtered)
print("Model:\n\n" + str(cls))

# use AddClassification filter with LinearRegression on filtered data
Author: fracpete, Project: wekamooc, Lines of code: 31, Source: class-4.3.py

Example 12: remove_attributes

 def remove_attributes(self, *attributes):
     indices = [self.attribute_index(x) for x in attributes]
     remove = Filter(classname="weka.filters.unsupervised.attribute.Remove",
                     options=["-R", ','.join(str(x + 1) for x in indices)])
     remove.inputformat(self.instances)
     self.instances = remove.filter(self.instances)
Author: ChrisCummins, Project: phd, Lines of code: 6, Source: ml.py

Example 13: Loader

        if (len_email > 0) and (len_content > 0):
            writer.writerow(row)

# close csvfile
csvfile.close()

# start JVM
jvm.start()

# load CSV file
loader = Loader(classname="weka.core.converters.CSVLoader", options=["-E", '"', "-F", ","])
data = loader.load_file(csvfilename)
#print(data)

# convert class to nominal
wfilter = Filter(classname="weka.filters.unsupervised.attribute.StringToNominal", options=["-R", "last"])
wfilter.set_inputformat(data)
data = wfilter.filter(data)

# convert content to string
wfilter = Filter(classname="weka.filters.unsupervised.attribute.NominalToString", options=["-C", "first"])
wfilter.set_inputformat(data)
data = wfilter.filter(data)

# set class attribute
data.set_class_index(data.num_attributes() - 1)

# generate baseline
zeror = Classifier(classname="weka.classifiers.rules.ZeroR")
evaluation = Evaluation(data)
evaluation.crossvalidate_model(zeror, data, 10, Random(1))
Author: Br3nda, Project: meetings, Lines of code: 31, Source: list.py

Example 14: main

def main():
    """
    Just runs some example code.
    """

    # load a dataset
    iris_file = helper.get_data_dir() + os.sep + "iris.arff"
    helper.print_info("Loading dataset: " + iris_file)
    loader = Loader("weka.core.converters.ArffLoader")
    iris_data = loader.load_file(iris_file)
    iris_data.class_is_last()

    # classifier help
    helper.print_title("Creating help string")
    classifier = Classifier(classname="weka.classifiers.trees.J48")
    print(classifier.to_help())

    # partial classname
    helper.print_title("Creating classifier from partial classname")
    clsname = ".J48"
    classifier = Classifier(classname=clsname)
    print(clsname + " --> " + classifier.classname)

    # classifier from commandline
    helper.print_title("Creating SMO from command-line string")
    cmdline = 'weka.classifiers.functions.SMO -K "weka.classifiers.functions.supportVector.NormalizedPolyKernel -E 3.0"'
    classifier = from_commandline(cmdline, classname="weka.classifiers.Classifier")
    classifier.build_classifier(iris_data)
    print("input: " + cmdline)
    print("output: " + classifier.to_commandline())
    print("model:\n" + str(classifier))

    # kernel classifier
    helper.print_title("Creating SMO as KernelClassifier")
    kernel = Kernel(classname="weka.classifiers.functions.supportVector.RBFKernel", options=["-G", "0.001"])
    classifier = KernelClassifier(classname="weka.classifiers.functions.SMO", options=["-M"])
    classifier.kernel = kernel
    classifier.build_classifier(iris_data)
    print("classifier: " + classifier.to_commandline())
    print("model:\n" + str(classifier))

    # build a classifier and output model
    helper.print_title("Training J48 classifier on iris")
    classifier = Classifier(classname="weka.classifiers.trees.J48")
    # Instead of using 'options=["-C", "0.3"]' in the constructor, we can also set the "confidenceFactor"
    # property of the J48 classifier itself. However, being of type float rather than double, we need
    # to convert it to the correct type first using the double_to_float function:
    classifier.set_property("confidenceFactor", typeconv.double_to_float(0.3))
    classifier.build_classifier(iris_data)
    print(classifier)
    print(classifier.graph)
    print(classifier.to_source("MyJ48"))
    plot_graph.plot_dot_graph(classifier.graph)

    # evaluate model on test set
    helper.print_title("Evaluating J48 classifier on iris")
    evaluation = Evaluation(iris_data)
    evl = evaluation.test_model(classifier, iris_data)
    print(evl)
    print(evaluation.summary())

    # evaluate model on train/test split
    helper.print_title("Evaluating J48 classifier on iris (random split 66%)")
    classifier = Classifier(classname="weka.classifiers.trees.J48", options=["-C", "0.3"])
    evaluation = Evaluation(iris_data)
    evaluation.evaluate_train_test_split(classifier, iris_data, 66.0, Random(1))
    print(evaluation.summary())

    # load a dataset incrementally and build classifier incrementally
    helper.print_title("Build classifier incrementally on iris")
    helper.print_info("Loading dataset: " + iris_file)
    loader = Loader("weka.core.converters.ArffLoader")
    iris_inc = loader.load_file(iris_file, incremental=True)
    iris_inc.class_is_last()
    classifier = Classifier(classname="weka.classifiers.bayes.NaiveBayesUpdateable")
    classifier.build_classifier(iris_inc)
    for inst in loader:
        classifier.update_classifier(inst)
    print(classifier)

    # construct meta-classifiers
    helper.print_title("Meta classifiers")
    # generic FilteredClassifier instantiation
    print("generic FilteredClassifier instantiation")
    meta = SingleClassifierEnhancer(classname="weka.classifiers.meta.FilteredClassifier")
    meta.classifier = Classifier(classname="weka.classifiers.functions.LinearRegression")
    flter = Filter("weka.filters.unsupervised.attribute.Remove")
    flter.options = ["-R", "first"]
    meta.set_property("filter", flter.jobject)
    print(meta.to_commandline())
    # direct FilteredClassifier instantiation
    print("direct FilteredClassifier instantiation")
    meta = FilteredClassifier()
    meta.classifier = Classifier(classname="weka.classifiers.functions.LinearRegression")
    flter = Filter("weka.filters.unsupervised.attribute.Remove")
    flter.options = ["-R", "first"]
    meta.filter = flter
    print(meta.to_commandline())
    # generic Vote
    print("generic Vote instantiation")
#......... (rest of the code omitted here) .........
Author: fracpete, Project: python-weka-wrapper3-examples, Lines of code: 101, Source: classifiers.py

Example 15: run_classifier

def run_classifier(path, prot, sel, cols, prot_vals, beta):
        
    DIs = dict()
    jvm.start()

    for i in range(len(cols)-1):
        loader = Loader(classname="weka.core.converters.CSVLoader")
        data = loader.load_file(path)
    
        # remove selected attribute from the data
        # NOTE: options are ONE indexed, not ZERO indexed
        remove = Filter(classname="weka.filters.unsupervised.attribute.Remove", \
                        options=["-R", str(sel[2]+1)])
        remove.inputformat(data)
        data = remove.filter(data)

        # if running for only one attribue, remove all others (except protected)
        if i > 0:
            for j in range(1, prot[2]+1):
                if i != j:
                    remove = Filter(classname="weka.filters.unsupervised.attribute.Remove", \
                                    options=["-R", ("1" if i>j else "2")])
                    remove.inputformat(data)
                    data = remove.filter(data)

        # set prot attribute as Class attribute
        data.class_is_last()
        
        # run classifier
        cls = Classifier(classname="weka.classifiers.bayes.NaiveBayes")
        cls.build_classifier(data)
    
        # count the number of each combination
        pos_and_pred = float(0.0)
        pos_and_not_pred = float(0.0)
        neg_and_pred = float(0.0)
        neg_and_not_pred = float(0.0)
        for ind, inst in enumerate(data):
            if cls.classify_instance(inst):
                if prot_vals[ind] == prot[1]:
                    pos_and_pred += 1
                else:
                    neg_and_pred += 1
            else:
                if prot_vals[ind] == prot[1]:
                    pos_and_not_pred += 1
                else:
                    neg_and_not_pred += 1

        # calculate DI
        BER = ((pos_and_not_pred / (pos_and_pred + pos_and_not_pred)) + \
               (neg_and_pred / (neg_and_pred + neg_and_not_pred))) * 0.5
        if BER > 0.5:
            BER = 1 - BER
        DI = 1 - ((1 - 2 * BER) / (beta + 1 - 2 * BER))

        if i == 0: # consider changing this to a 'code word' instead of 'all'
            DIs["all"] = DI
        else:
            DIs[cols[i-1]] = DI

    jvm.stop()

    return DIs
Author: sorelle, Project: fairdata, Lines of code: 64, Source: main.py


Note: the weka.filters.Filter examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their respective authors, and copyright of the source code remains with those authors; please consult the corresponding project's license before distributing or reusing the code. Do not reproduce without permission.