This page collects typical usage examples of the Python method sklearn.feature_selection.SelectPercentile. If you have been wondering what feature_selection.SelectPercentile does, how to use it, or what it looks like in practice, the curated code samples below may help. You can also read more about the containing module, sklearn.feature_selection.
Below are 15 code examples of feature_selection.SelectPercentile, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code samples.
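For orientation, here is a minimal, self-contained sketch of the basic SelectPercentile workflow; the synthetic dataset and the percentile value are illustrative assumptions, not taken from the examples below.

# Minimal SelectPercentile sketch: keep the top 20% of features by ANOVA F-score.
from sklearn.datasets import make_classification
from sklearn.feature_selection import SelectPercentile, f_classif

X, y = make_classification(n_samples=200, n_features=50, random_state=0)
selector = SelectPercentile(score_func=f_classif, percentile=20)
X_reduced = selector.fit_transform(X, y)
print(X_reduced.shape)          # (200, 10): 20% of 50 features kept
print(selector.get_support())   # boolean mask over the original columns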
Example 1: find_best_feature_selections
# Required import: from sklearn import feature_selection [as alias]
# Alternatively: from sklearn.feature_selection import SelectPercentile [as alias]
from sklearn.feature_selection import SelectKBest, SelectPercentile, chi2, f_classif

def find_best_feature_selections(X, y):
    # Select the best features using different techniques.
    X_new = SelectKBest(chi2, k=80).fit_transform(X, y)
    X_new1 = SelectPercentile(chi2, percentile=20).fit_transform(X, y)
    X_new2 = SelectKBest(f_classif, k=80).fit_transform(X, y)  # this one has the best performance
    X_new22 = SelectPercentile(f_classif, percentile=20).fit_transform(X, y)
    X_new3 = SelectKBest(f_classif, k=70).fit_transform(X, y)
    X_new4 = SelectKBest(f_classif, k=60).fit_transform(X, y)
    print(X_new.shape)
    # selection_parameters_for_classifier(X_new, y)
    # print(y.shape)
    train_and_test(X_new, y)
    train_and_test(X_new1, y)
    train_and_test(X_new2, y)
    train_and_test(X_new22, y)
    train_and_test(X_new3, y)
    train_and_test(X_new4, y)

# X, y = _dataset_sample()
################################ PARAMETER SELECTION ################################
# TODO: a problem occurs when using the max_leaf_nodes parameter in DecisionTree and RandomForest
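The train_and_test helper called above is not part of this excerpt. A plausible stand-in is sketched below; the estimator and cross-validation settings are assumptions for illustration, not the original project's code.

# Hypothetical stand-in for the undefined train_and_test helper used above.
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

def train_and_test(X, y):
    clf = RandomForestClassifier(n_estimators=100, random_state=0)  # assumed estimator
    scores = cross_val_score(clf, X, y, cv=5)
    print("mean accuracy: %.3f (+/- %.3f)" % (scores.mean(), scores.std()))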
Example 2: test_objectmapper
# Required import: from sklearn import feature_selection [as alias]
# Alternatively: from sklearn.feature_selection import SelectPercentile [as alias]
def test_objectmapper(self):
    df = pdml.ModelFrame([])
    self.assertIs(df.feature_selection.GenericUnivariateSelect,
                  fs.GenericUnivariateSelect)
    self.assertIs(df.feature_selection.SelectPercentile,
                  fs.SelectPercentile)
    self.assertIs(df.feature_selection.SelectKBest, fs.SelectKBest)
    self.assertIs(df.feature_selection.SelectFpr, fs.SelectFpr)
    self.assertIs(df.feature_selection.SelectFromModel,
                  fs.SelectFromModel)
    self.assertIs(df.feature_selection.SelectFdr, fs.SelectFdr)
    self.assertIs(df.feature_selection.SelectFwe, fs.SelectFwe)
    self.assertIs(df.feature_selection.RFE, fs.RFE)
    self.assertIs(df.feature_selection.RFECV, fs.RFECV)
    self.assertIs(df.feature_selection.VarianceThreshold,
                  fs.VarianceThreshold)
Example 3: _get_feature_selector
# Required import: from sklearn import feature_selection [as alias]
# Alternatively: from sklearn.feature_selection import SelectPercentile [as alias]
def _get_feature_selector(self):
    """Get a feature selector instance based on the feature_selector model
    parameter.

    Returns:
        (Object): a feature selector which returns a reduced feature matrix, \
            given the full feature matrix, X, and the class labels, y
    """
    if self.config.model_settings is None:
        selector_type = None
    else:
        selector_type = self.config.model_settings.get("feature_selector")
    selector = {
        # solver="liblinear" supports the L1 penalty on recent scikit-learn versions
        "l1": SelectFromModel(LogisticRegression(penalty="l1", C=1, solver="liblinear")),
        "f": SelectPercentile(),
    }.get(selector_type)
    return selector
Example 4: univariate_feature_selection
# Required import: from sklearn import feature_selection [as alias]
# Alternatively: from sklearn.feature_selection import SelectPercentile [as alias]
from sklearn.feature_selection import SelectPercentile, chi2, f_classif, f_regression

def univariate_feature_selection(mode, predictors, target):
    if mode == 'f_regression':
        fselect = SelectPercentile(f_regression, percentile=100)
    elif mode == 'f_classif':
        fselect = SelectPercentile(f_classif, percentile=100)
    elif mode == 'chi2':
        fselect = SelectPercentile(chi2, percentile=100)  # chi2 requires non-negative features
    else:
        raise ValueError("unknown mode: %s" % mode)
    # percentile=100 keeps every feature; we only fit to obtain the per-feature p-values
    fselect.fit(predictors, target)
    return fselect.pvalues_
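A quick way to exercise this function and rank features by p-value; the synthetic dataset here is an assumption for illustration.

# Illustrative call to univariate_feature_selection defined above.
import numpy as np
from sklearn.datasets import make_classification

X, y = make_classification(n_samples=150, n_features=20, random_state=1)
pvalues = univariate_feature_selection('f_classif', X, y)
print(np.argsort(pvalues)[:5])  # indices of the five most significant features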
Example 5: test_export_pipeline
# Required import: from sklearn import feature_selection [as alias]
# Alternatively: from sklearn.feature_selection import SelectPercentile [as alias]
def test_export_pipeline():
    """Assert that export_pipeline() generates a compilable source file as expected, given a fixed pipeline."""
    pipeline_string = (
        'KNeighborsClassifier(CombineDFs('
        'DecisionTreeClassifier(input_matrix, DecisionTreeClassifier__criterion=gini, '
        'DecisionTreeClassifier__max_depth=8,DecisionTreeClassifier__min_samples_leaf=5,'
        'DecisionTreeClassifier__min_samples_split=5),SelectPercentile(input_matrix, SelectPercentile__percentile=20)),'
        'KNeighborsClassifier__n_neighbors=10, '
        'KNeighborsClassifier__p=1,KNeighborsClassifier__weights=uniform'
    )
    pipeline = creator.Individual.from_string(pipeline_string, tpot_obj._pset)
    expected_code = """import numpy as np
import pandas as pd
from sklearn.feature_selection import SelectPercentile, f_classif
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline, make_union
from sklearn.tree import DecisionTreeClassifier
from tpot.builtins import StackingEstimator

# NOTE: Make sure that the outcome column is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1)
training_features, testing_features, training_target, testing_target = \\
            train_test_split(features, tpot_data['target'], random_state=None)

exported_pipeline = make_pipeline(
    make_union(
        StackingEstimator(estimator=DecisionTreeClassifier(criterion="gini", max_depth=8, min_samples_leaf=5, min_samples_split=5)),
        SelectPercentile(score_func=f_classif, percentile=20)
    ),
    KNeighborsClassifier(n_neighbors=10, p=1, weights="uniform")
)

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
"""
    assert expected_code == export_pipeline(pipeline, tpot_obj.operators, tpot_obj._pset)
Example 6: test_export_pipeline_3
# Required import: from sklearn import feature_selection [as alias]
# Alternatively: from sklearn.feature_selection import SelectPercentile [as alias]
def test_export_pipeline_3():
    """Assert that export_pipeline() generates a compilable source file as expected, given a fixed simple pipeline with a preprocessor."""
    pipeline_string = (
        'DecisionTreeClassifier(SelectPercentile(input_matrix, SelectPercentile__percentile=20),'
        'DecisionTreeClassifier__criterion=gini, DecisionTreeClassifier__max_depth=8,'
        'DecisionTreeClassifier__min_samples_leaf=5, DecisionTreeClassifier__min_samples_split=5)'
    )
    pipeline = creator.Individual.from_string(pipeline_string, tpot_obj._pset)
    expected_code = """import numpy as np
import pandas as pd
from sklearn.feature_selection import SelectPercentile, f_classif
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.tree import DecisionTreeClassifier

# NOTE: Make sure that the outcome column is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1)
training_features, testing_features, training_target, testing_target = \\
            train_test_split(features, tpot_data['target'], random_state=None)

exported_pipeline = make_pipeline(
    SelectPercentile(score_func=f_classif, percentile=20),
    DecisionTreeClassifier(criterion="gini", max_depth=8, min_samples_leaf=5, min_samples_split=5)
)

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
"""
    assert expected_code == export_pipeline(pipeline, tpot_obj.operators, tpot_obj._pset)
Example 7: test_export_pipeline_6
# Required import: from sklearn import feature_selection [as alias]
# Alternatively: from sklearn.feature_selection import SelectPercentile [as alias]
def test_export_pipeline_6():
    """Assert that export_pipeline() generates a compilable source file with random_state and data_file_path."""
    pipeline_string = (
        'DecisionTreeClassifier(SelectPercentile(input_matrix, SelectPercentile__percentile=20),'
        'DecisionTreeClassifier__criterion=gini, DecisionTreeClassifier__max_depth=8,'
        'DecisionTreeClassifier__min_samples_leaf=5, DecisionTreeClassifier__min_samples_split=5)'
    )
    pipeline = creator.Individual.from_string(pipeline_string, tpot_obj._pset)
    expected_code = """import numpy as np
import pandas as pd
from sklearn.feature_selection import SelectPercentile, f_classif
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.tree import DecisionTreeClassifier
from tpot.export_utils import set_param_recursive

# NOTE: Make sure that the outcome column is labeled 'target' in the data file
tpot_data = pd.read_csv('test_path', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1)
training_features, testing_features, training_target, testing_target = \\
            train_test_split(features, tpot_data['target'], random_state=42)

exported_pipeline = make_pipeline(
    SelectPercentile(score_func=f_classif, percentile=20),
    DecisionTreeClassifier(criterion="gini", max_depth=8, min_samples_leaf=5, min_samples_split=5)
)
# Fix random state for all the steps in exported pipeline
set_param_recursive(exported_pipeline.steps, 'random_state', 42)

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
"""
    exported_code = export_pipeline(pipeline, tpot_obj.operators,
                                    tpot_obj._pset, random_state=42,
                                    data_file_path='test_path')
    assert expected_code == exported_code
Example 8: test_operator_export
# Required import: from sklearn import feature_selection [as alias]
# Alternatively: from sklearn.feature_selection import SelectPercentile [as alias]
def test_operator_export():
    """Assert that a TPOT operator can export properly with a callable function as a parameter."""
    assert list(TPOTSelectPercentile.arg_types) == TPOTSelectPercentile_args
    export_string = TPOTSelectPercentile.export(5)
    assert export_string == "SelectPercentile(score_func=f_classif, percentile=5)"
Example 9: test_get_by_name
# Required import: from sklearn import feature_selection [as alias]
# Alternatively: from sklearn.feature_selection import SelectPercentile [as alias]
def test_get_by_name():
    """Assert that the Operator class returns operators by name appropriately."""
    assert get_by_name("SelectPercentile", tpot_obj.operators).__class__ == TPOTSelectPercentile.__class__
    assert get_by_name("SelectFromModel", tpot_obj.operators).__class__ == TPOTSelectFromModel.__class__
Example 10: test_pipeline_score_save
# Required import: from sklearn import feature_selection [as alias]
# Alternatively: from sklearn.feature_selection import SelectPercentile [as alias]
def test_pipeline_score_save():
    """Assert that the TPOTClassifier can generate a scored pipeline export correctly."""
    tpot_obj = TPOTClassifier()
    tpot_obj._fit_init()
    tpot_obj._pbar = tqdm(total=1, disable=True)
    pipeline_string = (
        'DecisionTreeClassifier(SelectPercentile(input_matrix, SelectPercentile__percentile=20),'
        'DecisionTreeClassifier__criterion=gini, DecisionTreeClassifier__max_depth=8,'
        'DecisionTreeClassifier__min_samples_leaf=5, DecisionTreeClassifier__min_samples_split=5)'
    )
    pipeline = creator.Individual.from_string(pipeline_string, tpot_obj._pset)
    expected_code = """import numpy as np
import pandas as pd
from sklearn.feature_selection import SelectPercentile, f_classif
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.tree import DecisionTreeClassifier

# NOTE: Make sure that the outcome column is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1)
training_features, testing_features, training_target, testing_target = \\
            train_test_split(features, tpot_data['target'], random_state=None)

# Average CV score on the training set was: 0.929813743
exported_pipeline = make_pipeline(
    SelectPercentile(score_func=f_classif, percentile=20),
    DecisionTreeClassifier(criterion="gini", max_depth=8, min_samples_leaf=5, min_samples_split=5)
)

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
"""
    assert_equal(expected_code, export_pipeline(pipeline, tpot_obj.operators, tpot_obj._pset, pipeline_score=0.929813743))
Example 11: _select_percentile
# Required import: from sklearn import feature_selection [as alias]
# Alternatively: from sklearn.feature_selection import SelectPercentile [as alias]
def _select_percentile(self, percentile):
    # The algorithm is conservative: if the percentile corresponds to a
    # fractional number of features, it rounds up rather than down. For
    # example, percentile=18 on a 20-feature matrix implies keeping 3.6
    # features; in that case, 4 features are kept.
    if self._problem == FeatureSelector.CLASSIFICATION:
        score = f_classif
    else:
        score = f_regression
    # percentile must be passed by keyword on recent scikit-learn versions
    self._selector = SelectPercentile(score, percentile=percentile)
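A quick sanity check of the conservative rounding described in the comment, assuming scikit-learn's current SelectPercentile implementation (a threshold derived by linear percentile interpolation over the scores); the random data is purely illustrative.

# percentile=18 on 20 features implies 3.6 features; 4 columns should be kept.
import numpy as np
from sklearn.feature_selection import SelectPercentile, f_classif

rng = np.random.RandomState(0)
X = rng.rand(100, 20)
y = rng.randint(0, 2, size=100)
kept = SelectPercentile(f_classif, percentile=18).fit_transform(X, y)
print(kept.shape[1])  # expected: 4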
Example 12: PlotPerfPercentFeatures
# Required import: from sklearn import feature_selection [as alias]
# Alternatively: from sklearn.feature_selection import SelectPercentile [as alias]
def PlotPerfPercentFeatures(X, y, est=LinearSVC()):
    '''
    Performance of a classifier (default: SVM-ANOVA), varying the
    percentile of features selected with an F-test.
    http://scikit-learn.org/stable/auto_examples/svm/plot_svm_anova.html#example-svm-plot-svm-anova-py
    See also (similar, but with model selection from among several classifiers):
    http://nbviewer.ipython.org/github/bugra/pydata-nyc-2014/blob/master/6.%20Scikit%20Learn%20-%20Model%20Selection.ipynb
    '''
    transform = SelectPercentile(f_classif)
    clf = Pipeline([('anova', transform), ('est', est)])
    # Plot the cross-validation score as a function of the percentile of features
    score_means = list()
    score_stds = list()
    percentiles = (1, 2, 3, 5, 7, 10, 13, 15, 20, 25, 33, 50, 65, 75, 90, 99)
    # percentiles = (1, 5, 10, 25, 50, 75, 90)
    for percentile in percentiles:
        clf.set_params(anova__percentile=percentile)
        # cross-validate with stratified shuffle splits (n_splits/test_size API)
        cv = StratifiedShuffleSplit(n_splits=7, test_size=0.3)
        this_scores = cross_val_score(clf, X, y, cv=cv, n_jobs=-1)
        score_means.append(this_scores.mean())
        score_stds.append(this_scores.std())
    print("Outputting graph:")
    plt.errorbar(percentiles, score_means, np.array(score_stds))
    plt.title('Predictor performance, varying the percent of features used')
    plt.xlabel('Percentile')
    plt.ylabel('Prediction performance')
    plt.axis('tight')
    plt.show()
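The manual percentile loop above can also be phrased as a grid search over the pipeline's anova__percentile parameter. A sketch under the same assumptions (LinearSVC estimator, stratified shuffle splits); the grid values are illustrative.

# Alternative to the manual loop: sweep the percentile with GridSearchCV.
from sklearn.feature_selection import SelectPercentile, f_classif
from sklearn.model_selection import GridSearchCV, StratifiedShuffleSplit
from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVC

def grid_search_percentile(X, y):
    pipe = Pipeline([('anova', SelectPercentile(f_classif)), ('est', LinearSVC())])
    cv = StratifiedShuffleSplit(n_splits=7, test_size=0.3, random_state=0)
    param_grid = {'anova__percentile': [1, 5, 10, 25, 50, 75, 90]}
    grid = GridSearchCV(pipe, param_grid, cv=cv, n_jobs=-1)
    grid.fit(X, y)
    return grid.best_params_, grid.best_score_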
Example 13: plot_BestKFeatures
# Required import: from sklearn import feature_selection [as alias]
# Alternatively: from sklearn.feature_selection import SelectPercentile [as alias]
def plot_BestKFeatures(X_train, y_train):
    '''
    http://nbviewer.ipython.org/github/gmonce/scikit-learn-book/blob/master/Chapter%204%20-%20Advanced%20Features%20-%20Feature%20Engineering%20and%20Selection.ipynb
    Find the best percentile of features to use, using cross-validation
    on the training set, and get the K best features.
    '''
    from sklearn import feature_selection
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.model_selection import cross_val_score
    # alternative estimator: tree.DecisionTreeClassifier(criterion='entropy')
    dt = RandomForestClassifier(n_jobs=2, bootstrap=True, n_estimators=250, criterion='gini')
    percentiles = range(1, 95, 5)
    results = []
    for i in percentiles:
        # feature_selection.chi2 is an alternative score function (non-negative features only)
        fs = feature_selection.SelectPercentile(feature_selection.f_classif, percentile=i)
        X_train_fs = fs.fit_transform(X_train, y_train)
        scores = cross_val_score(dt, X_train_fs, y_train, cv=4)
        # print(i, scores.mean())
        results = np.append(results, scores.mean())
    optimal_percentile_idx = int(np.argmax(results))
    print("Optimal percentile of features: {0}".format(percentiles[optimal_percentile_idx]), "\n")
    # Plot percentile of features VS. cross-validation scores
    import matplotlib.pyplot as pl
    pl.figure()
    pl.xlabel("Percentile of features selected")
    pl.ylabel("Cross-validation accuracy")
    pl.plot(percentiles, results)
    print("Mean scores:", results)
    return
Example 14: _get_feature_selector
# Required import: from sklearn import feature_selection [as alias]
# Alternatively: from sklearn.feature_selection import SelectPercentile [as alias]
def _get_feature_selector(selector_type):
    """Get a feature selector instance based on the feature_selector model
    parameter.

    Returns:
        (Object): A feature selector which returns a reduced feature matrix, \
            given the full feature matrix, X, and the class labels, y.
    """
    selector = {
        # solver="liblinear" supports the L1 penalty on recent scikit-learn versions
        "l1": SelectFromModel(LogisticRegression(penalty="l1", C=1, solver="liblinear")),
        "f": SelectPercentile(),
    }.get(selector_type)
    return selector
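A sketch of how the returned selector might be dropped into a pipeline; the classifier and step names are assumptions for illustration, not part of the original project.

# Illustrative use of _get_feature_selector defined above.
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

selector = _get_feature_selector("f")  # SelectPercentile with default settings
pipe = Pipeline([("select", selector), ("clf", LogisticRegression(max_iter=1000))])
# pipe.fit(X, y) would then fit the selector and the classifier in sequence.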
Example 15: run_feature_selection
# Required import: from sklearn import feature_selection [as alias]
# Alternatively: from sklearn.feature_selection import SelectPercentile [as alias]
import numpy as np
from sklearn.feature_selection import SelectFromModel, SelectKBest, SelectPercentile, f_classif
from sklearn.linear_model import LassoCV

def run_feature_selection(features, labels, feature_selection, best_features):
    features_selected = features
    features_index_sorted = None
    if feature_selection == 'select_K_Best':
        # Fit a univariate selector just to obtain the per-feature scores.
        selector = SelectKBest(score_func=f_classif, k=4)  # score_func=chi2: only for non-negative features
        selector.fit(features, labels)
        # Rank features by score and keep the top `best_features` columns.
        scores = selector.scores_
        features_index_sorted = np.argsort(-scores)
        features_selected = features[:, features_index_sorted[0:best_features]]
    # SelectFromModel with LassoCV:
    # we use LassoCV as the base estimator since the L1 norm promotes sparsity of features.
    if feature_selection == 'LassoCV':
        clf = LassoCV()
        # Set a threshold of 0.95 on the coefficient magnitudes.
        sfm = SelectFromModel(clf, threshold=0.95)
        sfm.fit(features, labels)
        features_selected = sfm.transform(features)
        """
        # Reset the threshold until the number of features equals two.
        # Note that the attribute can be set directly instead of repeatedly
        # fitting the metatransformer.
        while n_features > 2:
            sfm.threshold += 0.1
            X_transform = sfm.transform(X)
            n_features = X_transform.shape[1]
        """
    # Univariate feature selection
    # Univariate feature selection works by selecting the best features based on
    # univariate statistical tests. It can be seen as a preprocessing step for an
    # estimator. Scikit-learn exposes feature selection routines as objects that
    # implement the transform method:
    #  - SelectKBest removes all but the k highest-scoring features
    #  - SelectPercentile removes all but a user-specified highest-scoring percentage of features
    #  - SelectFpr, SelectFdr and SelectFwe apply common univariate statistical tests per
    #    feature: false positive rate, false discovery rate, and family-wise error, respectively
    #  - GenericUnivariateSelect performs univariate feature selection with a configurable
    #    strategy, which allows selecting the best strategy with a hyper-parameter search estimator
    if feature_selection == 'slct_percentile':
        selector = SelectPercentile(f_classif, percentile=10)
        selector.fit(features, labels)
        # The percentile value does not affect the ranking below; we simply take
        # the top `best_features` columns ordered by score.
        scores = selector.scores_
        features_index_sorted = np.argsort(-scores)
        # scores = -np.log10(selector.pvalues_); scores /= scores.max()  # alternative ranking
        features_selected = features[:, features_index_sorted[0:best_features]]
    print("Selected only " + str(features_selected.shape) + " features")
    return features_selected, features_index_sorted
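The comment block in this last example also mentions GenericUnivariateSelect, which none of the examples show directly. A minimal sketch, with illustrative parameter values:

# GenericUnivariateSelect unifies the univariate selectors behind one 'mode' knob;
# mode can be 'percentile', 'k_best', 'fpr', 'fdr' or 'fwe'.
from sklearn.feature_selection import GenericUnivariateSelect, f_classif

transformer = GenericUnivariateSelect(f_classif, mode='percentile', param=10)
# X_reduced = transformer.fit_transform(features, labels)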