Python feature_selection.f_classif方法代码示例

本文整理汇总了Python中sklearn.feature_selection.f_classif方法的典型用法代码示例。如果您正苦于以下问题：Python feature_selection.f_classif方法的具体用法？Python feature_selection.f_classif怎么用？Python feature_selection.f_classif使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块sklearn.feature_selection的用法示例。

在下文中一共展示了feature_selection.f_classif方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: GetSelectedFeatureIndex

# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import f_classif [as 别名]
def GetSelectedFeatureIndex(self, data_container):
        """Pick the best features of a data container with an ANOVA F-test.

        Every column of the container's array is divided by its L2 norm, then
        ``SelectKBest(f_classif)`` is fitted on the scaled data.  If the
        container holds fewer columns than the requested number of features,
        a message is printed and the requested number is lowered to the
        column count via ``SetSelectedFeatureNumber``.

        Parameters
        ----------
        data_container : object
            Must provide ``GetArray()`` and ``GetLabel()``.
            NOTE(review): assumes ``GetArray()`` returns a 2-D numpy array of
            shape (n_samples, n_features) -- confirm against the container.

        Returns
        -------
        tuple
            ``(feature_index, f_value, p_value)``: the selected column indices
            as a list, followed by the per-feature F statistics and p-values
            computed by the fitted selector.
        """
        data = data_container.GetArray()
        # Divide out of place: the original ``data /= ...`` rewrote the array
        # handed out by the container and failed on integer arrays.
        # NOTE(review): an all-zero column has a zero norm and still yields
        # non-finite values here; that case is not handled.
        data = data / np.linalg.norm(data, ord=2, axis=0)
        label = data_container.GetLabel()

        feature_count = data.shape[1]
        if feature_count < self.GetSelectedFeatureNumber():
            print(
                'ANOVA: The number of features {:d} in data container is smaller than the required number {:d}'.format(
                    feature_count, self.GetSelectedFeatureNumber()))
            self.SetSelectedFeatureNumber(feature_count)

        fs = SelectKBest(f_classif, k=self.GetSelectedFeatureNumber())
        fs.fit(data, label)
        feature_index = fs.get_support(True)
        # fit() already ran f_classif on this data and stored its output, so
        # reuse it instead of computing the statistics a second time.
        return feature_index.tolist(), fs.scores_, fs.pvalues_

开发者ID:salan668，项目名称:FAE，代码行数:18，代码来源:FeatureSelector.py

示例2: test_clone

# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import f_classif [as 别名]
def test_clone():
    """Check that ``clone`` returns a distinct estimator with equal params.

    An estimator is built, cloned, and the clone is verified to be a
    different object; for the scalar case the parameters are also compared.
    """
    from sklearn.feature_selection import SelectFpr, f_classif

    # Scalar hyper-parameter: identity and parameter equality are checked.
    original = SelectFpr(f_classif, alpha=0.1)
    duplicate = clone(original)
    assert duplicate is not original
    assert_equal(original.get_params(), duplicate.get_params())

    # Array-valued hyper-parameter: only object identity is checked.
    original = SelectFpr(f_classif, alpha=np.zeros((10, 2)))
    duplicate = clone(original)
    assert duplicate is not original

开发者ID:PacktPublishing，项目名称:Mastering-Elasticsearch-7.0，代码行数:18，代码来源:test_base.py

示例3: f_score

# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import f_classif [as 别名]
def f_score(X, y):
    """
    Return the ANOVA F-value of every feature, as computed by scikit-learn's
    ``f_classif``; the accompanying p-values are discarded.

    The score is described as
    f_score = sum((ni/(c-1))*(mean_i - mean)^2)/((1/(n - c))*sum((ni-1)*std_i^2))

    Input
    -----
    X: {numpy array}, shape (n_samples, n_features)
        input data
    y : {numpy array},shape (n_samples,)
        input class labels

    Output
    ------
    F: {numpy array}, shape (n_features,)
        f-score for each feature
    """

    scores, _ = f_classif(X, y)
    return scores

开发者ID:jundongl，项目名称:scikit-feature，代码行数:22，代码来源:f_score.py

示例4: get_top_k

# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import f_classif [as 别名]
def get_top_k(self):
		"""Return the names of the ``self.top_k`` best-scoring predictor columns.

		The target column is always excluded from the candidates.  When
		``self.objective`` is truthy the columns are scored with
		``f_regression``.  Otherwise the ``'intercept'`` column is excluded as
		well and the columns are scored with ``chi2``, falling back to
		``f_classif`` if the ``chi2`` selection raises.

		Returns
		-------
		list
			Column names picked by ``SelectKBest``, in their original order.
		"""
		columns = list(self.data.columns.values)
		columns.remove(self.target)
		if self.objective:
			top_k_vars = SelectKBest(f_regression, k=self.top_k)
			# Only the fitted support mask is needed, so fit() is enough.
			top_k_vars.fit(self.data[columns], self.data[self.target])
		else:
			# remove intercept from top_k
			columns.remove('intercept')
			try:
				top_k_vars = SelectKBest(chi2, k=self.top_k)
				top_k_vars.fit(self.data[columns], self.data[self.target])
			except Exception:
				# Best-effort fallback when chi2 cannot be applied to the data.
				# ``Exception`` rather than a bare ``except`` so that
				# KeyboardInterrupt/SystemExit are no longer swallowed.
				top_k_vars = SelectKBest(f_classif, k=self.top_k)
				top_k_vars.fit(self.data[columns], self.data[self.target])
		return [columns[i] for i in top_k_vars.get_support(indices=True)]

开发者ID:dominance-analysis，项目名称:dominance-analysis，代码行数:18，代码来源:dominance.py

示例5: _anova

# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import f_classif [as 别名]
def _anova(self, X_fft, y, n_coefs, n_timestamps):
        """Choose which Fourier-coefficient columns to keep.

        If ``n_coefs`` is not smaller than the number of columns, the indices
        ``0 .. n_coefs - 1`` are returned.  Otherwise constant columns
        (variance close to zero) are discarded; the remaining columns are
        ranked by the p-value of a one-way ANOVA F-test and the ``n_coefs``
        with the smallest p-values are kept.

        Parameters
        ----------
        X_fft : array, 2-D
            Fourier coefficients, one column per coefficient.
        y : array-like
            Class labels passed to ``f_classif``.
        n_coefs : int
            Number of coefficients to keep.
        n_timestamps : int
            Not used by this method.

        Returns
        -------
        support : array of int
            Indices of the selected columns.

        Raises
        ------
        ValueError
            If every column of ``X_fft`` is constant.
        """
        if n_coefs >= X_fft.shape[1]:
            return np.arange(n_coefs)

        # Indices of the columns whose variance is not (numerically) zero.
        varying = np.flatnonzero(~np.isclose(X_fft.var(axis=0), 0))

        if varying.size == 0:
            raise ValueError("All the Fourier coefficients are constant. "
                             "Your input data is weirdly homogeneous.")

        if varying.size < n_coefs:
            # Fewer usable columns than requested: keep all of them.
            warn("The number of non constant Fourier coefficients ({0}) "
                 "is lower than the number of coefficients to keep ({1}). "
                 "The number of coefficients to keep is truncated to {2}"
                 ".".format(varying.size, n_coefs, varying.size))
            return varying

        _, p_values = f_classif(X_fft[:, varying], y)
        best_first = np.argsort(p_values)
        return varying[best_first[:n_coefs]]

开发者ID:johannfaouzi，项目名称:pyts，代码行数:22，代码来源:dft.py

示例6: _compute_expected_results

# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import f_classif [as 别名]
def _compute_expected_results(X, y=None, n_coefs=None, drop_sum=False,
                              anova=False, norm_mean=False, norm_std=False):
    """Compute the expected results.

    Parameters
    ----------
    X : array-like, 2-D
        Input series, one per row.  The caller's array is never modified.
    y : array-like or None
        Class labels; only used when ``anova`` is true.
    n_coefs : int or None
        Number of columns to return; ``None`` returns all of them.
    drop_sum : bool
        If true, the first column (real part of the first Fourier
        coefficient) is dropped as well.
    anova : bool
        If true, keep the ``n_coefs`` columns with the smallest ``f_classif``
        p-values instead of the first ``n_coefs`` columns.
    norm_mean : bool
        Subtract each row's mean before the transform.
    norm_std : bool
        Divide each row by its standard deviation before the transform.

    Returns
    -------
    array, 2-D
        Interleaved real/imaginary Fourier coefficients, one row per sample.
    """
    X = np.asarray(X)
    # Normalise out of place: the in-place operators would overwrite the
    # caller's array (np.asarray does not copy) and fail on integer input.
    if norm_mean:
        X = X - X.mean(axis=1)[:, None]
    if norm_std:
        X = X / X.std(axis=1)[:, None]
    # Take the row count from the data instead of a module-level global.
    n_samples = X.shape[0]
    X_fft = np.fft.rfft(X)
    # Stack real parts over imaginary parts, then read back column-major so
    # each row becomes re_0, im_0, re_1, im_1, ... for one sample.
    X_fft = np.vstack([np.real(X_fft), np.imag(X_fft)])
    X_fft = X_fft.reshape(n_samples, -1, order='F')
    # Column 1 (im_0) and the last column (imaginary part of the last
    # coefficient) are always removed.
    if drop_sum:
        X_fft = X_fft[:, 2:-1]
    else:
        X_fft = np.hstack([X_fft[:, :1], X_fft[:, 2:-1]])
    if n_coefs is None:
        return X_fft
    if anova:
        _, p = f_classif(X_fft, y)
        support = np.argsort(p)[:n_coefs]
        return X_fft[:, support]
    return X_fft[:, :n_coefs]

开发者ID:johannfaouzi，项目名称:pyts，代码行数:26，代码来源:test_dft.py

示例7: find_best_feature_selections

# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import f_classif [as 别名]
def find_best_feature_selections(X,y):
    """Compare several univariate feature selections on the same data.

    Six reduced versions of ``X`` are built (chi2 and f_classif scoring, with
    either a fixed feature count or a percentile), the shape of the first one
    is printed, and each reduced matrix is handed to ``train_and_test``
    together with ``y``.
    """
    # select the best features using different techniques
    selectors = (
        SelectKBest(chi2, k=80),
        SelectPercentile(chi2, percentile=20),
        SelectKBest(f_classif, k=80),  # this one has the best performance
        SelectPercentile(f_classif, percentile=20),
        SelectKBest(f_classif, k=70),
        SelectKBest(f_classif, k=60),
    )
    # All selections are computed before any of them is evaluated.
    reduced_sets = [selector.fit_transform(X, y) for selector in selectors]

    print (reduced_sets[0].shape)
    for reduced in reduced_sets:
        train_and_test(reduced, y)

################################PARAMETER  Selected################################
#TODO some problem happens when using the parameter max_leaf_nodes in Dtree and RandomForest

开发者ID:ririhedou，项目名称:dr_droid，代码行数:27，代码来源:GetMLPara.py

示例8: test_clone

# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import f_classif [as 别名]
def test_clone():
    """Verify that cloning an estimator produces a separate, equal copy.

    The clone must be a different object from the estimator it was made
    from; for a scalar ``alpha`` its parameters must also compare equal.
    """
    from sklearn.feature_selection import SelectFpr, f_classif

    # Case 1: scalar alpha -- check identity and parameter equality.
    scalar_alpha = SelectFpr(f_classif, alpha=0.1)
    scalar_alpha_copy = clone(scalar_alpha)
    assert_true(scalar_alpha is not scalar_alpha_copy)
    assert_equal(scalar_alpha.get_params(), scalar_alpha_copy.get_params())

    # Case 2: array-valued alpha -- only identity is checked.
    array_alpha = SelectFpr(f_classif, alpha=np.zeros((10, 2)))
    array_alpha_copy = clone(array_alpha)
    assert_true(array_alpha is not array_alpha_copy)

开发者ID:alvarobartt，项目名称:twitter-stock-recommendation，代码行数:18，代码来源:test_base.py

示例9: univariate_feature_selection

# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import f_classif [as 别名]
def univariate_feature_selection(mode,predictors,target):
    """Return the univariate p-value of every predictor.

    Parameters
    ----------
    mode : str
        Scoring function to use: ``'f_regression'``, ``'f_classif'`` or
        ``'chi2'``.
    predictors : array-like
        Feature matrix passed to the selector's ``fit``.
    target : array-like
        Target values passed to the selector's ``fit``.

    Returns
    -------
    The ``pvalues_`` attribute of the fitted ``SelectPercentile``.

    Raises
    ------
    ValueError
        If ``mode`` is not one of the supported names (previously this
        surfaced as an unbound-local error).
    """
    if mode == 'f_regression':
        score_func = f_regression
    elif mode == 'f_classif':
        score_func = f_classif
    elif mode == 'chi2':
        score_func = chi2
    else:
        raise ValueError(
            "Unsupported mode {!r}; expected 'f_regression', 'f_classif' "
            "or 'chi2'".format(mode))

    # percentile is passed by keyword: recent scikit-learn releases reject
    # it as a positional argument.  100 keeps every feature; only the
    # p-values are of interest, so fit() suffices.
    fselect = SelectPercentile(score_func, percentile=100)
    fselect.fit(predictors, target)

    return fselect.pvalues_

开发者ID:dataiku，项目名称:dataiku-contrib，代码行数:16，代码来源:feature_selection.py

示例10: fit

# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import f_classif [as 别名]
def fit(self, X, y):
        """Fit the feature selector and one classifier per random subsample.

        A ``SelectKBest(f_classif)`` selector keeping ``self.max_features``
        features is fitted on ``(X, y)`` and stored in ``self.selector``.
        Then ``self.n_estimators`` subsamples of ``self.max_samples`` rows are
        drawn by shuffling the row indices, and ``self._prepare_classifier``
        is mapped over the resulting ``(X_subset, y_subset)`` tuples in a
        thread pool.  The results are stored in ``self.clf_list``.

        Parameters
        ----------
        X : array-like
            Training features.
        y : array
            Training labels.
            NOTE(review): must support indexing with a list of row indices
            (e.g. a numpy array) -- confirm against callers.
        """
        self.selector = SelectKBest(f_classif, k=self.max_features)
        self.selector.fit(X, y)

        X_train = self.selector.transform(X)
        y_train = y

        # random.shuffle needs a mutable sequence; on Python 3 a bare range
        # object raises TypeError, so materialise the indices as a list.
        idx = list(range(len(y_train)))
        param_list = []
        for _ in range(self.n_estimators):
            random.shuffle(idx)
            subset = idx[:self.max_samples]
            param_list.append((X_train[subset], y_train[subset]))

        pool = ThreadPool(cpu_count())
        try:
            self.clf_list = pool.map(self._prepare_classifier, param_list)
        finally:
            # Always release the worker threads, even if a classifier fails.
            pool.close()
            pool.join()

        # NOTE: a disabled second stage (a RandomForestClassifier trained on
        # the stacked predict_proba outputs of clf_list) used to sit here as
        # a dead string literal; it was removed as commented-out code.

开发者ID:daniellerch，项目名称:aletheia，代码行数:37，代码来源:models.py

示例11: test_pipeline_methods_anova

# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import f_classif [as 别名]
def test_pipeline_methods_anova():
    """Smoke-test the pipeline methods with ANOVA selection + logistic regression.

    The pipeline is fitted on the iris data and each prediction/scoring
    method is called once; the test passes if none of them raises.
    """
    iris = load_iris()
    X, y = iris.data, iris.target

    # Anova filter keeping two features, followed by LogisticRegression.
    steps = [
        ('anova', SelectKBest(f_classif, k=2)),
        ('logistic', LogisticRegression()),
    ]
    pipe = Pipeline(steps)
    pipe.fit(X, y)

    for method_name in ('predict', 'predict_proba', 'predict_log_proba'):
        getattr(pipe, method_name)(X)
    pipe.score(X, y)

开发者ID:PacktPublishing，项目名称:Mastering-Elasticsearch-7.0，代码行数:16，代码来源:test_pipeline.py

示例12: test_clone_2

# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import f_classif [as 别名]
def test_clone_2():
    """Check that ``clone`` does not carry over ad-hoc attributes.

    An extra attribute is attached to an estimator after construction; the
    clone made from that estimator must not have it.
    """
    from sklearn.feature_selection import SelectFpr, f_classif

    estimator = SelectFpr(f_classif, alpha=0.1)
    # Attribute added by hand, i.e. not a constructor parameter.
    estimator.own_attribute = "test"

    cloned = clone(estimator)
    has_extra = hasattr(cloned, "own_attribute")
    assert not has_extra

开发者ID:PacktPublishing，项目名称:Mastering-Elasticsearch-7.0，代码行数:14，代码来源:test_base.py

示例13: test_export_pipeline

# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import f_classif [as 别名]
def test_export_pipeline():
    """Assert that export_pipeline() generates the expected source code for a fixed pipeline."""

    # Textual form of the pipeline under test; the adjacent literals are
    # concatenated into one string and parsed below, so they must stay as is.
    pipeline_string = (
        'KNeighborsClassifier(CombineDFs('
        'DecisionTreeClassifier(input_matrix, DecisionTreeClassifier__criterion=gini, '
        'DecisionTreeClassifier__max_depth=8,DecisionTreeClassifier__min_samples_leaf=5,'
        'DecisionTreeClassifier__min_samples_split=5),SelectPercentile(input_matrix, SelectPercentile__percentile=20))'
        'KNeighborsClassifier__n_neighbors=10, '
        'KNeighborsClassifier__p=1,KNeighborsClassifier__weights=uniform'
    )

    # Build the individual from its string form using tpot_obj's primitive set.
    pipeline = creator.Individual.from_string(pipeline_string, tpot_obj._pset)
    # Exact text the exporter is expected to produce. It is compared with ==,
    # so every character (including indentation and the escaped backslash)
    # is significant.
    expected_code = """import numpy as np
import pandas as pd
from sklearn.feature_selection import SelectPercentile, f_classif
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline, make_union
from sklearn.tree import DecisionTreeClassifier
from tpot.builtins import StackingEstimator

# NOTE: Make sure that the outcome column is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1)
training_features, testing_features, training_target, testing_target = \\
            train_test_split(features, tpot_data['target'], random_state=None)

exported_pipeline = make_pipeline(
    make_union(
        StackingEstimator(estimator=DecisionTreeClassifier(criterion="gini", max_depth=8, min_samples_leaf=5, min_samples_split=5)),
        SelectPercentile(score_func=f_classif, percentile=20)
    ),
    KNeighborsClassifier(n_neighbors=10, p=1, weights="uniform")
)

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
"""
    # The export is called with default options only.
    assert expected_code == export_pipeline(pipeline, tpot_obj.operators, tpot_obj._pset)

开发者ID:EpistasisLab，项目名称:tpot，代码行数:42，代码来源:export_tests.py

示例14: test_export_pipeline_3

# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import f_classif [as 别名]
def test_export_pipeline_3():
    """Assert that export_pipeline() generates the expected source code for a fixed simple pipeline with a preprocessor."""

    # SelectPercentile is the preprocessing step feeding DecisionTreeClassifier.
    pipeline_string = (
        'DecisionTreeClassifier(SelectPercentile(input_matrix, SelectPercentile__percentile=20),'
        'DecisionTreeClassifier__criterion=gini, DecisionTreeClassifier__max_depth=8,'
        'DecisionTreeClassifier__min_samples_leaf=5, DecisionTreeClassifier__min_samples_split=5)'
    )
    # Build the individual from its string form using tpot_obj's primitive set.
    pipeline = creator.Individual.from_string(pipeline_string, tpot_obj._pset)

    # Exact text the exporter is expected to produce. It is compared with ==,
    # so every character (including indentation and the escaped backslash)
    # is significant.
    expected_code = """import numpy as np
import pandas as pd
from sklearn.feature_selection import SelectPercentile, f_classif
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.tree import DecisionTreeClassifier

# NOTE: Make sure that the outcome column is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1)
training_features, testing_features, training_target, testing_target = \\
            train_test_split(features, tpot_data['target'], random_state=None)

exported_pipeline = make_pipeline(
    SelectPercentile(score_func=f_classif, percentile=20),
    DecisionTreeClassifier(criterion="gini", max_depth=8, min_samples_leaf=5, min_samples_split=5)
)

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
"""
    # The export is called with default options only.
    assert expected_code == export_pipeline(pipeline, tpot_obj.operators, tpot_obj._pset)

开发者ID:EpistasisLab，项目名称:tpot，代码行数:34，代码来源:export_tests.py

示例15: test_export_pipeline_6

# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import f_classif [as 别名]
def test_export_pipeline_6():
    """Assert that export_pipeline() generates the expected source code when random_state and data_file_path are given."""

    # SelectPercentile is the preprocessing step feeding DecisionTreeClassifier.
    pipeline_string = (
        'DecisionTreeClassifier(SelectPercentile(input_matrix, SelectPercentile__percentile=20),'
        'DecisionTreeClassifier__criterion=gini, DecisionTreeClassifier__max_depth=8,'
        'DecisionTreeClassifier__min_samples_leaf=5, DecisionTreeClassifier__min_samples_split=5)'
    )
    # Build the individual from its string form using tpot_obj's primitive set.
    pipeline = creator.Individual.from_string(pipeline_string, tpot_obj._pset)
    # Exact text the exporter is expected to produce: 'test_path' and 42 below
    # mirror the data_file_path and random_state arguments passed further down.
    # The text is compared with ==, so every character is significant.
    expected_code = """import numpy as np
import pandas as pd
from sklearn.feature_selection import SelectPercentile, f_classif
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.tree import DecisionTreeClassifier
from tpot.export_utils import set_param_recursive

# NOTE: Make sure that the outcome column is labeled 'target' in the data file
tpot_data = pd.read_csv('test_path', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1)
training_features, testing_features, training_target, testing_target = \\
            train_test_split(features, tpot_data['target'], random_state=42)

exported_pipeline = make_pipeline(
    SelectPercentile(score_func=f_classif, percentile=20),
    DecisionTreeClassifier(criterion="gini", max_depth=8, min_samples_leaf=5, min_samples_split=5)
)
# Fix random state for all the steps in exported pipeline
set_param_recursive(exported_pipeline.steps, 'random_state', 42)

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
"""
    # Export with an explicit seed and data path; both must show up in the code.
    exported_code = export_pipeline(pipeline, tpot_obj.operators,
                                    tpot_obj._pset, random_state=42,
                                    data_file_path='test_path')

    assert expected_code == exported_code

开发者ID:EpistasisLab，项目名称:tpot，代码行数:40，代码来源:export_tests.py

注：本文中的sklearn.feature_selection.f_classif方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。