This article collects typical usage examples of the Python method sklearn.feature_selection.SelectFwe.fit. If you have been wondering what SelectFwe.fit does, how to use it, or what it looks like in practice, the curated examples below may help. You can also read further about the containing class, sklearn.feature_selection.SelectFwe.
The following presents 8 code examples of SelectFwe.fit, sorted by popularity by default.
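Before the examples, here is a minimal, self-contained sketch of the typical SelectFwe.fit workflow (the synthetic dataset is invented purely for illustration): fit the selector on features and labels, then transform to keep only the columns that pass the family-wise error test.

from sklearn.datasets import make_classification
from sklearn.feature_selection import SelectFwe, f_classif

# Synthetic problem: 20 features, of which 3 are informative (illustrative values).
X, y = make_classification(n_samples=200, n_features=20, n_informative=3,
                           random_state=0)

selector = SelectFwe(f_classif, alpha=0.01)
selector.fit(X, y)                 # computes per-feature scores and p-values
X_reduced = selector.transform(X)  # keeps only features passing the FWE test
print(selector.get_support(indices=True), X_reduced.shape)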
Example 1: test_boundary_case_ch2
# Required import: from sklearn.feature_selection import SelectFwe [as alias]
# Or: from sklearn.feature_selection.SelectFwe import fit [as alias]
def test_boundary_case_ch2():
    # Test boundary case, and always aim to select 1 feature.
    X = np.array([[10, 20], [20, 20], [20, 30]])
    y = np.array([[1], [0], [0]])

    scores, pvalues = chi2(X, y)
    assert_array_almost_equal(scores, np.array([4.0, 0.71428571]))
    assert_array_almost_equal(pvalues, np.array([0.04550026, 0.39802472]))

    filter_fdr = SelectFdr(chi2, alpha=0.1)
    filter_fdr.fit(X, y)
    support_fdr = filter_fdr.get_support()
    assert_array_equal(support_fdr, np.array([True, False]))

    filter_kbest = SelectKBest(chi2, k=1)
    filter_kbest.fit(X, y)
    support_kbest = filter_kbest.get_support()
    assert_array_equal(support_kbest, np.array([True, False]))

    filter_percentile = SelectPercentile(chi2, percentile=50)
    filter_percentile.fit(X, y)
    support_percentile = filter_percentile.get_support()
    assert_array_equal(support_percentile, np.array([True, False]))

    filter_fpr = SelectFpr(chi2, alpha=0.1)
    filter_fpr.fit(X, y)
    support_fpr = filter_fpr.get_support()
    assert_array_equal(support_fpr, np.array([True, False]))

    filter_fwe = SelectFwe(chi2, alpha=0.1)
    filter_fwe.fit(X, y)
    support_fwe = filter_fwe.get_support()
    assert_array_equal(support_fwe, np.array([True, False]))
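Under the hood, SelectFwe applies a Bonferroni-style correction: a feature is kept when its p-value falls below alpha divided by the number of features. A quick sketch verifying this against the filter_fwe object fitted in the example above (this check is an addition, not part of the original test):

import numpy as np

# Bonferroni criterion: keep feature j when pvalues_[j] < alpha / n_features.
manual_mask = filter_fwe.pvalues_ < 0.1 / len(filter_fwe.pvalues_)
assert np.array_equal(manual_mask, filter_fwe.get_support())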
Example 2: test_select_fwe_4
# Required import: from sklearn.feature_selection import SelectFwe [as alias]
# Or: from sklearn.feature_selection.SelectFwe import fit [as alias]
def test_select_fwe_4():
    """Ensure that the TPOT select fwe outputs the same result as sklearn fwe when 0.001 < alpha < 0.05"""
    tpot_obj = TPOT()
    non_feature_columns = ['class', 'group', 'guess']
    training_features = training_testing_data.loc[training_testing_data['group'] == 'training'].drop(non_feature_columns, axis=1)
    training_class_vals = training_testing_data.loc[training_testing_data['group'] == 'training', 'class'].values

    with warnings.catch_warnings():
        warnings.simplefilter('ignore', category=UserWarning)
        selector = SelectFwe(f_classif, alpha=0.042)
        selector.fit(training_features, training_class_vals)
        mask = selector.get_support(True)
    mask_cols = list(training_features.iloc[:, mask].columns) + non_feature_columns

    assert np.array_equal(tpot_obj._select_fwe(training_testing_data, 0.042), training_testing_data[mask_cols])
Example 3: test_select_heuristics_classif
# Required import: from sklearn.feature_selection import SelectFwe [as alias]
# Or: from sklearn.feature_selection.SelectFwe import fit [as alias]
def test_select_heuristics_classif():
    # Test whether the relative univariate feature selection
    # gets the correct items in a simple classification problem
    # with the fdr, fwe and fpr heuristics
    X, y = make_classification(
        n_samples=200,
        n_features=20,
        n_informative=3,
        n_redundant=2,
        n_repeated=0,
        n_classes=8,
        n_clusters_per_class=1,
        flip_y=0.0,
        class_sep=10,
        shuffle=False,
        random_state=0,
    )

    univariate_filter = SelectFwe(f_classif, alpha=0.01)
    X_r = univariate_filter.fit(X, y).transform(X)
    gtruth = np.zeros(20)
    gtruth[:5] = 1
    for mode in ["fdr", "fpr", "fwe"]:
        X_r2 = GenericUnivariateSelect(f_classif, mode=mode, param=0.01).fit(X, y).transform(X)
        assert_array_equal(X_r, X_r2)
        support = univariate_filter.get_support()
        assert_array_almost_equal(support, gtruth)
Example 4: test_select_fwe_classif
# Required import: from sklearn.feature_selection import SelectFwe [as alias]
# Or: from sklearn.feature_selection.SelectFwe import fit [as alias]
def test_select_fwe_classif():
    """
    Test whether the relative univariate feature selection
    gets the correct items in a simple classification problem
    with the fwe heuristic
    """
    X, Y = make_classification(
        n_samples=200,
        n_features=20,
        n_informative=3,
        n_redundant=2,
        n_repeated=0,
        n_classes=8,
        n_clusters_per_class=1,
        flip_y=0.0,
        class_sep=10,
        shuffle=False,
        random_state=0,
    )

    univariate_filter = SelectFwe(f_classif, alpha=0.01)
    X_r = univariate_filter.fit(X, Y).transform(X)
    X_r2 = GenericUnivariateSelect(f_classif, mode="fwe", param=0.01).fit(X, Y).transform(X)
    assert_array_equal(X_r, X_r2)
    support = univariate_filter.get_support()
    gtruth = np.zeros(20)
    gtruth[:5] = 1
    assert np.sum(np.abs(support - gtruth)) < 2
Example 5: _select_fwe
# Required import: from sklearn.feature_selection import SelectFwe [as alias]
# Or: from sklearn.feature_selection.SelectFwe import fit [as alias]
def _select_fwe(self, input_df, alpha):
    """Uses Scikit-learn's SelectFwe feature selection to filter the subset of features
    according to p-values corresponding to the family-wise error rate

    Parameters
    ----------
    input_df: pandas.DataFrame {n_samples, n_features+['class', 'group', 'guess']}
        Input DataFrame to perform feature selection on
    alpha: float in the range [0.001, 0.05]
        The highest uncorrected p-value for features to keep

    Returns
    -------
    subsetted_df: pandas.DataFrame {n_samples, n_filtered_features + ['guess', 'group', 'class']}
        Returns a DataFrame containing the 'best' features

    """
    training_features = input_df.loc[input_df['group'] == 'training'].drop(['class', 'group', 'guess'], axis=1)
    training_class_vals = input_df.loc[input_df['group'] == 'training', 'class'].values

    # Force 0.001 <= alpha <= 0.05
    if alpha > 0.05:
        alpha = 0.05
    elif alpha <= 0.001:
        alpha = 0.001

    if len(training_features.columns.values) == 0:
        return input_df.copy()

    with warnings.catch_warnings():
        # Ignore warnings about constant features
        warnings.simplefilter('ignore', category=UserWarning)
        selector = SelectFwe(f_classif, alpha=alpha)
        selector.fit(training_features, training_class_vals)
        mask = selector.get_support(True)
    mask_cols = list(training_features.iloc[:, mask].columns) + ['guess', 'class', 'group']
    return input_df[mask_cols].copy()
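To make the expected DataFrame layout concrete, here is a hypothetical toy frame in the TPOT convention (feature columns plus 'class', 'group', 'guess'; the names x0..x4 and all values are invented), run through the same standalone SelectFwe filtering that _select_fwe performs internally:

import numpy as np
import pandas as pd
from sklearn.feature_selection import SelectFwe, f_classif

# Toy frame in the TPOT layout; feature names and data are made up.
rng = np.random.RandomState(0)
df = pd.DataFrame(rng.rand(20, 5), columns=['x%d' % i for i in range(5)])
df['class'] = rng.randint(0, 2, 20)
df['group'] = 'training'
df['guess'] = 0

features = df.drop(['class', 'group', 'guess'], axis=1)
selector = SelectFwe(f_classif, alpha=0.05)
selector.fit(features, df['class'].values)
print(list(features.columns[selector.get_support()]))  # surviving feature columns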
Example 6: test_select_fwe_regression
# Required import: from sklearn.feature_selection import SelectFwe [as alias]
# Or: from sklearn.feature_selection.SelectFwe import fit [as alias]
def test_select_fwe_regression():
    # Test whether the relative univariate feature selection
    # gets the correct items in a simple regression problem
    # with the fwe heuristic
    X, y = make_regression(n_samples=200, n_features=20, n_informative=5, shuffle=False, random_state=0)

    univariate_filter = SelectFwe(f_regression, alpha=0.01)
    X_r = univariate_filter.fit(X, y).transform(X)
    X_r2 = GenericUnivariateSelect(f_regression, mode="fwe", param=0.01).fit(X, y).transform(X)
    assert_array_equal(X_r, X_r2)
    support = univariate_filter.get_support()
    gtruth = np.zeros(20)
    gtruth[:5] = 1
    assert_array_equal(support[:5], np.ones((5,), dtype=bool))
    assert_less(np.sum(support[5:] == 1), 2)
Example 7: test_select_fwe_regression
# Required import: from sklearn.feature_selection import SelectFwe [as alias]
# Or: from sklearn.feature_selection.SelectFwe import fit [as alias]
def test_select_fwe_regression():
    """
    Test whether the relative univariate feature selection
    gets the correct items in a simple regression problem
    with the fwe heuristic
    """
    X, Y = make_regression(n_samples=200, n_features=20,
                           n_informative=5, shuffle=False, random_state=0)

    univariate_filter = SelectFwe(f_regression, alpha=0.01)
    X_r = univariate_filter.fit(X, Y).transform(X)
    X_r2 = GenericUnivariateSelect(f_regression, mode='fwe',
                                   param=0.01).fit(X, Y).transform(X)
    assert_array_equal(X_r, X_r2)
    support = univariate_filter.get_support()
    gtruth = np.zeros(20)
    gtruth[:5] = 1
    assert (support[:5] == 1).all()
    assert np.sum(support[5:] == 1) < 2
Example 8: train_test_split
# Required import: from sklearn.feature_selection import SelectFwe [as alias]
# Or: from sklearn.feature_selection.SelectFwe import fit [as alias]
import pandas as pd
from sklearn.model_selection import train_test_split  # sklearn.cross_validation was removed in scikit-learn 0.20
from sklearn.feature_selection import SelectFwe
from sklearn.feature_selection import f_classif
from sklearn.neighbors import KNeighborsClassifier

# NOTE: Make sure that the class is labeled 'class' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR')
training_indices, testing_indices = train_test_split(tpot_data.index, stratify=tpot_data['class'].values, train_size=0.75, test_size=0.25)

result1 = tpot_data.copy()

# Select features with SelectFwe, fitted on the training rows only
training_features = result1.loc[training_indices].drop(['class', 'group', 'guess'], axis=1)
training_class_vals = result1.loc[training_indices, 'class'].values
if len(training_features.columns.values) == 0:
    result1 = result1.copy()
else:
    selector = SelectFwe(f_classif, alpha=0.05)
    selector.fit(training_features.values, training_class_vals)
    mask = selector.get_support(True)
    mask_cols = list(training_features.iloc[:, mask].columns) + ['class']
    result1 = result1[mask_cols]

# Perform classification with a k-nearest neighbor classifier
knnc2 = KNeighborsClassifier(n_neighbors=min(8, len(training_indices)))
knnc2.fit(result1.loc[training_indices].drop('class', axis=1).values, result1.loc[training_indices, 'class'].values)
result2 = result1.copy()
result2['knnc2-classification'] = knnc2.predict(result2.drop('class', axis=1).values)
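The export stops at prediction. As a possible follow-up (not part of the TPOT output), the stored predictions can be scored against the true labels on the held-out rows:

from sklearn.metrics import accuracy_score

# Compare predicted and true labels on the testing split.
held_out = result2.loc[testing_indices]
print(accuracy_score(held_out['class'], held_out['knnc2-classification']))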