Python feature_selection.SelectFdr类代码示例

本文整理汇总了Python中sklearn.feature_selection.SelectFdr类的典型用法代码示例。如果您正苦于以下问题：Python SelectFdr类的具体用法？Python SelectFdr怎么用？Python SelectFdr使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。

在下文中一共展示了SelectFdr类的7个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_boundary_case_ch2

def test_boundary_case_ch2():
    """Boundary case: every univariate selector should keep only feature 0.

    The chi2 scores and p-values are pinned first; then each selection
    strategy (FDR, k-best, percentile, FPR, FWE) is fitted on the same data
    and is expected to report the support mask ``[True, False]``.
    """
    X = np.array([[10, 20], [20, 20], [20, 30]])
    y = np.array([[1], [0], [0]])

    # Check the raw statistics before exercising the selectors, so that a
    # failure further down can be told apart from a change in chi2 itself.
    scores, pvalues = chi2(X, y)
    assert_array_almost_equal(scores, np.array([4.0, 0.71428571]))
    assert_array_almost_equal(pvalues, np.array([0.04550026, 0.39802472]))

    expected_support = np.array([True, False])
    selectors = (
        SelectFdr(chi2, alpha=0.1),
        SelectKBest(chi2, k=1),
        SelectPercentile(chi2, percentile=50),
        SelectFpr(chi2, alpha=0.1),
        SelectFwe(chi2, alpha=0.1),
    )
    for selector in selectors:
        selector.fit(X, y)
        assert_array_equal(selector.get_support(), expected_support)

开发者ID:nelson-liu，项目名称:scikit-learn，代码行数:32，代码来源:test_feature_select.py

示例2: single_fdr

    def single_fdr(alpha, n_informative, random_state):
        """Return the empirical false discovery rate of one SelectFdr run.

        A regression problem with 20 features is generated, ``SelectFdr`` is
        fitted with the given ``alpha``, and its output is checked against
        ``GenericUnivariateSelect`` in ``"fdr"`` mode. The result is
        ``false positives / (true positives + false positives)``, or ``0.0``
        when no feature beyond the first ``n_informative`` is selected.
        """
        X, y = make_regression(
            n_samples=150,
            n_features=20,
            n_informative=n_informative,
            shuffle=False,
            random_state=random_state,
            noise=10,
        )

        # Selecting no feature at all (low alpha or very noisy data) can emit
        # warnings; they are recorded here instead of being shown.
        with warnings.catch_warnings(record=True):
            selector = SelectFdr(f_regression, alpha=alpha)
            selected = selector.fit(X, y).transform(X)
            generic = GenericUnivariateSelect(f_regression, mode="fdr", param=alpha)
            selected_generic = generic.fit(X, y).transform(X)

        assert_array_equal(selected, selected_generic)

        mask = selector.get_support()
        # The slices treat the first ``n_informative`` columns as the truly
        # informative ones (the data is generated with shuffle=False).
        true_hits = np.sum(mask[:n_informative] == 1)
        false_hits = np.sum(mask[n_informative:] == 1)

        if false_hits == 0:
            return 0.0
        return false_hits / (true_hits + false_hits)

开发者ID:nelson-liu，项目名称:scikit-learn，代码行数:26，代码来源:test_feature_select.py

示例3: test_select_fdr_classif

def test_select_fdr_classif():
    """
    Check univariate feature selection with the FDR heuristic on a simple
    classification problem: SelectFdr and GenericUnivariateSelect(mode="fdr")
    must agree, and exactly the first five features must be selected.
    """
    alpha = 0.0001
    n_features = 20

    X, Y = make_classification(
        n_samples=200,
        n_features=n_features,
        n_informative=3,
        n_redundant=2,
        n_repeated=0,
        n_classes=8,
        n_clusters_per_class=1,
        flip_y=0.0,
        class_sep=10,
        shuffle=False,
        random_state=0,
    )

    selector = SelectFdr(f_classif, alpha=alpha)
    selector.fit(X, Y)
    reduced = selector.transform(X)

    generic = GenericUnivariateSelect(f_classif, mode="fdr", param=alpha)
    reduced_generic = generic.fit(X, Y).transform(X)
    assert_array_equal(reduced, reduced_generic)

    # Expected mask: ones for the five leading columns, zeros elsewhere.
    expected = np.zeros(n_features)
    expected[:5] = 1
    assert_array_equal(selector.get_support(), expected)

开发者ID:nellaivijay，项目名称:scikit-learn，代码行数:28，代码来源:test_feature_select.py

示例4: test_select_fdr_regression

def test_select_fdr_regression():
    """
    Check univariate feature selection with the FDR heuristic on a simple
    regression problem: SelectFdr and GenericUnivariateSelect(mode="fdr")
    must agree, and exactly the first five features must be selected.
    """
    alpha = 0.01
    n_features = 20

    X, Y = make_regression(
        n_samples=200,
        n_features=n_features,
        n_informative=5,
        shuffle=False,
        random_state=0,
    )

    selector = SelectFdr(f_regression, alpha=alpha)
    selector.fit(X, Y)
    reduced = selector.transform(X)

    generic = GenericUnivariateSelect(f_regression, mode="fdr", param=alpha)
    reduced_generic = generic.fit(X, Y).transform(X)
    assert_array_equal(reduced, reduced_generic)

    # Expected mask: ones for the five leading columns, zeros elsewhere.
    expected = np.zeros(n_features)
    expected[:5] = 1
    assert_array_equal(selector.get_support(), expected)

开发者ID:nellaivijay，项目名称:scikit-learn，代码行数:16，代码来源:test_feature_select.py

示例5: featureFitting

def featureFitting(filename, X, y, featureNames, optimalFlag, kbest=20, alpha=0.05, model=None):
    '''
    Keep the K best features among those that pass an FDR filter.

    The features are first filtered with SelectFdr(alpha=alpha); SelectKBest(k=kbest)
    is then fitted on the surviving features. The columns of the CSV at `filename`
    that correspond to the finally kept features are written to "REDUCED_Feat.csv".

    Parameters:
        filename     - path of the CSV holding the feature matrix (first column is the index).
        X, y         - feature matrix and target used to fit both selectors.
        featureNames - names of the columns of X; must support indexing with an
                       integer array (e.g. a numpy array).
        optimalFlag  - accepted for interface compatibility; not used here.
        kbest        - value passed as `k` to SelectKBest.
        alpha        - value passed as `alpha` to SelectFdr.
        model        - accepted for interface compatibility; not used here.

    Returns:
        The reduced pandas DataFrame (the same data that is written to disk).
    '''
    # Stage 1: drop the features rejected by the FDR filter.
    fdr_filter = SelectFdr(alpha=alpha)
    X_fdr = fdr_filter.fit_transform(X, y)

    # Stage 2: rank what is left and keep the k best.
    k_selector = SelectKBest(k=kbest)
    k_selector.fit(X_fdr, y)

    # The K-best mask is relative to the FDR-reduced matrix, so it cannot be
    # applied to featureNames or to the CSV columns directly. Translate it
    # back to positions in the original feature set first.
    fdr_kept_indices = fdr_filter.get_support(indices=True)
    kept_indices = fdr_kept_indices[k_selector.get_support()]

    K_featnames = featureNames[kept_indices]
    print("K_featnames: %s" %(K_featnames))
    Reduced_df = pd.read_csv(filename, index_col=0)
    Reduced_df = Reduced_df[Reduced_df.columns[kept_indices]]
    Reduced_df.to_csv('REDUCED_Feat.csv')
    return Reduced_df

开发者ID:nadavrap，项目名称:ProFET，代码行数:18，代码来源:Model_trainer.py

示例6: svm_cv

def svm_cv(data, data_target):
    """Train a linear SVM on FDR-selected features and print test metrics.

    The data is split into train and test parts, a SelectFdr(f_classif)
    selector is fitted on the training part, and a linear SVC with
    probability estimates is trained on the selected features. The selector's
    support mask, the accuracy and the classification report on the test part
    are printed.

    Parameters:
        data: feature matrix.
        data_target: labels aligned with ``data``.

    Returns:
        None.
    """
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(data, data_target)
    print("*" * 79)
    print("Training...")
    selector = SelectFdr(f_classif)
    selector.fit(X_train, y_train)
    clf = svm.SVC(kernel='linear', probability=True)
    clf.fit(selector.transform(X_train), y_train)

    print("Testing...")
    # Transform the test set once and reuse it for both predictions.
    X_test_selected = selector.transform(X_test)
    pred = clf.predict(X_test_selected)
    # Probabilities come from the classifier, not from the prediction array.
    probs = clf.predict_proba(X_test_selected)

    # Local names deliberately differ from the ``metrics`` functions they wrap.
    accuracy = metrics.accuracy_score(y_test, pred)
    report = metrics.classification_report(y_test, pred)
    print(selector.get_support())
    print(accuracy)
    print(report)

    # Column 1 of ``probs`` is used as the score, as in the original code;
    # NOTE(review): this presumably assumes a binary problem - confirm.
    # The curve is computed but not returned or printed in the visible code.
    precision, recall, thresholds = precision_recall_curve(y_test, probs[:, 1])

开发者ID:jfortuna，项目名称:cs224u-project，代码行数:19，代码来源:househearing.py

示例7: str

    # Script fragment: load a training-set CSV, standardize the features,
    # filter them with SelectFdr, then run a model grid search on the result.
    Kcv=4 #Number of stratified folds for cross validation. More = slower, more accurate.
    # File name is appended to the user-supplied directory by plain string
    # concatenation below; the leading backslash is a Windows path separator.
    fileName = r'\trainingSetFeatures.csv'

    # filePath = r'E:\Dropbox\Dropbox\BioInformatics Lab\AA_Information\CODE\Feature_Extract\test_seq\Chap'
    # Ask the user for the directory that holds the training CSV.
    filePath = str(input('Input DIRRectory containing TrainingData csv '))

    ## features, labels, lb_encoder,featureNames = load_data(filename, 'file')
    features, labels, lb_encoder,featureNames = load_data(filePath+fileName, 'file')

    X, y = features, labels
    # Report the number of distinct labels and the shape of the feature matrix.
    print('len(set(y)',len(set(y)))
    print(X.shape,"X = samples, features")
    # copy=False asks the scaler to avoid copying the input.
    scale = StandardScaler(copy=False)
    X = scale.fit_transform(X)

    # No score function is passed to either selector, so the library default is used.
    FD = SelectFdr(alpha=0.0005)
    FD_K = SelectPercentile(percentile=70)
    # X is overwritten with the FDR-filtered matrix.
    X = FD.fit_transform(X,y)
    print(X.shape,"X post FDR alpha filter")
    # Second stage keeps the top 70 percent of the FDR-filtered features.
    # NOTE(review): the message below says "K-best" but the selector is SelectPercentile.
    X_FD = FD_K.fit_transform(X,y)
    print(X_FD.shape,"X post FDR+K-best alpha filter")

    print("\n BASE X models: \n")
    # NOTE(review): the grid search receives X (FDR filter only); X_FD is not
    # used anywhere in the visible code - confirm this is intended.
    ModelParam_GridSearch(X,y,cv=Kcv)
    # The string literal below is disabled code (a PCA variant of the same search).
    '''
    pca = PCA(n_components='mle')
    X_PCA = pca.fit_transform(X)
    print(X_PCA.shape,"X - PCA,mle")
    ModelParam_GridSearch(X_PCA,y,cv=Kcv)
    '''

开发者ID:MichaelDoron，项目名称:ProFET，代码行数:30，代码来源:PipeTasks.py

注：本文中的sklearn.feature_selection.SelectFdr类示例由纯净天空整理自GitHub/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。