Python feature_selection.SelectFpr类代码示例

本文整理汇总了Python中sklearn.feature_selection.SelectFpr类的典型用法代码示例。如果您正苦于以下问题：Python SelectFpr类的具体用法？Python SelectFpr怎么用？Python SelectFpr使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。

在下文中一共展示了SelectFpr类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_boundary_case_ch2

def test_boundary_case_ch2():
    """Check a chi2 boundary case across all univariate selectors.

    Every selector below is configured so that it is expected to keep
    exactly one feature (the first one) of the two-feature data set.
    """
    X = np.array([[10, 20], [20, 20], [20, 30]])
    y = np.array([[1], [0], [0]])

    # Pin the raw chi2 statistics before exercising the selectors.
    scores, pvalues = chi2(X, y)
    assert_array_almost_equal(scores, np.array([4.0, 0.71428571]))
    assert_array_almost_equal(pvalues, np.array([0.04550026, 0.39802472]))

    # Same order as the individual checks: FDR, k-best, percentile, FPR, FWE.
    selectors = (
        SelectFdr(chi2, alpha=0.1),
        SelectKBest(chi2, k=1),
        SelectPercentile(chi2, percentile=50),
        SelectFpr(chi2, alpha=0.1),
        SelectFwe(chi2, alpha=0.1),
    )
    for selector in selectors:
        selector.fit(X, y)
        assert_array_equal(selector.get_support(), np.array([True, False]))

开发者ID:nelson-liu，项目名称:scikit-learn，代码行数:32，代码来源:test_feature_select.py

示例2: select_with_fpr

def select_with_fpr(train, test):
  """Select features with ``SelectFpr`` and save the reduced data sets.

  Fits ``SelectFpr`` (alpha=0.001, default score function) on the training
  features, prints how many features were kept, and writes the reduced
  training and test frames to ``train_after_fpr.csv`` and
  ``test_after_fpr.csv`` in the current directory.

  Args:
    train: DataFrame with an ``ID`` column, a ``TARGET`` column and the
      feature columns.
    test: DataFrame with an ``ID`` column and the same feature columns.
  """
  train_data = train.drop('ID', axis=1)

  train_y = train_data['TARGET']
  # Pass ``axis`` by keyword: recent pandas releases reject the positional form.
  train_X = train_data.drop('TARGET', axis=1)

  fpr = SelectFpr(alpha=0.001)
  selected = fpr.fit_transform(train_X, train_y)

  print('Fpr выбрал {} признаков.'.format(selected.shape[1]))

  # Apply the selector's boolean mask to the exact columns it was fitted on,
  # so the names stay aligned wherever TARGET sits in the frame.
  features = train_X.columns[fpr.get_support()].tolist()

  new_train = train[['ID'] + features + ['TARGET']]
  new_train.to_csv('train_after_fpr.csv')

  new_test = test[['ID'] + features]
  new_test.to_csv('test_after_fpr.csv')

开发者ID:poketulhu，项目名称:happy_customers，代码行数:26，代码来源:features_selection.py

示例3: test_select_fpr_classif

def test_select_fpr_classif():
    """
    Check SelectFpr on a simple classification problem: it must agree
    with GenericUnivariateSelect in "fpr" mode and its support mask must
    mark exactly the first five of the twenty features.
    """
    X, Y = make_classification(
        n_samples=200,
        n_features=20,
        n_informative=3,
        n_redundant=2,
        n_repeated=0,
        n_classes=8,
        n_clusters_per_class=1,
        flip_y=0.0,
        class_sep=10,
        shuffle=False,
        random_state=0,
    )

    fpr_filter = SelectFpr(f_classif, alpha=0.0001)
    fpr_filter.fit(X, Y)
    X_fpr = fpr_filter.transform(X)

    generic_filter = GenericUnivariateSelect(f_classif, mode="fpr", param=0.0001)
    X_generic = generic_filter.fit(X, Y).transform(X)
    assert_array_equal(X_fpr, X_generic)

    # Expected mask: ones for the first five features, zeros elsewhere.
    expected_mask = np.zeros(20)
    expected_mask[:5] = 1
    assert_array_equal(fpr_filter.get_support(), expected_mask)

开发者ID:nellaivijay，项目名称:scikit-learn，代码行数:28，代码来源:test_feature_select.py

示例4: evaluate_model

def evaluate_model(classifier, data_records, class_labels, labels):
    """Cross-validate ``classifier`` on growing sets of top-scored attributes.

    Attributes are scored with ``f_classif`` through ``SelectFpr`` and ordered
    from the highest to the lowest score.  For every prefix size ``idx`` in
    ``range(2, n_attributes)`` the classifier is cross-validated (5 folds) on
    the ``idx`` best-scored columns.

    Args:
        classifier: estimator passed to ``cross_val_score``.
        data_records: 2-D array indexed as ``data_records[:, columns]``.
        class_labels: target values, one per row of ``data_records``.
        labels: attribute names, indexed by column position (printed only).

    Returns:
        Tuple ``(attribute_values, accuracy_values)``: the number of
        attributes used in each run and the matching accuracy in percent.
    """
    attribute_values = []
    accuracy_values = []

    # Scoring the attributes using F_test and false positive rate
    clf = SelectFpr(f_classif, alpha=0.9)
    clf.fit(data_records, class_labels)
    print(clf.scores_)
    print('\n')

    # Column indices ordered by descending score.  The previous code stored
    # each attribute's rank here and then used those ranks as column indices,
    # so the selected columns did not follow the scores.
    ranked_attr_indices = sorted(
        range(len(clf.scores_)),
        key=lambda col: clf.scores_[col],
        reverse=True,
    )

    # Performing a 5-fold cross validation against a varying number of
    # attributes. The attributes are chosen on the basis of their scores.
    for idx in range(2, len(ranked_attr_indices)):
        top_indices = ranked_attr_indices[:idx]
        filtered_records = data_records[:, top_indices]
        for attr_idx in top_indices:
            print(labels[attr_idx])
        validation_score = cross_validation.cross_val_score(classifier, filtered_records, class_labels, cv=5)
        # NOTE(review): this records the best fold rather than the mean of
        # the folds -- confirm that is the intended metric.
        accuracy = max(validation_score) * 100
        attribute_values.append(idx)
        accuracy_values.append(accuracy)
        print('Cross validation score - ' + str(idx) + ' attributes :' + str(validation_score) + '\n')

    return (attribute_values, accuracy_values)

开发者ID:jetsasank，项目名称:MachineLearning，代码行数:28，代码来源:EvaluateModel.py

示例5: test_clone_2

def test_clone_2():
    # Check that clone() does not copy everything.
    # An extra attribute is attached to a freshly built estimator; the
    # clone made afterwards is expected not to carry that attribute.

    from sklearn.feature_selection import SelectFpr, f_classif

    original = SelectFpr(f_classif, alpha=0.1)
    setattr(original, "own_attribute", "test")
    duplicate = clone(original)
    assert_false(hasattr(duplicate, "own_attribute"))

开发者ID:AlexisMignon，项目名称:scikit-learn，代码行数:12，代码来源:test_base.py

示例6: test_clone

def test_clone():
    """Check that clone returns a distinct estimator with equal parameters.

    An estimator is built and cloned in its original state (which, in
    this case, is also the current state of the estimator); the clone
    must be a different object whose parameters compare equal.

    """
    from sklearn.feature_selection import SelectFpr, f_classif

    original = SelectFpr(f_classif, alpha=0.1)
    duplicate = clone(original)
    assert_true(original is not duplicate)
    assert_equal(original.get_params(), duplicate.get_params())

开发者ID:c0ldlimit，项目名称:scikit-learn，代码行数:14，代码来源:test_base.py

示例7: test_clone

def test_clone():
    # Check that clone() yields an independent copy of an estimator.
    # The estimator is cloned in its original state (which, here, is also
    # its current state); the copy must be a different object with equal
    # parameters.

    from sklearn.feature_selection import SelectFpr, f_classif

    original = SelectFpr(f_classif, alpha=0.1)
    duplicate = clone(original)
    assert original is not duplicate
    assert_equal(original.get_params(), duplicate.get_params())

    # Repeat the identity check with an array-valued parameter.
    array_alpha = np.zeros((10, 2))
    original = SelectFpr(f_classif, alpha=array_alpha)
    duplicate = clone(original)
    assert original is not duplicate

开发者ID:chrisfilo，项目名称:scikit-learn，代码行数:16，代码来源:test_base.py

示例8: test_select_heuristics_regression

def test_select_heuristics_regression():
    # Test whether the relative univariate feature selection
    # gets the correct items in a simple regression problem
    # with the fpr, fdr or fwe heuristics
    X, y = make_regression(n_samples=200, n_features=20, n_informative=5, shuffle=False, random_state=0, noise=10)

    univariate_filter = SelectFpr(f_regression, alpha=0.01)
    X_r = univariate_filter.fit(X, y).transform(X)

    # The SelectFpr mask does not depend on the mode loop below, so it is
    # checked once: the first five features must all be kept and fewer than
    # three of the remaining ones may be selected.
    support = univariate_filter.get_support()
    # Use the builtin ``bool``: the ``np.bool`` alias was removed from NumPy.
    assert_array_equal(support[:5], np.ones((5,), dtype=bool))
    assert_less(np.sum(support[5:] == 1), 3)

    # Every heuristic must select the same columns as SelectFpr here.
    for mode in ["fdr", "fpr", "fwe"]:
        X_r2 = GenericUnivariateSelect(f_regression, mode=mode, param=0.01).fit(X, y).transform(X)
        assert_array_equal(X_r, X_r2)

开发者ID:nelson-liu，项目名称:scikit-learn，代码行数:16，代码来源:test_feature_select.py

示例9: test_select_fpr_regression

def test_select_fpr_regression():
    """
    Check SelectFpr on a simple regression problem: it must agree with
    GenericUnivariateSelect in "fpr" mode, keep the first five features
    and select fewer than three of the remaining ones.
    """
    X, Y = make_regression(n_samples=200, n_features=20, n_informative=5, shuffle=False, random_state=0)

    fpr_filter = SelectFpr(f_regression, alpha=0.01)
    fpr_filter.fit(X, Y)
    X_fpr = fpr_filter.transform(X)

    generic_filter = GenericUnivariateSelect(f_regression, mode="fpr", param=0.01)
    X_generic = generic_filter.fit(X, Y).transform(X)
    assert_array_equal(X_fpr, X_generic)

    mask = fpr_filter.get_support()
    # Reference mask for the first five features; the assertions below
    # check the mask slices directly instead of comparing against it.
    gtruth = np.zeros(20)
    gtruth[:5] = 1
    first_five_selected = (mask[:5] == 1).all()
    assert first_five_selected
    extra_selected = np.sum(mask[5:] == 1)
    assert extra_selected < 3

开发者ID:nellaivijay，项目名称:scikit-learn，代码行数:17，代码来源:test_feature_select.py

示例10: train_decisiontree_FPR

def train_decisiontree_FPR(configurationname, train_data, score_function, undersam=False, oversam=False, export=False):
    """Train a decision tree on the features kept by ``SelectFpr``.

    Steps: fit ``SelectFpr(score_function)`` on the training data, reduce
    the training matrix to the selected features, optionally resample it,
    fit a ``DecisionTreeClassifier`` and print its accuracy on the
    training data.

    Args:
        configurationname: label used in log output and in the exported
            ``.dot`` file name.
        train_data: 3-tuple ``(X_train, y_train, id_to_a_train)``; the third
            item is unpacked but not used here.
        score_function: scoring function handed to ``SelectFpr``.
        undersam: use RepeatedEditedNearestNeighbours when set alone.
        oversam: use SMOTE when set alone; SMOTEENN when both are set.
        export: when true, write the tree with ``export_graphviz`` and call
            ``transform`` with the selected feature indices.

    Returns:
        Tuple ``(selector, dtc)`` of the fitted selector and classifier.
    """
    print("Training with configuration " + configurationname)
    X_train, y_train, id_to_a_train = train_data
    dtc = DecisionTreeClassifier(random_state=0)

    print("Feature Selection")
    selector = SelectFpr(score_function)
    result = selector.fit(X_train, y_train)
    X_train = selector.transform(X_train)

    fitted_ids = list(result.get_support(indices=True))

    print("Apply Resampling")
    print(Counter(y_train))
    # At most one resampler applies, depending on the two flags.
    if undersam and oversam:
        resampler = SMOTEENN(random_state=0)
    elif undersam:
        resampler = RepeatedEditedNearestNeighbours()
    elif oversam:
        resampler = SMOTE(random_state=42)
    else:
        resampler = None
    if resampler is not None:
        X_train, y_train = resampler.fit_resample(X_train, y_train)
    print(Counter(y_train))

    print("Train Classifier")
    dtc = dtc.fit(X_train, y_train, check_input=True)

    if export:
        dot_path = DATAP + "/temp/trees/sltree_" + configurationname + ".dot"
        export_graphviz(dtc, out_file=dot_path, filled=True)
        transform(fitted_ids)

    print("Self Accuracy: " + str(dtc.score(X_train, y_train)))

    return selector, dtc

开发者ID:softlang，项目名称:wikionto，代码行数:39，代码来源:decision_tree.py

示例11: multisplit

def multisplit(skf, X, y, stepsize=1000):
    """Evaluate a two-stage classifier over the folds in ``skf``.

    For every fold, one first-stage classifier is trained per block of
    ``stepsize`` columns of ``X`` (via ``plib.classif``).  Their decision
    values are concatenated, filtered with ``SelectFpr`` and fed to an
    ``AdaBoostClassifier``.  The held-out rows go through the same chain
    and the fraction of correct predictions is accumulated.

    Args:
        skf: iterable of ``(train_index, test_index)`` pairs that also
            exposes ``n_folds``.
        X: 2-D array of samples; ``len(X[0])`` is taken as the column count.
        y: array of labels indexable by the fold indices.
        stepsize: number of columns handled by each first-stage classifier.

    Returns:
        Sum of the per-fold accuracies divided by ``skf.n_folds``.
    """
    total_score = 0
    for train_index, test_index in skf:
        column_starts = range(0, len(X[0]), stepsize)
        wl = []
        pred1 = np.matrix([])
        # Training
        for x in column_starts:
            clf1 = plib.classif(X[train_index, x:x + stepsize], y[train_index])
            tmp_p = np.matrix(clf1.decision_function(X[train_index, x:x + stepsize]))
            if pred1.size == 0:
                pred1 = tmp_p
            else:
                pred1 = np.concatenate((pred1, tmp_p), axis=1)
            wl.append(clf1)
        selectf = SelectFpr().fit(pred1, y[train_index])
        clf3 = AdaBoostClassifier(n_estimators=100)
        clf3.fit(selectf.transform(pred1), y[train_index])
        # Testing: reuse each first-stage classifier on its own column block.
        predtest = np.matrix([])
        for block_clf, x in zip(wl, column_starts):
            tmp_p = np.matrix(block_clf.decision_function(X[test_index, x:x + stepsize]))
            if predtest.size == 0:
                predtest = tmp_p
            else:
                predtest = np.concatenate((predtest, tmp_p), axis=1)
        # Final prediction
        predfinal = clf3.predict(selectf.transform(predtest))
        # Single-string print calls give the same output on Python 2 and 3.
        print("Target     :  " + str(y[test_index]))
        print("Prediction :  " + str(predfinal))
        matchs = np.equal(predfinal, y[test_index])
        score = np.divide(np.sum(matchs), np.float64(matchs.size))
        total_score = score + total_score
    return np.divide(total_score, skf.n_folds)

开发者ID:cdansereau，项目名称:Proteus，代码行数:38，代码来源:prediction.py

示例12: SelectFpr

y = iris.target

################################################################################
pl.figure(1)
pl.clf()

# One bar position per feature (last axis of x).
x_indices = np.arange(x.shape[-1])

################################################################################
# Univariate feature selection
from sklearn.feature_selection import SelectFpr, f_classif
# As a scoring function, we use a F test for classification.
# SelectFpr is configured with alpha=0.1, the p-value threshold used for
# the false-positive-rate test.

selector = SelectFpr(f_classif, alpha=0.1)
selector.fit(x, y)
# Read the p-values through the public fitted attribute ``pvalues_``; the
# private ``_pvalues`` name is not available in current scikit-learn.
scores = -np.log10(selector.pvalues_)
# Normalise so that the largest score is 1.
scores /= scores.max()
pl.bar(x_indices-.45, scores, width=.3,
        label=r'Univariate score ($-Log(p_{value})$)',
        color='g')

################################################################################
# Compare to the weights of an SVM
clf = svm.SVC(kernel='linear')
clf.fit(x, y)

# Sum of squared coefficients per feature, scaled so the maximum is 1.
svm_weights = (clf.coef_**2).sum(axis=0)
svm_weights /= svm_weights.max()
pl.bar(x_indices-.15, svm_weights, width=.3, label='SVM weight',

开发者ID:cocoaaa，项目名称:ml_gesture，代码行数:31，代码来源:fs_test.py

示例13: SelectFpr

# Feature matrices of the two treatments, restricted to the chosen features.
data1 = pdc.objFeatures[tr1_mask][:, featureIds]
data2 = pdc.objFeatures[tr2_mask][:, featureIds]
data = np.vstack([data1, data2])
# Label treatment 1 as 0 and treatment 2 as 1.
labels1 = np.zeros((data1.shape[0],))
labels2 = np.ones((data2.shape[0],))
labels = np.hstack([labels1, labels2])
# Training subset: first 1000 rows of data1 and last 1000 rows of data2.
X1 = data1[:1000]
X2 = data2[-1000:]
X = np.vstack([X1, X2])
Y1 = labels1[:X1.shape[0]]
Y2 = labels2[:X2.shape[0]]
Y = np.hstack([Y1, Y2])

from sklearn.feature_selection import SelectFpr, f_classif

selector = SelectFpr(f_classif, alpha=0.1)
selector.fit(X, Y)
# Read the p-values through the public fitted attribute ``pvalues_``; the
# private ``_pvalues`` name is not available in current scikit-learn.
scores = -np.log10(selector.pvalues_)
scores /= scores.max()

from sklearn import svm
# Compare to the weights of an SVM
clf = svm.SVC(kernel='linear')
clf.fit(X, Y)
# Single-argument print calls give the same output on Python 2 and 3.
# NOTE(review): the label says "error" but the value printed is clf.score().
print('SVM error: ' + str(clf.score(data, labels)))
pred = clf.predict(data)
# Use the same ``np`` alias as the rest of this script.
match = np.sum(pred == labels)
print(str(match) + ' ' + str(labels.shape[0]))
print(match / float(labels.shape[0]))

# Sum of squared coefficients per feature.
svm_weights = (clf.coef_**2).sum(axis=0)

开发者ID:bennihepp，项目名称:yaca，代码行数:31，代码来源:treatmentComparison.py

示例14: VarianceThreshold

from sklearn.feature_selection import VarianceThreshold, SelectFpr, f_regression

# import data of all Count and Position features. Training and test sets altogether
dfCountfeatures = pd.read_csv('data/CountingAndPositionFeatures_TrainAndTestData.csv')
dfTrainRaw = pd.read_csv('data/train.csv')

# get only training data
TrainQueryIDs = dfTrainRaw["id"]
relevance = dfTrainRaw["relevance"]
dfCountfeatures_TrainSet = dfCountfeatures[dfCountfeatures["id"].isin(TrainQueryIDs)]
# select those features which have non-zero variance
selector = VarianceThreshold()
selector.fit_transform(dfCountfeatures_TrainSet).shape # only one feature with zero variance - shape (74067L, 262L)

# select features based on p-values from univariate regression with target feature (relevance)
# The selector is fitted on the frame without "id", so the selected positions
# must be mapped back through that same frame's columns.
dfTrainFeatures = dfCountfeatures_TrainSet.drop("id", axis = 1)
selector2= SelectFpr(f_regression, alpha = 0.01)
selector2.fit(dfTrainFeatures, relevance)
selector2.get_support(indices=True).size # left 226 features out of 262 with p-value <=1%
# get titles of features which were selected
selectedCountfeatures = dfTrainFeatures.columns[selector2.get_support(indices=True)]

# check correlation amongst features
corrReduced = dfCountfeatures_TrainSet[selectedCountfeatures].corr()
# keep the strictly lower triangle so each pair of features appears once
corrReduced.iloc[:,:] = np.tril(corrReduced.values, k=-1)
corrReduced =corrReduced.stack()
# get pairs of features which are highly correlated
highCorr = corrReduced[corrReduced.abs()>0.8]
highCorr.size # 578 pairs correlated more than 80% out of 25,425
# ``MultiIndex.codes`` replaces the removed ``labels`` attribute; codes[0]
# holds the first-level position of every highly correlated pair.
len(set(highCorr.index.codes[0])) # 172 features to be removed due to high correlation with other features
# get feature titles which will be used in training the model after removing highly correlated features
indices = set(highCorr.index.codes[0])
selectedCountfeatures2 = [i for j, i in enumerate(selectedCountfeatures.tolist()) if j not in indices]

开发者ID:chyu1988，项目名称:kaggles，代码行数:31，代码来源:Count&PositionFeature_selection.py

示例15: SelectPercentile

# Single-argument print calls are used throughout so that the output is the
# same on Python 2 and Python 3.

#SelectPercentile -- chi2
from sklearn.feature_selection import SelectPercentile
from sklearn.feature_selection import chi2
X_fitted_4 = SelectPercentile(chi2, percentile=50).fit(X,y)
print("SelectPercentile -- chi2")
print(X_fitted_4.scores_)
print(X_fitted_4.pvalues_)
print(X_fitted_4.get_support())
X_transformed_4 = X_fitted_4.transform(X)
print(X_transformed_4.shape)

#SelectFpr --- chi2
from sklearn.feature_selection import SelectFpr
X_fitted_5 = SelectFpr(chi2, alpha=2.50017968e-15).fit(X,y)
print("SelectFpr --- chi2")
print(X_fitted_5.scores_)
print(X_fitted_5.pvalues_)
print(X_fitted_5.get_support())
X_transformed_5 = X_fitted_5.transform(X)
print(X_transformed_5.shape)

#SelectFpr --- f_classif
from sklearn.feature_selection import f_classif
X_fitted_6 = SelectFpr(f_classif, alpha=1.66966919e-31 ).fit(X,y)
print("SelectFpr --- f_classif")
print(X_fitted_6.scores_)
print(X_fitted_6.pvalues_)
print(X_fitted_6.get_support())

开发者ID:FF120，项目名称:python，代码行数:30，代码来源:Univariate+feature+selection.py

注：本文中的sklearn.feature_selection.SelectFpr类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。