本文整理汇总了 Python 中 sklearn.feature_selection.f_classif 方法的典型用法代码示例。如果您正苦于以下问题：Python feature_selection.f_classif 方法的具体用法？Python feature_selection.f_classif 怎么用？Python feature_selection.f_classif 使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块 sklearn.feature_selection 的用法示例。
在下文中一共展示了feature_selection.f_classif方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: GetSelectedFeatureIndex
# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import f_classif [as 别名]
def GetSelectedFeatureIndex(self, data_container):
    """Pick the best features of a data container with the ANOVA F-test.

    Each feature column is scaled by its L2 norm, then ``SelectKBest`` with
    ``f_classif`` keeps ``self.GetSelectedFeatureNumber()`` features. If the
    container holds fewer features than requested, a message is printed and
    the requested number is lowered to the number available.

    Parameters
    ----------
    data_container : object
        Must provide ``GetArray()`` (2-D feature array) and ``GetLabel()``.

    Returns
    -------
    tuple
        ``(selected_indices, f_value, p_value)`` where ``selected_indices``
        is a list of the selected column indices and ``f_value`` /
        ``p_value`` are the per-feature F statistics and p-values.
    """
    data = data_container.GetArray()
    # NOTE(review): this division is in place. If GetArray() hands back the
    # container's own array rather than a copy, the container's data is
    # rescaled too -- confirm that this is intended. A column whose norm is
    # zero divides by zero here.
    data /= np.linalg.norm(data, ord=2, axis=0)
    label = data_container.GetLabel()

    feature_count = data.shape[1]
    if feature_count < self.GetSelectedFeatureNumber():
        print(
            'ANOVA: The number of features {:d} in data container is smaller than the required number {:d}'.format(
                feature_count, self.GetSelectedFeatureNumber()))
        self.SetSelectedFeatureNumber(feature_count)

    fs = SelectKBest(f_classif, k=self.GetSelectedFeatureNumber())
    fs.fit(data, label)
    feature_index = fs.get_support(indices=True)

    # fit() already evaluated f_classif(data, label) and stored the result,
    # so reuse it instead of running the F-test a second time.
    return feature_index.tolist(), fs.scores_, fs.pvalues_
示例2: test_clone
# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import f_classif [as 别名]
def test_clone():
    """Check that ``clone`` yields a distinct estimator with equal params.

    An estimator is built, cloned, and the clone is compared against the
    source object: it must be another object, and (for the scalar case)
    report the same parameters.
    """
    from sklearn.feature_selection import SelectFpr, f_classif

    # Scalar hyper-parameter: identity and parameter equality are checked.
    original = SelectFpr(f_classif, alpha=0.1)
    duplicate = clone(original)
    assert duplicate is not original
    assert_equal(original.get_params(), duplicate.get_params())

    # Array-valued hyper-parameter: only object identity is checked.
    original = SelectFpr(f_classif, alpha=np.zeros((10, 2)))
    duplicate = clone(original)
    assert duplicate is not original
示例3: f_score
# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import f_classif [as 别名]
def f_score(X, y):
    """
    Compute the ANOVA F-value of every feature (thin wrapper around
    scikit-learn's ``f_classif``), where
    f_score = sum((ni/(c-1))*(mean_i - mean)^2)/((1/(n - c))*sum((ni-1)*std_i^2))

    Input
    -----
    X: {numpy array}, shape (n_samples, n_features)
        input data
    y : {numpy array}, shape (n_samples,)
        input class labels

    Output
    ------
    F: {numpy array}, shape (n_features,)
        f-score for each feature
    """
    # f_classif returns (F statistics, p-values); only the former is needed.
    f_values, _ = f_classif(X, y)
    return f_values
示例4: get_top_k
# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import f_classif [as 别名]
def get_top_k(self):
    """Return the names of the ``self.top_k`` best-scoring feature columns.

    Every column of ``self.data`` except ``self.target`` is a candidate.
    When ``self.objective`` is truthy the columns are scored with
    ``f_regression``. Otherwise the ``'intercept'`` column is dropped from
    the candidates and ``chi2`` is tried first, falling back to
    ``f_classif`` if ``chi2`` rejects the data.

    Returns
    -------
    list
        Column names selected by ``SelectKBest``.
    """
    columns = list(self.data.columns.values)
    columns.remove(self.target)

    if self.objective:
        top_k_vars = SelectKBest(f_regression, k=self.top_k)
        # Only the fitted support mask is used, so fit() is enough.
        top_k_vars.fit(self.data[columns], self.data[self.target])
    else:
        # remove intercept from top_k
        columns.remove('intercept')
        try:
            top_k_vars = SelectKBest(chi2, k=self.top_k)
            top_k_vars.fit(self.data[columns], self.data[self.target])
        except ValueError:
            # chi2 raises ValueError on input it cannot score (it requires
            # non-negative features); fall back to the ANOVA F-test.
            # Catching only ValueError keeps KeyboardInterrupt/SystemExit
            # and unrelated bugs visible.
            top_k_vars = SelectKBest(f_classif, k=self.top_k)
            top_k_vars.fit(self.data[columns], self.data[self.target])

    return [columns[i] for i in top_k_vars.get_support(indices=True)]
示例5: _anova
# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import f_classif [as 别名]
def _anova(self, X_fft, y, n_coefs, n_timestamps):
    """Choose which Fourier-coefficient columns to keep.

    Parameters
    ----------
    X_fft : array, 2-D
        Fourier coefficients, one column per coefficient.
    y : array
        Class labels, passed to ``f_classif``.
    n_coefs : int
        Number of coefficients to keep.
    n_timestamps : int
        Not used by this method.

    Returns
    -------
    support : array of int
        Indices of the selected columns. When ``n_coefs`` is not smaller
        than the number of columns this is ``np.arange(n_coefs)``; when
        fewer than ``n_coefs`` columns are non-constant it is all the
        non-constant columns; otherwise it is the ``n_coefs`` non-constant
        columns with the smallest ANOVA p-values.

    Raises
    ------
    ValueError
        If every column of ``X_fft`` is constant.
    """
    if n_coefs >= X_fft.shape[1]:
        return np.arange(n_coefs)

    # Constant columns carry no information for the F-test, so they are
    # excluded up front. The variance is compared against a plain 0.0.
    non_constant = np.where(~np.isclose(X_fft.var(axis=0), 0.0))[0]

    if non_constant.size == 0:
        raise ValueError("All the Fourier coefficients are constant. "
                         "Your input data is weirdly homogeneous.")

    if non_constant.size < n_coefs:
        warn("The number of non constant Fourier coefficients ({0}) "
             "is lower than the number of coefficients to keep ({1}). "
             "The number of coefficients to keep is truncated to {2}"
             ".".format(non_constant.size, n_coefs, non_constant.size))
        return non_constant

    # Rank the remaining columns by p-value and keep the most significant.
    _, p = f_classif(X_fft[:, non_constant], y)
    return non_constant[np.argsort(p)[:n_coefs]]
示例6: _compute_expected_results
# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import f_classif [as 别名]
def _compute_expected_results(X, y=None, n_coefs=None, drop_sum=False,
                              anova=False, norm_mean=False, norm_std=False):
    """Compute the expected results.

    Parameters
    ----------
    X : array-like, 2-D
        One series per row. The input is never modified.
    y : array-like or None
        Class labels; only read when ``anova`` is true.
    n_coefs : int or None
        Number of columns to return; ``None`` returns all of them.
    drop_sum : bool
        If true, the first column (real part of the first coefficient) is
        dropped as well.
    anova : bool
        If true, the ``n_coefs`` columns with the smallest ``f_classif``
        p-values are returned instead of the first ``n_coefs`` columns.
    norm_mean, norm_std : bool
        Subtract the row mean / divide by the row standard deviation
        before the transform.

    Returns
    -------
    array, 2-D
        Interleaved real and imaginary parts of ``np.fft.rfft(X)``, after
        the column selection described above.
    """
    X = np.asarray(X)
    # Take the row count from the data itself rather than relying on a
    # name defined outside this function.
    n_samples = X.shape[0]

    # Out-of-place arithmetic: leaves the caller's array untouched and also
    # works for integer input, where ``-=`` / ``/=`` with floats would fail.
    if norm_mean:
        X = X - X.mean(axis=1)[:, None]
    if norm_std:
        X = X / X.std(axis=1)[:, None]

    X_fft = np.fft.rfft(X)
    # Stack real parts over imaginary parts, then reshape column-major so
    # each row reads (re_0, im_0, re_1, im_1, ...).
    X_fft = np.vstack([np.real(X_fft), np.imag(X_fft)])
    X_fft = X_fft.reshape(n_samples, -1, order='F')

    # Column 1 (imaginary part of the first coefficient) and the last
    # column are always discarded.
    if drop_sum:
        X_fft = X_fft[:, 2:-1]
    else:
        X_fft = np.hstack([X_fft[:, :1], X_fft[:, 2:-1]])

    if n_coefs is None:
        return X_fft
    if anova:
        _, p = f_classif(X_fft, y)
        support = np.argsort(p)[:n_coefs]
        return X_fft[:, support]
    return X_fft[:, :n_coefs]
示例7: find_best_feature_selections
# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import f_classif [as 别名]
def find_best_feature_selections(X, y):
    """Try several univariate selectors and evaluate each reduced dataset.

    Six selectors (chi2 / f_classif, top-k / top-percentile) are applied to
    ``(X, y)``. The shape of the first reduced matrix is printed, then
    ``train_and_test`` is called once per reduced matrix, in the order the
    selectors are listed.
    """
    selectors = [
        SelectKBest(chi2, k=80),
        SelectPercentile(chi2, percentile=20),
        SelectKBest(f_classif, k=80),  # this one has the best performance
        SelectPercentile(f_classif, percentile=20),
        SelectKBest(f_classif, k=70),
        SelectKBest(f_classif, k=60),
    ]

    # All reductions are computed before any training starts.
    reduced_sets = [selector.fit_transform(X, y) for selector in selectors]

    print(reduced_sets[0].shape)

    for X_reduced in reduced_sets:
        train_and_test(X_reduced, y)
#X,y = _dataset_sample()
################################PARAMETER Selected################################
#TODO some problem happens when using the parameter max_leaf_nodes in Dtree and RandomForest
示例8: test_clone
# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import f_classif [as 别名]
def test_clone():
    """Check that ``clone`` creates a correct deep copy.

    An estimator is created and cloned; the clone must be a different
    object and, for the scalar-parameter case, expose the same parameters.
    """
    from sklearn.feature_selection import SelectFpr, f_classif

    selector = SelectFpr(f_classif, alpha=0.1)
    new_selector = clone(selector)
    # Plain ``assert`` instead of the nose-style ``assert_true`` helper:
    # no helper import is needed and pytest reports the failing expression.
    assert selector is not new_selector
    assert_equal(selector.get_params(), new_selector.get_params())

    # Same identity check with an array-valued parameter.
    selector = SelectFpr(f_classif, alpha=np.zeros((10, 2)))
    new_selector = clone(selector)
    assert selector is not new_selector
示例9: univariate_feature_selection
# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import f_classif [as 别名]
def univariate_feature_selection(mode, predictors, target):
    """Return the per-feature p-values of a univariate scoring function.

    Parameters
    ----------
    mode : str
        One of ``'f_regression'``, ``'f_classif'`` or ``'chi2'``.
    predictors : array-like
        Feature matrix passed to the selector.
    target : array-like
        Target values passed to the selector.

    Returns
    -------
    The ``pvalues_`` attribute of the fitted ``SelectPercentile``.

    Raises
    ------
    ValueError
        If ``mode`` is not one of the supported names.
    """
    if mode == 'f_regression':
        score_func = f_regression
    elif mode == 'f_classif':
        score_func = f_classif
    elif mode == 'chi2':
        score_func = chi2
    else:
        # Fail with a clear message instead of an UnboundLocalError later.
        raise ValueError(
            "Unknown mode {!r}; expected 'f_regression', 'f_classif' or "
            "'chi2'".format(mode))

    # percentile=100 keeps every feature: only the p-values are wanted.
    # It is passed by keyword because current scikit-learn releases do not
    # accept it positionally.
    fselect = SelectPercentile(score_func, percentile=100)
    fselect.fit(predictors, target)
    return fselect.pvalues_
示例10: fit
# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import f_classif [as 别名]
def fit(self, X, y):
    """Fit the feature selector and a list of classifiers on random subsets.

    ``SelectKBest(f_classif, k=self.max_features)`` is fitted on ``(X, y)``
    and stored in ``self.selector``. Then ``self.n_estimators`` subsets of
    at most ``self.max_samples`` rows are drawn by shuffling the row
    indices, and ``self._prepare_classifier`` is mapped over those subsets
    in a thread pool. The results are stored in ``self.clf_list``.

    Parameters
    ----------
    X : array
        Training features.
    y : array
        Training labels; indexed with a list of integer positions, so it
        must support that kind of indexing.
    """
    self.selector = SelectKBest(f_classif, k=self.max_features)
    self.selector.fit(X, y)
    X_train = self.selector.transform(X)
    y_train = y

    # random.shuffle needs a mutable sequence; on Python 3 a bare range()
    # is immutable and would raise TypeError, so materialise it as a list.
    idx = list(range(len(y_train)))

    param_list = []
    for _ in range(self.n_estimators):
        random.shuffle(idx)
        subset = idx[:self.max_samples]
        param_list.append((X_train[subset], y_train[subset]))

    pool = ThreadPool(cpu_count())
    try:
        self.clf_list = pool.map(self._prepare_classifier, param_list)
    finally:
        # Release the worker threads even if a classifier fails to train.
        pool.close()
        pool.join()

    # Disabled second-stage experiment. It was kept in the original as a
    # no-op string literal and is preserved here as a comment:
    #
    # X2=[]
    # for clf in self.clf_list:
    #     P=clf.predict_proba(X_train)
    #     if len(X2)==0:
    #         X2=P[:, 0]
    #     else:
    #         X2=numpy.vstack((X2, P[:, 0]))
    # X2=numpy.swapaxes(X2, 0, 1)
    # print "X2:", X2.shape
    # from sklearn.ensemble import RandomForestClassifier
    # self.clf2=RandomForestClassifier(n_estimators=100)
    # self.clf2.fit(X2, y_train)
示例11: test_pipeline_methods_anova
# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import f_classif [as 别名]
def test_pipeline_methods_anova():
    """Smoke-test the pipeline methods with an ANOVA filter step.

    Builds ``SelectKBest(f_classif, k=2)`` followed by
    ``LogisticRegression`` on the iris data, fits it, and calls each
    prediction method and ``score`` once.
    """
    iris = load_iris()
    X, y = iris.data, iris.target

    # Anova + LogisticRegression
    anova_filter = SelectKBest(f_classif, k=2)
    classifier = LogisticRegression()
    pipe = Pipeline([('anova', anova_filter), ('logistic', classifier)])

    pipe.fit(X, y)
    for method_name in ('predict', 'predict_proba', 'predict_log_proba'):
        getattr(pipe, method_name)(X)
    pipe.score(X, y)
示例12: test_clone_2
# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import f_classif [as 别名]
def test_clone_2():
    """Check that ``clone`` does not carry over ad-hoc attributes.

    An extra attribute is attached to an estimator after construction; the
    clone of that estimator must not have it.
    """
    from sklearn.feature_selection import SelectFpr, f_classif

    original = SelectFpr(f_classif, alpha=0.1)
    original.own_attribute = "test"  # not a constructor parameter

    duplicate = clone(original)
    assert not hasattr(duplicate, "own_attribute")
示例13: test_export_pipeline
# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import f_classif [as 别名]
def test_export_pipeline():
    """Check the source produced by export_pipeline() for a fixed pipeline.

    The pipeline combines a DecisionTreeClassifier and a SelectPercentile
    branch through CombineDFs and feeds the result to a
    KNeighborsClassifier.
    """
    individual_repr = (
        'KNeighborsClassifier(CombineDFs('
        'DecisionTreeClassifier(input_matrix, DecisionTreeClassifier__criterion=gini, '
        'DecisionTreeClassifier__max_depth=8,DecisionTreeClassifier__min_samples_leaf=5,'
        'DecisionTreeClassifier__min_samples_split=5),SelectPercentile(input_matrix, SelectPercentile__percentile=20))'
        'KNeighborsClassifier__n_neighbors=10, '
        'KNeighborsClassifier__p=1,KNeighborsClassifier__weights=uniform'
    )
    individual = creator.Individual.from_string(individual_repr, tpot_obj._pset)

    # NOTE(review): the literal below carries no indentation inside
    # make_pipeline(...) -- confirm it matches export_pipeline's real output.
    expected_source = """import numpy as np
import pandas as pd
from sklearn.feature_selection import SelectPercentile, f_classif
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline, make_union
from sklearn.tree import DecisionTreeClassifier
from tpot.builtins import StackingEstimator
# NOTE: Make sure that the outcome column is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1)
training_features, testing_features, training_target, testing_target = \\
train_test_split(features, tpot_data['target'], random_state=None)
exported_pipeline = make_pipeline(
make_union(
StackingEstimator(estimator=DecisionTreeClassifier(criterion="gini", max_depth=8, min_samples_leaf=5, min_samples_split=5)),
SelectPercentile(score_func=f_classif, percentile=20)
),
KNeighborsClassifier(n_neighbors=10, p=1, weights="uniform")
)
exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
"""
    generated_source = export_pipeline(individual, tpot_obj.operators, tpot_obj._pset)
    assert generated_source == expected_source
示例14: test_export_pipeline_3
# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import f_classif [as 别名]
def test_export_pipeline_3():
    """Check the source produced by export_pipeline() for a simple pipeline.

    The pipeline is a SelectPercentile preprocessor followed by a
    DecisionTreeClassifier.
    """
    individual_repr = (
        'DecisionTreeClassifier(SelectPercentile(input_matrix, SelectPercentile__percentile=20),'
        'DecisionTreeClassifier__criterion=gini, DecisionTreeClassifier__max_depth=8,'
        'DecisionTreeClassifier__min_samples_leaf=5, DecisionTreeClassifier__min_samples_split=5)'
    )
    individual = creator.Individual.from_string(individual_repr, tpot_obj._pset)

    # NOTE(review): the literal below carries no indentation inside
    # make_pipeline(...) -- confirm it matches export_pipeline's real output.
    expected_source = """import numpy as np
import pandas as pd
from sklearn.feature_selection import SelectPercentile, f_classif
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.tree import DecisionTreeClassifier
# NOTE: Make sure that the outcome column is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1)
training_features, testing_features, training_target, testing_target = \\
train_test_split(features, tpot_data['target'], random_state=None)
exported_pipeline = make_pipeline(
SelectPercentile(score_func=f_classif, percentile=20),
DecisionTreeClassifier(criterion="gini", max_depth=8, min_samples_leaf=5, min_samples_split=5)
)
exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
"""
    generated_source = export_pipeline(individual, tpot_obj.operators, tpot_obj._pset)
    assert generated_source == expected_source
示例15: test_export_pipeline_6
# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import f_classif [as 别名]
def test_export_pipeline_6():
    """Check the exported source when random_state and data_file_path are set.

    The same SelectPercentile + DecisionTreeClassifier pipeline is exported
    with ``random_state=42`` and ``data_file_path='test_path'``.
    """
    individual_repr = (
        'DecisionTreeClassifier(SelectPercentile(input_matrix, SelectPercentile__percentile=20),'
        'DecisionTreeClassifier__criterion=gini, DecisionTreeClassifier__max_depth=8,'
        'DecisionTreeClassifier__min_samples_leaf=5, DecisionTreeClassifier__min_samples_split=5)'
    )
    individual = creator.Individual.from_string(individual_repr, tpot_obj._pset)

    # NOTE(review): the literal below carries no indentation inside
    # make_pipeline(...) -- confirm it matches export_pipeline's real output.
    expected_source = """import numpy as np
import pandas as pd
from sklearn.feature_selection import SelectPercentile, f_classif
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.tree import DecisionTreeClassifier
from tpot.export_utils import set_param_recursive
# NOTE: Make sure that the outcome column is labeled 'target' in the data file
tpot_data = pd.read_csv('test_path', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1)
training_features, testing_features, training_target, testing_target = \\
train_test_split(features, tpot_data['target'], random_state=42)
exported_pipeline = make_pipeline(
SelectPercentile(score_func=f_classif, percentile=20),
DecisionTreeClassifier(criterion="gini", max_depth=8, min_samples_leaf=5, min_samples_split=5)
)
# Fix random state for all the steps in exported pipeline
set_param_recursive(exported_pipeline.steps, 'random_state', 42)
exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
"""
    export_options = {'random_state': 42, 'data_file_path': 'test_path'}
    generated_source = export_pipeline(
        individual, tpot_obj.operators, tpot_obj._pset, **export_options)
    assert generated_source == expected_source