本文整理匯總了Python中sklearn.feature_selection.f_regression方法的典型用法代碼示例。如果您正苦於以下問題:Python feature_selection.f_regression方法的具體用法?Python feature_selection.f_regression怎麼用?Python feature_selection.f_regression使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類sklearn.feature_selection的用法示例。
在下文中一共展示了feature_selection.f_regression方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: getTopFeatures
# 需要導入模塊: from sklearn import feature_selection [as 別名]
# 或者: from sklearn.feature_selection import f_regression [as 別名]
def getTopFeatures(train_x, train_y, n_features=100):
    """Return the indices of the columns with the highest F-scores.

    Every column of ``train_x`` is scored against ``train_y`` with
    ``f_regression``. NaN scores are treated as 0.0, and the columns are
    ranked by score in descending order.

    Args:
        train_x: Feature matrix, passed unchanged to ``f_regression``.
        train_y: Target values, passed unchanged to ``f_regression``.
        n_features: Maximum number of column indices to return.

    Returns:
        A list of at most ``n_features`` column indices, best score first.
        Fewer indices are returned when there are fewer columns.
    """
    f_val, _ = f_regression(train_x, train_y)
    scores = [0.0 if math.isnan(score) else score for score in f_val]
    # sorted() is stable even with reverse=True, so columns with equal
    # scores stay in ascending index order.
    ranked = sorted(range(len(scores)), key=scores.__getitem__, reverse=True)
    # Slicing (rather than indexing position by position) avoids an
    # IndexError when fewer than n_features columns exist; max() keeps a
    # negative n_features yielding an empty list.
    return ranked[:max(n_features, 0)]
# generate the new data, based on which features are generated, and used
示例2: get_top_k
# 需要導入模塊: from sklearn import feature_selection [as 別名]
# 或者: from sklearn.feature_selection import f_regression [as 別名]
def get_top_k(self):
    """Return the names of the ``self.top_k`` best-scoring predictor columns.

    All columns of ``self.data`` except ``self.target`` are candidates.
    When ``self.objective`` is truthy they are scored with ``f_regression``.
    Otherwise the ``'intercept'`` column is also dropped and the columns are
    scored with ``chi2``, falling back to ``f_classif`` if the ``chi2``
    selector raises.

    Returns:
        A list of the column names selected by ``SelectKBest``.
    """
    columns = list(self.data.columns.values)
    columns.remove(self.target)
    if self.objective:
        top_k_vars = SelectKBest(f_regression, k=self.top_k)
        top_k_vars.fit_transform(self.data[columns], self.data[self.target])
    else:
        # remove intercept from top_k
        columns.remove('intercept')
        try:
            top_k_vars = SelectKBest(chi2, k=self.top_k)
            top_k_vars.fit_transform(self.data[columns], self.data[self.target])
        except Exception:
            # Catch Exception rather than using a bare except so that
            # KeyboardInterrupt / SystemExit are not swallowed.
            # NOTE(review): presumably this guards against inputs chi2
            # rejects (e.g. negative values) -- confirm.
            top_k_vars = SelectKBest(f_classif, k=self.top_k)
            top_k_vars.fit_transform(self.data[columns], self.data[self.target])
    return [columns[i] for i in top_k_vars.get_support(indices=True)]
示例3: test_pipeline
# 需要導入模塊: from sklearn import feature_selection [as 別名]
# 或者: from sklearn.feature_selection import f_regression [as 別名]
def test_pipeline(self):
    """Compare a selector + regressor pipeline with a hand-built equivalent.

    For each listed statsmodels model name, a pipeline of
    ``SelectKBest(f_regression, k=5)`` and ``base.StatsModelsRegressor`` is
    fitted on the diabetes data. Its predictions must match those of the
    model class fitted directly on the five selected features.
    """
    from sklearn.feature_selection import SelectKBest, f_regression
    from sklearn.pipeline import Pipeline

    diabetes = datasets.load_diabetes()
    for model_name in ['OLS', 'GLS', 'WLS', 'GLSAR', 'QuantReg', 'GLM', 'RLM']:
        model_cls = getattr(sm, model_name)

        # Prediction through the scikit-learn pipeline.
        steps = [
            ('selector', SelectKBest(f_regression, k=5)),
            ('reg', base.StatsModelsRegressor(model_cls)),
        ]
        pipeline = Pipeline(steps)
        pipeline.fit(diabetes.data, diabetes.target)
        actual = pipeline.predict(diabetes.data)

        # Reference prediction: select features by hand, then fit directly.
        reduced = SelectKBest(f_regression, k=5).fit_transform(
            diabetes.data, diabetes.target)
        fitted = model_cls(diabetes.target, reduced).fit()
        expected = fitted.predict(reduced)

        self.assert_numpy_array_almost_equal(actual, expected)
示例4: univariate_feature_selection
# 需要導入模塊: from sklearn import feature_selection [as 別名]
# 或者: from sklearn.feature_selection import f_regression [as 別名]
def univariate_feature_selection(mode, predictors, target):
    """Return the per-feature p-values of a univariate statistical test.

    Args:
        mode: Name of the scoring function to use: ``'f_regression'``,
            ``'f_classif'`` or ``'chi2'``.
        predictors: Feature matrix handed to the selector's ``fit``.
        target: Target values handed to the selector's ``fit``.

    Returns:
        The fitted selector's ``pvalues_`` attribute.

    Raises:
        ValueError: If ``mode`` is not one of the supported names.
    """
    if mode == 'f_regression':
        score_func = f_regression
    elif mode == 'f_classif':
        score_func = f_classif
    elif mode == 'chi2':
        score_func = chi2
    else:
        # Previously an unknown mode fell through and failed later with an
        # opaque UnboundLocalError on `fselect`.
        raise ValueError(
            "mode must be 'f_regression', 'f_classif' or 'chi2', got %r" % (mode,))
    # `percentile` is keyword-only in current scikit-learn releases, so it
    # must not be passed positionally. percentile=100 keeps every feature;
    # only the p-values are of interest, so fitting alone is sufficient.
    fselect = SelectPercentile(score_func, percentile=100)
    fselect.fit(predictors, target)
    return fselect.pvalues_
示例5: get_initial_regression_model_recommendation
# 需要導入模塊: from sklearn import feature_selection [as 別名]
# 或者: from sklearn.feature_selection import f_regression [as 別名]
def get_initial_regression_model_recommendation(
        project_id,
        dataset_id,
        dependent_variable_id=None,
        recommendation_type=MRT.LASSO.value,
        table_layout=MCT.LEAVE_ONE_OUT.value,
        data_size_cutoff=current_app.config['ANALYSIS_DATA_SIZE_CUTOFF'],
        categorical_value_limit=current_app.config['ANALYSIS_CATEGORICAL_VALUE_LIMIT']):
    """Recommend independent variables for an initial regression model.

    Loads the dataset (down-sampled to ``data_size_cutoff`` rows when it is
    larger), resolves the dependent variable, collects candidate independent
    variables and runs the recommendation function for
    ``recommendation_type``.

    Args:
        project_id: Project identifier used to load data and field properties.
        dataset_id: Dataset identifier used to load data and field properties.
        dependent_variable_id: Id of the dependent field. When falsy, a
            field whose ``general_type`` is ``'q'`` is picked at random.
        recommendation_type: Key selecting the recommendation function.
        table_layout: Echoed back in the result.
        data_size_cutoff: Maximum number of rows used.
        categorical_value_limit: Fields with ``general_type`` ``'c'`` and
            more unique values than this are not candidates.

    Returns:
        A dict with the keys ``recommended``, ``table_layout``,
        ``recommendation_type``, ``dependent_variable_id`` and
        ``independent_variables_ids``.
    """
    df = get_data(project_id=project_id, dataset_id=dataset_id)
    if len(df) > data_size_cutoff:
        df = df.sample(data_size_cutoff)

    field_properties = db_access.get_field_properties(project_id, dataset_id)
    quantitative_fields = [
        candidate for candidate in field_properties
        if candidate['general_type'] == 'q'
    ]

    if dependent_variable_id:
        # NOTE(review): next() yields None when no field has this id, which
        # would make the name lookup below fail -- confirm callers always
        # pass a valid id.
        dependent_variable = next(
            (candidate for candidate in field_properties
             if candidate['id'] == dependent_variable_id),
            None)
    else:
        dependent_variable = np.random.choice(quantitative_fields, size=1)[0]

    # Candidates: every other field, except categorical fields that are
    # unique or have more unique values than the limit.
    independent_variables = [
        candidate for candidate in field_properties
        if candidate['name'] != dependent_variable['name']
        and (candidate['general_type'] != 'c'
             or not (candidate['is_unique']
                     or len(candidate['unique_values']) > categorical_value_limit))
    ]

    recommenders = {
        MRT.FORWARD_R2.value: forward_r2,
        MRT.LASSO.value: lasso,
        MRT.RFE.value: recursive_feature_elimination,
        MRT.FORWARD_F.value: f_regression
    }
    recommend = recommenders[recommendation_type]
    result = recommend(df, dependent_variable, independent_variables)

    recommended_ids = [variable['id'] for variable in result]
    return {
        'recommended': True,
        'table_layout': table_layout,
        'recommendation_type': recommendation_type,
        'dependent_variable_id': dependent_variable['id'],
        'independent_variables_ids': recommended_ids,
    }
示例6: f_regression
# 需要導入模塊: from sklearn import feature_selection [as 別名]
# 或者: from sklearn.feature_selection import f_regression [as 別名]
def f_regression(df, dependent_variable, independent_variables, interaction_terms=[], model_limit=5):
    """Log univariate F-test results for the first constructed model.

    Builds the patsy model formulas with ``construct_models`` (using the
    ``MCT.ALL_VARIABLES`` table layout), materialises the design matrices of
    the first formula and logs the F-statistics and p-values that
    scikit-learn's ``f_regression`` computes for them.

    Args:
        df: Data passed to ``construct_models`` and ``dmatrices``.
        dependent_variable: Passed to ``construct_models``.
        independent_variables: Passed to ``construct_models``.
        interaction_terms: Passed to ``construct_models``; not mutated here.
        model_limit: Unused by this function.

    Returns:
        None.
        NOTE(review): the recommendation dispatcher in this file iterates
        over the result of the function it calls, so this implementation
        looks unfinished -- confirm the intended return value.
    """
    # This function has the same name as scikit-learn's f_regression, so a
    # bare `f_regression(...)` call here would resolve to this function and
    # fail on the unexpected `center` keyword. Import the library function
    # under an alias instead.
    from sklearn.feature_selection import f_regression as sklearn_f_regression

    _, patsy_models = construct_models(
        df, dependent_variable, independent_variables, interaction_terms,
        table_layout=MCT.ALL_VARIABLES.value)
    y, X = dmatrices(patsy_models[0], df, return_type='dataframe')
    f_test, p_values = sklearn_f_regression(X, y, center=True)
    logger.info(f_test)
    logger.info(p_values)
    return None
示例7: test_import_from_sklearn_pipeline
# 需要導入模塊: from sklearn import feature_selection [as 別名]
# 或者: from sklearn.feature_selection import f_regression [as 別名]
def test_import_from_sklearn_pipeline(self):
    """Check that an imported pipeline keeps step parameters and predictions.

    Each step of the scikit-learn pipeline must report the same parameters
    as the model wrapped by the step at the same position of the imported
    lale pipeline, and both pipelines must pass
    ``assert_equal_predictions``.
    """
    from sklearn import svm
    from sklearn.feature_selection import SelectKBest, f_regression
    from sklearn.pipeline import Pipeline

    sklearn_pipeline = Pipeline([
        ('anova', SelectKBest(f_regression, k=3)),
        ('svc', svm.SVC(kernel='linear')),
    ])
    lale_pipeline = import_from_sklearn_pipeline(sklearn_pipeline)

    for position, step_name in enumerate(sklearn_pipeline.named_steps):
        expected_params = sklearn_pipeline.named_steps[step_name].get_params()
        lale_step = lale_pipeline.steps()[position]
        actual_params = lale_step._impl._wrapped_model.get_params()
        self.assertEqual(expected_params, actual_params)

    self.assert_equal_predictions(sklearn_pipeline, lale_pipeline)
示例8: test_import_from_sklearn_pipeline2
# 需要導入模塊: from sklearn import feature_selection [as 別名]
# 或者: from sklearn.feature_selection import f_regression [as 別名]
def test_import_from_sklearn_pipeline2(self):
    """Check that a fitted scikit-learn pipeline can predict after import."""
    from sklearn import svm
    from sklearn.feature_selection import SelectKBest, f_regression
    from sklearn.pipeline import Pipeline

    sklearn_pipeline = Pipeline([
        ('anova', SelectKBest(f_regression, k=3)),
        ('svc', svm.SVC(kernel='linear')),
    ])
    sklearn_pipeline.fit(self.X_train, self.y_train)

    # The imported pipeline is used for prediction without any further fit.
    imported = import_from_sklearn_pipeline(sklearn_pipeline)
    imported.predict(self.X_test)
示例9: test_import_from_sklearn_pipeline3
# 需要導入模塊: from sklearn import feature_selection [as 別名]
# 或者: from sklearn.feature_selection import f_regression [as 別名]
def test_import_from_sklearn_pipeline3(self):
    """Check that predicting on a pipeline imported with fitted=False fails."""
    from sklearn import svm
    from sklearn.feature_selection import SelectKBest, f_regression
    from sklearn.pipeline import Pipeline

    sklearn_pipeline = Pipeline([
        ('anova', SelectKBest(f_regression, k=3)),
        ('svc', svm.SVC(kernel='linear')),
    ])
    imported = import_from_sklearn_pipeline(sklearn_pipeline, fitted=False)

    # fitted=False returns a Trainable, so calling predict is invalid.
    with self.assertRaises(ValueError):
        imported.predict(self.X_test)
示例10: decode
# 需要導入模塊: from sklearn import feature_selection [as 別名]
# 或者: from sklearn.feature_selection import f_regression [as 別名]
def decode(cls, obj):
    """Rebuild a ``GenericUnivariateSelect`` from its serialized attributes.

    The instance is created without running ``__init__``, and ``obj['dict']``
    is installed as its ``__dict__`` (the same dict object, not a copy). The
    stored ``score_func`` name is then replaced by the matching scikit-learn
    function.

    Args:
        obj: Mapping whose ``'dict'`` entry holds the instance attributes.

    Returns:
        The reconstructed ``GenericUnivariateSelect`` instance.

    Raises:
        ValueError: If ``score_func`` is neither ``'f_classif'`` nor
            ``'f_regression'``.
    """
    from sklearn.feature_selection import GenericUnivariateSelect, f_classif, f_regression

    selector = GenericUnivariateSelect.__new__(GenericUnivariateSelect)
    selector.__dict__ = obj['dict']

    score_name = selector.score_func
    if score_name == 'f_classif':
        selector.score_func = f_classif
        return selector
    if score_name == 'f_regression':
        selector.score_func = f_regression
        return selector
    raise ValueError('Unsupported GenericUnivariateSelect.score_func "%s"' % score_name)
示例11: __init__
# 需要導入模塊: from sklearn import feature_selection [as 別名]
# 或者: from sklearn.feature_selection import f_regression [as 別名]
def __init__(self, options):
    """Validate the options and build the ``GenericUnivariateSelect`` estimator.

    Args:
        options: Mapping whose optional ``'params'`` entry may hold ``type``
            (aliased to ``score_func``), ``mode`` and ``param``.

    Raises:
        RuntimeError: If ``type`` is not categorical/numeric/numerical, or
            ``mode`` is not one of the supported mode names.
        ValueError: If ``param`` is outside (0, 1) for the fpr/fdr/fwe
            modes, or is not a whole number for any other mode.
    """
    self.handle_options(options)

    out_params = convert_params(
        options.get('params', {}),
        floats=['param'],
        strs=['type', 'mode'],
        aliases={'type': 'score_func'},
    )
    rate_modes = ('fpr', 'fdr', 'fwe')

    # Map the user-facing type name onto a scoring function.
    if 'score_func' in out_params:
        type_name = out_params['score_func'].lower()
        if type_name == 'categorical':
            out_params['score_func'] = f_classif
        elif type_name in ('numerical', 'numeric'):
            out_params['score_func'] = f_regression
        else:
            raise RuntimeError('type can either be categorical or numeric.')
    else:
        out_params['score_func'] = f_classif

    has_param = 'param' in out_params

    if 'mode' in out_params:
        mode = out_params['mode']
        if mode not in ('k_best', 'fpr', 'fdr', 'fwe', 'percentile'):
            raise RuntimeError('mode can only be one of the following: fdr, fpr, fwe, k_best, and percentile')
        # The rate-based modes take a param strictly between 0 and 1.
        if mode in rate_modes and has_param and not 0 < out_params['param'] < 1:
            raise ValueError(
                'Invalid param value for mode {}: param must be between 0 and 1.'.format(mode))

    # k_best and percentile require integer param
    if has_param and out_params.get('mode') not in rate_modes:
        requested = out_params['param']
        out_params['param'] = int(requested)
        if out_params['param'] != requested:
            template = 'param value {} is not an integer; mode={} requires an integer.'
            raise ValueError(
                template.format(requested, out_params.get('mode', 'percentile')))

    self.estimator = GenericUnivariateSelect(**out_params)
示例12: compute_pvals
# 需要導入模塊: from sklearn import feature_selection [as 別名]
# 或者: from sklearn.feature_selection import f_regression [as 別名]
def compute_pvals(self, X, y):
    """Compute one univariate p-value per column of ``X`` against ``y``.

    The test depends on whether ``y`` has exactly two unique values and on
    which columns ``areColumnsBinary`` flags as binary:

    * two-valued ``y``, non-binary column: the rows with ``y == 0`` and
      ``y == 1`` are compared with a t-test when ``self.is_linear`` is
      truthy, otherwise with a two-sample Kolmogorov-Smirnov test;
    * two-valued ``y``, binary columns: ``chi2``;
    * other ``y``, non-binary columns: ``f_regression``;
    * other ``y``, binary column: ``f_regression`` with the roles swapped
      (``y`` as the single feature, the column as the target).

    Args:
        X: Feature table, accessed through ``.shape`` and ``.loc``.
            Positional column indices are used as ``.loc`` labels, so this
            presumably expects columns labelled 0..n-1 -- TODO confirm.
        y: Outcome values aligned with the rows of ``X``. The two-valued
            branch selects rows with ``y == 0`` and ``y == 1``, so it
            assumes a 0/1 coding -- TODO confirm.

    Returns:
        A numpy array with ``X.shape[1]`` p-values. Entries no test writes
        to remain 0.
    """
    # TODO: export to stats_utils?
    is_y_binary = (len(np.unique(y)) == 2)
    is_binary_feature = areColumnsBinary(X)
    p_vals = np.zeros(X.shape[1])
    if is_y_binary:
        # Process non-binary columns:
        for i in np.where(~is_binary_feature)[0]:
            x0 = X.loc[y == 0, i]
            x1 = X.loc[y == 1, i]
            if self.is_linear:
                _, p_vals[i] = stats.ttest_ind(x0, x1)
            else:
                _, p_vals[i] = stats.ks_2samp(x0, x1)
        # Process binary features (skipped when there are none, because the
        # scikit-learn scorers reject an input with zero columns):
        if np.any(is_binary_feature):
            _, p_vals[is_binary_feature] = feature_selection.chi2(X.loc[:, is_binary_feature], y)
    else:
        # Process non-binary features (same zero-column guard as above):
        if np.any(~is_binary_feature):
            _, p_vals[~is_binary_feature] = feature_selection.f_regression(X.loc[:, ~is_binary_feature], y)
        # Process binary features:
        # np.vstack is what the deprecated np.row_stack alias resolved to.
        y_mat = np.vstack(y)
        for i in np.where(is_binary_feature)[0]:
            _, p_vals[i] = feature_selection.f_regression(y_mat, X.loc[:, i])
    return p_vals
示例13: test_init
# 需要導入模塊: from sklearn import feature_selection [as 別名]
# 或者: from sklearn.feature_selection import f_regression [as 別名]
def test_init(self):
    """Check that a proxy built from a fitted selector exposes its support mask."""
    features = numpy.array([[0, 0], [1.0, 2.0]])
    target = numpy.array([0.5, 1.0])

    selector = SelectKBest(score_func=f_regression, k=1)
    selector.fit(features, target)
    fitted_mask = selector._get_support_mask().tolist()
    self.assertEqual([0, 1], fitted_mask)

    # Wrapping the already-fitted selector must yield the same mask.
    selector_proxy = SelectorProxy(selector)
    proxy_mask = selector_proxy.support_mask_.tolist()
    self.assertEqual([0, 1], proxy_mask)
示例14: test_fit
# 需要導入模塊: from sklearn import feature_selection [as 別名]
# 或者: from sklearn.feature_selection import f_regression [as 別名]
def test_fit(self):
    """Check that fitting through the proxy fits the selector and sets the mask."""
    selector = SelectKBest(score_func=f_regression, k=1)
    selector_proxy = SelectorProxy(selector)
    # Before fitting, the proxy must not expose a support mask.
    self.assertFalse(hasattr(selector_proxy, "support_mask_"))

    features = numpy.array([[0, 0], [1.0, 2.0]])
    target = numpy.array([0.5, 1.0])
    selector_proxy.fit(features, target)

    selector_mask = selector._get_support_mask().tolist()
    self.assertEqual([0, 1], selector_mask)
    proxy_mask = selector_proxy.support_mask_.tolist()
    self.assertEqual([0, 1], proxy_mask)
示例15: test_f_regression
# 需要導入模塊: from sklearn import feature_selection [as 別名]
# 或者: from sklearn.feature_selection import f_regression [as 別名]
def test_f_regression(self):
    """Check the ModelFrame f_regression accessor against the direct ``fs`` call."""
    diabetes = datasets.load_diabetes()
    frame = pdml.ModelFrame(diabetes)

    result = frame.feature_selection.f_regression()
    expected = fs.f_regression(diabetes.data, diabetes.target)

    self.assertEqual(len(result), 2)
    # Compare both returned arrays element by element.
    for position in (0, 1):
        self.assert_numpy_array_almost_equal(result[position], expected[position])