本文整理汇总了Python中sklearn.feature_selection.SelectFromModel方法的典型用法代码示例。如果您正苦于以下问题:Python feature_selection.SelectFromModel方法的具体用法?Python feature_selection.SelectFromModel怎么用?Python feature_selection.SelectFromModel使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.feature_selection
的用法示例。
在下文中一共展示了feature_selection.SelectFromModel方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_max_features_tiebreak
# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import SelectFromModel [as 别名]
def test_max_features_tiebreak():
# Test if max_features can break tie among feature importance
X, y = datasets.make_classification(
n_samples=1000, n_features=10, n_informative=3, n_redundant=0,
n_repeated=0, shuffle=False, random_state=0)
max_features = X.shape[1]
feature_importances = np.array([4, 4, 4, 4, 3, 3, 3, 2, 2, 1])
for n_features in range(1, max_features + 1):
transformer = SelectFromModel(
FixedImportanceEstimator(feature_importances),
max_features=n_features,
threshold=-np.inf)
X_new = transformer.fit_transform(X, y)
selected_feature_indices = np.where(transformer._get_support_mask())[0]
assert_array_equal(selected_feature_indices, np.arange(n_features))
assert X_new.shape[1] == n_features
示例2: test_threshold_and_max_features
# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import SelectFromModel [as 别名]
def test_threshold_and_max_features():
X, y = datasets.make_classification(
n_samples=1000, n_features=10, n_informative=3, n_redundant=0,
n_repeated=0, shuffle=False, random_state=0)
est = RandomForestClassifier(n_estimators=50, random_state=0)
transformer1 = SelectFromModel(estimator=est, max_features=3,
threshold=-np.inf)
X_new1 = transformer1.fit_transform(X, y)
transformer2 = SelectFromModel(estimator=est, threshold=0.04)
X_new2 = transformer2.fit_transform(X, y)
transformer3 = SelectFromModel(estimator=est, max_features=3,
threshold=0.04)
X_new3 = transformer3.fit_transform(X, y)
assert X_new3.shape[1] == min(X_new1.shape[1], X_new2.shape[1])
selected_indices = transformer3.transform(
np.arange(X.shape[1])[np.newaxis, :])
assert_allclose(X_new3, X[:, selected_indices[0]])
示例3: test_feature_importances
# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import SelectFromModel [as 别名]
def test_feature_importances():
X, y = datasets.make_classification(
n_samples=1000, n_features=10, n_informative=3, n_redundant=0,
n_repeated=0, shuffle=False, random_state=0)
est = RandomForestClassifier(n_estimators=50, random_state=0)
for threshold, func in zip(["mean", "median"], [np.mean, np.median]):
transformer = SelectFromModel(estimator=est, threshold=threshold)
transformer.fit(X, y)
assert hasattr(transformer.estimator_, 'feature_importances_')
X_new = transformer.transform(X)
assert_less(X_new.shape[1], X.shape[1])
importances = transformer.estimator_.feature_importances_
feature_mask = np.abs(importances) > func(importances)
assert_array_almost_equal(X_new, X[:, feature_mask])
示例4: test_sample_weight
# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import SelectFromModel [as 别名]
def test_sample_weight():
# Ensure sample weights are passed to underlying estimator
X, y = datasets.make_classification(
n_samples=100, n_features=10, n_informative=3, n_redundant=0,
n_repeated=0, shuffle=False, random_state=0)
# Check with sample weights
sample_weight = np.ones(y.shape)
sample_weight[y == 1] *= 100
est = LogisticRegression(random_state=0, fit_intercept=False)
transformer = SelectFromModel(estimator=est)
transformer.fit(X, y, sample_weight=None)
mask = transformer._get_support_mask()
transformer.fit(X, y, sample_weight=sample_weight)
weighted_mask = transformer._get_support_mask()
assert not np.all(weighted_mask == mask)
transformer.fit(X, y, sample_weight=3 * sample_weight)
reweighted_mask = transformer._get_support_mask()
assert np.all(weighted_mask == reweighted_mask)
示例5: test_partial_fit
# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import SelectFromModel [as 别名]
def test_partial_fit():
est = PassiveAggressiveClassifier(random_state=0, shuffle=False,
max_iter=5, tol=None)
transformer = SelectFromModel(estimator=est)
transformer.partial_fit(data, y,
classes=np.unique(y))
old_model = transformer.estimator_
transformer.partial_fit(data, y,
classes=np.unique(y))
new_model = transformer.estimator_
assert old_model is new_model
X_transform = transformer.transform(data)
transformer.fit(np.vstack((data, data)), np.concatenate((y, y)))
assert_array_almost_equal(X_transform, transformer.transform(data))
# check that if est doesn't have partial_fit, neither does SelectFromModel
transformer = SelectFromModel(estimator=RandomForestClassifier())
assert not hasattr(transformer, "partial_fit")
示例6: test_prefit
# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import SelectFromModel [as 别名]
def test_prefit():
# Test all possible combinations of the prefit parameter.
# Passing a prefit parameter with the selected model
# and fitting a unfit model with prefit=False should give same results.
clf = SGDClassifier(alpha=0.1, max_iter=10, shuffle=True,
random_state=0, tol=None)
model = SelectFromModel(clf)
model.fit(data, y)
X_transform = model.transform(data)
clf.fit(data, y)
model = SelectFromModel(clf, prefit=True)
assert_array_almost_equal(model.transform(data), X_transform)
# Check that the model is rewritten if prefit=False and a fitted model is
# passed
model = SelectFromModel(clf, prefit=False)
model.fit(data, y)
assert_array_almost_equal(model.transform(data), X_transform)
# Check that prefit=True and calling fit raises a ValueError
model = SelectFromModel(clf, prefit=True)
assert_raises(ValueError, model.fit, data, y)
示例7: test_time
# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import SelectFromModel [as 别名]
def test_time(pipeline_name, name, path):
if pipeline_name == "LR":
pipeline = make_pipeline(LogisticRegression())
if pipeline_name == "FGS":
pipeline = make_pipeline(FeatureGradientSelector(), LogisticRegression())
if pipeline_name == "Tree":
pipeline = make_pipeline(SelectFromModel(ExtraTreesClassifier(n_estimators=50)), LogisticRegression())
test_benchmark = Benchmark()
print("Dataset:\t", name)
print("Pipeline:\t", pipeline_name)
starttime = datetime.datetime.now()
test_benchmark.run_test(pipeline, name, path)
endtime = datetime.datetime.now()
print("Used time: ", (endtime - starttime).microseconds/1000)
print("")
示例8: test
# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import SelectFromModel [as 别名]
def test():
url_zip_train = 'https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/rcv1_train.binary.bz2'
urllib.request.urlretrieve(url_zip_train, filename='train.bz2')
f_svm = open('train.svm', 'wt')
with bz2.open('train.bz2', 'rb') as f_zip:
data = f_zip.read()
f_svm.write(data.decode('utf-8'))
f_svm.close()
X, y = load_svmlight_file('train.svm')
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
pipeline = make_pipeline(FeatureGradientSelector(n_epochs=1, n_features=10), LogisticRegression())
# pipeline = make_pipeline(SelectFromModel(ExtraTreesClassifier(n_estimators=50)), LogisticRegression())
pipeline.fit(X_train, y_train)
print("Pipeline Score: ", pipeline.score(X_train, y_train))
示例9: get_feature_selection_model_from_name
# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import SelectFromModel [as 别名]
def get_feature_selection_model_from_name(type_of_estimator, model_name):
model_map = {
'classifier': {
'SelectFromModel': SelectFromModel(RandomForestClassifier(n_jobs=-1, max_depth=10, n_estimators=15), threshold='20*mean'),
'RFECV': RFECV(estimator=RandomForestClassifier(n_jobs=-1), step=0.1),
'GenericUnivariateSelect': GenericUnivariateSelect(),
'KeepAll': 'KeepAll'
},
'regressor': {
'SelectFromModel': SelectFromModel(RandomForestRegressor(n_jobs=-1, max_depth=10, n_estimators=15), threshold='0.7*mean'),
'RFECV': RFECV(estimator=RandomForestRegressor(n_jobs=-1), step=0.1),
'GenericUnivariateSelect': GenericUnivariateSelect(),
'KeepAll': 'KeepAll'
}
}
return model_map[type_of_estimator][model_name]
示例10: test_set_param_recursive_2
# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import SelectFromModel [as 别名]
def test_set_param_recursive_2():
"""Assert that set_param_recursive sets \"random_state\" to 42 in nested estimator in SelectFromModel."""
pipeline_string = (
'DecisionTreeRegressor(SelectFromModel(input_matrix, '
'SelectFromModel__ExtraTreesRegressor__max_features=0.05, SelectFromModel__ExtraTreesRegressor__n_estimators=100, '
'SelectFromModel__threshold=0.05), DecisionTreeRegressor__max_depth=8,'
'DecisionTreeRegressor__min_samples_leaf=5, DecisionTreeRegressor__min_samples_split=5)'
)
tpot_obj = TPOTRegressor()
tpot_obj._fit_init()
deap_pipeline = creator.Individual.from_string(pipeline_string, tpot_obj._pset)
sklearn_pipeline = tpot_obj._toolbox.compile(expr=deap_pipeline)
set_param_recursive(sklearn_pipeline.steps, 'random_state', 42)
assert getattr(getattr(sklearn_pipeline.steps[0][1], 'estimator'), 'random_state') == 42
assert getattr(sklearn_pipeline.steps[1][1], 'random_state') == 42
示例11: test_objectmapper
# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import SelectFromModel [as 别名]
def test_objectmapper(self):
df = pdml.ModelFrame([])
self.assertIs(df.feature_selection.GenericUnivariateSelect,
fs.GenericUnivariateSelect)
self.assertIs(df.feature_selection.SelectPercentile,
fs.SelectPercentile)
self.assertIs(df.feature_selection.SelectKBest, fs.SelectKBest)
self.assertIs(df.feature_selection.SelectFpr, fs.SelectFpr)
self.assertIs(df.feature_selection.SelectFromModel,
fs.SelectFromModel)
self.assertIs(df.feature_selection.SelectFdr, fs.SelectFdr)
self.assertIs(df.feature_selection.SelectFwe, fs.SelectFwe)
self.assertIs(df.feature_selection.RFE, fs.RFE)
self.assertIs(df.feature_selection.RFECV, fs.RFECV)
self.assertIs(df.feature_selection.VarianceThreshold,
fs.VarianceThreshold)
示例12: _get_feature_selector
# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import SelectFromModel [as 别名]
def _get_feature_selector(self):
"""Get a feature selector instance based on the feature_selector model
parameter
Returns:
(Object): a feature selector which returns a reduced feature matrix, \
given the full feature matrix, X and the class labels, y
"""
if self.config.model_settings is None:
selector_type = None
else:
selector_type = self.config.model_settings.get("feature_selector")
selector = {
"l1": SelectFromModel(LogisticRegression(penalty="l1", C=1)),
"f": SelectPercentile(),
}.get(selector_type)
return selector
示例13: test_feature_importances
# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import SelectFromModel [as 别名]
def test_feature_importances():
X, y = datasets.make_classification(
n_samples=1000, n_features=10, n_informative=3, n_redundant=0,
n_repeated=0, shuffle=False, random_state=0)
est = RandomForestClassifier(n_estimators=50, random_state=0)
for threshold, func in zip(["mean", "median"], [np.mean, np.median]):
transformer = SelectFromModel(estimator=est, threshold=threshold)
transformer.fit(X, y)
assert_true(hasattr(transformer.estimator_, 'feature_importances_'))
X_new = transformer.transform(X)
assert_less(X_new.shape[1], X.shape[1])
importances = transformer.estimator_.feature_importances_
feature_mask = np.abs(importances) > func(importances)
assert_array_almost_equal(X_new, X[:, feature_mask])
示例14: test_2d_coef
# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import SelectFromModel [as 别名]
def test_2d_coef():
X, y = datasets.make_classification(
n_samples=1000, n_features=10, n_informative=3, n_redundant=0,
n_repeated=0, shuffle=False, random_state=0, n_classes=4)
est = LogisticRegression()
for threshold, func in zip(["mean", "median"], [np.mean, np.median]):
for order in [1, 2, np.inf]:
# Fit SelectFromModel a multi-class problem
transformer = SelectFromModel(estimator=LogisticRegression(),
threshold=threshold,
norm_order=order)
transformer.fit(X, y)
assert_true(hasattr(transformer.estimator_, 'coef_'))
X_new = transformer.transform(X)
assert_less(X_new.shape[1], X.shape[1])
# Manually check that the norm is correctly performed
est.fit(X, y)
importances = np.linalg.norm(est.coef_, axis=0, ord=order)
feature_mask = importances > func(importances)
assert_array_equal(X_new, X[:, feature_mask])
示例15: test_partial_fit
# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import SelectFromModel [as 别名]
def test_partial_fit():
est = PassiveAggressiveClassifier(random_state=0, shuffle=False,
max_iter=5, tol=None)
transformer = SelectFromModel(estimator=est)
transformer.partial_fit(data, y,
classes=np.unique(y))
old_model = transformer.estimator_
transformer.partial_fit(data, y,
classes=np.unique(y))
new_model = transformer.estimator_
assert_true(old_model is new_model)
X_transform = transformer.transform(data)
transformer.fit(np.vstack((data, data)), np.concatenate((y, y)))
assert_array_equal(X_transform, transformer.transform(data))
# check that if est doesn't have partial_fit, neither does SelectFromModel
transformer = SelectFromModel(estimator=RandomForestClassifier())
assert_false(hasattr(transformer, "partial_fit"))