本文整理汇总了Python中sklearn.feature_selection.VarianceThreshold方法的典型用法代码示例。如果您正苦于以下问题：Python feature_selection.VarianceThreshold方法的具体用法？Python feature_selection.VarianceThreshold怎么用？Python feature_selection.VarianceThreshold使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块sklearn.feature_selection的用法示例。
在下文中一共展示了feature_selection.VarianceThreshold方法的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: low_variance_feature_selection
# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import VarianceThreshold [as 别名]
def low_variance_feature_selection(X, threshold):
    """
    Run scikit-learn's ``VarianceThreshold`` selector on ``X`` and return
    the transformed data.

    Input
    -----
    X: {numpy array}, shape (n_samples, n_features)
        input data
    threshold: {float}
        variance threshold handed to ``VarianceThreshold``
        (for Boolean features a common choice is threshold = p*(1-p))

    Output
    ------
    X_new: {numpy array}, shape (n_samples, n_selected_features)
        data with selected features
    """
    selector = VarianceThreshold(threshold=threshold)
    X_new = selector.fit_transform(X)
    return X_new
示例2: test_objectmapper
# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import VarianceThreshold [as 别名]
def test_objectmapper(self):
    """Check that ``ModelFrame.feature_selection`` exposes the very same
    objects as the ``fs`` module for each listed selector name."""
    df = pdml.ModelFrame([])

    # Names are checked in the same order as the individual assertions
    # they replace; the accessor is looked up afresh for every name.
    selector_names = (
        'GenericUnivariateSelect',
        'SelectPercentile',
        'SelectKBest',
        'SelectFpr',
        'SelectFromModel',
        'SelectFdr',
        'SelectFwe',
        'RFE',
        'RFECV',
        'VarianceThreshold',
    )
    for selector_name in selector_names:
        self.assertIs(getattr(df.feature_selection, selector_name),
                      getattr(fs, selector_name))
示例3: test_same_variances
# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import VarianceThreshold [as 别名]
def test_same_variances(self):
    """Check that the local and the Spark selector report the same
    ``variances_`` for dense, sparse and DictRDD input."""
    local = VarianceThreshold()
    dist = SparkVarianceThreshold()

    # NOTE(review): the (shape, block_size) pairs are unpacked by the loop
    # but never handed to make_dense_rdd / make_sparse_rdd, so each pass
    # calls the helpers without arguments -- confirm whether they were
    # meant to be forwarded.
    shapes = [
        ((10, 5), None),
        ((1e3, 20), None),
        ((1e3, 20), 100),
        ((1e4, 100), None),
        ((1e4, 100), 600),
    ]

    for shape, block_size in shapes:
        X_dense, X_dense_rdd = self.make_dense_rdd()
        X_sparse, X_sparse_rdd = self.make_sparse_rdd()
        Z = DictRDD([X_sparse_rdd, X_dense_rdd], columns=('X', 'Y'))

        # Dense first, then sparse: fit both selectors on matching inputs.
        matching_inputs = (
            (X_dense, X_dense_rdd),
            (X_sparse, X_sparse_rdd),
        )
        for local_input, dist_input in matching_inputs:
            local.fit(local_input)
            dist.fit(dist_input)
            assert_array_almost_equal(local.variances_, dist.variances_)

        # Only the distributed selector is refit on the DictRDD; ``local``
        # still carries the variances from the sparse fit above.
        dist.fit(Z)
        assert_array_almost_equal(local.variances_, dist.variances_)
示例4: test_same_transform_result
# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import VarianceThreshold [as 别名]
def test_same_transform_result(self):
    """Check that ``fit_transform`` of the default-configured local and
    Spark selectors agree on dense, sparse and DictRDD input."""
    local = VarianceThreshold()
    dist = SparkVarianceThreshold()

    X_dense, X_dense_rdd = self.make_dense_rdd()
    X_sparse, X_sparse_rdd = self.make_sparse_rdd()
    Z_rdd = DictRDD([X_sparse_rdd, X_dense_rdd], columns=('X', 'Y'))

    # Dense input: blocks are expected to be numpy arrays.
    expected_dense = local.fit_transform(X_dense)
    actual_dense = dist.fit_transform(X_dense_rdd)
    assert_true(check_rdd_dtype(actual_dense, (np.ndarray,)))
    assert_array_almost_equal(expected_dense, actual_dense.toarray())

    # Sparse input: blocks are expected to be scipy sparse matrices.
    expected_sparse = local.fit_transform(X_sparse)
    actual_sparse = dist.fit_transform(X_sparse_rdd)
    assert_true(check_rdd_dtype(actual_sparse, (sp.spmatrix,)))
    assert_array_almost_equal(expected_sparse.toarray(),
                              actual_sparse.toarray())

    # DictRDD input: the 'X' column is compared against the local result
    # obtained from the sparse data.
    actual_column = dist.fit_transform(Z_rdd)[:, 'X']
    assert_true(check_rdd_dtype(actual_column, (sp.spmatrix,)))
    assert_array_almost_equal(expected_sparse.toarray(),
                              actual_column.toarray())
示例5: test_same_transform_with_treshold
# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import VarianceThreshold [as 别名]
def test_same_transform_with_treshold(self):
    """Check that ``fit_transform`` of the local and Spark selectors agree
    when both are built with a threshold of .03."""
    local = VarianceThreshold(.03)
    dist = SparkVarianceThreshold(.03)

    X_dense, X_dense_rdd = self.make_dense_rdd()
    X_sparse, X_sparse_rdd = self.make_sparse_rdd()
    Z_rdd = DictRDD([X_sparse_rdd, X_dense_rdd], columns=('X', 'Y'))

    # Dense input: blocks are expected to be numpy arrays.
    local_dense = local.fit_transform(X_dense)
    dist_dense = dist.fit_transform(X_dense_rdd)
    assert_true(check_rdd_dtype(dist_dense, (np.ndarray,)))
    assert_array_almost_equal(local_dense, dist_dense.toarray())

    # Sparse input: blocks are expected to be scipy sparse matrices.
    local_sparse = local.fit_transform(X_sparse)
    dist_sparse = dist.fit_transform(X_sparse_rdd)
    assert_true(check_rdd_dtype(dist_sparse, (sp.spmatrix,)))
    assert_array_almost_equal(local_sparse.toarray(),
                              dist_sparse.toarray())

    # DictRDD input: the 'X' column is compared against the local result
    # obtained from the sparse data.
    dist_column = dist.fit_transform(Z_rdd)[:, 'X']
    assert_true(check_rdd_dtype(dist_column, (sp.spmatrix,)))
    assert_array_almost_equal(local_sparse.toarray(),
                              dist_column.toarray())
示例6: test_pipeline_same_results
# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import VarianceThreshold [as 别名]
def test_pipeline_same_results(self):
    """Check that a local and a Spark threshold+logistic pipeline predict
    nearly the same labels (mean absolute difference below 0.1)."""
    X, y, Z = self.make_classification(2, 10000, 2000)

    # Local pipeline.
    loc_clf = LogisticRegression()
    loc_filter = VarianceThreshold()
    loc_steps = [('threshold', loc_filter), ('logistic', loc_clf)]
    loc_pipe = Pipeline(loc_steps)

    # Distributed pipeline with the same step names.
    dist_clf = SparkLogisticRegression()
    dist_filter = SparkVarianceThreshold()
    dist_steps = [('threshold', dist_filter), ('logistic', dist_clf)]
    dist_pipe = SparkPipeline(dist_steps)

    # The distributed filter is fitted on its own before the pipelines.
    dist_filter.fit(Z)
    loc_pipe.fit(X, y)
    dist_pipe.fit(Z, logistic__classes=np.unique(y))

    local_pred = loc_pipe.predict(X)
    dist_pred = np.concatenate(dist_pipe.predict(Z[:, 'X']).collect())
    mean_abs_diff = np.mean(np.abs(local_pred - dist_pred))
    assert_true(mean_abs_diff < 0.1)
示例7: test_zero_variance
# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import VarianceThreshold [as 别名]
def test_zero_variance():
    """Test VarianceThreshold with default setting, zero variance."""
    expected_support = [0, 1, 3, 4]
    inputs = [data, csr_matrix(data), csc_matrix(data), bsr_matrix(data)]
    for X in inputs:
        selector = VarianceThreshold().fit(X)
        assert_array_equal(expected_support,
                           selector.get_support(indices=True))

    # Fitting must raise ValueError for a single-row input and for an
    # input whose rows are identical.
    rejected_inputs = ([[0, 1, 2, 3]], [[0, 1], [0, 1]])
    for bad_input in rejected_inputs:
        assert_raises(ValueError, VarianceThreshold().fit, bad_input)
示例8: test_variance_threshold
# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import VarianceThreshold [as 别名]
def test_variance_threshold():
    """Test VarianceThreshold with custom variance."""
    # With threshold=.4 a single column is expected to remain.
    expected_shape = (len(data), 1)
    for X in (data, csr_matrix(data)):
        reduced = VarianceThreshold(threshold=.4).fit_transform(X)
        assert_equal(expected_shape, reduced.shape)
示例9: __init__
# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import VarianceThreshold [as 别名]
def __init__(self, threshold):
    """Build the wrapped selector.

    ``threshold`` is forwarded unchanged as the ``threshold`` keyword of
    ``fs.VarianceThreshold``; the resulting object is kept on
    ``self.selector``.
    """
    selector = fs.VarianceThreshold(threshold=threshold)
    self.selector = selector
示例10: __init__
# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import VarianceThreshold [as 别名]
def __init__(self, *args, **kwargs):
    """ Assemble Neural network or SVM using sklearn pipeline """
    # Split the keyword arguments: anything that is neither 'steps' nor a
    # double-underscore key is moved to the model constructor; the
    # remainder stays in ``kwargs`` for Pipeline.set_params.
    model_kwargs = {}
    for name in list(kwargs):
        if name == 'steps' or '__' in name:
            continue
        model_kwargs[name] = kwargs.pop(name)

    if self._model is None:
        raise ValueError('Model not specified!')

    estimator = self._model(*args, **model_kwargs)
    stages = [
        ('empty_dims_remover', VarianceThreshold()),
        ('scaler', StandardScaler()),
        ('model', estimator),
    ]
    self.pipeline = Pipeline(stages).set_params(**kwargs)
示例11: __init__
# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import VarianceThreshold [as 别名]
def __init__(self, *args, **kwargs):
    """ Assemble Neural network or SVM using sklearn pipeline """
    # Keys meant for the model itself are every key except 'steps' and the
    # double-underscore ones; they are popped out of ``kwargs`` so that
    # only pipeline-level parameters remain for set_params.
    passthrough_keys = [
        key for key in list(kwargs.keys())
        if not (key == 'steps' or '__' in key)
    ]
    local_kwargs = {}
    for key in passthrough_keys:
        local_kwargs[key] = kwargs.pop(key)

    if self._model is None:
        raise ValueError('Model not specified!')

    model = self._model(*args, **local_kwargs)
    pipeline = Pipeline([
        ('empty_dims_remover', VarianceThreshold()),
        ('scaler', StandardScaler()),
        ('model', model),
    ])
    self.pipeline = pipeline.set_params(**kwargs)
示例12: remove_lv_features
# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import VarianceThreshold [as 别名]
def remove_lv_features(model, X):
    r"""Drop low-variance columns from the feature matrix.

    When ``model.specs['lv_remove']`` is set, a column mask is either
    computed with ``VarianceThreshold`` and stored in
    ``model.feature_map['lv_support']`` (training), or read back from
    there (``predict_mode``). The mask is applied to ``X`` and to
    ``model.feature_names``. Otherwise ``X`` is returned unchanged.

    Parameters
    ----------
    model : alphapy.Model
        Model specifications for removing features.
    X : numpy array
        The feature matrix.

    Returns
    -------
    X_reduced : numpy array
        The reduced feature matrix.

    Raises
    ------
    AssertionError
        If the number of columns returned differs from the number of
        entries in ``model.feature_names``.

    References
    ----------
    You can find more information on low-variance feature selection here [LV]_.

    .. [LV] http://scikit-learn.org/stable/modules/feature_selection.html#variance-threshold

    """
    logger.info("Removing Low-Variance Features")

    # All three settings are read up front, whether or not removal is on.
    specs = model.specs
    lv_remove = specs['lv_remove']
    lv_threshold = specs['lv_threshold']
    predict_mode = specs['predict_mode']

    if not lv_remove:
        X_reduced = X
        logger.info("Skipping Low-Variance Features")
    else:
        logger.info("Low-Variance Threshold : %.2f", lv_threshold)
        logger.info("Original Feature Count : %d", X.shape[1])
        if predict_mode:
            # Reuse the mask saved by an earlier non-predict run.
            support = model.feature_map['lv_support']
        else:
            selector = VarianceThreshold(threshold=lv_threshold)
            selector.fit(X)
            support = selector.get_support()
            model.feature_map['lv_support'] = support
        X_reduced = X[:, support]
        # Keep only the names whose mask entry is truthy.
        model.feature_names = [
            name for name, keep in zip(model.feature_names, support) if keep
        ]
        logger.info("Reduced Feature Count : %d", X_reduced.shape[1])

    assert X_reduced.shape[1] == len(model.feature_names), "Mismatched Features and Names"
    return X_reduced