This article collects typical usage examples of the RFECV attribute from sklearn.feature_selection in Python. If you are wondering what feature_selection.RFECV does or how to use it, the curated examples below may help; you can also explore the containing module, sklearn.feature_selection, further.
The following presents 14 code examples of feature_selection.RFECV, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code samples.
Example 1: get_feature_selection_model_from_name
# Required import: from sklearn import feature_selection [as alias]
# Or: from sklearn.feature_selection import RFECV [as alias]
def get_feature_selection_model_from_name(type_of_estimator, model_name):
    model_map = {
        'classifier': {
            'SelectFromModel': SelectFromModel(RandomForestClassifier(n_jobs=-1, max_depth=10, n_estimators=15), threshold='20*mean'),
            'RFECV': RFECV(estimator=RandomForestClassifier(n_jobs=-1), step=0.1),
            'GenericUnivariateSelect': GenericUnivariateSelect(),
            'KeepAll': 'KeepAll'
        },
        'regressor': {
            'SelectFromModel': SelectFromModel(RandomForestRegressor(n_jobs=-1, max_depth=10, n_estimators=15), threshold='0.7*mean'),
            'RFECV': RFECV(estimator=RandomForestRegressor(n_jobs=-1), step=0.1),
            'GenericUnivariateSelect': GenericUnivariateSelect(),
            'KeepAll': 'KeepAll'
        }
    }
    return model_map[type_of_estimator][model_name]
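A minimal usage sketch (the toy dataset and import below are assumptions, not part of the original snippet):

from sklearn.datasets import make_classification

X, y = make_classification(n_samples=200, n_features=20, random_state=0)
selector = get_feature_selection_model_from_name('classifier', 'RFECV')
X_reduced = selector.fit_transform(X, y)  # RFECV picks the feature count by CV
print(X_reduced.shape)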
Example 2: predict_features
# Required import: from sklearn import feature_selection [as alias]
# Or: from sklearn.feature_selection import RFECV [as alias]
def predict_features(self, df_features, df_target, idx=0, **kwargs):
    """For one variable, predict its neighbouring nodes.

    Args:
        df_features (pandas.DataFrame): feature matrix
        df_target (pandas.Series): target variable
        idx (int): (optional) for printing purposes
        kwargs (dict): additional options for algorithms

    Returns:
        list: scores of each feature relative to the target
    """
    estimator = SVR(kernel='linear')
    selector = RFECV(estimator, step=1)
    selector = selector.fit(df_features.values, np.ravel(df_target.values))
    return selector.grid_scores_
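Note that RFECV.grid_scores_ was deprecated in scikit-learn 1.0 and removed in 1.2; on newer versions the equivalent reads the per-step mean CV scores from cv_results_, e.g.:

scores = selector.cv_results_["mean_test_score"]  # scikit-learn >= 1.0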
Example 3: test_objectmapper
# Required import: from sklearn import feature_selection [as alias]
# Or: from sklearn.feature_selection import RFECV [as alias]
def test_objectmapper(self):
    df = pdml.ModelFrame([])
    self.assertIs(df.feature_selection.GenericUnivariateSelect,
                  fs.GenericUnivariateSelect)
    self.assertIs(df.feature_selection.SelectPercentile,
                  fs.SelectPercentile)
    self.assertIs(df.feature_selection.SelectKBest, fs.SelectKBest)
    self.assertIs(df.feature_selection.SelectFpr, fs.SelectFpr)
    self.assertIs(df.feature_selection.SelectFromModel,
                  fs.SelectFromModel)
    self.assertIs(df.feature_selection.SelectFdr, fs.SelectFdr)
    self.assertIs(df.feature_selection.SelectFwe, fs.SelectFwe)
    self.assertIs(df.feature_selection.RFE, fs.RFE)
    self.assertIs(df.feature_selection.RFECV, fs.RFECV)
    self.assertIs(df.feature_selection.VarianceThreshold,
                  fs.VarianceThreshold)
Example 4: perf_RFCVE
# Required import: from sklearn import feature_selection [as alias]
# Or: from sklearn.feature_selection import RFECV [as alias]
def perf_RFCVE(projectPath, RFCVE_CVs, RFCVE_step_size, clf, data_train, target_train):
    selector = RFECV(estimator=clf, step=RFCVE_step_size, cv=RFCVE_CVs, scoring='f1', verbose=1)
    selector = selector.fit(data_train, target_train)
    print(selector.support_)
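A short follow-up sketch for mapping the boolean support_ mask back to column names (it assumes data_train is a pandas DataFrame, which the snippet does not state):

import numpy as np

feature_names = np.asarray(data_train.columns)
print(feature_names[selector.support_])  # names of the retained features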
Example 5: _fit_recursive_feature_elimination
# Required import: from sklearn import feature_selection [as alias]
# Or: from sklearn.feature_selection import RFECV [as alias]
def _fit_recursive_feature_elimination(self, X_train_outer, y_train_outer, X_test_outer):
    rfe = RFECV(estimator=self.model,
                min_features_to_select=self.rfe_n_features,
                cv=self.inner_cv, n_jobs=self.n_jobs)
    rfe.fit(X_train_outer, y_train_outer)
    log.info('Best number of features was: {0}'.format(rfe.n_features_))
    # Assign selected features to data
    return rfe.transform(X_train_outer), rfe.transform(X_test_outer)
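If the caller also needs to know which columns survived, not just the transformed arrays, the fitted selector exposes that directly; a small sketch assuming access to the rfe object above:

kept = rfe.get_support(indices=True)  # column indices of the retained features
log.info('Kept feature indices: {0}'.format(kept))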
Example 6: compute_ranks
# Required import: from sklearn import feature_selection [as alias]
# Or: from sklearn.feature_selection import RFECV [as alias]
def compute_ranks(self):
    if self._algorithm in (FeatureSelector.SELECT_K_BEST,
                           FeatureSelector.SELECT_PERCENTILE):
        # Both selectors expose per-feature scores; rank them in
        # descending order of score.
        scores = self._selector.scores_
        sorted_scores = sorted(scores, reverse=True)
        ranks = [sorted_scores.index(i) + 1 for i in scores]
    elif self._algorithm == FeatureSelector.RECURSIVE_ELIMINATION:
        n_selected = self._selector.n_features_
        support = self._selector.support_
        ranking = self._selector.ranking_
        # RFE and RFECV do not provide feature scores. Instead, they
        # provide a list of features which have been selected (support)
        # and an ascending list indicating when each other feature was
        # eliminated. Use these two to construct feature ranks, though
        # acknowledge that RFE and RFECV do not actually distinguish between
        # the weights of selected features.
        ranks = [0] * len(support)
        selected_count = 0
        for i in range(len(ranking)):
            if support[i]:
                # All selected features in ranking receive rank 1, so need
                # to iterate through list and add incrementing values so
                # that features ranked 1, 1, 1, become 1, 2, 3.
                ranks[i] = ranking[i] + selected_count
                selected_count += 1
            else:
                # Even if there are 5 selected features, the 6th feature
                # in ranking is given rank 2, so add (n_selected - 1).
                ranks[i] = ranking[i] + (n_selected - 1)
    return ranks
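A quick self-contained sanity check of the rank reconstruction above, using made-up ranking/support values (not from any real fit):

ranking = [1, 3, 1, 2, 1]                   # mimics RFECV.ranking_
support = [True, False, True, False, True]  # mimics RFECV.support_
n_selected = 3
ranks, selected_count = [0] * len(support), 0
for i, r in enumerate(ranking):
    if support[i]:
        ranks[i] = r + selected_count
        selected_count += 1
    else:
        ranks[i] = r + (n_selected - 1)
print(ranks)  # [1, 5, 2, 4, 3] -- a strict 1..5 ordering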
Example 7: _eliminate_recursively
# Required import: from sklearn import feature_selection [as alias]
# Or: from sklearn.feature_selection import RFECV [as alias]
def _eliminate_recursively(self, k=None):
    if self._problem == FeatureSelector.CLASSIFICATION:
        estimator = RandomForestClassifier(random_state=self._random_state)
    else:
        estimator = LassoCV(random_state=self._random_state)
    # If k is not specified, then use RFECV to automatically decide on
    # the optimal number of features. If specified, then use RFE.
    if k is None:
        self._selector = RFECV(estimator)
    else:
        self._selector = RFE(estimator, n_features_to_select=k, step=0.05)
Example 8: recursiveFeatureSelectorCV
# Required import: from sklearn import feature_selection [as alias]
# Or: from sklearn.feature_selection import RFECV [as alias]
def recursiveFeatureSelectorCV(classifier_model, train_data, train_labels, test_data, number_of_features):
    # Passing number_of_features as the second positional argument would set
    # RFECV's `step` parameter; min_features_to_select is the intended knob
    # and keeps the CV search from dropping below that feature count.
    rfe = RFECV(classifier_model, min_features_to_select=number_of_features)
    transformed_train_data = rfe.fit_transform(train_data, train_labels)
    transformed_test_data = rfe.transform(test_data)
    return transformed_train_data, transformed_test_data
#Iterating over all feature preprocessors and classifiers in turn
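If a caller needs exactly number_of_features features rather than a CV-chosen count, plain RFE is the matching tool; a one-line sketch:

from sklearn.feature_selection import RFE

rfe = RFE(classifier_model, n_features_to_select=number_of_features)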
Example 9: GetKFeatures
# Required import: from sklearn import feature_selection [as alias]
# Or: from sklearn.feature_selection import RFECV [as alias]
def GetKFeatures(filename, method='RFE', kbest=30, alpha=0.01, reduceMatrix=True):
    '''
    Gets best features using the chosen method
    (K-best, RFE, RFECV, 'L1' (RandomizedLogisticRegression), 'Tree' (ExtraTreesClassifier), mrmr),
    then prints the top K features' names (from featNames).
    If reduceMatrix = True, then also returns X reduced to the K best features.
    Available method names are: 'RFE', 'RFECV', 'RandomizedLogisticRegression', 'K-best', 'ExtraTreesClassifier'.
    Note that effectively any scikit-learn method could be used, if correctly imported.
    '''
    # est = method()
    '''
    Gets the K-best features (filtered by FDR, then select best ranked by t-test; more advanced options can be implemented).
    Save the data/matrix with the resulting/kept features to a new output file, "REDUCED_Feat.csv"
    '''
    features, labels, lb_encoder, featureNames = load_data(filename)
    X, y = features, labels
    # change the names as ints back to strings
    class_names = lb_encoder.inverse_transform(y)
    print("Data and labels imported. PreFilter feature matrix shape:")
    print(X.shape)
    selectK = SelectKBest(k=kbest)
    selectK.fit(X, y)
    selectK_mask = selectK.get_support()
    K_featnames = featureNames[selectK_mask]
    print('Number of features kept by the K-best filter:', selectK_mask.sum())
    print("K_featnames: %s" % (K_featnames))
    if reduceMatrix:
        Reduced_df = pd.read_csv(filename, index_col=0)
        Reduced_df = Reduced_df[Reduced_df.columns[selectK_mask]]
        Reduced_df.to_csv('REDUCED_Feat.csv')
        print('Saved to REDUCED_Feat.csv')
        return Reduced_df

# WORKS! But unreadable with too many features!
Example 10: plotRFECV
# Required import: from sklearn import feature_selection [as alias]
# Or: from sklearn.feature_selection import RFECV [as alias]
def plotRFECV(X, y, stepSize=0.05, scoring='f1'):
    '''
    Plot recursive feature elimination with automatic tuning of the number
    of features selected via cross-validation.
    http://scikit-learn.org/stable/auto_examples/plot_rfe_with_cross_validation.html#example-plot-rfe-with-cross-validation-py
    '''
    from sklearn.svm import SVC
    from sklearn.model_selection import StratifiedKFold  # sklearn.cross_validation was removed in scikit-learn 0.20
    from sklearn.feature_selection import RFECV
    # Create the RFE object and compute a cross-validated score.
    # svc = SVC(kernel="linear")
    # class_weight='auto' is gone; 'balanced' is the current spelling.
    svc = SVC(kernel="linear", class_weight='balanced', cache_size=1400)
    rfecv = RFECV(estimator=svc, step=stepSize, cv=StratifiedKFold(n_splits=2),
                  scoring=scoring)
    rfecv.fit(X, y)
    print("Optimal number of features : %d" % rfecv.n_features_)
    # Plot number of features vs. cross-validation scores
    import matplotlib.pyplot as plt
    plt.figure()
    plt.xlabel("Number of features selected")
    plt.ylabel("Cross-validation score (%s)" % scoring)
    plt.plot(range(1, len(rfecv.grid_scores_) + 1), rfecv.grid_scores_)
    plt.show()
    return rfecv
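grid_scores_ itself was removed in scikit-learn 1.2; on current versions the same curve can be drawn from cv_results_. A sketch, assuming the rfecv object fitted above:

mean_scores = rfecv.cv_results_["mean_test_score"]  # one entry per elimination step
plt.figure()
plt.xlabel("Elimination step (feature count varies when step is fractional)")
plt.ylabel("Mean cross-validation score (%s)" % scoring)
plt.plot(range(1, len(mean_scores) + 1), mean_scores)
plt.show()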
Example 11: plot_RFE
# Required import: from sklearn import feature_selection [as alias]
# Or: from sklearn.feature_selection import RFECV [as alias]
def plot_RFE(X, y):
    from sklearn.svm import SVC, LinearSVC
    from sklearn.model_selection import StratifiedKFold  # sklearn.cross_validation was removed in scikit-learn 0.20
    from sklearn.feature_selection import RFECV
    from sklearn.datasets import make_classification
    from sklearn.metrics import zero_one_loss
    import matplotlib.pylab as pl
    # Create the RFE object and compute a cross-validated score.
    # svc = SVC(kernel="linear", class_weight="balanced", cache_size=1200, shrinking=True)
    # loss='l2' and class_weight='auto' are obsolete spellings of
    # loss='squared_hinge' and class_weight='balanced'.
    svc = LinearSVC(penalty='l1', loss='squared_hinge', dual=False, class_weight='balanced', multi_class='ovr')
    # SGD = SGDClassifier(penalty='elasticnet', class_weight='balanced', n_jobs=-1, n_iter=10, l1_ratio=0.15)
    # rfecv = RFECV(estimator=svc, step=0.1, cv=StratifiedKFold(n_splits=5), scoring='roc_auc')
    rfecv = RFECV(estimator=svc, step=0.2, cv=StratifiedKFold(n_splits=2), scoring='f1')
    X_RFE = rfecv.fit_transform(X, y)
    print("Optimal number of features in X_RFE : %d" % rfecv.n_features_)
    # Plot number of features vs. cross-validation scores
    pl.figure()
    pl.xlabel("Number of features selected")
    pl.ylabel("Cross-validation score (f1)")
    pl.plot(range(1, len(rfecv.grid_scores_) + 1), rfecv.grid_scores_)
    pl.show()
    print('CV score with the RFE-optimal feature set:')
    CV_multi_stats(X_RFE, y, svc)
    return (X_RFE, rfecv)
Example 12: __select_features
# Required import: from sklearn import feature_selection [as alias]
# Or: from sklearn.feature_selection import RFECV [as alias]
def __select_features(self, X, y, feature_names):
    logging.info("Automagically extracting features with recursive feature elimination based on RandomForest")
    model = RandomForestClassifier(n_jobs=-1)
    rfe = RFECV(model, cv=QuincyConfig.CV, scoring=QuincyConfig.METRIC)
    fit = rfe.fit(X, y)
    logging.info("Number of selected features: %d" % fit.n_features_)
    discarded, selected = self.__get_discarded_and_selected_features(feature_names, fit)
    X = self.__drop_discarded_features(X, discarded)
    feature_selection_results = self.__get_feature_selection_results(X, discarded, feature_names, fit, model, selected, y)
    self._featureSelectionResults = feature_selection_results
    return X
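The double-underscore helpers are not shown; a minimal sketch of the selected/discarded split they presumably perform, using only public RFECV attributes (the helper internals here are an assumption):

import numpy as np

names = np.asarray(feature_names)
selected = list(names[fit.support_])     # features RFECV kept
discarded = list(names[~fit.support_])   # features it eliminated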
Example 13: adopt
# Required import: from sklearn import feature_selection [as alias]
# Or: from sklearn.feature_selection import RFECV [as alias]
def adopt(self, dfe, interpreted=None):
    models = []
    # about scoring, please see the following document
    # http://scikit-learn.org/stable/modules/model_evaluation.html#common-cases-predefined-values
    scoring = "accuracy"
    # TODO: text and datetime columns are ignored for now
    for t in (FType.text, FType.datetime):
        columns = dfe.get_columns(t, include_target=False)
        dfe.df.drop(columns, inplace=True, axis=1)
    dfe.sync()
    if dfe.get_target_ftype() == FType.categorical:
        # models = [RandomForestClassifier(), SVC(kernel="linear")]
        models = [RandomForestClassifier()]
        if self.is_binary_classification(dfe):
            scoring = "f1"
        else:
            # see the reference about the f1 score
            # http://scikit-learn.org/stable/modules/generated/sklearn.metrics.f1_score.html#sklearn.metrics.f1_score
            scoring = "f1_micro"  # macro averaging scores too harshly when some label is never predicted, so use micro
    elif dfe.get_target_ftype() == FType.numerical:
        # About the models used to select features, please refer to
        # http://scikit-learn.org/stable/modules/feature_selection.html
        models = [Lasso(alpha=.1), RandomForestRegressor()]
        scoring = "r2"
    else:
        raise Exception("Target type is None or un-predictable type.")
    features = dfe.get_features()
    target = dfe.get_target()
    best_rfecv = None
    feature_masks = []
    for m in models:
        rfecv = RFECV(estimator=m, step=1, cv=self.cv_count, scoring=scoring, n_jobs=self.n_jobs)
        rfecv.fit(features, target)
        feature_masks.append(rfecv.support_)
    selected_mask = []
    if len(feature_masks) < 2:
        selected_mask = feature_masks[0]
    else:
        selected_mask = np.logical_and(*feature_masks)  # keep only the features that every model selected
    eliminates = features.columns[np.logical_not(selected_mask)]
    dfe.df.drop(eliminates, inplace=True, axis=1)
    dfe.sync()
    selected = features.columns[selected_mask].tolist()
    ss = self.a2t(selected)
    self.description = {
        "ja": "項目{}は予測に有効な項目です。これらを利用し、モデルを構築します。".format(ss),
        "en": "Columns {} are useful to predict. I'll use these to make model.".format(ss)
    }
    return True
Example 14: recursive_feature_elimination_cv
# Required import: from sklearn import feature_selection [as alias]
# Or: from sklearn.feature_selection import RFECV [as alias]
def recursive_feature_elimination_cv(self, step=1, inplace=False):
    """A method to implement recursive feature elimination on the model
    with cross-validation (CV). At each step, features are ranked as per
    the algorithm used and the lowest-ranked features are removed,
    as specified by the step argument. At each step, the CV score is
    determined using the scoring metric specified in the model. The set
    of features with the highest cross-validation score is then chosen.

    Parameters
    ----------
    step : int or float, default=1
        If int, then step corresponds to the number of features to remove
        at each iteration.
        If float and within (0.0, 1.0), then step corresponds to the
        percentage (rounded down) of features to remove at each
        iteration.
        If float and greater than one, then the integral part will be
        treated as an integer input.
    inplace : bool, default=False
        If True, the predictors of the class are modified to those
        selected by the RFECV procedure.

    Returns
    -------
    ranks : pandas series
        A series containing every predictor as index and its rank in
        the selection as value (selected features have rank 1)
    """
    rfecv = RFECV(
        self.alg, step=step, cv=self.cv_folds,
        scoring=self.scoring_metric, n_jobs=-1
    )
    rfecv.fit(
        self.datablock.train[self.predictors],
        self.datablock.train[self.datablock.target]
    )
    # With step=1 this reduces to min_nfeat = 1; computing it
    # unconditionally avoids a NameError in the plotting call below.
    min_nfeat = (len(self.predictors)
                 - step * (len(rfecv.grid_scores_) - 1))
    plt.xlabel("Number of features selected")
    plt.ylabel("Cross-validation score")
    plt.plot(
        range(min_nfeat, len(self.predictors) + 1, step),
        rfecv.grid_scores_
    )
    plt.show(block=False)
    ranks = pd.Series(rfecv.ranking_, index=self.predictors)
    selected = ranks.loc[rfecv.support_]
    if inplace:
        self.set_predictors(selected.index.tolist())
    return ranks
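A hypothetical call site (the trainer object and its alg, cv_folds, and datablock attributes come from the surrounding class, which is not shown):

ranks = trainer.recursive_feature_elimination_cv(step=2, inplace=True)
print(ranks.sort_values().head(10))  # the ten best-ranked predictors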