This article collects typical usage examples of the sklearn.feature_selection.RFE class in Python. If you are wondering what feature_selection.RFE does, how to use it, or where to find worked examples, the curated samples below may help. You can also explore the containing module, sklearn.feature_selection, for related usage.
The 15 code examples of feature_selection.RFE below are sorted by popularity by default.
Example 1: GetSelectedFeatureIndex
# Required import: from sklearn import feature_selection [as alias]
# Or: from sklearn.feature_selection import RFE [as alias]
def GetSelectedFeatureIndex(self, data_container):
    data = data_container.GetArray()
    # Normalize each feature column to unit L2 norm before ranking.
    data /= np.linalg.norm(data, ord=2, axis=0)
    label = data_container.GetLabel()

    if data.shape[1] < self.GetSelectedFeatureNumber():
        print('RFE: The number of features {:d} in the data container is smaller than the requested number {:d}'.format(
            data.shape[1], self.GetSelectedFeatureNumber()))
        self.SetSelectedFeatureNumber(data.shape[1])

    # step=0.05 removes 5% of the remaining features at each iteration.
    fs = RFE(self.__classifier, n_features_to_select=self.GetSelectedFeatureNumber(), step=0.05)
    fs.fit(data, label)
    feature_index = fs.get_support(True)
    self._rank = fs.ranking_

    return feature_index.tolist()
Example 2: rfe_selection
# Required import: from sklearn import feature_selection [as alias]
# Or: from sklearn.feature_selection import RFE [as alias]
def rfe_selection(X, y, n_features):
    """
    Performs Recursive Feature Elimination and selects the top-ranked features

    Keyword arguments:
    X -- the feature vectors
    y -- the target vector
    n_features -- the number of best-ranked features to keep
    """
    if verbose:
        print('\nPerforming feature selection based on the Recursive Feature Elimination method ...')
    clf = RandomForestClassifierWithCoef(n_estimators=10, n_jobs=-1)
    fs = RFE(clf, n_features_to_select=n_features, step=1)
    fs = fs.fit(X, y)
    ranks = fs.ranking_
    # Features assigned rank 1 are the ones RFE kept.
    feature_indexes = []
    for i in range(len(ranks)):
        if ranks[i] == 1:
            feature_indexes += [i]
    # Return the selected feature columns and their original indices.
    return X[:, feature_indexes[:n_features]], feature_indexes[:n_features]
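RandomForestClassifierWithCoef is not part of scikit-learn; it is a project-specific wrapper, needed because older RFE releases required the estimator to expose a coef_ attribute, which plain random forests lack. A minimal sketch of such a wrapper (an assumption about its shape, since the original definition lives elsewhere in the project):

from sklearn.ensemble import RandomForestClassifier

class RandomForestClassifierWithCoef(RandomForestClassifier):
    """Expose feature_importances_ as coef_ so RFE can rank features."""
    def fit(self, *args, **kwargs):
        super().fit(*args, **kwargs)
        # Hypothetical shim: mirror the importances where RFE looks for weights.
        self.coef_ = self.feature_importances_
        return self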
Example 3: test_objectmapper
# Required import: from sklearn import feature_selection [as alias]
# Or: from sklearn.feature_selection import RFE [as alias]
def test_objectmapper(self):
    df = pdml.ModelFrame([])
    self.assertIs(df.feature_selection.GenericUnivariateSelect,
                  fs.GenericUnivariateSelect)
    self.assertIs(df.feature_selection.SelectPercentile,
                  fs.SelectPercentile)
    self.assertIs(df.feature_selection.SelectKBest, fs.SelectKBest)
    self.assertIs(df.feature_selection.SelectFpr, fs.SelectFpr)
    self.assertIs(df.feature_selection.SelectFromModel,
                  fs.SelectFromModel)
    self.assertIs(df.feature_selection.SelectFdr, fs.SelectFdr)
    self.assertIs(df.feature_selection.SelectFwe, fs.SelectFwe)
    self.assertIs(df.feature_selection.RFE, fs.RFE)
    self.assertIs(df.feature_selection.RFECV, fs.RFECV)
    self.assertIs(df.feature_selection.VarianceThreshold,
                  fs.VarianceThreshold)
Example 4: ReducedFeaturesDF
# Required import: from sklearn import feature_selection [as alias]
# Or: from sklearn.feature_selection import RFE [as alias]
def ReducedFeaturesDF(X, y):
    '''
    Returns a dataframe with only a subset of features/columns retained
    '''
    from sklearn.feature_selection import RFE
    # loss='l2' and class_weight='auto' are long-deprecated spellings;
    # current scikit-learn uses 'squared_hinge' and 'balanced'.
    est = LinearSVC(penalty='l1', loss='squared_hinge', dual=False, class_weight='balanced')
    # selectK = SelectKBest(score_func=f_classif, k=45)
    selectRFE = RFE(estimator=est, n_features_to_select=22, step=0.15)
    selectK = selectRFE

    selectK.fit(X, y)
    selectK_mask = selectK.get_support()
    # feature_names and filename are module-level globals in the original script.
    K_featnames = feature_names[selectK_mask]
    print("reduced RFE features:")
    print(K_featnames)
    Reduced_df = pd.read_csv(filename, index_col=0)
    Reduced_df = Reduced_df[Reduced_df.columns[selectK_mask]]
    # Reduced_df.to_csv('REDUCED_Feat.csv')
    return Reduced_df

# ReducedFeaturesDF(X, y)
# z = pd.DataFrame(data=X_SGD, index=y)
# z.to_csv('REDUCED_Feat.csv')
Example 5: rfe_multiprocess
# Required import: from sklearn import feature_selection [as alias]
# Or: from sklearn.feature_selection import RFE [as alias]
def rfe_multiprocess(i, dets, deform, body_num, x, measure, k_features):
    sys.stdout.write('>> calc rfe map NO.%d\n' % (i))
    y = np.array(dets).reshape(body_num, 1)
    model = LinearRegression()

    # Recursive feature elimination down to k_features measures.
    rfe = RFE(model, n_features_to_select=k_features)
    rfe.fit(x, y.ravel())
    # mask.append(rfe.support_)
    flag = np.array(rfe.support_).reshape(utils.M_NUM, 1)
    flag = flag.repeat(body_num, axis=1)

    # Calculate the linear mapping matrix by solving the normal equations.
    S = np.array(deform)
    S.shape = (S.size, 1)
    m = np.array(measure[flag])
    m.shape = (k_features, body_num)
    M = build_equation(m, 9)
    MtM = M.transpose().dot(M)
    MtS = M.transpose().dot(S)
    ans = np.array(scipy.sparse.linalg.spsolve(MtM, MtS))
    ans.shape = (9, k_features)
    return [ans, rfe.support_]
Example 6: __init__
# Required import: from sklearn import feature_selection [as alias]
# Or: from sklearn.feature_selection import RFE [as alias]
def __init__(self, selected_feature_number=1, classifier=SVC(kernel='linear')):
    super(FeatureSelectByRFE, self).__init__(name='RFE', selected_feature_number=selected_feature_number)
    self.__classifier = classifier
    self._rank = None
    self._selected_features = []
Example 7: GetDescription
# Required import: from sklearn import feature_selection [as alias]
# Or: from sklearn.feature_selection import RFE [as alias]
def GetDescription(self):
    text = "Before building the model, we used recursive feature elimination (RFE) to select features. The goal " \
           "of RFE is to select features based on a classifier by recursively considering smaller and smaller sets of features. "
    return text
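The description above summarizes how RFE works; as a minimal, self-contained illustration on synthetic data (a sketch, not part of the original project):

from sklearn.datasets import make_classification
from sklearn.feature_selection import RFE
from sklearn.svm import SVC

X, y = make_classification(n_samples=100, n_features=20, random_state=0)
selector = RFE(SVC(kernel='linear'), n_features_to_select=5, step=1)
selector.fit(X, y)
print(selector.support_)   # boolean mask of the 5 surviving features
print(selector.ranking_)   # 1 for kept features; larger ranks were eliminated earlier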
Example 8: get_initial_regression_model_recommendation
# Required import: from sklearn import feature_selection [as alias]
# Or: from sklearn.feature_selection import RFE [as alias]
def get_initial_regression_model_recommendation(project_id, dataset_id, dependent_variable_id=None, recommendation_type=MRT.LASSO.value, table_layout=MCT.LEAVE_ONE_OUT.value, data_size_cutoff=current_app.config['ANALYSIS_DATA_SIZE_CUTOFF'], categorical_value_limit=current_app.config['ANALYSIS_CATEGORICAL_VALUE_LIMIT']):
    df = get_data(project_id=project_id, dataset_id=dataset_id)
    if len(df) > data_size_cutoff:
        df = df.sample(data_size_cutoff)

    field_properties = db_access.get_field_properties(project_id, dataset_id)
    quantitative_field_properties = [ fp for fp in field_properties if fp['general_type'] == 'q' ]

    # Use the requested dependent variable, or pick a random quantitative field.
    dependent_variable = next((f for f in field_properties if f['id'] == dependent_variable_id), None) \
        if dependent_variable_id \
        else np.random.choice(quantitative_field_properties, size=1)[0]

    # Skip the dependent variable itself and unique or high-cardinality categorical fields.
    independent_variables = []
    for fp in field_properties:
        if fp['name'] != dependent_variable['name']:
            if fp['general_type'] == 'c' and (fp['is_unique'] or len(fp['unique_values']) > categorical_value_limit):
                continue
            independent_variables.append(fp)

    recommendationTypeToFunction = {
        MRT.FORWARD_R2.value: forward_r2,
        MRT.LASSO.value: lasso,
        MRT.RFE.value: recursive_feature_elimination,
        MRT.FORWARD_F.value: f_regression
    }

    result = recommendationTypeToFunction[recommendation_type](df, dependent_variable, independent_variables)

    return {
        'recommended': True,
        'table_layout': table_layout,
        'recommendation_type': recommendation_type,
        'dependent_variable_id': dependent_variable['id'],
        'independent_variables_ids': [ x['id'] for x in result ],
    }
Example 9: recursive_feature_elimination
# Required import: from sklearn import feature_selection [as alias]
# Or: from sklearn.feature_selection import RFE [as alias]
def recursive_feature_elimination(df, dependent_variable, independent_variables, interaction_terms=[], model_limit=5):
    considered_independent_variables_per_model, patsy_models = \
        construct_models(df, dependent_variable, independent_variables, interaction_terms, table_layout=MCT.ALL_VARIABLES.value)
    y, X = dmatrices(patsy_models[0], df, return_type='dataframe')

    estimator = SVR(kernel='linear')
    selector = RFE(estimator, n_features_to_select=5, step=1)
    selector = selector.fit(X, y)

    # Only logs the selection so far; the function does not yet return a result.
    logger.info(selector.support_)
    logger.info(selector.ranking_)
    return
Example 10: lr_with_fs
# Required import: from sklearn import feature_selection [as alias]
# Or: from sklearn.feature_selection import RFE [as alias]
def lr_with_fs():
    """
    Submission: lr_with_fs_0620_02.csv
    E_val: <missing>
    E_in: 0.856252488379
    E_out: 0.8552577388980213
    """
    from sklearn.linear_model import LogisticRegressionCV
    from sklearn.preprocessing import StandardScaler
    from sklearn.pipeline import Pipeline

    X = util.fetch(util.cache_path('train_X_before_2014-08-01_22-00-47'))
    y = util.fetch(util.cache_path('train_y_before_2014-08-01_22-00-47'))

    raw_scaler = StandardScaler()
    raw_scaler.fit(X)
    X_scaled = raw_scaler.transform(X)

    # Reuse a previously fitted, cached RFE transformer to prune features.
    rfe = util.fetch(util.cache_path('feature_selection.RFE.21'))
    X_pruned = rfe.transform(X_scaled)

    new_scaler = StandardScaler()
    new_scaler.fit(X_pruned)
    X_new = new_scaler.transform(X_pruned)

    clf = LogisticRegressionCV(cv=10, scoring='roc_auc', n_jobs=-1)
    clf.fit(X_new, y)

    print(auc_score(clf, X_new, y))
    to_submission(Pipeline([('scale_raw', raw_scaler),
                            ('rfe', rfe),
                            ('scale_new', new_scaler),
                            ('lr', clf)]), 'lr_with_fs_0620_02')
Example 11: feature_selection
# Required import: from sklearn import feature_selection [as alias]
# Or: from sklearn.feature_selection import RFE [as alias]
def feature_selection(matrix, labels, train_ind, fnum):
    """
    matrix    : feature matrix (num_subjects x num_features)
    labels    : ground truth labels (num_subjects x 1)
    train_ind : indices of the training samples
    fnum      : size of the feature vector after feature selection

    return:
        x_data : feature matrix of lower dimension (num_subjects x fnum)
    """
    estimator = RidgeClassifier()
    selector = RFE(estimator, n_features_to_select=fnum, step=100, verbose=1)

    # Fit RFE on the training subjects only, then reduce all subjects.
    featureX = matrix[train_ind, :]
    featureY = labels[train_ind]
    selector = selector.fit(featureX, featureY.ravel())
    x_data = selector.transform(matrix)

    print("Number of labeled samples %d" % len(train_ind))
    print("Number of features selected %d" % x_data.shape[1])

    return x_data

# Make sure each site is represented in the training set when selecting a subset of the training set
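A hedged usage sketch for feature_selection above, on synthetic data (the sample counts and names below are assumptions for illustration only):

import numpy as np
from sklearn.datasets import make_classification

matrix, labels = make_classification(n_samples=300, n_features=500, random_state=0)
train_ind = np.arange(200)   # first 200 subjects serve as the labeled training set
x_data = feature_selection(matrix, labels, train_ind, fnum=100)
print(x_data.shape)          # (300, 100): every subject, reduced to fnum features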
Example 12: __init__
# Required import: from sklearn import feature_selection [as alias]
# Or: from sklearn.feature_selection import RFE [as alias]
def __init__(self, estimator, n_features_to_select=None, step=1, verbose=0):
    self._hyperparams = {
        'estimator': estimator,
        'n_features_to_select': n_features_to_select,
        'step': step,
        'verbose': verbose}
    self._wrapped_model = SKLModel(**self._hyperparams)
Example 13: feature_selection
# Required import: from sklearn import feature_selection [as alias]
# Or: from sklearn.feature_selection import RFE [as alias]
def feature_selection(x_train, y_train, x_test, y_test):
    print("Feature selection with LinearSVC")
    model = LinearSVC(C=0.1, penalty='l2')
    rfe = RFE(model, n_features_to_select=5)
    best_features_model = rfe.fit(x_train, y_train)
    y_hat = best_features_model.predict(x_test)
    utils.print_statistics(y_test, y_hat)
Example 14: compute_ranks
# Required import: from sklearn import feature_selection [as alias]
# Or: from sklearn.feature_selection import RFE [as alias]
def compute_ranks(self):
    if self._algorithm in (FeatureSelector.SELECT_K_BEST,
                           FeatureSelector.SELECT_PERCENTILE):
        # Both univariate selectors expose per-feature scores directly.
        scores = self._selector.scores_
        sorted_scores = sorted(scores, reverse=True)
        ranks = [sorted_scores.index(i) + 1 for i in scores]
    elif self._algorithm == FeatureSelector.RECURSIVE_ELIMINATION:
        n_selected = self._selector.n_features_
        support = self._selector.support_
        ranking = self._selector.ranking_
        # RFE and RFECV do not provide feature scores. Instead, they provide
        # a mask of the selected features (support_) and an ascending list
        # indicating when every other feature was eliminated (ranking_).
        # Use these two to construct feature ranks, while acknowledging that
        # RFE and RFECV do not actually distinguish between the weights of
        # the selected features.
        ranks = [0] * len(support)
        selected_count = 0
        for i in range(len(ranking)):
            if support[i]:
                # All selected features in ranking_ receive rank 1, so add
                # an incrementing offset so that features ranked 1, 1, 1
                # become 1, 2, 3.
                ranks[i] = ranking[i] + selected_count
                selected_count += 1
            else:
                # Even if there are 5 selected features, the 6th feature in
                # ranking_ is given rank 2, so add (n_selected - 1).
                ranks[i] = ranking[i] + (n_selected - 1)
    return ranks
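A worked example of the rank expansion above, with hypothetical RFE outputs for a five-feature run in which features 0 and 2 were selected:

support = [True, False, True, False, False]   # selector.support_
ranking = [1, 3, 1, 2, 4]                     # selector.ranking_
n_selected = sum(support)                     # 2

ranks = [0] * len(support)
selected_count = 0
for i in range(len(ranking)):
    if support[i]:
        ranks[i] = ranking[i] + selected_count       # ties 1, 1 become 1, 2
        selected_count += 1
    else:
        ranks[i] = ranking[i] + (n_selected - 1)     # shift the eliminated ranks
print(ranks)  # [1, 4, 2, 3, 5]: a strict 1..5 ordering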
Example 15: _eliminate_recursively
# Required import: from sklearn import feature_selection [as alias]
# Or: from sklearn.feature_selection import RFE [as alias]
def _eliminate_recursively(self, k=None):
    if self._problem == FeatureSelector.CLASSIFICATION:
        estimator = RandomForestClassifier(random_state=self._random_state)
    else:
        estimator = LassoCV(random_state=self._random_state)

    # If k is not specified, use RFECV to automatically decide on the
    # optimal number of features. If k is specified, use RFE.
    if k is None:
        self._selector = RFECV(estimator)
    else:
        self._selector = RFE(estimator, n_features_to_select=k, step=0.05)
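A standalone sketch of the same RFE-versus-RFECV choice on synthetic data (the data and sizes are assumptions, independent of the surrounding class):

from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import RFE, RFECV

X, y = make_classification(n_samples=200, n_features=25, random_state=0)
estimator = RandomForestClassifier(random_state=0)

auto = RFECV(estimator).fit(X, y)     # cross-validation chooses how many features to keep
fixed = RFE(estimator, n_features_to_select=10, step=0.05).fit(X, y)
print(auto.n_features_, fixed.n_features_)   # cv-chosen count vs. the fixed 10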