当前位置: 首页>>代码示例>>Python>>正文


Python feature_selection.SelectKBest方法代码示例

本文整理汇总了Python中sklearn.feature_selection.SelectKBest方法的典型用法代码示例。如果您正苦于以下问题:Python feature_selection.SelectKBest方法的具体用法?Python feature_selection.SelectKBest怎么用?Python feature_selection.SelectKBest使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sklearn.feature_selection的用法示例。


在下文中一共展示了feature_selection.SelectKBest方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: GetSelectedFeatureIndex

# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import SelectKBest [as 别名]
def GetSelectedFeatureIndex(self, data_container):
        data = data_container.GetArray()
        data /= np.linalg.norm(data, ord=2, axis=0)
        label = data_container.GetLabel()

        if data.shape[1] < self.GetSelectedFeatureNumber():
            print(
                'ANOVA: The number of features {:d} in data container is smaller than the required number {:d}'.format(
                    data.shape[1], self.GetSelectedFeatureNumber()))
            self.SetSelectedFeatureNumber(data.shape[1])

        fs = SelectKBest(f_classif, k=self.GetSelectedFeatureNumber())
        fs.fit(data, label)
        feature_index = fs.get_support(True)
        f_value, p_value = f_classif(data, label)
        return feature_index.tolist(), f_value, p_value 
开发者ID:salan668,项目名称:FAE,代码行数:18,代码来源:FeatureSelector.py

示例2: get_model

# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import SelectKBest [as 别名]
def get_model(with_pipeline=False):
    """Get a multi-layer perceptron model.

    Optionally, put it in a pipeline that scales the data.

    """
    model = NeuralNetClassifier(MLPClassifier)
    if with_pipeline:
        model = Pipeline([
            ('scale', FeatureUnion([
                ('minmax', MinMaxScaler()),
                ('normalize', Normalizer()),
            ])),
            ('select', SelectKBest(k=N_FEATURES)),  # keep input size constant
            ('net', model),
        ])
    return model 
开发者ID:skorch-dev,项目名称:skorch,代码行数:19,代码来源:train.py

示例3: featuresFromFeatureSelection

# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import SelectKBest [as 别名]
def featuresFromFeatureSelection(X,Y,columnNames):
    
    for f in columnNames:
        print(f)
    X_new_withfitTransform = SelectKBest(chi2, k=34).fit(X, Y)
    colors = getColorNames()
    counter  = 0
    
    scores = X_new_withfitTransform.scores_
    scores_scaled = np.divide(scores, 1000) 
        
    for score in scores_scaled:
        #if(score > 10):
        #print('Feature {:>34}'.format(columnNames[counter]))
        print('{:>34}  '.format( score))
        '''Plot a graph'''    
        plt.bar(counter, score,color=colors[counter])
        counter +=1 

    plt.ylabel('Scores(1k)')
    plt.title('Scores calculated by Chi-Square Test')
    plt.legend(columnNames, bbox_to_anchor=(0., 0.8, 1., .102), loc=3,ncol=5, mode="expand", borderaxespad=0.)
    plt.show()
    
    #print(feature_selection.chi2(X,Y)) 
开发者ID:md-k-sarker,项目名称:Predicting-Health-Insurance-Cost,代码行数:27,代码来源:DataAnalysis.py

示例4: test_export_to_sklearn_pipeline3

# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import SelectKBest [as 别名]
def test_export_to_sklearn_pipeline3(self):
        from lale.lib.lale import ConcatFeatures
        from lale.lib.sklearn import PCA
        from lale.lib.sklearn import KNeighborsClassifier, LogisticRegression, SVC 
        from sklearn.feature_selection import SelectKBest
        from lale.lib.sklearn import Nystroem
        from sklearn.pipeline import FeatureUnion

        lale_pipeline = ((PCA() >> SelectKBest(k=2)) & (Nystroem(random_state = 42) >> SelectKBest(k=3))
         & (SelectKBest(k=3))) >> ConcatFeatures() >> SelectKBest(k=2) >> LogisticRegression()
        trained_lale_pipeline = lale_pipeline.fit(self.X_train, self.y_train)
        sklearn_pipeline = trained_lale_pipeline.export_to_sklearn_pipeline()
        self.assertIsInstance(sklearn_pipeline.named_steps['featureunion'], FeatureUnion)
        self.assertIsInstance(sklearn_pipeline.named_steps['selectkbest'], SelectKBest)
        from sklearn.linear_model import LogisticRegression
        self.assertIsInstance(sklearn_pipeline.named_steps['logisticregression'], LogisticRegression)
        self.assert_equal_predictions(sklearn_pipeline, trained_lale_pipeline) 
开发者ID:IBM,项目名称:lale,代码行数:19,代码来源:test_core_pipeline.py

示例5: get_top_k

# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import SelectKBest [as 别名]
def get_top_k(self):
		columns=list(self.data.columns.values)
		columns.remove(self.target)
		# remove intercept from top_k
		if(self.objective):
			top_k_vars=SelectKBest(f_regression, k=self.top_k)
			top_k_vars.fit_transform(self.data[columns], self.data[self.target])
		else:
			columns.remove('intercept')
			try:
				top_k_vars=SelectKBest(chi2, k=self.top_k)
				top_k_vars.fit_transform(self.data[columns], self.data[self.target])
			except:
				top_k_vars=SelectKBest(f_classif, k=self.top_k)
				top_k_vars.fit_transform(self.data[columns], self.data[self.target])
		return [columns[i] for i in top_k_vars.get_support(indices=True)] 
开发者ID:dominance-analysis,项目名称:dominance-analysis,代码行数:18,代码来源:dominance.py

示例6: find_best_feature_selections

# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import SelectKBest [as 别名]
def find_best_feature_selections(X,y):

    #select the best features usin different technique
    X_new = SelectKBest(chi2, k=80).fit_transform(X,y)
    X_new1 = SelectPercentile(chi2, percentile=20).fit_transform(X,y)

    X_new2 = SelectKBest(f_classif, k=80).fit_transform(X,y) #this one has the best performance
    X_new22 = SelectPercentile(f_classif, percentile=20).fit_transform(X,y)

    X_new3 = SelectKBest(f_classif, k=70).fit_transform(X,y)
    X_new4 = SelectKBest(f_classif, k=60).fit_transform(X,y)

    print (X_new.shape)
    #selection_parameters_for_classfier(X_new,y)
    #print (y.shape)
    train_and_test(X_new,y)
    train_and_test(X_new1,y)
    train_and_test(X_new2,y)
    train_and_test(X_new22,y)
    train_and_test(X_new3,y)
    train_and_test(X_new4,y)
    #X,y = _dataset_sample()

################################PARAMETER  Selected################################
#TODO some problem happens when using the parameter max_leaf_nodes in Dtree and RandomForest 
开发者ID:ririhedou,项目名称:dr_droid,代码行数:27,代码来源:GetMLPara.py

示例7: feature_select

# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import SelectKBest [as 别名]
def feature_select(corpus, labels, k=1000):
    """
    select top k features through chi-square test
    """
    bin_cv = CountVectorizer(binary=True)
    le = LabelEncoder()
    X = bin_cv.fit_transform(corpus)
    y = le.fit_transform(labels).reshape(-1, 1)

    k = min(X.shape[1], k)
    skb = SelectKBest(chi2, k=k)
    skb.fit(X, y)

    feature_ids = skb.get_support(indices=True)
    feature_names = bin_cv.get_feature_names()
    vocab = {}

    for new_fid, old_fid in enumerate(feature_ids):
        feature_name = feature_names[old_fid]
        vocab[feature_name] = new_fid

    # we only care about the final extracted feature vocabulary
    return vocab 
开发者ID:FelixHo,项目名称:Text-Classification-Benchmark,代码行数:25,代码来源:tester.py

示例8: test_objectmapper

# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import SelectKBest [as 别名]
def test_objectmapper(self):
        df = pdml.ModelFrame([])
        self.assertIs(df.feature_selection.GenericUnivariateSelect,
                      fs.GenericUnivariateSelect)
        self.assertIs(df.feature_selection.SelectPercentile,
                      fs.SelectPercentile)
        self.assertIs(df.feature_selection.SelectKBest, fs.SelectKBest)
        self.assertIs(df.feature_selection.SelectFpr, fs.SelectFpr)
        self.assertIs(df.feature_selection.SelectFromModel,
                      fs.SelectFromModel)
        self.assertIs(df.feature_selection.SelectFdr, fs.SelectFdr)
        self.assertIs(df.feature_selection.SelectFwe, fs.SelectFwe)
        self.assertIs(df.feature_selection.RFE, fs.RFE)
        self.assertIs(df.feature_selection.RFECV, fs.RFECV)
        self.assertIs(df.feature_selection.VarianceThreshold,
                      fs.VarianceThreshold) 
开发者ID:pandas-ml,项目名称:pandas-ml,代码行数:18,代码来源:test_feature_selection.py

示例9: test_pipeline

# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import SelectKBest [as 别名]
def test_pipeline(self):
        from sklearn.feature_selection import SelectKBest
        from sklearn.feature_selection import f_regression
        from sklearn.pipeline import Pipeline

        diabetes = datasets.load_diabetes()
        models = ['OLS', 'GLS', 'WLS', 'GLSAR', 'QuantReg', 'GLM', 'RLM']

        for model in models:
            klass = getattr(sm, model)

            selector = SelectKBest(f_regression, k=5)
            estimator = Pipeline([('selector', selector),
                                  ('reg', base.StatsModelsRegressor(klass))])

            estimator.fit(diabetes.data, diabetes.target)
            result = estimator.predict(diabetes.data)

            data = SelectKBest(f_regression, k=5).fit_transform(diabetes.data, diabetes.target)
            expected = klass(diabetes.target, data).fit().predict(data)
            self.assert_numpy_array_almost_equal(result, expected) 
开发者ID:pandas-ml,项目名称:pandas-ml,代码行数:23,代码来源:test_base.py

示例10: featureFitting

# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import SelectKBest [as 别名]
def featureFitting(filename, X, y, featureNames,optimalFlag, kbest=20, alpha=0.05, model=None):
    '''
    Gets the K-best features (filtered by FDR, then select best ranked by t-test, more advanced options can be implemented).
    Save the data/matrix with the resulting/kept features to a new output file, "REDUCED_Feat.csv"
    Returns new features matrix, FD scaler, and K-select scaler
    '''
    a=alpha
    FD = SelectFdr(alpha=a)
    X = FD.fit_transform(X,y)

    selectK = SelectKBest(k=kbest)
    selectK.fit(X,y)
    selectK_mask=selectK.get_support()
    K_featnames = featureNames[selectK_mask]
    print("K_featnames: %s" %(K_featnames))
    Reduced_df = pd.read_csv(filename, index_col=0)
    Reduced_df = Reduced_df[Reduced_df.columns[selectK_mask]]
    Reduced_df.to_csv('REDUCED_Feat.csv')
    return Reduced_df, FD, selectK 
开发者ID:ddofer,项目名称:ProFET,代码行数:21,代码来源:Model_trainer.py

示例11: ReducedFeaturesDF

# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import SelectKBest [as 别名]
def ReducedFeaturesDF(X,y):
        '''
        Returns a dataframe with only a subset of features/columns retained
        '''
        from sklearn.feature_selection import RFE
        est = LinearSVC( penalty='l1', loss='l2', dual=False, class_weight='auto')
#        selectK = SelectKBest(score_func = f_classif, k=45)
        selectRFE = RFE(estimator=est, n_features_to_select=22, step=0.15)
        selectK=selectRFE

        selectK.fit(X,y)
        selectK_mask=selectK.get_support()
        K_featnames = feature_names[selectK_mask]
        print ("reduced RFE features:")
        print(K_featnames)
        Reduced_df = pd.read_csv(filename, index_col=0)
        Reduced_df = Reduced_df[Reduced_df.columns[selectK_mask]]
#        Reduced_df.to_csv('REDUCED_Feat.csv')
        return Reduced_df

#    ReducedFeaturesDF(X,y)
    # z=pd.DataFrame(data=X_SGD,index=y)
    # z.to_csv('REDUCED_Feat.csv') 
开发者ID:ddofer,项目名称:ProFET,代码行数:25,代码来源:Model_Parameters_CV.py

示例12: fit

# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import SelectKBest [as 别名]
def fit(self, X, y):
        
        self.selector = SelectKBest(f_classif, k=self.max_features)
        self.selector.fit(X, y)

        X_train=self.selector.transform(X)
        y_train=y

        param_list=[]
        idx = range(len(y_train))
        for i in range(self.n_estimators):
            random.shuffle(idx)
            param_list.append((X_train[idx[:self.max_samples]], 
                               y_train[idx[:self.max_samples]]))

        pool = ThreadPool(cpu_count())
        self.clf_list = pool.map(self._prepare_classifier, param_list)
        pool.close()
        pool.join()

        """
        X2=[]
        for clf in self.clf_list:
            P=clf.predict_proba(X_train)
            if len(X2)==0:
                X2=P[:, 0]
            else:
                X2=numpy.vstack((X2, P[:, 0]))
        X2=numpy.swapaxes(X2, 0, 1)
        print "X2:", X2.shape

        from sklearn.ensemble import RandomForestClassifier
        self.clf2=RandomForestClassifier(n_estimators=100)
        self.clf2.fit(X2, y_train)
        """ 
开发者ID:daniellerch,项目名称:aletheia,代码行数:37,代码来源:models.py

示例13: test_bagging_with_pipeline

# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import SelectKBest [as 别名]
def test_bagging_with_pipeline():
    estimator = BaggingClassifier(make_pipeline(SelectKBest(k=1),
                                                DecisionTreeClassifier()),
                                  max_features=2)
    estimator.fit(iris.data, iris.target)
    assert isinstance(estimator[0].steps[-1][1].random_state, int) 
开发者ID:PacktPublishing,项目名称:Mastering-Elasticsearch-7.0,代码行数:8,代码来源:test_bagging.py

示例14: test_feature_selection

# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import SelectKBest [as 别名]
def test_feature_selection():
    # make two feature dicts with two useful features and a bunch of useless
    # ones, in terms of chi2
    d1 = dict([("useless%d" % i, 10) for i in range(20)],
              useful1=1, useful2=20)
    d2 = dict([("useless%d" % i, 10) for i in range(20)],
              useful1=20, useful2=1)

    for indices in (True, False):
        v = DictVectorizer().fit([d1, d2])
        X = v.transform([d1, d2])
        sel = SelectKBest(chi2, k=2).fit(X, [0, 1])

        v.restrict(sel.get_support(indices=indices), indices=indices)
        assert_equal(v.get_feature_names(), ["useful1", "useful2"]) 
开发者ID:PacktPublishing,项目名称:Mastering-Elasticsearch-7.0,代码行数:17,代码来源:test_dict_vectorizer.py

示例15: mkchi2

# 需要导入模块: from sklearn import feature_selection [as 别名]
# 或者: from sklearn.feature_selection import SelectKBest [as 别名]
def mkchi2(k):
    """Make k-best chi2 selector"""
    return SelectKBest(chi2, k=k) 
开发者ID:PacktPublishing,项目名称:Mastering-Elasticsearch-7.0,代码行数:5,代码来源:test_chi2.py


注:本文中的sklearn.feature_selection.SelectKBest方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。