當前位置: 首頁>>代碼示例>>Python>>正文


Python feature_selection.mutual_info_classif方法代碼示例

本文整理匯總了Python中sklearn.feature_selection.mutual_info_classif方法的典型用法代碼示例。如果您正苦於以下問題:Python feature_selection.mutual_info_classif方法的具體用法?Python feature_selection.mutual_info_classif怎麼用?Python feature_selection.mutual_info_classif使用的例子?那麼, 這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在sklearn.feature_selection的用法示例。


在下文中一共展示了feature_selection.mutual_info_classif方法的5個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: mutual_info_select

# 需要導入模塊: from sklearn import feature_selection [as 別名]
# 或者: from sklearn.feature_selection import mutual_info_classif [as 別名]
def mutual_info_select(self, F, y, threshold):
		"""Select the features whose mutual information with ``y`` is high.

		``mutual_info_classif(F, y)`` yields one score per feature; the
		positional indices of the scores strictly greater than ``threshold``
		are returned as a list, in increasing order.
		"""
		scores = mutual_info_classif(F, y)
		return [idx for idx, score in enumerate(scores) if score > threshold]
開發者ID:MarioRuggieri,項目名稱:Emotion-Recognition-from-Speech,代碼行數:9,代碼來源:preprocessing.py

示例2: _set_scoring_func

# 需要導入模塊: from sklearn import feature_selection [as 別名]
# 或者: from sklearn.feature_selection import mutual_info_classif [as 別名]
def _set_scoring_func(self):
        self.scoring_func = [('variance', False)]
        if self.annotated_instances.num_instances() > 0:
            self.scoring_func.append(('f_classif', True))
            self.scoring_func.append(('mutual_info_classif', False))
            if self.instances.features.all_positives():
                self.scoring_func.append(('chi2', True)) 
開發者ID:ANSSI-FR,項目名稱:SecuML,代碼行數:9,代碼來源:scores.py

示例3: compute_scoring_func

# 需要導入模塊: from sklearn import feature_selection [as 別名]
# 或者: from sklearn.feature_selection import mutual_info_classif [as 別名]
def compute_scoring_func(self, func):
        """Score every feature with the scoring function named ``func``.

        Parameters
        ----------
        func : str
            One of ``'variance'``, ``'f_classif'``, ``'mutual_info_classif'``
            or ``'chi2'``.

        Returns
        -------
        tuple
            A pair whose first item holds the scores. For ``'variance'`` and
            ``'mutual_info_classif'`` the second item is ``None``; for
            ``'f_classif'`` and ``'chi2'`` the result of the corresponding
            function is returned unchanged (in scikit-learn that is a
            ``(statistic, p-values)`` pair).

        Raises
        ------
        AssertionError
            If ``func`` is not one of the supported names.
        """
        if func == 'variance':
            # Unsupervised score: computed on all the instances, and no
            # annotation is needed.
            features = self.instances.features.get_values()
            if isinstance(features, spmatrix):
                # Sparse input: keep the second item returned by
                # mean_variance_axis (the per-column variances).
                variance = mean_variance_axis(features, axis=0)[1]
            else:
                variance = features.var(axis=0)
            return variance, None

        # Supervised scores: restricted to the annotated instances.
        features = self.annotated_instances.features.get_values()
        annotations = self.annotated_instances.annotations.get_supervision(
                                                               self.multiclass)
        if func == 'f_classif':
            return f_classif(features, annotations)
        elif func == 'mutual_info_classif':
            if isinstance(features, spmatrix):
                # Sparse input: every feature is declared discrete.
                discrete_indexes = True
            else:
                # Dense input: only the binary features are declared discrete.
                features_types = self.instances.features.info.types
                discrete_indexes = [i for i, t in enumerate(features_types)
                                    if t == FeatureType.binary]
                if not discrete_indexes:
                    # No binary feature: pass False rather than an empty list.
                    discrete_indexes = False
            return (mutual_info_classif(features, annotations,
                                        discrete_features=discrete_indexes),
                    None)
        elif func == 'chi2':
            return chi2(features, annotations)
        else:
            # Raised explicitly (same exception type as the former
            # ``assert(False)``) so the check survives ``python -O``.
            raise AssertionError('Unknown scoring function: %r' % (func,))
開發者ID:ANSSI-FR,項目名稱:SecuML,代碼行數:33,代碼來源:scores.py

示例4: feature_importance_classification

# 需要導入模塊: from sklearn import feature_selection [as 別名]
# 或者: from sklearn.feature_selection import mutual_info_classif [as 別名]
def feature_importance_classification(features, target, n_neighbors=3, random_state=None):
    """Compute univariate importance measures of features for a class target.

    Columns of ``features`` are split by dtype: floating-point columns are
    treated as continuous, integer and boolean columns as discrete. Columns
    of any other dtype are ignored.

    Parameters
    ----------
    features : pandas.DataFrame
        Candidate features, one column per feature.
    target : array-like
        Class labels, passed as-is to the statistical tests.
    n_neighbors : int, default 3
        Forwarded to ``mutual_info_classif``.
    random_state : optional
        Forwarded to ``mutual_info_classif``.

    Returns
    -------
    cont_imp : pandas.DataFrame
        Indexed by continuous column name, with columns ``f_statistic``,
        ``f_p_value`` and ``mutual_information`` (no columns when there is
        no continuous feature).
    disc_imp : pandas.DataFrame
        Indexed by discrete column name, with columns ``chi2_statistic``,
        ``chi2_p_value``, ``cramers_v`` and ``mutual_information`` (no
        columns when there is no discrete feature).
    """

    cont = features.select_dtypes(include=[np.floating])
    # Builtin ``bool`` is what the ``np.bool`` alias used to point to; the
    # alias itself was removed in NumPy 1.24.
    disc = features.select_dtypes(include=[np.integer, bool])

    cont_imp = pd.DataFrame(index=cont.columns)
    disc_imp = pd.DataFrame(index=disc.columns)

    # Continuous features
    if cont_imp.index.size > 0:

        # F-test
        f_test = feature_selection.f_classif(cont, target)
        cont_imp['f_statistic'] = f_test[0]
        cont_imp['f_p_value'] = f_test[1]

        # Mutual information
        mut_inf = feature_selection.mutual_info_classif(cont, target, discrete_features=False,
                                                        n_neighbors=n_neighbors,
                                                        random_state=random_state)
        cont_imp['mutual_information'] = mut_inf

    # Discrete features
    if disc_imp.index.size > 0:

        # Chi²-test on the feature/target contingency table
        chi2_tests = defaultdict(dict)

        for feature in disc.columns:
            contingency = pd.crosstab(disc[feature], target)
            statistic, p_value, _, _ = stats.chi2_contingency(contingency)
            chi2_tests[feature]['chi2_statistic'] = statistic
            chi2_tests[feature]['chi2_p_value'] = p_value

        # Assigning Series aligns the results on the feature names.
        chi2_tests_df = pd.DataFrame.from_dict(chi2_tests, orient='index')
        disc_imp['chi2_statistic'] = chi2_tests_df['chi2_statistic']
        disc_imp['chi2_p_value'] = chi2_tests_df['chi2_p_value']

        # Cramér's V (corrected)
        # ``DataFrame.items`` replaces ``iteritems``, removed in pandas 2.0.
        disc_imp['cramers_v'] = [
            cramers_v_corrected_stat(pd.crosstab(column, target).values)
            for _, column in disc.items()
        ]

        # Mutual information
        mut_inf = feature_selection.mutual_info_classif(disc, target, discrete_features=True,
                                                        n_neighbors=n_neighbors,
                                                        random_state=random_state)
        disc_imp['mutual_information'] = mut_inf

    return cont_imp, disc_imp
開發者ID:MaxHalford,項目名稱:xam,代碼行數:53,代碼來源:eda.py

示例5: _fit_one_time_series

# 需要導入模塊: from sklearn import feature_selection [as 別名]
# 或者: from sklearn.feature_selection import mutual_info_classif [as 別名]
def _fit_one_time_series(
        self, x, X, y, n_timestamps, n_shapelets, window_sizes,
        window_steps, remove_similar, i, rng
    ):
        """Extract, score and select the shapelets of the time series ``x``.

        Candidates are extracted from ``x``, their distances to the time
        series in ``X`` are derived, and each candidate is scored against
        ``y`` with mutual information (``self.criterion == 'mutual_info'``)
        or with the F-statistic otherwise. Similar candidates are optionally
        dropped, then at most ``n_shapelets`` best-scoring ones are kept.

        Returns the tuple ``(X_dist, scores, shapelets, start_idx, end_idx,
        time_series_idx)`` restricted to the kept candidates, where
        ``time_series_idx`` is an array filled with ``i``.
        """
        # All the candidates, with their lengths and boundaries
        candidates, lengths, starts, ends = _extract_all_shapelets(
            x, window_sizes, window_steps, n_timestamps)

        # One column of distances per candidate
        distances = _derive_all_distances(
            X, window_sizes, candidates, lengths, fit=True)

        # One score per candidate
        if self.criterion == 'mutual_info':
            scores = mutual_info_classif(
                distances, y, discrete_features=False, random_state=rng)
        else:
            scores, _ = f_classif(distances, y)

        # Chain the per-group candidates into a single array, and do the
        # same for their boundaries
        flattened = chain.from_iterable(list(group) for group in candidates)
        candidates = np.asarray(list(flattened))
        starts = np.concatenate(starts)
        ends = np.concatenate(ends)

        # First selection: drop the similar candidates (the helper receives
        # a copy of the scores)
        if remove_similar:
            kept = _remove_similar_shapelets(scores.copy(), starts, ends)
            scores, candidates = scores[kept], candidates[kept]
            starts, ends = starts[kept], ends[kept]
            distances = distances[:, kept]

        # Second selection: keep the 'n_shapelets' highest scores
        if scores.size > n_shapelets - 1:
            pivot = scores.size - n_shapelets
            best = np.argpartition(scores, pivot)[-n_shapelets:]
            scores, candidates = scores[best], candidates[best]
            starts, ends = starts[best], ends[best]
            distances = distances[:, best]

        origin = np.full(scores.size, i)
        return distances, scores, candidates, starts, ends, origin
開發者ID:johannfaouzi,項目名稱:pyts,代碼行數:50,代碼來源:shapelet_transform.py


注:本文中的sklearn.feature_selection.mutual_info_classif方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。