This article collects typical usage examples of the Python method sklearn.feature_selection.mutual_info_classif. If you have been wondering what feature_selection.mutual_info_classif does, how to use it, or what it looks like in practice, the curated code examples below may help. You can also read further about the module it belongs to, sklearn.feature_selection.
The following shows 5 code examples of feature_selection.mutual_info_classif, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
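Before the project examples, here is a minimal standalone sketch (not taken from any of them) showing the basic call: mutual_info_classif returns one non-negative score per feature, where a higher score means stronger statistical dependence between that feature and the class label. The iris dataset is used purely for illustration.

from sklearn.datasets import load_iris
from sklearn.feature_selection import mutual_info_classif

X, y = load_iris(return_X_y=True)

# One score per feature; random_state fixes the noise added by the
# nearest-neighbor estimator so the scores are reproducible.
scores = mutual_info_classif(X, y, random_state=0)
print(scores)  # four scores, one per iris feature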
Example 1: mutual_info_select
# Required import: from sklearn import feature_selection [as alias]
# Or: from sklearn.feature_selection import mutual_info_classif [as alias]
def mutual_info_select(self, F, y, threshold):
    # Score each feature's mutual information with the labels.
    mi = list(enumerate(mutual_info_classif(F, y)))
    f_best = []
    for ind, rank in mi:
        if rank > threshold:
            f_best.append(ind)
    return f_best
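A self-contained sketch of the same thresholding idea, written as a free expression rather than a method; the make_classification data and the threshold value are made up for illustration:

from sklearn.datasets import make_classification
from sklearn.feature_selection import mutual_info_classif

F, y = make_classification(n_samples=200, n_features=10, n_informative=3,
                           random_state=0)

threshold = 0.05
scores = mutual_info_classif(F, y, random_state=0)

# Same result as mutual_info_select(F, y, threshold): the indices of
# features whose mutual-information score exceeds the threshold.
f_best = [ind for ind, rank in enumerate(scores) if rank > threshold]
print(f_best)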
Example 2: _set_scoring_func
# Required import: from sklearn import feature_selection [as alias]
# Or: from sklearn.feature_selection import mutual_info_classif [as alias]
def _set_scoring_func(self):
    self.scoring_func = [('variance', False)]
    if self.annotated_instances.num_instances() > 0:
        self.scoring_func.append(('f_classif', True))
        self.scoring_func.append(('mutual_info_classif', False))
        if self.instances.features.all_positives():
            self.scoring_func.append(('chi2', True))
Example 3: compute_scoring_func
# Required import: from sklearn import feature_selection [as alias]
# Or: from sklearn.feature_selection import mutual_info_classif [as alias]
def compute_scoring_func(self, func):
    if func == 'variance':
        features = self.instances.features.get_values()
        annotations = self.instances.annotations.get_labels()
        if isinstance(features, spmatrix):
            variance = mean_variance_axis(features, axis=0)[1]
        else:
            variance = features.var(axis=0)
        return variance, None
    features = self.annotated_instances.features.get_values()
    annotations = self.annotated_instances.annotations.get_supervision(
            self.multiclass)
    if func == 'f_classif':
        return f_classif(features, annotations)
    elif func == 'mutual_info_classif':
        if isinstance(features, spmatrix):
            # Sparse input: treat every feature as discrete.
            discrete_indexes = True
        else:
            features_types = self.instances.features.info.types
            discrete_indexes = [i for i, t in enumerate(features_types)
                                if t == FeatureType.binary]
            if not discrete_indexes:
                # No binary columns: treat all features as continuous.
                discrete_indexes = False
        return (mutual_info_classif(features, annotations,
                                    discrete_features=discrete_indexes),
                None)
    elif func == 'chi2':
        return chi2(features, annotations)
    else:
        assert False
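The branching above decides how to fill mutual_info_classif's discrete_features argument: True, False, or a list of column indices. The sketch below reproduces that decision on made-up data (the arrays and the "binary" column are assumptions for illustration):

import numpy as np
from scipy.sparse import csr_matrix
from sklearn.feature_selection import mutual_info_classif

rng = np.random.RandomState(0)
X = rng.rand(100, 3)
X[:, 2] = (X[:, 2] > 0.5).astype(float)   # pretend column 2 is binary
y = (X[:, 0] + X[:, 2] > 1.0).astype(int)

# Dense input: pass the indices of the binary columns only.
mi_dense = mutual_info_classif(X, y, discrete_features=[2], random_state=0)

# Sparse input: the example treats every feature as discrete.
mi_sparse = mutual_info_classif(csr_matrix(X), y, discrete_features=True,
                                random_state=0)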
Example 4: feature_importance_classification
# Required import: from sklearn import feature_selection [as alias]
# Or: from sklearn.feature_selection import mutual_info_classif [as alias]
def feature_importance_classification(features, target, n_neighbors=3, random_state=None):
    cont = features.select_dtypes(include=[np.floating])
    disc = features.select_dtypes(include=[np.integer, bool])
    cont_imp = pd.DataFrame(index=cont.columns)
    disc_imp = pd.DataFrame(index=disc.columns)

    # Continuous features
    if cont_imp.index.size > 0:
        # F-test
        f_test = feature_selection.f_classif(cont, target)
        cont_imp['f_statistic'] = f_test[0]
        cont_imp['f_p_value'] = f_test[1]

        # Mutual information
        mut_inf = feature_selection.mutual_info_classif(cont, target, discrete_features=False,
                                                        n_neighbors=n_neighbors,
                                                        random_state=random_state)
        cont_imp['mutual_information'] = mut_inf

    # Discrete features
    if disc_imp.index.size > 0:
        # Chi²-test
        chi2_tests = defaultdict(dict)
        for feature in disc.columns:
            crosstab = pd.crosstab(disc[feature], target)
            statistic, p_value, _, _ = stats.chi2_contingency(crosstab)
            chi2_tests[feature]['chi2_statistic'] = statistic
            chi2_tests[feature]['chi2_p_value'] = p_value
        chi2_tests_df = pd.DataFrame.from_dict(chi2_tests, orient='index')
        disc_imp['chi2_statistic'] = chi2_tests_df['chi2_statistic']
        disc_imp['chi2_p_value'] = chi2_tests_df['chi2_p_value']

        # Cramér's V (corrected); cramers_v_corrected_stat is a helper
        # defined elsewhere in the project.
        disc_imp['cramers_v'] = [
            cramers_v_corrected_stat(pd.crosstab(column, target).values)
            for _, column in disc.items()
        ]

        # Mutual information
        mut_inf = feature_selection.mutual_info_classif(disc, target, discrete_features=True,
                                                        n_neighbors=n_neighbors,
                                                        random_state=random_state)
        disc_imp['mutual_information'] = mut_inf

    return cont_imp, disc_imp
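A hypothetical call to the function above; the toy frame is made up, and it assumes numpy, pandas, scipy.stats, collections.defaultdict, sklearn's feature_selection module, and the project's cramers_v_corrected_stat helper are all importable alongside it:

import numpy as np
import pandas as pd

rng = np.random.RandomState(0)
df = pd.DataFrame({
    'height': rng.normal(170, 10, 200),   # float column -> continuous branch
    'smoker': rng.randint(0, 2, 200),     # int column   -> discrete branch
})
target = pd.Series(rng.randint(0, 2, 200))

cont_imp, disc_imp = feature_importance_classification(df, target,
                                                       random_state=0)
print(cont_imp)  # f_statistic, f_p_value, mutual_information per float column
print(disc_imp)  # chi2 stats, cramers_v, mutual_information per int/bool column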
Example 5: _fit_one_time_series
# Required import: from sklearn import feature_selection [as alias]
# Or: from sklearn.feature_selection import mutual_info_classif [as alias]
def _fit_one_time_series(
    self, x, X, y, n_timestamps, n_shapelets, window_sizes,
    window_steps, remove_similar, i, rng
):
    """Fit one time series."""
    # Extract all shapelets
    shapelets, lengths, start_idx, end_idx = _extract_all_shapelets(
        x, window_sizes, window_steps, n_timestamps)

    # Derive distances between shapelets and time series
    X_dist = _derive_all_distances(
        X, window_sizes, shapelets, lengths, fit=True)

    if self.criterion == 'mutual_info':
        scores = mutual_info_classif(X_dist, y, discrete_features=False,
                                     random_state=rng)
    else:
        scores, _ = f_classif(X_dist, y)

    # Flatten the list of 2D arrays into an array of 1D arrays
    shapelets = [list(shapelet) for shapelet in shapelets]
    shapelets = np.asarray(list(chain.from_iterable(shapelets)))

    # Concatenate the list/tuple of 1D arrays into one 1D array
    start_idx = np.concatenate(start_idx)
    end_idx = np.concatenate(end_idx)

    # Remove similar shapelets
    if remove_similar:
        idx = _remove_similar_shapelets(scores.copy(), start_idx, end_idx)
        scores = scores[idx]
        shapelets = shapelets[idx]
        start_idx = start_idx[idx]
        end_idx = end_idx[idx]
        X_dist = X_dist[:, idx]

    # Keep at most 'n_shapelets'
    if scores.size > n_shapelets - 1:
        idx = np.argpartition(
            scores, scores.size - n_shapelets)[-n_shapelets:]
        scores = scores[idx]
        shapelets = shapelets[idx]
        start_idx = start_idx[idx]
        end_idx = end_idx[idx]
        X_dist = X_dist[:, idx]

    time_series_idx = np.full(scores.size, i)
    return X_dist, scores, shapelets, start_idx, end_idx, time_series_idx
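To isolate the part of this example that involves mutual_info_classif: shapelet-distance columns are ranked either by mutual information or by the ANOVA F statistic, and only the best n_shapelets columns are kept. The sketch below reproduces just that scoring-and-selection step on made-up data; the private _extract/_derive/_remove helpers from the example are not needed here.

import numpy as np
from sklearn.feature_selection import f_classif, mutual_info_classif

rng = np.random.RandomState(42)
X_dist = rng.rand(30, 8)         # distances of 30 series to 8 shapelets
y = rng.randint(0, 2, 30)

criterion = 'mutual_info'        # stand-in for the estimator's criterion
if criterion == 'mutual_info':
    scores = mutual_info_classif(X_dist, y, discrete_features=False,
                                 random_state=rng)
else:
    scores, _ = f_classif(X_dist, y)

# Keep the n_shapelets best-scoring columns, as the example does.
n_shapelets = 3
idx = np.argpartition(scores, scores.size - n_shapelets)[-n_shapelets:]
X_dist_best = X_dist[:, idx]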