本文整理匯總了Python中sklearn.utils.resample方法的典型用法代碼示例。如果您正苦於以下問題:Python utils.resample方法的具體用法?Python utils.resample怎麼用?Python utils.resample使用的例子?那麼, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類sklearn.utils的用法示例。
在下文中一共展示了utils.resample方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: test_resample_stratified_replace
# 需要導入模塊: from sklearn import utils [as 別名]
# 或者: from sklearn.utils import resample [as 別名]
def test_resample_stratified_replace():
    """Check that stratified resampling honours the ``replace`` argument."""
    rng = np.random.RandomState(0)
    population = 100
    draw = 50
    X = rng.normal(size=(population, 1))
    y = rng.randint(0, 2, size=population)

    # All resample calls consume the same ``rng``, so their order matters.
    with_repl, _ = resample(
        X, y, replace=True, n_samples=draw, random_state=rng, stratify=y
    )
    without_repl, _ = resample(
        X, y, replace=False, n_samples=draw, random_state=rng, stratify=y
    )
    # Sampling with replacement must repeat some rows; without, none.
    assert len(np.unique(with_repl)) < draw
    assert len(np.unique(without_repl)) == draw

    # With replacement, n_samples is allowed to exceed X.shape[0].
    oversampled, _ = resample(
        X, y, replace=True, n_samples=1000, random_state=rng, stratify=y
    )
    assert len(oversampled) == 1000
    assert len(np.unique(oversampled)) == 100
示例2: plot_mean_bootstrap_exponential_readme
# 需要導入模塊: from sklearn import utils [as 別名]
# 或者: from sklearn.utils import resample [as 別名]
def plot_mean_bootstrap_exponential_readme():
    """Save a two-panel figure comparing two bootstraps of the mean.

    Four values are drawn from an exponential distribution (scale 7). The
    top panel shows the samples returned by ``mean(X, 10000)`` (labelled
    "Bayesian Bootstrap"), the bottom panel shows means of 10000 classical
    ``resample`` draws. Each panel also marks the interval returned by
    ``highest_density_interval`` (labelled "95% HDI"). The figure is written
    to ``readme_exponential.png``.
    """
    X = np.random.exponential(7, 4)

    classical_samples = []
    for _ in range(10000):
        classical_samples.append(np.mean(resample(X)))
    posterior_samples = mean(X, 10000)

    post_lo, post_hi = highest_density_interval(posterior_samples)
    cls_lo, cls_hi = highest_density_interval(classical_samples)

    # (title, samples, legend label, interval end points) for each row.
    panels = (
        ('Bayesian Bootstrap of mean', posterior_samples,
         'Bayesian Bootstrap Samples', (post_lo, post_hi)),
        ('Classical Bootstrap of mean', classical_samples,
         'Classical Bootstrap Samples', (cls_lo, cls_hi)),
    )
    for row, (title, samples, label, (lo, hi)) in enumerate(panels, start=1):
        plt.subplot(2, 1, row)
        plt.title(title)
        sns.distplot(samples, label=label)
        plt.plot([lo, hi], [0, 0], linewidth=5.0, marker='o', label='95% HDI')
        plt.xlim(-1, 18)
        plt.legend()

    plt.savefig('readme_exponential.png', bbox_inches='tight')
示例3: bootstrap_CI
# 需要導入模塊: from sklearn import utils [as 別名]
# 或者: from sklearn.utils import resample [as 別名]
def bootstrap_CI(actual_list, predict_list, num_repeats=1000, stat='roc_auc',
                 confident_lvl=0.95, side='two', random_state=0):
    """Estimate a bootstrap percentile confidence interval for ROC AUC.

    The (actual, predicted) pairs are resampled with replacement
    ``num_repeats`` times; the statistic is computed on every resample and
    the interval bounds are read off as percentiles of those values.

    :param actual_list: true labels
    :param predict_list: predicted scores, same length as ``actual_list``
    :param num_repeats: number of bootstrap resamples
    :param stat: statistic to bootstrap; only ``'roc_auc'`` is implemented,
        any other value yields ``(nan, nan)``
    :param confident_lvl: confidence level of the interval, e.g. ``0.95``
    :param side: currently ignored; the interval is always two-sided
    :param random_state: seed (or ``numpy.random.RandomState``) driving the
        resampling, so repeated calls with the same value agree
    :return: ``(lower, upper)`` bounds, or ``(nan, nan)`` if the computation
        fails (for instance when a resample contains a single class)
    """
    assert len(actual_list) == len(predict_list)
    from sklearn.utils import resample

    # A single generator is shared by all repeats: the whole run is
    # reproducible for a given seed, yet every resample is different.
    if isinstance(random_state, np.random.RandomState):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    try:
        all_stats = []
        for _ in range(num_repeats):
            actual_resampled, predict_resampled = resample(
                actual_list, predict_list, random_state=rng)
            if stat == 'roc_auc':
                all_stats.append(
                    roc_auc_score(actual_resampled, predict_resampled))
        roc_auc_left = np.percentile(all_stats, (1 - confident_lvl) / 2. * 100)
        roc_auc_right = np.percentile(all_stats, (1 + confident_lvl) / 2. * 100)
    except Exception:
        # Deliberate best-effort: any failure is reported as an undefined
        # interval rather than propagated to the caller.
        roc_auc_left, roc_auc_right = float('nan'), float('nan')
    return roc_auc_left, roc_auc_right
示例4: plot_mean_bootstrap
# 需要導入模塊: from sklearn import utils [as 別名]
# 或者: from sklearn.utils import resample [as 別名]
def plot_mean_bootstrap():
    """Overlay two bootstrap distributions of the mean of ``[-1, 0, 1]``.

    The first histogram shows the samples returned by ``mean(X, 10000)``
    (presumably a Bayesian bootstrap helper defined elsewhere); the second
    shows the means of 10000 classical ``resample`` draws.
    """
    data = [-1, 0, 1]

    bayes_draws = mean(data, 10000)
    sns.distplot(bayes_draws)

    classic_draws = []
    for _ in range(10000):
        classic_draws.append(np.mean(resample(data)))
    sns.distplot(classic_draws)

    plt.show()
示例5: plot_mean_resample_bootstrap
# 需要導入模塊: from sklearn import utils [as 別名]
# 或者: from sklearn.utils import resample [as 別名]
def plot_mean_resample_bootstrap():
    """Overlay two bootstrap distributions of the mean of ``[-1, 0, 1]``.

    The first histogram shows the output of
    ``bayesian_bootstrap(X, np.mean, 10000, 100)``; the second shows the
    means of 10000 classical ``resample`` draws.
    """
    data = [-1, 0, 1]

    bayes_draws = bayesian_bootstrap(data, np.mean, 10000, 100)
    sns.distplot(bayes_draws)

    classic_draws = []
    for _ in range(10000):
        classic_draws.append(np.mean(resample(data)))
    sns.distplot(classic_draws)

    plt.show()
示例6: plot_median
# 需要導入模塊: from sklearn import utils [as 別名]
# 或者: from sklearn.utils import resample [as 別名]
def plot_median():
    """Overlay two bootstrap distributions of the median.

    Ten values are drawn uniformly from [-1, 1). The first histogram shows
    the output of ``bayesian_bootstrap(X, np.median, 10000, 100)``; the
    second shows the medians of 10000 classical ``resample`` draws.
    """
    data = np.random.uniform(-1, 1, 10)

    bayes_draws = bayesian_bootstrap(data, np.median, 10000, 100)
    sns.distplot(bayes_draws)

    classic_draws = []
    for _ in range(10000):
        classic_draws.append(np.median(resample(data)))
    sns.distplot(classic_draws)

    plt.show()
示例7: plot_var_bootstrap
# 需要導入模塊: from sklearn import utils [as 別名]
# 或者: from sklearn.utils import resample [as 別名]
def plot_var_bootstrap():
    """Overlay two bootstrap distributions of the variance.

    One hundred values are drawn uniformly from [-1, 1). The first histogram
    shows the samples returned by ``var(X, 10000)`` (presumably a Bayesian
    bootstrap helper defined elsewhere); the second shows the variances of
    10000 classical ``resample`` draws.
    """
    data = np.random.uniform(-1, 1, 100)

    bayes_draws = var(data, 10000)
    sns.distplot(bayes_draws)

    classic_draws = []
    for _ in range(10000):
        classic_draws.append(np.var(resample(data)))
    sns.distplot(classic_draws)

    plt.show()
示例8: plot_var_resample_bootstrap
# 需要導入模塊: from sklearn import utils [as 別名]
# 或者: from sklearn.utils import resample [as 別名]
def plot_var_resample_bootstrap():
    """Overlay two bootstrap distributions of the variance.

    One hundred values are drawn uniformly from [-1, 1). The first histogram
    shows the output of ``bayesian_bootstrap(X, np.var, 10000, 500)``; the
    second shows the variances of 10000 classical ``resample`` draws.
    """
    data = np.random.uniform(-1, 1, 100)

    bayes_draws = bayesian_bootstrap(data, np.var, 10000, 500)
    sns.distplot(bayes_draws)

    classic_draws = []
    for _ in range(10000):
        classic_draws.append(np.var(resample(data)))
    sns.distplot(classic_draws)

    plt.show()
示例9: plot_regression_bootstrap
# 需要導入模塊: from sklearn import utils [as 別名]
# 或者: from sklearn.utils import resample [as 別名]
def plot_regression_bootstrap():
    """Compare two bootstraps of a linear-regression slope.

    Four noisy points on the line y = x are fitted 10000 times on classical
    ``resample`` draws, and once more through
    ``bayesian_bootstrap_regression``. The raw data are shown as a scatter
    plot first, then both coefficient distributions are overlaid.
    """
    X = np.array([[0], [1], [2], [3]])
    y = np.array([0, 1, 2, 3]) + np.random.normal(0, 1, 4)

    def fit_coef(X, y):
        # Coefficients of an ordinary least-squares fit on the given data.
        return LinearRegression().fit(X, y).coef_

    classical_coefs = []
    for _ in tqdm(range(10000)):
        X_boot, y_boot = resample(X, y)
        classical_coefs.append(LinearRegression().fit(X_boot, y_boot).coef_)

    posterior_coefs = bayesian_bootstrap_regression(X, y, fit_coef, 10000, 1000)

    plt.scatter(X.reshape(-1, 1), y)
    plt.show()

    sns.distplot(classical_coefs)
    sns.distplot(posterior_coefs)
    plt.show()
示例10: resample_returns
# 需要導入模塊: from sklearn import utils [as 別名]
# 或者: from sklearn.utils import resample [as 別名]
def resample_returns(
        returns,
        func,
        seed=0,
        num_trials=100
):
    """
    Resample the returns and calculate any statistic on every new sample.

    Each trial draws ``len(returns)`` rows with replacement and applies
    ``func`` to the resampled data. Rows are selected by position, so the
    sample size stays correct even when ``returns`` has duplicate index
    labels.

    https://en.wikipedia.org/wiki/Resampling_(statistics)

    :param returns (Series, DataFrame): Returns
    :param func: Given the resampled returns calculate a statistic
    :param seed: Seed for random number generator; trial ``i`` uses
        ``seed + i`` so results are reproducible
    :param num_trials: Number of times to resample and run the experiment
    :return: Series (or DataFrame, matching ``returns``) of resampled
        statistics, one row per trial
    :raises TypeError: if ``returns`` is neither a Series nor a DataFrame
    """
    import numpy as np

    if isinstance(returns, pd.Series):
        stats = pd.Series(index=range(num_trials))
    elif isinstance(returns, pd.DataFrame):
        stats = pd.DataFrame(
            index=range(num_trials),
            columns=returns.columns
        )
    else:
        raise TypeError("returns needs to be a Series or DataFrame!")

    n = returns.shape[0]
    positions = np.arange(n)
    for i in range(num_trials):
        # Sample row positions rather than labels: looking up a repeated
        # label with .loc would return every row carrying that label.
        sampled = resample(positions, n_samples=n, random_state=seed + i)
        stats.loc[i] = func(returns.iloc[sampled])
    return stats
示例11: test_resample
# 需要導入模塊: from sklearn import utils [as 別名]
# 或者: from sklearn.utils import resample [as 別名]
def test_resample():
    """Exercise the edge cases and argument validation of ``resample``."""
    # Border case not worth mentioning in doctests: no arrays at all.
    assert resample() is None

    # Each (args, kwargs) pair below must be rejected with ValueError:
    # mismatched lengths, too many samples without replacement, and an
    # unknown keyword argument.
    invalid_calls = [
        (([0], [0, 1]), {}),
        (([0, 1], [0, 1]), {'replace': False, 'n_samples': 3}),
        (([0, 1], [0, 1]), {'meaning_of_life': 42}),
    ]
    for args, kwargs in invalid_calls:
        assert_raises(ValueError, resample, *args, **kwargs)

    # Issue:6581, n_samples can be more when replace is True (default).
    oversampled = resample([1, 2], n_samples=5)
    assert_equal(len(oversampled), 5)
示例12: test_resample_stratified
# 需要導入模塊: from sklearn import utils [as 別名]
# 或者: from sklearn.utils import resample [as 別名]
def test_resample_stratified():
    """Check that ``resample`` keeps class proportions when stratifying."""
    rng = np.random.RandomState(0)
    population = 100
    positive_rate = .9
    X = rng.normal(size=(population, 1))
    y = rng.binomial(1, positive_rate, size=population)

    shared = dict(n_samples=10, random_state=0)

    # Without stratification this seed happens to draw only the majority class.
    _, y_plain = resample(X, y, stratify=None, **shared)
    assert (y_plain == 1).all()

    # With stratification the minority class must be represented.
    _, y_strat = resample(X, y, stratify=y, **shared)
    assert not (y_strat == 1).all()
    assert y_strat.sum() == 9  # all 1s, one 0
示例13: test_resample_stratify_2dy
# 需要導入模塊: from sklearn import utils [as 別名]
# 或者: from sklearn.utils import resample [as 別名]
def test_resample_stratify_2dy():
    """Check that a 2-D ``y`` is accepted as the stratification target."""
    rng = np.random.RandomState(0)
    population = 100
    X = rng.normal(size=(population, 1))
    y = rng.randint(0, 2, size=(population, 2))

    _, y_resampled = resample(
        X, y, n_samples=50, random_state=rng, stratify=y
    )
    # The resampled target keeps its two dimensions.
    assert y_resampled.ndim == 2
示例14: test_resample_stratify_sparse_error
# 需要導入模塊: from sklearn import utils [as 別名]
# 或者: from sklearn.utils import resample [as 別名]
def test_resample_stratify_sparse_error():
    """Check that a sparse ``stratify`` argument is rejected with TypeError."""
    rng = np.random.RandomState(0)
    population = 100
    X = rng.normal(size=(population, 2))
    y = rng.randint(0, 2, size=population)
    sparse_labels = sp.csr_matrix(y)

    # The call must fail before returning anything.
    with pytest.raises(TypeError, match='A sparse matrix was passed'):
        resample(X, y, n_samples=50, random_state=rng, stratify=sparse_labels)
示例15: subsample_df
# 需要導入模塊: from sklearn import utils [as 別名]
# 或者: from sklearn.utils import resample [as 別名]
def subsample_df(df:pd.DataFrame, objective:str, targ_name:str, n_samples:Optional[int]=None, replace:bool=False, strat_key:Optional[str]=None,
                 wgt_name:Optional[str]=None) -> pd.DataFrame:
    r'''
    Subsamples, or samples with replacement, a DataFrame.
    Will automatically reweight data such that weight sums remain the same as the original DataFrame (per class for classification).
    Rows are selected by position, so a DataFrame with duplicate index labels is sampled correctly.

    Arguments:
        df: DataFrame to sample
        objective: string representation of objective: either 'classification' or 'regression'
        targ_name: name of column containing target data
        n_samples: If set, will sample that number of data points, otherwise will sample a new DataFrame of the same size as the original
            (a shuffled copy when `replace` is False)
        replace: whether to sample with replacement
        strat_key: column name to use for stratified subsampling, if desired
        wgt_name: name of column containing weight data. If set, will reweight subsampled data, otherwise will not

    Returns:
        New DataFrame holding the sampled rows; the input DataFrame is not modified
    '''

    import numpy as np

    stratify = None if strat_key is None else df[strat_key]
    # Sample row positions rather than labels: .loc with a repeated label would return every row carrying that label
    positions = resample(np.arange(len(df)), replace=replace, n_samples=n_samples, stratify=stratify)
    tmp_df = df.iloc[positions].copy()

    # Reweight resampled data
    if wgt_name is not None:
        if 'class' in objective.lower():
            for c in tmp_df[targ_name].unique():
                sampled_mask = tmp_df[targ_name] == c
                # Scale so the class's total weight matches the original DataFrame
                scale = df.loc[df[targ_name] == c, wgt_name].sum() / tmp_df.loc[sampled_mask, wgt_name].sum()
                tmp_df.loc[sampled_mask, wgt_name] *= scale
        else:
            tmp_df[wgt_name] *= df[wgt_name].sum() / tmp_df[wgt_name].sum()
    return tmp_df