當前位置: 首頁>>代碼示例>>Python>>正文


Python utils.resample方法代碼示例

本文整理匯總了 Python 中 sklearn.utils.resample 方法的典型用法代碼示例。如果您正苦於以下問題:Python utils.resample 方法的具體用法?Python utils.resample 怎麼用?Python utils.resample 使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在模組 sklearn.utils 的用法示例。


在下文中一共展示了 utils.resample 方法的 15 個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的 Python 代碼示例。

示例1: test_resample_stratified_replace

# 需要導入模塊: from sklearn import utils [as 別名]
# 或者: from sklearn.utils import resample [as 別名]
def test_resample_stratified_replace():
    """Check that stratified ``resample`` honours the ``replace`` flag.

    One ``RandomState`` is shared by every call below, so the order of the
    calls determines which draws each of them sees.
    """
    rng = np.random.RandomState(0)
    population = 100
    features = rng.normal(size=(population, 1))
    labels = rng.randint(0, 2, size=population)

    drawn_with_replacement, _ = resample(
        features, labels, replace=True, n_samples=50,
        random_state=rng, stratify=labels)
    drawn_without_replacement, _ = resample(
        features, labels, replace=False, n_samples=50,
        random_state=rng, stratify=labels)

    # With replacement some rows repeat; without replacement none do.
    assert len(np.unique(drawn_with_replacement)) < 50
    assert len(np.unique(drawn_without_replacement)) == 50

    # When sampling with replacement, n_samples may exceed the number of
    # rows available in the input.
    oversampled, _ = resample(
        features, labels, replace=True, n_samples=1000,
        random_state=rng, stratify=labels)
    assert len(oversampled) == 1000
    assert len(np.unique(oversampled)) == 100
開發者ID:PacktPublishing,項目名稱:Mastering-Elasticsearch-7.0,代碼行數:22,代碼來源:test_utils.py

示例2: plot_mean_bootstrap_exponential_readme

# 需要導入模塊: from sklearn import utils [as 別名]
# 或者: from sklearn.utils import resample [as 別名]
def plot_mean_bootstrap_exponential_readme():
    """Plot Bayesian and classical bootstraps of the mean, then save the figure.

    Four values are drawn from an exponential distribution with scale 7.
    Each method produces 10000 draws of the mean, shown on its own row of a
    two-row figure with the interval returned by
    ``highest_density_interval`` marked on the x-axis. The figure is written
    to ``readme_exponential.png``.
    """
    data = np.random.exponential(7, 4)
    classical_draws = [np.mean(resample(data)) for _ in range(10000)]
    posterior_draws = mean(data, 10000)
    post_lo, post_hi = highest_density_interval(posterior_draws)
    clas_lo, clas_hi = highest_density_interval(classical_draws)

    # (title, draws, legend label, interval end points) for each row.
    panels = (
        ('Bayesian Bootstrap of mean', posterior_draws,
         'Bayesian Bootstrap Samples', [post_lo, post_hi]),
        ('Classical Bootstrap of mean', classical_draws,
         'Classical Bootstrap Samples', [clas_lo, clas_hi]),
    )
    for row, (title, draws, legend_label, interval) in enumerate(panels, start=1):
        plt.subplot(2, 1, row)
        plt.title(title)
        sns.distplot(draws, label=legend_label)
        plt.plot(interval, [0, 0], linewidth=5.0, marker='o', label='95% HDI')
        plt.xlim(-1, 18)
        plt.legend()

    plt.savefig('readme_exponential.png', bbox_inches='tight')
開發者ID:lmc2179,項目名稱:bayesian_bootstrap,代碼行數:21,代碼來源:demos.py

示例3: bootstrap_CI

# 需要導入模塊: from sklearn import utils [as 別名]
# 或者: from sklearn.utils import resample [as 別名]
def bootstrap_CI(actual_list, predict_list, num_repeats=1000, stat='roc_auc',
                 confident_lvl=0.95, side='two', random_state=0):
    """Estimate a percentile-bootstrap confidence interval for a metric.

    The paired inputs are resampled ``num_repeats`` times; the metric is
    computed on every resample and the interval bounds are read off as
    percentiles of the collected values.

    :param actual_list: Ground-truth labels, passed as the first argument
        of ``roc_auc_score``.
    :param predict_list: Predicted scores, passed as the second argument of
        ``roc_auc_score``. Must have the same length as ``actual_list``.
    :param num_repeats: Number of bootstrap resamples.
    :param stat: Metric name. Only ``'roc_auc'`` is implemented; any other
        value collects no statistics and yields ``(nan, nan)``.
    :param confident_lvl: Central coverage of the interval, e.g. ``0.95``
        gives the 2.5th and 97.5th percentiles.
    :param side: Accepted for interface compatibility but currently unused;
        the interval is always two-sided.
    :param random_state: Seed (or ``RandomState`` / ``None``) for the
        resampling, so repeated calls with the same seed give the same
        interval.
    :return: ``(lower, upper)`` bounds, or ``(nan, nan)`` if any step fails.
    :raises AssertionError: If the two inputs differ in length.
    """
    assert len(actual_list) == len(predict_list)

    from sklearn.utils import check_random_state, resample

    try:
        # One generator shared by all iterations: passing the raw seed to
        # every resample() call would reproduce the same resample each time.
        rng = check_random_state(random_state)

        all_stats = []
        for _ in range(num_repeats):
            actual_resampled, predict_resampled = resample(
                actual_list, predict_list, random_state=rng)
            if stat == 'roc_auc':
                all_stats.append(
                    roc_auc_score(actual_resampled, predict_resampled))

        lower = np.percentile(all_stats, (1 - confident_lvl) / 2. * 100)
        upper = np.percentile(all_stats, (1 + confident_lvl) / 2. * 100)

    except Exception:
        # Deliberate best-effort: any failure (for instance a resample the
        # metric cannot score) reports an undefined interval instead of
        # propagating.
        lower, upper = float('nan'), float('nan')

    return lower, upper
開發者ID:HealthRex,項目名稱:CDSS,代碼行數:25,代碼來源:stats_utils.py

示例4: plot_mean_bootstrap

# 需要導入模塊: from sklearn import utils [as 別名]
# 或者: from sklearn.utils import resample [as 別名]
def plot_mean_bootstrap():
    """Overlay posterior and classical bootstrap distributions of the mean.

    Uses the fixed sample ``[-1, 0, 1]`` and 10000 draws per method: the
    module's ``mean`` helper supplies the posterior draws, and
    ``np.mean(resample(...))`` supplies the classical ones. Shows the plot.
    """
    data = [-1, 0, 1]
    n_draws = 10000

    posterior_draws = mean(data, n_draws)
    sns.distplot(posterior_draws)

    classical_draws = []
    for _ in range(n_draws):
        classical_draws.append(np.mean(resample(data)))
    sns.distplot(classical_draws)

    plt.show()
開發者ID:lmc2179,項目名稱:bayesian_bootstrap,代碼行數:9,代碼來源:demos.py

示例5: plot_mean_resample_bootstrap

# 需要導入模塊: from sklearn import utils [as 別名]
# 或者: from sklearn.utils import resample [as 別名]
def plot_mean_resample_bootstrap():
    """Overlay ``bayesian_bootstrap`` and classical bootstrap draws of the mean.

    Uses the fixed sample ``[-1, 0, 1]``. ``bayesian_bootstrap`` is called
    with ``np.mean``, 10000 and 100 as its remaining positional arguments;
    the classical side takes 10000 ``resample`` draws. Shows the plot.
    """
    data = [-1, 0, 1]

    posterior_draws = bayesian_bootstrap(data, np.mean, 10000, 100)
    sns.distplot(posterior_draws)

    classical_draws = []
    for _ in range(10000):
        classical_draws.append(np.mean(resample(data)))
    sns.distplot(classical_draws)

    plt.show()
開發者ID:lmc2179,項目名稱:bayesian_bootstrap,代碼行數:9,代碼來源:demos.py

示例6: plot_median

# 需要導入模塊: from sklearn import utils [as 別名]
# 或者: from sklearn.utils import resample [as 別名]
def plot_median():
    """Overlay ``bayesian_bootstrap`` and classical bootstrap draws of the median.

    The data are 10 values drawn uniformly from [-1, 1).
    ``bayesian_bootstrap`` is called with ``np.median``, 10000 and 100; the
    classical side takes 10000 ``resample`` draws. Shows the plot.
    """
    data = np.random.uniform(-1, 1, 10)

    posterior_draws = bayesian_bootstrap(data, np.median, 10000, 100)
    sns.distplot(posterior_draws)

    classical_draws = []
    for _ in range(10000):
        classical_draws.append(np.median(resample(data)))
    sns.distplot(classical_draws)

    plt.show()
開發者ID:lmc2179,項目名稱:bayesian_bootstrap,代碼行數:9,代碼來源:demos.py

示例7: plot_var_bootstrap

# 需要導入模塊: from sklearn import utils [as 別名]
# 或者: from sklearn.utils import resample [as 別名]
def plot_var_bootstrap():
    """Overlay posterior and classical bootstrap distributions of the variance.

    The data are 100 values drawn uniformly from [-1, 1). The module's
    ``var`` helper supplies 10000 posterior draws; the classical side takes
    10000 ``np.var(resample(...))`` draws. Shows the plot.
    """
    data = np.random.uniform(-1, 1, 100)
    n_draws = 10000

    posterior_draws = var(data, n_draws)
    sns.distplot(posterior_draws)

    classical_draws = []
    for _ in range(n_draws):
        classical_draws.append(np.var(resample(data)))
    sns.distplot(classical_draws)

    plt.show()
開發者ID:lmc2179,項目名稱:bayesian_bootstrap,代碼行數:9,代碼來源:demos.py

示例8: plot_var_resample_bootstrap

# 需要導入模塊: from sklearn import utils [as 別名]
# 或者: from sklearn.utils import resample [as 別名]
def plot_var_resample_bootstrap():
    """Overlay ``bayesian_bootstrap`` and classical bootstrap draws of the variance.

    The data are 100 values drawn uniformly from [-1, 1).
    ``bayesian_bootstrap`` is called with ``np.var``, 10000 and 500; the
    classical side takes 10000 ``resample`` draws. Shows the plot.
    """
    data = np.random.uniform(-1, 1, 100)

    posterior_draws = bayesian_bootstrap(data, np.var, 10000, 500)
    sns.distplot(posterior_draws)

    classical_draws = []
    for _ in range(10000):
        classical_draws.append(np.var(resample(data)))
    sns.distplot(classical_draws)

    plt.show()
開發者ID:lmc2179,項目名稱:bayesian_bootstrap,代碼行數:9,代碼來源:demos.py

示例9: plot_regression_bootstrap

# 需要導入模塊: from sklearn import utils [as 別名]
# 或者: from sklearn.utils import resample [as 別名]
def plot_regression_bootstrap():
    """Compare classical and Bayesian bootstraps of a regression coefficient.

    Builds a four-point data set (x = 0..3, y = x plus standard-normal
    noise). It gathers 10000 classical bootstrap coefficients, then calls
    ``bayesian_bootstrap_regression`` with the same data and fit function.
    The raw data are shown first, then both coefficient distributions.
    """
    features = np.array([[0], [1], [2], [3]])
    targets = np.array([0, 1, 2, 3]) + np.random.normal(0, 1, 4)

    def fit_coefficients(design, response):
        # Slope(s) of an ordinary least-squares fit on the given sample.
        return LinearRegression().fit(design, response).coef_

    classical_coefs = []
    for _ in tqdm(range(10000)):
        boot_features, boot_targets = resample(features, targets)
        classical_coefs.append(
            LinearRegression().fit(boot_features, boot_targets).coef_)

    posterior_coefs = bayesian_bootstrap_regression(
        features, targets, fit_coefficients, 10000, 1000)

    plt.scatter(features.reshape(-1, 1), targets)
    plt.show()

    sns.distplot(classical_coefs)
    sns.distplot(posterior_coefs)
    plt.show()
開發者ID:lmc2179,項目名稱:bayesian_bootstrap,代碼行數:16,代碼來源:demos.py

示例10: resample_returns

# 需要導入模塊: from sklearn import utils [as 別名]
# 或者: from sklearn.utils import resample [as 別名]
def resample_returns(
        returns,
        func,
        seed=0,
        num_trials=100
):
    """
    Resample the returns and calculate any statistic on every new sample.

    https://en.wikipedia.org/wiki/Resampling_(statistics)

    Trial ``i`` resamples the index labels of ``returns`` with
    ``random_state=seed + i`` and looks the rows up by label, so the result
    is reproducible for a given ``seed``.

    NOTE(review): rows are selected with ``.loc`` on the resampled labels;
    if ``returns`` has duplicate index labels, each drawn label presumably
    selects every matching row -- confirm whether that is intended.

    :param returns (Series, DataFrame): Returns; subclasses of Series or
        DataFrame are accepted as well
    :param func: Given the resampled returns calculate a statistic
    :param seed: Seed for random number generator
    :param num_trials: Number of times to resample and run the experiment
    :return: Series (for Series input) or DataFrame with the columns of
        ``returns`` (for DataFrame input), indexed by trial number
    :raises TypeError: If ``returns`` is neither a Series nor a DataFrame
    """
    # isinstance (not an exact type comparison) so pandas subclasses work.
    if isinstance(returns, pd.Series):
        stats = pd.Series(index=range(num_trials))
    elif isinstance(returns, pd.DataFrame):
        stats = pd.DataFrame(
            index=range(num_trials),
            columns=returns.columns
        )
    else:
        raise TypeError("returns needs to be a Series or DataFrame!")

    n = returns.shape[0]
    for i in range(num_trials):
        random_indices = resample(returns.index, n_samples=n, random_state=seed + i)
        stats.loc[i] = func(returns.loc[random_indices])

    return stats
開發者ID:pmorissette,項目名稱:ffn,代碼行數:37,代碼來源:core.py

示例11: test_resample

# 需要導入模塊: from sklearn import utils [as 別名]
# 或者: from sklearn.utils import resample [as 別名]
def test_resample():
    """Exercise ``resample`` on its border case and on invalid arguments."""
    # Calling with no arrays at all is a border case: the result is None.
    assert resample() is None

    # Each (args, kwargs) pair below must be rejected with ValueError.
    invalid_calls = (
        (([0], [0, 1]), {}),
        (([0, 1], [0, 1]), {'replace': False, 'n_samples': 3}),
        (([0, 1], [0, 1]), {'meaning_of_life': 42}),
    )
    for args, kwargs in invalid_calls:
        assert_raises(ValueError, resample, *args, **kwargs)

    # Issue:6581 -- with replace=True (the default) n_samples may exceed
    # the input length.
    oversampled = resample([1, 2], n_samples=5)
    assert_equal(len(oversampled), 5)
開發者ID:PacktPublishing,項目名稱:Mastering-Elasticsearch-7.0,代碼行數:13,代碼來源:test_utils.py

示例12: test_resample_stratified

# 需要導入模塊: from sklearn import utils [as 別名]
# 或者: from sklearn.utils import resample [as 別名]
def test_resample_stratified():
    """Check that ``stratify`` changes the class mix of the resample.

    With about 90% ones in ``y``, the unstratified draw (seed 0) is expected
    to contain only ones, and the stratified draw of 10 exactly nine ones.
    """
    rng = np.random.RandomState(0)
    population = 100
    positive_rate = .9
    features = rng.normal(size=(population, 1))
    labels = rng.binomial(1, positive_rate, size=population)

    # Arguments shared by both draws; only ``stratify`` differs.
    common = dict(n_samples=10, random_state=0)

    _, plain_labels = resample(features, labels, stratify=None, **common)
    assert np.all(plain_labels == 1)

    _, stratified_labels = resample(features, labels, stratify=labels, **common)
    assert not np.all(stratified_labels == 1)
    # Nine ones and a single zero.
    assert np.sum(stratified_labels) == 9
開發者ID:PacktPublishing,項目名稱:Mastering-Elasticsearch-7.0,代碼行數:17,代碼來源:test_utils.py

示例13: test_resample_stratify_2dy

# 需要導入模塊: from sklearn import utils [as 別名]
# 或者: from sklearn.utils import resample [as 別名]
def test_resample_stratify_2dy():
    """Check that a two-dimensional ``y`` can be used for stratification."""
    rng = np.random.RandomState(0)
    population = 100
    features = rng.normal(size=(population, 1))
    targets = rng.randint(0, 2, size=(population, 2))

    _, resampled_targets = resample(
        features, targets, n_samples=50, random_state=rng, stratify=targets)

    # The resampled targets must keep their second dimension.
    assert resampled_targets.ndim == 2
開發者ID:PacktPublishing,項目名稱:Mastering-Elasticsearch-7.0,代碼行數:10,代碼來源:test_utils.py

示例14: test_resample_stratify_sparse_error

# 需要導入模塊: from sklearn import utils [as 別名]
# 或者: from sklearn.utils import resample [as 別名]
def test_resample_stratify_sparse_error():
    """Check that a sparse ``stratify`` argument is rejected with TypeError."""
    rng = np.random.RandomState(0)
    population = 100
    features = rng.normal(size=(population, 2))
    labels = rng.randint(0, 2, size=population)
    sparse_labels = sp.csr_matrix(labels)

    expected_message = 'A sparse matrix was passed'
    with pytest.raises(TypeError, match=expected_message):
        features, labels = resample(
            features, labels, n_samples=50, random_state=rng,
            stratify=sparse_labels)
開發者ID:PacktPublishing,項目名稱:Mastering-Elasticsearch-7.0,代碼行數:12,代碼來源:test_utils.py

示例15: subsample_df

# 需要導入模塊: from sklearn import utils [as 別名]
# 或者: from sklearn.utils import resample [as 別名]
def subsample_df(df:pd.DataFrame, objective:str, targ_name:str, n_samples:Optional[int]=None, replace:bool=False, strat_key:Optional[str]=None,
                 wgt_name:Optional[str]=None) -> pd.DataFrame:
    r'''
    Draw rows from a DataFrame by resampling its index, optionally with replacement and/or stratification.
    If a weight column is named, the sampled weights are rescaled so that their sum matches the original DataFrame:
    per target class when the objective is a classification, overall otherwise.

    Arguments:
        df: DataFrame to sample
        objective: string representation of the objective; any value containing 'class' (case-insensitive) is treated as classification
        targ_name: name of column containing target data
        n_samples: number of rows to draw; forwarded unchanged to `resample` (None presumably keeps the original row count - confirm against sklearn)
        replace: whether to sample with replacement
        strat_key: column name to use for stratified subsampling, if desired
        wgt_name: name of column containing weight data. If set, will reweight subsampled data, otherwise will not

    Returns:
        DataFrame holding the sampled rows
    '''

    if strat_key is None:
        strata = None
    else:
        strata = df[strat_key]
    picked = resample(df.index, replace=replace, n_samples=n_samples, stratify=strata)
    sampled = df.loc[picked]

    # No weight column given: nothing to rescale
    if wgt_name is None:
        return sampled

    if 'class' in objective.lower():
        # Match the weight sum of every class present in the sample to that class's sum in the source data
        for cls in sampled[targ_name].unique():
            in_sample = sampled[targ_name] == cls
            in_source = df[targ_name] == cls
            scale = df.loc[in_source, wgt_name].sum() / sampled.loc[in_sample, wgt_name].sum()
            sampled.loc[in_sample, wgt_name] *= scale
    else:
        sampled[wgt_name] *= df[wgt_name].sum() / sampled[wgt_name].sum()
    return sampled
開發者ID:GilesStrong,項目名稱:lumin,代碼行數:28,代碼來源:misc.py


注:本文中的sklearn.utils.resample方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。