This article collects typical usage examples of the Python method imblearn.over_sampling.RandomOverSampler.fit_resample. If you are asking yourself what RandomOverSampler.fit_resample does, how to call it, or where to find real-world usage, the hand-picked code examples below should help. You can also look further into the containing class, imblearn.over_sampling.RandomOverSampler, for more usage examples.
The sections below show 8 code examples of RandomOverSampler.fit_resample, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
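Before the examples, here is a minimal, self-contained sketch of the basic call pattern; the toy data, class counts and random_state below are purely illustrative and not taken from any of the examples:

from collections import Counter
import numpy as np
from imblearn.over_sampling import RandomOverSampler

# A toy imbalanced dataset: 9 samples of class 0, 3 samples of class 1
X = np.arange(24).reshape(12, 2)
y = np.array([0] * 9 + [1] * 3)

ros = RandomOverSampler(random_state=0)
# fit_resample returns the resampled feature matrix and target vector
X_res, y_res = ros.fit_resample(X, y)
print(Counter(y_res))  # both classes now have 9 samples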
Example 1: _fit_resample
# Module to import: from imblearn.over_sampling import RandomOverSampler [as alias]
# Or: from imblearn.over_sampling.RandomOverSampler import fit_resample [as alias]
def _fit_resample(self, X, y):
    n_samples = X.shape[0]

    # convert y to z_score
    y_z = (y - y.mean()) / y.std()

    index0 = np.arange(n_samples)
    index_negative = index0[y_z > self.negative_thres]
    index_positive = index0[y_z <= self.positive_thres]
    index_unclassified = [x for x in index0
                          if x not in index_negative
                          and x not in index_positive]

    y_z[index_negative] = 0
    y_z[index_positive] = 1
    y_z[index_unclassified] = -1

    ros = RandomOverSampler(
        sampling_strategy=self.sampling_strategy,
        random_state=self.random_state,
        ratio=self.ratio)
    _, _ = ros.fit_resample(X, y_z)
    sample_indices = ros.sample_indices_

    print("Before sampler: %s. Total after: %s"
          % (Counter(y_z), sample_indices.shape))

    self.sample_indices_ = np.array(sample_indices)

    if self.return_indices:
        return (safe_indexing(X, sample_indices),
                safe_indexing(y, sample_indices),
                sample_indices)
    return (safe_indexing(X, sample_indices),
            safe_indexing(y, sample_indices))
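Example 1 oversamples a continuous target by first converting it to z-scores, thresholding it into pseudo-classes, and then indexing the original data with sample_indices_. The sketch below illustrates the same idea outside of a class; the threshold value and variable names here are ours, not part of the original code:

from collections import Counter
import numpy as np
from imblearn.over_sampling import RandomOverSampler

rng = np.random.RandomState(42)
X = rng.normal(size=(100, 3))
y = rng.normal(size=100)           # continuous regression target

# Binarize the standardized target with an illustrative threshold
y_z = (y - y.mean()) / y.std()
y_cls = np.where(y_z > 1.0, 1, 0)  # 1 = upper tail, 0 = everything else

ros = RandomOverSampler(random_state=42)
ros.fit_resample(X, y_cls)
# sample_indices_ maps resampled rows back to the original rows,
# so the original continuous y can be indexed the same way
X_res = X[ros.sample_indices_]
y_res = y[ros.sample_indices_]
print("before:", Counter(y_cls))
print("after: ", Counter(y_cls[ros.sample_indices_]))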
Example 2: test_multiclass_fit_resample
# Module to import: from imblearn.over_sampling import RandomOverSampler [as alias]
# Or: from imblearn.over_sampling.RandomOverSampler import fit_resample [as alias]
def test_multiclass_fit_resample():
    y = Y.copy()
    y[5] = 2
    y[6] = 2
    ros = RandomOverSampler(random_state=RND_SEED)
    X_resampled, y_resampled = ros.fit_resample(X, y)
    count_y_res = Counter(y_resampled)
    assert count_y_res[0] == 5
    assert count_y_res[1] == 5
    assert count_y_res[2] == 5
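The test above relies on the module-level fixtures X and Y from imblearn's test suite. A self-contained variant with a small synthetic three-class dataset (class sizes chosen here only for illustration) shows the same behaviour: with the default strategy, every minority class is oversampled up to the size of the majority class:

from collections import Counter
import numpy as np
from imblearn.over_sampling import RandomOverSampler

X = np.arange(20).reshape(10, 2)
y = np.array([0, 0, 0, 0, 0, 1, 1, 1, 2, 2])  # 5 / 3 / 2 split

ros = RandomOverSampler(random_state=0)
X_res, y_res = ros.fit_resample(X, y)
print(Counter(y_res))  # Counter({0: 5, 1: 5, 2: 5})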
Example 3: test_random_over_sampling_heterogeneous_data
# Module to import: from imblearn.over_sampling import RandomOverSampler [as alias]
# Or: from imblearn.over_sampling.RandomOverSampler import fit_resample [as alias]
def test_random_over_sampling_heterogeneous_data():
    X_hetero = np.array([['xxx', 1, 1.0], ['yyy', 2, 2.0], ['zzz', 3, 3.0]],
                        dtype=object)
    y = np.array([0, 0, 1])
    ros = RandomOverSampler(random_state=RND_SEED)
    X_res, y_res = ros.fit_resample(X_hetero, y)

    assert X_res.shape[0] == 4
    assert y_res.shape[0] == 4
    assert X_res.dtype == object
    assert X_res[-1, 0] in X_hetero[:, 0]
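Because RandomOverSampler only duplicates whole rows, it also accepts data that distance-based samplers would reject, such as object arrays or, with a recent imbalanced-learn version, pandas DataFrames containing string columns. A small sketch (column names and values invented for illustration):

import pandas as pd
from imblearn.over_sampling import RandomOverSampler

df = pd.DataFrame({
    'city': ['xxx', 'yyy', 'zzz'],   # non-numeric feature is fine here
    'count': [1, 2, 3],
    'value': [1.0, 2.0, 3.0],
})
y = pd.Series([0, 0, 1])

ros = RandomOverSampler(random_state=0)
df_res, y_res = ros.fit_resample(df, y)
print(df_res.shape)  # (4, 3): one duplicated row for the minority class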
Example 4: test_ros_fit_resample_half
# Module to import: from imblearn.over_sampling import RandomOverSampler [as alias]
# Or: from imblearn.over_sampling.RandomOverSampler import fit_resample [as alias]
def test_ros_fit_resample_half():
    sampling_strategy = {0: 3, 1: 7}
    ros = RandomOverSampler(
        sampling_strategy=sampling_strategy, random_state=RND_SEED)
    X_resampled, y_resampled = ros.fit_resample(X, Y)
    X_gt = np.array([[0.04352327, -0.20515826],
                     [0.92923648, 0.76103773],
                     [0.20792588, 1.49407907],
                     [0.47104475, 0.44386323],
                     [0.22950086, 0.33367433],
                     [0.15490546, 0.3130677],
                     [0.09125309, -0.85409574],
                     [0.12372842, 0.6536186],
                     [0.13347175, 0.12167502],
                     [0.094035, -2.55298982]])
    y_gt = np.array([1, 0, 1, 0, 1, 1, 1, 1, 0, 1])
    assert_allclose(X_resampled, X_gt)
    assert_array_equal(y_resampled, y_gt)
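In Example 4, sampling_strategy is a dict mapping each class label to the number of samples wanted after resampling. The test fixture already contains 3 samples of class 0 and 7 of class 1 (as the expected y_gt shows), so requesting {0: 3, 1: 7} leaves the data unchanged. A dict that actually adds samples might look like this (the requested counts are illustrative; an over-sampler cannot request fewer samples than a class already has):

from collections import Counter
import numpy as np
from imblearn.over_sampling import RandomOverSampler

X_toy = np.arange(20).reshape(10, 2)
y_toy = np.array([1, 0, 1, 0, 1, 1, 1, 1, 0, 1])  # 3 x class 0, 7 x class 1

# Request 6 samples of class 0 after resampling; class 1 stays at 7
ros = RandomOverSampler(sampling_strategy={0: 6, 1: 7}, random_state=0)
X_res, y_res = ros.fit_resample(X_toy, y_toy)
print(Counter(y_res))  # Counter({1: 7, 0: 6})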
Example 5: test_random_over_sampling_return_indices
# Module to import: from imblearn.over_sampling import RandomOverSampler [as alias]
# Or: from imblearn.over_sampling.RandomOverSampler import fit_resample [as alias]
def test_random_over_sampling_return_indices():
    ros = RandomOverSampler(return_indices=True, random_state=RND_SEED)
    X_resampled, y_resampled, sample_indices = ros.fit_resample(X, Y)
    X_gt = np.array([[0.04352327, -0.20515826],
                     [0.92923648, 0.76103773],
                     [0.20792588, 1.49407907],
                     [0.47104475, 0.44386323],
                     [0.22950086, 0.33367433],
                     [0.15490546, 0.3130677],
                     [0.09125309, -0.85409574],
                     [0.12372842, 0.6536186],
                     [0.13347175, 0.12167502],
                     [0.094035, -2.55298982],
                     [0.92923648, 0.76103773],
                     [0.47104475, 0.44386323],
                     [0.92923648, 0.76103773],
                     [0.47104475, 0.44386323]])
    y_gt = np.array([1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0])
    assert_allclose(X_resampled, X_gt)
    assert_array_equal(y_resampled, y_gt)
    assert_array_equal(np.sort(np.unique(sample_indices)), np.arange(len(X)))
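Note that the return_indices=True argument used above has been deprecated in more recent imbalanced-learn releases in favour of the fitted sample_indices_ attribute, which Example 1 already uses. A sketch of the attribute-based pattern on toy data:

import numpy as np
from imblearn.over_sampling import RandomOverSampler

X = np.arange(20).reshape(10, 2)
y = np.array([1, 0, 1, 0, 1, 1, 1, 1, 0, 1])

ros = RandomOverSampler(random_state=0)
X_res, y_res = ros.fit_resample(X, y)
# Indices of the original rows that make up the resampled set
print(ros.sample_indices_)
# With random over-sampling, every original row appears at least once
assert set(ros.sample_indices_) == set(range(len(X)))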
Example 6: print
# Module to import: from imblearn.over_sampling import RandomOverSampler [as alias]
# Or: from imblearn.over_sampling.RandomOverSampler import fit_resample [as alias]
print(__doc__)

# Imports needed by this snippet (the header above only mentions the sampler)
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.decomposition import PCA
from imblearn.over_sampling import RandomOverSampler

# Generate the dataset
X, y = make_classification(n_classes=2, class_sep=2, weights=[0.1, 0.9],
                           n_informative=3, n_redundant=1, flip_y=0,
                           n_features=20, n_clusters_per_class=1,
                           n_samples=200, random_state=10)

# Instantiate a PCA object for the sake of easy visualisation
pca = PCA(n_components=2)
# Fit and transform X to visualise inside a 2D feature space
X_vis = pca.fit_transform(X)

# Apply the random over-sampling
ros = RandomOverSampler()
X_resampled, y_resampled = ros.fit_resample(X, y)
X_res_vis = pca.transform(X_resampled)

# Two subplots, unpack the axes array immediately
f, (ax1, ax2) = plt.subplots(1, 2)

c0 = ax1.scatter(X_vis[y == 0, 0], X_vis[y == 0, 1], label="Class #0",
                 alpha=0.5)
c1 = ax1.scatter(X_vis[y == 1, 0], X_vis[y == 1, 1], label="Class #1",
                 alpha=0.5)
ax1.set_title('Original set')

ax2.scatter(X_res_vis[y_resampled == 0, 0], X_res_vis[y_resampled == 0, 1],
            label="Class #0", alpha=.5)
ax2.scatter(X_res_vis[y_resampled == 1, 0], X_res_vis[y_resampled == 1, 1],
            label="Class #1", alpha=.5)
Example 7: SMOTE
# Module to import: from imblearn.over_sampling import RandomOverSampler [as alias]
# Or: from imblearn.over_sampling.RandomOverSampler import fit_resample [as alias]
# Assumes surrounding context: import sklearn as sk (with sklearn.model_selection
# imported), from sklearn import svm, a `filtered` DataFrame and a DB connection `conn`
SEED = 0xDEADBEEF

y_col = 'add'
X_cols = ['pct_contrib', 'turnover', 'VWAP', 'vol', 'VWMC', 'SPTSXComp']
all_cols = X_cols + [y_col]

X = filtered[X_cols]
y = filtered[y_col]

# Conventional train_test_split unpacking order: train split first, then test
X_train, X_test, y_train, y_test = sk.model_selection.train_test_split(
    X.values, y.values, test_size=0.2, random_state=SEED)

filtered[all_cols].to_sql('model_inputs', conn, if_exists='replace', index=False)

#oversampler = SMOTE(random_state=SEED)
oversampler = RandomOverSampler(random_state=SEED)
X_train_resample, y_train_resample = oversampler.fit_resample(X_train, y_train)

print(len(X_train), len(X_test))

#log_clf = LogisticRegression()
#log_clf = RandomForestClassifier()
#log_clf = xgb.XGBClassifier(max_depth=4, min_child_weight=50, learning_rate=0.01, n_estimators=50, gamma=1)
log_clf = svm.LinearSVC()

log_clf.fit(X_train_resample, y_train_resample)
print(log_clf.score(X_train, y_train))

y_pred = log_clf.predict(X_test)
try:
    y_pred_prob = log_clf.predict_proba(X_test)
except AttributeError:
    # LinearSVC has no predict_proba; the snippet is cut off here in the source
    y_pred_prob = None
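Since LinearSVC does not implement predict_proba, a common follow-up (not part of the original snippet) is to evaluate on the untouched, still-imbalanced test split with class-aware metrics, and to use decision_function where a ranking score is needed:

from sklearn.metrics import classification_report, balanced_accuracy_score

print(classification_report(y_test, y_pred))
print("balanced accuracy:", balanced_accuracy_score(y_test, y_pred))
# For ranking metrics such as ROC AUC, LinearSVC's decision_function
# can stand in for predicted probabilities
scores = log_clf.decision_function(X_test)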
Example 8: print
# Module to import: from imblearn.over_sampling import RandomOverSampler [as alias]
# Or: from imblearn.over_sampling.RandomOverSampler import fit_resample [as alias]
X_res, y_res = rus.fit_resample(binary_X, binary_y)
print('Information of the iris data set after making it '
      'balanced using a float and an under-sampling method: \n '
      'sampling_strategy={} \n y: {}'
      .format(sampling_strategy, Counter(y_res)))
plot_pie(y_res)
###############################################################################
# For **over-sampling methods**, it corresponds to the ratio
# :math:`\alpha_{os}` defined by :math:`N_{rm} = \alpha_{os} \times N_{M}`,
# where :math:`N_{rm}` and :math:`N_{M}` are the number of samples in the
# minority class after resampling and the number of samples in the majority
# class, respectively.
ros = RandomOverSampler(sampling_strategy=sampling_strategy)
X_res, y_res = ros.fit_resample(binary_X, binary_y)
print('Information of the iris data set after making it '
      'balanced using a float and an over-sampling method: \n '
      'sampling_strategy={} \n y: {}'
      .format(sampling_strategy, Counter(y_res)))
plot_pie(y_res)
###############################################################################
# ``sampling_strategy`` as a ``str``
# ..................................
#
# ``sampling_strategy`` can be given as a string which specifies the class
# targeted by the resampling. With under- and over-sampling, the number of
# samples will be equalized.
#
# Note that we are using multiple classes from now on.
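For over-samplers, the accepted strings include 'minority' (resample only the minority class), 'not minority', 'not majority', and 'all'; the default 'auto' is an alias for 'not majority'. A standalone sketch on a deliberately imbalanced three-class iris subset (the slicing below is ours, purely to create the imbalance):

from collections import Counter
import numpy as np
from sklearn.datasets import load_iris
from imblearn.over_sampling import RandomOverSampler

iris = load_iris()
# Keep all of class 0 but only a few samples of classes 1 and 2
keep = np.r_[0:50, 50:60, 100:105]
X_imb, y_imb = iris.data[keep], iris.target[keep]
print("before:", Counter(y_imb))   # {0: 50, 1: 10, 2: 5}

ros = RandomOverSampler(sampling_strategy='not majority')
X_res, y_res = ros.fit_resample(X_imb, y_imb)
print("after: ", Counter(y_res))   # {0: 50, 1: 50, 2: 50}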