当前位置: 首页>>代码示例>>Python>>正文


Python over_sampling.RandomOverSampler类代码示例

本文整理汇总了Python中imblearn.over_sampling.RandomOverSampler的典型用法代码示例。如果您正苦于以下问题:Python RandomOverSampler类的具体用法?Python RandomOverSampler怎么用?Python RandomOverSampler使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了RandomOverSampler类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: _fit_resample

    def _fit_resample(self, X, y):
        """Over-sample ``X``/``y`` using class labels derived from z-scores.

        ``y`` is standardised to z-scores.  Samples whose z-score is above
        ``self.negative_thres`` are labelled 0, samples at or below
        ``self.positive_thres`` are labelled 1 and all remaining samples are
        labelled -1.  A ``RandomOverSampler`` is fitted on those labels and
        the indices it selected are used to index the original ``X`` and
        ``y``.

        Returns
        -------
        tuple
            ``(X_resampled, y_resampled)``; when ``self.return_indices`` is
            truthy the selected indices are appended as a third element.
        """
        n_samples = X.shape[0]

        # convert y to z_score
        y_z = (y - y.mean()) / y.std()

        index0 = np.arange(n_samples)
        negative_mask = y_z > self.negative_thres
        positive_mask = y_z <= self.positive_thres
        index_negative = index0[negative_mask]
        index_positive = index0[positive_mask]
        # Everything matched by neither threshold.  A mask complement
        # replaces the per-element ``x not in array`` scan, which was
        # quadratic in the number of samples.
        index_unclassified = index0[~(negative_mask | positive_mask)]

        # Assignment order matters if the thresholds overlap: a sample that
        # matches both ends up labelled 1.
        y_z[index_negative] = 0
        y_z[index_positive] = 1
        y_z[index_unclassified] = -1

        ros = RandomOverSampler(
            sampling_strategy=self.sampling_strategy,
            random_state=self.random_state,
            ratio=self.ratio)
        # Only the selected indices are needed; the resampled arrays built
        # from the derived labels are discarded.
        _, _ = ros.fit_resample(X, y_z)
        sample_indices = ros.sample_indices_

        print("Before sampler: %s. Total after: %s"
              % (Counter(y_z), sample_indices.shape))

        self.sample_indices_ = np.array(sample_indices)

        X_resampled = safe_indexing(X, sample_indices)
        y_resampled = safe_indexing(y, sample_indices)
        if self.return_indices:
            return X_resampled, y_resampled, sample_indices
        return X_resampled, y_resampled
开发者ID:bgruening,项目名称:galaxytools,代码行数:35,代码来源:preprocessors.py

示例2: transform

    def transform(self, X, y=None):
        """Return a new dataframe built from ``X`` after random over-sampling.

        The target is read from the ``self.predicted_column`` column of
        ``X``; the ``y`` argument is not used.  The remaining columns are
        over-sampled together with the target, and the result is a
        DataFrame holding the original feature column names plus the
        target column.
        """
        # TODO how do we validate this happens before train/test split? Or do we need to? Can we implement it in the
        # TODO      simple trainer in the correct order and leave this to advanced users?

        # Pull the target out of the dataframe
        target = np.squeeze(X[[self.predicted_column]])

        # Feature-only copy of the dataframe
        features = X.drop([self.predicted_column], axis=1)

        # Create the over sampler and resample features and target together
        sampler = RandomOverSampler(random_state=self.random_seed)
        sampled_features, sampled_target = sampler.fit_sample(features, target)

        # Assemble the over sampled dataframe with the original column names
        balanced = pd.DataFrame(sampled_features)
        balanced.columns = features.columns

        # Put the target column back
        balanced[self.predicted_column] = pd.Series(sampled_target)

        return balanced
开发者ID:xtaraim,项目名称:healthcareai-py,代码行数:25,代码来源:transformers.py

示例3: oversample

 def oversample(self):
     """Replace ``self._X``/``self._y`` with a randomly over-sampled copy.

     The pre-sampling data is kept in ``self._X_original`` and
     ``self._y_original``.
     """
     # Remember the data as it was before resampling.
     self._X_original = self._X
     self._y_original = self._y

     # Fixed seed of 0 for the sampler.
     sampler = RandomOverSampler(random_state=0)
     resampled_X, resampled_y = sampler.fit_sample(self._X, self._y)
     self._X = resampled_X
     self._y = resampled_y
开发者ID:dermatologist,项目名称:nlp-qrmine,代码行数:7,代码来源:mlqrmine.py

示例4: test_sample_wrong_X

def test_sample_wrong_X():
    """Check that ``sample`` raises ``RuntimeError`` when it is given data
    other than what the sampler was fitted on."""

    # Fit on the module-level data
    sampler = RandomOverSampler(random_state=RND_SEED)
    sampler.fit(X, Y)
    sample = sampler.sample

    # Freshly generated data, unrelated to what was used for fitting
    other_X = np.random.random((100, 40))
    other_y = np.array([0] * 50 + [1] * 50)
    assert_raises(RuntimeError, sample, other_X, other_y)
开发者ID:dvro,项目名称:imbalanced-learn,代码行数:9,代码来源:test_random_over_sampler.py

示例5: test_multiclass_fit_resample

def test_multiclass_fit_resample():
    """Check that every class ends up with 5 samples on a 3-class target."""
    # Turn two entries of a copy of Y into a third class.
    y = Y.copy()
    for position in (5, 6):
        y[position] = 2

    sampler = RandomOverSampler(random_state=RND_SEED)
    X_resampled, y_resampled = sampler.fit_resample(X, y)

    class_counts = Counter(y_resampled)
    for label in (0, 1, 2):
        assert class_counts[label] == 5
开发者ID:bodycat,项目名称:imbalanced-learn,代码行数:10,代码来源:test_random_over_sampler.py

示例6: test_random_over_sampling_heterogeneous_data

def test_random_over_sampling_heterogeneous_data():
    """Check over-sampling of an object-dtype array mixing str, int and float.

    With two samples of class 0 and one of class 1, the resampled data is
    expected to hold four rows, keep the object dtype, and end with a row
    whose first column comes from the original data.
    """
    # ``np.object`` was only an alias of the builtin ``object``; the alias
    # was deprecated in NumPy 1.20 and removed in 1.24, so use the builtin.
    X_hetero = np.array([['xxx', 1, 1.0], ['yyy', 2, 2.0], ['zzz', 3, 3.0]],
                        dtype=object)
    y = np.array([0, 0, 1])
    ros = RandomOverSampler(random_state=RND_SEED)
    X_res, y_res = ros.fit_resample(X_hetero, y)

    assert X_res.shape[0] == 4
    assert y_res.shape[0] == 4
    assert X_res.dtype == object
    assert X_res[-1, 0] in X_hetero[:, 0]
开发者ID:bodycat,项目名称:imbalanced-learn,代码行数:11,代码来源:test_random_over_sampler.py

示例7: test_ros_fit_sample

def test_ros_fit_sample():
    """Compare the ``fit_sample`` output with the stored reference arrays."""

    sampler = RandomOverSampler(random_state=RND_SEED)
    X_resampled, y_resampled = sampler.fit_sample(X, Y)

    # Reference arrays are loaded from the ``data`` directory beside this file.
    data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')
    X_gt = np.load(os.path.join(data_dir, 'ros_x.npy'))
    y_gt = np.load(os.path.join(data_dir, 'ros_y.npy'))

    assert_array_equal(X_resampled, X_gt)
    assert_array_equal(y_resampled, y_gt)
开发者ID:vivounicorn,项目名称:imbalanced-learn,代码行数:12,代码来源:test_random_over_sampler.py

示例8: test_ros_fit

def test_ros_fit():
    """Check the attributes set on the sampler by ``fit``."""

    sampler = RandomOverSampler(random_state=RND_SEED)
    sampler.fit(X, Y)

    # Expected values of the ``min_c_`` and ``maj_c_`` attributes
    for attribute, expected in (('min_c_', 0), ('maj_c_', 1)):
        assert_equal(getattr(sampler, attribute), expected)

    # Expected entries of ``stats_c_``
    for label, expected_count in ((0, 3), (1, 7)):
        assert_equal(sampler.stats_c_[label], expected_count)
开发者ID:dvro,项目名称:imbalanced-learn,代码行数:13,代码来源:test_random_over_sampler.py

示例9: oversample

    def oversample(self):
        """Randomly over-sample ``self.X``/``self.y`` and refresh ``self.Xview``.

        The class counts of ``self.y`` are printed before and after
        resampling.
        """
        counts_before = Counter(self.y)
        print('Current outcome sampling {}'.format(counts_before))

        # No random_state is passed to the sampler here.
        # NOTE: SMOTE() and ADASYN() were left as commented-out alternatives.
        sampler = RandomOverSampler()

        resampled_X, resampled_y = sampler.fit_sample(self.X, self.y)
        self.X = resampled_X
        self.y = resampled_y

        # The view keeps only the first ``n_features`` columns.
        self.Xview = self.X.view()[:, :self.n_features]
        counts_after = Counter(self.y)
        print('Resampled dataset shape {}'.format(counts_after))
开发者ID:kellyhennigan,项目名称:cueexp_scripts,代码行数:13,代码来源:sgdrfe_GMmask.py

示例10: test_ros_fit_resample_half

def test_ros_fit_resample_half():
    """Check ``fit_resample`` with a dict ``sampling_strategy`` of
    ``{0: 3, 1: 7}`` against hard-coded expected arrays."""
    target_counts = {0: 3, 1: 7}
    sampler = RandomOverSampler(
        sampling_strategy=target_counts, random_state=RND_SEED)
    X_resampled, y_resampled = sampler.fit_resample(X, Y)

    # Expected output, one sample per row.
    X_gt = np.array([
        [0.04352327, -0.20515826],
        [0.92923648, 0.76103773],
        [0.20792588, 1.49407907],
        [0.47104475, 0.44386323],
        [0.22950086, 0.33367433],
        [0.15490546, 0.3130677],
        [0.09125309, -0.85409574],
        [0.12372842, 0.6536186],
        [0.13347175, 0.12167502],
        [0.094035, -2.55298982],
    ])
    y_gt = np.array([1, 0, 1, 0, 1, 1, 1, 1, 0, 1])

    assert_allclose(X_resampled, X_gt)
    assert_array_equal(y_resampled, y_gt)
开发者ID:bodycat,项目名称:imbalanced-learn,代码行数:14,代码来源:test_random_over_sampler.py

示例11: oversample

 def oversample(self):
     """Randomly over-sample ``self.X``/``self.y`` and refresh ``self.Xview``.

     The class counts of ``self.y`` are printed before and after
     resampling.
     """
     counts_before = Counter(self.y)
     print('Current outcome sampling {}'.format(counts_before))

     # No random_state is passed to the sampler here.
     # NOTE: a seeded variant, RandomOverSampler(random_state=2), was left
     # as a commented-out alternative.
     sampler = RandomOverSampler()

     resampled_X, resampled_y = sampler.fit_sample(self.X, self.y)
     self.X = resampled_X
     self.y = resampled_y

     # The view keeps only the first ``n_features`` columns.
     self.Xview = self.X.view()[:, :self.n_features]
     counts_after = Counter(self.y)
     print('Resampled dataset shape {}'.format(counts_after))
开发者ID:kellyhennigan,项目名称:cueexp_scripts,代码行数:14,代码来源:sgdrfe_oversample.py

示例12: resample

def resample(X, y, sample_fraction=0.1, test_size=0.3):
    """Split ``X``/``y`` into train and test parts and over-sample each part.

    Steps, in order: a ``train_test_split`` using ``test_size``, random
    over-sampling of the train part and of the test part, an optional
    down-sampling of both parts to ``sample_fraction`` (only when it is
    below 1.0), and re-wrapping of the results as DataFrames carrying the
    original column names.  Class balance is printed along the way.

    Returns ``((X_train, y_train), (X_test, y_test))``.
    """
    X_columns = X.columns
    y_columns = y.columns
    width = len(X_columns)

    print('~' * 80)
    print('@@-\n', y.converted.value_counts())
    print('@@0 - Original')
    show_balance(y.values)

    # Initial train/test split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=42)
    for caption, part in (('@@2 - y_train', y_train),
                          ('@@2 -  y_test', y_test)):
        print(caption)
        show_balance(part)
    assert X_train.shape[1] == width and X_test.shape[1] == width

    # Over-sample the train part, then the test part, with the same sampler
    sampler = RandomOverSampler(random_state=42)
    X_train, y_train = sampler.fit_sample(X_train, y_train)
    X_test, y_test = sampler.fit_sample(X_test, y_test)
    for caption, part in (('@@3 - Oversampled y_train', y_train),
                          ('@@3 - Oversampled y_test', y_test)):
        print(caption)
        show_balance(part)
    assert X_train.shape[1] == width and X_test.shape[1] == width

    if sample_fraction < 1.0:
        # Keep only the "test" side of a further split as the down-sample
        _, X_train, _, y_train = train_test_split(
            X_train, y_train, test_size=sample_fraction, random_state=43)
        _, X_test, _, y_test = train_test_split(
            X_test, y_test, test_size=sample_fraction, random_state=44)
        for caption, part in (('@@2 - Downsampled y_train', y_train),
                              ('@@2 - Downsampled y_test', y_test)):
            print(caption)
            show_balance(part)
        assert len(X_train.shape) == 2 and len(X_test.shape) == 2, (X_train.shape, X_test.shape)
        assert X_train.shape[1] == width and X_test.shape[1] == width, (X_train.shape, X_test.shape)

    print('X_columns=%d %s' % (len(X_columns), X_columns))
    print('y_columns=%d %s' % (len(y_columns), y_columns))
    print('X_train=%-10s y_train=%s' % (list(X_train.shape), list(y_train.shape)))
    print('X_test =%-10s y_test =%s' % (list(X_test.shape), list(y_test.shape)))
    assert X_train.shape[1] == width and X_test.shape[1] == width

    # Wrap the results as DataFrames; each y frame takes the index of its X frame
    X_train = pd.DataFrame(X_train, columns=X_columns)
    y_train = pd.DataFrame(y_train, columns=y_columns, index=X_train.index)
    X_test = pd.DataFrame(X_test, columns=X_columns)
    y_test = pd.DataFrame(y_test, columns=y_columns, index=X_test.index)
    print('@@+ y_train\n', y_train.converted.value_counts(), flush=True)
    print('@@+ y_test\n', y_test.converted.value_counts(), flush=True)

    train_pair = (X_train, y_train)
    test_pair = (X_test, y_test)
    return train_pair, test_pair
开发者ID:peterwilliams97,项目名称:Butt-Head-Astronomer,代码行数:50,代码来源:feature_select.py

示例13: test_random_over_sampling_return_indices

def test_random_over_sampling_return_indices():
    """Check ``fit_resample`` with ``return_indices=True``: the resampled
    arrays match the hard-coded expectation and the returned indices cover
    every row of ``X``."""
    sampler = RandomOverSampler(return_indices=True, random_state=RND_SEED)
    X_resampled, y_resampled, sample_indices = sampler.fit_resample(X, Y)

    # Expected output, one sample per row.
    X_gt = np.array([
        [0.04352327, -0.20515826],
        [0.92923648, 0.76103773],
        [0.20792588, 1.49407907],
        [0.47104475, 0.44386323],
        [0.22950086, 0.33367433],
        [0.15490546, 0.3130677],
        [0.09125309, -0.85409574],
        [0.12372842, 0.6536186],
        [0.13347175, 0.12167502],
        [0.094035, -2.55298982],
        [0.92923648, 0.76103773],
        [0.47104475, 0.44386323],
        [0.92923648, 0.76103773],
        [0.47104475, 0.44386323],
    ])
    y_gt = np.array([1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0])

    assert_allclose(X_resampled, X_gt)
    assert_array_equal(y_resampled, y_gt)

    # The distinct indices must be exactly 0 .. len(X) - 1.
    distinct_indices = np.sort(np.unique(sample_indices))
    assert_array_equal(distinct_indices, np.arange(len(X)))
开发者ID:bodycat,项目名称:imbalanced-learn,代码行数:15,代码来源:test_random_over_sampler.py

示例14: test_ros_fit_sample_half

def test_ros_fit_sample_half():
    """Check ``fit_sample`` with ``ratio=0.5`` against hard-coded arrays."""

    sampler = RandomOverSampler(ratio=0.5, random_state=RND_SEED)
    X_resampled, y_resampled = sampler.fit_sample(X, Y)

    # Expected output, one sample per row.
    X_gt = np.array([
        [0.04352327, -0.20515826],
        [0.20792588, 1.49407907],
        [0.22950086, 0.33367433],
        [0.15490546, 0.3130677],
        [0.09125309, -0.85409574],
        [0.12372842, 0.6536186],
        [0.094035, -2.55298982],
        [0.92923648, 0.76103773],
        [0.47104475, 0.44386323],
        [0.13347175, 0.12167502],
    ])
    y_gt = np.array([1, 1, 1, 1, 1, 1, 1, 0, 0, 0])

    assert_array_equal(X_resampled, X_gt)
    assert_array_equal(y_resampled, y_gt)
开发者ID:kellyhennigan,项目名称:cueexp_scripts,代码行数:16,代码来源:test_random_over_sampler.py

示例15: test_multiclass_fit_sample

def test_multiclass_fit_sample():
    """Check that ``fit_sample`` yields 3600 samples per class on a
    three-class target."""

    # Relabel the first 1000 entries of a copy of Y as a third class
    y = Y.copy()
    y[:1000] = 2

    sampler = RandomOverSampler(random_state=RND_SEED)
    X_resampled, y_resampled = sampler.fit_sample(X, y)

    # Every class is expected to have the same count after resampling
    class_counts = Counter(y_resampled)
    for label in (0, 1, 2):
        assert_equal(class_counts[label], 3600)
开发者ID:integrallyclosed,项目名称:imbalanced-learn,代码行数:16,代码来源:test_random_over_sampler.py


注:本文中的imblearn.over_sampling.RandomOverSampler类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。