Python under_sampling.RandomUnderSampler类代码示例

本文整理汇总了Python中imblearn.under_sampling.RandomUnderSampler类的典型用法代码示例。如果您正苦于以下问题：Python RandomUnderSampler类的具体用法？Python RandomUnderSampler怎么用？Python RandomUnderSampler使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。

在下文中一共展示了RandomUnderSampler类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: transform

    def transform(self, X, y=None):
        """Return an under-sampled copy of the dataframe ``X``.

        The column named by ``self.predicted_column`` is taken as the target,
        the remaining columns are resampled with a ``RandomUnderSampler``
        seeded by ``self.random_seed``, and the resampled target is stored
        back under its original column name.

        The ``y`` argument is not read; it is overwritten with the target
        column extracted from ``X``.
        """
        # TODO how do we validate this happens before train/test split? Or do we need to? Can we implement it in the
        # TODO      simple trainer in the correct order and leave this to advanced users?

        target_name = self.predicted_column

        # Pull the target out of the frame and squeeze away the extra axis
        y = np.squeeze(X[[target_name]])

        # Everything except the target forms the feature frame
        features = X.drop([target_name], axis=1)

        sampler = RandomUnderSampler(random_state=self.random_seed)
        sampled_features, sampled_target = sampler.fit_sample(features, y)

        # Rebuild a dataframe that carries the original feature column names
        result = pd.DataFrame(sampled_features)
        result.columns = features.columns

        # Store the resampled target under its original column name
        result[target_name] = pd.Series(sampled_target)

        return result

开发者ID:xtaraim，项目名称:healthcareai-py，代码行数:25，代码来源:transformers.py

示例2: undersample

def undersample(X, y, bal_strategy):
    """Under-sample ``(X, y)`` with the requested strategy and report shapes.

    Parameters
    ----------
    X, y :
        Feature and label containers; both must expose a ``.shape``
        attribute, which is printed before and after sampling.
    bal_strategy : str
        ``"RANDOM"`` or ``"ALL"`` resample with ``RandomUnderSampler``,
        ``"TOMEK"`` resamples with ``TomekLinks``, and ``"NONE"`` returns
        the inputs unchanged. Any other value prints an error message and
        terminates the process through ``sys.exit(1)``.

    Returns
    -------
    tuple
        ``(X_sampled, y_sampled)``.
    """
    # A single pre-formatted string prints the same text on Python 2 and 3.
    print('Shape of X:  %s' % (X.shape,))
    print('Shape of y_Train:  %s' % (y.shape,))

    if bal_strategy in ("RANDOM", "ALL"):
        # NOTE: "ALL" is caught by this branch, so it performs random
        # under-sampling only; Tomek link cleaning is not chained after it.
        rus = RandomUnderSampler()
        X_sampled, y_sampled = rus.fit_sample(X, y)
    elif bal_strategy == "TOMEK":
        # Apply Tomek Links cleaning
        tl = TomekLinks()
        X_sampled, y_sampled = tl.fit_sample(X, y)
    elif bal_strategy == 'NONE':
        X_sampled, y_sampled = X, y
    else:
        print('bal_strategy not in ALL, RANDOM, TOMEK, NONE')
        sys.exit(1)

    print('Shape of X_sampled:  %s' % (X_sampled.shape,))
    print('Shape of y_sampled:  %s' % (y_sampled.shape,))

    return (X_sampled, y_sampled)

开发者ID:brettin，项目名称:pilot1-docs，代码行数:32，代码来源:undersample.py

示例3: test_pipeline_sample

def test_pipeline_sample():
    """Check that a pipeline ending in a sampler agrees with the bare sampler.

    The pipeline is exercised through ``fit(...).sample(...)`` and
    ``fit_sample(...)``, first with the sampler as the only step and then
    with a PCA step placed in front of it.
    """
    X, y = make_classification(
        n_samples=5000, n_features=20, n_informative=3, n_redundant=1,
        n_classes=2, n_clusters_per_class=1, weights=[0.1, 0.9],
        class_sep=2, flip_y=0, random_state=0)

    sampler = RandomUnderSampler(random_state=0)

    # Sampler as the only pipeline step
    sampler_only = Pipeline([('rus', sampler)])
    X_two_step, y_two_step = sampler_only.fit(X, y).sample(X, y)
    X_one_step, y_one_step = sampler_only.fit_sample(X, y)
    X_direct, y_direct = sampler.fit_sample(X, y)
    assert_array_almost_equal(X_two_step, X_one_step)
    assert_array_almost_equal(X_two_step, X_direct)
    assert_array_almost_equal(y_two_step, y_one_step)
    assert_array_almost_equal(y_two_step, y_direct)

    # Sampler preceded by PCA: compare against running both steps by hand
    pca = PCA()
    pca_then_sampler = Pipeline([('pca', pca), ('rus', sampler)])
    X_piped, y_piped = pca_then_sampler.fit(X, y).sample(X, y)
    X_projected = pca.fit_transform(X)
    X_manual, y_manual = sampler.fit_sample(X_projected, y)
    assert_array_almost_equal(X_piped, X_manual)
    assert_array_almost_equal(y_piped, y_manual)

开发者ID:apyeh，项目名称:UnbalancedDataset，代码行数:28，代码来源:test_pipeline.py

示例4: downsample

 def downsample(self):
     """Under-sample ``self.X`` / ``self.y`` in place and refresh ``self.Xview``.

     The class counts of ``self.y`` are printed before and after resampling.
     """
     before = Counter(self.y)
     print('Current outcome sampling {}'.format(before))

     sampler = RandomUnderSampler()
     resampled_X, resampled_y = sampler.fit_sample(self.X, self.y)
     self.X = resampled_X
     self.y = resampled_y

     # Keep a view limited to the first ``n_features`` columns
     self.Xview = self.X.view()[:, :self.n_features]

     after = Counter(self.y)
     print('Resampled dataset shape {}'.format(after))

开发者ID:kellyhennigan，项目名称:cueexp_scripts，代码行数:7，代码来源:sgdrfe_beta.py

示例5: downsample

    def downsample(self):
        """Under-sample ``self.X`` / ``self.y`` in place and log the selection.

        The class counts of ``self.y`` are printed before and after
        resampling. The third value returned by the sampler (kept as
        ``ds_idx``) and the resampled ``self.y`` are each appended, as one
        line of text, to the files ``downsampled_idx`` and ``downsampled_y``
        in the current working directory. ``self.Xview`` is refreshed to a
        view of the first ``self.n_features`` columns of the new ``self.X``.
        """
        print('Current outcome sampling {}'.format(Counter(self.y)))

        # The seed is fixed at 0 and return_indices=True requests a third
        # return value. Construct RandomUnderSampler() without arguments
        # instead if an unseeded draw is wanted.
        rus = RandomUnderSampler(random_state=0, return_indices=True)
        self.X, self.y, ds_idx = rus.fit_sample(self.X, self.y)

        # Context managers close the files even if a write raises.
        with open('downsampled_idx', 'a') as idx_file:
            idx_file.write(str(ds_idx) + '\n')

        with open('downsampled_y', 'a') as y_file:
            y_file.write(str(self.y) + '\n')

        self.Xview = self.X.view()[:, :self.n_features]
        print('Resampled dataset shape {}'.format(Counter(self.y)))

开发者ID:kellyhennigan，项目名称:cueexp_scripts，代码行数:25，代码来源:sgdrfe.py

示例6: test_rus_sample_wrong_X

def test_rus_sample_wrong_X():
    """Expect ``RuntimeError`` when ``sample`` receives data other than the
    data the sampler was fitted on."""

    sampler = RandomUnderSampler(random_state=RND_SEED)
    sampler.fit(X, Y)

    # Fresh random data with a shape of (100, 40) and balanced 0/1 labels
    unseen_X = np.random.random((100, 40))
    unseen_y = np.array([0] * 50 + [1] * 50)
    assert_raises(RuntimeError, sampler.sample, unseen_X, unseen_y)

开发者ID:kellyhennigan，项目名称:cueexp_scripts，代码行数:9，代码来源:test_random_under_sampler.py

示例7: test_multiclass_fit_sample

def test_multiclass_fit_sample():
    """Relabel two samples as class 2 and expect two samples per class after
    resampling."""
    y = Y.copy()
    for position in (5, 6):
        y[position] = 2

    sampler = RandomUnderSampler(random_state=RND_SEED)
    X_resampled, y_resampled = sampler.fit_sample(X, y)

    class_counts = Counter(y_resampled)
    for label in (0, 1, 2):
        assert class_counts[label] == 2

开发者ID:glemaitre，项目名称:imbalanced-learn，代码行数:10，代码来源:test_random_under_sampler.py

示例8: test_random_under_sampling_heterogeneous_data

def test_random_under_sampling_heterogeneous_data():
    """Resample an object-dtype array mixing strings, ints and floats.

    With labels ``[0, 0, 1]`` the test expects two rows to remain and the
    resampled ``X`` to keep the ``object`` dtype.
    """
    # The builtin ``object`` is used as the dtype: the ``np.object`` alias
    # was deprecated in NumPy 1.20 and removed in 1.24.
    X_hetero = np.array([['xxx', 1, 1.0], ['yyy', 2, 2.0], ['zzz', 3, 3.0]],
                        dtype=object)
    y = np.array([0, 0, 1])
    rus = RandomUnderSampler(random_state=RND_SEED)
    X_res, y_res = rus.fit_resample(X_hetero, y)

    assert X_res.shape[0] == 2
    assert y_res.shape[0] == 2
    assert X_res.dtype == object

开发者ID:bodycat，项目名称:imbalanced-learn，代码行数:10，代码来源:test_random_under_sampler.py

示例9: test_rus_fit_resample

def test_rus_fit_resample():
    """Compare ``fit_resample`` with ``replacement=True`` against fixed
    expected arrays."""
    expected_X = np.array([
        [0.92923648, 0.76103773],
        [0.47104475, 0.44386323],
        [0.13347175, 0.12167502],
        [0.09125309, -0.85409574],
        [0.12372842, 0.6536186],
        [0.04352327, -0.20515826],
    ])
    expected_y = np.array([0, 0, 0, 1, 1, 1])

    sampler = RandomUnderSampler(random_state=RND_SEED, replacement=True)
    X_resampled, y_resampled = sampler.fit_resample(X, Y)

    assert_array_equal(X_resampled, expected_X)
    assert_array_equal(y_resampled, expected_y)

开发者ID:bodycat，项目名称:imbalanced-learn，代码行数:11，代码来源:test_random_under_sampler.py

示例10: test_rus_fit_sample

def test_rus_fit_sample():
    """Compare ``fit_sample`` output with the ``.npy`` fixtures stored in the
    ``data`` directory next to this file."""
    sampler = RandomUnderSampler(random_state=RND_SEED)
    X_resampled, y_resampled = sampler.fit_sample(X, Y)

    # Both fixtures are loaded before any comparison is made
    data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')
    expected_X = np.load(os.path.join(data_dir, 'rus_x.npy'))
    expected_y = np.load(os.path.join(data_dir, 'rus_y.npy'))

    assert_array_equal(X_resampled, expected_X)
    assert_array_equal(y_resampled, expected_y)

开发者ID:apyeh，项目名称:UnbalancedDataset，代码行数:12，代码来源:test_random_under_sampler.py

示例11: test_rus_fit

def test_rus_fit():
    """Fit the sampler and check the class statistics it records."""
    sampler = RandomUnderSampler(random_state=RND_SEED)
    sampler.fit(X, Y)

    # Labels stored in ``min_c_`` and ``maj_c_``
    assert_equal(sampler.min_c_, 0)
    assert_equal(sampler.maj_c_, 1)

    # Per-class values stored in ``stats_c_``
    assert_equal(sampler.stats_c_[0], 3)
    assert_equal(sampler.stats_c_[1], 7)

开发者ID:kellyhennigan，项目名称:cueexp_scripts，代码行数:13，代码来源:test_random_under_sampler.py

示例12: test_rus_fit_sample_with_indices

def test_rus_fit_sample_with_indices():
    """Compare ``fit_sample`` output, including the returned indices, with
    the ``.npy`` fixtures stored in the ``data`` directory next to this file."""
    sampler = RandomUnderSampler(return_indices=True, random_state=RND_SEED)
    actual = sampler.fit_sample(X, Y)
    X_resampled, y_resampled, idx_under = actual

    # All fixtures are loaded before any comparison is made
    data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')
    fixture_names = ('rus_x.npy', 'rus_y.npy', 'rus_idx.npy')
    expected = [np.load(os.path.join(data_dir, name)) for name in fixture_names]

    for got, wanted in zip((X_resampled, y_resampled, idx_under), expected):
        assert_array_equal(got, wanted)

开发者ID:apyeh，项目名称:UnbalancedDataset，代码行数:14，代码来源:test_random_under_sampler.py

示例13: test_rus_fit_sample_with_indices

def test_rus_fit_sample_with_indices():
    """Compare ``fit_sample`` output, including the returned indices, against
    fixed expected arrays."""
    expected_X = np.array([
        [0.92923648, 0.76103773],
        [0.47104475, 0.44386323],
        [0.13347175, 0.12167502],
        [0.09125309, -0.85409574],
        [0.12372842, 0.6536186],
        [0.04352327, -0.20515826],
    ])
    expected_y = np.array([0, 0, 0, 1, 1, 1])
    expected_idx = np.array([1, 3, 8, 6, 7, 0])

    sampler = RandomUnderSampler(return_indices=True, random_state=RND_SEED)
    X_resampled, y_resampled, idx_under = sampler.fit_sample(X, Y)

    assert_array_equal(X_resampled, expected_X)
    assert_array_equal(y_resampled, expected_y)
    assert_array_equal(idx_under, expected_idx)

开发者ID:kellyhennigan，项目名称:cueexp_scripts，代码行数:15，代码来源:test_random_under_sampler.py

示例14: CrossVal

def CrossVal(estimator, X, y, procsessor=None, cv=3, times=10, random_state=0, imb=False):
    """
    Repeated stratified cross-validation.

    estimator:
        Model; it is refitted on every training fold.

    X:
        Feature part of the data set (indexed with the fold index arrays).

    y:
        Labels of the data set.

    procsessor:
        Optional preprocessor, in practice a feature selector. When given it
        is fitted on the training fold and its ``transform(x, y)`` is applied
        to both the training and the test fold.

    cv:
        Number of folds per repetition.

    times:
        Number of times the whole cross-validation is repeated.

    random_state:
        Base random seed; repetition ``t`` uses ``random_state + t``.

    imb:
        When equal to ``True``, the training fold is resampled with
        ``RandomUnderSampler`` before fitting.

    Returns a NumPy array built from the ``Metrics.Score`` result of every
    fold of every repetition, in execution order.
    """
    scores = []
    for repeat in range(times):
        seed = random_state + repeat
        splitter = StratifiedKFold(n_splits=cv, shuffle=True, random_state=seed)
        for train_idx, test_idx in splitter.split(X=X, y=y):
            x_train, y_train = X[train_idx], y[train_idx]
            x_test, y_test = X[test_idx], y[test_idx]

            if imb == True:  # equality test (not truthiness) kept on purpose
                # NOTE(review): the two returned values are not used below;
                # the call is kept so behaviour is unchanged.
                _n, _p = __lableCount(y_train)
                sampler = RandomUnderSampler(random_state=seed)
                x_train, y_train = sampler.fit_sample(x_train, y_train)

            if procsessor is not None:
                procsessor.fit(x_train, y_train)
                x_train, y_train = procsessor.transform(x_train, y_train)
                x_test, y_test = procsessor.transform(x_test, y_test)

            estimator.fit(x_train, y_train)
            scores.append(Metrics.Score(estimator, x_test, y_test))

    return np.array(scores)

开发者ID:DunZhang，项目名称:SVPFS，代码行数:48，代码来源:ModelSelection.py

示例15: test_rus_fit_sample_half

def test_rus_fit_sample_half():
    """Compare ``fit_sample`` with ``ratio=0.5`` against fixed expected
    arrays."""
    repeated_row = [0.15490546, 0.3130677]
    expected_X = np.array([
        [0.92923648, 0.76103773],
        [0.47104475, 0.44386323],
        [0.13347175, 0.12167502],
        [0.09125309, -0.85409574],
        [0.12372842, 0.6536186],
        [0.04352327, -0.20515826],
        repeated_row,
        repeated_row,
        repeated_row,
    ])
    expected_y = np.array([0, 0, 0, 1, 1, 1, 1, 1, 1])

    sampler = RandomUnderSampler(ratio=0.5, random_state=RND_SEED)
    X_resampled, y_resampled = sampler.fit_sample(X, Y)

    assert_array_equal(X_resampled, expected_X)
    assert_array_equal(y_resampled, expected_y)

开发者ID:kellyhennigan，项目名称:cueexp_scripts，代码行数:16，代码来源:test_random_under_sampler.py

注：本文中的imblearn.under_sampling.RandomUnderSampler类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。