本文整理汇总了Python中imblearn.under_sampling.RandomUnderSampler.fit_sample方法的典型用法代码示例。如果您正苦于以下问题：Python RandomUnderSampler.fit_sample方法的具体用法？Python RandomUnderSampler.fit_sample怎么用？Python RandomUnderSampler.fit_sample使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类imblearn.under_sampling.RandomUnderSampler的用法示例。
在下文中一共展示了RandomUnderSampler.fit_sample方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_pipeline_sample
# 需要导入模块: from imblearn.under_sampling import RandomUnderSampler [as 别名]
# 或者: from imblearn.under_sampling.RandomUnderSampler import fit_sample [as 别名]
def test_pipeline_sample():
    """Check that a pipeline whose last step is a sampler resamples correctly.

    First a sampler-only pipeline is compared with the bare sampler, then a
    PCA + sampler pipeline is compared with running both steps by hand.
    """
    dataset_options = dict(
        n_samples=5000,
        n_features=20,
        n_informative=3,
        n_redundant=1,
        n_classes=2,
        n_clusters_per_class=1,
        weights=[0.1, 0.9],
        class_sep=2,
        flip_y=0,
        random_state=0,
    )
    X, y = make_classification(**dataset_options)
    rus = RandomUnderSampler(random_state=0)

    # Sampler-only pipeline: fit().sample(), fit_sample() and the bare
    # sampler must all agree.
    pipeline = Pipeline([('rus', rus)])
    X_ref, y_ref = pipeline.fit(X, y).sample(X, y)
    X_pipe, y_pipe = pipeline.fit_sample(X, y)
    X_bare, y_bare = rus.fit_sample(X, y)
    for X_other in (X_pipe, X_bare):
        assert_array_almost_equal(X_ref, X_other)
    for y_other in (y_pipe, y_bare):
        assert_array_almost_equal(y_ref, y_other)

    # PCA followed by the sampler: the pipeline must match the manual chain.
    pca = PCA()
    pipeline = Pipeline([('pca', pca), ('rus', rus)])
    X_piped, y_piped = pipeline.fit(X, y).sample(X, y)
    X_manual, y_manual = rus.fit_sample(pca.fit_transform(X), y)
    assert_array_almost_equal(X_piped, X_manual)
    assert_array_almost_equal(y_piped, y_manual)
示例2: downsample
# 需要导入模块: from imblearn.under_sampling import RandomUnderSampler [as 别名]
# 或者: from imblearn.under_sampling.RandomUnderSampler import fit_sample [as 别名]
def downsample(self):
    """Randomly under-sample ``self.X`` / ``self.y`` and rebuild ``self.Xview``.

    The class counts of ``self.y`` are printed before and after resampling.
    """
    counts_before = Counter(self.y)
    print('Current outcome sampling {}'.format(counts_before))

    sampler = RandomUnderSampler()
    self.X, self.y = sampler.fit_sample(self.X, self.y)

    # Keep only the first ``n_features`` columns in the view.
    full_view = self.X.view()
    self.Xview = full_view[:, :self.n_features]

    counts_after = Counter(self.y)
    print('Resampled dataset shape {}'.format(counts_after))
示例3: undersample
# 需要导入模块: from imblearn.under_sampling import RandomUnderSampler [as 别名]
# 或者: from imblearn.under_sampling.RandomUnderSampler import fit_sample [as 别名]
def undersample(X, y, bal_strategy):
    """Under-sample ``(X, y)`` according to ``bal_strategy``.

    Parameters
    ----------
    X, y :
        Feature matrix and target vector; both must expose ``.shape``.
    bal_strategy : str
        ``"RANDOM"`` applies random under-sampling, ``"TOMEK"`` applies Tomek
        links cleaning, ``"ALL"`` applies random under-sampling followed by
        Tomek links cleaning, and ``"NONE"`` returns the data untouched.

    Returns
    -------
    tuple
        ``(X_sampled, y_sampled)``.

    Any other ``bal_strategy`` prints an error message and terminates the
    process through ``sys.exit(1)``.
    """
    print('Shape of X: ', X.shape)
    print('Shape of y_Train: ', y.shape)

    if bal_strategy not in ('RANDOM', 'TOMEK', 'ALL', 'NONE'):
        print('bal_strategy not in ALL, RANDOM, TOMEK, NONE')
        sys.exit(1)

    X_sampled, y_sampled = X, y

    # The two cleaning steps are independent ``if`` statements (not an
    # if/elif chain) so that "ALL" really runs both of them in sequence.
    if bal_strategy in ('RANDOM', 'ALL'):
        # apply random under-sampling
        rus = RandomUnderSampler()
        X_sampled, y_sampled = rus.fit_sample(X_sampled, y_sampled)
        print('Shape of X_sampled: ', X_sampled.shape)
        print('Shape of y_sampled: ', y_sampled.shape)

    if bal_strategy in ('TOMEK', 'ALL'):
        # Apply Tomek Links cleaning
        tl = TomekLinks()
        X_sampled, y_sampled = tl.fit_sample(X_sampled, y_sampled)
        print('Shape of X_sampled: ', X_sampled.shape)
        print('Shape of y_sampled: ', y_sampled.shape)

    if bal_strategy == 'NONE':
        print('Shape of X_sampled: ', X_sampled.shape)
        print('Shape of y_sampled: ', y_sampled.shape)

    return (X_sampled, y_sampled)
示例4: downsample
# 需要导入模块: from imblearn.under_sampling import RandomUnderSampler [as 别名]
# 或者: from imblearn.under_sampling.RandomUnderSampler import fit_sample [as 别名]
def downsample(self):
    """Balance class data based on outcome.

    Randomly under-samples ``self.X`` / ``self.y`` with a fixed seed
    (``random_state=0``), appends the indices returned by the sampler to the
    file ``downsampled_idx`` and the resampled targets to ``downsampled_y``,
    then rebuilds ``self.Xview`` from the first ``self.n_features`` columns.
    The class counts of ``self.y`` are printed before and after resampling.
    """
    print('Current outcome sampling {}'.format(Counter(self.y)))

    # The seed is fixed so the draw is reproducible, and return_indices=True
    # makes fit_sample return a third value with the selected indices.
    # For a non-reproducible draw use RandomUnderSampler() and unpack two
    # values instead.
    rus = RandomUnderSampler(random_state=0, return_indices=True)
    self.X, self.y, ds_idx = rus.fit_sample(self.X, self.y)

    # Context managers close the handles even if a write raises.
    # Append the downsampled indices to file:
    with open('downsampled_idx', 'a') as idx_file:
        idx_file.write(str(ds_idx) + '\n')
    # Append the downsampled y to file:
    with open('downsampled_y', 'a') as y_file:
        y_file.write(str(self.y) + '\n')

    self.Xview = self.X.view()[:, :self.n_features]
    print('Resampled dataset shape {}'.format(Counter(self.y)))
示例5: transform
# 需要导入模块: from imblearn.under_sampling import RandomUnderSampler [as 别名]
# 或者: from imblearn.under_sampling.RandomUnderSampler import fit_sample [as 别名]
def transform(self, X, y=None):
    """Return ``X`` randomly under-sampled on its predicted column.

    The ``y`` argument is ignored: the target is read from the
    ``self.predicted_column`` column of ``X``. The result is a new DataFrame
    holding the resampled feature columns plus the resampled target stored
    under ``self.predicted_column``.
    """
    # TODO how do we validate this happens before train/test split? Or do we need to? Can we implement it in the
    # TODO simple trainer in the correct order and leave this to advanced users?
    target_name = self.predicted_column

    # Split the frame into the target vector and the remaining features.
    target = np.squeeze(X[[target_name]])
    features = X.drop([target_name], axis=1)

    sampler = RandomUnderSampler(random_state=self.random_seed)
    sampled_features, sampled_target = sampler.fit_sample(features, target)

    # Rebuild a frame with the original feature names, then re-attach the
    # target column.
    balanced = pd.DataFrame(sampled_features)
    balanced.columns = features.columns
    balanced[target_name] = pd.Series(sampled_target)
    return balanced
示例6: test_multiclass_fit_sample
# 需要导入模块: from imblearn.under_sampling import RandomUnderSampler [as 别名]
# 或者: from imblearn.under_sampling.RandomUnderSampler import fit_sample [as 别名]
def test_multiclass_fit_sample():
    """Check that each of the three classes ends up with two samples."""
    # Relabel two entries to introduce a third class.
    multiclass_y = Y.copy()
    for position in (5, 6):
        multiclass_y[position] = 2

    sampler = RandomUnderSampler(random_state=RND_SEED)
    X_resampled, y_resampled = sampler.fit_sample(X, multiclass_y)

    class_counts = Counter(y_resampled)
    for label in (0, 1, 2):
        assert class_counts[label] == 2
示例7: test_rus_fit_sample
# 需要导入模块: from imblearn.under_sampling import RandomUnderSampler [as 别名]
# 或者: from imblearn.under_sampling.RandomUnderSampler import fit_sample [as 别名]
def test_rus_fit_sample():
    """Compare the fit_sample output with the reference arrays on disk."""
    sampler = RandomUnderSampler(random_state=RND_SEED)
    X_resampled, y_resampled = sampler.fit_sample(X, Y)

    # Reference arrays are stored in the ``data`` directory beside this file.
    data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')
    expected_X = np.load(os.path.join(data_dir, 'rus_x.npy'))
    expected_y = np.load(os.path.join(data_dir, 'rus_y.npy'))

    assert_array_equal(X_resampled, expected_X)
    assert_array_equal(y_resampled, expected_y)
示例8: test_rus_fit_sample
# 需要导入模块: from imblearn.under_sampling import RandomUnderSampler [as 别名]
# 或者: from imblearn.under_sampling.RandomUnderSampler import fit_sample [as 别名]
def test_rus_fit_sample():
    """Check fit_sample with ``replacement=True`` against hard-coded output."""
    sampler = RandomUnderSampler(replacement=True, random_state=RND_SEED)
    X_resampled, y_resampled = sampler.fit_sample(X, Y)

    expected_X = np.array([
        [0.92923648, 0.76103773],
        [0.47104475, 0.44386323],
        [0.13347175, 0.12167502],
        [0.09125309, -0.85409574],
        [0.12372842, 0.6536186],
        [0.04352327, -0.20515826],
    ])
    expected_y = np.array([0] * 3 + [1] * 3)

    assert_array_equal(X_resampled, expected_X)
    assert_array_equal(y_resampled, expected_y)
示例9: test_pipeline_sample
# 需要导入模块: from imblearn.under_sampling import RandomUnderSampler [as 别名]
# 或者: from imblearn.under_sampling.RandomUnderSampler import fit_sample [as 别名]
def test_pipeline_sample():
    """Check that a pipeline whose last step is a sampler resamples correctly.

    All comparisons use ``assert_allclose`` with ``rtol=R_TOL``. For the
    PCA + sampler case, entries with magnitude below ``R_TOL`` are set to
    zero on both sides before comparing.
    """
    def _snap_to_zero(values):
        # In-place: zero every entry strictly between -R_TOL and R_TOL.
        values[(values < R_TOL) & (values > -R_TOL)] = 0

    dataset_options = dict(
        n_samples=5000,
        n_features=20,
        n_informative=3,
        n_redundant=1,
        n_classes=2,
        n_clusters_per_class=1,
        weights=[0.1, 0.9],
        class_sep=2,
        flip_y=0,
        random_state=0,
    )
    X, y = make_classification(**dataset_options)
    rus = RandomUnderSampler(random_state=0)

    # Sampler-only pipeline: fit().sample(), fit_sample() and the bare
    # sampler must all agree.
    pipeline = Pipeline([('rus', rus)])
    X_ref, y_ref = pipeline.fit(X, y).sample(X, y)
    X_pipe, y_pipe = pipeline.fit_sample(X, y)
    X_bare, y_bare = rus.fit_sample(X, y)
    for X_other in (X_pipe, X_bare):
        assert_allclose(X_ref, X_other, rtol=R_TOL)
    for y_other in (y_pipe, y_bare):
        assert_allclose(y_ref, y_other, rtol=R_TOL)

    # PCA followed by the sampler; the pipeline holds its own PCA instance
    # while the manual chain uses a separate one.
    pca = PCA()
    pipeline = Pipeline([('pca', PCA()), ('rus', rus)])
    X_piped, y_piped = pipeline.fit(X, y).sample(X, y)
    X_manual, y_manual = rus.fit_sample(pca.fit_transform(X), y)

    # We round the values near zero. It seems that PCA has some issue
    # with that.
    _snap_to_zero(X_piped)
    _snap_to_zero(X_manual)
    assert_allclose(X_piped, X_manual, rtol=R_TOL)
    assert_allclose(y_piped, y_manual, rtol=R_TOL)
示例10: test_rus_fit_sample_with_indices
# 需要导入模块: from imblearn.under_sampling import RandomUnderSampler [as 别名]
# 或者: from imblearn.under_sampling.RandomUnderSampler import fit_sample [as 别名]
def test_rus_fit_sample_with_indices():
    """Compare fit_sample output, including indices, with arrays on disk."""
    sampler = RandomUnderSampler(random_state=RND_SEED, return_indices=True)
    X_resampled, y_resampled, idx_under = sampler.fit_sample(X, Y)

    # Reference arrays are stored in the ``data`` directory beside this file.
    data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')
    expected_X = np.load(os.path.join(data_dir, 'rus_x.npy'))
    expected_y = np.load(os.path.join(data_dir, 'rus_y.npy'))
    expected_idx = np.load(os.path.join(data_dir, 'rus_idx.npy'))

    assert_array_equal(X_resampled, expected_X)
    assert_array_equal(y_resampled, expected_y)
    assert_array_equal(idx_under, expected_idx)
示例11: test_rus_fit_sample_with_indices
# 需要导入模块: from imblearn.under_sampling import RandomUnderSampler [as 别名]
# 或者: from imblearn.under_sampling.RandomUnderSampler import fit_sample [as 别名]
def test_rus_fit_sample_with_indices():
    """Check fit_sample output, including indices, against hard-coded values."""
    sampler = RandomUnderSampler(random_state=RND_SEED, return_indices=True)
    X_resampled, y_resampled, idx_under = sampler.fit_sample(X, Y)

    expected_X = np.array([
        [0.92923648, 0.76103773],
        [0.47104475, 0.44386323],
        [0.13347175, 0.12167502],
        [0.09125309, -0.85409574],
        [0.12372842, 0.6536186],
        [0.04352327, -0.20515826],
    ])
    expected_y = np.array([0] * 3 + [1] * 3)
    expected_idx = np.array([1, 3, 8, 6, 7, 0])

    assert_array_equal(X_resampled, expected_X)
    assert_array_equal(y_resampled, expected_y)
    assert_array_equal(idx_under, expected_idx)
示例12: CrossVal
# 需要导入模块: from imblearn.under_sampling import RandomUnderSampler [as 别名]
# 或者: from imblearn.under_sampling.RandomUnderSampler import fit_sample [as 别名]
def CrossVal(estimator, X, y, procsessor=None, cv=3, times=10, random_state=0, imb=False):
    """Run repeated stratified k-fold cross-validation and collect the scores.

    Parameters
    ----------
    estimator :
        Model to evaluate; it is re-fitted on the training part of every fold.
    X :
        Feature part of the data set; must support indexing with the index
        arrays produced by ``StratifiedKFold.split``.
    y :
        Labels of the data set; indexed the same way as ``X``.
    procsessor :
        Optional preprocessor (in practice a feature selector). It is fitted
        on the training fold, and its ``transform(X, y)`` must return an
        ``(X, y)`` pair; it is applied to both the training and test folds.
    cv :
        Number of folds of each cross-validation run.
    times :
        Number of times the cross-validation is repeated.
    random_state :
        Base random seed; repetition ``t`` uses ``random_state + t`` for both
        the fold split and the under-sampler.
    imb :
        When true, the training fold is balanced with ``RandomUnderSampler``
        before fitting. The test fold is never resampled.

    Returns
    -------
    numpy.ndarray
        The values returned by ``Metrics.Score``, one entry per fold of
        every repetition.
    """
    scores = []
    for t in range(times):
        seed = random_state + t
        skf = StratifiedKFold(n_splits=cv, shuffle=True, random_state=seed)
        for train_idx, test_idx in skf.split(X=X, y=y):
            x_train, y_train = X[train_idx], y[train_idx]
            x_test, y_test = X[test_idx], y[test_idx]
            if imb:
                rus = RandomUnderSampler(random_state=seed)
                x_train, y_train = rus.fit_sample(x_train, y_train)
            if procsessor is not None:
                # Fit on the training fold only so nothing leaks from the
                # test fold into the preprocessing step.
                procsessor.fit(x_train, y_train)
                x_train, y_train = procsessor.transform(x_train, y_train)
                x_test, y_test = procsessor.transform(x_test, y_test)
            estimator.fit(x_train, y_train)
            scores.append(Metrics.Score(estimator, x_test, y_test))
    return np.array(scores)
示例13: test_rus_fit_sample_half
# 需要导入模块: from imblearn.under_sampling import RandomUnderSampler [as 别名]
# 或者: from imblearn.under_sampling.RandomUnderSampler import fit_sample [as 别名]
def test_rus_fit_sample_half():
    """Check fit_sample with ``ratio=0.5`` against hard-coded output."""
    sampler = RandomUnderSampler(ratio=0.5, random_state=RND_SEED)
    X_resampled, y_resampled = sampler.fit_sample(X, Y)

    expected_X = np.array([
        [0.92923648, 0.76103773],
        [0.47104475, 0.44386323],
        [0.13347175, 0.12167502],
        [0.09125309, -0.85409574],
        [0.12372842, 0.6536186],
        [0.04352327, -0.20515826],
        [0.15490546, 0.3130677],
        [0.15490546, 0.3130677],
        [0.15490546, 0.3130677],
    ])
    expected_y = np.array([0] * 3 + [1] * 6)

    assert_array_equal(X_resampled, expected_X)
    assert_array_equal(y_resampled, expected_y)
示例14: test_multiclass_fit_sample
# 需要导入模块: from imblearn.under_sampling import RandomUnderSampler [as 别名]
# 或者: from imblearn.under_sampling.RandomUnderSampler import fit_sample [as 别名]
def test_multiclass_fit_sample():
    """Check that each of the three classes ends up with two samples."""
    # Relabel two entries so the target has three classes.
    multiclass_y = Y.copy()
    for position in (5, 6):
        multiclass_y[position] = 2

    sampler = RandomUnderSampler(random_state=RND_SEED)
    X_resampled, y_resampled = sampler.fit_sample(X, multiclass_y)

    # Every class must be represented by exactly two samples.
    class_counts = Counter(y_resampled)
    for label in (0, 1, 2):
        assert_equal(class_counts[label], 2)
示例15: test_rus_fit_sample_half
# 需要导入模块: from imblearn.under_sampling import RandomUnderSampler [as 别名]
# 或者: from imblearn.under_sampling.RandomUnderSampler import fit_sample [as 别名]
def test_rus_fit_sample_half():
    """Check fit_sample with a dict ``ratio`` and ``replacement=True``."""
    target_counts = {0: 3, 1: 6}
    sampler = RandomUnderSampler(
        ratio=target_counts,
        random_state=RND_SEED,
        replacement=True,
    )
    X_resampled, y_resampled = sampler.fit_sample(X, Y)

    expected_X = np.array([
        [0.92923648, 0.76103773],
        [0.47104475, 0.44386323],
        [0.92923648, 0.76103773],
        [0.15490546, 0.3130677],
        [0.15490546, 0.3130677],
        [0.15490546, 0.3130677],
        [0.20792588, 1.49407907],
        [0.15490546, 0.3130677],
        [0.12372842, 0.6536186],
    ])
    expected_y = np.array([0] * 3 + [1] * 6)

    print(X_resampled)
    assert_array_equal(X_resampled, expected_X)
    assert_array_equal(y_resampled, expected_y)