本文整理汇总了 Python 中 imblearn.combine.SMOTETomek 类的典型用法代码示例。如果您正苦于以下问题:Python SMOTETomek 类的具体用法?Python SMOTETomek 怎么用?Python SMOTETomek 使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了 SMOTETomek 类的 11 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: outer_cv_loop
def outer_cv_loop(Xdata,Ydata,clf,parameters=None,
                  n_splits=10,test_size=0.25):
    """Score ``clf`` over repeated stratified shuffle splits.

    For each split the train and test feature rows are imputed separately
    with ``fancyimpute.SoftImpute``.  When the mean of the training labels
    is more than 0.2 away from 0.5, the training fold is resampled with
    ``SMOTETomek`` before ``clf`` is fitted.

    Parameters
    ----------
    Xdata : array indexed as ``Xdata[rows, :]``
        Feature matrix.
    Ydata : array indexed by row-index arrays
        Labels; the balance check takes their mean, so they are presumably
        coded 0/1 -- TODO confirm against callers.
    clf : estimator
        Must provide ``fit``, ``predict`` and ``feature_importances_``.
    parameters : optional
        Not read by this function; kept so existing call sites still work.
    n_splits, test_size
        Forwarded to ``StratifiedShuffleSplit``.

    Returns
    -------
    rocscores : list
        One entry per split: the ROC AUC of the predictions, or ``nan``
        when the test labels or the predictions have zero variance.
    importances : list
        ``clf.feature_importances_`` for every split that was fitted.
        Splits skipped for zero-variance test labels add nothing here, so
        this list can be shorter than ``rocscores``.
    """
    importances=[]
    rocscores=[]
    kf=StratifiedShuffleSplit(n_splits=n_splits,test_size=test_size)
    for train,test in kf.split(Xdata,Ydata):
        if numpy.var(Ydata[test])==0:
            # NOTE(review): ``varname`` is not defined in this function; it
            # has to exist at module level or this line raises NameError.
            print('zero variance',varname)
            rocscores.append(numpy.nan)
            continue
        Ytrain=Ydata[train]
        Xtrain=fancyimpute.SoftImpute(verbose=False).complete(Xdata[train,:])
        Xtest=fancyimpute.SoftImpute(verbose=False).complete(Xdata[test,:])
        if numpy.abs(numpy.mean(Ytrain)-0.5)>0.2:
            # NOTE(review): other snippets in this file call
            # ``fit_resample``; confirm which name the pinned
            # imbalanced-learn version provides.
            smt=SMOTETomek()
            Xtrain,Ytrain=smt.fit_sample(Xtrain.copy(),Ytrain)
        clf.fit(Xtrain,Ytrain)
        pred=clf.predict(Xtest)
        # Constant predictions carry no ranking information: record nan.
        if numpy.var(pred)>0:
            rocscores.append(roc_auc_score(Ydata[test],pred))
        else:
            rocscores.append(numpy.nan)
        importances.append(clf.feature_importances_)
    return rocscores,importances
开发者ID:IanEisenberg,项目名称:Self_Regulation_Ontology,代码行数:27,代码来源:demographic_feature_importance_behav.py
示例2: test_sample_wrong_X
def test_sample_wrong_X():
    """Sampling data that differs from the fitted data must raise
    ``RuntimeError``."""
    sampler = SMOTETomek(random_state=RND_SEED)
    sampler.fit(X, Y)

    # Fresh random features and a 50/50 label vector, unrelated to the
    # arrays the sampler was fitted on.
    other_X = np.random.random((100, 40))
    other_y = np.array([0] * 50 + [1] * 50)
    assert_raises(RuntimeError, sampler.sample, other_X, other_y)
示例3: test_sample_regular
def test_sample_regular():
    """Compare ``fit_sample`` output with the reference arrays stored in
    the ``data`` directory next to this file."""
    sampler = SMOTETomek(random_state=RND_SEED)
    sampler.fit(X, Y)
    X_resampled, y_resampled = sampler.fit_sample(X, Y)

    # Reference arrays live in <this file's directory>/data.
    data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')
    expected_X = np.load(os.path.join(data_dir, 'smote_tomek_reg_x.npy'))
    expected_y = np.load(os.path.join(data_dir, 'smote_tomek_reg_y.npy'))

    assert_array_equal(X_resampled, expected_X)
    assert_array_equal(y_resampled, expected_y)
示例4: test_validate_estimator_default
def test_validate_estimator_default():
    """Resample with a ``SMOTETomek`` that only sets ``random_state`` and
    compare the result with hard-coded expected values."""
    sampler = SMOTETomek(random_state=RND_SEED)
    X_resampled, y_resampled = sampler.fit_resample(X, Y)

    # Expected samples, one row per line.
    expected_X = np.array([
        [0.68481731, 0.51935141],
        [1.34192108, -0.13367336],
        [0.62366841, -0.21312976],
        [1.61091956, -0.40283504],
        [-0.37162401, -2.19400981],
        [0.74680821, 1.63827342],
        [0.61472253, -0.82309052],
        [0.19893132, -0.47761769],
        [1.40301027, -0.83648734],
        [-1.20515198, -1.02689695],
        [-0.23374509, 0.18370049],
        [-0.00288378, 0.84259929],
        [1.79580611, -0.02219234],
        [0.38307743, -0.05670439],
        [0.70319159, -0.02571667],
        [0.75052536, -0.19246518],
    ])
    expected_y = np.array([1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0])

    assert_allclose(X_resampled, expected_X, rtol=R_TOL)
    assert_array_equal(y_resampled, expected_y)
示例5: test_sample_regular_half
def test_sample_regular_half():
    """Resample with an explicit per-class ``sampling_strategy`` dict and
    compare the result with hard-coded expected values."""
    sampler = SMOTETomek(
        sampling_strategy={0: 9, 1: 12},
        random_state=RND_SEED,
    )
    X_resampled, y_resampled = sampler.fit_resample(X, Y)

    # Expected samples, one row per line.
    expected_X = np.array([
        [0.68481731, 0.51935141],
        [0.62366841, -0.21312976],
        [1.61091956, -0.40283504],
        [-0.37162401, -2.19400981],
        [0.74680821, 1.63827342],
        [0.61472253, -0.82309052],
        [0.19893132, -0.47761769],
        [1.40301027, -0.83648734],
        [-1.20515198, -1.02689695],
        [-0.23374509, 0.18370049],
        [-0.00288378, 0.84259929],
        [1.79580611, -0.02219234],
        [0.45784496, -0.1053161],
    ])
    expected_y = np.array([1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0])

    assert_allclose(X_resampled, expected_X, rtol=R_TOL)
    assert_array_equal(y_resampled, expected_y)
示例6: test_sample_regular_half
def test_sample_regular_half():
    """Compare ``fit_sample`` output for ``ratio=0.5`` with the reference
    arrays stored in the ``data`` directory next to this file."""
    sampler = SMOTETomek(ratio=0.5, random_state=RND_SEED)
    sampler.fit(X, Y)
    X_resampled, y_resampled = sampler.fit_sample(X, Y)

    # Reference arrays live in <this file's directory>/data.
    data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
    expected_X = np.load(os.path.join(data_dir, "smote_tomek_reg_x_05.npy"))
    expected_y = np.load(os.path.join(data_dir, "smote_tomek_reg_y_05.npy"))

    assert_array_equal(X_resampled, expected_X)
    assert_array_equal(y_resampled, expected_y)
示例7: main_cv_loop
def main_cv_loop(Xdata,Ydata,clf,parameters,
                 n_folds=4,oversample_thresh=0.1,verbose=False):
    """Run the outer cross-validation loop and collect test predictions.

    Each stratified shuffle split (``test_size=0.2``) is handled as follows:
    the training fold is resampled with ``SMOTETomek`` when the mean of its
    labels is more than 0.2 away from 0.5; ``inner_cv_loop`` selects an
    estimator (and optionally a fitted transformer ``fa``); the test rows
    are transformed with ``fa`` when one is returned; predictions for the
    test rows are written into the output arrays.

    Parameters
    ----------
    Xdata : array indexed as ``Xdata[rows, :]``
        Feature matrix.
    Ydata : array indexed by row-index arrays
        Labels, presumably binary 0/1 -- TODO confirm against callers.
    clf, parameters
        Forwarded unchanged to ``inner_cv_loop``.
    n_folds : int
        Number of shuffle splits.  The previous code hard-coded 4 here and
        ignored this argument; the default is unchanged.
    oversample_thresh : float
        Accepted for backward compatibility.  The per-fold resampling
        decision uses the fixed 0.2 threshold described above.
    verbose : bool
        Print progress messages from this loop.

    Returns
    -------
    tuple
        ``(roc_auc, Ydata, pred, pred_proba)``.  ``roc_auc`` is computed
        from the hard predictions ``pred``.  Rows that never fall into a
        test split keep their initial value of 0 in both arrays, and rows
        drawn more than once keep the result from the last split.
    """
    kf=StratifiedShuffleSplit(n_splits=n_folds,test_size=0.2)
    pred=numpy.zeros(len(Ydata))        # hard predictions per row
    pred_proba=numpy.zeros(len(Ydata))  # one probability per row
    for train,test in kf.split(Xdata,Ydata):
        Xtrain=Xdata[train,:]
        Xtest=Xdata[test,:]
        Ytrain=Ydata[train]
        if numpy.abs(numpy.mean(Ytrain)-0.5)>0.2:
            if verbose:
                print('oversampling using SMOTETomek')
            sm=SMOTETomek()
            Xtrain,Ytrain=sm.fit_sample(Xtrain,Ytrain)
        # NOTE(review): the inner loop is always run with verbose=True,
        # regardless of this function's ``verbose`` flag -- confirm intent.
        best_estimator_,bestroc,fa=inner_cv_loop(Xtrain,Ytrain,clf,
                                                 parameters,verbose=True)
        if fa is not None:
            if verbose:
                print('transforming using fa')
                print(fa)
            Xtest=fa.transform(Xtest)
        proba=numpy.asarray(best_estimator_.predict_proba(Xtest))
        if proba.ndim==2:
            # predict_proba gives one column per class.  Writing the whole
            # 2-D array through .flat[test] would interleave the columns,
            # so keep only the last column (the positive class for 0/1
            # labels).
            proba=proba[:,-1]
        pred_proba.flat[test]=proba
        pred.flat[test]=best_estimator_.predict(Xtest)
    return roc_auc_score(Ydata,pred,average='weighted'),Ydata,pred,pred_proba
示例8: test_error_wrong_object
def test_error_wrong_object():
    """Passing a plain string as ``smote`` or ``tomek`` must raise a
    ``ValueError`` with the matching message on ``fit_resample``."""
    bad_value = 'rnd'
    cases = (
        ('smote', "smote needs to be a SMOTE"),
        ('tomek', "tomek needs to be a TomekLinks"),
    )
    for param_name, message in cases:
        sampler = SMOTETomek(random_state=RND_SEED, **{param_name: bad_value})
        with raises(ValueError, match=message):
            sampler.fit_resample(X, Y)
示例9: test_parallelisation
def test_parallelisation():
    """``n_jobs`` must be 1 by default and, when given, must be carried
    over to both the ``smote_`` and ``tomek_`` sub-estimators."""
    scenarios = (
        ({}, 1),             # no n_jobs passed: expect 1
        ({'n_jobs': 8}, 8),  # explicit job count
    )
    for extra_kwargs, expected_jobs in scenarios:
        sampler = SMOTETomek(random_state=RND_SEED, **extra_kwargs)
        sampler._validate_estimator()
        for estimator in (sampler, sampler.smote_, sampler.tomek_):
            assert estimator.n_jobs == expected_jobs
示例10: print
print(__doc__)

# Build a 100-sample, 20-feature two-class dataset with 10%/90% weights
X, y = make_classification(
    n_samples=100, n_features=20, n_informative=3, n_redundant=1,
    n_classes=2, n_clusters_per_class=1, weights=[0.1, 0.9],
    class_sep=2, flip_y=0, random_state=10)

# Reduce to two principal components so the data can be drawn in 2D
pca = PCA(n_components=2)
X_vis = pca.fit_transform(X)

# Resample with SMOTE + Tomek links, then reuse the fitted PCA projection
sm = SMOTETomek()
X_resampled, y_resampled = sm.fit_sample(X, y)
X_res_vis = pca.transform(X_resampled)

# One row, two columns: original data on the left, resampled on the right
f, (ax1, ax2) = plt.subplots(nrows=1, ncols=2)

c0 = ax1.scatter(
    X_vis[y == 0, 0], X_vis[y == 0, 1],
    label="Class #0", alpha=0.5)
c1 = ax1.scatter(
    X_vis[y == 1, 0], X_vis[y == 1, 1],
    label="Class #1", alpha=0.5)
ax1.set_title('Original set')

ax2.scatter(
    X_res_vis[y_resampled == 0, 0], X_res_vis[y_resampled == 0, 1],
    label="Class #0", alpha=0.5)
ax2.scatter(X_res_vis[y_resampled == 1, 0], X_res_vis[y_resampled == 1, 1],
示例11: test_error_wrong_object
def test_error_wrong_object(smote_params, err_msg):
    """Building ``SMOTETomek`` from ``smote_params`` and calling
    ``fit_resample`` must raise a ``ValueError`` matching ``err_msg``."""
    sampler = SMOTETomek(**smote_params)
    with pytest.raises(ValueError, match=err_msg):
        sampler.fit_resample(X, Y)