本文整理汇总了Python中imblearn.combine.SMOTETomek.fit_sample方法的典型用法代码示例。如果您正苦于以下问题：Python SMOTETomek.fit_sample方法的具体用法？Python SMOTETomek.fit_sample怎么用？Python SMOTETomek.fit_sample使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类imblearn.combine.SMOTETomek的用法示例。
在下文中一共展示了SMOTETomek.fit_sample方法的8个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_error_wrong_object
# 需要导入模块: from imblearn.combine import SMOTETomek [as 别名]
# 或者: from imblearn.combine.SMOTETomek import fit_sample [as 别名]
def test_error_wrong_object():
    """Check that a non-sampler ``smote``/``tomek`` argument is rejected.

    A plain string is passed in place of each sub-sampler in turn; the
    ``ValueError`` is expected when ``fit_sample`` is called, not at
    construction time.
    """
    # (constructor keyword, expected error-message pattern)
    cases = (
        ('smote', "smote needs to be a SMOTE"),
        ('tomek', "tomek needs to be a TomekLinks"),
    )
    for keyword, expected_message in cases:
        sampler = SMOTETomek(random_state=RND_SEED, **{keyword: 'rnd'})
        with raises(ValueError, match=expected_message):
            sampler.fit_sample(X, Y)
示例2: outer_cv_loop
# 需要导入模块: from imblearn.combine import SMOTETomek [as 别名]
# 或者: from imblearn.combine.SMOTETomek import fit_sample [as 别名]
def outer_cv_loop(Xdata, Ydata, clf, parameters=None,
                  n_splits=10, test_size=0.25):
    """Score ``clf`` over stratified shuffle splits and collect importances.

    For each split the train and test rows are imputed independently with
    ``fancyimpute.SoftImpute``. When the training labels are imbalanced
    (their mean is more than 0.2 away from 0.5) the training fold is
    resampled with ``SMOTETomek`` before ``clf`` is fitted.

    Parameters
    ----------
    Xdata : array
        Feature matrix, indexed as ``Xdata[rows, :]``.
    Ydata : array
        Labels, indexed as ``Ydata[rows]``. Presumably binary 0/1 given the
        0.5 balance check -- confirm against the caller.
    clf : estimator
        Must provide ``fit``, ``predict`` and ``feature_importances_``.
    parameters : optional
        Not used by this function; kept so existing call sites keep working.
        The default is ``None`` rather than a shared mutable list.
    n_splits : int
        Forwarded to ``StratifiedShuffleSplit``.
    test_size : float
        Forwarded to ``StratifiedShuffleSplit``.

    Returns
    -------
    rocscores : list
        One entry per split: the ROC AUC of the predictions, or ``nan`` when
        either the test labels or the predictions have zero variance.
    importances : list
        ``clf.feature_importances_`` for every split that was actually
        fitted (splits skipped for zero label variance contribute nothing).
    """
    splitter = StratifiedShuffleSplit(n_splits=n_splits, test_size=test_size)
    rocscores = []
    importances = []
    for train, test in splitter.split(Xdata, Ydata):
        Ytest = Ydata[test]
        if numpy.var(Ytest) == 0:
            # NOTE(review): ``varname`` is not defined in this function; it
            # is presumably a module-level global of the calling script.
            print('zero variance', varname)
            rocscores.append(numpy.nan)
            continue
        Ytrain = Ydata[train]
        # Train and test are imputed separately so that no information from
        # the held-out rows leaks into the training matrix.
        Xtrain = fancyimpute.SoftImpute(verbose=False).complete(Xdata[train, :])
        Xtest = fancyimpute.SoftImpute(verbose=False).complete(Xdata[test, :])
        if numpy.abs(numpy.mean(Ytrain) - 0.5) > 0.2:
            smt = SMOTETomek()
            Xtrain, Ytrain = smt.fit_sample(Xtrain.copy(), Ytrain)
        clf.fit(Xtrain, Ytrain)
        pred = clf.predict(Xtest)
        # Filter out bad folds: constant predictions get ``nan`` rather
        # than a score.
        if numpy.var(pred) > 0:
            rocscores.append(roc_auc_score(Ytest, pred))
        else:
            rocscores.append(numpy.nan)
        importances.append(clf.feature_importances_)
    return rocscores, importances
开发者ID:IanEisenberg,项目名称:Self_Regulation_Ontology,代码行数:29,代码来源:demographic_feature_importance_behav.py
示例3: test_sample_regular
# 需要导入模块: from imblearn.combine import SMOTETomek [as 别名]
# 或者: from imblearn.combine.SMOTETomek import fit_sample [as 别名]
def test_sample_regular():
    """Compare default SMOTETomek resampling of (X, Y) with stored arrays.

    The expected output is loaded from ``.npy`` files in the ``data``
    directory next to this test module.
    """
    sampler = SMOTETomek(random_state=RND_SEED)
    sampler.fit(X, Y)
    X_resampled, y_resampled = sampler.fit_sample(X, Y)

    # Ground-truth arrays live alongside this file.
    data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')
    expected_X = np.load(os.path.join(data_dir, 'smote_tomek_reg_x.npy'))
    expected_y = np.load(os.path.join(data_dir, 'smote_tomek_reg_y.npy'))

    assert_array_equal(X_resampled, expected_X)
    assert_array_equal(y_resampled, expected_y)
示例4: test_sample_regular_half
# 需要导入模块: from imblearn.combine import SMOTETomek [as 别名]
# 或者: from imblearn.combine.SMOTETomek import fit_sample [as 别名]
def test_sample_regular_half():
    """Compare SMOTETomek resampling at ``ratio=0.5`` with stored arrays.

    The expected output is loaded from ``.npy`` files in the ``data``
    directory next to this test module.
    """
    sampler = SMOTETomek(ratio=0.5, random_state=RND_SEED)
    sampler.fit(X, Y)
    X_resampled, y_resampled = sampler.fit_sample(X, Y)

    # Ground-truth arrays live alongside this file.
    data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
    expected_X = np.load(os.path.join(data_dir, "smote_tomek_reg_x_05.npy"))
    expected_y = np.load(os.path.join(data_dir, "smote_tomek_reg_y_05.npy"))

    assert_array_equal(X_resampled, expected_X)
    assert_array_equal(y_resampled, expected_y)
示例5: main_cv_loop
# 需要导入模块: from imblearn.combine import SMOTETomek [as 别名]
# 或者: from imblearn.combine.SMOTETomek import fit_sample [as 别名]
def main_cv_loop(Xdata, Ydata, clf, parameters,
                 n_folds=4, oversample_thresh=0.1, verbose=False):
    """Run the outer cross-validation loop and return out-of-fold predictions.

    Each stratified shuffle split holds out 20% of the rows. The training
    fold is resampled with ``SMOTETomek`` when its label mean is more than
    0.2 away from 0.5, then handed to ``inner_cv_loop`` to pick an
    estimator. Predictions for the held-out rows are written into arrays
    aligned with ``Ydata``.

    Parameters
    ----------
    Xdata : array
        Feature matrix, indexed as ``Xdata[rows, :]``.
    Ydata : array
        Labels, indexed as ``Ydata[rows]``.
    clf, parameters
        Forwarded unchanged to ``inner_cv_loop``.
    n_folds : int
        Number of shuffle splits. Previously this argument was ignored and
        4 splits were always used; the default of 4 keeps that behaviour.
    oversample_thresh : float
        Currently has no effect: the per-fold imbalance check below uses a
        fixed 0.2. NOTE(review): confirm which threshold is intended before
        wiring this through, because doing so changes the default behaviour.
    verbose : bool
        Print progress messages from this function.

    Returns
    -------
    tuple
        ``(roc_auc, Ydata, pred, pred_proba)`` where ``roc_auc`` is
        ``roc_auc_score(Ydata, pred, average='weighted')``.

    Notes
    -----
    Shuffle splits may overlap, so rows that never land in a test fold keep
    their initial 0 in ``pred`` / ``pred_proba``, and rows held out more
    than once keep the last value written.
    """
    kf = StratifiedShuffleSplit(n_splits=n_folds, test_size=0.2)

    # Output buffers, aligned with Ydata.
    pred = numpy.zeros(len(Ydata))        # predicted labels
    pred_proba = numpy.zeros(len(Ydata))  # predicted probabilities
    kernel = []
    C = []

    for train, test in kf.split(Xdata, Ydata):
        Xtrain = Xdata[train, :]
        Xtest = Xdata[test, :]
        Ytrain = Ydata[train]
        # Only the training fold is ever resampled; the test fold stays as-is.
        if numpy.abs(numpy.mean(Ytrain) - 0.5) > 0.2:
            if verbose:
                print('oversampling using SMOTETomek')
            sm = SMOTETomek()
            Xtrain, Ytrain = sm.fit_sample(Xtrain, Ytrain)
        # NOTE(review): the inner loop is always run with verbose=True,
        # regardless of this function's ``verbose`` -- confirm intent.
        best_estimator_, bestroc, fa = inner_cv_loop(Xtrain, Ytrain, clf,
                                                     parameters, verbose=True)
        if fa is not None:
            # The inner loop returned a fitted transformer; apply the same
            # transform to the held-out rows before predicting.
            if verbose:
                print('transforming using fa')
                print(fa)
            Xtest = fa.transform(Xtest)
        # NOTE(review): if predict_proba returns one column per class, this
        # flat assignment will not store a single per-row probability --
        # verify against the estimator that inner_cv_loop returns.
        pred_proba.flat[test] = best_estimator_.predict_proba(Xtest)
        pred.flat[test] = best_estimator_.predict(Xtest)
        # Not returned, but kept: reading these attributes fails fast when
        # the selected estimator lacks ``kernel``/``C``, as before.
        kernel.append(best_estimator_.kernel)
        C.append(best_estimator_.C)
    return roc_auc_score(Ydata, pred, average='weighted'), Ydata, pred, pred_proba
示例6: test_sample_regular_half
# 需要导入模块: from imblearn.combine import SMOTETomek [as 别名]
# 或者: from imblearn.combine.SMOTETomek import fit_sample [as 别名]
def test_sample_regular_half():
    """Check SMOTETomek output for a per-class ``ratio`` dict.

    The resampled features are compared to fixed expected values within
    ``R_TOL``; the resampled labels must match exactly.
    """
    sampler = SMOTETomek(ratio={0: 9, 1: 12}, random_state=RND_SEED)
    X_resampled, y_resampled = sampler.fit_sample(X, Y)

    expected_rows = [
        [0.68481731, 0.51935141],
        [0.62366841, -0.21312976],
        [1.61091956, -0.40283504],
        [-0.37162401, -2.19400981],
        [0.74680821, 1.63827342],
        [0.61472253, -0.82309052],
        [0.19893132, -0.47761769],
        [1.40301027, -0.83648734],
        [-1.20515198, -1.02689695],
        [-0.23374509, 0.18370049],
        [-0.00288378, 0.84259929],
        [1.79580611, -0.02219234],
        [0.45784496, -0.1053161],
    ]
    expected_labels = [1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0]

    assert_allclose(X_resampled, np.array(expected_rows), rtol=R_TOL)
    assert_array_equal(y_resampled, np.array(expected_labels))
示例7: test_sample_regular
# 需要导入模块: from imblearn.combine import SMOTETomek [as 别名]
# 或者: from imblearn.combine.SMOTETomek import fit_sample [as 别名]
def test_sample_regular():
    """Check default SMOTETomek output against fixed expected arrays.

    The resampled features are compared to fixed expected values within
    ``R_TOL``; the resampled labels must match exactly.
    """
    sampler = SMOTETomek(random_state=RND_SEED)
    X_resampled, y_resampled = sampler.fit_sample(X, Y)

    expected_rows = [
        [0.68481731, 0.51935141],
        [1.34192108, -0.13367336],
        [0.62366841, -0.21312976],
        [1.61091956, -0.40283504],
        [-0.37162401, -2.19400981],
        [0.74680821, 1.63827342],
        [0.61472253, -0.82309052],
        [0.19893132, -0.47761769],
        [1.40301027, -0.83648734],
        [-1.20515198, -1.02689695],
        [-0.23374509, 0.18370049],
        [-0.00288378, 0.84259929],
        [1.79580611, -0.02219234],
        [0.38307743, -0.05670439],
        [0.70319159, -0.02571667],
        [0.75052536, -0.19246518],
    ]
    expected_labels = [1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0]

    assert_allclose(X_resampled, np.array(expected_rows), rtol=R_TOL)
    assert_array_equal(y_resampled, np.array(expected_labels))
示例8: print
# 需要导入模块: from imblearn.combine import SMOTETomek [as 别名]
# 或者: from imblearn.combine.SMOTETomek import fit_sample [as 别名]
# Example script: resample an imbalanced two-class dataset with SMOTETomek
# and scatter-plot it, before and after, in a 2D PCA projection.
print(__doc__)
# Generate the dataset: 100 samples, 20 features, two classes weighted
# 0.1 / 0.9; random_state is fixed so the data is reproducible.
X, y = make_classification(n_classes=2, class_sep=2, weights=[0.1, 0.9],
n_informative=3, n_redundant=1, flip_y=0,
n_features=20, n_clusters_per_class=1,
n_samples=100, random_state=10)
# Instantiate a PCA object for the sake of easy visualisation
pca = PCA(n_components=2)
# Fit and transform X to visualise inside a 2D feature space
X_vis = pca.fit_transform(X)
# Apply SMOTE + Tomek links
sm = SMOTETomek()
X_resampled, y_resampled = sm.fit_sample(X, y)
# Project the resampled points with the PCA already fitted on the original
# data, so both panels share the same axes.
X_res_vis = pca.transform(X_resampled)
# Two subplots, unpack the axes array immediately
f, (ax1, ax2) = plt.subplots(1, 2)
# Left panel: original data, one scatter per class.
# NOTE(review): c0 / c1 are not used in the lines visible here; presumably
# they feed a legend further down -- confirm.
c0 = ax1.scatter(X_vis[y == 0, 0], X_vis[y == 0, 1], label="Class #0",
alpha=0.5)
c1 = ax1.scatter(X_vis[y == 1, 0], X_vis[y == 1, 1], label="Class #1",
alpha=0.5)
ax1.set_title('Original set')
# Right panel: resampled data, one scatter per class.
ax2.scatter(X_res_vis[y_resampled == 0, 0], X_res_vis[y_resampled == 0, 1],
label="Class #0", alpha=0.5)
ax2.scatter(X_res_vis[y_resampled == 1, 0], X_res_vis[y_resampled == 1, 1],
label="Class #1", alpha=0.5)