Python combine.SMOTETomek類代碼示例

本文整理匯總了Python中imblearn.combine.SMOTETomek類的典型用法代碼示例。如果您正苦於以下問題：Python SMOTETomek類的具體用法？Python SMOTETomek怎麼用？Python SMOTETomek使用的例子？那麼，這裡精選的類代碼示例或許可以為您提供幫助。

在下文中一共展示了SMOTETomek類的11個代碼示例，這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚，您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: outer_cv_loop

def outer_cv_loop(Xdata,Ydata,clf,parameters=None,
                    n_splits=10,test_size=0.25):
    """Run a stratified shuffle-split CV loop and collect per-fold scores.

    For every split the train and test feature matrices are imputed
    separately with ``fancyimpute.SoftImpute``; the training fold is
    resampled with ``SMOTETomek`` when its label mean is more than 0.2
    away from 0.5; ``clf`` is then fitted and scored on the test fold.

    Parameters
    ----------
    Xdata : array indexed as ``Xdata[rows, :]``
        Feature matrix.
    Ydata : array indexed as ``Ydata[rows]``
        Labels. The 0.5-centred imbalance check presumably assumes 0/1
        coding -- confirm against the caller.
    clf : estimator
        Must provide ``fit``, ``predict`` and ``feature_importances_``.
    parameters : optional
        Accepted for interface compatibility; not used by this function.
    n_splits : int
        Number of shuffle splits.
    test_size : float
        Passed through to ``StratifiedShuffleSplit``.

    Returns
    -------
    rocscores : list
        One entry per split: the ROC AUC of the predictions, or
        ``numpy.nan`` when the test labels or the predictions have zero
        variance.
    importances : list
        ``clf.feature_importances_`` for every split that was fitted.
    """
    kf=StratifiedShuffleSplit(n_splits=n_splits,test_size=test_size)
    rocscores=[]
    importances=[]
    for train,test in kf.split(Xdata,Ydata):
        Ytest=Ydata[test]
        if numpy.var(Ytest)==0:
            # `varname` is not defined in this function; look it up
            # defensively so a missing module-level global cannot turn
            # this skip path into a NameError.
            print('zero variance',globals().get('varname','<unknown>'))
            rocscores.append(numpy.nan)
            continue
        Ytrain=Ydata[train]
        Xtrain=fancyimpute.SoftImpute(verbose=False).complete(Xdata[train,:])
        Xtest=fancyimpute.SoftImpute(verbose=False).complete(Xdata[test,:])
        # Only resample the training fold, and only when it is clearly
        # imbalanced.
        if numpy.abs(numpy.mean(Ytrain)-0.5)>0.2:
            smt = SMOTETomek()
            Xtrain,Ytrain=smt.fit_sample(Xtrain.copy(),Ytrain)
        clf.fit(Xtrain,Ytrain)
        pred=clf.predict(Xtest)
        # Constant predictions carry no ranking information; record NaN
        # for such folds instead of a score.
        if numpy.var(pred)>0:
            rocscores.append(roc_auc_score(Ytest,pred))
        else:
            rocscores.append(numpy.nan)
        importances.append(clf.feature_importances_)
    return rocscores,importances

開發者ID:IanEisenberg，項目名稱:Self_Regulation_Ontology，代碼行數:27，代碼來源:demographic_feature_importance_behav.py

示例2: test_sample_wrong_X

def test_sample_wrong_X():
    """Check that ``sample`` raises when given data other than what was
    passed to ``fit``."""
    sampler = SMOTETomek(random_state=RND_SEED)
    sampler.fit(X, Y)

    # Fresh random inputs that were never seen during fitting.
    other_X = np.random.random((100, 40))
    other_y = np.array([0] * 50 + [1] * 50)
    assert_raises(RuntimeError, sampler.sample, other_X, other_y)

開發者ID:yuwin，項目名稱:UnbalancedDataset，代碼行數:8，代碼來源:test_smote_tomek.py

示例3: test_sample_regular

def test_sample_regular():
    """Compare the default SMOTETomek resampling with the stored arrays."""
    sampler = SMOTETomek(random_state=RND_SEED)
    sampler.fit(X, Y)
    X_resampled, y_resampled = sampler.fit_sample(X, Y)

    # Reference outputs live in the ``data`` directory beside this file.
    data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                            'data')
    expected_X = np.load(os.path.join(data_dir, 'smote_tomek_reg_x.npy'))
    expected_y = np.load(os.path.join(data_dir, 'smote_tomek_reg_y.npy'))

    assert_array_equal(X_resampled, expected_X)
    assert_array_equal(y_resampled, expected_y)

開發者ID:vivounicorn，項目名稱:imbalanced-learn，代碼行數:15，代碼來源:test_smote_tomek.py

示例4: test_validate_estimator_default

def test_validate_estimator_default():
    """Resample with default sub-estimators and compare with fixed values."""
    sampler = SMOTETomek(random_state=RND_SEED)
    X_resampled, y_resampled = sampler.fit_resample(X, Y)

    # Expected output, one sample per row.
    expected_rows = [
        [0.68481731, 0.51935141],
        [1.34192108, -0.13367336],
        [0.62366841, -0.21312976],
        [1.61091956, -0.40283504],
        [-0.37162401, -2.19400981],
        [0.74680821, 1.63827342],
        [0.61472253, -0.82309052],
        [0.19893132, -0.47761769],
        [1.40301027, -0.83648734],
        [-1.20515198, -1.02689695],
        [-0.23374509, 0.18370049],
        [-0.00288378, 0.84259929],
        [1.79580611, -0.02219234],
        [0.38307743, -0.05670439],
        [0.70319159, -0.02571667],
        [0.75052536, -0.19246518],
    ]
    expected_labels = [1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0]
    X_gt = np.array(expected_rows)
    y_gt = np.array(expected_labels)

    assert_allclose(X_resampled, X_gt, rtol=R_TOL)
    assert_array_equal(y_resampled, y_gt)

開發者ID:bodycat，項目名稱:imbalanced-learn，代碼行數:16，代碼來源:test_smote_tomek.py

示例5: test_sample_regular_half

def test_sample_regular_half():
    """Resample with an explicit per-class target and compare with fixed
    values."""
    sampler = SMOTETomek(sampling_strategy={0: 9, 1: 12},
                         random_state=RND_SEED)
    X_resampled, y_resampled = sampler.fit_resample(X, Y)

    # Expected output, one sample per row.
    expected_rows = [
        [0.68481731, 0.51935141],
        [0.62366841, -0.21312976],
        [1.61091956, -0.40283504],
        [-0.37162401, -2.19400981],
        [0.74680821, 1.63827342],
        [0.61472253, -0.82309052],
        [0.19893132, -0.47761769],
        [1.40301027, -0.83648734],
        [-1.20515198, -1.02689695],
        [-0.23374509, 0.18370049],
        [-0.00288378, 0.84259929],
        [1.79580611, -0.02219234],
        [0.45784496, -0.1053161],
    ]
    expected_labels = [1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0]
    X_gt = np.array(expected_rows)
    y_gt = np.array(expected_labels)

    assert_allclose(X_resampled, X_gt, rtol=R_TOL)
    assert_array_equal(y_resampled, y_gt)

開發者ID:bodycat，項目名稱:imbalanced-learn，代碼行數:16，代碼來源:test_smote_tomek.py

示例6: test_sample_regular_half

def test_sample_regular_half():
    """Compare SMOTETomek resampling at ``ratio=0.5`` with the stored
    arrays."""
    sampler = SMOTETomek(ratio=0.5, random_state=RND_SEED)
    sampler.fit(X, Y)
    X_resampled, y_resampled = sampler.fit_sample(X, Y)

    # Reference outputs live in the ``data`` directory beside this file.
    data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                            "data")
    expected_X = np.load(os.path.join(data_dir, "smote_tomek_reg_x_05.npy"))
    expected_y = np.load(os.path.join(data_dir, "smote_tomek_reg_y_05.npy"))

    assert_array_equal(X_resampled, expected_X)
    assert_array_equal(y_resampled, expected_y)

開發者ID:yuwin，項目名稱:UnbalancedDataset，代碼行數:16，代碼來源:test_smote_tomek.py

示例7: main_cv_loop

def main_cv_loop(Xdata,Ydata,clf,parameters,
                n_folds=4,oversample_thresh=0.1,verbose=False):
    """Outer cross-validation loop around ``inner_cv_loop``.

    Each stratified shuffle split holds out 20% of the samples. The
    training fold is resampled with ``SMOTETomek`` when its label mean is
    more than 0.2 away from 0.5, ``inner_cv_loop`` selects an estimator
    on it, and that estimator predicts the held-out samples.

    Parameters
    ----------
    Xdata : array indexed as ``Xdata[rows, :]``
        Feature matrix.
    Ydata : array indexed as ``Ydata[rows]``
        Labels. The 0.5-centred imbalance check presumably assumes 0/1
        coding -- confirm against the caller.
    clf, parameters
        Forwarded unchanged to ``inner_cv_loop``.
    n_folds : int
        Number of shuffle splits (previously ignored in favour of a
        hard-coded 4, which is still the default).
    oversample_thresh : float
        Accepted for interface compatibility. NOTE(review): the per-fold
        check below uses a fixed 0.2 threshold, so this value currently
        has no effect; wiring it in would change default behaviour.
    verbose : bool
        Print progress messages from this loop.

    Returns
    -------
    tuple
        ``(weighted ROC AUC of pred vs. Ydata, Ydata, pred, pred_proba)``.
    """
    kf=StratifiedShuffleSplit(n_splits=n_folds,test_size=0.2)

    # Outputs are filled in at the test indices of each split; entries
    # never selected as test samples keep their initial value of 0.
    pred=numpy.zeros(len(Ydata))
    pred_proba=numpy.zeros(len(Ydata))

    for train,test in kf.split(Xdata,Ydata):
        Xtrain=Xdata[train,:]
        Xtest=Xdata[test,:]
        Ytrain=Ydata[train]
        if numpy.abs(numpy.mean(Ytrain)-0.5)>0.2:
            if verbose:
                print('oversampling using SMOTETomek')
            sm = SMOTETomek()
            Xtrain, Ytrain = sm.fit_sample(Xtrain, Ytrain)

        # NOTE(review): verbose=True is hard-coded here rather than
        # forwarding this function's own flag -- confirm that is intended.
        best_estimator_,bestroc,fa=inner_cv_loop(Xtrain,Ytrain,clf,
                    parameters,verbose=True)
        if fa is not None:
            # The inner loop returned a fitted transformer; apply the same
            # transform to the held-out features before predicting.
            if verbose:
                print('transforming using fa')
                print(fa)
            Xtest=fa.transform(Xtest)
        # NOTE(review): predict_proba usually returns one column per
        # class; assigning that through .flat[test] may not store the
        # positive-class probability -- verify before relying on
        # pred_proba.
        pred_proba.flat[test]=best_estimator_.predict_proba(Xtest)
        pred.flat[test]=best_estimator_.predict(Xtest)
    return roc_auc_score(Ydata,pred,average='weighted'),Ydata,pred,pred_proba

開發者ID:IanEisenberg，項目名稱:Self_Regulation_Ontology，代碼行數:43，代碼來源:crossvalidation.py

示例8: test_error_wrong_object

def test_error_wrong_object():
    """Passing a plain string as ``smote`` or ``tomek`` must raise
    ``ValueError`` with the matching message."""
    bad_estimator = 'rnd'
    cases = (
        ('smote', "smote needs to be a SMOTE"),
        ('tomek', "tomek needs to be a TomekLinks"),
    )
    for keyword, message in cases:
        smt = SMOTETomek(random_state=RND_SEED, **{keyword: bad_estimator})
        with raises(ValueError, match=message):
            smt.fit_resample(X, Y)

開發者ID:bodycat，項目名稱:imbalanced-learn，代碼行數:9，代碼來源:test_smote_tomek.py

示例9: test_parallelisation

def test_parallelisation():
    """``n_jobs`` must reach both sub-estimators, with 1 as the default."""
    # First case: no n_jobs given, expect 1. Second case: an explicit 8.
    scenarios = (({}, 1), ({'n_jobs': 8}, 8))
    for extra_kwargs, expected_jobs in scenarios:
        smt = SMOTETomek(random_state=RND_SEED, **extra_kwargs)
        smt._validate_estimator()
        assert smt.n_jobs == expected_jobs
        assert smt.smote_.n_jobs == expected_jobs
        assert smt.tomek_.n_jobs == expected_jobs

開發者ID:scikit-learn-contrib，項目名稱:imbalanced-learn，代碼行數:14，代碼來源:test_smote_tomek.py

示例10: print

# Example script: resample a two-class dataset with SMOTETomek and plot the
# original and resampled points side by side in a 2D PCA projection.
print(__doc__)

# Generate the dataset: 100 samples, 20 features, two classes weighted
# 0.1 / 0.9
X, y = make_classification(n_classes=2, class_sep=2, weights=[0.1, 0.9],
                           n_informative=3, n_redundant=1, flip_y=0,
                           n_features=20, n_clusters_per_class=1,
                           n_samples=100, random_state=10)

# Instantiate a PCA object for the sake of easy visualisation
pca = PCA(n_components=2)
# Fit and transform X to visualise inside a 2D feature space
X_vis = pca.fit_transform(X)

# Apply SMOTE + Tomek links
sm = SMOTETomek()
X_resampled, y_resampled = sm.fit_sample(X, y)
# Project the resampled points with the PCA already fitted on the original X
X_res_vis = pca.transform(X_resampled)

# Two subplots, unpack the axes array immediately
f, (ax1, ax2) = plt.subplots(1, 2)

# Left panel: the original samples, one scatter call per class
c0 = ax1.scatter(X_vis[y == 0, 0], X_vis[y == 0, 1], label="Class #0",
                 alpha=0.5)
c1 = ax1.scatter(X_vis[y == 1, 0], X_vis[y == 1, 1], label="Class #1",
                 alpha=0.5)
ax1.set_title('Original set')

# Right panel: the resampled samples
ax2.scatter(X_res_vis[y_resampled == 0, 0], X_res_vis[y_resampled == 0, 1],
            label="Class #0", alpha=0.5)
ax2.scatter(X_res_vis[y_resampled == 1, 0], X_res_vis[y_resampled == 1, 1],

開發者ID:glemaitre，項目名稱:imbalanced-learn，代碼行數:30，代碼來源:plot_smote_tomek.py

示例11: test_error_wrong_object

def test_error_wrong_object(smote_params, err_msg):
    """Building SMOTETomek from ``smote_params`` and resampling must raise
    ``ValueError`` matching ``err_msg``."""
    sampler = SMOTETomek(**smote_params)
    expected_failure = pytest.raises(ValueError, match=err_msg)
    with expected_failure:
        sampler.fit_resample(X, Y)

開發者ID:scikit-learn-contrib，項目名稱:imbalanced-learn，代碼行數:4，代碼來源:test_smote_tomek.py

注：本文中的imblearn.combine.SMOTETomek類示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台，相關代碼片段篩選自各路編程大神貢獻的開源項目，源碼版權歸原作者所有，傳播和使用請參考對應項目的License；未經允許，請勿轉載。