當前位置: 首頁>>代碼示例>>Python>>正文


Python over_sampling.SMOTE類代碼示例

本文整理匯總了Python中imblearn.over_sampling.SMOTE的典型用法代碼示例。如果您正苦於以下問題:Python SMOTE類的具體用法?Python SMOTE怎麽用?Python SMOTE使用的例子?那麽, 這裏精選的類代碼示例或許可以為您提供幫助。


在下文中一共展示了SMOTE類的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。

示例1: test_fit_resample_nn_obj

def test_fit_resample_nn_obj():
    kind = 'borderline1'
    nn_m = NearestNeighbors(n_neighbors=11)
    nn_k = NearestNeighbors(n_neighbors=6)
    smote = SMOTE(
        random_state=RND_SEED, kind=kind, k_neighbors=nn_k, m_neighbors=nn_m)
    X_resampled, y_resampled = smote.fit_resample(X, Y)
    X_gt = np.array([[0.11622591, -0.0317206], [0.77481731, 0.60935141], [
        1.25192108, -0.22367336
    ], [0.53366841, -0.30312976], [1.52091956, -0.49283504], [
        -0.28162401, -2.10400981
    ], [0.83680821, 1.72827342], [0.3084254, 0.33299982], [
        0.70472253, -0.73309052
    ], [0.28893132, -0.38761769], [1.15514042, 0.0129463], [
        0.88407872, 0.35454207
    ], [1.31301027, -0.92648734], [-1.11515198, -0.93689695], [
        -0.18410027, -0.45194484
    ], [0.9281014, 0.53085498], [-0.14374509, 0.27370049], [
        -0.41635887, -0.38299653
    ], [0.08711622, 0.93259929], [1.70580611, -0.11219234],
                     [0.3765279, -0.2009615], [0.55276636, -0.10550373],
                     [0.45413452, -0.08883319], [1.21118683, -0.22817957]])
    y_gt = np.array([
        0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0
    ])
    assert_allclose(X_resampled, X_gt, rtol=R_TOL)
    assert_array_equal(y_resampled, y_gt)
開發者ID:chkoar,項目名稱:imbalanced-learn,代碼行數:27,代碼來源:test_smote.py

示例2: test_sample_regular_with_nn_svm

def test_sample_regular_with_nn_svm():
    """Test sample function with regular SMOTE with a NN object."""

    # Create the object
    kind = 'svm'
    nn_k = NearestNeighbors(n_neighbors=6)
    svm = SVC(random_state=RND_SEED)
    smote = SMOTE(
        random_state=RND_SEED, kind=kind, k_neighbors=nn_k, svm_estimator=svm)

    X_resampled, y_resampled = smote.fit_sample(X, Y)

    X_gt = np.array([[0.11622591, -0.0317206], [0.77481731, 0.60935141],
                     [1.25192108, -0.22367336], [0.53366841, -0.30312976],
                     [1.52091956, -0.49283504], [-0.28162401, -2.10400981],
                     [0.83680821, 1.72827342], [0.3084254, 0.33299982],
                     [0.70472253, -0.73309052], [0.28893132, -0.38761769],
                     [1.15514042, 0.0129463], [0.88407872, 0.35454207],
                     [1.31301027, -0.92648734], [-1.11515198, -0.93689695],
                     [-0.18410027, -0.45194484], [0.9281014, 0.53085498],
                     [-0.14374509, 0.27370049], [-0.41635887, -0.38299653],
                     [0.08711622, 0.93259929], [1.70580611, -0.11219234],
                     [0.47436888, -0.2645749], [1.07844561, -0.19435291],
                     [1.44015515, -1.30621303]])
    y_gt = np.array(
        [0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0])
    assert_array_almost_equal(X_resampled, X_gt)
    assert_array_equal(y_resampled, y_gt)
開發者ID:kellyhennigan,項目名稱:cueexp_scripts,代碼行數:28,代碼來源:test_smote.py

示例3: test_sample_with_nn_svm

def test_sample_with_nn_svm():
    kind = 'svm'
    nn_k = NearestNeighbors(n_neighbors=6)
    svm = SVC(gamma='scale', random_state=RND_SEED)
    smote = SMOTE(
        random_state=RND_SEED, kind=kind, k_neighbors=nn_k, svm_estimator=svm)
    X_resampled, y_resampled = smote.fit_resample(X, Y)
    X_gt = np.array([[0.11622591, -0.0317206],
                     [0.77481731, 0.60935141],
                     [1.25192108, -0.22367336],
                     [0.53366841, -0.30312976],
                     [1.52091956, -0.49283504],
                     [-0.28162401, -2.10400981],
                     [0.83680821, 1.72827342],
                     [0.3084254, 0.33299982],
                     [0.70472253, -0.73309052],
                     [0.28893132, -0.38761769],
                     [1.15514042, 0.0129463],
                     [0.88407872, 0.35454207],
                     [1.31301027, -0.92648734],
                     [-1.11515198, -0.93689695],
                     [-0.18410027, -0.45194484],
                     [0.9281014, 0.53085498],
                     [-0.14374509, 0.27370049],
                     [-0.41635887, -0.38299653],
                     [0.08711622, 0.93259929],
                     [1.70580611, -0.11219234],
                     [0.47436887, -0.2645749],
                     [1.07844562, -0.19435291],
                     [1.44228238, -1.31256615],
                     [1.25636713, -1.04463226]])
    y_gt = np.array([0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0,
                     1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0])
    assert_allclose(X_resampled, X_gt, rtol=R_TOL)
    assert_array_equal(y_resampled, y_gt)
開發者ID:chkoar,項目名稱:imbalanced-learn,代碼行數:35,代碼來源:test_smote.py

示例4: fit

    def fit(self, X , y = None):
        # 'Random under-sampling'
        # CondensedNearestNeighbour(size_ngh=51, n_seeds_S=51)
        #Accuracy: 0.939693267481
        #Precision: 0.238095238095
        #Recall: 0.897435897436

        #Accuracy: 0.962568234988
        #Precision: 0.324468085106
        #Recall: 0.782051282051
        #SMOTE(ratio=ratio, kind='borderline1')
        #Accuracy: 0.971146347803
        #Precision: 0.372093023256
        #Recall: 0.615384615385
        #SMOTE(ratio=ratio, kind='borderline2')
        #Accuracy: 0.965427605927
        #Precision: 0.333333333333
        #Recall: 0.705128205128
        #svm_args = {'class_weight': 'auto'}
        #svmsmote = SMOTE(ratio=ratio, kind='svm', **svm_args)
        #Accuracy: 0.972186119054
        #Precision: 0.395683453237
        #Recall: 0.705128205128

        smote = SMOTE(ratio='auto', kind='regular')
        X, y = smote.fit_sample(X, y)
       # weights = np.array([1/y.mean() if i == 1 else 1 for i in y])
        return super(RandomForestClassifier, self).fit(X,y)#,sample_weight=weights)
開發者ID:nmoraesmunter,項目名稱:BeTheChange,代碼行數:28,代碼來源:verified_victory_pipeline.py

示例5: train

def train(addr_train, clf, sampling, add_estimators):
    with open(os.path.join(addr_train, "day_samp_bin.npy"), "r") as file_in:
        X = smio.load_sparse_csr(file_in)
    width = np.size(X, 1)
    X_train = X[:, :width-1]
    y_train = X[:, width-1]
    if sampling == "Over":
        sm = SMOTE(ratio=0.95)
        X_train, y_train = sm.fit_sample(X_train, y_train)
    elif sampling == "Under":
        X_train, y_train = US.undersample(X, 0.01)

    print "Fitting Model......"
    clf.n_estimators += add_estimators
    clf.fit(X_train, y_train)
    print "Done"

    if __SAVE_MODEL:
        model_name = "RF_" + onoff_line + "_" + sampling + "_Model.p"
        dir_out = os.path.join(addr_train, "Random_Forest_Models")
        if not os.path.isdir(dir_out):
            os.mkdir(dir_out)
        path_out = os.path.join(dir_out, model_name)
        with open(path_out, "w") as file_out:
            pickle.dump(clf, file_out)

    return clf
開發者ID:Shurooo,項目名稱:gumgum,代碼行數:27,代碼來源:Random_Forest_Test_Tmp.py

示例6: test_sample_borderline2

def test_sample_borderline2():
    """Test sample function with borderline 2 SMOTE."""

    # Create the object
    kind = 'borderline2'
    smote = SMOTE(random_state=RND_SEED, kind=kind)
    # Fit the data
    smote.fit(X, Y)

    X_resampled, y_resampled = smote.fit_sample(X, Y)

    X_gt = np.array([[0.11622591, -0.0317206], [0.77481731, 0.60935141],
                     [1.25192108, -0.22367336], [0.53366841, -0.30312976],
                     [1.52091956, -0.49283504], [-0.28162401, -2.10400981],
                     [0.83680821, 1.72827342], [0.3084254, 0.33299982],
                     [0.70472253, -0.73309052], [0.28893132, -0.38761769],
                     [1.15514042, 0.0129463], [0.88407872, 0.35454207],
                     [1.31301027, -0.92648734], [-1.11515198, -0.93689695],
                     [-0.18410027, -0.45194484], [0.9281014, 0.53085498],
                     [-0.14374509, 0.27370049], [-0.41635887, -0.38299653],
                     [0.08711622, 0.93259929], [1.70580611, -0.11219234],
                     [0.47436888, -0.2645749], [1.07844561, -0.19435291],
                     [0.33339622, 0.49870937]])
    y_gt = np.array(
        [0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0])
    assert_array_almost_equal(X_resampled, X_gt)
    assert_array_equal(y_resampled, y_gt)
開發者ID:kellyhennigan,項目名稱:cueexp_scripts,代碼行數:27,代碼來源:test_smote.py

示例7: get_data

def get_data(month, day, hour=-1, mode="normal"):
    if hour != -1:
        if hour == 24:
            hour = 0
            day += 1
        addr_in = os.path.join("/mnt/rips2/2016",
                               str(month).rjust(2, "0"),
                               str(day).rjust(2, "0"),
                               str(hour).rjust(2, "0"),
                               "output_bin.npy")
    else:
        addr_in = os.path.join("/mnt/rips2/2016",
                               str(month).rjust(2, "0"),
                               str(day).rjust(2, "0"),
                               "day_samp_newer_bin.npy")
    with open(addr_in, "r") as file_in:
        loader = np.load(file_in)
        data = csr_matrix((loader['data'], loader['indices'], loader['indptr']), shape=loader['shape']).toarray()
    X = data[:, :-1]
    y = data[:, -1]

    if mode == "over":
        sm = SMOTE(ratio=0.99, verbose=0)
        X, y = sm.fit_sample(X, y)

    return X, y
開發者ID:Shurooo,項目名稱:gumgum,代碼行數:26,代碼來源:Bernoulli_Simple.py

示例8: resample_data

def resample_data(X, y, categorical_lst):
    '''
    up-samples minority class
    '''
    sm = SMOTE(kind='regular')
    X_train_re, y_train_re = sm.fit_sample(X,y)
    #rounding categorical variables
    X_train_re[:,categorical_lst] = np.round(X_train_re[:,categorical_lst])
    return X_train_re, y_train_re
開發者ID:BryceLuna,項目名稱:Fraud_Detection,代碼行數:9,代碼來源:Load_Data.py

示例9: test_sample_wrong_X

def test_sample_wrong_X():
    """Test either if an error is raised when X is different at fitting
    and sampling"""

    # Create the object
    sm = SMOTE(random_state=RND_SEED)
    sm.fit(X, Y)
    assert_raises(RuntimeError, sm.sample,
                  np.random.random((100, 40)), np.array([0] * 50 + [1] * 50))
開發者ID:kellyhennigan,項目名稱:cueexp_scripts,代碼行數:9,代碼來源:test_smote.py

示例10: test_sample_regular_wrong_svm

def test_sample_regular_wrong_svm():
    kind = 'svm'
    nn_k = NearestNeighbors(n_neighbors=6)
    svm = 'rnd'
    smote = SMOTE(
        random_state=RND_SEED, kind=kind, k_neighbors=nn_k, svm_estimator=svm)

    with raises(ValueError, match="has to be one of"):
        smote.fit_sample(X, Y)
開發者ID:glemaitre,項目名稱:imbalanced-learn,代碼行數:9,代碼來源:test_smote.py

示例11: Input_Preparing

def Input_Preparing(Scaled_Input_Data, Surgery_Outcome, N_Feat):
    # Feature Selection
    MIFS = mifs.MutualInformationFeatureSelector(method='JMI', verbose=2, n_features = N_Feat)
    MIFS.fit(Scaled_Input_Data, Surgery_Outcome)
    Selected_Input_Data = Scaled_Input_Data.loc[:,MIFS.support_]

    # Balancing using SMOTE
    sm = SMOTE(kind='regular')
    Prep_Train_Data, Prep_Surgery_Outcome = sm.fit_sample(X, y)
    
    return(Prep_Train_Data, Prep_Surgery_Outcome, MIFS.support_)
開發者ID:Nekooeimehr,項目名稱:Python-Surgical-Failure-Prediction-Code,代碼行數:11,代碼來源:Input_Preparing.py

示例12: SMT

def SMT(df, target):
    df1 = df.copy()
    y = df1.pop('anti_churn')
    X = df1
    Xcols = df1.columns
    sm = SMOTE(kind='regular', ratio = target)
    X_resampled, y_resampled = sm.fit_sample(X, y)
    X_resampled = pd.DataFrame(X_resampled)
    y_resampled = pd.DataFrame(y_resampled)
    X_resampled.columns = Xcols
    y_resampled.columns = ['anti_churn']
    return X_resampled, y_resampled
開發者ID:Shimonzu,項目名稱:Ultralinks,代碼行數:12,代碼來源:Ultralinks_Code.py

示例13: transform

    def transform(self, fp):
        fm, train_x, train_y = FeaturePool.to_train_arrays(fp)

        os = SMOTE(random_state = self.random_state)
        os_train_x, os_train_y = os.fit_sample(train_x, train_y[:, 0])
        os_train_y = os_train_y.reshape((os_train_y.shape[0], 1))

        for f in FeaturePool.from_train_arrays(fm, os_train_x, os_train_y):
            yield Feature.apply_config(f, is_over_sampled=True)
        for f in fp:
            if f.split_type == SplitType.TEST:
                yield f
開發者ID:alexeyche,項目名稱:alexeyche-junk,代碼行數:12,代碼來源:transform.py

示例14: oversample

    def oversample(self):
        """Balance class data based on outcome"""
        print('Current outcome sampling {}'.format(Counter(self.y)))
        
        # to use a random sampling seed at random:
        #ros = RandomOverSampler()
        ros = SMOTE()
        #ros = ADASYN()

        self.X, self.y = ros.fit_sample(self.X, self.y)

        self.Xview = self.X.view()[:, :self.n_features]
        print('Resampled dataset shape {}'.format(Counter(self.y)))
開發者ID:kellyhennigan,項目名稱:cueexp_scripts,代碼行數:13,代碼來源:sgdrfe_SMOTEoversample_GMmask.py

示例15: test_smote_fit

def test_smote_fit():
    """Test the fitting method"""

    # Create the object
    smote = SMOTE(random_state=RND_SEED)
    # Fit the data
    smote.fit(X, Y)

    # Check if the data information have been computed
    assert_equal(smote.min_c_, 0)
    assert_equal(smote.maj_c_, 1)
    assert_equal(smote.stats_c_[0], 8)
    assert_equal(smote.stats_c_[1], 12)
開發者ID:kellyhennigan,項目名稱:cueexp_scripts,代碼行數:13,代碼來源:test_smote.py


注:本文中的imblearn.over_sampling.SMOTE類示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。