当前位置: 首页>>代码示例>>Python>>正文


Python SMOTE.fit_sample方法代码示例

本文整理汇总了Python中imblearn.over_sampling.SMOTE.fit_sample方法的典型用法代码示例。如果您正苦于以下问题:Python SMOTE.fit_sample方法的具体用法?Python SMOTE.fit_sample怎么用?Python SMOTE.fit_sample使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在imblearn.over_sampling.SMOTE的用法示例。


在下文中一共展示了SMOTE.fit_sample方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_sample_regular_wrong_svm

# 需要导入模块: from imblearn.over_sampling import SMOTE [as 别名]
# 或者: from imblearn.over_sampling.SMOTE import fit_sample [as 别名]
def test_sample_regular_wrong_svm():
    kind = 'svm'
    nn_k = NearestNeighbors(n_neighbors=6)
    svm = 'rnd'
    smote = SMOTE(
        random_state=RND_SEED, kind=kind, k_neighbors=nn_k, svm_estimator=svm)

    with raises(ValueError, match="has to be one of"):
        smote.fit_sample(X, Y)
开发者ID:glemaitre,项目名称:imbalanced-learn,代码行数:11,代码来源:test_smote.py

示例2: test_fit_sample_nn_obj

# 需要导入模块: from imblearn.over_sampling import SMOTE [as 别名]
# 或者: from imblearn.over_sampling.SMOTE import fit_sample [as 别名]
def test_fit_sample_nn_obj():
    """Test sample with NN object provided."""

    # Create the object
    kind = 'borderline1'
    nn_m = NearestNeighbors(n_neighbors=11)
    nn_k = NearestNeighbors(n_neighbors=6)
    smote = SMOTE(
        random_state=RND_SEED, kind=kind, k_neighbors=nn_k, m_neighbors=nn_m)

    X_resampled, y_resampled = smote.fit_sample(X, Y)

    X_gt = np.array([[0.11622591, -0.0317206], [0.77481731, 0.60935141],
                     [1.25192108, -0.22367336], [0.53366841, -0.30312976],
                     [1.52091956, -0.49283504], [-0.28162401, -2.10400981],
                     [0.83680821, 1.72827342], [0.3084254, 0.33299982],
                     [0.70472253, -0.73309052], [0.28893132, -0.38761769],
                     [1.15514042, 0.0129463], [0.88407872, 0.35454207],
                     [1.31301027, -0.92648734], [-1.11515198, -0.93689695],
                     [-0.18410027, -0.45194484], [0.9281014, 0.53085498],
                     [-0.14374509, 0.27370049], [-0.41635887, -0.38299653],
                     [0.08711622, 0.93259929], [1.70580611, -0.11219234],
                     [0.3765279, -0.2009615], [0.55276636, -0.10550373],
                     [0.45413452, -0.08883319], [1.21118683, -0.22817957]])
    y_gt = np.array([
        0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0
    ])
    assert_array_almost_equal(X_resampled, X_gt)
    assert_array_equal(y_resampled, y_gt)
开发者ID:kellyhennigan,项目名称:cueexp_scripts,代码行数:31,代码来源:test_smote.py

示例3: test_sample_regular_with_nn_svm

# 需要导入模块: from imblearn.over_sampling import SMOTE [as 别名]
# 或者: from imblearn.over_sampling.SMOTE import fit_sample [as 别名]
def test_sample_regular_with_nn_svm():
    """Test sample function with regular SMOTE with a NN object."""

    # Create the object
    kind = 'svm'
    nn_k = NearestNeighbors(n_neighbors=6)
    svm = SVC(random_state=RND_SEED)
    smote = SMOTE(
        random_state=RND_SEED, kind=kind, k_neighbors=nn_k, svm_estimator=svm)

    X_resampled, y_resampled = smote.fit_sample(X, Y)

    X_gt = np.array([[0.11622591, -0.0317206], [0.77481731, 0.60935141],
                     [1.25192108, -0.22367336], [0.53366841, -0.30312976],
                     [1.52091956, -0.49283504], [-0.28162401, -2.10400981],
                     [0.83680821, 1.72827342], [0.3084254, 0.33299982],
                     [0.70472253, -0.73309052], [0.28893132, -0.38761769],
                     [1.15514042, 0.0129463], [0.88407872, 0.35454207],
                     [1.31301027, -0.92648734], [-1.11515198, -0.93689695],
                     [-0.18410027, -0.45194484], [0.9281014, 0.53085498],
                     [-0.14374509, 0.27370049], [-0.41635887, -0.38299653],
                     [0.08711622, 0.93259929], [1.70580611, -0.11219234],
                     [0.47436888, -0.2645749], [1.07844561, -0.19435291],
                     [1.44015515, -1.30621303]])
    y_gt = np.array(
        [0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0])
    assert_array_almost_equal(X_resampled, X_gt)
    assert_array_equal(y_resampled, y_gt)
开发者ID:kellyhennigan,项目名称:cueexp_scripts,代码行数:30,代码来源:test_smote.py

示例4: fit

# 需要导入模块: from imblearn.over_sampling import SMOTE [as 别名]
# 或者: from imblearn.over_sampling.SMOTE import fit_sample [as 别名]
    def fit(self, X , y = None):
        # 'Random under-sampling'
        # CondensedNearestNeighbour(size_ngh=51, n_seeds_S=51)
        #Accuracy: 0.939693267481
        #Precision: 0.238095238095
        #Recall: 0.897435897436

        #Accuracy: 0.962568234988
        #Precision: 0.324468085106
        #Recall: 0.782051282051
        #SMOTE(ratio=ratio, kind='borderline1')
        #Accuracy: 0.971146347803
        #Precision: 0.372093023256
        #Recall: 0.615384615385
        #SMOTE(ratio=ratio, kind='borderline2')
        #Accuracy: 0.965427605927
        #Precision: 0.333333333333
        #Recall: 0.705128205128
        #svm_args = {'class_weight': 'auto'}
        #svmsmote = SMOTE(ratio=ratio, kind='svm', **svm_args)
        #Accuracy: 0.972186119054
        #Precision: 0.395683453237
        #Recall: 0.705128205128

        smote = SMOTE(ratio='auto', kind='regular')
        X, y = smote.fit_sample(X, y)
       # weights = np.array([1/y.mean() if i == 1 else 1 for i in y])
        return super(RandomForestClassifier, self).fit(X,y)#,sample_weight=weights)
开发者ID:nmoraesmunter,项目名称:BeTheChange,代码行数:30,代码来源:verified_victory_pipeline.py

示例5: train

# 需要导入模块: from imblearn.over_sampling import SMOTE [as 别名]
# 或者: from imblearn.over_sampling.SMOTE import fit_sample [as 别名]
def train(addr_train, clf, sampling, add_estimators):
    with open(os.path.join(addr_train, "day_samp_bin.npy"), "r") as file_in:
        X = smio.load_sparse_csr(file_in)
    width = np.size(X, 1)
    X_train = X[:, :width-1]
    y_train = X[:, width-1]
    if sampling == "Over":
        sm = SMOTE(ratio=0.95)
        X_train, y_train = sm.fit_sample(X_train, y_train)
    elif sampling == "Under":
        X_train, y_train = US.undersample(X, 0.01)

    print "Fitting Model......"
    clf.n_estimators += add_estimators
    clf.fit(X_train, y_train)
    print "Done"

    if __SAVE_MODEL:
        model_name = "RF_" + onoff_line + "_" + sampling + "_Model.p"
        dir_out = os.path.join(addr_train, "Random_Forest_Models")
        if not os.path.isdir(dir_out):
            os.mkdir(dir_out)
        path_out = os.path.join(dir_out, model_name)
        with open(path_out, "w") as file_out:
            pickle.dump(clf, file_out)

    return clf
开发者ID:Shurooo,项目名称:gumgum,代码行数:29,代码来源:Random_Forest_Test_Tmp.py

示例6: test_sample_borderline2

# 需要导入模块: from imblearn.over_sampling import SMOTE [as 别名]
# 或者: from imblearn.over_sampling.SMOTE import fit_sample [as 别名]
def test_sample_borderline2():
    """Test sample function with borderline 2 SMOTE."""

    # Create the object
    kind = 'borderline2'
    smote = SMOTE(random_state=RND_SEED, kind=kind)
    # Fit the data
    smote.fit(X, Y)

    X_resampled, y_resampled = smote.fit_sample(X, Y)

    X_gt = np.array([[0.11622591, -0.0317206], [0.77481731, 0.60935141],
                     [1.25192108, -0.22367336], [0.53366841, -0.30312976],
                     [1.52091956, -0.49283504], [-0.28162401, -2.10400981],
                     [0.83680821, 1.72827342], [0.3084254, 0.33299982],
                     [0.70472253, -0.73309052], [0.28893132, -0.38761769],
                     [1.15514042, 0.0129463], [0.88407872, 0.35454207],
                     [1.31301027, -0.92648734], [-1.11515198, -0.93689695],
                     [-0.18410027, -0.45194484], [0.9281014, 0.53085498],
                     [-0.14374509, 0.27370049], [-0.41635887, -0.38299653],
                     [0.08711622, 0.93259929], [1.70580611, -0.11219234],
                     [0.47436888, -0.2645749], [1.07844561, -0.19435291],
                     [0.33339622, 0.49870937]])
    y_gt = np.array(
        [0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0])
    assert_array_almost_equal(X_resampled, X_gt)
    assert_array_equal(y_resampled, y_gt)
开发者ID:kellyhennigan,项目名称:cueexp_scripts,代码行数:29,代码来源:test_smote.py

示例7: get_data

# 需要导入模块: from imblearn.over_sampling import SMOTE [as 别名]
# 或者: from imblearn.over_sampling.SMOTE import fit_sample [as 别名]
def get_data(month, day, hour=-1, mode="normal"):
    if hour != -1:
        if hour == 24:
            hour = 0
            day += 1
        addr_in = os.path.join("/mnt/rips2/2016",
                               str(month).rjust(2, "0"),
                               str(day).rjust(2, "0"),
                               str(hour).rjust(2, "0"),
                               "output_bin.npy")
    else:
        addr_in = os.path.join("/mnt/rips2/2016",
                               str(month).rjust(2, "0"),
                               str(day).rjust(2, "0"),
                               "day_samp_newer_bin.npy")
    with open(addr_in, "r") as file_in:
        loader = np.load(file_in)
        data = csr_matrix((loader['data'], loader['indices'], loader['indptr']), shape=loader['shape']).toarray()
    X = data[:, :-1]
    y = data[:, -1]

    if mode == "over":
        sm = SMOTE(ratio=0.99, verbose=0)
        X, y = sm.fit_sample(X, y)

    return X, y
开发者ID:Shurooo,项目名称:gumgum,代码行数:28,代码来源:Bernoulli_Simple.py

示例8: resample_data

# 需要导入模块: from imblearn.over_sampling import SMOTE [as 别名]
# 或者: from imblearn.over_sampling.SMOTE import fit_sample [as 别名]
def resample_data(X, y, categorical_lst):
    '''
    up-samples minority class
    '''
    sm = SMOTE(kind='regular')
    X_train_re, y_train_re = sm.fit_sample(X,y)
    #rounding categorical variables
    X_train_re[:,categorical_lst] = np.round(X_train_re[:,categorical_lst])
    return X_train_re, y_train_re
开发者ID:BryceLuna,项目名称:Fraud_Detection,代码行数:11,代码来源:Load_Data.py

示例9: Input_Preparing

# 需要导入模块: from imblearn.over_sampling import SMOTE [as 别名]
# 或者: from imblearn.over_sampling.SMOTE import fit_sample [as 别名]
def Input_Preparing(Scaled_Input_Data, Surgery_Outcome, N_Feat):
    # Feature Selection
    MIFS = mifs.MutualInformationFeatureSelector(method='JMI', verbose=2, n_features = N_Feat)
    MIFS.fit(Scaled_Input_Data, Surgery_Outcome)
    Selected_Input_Data = Scaled_Input_Data.loc[:,MIFS.support_]

    # Balancing using SMOTE
    sm = SMOTE(kind='regular')
    Prep_Train_Data, Prep_Surgery_Outcome = sm.fit_sample(X, y)
    
    return(Prep_Train_Data, Prep_Surgery_Outcome, MIFS.support_)
开发者ID:Nekooeimehr,项目名称:Python-Surgical-Failure-Prediction-Code,代码行数:13,代码来源:Input_Preparing.py

示例10: SMT

# 需要导入模块: from imblearn.over_sampling import SMOTE [as 别名]
# 或者: from imblearn.over_sampling.SMOTE import fit_sample [as 别名]
def SMT(df, target):
    df1 = df.copy()
    y = df1.pop('anti_churn')
    X = df1
    Xcols = df1.columns
    sm = SMOTE(kind='regular', ratio = target)
    X_resampled, y_resampled = sm.fit_sample(X, y)
    X_resampled = pd.DataFrame(X_resampled)
    y_resampled = pd.DataFrame(y_resampled)
    X_resampled.columns = Xcols
    y_resampled.columns = ['anti_churn']
    return X_resampled, y_resampled
开发者ID:Shimonzu,项目名称:Ultralinks,代码行数:14,代码来源:Ultralinks_Code.py

示例11: transform

# 需要导入模块: from imblearn.over_sampling import SMOTE [as 别名]
# 或者: from imblearn.over_sampling.SMOTE import fit_sample [as 别名]
    def transform(self, fp):
        fm, train_x, train_y = FeaturePool.to_train_arrays(fp)

        os = SMOTE(random_state = self.random_state)
        os_train_x, os_train_y = os.fit_sample(train_x, train_y[:, 0])
        os_train_y = os_train_y.reshape((os_train_y.shape[0], 1))

        for f in FeaturePool.from_train_arrays(fm, os_train_x, os_train_y):
            yield Feature.apply_config(f, is_over_sampled=True)
        for f in fp:
            if f.split_type == SplitType.TEST:
                yield f
开发者ID:alexeyche,项目名称:alexeyche-junk,代码行数:14,代码来源:transform.py

示例12: oversample

# 需要导入模块: from imblearn.over_sampling import SMOTE [as 别名]
# 或者: from imblearn.over_sampling.SMOTE import fit_sample [as 别名]
def oversample(X, y, bal_strategy):

	if(bal_strategy == "SMOTESVN"  or bal_strategy == "ALL"):
		# Apply SMOTE SVM
		sm = SMOTE(kind='svm')
		X_sampled, y_sampled = sm.fit_sample(X, y)

		print 'Shape of X_sampled: ', X_sampled.shape
		print 'Shape of y_sampled: ', y_sampled.shape

	elif(bal_strategy == "SMOTE"  or bal_strategy == "ALL"):
		# Apply regular SMOTE
		sm = SMOTE(kind='regular')
		X_sampled, y_sampled = sm.fit_sample(X, y)

		print 'Shape of X_sampled: ', X_sampled.shape
		print 'Shape of y_sampled: ', y_sampled.shape

	elif(bal_strategy == "ADASYN"  or bal_strategy == "ALL"):
	# Apply the random over-sampling
		ada = ADASYN()
		X_sampled, y_sampled = ada.fit_sample(X, y)

		print 'Shape of X_sampled: ', X_sampled.shape
		print 'Shape of y_sampled: ', y_sampled.shape

	elif(bal_strategy == 'NONE'):
		X_sampled = X
		y_sampled = y

		print 'Shape of X_sampled: ', X_sampled.shape
		print 'Shape of y_sampled: ', y_sampled.shape

	else:
		print 'bal_stragegy not in SMOTESVN, SMOTE, ADASYN, ALL, NONE'
		sys.exit(1)


	return (X_sampled, y_sampled)
开发者ID:brettin,项目名称:pilot1-docs,代码行数:41,代码来源:oversample.py

示例13: oversample

# 需要导入模块: from imblearn.over_sampling import SMOTE [as 别名]
# 或者: from imblearn.over_sampling.SMOTE import fit_sample [as 别名]
    def oversample(self):
        """Balance class data based on outcome"""
        print('Current outcome sampling {}'.format(Counter(self.y)))
        
        # to use a random sampling seed at random:
        #ros = RandomOverSampler()
        ros = SMOTE()
        #ros = ADASYN()

        self.X, self.y = ros.fit_sample(self.X, self.y)

        self.Xview = self.X.view()[:, :self.n_features]
        print('Resampled dataset shape {}'.format(Counter(self.y)))
开发者ID:kellyhennigan,项目名称:cueexp_scripts,代码行数:15,代码来源:sgdrfe_SMOTEoversample_GMmask.py

示例14: test_sample_regular

# 需要导入模块: from imblearn.over_sampling import SMOTE [as 别名]
# 或者: from imblearn.over_sampling.SMOTE import fit_sample [as 别名]
def test_sample_regular():
    """Test sample function with regular SMOTE."""

    # Create the object
    kind = 'regular'
    smote = SMOTE(random_state=RND_SEED, kind=kind)
    # Fit the data
    smote.fit(X, Y)

    X_resampled, y_resampled = smote.fit_sample(X, Y)

    currdir = os.path.dirname(os.path.abspath(__file__))
    X_gt = np.load(os.path.join(currdir, 'data', 'smote_reg_x.npy'))
    y_gt = np.load(os.path.join(currdir, 'data', 'smote_reg_y.npy'))
    assert_array_equal(X_resampled, X_gt)
    assert_array_equal(y_resampled, y_gt)
开发者ID:integrallyclosed,项目名称:imbalanced-learn,代码行数:18,代码来源:test_smote.py

示例15: train

# 需要导入模块: from imblearn.over_sampling import SMOTE [as 别名]
# 或者: from imblearn.over_sampling.SMOTE import fit_sample [as 别名]
def train(cutoffs):
    print "\n========== Start Training =========="
    if __DATA_FROM == 2:
        list_io_addr = get_io_addr(__TRAIN_DATA[0], __TRAIN_DATA[1])
    else:
        list_io_addr = get_io_addr_random_sample(__TRAIN_DATA[0], __TRAIN_DATA[1])
    clf = BernoulliNB(class_prior=[0.05, 0.95])

    if __IF_TRAIN_WITHOUT_SAVE:
        print "Performing correlation explanation......"
        with open("/home/wlu/Desktop/day_samp_bin_1-2.npy", "r") as file_in:
            X = Sparse_Matrix_IO.load_sparse_csr(file_in)
            if len(cutoffs) > 0:
                X = discard_vars(X, cutoffs)
            layer = correlation_ex(X)

    for i in range(0, len(list_io_addr)):
        path_in = list_io_addr[i]
        print "\nGenerating training set from {}".format(path_in)
        with open(path_in, "r") as file_in:
            X = Sparse_Matrix_IO.load_sparse_csr(file_in)

        if len(cutoffs) > 0:
            X = discard_vars(X, cutoffs)

        vector_len = len(X[0])
        X_train = X[:, 0:vector_len-1]
        y_train = X[:, vector_len-1]

        if __IF_TRAIN_WITHOUT_SAVE:
            print "Transforming training set according to CorEx......"
            X_train = corex_transform(layer, X_train)

        sm = SMOTE(ratio=0.95)
        X_train, y_train = sm.fit_sample(X_train, y_train)

        print "Fitting Model......"
        clf.partial_fit(X_train, y_train, classes=[0, 1])
        print "Done"

    if __IF_TRAIN_WITHOUT_SAVE:
        return [clf, layer]
    else:
        with open(__ROOT_MODEL, "w") as file_out:
            pickle.dump(clf, file_out)
        return []
开发者ID:Shurooo,项目名称:gumgum,代码行数:48,代码来源:Bernoulli_CorEx.py


注:本文中的imblearn.over_sampling.SMOTE.fit_sample方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。