This article collects typical usage examples of the imblearn.over_sampling.SMOTE class in Python. If you are wondering what the SMOTE class does, how to use it, or what working examples look like, the curated class code examples here may help.
The sections below present 15 code examples of the SMOTE class, sorted by popularity by default.
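Before the examples, here is a minimal sketch of the basic SMOTE workflow on a synthetic imbalanced dataset; the dataset, parameter values, and variable names are illustrative and not drawn from the examples below. Recent imbalanced-learn releases expose fit_resample, while older releases used fit_sample; both appear in the examples that follow.

from collections import Counter

from sklearn.datasets import make_classification
from imblearn.over_sampling import SMOTE

# Illustrative imbalanced dataset: roughly 90% majority / 10% minority class.
X, y = make_classification(n_samples=1000, n_features=20,
                           weights=[0.9, 0.1], random_state=42)
print(Counter(y))  # e.g. Counter({0: 897, 1: 103})

# Oversample the minority class so both classes end up with the same count.
smote = SMOTE(random_state=42)
X_res, y_res = smote.fit_resample(X, y)  # fit_sample in older imbalanced-learn versions
print(Counter(y_res))  # classes are now balanced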
Example 1: test_fit_resample_nn_obj
def test_fit_resample_nn_obj():
    """Test borderline-1 SMOTE with user-provided NearestNeighbors objects."""
    kind = 'borderline1'
    nn_m = NearestNeighbors(n_neighbors=11)
    nn_k = NearestNeighbors(n_neighbors=6)
    smote = SMOTE(
        random_state=RND_SEED, kind=kind, k_neighbors=nn_k, m_neighbors=nn_m)
    X_resampled, y_resampled = smote.fit_resample(X, Y)
    X_gt = np.array([[0.11622591, -0.0317206], [0.77481731, 0.60935141],
                     [1.25192108, -0.22367336], [0.53366841, -0.30312976],
                     [1.52091956, -0.49283504], [-0.28162401, -2.10400981],
                     [0.83680821, 1.72827342], [0.3084254, 0.33299982],
                     [0.70472253, -0.73309052], [0.28893132, -0.38761769],
                     [1.15514042, 0.0129463], [0.88407872, 0.35454207],
                     [1.31301027, -0.92648734], [-1.11515198, -0.93689695],
                     [-0.18410027, -0.45194484], [0.9281014, 0.53085498],
                     [-0.14374509, 0.27370049], [-0.41635887, -0.38299653],
                     [0.08711622, 0.93259929], [1.70580611, -0.11219234],
                     [0.3765279, -0.2009615], [0.55276636, -0.10550373],
                     [0.45413452, -0.08883319], [1.21118683, -0.22817957]])
    y_gt = np.array([0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1,
                     0, 0, 0, 0, 0])
    assert_allclose(X_resampled, X_gt, rtol=R_TOL)
    assert_array_equal(y_resampled, y_gt)
Example 2: test_sample_regular_with_nn_svm
def test_sample_regular_with_nn_svm():
    """Test the sample function with SVM SMOTE and user-provided NN and SVM objects."""
    # Create the object
    kind = 'svm'
    nn_k = NearestNeighbors(n_neighbors=6)
    svm = SVC(random_state=RND_SEED)
    smote = SMOTE(
        random_state=RND_SEED, kind=kind, k_neighbors=nn_k, svm_estimator=svm)
    X_resampled, y_resampled = smote.fit_sample(X, Y)
    X_gt = np.array([[0.11622591, -0.0317206], [0.77481731, 0.60935141],
                     [1.25192108, -0.22367336], [0.53366841, -0.30312976],
                     [1.52091956, -0.49283504], [-0.28162401, -2.10400981],
                     [0.83680821, 1.72827342], [0.3084254, 0.33299982],
                     [0.70472253, -0.73309052], [0.28893132, -0.38761769],
                     [1.15514042, 0.0129463], [0.88407872, 0.35454207],
                     [1.31301027, -0.92648734], [-1.11515198, -0.93689695],
                     [-0.18410027, -0.45194484], [0.9281014, 0.53085498],
                     [-0.14374509, 0.27370049], [-0.41635887, -0.38299653],
                     [0.08711622, 0.93259929], [1.70580611, -0.11219234],
                     [0.47436888, -0.2645749], [1.07844561, -0.19435291],
                     [1.44015515, -1.30621303]])
    y_gt = np.array(
        [0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0])
    assert_array_almost_equal(X_resampled, X_gt)
    assert_array_equal(y_resampled, y_gt)
Example 3: test_sample_with_nn_svm
def test_sample_with_nn_svm():
    """Test SVM SMOTE with user-provided NN and SVM objects."""
    kind = 'svm'
    nn_k = NearestNeighbors(n_neighbors=6)
    svm = SVC(gamma='scale', random_state=RND_SEED)
    smote = SMOTE(
        random_state=RND_SEED, kind=kind, k_neighbors=nn_k, svm_estimator=svm)
    X_resampled, y_resampled = smote.fit_resample(X, Y)
    X_gt = np.array([[0.11622591, -0.0317206], [0.77481731, 0.60935141],
                     [1.25192108, -0.22367336], [0.53366841, -0.30312976],
                     [1.52091956, -0.49283504], [-0.28162401, -2.10400981],
                     [0.83680821, 1.72827342], [0.3084254, 0.33299982],
                     [0.70472253, -0.73309052], [0.28893132, -0.38761769],
                     [1.15514042, 0.0129463], [0.88407872, 0.35454207],
                     [1.31301027, -0.92648734], [-1.11515198, -0.93689695],
                     [-0.18410027, -0.45194484], [0.9281014, 0.53085498],
                     [-0.14374509, 0.27370049], [-0.41635887, -0.38299653],
                     [0.08711622, 0.93259929], [1.70580611, -0.11219234],
                     [0.47436887, -0.2645749], [1.07844562, -0.19435291],
                     [1.44228238, -1.31256615], [1.25636713, -1.04463226]])
    y_gt = np.array([0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0,
                     1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0])
    assert_allclose(X_resampled, X_gt, rtol=R_TOL)
    assert_array_equal(y_resampled, y_gt)
Example 4: fit
def fit(self, X, y=None):
    # Results observed with alternative resampling strategies:
    # 'Random under-sampling'
    # CondensedNearestNeighbour(size_ngh=51, n_seeds_S=51)
    #   Accuracy: 0.939693267481, Precision: 0.238095238095, Recall: 0.897435897436
    #   Accuracy: 0.962568234988, Precision: 0.324468085106, Recall: 0.782051282051
    # SMOTE(ratio=ratio, kind='borderline1')
    #   Accuracy: 0.971146347803, Precision: 0.372093023256, Recall: 0.615384615385
    # SMOTE(ratio=ratio, kind='borderline2')
    #   Accuracy: 0.965427605927, Precision: 0.333333333333, Recall: 0.705128205128
    # svm_args = {'class_weight': 'auto'}
    # svmsmote = SMOTE(ratio=ratio, kind='svm', **svm_args)
    #   Accuracy: 0.972186119054, Precision: 0.395683453237, Recall: 0.705128205128
    smote = SMOTE(ratio='auto', kind='regular')
    X, y = smote.fit_sample(X, y)
    # weights = np.array([1 / y.mean() if i == 1 else 1 for i in y])
    return super(RandomForestClassifier, self).fit(X, y)  # , sample_weight=weights)
Example 5: train
def train(addr_train, clf, sampling, add_estimators):
    with open(os.path.join(addr_train, "day_samp_bin.npy"), "rb") as file_in:
        X = smio.load_sparse_csr(file_in)
    width = np.size(X, 1)
    X_train = X[:, :width - 1]
    y_train = X[:, width - 1]

    if sampling == "Over":
        sm = SMOTE(ratio=0.95)
        X_train, y_train = sm.fit_sample(X_train, y_train)
    elif sampling == "Under":
        X_train, y_train = US.undersample(X, 0.01)

    print("Fitting Model......")
    clf.n_estimators += add_estimators
    clf.fit(X_train, y_train)
    print("Done")

    if __SAVE_MODEL:
        model_name = "RF_" + onoff_line + "_" + sampling + "_Model.p"
        dir_out = os.path.join(addr_train, "Random_Forest_Models")
        if not os.path.isdir(dir_out):
            os.mkdir(dir_out)
        path_out = os.path.join(dir_out, model_name)
        with open(path_out, "wb") as file_out:
            pickle.dump(clf, file_out)

    return clf
Example 6: test_sample_borderline2
def test_sample_borderline2():
    """Test sample function with borderline-2 SMOTE."""
    # Create the object
    kind = 'borderline2'
    smote = SMOTE(random_state=RND_SEED, kind=kind)
    # Fit and resample the data
    smote.fit(X, Y)
    X_resampled, y_resampled = smote.fit_sample(X, Y)
    X_gt = np.array([[0.11622591, -0.0317206], [0.77481731, 0.60935141],
                     [1.25192108, -0.22367336], [0.53366841, -0.30312976],
                     [1.52091956, -0.49283504], [-0.28162401, -2.10400981],
                     [0.83680821, 1.72827342], [0.3084254, 0.33299982],
                     [0.70472253, -0.73309052], [0.28893132, -0.38761769],
                     [1.15514042, 0.0129463], [0.88407872, 0.35454207],
                     [1.31301027, -0.92648734], [-1.11515198, -0.93689695],
                     [-0.18410027, -0.45194484], [0.9281014, 0.53085498],
                     [-0.14374509, 0.27370049], [-0.41635887, -0.38299653],
                     [0.08711622, 0.93259929], [1.70580611, -0.11219234],
                     [0.47436888, -0.2645749], [1.07844561, -0.19435291],
                     [0.33339622, 0.49870937]])
    y_gt = np.array(
        [0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0])
    assert_array_almost_equal(X_resampled, X_gt)
    assert_array_equal(y_resampled, y_gt)
Example 7: get_data
def get_data(month, day, hour=-1, mode="normal"):
    if hour != -1:
        if hour == 24:
            hour = 0
            day += 1
        addr_in = os.path.join("/mnt/rips2/2016",
                               str(month).rjust(2, "0"),
                               str(day).rjust(2, "0"),
                               str(hour).rjust(2, "0"),
                               "output_bin.npy")
    else:
        addr_in = os.path.join("/mnt/rips2/2016",
                               str(month).rjust(2, "0"),
                               str(day).rjust(2, "0"),
                               "day_samp_newer_bin.npy")
    with open(addr_in, "rb") as file_in:
        loader = np.load(file_in)
        data = csr_matrix((loader['data'], loader['indices'], loader['indptr']),
                          shape=loader['shape']).toarray()
    X = data[:, :-1]
    y = data[:, -1]

    if mode == "over":
        sm = SMOTE(ratio=0.99, verbose=0)
        X, y = sm.fit_sample(X, y)

    return X, y
Example 8: resample_data
def resample_data(X, y, categorical_lst):
    '''
    Up-samples the minority class with SMOTE.
    '''
    sm = SMOTE(kind='regular')
    X_train_re, y_train_re = sm.fit_sample(X, y)
    # Round the synthetic values of categorical variables back to integer codes
    X_train_re[:, categorical_lst] = np.round(X_train_re[:, categorical_lst])
    return X_train_re, y_train_re
Example 9: test_sample_wrong_X
def test_sample_wrong_X():
    """Test that an error is raised when X differs between fitting
    and sampling."""
    # Create the object
    sm = SMOTE(random_state=RND_SEED)
    sm.fit(X, Y)
    assert_raises(RuntimeError, sm.sample,
                  np.random.random((100, 40)), np.array([0] * 50 + [1] * 50))
Example 10: test_sample_regular_wrong_svm
def test_sample_regular_wrong_svm():
    """Test that a ValueError is raised when svm_estimator is not a valid estimator."""
    kind = 'svm'
    nn_k = NearestNeighbors(n_neighbors=6)
    svm = 'rnd'
    smote = SMOTE(
        random_state=RND_SEED, kind=kind, k_neighbors=nn_k, svm_estimator=svm)
    with raises(ValueError, match="has to be one of"):
        smote.fit_sample(X, Y)
Example 11: Input_Preparing
def Input_Preparing(Scaled_Input_Data, Surgery_Outcome, N_Feat):
    # Feature selection with joint mutual information
    MIFS = mifs.MutualInformationFeatureSelector(method='JMI', verbose=2,
                                                 n_features=N_Feat)
    MIFS.fit(Scaled_Input_Data, Surgery_Outcome)
    Selected_Input_Data = Scaled_Input_Data.loc[:, MIFS.support_]
    # Balancing using SMOTE
    sm = SMOTE(kind='regular')
    Prep_Train_Data, Prep_Surgery_Outcome = sm.fit_sample(Selected_Input_Data,
                                                          Surgery_Outcome)
    return (Prep_Train_Data, Prep_Surgery_Outcome, MIFS.support_)
Example 12: SMT
def SMT(df, target):
    df1 = df.copy()
    y = df1.pop('anti_churn')
    X = df1
    Xcols = df1.columns
    sm = SMOTE(kind='regular', ratio=target)
    X_resampled, y_resampled = sm.fit_sample(X, y)
    X_resampled = pd.DataFrame(X_resampled)
    y_resampled = pd.DataFrame(y_resampled)
    X_resampled.columns = Xcols
    y_resampled.columns = ['anti_churn']
    return X_resampled, y_resampled
Example 13: transform
def transform(self, fp):
    fm, train_x, train_y = FeaturePool.to_train_arrays(fp)
    os = SMOTE(random_state=self.random_state)
    os_train_x, os_train_y = os.fit_sample(train_x, train_y[:, 0])
    os_train_y = os_train_y.reshape((os_train_y.shape[0], 1))
    for f in FeaturePool.from_train_arrays(fm, os_train_x, os_train_y):
        yield Feature.apply_config(f, is_over_sampled=True)
    for f in fp:
        if f.split_type == SplitType.TEST:
            yield f
Example 14: oversample
def oversample(self):
    """Balance class data based on outcome"""
    print('Current outcome sampling {}'.format(Counter(self.y)))
    # Alternative over-samplers that could be used instead:
    # ros = RandomOverSampler()
    # ros = ADASYN()
    ros = SMOTE()
    self.X, self.y = ros.fit_sample(self.X, self.y)
    self.Xview = self.X.view()[:, :self.n_features]
    print('Resampled dataset shape {}'.format(Counter(self.y)))
Example 15: test_smote_fit
def test_smote_fit():
    """Test the fitting method"""
    # Create the object
    smote = SMOTE(random_state=RND_SEED)
    # Fit the data
    smote.fit(X, Y)
    # Check that the class statistics have been computed
    assert_equal(smote.min_c_, 0)
    assert_equal(smote.maj_c_, 1)
    assert_equal(smote.stats_c_[0], 8)
    assert_equal(smote.stats_c_[1], 12)