本文整理汇总了Python中imblearn.over_sampling.SMOTE.fit_sample方法的典型用法代码示例。如果您正苦于以下问题：Python SMOTE.fit_sample方法的具体用法？Python SMOTE.fit_sample怎么用？Python SMOTE.fit_sample使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类imblearn.over_sampling.SMOTE的用法示例。
在下文中一共展示了SMOTE.fit_sample方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_sample_regular_wrong_svm
# 需要导入模块: from imblearn.over_sampling import SMOTE [as 别名]
# 或者: from imblearn.over_sampling.SMOTE import fit_sample [as 别名]
def test_sample_regular_wrong_svm():
    """Check that SVM-kind SMOTE rejects an invalid ``svm_estimator``.

    The string ``'rnd'`` is supplied as the SVM estimator and the resampling
    call is expected to raise ``ValueError`` whose message matches
    "has to be one of".
    """
    neighbors = NearestNeighbors(n_neighbors=6)
    sampler = SMOTE(
        random_state=RND_SEED,
        kind='svm',
        k_neighbors=neighbors,
        svm_estimator='rnd',
    )
    with raises(ValueError, match="has to be one of"):
        sampler.fit_sample(X, Y)
示例2: test_fit_sample_nn_obj
# 需要导入模块: from imblearn.over_sampling import SMOTE [as 别名]
# 或者: from imblearn.over_sampling.SMOTE import fit_sample [as 别名]
def test_fit_sample_nn_obj():
    """Test borderline-1 SMOTE when both neighbour searches are NN objects.

    ``k_neighbors`` and ``m_neighbors`` are passed as pre-built
    ``NearestNeighbors`` instances and the resampled output is compared
    against the stored expected arrays.
    """
    k_estimator = NearestNeighbors(n_neighbors=6)
    m_estimator = NearestNeighbors(n_neighbors=11)
    sampler = SMOTE(
        random_state=RND_SEED,
        kind='borderline1',
        k_neighbors=k_estimator,
        m_neighbors=m_estimator,
    )
    X_resampled, y_resampled = sampler.fit_sample(X, Y)

    # Expected resampled features, one sample per row.
    expected_X = np.array([
        [0.11622591, -0.0317206],
        [0.77481731, 0.60935141],
        [1.25192108, -0.22367336],
        [0.53366841, -0.30312976],
        [1.52091956, -0.49283504],
        [-0.28162401, -2.10400981],
        [0.83680821, 1.72827342],
        [0.3084254, 0.33299982],
        [0.70472253, -0.73309052],
        [0.28893132, -0.38761769],
        [1.15514042, 0.0129463],
        [0.88407872, 0.35454207],
        [1.31301027, -0.92648734],
        [-1.11515198, -0.93689695],
        [-0.18410027, -0.45194484],
        [0.9281014, 0.53085498],
        [-0.14374509, 0.27370049],
        [-0.41635887, -0.38299653],
        [0.08711622, 0.93259929],
        [1.70580611, -0.11219234],
        [0.3765279, -0.2009615],
        [0.55276636, -0.10550373],
        [0.45413452, -0.08883319],
        [1.21118683, -0.22817957],
    ])
    # Expected labels, aligned row-for-row with ``expected_X``.
    expected_y = np.array([
        0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0,
        0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0,
    ])

    assert_array_almost_equal(X_resampled, expected_X)
    assert_array_equal(y_resampled, expected_y)
示例3: test_sample_regular_with_nn_svm
# 需要导入模块: from imblearn.over_sampling import SMOTE [as 别名]
# 或者: from imblearn.over_sampling.SMOTE import fit_sample [as 别名]
def test_sample_regular_with_nn_svm():
    """Test SVM-kind SMOTE with an NN object and an explicit SVC estimator.

    The resampled output is compared against the stored expected arrays.
    """
    neighbors = NearestNeighbors(n_neighbors=6)
    svm_estimator = SVC(random_state=RND_SEED)
    sampler = SMOTE(
        random_state=RND_SEED,
        kind='svm',
        k_neighbors=neighbors,
        svm_estimator=svm_estimator,
    )
    X_resampled, y_resampled = sampler.fit_sample(X, Y)

    # Expected resampled features, one sample per row.
    expected_X = np.array([
        [0.11622591, -0.0317206],
        [0.77481731, 0.60935141],
        [1.25192108, -0.22367336],
        [0.53366841, -0.30312976],
        [1.52091956, -0.49283504],
        [-0.28162401, -2.10400981],
        [0.83680821, 1.72827342],
        [0.3084254, 0.33299982],
        [0.70472253, -0.73309052],
        [0.28893132, -0.38761769],
        [1.15514042, 0.0129463],
        [0.88407872, 0.35454207],
        [1.31301027, -0.92648734],
        [-1.11515198, -0.93689695],
        [-0.18410027, -0.45194484],
        [0.9281014, 0.53085498],
        [-0.14374509, 0.27370049],
        [-0.41635887, -0.38299653],
        [0.08711622, 0.93259929],
        [1.70580611, -0.11219234],
        [0.47436888, -0.2645749],
        [1.07844561, -0.19435291],
        [1.44015515, -1.30621303],
    ])
    # Expected labels, aligned row-for-row with ``expected_X``.
    expected_y = np.array([
        0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0,
        0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0,
    ])

    assert_array_almost_equal(X_resampled, expected_X)
    assert_array_equal(y_resampled, expected_y)
示例4: fit
# 需要导入模块: from imblearn.over_sampling import SMOTE [as 别名]
# 或者: from imblearn.over_sampling.SMOTE import fit_sample [as 别名]
def fit(self, X, y=None):
    """Oversample the training data with regular SMOTE, then fit the parent.

    Parameters
    ----------
    X : training features, passed to ``SMOTE.fit_sample``.
    y : training labels, passed to ``SMOTE.fit_sample`` (default ``None``).

    Returns
    -------
    Whatever the ``fit`` resolved via ``super(RandomForestClassifier, self)``
    returns when given the resampled data.
    """
    # Scores noted down while trying other resampling strategies
    # (kept verbatim, in the order they were recorded):
    #
    # 'Random under-sampling'
    # CondensedNearestNeighbour(size_ngh=51, n_seeds_S=51)
    #   Accuracy: 0.939693267481
    #   Precision: 0.238095238095
    #   Recall: 0.897435897436
    #
    #   Accuracy: 0.962568234988
    #   Precision: 0.324468085106
    #   Recall: 0.782051282051
    #
    # SMOTE(ratio=ratio, kind='borderline1')
    #   Accuracy: 0.971146347803
    #   Precision: 0.372093023256
    #   Recall: 0.615384615385
    #
    # SMOTE(ratio=ratio, kind='borderline2')
    #   Accuracy: 0.965427605927
    #   Precision: 0.333333333333
    #   Recall: 0.705128205128
    #
    # svm_args = {'class_weight': 'auto'}
    # svmsmote = SMOTE(ratio=ratio, kind='svm', **svm_args)
    #   Accuracy: 0.972186119054
    #   Precision: 0.395683453237
    #   Recall: 0.705128205128
    sampler = SMOTE(ratio='auto', kind='regular')
    X_balanced, y_balanced = sampler.fit_sample(X, y)
    # Alternative that was considered instead of resampling: per-sample
    # weights, i.e.
    #   weights = np.array([1/y.mean() if i == 1 else 1 for i in y])
    # passed as ``sample_weight=weights`` to the parent ``fit``.
    return super(RandomForestClassifier, self).fit(X_balanced, y_balanced)
示例5: train
# 需要导入模块: from imblearn.over_sampling import SMOTE [as 别名]
# 或者: from imblearn.over_sampling.SMOTE import fit_sample [as 别名]
def train(addr_train, clf, sampling, add_estimators):
    """Fit ``clf`` on the day sample stored under ``addr_train``.

    The matrix in ``<addr_train>/day_samp_bin.npy`` is loaded, its last column
    is split off as the label, the data is optionally resampled, and ``clf``
    is fitted after ``clf.n_estimators`` has been increased by
    ``add_estimators``.

    Parameters
    ----------
    addr_train : directory holding ``day_samp_bin.npy``; also the parent of
        the ``Random_Forest_Models`` output directory.
    clf : estimator exposing ``n_estimators`` and ``fit``.
    sampling : ``"Over"`` applies ``SMOTE(ratio=0.95)``, ``"Under"`` calls
        ``US.undersample(X, 0.01)``; any other value trains on the data as
        loaded.
    add_estimators : amount added to ``clf.n_estimators`` before fitting.

    Returns
    -------
    The fitted ``clf``. When the module-level ``__SAVE_MODEL`` flag is truthy
    the classifier is also pickled to
    ``<addr_train>/Random_Forest_Models/RF_<onoff_line>_<sampling>_Model.p``.
    """
    # The matrix file is binary data, so it must be opened in binary mode
    # (text mode breaks on Python 3 and on Windows).
    with open(os.path.join(addr_train, "day_samp_bin.npy"), "rb") as file_in:
        X = smio.load_sparse_csr(file_in)

    # The last column carries the label; everything before it is a feature.
    width = np.size(X, 1)
    X_train = X[:, :width-1]
    y_train = X[:, width-1]

    if sampling == "Over":
        sm = SMOTE(ratio=0.95)
        X_train, y_train = sm.fit_sample(X_train, y_train)
    elif sampling == "Under":
        # The under-sampler receives the full matrix, label column included.
        X_train, y_train = US.undersample(X, 0.01)

    print("Fitting Model......")
    clf.n_estimators += add_estimators
    clf.fit(X_train, y_train)
    print("Done")

    if __SAVE_MODEL:
        model_name = "RF_" + onoff_line + "_" + sampling + "_Model.p"
        dir_out = os.path.join(addr_train, "Random_Forest_Models")
        if not os.path.isdir(dir_out):
            os.mkdir(dir_out)
        path_out = os.path.join(dir_out, model_name)
        # Pickle streams are bytes; binary mode is required on Python 3.
        with open(path_out, "wb") as file_out:
            pickle.dump(clf, file_out)
    return clf
示例6: test_sample_borderline2
# 需要导入模块: from imblearn.over_sampling import SMOTE [as 别名]
# 或者: from imblearn.over_sampling.SMOTE import fit_sample [as 别名]
def test_sample_borderline2():
    """Test borderline-2 SMOTE against the stored expected output.

    The sampler is fitted once explicitly and then ``fit_sample`` is called;
    the resampled arrays are compared with the expected features and labels.
    """
    sampler = SMOTE(random_state=RND_SEED, kind='borderline2')
    sampler.fit(X, Y)
    X_resampled, y_resampled = sampler.fit_sample(X, Y)

    # Expected resampled features, one sample per row.
    expected_X = np.array([
        [0.11622591, -0.0317206],
        [0.77481731, 0.60935141],
        [1.25192108, -0.22367336],
        [0.53366841, -0.30312976],
        [1.52091956, -0.49283504],
        [-0.28162401, -2.10400981],
        [0.83680821, 1.72827342],
        [0.3084254, 0.33299982],
        [0.70472253, -0.73309052],
        [0.28893132, -0.38761769],
        [1.15514042, 0.0129463],
        [0.88407872, 0.35454207],
        [1.31301027, -0.92648734],
        [-1.11515198, -0.93689695],
        [-0.18410027, -0.45194484],
        [0.9281014, 0.53085498],
        [-0.14374509, 0.27370049],
        [-0.41635887, -0.38299653],
        [0.08711622, 0.93259929],
        [1.70580611, -0.11219234],
        [0.47436888, -0.2645749],
        [1.07844561, -0.19435291],
        [0.33339622, 0.49870937],
    ])
    # Expected labels, aligned row-for-row with ``expected_X``.
    expected_y = np.array([
        0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0,
        0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0,
    ])

    assert_array_almost_equal(X_resampled, expected_X)
    assert_array_equal(y_resampled, expected_y)
示例7: get_data
# 需要导入模块: from imblearn.over_sampling import SMOTE [as 别名]
# 或者: from imblearn.over_sampling.SMOTE import fit_sample [as 别名]
def get_data(month, day, hour=-1, mode="normal", root="/mnt/rips2/2016"):
    """Load one stored sparse sample and split it into features and labels.

    Parameters
    ----------
    month, day : integers used (zero-padded to two digits) as directory names.
    hour : ``-1`` (default) loads the per-day file ``day_samp_newer_bin.npy``;
        any other value loads ``output_bin.npy`` from that hour's directory.
        ``24`` is mapped to hour ``0`` of ``day + 1``.
    mode : ``"over"`` oversamples the result with ``SMOTE(ratio=0.99)``;
        any other value returns the data unchanged.
    root : base directory of the data tree. Defaults to the original
        hard-coded location, so existing callers are unaffected.

    Returns
    -------
    (X, y) : all columns but the last, and the last column, of the dense
        matrix rebuilt from the stored CSR components.
    """
    month_dir = str(month).rjust(2, "0")
    if hour != -1:
        if hour == 24:
            # NOTE(review): the day is incremented without any month-end
            # rollover, so e.g. day 31 + hour 24 points at day "32".
            hour = 0
            day += 1
        addr_in = os.path.join(root,
                               month_dir,
                               str(day).rjust(2, "0"),
                               str(hour).rjust(2, "0"),
                               "output_bin.npy")
    else:
        addr_in = os.path.join(root,
                               month_dir,
                               str(day).rjust(2, "0"),
                               "day_samp_newer_bin.npy")

    # np.load needs a binary handle; text mode fails on Python 3.
    with open(addr_in, "rb") as file_in:
        loader = np.load(file_in)
        # Rebuild the matrix while the file is still open: the archive
        # members are only read when they are indexed.
        data = csr_matrix(
            (loader['data'], loader['indices'], loader['indptr']),
            shape=loader['shape']).toarray()

    X = data[:, :-1]
    y = data[:, -1]
    if mode == "over":
        sm = SMOTE(ratio=0.99, verbose=0)
        X, y = sm.fit_sample(X, y)
    return X, y
示例8: resample_data
# 需要导入模块: from imblearn.over_sampling import SMOTE [as 别名]
# 或者: from imblearn.over_sampling.SMOTE import fit_sample [as 别名]
def resample_data(X, y, categorical_lst):
    """Up-sample the minority class with regular SMOTE.

    Parameters
    ----------
    X, y : features and labels handed to ``SMOTE.fit_sample``.
    categorical_lst : column index/indices of ``X`` holding categorical
        variables; those columns are rounded after resampling.

    Returns
    -------
    (X_resampled, y_resampled) with the categorical columns rounded.
    """
    sampler = SMOTE(kind='regular')
    X_balanced, y_balanced = sampler.fit_sample(X, y)
    # Round the categorical variables in place.
    rounded = np.round(X_balanced[:, categorical_lst])
    X_balanced[:, categorical_lst] = rounded
    return X_balanced, y_balanced
示例9: Input_Preparing
# 需要导入模块: from imblearn.over_sampling import SMOTE [as 别名]
# 或者: from imblearn.over_sampling.SMOTE import fit_sample [as 别名]
def Input_Preparing(Scaled_Input_Data, Surgery_Outcome, N_Feat):
    """Select features by mutual information, then balance classes with SMOTE.

    Parameters
    ----------
    Scaled_Input_Data : feature table; indexed with ``.loc`` using the
        selector's boolean support mask.
    Surgery_Outcome : target labels.
    N_Feat : value passed as ``n_features`` to the JMI selector.

    Returns
    -------
    (Prep_Train_Data, Prep_Surgery_Outcome, support) : the resampled selected
        features, the resampled labels, and the selector's ``support_`` mask.
    """
    # Feature selection
    MIFS = mifs.MutualInformationFeatureSelector(
        method='JMI', verbose=2, n_features=N_Feat)
    MIFS.fit(Scaled_Input_Data, Surgery_Outcome)
    Selected_Input_Data = Scaled_Input_Data.loc[:, MIFS.support_]

    # Balancing using SMOTE. The original passed undefined names ``X`` and
    # ``y`` here; the selected features and the outcome are the data this
    # function prepared for resampling.
    sm = SMOTE(kind='regular')
    Prep_Train_Data, Prep_Surgery_Outcome = sm.fit_sample(
        Selected_Input_Data, Surgery_Outcome)
    return (Prep_Train_Data, Prep_Surgery_Outcome, MIFS.support_)
示例10: SMT
# 需要导入模块: from imblearn.over_sampling import SMOTE [as 别名]
# 或者: from imblearn.over_sampling.SMOTE import fit_sample [as 别名]
def SMT(df, target):
    """Oversample ``df`` with regular SMOTE on the ``'anti_churn'`` label.

    Parameters
    ----------
    df : DataFrame containing an ``'anti_churn'`` column; it is copied, so
        the caller's frame is left untouched.
    target : value forwarded to ``SMOTE`` as ``ratio``.

    Returns
    -------
    (X_resampled, y_resampled) : two DataFrames; the first carries the
        original feature column names, the second a single ``'anti_churn'``
        column.
    """
    features = df.copy()
    labels = features.pop('anti_churn')
    feature_names = features.columns

    sampler = SMOTE(kind='regular', ratio=target)
    X_new, y_new = sampler.fit_sample(features, labels)

    # Wrap the sampler output back into labelled DataFrames.
    X_resampled = pd.DataFrame(X_new)
    X_resampled.columns = feature_names
    y_resampled = pd.DataFrame(y_new)
    y_resampled.columns = ['anti_churn']
    return X_resampled, y_resampled
示例11: transform
# 需要导入模块: from imblearn.over_sampling import SMOTE [as 别名]
# 或者: from imblearn.over_sampling.SMOTE import fit_sample [as 别名]
def transform(self, fp):
    """Yield SMOTE-oversampled training features, then the test features.

    The feature pool ``fp`` is converted to training arrays, oversampled with
    ``SMOTE(random_state=self.random_state)`` on the first label column, and
    converted back to features, each re-emitted with
    ``is_over_sampled=True``. After that, every item of ``fp`` whose
    ``split_type`` is ``SplitType.TEST`` is yielded unchanged.
    """
    fm, train_x, train_y = FeaturePool.to_train_arrays(fp)

    sampler = SMOTE(random_state=self.random_state)
    sampled_x, sampled_y = sampler.fit_sample(train_x, train_y[:, 0])
    # Restore the labels to a single-column 2-D array.
    sampled_y = sampled_y.reshape((sampled_y.shape[0], 1))

    for feature in FeaturePool.from_train_arrays(fm, sampled_x, sampled_y):
        yield Feature.apply_config(feature, is_over_sampled=True)

    for feature in fp:
        if feature.split_type == SplitType.TEST:
            yield feature
示例12: oversample
# 需要导入模块: from imblearn.over_sampling import SMOTE [as 别名]
# 或者: from imblearn.over_sampling.SMOTE import fit_sample [as 别名]
def oversample(X, y, bal_strategy):
    """Resample ``(X, y)`` using the requested balancing strategy.

    Parameters
    ----------
    X, y : features and labels; both must expose ``.shape``.
    bal_strategy : one of
        ``"SMOTESVN"`` or ``"ALL"`` -- SMOTE with ``kind='svm'``;
        ``"SMOTE"`` -- SMOTE with ``kind='regular'``;
        ``"ADASYN"`` -- ADASYN;
        ``"NONE"`` -- return the inputs untouched.

    Returns
    -------
    (X_sampled, y_sampled)

    Notes
    -----
    ``"ALL"`` has always resolved to the first branch (SVM SMOTE) because the
    strategies form an ``if``/``elif`` chain; the ``== "ALL"`` checks on the
    later branches could never be reached and have been removed. Any other
    value prints a message and terminates the process with exit status 1.
    """
    if bal_strategy == "SMOTESVN" or bal_strategy == "ALL":
        # Apply SMOTE SVM
        sm = SMOTE(kind='svm')
        X_sampled, y_sampled = sm.fit_sample(X, y)
    elif bal_strategy == "SMOTE":
        # Apply regular SMOTE
        sm = SMOTE(kind='regular')
        X_sampled, y_sampled = sm.fit_sample(X, y)
    elif bal_strategy == "ADASYN":
        # Apply ADASYN over-sampling
        ada = ADASYN()
        X_sampled, y_sampled = ada.fit_sample(X, y)
    elif bal_strategy == 'NONE':
        X_sampled = X
        y_sampled = y
    else:
        print('bal_strategy not in SMOTESVN, SMOTE, ADASYN, ALL, NONE')
        sys.exit(1)

    # Single-argument print calls produce the same text on Python 2 and 3;
    # the two spaces match the output of the original print statements.
    print('Shape of X_sampled:  {}'.format(X_sampled.shape))
    print('Shape of y_sampled:  {}'.format(y_sampled.shape))
    return (X_sampled, y_sampled)
示例13: oversample
# 需要导入模块: from imblearn.over_sampling import SMOTE [as 别名]
# 或者: from imblearn.over_sampling.SMOTE import fit_sample [as 别名]
def oversample(self):
    """Balance class data based on outcome.

    Prints the class counts of ``self.y``, replaces ``self.X`` and ``self.y``
    with their SMOTE-resampled versions, refreshes ``self.Xview`` to the
    first ``self.n_features`` columns of the new ``self.X``, and prints the
    class counts again.
    """
    print('Current outcome sampling {}'.format(Counter(self.y)))

    # Other samplers that were tried here: RandomOverSampler(), ADASYN().
    sampler = SMOTE()
    self.X, self.y = sampler.fit_sample(self.X, self.y)

    # Keep a view restricted to the first n_features columns.
    self.Xview = self.X.view()[:, :self.n_features]
    print('Resampled dataset shape {}'.format(Counter(self.y)))
示例14: test_sample_regular
# 需要导入模块: from imblearn.over_sampling import SMOTE [as 别名]
# 或者: from imblearn.over_sampling.SMOTE import fit_sample [as 别名]
def test_sample_regular():
    """Test regular SMOTE against reference arrays stored on disk.

    The expected output is loaded from ``data/smote_reg_x.npy`` and
    ``data/smote_reg_y.npy`` next to this file and must match exactly.
    """
    sampler = SMOTE(random_state=RND_SEED, kind='regular')
    sampler.fit(X, Y)
    X_resampled, y_resampled = sampler.fit_sample(X, Y)

    # Reference arrays live in the ``data`` directory beside this module.
    data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')
    expected_X = np.load(os.path.join(data_dir, 'smote_reg_x.npy'))
    expected_y = np.load(os.path.join(data_dir, 'smote_reg_y.npy'))

    assert_array_equal(X_resampled, expected_X)
    assert_array_equal(y_resampled, expected_y)
示例15: train
# 需要导入模块: from imblearn.over_sampling import SMOTE [as 别名]
# 或者: from imblearn.over_sampling.SMOTE import fit_sample [as 别名]
def train(cutoffs):
    """Incrementally train a Bernoulli naive Bayes model on the training files.

    Each input file is loaded, optionally reduced with ``discard_vars``,
    split into features and a trailing label column, oversampled with
    ``SMOTE(ratio=0.95)`` and fed to ``clf.partial_fit``.

    Parameters
    ----------
    cutoffs : sized collection; when non-empty, every loaded matrix is passed
        through ``discard_vars(X, cutoffs)``.

    Returns
    -------
    ``[clf, layer]`` when the module-level ``__IF_TRAIN_WITHOUT_SAVE`` flag is
    truthy (``layer`` being the result of ``correlation_ex``); otherwise the
    classifier is pickled to ``__ROOT_MODEL`` and ``[]`` is returned.
    """
    print("\n========== Start Training ==========")
    if __DATA_FROM == 2:
        list_io_addr = get_io_addr(__TRAIN_DATA[0], __TRAIN_DATA[1])
    else:
        list_io_addr = get_io_addr_random_sample(__TRAIN_DATA[0], __TRAIN_DATA[1])
    clf = BernoulliNB(class_prior=[0.05, 0.95])

    if __IF_TRAIN_WITHOUT_SAVE:
        print("Performing correlation explanation......")
        # Matrix files are binary data; text mode breaks on Python 3.
        with open("/home/wlu/Desktop/day_samp_bin_1-2.npy", "rb") as file_in:
            X = Sparse_Matrix_IO.load_sparse_csr(file_in)
        if len(cutoffs) > 0:
            X = discard_vars(X, cutoffs)
        layer = correlation_ex(X)

    for path_in in list_io_addr:
        print("\nGenerating training set from {}".format(path_in))
        with open(path_in, "rb") as file_in:
            X = Sparse_Matrix_IO.load_sparse_csr(file_in)
        if len(cutoffs) > 0:
            X = discard_vars(X, cutoffs)

        # The last column is the label; the rest are features.
        vector_len = len(X[0])
        X_train = X[:, 0:vector_len-1]
        y_train = X[:, vector_len-1]

        if __IF_TRAIN_WITHOUT_SAVE:
            print("Transforming training set according to CorEx......")
            X_train = corex_transform(layer, X_train)

        sm = SMOTE(ratio=0.95)
        X_train, y_train = sm.fit_sample(X_train, y_train)

        print("Fitting Model......")
        clf.partial_fit(X_train, y_train, classes=[0, 1])
        print("Done")

    if __IF_TRAIN_WITHOUT_SAVE:
        return [clf, layer]
    else:
        # Pickle streams are bytes; binary mode is required on Python 3.
        with open(__ROOT_MODEL, "wb") as file_out:
            pickle.dump(clf, file_out)
        return []