This article collects typical usage examples of the Python class imblearn.combine.SMOTEENN. If you are unsure what SMOTEENN does, how to use it, or what real code that uses it looks like, the curated examples here should help.
Below are 15 code examples of the SMOTEENN class, sorted by popularity by default.
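Before diving into the examples, here is a minimal, self-contained sketch of the typical SMOTEENN workflow. The synthetic dataset, class weights, and random seeds are illustrative assumptions, not taken from the examples below:

# Minimal SMOTEENN sketch: oversample the minority class with SMOTE,
# then clean up ambiguous samples with Edited Nearest Neighbours (ENN).
from collections import Counter
from sklearn.datasets import make_classification
from imblearn.combine import SMOTEENN

# Illustrative imbalanced dataset: roughly 10% minority class.
X, y = make_classification(n_classes=2, weights=[0.1, 0.9],
                           n_samples=1000, random_state=42)
print(Counter(y))              # e.g. Counter({1: 897, 0: 103})

sme = SMOTEENN(random_state=42)
X_res, y_res = sme.fit_resample(X, y)
print(Counter(y_res))          # classes roughly balanced after resampling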
Example 1: test_sample_wrong_X
def test_sample_wrong_X():
    """Check that an error is raised when the X passed to sample differs
    from the X used at fitting time."""
    # Create the object
    sm = SMOTEENN(random_state=RND_SEED)
    sm.fit(X, Y)
    assert_raises(RuntimeError, sm.sample, np.random.random((100, 40)),
                  np.array([0] * 50 + [1] * 50))
Example 2: test_validate_estimator_default
def test_validate_estimator_default():
    smt = SMOTEENN(random_state=RND_SEED)
    X_resampled, y_resampled = smt.fit_resample(X, Y)
    X_gt = np.array([[1.52091956, -0.49283504],
                     [0.84976473, -0.15570176],
                     [0.61319159, -0.11571667],
                     [0.66052536, -0.28246518],
                     [-0.28162401, -2.10400981],
                     [0.83680821, 1.72827342],
                     [0.08711622, 0.93259929]])
    y_gt = np.array([0, 0, 0, 0, 1, 1, 1])
    assert_allclose(X_resampled, X_gt, rtol=R_TOL)
    assert_array_equal(y_resampled, y_gt)
Example 3: test_sample_regular_half
def test_sample_regular_half():
    sampling_strategy = {0: 10, 1: 12}
    smote = SMOTEENN(
        sampling_strategy=sampling_strategy, random_state=RND_SEED)
    X_resampled, y_resampled = smote.fit_resample(X, Y)
    X_gt = np.array([[1.52091956, -0.49283504],
                     [-0.28162401, -2.10400981],
                     [0.83680821, 1.72827342],
                     [0.08711622, 0.93259929]])
    y_gt = np.array([0, 1, 1, 1])
    assert_allclose(X_resampled, X_gt)
    assert_array_equal(y_resampled, y_gt)
Example 4: test_validate_estimator_init
def test_validate_estimator_init():
    smote = SMOTE(random_state=RND_SEED)
    enn = EditedNearestNeighbours(sampling_strategy='all')
    smt = SMOTEENN(smote=smote, enn=enn, random_state=RND_SEED)
    X_resampled, y_resampled = smt.fit_resample(X, Y)
    X_gt = np.array([[1.52091956, -0.49283504],
                     [0.84976473, -0.15570176],
                     [0.61319159, -0.11571667],
                     [0.66052536, -0.28246518],
                     [-0.28162401, -2.10400981],
                     [0.83680821, 1.72827342],
                     [0.08711622, 0.93259929]])
    y_gt = np.array([0, 0, 0, 0, 1, 1, 1])
    assert_allclose(X_resampled, X_gt, rtol=R_TOL)
    assert_array_equal(y_resampled, y_gt)
Example 5: test_smote_fit
def test_smote_fit():
    """Test the fitting method"""
    # Create the object
    smote = SMOTEENN(random_state=RND_SEED)
    # Fit the data
    smote.fit(X, Y)
    # Check that the data statistics have been computed
    assert_equal(smote.min_c_, 0)
    assert_equal(smote.maj_c_, 1)
    assert_equal(smote.stats_c_[0], 500)
    assert_equal(smote.stats_c_[1], 4500)
Example 6: test_sample_regular
def test_sample_regular():
    """Test the sample function with regular SMOTE."""
    # Create the object
    smote = SMOTEENN(random_state=RND_SEED)
    # Fit the data
    smote.fit(X, Y)
    X_resampled, y_resampled = smote.fit_sample(X, Y)
    currdir = os.path.dirname(os.path.abspath(__file__))
    X_gt = np.load(os.path.join(currdir, 'data', 'smote_enn_reg_x.npy'))
    y_gt = np.load(os.path.join(currdir, 'data', 'smote_enn_reg_y.npy'))
    assert_array_equal(X_resampled, X_gt)
    assert_array_equal(y_resampled, y_gt)
Example 7: SMOTE
def SMOTE(self, bug_rate, X, Y):
    """
    Combine over- and under-sampling using SMOTE and
    Edited Nearest Neighbours.
    Rebalance the original dataset with the combined SMOTE + ENN method.
    :param bug_rate: target sampling ratio passed to SMOTEENN
    :param X: the dataset without the label column
    :param Y: the label information
    :return: the resampled X and Y.
    """
    from collections import Counter
    from imblearn.combine import SMOTEENN
    sme = SMOTEENN(ratio=bug_rate)
    x_res, y_res = sme.fit_sample(X, Y)
    import numpy as np
    nx = np.column_stack((x_res, y_res))
    self.new_list_SMOTE = nx
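Note that Example 7 targets the older imblearn API: from imblearn 0.4 onward, the ratio parameter was renamed sampling_strategy, and fit_sample was deprecated in favour of fit_resample (and removed in later releases). On a current install, the equivalent call would look roughly like this sketch:

# Rough equivalent for imblearn >= 0.4 (an assumption about the caller's
# install, not part of the original example):
# sampling_strategy replaces ratio, fit_resample replaces fit_sample.
sme = SMOTEENN(sampling_strategy=bug_rate)
x_res, y_res = sme.fit_resample(X, Y)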
Example 8: test_sample_regular_pass_smote_enn
def test_sample_regular_pass_smote_enn():
    smote = SMOTEENN(smote=SMOTE(ratio='auto', random_state=RND_SEED),
                     enn=EditedNearestNeighbours(ratio='all',
                                                 random_state=RND_SEED),
                     random_state=RND_SEED)
    X_resampled, y_resampled = smote.fit_sample(X, Y)
    X_gt = np.array([[1.52091956, -0.49283504],
                     [0.84976473, -0.15570176],
                     [0.61319159, -0.11571667],
                     [0.66052536, -0.28246518],
                     [-0.28162401, -2.10400981],
                     [0.83680821, 1.72827342],
                     [0.08711622, 0.93259929]])
    y_gt = np.array([0, 0, 0, 0, 1, 1, 1])
    assert_allclose(X_resampled, X_gt, rtol=R_TOL)
    assert_array_equal(y_resampled, y_gt)
Example 9: __init__
def __init__(self, kind, data, target, verbose=False, ratio='auto'):
    assert len(data) == len(target)
    self.data = data
    self.target = target
    if kind in [Undersampling.ClusterCentroids]:
        if verbose: print('> CLUSTER CENTROIDS')
        # Undersampling via ClusterCentroids
        self.undersampler = ClusterCentroids(verbose=verbose, ratio=ratio)
    elif kind in [Undersampling.SMOTEENN]:
        if verbose: print('> SMOTEENN')
        # Combined resampling via SMOTEENN
        self.undersampler = SMOTEENN(verbose=verbose, ratio=ratio)
    else:
        # Raise a proper exception; raising a plain string is a TypeError.
        raise ValueError("Nonexistent undersampling type: " + kind.name)
Example 10: train_decisiontree_with
def train_decisiontree_with(configurationname, train_data, k, score_function,
                            undersam=False, oversam=False, export=False):
    assert k > 0
    print("Training with configuration " + configurationname)
    X_train, y_train, id_to_a_train = train_data
    dtc = DecisionTreeClassifier(random_state=0)

    print("Feature Selection")
    # selector = SelectFpr(score_function)
    selector = SelectKBest(score_function, k=k)
    result = selector.fit(X_train, y_train)
    X_train = selector.transform(X_train)
    fitted_ids = [i for i in result.get_support(indices=True)]

    print("Apply Resampling")
    print(Counter(y_train))
    if undersam and not oversam:
        renn = RepeatedEditedNearestNeighbours()
        X_train, y_train = renn.fit_resample(X_train, y_train)
    if oversam and not undersam:
        # feature_indices_array = list(range(len(f_to_id)))
        # smote_nc = SMOTENC(categorical_features=feature_indices_array, random_state=0)
        # X_train, y_train = smote_nc.fit_resample(X_train, y_train)
        sm = SMOTE(random_state=42)
        X_train, y_train = sm.fit_resample(X_train, y_train)
    if oversam and undersam:
        smote_enn = SMOTEENN(random_state=0)
        X_train, y_train = smote_enn.fit_resample(X_train, y_train)
    print(Counter(y_train))

    print("Train Classifier")
    dtc = dtc.fit(X_train, y_train, check_input=True)
    if export:
        export_graphviz(dtc, out_file=DATAP + "/temp/trees/sltree_" + configurationname + ".dot", filled=True)
        transform(fitted_ids, configurationname)
    print("Self Accuracy: " + str(dtc.score(X_train, y_train)))
    return selector, dtc
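As a design note on Example 10: instead of resampling manually before fitting, SMOTEENN can be chained with the classifier through imblearn's own pipeline, which applies the sampler during fit only and leaves prediction untouched. A minimal sketch, reusing the X_train/y_train names from the example above:

# Sketch: chain SMOTEENN and a decision tree with imblearn's pipeline;
# the resampling step runs inside fit() and is skipped at predict() time.
from imblearn.pipeline import make_pipeline
from imblearn.combine import SMOTEENN
from sklearn.tree import DecisionTreeClassifier

pipe = make_pipeline(SMOTEENN(random_state=0),
                     DecisionTreeClassifier(random_state=0))
pipe.fit(X_train, y_train)
print("Self Accuracy: " + str(pipe.score(X_train, y_train)))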
Example 11: test_error_wrong_object
def test_error_wrong_object():
    smote = 'rnd'
    enn = 'rnd'
    smt = SMOTEENN(smote=smote, random_state=RND_SEED)
    with raises(ValueError, match="smote needs to be a SMOTE"):
        smt.fit_resample(X, Y)
    smt = SMOTEENN(enn=enn, random_state=RND_SEED)
    with raises(ValueError, match="enn needs to be an "):
        smt.fit_resample(X, Y)
Example 12: test_parallelisation
def test_parallelisation():
    # Check that the default job count is 1
    smt = SMOTEENN(random_state=RND_SEED)
    smt._validate_estimator()
    assert smt.n_jobs == 1
    assert smt.smote_.n_jobs == 1
    assert smt.enn_.n_jobs == 1

    # Check that the job count propagates to the inner SMOTE and ENN
    smt = SMOTEENN(random_state=RND_SEED, n_jobs=8)
    smt._validate_estimator()
    assert smt.n_jobs == 8
    assert smt.smote_.n_jobs == 8
    assert smt.enn_.n_jobs == 8
Example 13: return
# The excerpt opens mid-function; the head of this median helper is a
# minimal reconstruction consistent with the two surviving lines.
def median(data):
    data = sorted(data)
    n = len(data)
    if n % 2 == 1:
        return data[n // 2]
    i = n // 2
    return (data[i - 1] + data[i]) / 2

start = time()
n_iter = 100        # Number of evaluations (SMAC)
n_validations = 7   # Number of Monte-Carlo cross-validations per evaluated model

# Dataset 11
url11 = "https://archive.ics.uci.edu/ml/machine-learning-databases/tic-mld/ticdata2000.txt"
dataset11 = np.genfromtxt(urllib.urlopen(url11))  # Python 2 urllib; on Python 3 use urllib.request.urlopen
X = dataset11[:, 0:85]
Y = dataset11[:, 85]
sm = SMOTEENN()
X, Y = sm.fit_sample(X, Y)

# Fit the MLP with the given hyperparameters and return the model's median
# accuracy over 7 trials
def mlp(number_layers, number_neurons_1, number_neurons_2, number_neurons_3,
        number_neurons_4, dropout_rate):
    layers = []
    number_neurons = []
    number_neurons.append(number_neurons_1)
    number_neurons.append(number_neurons_2)
    number_neurons.append(number_neurons_3)
    number_neurons.append(number_neurons_4)
    for i in np.arange(number_layers):
        # Layer presumably comes from scikit-neuralnetwork (sknn.mlp);
        # its import lies outside this excerpt.
        layers.append(Layer("Sigmoid", units=number_neurons[i],
                            dropout=dropout_rate))
Example 14: range
for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
    plt.text(j, i, cm[i, j],
             horizontalalignment="center",
             color="white" if cm[i, j] > thresh else "black")
plt.tight_layout()
plt.ylabel('True label')
plt.xlabel('Predicted label')

# Define X and y
X, y = data.loc[:, data.columns != 'state'].values, data.loc[:, data.columns == 'state'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

# SMOTEENN resampling
sme = SMOTEENN(random_state=42)
os_X, os_y = sme.fit_sample(X_train, y_train)

# QDA
clf_QDA = QuadraticDiscriminantAnalysis(store_covariances=True)
clf_QDA.fit(os_X, os_y)
y_true, y_pred = y_test, clf_QDA.predict(X_test)

# F1 score, precision, recall, specificity, G score
print("F1_score : %.4g" % metrics.f1_score(y_true, y_pred))
print("Recall : %.4g" % metrics.recall_score(y_true, y_pred))
recall = metrics.recall_score(y_true, y_pred)
print("Precision : %.4g" % metrics.precision_score(y_true, y_pred))

# Compute confusion matrix
cnf_matrix = confusion_matrix(y_test, y_pred)
Example 15: print
print(__doc__)

# Generate the dataset
X, y = make_classification(n_classes=2, class_sep=2, weights=[0.1, 0.9],
                           n_informative=3, n_redundant=1, flip_y=0,
                           n_features=20, n_clusters_per_class=1,
                           n_samples=100, random_state=10)

# Instantiate a PCA object for the sake of easy visualisation
pca = PCA(n_components=2)
# Fit and transform x to visualise inside a 2D feature space
X_vis = pca.fit_transform(X)

# Apply SMOTE + ENN
sm = SMOTEENN()
X_resampled, y_resampled = sm.fit_resample(X, y)
X_res_vis = pca.transform(X_resampled)

# Two subplots, unpack the axes array immediately
f, (ax1, ax2) = plt.subplots(1, 2)
c0 = ax1.scatter(X_vis[y == 0, 0], X_vis[y == 0, 1], label="Class #0",
                 alpha=0.5)
c1 = ax1.scatter(X_vis[y == 1, 0], X_vis[y == 1, 1], label="Class #1",
                 alpha=0.5)
ax1.set_title('Original set')
ax2.scatter(X_res_vis[y_resampled == 0, 0], X_res_vis[y_resampled == 0, 1],
            label="Class #0", alpha=0.5)
ax2.scatter(X_res_vis[y_resampled == 1, 0], X_res_vis[y_resampled == 1, 1],