本文整理汇总了 Python 中 imblearn.under_sampling.ClusterCentroids.fit_sample 方法的典型用法代码示例。如果您正苦于以下问题：Python ClusterCentroids.fit_sample 方法的具体用法？Python ClusterCentroids.fit_sample 怎么用？Python ClusterCentroids.fit_sample 使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 imblearn.under_sampling.ClusterCentroids 的用法示例。
在下文中一共展示了 ClusterCentroids.fit_sample 方法的 14 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: test_fit_sample_check_voting
# 需要导入模块: from imblearn.under_sampling import ClusterCentroids [as 别名]
# 或者: from imblearn.under_sampling.ClusterCentroids import fit_sample [as 别名]
def test_fit_sample_check_voting():
    """Check the voting strategy picked for dense and for sparse input.

    After ``fit_sample`` the ``voting_`` attribute must be ``'soft'`` when
    ``X`` is passed as is, and ``'hard'`` when the same data is wrapped in a
    CSR sparse matrix.
    """
    # Dense input.
    dense_sampler = ClusterCentroids(random_state=RND_SEED)
    dense_sampler.fit_sample(X, Y)
    assert dense_sampler.voting_ == 'soft'

    # Same data, CSR representation.
    sparse_sampler = ClusterCentroids(random_state=RND_SEED)
    sparse_sampler.fit_sample(sparse.csr_matrix(X), Y)
    assert sparse_sampler.voting_ == 'hard'
示例2: test_fit_sample_error
# 需要导入模块: from imblearn.under_sampling import ClusterCentroids [as 别名]
# 或者: from imblearn.under_sampling.ClusterCentroids import fit_sample [as 别名]
def test_fit_sample_error():
    """Check that invalid ``estimator`` / ``voting`` values raise ValueError.

    Both failures are expected when ``fit_sample`` is called, and each is
    matched against a fragment of its error message.
    """
    # A plain string is passed where an estimator is expected.
    bad_estimator = ClusterCentroids(
        ratio='auto', random_state=RND_SEED, estimator='rnd')
    with raises(ValueError, match="has to be a KMeans clustering"):
        bad_estimator.fit_sample(X, Y)

    # An unsupported voting strategy name.
    bad_voting = ClusterCentroids(
        ratio='auto', voting='unknown', random_state=RND_SEED)
    with raises(ValueError, match="needs to be one of"):
        bad_voting.fit_sample(X, Y)
示例3: test_multiclass_fit_sample
# 需要导入模块: from imblearn.under_sampling import ClusterCentroids [as 别名]
# 或者: from imblearn.under_sampling.ClusterCentroids import fit_sample [as 别名]
def test_multiclass_fit_sample():
    """Check ``fit_sample`` on a three-class target.

    Samples 5 and 6 of a copy of ``Y`` are relabelled as class 2; after
    resampling, each of the classes 0, 1 and 2 must have exactly two samples.
    """
    # Work on a copy so the shared ``Y`` fixture is left untouched.
    y = Y.copy()
    for position in (5, 6):
        y[position] = 2

    sampler = ClusterCentroids(random_state=RND_SEED)
    X_resampled, y_resampled = sampler.fit_sample(X, y)

    per_class = Counter(y_resampled)
    for label in (0, 1, 2):
        assert per_class[label] == 2
示例4: test_fit_sample_auto
# 需要导入模块: from imblearn.under_sampling import ClusterCentroids [as 别名]
# 或者: from imblearn.under_sampling.ClusterCentroids import fit_sample [as 别名]
def test_fit_sample_auto():
    """Compare ``fit_sample`` with ``ratio='auto'`` against reference values.

    The reference output holds three samples for class 0 followed by three
    samples for class 1; features are compared with relative tolerance
    ``R_TOL``.
    """
    sampler = ClusterCentroids(ratio='auto', random_state=RND_SEED)
    X_resampled, y_resampled = sampler.fit_sample(X, Y)

    expected_X = np.array([
        [0.92923648, 0.76103773],
        [0.47104475, 0.44386323],
        [0.13347175, 0.12167502],
        [0.06738818, -0.529627],
        [0.17901516, 0.69860992],
        [0.094035, -2.55298982],
    ])
    expected_y = np.array([0, 0, 0, 1, 1, 1])

    assert_allclose(X_resampled, expected_X, rtol=R_TOL)
    assert_array_equal(y_resampled, expected_y)
示例5: test_multiclass_fit_sample
# 需要导入模块: from imblearn.under_sampling import ClusterCentroids [as 别名]
# 或者: from imblearn.under_sampling.ClusterCentroids import fit_sample [as 别名]
def test_multiclass_fit_sample():
    """Check the per-class counts returned by ``fit_sample`` with three classes.

    The first 1000 entries of a copy of ``Y`` are relabelled as class 2;
    after resampling, every class (0, 1 and 2) must have 400 samples.
    """
    # Turn the target into a multiclass one without touching the shared ``Y``.
    y = Y.copy()
    y[0:1000] = 2

    sampler = ClusterCentroids(random_state=RND_SEED)
    X_resampled, y_resampled = sampler.fit_sample(X, y)

    # Every class must end up with the same number of samples.
    per_class = Counter(y_resampled)
    for label in (0, 1, 2):
        assert_equal(per_class[label], 400)
示例6: test_fit_sample_half
# 需要导入模块: from imblearn.under_sampling import ClusterCentroids [as 别名]
# 或者: from imblearn.under_sampling.ClusterCentroids import fit_sample [as 别名]
def test_fit_sample_half():
    """Compare ``fit_sample`` with a per-class ``ratio`` dict to reference values.

    The sampler is built with ``ratio={0: 3, 1: 6}``; the reference output
    holds three samples of class 0 followed by six samples of class 1.
    Features are compared with relative tolerance ``R_TOL``.
    """
    ratio = {0: 3, 1: 6}
    cc = ClusterCentroids(ratio=ratio, random_state=RND_SEED)
    X_resampled, y_resampled = cc.fit_sample(X, Y)

    X_gt = np.array([[0.92923648, 0.76103773],
                     [0.13347175, 0.12167502],
                     [0.47104475, 0.44386323],
                     [0.09125309, -0.85409574],
                     [0.19220316, 0.32337101],
                     [0.094035, -2.55298982],
                     [0.20792588, 1.49407907],
                     [0.04352327, -0.20515826],
                     [0.12372842, 0.6536186]])
    y_gt = np.array([0, 0, 0, 1, 1, 1, 1, 1, 1])

    # The leftover debug ``print(X_resampled)`` was removed: on failure
    # ``assert_allclose`` already reports the mismatching values.
    assert_allclose(X_resampled, X_gt, rtol=R_TOL)
    assert_array_equal(y_resampled, y_gt)
示例7: test_fit_sample_half
# 需要导入模块: from imblearn.under_sampling import ClusterCentroids [as 别名]
# 或者: from imblearn.under_sampling.ClusterCentroids import fit_sample [as 别名]
def test_fit_sample_half():
    """Compare ``fit_sample`` with ``ratio=.5`` against stored reference arrays.

    The expected ``X`` and ``y`` are loaded from ``cc_x_05.npy`` and
    ``cc_y_05.npy`` in the ``data`` directory located next to this file, and
    must match the resampled output exactly.
    """
    sampler = ClusterCentroids(ratio=.5, random_state=RND_SEED)
    X_resampled, y_resampled = sampler.fit_sample(X, Y)

    # Reference arrays live in ``<directory of this file>/data``.
    data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')
    expected_X = np.load(os.path.join(data_dir, 'cc_x_05.npy'))
    expected_y = np.load(os.path.join(data_dir, 'cc_y_05.npy'))

    assert_array_equal(X_resampled, expected_X)
    assert_array_equal(y_resampled, expected_y)
示例8: test_fit_sample_auto
# 需要导入模块: from imblearn.under_sampling import ClusterCentroids [as 别名]
# 或者: from imblearn.under_sampling.ClusterCentroids import fit_sample [as 别名]
def test_fit_sample_auto():
    """Compare ``fit_sample`` with ``ratio='auto'`` against reference values.

    The reference output holds three samples for class 0 followed by three
    samples for class 1; features are compared with
    ``assert_array_almost_equal``.
    """
    sampler = ClusterCentroids(ratio='auto', random_state=RND_SEED)
    X_resampled, y_resampled = sampler.fit_sample(X, Y)

    expected_X = np.array([
        [0.92923648, 0.76103773],
        [0.47104475, 0.44386323],
        [0.13347175, 0.12167502],
        [0.06738818, -0.529627],
        [0.17901516, 0.69860992],
        [0.094035, -2.55298982],
    ])
    expected_y = np.array([0, 0, 0, 1, 1, 1])

    assert_array_almost_equal(X_resampled, expected_X)
    assert_array_equal(y_resampled, expected_y)
示例9: test_fit_hard_voting
# 需要导入模块: from imblearn.under_sampling import ClusterCentroids [as 别名]
# 或者: from imblearn.under_sampling.ClusterCentroids import fit_sample [as 别名]
def test_fit_hard_voting():
    """Check ``fit_sample`` with ``voting='hard'`` and an explicit ``KMeans``.

    Besides matching the reference values, every resampled row must be
    identical to at least one row of the original ``X``.
    """
    sampler = ClusterCentroids(
        ratio='auto',
        random_state=RND_SEED,
        estimator=KMeans(random_state=RND_SEED),
        voting='hard')
    X_resampled, y_resampled = sampler.fit_sample(X, Y)

    expected_X = np.array([
        [0.92923648, 0.76103773],
        [0.47104475, 0.44386323],
        [0.13347175, 0.12167502],
        [0.09125309, -0.85409574],
        [0.12372842, 0.6536186],
        [0.094035, -2.55298982],
    ])
    expected_y = np.array([0, 0, 0, 1, 1, 1])

    assert_allclose(X_resampled, expected_X, rtol=R_TOL)
    assert_array_equal(y_resampled, expected_y)

    # Each returned sample has to be an exact copy of an input sample.
    for sample in X_resampled:
        matches_row = np.all(sample == X, axis=1)
        assert np.any(matches_row)
示例10: test_fit_sample_half
# 需要导入模块: from imblearn.under_sampling import ClusterCentroids [as 别名]
# 或者: from imblearn.under_sampling.ClusterCentroids import fit_sample [as 别名]
def test_fit_sample_half():
    """Compare ``fit_sample`` with ``ratio=.5`` against reference values.

    The reference output holds three samples of class 0 followed by six
    samples of class 1; features are compared with
    ``assert_array_almost_equal``.
    """
    sampler = ClusterCentroids(ratio=.5, random_state=RND_SEED)
    X_resampled, y_resampled = sampler.fit_sample(X, Y)

    expected_X = np.array([
        [0.92923648, 0.76103773],
        [0.47104475, 0.44386323],
        [0.13347175, 0.12167502],
        [0.09125309, -0.85409574],
        [0.19220316, 0.32337101],
        [0.094035, -2.55298982],
        [0.20792588, 1.49407907],
        [0.04352327, -0.20515826],
        [0.12372842, 0.6536186],
    ])
    expected_y = np.array([0, 0, 0, 1, 1, 1, 1, 1, 1])

    assert_array_almost_equal(X_resampled, expected_X)
    assert_array_equal(y_resampled, expected_y)
示例11: make_classification
# 需要导入模块: from imblearn.under_sampling import ClusterCentroids [as 别名]
# 或者: from imblearn.under_sampling.ClusterCentroids import fit_sample [as 别名]
from imblearn.under_sampling import ClusterCentroids
# Generate a two-class dataset: 5000 samples, 20 features, class weights
# 0.1 / 0.9, with random_state fixed to 10
X, y = make_classification(n_classes=2, class_sep=2, weights=[0.1, 0.9],
n_informative=3, n_redundant=1, flip_y=0,
n_features=20, n_clusters_per_class=1,
n_samples=5000, random_state=10)
# Instantiate a PCA object for the sake of easy visualisation
pca = PCA(n_components=2)
# Fit and transform X to visualise inside a 2D feature space
X_vis = pca.fit_transform(X)
# Apply Cluster Centroids with its default parameters
cc = ClusterCentroids()
X_resampled, y_resampled = cc.fit_sample(X, y)
# Project the resampled data with the PCA already fitted on the original X,
# so both sets are drawn in the same 2D space
X_res_vis = pca.transform(X_resampled)
# Two subplots, unpack the axes array immediately
f, (ax1, ax2) = plt.subplots(1, 2)
# Left panel: original data, one scatter call per class
ax1.scatter(X_vis[y == 0, 0], X_vis[y == 0, 1], label="Class #0", alpha=0.5,
edgecolor=almost_black, facecolor=palette[0], linewidth=0.15)
ax1.scatter(X_vis[y == 1, 0], X_vis[y == 1, 1], label="Class #1", alpha=0.5,
edgecolor=almost_black, facecolor=palette[2], linewidth=0.15)
ax1.set_title('Original set')
# Right panel: resampled data, class 0
ax2.scatter(X_res_vis[y_resampled == 0, 0], X_res_vis[y_resampled == 0, 1],
label="Class #0", alpha=.5, edgecolor=almost_black,
facecolor=palette[0], linewidth=0.15)
ax2.scatter(X_res_vis[y_resampled == 1, 0], X_res_vis[y_resampled == 1, 1],
示例12: train_test_split
# 需要导入模块: from imblearn.under_sampling import ClusterCentroids [as 别名]
# 或者: from imblearn.under_sampling.ClusterCentroids import fit_sample [as 别名]
plt.text(j, i, cm[i, j],
horizontalalignment="center",
color="white" if cm[i, j] > thresh else "black")
plt.tight_layout()
plt.ylabel('True label')
plt.xlabel('Predicted label')
# Define X (every column except 'state') and y (the 'state' column).
# NOTE(review): y is selected with a boolean column mask, so it is presumably
# a 2-D (n, 1) array rather than a flat vector -- confirm the downstream
# calls accept that shape.
X = data.loc[:, data.columns != 'state'].values
y = data.loc[:, data.columns == 'state'].values
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0)

# Under-sample the training split only; the test split stays untouched.
cc = ClusterCentroids(random_state=0)
os_X, os_y = cc.fit_sample(X_train, y_train)

# XGBoost classifier trained on the resampled data; AUC is tracked on both
# the resampled training set and the original test set.
clf_XG = XGBClassifier(learning_rate=0.3, min_child_weight=1,
                       max_depth=6, gamma=0, subsample=1, max_delta_step=0,
                       colsample_bytree=1, reg_lambda=1, n_estimators=100,
                       seed=1000, scale_pos_weight=1000)
clf_XG.fit(os_X, os_y, eval_set=[(os_X, os_y), (X_test, y_test)],
           eval_metric='auc', verbose=False)
evals_result = clf_XG.evals_result()
y_true, y_pred = y_test, clf_XG.predict(X_test)

# F1 score, recall, precision.
# ``print`` is called as a function so the snippet runs on Python 3 (and
# still works on Python 2, where a single parenthesised argument prints the
# same text). Recall is computed once and reused.
print("F1_score : %.4g" % metrics.f1_score(y_true, y_pred))
recall = metrics.recall_score(y_true, y_pred)
print("Recall : %.4g" % recall)
print("Precision : %.4g" % metrics.precision_score(y_true, y_pred))
示例13: print
# 需要导入模块: from imblearn.under_sampling import ClusterCentroids [as 别名]
# 或者: from imblearn.under_sampling.ClusterCentroids import fit_sample [as 别名]
print("")
print('-----------------')
best_dict[imbalance] = [clf, roc_auc_score(y_test, clf.predict(X_test))]
# ROC analysis using only ClusterCentroids (the best-performing imbalance
# handler) in front of several classifiers.
classifiers = [
    LogisticRegression(),
    SVC(probability=True),
    GaussianNB(),
    DecisionTreeClassifier(),
    RandomForestClassifier(),
    KNeighborsClassifier(n_neighbors=6),
]
cc = ClusterCentroids()

# Split first, then under-sample the training part only.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=4444)
X_train, y_train = cc.fit_sample(X_train, y_train)

# One ROC curve and one AUC value per classifier, in ``classifiers`` order.
fprs = []
tprs = []
roc_aucs = []
for clf in classifiers:
    clf.fit(X_train, y_train)
    y_true = y_test
    # Score of the second class column of ``predict_proba``.
    y_pred = clf.predict_proba(X_test)[:, 1]
    fpr, tpr, _ = roc_curve(y_true, y_pred)
    roc_auc = auc(fpr, tpr)
    fprs.append(fpr)
    tprs.append(tpr)
    roc_aucs.append(roc_auc)
示例14: open
# 需要导入模块: from imblearn.under_sampling import ClusterCentroids [as 别名]
# 或者: from imblearn.under_sampling.ClusterCentroids import fit_sample [as 别名]
import sys, os, csv
from imblearn.under_sampling import ClusterCentroids
# Under-sample a CSV data set with ClusterCentroids.
#
# The first command-line argument is the input CSV path. The first row is
# treated as a header and skipped; in every other row the last column is the
# class label and the remaining columns are parsed as integer features.
# The resampled rows are written (without a header) to
# "<input path up to the first '.csv'>-cc-.csv".
input_csv_file = sys.argv[1]
input_csv = input_csv_file.split(".csv")[0]

# Read the whole input before touching the output, so a parse error does not
# leave an empty output file behind.
X = []
y = []
with open(input_csv_file, newline="") as input_file:
    reader = csv.reader(input_file, delimiter=',')
    next(reader, None)  # skip the header row (no-op on an empty file)
    for row in reader:
        y.append(row[-1])
        X.append([int(value) for value in row[:-1]])

cc = ClusterCentroids()
X_res, y_res = cc.fit_sample(X, y)
print(len(X_res))
print(len(y_res))

with open(input_csv + "-cc-.csv", 'w', newline='') as output_file:
    writer = csv.writer(output_file, delimiter=',')
    for features, label in zip(X_res, y_res):
        # Write the label as ONE field: ``list(label)`` would split a
        # multi-character label string into one column per character.
        writer.writerow(list(features) + [label])