This article collects and summarizes typical usage examples of the Python method sklearn.datasets.make_multilabel_classification. If you are unsure what datasets.make_multilabel_classification does or how to use it, the curated code examples below should help. You can also explore further usage examples from its containing module, sklearn.datasets.
The following presents 15 code examples of the datasets.make_multilabel_classification method, sorted by popularity by default.
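Before the examples, here is a minimal, self-contained usage sketch of make_multilabel_classification; the parameter values are illustrative choices, not taken from any of the examples below.

# Minimal usage sketch; the parameter values here are illustrative only.
from sklearn.datasets import make_multilabel_classification

# 100 samples, 20 features, 5 possible labels, about 2 labels per sample on average.
X, Y = make_multilabel_classification(n_samples=100, n_features=20,
                                      n_classes=5, n_labels=2,
                                      allow_unlabeled=False, random_state=0)

print(X.shape)  # (100, 20) feature matrix
print(Y.shape)  # (100, 5) binary label-indicator matrix; each row marks that sample's labels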
Example 1: test_predict_proba_multilabel
# Required import: from sklearn import datasets [as alias]
# Or: from sklearn.datasets import make_multilabel_classification [as alias]
def test_predict_proba_multilabel():
    # Test that predict_proba works as expected for multilabel.
    # Multilabel should not use softmax which makes probabilities sum to 1
    X, Y = make_multilabel_classification(n_samples=50, random_state=0,
                                          return_indicator=True)
    n_samples, n_classes = Y.shape

    clf = MLPClassifier(solver='lbfgs', hidden_layer_sizes=30,
                        random_state=0)
    clf.fit(X, Y)
    y_proba = clf.predict_proba(X)

    assert_equal(y_proba.shape, (n_samples, n_classes))
    assert_array_equal(y_proba > 0.5, Y)

    y_log_proba = clf.predict_log_proba(X)
    proba_max = y_proba.argmax(axis=1)
    proba_log_max = y_log_proba.argmax(axis=1)

    assert_greater((y_proba.sum(1) - 1).dot(y_proba.sum(1) - 1), 1e-10)
    assert_array_equal(proba_max, proba_log_max)
    assert_array_equal(y_log_proba, np.log(y_proba))
Example 2: check_alternative_lrap_implementation
# Required import: from sklearn import datasets [as alias]
# Or: from sklearn.datasets import make_multilabel_classification [as alias]
def check_alternative_lrap_implementation(lrap_score, n_classes=5,
                                          n_samples=20, random_state=0):
    _, y_true = make_multilabel_classification(n_features=1,
                                               allow_unlabeled=False,
                                               random_state=random_state,
                                               n_classes=n_classes,
                                               n_samples=n_samples)

    # Score with ties
    y_score = sparse_random_matrix(n_components=y_true.shape[0],
                                   n_features=y_true.shape[1],
                                   random_state=random_state)
    if hasattr(y_score, "toarray"):
        y_score = y_score.toarray()
    score_lrap = label_ranking_average_precision_score(y_true, y_score)
    score_my_lrap = _my_lrap(y_true, y_score)
    assert_almost_equal(score_lrap, score_my_lrap)

    # Uniform score
    random_state = check_random_state(random_state)
    y_score = random_state.uniform(size=(n_samples, n_classes))
    score_lrap = label_ranking_average_precision_score(y_true, y_score)
    score_my_lrap = _my_lrap(y_true, y_score)
    assert_almost_equal(score_lrap, score_my_lrap)
Example 3: test_multilabel_sample_weight_invariance
# Required import: from sklearn import datasets [as alias]
# Or: from sklearn.datasets import make_multilabel_classification [as alias]
def test_multilabel_sample_weight_invariance(name):
    # multilabel indicator
    random_state = check_random_state(0)
    _, ya = make_multilabel_classification(n_features=1, n_classes=20,
                                           random_state=0, n_samples=100,
                                           allow_unlabeled=False)
    _, yb = make_multilabel_classification(n_features=1, n_classes=20,
                                           random_state=1, n_samples=100,
                                           allow_unlabeled=False)
    y_true = np.vstack([ya, yb])
    y_pred = np.vstack([ya, ya])
    y_score = random_state.randint(1, 4, size=y_true.shape)

    metric = ALL_METRICS[name]
    if name in THRESHOLDED_METRICS:
        check_sample_weight_invariance(name, metric, y_true, y_score)
    else:
        check_sample_weight_invariance(name, metric, y_true, y_pred)
Example 4: test_multilabel_classification
# Required import: from sklearn import datasets [as alias]
# Or: from sklearn.datasets import make_multilabel_classification [as alias]
def test_multilabel_classification():
    # Test that multi-label classification works as expected.
    # test fit method
    X, y = make_multilabel_classification(n_samples=50, random_state=0,
                                          return_indicator=True)
    mlp = MLPClassifier(solver='lbfgs', hidden_layer_sizes=50, alpha=1e-5,
                        max_iter=150, random_state=0, activation='logistic',
                        learning_rate_init=0.2)
    mlp.fit(X, y)
    assert_greater(mlp.score(X, y), 0.97)

    # test partial fit method
    mlp = MLPClassifier(solver='sgd', hidden_layer_sizes=50, max_iter=150,
                        random_state=0, activation='logistic', alpha=1e-5,
                        learning_rate_init=0.2)
    for i in range(100):
        mlp.partial_fit(X, y, classes=[0, 1, 2, 3, 4])
    assert_greater(mlp.score(X, y), 0.9)

    # Make sure early stopping still works now that splitting is stratified by
    # default (it is disabled for multilabel classification)
    mlp = MLPClassifier(early_stopping=True)
    mlp.fit(X, y).predict(X)
Example 5: test_ovr_multilabel_dataset
# Required import: from sklearn import datasets [as alias]
# Or: from sklearn.datasets import make_multilabel_classification [as alias]
def test_ovr_multilabel_dataset():
    base_clf = MultinomialNB(alpha=1)
    for au, prec, recall in zip((True, False), (0.51, 0.66), (0.51, 0.80)):
        X, Y = datasets.make_multilabel_classification(n_samples=100,
                                                       n_features=20,
                                                       n_classes=5,
                                                       n_labels=2,
                                                       length=50,
                                                       allow_unlabeled=au,
                                                       random_state=0)
        X_train, Y_train = X[:80], Y[:80]
        X_test, Y_test = X[80:], Y[80:]
        clf = OneVsRestClassifier(base_clf).fit(X_train, Y_train)
        Y_pred = clf.predict(X_test)

        assert clf.multilabel_
        assert_almost_equal(precision_score(Y_test, Y_pred, average="micro"),
                            prec,
                            decimal=2)
        assert_almost_equal(recall_score(Y_test, Y_pred, average="micro"),
                            recall,
                            decimal=2)
Example 6: test_make_multilabel_classification_return_indicator
# Required import: from sklearn import datasets [as alias]
# Or: from sklearn.datasets import make_multilabel_classification [as alias]
def test_make_multilabel_classification_return_indicator():
    for allow_unlabeled, min_length in zip((True, False), (0, 1)):
        X, Y = make_multilabel_classification(n_samples=25, n_features=20,
                                              n_classes=3, random_state=0,
                                              allow_unlabeled=allow_unlabeled)
        assert_equal(X.shape, (25, 20), "X shape mismatch")
        assert_equal(Y.shape, (25, 3), "Y shape mismatch")
        assert np.all(np.sum(Y, axis=0) > min_length)

    # Also test return_distributions and return_indicator with True
    X2, Y2, p_c, p_w_c = make_multilabel_classification(
        n_samples=25, n_features=20, n_classes=3, random_state=0,
        allow_unlabeled=allow_unlabeled, return_distributions=True)

    assert_array_almost_equal(X, X2)
    assert_array_equal(Y, Y2)
    assert_equal(p_c.shape, (3,))
    assert_almost_equal(p_c.sum(), 1)
    assert_equal(p_w_c.shape, (20, 3))
    assert_almost_equal(p_w_c.sum(axis=0), [1] * 3)
Example 7: test_sparse_multilabel_targets
# Required import: from sklearn import datasets [as alias]
# Or: from sklearn.datasets import make_multilabel_classification [as alias]
def test_sparse_multilabel_targets(n_neighbors, n_jobs):
    X, y_dense = make_multilabel_classification(random_state=123)
    thresh = 80

    knn = KNeighborsClassifier(n_neighbors=n_neighbors,
                               n_jobs=n_jobs)
    assert not issparse(y_dense)
    knn.fit(X[:thresh], y_dense[:thresh])
    y_pred = knn.predict(X[thresh:])

    y_sparse = csr_matrix(y_dense)
    knn = KNeighborsClassifier(n_neighbors=n_neighbors,
                               n_jobs=n_jobs)
    assert issparse(y_sparse)
    knn.fit(X[:thresh], y_sparse[:thresh])
    y_pred_sparse = knn.predict(X[thresh:, :])

    # Test array equality
    np.testing.assert_array_equal(y_pred, y_pred_sparse.toarray())
Example 8: generate_classification
# Required import: from sklearn import datasets [as alias]
# Or: from sklearn.datasets import make_multilabel_classification [as alias]
def generate_classification(self, num_classes, num_features, num_samples, test_split=0.1, seed=0):
    """Generate a classification task

    Arguments:
        num_classes {int} -- Number of classes
        num_features {int} -- Number of features
        num_samples {int} -- Number of samples

    Keyword Arguments:
        test_split {float} -- Size of test split (default: {0.1})
        seed {int} -- A random seed (default: {0})
    """
    # X, Y = make_classification(n_samples=800, n_features=num_feats, n_classes=num_classes, n_informative=4)
    X, y = make_multilabel_classification(
        n_samples=num_samples, n_features=num_features, n_classes=num_classes, n_labels=0.01,
        length=50, allow_unlabeled=False, sparse=False, return_indicator='dense',
        return_distributions=False, random_state=seed
    )
    Y = np.argmax(y, axis=1)

    self.categorical_features = [False] * num_features
    self.problem_type = ProblemType.FeatureClassification
    self.X, self.Y = X, Y
    self._split_data(test_split, seed)
Example 9: test_multilabel_classification
# Required import: from sklearn import datasets [as alias]
# Or: from sklearn.datasets import make_multilabel_classification [as alias]
def test_multilabel_classification():
    # Test that multi-label classification works as expected.
    # test fit method
    X, y = make_multilabel_classification(n_samples=50, random_state=0,
                                          return_indicator=True)
    mlp = MLPClassifier(solver='lbfgs', hidden_layer_sizes=50, alpha=1e-5,
                        max_iter=150, random_state=0, activation='logistic',
                        learning_rate_init=0.2)
    mlp.fit(X, y)
    assert_equal(mlp.score(X, y), 1)

    # test partial fit method
    mlp = MLPClassifier(solver='sgd', hidden_layer_sizes=50, max_iter=150,
                        random_state=0, activation='logistic', alpha=1e-5,
                        learning_rate_init=0.2)
    for i in range(100):
        mlp.partial_fit(X, y, classes=[0, 1, 2, 3, 4])
    assert_greater(mlp.score(X, y), 0.9)
Example 10: test_ovr_multilabel_dataset
# Required import: from sklearn import datasets [as alias]
# Or: from sklearn.datasets import make_multilabel_classification [as alias]
def test_ovr_multilabel_dataset():
    base_clf = MultinomialNB(alpha=1)
    for au, prec, recall in zip((True, False), (0.51, 0.66), (0.51, 0.80)):
        X, Y = datasets.make_multilabel_classification(n_samples=100,
                                                       n_features=20,
                                                       n_classes=5,
                                                       n_labels=2,
                                                       length=50,
                                                       allow_unlabeled=au,
                                                       random_state=0)
        X_train, Y_train = X[:80], Y[:80]
        X_test, Y_test = X[80:], Y[80:]
        clf = OneVsRestClassifier(base_clf).fit(X_train, Y_train)
        Y_pred = clf.predict(X_test)

        assert_true(clf.multilabel_)
        assert_almost_equal(precision_score(Y_test, Y_pred, average="micro"),
                            prec,
                            decimal=2)
        assert_almost_equal(recall_score(Y_test, Y_pred, average="micro"),
                            recall,
                            decimal=2)
Example 11: test_make_multilabel_classification_return_indicator
# Required import: from sklearn import datasets [as alias]
# Or: from sklearn.datasets import make_multilabel_classification [as alias]
def test_make_multilabel_classification_return_indicator():
    for allow_unlabeled, min_length in zip((True, False), (0, 1)):
        X, Y = make_multilabel_classification(n_samples=25, n_features=20,
                                              n_classes=3, random_state=0,
                                              allow_unlabeled=allow_unlabeled)
        assert_equal(X.shape, (25, 20), "X shape mismatch")
        assert_equal(Y.shape, (25, 3), "Y shape mismatch")
        assert_true(np.all(np.sum(Y, axis=0) > min_length))

    # Also test return_distributions and return_indicator with True
    X2, Y2, p_c, p_w_c = make_multilabel_classification(
        n_samples=25, n_features=20, n_classes=3, random_state=0,
        allow_unlabeled=allow_unlabeled, return_distributions=True)

    assert_array_equal(X, X2)
    assert_array_equal(Y, Y2)
    assert_equal(p_c.shape, (3,))
    assert_almost_equal(p_c.sum(), 1)
    assert_equal(p_w_c.shape, (20, 3))
    assert_almost_equal(p_w_c.sum(axis=0), [1] * 3)
Example 12: setup_mlc_dataset
# Required import: from sklearn import datasets [as alias]
# Or: from sklearn.datasets import make_multilabel_classification [as alias]
def setup_mlc_dataset(self):
    X, Y = datasets.make_multilabel_classification(
        n_features=5, random_state=1126)
    return Dataset(X, Y)
Example 13: setUp
# Required import: from sklearn import datasets [as alias]
# Or: from sklearn.datasets import make_multilabel_classification [as alias]
def setUp(self):
    X, Y = datasets.make_multilabel_classification(random_state=1126)
    self.X_train, self.X_test, self.Y_train, self.Y_test = \
        train_test_split(X, Y, test_size=0.3, random_state=1126)
Example 14: split_train_test
# Required import: from sklearn import datasets [as alias]
# Or: from sklearn.datasets import make_multilabel_classification [as alias]
def split_train_test(test_size):
    # choose a dataset with unbalanced class instances
    data = make_multilabel_classification(
        n_samples=300, n_classes=10, allow_unlabeled=False)
    X = StandardScaler().fit_transform(data[0])
    Y = data[1]

    X_trn, X_tst, Y_trn, Y_tst = train_test_split(X, Y, test_size=test_size)
    trn_ds = Dataset(X_trn, Y_trn[:5].tolist() + [None] * (len(Y_trn) - 5))
    tst_ds = Dataset(X_tst, Y_tst.tolist())
    fully_labeled_trn_ds = Dataset(X_trn, Y_trn)
    return trn_ds, tst_ds, fully_labeled_trn_ds
Example 15: _prepare_for_use
# Required import: from sklearn import datasets [as alias]
# Or: from sklearn.datasets import make_multilabel_classification [as alias]
def _prepare_for_use(self):
    self._random_state = check_random_state(self.random_state)
    self.X, self.y = make_multilabel_classification(n_samples=self.n_samples,
                                                    n_features=self.n_features,
                                                    n_classes=self.n_targets,
                                                    n_labels=self.n_labels,
                                                    random_state=self._random_state)
    self.target_names = ["target_" + str(i) for i in range(self.n_targets)]
    self.feature_names = ["att_num_" + str(i) for i in range(self.n_num_features)]
    self.target_values = np.unique(self.y).tolist() if self.n_targets == 1 else \
        [np.unique(self.y[:, i]).tolist() for i in range(self.n_targets)]