This article collects typical usage examples of the Python function sklearn.datasets.make_multilabel_classification. If you are wondering what make_multilabel_classification does, how to call it, or where to find usage examples, the curated code samples below may help.
The following shows 15 code examples of make_multilabel_classification, sorted by popularity by default.
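Before the examples, here is a minimal sketch of calling make_multilabel_classification directly; the parameter values are illustrative only. In recent scikit-learn releases the label matrix Y is returned as a dense binary indicator by default, while older releases required return_indicator=True, as several examples below show.

from sklearn.datasets import make_multilabel_classification

# X: feature matrix of shape (n_samples, n_features)
# Y: binary label-indicator matrix of shape (n_samples, n_classes)
X, Y = make_multilabel_classification(n_samples=100, n_features=20,
                                      n_classes=5, n_labels=2,
                                      random_state=0)
print(X.shape)  # (100, 20)
print(Y.shape)  # (100, 5)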
Example 1: test_multilabel_representation_invariance
def test_multilabel_representation_invariance():
    # Generate some data
    n_classes = 4
    n_samples = 50
    _, y1 = make_multilabel_classification(n_features=1, n_classes=n_classes,
                                           random_state=0, n_samples=n_samples,
                                           allow_unlabeled=True)
    _, y2 = make_multilabel_classification(n_features=1, n_classes=n_classes,
                                           random_state=1, n_samples=n_samples,
                                           allow_unlabeled=True)

    # To make sure at least one empty label is present
    y1 += [0] * n_classes
    y2 += [0] * n_classes

    y1_sparse_indicator = sp.coo_matrix(y1)
    y2_sparse_indicator = sp.coo_matrix(y2)

    for name in MULTILABELS_METRICS:
        metric = ALL_METRICS[name]

        # XXX cruel hack to work with partial functions
        if isinstance(metric, partial):
            metric.__module__ = "tmp"
            metric.__name__ = name

        measure = metric(y1, y2)

        # Check representation invariance
        assert_almost_equal(metric(y1_sparse_indicator, y2_sparse_indicator),
                            measure,
                            err_msg="%s failed representation invariance "
                                    "between dense and sparse indicator "
                                    "formats." % name)
Example 2: test_normalize_option_multilabel_classification
def test_normalize_option_multilabel_classification():
    # Test in the multilabel case
    n_classes = 4
    n_samples = 100
    _, y_true = make_multilabel_classification(n_features=1,
                                               n_classes=n_classes,
                                               random_state=0,
                                               n_samples=n_samples)
    _, y_pred = make_multilabel_classification(n_features=1,
                                               n_classes=n_classes,
                                               random_state=1,
                                               n_samples=n_samples)

    # Be sure to have at least one empty label
    y_true += ([],)
    y_pred += ([],)
    n_samples += 1

    lb = LabelBinarizer().fit([range(n_classes)])
    y_true_binary_indicator = lb.transform(y_true)
    y_pred_binary_indicator = lb.transform(y_pred)

    for name, metrics in METRICS_WITH_NORMALIZE_OPTION.items():
        # List of list of labels
        measure = metrics(y_true, y_pred, normalize=True)
        assert_greater(measure, 0,
                       msg="We failed to test correctly the normalize option")
        assert_almost_equal(metrics(y_true, y_pred, normalize=False)
                            / n_samples, measure,
                            err_msg="Failed with %s" % name)

        # Indicator matrix format
        measure = metrics(y_true_binary_indicator, y_pred_binary_indicator,
                          normalize=True)
        assert_greater(measure, 0,
                       msg="We failed to test correctly the normalize option")
        assert_almost_equal(metrics(y_true_binary_indicator,
                                    y_pred_binary_indicator, normalize=False)
                            / n_samples, measure,
                            err_msg="Failed with %s" % name)
Example 3: test_sample_weight_invariance
def test_sample_weight_invariance(n_samples=50):
    random_state = check_random_state(0)

    # binary
    random_state = check_random_state(0)
    y_true = random_state.randint(0, 2, size=(n_samples, ))
    y_pred = random_state.randint(0, 2, size=(n_samples, ))
    y_score = random_state.random_sample(size=(n_samples,))
    for name in ALL_METRICS:
        if (name in METRICS_WITHOUT_SAMPLE_WEIGHT or
                name in METRIC_UNDEFINED_BINARY):
            continue
        metric = ALL_METRICS[name]
        if name in THRESHOLDED_METRICS:
            yield _named_check(check_sample_weight_invariance, name), name,\
                  metric, y_true, y_score
        else:
            yield _named_check(check_sample_weight_invariance, name), name,\
                  metric, y_true, y_pred

    # multiclass
    random_state = check_random_state(0)
    y_true = random_state.randint(0, 5, size=(n_samples, ))
    y_pred = random_state.randint(0, 5, size=(n_samples, ))
    y_score = random_state.random_sample(size=(n_samples, 5))
    for name in ALL_METRICS:
        if (name in METRICS_WITHOUT_SAMPLE_WEIGHT or
                name in METRIC_UNDEFINED_BINARY_MULTICLASS):
            continue
        metric = ALL_METRICS[name]
        if name in THRESHOLDED_METRICS:
            yield _named_check(check_sample_weight_invariance, name), name,\
                  metric, y_true, y_score
        else:
            yield _named_check(check_sample_weight_invariance, name), name,\
                  metric, y_true, y_pred

    # multilabel indicator
    _, ya = make_multilabel_classification(n_features=1, n_classes=20,
                                           random_state=0, n_samples=100,
                                           allow_unlabeled=False)
    _, yb = make_multilabel_classification(n_features=1, n_classes=20,
                                           random_state=1, n_samples=100,
                                           allow_unlabeled=False)
    y_true = np.vstack([ya, yb])
    y_pred = np.vstack([ya, ya])
    y_score = random_state.randint(1, 4, size=y_true.shape)
    for name in (MULTILABELS_METRICS + THRESHOLDED_MULTILABEL_METRICS +
                 MULTIOUTPUT_METRICS):
        if name in METRICS_WITHOUT_SAMPLE_WEIGHT:
            continue
        metric = ALL_METRICS[name]
        if name in THRESHOLDED_METRICS:
            yield (_named_check(check_sample_weight_invariance, name), name,
                   metric, y_true, y_score)
        else:
            yield (_named_check(check_sample_weight_invariance, name), name,
                   metric, y_true, y_pred)
Example 4: test_normalize_option_multilabel_classification
def test_normalize_option_multilabel_classification():
    # Test in the multilabel case
    n_classes = 4
    n_samples = 100

    # for both random_state 0 and 1, y_true and y_pred have at least one
    # unlabelled entry
    _, y_true = make_multilabel_classification(n_features=1,
                                               n_classes=n_classes,
                                               random_state=0,
                                               allow_unlabeled=True,
                                               n_samples=n_samples)
    _, y_pred = make_multilabel_classification(n_features=1,
                                               n_classes=n_classes,
                                               random_state=1,
                                               allow_unlabeled=True,
                                               n_samples=n_samples)

    # To make sure at least one empty label is present
    y_true += [0] * n_classes
    y_pred += [0] * n_classes

    for name in METRICS_WITH_NORMALIZE_OPTION:
        metrics = ALL_METRICS[name]
        measure = metrics(y_true, y_pred, normalize=True)
        assert_greater(measure, 0,
                       msg="We failed to test correctly the normalize option")
        assert_almost_equal(metrics(y_true, y_pred, normalize=False)
                            / n_samples, measure,
                            err_msg="Failed with %s" % name)
Example 5: test_multilabel_classification_report
def test_multilabel_classification_report():
    n_classes = 4
    n_samples = 50
    _, y_true = make_multilabel_classification(n_features=1,
                                               n_samples=n_samples,
                                               n_classes=n_classes,
                                               random_state=0)
    _, y_pred = make_multilabel_classification(n_features=1,
                                               n_samples=n_samples,
                                               n_classes=n_classes,
                                               random_state=1)

    expected_report = """\
             precision    recall  f1-score   support

          0       0.50      0.67      0.57        24
          1       0.51      0.74      0.61        27
          2       0.29      0.08      0.12        26
          3       0.52      0.56      0.54        27

avg / total       0.45      0.51      0.46       104
"""

    report = classification_report(y_true, y_pred)
    assert_equal(report, expected_report)
Example 6: benchmark
def benchmark(metrics=tuple(v for k, v in sorted(METRICS.items())),
              formats=tuple(v for k, v in sorted(FORMATS.items())),
              samples=1000, classes=4, density=.2,
              n_times=5):
    """Times metric calculations for a number of inputs

    Parameters
    ----------
    metrics : array-like of callables (1d or 0d)
        The metric functions to time.

    formats : array-like of callables (1d or 0d)
        These may transform a dense indicator matrix into multilabel
        representation.

    samples : array-like of ints (1d or 0d)
        The number of samples to generate as input.

    classes : array-like of ints (1d or 0d)
        The number of classes in the input.

    density : array-like of floats (1d or 0d)
        The density of positive labels in the input.

    n_times : int
        Time calling the metric n_times times.

    Returns
    -------
    array of floats shaped like (metrics, formats, samples, classes, density)
        Time in seconds.
    """
    metrics = np.atleast_1d(metrics)
    samples = np.atleast_1d(samples)
    classes = np.atleast_1d(classes)
    density = np.atleast_1d(density)
    formats = np.atleast_1d(formats)

    out = np.zeros((len(metrics), len(formats), len(samples), len(classes),
                    len(density)), dtype=float)
    it = itertools.product(samples, classes, density)
    for i, (s, c, d) in enumerate(it):
        _, y_true = make_multilabel_classification(n_samples=s, n_features=1,
                                                   n_classes=c, n_labels=d * c,
                                                   return_indicator=True,
                                                   random_state=42)
        _, y_pred = make_multilabel_classification(n_samples=s, n_features=1,
                                                   n_classes=c, n_labels=d * c,
                                                   return_indicator=True,
                                                   random_state=84)
        for j, f in enumerate(formats):
            f_true = f(y_true)
            f_pred = f(y_pred)
            for k, metric in enumerate(metrics):
                t = timeit(partial(metric, f_true, f_pred), number=n_times)
                out[k, j].flat[i] = t
    return out
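To show how this timing grid is meant to be driven, here is a hypothetical call, assuming the METRICS and FORMATS dictionaries referenced by the defaults are defined by the enclosing benchmark script; the argument values are illustrative only.

# Sketch only: relies on the METRICS/FORMATS definitions of the script above.
out = benchmark(samples=[100, 1000],   # two sample sizes
                classes=[4, 16],       # two class counts
                density=[0.2, 0.5],    # two label densities
                n_times=3)
# out is shaped (n_metrics, n_formats, n_samples, n_classes, n_density);
# each entry holds the measured time in seconds for one combination.
print(out.shape)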
Example 7: test_grid_search_with_multioutput_data
def test_grid_search_with_multioutput_data():
    # Test search with multi-output estimator
    X, y = make_multilabel_classification(random_state=0)

    est_parameters = {"max_depth": [1, 2, 3, 4]}
    cv = KFold(y.shape[0], random_state=0)

    estimators = [DecisionTreeRegressor(random_state=0),
                  DecisionTreeClassifier(random_state=0)]

    # Test with grid search cv
    for est in estimators:
        grid_search = GridSearchCV(est, est_parameters, cv=cv)
        grid_search.fit(X, y)
        for parameters, _, cv_validation_scores in grid_search.grid_scores_:
            est.set_params(**parameters)
            for i, (train, test) in enumerate(cv):
                est.fit(X[train], y[train])
                correct_score = est.score(X[test], y[test])
                assert_almost_equal(correct_score, cv_validation_scores[i])

    # Test with a randomized search
    for est in estimators:
        random_search = RandomizedSearchCV(est, est_parameters,
                                           cv=cv, n_iter=3)
        random_search.fit(X, y)
        for parameters, _, cv_validation_scores in random_search.grid_scores_:
            est.set_params(**parameters)
            for i, (train, test) in enumerate(cv):
                est.fit(X[train], y[train])
                correct_score = est.score(X[test], y[test])
                assert_almost_equal(correct_score, cv_validation_scores[i])
Example 8: get_multilabel
def get_multilabel(self):
    return make_multilabel_classification(n_samples=100,
                                          n_features=10,
                                          n_classes=5,
                                          n_labels=5,
                                          return_indicator=True,
                                          random_state=1)
Example 9: test_output_transformer
def test_output_transformer():
    X, y = datasets.make_multilabel_classification(return_indicator=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

    # Check that the random_state values are different
    transformer = GaussianRandomProjection(n_components=5, random_state=None)
    for name, ForestEstimator in FOREST_ESTIMATORS.items():
        est = ForestEstimator(random_state=5, output_transformer=transformer)
        est.fit(X_train, y_train)
        y_pred = est.predict(X_test)
        assert_equal(y_pred.shape, y_test.shape)

        random_state = [sub.output_transformer_.random_state
                        for sub in est.estimators_]
        assert_equal(len(set(random_state)), est.n_estimators)

    # Check that the random_state values are equal
    transformer = FixedStateTransformer(GaussianRandomProjection(
        n_components=5), random_seed=0)
    for name, ForestEstimator in FOREST_ESTIMATORS.items():
        est = ForestEstimator(random_state=5, output_transformer=transformer)
        est.fit(X_train, y_train)
        y_pred = est.predict(X_test)
        assert_equal(y_pred.shape, y_test.shape)

        random_state = [sub.output_transformer_.random_state
                        for sub in est.estimators_]
        assert_equal(len(set(random_state)), 1)
        assert_equal(random_state[0], 0)
Example 10: get_codes
def get_codes(self):
    X, Y = make_multilabel_classification(n_samples=15, n_labels=8,
                                          n_classes=8, random_state=0)
    self.classifier_labels = Y
    self.classifier_error_codes = LabelBinarizer().fit_transform(Y)
    print(self.classifier_labels)
    print(self.classifier_error_codes)

    f = open('ecoc_classifiers', 'w')
    for row in self.classifier_labels:
        str_op = '['
        for label in row:
            str_op += str(label) + ','
        str_op += ']'
        f.write(str_op)
        f.write('\n')
    for row in self.classifier_error_codes:
        str_op = '['
        for label in row:
            str_op += str(label) + ','
        str_op += ']'
        f.write(str_op)
    f.flush()
    return
Example 11: testMultiClassification
def testMultiClassification(self):
    """TODO(ilblackdragon): Implement multi-output classification.
    """
    random.seed(42)
    n_classes = 5
    X, y = datasets.make_multilabel_classification(n_classes=n_classes,
                                                   random_state=42)
Example 12: check_alternative_lrap_implementation
def check_alternative_lrap_implementation(lrap_score, n_classes=5,
                                          n_samples=20, random_state=0):
    _, y_true = make_multilabel_classification(n_features=1,
                                               allow_unlabeled=False,
                                               random_state=random_state,
                                               n_classes=n_classes,
                                               n_samples=n_samples)

    # Score with ties
    y_score = sparse_random_matrix(n_components=y_true.shape[0],
                                   n_features=y_true.shape[1],
                                   random_state=random_state)

    if hasattr(y_score, "toarray"):
        y_score = y_score.toarray()
    score_lrap = label_ranking_average_precision_score(y_true, y_score)
    score_my_lrap = _my_lrap(y_true, y_score)
    assert_almost_equal(score_lrap, score_my_lrap)

    # Uniform score
    random_state = check_random_state(random_state)
    y_score = random_state.uniform(size=(n_samples, n_classes))
    score_lrap = label_ranking_average_precision_score(y_true, y_score)
    score_my_lrap = _my_lrap(y_true, y_score)
    assert_almost_equal(score_lrap, score_my_lrap)
Example 13: test_ovr_fit_predict_sparse
def test_ovr_fit_predict_sparse():
    for sparse in [sp.csr_matrix, sp.csc_matrix, sp.coo_matrix, sp.dok_matrix,
                   sp.lil_matrix]:
        base_clf = MultinomialNB(alpha=1)

        X, Y = datasets.make_multilabel_classification(n_samples=100,
                                                       n_features=20,
                                                       n_classes=5,
                                                       n_labels=3,
                                                       length=50,
                                                       allow_unlabeled=True,
                                                       random_state=0)
        X_train, Y_train = X[:80], Y[:80]
        X_test = X[80:]

        clf = OneVsRestClassifier(base_clf).fit(X_train, Y_train)
        Y_pred = clf.predict(X_test)

        clf_sprs = OneVsRestClassifier(base_clf).fit(X_train, sparse(Y_train))
        Y_pred_sprs = clf_sprs.predict(X_test)

        assert_true(clf.multilabel_)
        assert_true(sp.issparse(Y_pred_sprs))
        assert_array_equal(Y_pred_sprs.toarray(), Y_pred)

        # Test predict_proba
        Y_proba = clf_sprs.predict_proba(X_test)

        # predict assigns a label if the probability that the
        # sample has the label is greater than 0.5.
        pred = Y_proba > 0.5
        assert_array_equal(pred, Y_pred_sprs.toarray())

        # Test decision_function
        clf_sprs = OneVsRestClassifier(svm.SVC()).fit(X_train, sparse(Y_train))
        dec_pred = (clf_sprs.decision_function(X_test) > 0).astype(int)
        assert_array_equal(dec_pred, clf_sprs.predict(X_test).toarray())
Example 14: test_sparse_input
def test_sparse_input():
    X, y = datasets.make_multilabel_classification(random_state=0,
                                                   n_samples=50)

    for name, sparse_matrix in product(FOREST_ESTIMATORS,
                                       (csr_matrix, csc_matrix, coo_matrix)):
        yield check_sparse_input, name, X, sparse_matrix(X), y
Example 15: test_ovr_multilabel_predict_proba
def test_ovr_multilabel_predict_proba():
    base_clf = MultinomialNB(alpha=1)
    for au in (False, True):
        X, Y = datasets.make_multilabel_classification(n_samples=100,
                                                       n_features=20,
                                                       n_classes=5,
                                                       n_labels=3,
                                                       length=50,
                                                       allow_unlabeled=au,
                                                       return_indicator=True,
                                                       random_state=0)
        X_train, Y_train = X[:80], Y[:80]
        X_test, Y_test = X[80:], Y[80:]
        clf = OneVsRestClassifier(base_clf).fit(X_train, Y_train)

        # decision function only estimator. Fails in current implementation.
        decision_only = OneVsRestClassifier(svm.SVR()).fit(X_train, Y_train)
        assert_raises(AttributeError, decision_only.predict_proba, X_test)

        # Estimator with predict_proba disabled, depending on parameters.
        decision_only = OneVsRestClassifier(svm.SVC(probability=False))
        decision_only.fit(X_train, Y_train)
        assert_raises(AttributeError, decision_only.predict_proba, X_test)

        Y_pred = clf.predict(X_test)
        Y_proba = clf.predict_proba(X_test)

        # predict assigns a label if the probability that the
        # sample has the label is greater than 0.5.
        pred = Y_proba > .5
        assert_array_equal(pred, Y_pred)