当前位置: 首页>>代码示例>>Python>>正文


Python datasets.make_multilabel_classification方法代码示例

本文整理汇总了Python中sklearn.datasets.make_multilabel_classification方法的典型用法代码示例。如果您正苦于以下问题:Python datasets.make_multilabel_classification方法的具体用法?Python datasets.make_multilabel_classification怎么用?Python datasets.make_multilabel_classification使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sklearn.datasets的用法示例。


在下文中一共展示了datasets.make_multilabel_classification方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_predict_proba_multilabel

# 需要导入模块: from sklearn import datasets [as 别名]
# 或者: from sklearn.datasets import make_multilabel_classification [as 别名]
def test_predict_proba_multilabel():
    """Check ``predict_proba`` / ``predict_log_proba`` on a multilabel target.

    Multilabel output should not go through a softmax, so the per-sample
    probabilities are not expected to sum to 1.
    """
    X, Y = make_multilabel_classification(n_samples=50, random_state=0,
                                          return_indicator=True)
    n_samples, n_classes = Y.shape

    clf = MLPClassifier(solver='lbfgs', hidden_layer_sizes=30,
                        random_state=0)
    clf.fit(X, Y)
    y_proba = clf.predict_proba(X)

    # One probability per (sample, label); thresholding at 0.5 must give
    # back the indicator matrix the classifier was fitted on.
    assert y_proba.shape == (n_samples, n_classes)
    assert_array_equal(y_proba > 0.5, Y)

    y_log_proba = clf.predict_log_proba(X)
    proba_max = y_proba.argmax(axis=1)
    proba_log_max = y_log_proba.argmax(axis=1)

    # Squared distance of the row sums from 1: it is strictly positive only
    # when the rows are not normalised to sum to 1.
    row_sum_offset = y_proba.sum(1) - 1
    assert row_sum_offset.dot(row_sum_offset) > 1e-10
    assert_array_equal(proba_max, proba_log_max)
    assert_array_equal(y_log_proba, np.log(y_proba))
开发者ID:PacktPublishing,项目名称:Mastering-Elasticsearch-7.0,代码行数:24,代码来源:test_mlp.py

示例2: check_alternative_lrap_implementation

# 需要导入模块: from sklearn import datasets [as 别名]
# 或者: from sklearn.datasets import make_multilabel_classification [as 别名]
def check_alternative_lrap_implementation(lrap_score, n_classes=5,
                                          n_samples=20, random_state=0):
    """Compare ``lrap_score`` with the reference ``_my_lrap`` implementation.

    Parameters
    ----------
    lrap_score : callable
        Implementation under test, called as ``lrap_score(y_true, y_score)``.
    n_classes : int, default=5
        Number of labels in the generated multilabel target.
    n_samples : int, default=20
        Number of generated samples.
    random_state : int, default=0
        Seed used for the generated target and for both score matrices.

    Raises
    ------
    AssertionError
        If the two implementations disagree on either score matrix.
    """
    _, y_true = make_multilabel_classification(n_features=1,
                                               allow_unlabeled=False,
                                               random_state=random_state,
                                               n_classes=n_classes,
                                               n_samples=n_samples)

    # Score with ties
    y_score = sparse_random_matrix(n_components=y_true.shape[0],
                                   n_features=y_true.shape[1],
                                   random_state=random_state)
    if hasattr(y_score, "toarray"):
        y_score = y_score.toarray()
    # Use the implementation handed in by the caller rather than a
    # hard-coded one, so the ``lrap_score`` argument is actually exercised.
    assert_almost_equal(lrap_score(y_true, y_score),
                        _my_lrap(y_true, y_score))

    # Uniform score
    rng = check_random_state(random_state)
    y_score = rng.uniform(size=(n_samples, n_classes))
    assert_almost_equal(lrap_score(y_true, y_score),
                        _my_lrap(y_true, y_score))
开发者ID:PacktPublishing,项目名称:Mastering-Elasticsearch-7.0,代码行数:27,代码来源:test_ranking.py

示例3: test_multilabel_sample_weight_invariance

# 需要导入模块: from sklearn import datasets [as 别名]
# 或者: from sklearn.datasets import make_multilabel_classification [as 别名]
def test_multilabel_sample_weight_invariance(name):
    """Run the sample-weight invariance check for metric ``name``.

    The inputs are multilabel indicator matrices. Metrics listed in
    ``THRESHOLDED_METRICS`` are fed integer scores, every other metric is
    fed label predictions.
    """
    rng = check_random_state(0)

    # Two generated targets that differ only in their seed.
    shared_kwargs = dict(n_features=1, n_classes=20, n_samples=100,
                         allow_unlabeled=False)
    _, ya = make_multilabel_classification(random_state=0, **shared_kwargs)
    _, yb = make_multilabel_classification(random_state=1, **shared_kwargs)

    y_true = np.vstack([ya, yb])
    y_pred = np.vstack([ya, ya])
    y_score = rng.randint(1, 4, size=y_true.shape)

    metric = ALL_METRICS[name]
    estimate = y_score if name in THRESHOLDED_METRICS else y_pred
    check_sample_weight_invariance(name, metric, y_true, estimate)
开发者ID:PacktPublishing,项目名称:Mastering-Elasticsearch-7.0,代码行数:20,代码来源:test_common.py

示例4: test_multilabel_classification

# 需要导入模块: from sklearn import datasets [as 别名]
# 或者: from sklearn.datasets import make_multilabel_classification [as 别名]
def test_multilabel_classification():
    """Check that MLPClassifier works on a multilabel target.

    Covers ``fit`` with the lbfgs solver, repeated ``partial_fit`` with the
    sgd solver, and a fit/predict round trip with early stopping enabled.
    """
    X, y = make_multilabel_classification(n_samples=50, random_state=0,
                                          return_indicator=True)

    # test fit method
    mlp = MLPClassifier(solver='lbfgs', hidden_layer_sizes=50, alpha=1e-5,
                        max_iter=150, random_state=0, activation='logistic',
                        learning_rate_init=0.2)
    mlp.fit(X, y)
    assert mlp.score(X, y) > 0.97

    # test partial fit method
    mlp = MLPClassifier(solver='sgd', hidden_layer_sizes=50, max_iter=150,
                        random_state=0, activation='logistic', alpha=1e-5,
                        learning_rate_init=0.2)
    for _ in range(100):
        mlp.partial_fit(X, y, classes=[0, 1, 2, 3, 4])
    assert mlp.score(X, y) > 0.9

    # Make sure early stopping still works now that splitting is stratified
    # by default (it is disabled for multilabel classification)
    mlp = MLPClassifier(early_stopping=True)
    mlp.fit(X, y).predict(X)
开发者ID:PacktPublishing,项目名称:Mastering-Elasticsearch-7.0,代码行数:25,代码来源:test_mlp.py

示例5: test_ovr_multilabel_dataset

# 需要导入模块: from sklearn import datasets [as 别名]
# 或者: from sklearn.datasets import make_multilabel_classification [as 别名]
def test_ovr_multilabel_dataset():
    """Check OneVsRestClassifier on a generated multilabel dataset.

    The classifier is fitted on the first 80 samples and its micro-averaged
    precision and recall on the remaining 20 are compared with fixed
    reference values, once with and once without unlabeled samples allowed.
    """
    base_clf = MultinomialNB(alpha=1)

    # (allow_unlabeled, expected precision, expected recall)
    scenarios = [(True, 0.51, 0.51), (False, 0.66, 0.80)]
    for au, prec, recall in scenarios:
        X, Y = datasets.make_multilabel_classification(
            n_samples=100, n_features=20, n_classes=5, n_labels=2,
            length=50, allow_unlabeled=au, random_state=0)
        X_train, X_test = X[:80], X[80:]
        Y_train, Y_test = Y[:80], Y[80:]

        clf = OneVsRestClassifier(base_clf).fit(X_train, Y_train)
        Y_pred = clf.predict(X_test)

        assert clf.multilabel_
        micro_precision = precision_score(Y_test, Y_pred, average="micro")
        assert_almost_equal(micro_precision, prec, decimal=2)
        micro_recall = recall_score(Y_test, Y_pred, average="micro")
        assert_almost_equal(micro_recall, recall, decimal=2)
开发者ID:PacktPublishing,项目名称:Mastering-Elasticsearch-7.0,代码行数:24,代码来源:test_multiclass.py

示例6: test_make_multilabel_classification_return_indicator

# 需要导入模块: from sklearn import datasets [as 别名]
# 或者: from sklearn.datasets import make_multilabel_classification [as 别名]
def test_make_multilabel_classification_return_indicator():
    """Check shapes and label counts of the generated indicator matrix.

    Also checks that requesting the distributions returns the same ``X`` and
    ``Y`` plus two probability arrays with the expected shapes and sums.
    """
    shared_kwargs = dict(n_samples=25, n_features=20, n_classes=3,
                         random_state=0)

    for allow_unlabeled, min_length in [(True, 0), (False, 1)]:
        X, Y = make_multilabel_classification(
            allow_unlabeled=allow_unlabeled, **shared_kwargs)
        assert_equal(X.shape, (25, 20), "X shape mismatch")
        assert_equal(Y.shape, (25, 3), "Y shape mismatch")
        per_label_count = np.sum(Y, axis=0)
        assert np.all(per_label_count > min_length)

    # Also test return_distributions and return_indicator with True.
    # X, Y and allow_unlabeled below are the values left over from the last
    # loop iteration.
    X2, Y2, p_c, p_w_c = make_multilabel_classification(
        allow_unlabeled=allow_unlabeled, return_distributions=True,
        **shared_kwargs)

    assert_array_almost_equal(X, X2)
    assert_array_equal(Y, Y2)
    assert_equal(p_c.shape, (3,))
    assert_almost_equal(p_c.sum(), 1)
    assert_equal(p_w_c.shape, (20, 3))
    assert_almost_equal(p_w_c.sum(axis=0), [1] * 3)
开发者ID:PacktPublishing,项目名称:Mastering-Elasticsearch-7.0,代码行数:22,代码来源:test_samples_generator.py

示例7: test_sparse_multilabel_targets

# 需要导入模块: from sklearn import datasets [as 别名]
# 或者: from sklearn.datasets import make_multilabel_classification [as 别名]
def test_sparse_multilabel_targets(n_neighbors, n_jobs):
    """Check that dense and CSR multilabel targets give equal predictions.

    A KNeighborsClassifier is fitted on the first 80 samples, once with the
    dense target and once with its ``csr_matrix`` version, and the
    predictions for the remaining samples are compared.
    """
    X, y_dense = make_multilabel_classification(random_state=123)
    thresh = 80
    X_fit, X_query = X[:thresh], X[thresh:]

    # Dense target
    assert not issparse(y_dense)
    dense_knn = KNeighborsClassifier(n_neighbors=n_neighbors, n_jobs=n_jobs)
    dense_knn.fit(X_fit, y_dense[:thresh])
    y_pred = dense_knn.predict(X_query)

    # Sparse target
    y_sparse = csr_matrix(y_dense)
    assert issparse(y_sparse)
    sparse_knn = KNeighborsClassifier(n_neighbors=n_neighbors, n_jobs=n_jobs)
    sparse_knn.fit(X_fit, y_sparse[:thresh])
    y_pred_sparse = sparse_knn.predict(X_query)

    # Test array equality
    np.testing.assert_array_equal(y_pred, y_pred_sparse.toarray())
开发者ID:VarIr,项目名称:scikit-hubness,代码行数:21,代码来源:test_classification.py

示例8: generate_classification

# 需要导入模块: from sklearn import datasets [as 别名]
# 或者: from sklearn.datasets import make_multilabel_classification [as 别名]
def generate_classification(self, num_classes, num_features, num_samples, test_split=0.1, seed=0):
        """Generate a classification task and store it on the instance.

        Samples are drawn with ``make_multilabel_classification``; each row
        of the returned indicator matrix is then reduced to a single class
        index with ``np.argmax``.

        Arguments:
            num_classes {int} -- Number of classes
            num_features {int} -- Number of features
            num_samples {int} -- Number of samples

        Keyword Arguments:
            test_split {float} -- Size of test split (default: {0.1})
            seed {int} -- A random seed (default: {0})
        """
        generator_kwargs = dict(
            n_samples=num_samples,
            n_features=num_features,
            n_classes=num_classes,
            n_labels=0.01,
            length=50,
            allow_unlabeled=False,
            sparse=False,
            return_indicator='dense',
            return_distributions=False,
            random_state=seed,
        )
        features, indicator = make_multilabel_classification(**generator_kwargs)
        class_index = np.argmax(indicator, axis=1)

        self.categorical_features = [False] * num_features
        self.problem_type = ProblemType.FeatureClassification
        self.X, self.Y = features, class_index
        self._split_data(test_split, seed)
开发者ID:automl,项目名称:Auto-PyTorch,代码行数:25,代码来源:data_manager.py

示例9: test_multilabel_classification

# 需要导入模块: from sklearn import datasets [as 别名]
# 或者: from sklearn.datasets import make_multilabel_classification [as 别名]
def test_multilabel_classification():
    """Check that MLPClassifier works on a multilabel target.

    Covers ``fit`` with the lbfgs solver and repeated ``partial_fit`` with
    the sgd solver.
    """
    X, y = make_multilabel_classification(n_samples=50, random_state=0,
                                          return_indicator=True)

    # test fit method
    mlp = MLPClassifier(solver='lbfgs', hidden_layer_sizes=50, alpha=1e-5,
                        max_iter=150, random_state=0, activation='logistic',
                        learning_rate_init=0.2)
    mlp.fit(X, y)
    assert mlp.score(X, y) == 1

    # test partial fit method
    mlp = MLPClassifier(solver='sgd', hidden_layer_sizes=50, max_iter=150,
                        random_state=0, activation='logistic', alpha=1e-5,
                        learning_rate_init=0.2)
    for _ in range(100):
        mlp.partial_fit(X, y, classes=[0, 1, 2, 3, 4])
    assert mlp.score(X, y) > 0.9
开发者ID:alvarobartt,项目名称:twitter-stock-recommendation,代码行数:20,代码来源:test_mlp.py

示例10: test_ovr_multilabel_dataset

# 需要导入模块: from sklearn import datasets [as 别名]
# 或者: from sklearn.datasets import make_multilabel_classification [as 别名]
def test_ovr_multilabel_dataset():
    """Check OneVsRestClassifier on a generated multilabel dataset.

    The classifier is fitted on the first 80 samples and its micro-averaged
    precision and recall on the remaining 20 are compared with fixed
    reference values, once with and once without unlabeled samples allowed.
    """
    base_clf = MultinomialNB(alpha=1)
    for au, prec, recall in zip((True, False), (0.51, 0.66), (0.51, 0.80)):
        X, Y = datasets.make_multilabel_classification(n_samples=100,
                                                       n_features=20,
                                                       n_classes=5,
                                                       n_labels=2,
                                                       length=50,
                                                       allow_unlabeled=au,
                                                       random_state=0)
        X_train, Y_train = X[:80], Y[:80]
        X_test, Y_test = X[80:], Y[80:]
        clf = OneVsRestClassifier(base_clf).fit(X_train, Y_train)
        Y_pred = clf.predict(X_test)

        # Plain assert instead of the nose-style assert_true helper.
        assert clf.multilabel_
        assert_almost_equal(precision_score(Y_test, Y_pred, average="micro"),
                            prec,
                            decimal=2)
        assert_almost_equal(recall_score(Y_test, Y_pred, average="micro"),
                            recall,
                            decimal=2)
开发者ID:alvarobartt,项目名称:twitter-stock-recommendation,代码行数:24,代码来源:test_multiclass.py

示例11: test_make_multilabel_classification_return_indicator

# 需要导入模块: from sklearn import datasets [as 别名]
# 或者: from sklearn.datasets import make_multilabel_classification [as 别名]
def test_make_multilabel_classification_return_indicator():
    """Check shapes and label counts of the generated indicator matrix.

    Also checks that requesting the distributions returns the same ``X`` and
    ``Y`` plus two probability arrays with the expected shapes and sums.
    """
    for allow_unlabeled, min_length in zip((True, False), (0, 1)):
        X, Y = make_multilabel_classification(n_samples=25, n_features=20,
                                              n_classes=3, random_state=0,
                                              allow_unlabeled=allow_unlabeled)
        assert X.shape == (25, 20), "X shape mismatch"
        assert Y.shape == (25, 3), "Y shape mismatch"
        assert np.all(np.sum(Y, axis=0) > min_length)

    # Also test return_distributions and return_indicator with True.
    # X, Y and allow_unlabeled below are the values left over from the last
    # loop iteration.
    X2, Y2, p_c, p_w_c = make_multilabel_classification(
        n_samples=25, n_features=20, n_classes=3, random_state=0,
        allow_unlabeled=allow_unlabeled, return_distributions=True)

    assert_array_equal(X, X2)
    assert_array_equal(Y, Y2)
    assert p_c.shape == (3,)
    assert_almost_equal(p_c.sum(), 1)
    assert p_w_c.shape == (20, 3)
    assert_almost_equal(p_w_c.sum(axis=0), [1] * 3)
开发者ID:alvarobartt,项目名称:twitter-stock-recommendation,代码行数:22,代码来源:test_samples_generator.py

示例12: setup_mlc_dataset

# 需要导入模块: from sklearn import datasets [as 别名]
# 或者: from sklearn.datasets import make_multilabel_classification [as 别名]
def setup_mlc_dataset(self):
        """Return a ``Dataset`` wrapping a generated 5-feature multilabel problem."""
        features, labels = datasets.make_multilabel_classification(
            random_state=1126, n_features=5)
        return Dataset(features, labels)
开发者ID:ntucllab,项目名称:libact,代码行数:6,代码来源:test_labelers.py

示例13: setUp

# 需要导入模块: from sklearn import datasets [as 别名]
# 或者: from sklearn.datasets import make_multilabel_classification [as 别名]
def setUp(self):
        """Generate a multilabel problem and store a 70/30 train/test split."""
        features, labels = datasets.make_multilabel_classification(
            random_state=1126)
        split = train_test_split(features, labels, test_size=0.3,
                                 random_state=1126)
        self.X_train, self.X_test, self.Y_train, self.Y_test = split
开发者ID:ntucllab,项目名称:libact,代码行数:6,代码来源:test_binary_relevance.py

示例14: split_train_test

# 需要导入模块: from sklearn import datasets [as 别名]
# 或者: from sklearn.datasets import make_multilabel_classification [as 别名]
def split_train_test(test_size):
    """Build training and test ``Dataset`` objects from generated data.

    Returns a tuple ``(trn_ds, tst_ds, fully_labeled_trn_ds)``. In ``trn_ds``
    only the first five training samples keep their labels; the remaining
    entries are ``None``. ``fully_labeled_trn_ds`` holds the same training
    features with every label present.
    """
    # choose a dataset with unbalanced class instances
    raw_features, Y = make_multilabel_classification(
        n_samples=300, n_classes=10, allow_unlabeled=False)
    X = StandardScaler().fit_transform(raw_features)

    X_trn, X_tst, Y_trn, Y_tst = train_test_split(X, Y, test_size=test_size)

    n_labeled = 5
    missing_labels = [None] * (len(Y_trn) - n_labeled)
    partial_labels = Y_trn[:n_labeled].tolist() + missing_labels

    trn_ds = Dataset(X_trn, partial_labels)
    tst_ds = Dataset(X_tst, Y_tst.tolist())
    fully_labeled_trn_ds = Dataset(X_trn, Y_trn)

    return trn_ds, tst_ds, fully_labeled_trn_ds
开发者ID:ntucllab,项目名称:libact,代码行数:17,代码来源:multilabel_plot.py

示例15: _prepare_for_use

# 需要导入模块: from sklearn import datasets [as 别名]
# 或者: from sklearn.datasets import make_multilabel_classification [as 别名]
def _prepare_for_use(self):
        """Generate the data and derive target/feature names and target values.

        Sets ``_random_state``, ``X``, ``y``, ``target_names``,
        ``feature_names`` and ``target_values`` on the instance.
        """
        self._random_state = check_random_state(self.random_state)
        self.X, self.y = make_multilabel_classification(
            n_samples=self.n_samples,
            n_features=self.n_features,
            n_classes=self.n_targets,
            n_labels=self.n_labels,
            random_state=self._random_state,
        )

        self.target_names = [
            "target_{}".format(idx) for idx in range(self.n_targets)
        ]
        self.feature_names = [
            "att_num_{}".format(idx) for idx in range(self.n_num_features)
        ]

        # A single target gets a flat list of its distinct values; several
        # targets get one list of distinct values per column of y.
        if self.n_targets == 1:
            self.target_values = np.unique(self.y).tolist()
        else:
            self.target_values = [
                np.unique(self.y[:, col]).tolist()
                for col in range(self.n_targets)
            ]
开发者ID:scikit-multiflow,项目名称:scikit-multiflow,代码行数:13,代码来源:multilabel_generator.py


注:本文中的sklearn.datasets.make_multilabel_classification方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。