This article collects typical usage examples of the Python function sklearn.datasets.make_classification. If you are unsure what make_classification does, how to call it, or what real-world usage looks like, the curated examples below should help.
The sections below present 15 code examples of make_classification, sorted by popularity by default. You can upvote the examples you find useful; your ratings help the system recommend better Python code samples.
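Before turning to the collected test-suite excerpts, here is a minimal, self-contained sketch of the basic call. The parameter values are arbitrary and chosen purely for illustration; they do not come from any of the examples that follow.

import numpy as np
from sklearn.datasets import make_classification

# Draw a small synthetic 3-class dataset: 100 samples, 20 features,
# 5 of which are informative. random_state makes the draw reproducible.
X, y = make_classification(n_samples=100, n_features=20, n_informative=5,
                           n_classes=3, random_state=0)

print(X.shape)       # (100, 20)
print(np.unique(y))  # [0 1 2]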
Example 1: test_logistic_regressioncv_class_weights
def test_logistic_regressioncv_class_weights():
    X, y = make_classification(n_samples=20, n_features=20, n_informative=10,
                               n_classes=3, random_state=0)

    msg = ("In LogisticRegressionCV the liblinear solver cannot handle "
           "multiclass with class_weight of type dict. Use the lbfgs, "
           "newton-cg or sag solvers or set class_weight='balanced'")
    clf_lib = LogisticRegressionCV(class_weight={0: 0.1, 1: 0.2},
                                   solver='liblinear')
    assert_raise_message(ValueError, msg, clf_lib.fit, X, y)
    y_ = y.copy()
    y_[y == 2] = 1
    clf_lib.fit(X, y_)
    assert_array_equal(clf_lib.classes_, [0, 1])

    # Test for class_weight=balanced
    X, y = make_classification(n_samples=20, n_features=20, n_informative=10,
                               random_state=0)
    clf_lbf = LogisticRegressionCV(solver='lbfgs', fit_intercept=False,
                                   class_weight='balanced')
    clf_lbf.fit(X, y)
    clf_lib = LogisticRegressionCV(solver='liblinear', fit_intercept=False,
                                   class_weight='balanced')
    clf_lib.fit(X, y)
    clf_sag = LogisticRegressionCV(solver='sag', fit_intercept=False,
                                   class_weight='balanced', max_iter=2000)
    clf_sag.fit(X, y)
    assert_array_almost_equal(clf_lib.coef_, clf_lbf.coef_, decimal=4)
    assert_array_almost_equal(clf_sag.coef_, clf_lbf.coef_, decimal=4)
    assert_array_almost_equal(clf_lib.coef_, clf_sag.coef_, decimal=4)
Example 2: test_make_classification
def test_make_classification():
    weights = [0.1, 0.25]
    X, y = make_classification(n_samples=100, n_features=20, n_informative=5,
                               n_redundant=1, n_repeated=1, n_classes=3,
                               n_clusters_per_class=1, hypercube=False,
                               shift=None, scale=None, weights=weights,
                               random_state=0)

    assert_equal(weights, [0.1, 0.25])
    assert_equal(X.shape, (100, 20), "X shape mismatch")
    assert_equal(y.shape, (100,), "y shape mismatch")
    assert_equal(np.unique(y).shape, (3,), "Unexpected number of classes")
    assert_equal(sum(y == 0), 10, "Unexpected number of samples in class #0")
    assert_equal(sum(y == 1), 25, "Unexpected number of samples in class #1")
    assert_equal(sum(y == 2), 65, "Unexpected number of samples in class #2")

    # Test for n_features > 30
    X, y = make_classification(n_samples=2000, n_features=31, n_informative=31,
                               n_redundant=0, n_repeated=0, hypercube=True,
                               scale=0.5, random_state=0)
    assert_equal(X.shape, (2000, 31), "X shape mismatch")
    assert_equal(y.shape, (2000,), "y shape mismatch")
    assert_equal(np.unique(X.view([('', X.dtype)] * X.shape[1])).view(X.dtype)
                 .reshape(-1, X.shape[1]).shape[0], 2000,
                 "Unexpected number of unique rows")
Author: dominicSchiller, Project: DataScience_EA12_Clustering_Exercise, Lines: 26, Source: test_samples_generator.py
Example 3: test_logistic_regressioncv_class_weights
def test_logistic_regressioncv_class_weights():
    X, y = make_classification(n_samples=20, n_features=20, n_informative=10,
                               n_classes=3, random_state=0)

    # Test that liblinear fails when a class_weight of type dict is
    # provided for a multiclass problem. It can, however, handle
    # binary problems.
    clf_lib = LogisticRegressionCV(class_weight={0: 0.1, 1: 0.2},
                                   solver='liblinear')
    assert_raises(ValueError, clf_lib.fit, X, y)
    y_ = y.copy()
    y_[y == 2] = 1
    clf_lib.fit(X, y_)
    assert_array_equal(clf_lib.classes_, [0, 1])

    # Test for class_weight=auto
    X, y = make_classification(n_samples=20, n_features=20, n_informative=10,
                               random_state=0)
    clf_lbf = LogisticRegressionCV(solver='lbfgs', fit_intercept=False,
                                   class_weight='auto')
    clf_lbf.fit(X, y)
    clf_lib = LogisticRegressionCV(solver='liblinear', fit_intercept=False,
                                   class_weight='auto')
    clf_lib.fit(X, y)
    assert_array_almost_equal(clf_lib.coef_, clf_lbf.coef_, decimal=4)
Example 4: setUp
def setUp(self):
    np.random.seed(488881)
    # binomial
    x, y = make_classification(n_samples=300, random_state=6601)
    x_sparse = csr_matrix(x)
    x_wide, y_wide = make_classification(n_samples=100, n_features=150,
                                         random_state=8911)
    x_wide_sparse = csr_matrix(x_wide)
    self.binomial = [(x, y), (x_sparse, y), (x_wide, y_wide),
                     (x_wide_sparse, y_wide)]

    # multinomial
    x, y = make_classification(n_samples=400, n_classes=3, n_informative=15,
                               n_features=25, random_state=10585)
    x_sparse = csr_matrix(x)
    x_wide, y_wide = make_classification(n_samples=400, n_classes=3,
                                         n_informative=15, n_features=500,
                                         random_state=15841)
    x_wide_sparse = csr_matrix(x_wide)
    self.multinomial = [(x, y), (x_sparse, y), (x_wide, y_wide),
                        (x_wide_sparse, y_wide)]

    self.alphas = [0., 0.25, 0.50, 0.75, 1.]
    self.n_splits = [-1, 0, 5]
    self.scoring = [
        "accuracy",
        "roc_auc",
        "average_precision",
        "log_loss",
        "precision_macro",
        "precision_micro",
        "precision_weighted",
        "f1_micro",
        "f1_macro",
        "f1_weighted",
    ]
    self.multinomial_scoring = [
        "accuracy",
        "log_loss",
        "precision_macro",
        "precision_micro",
        "precision_weighted",
        "f1_micro",
        "f1_macro",
        "f1_weighted",
    ]
示例5: test_liblinear_random_state
def test_liblinear_random_state():
    X, y = make_classification(n_samples=20)
    lr1 = LogisticRegression(random_state=0)
    lr1.fit(X, y)
    lr2 = LogisticRegression(random_state=0)
    lr2.fit(X, y)
    assert_array_almost_equal(lr1.coef_, lr2.coef_)
Example 6: test_importances
def test_importances():
    """Check variable importances."""
    X, y = datasets.make_classification(n_samples=2000,
                                        n_features=10,
                                        n_informative=3,
                                        n_redundant=0,
                                        n_repeated=0,
                                        shuffle=False,
                                        random_state=0)

    for name, Tree in CLF_TREES.items():
        clf = Tree(random_state=0)
        clf.fit(X, y)
        importances = clf.feature_importances_
        n_important = np.sum(importances > 0.1)

        assert_equal(importances.shape[0], 10, "Failed with {0}".format(name))
        assert_equal(n_important, 3, "Failed with {0}".format(name))

        X_new = clf.transform(X, threshold="mean")
        assert_less(0, X_new.shape[1], "Failed with {0}".format(name))
        assert_less(X_new.shape[1], X.shape[1], "Failed with {0}".format(name))

    # Check on iris that importances are the same for all builders
    clf = DecisionTreeClassifier(random_state=0)
    clf.fit(iris.data, iris.target)
    clf2 = DecisionTreeClassifier(random_state=0,
                                  max_leaf_nodes=len(iris.data))
    clf2.fit(iris.data, iris.target)

    assert_array_equal(clf.feature_importances_,
                       clf2.feature_importances_)
Example 7: test_grid_search_precomputed_kernel
def test_grid_search_precomputed_kernel():
    """Test that grid search works when the input features are given in the
    form of a precomputed kernel matrix."""
    X_, y_ = make_classification(n_samples=200, n_features=100, random_state=0)

    # compute the training kernel matrix corresponding to the linear kernel
    K_train = np.dot(X_[:180], X_[:180].T)
    y_train = y_[:180]

    clf = SVC(kernel='precomputed')
    cv = GridSearchCV(clf, {'C': [0.1, 1.0]})
    cv.fit(K_train, y_train)

    assert_true(cv.best_score_ >= 0)

    # compute the test kernel matrix
    K_test = np.dot(X_[180:], X_[:180].T)
    y_test = y_[180:]

    y_pred = cv.predict(K_test)

    assert_true(np.mean(y_pred == y_test) >= 0)

    # test error is raised when the precomputed kernel is not array-like
    # or sparse
    assert_raises(ValueError, cv.fit, K_train.tolist(), y_train)
Example 8: test_grid_search_sparse_scoring
def test_grid_search_sparse_scoring():
    X_, y_ = make_classification(n_samples=200, n_features=100, random_state=0)

    clf = LinearSVC()
    cv = GridSearchCV(clf, {'C': [0.1, 1.0]}, scoring="f1")
    cv.fit(X_[:180], y_[:180])
    y_pred = cv.predict(X_[180:])
    C = cv.best_estimator_.C

    X_ = sp.csr_matrix(X_)
    clf = LinearSVC()
    cv = GridSearchCV(clf, {'C': [0.1, 1.0]}, scoring="f1")
    cv.fit(X_[:180], y_[:180])
    y_pred2 = cv.predict(X_[180:])
    C2 = cv.best_estimator_.C

    assert_array_equal(y_pred, y_pred2)
    assert_equal(C, C2)
    # Smoke test the score
    # np.testing.assert_allclose(f1_score(cv.predict(X_[:180]), y[:180]),
    #                            cv.score(X_[:180], y[:180]))

    # test loss where greater is worse
    def f1_loss(y_true_, y_pred_):
        return -f1_score(y_true_, y_pred_)

    F1Loss = make_scorer(f1_loss, greater_is_better=False)
    cv = GridSearchCV(clf, {'C': [0.1, 1.0]}, scoring=F1Loss)
    cv.fit(X_[:180], y_[:180])
    y_pred3 = cv.predict(X_[180:])
    C3 = cv.best_estimator_.C

    assert_equal(C, C3)
    assert_array_equal(y_pred, y_pred3)
Example 9: test_engine_info
def test_engine_info(self):
    n_samples = 20
    n_features = 100
    n_proc = 2
    X, y = datasets.make_classification(n_samples=n_samples,
                                        n_features=n_features,
                                        n_informative=2,
                                        random_state=1)
    Xy = dict(X=X, y=y)
    cv_svm_local = CV(Methods(*[SVC(kernel="linear"),
                                SVC(kernel="rbf")]),
                      n_folds=3)
    swf_engine = SomaWorkflowEngine(cv_svm_local,
                                    num_processes=n_proc,
                                    resource_id="[email protected]",
                                    login="jl237561",
                                    remove_finished_wf=False,
                                    remove_local_tree=False,
                                    queue="Global_long")
    swf_engine.run(**Xy)

    print("engine_info ================")
    for job_info in swf_engine.engine_info:
        print("  job_info=================")
        print("  mem_cost= ", job_info.mem_cost)
        print("  vmem_cost= ", job_info.vmem_cost)
        print("  time_cost= ", job_info.time_cost)
        self.assertTrue(job_info.time_cost > 0)
Example 10: test_intercept_logistic_helper
def test_intercept_logistic_helper():
    n_samples, n_features = 10, 5
    X, y = make_classification(n_samples=n_samples, n_features=n_features,
                               random_state=0)

    # Fit intercept case.
    alpha = 1.
    w = np.ones(n_features + 1)
    grad_interp, hess_interp = _logistic_grad_hess(w, X, y, alpha)
    loss_interp = _logistic_loss(w, X, y, alpha)

    # Do not fit intercept. This can be considered equivalent to adding
    # a feature vector of ones, i.e. a column of ones.
    X_ = np.hstack((X, np.ones(10)[:, np.newaxis]))
    grad, hess = _logistic_grad_hess(w, X_, y, alpha)
    loss = _logistic_loss(w, X_, y, alpha)

    # In the fit_intercept=False case, the feature vector of ones is
    # penalized. This should be taken care of.
    assert_almost_equal(loss_interp + 0.5 * (w[-1] ** 2), loss)

    # Check gradient.
    assert_array_almost_equal(grad_interp[:n_features], grad[:n_features])
    assert_almost_equal(grad_interp[-1] + alpha * w[-1], grad[-1])

    rng = np.random.RandomState(0)
    grad = rng.rand(n_features + 1)
    hess_interp = hess_interp(grad)
    hess = hess(grad)
    assert_array_almost_equal(hess_interp[:n_features], hess[:n_features])
    assert_almost_equal(hess_interp[-1] + alpha * grad[-1], hess[-1])
Example 11: test_class_weight_auto_classifiers
def test_class_weight_auto_classifiers():
    """Test that class_weight="auto" improves f1-score"""
    # This test is broken; its success depends on:
    # * a rare fortuitous RNG seed for make_classification; and
    # * the use of binary F1 over a seemingly arbitrary positive class for two
    #   datasets, and weighted average F1 for the third.
    # Its expectations need to be clarified and reimplemented.
    raise SkipTest("This test requires redefinition")

    classifiers = all_estimators(type_filter="classifier")

    clean_warning_registry()
    with warnings.catch_warnings(record=True):
        classifiers = [c for c in classifiers
                       if "class_weight" in c[1]().get_params().keys()]

    for n_classes, weights in zip([2, 3], [[0.8, 0.2], [0.8, 0.1, 0.1]]):
        # create unbalanced dataset
        X, y = make_classification(n_classes=n_classes, n_samples=200,
                                   n_features=10, weights=weights,
                                   random_state=0, n_informative=n_classes)
        X = StandardScaler().fit_transform(X)
        X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                            test_size=0.5,
                                                            random_state=0)
        for name, Classifier in classifiers:
            if (name != "NuSVC"
                    # the sparse version has a parameter that doesn't do anything
                    and not name.startswith("RidgeClassifier")
                    # RidgeClassifier behaves unexpected
                    # FIXME!
                    and not name.endswith("NB")):
                # NaiveBayes classifiers have a somewhat different interface.
                # FIXME SOON!
                yield (check_class_weight_auto_classifiers, name, Classifier,
                       X_train, y_train, X_test, y_test, weights)
Example 12: test_cv
def test_cv(self):
    X, y = datasets.make_classification(n_samples=20, n_features=5,
                                        n_informative=2)
    n_folds = 2

    # = With EPAC
    wf = CV(SVC(kernel="linear"), n_folds=n_folds,
            reducer=ClassificationReport(keep=True))
    r_epac = wf.top_down(X=X, y=y)

    # = With SKLEARN
    clf = SVC(kernel="linear")
    r_sklearn = list()
    for idx_train, idx_test in StratifiedKFold(y=y, n_folds=n_folds):
        # idx_train, idx_test = cv.__iter__().next()
        X_train = X[idx_train, :]
        X_test = X[idx_test, :]
        y_train = y[idx_train]
        clf.fit(X_train, y_train)
        r_sklearn.append(clf.predict(X_test))

    # = Comparison
    key2cmp = "y" + conf.SEP + conf.TEST + conf.SEP + conf.PREDICTION
    for icv in range(n_folds):
        comp = np.all(np.asarray(r_epac[0][key2cmp]) ==
                      np.asarray(r_sklearn[0]))
        self.assertTrue(comp, u"Diff CV: EPAC vs sklearn")

    # test reduce
    r_epac_reduce = wf.reduce().values()[0][key2cmp]
    comp = np.all(np.asarray(r_epac_reduce) == np.asarray(r_sklearn))
    self.assertTrue(comp, u"Diff CV: EPAC reduce")
Example 13: test_perm
def test_perm(self):
    X, y = datasets.make_classification(n_samples=20, n_features=5,
                                        n_informative=2)
    n_perms = 2
    rnd = 0

    # = With EPAC
    wf = Perms(SVC(kernel="linear"), n_perms=n_perms, permute="y",
               random_state=rnd, reducer=None)
    r_epac = wf.top_down(X=X, y=y)

    # = With SKLEARN
    clf = SVC(kernel="linear")
    r_sklearn = list()
    for perm in Permutations(n=y.shape[0], n_perms=n_perms, random_state=rnd):
        y_p = y[perm]
        clf.fit(X, y_p)
        r_sklearn.append(clf.predict(X))

    key2cmp = "y" + conf.SEP + conf.PREDICTION

    # = Comparison
    for iperm in range(n_perms):
        comp = np.all(np.asarray(r_epac[iperm][key2cmp]) ==
                      np.asarray(r_sklearn[iperm]))
        self.assertTrue(comp, u"Diff Perm: EPAC vs sklearn")

    # test reduce
    for iperm in range(n_perms):
        r_epac_reduce = wf.reduce().values()[iperm][key2cmp]
        comp = np.all(np.asarray(r_epac_reduce) == np.asarray(r_sklearn[iperm]))
        self.assertTrue(comp, u"Diff Perm: EPAC reduce")
Example 14: test_cvbestsearchrefit_select_k_best
def test_cvbestsearchrefit_select_k_best(self):
    list_C_value = range(2, 10, 1)
    # print repr(list_C_value)
    for C_value in list_C_value:
        # C_value = 2
        # print C_value
        X, y = datasets.make_classification(n_samples=100, n_features=500,
                                            n_informative=5)
        n_folds_nested = 2
        # random_state = 0
        k_values = [2, 3, 4, 5, 6]
        key_y_pred = "y" + conf.SEP + conf.PREDICTION

        # With EPAC
        methods = Methods(*[Pipe(SelectKBest(k=k),
                                 SVC(C=C_value, kernel="linear"))
                            for k in k_values])
        wf = CVBestSearchRefitParallel(methods, n_folds=n_folds_nested)
        wf.run(X=X, y=y)
        r_epac = wf.reduce().values()[0]

        # - Without EPAC
        from sklearn.pipeline import Pipeline
        r_sklearn = dict()
        clf = Pipeline([("anova", SelectKBest(k=3)),
                        ("svm", SVC(C=C_value, kernel="linear"))])
        parameters = {"anova__k": k_values}
        cv_nested = StratifiedKFold(y=y, n_folds=n_folds_nested)
        gscv = grid_search.GridSearchCV(clf, parameters, cv=cv_nested)
        gscv.fit(X, y)
        r_sklearn[key_y_pred] = gscv.predict(X)
        r_sklearn[conf.BEST_PARAMS] = gscv.best_params_
        r_sklearn[conf.BEST_PARAMS]["k"] = r_sklearn[conf.BEST_PARAMS]["anova__k"]

        # - Comparisons
        comp = np.all(r_epac[key_y_pred] == r_sklearn[key_y_pred])
        self.assertTrue(comp, u"Diff CVBestSearchRefitParallel: prediction")
        for key_param in r_epac[conf.BEST_PARAMS][0]:
            if key_param in r_sklearn[conf.BEST_PARAMS]:
                comp = (r_sklearn[conf.BEST_PARAMS][key_param] ==
                        r_epac[conf.BEST_PARAMS][0][key_param])
                self.assertTrue(comp,
                                u"Diff CVBestSearchRefitParallel: best parameters")
Example 15: test_cvbestsearchrefit
def test_cvbestsearchrefit(self):
    X, y = datasets.make_classification(n_samples=12, n_features=10,
                                        n_informative=2)
    n_folds_nested = 2
    # random_state = 0
    C_values = [0.1, 0.5, 1, 2, 5]
    kernels = ["linear", "rbf"]
    key_y_pred = "y" + conf.SEP + conf.PREDICTION

    # With EPAC
    methods = Methods(*[SVC(C=C, kernel=kernel)
                        for C in C_values for kernel in kernels])
    wf = CVBestSearchRefitParallel(methods, n_folds=n_folds_nested)
    wf.run(X=X, y=y)
    r_epac = wf.reduce().values()[0]

    # - Without EPAC
    r_sklearn = dict()
    clf = SVC(kernel="linear")
    parameters = {"C": C_values, "kernel": kernels}
    cv_nested = StratifiedKFold(y=y, n_folds=n_folds_nested)
    gscv = grid_search.GridSearchCV(clf, parameters, cv=cv_nested)
    gscv.fit(X, y)
    r_sklearn[key_y_pred] = gscv.predict(X)
    r_sklearn[conf.BEST_PARAMS] = gscv.best_params_

    # - Comparisons
    comp = np.all(r_epac[key_y_pred] == r_sklearn[key_y_pred])
    self.assertTrue(comp, u"Diff CVBestSearchRefitParallel: prediction")
    for key_param in r_epac[conf.BEST_PARAMS][0]:
        if key_param in r_sklearn[conf.BEST_PARAMS]:
            comp = (r_sklearn[conf.BEST_PARAMS][key_param] ==
                    r_epac[conf.BEST_PARAMS][0][key_param])
            self.assertTrue(comp,
                            u"Diff CVBestSearchRefitParallel: best parameters")