当前位置: 首页>>代码示例>>Python>>正文


Python RandomizedSearchCV.fit方法代码示例

本文整理汇总了Python中sklearn.model_selection.RandomizedSearchCV.fit方法的典型用法代码示例。如果您正苦于以下问题:Python RandomizedSearchCV.fit方法的具体用法?Python RandomizedSearchCV.fit怎么用?Python RandomizedSearchCV.fit使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sklearn.model_selection.RandomizedSearchCV的用法示例。


在下文中一共展示了RandomizedSearchCV.fit方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_large_grid

# 需要导入模块: from sklearn.model_selection import RandomizedSearchCV [as 别名]
# 或者: from sklearn.model_selection.RandomizedSearchCV import fit [as 别名]
    def test_large_grid():
        """Exercise a randomized search over a scaler -> PCA -> random forest pipeline.

        The search must refuse to score before it has been fit; after fitting
        it is scored, used to predict on the train and test sets, and finally
        handed to ``report_grid_score_detail`` with invalid and valid arguments.
        """

        # NOTE(review): SK18 presumably flags scikit-learn >= 0.18, whose KFold
        # takes ``n_splits`` rather than ``n``/``n_folds`` -- confirm at its definition.
        if SK18:
            custom_cv = KFold(n_splits=3, shuffle=True, random_state=42)
        else:
            custom_cv = KFold(n=y_train.shape[0], n_folds=3, shuffle=True, random_state=42)

        # the pipeline being tuned
        steps = [
            ('scaler', SelectiveScaler()),
            ('pca', SelectivePCA(weight=True)),
            ('rf', RandomForestClassifier(random_state=42)),
        ]
        pipe = Pipeline(steps)

        # hyper-parameter search space
        hp = {
            'scaler__scaler': [StandardScaler(), RobustScaler(), MinMaxScaler()],
            'pca__whiten': [True, False],
            'pca__weight': [True, False],
            'pca__n_components': uniform(0.75, 0.15),
            'rf__n_estimators': randint(5, 10),
            'rf__max_depth': randint(5, 15)
        }

        # the randomized search itself
        grid = RandomizedSearchCV(
            pipe,
            hp,
            n_iter=2,
            scoring='accuracy',
            n_jobs=1,
            cv=custom_cv,
            random_state=42,
        )

        # scoring an unfitted search has to raise
        assert_fails(grid.score, (ValueError, AttributeError), X_train, y_train)

        grid.fit(X_train, y_train)

        # score for coverage only; any warning it emits is silenced
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            grid.score(X_train, y_train)

        # coverage:
        assert grid._estimator_type == 'classifier'

        # predictions on both splits
        tr_pred = grid.predict(X_train)
        te_pred = grid.predict(X_test)

        # the accuracies are computed but intentionally not asserted on
        accuracy_score(y_train, tr_pred)
        accuracy_score(y_test, te_pred)

        # grid score reports: out-of-range percentiles must be rejected
        for bad_percentile in (0.0, 1.0):
            assert_fails(report_grid_score_detail, ValueError,
                         random_search=grid, percentile=bad_percentile)

        # ... and so must an unknown y_axis
        assert_fails(report_grid_score_detail, ValueError,
                     random_search=grid, y_axis='bad_axis')

        # a valid call must simply run (also checks that percentile is accepted)
        report_grid_score_detail(grid, charts=True, percentile=0.95)
开发者ID:tgsmith61591,项目名称:skutil,代码行数:62,代码来源:test_big.py

示例2: fit

# 需要导入模块: from sklearn.model_selection import RandomizedSearchCV [as 别名]
# 或者: from sklearn.model_selection.RandomizedSearchCV import fit [as 别名]
def fit(x, y, estimator, dataframe, params):
    """Tune ``estimator`` with a randomized search and optionally calibrate it.

    A count vectorizer is fit on column ``x`` of ``dataframe``, features are
    built through ``select_features``, and a 60-iteration, 3-fold randomized
    search over ``params`` is fit. Unless ``method`` is ``'lr'`` or ``'svm'``,
    an untouched clone of the input estimator is configured with the best
    parameters, wrapped in ``CalibratedClassifierCV`` and fit on the same data.

    :param x: name of the feature column in ``dataframe``
    :param y: label column, forwarded to ``select_features``
    :param estimator: estimator to tune
    :param dataframe: data source for features and labels
    :param params: parameter distributions for the search
    :return: tuple ``(fitted estimator, selector, vectorizer)``
    """
    vectorizer = CountVectorizer(
        lowercase=True,
        binary=False,
        stop_words=['go', '', ' '],
    )
    vectorizer.fit(dataframe[x].values)

    # Clone before the search so calibration starts from an unfitted copy.
    pristine = clone(estimator)

    x_np, y_np, _, selector = select_features(
        df=dataframe,
        vectorizer=vectorizer,
        feature_col=x,
        label_col=y,
        select_method=None,
        continuous_col=None,
    )

    search = RandomizedSearchCV(estimator, params, n_iter=60, cv=3, n_jobs=3, refit=True)
    search.fit(x_np, y_np)
    tuned_params = search.best_params_
    fitted = search

    # NOTE(review): `method` is not a parameter of this function; presumably a
    # module-level global -- confirm it is defined where this is called.
    if method not in ('lr', 'svm'):
        print("Calibrating...")
        fitted = CalibratedClassifierCV(pristine.set_params(**tuned_params), 'isotonic', 3)
        fitted.fit(x_np, y_np)

    # Imported here, after fitting, exactly as in the original flow.
    from sklearn.base import _pprint
    _pprint(fitted.get_params(deep=True), offset=2)
    return fitted, selector, vectorizer
开发者ID:daniaki,项目名称:ppi_wrangler,代码行数:27,代码来源:interactome_predict.py

示例3: search

# 需要导入模块: from sklearn.model_selection import RandomizedSearchCV [as 别名]
# 或者: from sklearn.model_selection.RandomizedSearchCV import fit [as 别名]
    def search(self, search_space, search_iter, n_estimators, x, y):
        """Run a randomized search over ``self.lgbm`` and keep the best estimator.

        Fixed single-value defaults are combined with ``search_space`` (whose
        entries take precedence), except for ``n_estimators`` which always
        comes from the ``n_estimators`` argument. Note that the
        ``'n_estimators'`` key is removed from the caller's ``search_space``.

        :param search_space: dict of parameter name -> candidate values
        :param search_iter: number of sampled parameter settings
        :param n_estimators: candidate values for ``n_estimators``
        :param x: training features
        :param y: training targets
        :return: the best parameter setting found by the search
        """
        # Drop any caller-supplied n_estimators (mutates the passed-in dict).
        search_space.pop('n_estimators', None)

        params = dict(
            boosting_type=['gbdt'],
            min_child_weight=[5],
            min_split_gain=[1.0],
            subsample=[0.8],
            colsample_bytree=[0.6],
            max_depth=[10],
            n_estimators=n_estimators,
            num_leaves=[70],
            learning_rate=[0.04],
        )
        params.update(search_space)
        if self.verbose:
            print(params)

        score_metric, skf = self.get_skf(3)

        random_search = RandomizedSearchCV(
            self.lgbm,
            param_distributions=params,
            n_iter=search_iter,
            scoring=score_metric,
            n_jobs=1,
            cv=skf,
            verbose=0,
            random_state=1001,
        )
        random_search.fit(x, y)

        self.clf = random_search.best_estimator_
        return random_search.best_params_
开发者ID:Saiuz,项目名称:autokeras,代码行数:30,代码来源:tabular_supervised.py

示例4: parameter_search

# 需要导入模块: from sklearn.model_selection import RandomizedSearchCV [as 别名]
# 或者: from sklearn.model_selection.RandomizedSearchCV import fit [as 别名]
def parameter_search(model, X, y, params, metric, n=10):
    '''
    Fit a randomized hyper-parameter search and return the fitted search.

    The returned object is the ``RandomizedSearchCV`` instance itself, not
    just the best parameters.

    :param model: estimator to tune
    :param X: training features
    :param y: training targets
    :param params: parameter distributions to sample from
    :param metric: value passed as ``scoring``
    :param n: number of sampled parameter settings
    '''
    searcher = RandomizedSearchCV(
        model,
        param_distributions=params,
        scoring=metric,
        n_jobs=3,
        n_iter=n,
    )
    searcher.fit(X, y)
    return searcher
开发者ID:BryceLuna,项目名称:Fraud_Detection,代码行数:10,代码来源:Search_Models_Params.py

示例5: pr_curve

# 需要导入模块: from sklearn.model_selection import RandomizedSearchCV [as 别名]
# 或者: from sklearn.model_selection.RandomizedSearchCV import fit [as 别名]
    def pr_curve(i):
        """Collect precision, recall and F1 at every threshold for ``labels[i]``.

        For each (train, validation) split in ``folds`` a randomized search is
        fit on the vectorized ``'terms'`` column of the training fold. Column 1
        of the refit estimator's ``predict_proba`` output on the validation
        fold is then thresholded at each value in ``thresholds`` and the
        resulting scores are recorded.

        :param i: index into ``labels`` and ``seeds``
        :return: the ``Statistics`` object holding the per-threshold scores
        :raises ValueError: if ``vectorizer_method`` is neither ``'tf-idf'``
            nor ``'count'``
        """
        label = labels[i]
        statistics_l = Statistics()
        print('Doing label {}'.format(label))

        for train_idx, valid_idx in folds:
            # A fresh generator seeded identically for every fold.
            rng = np.random.RandomState()
            rng.seed(seeds[i])
            training_fold = developement_df.loc[train_idx, ].reset_index(drop=True)
            validation_fold = developement_df.loc[valid_idx, ].reset_index(drop=True)
            base_estimators = make_classifiers(method, balanced, labels, random_state=rng)

            # Find the best params; refit=True leaves a fitted best estimator.
            base_estimator = base_estimators[label]
            estimator = RandomizedSearchCV(
                estimator=base_estimator, param_distributions=params,
                n_iter=60, scoring='f1', cv=3, random_state=rng,
                error_score=0.0, n_jobs=1, pre_dispatch='2*n_jobs',
                refit=True
            )

            # Set up the vectorizer for the bag-of-words representation.
            if vectorizer_method == 'tf-idf':
                vectorizer = TfidfVectorizer(
                    stop_words=['go', '', ' '], binary=binary, lowercase=True,
                    sublinear_tf=False, max_df=1.0, min_df=0
                )
            elif vectorizer_method == 'count':
                vectorizer = CountVectorizer(
                    stop_words=['go', '', ' '], binary=binary, lowercase=True
                )
            else:
                # Previously this fell through and failed later with an
                # unbound-local error on `vectorizer`.
                raise ValueError(
                    "Unknown vectorizer_method {!r}; expected 'tf-idf' or "
                    "'count'".format(vectorizer_method)
                )
            vectorizer.fit(training_fold['terms'].values)

            # Fit and evaluate the performance of the classifier.
            x_train = vectorizer.transform(training_fold['terms'].values)
            y_train = np.asarray(training_fold[label].values, dtype=int)

            x_valid = vectorizer.transform(validation_fold['terms'].values)
            y_valid = np.asarray(validation_fold[label].values, dtype=int)

            estimator.fit(x_train, y_train)

            # The probabilities do not depend on the threshold, so compute
            # them once per fold instead of once per threshold.
            positive_scores = [p[1] for p in estimator.predict_proba(x_valid)]

            for t in thresholds:
                y_pred = [int(score >= t) for score in positive_scores]
                precision = precision_score(y_valid, y_pred, labels=[0, 1], pos_label=1)
                recall = recall_score(y_valid, y_pred, labels=[0, 1], pos_label=1)
                f1 = f1_score(y_valid, y_pred, labels=[0, 1], pos_label=1)
                statistics_l.update_statistics(label=t, s_type='Precision', data=precision)
                statistics_l.update_statistics(label=t, s_type='Recall', data=recall)
                statistics_l.update_statistics(label=t, s_type='F1-Score', data=f1)

        statistics_l.frame()['reaction'] = label
        return statistics_l
开发者ID:daniaki,项目名称:ppi_wrangler,代码行数:58,代码来源:recall_precision.py

示例6: test_pickle

# 需要导入模块: from sklearn.model_selection import RandomizedSearchCV [as 别名]
# 或者: from sklearn.model_selection.RandomizedSearchCV import fit [as 别名]
def test_pickle():
    """Smoke test: fitted grid and randomized searches can both be pickled."""
    clf = MockClassifier()

    # exhaustive search
    fitted_grid = GridSearchCV(clf, {'foo_param': [1, 2, 3]}, refit=True)
    fitted_grid.fit(X, y)
    pickle.dumps(fitted_grid)

    # randomized search
    fitted_random = RandomizedSearchCV(
        clf,
        {'foo_param': [1, 2, 3]},
        refit=True,
        n_iter=3,
    )
    fitted_random.fit(X, y)
    pickle.dumps(fitted_random)
开发者ID:YinongLong,项目名称:scikit-learn,代码行数:13,代码来源:test_search.py

示例7: test_trivial_cv_results_attr

# 需要导入模块: from sklearn.model_selection import RandomizedSearchCV [as 别名]
# 或者: from sklearn.model_selection.RandomizedSearchCV import fit [as 别名]
def test_trivial_cv_results_attr():
    """Check that ``cv_results_`` is set when the search space has one point.

    Non-regression test: the results attribute must be populated by both
    ``GridSearchCV`` and ``RandomizedSearchCV`` even for a single candidate.
    """
    clf = MockClassifier()
    grid_search = GridSearchCV(clf, {'foo_param': [1]})
    grid_search.fit(X, y)
    assert_true(hasattr(grid_search, "cv_results_"))

    random_search = RandomizedSearchCV(clf, {'foo_param': [0]}, n_iter=1)
    random_search.fit(X, y)
    # Check the randomized search itself; the original re-checked grid_search
    # here, leaving random_search unverified.
    assert_true(hasattr(random_search, "cv_results_"))
开发者ID:YinongLong,项目名称:scikit-learn,代码行数:13,代码来源:test_search.py

示例8: build_nn

# 需要导入模块: from sklearn.model_selection import RandomizedSearchCV [as 别名]
# 或者: from sklearn.model_selection.RandomizedSearchCV import fit [as 别名]
def build_nn(x_train, y_train, x_test, y_test, n_features):
    """
    Tune a regression neural network, evaluate it and pickle the results.

    The sizes of the two hidden layers are tuned with a randomized search,
    the tuned model predicts on the test set, five regression metrics are
    computed, and everything is written to one pickle file.

    :param x_train: features dataframe for model training
    :param y_train: target dataframe for model training
    :param x_test: features dataframe for model testing
    :param y_test: target dataframe for model testing
    :param n_features: integer used in the output file name
    :return: None
    """
    layer_spec = [('input', InputLayer),
                  ('hidden0', DenseLayer),
                  ('hidden1', DenseLayer),
                  ('output', DenseLayer)]
    # NOTE(review): softmax over a single output unit would always yield 1.0;
    # confirm this output nonlinearity is intended for regression.
    net = NeuralNet(layers=layer_spec,
                    input_shape=(None, x_train.shape[1]),  # one input node per column of x
                    hidden0_num_units=15,
                    hidden0_nonlinearity=lasagne.nonlinearities.softmax,
                    hidden1_num_units=17,
                    hidden1_nonlinearity=lasagne.nonlinearities.softmax,
                    output_num_units=1,  # one output node per column of y
                    output_nonlinearity=lasagne.nonlinearities.softmax,
                    max_epochs=100,
                    update_learning_rate=0.01,
                    regression=True,
                    verbose=0)

    # Search over the number of units in each hidden layer.
    param_dist = {'hidden0_num_units': sp_randint(3, 30),
                  'hidden1_num_units': sp_randint(3, 30)}
    clf = RandomizedSearchCV(estimator=net,
                             param_distributions=param_dist,
                             n_iter=15,
                             n_jobs=-1)
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)

    metrics = sklearn.metrics
    mean_abs = metrics.mean_absolute_error(y_test, y_pred)
    mean_sq = metrics.mean_squared_error(y_test, y_pred)
    median_abs = metrics.median_absolute_error(y_test, y_pred)
    r2 = metrics.r2_score(y_test, y_pred)
    exp_var_score = metrics.explained_variance_score(y_test, y_pred)

    # Objects are dumped one after another into the same file; readers must
    # load them back in this exact order.
    payload = (clf, net, mean_abs, mean_sq, median_abs, r2, exp_var_score, y_pred)
    with open('../trained_networks/nn_%d_data.pkl' % n_features, 'wb') as results:
        for item in payload:
            pickle.dump(item, results, pickle.HIGHEST_PROTOCOL)
开发者ID:pearlphilip,项目名称:USP-inhibition,代码行数:56,代码来源:models.py

示例9: test_randomgridsearch_slm

# 需要导入模块: from sklearn.model_selection import RandomizedSearchCV [as 别名]
# 或者: from sklearn.model_selection.RandomizedSearchCV import fit [as 别名]
def test_randomgridsearch_slm(make_gaus_data):
    """Check that a StandardLinearModel runs through RandomizedSearchCV."""
    X, y, Xs, ys = make_gaus_data

    model = StandardLinearModel(LinearBasis(onescol=True))

    # candidate values for 'var': 1/1, 1/2, ..., 1/5
    var_candidates = [Parameter(1.0 / v, Positive()) for v in range(1, 6)]
    search = RandomizedSearchCV(
        model,
        {'var': var_candidates},
        n_jobs=-1,
        n_iter=2,
    )

    search.fit(X, y)
    predicted = search.predict(Xs)

    # only verifies that the whole pipeline runs and yields one value per query
    assert len(predicted) == len(ys)
开发者ID:NICTA,项目名称:revrand,代码行数:16,代码来源:test_models.py

示例10: test_randomgridsearch_glm

# 需要导入模块: from sklearn.model_selection import RandomizedSearchCV [as 别名]
# 或者: from sklearn.model_selection.RandomizedSearchCV import fit [as 别名]
def test_randomgridsearch_glm(make_gaus_data):
    """Check that a GeneralizedLinearModel runs through RandomizedSearchCV."""
    X, y, Xs, ys = make_gaus_data

    model = GeneralizedLinearModel(
        Gaussian(),
        LinearBasis(onescol=True),
        random_state=1,
        maxiter=100,
    )

    # batch sizes 1..10 are the only tuned parameter
    search = RandomizedSearchCV(
        model,
        {'batch_size': range(1, 11)},
        verbose=1,
        n_jobs=-1,
        n_iter=2,
    )

    search.fit(X, y)
    predicted = search.predict(Xs)

    # only verifies that the whole pipeline runs and yields one value per query
    assert len(predicted) == len(ys)
开发者ID:NICTA,项目名称:revrand,代码行数:16,代码来源:test_models.py

示例11: test_pickle

# 需要导入模块: from sklearn.model_selection import RandomizedSearchCV [as 别名]
# 或者: from sklearn.model_selection.RandomizedSearchCV import fit [as 别名]
def test_pickle():
    """Check that fitted searches predict the same after a pickle round trip."""

    def roundtrip(obj):
        # serialize and immediately restore the object
        return pickle.loads(pickle.dumps(obj))

    clf = MockClassifier()

    # exhaustive search
    grid_search = GridSearchCV(clf, {'foo_param': [1, 2, 3]}, refit=True)
    grid_search.fit(X, y)
    restored_grid = roundtrip(grid_search)
    assert_array_almost_equal(grid_search.predict(X), restored_grid.predict(X))

    # randomized search
    random_search = RandomizedSearchCV(
        clf,
        {'foo_param': [1, 2, 3]},
        refit=True,
        n_iter=3,
    )
    random_search.fit(X, y)
    restored_random = roundtrip(random_search)
    assert_array_almost_equal(random_search.predict(X), restored_random.predict(X))
开发者ID:IsaacHaze,项目名称:scikit-learn,代码行数:17,代码来源:test_search.py

示例12: test__extract_arfftrace

# 需要导入模块: from sklearn.model_selection import RandomizedSearchCV [as 别名]
# 或者: from sklearn.model_selection.RandomizedSearchCV import fit [as 别名]
    def test__extract_arfftrace(self):
        """Check the ARFF trace extracted from a fitted randomized search.

        Verifies the number of trace attributes and trace lines, that every
        ``parameter_*`` value decodes (as JSON) to a value from ``param_grid``,
        that the remaining columns have the expected type, and that every
        tuned parameter appears in the trace.
        """
        param_grid = {"max_depth": [3, None],
                      "max_features": [1, 2, 3, 4],
                      "bootstrap": [True, False],
                      "criterion": ["gini", "entropy"]}
        num_iters = 10
        task = openml.tasks.get_task(20)
        # n_iter is passed by keyword: it is keyword-only in current
        # scikit-learn releases, and the keyword form works in older ones too.
        clf = RandomizedSearchCV(RandomForestClassifier(), param_grid,
                                 n_iter=num_iters)
        # just run the task
        train, _ = task.get_train_test_split_indices(0, 0)
        X, y = task.get_X_and_y()
        clf.fit(X[train], y[train])

        trace_attribute_list = _extract_arfftrace_attributes(clf)
        trace_list = _extract_arfftrace(clf, 0, 0)
        self.assertIsInstance(trace_attribute_list, list)
        # assertEqual instead of the deprecated assertEquals alias
        self.assertEqual(len(trace_attribute_list), 5 + len(param_grid))
        self.assertIsInstance(trace_list, list)
        self.assertEqual(len(trace_list), num_iters)

        # parameters that actually show up in the trace
        optimized_params = set()

        for att_idx, attribute in enumerate(trace_attribute_list):
            att_name = attribute[0]
            att_type = attribute[1]
            if att_name.startswith("parameter_"):
                # add this to the found parameters
                param_name = att_name[len("parameter_"):]
                optimized_params.add(param_name)

                legal_values = param_grid[param_name]
                for trace_line in trace_list:
                    val = json.loads(trace_line[att_idx])
                    self.assertIn(val, legal_values)
            else:
                # repeat, fold, iteration, or one of the remaining columns
                for trace_line in trace_list:
                    val = trace_line[att_idx]
                    if isinstance(att_type, list):
                        # nominal attribute: value must be one of the listed levels
                        self.assertIn(val, att_type)
                    elif att_name in ['repeat', 'fold', 'iteration']:
                        self.assertIsInstance(val, int)
                    else:  # att_type = real
                        self.assertIsInstance(val, float)

        self.assertEqual(set(param_grid.keys()), optimized_params)
开发者ID:amueller,项目名称:python,代码行数:50,代码来源:test_run_functions.py

示例13: model_param_search

# 需要导入模块: from sklearn.model_selection import RandomizedSearchCV [as 别名]
# 或者: from sklearn.model_selection.RandomizedSearchCV import fit [as 别名]
def model_param_search(estimator, X, y, param_dist, scoring,
                       n_iter=1, n_cv=5, verbose=10, random_state=1,
                       model_id='model', save_search=True):
    """Run a randomized search, report it, optionally pickle it, return the best model.

    :param estimator: estimator to tune
    :param X: training features
    :param y: training targets
    :param param_dist: parameter distributions to sample from
    :param scoring: value passed as ``scoring`` to the search
    :param n_iter: number of sampled parameter settings
    :param n_cv: value passed as ``cv`` to the search
    :param verbose: verbosity passed to the search
    :param random_state: seed passed to the search
    :param model_id: label used in the log line and as the pickle file stem
    :param save_search: when true, pickle the fitted search to
        ``<model_id>.pickle``
    :return: ``best_estimator_`` of the fitted search
    """
    started_at = time.time()

    search_options = dict(
        param_distributions=param_dist,
        n_iter=n_iter,
        scoring=scoring,
        cv=n_cv,
        verbose=verbose,
        random_state=random_state,
    )
    searcher = RandomizedSearchCV(estimator, **search_options)
    searcher.fit(X, y)

    print('Best param: ', searcher.best_params_)
    print('Best score: ', searcher.best_score_)
    print('Best model: ', searcher.best_estimator_)

    if save_search:
        with open(model_id+'.pickle', 'wb') as out_file:
            pickle.dump(searcher, out_file)

    # elapsed time is reported in minutes
    elapsed_minutes = (time.time() - started_at) / 60
    print('Time searching param for {}: {}'.format(model_id, elapsed_minutes))

    return searcher.best_estimator_
开发者ID:canzheng,项目名称:kaggle-talkingdata,代码行数:20,代码来源:model_param.py

示例14: test_grid_search_with_multioutput_data

# 需要导入模块: from sklearn.model_selection import RandomizedSearchCV [as 别名]
# 或者: from sklearn.model_selection.RandomizedSearchCV import fit [as 别名]
def test_grid_search_with_multioutput_data():
    """Check per-split scores of both search types on multi-output data.

    For every candidate reported in ``cv_results_`` the estimator is refit by
    hand on each CV split, and its score must match the stored
    ``split<i>_test_score`` entry.
    """
    X, y = make_multilabel_classification(return_indicator=True,
                                          random_state=0)

    est_parameters = {"max_depth": [1, 2, 3, 4]}
    cv = KFold(random_state=0)

    estimators = [DecisionTreeRegressor(random_state=0),
                  DecisionTreeClassifier(random_state=0)]

    def make_grid_search(est):
        return GridSearchCV(est, est_parameters, cv=cv)

    def make_random_search(est):
        return RandomizedSearchCV(est, est_parameters, cv=cv, n_iter=3)

    # All estimators go through the grid search first, then through the
    # randomized search -- the same order as two separate loops.
    for build_search in (make_grid_search, make_random_search):
        for est in estimators:
            search = build_search(est)
            search.fit(X, y)
            results = search.cv_results_

            for cand_i, cand_params in enumerate(results['params']):
                est.set_params(**cand_params)

                # recompute the score of this candidate on every split
                for i, (train, test) in enumerate(cv.split(X, y)):
                    est.fit(X[train], y[train])
                    correct_score = est.score(X[test], y[test])
                    stored_scores = results['split%d_test_score' % i]
                    assert_almost_equal(correct_score, stored_scores[cand_i])
开发者ID:YinongLong,项目名称:scikit-learn,代码行数:45,代码来源:test_search.py

示例15: test_random_search_cv_results

# 需要导入模块: from sklearn.model_selection import RandomizedSearchCV [as 别名]
# 或者: from sklearn.model_selection.RandomizedSearchCV import fit [as 别名]
def test_random_search_cv_results():
    """Check the structure of ``cv_results_`` for ``iid=False`` and ``iid=True``."""
    # A noisy dataset, so prediction errors vary across CV folds and
    # parameter settings.
    X, y = make_classification(n_samples=200, n_features=100, n_informative=3,
                               random_state=0)

    # scipy.stats dists now supports `seed` but we still support scipy 0.12
    # which doesn't support the seed. Hence the assertions in the test for
    # random_search alone should not depend on randomization.
    n_splits = 3
    n_search_iter = 30
    params = dict(C=expon(scale=10), gamma=expon(scale=0.1))

    # Build and fit one search per iid setting, non-iid first.
    fitted = []
    for iid_flag in (False, True):
        candidate_search = RandomizedSearchCV(SVC(),
                                              n_iter=n_search_iter,
                                              cv=n_splits,
                                              iid=iid_flag,
                                              param_distributions=params)
        candidate_search.fit(X, y)
        fitted.append((candidate_search, iid_flag))

    param_keys = ('param_C', 'param_gamma')
    score_keys = (
        'mean_test_score', 'mean_train_score',
        'rank_test_score',
        'split0_test_score', 'split1_test_score', 'split2_test_score',
        'split0_train_score', 'split1_train_score', 'split2_train_score',
        'std_test_score', 'std_train_score',
        'mean_fit_time', 'std_fit_time',
        'mean_score_time', 'std_score_time',
    )
    n_cand = n_search_iter

    for search, iid in fitted:
        assert_equal(iid, search.iid)
        cv_results = search.cv_results_
        # Check results structure
        check_cv_results_array_types(cv_results, param_keys, score_keys)
        check_cv_results_keys(cv_results, param_keys, score_keys, n_cand)
        # For random_search, all the param array vals should be unmasked
        assert_false(any(cv_results['param_C'].mask) or
                     any(cv_results['param_gamma'].mask))
        check_cv_results_grid_scores_consistency(search)
开发者ID:IsaacHaze,项目名称:scikit-learn,代码行数:45,代码来源:test_search.py


注:本文中的sklearn.model_selection.RandomizedSearchCV.fit方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。