当前位置: 首页>>代码示例>>Python>>正文


Python model_selection.RandomizedSearchCV类代码示例

本文整理汇总了Python中sklearn.model_selection.RandomizedSearchCV的典型用法代码示例。如果您正苦于以下问题:Python RandomizedSearchCV类的具体用法?Python RandomizedSearchCV怎么用?Python RandomizedSearchCV使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了RandomizedSearchCV类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: search

    def search(self, search_space, search_iter, n_estimators, x, y):
        """Run a randomized hyper-parameter search and keep the best model.

        The fixed defaults below are overridden by the entries of
        ``search_space``. An ``'n_estimators'`` entry in ``search_space`` is
        ignored in favour of the ``n_estimators`` argument; the caller's dict
        is not modified.

        Args:
            search_space: Mapping of parameter name to candidate values.
            search_iter: Number of sampled settings (passed as ``n_iter``).
            n_estimators: Candidates for ``'n_estimators'``, forwarded as-is.
            x: Training features handed to ``fit``.
            y: Training targets handed to ``fit``.

        Returns:
            ``best_params_`` of the fitted search. The refit best estimator is
            stored on ``self.clf`` as a side effect.
        """
        params = {
            'boosting_type': ['gbdt'],
            'min_child_weight': [5],
            'min_split_gain': [1.0],
            'subsample': [0.8],
            'colsample_bytree': [0.6],
            'max_depth': [10],
            'n_estimators': n_estimators,
            'num_leaves': [70],
            'learning_rate': [0.04],
        }
        # Skip 'n_estimators' instead of deleting it from the caller's dict,
        # so the argument passed in is left untouched.
        params.update(
            (name, candidates)
            for name, candidates in search_space.items()
            if name != 'n_estimators'
        )
        if self.verbose:
            print(params)
        folds = 3
        score_metric, skf = self.get_skf(folds)

        random_search = RandomizedSearchCV(self.lgbm, param_distributions=params, n_iter=search_iter,
                                           scoring=score_metric,
                                           n_jobs=1, cv=skf, verbose=0, random_state=1001)

        random_search.fit(x, y)
        self.clf = random_search.best_estimator_

        return random_search.best_params_
开发者ID:Saiuz,项目名称:autokeras,代码行数:28,代码来源:tabular_supervised.py

示例2: parameter_search

def parameter_search(model, X, y, params, metric, n=10):
    '''
    Fit a randomized hyper-parameter search for ``model`` on ``(X, y)``.

    ``params`` is forwarded as ``param_distributions``, ``metric`` as
    ``scoring`` and ``n`` as ``n_iter``; three parallel jobs are requested.
    Returns the fitted search object itself, not only the best parameters.
    '''
    search_options = dict(
        param_distributions=params,
        scoring=metric,
        n_jobs=3,
        n_iter=n,
    )
    fitted_search = RandomizedSearchCV(model, **search_options)
    fitted_search.fit(X, y)
    return fitted_search
开发者ID:BryceLuna,项目名称:Fraud_Detection,代码行数:8,代码来源:Search_Models_Params.py

示例3: pr_curve

    def pr_curve(i):
        """Collect precision, recall and F1 per threshold for the i-th label.

        For every ``(train, validation)`` index pair in ``folds`` a randomized
        search is fitted on the vectorized training fold, and its positive-class
        probabilities on the validation fold are thresholded at each value of
        ``thresholds``. The resulting scores are accumulated in a
        ``Statistics`` object keyed by threshold.

        Args:
            i: Index into ``labels`` (and ``seeds``) selecting the label.

        Returns:
            The populated ``Statistics`` object.

        Raises:
            ValueError: If ``vectorizer_method`` is neither ``'tf-idf'`` nor
                ``'count'``.
        """
        label = labels[i]
        statistics_l = Statistics()
        print('Doing label {}'.format(label))

        for train_idx, valid_idx in folds:
            # Re-seed per fold so every fold starts from the same RNG state.
            rng = np.random.RandomState(seeds[i])
            training_fold = developement_df.loc[train_idx, ]
            training_fold = training_fold.reset_index(drop=True)
            validation_fold = developement_df.loc[valid_idx, ]
            validation_fold = validation_fold.reset_index(drop=True)
            base_estimators = make_classifiers(method, balanced, labels, random_state=rng)

            # Find the best params, then do a final proper calibration.
            base_estimator = base_estimators[label]
            estimator = RandomizedSearchCV(
                estimator=base_estimator, param_distributions=params,
                n_iter=60, scoring='f1', cv=3, random_state=rng,
                error_score=0.0, n_jobs=1, pre_dispatch='2*n_jobs',
                refit=True
            )

            # Set up the vectorizer for the bag-of-words representation
            if vectorizer_method == 'tf-idf':
                vectorizer = TfidfVectorizer(
                    stop_words=['go', '', ' '], binary=binary, lowercase=True,
                    sublinear_tf=False, max_df=1.0, min_df=0
                )
            elif vectorizer_method == 'count':
                vectorizer = CountVectorizer(
                    stop_words=['go', '', ' '], binary=binary, lowercase=True
                )
            else:
                # Fail with a clear message instead of an unbound-local error
                # on the first use of `vectorizer` below.
                raise ValueError(
                    'Unknown vectorizer_method: {!r}'.format(vectorizer_method)
                )
            vectorizer.fit(training_fold['terms'].values)

            # Fit and evaluate the performance of the classifier.
            x_train = vectorizer.transform(training_fold['terms'].values)
            y_train = np.asarray(training_fold[label].values, dtype=int)

            x_valid = vectorizer.transform(validation_fold['terms'].values)
            y_valid = np.asarray(validation_fold[label].values, dtype=int)

            estimator.fit(x_train, y_train)

            # The probabilities do not depend on the threshold, so compute
            # them once per fold rather than once per threshold.
            positive_proba = [p[1] for p in estimator.predict_proba(x_valid)]

            for t in thresholds:
                y_pred = [int(proba >= t) for proba in positive_proba]
                precision = precision_score(y_valid, y_pred, labels=[0, 1], pos_label=1)
                recall = recall_score(y_valid, y_pred, labels=[0, 1], pos_label=1)
                f1 = f1_score(y_valid, y_pred, labels=[0, 1], pos_label=1)
                statistics_l.update_statistics(label=t, s_type='Precision', data=precision)
                statistics_l.update_statistics(label=t, s_type='Recall', data=recall)
                statistics_l.update_statistics(label=t, s_type='F1-Score', data=f1)

        statistics_l.frame()['reaction'] = label
        return statistics_l
开发者ID:daniaki,项目名称:ppi_wrangler,代码行数:56,代码来源:recall_precision.py

示例4: test_pickle

def test_pickle():
    # Smoke test: a fitted grid search and a fitted randomized search must
    # both be accepted by pickle.dumps.
    estimator = MockClassifier()

    fitted_grid = GridSearchCV(estimator, dict(foo_param=[1, 2, 3]), refit=True)
    fitted_grid.fit(X, y)
    pickle.dumps(fitted_grid)

    fitted_random = RandomizedSearchCV(
        estimator, dict(foo_param=[1, 2, 3]), refit=True, n_iter=3
    )
    fitted_random.fit(X, y)
    pickle.dumps(fitted_random)
开发者ID:YinongLong,项目名称:scikit-learn,代码行数:11,代码来源:test_search.py

示例5: test_trivial_cv_results_attr

def test_trivial_cv_results_attr():
    # Test search over a "grid" with only one point.
    # Non-regression test: grid_scores_ wouldn't be set by GridSearchCV.
    clf = MockClassifier()
    grid_search = GridSearchCV(clf, {'foo_param': [1]})
    grid_search.fit(X, y)
    assert_true(hasattr(grid_search, "cv_results_"))

    random_search = RandomizedSearchCV(clf, {'foo_param': [0]}, n_iter=1)
    random_search.fit(X, y)
    # Check the randomized search here; re-checking grid_search would leave
    # RandomizedSearchCV untested.
    assert_true(hasattr(random_search, "cv_results_"))
开发者ID:YinongLong,项目名称:scikit-learn,代码行数:11,代码来源:test_search.py

示例6: build_nn

def build_nn(x_train, y_train, x_test, y_test, n_features):
    """
    Constructing a regression neural network model from input dataframe
    :param x_train: features dataframe for model training
    :param y_train: target dataframe for model training
    :param x_test: features dataframe for model testing
    :param y_test: target dataframe for model testing
    :param n_features: integer used only to name the output pickle file
    :return: None
    """
    # NOTE(review): softmax as a hidden-layer activation is unusual; it is
    # kept as in the original -- confirm it is intended.
    net = NeuralNet(layers=[('input', InputLayer),
                            ('hidden0', DenseLayer),
                            ('hidden1', DenseLayer),
                            ('output', DenseLayer)],
                    input_shape=(None, x_train.shape[1]),  # Number of i/p nodes = number of columns in x
                    hidden0_num_units=15,
                    hidden0_nonlinearity=lasagne.nonlinearities.softmax,
                    hidden1_num_units=17,
                    hidden1_nonlinearity=lasagne.nonlinearities.softmax,
                    output_num_units=1,  # Number of o/p nodes = number of columns in y
                    # Softmax over a single unit is always exactly 1, so the
                    # regression output must be linear to predict anything else.
                    output_nonlinearity=lasagne.nonlinearities.linear,
                    max_epochs=100,
                    update_learning_rate=0.01,
                    regression=True,
                    verbose=0)

    # Finding the optimal set of params for each variable in the training of the neural network
    param_dist = {'hidden0_num_units': sp_randint(3, 30), 'hidden1_num_units': sp_randint(3, 30)}
    clf = RandomizedSearchCV(estimator=net, param_distributions=param_dist,
                             n_iter=15, n_jobs=-1)
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)

    # Mean absolute error regression loss
    mean_abs = sklearn.metrics.mean_absolute_error(y_test, y_pred)
    # Mean squared error regression loss
    mean_sq = sklearn.metrics.mean_squared_error(y_test, y_pred)
    # Median absolute error regression loss
    median_abs = sklearn.metrics.median_absolute_error(y_test, y_pred)
    # R^2 (coefficient of determination) regression score function
    r2 = sklearn.metrics.r2_score(y_test, y_pred)
    # Explained variance regression score function
    exp_var_score = sklearn.metrics.explained_variance_score(y_test, y_pred)

    # The objects are dumped one after another into a single file, so a reader
    # must call pickle.load repeatedly in this same order.
    artifacts = (clf, net, mean_abs, mean_sq, median_abs, r2,
                 exp_var_score, y_pred)
    with open('../trained_networks/nn_%d_data.pkl' % n_features, 'wb') as results:
        for artifact in artifacts:
            pickle.dump(artifact, results, pickle.HIGHEST_PROTOCOL)
开发者ID:pearlphilip,项目名称:USP-inhibition,代码行数:54,代码来源:models.py

示例7: test_randomgridsearch_slm

def test_randomgridsearch_slm(make_gaus_data):
    """Smoke-test a randomized search over ``var`` for a StandardLinearModel."""
    X, y, Xs, ys = make_gaus_data

    model = StandardLinearModel(LinearBasis(onescol=True))

    # Candidate variances 1/1, 1/2, ..., 1/5, each wrapped as a Parameter.
    var_candidates = [Parameter(1.0 / v, Positive()) for v in range(1, 6)]
    search = RandomizedSearchCV(model, {'var': var_candidates},
                                n_jobs=-1, n_iter=2)

    search.fit(X, y)
    predictions = search.predict(Xs)
    # Only checks that everything runs and yields one prediction per target.
    assert len(ys) == len(predictions)
开发者ID:NICTA,项目名称:revrand,代码行数:14,代码来源:test_models.py

示例8: test_randomgridsearch_glm

def test_randomgridsearch_glm(make_gaus_data):
    """Smoke-test a randomized search over ``batch_size`` for a GLM."""
    X, y, Xs, ys = make_gaus_data

    model = GeneralizedLinearModel(
        Gaussian(),
        LinearBasis(onescol=True),
        random_state=1,
        maxiter=100,
    )

    batch_size_space = {'batch_size': range(1, 11)}
    search = RandomizedSearchCV(
        model, batch_size_space, verbose=1, n_jobs=-1, n_iter=2
    )

    search.fit(X, y)
    predictions = search.predict(Xs)
    # Only checks that everything runs and yields one prediction per target.
    assert len(ys) == len(predictions)
开发者ID:NICTA,项目名称:revrand,代码行数:14,代码来源:test_models.py

示例9: test_pickle

def test_pickle():
    # A fitted search must survive a pickle round trip and keep predicting
    # the same values afterwards.
    estimator = MockClassifier()

    fitted_grid = GridSearchCV(estimator, dict(foo_param=[1, 2, 3]), refit=True)
    fitted_grid.fit(X, y)
    restored_grid = pickle.loads(pickle.dumps(fitted_grid))
    assert_array_almost_equal(fitted_grid.predict(X), restored_grid.predict(X))

    fitted_random = RandomizedSearchCV(
        estimator, dict(foo_param=[1, 2, 3]), refit=True, n_iter=3
    )
    fitted_random.fit(X, y)
    restored_random = pickle.loads(pickle.dumps(fitted_random))
    assert_array_almost_equal(fitted_random.predict(X), restored_random.predict(X))
开发者ID:IsaacHaze,项目名称:scikit-learn,代码行数:15,代码来源:test_search.py

示例10: test__extract_arfftrace

    def test__extract_arfftrace(self):
        """Check the ARFF trace extracted from a fitted RandomizedSearchCV.

        Fits a randomized search on the training split of OpenML task 20, then
        verifies that the trace attributes and trace rows have the expected
        sizes, that every ``parameter_*`` column holds values from
        ``param_grid``, and that the remaining columns have the expected types.
        """
        param_grid = {"max_depth": [3, None],
                      "max_features": [1, 2, 3, 4],
                      "bootstrap": [True, False],
                      "criterion": ["gini", "entropy"]}
        num_iters = 10
        task = openml.tasks.get_task(20)
        # Pass n_iter by keyword so the call does not rely on argument position.
        clf = RandomizedSearchCV(RandomForestClassifier(), param_grid,
                                 n_iter=num_iters)
        # just run the task
        train, _ = task.get_train_test_split_indices(0, 0)
        X, y = task.get_X_and_y()
        clf.fit(X[train], y[train])

        trace_attribute_list = _extract_arfftrace_attributes(clf)
        trace_list = _extract_arfftrace(clf, 0, 0)
        self.assertIsInstance(trace_attribute_list, list)
        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed in Python 3.12.
        self.assertEqual(len(trace_attribute_list), 5 + len(param_grid))
        self.assertIsInstance(trace_list, list)
        self.assertEqual(len(trace_list), num_iters)

        # found parameters
        optimized_params = set()

        for att_idx, attribute in enumerate(trace_attribute_list):
            att_name, att_type = attribute[0], attribute[1]
            if att_name.startswith("parameter_"):
                # add this to the found parameters
                param_name = att_name[len("parameter_"):]
                optimized_params.add(param_name)

                legal_values = param_grid[param_name]
                for trace_line in trace_list:
                    # parameter values are stored JSON-encoded in the trace
                    val = json.loads(trace_line[att_idx])
                    self.assertIn(val, legal_values)
            else:
                # repeat, fold, itt, bool
                for trace_line in trace_list:
                    val = trace_line[att_idx]
                    if isinstance(att_type, list):
                        self.assertIn(val, att_type)
                    elif att_name in ['repeat', 'fold', 'iteration']:
                        self.assertIsInstance(val, int)
                    else:  # att_type = real
                        self.assertIsInstance(val, float)

        self.assertEqual(set(param_grid.keys()), optimized_params)
开发者ID:amueller,项目名称:python,代码行数:48,代码来源:test_run_functions.py

示例11: test_large_grid

    def test_large_grid():
        """Exercise a randomized search over a scaler / PCA / random-forest pipeline.

        The search is scored before fitting (which must fail), then fitted on
        the training data, scored, and used to predict on train and test data.
        Accuracy scores are computed but not asserted on. Finally,
        ``report_grid_score_detail`` must reject bad ``percentile`` and
        ``y_axis`` arguments and accept a valid percentile.
        """
        # KFold's constructor differs between the two supported scikit-learn APIs.
        if SK18:
            cv_splitter = KFold(n_splits=3, shuffle=True, random_state=42)
        else:
            cv_splitter = KFold(n=y_train.shape[0], n_folds=3, shuffle=True, random_state=42)

        # pipeline: selective scaling -> selective PCA -> random forest
        steps = [
            ('scaler', SelectiveScaler()),
            ('pca', SelectivePCA(weight=True)),
            ('rf', RandomForestClassifier(random_state=42)),
        ]
        pipeline = Pipeline(steps)

        # hyper-parameter space sampled by the search
        search_space = {
            'scaler__scaler': [StandardScaler(), RobustScaler(), MinMaxScaler()],
            'pca__whiten': [True, False],
            'pca__weight': [True, False],
            'pca__n_components': uniform(0.75, 0.15),
            'rf__n_estimators': randint(5, 10),
            'rf__max_depth': randint(5, 15),
        }

        search = RandomizedSearchCV(
            pipeline, search_space, n_iter=2, scoring='accuracy',
            n_jobs=1, cv=cv_splitter, random_state=42,
        )

        # scoring before fitting has to fail
        assert_fails(search.score, (ValueError, AttributeError), X_train, y_train)

        search.fit(X_train, y_train)

        # score for coverage; any warnings raised here are silenced
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            search.score(X_train, y_train)

        # coverage:
        assert search._estimator_type == 'classifier'

        train_predictions = search.predict(X_train)
        test_predictions = search.predict(X_test)

        # computed for coverage only; the results are not checked
        accuracy_score(y_train, train_predictions)
        accuracy_score(y_test, test_predictions)

        # report_grid_score_detail must reject out-of-range percentiles ...
        assert_fails(report_grid_score_detail, ValueError, random_search=search, percentile=0.0)
        assert_fails(report_grid_score_detail, ValueError, random_search=search, percentile=1.0)

        # ... and an unknown y_axis
        assert_fails(report_grid_score_detail, ValueError, random_search=search, y_axis='bad_axis')

        # a valid percentile must pass
        report_grid_score_detail(search, charts=True, percentile=0.95)
开发者ID:tgsmith61591,项目名称:skutil,代码行数:60,代码来源:test_big.py

示例12: fit

def fit(x, y, estimator, dataframe, params):
    """Tune ``estimator`` with a randomized search and optionally calibrate it.

    A count vectorizer is fitted on column ``x`` of ``dataframe``, features
    are built through ``select_features``, and a 60-iteration randomized
    search is run over ``params``. Unless the module-level ``method`` is
    ``'lr'`` or ``'svm'``, a fresh clone of the input estimator is configured
    with the best parameters and wrapped in an isotonic
    ``CalibratedClassifierCV``.

    Returns a tuple ``(fitted estimator, selector, vectorizer)``.
    """
    vectorizer = CountVectorizer(stop_words=['go', '', ' '], binary=False, lowercase=True)
    vectorizer.fit(dataframe[x].values)
    fresh_estimator = clone(estimator)
    x_np, y_np, _feature_names, selector = select_features(
        df=dataframe,
        vectorizer=vectorizer,
        feature_col=x,
        label_col=y,
        select_method=None,
        continuous_col=None,
    )

    search = RandomizedSearchCV(estimator, params, n_iter=60, cv=3, n_jobs=3, refit=True)
    search.fit(x_np, y_np)
    best_params = search.best_params_

    fitted = search
    if method not in ('lr', 'svm'):
        print("Calibrating...")
        fitted = CalibratedClassifierCV(fresh_estimator.set_params(**best_params), 'isotonic', 3)
        fitted.fit(x_np, y_np)

    from sklearn.base import _pprint
    # NOTE(review): the return value of _pprint is discarded here -- confirm
    # whether it was meant to be printed.
    _pprint(fitted.get_params(deep=True), offset=2)
    return fitted, selector, vectorizer
开发者ID:daniaki,项目名称:ppi_wrangler,代码行数:25,代码来源:interactome_predict.py

示例13: model_param_search

def model_param_search(estimator, X, y, param_dist, scoring,
                       n_iter=1, n_cv=5, verbose=10, random_state=1, model_id='model', save_search=True):
    """Run a randomized search, report the outcome and return the best model.

    The best parameters, score and estimator are printed, followed by the
    elapsed time in minutes. When ``save_search`` is true the whole fitted
    search object is pickled to ``<model_id>.pickle``.

    Returns ``best_estimator_`` of the fitted search.
    """
    started_at = time.time()

    searcher = RandomizedSearchCV(
        estimator,
        param_distributions=param_dist,
        n_iter=n_iter,
        scoring=scoring,
        cv=n_cv,
        verbose=verbose,
        random_state=random_state,
    )
    searcher.fit(X, y)

    best_model = searcher.best_estimator_
    print('Best param: ', searcher.best_params_)
    print('Best score: ', searcher.best_score_)
    print('Best model: ', best_model)

    if save_search:
        with open(model_id + '.pickle', 'wb') as sink:
            pickle.dump(searcher, sink)

    # elapsed wall-clock time, converted from seconds to minutes
    elapsed_minutes = (time.time() - started_at) / 60
    print('Time searching param for {}: {}'.format(model_id, elapsed_minutes))

    return best_model
开发者ID:canzheng,项目名称:kaggle-talkingdata,代码行数:18,代码来源:model_param.py

示例14: test_grid_search_with_multioutput_data

def test_grid_search_with_multioutput_data():
    # Searches over a multi-output estimator must report, for every candidate
    # and split, the same score as fitting and scoring the estimator by hand.

    X, y = make_multilabel_classification(return_indicator=True,
                                          random_state=0)

    est_parameters = {"max_depth": [1, 2, 3, 4]}
    cv = KFold(random_state=0)

    estimators = [DecisionTreeRegressor(random_state=0),
                  DecisionTreeClassifier(random_state=0)]

    def check_split_scores(search, est):
        # Fit the search, then recompute every per-split test score manually.
        search.fit(X, y)
        for cand_i, cand_params in enumerate(search.cv_results_['params']):
            est.set_params(**cand_params)

            for split_i, (train, test) in enumerate(cv.split(X, y)):
                est.fit(X[train], y[train])
                correct_score = est.score(X[test], y[test])
                split_key = 'split%d_test_score' % split_i
                assert_almost_equal(correct_score,
                                    search.cv_results_[split_key][cand_i])

    # Test with grid search cv
    for est in estimators:
        check_split_scores(GridSearchCV(est, est_parameters, cv=cv), est)

    # Test with a randomized search
    for est in estimators:
        check_split_scores(
            RandomizedSearchCV(est, est_parameters, cv=cv, n_iter=3), est)
开发者ID:YinongLong,项目名称:scikit-learn,代码行数:43,代码来源:test_search.py

示例15: train_classifier

 def train_classifier(self, trainvectors, labels, c='1.0', kernel='linear', gamma='0.1', degree='1', class_weight='balanced', jobs=1, iterations=10, scoring='f1_micro', v=2):
     """Train an SVC, optionally choosing its settings by randomized search.

     ``c``, ``kernel``, ``gamma`` and ``degree`` are strings: either
     ``'search'`` (use a built-in candidate list) or whitespace-separated
     values. If each of them yields exactly one value, those values are used
     directly; otherwise a 5-fold randomized search picks the best
     combination. ``class_weight`` may be a string of whitespace-separated
     ``label:weight`` pairs, which is turned into a dict.

     The final model is always a plain ``svm.SVC`` fitted on the training
     data and stored on ``self.model``.
     """
     # With more than two classes the search wraps the SVC in an
     # OutputCodeClassifier, so parameter names need the 'estimator__' prefix.
     multi = len(set(labels)) > 2
     if multi:
         parameters = ['estimator__C', 'estimator__kernel', 'estimator__gamma', 'estimator__degree']
     else:
         parameters = ['C', 'kernel', 'gamma', 'degree']

     if ':' in class_weight:  # "label:weight label:weight ..." -> dictionary
         class_weight = dict(pair.split(':') for pair in class_weight.split())

     if c == 'search':
         c_values = [0.001, 0.005, 0.01, 0.5, 1, 5, 10, 50, 100, 500, 1000]
     else:
         c_values = [float(x) for x in c.split()]
     if kernel == 'search':
         kernel_values = ['linear', 'rbf', 'poly']
     else:
         kernel_values = kernel.split()
     if gamma == 'search':
         gamma_values = [0.0005, 0.002, 0.008, 0.032, 0.128, 0.512, 1.024, 2.048]
     else:
         gamma_values = [float(x) for x in gamma.split()]
     if degree == 'search':
         degree_values = [1, 2, 3, 4]
     else:
         degree_values = [int(x) for x in degree.split()]
     grid_values = [c_values, kernel_values, gamma_values, degree_values]

     if all(len(values) == 1 for values in grid_values):
         # only single parameter settings: nothing to search over
         settings = {name: values[0] for name, values in zip(parameters, grid_values)}
     else:
         param_grid = dict(zip(parameters, grid_values))
         model = svm.SVC(probability=True)
         if multi:
             model = OutputCodeClassifier(model)
             trainvectors = trainvectors.todense()
         paramsearch = RandomizedSearchCV(model, param_grid, cv = 5, scoring=scoring, verbose = v, n_iter = iterations, n_jobs = jobs, pre_dispatch = 4)
         paramsearch.fit(trainvectors, labels)
         settings = paramsearch.best_params_

     # train an SVC classifier with the settings that led to the best performance
     best_c, best_kernel, best_gamma, best_degree = (settings[name] for name in parameters)
     self.model = svm.SVC(
         probability = True,
         C = best_c,
         kernel = best_kernel,
         gamma = best_gamma,
         degree = best_degree,
         class_weight = class_weight,
         cache_size = 1000,
         verbose = v
     )
     self.model.fit(trainvectors, labels)
开发者ID:LanguageMachines,项目名称:quoll,代码行数:41,代码来源:classifier.py


注:本文中的sklearn.model_selection.RandomizedSearchCV类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。