当前位置: 首页>>代码示例>>Python>>正文


Python RandomizedSearchCV.predict_proba方法代码示例

本文整理汇总了Python中sklearn.model_selection.RandomizedSearchCV.predict_proba方法的典型用法代码示例。如果您正苦于以下问题:Python RandomizedSearchCV.predict_proba方法的具体用法?Python RandomizedSearchCV.predict_proba怎么用?Python RandomizedSearchCV.predict_proba使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.model_selection.RandomizedSearchCV的用法示例。


在下文中一共展示了RandomizedSearchCV.predict_proba方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: pr_curve

# 需要导入模块: from sklearn.model_selection import RandomizedSearchCV [as 别名]
# 或者: from sklearn.model_selection.RandomizedSearchCV import predict_proba [as 别名]
    def pr_curve(i):
        """Collect precision/recall/F1 over ``thresholds`` for one label.

        For every ``(train_idx, valid_idx)`` pair in ``folds`` a
        ``RandomizedSearchCV`` is fitted on the training fold for
        ``labels[i]``. The validation-fold probabilities are then binarised
        at each threshold and the resulting scores are recorded.

        Args:
            i: Position of the label in ``labels``; also indexes ``seeds``.

        Returns:
            The ``Statistics`` object that received one Precision, Recall and
            F1-Score update per threshold and fold. The label is stored
            under the ``'reaction'`` key of ``statistics_l.frame()``.

        Raises:
            TypeError: If ``vectorizer_method`` is neither ``'tf-idf'`` nor
                ``'count'`` (same behaviour as ``do_fold``).
        """
        label = labels[i]
        statistics_l = Statistics()
        print('Doing label {}'.format(label))

        for train_idx, valid_idx in folds:
            # Every fold of this label starts from the same seeded RNG state.
            rng = np.random.RandomState(seeds[i])
            training_fold = developement_df.loc[train_idx, ].reset_index(drop=True)
            validation_fold = developement_df.loc[valid_idx, ].reset_index(drop=True)
            base_estimators = make_classifiers(method, balanced, labels, random_state=rng)

            # Hyper-parameter search; refit=True leaves the estimator fitted
            # with the best parameters found so it can predict directly.
            base_estimator = base_estimators[label]
            estimator = RandomizedSearchCV(
                estimator=base_estimator, param_distributions=params,
                n_iter=60, scoring='f1', cv=3, random_state=rng,
                error_score=0.0, n_jobs=1, pre_dispatch='2*n_jobs',
                refit=True
            )

            # Set up the vectorizer for the bag-of-words representation.
            if vectorizer_method == 'tf-idf':
                vectorizer = TfidfVectorizer(
                    stop_words=['go', '', ' '], binary=binary, lowercase=True,
                    sublinear_tf=False, max_df=1.0, min_df=0
                )
            elif vectorizer_method == 'count':
                vectorizer = CountVectorizer(
                    stop_words=['go', '', ' '], binary=binary, lowercase=True
                )
            else:
                # Without this branch `vectorizer` would be unbound below and
                # the failure would surface as an unrelated name error.
                raise TypeError("Vectorizer_method has type {}.".format(type(vectorizer_method)))
            # The vocabulary is learnt from the training fold only.
            vectorizer.fit(training_fold['terms'].values)

            # Fit and evaluate the performance of the classifier.
            x_train = vectorizer.transform(training_fold['terms'].values)
            y_train = np.asarray(training_fold[label].values, dtype=int)

            x_valid = vectorizer.transform(validation_fold['terms'].values)
            y_valid = np.asarray(validation_fold[label].values, dtype=int)

            estimator.fit(x_train, y_train)

            # The probabilities do not depend on the threshold, so compute
            # them once per fold instead of once per threshold.
            valid_proba = estimator.predict_proba(x_valid)

            for t in thresholds:
                # Column 1 of each row is compared against the threshold.
                y_pred = [int(p[1] >= t) for p in valid_proba]
                precision = precision_score(y_valid, y_pred, labels=[0, 1], pos_label=1)
                recall = recall_score(y_valid, y_pred, labels=[0, 1], pos_label=1)
                f1 = f1_score(y_valid, y_pred, labels=[0, 1], pos_label=1)
                statistics_l.update_statistics(label=t, s_type='Precision', data=precision)
                statistics_l.update_statistics(label=t, s_type='Recall', data=recall)
                statistics_l.update_statistics(label=t, s_type='F1-Score', data=f1)

        statistics_l.frame()['reaction'] = label
        return statistics_l
开发者ID:daniaki,项目名称:ppi_wrangler,代码行数:58,代码来源:recall_precision.py

示例2: do_fold

# 需要导入模块: from sklearn.model_selection import RandomizedSearchCV [as 别名]
# 或者: from sklearn.model_selection.RandomizedSearchCV import predict_proba [as 别名]
        def do_fold(j):
            """Fit and score one binary classifier per label on fold ``j``.

            ``folds_i[j]`` supplies the (train, validation) index pair used to
            slice ``developement_df``. For each label a ``RandomizedSearchCV``
            is fitted on the training fold and, unless ``method`` is ``'lr'``,
            replaced by a ``CalibratedClassifierCV`` built from the best
            parameters. Probabilities are then predicted for the training,
            validation and test data.

            NOTE(review): the remainder of this function is not visible in
            this excerpt, so its return value is not documented here.

            Raises:
                TypeError: If ``vectorizer_method`` is neither ``'tf-idf'``
                    nor ``'count'``.
            """
            print("\tFold " + str(j+1))
            train_idx = folds_i[j][0]
            valid_idx = folds_i[j][1]
            training_fold = developement_df.loc[train_idx, ]
            training_fold = training_fold.reset_index(drop=True)
            validation_fold = developement_df.loc[valid_idx, ]
            validation_fold = validation_fold.reset_index(drop=True)

            # Per-fold statistics accumulators for the train/validation/test
            # splits (no shuffling happens here).
            training_stats_i_f = Statistics()
            validation_stats_i_f = Statistics()
            testing_stats_i_f = Statistics()

            # Init the label ranking lists.
            label_pred_proba_train = []
            label_pred_proba_valid = []
            label_pred_proba_test = []

            label_y_train = []
            label_y_valid = []
            label_y_test = []

            # Set up the vectorizer for the bag-of-words representation.
            # The vocabulary is fitted on the training fold only.
            if vectorizer_method == 'tf-idf':
                vectorizer = TfidfVectorizer(
                    stop_words=['go', '', ' '], binary=binary, lowercase=True,
                    sublinear_tf=True, max_df=1.0, min_df=0
                )
                vectorizer.fit(training_fold['terms'].values)
                # alpha/percentile are not read in the visible part of this function.
                alpha = None
                percentile = 100
            elif vectorizer_method == 'count':
                vectorizer = CountVectorizer(
                    stop_words=['go', '', ' '], binary=binary, lowercase=True
                )
                vectorizer.fit(training_fold['terms'].values)
                alpha = None
                percentile = 100
            else:
                raise TypeError("Vectorizer_method has type {}.".format(type(vectorizer_method)))

            # NOTE(review): get_feature_names() was removed in scikit-learn 1.2
            # in favour of get_feature_names_out() - confirm the pinned version.
            selectors = generate_selectors(selection, vectorizer.get_feature_names(), dag)
            base_estimators = make_classifiers(method, balanced, labels, selectors, selection, rng)
            for label in sorted(labels):
                print("\t\tFitting for label {}...".format(label))

                # The feature matrices are identical for every label; only the
                # target column changes.
                x_train_l = vectorizer.transform(training_fold['terms'].values)
                y_train_l = np.asarray(training_fold[label].values, dtype=int)

                x_valid_l = vectorizer.transform(validation_fold['terms'].values)
                y_valid_l = np.asarray(validation_fold[label].values, dtype=int)

                # NOTE(review): features come from `testing_df` but targets from
                # `test_df_i` - confirm both refer to the same rows.
                x_test_l = vectorizer.transform(testing_df['terms'].values)
                y_test_l = np.asarray(test_df_i[label].values, dtype=int)

                # SVMs make the assumption of standardised features. Hence we scale the features
                # avoiding the use of mean to maintain the structure of count sparsity. Scaling
                # may also help with linear model convergence speed.
                if scale:
                    x_train_l = mean_center(x_train_l, with_mean=False)
                    x_valid_l = mean_center(x_valid_l, with_mean=False)
                    x_test_l = mean_center(x_test_l, with_mean=False)

                # NOTE(review): an earlier design generated the randomised-search folds
                # up-front and held one out for probability calibration:
                # cv_rand = StratifiedKFold(n_splits=3, shuffle=True, random_state=rng)
                # The visible code does not do this; the search is simply given cv=3.
                base_estimator_l = base_estimators[label]
                # Unfitted copy, used below as the base of the calibrated classifier.
                fresh_estimator = clone(base_estimator_l)

                # Find the best params, then do a final proper calibration.
                params = sk_generate_params(method, selection)
                estimator_l = RandomizedSearchCV(
                    estimator=base_estimator_l, param_distributions=params,
                    n_iter=60, scoring='f1', cv=3, random_state=rng,
                    error_score=0.0, n_jobs=1, pre_dispatch='2*n_jobs',
                    refit=True
                )

                # Test if there's any signal if we permute the labels.
                # Classifier should do poorly if we do so.
                if permute:
                    y_train_l = rng.permutation(y_train_l)

                threshold = 0.5
                estimator_l.fit(x_train_l, y_train_l)
                best_params_l = estimator_l.best_params_

                # Calibrate the classifier with the best hyperparameters. This applies
                # to every method except 'lr', which keeps the refitted search object.
                if method not in ['lr']:
                    estimator_l = CalibratedClassifierCV(fresh_estimator.set_params(**best_params_l),
                                                         cv=3, method='sigmoid')
                    estimator_l.fit(x_train_l, y_train_l)

                # Evaluate Performance characteristics and test on training to check overfitting.
                y_train_prob_l = estimator_l.predict_proba(x_train_l)
                y_valid_prob_l = estimator_l.predict_proba(x_valid_l)
                y_test_prob_l = estimator_l.predict_proba(x_test_l)
                # Only the training statistics are merged in the visible part of the function.
                training_stats_i_f.merge(evaluate_model(y_train_l, y_train_prob_l, label, threshold))
#.........这里部分代码省略.........
开发者ID:daniaki,项目名称:ppi_wrangler,代码行数:103,代码来源:clf_br.py

示例3: print

# 需要导入模块: from sklearn.model_selection import RandomizedSearchCV [as 别名]
# 或者: from sklearn.model_selection.RandomizedSearchCV import predict_proba [as 别名]
                         n_iter = 25, scoring = 'roc_auc', error_score = 0, 
                         cv=tscv, verbose = 3, n_jobs = -1,
                         refit=True)
# Run the hyper-parameter search. `clf` is built by the call whose beginning
# lies above this excerpt (presumably a RandomizedSearchCV - confirm); the
# extra keyword is handed to fit() alongside the training data.
clf.fit(X_train,y_train,early_stopping_rounds=10)

# Inspect the search results. As a bare expression this has no visible effect
# in a script; it only displays something when run interactively.
clf.cv_results_

print(clf.best_estimator_)

print(clf.best_score_)

best_param=clf.best_params_

#clf.predict(X_test)
# Class probabilities for the test sample, predicted by the fitted search object.
preds_prob=clf.predict_proba(X_test)
# Intent noted by the original author: find the optimum hyper-parameters and
# apply them to the overall model training.

# Score the training sample as well, so train and test ROC-AUC can be compared.
preds_train_prob = clf.predict_proba(X_train)
# Column 1 is used as the score (presumably the positive class - confirm via clf.classes_).
preds_train = preds_train_prob[:,1]
print('Roc-auc for train sample is %.2f' %(roc_auc_score(y_train,preds_train)))

preds=preds_prob[:,1]
print('Roc-auc for test sample is %.2f' %(roc_auc_score(y_test,preds)))
########################################
# SVC: two parameter grids, one for the linear kernel (C only) and one for
# the RBF kernel (C and gamma). The code consuming `param_grid` is not part
# of this excerpt.

param_grid = [
  {'C': [1, 10, 100, 1000], 'kernel': ['linear']},
  {'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001], 'kernel': ['rbf']},
 ]
开发者ID:nightrose79,项目名称:try,代码行数:33,代码来源:work+sample_yu-Oct2.py


注:本文中的sklearn.model_selection.RandomizedSearchCV.predict_proba方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。