本文整理汇总了 Python 中 sklearn.model_selection.RandomizedSearchCV.predict_proba 方法的典型用法代码示例。如果您正苦于以下问题:Python RandomizedSearchCV.predict_proba 方法的具体用法?Python RandomizedSearchCV.predict_proba 怎么用?Python RandomizedSearchCV.predict_proba 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 sklearn.model_selection.RandomizedSearchCV 的用法示例。
在下文中一共展示了RandomizedSearchCV.predict_proba方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: pr_curve
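# Required import: from sklearn.model_selection import RandomizedSearchCV [as alias]
# Note: predict_proba is a method of a RandomizedSearchCV instance and cannot be
# imported on its own ("from sklearn.model_selection.RandomizedSearchCV import
# predict_proba" fails because RandomizedSearchCV is a class, not a module).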
def pr_curve(i):
    """Collect precision, recall and F1 over a threshold sweep for one label.

    For every ``(train_idx, valid_idx)`` pair in ``folds`` a randomised
    hyper-parameter search is fitted on the training fold.  The refit search
    object then scores the validation fold once, and its positive-class
    probabilities are binarised at each value in ``thresholds``.

    The function relies on names defined outside of it: ``labels``,
    ``seeds``, ``folds``, ``developement_df``, ``method``, ``balanced``,
    ``params``, ``vectorizer_method``, ``binary``, ``thresholds``,
    ``make_classifiers`` and ``Statistics``.

    Args:
        i: Index into ``labels`` (the label to evaluate) and ``seeds`` (the
            seed used for that label's random state).

    Returns:
        The ``Statistics`` object holding the 'Precision', 'Recall' and
        'F1-Score' entries recorded per threshold, with a ``'reaction'``
        entry of its frame set to the label.

    Raises:
        TypeError: If ``vectorizer_method`` is neither ``'tf-idf'`` nor
            ``'count'`` (same error as ``do_fold`` raises for this case).
    """
    label = labels[i]
    statistics_l = Statistics()
    print('Doing label {}'.format(label))

    for train_idx, valid_idx in folds:
        # Re-seeded for every fold, so each fold starts from the same
        # random state for this label.
        rng = np.random.RandomState(seeds[i])

        training_fold = developement_df.loc[train_idx, ]
        training_fold = training_fold.reset_index(drop=True)
        validation_fold = developement_df.loc[valid_idx, ]
        validation_fold = validation_fold.reset_index(drop=True)

        base_estimators = make_classifiers(method, balanced, labels, random_state=rng)

        # Find the best params; refit=True means the search object itself
        # can be used for prediction afterwards.
        base_estimator = base_estimators[label]
        estimator = RandomizedSearchCV(
            estimator=base_estimator, param_distributions=params,
            n_iter=60, scoring='f1', cv=3, random_state=rng,
            error_score=0.0, n_jobs=1, pre_dispatch='2*n_jobs',
            refit=True
        )

        # Set up the vectorizer for the bag-of-words representation.
        # NOTE(review): recent scikit-learn releases appear to reject an
        # integer min_df below 1 - confirm min_df=0 against the pinned version.
        if vectorizer_method == 'tf-idf':
            vectorizer = TfidfVectorizer(
                stop_words=['go', '', ' '], binary=binary, lowercase=True,
                sublinear_tf=False, max_df=1.0, min_df=0
            )
        elif vectorizer_method == 'count':
            vectorizer = CountVectorizer(
                stop_words=['go', '', ' '], binary=binary, lowercase=True
            )
        else:
            # Fail explicitly instead of hitting an unbound ``vectorizer``
            # a few lines further down.
            raise TypeError("Vectorizer_method has type {}.".format(type(vectorizer_method)))
        # The vocabulary is learnt from the training fold only.
        vectorizer.fit(training_fold['terms'].values)

        # Fit and evaluate the performance of the classifier.
        x_train = vectorizer.transform(training_fold['terms'].values)
        y_train = np.asarray(training_fold[label].values, dtype=int)
        x_valid = vectorizer.transform(validation_fold['terms'].values)
        y_valid = np.asarray(validation_fold[label].values, dtype=int)
        estimator.fit(x_train, y_train)

        # The predicted probabilities do not depend on the threshold, so the
        # validation fold is scored once rather than once per threshold.
        positive_proba = np.asarray(estimator.predict_proba(x_valid))[:, 1]

        for t in thresholds:
            y_pred = (positive_proba >= t).astype(int)
            precision = precision_score(y_valid, y_pred, labels=[0, 1], pos_label=1)
            recall = recall_score(y_valid, y_pred, labels=[0, 1], pos_label=1)
            f1 = f1_score(y_valid, y_pred, labels=[0, 1], pos_label=1)
            statistics_l.update_statistics(label=t, s_type='Precision', data=precision)
            statistics_l.update_statistics(label=t, s_type='Recall', data=recall)
            statistics_l.update_statistics(label=t, s_type='F1-Score', data=f1)

    statistics_l.frame()['reaction'] = label
    return statistics_l
示例2: do_fold
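# Required import: from sklearn.model_selection import RandomizedSearchCV [as alias]
# Note: predict_proba is a method of a RandomizedSearchCV instance and cannot be
# imported on its own ("from sklearn.model_selection.RandomizedSearchCV import
# predict_proba" fails because RandomizedSearchCV is a class, not a module).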
def do_fold(j):
    """Fit and score one classifier per label on cross-validation fold ``j``.

    Only the first part of this function is visible here; the remainder
    (including whatever it returns) is elided in the source.

    The visible part relies on names defined outside the function:
    ``folds_i``, ``developement_df``, ``testing_df``, ``test_df_i``,
    ``vectorizer_method``, ``binary``, ``selection``, ``dag``, ``method``,
    ``balanced``, ``labels``, ``rng``, ``scale``, ``permute`` and the helpers
    ``Statistics``, ``generate_selectors``, ``make_classifiers``,
    ``mean_center``, ``sk_generate_params`` and ``evaluate_model``.

    Args:
        j: Zero-based index into ``folds_i``; each entry holds the training
            indices at position 0 and the validation indices at position 1.

    Raises:
        TypeError: If ``vectorizer_method`` is neither ``'tf-idf'`` nor
            ``'count'``.
    """
    print("\tFold " + str(j+1))
    train_idx = folds_i[j][0]
    valid_idx = folds_i[j][1]
    training_fold = developement_df.loc[train_idx, ]
    training_fold = training_fold.reset_index(drop=True)
    validation_fold = developement_df.loc[valid_idx, ]
    validation_fold = validation_fold.reset_index(drop=True)
    # Statistics accumulators for this fold (no shuffling happens here).
    training_stats_i_f = Statistics()
    validation_stats_i_f = Statistics()
    testing_stats_i_f = Statistics()
    # Init the label ranking lists.
    label_pred_proba_train = []
    label_pred_proba_valid = []
    label_pred_proba_test = []
    label_y_train = []
    label_y_valid = []
    label_y_test = []
    # Set up the vectorizer for the bag-of-words representation.
    # The vocabulary is learnt from the training fold only.
    # NOTE(review): recent scikit-learn releases appear to reject an integer
    # min_df below 1 - confirm min_df=0 against the pinned version.
    if vectorizer_method == 'tf-idf':
        vectorizer = TfidfVectorizer(
            stop_words=['go', '', ' '], binary=binary, lowercase=True,
            sublinear_tf=True, max_df=1.0, min_df=0
        )
        vectorizer.fit(training_fold['terms'].values)
        # alpha / percentile are not used in the visible part of the function.
        alpha = None
        percentile = 100
    elif vectorizer_method == 'count':
        vectorizer = CountVectorizer(
            stop_words=['go', '', ' '], binary=binary, lowercase=True
        )
        vectorizer.fit(training_fold['terms'].values)
        alpha = None
        percentile = 100
    else:
        raise TypeError("Vectorizer_method has type {}.".format(type(vectorizer_method)))
    # NOTE(review): newer scikit-learn releases replace get_feature_names()
    # with get_feature_names_out() - confirm against the pinned version.
    selectors = generate_selectors(selection, vectorizer.get_feature_names(), dag)
    base_estimators = make_classifiers(method, balanced, labels, selectors, selection, rng)
    for label in sorted(labels):
        print("\t\tFitting for label {}...".format(label))
        # SVMs make the assumption of standardised features. Hence we scale the features
        # avoiding the use of mean to maintain the structure of count sparsity. Scaling
        # May also help with linear model convergence speed.
        x_train_l = vectorizer.transform(training_fold['terms'].values)
        y_train_l = np.asarray(training_fold[label].values, dtype=int)
        x_valid_l = vectorizer.transform(validation_fold['terms'].values)
        y_valid_l = np.asarray(validation_fold[label].values, dtype=int)
        # NOTE(review): test features are read from ``testing_df`` but test
        # labels from ``test_df_i`` - confirm the two frames are row-aligned.
        x_test_l = vectorizer.transform(testing_df['terms'].values)
        y_test_l = np.asarray(test_df_i[label].values, dtype=int)
        if scale:
            # Each matrix is passed through mean_center separately with
            # with_mean=False (see the scaling rationale above).
            x_train_l = mean_center(x_train_l, with_mean=False)
            x_valid_l = mean_center(x_valid_l, with_mean=False)
            x_test_l = mean_center(x_test_l, with_mean=False)
        # We generate the folds for randomised search up-front. We hold out one of the folds for
        # Probability calibration so each sampled param set gets calibrated on the same data.
        # This leaves cv_folds-2 folds for randomised search cross-validation.
        # cv_rand = StratifiedKFold(n_splits=3, shuffle=True, random_state=rng)
        # NOTE(review): the code below does not build folds up-front; both the
        # search and the calibration are simply given cv=3.
        base_estimator_l = base_estimators[label]
        # Copy taken before the search is fitted; it is re-parameterised with
        # the best params for calibration further down.
        fresh_estimator = clone(base_estimator_l)
        # Find the best params, then do a final proper calibration.
        params = sk_generate_params(method, selection)
        estimator_l = RandomizedSearchCV(
            estimator=base_estimator_l, param_distributions=params,
            n_iter=60, scoring='f1', cv=3, random_state=rng,
            error_score=0.0, n_jobs=1, pre_dispatch='2*n_jobs',
            refit=True
        )
        # Test if there's any signal if we permute the labels.
        # Classifier should do poorly if we do so.
        if permute:
            y_train_l = rng.permutation(y_train_l)
        threshold = 0.5
        estimator_l.fit(x_train_l, y_train_l)
        best_params_l = estimator_l.best_params_
        # For every method except 'lr', replace the search object with a
        # sigmoid-calibrated classifier built from the best hyperparameters
        # and refit it on the same training data.
        if method not in ['lr']:
            estimator_l = CalibratedClassifierCV(fresh_estimator.set_params(**best_params_l),
                                                 cv=3, method='sigmoid')
            estimator_l.fit(x_train_l, y_train_l)
        # Evaluate Performance characteristics and test on training to check overfitting.
        y_train_prob_l = estimator_l.predict_proba(x_train_l)
        y_valid_prob_l = estimator_l.predict_proba(x_valid_l)
        y_test_prob_l = estimator_l.predict_proba(x_test_l)
        training_stats_i_f.merge(evaluate_model(y_train_l, y_train_prob_l, label, threshold))
#.........这里部分代码省略.........
示例3: print
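# Required import: from sklearn.model_selection import RandomizedSearchCV [as alias]
# Note: predict_proba is a method of a RandomizedSearchCV instance and cannot be
# imported on its own ("from sklearn.model_selection.RandomizedSearchCV import
# predict_proba" fails because RandomizedSearchCV is a class, not a module).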
n_iter = 25, scoring = 'roc_auc', error_score = 0,
cv=tscv, verbose = 3, n_jobs = -1,
refit=True)
# Fit the search object on the training data. ``early_stopping_rounds`` is an
# extra fit argument - presumably forwarded to the wrapped estimator's fit();
# confirm that the estimator supports it.
clf.fit(X_train,y_train,early_stopping_rounds=10)
# Examine the search results. NOTE: as a bare expression the next line only
# displays something in an interactive session; in a script it has no effect.
clf.cv_results_
print(clf.best_estimator_)
print(clf.best_score_)
best_param=clf.best_params_
#clf.predict(X_test)
# Predicted probabilities for the test sample.
preds_prob=clf.predict_proba(X_test)
# Predicted probabilities for the training sample, so the train and test
# ROC-AUC values can be compared.
preds_train_prob = clf.predict_proba(X_train)
# Column 1 - presumably the positive-class probability; confirm via clf.classes_.
preds_train = preds_train_prob[:,1]
print('Roc-auc for train sample is %.2f' %(roc_auc_score(y_train,preds_train)))
preds=preds_prob[:,1]
print('Roc-auc for test sample is %.2f' %(roc_auc_score(y_test,preds)))
########################################
#SVC
# Two candidate grids: a linear kernel that varies only C, and an RBF kernel
# that varies both C and gamma.
param_grid = [
    dict(C=[1, 10, 100, 1000], kernel=['linear']),
    dict(C=[1, 10, 100, 1000], gamma=[0.001, 0.0001], kernel=['rbf']),
]