

Python GridSearchCV.predict_proba Method Code Examples

This article collects typical usage examples of the Python method sklearn.model_selection.GridSearchCV.predict_proba. If you are wondering what GridSearchCV.predict_proba does, how to use it, or where to find examples of it, the curated code samples below may help. You can also explore further usage examples of the enclosing class, sklearn.model_selection.GridSearchCV.


Below are 13 code examples of GridSearchCV.predict_proba, sorted by popularity by default.
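Before diving into the examples, here is a minimal, self-contained sketch of the typical workflow (the synthetic data, estimator, and parameter grid are illustrative, not taken from the examples below). GridSearchCV delegates predict_proba to the refitted best_estimator_, so it is only available after fit and only when the underlying estimator supports it:

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

X, y = make_classification(n_samples=200, n_features=10, random_state=0)
search = GridSearchCV(LogisticRegression(max_iter=1000),
                      param_grid={'C': [0.1, 1, 10]}, cv=5)
search.fit(X, y)
proba = search.predict_proba(X)  # shape (n_samples, n_classes), delegated to best_estimator_
print(search.best_params_, proba[:3])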

Example 1: test_stochastic_gradient_loss_param

# Required import: from sklearn.model_selection import GridSearchCV [as alias]
# Or: from sklearn.model_selection.GridSearchCV import predict_proba [as alias]
def test_stochastic_gradient_loss_param():
    # Make sure the predict_proba works when loss is specified
    # as one of the parameters in the param_grid.
    param_grid = {
        'loss': ['log'],
    }
    X = np.arange(24).reshape(6, -1)
    y = [0, 0, 0, 1, 1, 1]
    clf = GridSearchCV(estimator=SGDClassifier(loss='hinge'),
                       param_grid=param_grid)

    # When the estimator is not fitted, `predict_proba` is not available as the
    # loss is 'hinge'.
    assert_false(hasattr(clf, "predict_proba"))
    clf.fit(X, y)
    clf.predict_proba(X)
    clf.predict_log_proba(X)

    # Make sure `predict_proba` is not available when setting loss=['hinge']
    # in param_grid
    param_grid = {
        'loss': ['hinge'],
    }
    clf = GridSearchCV(estimator=SGDClassifier(loss='hinge'),
                       param_grid=param_grid)
    assert_false(hasattr(clf, "predict_proba"))
    clf.fit(X, y)
    assert_false(hasattr(clf, "predict_proba"))
Author: IsaacHaze, Project: scikit-learn, Lines: 30, Source: test_search.py
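Note: this test targets an older scikit-learn. Since version 1.1 SGDClassifier also accepts loss='log_loss', and in 1.3 the old 'log' spelling was removed entirely. A hedged sketch of the passing case on a modern version (cv=3 is added because the tiny dataset has only three samples per class):

import numpy as np
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import GridSearchCV

X = np.arange(24).reshape(6, -1)
y = [0, 0, 0, 1, 1, 1]
clf = GridSearchCV(estimator=SGDClassifier(loss='hinge'),
                   param_grid={'loss': ['log_loss']},  # spelled 'log' before scikit-learn 1.1
                   cv=3)
clf.fit(X, y)
print(clf.predict_proba(X))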

Example 2: test_grid_search

# Required import: from sklearn.model_selection import GridSearchCV [as alias]
# Or: from sklearn.model_selection.GridSearchCV import predict_proba [as alias]
def test_grid_search():
    # Test that the best estimator contains the right value for foo_param
    clf = MockClassifier()
    grid_search = GridSearchCV(clf, {'foo_param': [1, 2, 3]}, verbose=3)
    # make sure it selects the smallest parameter in case of ties
    old_stdout = sys.stdout
    sys.stdout = StringIO()
    grid_search.fit(X, y)
    sys.stdout = old_stdout
    assert_equal(grid_search.best_estimator_.foo_param, 2)

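    # note: released scikit-learn exposes this attribute as cv_results_;
    # results_ only existed in pre-0.18 development snapshots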
    assert_array_equal(grid_search.results_["param_foo_param"].data, [1, 2, 3])

    # Smoke test the score etc:
    grid_search.score(X, y)
    grid_search.predict_proba(X)
    grid_search.decision_function(X)
    grid_search.transform(X)

    # Test exception handling on scoring
    grid_search.scoring = 'sklearn'
    assert_raises(ValueError, grid_search.fit, X, y)
Author: 1992huanghai, Project: scikit-learn, Lines: 24, Source: test_search.py

Example 3: build_grid_search

# Required import: from sklearn.model_selection import GridSearchCV [as alias]
# Or: from sklearn.model_selection.GridSearchCV import predict_proba [as alias]
def build_grid_search(X, y):
    parameters = {
        "estimator__criterion": ['gini', 'entropy'],
        "estimator__max_depth": [10, 15, 20, 25, None],
        "estimator__max_features": ['auto', 'sqrt', 'log2', None]
    }
    ovr = OneVsRestClassifier(RandomForestClassifier(n_estimators=1000,
                                    oob_score=True, n_jobs=-1, verbose=1))
    model_tunning = GridSearchCV(ovr, param_grid=parameters, verbose=1,
                                 n_jobs=-1, cv=10,
                                 scoring=make_scorer(f1_score))
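    # note: f1_score defaults to average='binary', which raises for multiclass or
    # multilabel targets; use make_scorer(f1_score, average='weighted') in that case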
    model_tunning.fit(X, y)
    test_score = model_tunning.best_score_
    print('The best test score: ', test_score)
    y_score = model_tunning.predict_proba(X_test)  # X_test is assumed to come from the enclosing scope
    multiclass_roc(y_score, 'grid_search_02')
    return model_tunning
Author: livenb, Project: crime_prediction, Lines: 19, Source: classification.py

Example 4: enumerate

# Required import: from sklearn.model_selection import GridSearchCV [as alias]
# Or: from sklearn.model_selection.GridSearchCV import predict_proba [as alias]
    for est_idx, (name, (estimator, param_grid)) in \
            enumerate(zip(names, classifiers)):
        ax = axes[ds_cnt, est_idx + 1]

        clf = GridSearchCV(estimator=estimator, param_grid=param_grid, cv=5)
        with ignore_warnings(category=ConvergenceWarning):
            clf.fit(X_train, y_train)
        score = clf.score(X_test, y_test)
        print('%s: %.2f' % (name, score))

        # plot the decision boundary. For that, we will assign a color to each
        # point in the mesh [x_min, x_max]*[y_min, y_max].
        if hasattr(clf, "decision_function"):
            Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
        else:
            Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]

        # put the result into a color plot
        Z = Z.reshape(xx.shape)
        ax.contourf(xx, yy, Z, cmap=cm, alpha=.8)

        # plot the training points
        ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright,
                   edgecolors='k')
        # and testing points
        ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright,
                   edgecolors='k', alpha=0.6)
        ax.set_xlim(xx.min(), xx.max())
        ax.set_ylim(yy.min(), yy.max())
        ax.set_xticks(())
        ax.set_yticks(())
Author: daniel-perry, Project: scikit-learn, Lines: 33, Source: plot_discretization_classification.py
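The decision_function/predict_proba fallback above works for any fitted binary classifier. A minimal standalone sketch (the estimator is illustrative; GaussianNB lacks decision_function, so the fallback branch runs):

import numpy as np
from sklearn.naive_bayes import GaussianNB

clf = GaussianNB().fit(np.array([[0.], [1.], [2.], [3.]]), [0, 0, 1, 1])
grid = np.linspace(-1, 4, 6).reshape(-1, 1)
if hasattr(clf, "decision_function"):
    Z = clf.decision_function(grid)
else:
    Z = clf.predict_proba(grid)[:, 1]  # probability of the positive class
print(Z)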

Example 5: SklearnIntentClassifier

# Required import: from sklearn.model_selection import GridSearchCV [as alias]
# Or: from sklearn.model_selection.GridSearchCV import predict_proba [as alias]

#......... some code omitted here .........
        labels = [e.get("intent") for e in training_data.intent_examples]

        if len(set(labels)) < 2:
            logger.warn("Cannot train an intent classifier. Need at least 2 different classes. " +
                        "Skipping training of intent classifier.")
        else:
            y = self.transform_labels_str2num(labels)
            X = np.stack([example.get("text_features") for example in training_data.intent_examples])

            sklearn_config = config.get("intent_classifier_sklearn")
            C = sklearn_config.get("C", [1, 2, 5, 10, 20, 100])
            kernel = sklearn_config.get("kernel", "linear")
            # dirty str fix because sklearn is expecting str not instance of basestr...
            tuned_parameters = [{"C": C, "kernel": [str(kernel)]}]
            cv_splits = max(2, min(MAX_CV_FOLDS, np.min(np.bincount(y)) // 5))  # aim for 5 examples in each fold

            self.clf = GridSearchCV(SVC(C=1, probability=True, class_weight='balanced'),
                                    param_grid=tuned_parameters, n_jobs=config["num_threads"],
                                    cv=cv_splits, scoring='f1_weighted', verbose=1)

            self.clf.fit(X, y)

    def process(self, message, **kwargs):
        # type: (Message, **Any) -> None
        """Returns the most likely intent and its probability for the input text."""

        if not self.clf:
            # component is either not trained or didn't receive enough training data
            intent = None
            intent_ranking = []
        else:
            X = message.get("text_features").reshape(1, -1)
            intent_ids, probabilities = self.predict(X)
            intents = self.transform_labels_num2str(intent_ids)
            # `predict` returns a matrix as it is supposed
            # to work for multiple examples as well, hence we need to flatten
            intents, probabilities = intents.flatten(), probabilities.flatten()

            if intents.size > 0 and probabilities.size > 0:
                ranking = list(zip(list(intents), list(probabilities)))[:INTENT_RANKING_LENGTH]
                intent = {"name": intents[0], "confidence": probabilities[0]}
                intent_ranking = [{"name": intent_name, "confidence": score} for intent_name, score in ranking]
            else:
                intent = {"name": None, "confidence": 0.0}
                intent_ranking = []

        message.set("intent", intent, add_to_output=True)
        message.set("intent_ranking", intent_ranking, add_to_output=True)

    def predict_prob(self, X):
        # type: (np.ndarray) -> np.ndarray
        """Given a bow vector of an input text, predict the intent label. Returns probabilities for all labels.

        :param X: bow of input text
        :return: vector of probabilities containing one entry for each label"""

        return self.clf.predict_proba(X)

    def predict(self, X):
        # type: (np.ndarray) -> Tuple[np.ndarray, np.ndarray]
        """Given a bow vector of an input text, predict most probable label. Returns only the most likely label.

        :param X: bow of input text
        :return: tuple of first, the most probable label and second, its probability"""

        import numpy as np

        pred_result = self.predict_prob(X)
        # sort the probabilities retrieving the indices of the elements in sorted order
        sorted_indices = np.fliplr(np.argsort(pred_result, axis=1))
        return sorted_indices, pred_result[:, sorted_indices]

    @classmethod
    def load(cls, model_dir=None, model_metadata=None, cached_component=None, **kwargs):
        # type: (Text, Metadata, Optional[Component], **Any) -> SklearnIntentClassifier
        import cloudpickle

        if model_dir and model_metadata.get("intent_classifier_sklearn"):
            classifier_file = os.path.join(model_dir, model_metadata.get("intent_classifier_sklearn"))
            with io.open(classifier_file, 'rb') as f:  # pragma: no test
                if PY3:
                    return cloudpickle.load(f, encoding="latin-1")
                else:
                    return cloudpickle.load(f)
        else:
            return SklearnIntentClassifier()

    def persist(self, model_dir):
        # type: (Text) -> Dict[Text, Any]
        """Persist this model into the passed directory. Returns the metadata necessary to load the model again."""

        import cloudpickle

        classifier_file = os.path.join(model_dir, "intent_classifier.pkl")
        with io.open(classifier_file, 'wb') as f:
            cloudpickle.dump(self, f)

        return {
            "intent_classifier_sklearn": "intent_classifier.pkl"
        }
Author: DominicBreuker, Project: rasa_nlu, Lines: 104, Source: sklearn_intent_classifier.py
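The index gymnastics in predict are easiest to see in isolation. A small sketch with hypothetical probabilities; note that pred_result[:, sorted_indices] yields shape (n, n, k), which the flatten() in process only unravels correctly because that path classifies a single row at a time:

import numpy as np

pred_result = np.array([[0.2, 0.5, 0.3]])   # hypothetical probabilities: one input, three intents
sorted_indices = np.fliplr(np.argsort(pred_result, axis=1))
print(sorted_indices)                        # [[1 2 0]] -- most probable label first
probs = pred_result[:, sorted_indices]       # shape (1, 1, 3) for a single row
print(probs.flatten())                       # [0.5 0.3 0.2]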

Example 6: negatives

# Required import: from sklearn.model_selection import GridSearchCV [as alias]
# Or: from sklearn.model_selection.GridSearchCV import predict_proba [as alias]
# In[21]:

coef_df.head(10)


# ## Investigate the predictions

# In[22]:

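# note: DataFrame.from_items was removed in pandas 1.0; pd.DataFrame(dict(...))
# or DataFrame.from_dict are the modern equivalents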
predict_df = pd.DataFrame.from_items([
    ('sample_id', X.index),
    ('testing', X.index.isin(X_test.index).astype(int)),
    ('status', y),
    ('decision_function', cv_pipeline.decision_function(X)),
    ('probability', cv_pipeline.predict_proba(X)[:, 1]),
])
predict_df['probability_str'] = predict_df['probability'].apply('{:.1%}'.format)


# In[23]:

# Top predictions amongst negatives (potential hidden responders)
predict_df.sort_values('decision_function', ascending=False).query("status == 0").head(10)


# In[24]:

# Ignore numpy warning caused by seaborn
warnings.filterwarnings('ignore', 'using a non-integer number instead of an integer')
Author: KT12, Project: Machine-Learning, Lines: 31, Source: 2.TCGA-MLexample.py

Example 7: negatives

# Required import: from sklearn.model_selection import GridSearchCV [as alias]
# Or: from sklearn.model_selection.GridSearchCV import predict_proba [as alias]
# ## Investigate the predictions

# In[22]:

X_transformed = pipeline.transform(X)


# In[23]:

predict_df = pd.DataFrame.from_items([
    ('sample_id', X.index),
    ('testing', X.index.isin(X_test.index).astype(int)),
    ('status', y),
    ('decision_function', cv.decision_function(X_transformed)),
    ('probability', cv.predict_proba(X_transformed)[:, 1]),
])
predict_df['probability_str'] = predict_df['probability'].apply('{:.1%}'.format)


# In[24]:

# Top predictions amongst negatives (potential hidden responders)
predict_df.sort_values('decision_function', ascending=False).query("status == 0").head(10)


# In[25]:

# Ignore numpy warning caused by seaborn
warnings.filterwarnings('ignore', 'using a non-integer number instead of an integer')
Author: KT12, Project: Machine-Learning, Lines: 31, Source: RIT1-PCA-htcai.py

Example 8: MultinomialNB

# Required import: from sklearn.model_selection import GridSearchCV [as alias]
# Or: from sklearn.model_selection.GridSearchCV import predict_proba [as alias]
     clf = lm.LogisticRegression(C=1e5)
 elif model == 'MultinomialNB':
     clf =  MultinomialNB(alpha=0.0005)
 elif model == 'KNeighborsClassifier':
     clf = neighbors.KNeighborsClassifier(10, weights='distance')
 elif model == 'MLPClassifier':
     clf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(1000, 50))
 else:
     raise ValueError('unknown model: ' + model)
 #
 print(iter,"************************************************")
 print("label:",label,"--gram:",gram,"--stem_option:",stem_option,"--max_features:",max_feat,"--model:",model)
 grid_clf = GridSearchCV(estimator=clf, param_grid=parameters[model],
                         cv=kfolds, scoring='neg_log_loss', n_jobs=10, iid=False)
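 # note: the iid parameter was deprecated in scikit-learn 0.22 and removed in 0.24;
 # drop it on modern versions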
 grid_clf.fit(X_train, Y_train_lab)
 pred_proba = grid_clf.predict_proba(X_holdout)
 ll_holdout = log_loss(y_true=Y_holdout_lab,y_pred=pred_proba[:, 1])
 sys.stdout.flush()
 print("best params:",grid_clf.best_params_)
 best_idx = np.argmax(grid_clf.cv_results_['mean_test_score'])
 print("best LogLoss:", grid_clf.cv_results_['mean_test_score'][best_idx] , " - std:",grid_clf.cv_results_['std_test_score'][best_idx])
 print("LogLoss holdout:",ll_holdout)
 sys.stdout.flush()
 print()
 sys.stdout.flush()
 # finally
 perf_panel = perf_panels[label]
 perf_panel = perf_panel.append(pd.DataFrame(np.array([[gram >= 1, gram >= 2, gram >= 3, stem_option, max_feat,
                                                        model, str(grid_clf.best_params_), str(grid_clf.cv_results_['mean_test_score'][best_idx]),
                                                        str(grid_clf.cv_results_['std_test_score'][best_idx]), str(ll_holdout),
                                                        '1 Repeat 5-fold cross validation']]),
Author: gtesei, Project: fast-furious, Lines: 33, Source: eda.py

Example 9: nestedCVClassifier

# Required import: from sklearn.model_selection import GridSearchCV [as alias]
# Or: from sklearn.model_selection.GridSearchCV import predict_proba [as alias]
def nestedCVClassifier(df, outcomeVar, predVars, model, params={}, nFolds=10, LPO=None, scorer='log_loss', n_jobs=1):
    """Apply model to df in nested cross-validation framework
    with inner folds to optimize hyperparameters.
    and outer test folds to evaluate performance.
        
    Parameters
    ----------
    df : pd.DataFrame
        Must contain outcome and predictor variables.
    outcomeVar : str
    predVars : ndarray or list
        Predictor variables in the model.
    model : sklearn model
    nFolds : int
        N-fold stratified cross-validation
    LPO : int or None
        Use Leave-P-Out cross-validation instead of StratifiedNFoldCV
    params : dict
        Keys of model hyperparameters with values to try in
        a grid search.

    Returns
    -------
    results : dict
        Contains results as keys below:
        fpr:            (100, ) average FPR for ROC
        tpr:            (100, ) average TPR for ROC
        AUC:            (outerFolds, ) AUC of ROC for each outer test fold
        meanAUC:        (1, ) AUC of the average ROC
        ACC:            (outerFolds, ) accuracy across outer test folds
        scores:         (outerFolds, innerFolds, Cs) log-likelihood for each C across inner and outer CV folds
        optimalCs:      (outerFolds, ) optimal C from each set of inner CV
        finalResult:    final fitted model with predict() exposed
        prob:           (N,) pd.Series of predicted probabilities avg over outer folds
        varList:        (Nvars, ) list of vars with non-zero coef in final model
        Cs:             (Cs, ) pre-specified grid of Cs
        coefs:          (outerFolds, predVars) refit with optimalC in each fold
        paths:          (outerFolds, Cs, predVars + intercept) avg across inner folds
        XVars:          list of all vars in X
        yVar:           name of outcome variable
        N:              total number of rows/instances in the model"""
    
    if not isinstance(predVars, list):
        predVars = list(predVars)
    
    tmp = df[[outcomeVar] + predVars].dropna()
    X,y = tmp[predVars].astype(float), tmp[outcomeVar].astype(float)

    if LPO is None:
        innerCV = StratifiedKFold(n_splits=nFolds, shuffle=True)
        outerCV = StratifiedKFold(n_splits=nFolds, shuffle=True)
    else:
        innerCV = LeavePOut(LPO)
        outerCV = LeavePOut(LPO)

    if scorer == 'log_loss':
        scorerFunc = sklearn.metrics.make_scorer(sklearn.metrics.log_loss,
                                                 greater_is_better=False,
                                                 needs_proba=True,
                                                 needs_threshold=False,
                                                 labels=[0, 1])
    elif scorer == 'accuracy':
        scorerFunc = sklearn.metrics.make_scorer(sklearn.metrics.accuracy_score,
                                                 greater_is_better=True,
                                                 needs_proba=False,
                                                 needs_threshold=False)
    
    fpr = np.linspace(0, 1, 100)
    tpr = np.nan * np.zeros((fpr.shape[0], nFolds))
    acc = np.nan * np.zeros(nFolds)
    auc = np.nan * np.zeros(nFolds)
    probs = []
    optimalParams = []
    optimalScores = []
    cvResults = []

    for outi, (trainInd, testInd) in enumerate(outerCV.split(X=X, y=y)):
        Xtrain, Xtest = X.iloc[trainInd], X.iloc[testInd]
        ytrain, ytest = y.iloc[trainInd], y.iloc[testInd]

        clf = GridSearchCV(estimator=model, param_grid=params, cv=innerCV, refit=True, scoring=scorerFunc, n_jobs=n_jobs)
        clf.fit(Xtrain, ytrain)
        cvResults.append(clf.cv_results_)
        optimalParams.append(clf.best_params_)
        optimalScores.append(clf.best_score_)

        prob = clf.predict_proba(Xtest)
        fprTest, tprTest, _ = sklearn.metrics.roc_curve(ytest, prob[:, 1])
        tpr[:, outi] = np.interp(fpr, fprTest, tprTest)
        auc[outi] = sklearn.metrics.auc(fprTest, tprTest)
        acc[outi] = sklearn.metrics.accuracy_score(ytest, np.round(prob[:, 1]), normalize=True)
        
        probs.append(pd.Series(prob[:, 1], index=Xtest.index))
    
    meanTPR = np.mean(tpr, axis=1)
    meanTPR[0], meanTPR[-1] = 0, 1
    meanACC = np.mean(acc)
    meanAUC = sklearn.metrics.auc(fpr, meanTPR)
    
    """Compute mean probability over test predictions in CV"""
#......... some code omitted here .........
Author: agartland, Project: utils, Lines: 103, Source: roc.py
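A hedged usage sketch for nestedCVClassifier (the DataFrame, column names, and parameter grid below are hypothetical):

import numpy as np
import pandas as pd
import sklearn.linear_model

rng = np.random.RandomState(0)
df = pd.DataFrame({'outcome': rng.randint(0, 2, 100),
                   'x1': rng.randn(100),
                   'x2': rng.randn(100)})
results = nestedCVClassifier(df, outcomeVar='outcome',
                             predVars=['x1', 'x2'],
                             model=sklearn.linear_model.LogisticRegression(),
                             params={'C': [0.1, 1, 10]},
                             nFolds=5, scorer='log_loss')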

Example 10: LogisticRegression

# Required import: from sklearn.model_selection import GridSearchCV [as alias]
# Or: from sklearn.model_selection.GridSearchCV import predict_proba [as alias]
# Logistic regression tuned model

LR_before_tuned_model= LogisticRegression()

tuned_parameters = {'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000] ,
              'penalty':['l1','l2'] }
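# note: each penalty needs a solver that supports it; scikit-learn's older liblinear
# default handles both 'l1' and 'l2', while the newer lbfgs default supports 'l2' only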
from sklearn.model_selection import GridSearchCV

LR= GridSearchCV(LR_before_tuned_model, tuned_parameters,cv=10)

LR.fit(X_train,y_train)

print('Model best params: ', LR.best_params_)

y_prob = LR.predict_proba(X_test)[:,1] # This will give you positive class prediction probabilities
y_pred = np.where(y_prob > 0.5, 1, 0) # This will threshold the probabilities to give class predictions.
print('LR score after tuning: ', LR.score(X_test, y_test))  # score against the true labels, not y_pred

# Passing the fitted GridSearchCV to cross_val_score re-runs the search inside each
# fold (nested cross-validation); the tuned score improves over the untuned model
model_LR_after_tuning_cross_val_scores = cross_val_score(LR, X, y, cv=5)
print('cross_val_scores: ', model_LR_after_tuning_cross_val_scores)

confusion_matrix_after_tuning = metrics.confusion_matrix(y_test, y_pred)
print('confusion_matrix after tuning: ', confusion_matrix_after_tuning)

# roc and auc after tunning
from sklearn.metrics import roc_curve, auc
false_positive_rate, true_positive_rate, thresholds = roc_curve(y_test, y_prob)
roc_auc = auc(false_positive_rate, true_positive_rate)
print('roc_auc after tuning: ', roc_auc)
Author: fzhurd, Project: fzwork, Lines: 32, Source: predict_mushroom_v1c.py
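For a binary LogisticRegression, the 0.5 threshold above reproduces predict() (assuming the 0/1 class coding used here); a quick sanity check:

import numpy as np

manual = np.where(LR.predict_proba(X_test)[:, 1] > 0.5, 1, 0)
assert (manual == LR.predict(X_test)).all()  # thresholding at 0.5 matches predict()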

Example 11: GridSearchCV

# Required import: from sklearn.model_selection import GridSearchCV [as alias]
# Or: from sklearn.model_selection.GridSearchCV import predict_proba [as alias]
#parameters = {'kernel':('linear', 'rbf'), 'C':[1, 10]}
svc = svm.SVC(probability=True)  # predict_proba requires probability=True on SVC
clf_svc = GridSearchCV(estimator=svc, param_grid=param_grid,scoring='roc_auc',
                       n_jobs=-1,cv=tscv,verbose=3,refit=True)
clf_svc.fit(X_train,y_train)

clf_svc.cv_results_

print(clf_svc.best_estimator_)

print(clf_svc.best_score_)

best_param=clf_svc.best_params_

preds_train_svc_prob = clf_svc.predict_proba(X_train)
preds_train_svc = preds_train_svc_prob[:,1]
print('Roc-auc for train sample is %.2f' %(roc_auc_score(y_train,preds_train_svc)))

preds_test_svc_prob = clf_svc.predict_proba(X_test)
preds_svc=preds_test_svc_prob[:,1]
print('Roc-auc for test sample is %.2f' %(roc_auc_score(y_test,preds_svc)))
#######################################
# # IV. Performance Check
#######################################

# Compute ROC curve and area the curve
fpr, tpr, thresholds = roc_curve(y_test, preds_svc)  # 'preds' was undefined; use the test-set probabilities
roc_auc = auc(fpr, tpr)
plt.plot(fpr, tpr, lw=1, alpha=1,
             label='ROC (AUC = %0.2f)' % (roc_auc))
Author: nightrose79, Project: try, Lines: 32, Source: work+sample_yu-Oct2.py

Example 12: GridSearchCV

# Required import: from sklearn.model_selection import GridSearchCV [as alias]
# Or: from sklearn.model_selection.GridSearchCV import predict_proba [as alias]
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split, GridSearchCV  # sklearn.cross_validation was removed in 0.20

data = pd.read_csv("C:\\Users\\User\\Desktop\\iris_data.csv")

#print(data.head())
# plain variables instead of data.features/data.targets: pandas warns when new
# attributes are assigned on a DataFrame
features = data[["SepalLength","SepalWidth","PetalLength","PetalWidth"]]
targets = data.Class

#with grid search you can find an optimal parameter ("parameter tuning")
param_grid = {'max_depth': np.arange(1, 10)}

#in every iteration the data is split randomly by cross validation, and the
#DecisionTreeClassifier is initialized randomly: that's why you get different results!
tree = GridSearchCV(DecisionTreeClassifier(), param_grid)

feature_train, feature_test, target_train, target_test = train_test_split(features, targets, test_size=.2)

tree.fit(feature_train, target_train)
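# note: iris has three classes, so [:, 1] below is the probability of the class at index 1 only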
tree_predictions = tree.predict_proba(feature_test)[:, 1]

print("Best parameter with Grid Search: ", tree.best_params_)

Author: michelleduer, Project: independent-study, Lines: 27, Source: decision2.py

Example 13: main

# Required import: from sklearn.model_selection import GridSearchCV [as alias]
# Or: from sklearn.model_selection.GridSearchCV import predict_proba [as alias]
def main():
    
     data = pd.read_csv("mushrooms.csv")
#==============================================================================
#     print(data.head(6))
#     print("================================================")
#     print(data.isnull().sum())
#     print("=====================")
#     print(data['class'].unique())
#     print("=====================")
#     print(data.shape)
#==============================================================================
    
     labelencoder = LabelEncoder()
     for col in data.columns:
         data[col] = labelencoder.fit_transform(data[col])
        
    
 
     #print(data.head())
    
#==============================================================================
#      ax = sns.boxplot(x='class', y='stalk-color-above-ring',  data=data)
#      ax = sns.stripplot(x="class", y='stalk-color-above-ring',
#                    data=data, jitter=True,
#                    edgecolor="gray")
#      sns.plt.title("Class w.r.t stalkcolor above ring",fontsize=12)
#==============================================================================
    

     train_feature = data.iloc[:,1:23]   # predictor columns
     test_feature = data.iloc[:, 0]      # the class label (target), despite the name
     
   #Heatmap  
#==============================================================================
#     data = pd.DataFrame(train_feature)
#     corrResult = data.corr()
#     sns.heatmap(corrResult)
#     plt.show()
#==============================================================================

#==============================================================================
#      # Build a classification task using 3 informative features
#      train_feature, test_feature = make_classification(n_samples=1000,
#                                 n_features=10,
#                                 n_informative=3,
#                                 n_redundant=0,
#                                 n_repeated=0,
#                                 n_classes=2,
#                                 random_state=0,
#                                 shuffle=False)
#      # Build a forest and compute the feature importance
#      forest = ExtraTreesClassifier(n_estimators=250, random_state=0)
#      forest.fit(train_feature, test_feature)
#      importances = forest.feature_importances_
#      for index in range(len(train_feature[0])):
#          print ("Importance of feature ", index, "is", importances[index])
#==============================================================================
     
     # Standardize the features to zero mean and unit variance
     scaler = StandardScaler()
     train_feature = scaler.fit_transform(train_feature)
     
     pca = PCA()
     pca.fit_transform(train_feature)
     covariance = pca.get_covariance()
     explained_variance=pca.explained_variance_
     print(explained_variance)
      
     
     # Splitting the data into training and testing dataset
     X_train, X_test, y_train, y_test = train_test_split(train_feature,test_feature,test_size=0.2,random_state=4)
     
     print("==============================================================")
     print("                     Logistic Regression                      ")
     print("==============================================================")
     
     # Logistic Regression
     logic = LogisticRegression()
     parameters_logic = {'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000] ,
               'penalty':['l1','l2']
                    }
     logic_grid_search = GridSearchCV(logic, parameters_logic,cv=10)
     logic_grid_search.fit(X_train,y_train)
     
     # Positive class prediction probabilities
     y_prob = logic_grid_search.predict_proba(X_test)[:,1]   
     # Threshold the probabilities to give class predictions.
     y_pred = np.where(y_prob > 0.5, 1, 0)
     
     print("Logic Regresion result: ",logic_grid_search.score(X_test, y_pred),"%")
     print("Best parameters for this model are: ",logic_grid_search.best_params_)
     
     print("==============================================================")
     print("                        Naive Bayes                           ")
     print("==============================================================")
     
     # Gaussian Naive Bayes
     naive = GaussianNB()
     naive.fit(X_train, y_train)
#......... some code omitted here .........
Author: NicuVlasin, Project: Machine-Learning, Lines: 103, Source: Assignment_2.py


Note: the sklearn.model_selection.GridSearchCV.predict_proba method examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets were selected from open-source projects contributed by various developers; copyright remains with the original authors, and redistribution and use should follow each project's License. Do not reproduce without permission.