当前位置: 首页>>代码示例>>Python>>正文


Python GridSearchCV.fit方法代码示例

本文整理汇总了Python中sklearn.model_selection.GridSearchCV.fit方法的典型用法代码示例。如果您正苦于以下问题:Python GridSearchCV.fit方法的具体用法?Python GridSearchCV.fit怎么用?Python GridSearchCV.fit使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sklearn.model_selection.GridSearchCV的用法示例。


在下文中一共展示了GridSearchCV.fit方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: build

# 需要导入模块: from sklearn.model_selection import GridSearchCV [as 别名]
# 或者: from sklearn.model_selection.GridSearchCV import fit [as 别名]
        def build(X, y=None):
            """
            Inner build function that builds and tunes a single intent model.

            :param X: iterable of raw training documents
            :param y: iterable of class labels aligned with X
            :return: fitted GridSearchCV wrapping the tf-idf + SVC pipeline
            """
            model = Pipeline([
                ('vectorizer', TfidfVectorizer(
                    tokenizer=self.spacy_tokenizer, preprocessor=None, lowercase=False)),
                ('clf', SVC(C=1, kernel="linear",
                            probability=True,
                            class_weight='balanced'))])

            from sklearn.model_selection import GridSearchCV

            # Only the per-class counts are needed to size the CV splits
            # (the original bound the unused class labels as well).
            _, counts = np.unique(y, return_counts=True)

            # Keep the fold count between 2 and 5, and never larger than the
            # rarest class supports (roughly 5 samples per fold).
            cv_splits = max(2, min(5, np.min(counts) // 5))

            Cs = [0.01, 0.25, 1, 2, 5, 10, 20, 100]
            param_grid = {'clf__C': Cs, 'clf__kernel': ["linear"]}
            grid_search = GridSearchCV(model,
                                       param_grid=param_grid,
                                       scoring='f1_weighted',
                                       cv=cv_splits,
                                       verbose=2,
                                       n_jobs=-1
                                       )
            grid_search.fit(X, y)

            return grid_search
开发者ID:Truth0906,项目名称:ai-chatbot-framework,代码行数:34,代码来源:sklearn_intent_classifer.py

示例2: build_and_train

# 需要导入模块: from sklearn.model_selection import GridSearchCV [as 别名]
# 或者: from sklearn.model_selection.GridSearchCV import fit [as 别名]
def build_and_train():
    """Train a loan-approval classifier with a small grid search.

    Loads ../data/training.csv, drops rows missing key fields, splits
    75/25, and grid-searches a PreProcessing + RandomForest pipeline.

    :return: the fitted GridSearchCV object.
    """
    data = pd.read_csv('../data/training.csv')
    data = data.dropna(subset=['Gender', 'Married', 'Credit_History', 'LoanAmount'])

    pred_var = ['Gender', 'Married', 'Dependents', 'Education', 'Self_Employed',
                'ApplicantIncome', 'CoapplicantIncome', 'LoanAmount',
                'Loan_Amount_Term', 'Credit_History', 'Property_Area']

    X_train, X_test, y_train, y_test = train_test_split(
        data[pred_var], data['Loan_Status'], test_size=0.25, random_state=42)

    # BUG FIX: DataFrame/Series.as_matrix() was removed in pandas 1.0;
    # to_numpy() is the supported replacement.
    y_train = y_train.replace({'Y': 1, 'N': 0}).to_numpy()
    y_test = y_test.replace({'Y': 1, 'N': 0}).to_numpy()

    pipe = make_pipeline(PreProcessing(),
                         RandomForestClassifier())

    # BUG FIX: min_impurity_split was deprecated (0.19) and removed (1.0)
    # in scikit-learn; min_impurity_decrease is the supported analogue.
    param_grid = {"randomforestclassifier__n_estimators": [10, 20, 30],
                  "randomforestclassifier__max_depth": [None, 6, 8, 10],
                  "randomforestclassifier__max_leaf_nodes": [None, 5, 10, 20],
                  "randomforestclassifier__min_impurity_decrease": [0.1, 0.2, 0.3]}

    grid = GridSearchCV(pipe, param_grid=param_grid, cv=3)

    grid.fit(X_train, y_train)

    return grid
开发者ID:dsadulla,项目名称:flask_api,代码行数:28,代码来源:utils.py

示例3: optimize_model_regress

# 需要导入模块: from sklearn.model_selection import GridSearchCV [as 别名]
# 或者: from sklearn.model_selection.GridSearchCV import fit [as 别名]
def optimize_model_regress(data, tc):
    """Grid-search an SVR on a random 80/20 split of *data*.

    Targets are the 'temperature' column scaled by *tc*; the 'T/Tc' and
    'temperature' columns are dropped from the features. Prints the best
    parameters, the per-setting grid scores, and the test-set MAE.
    """
    train_frame = data.sample(frac=.8)
    test_frame = data.drop(train_frame.index)
    drop_cols = ['T/Tc', 'temperature']
    train_y = train_frame['temperature'] / tc
    train_X = train_frame.drop(drop_cols, axis=1)
    test_y = test_frame['temperature'] / tc
    test_X = test_frame.drop(drop_cols, axis=1)

    c_choices = [.1, .5, 1, 5, 10, 50, 100, 500, 1000]
    tuned_parameters = [
        {'kernel': ['rbf'], 'gamma': [1, .5, .1, 1e-2, 1e-3, 1e-4],
         'C': c_choices},
        {'kernel': ['linear'], 'C': c_choices},
    ]

    model = GridSearchCV(svm.SVR(), tuned_parameters, cv=5)
    model.fit(train_X, train_y)

    print()
    print("Best parameters:")
    print()
    print(model.best_params_)
    print()
    print("Grid scores:")
    print()
    results = model.cv_results_
    for mean, std, params in zip(results['mean_test_score'],
                                 results['std_test_score'],
                                 results['params']):
        print("%0.3f (+/-%0.03f) for %r"
              % (mean, std * 2, params))
    print()

    y_true, y_pred = test_y, model.predict(test_X)
    print("Mean Absolute Error : " + str(mean_absolute_error(y_pred, y_true)))
    print()
开发者ID:aramamurti,项目名称:BEC-monopoles,代码行数:32,代码来源:ml_analysis.py

示例4: _search_param

# 需要导入模块: from sklearn.model_selection import GridSearchCV [as 别名]
# 或者: from sklearn.model_selection.GridSearchCV import fit [as 别名]
 def _search_param(self, metric, X, y):
   '''
   Find a promising parameter set cheaply, using few n_estimators and a
   fast learning rate, then restore the caller's original schedule.

   :param metric: scoring string/callable passed to GridSearchCV
   :param X: training features
   :param y: training targets
   :return: dict of best parameters with the user's learning_rate and
            n_estimators restored
   '''
   base = self.model_instance
   # Make sure the user-specified values are always present in the grid.
   depth_choices = list(np.unique([base.max_depth, 5, 7]))
   colsample_choices = list(np.unique([base.colsample_bytree, 0.66, 0.9]))
   lambda_choices = list(np.unique([base.reg_lambda, 1, 5]))
   param_grid = {
       'max_depth': depth_choices,
       # Speed up the search: high learning rate, capped tree count.
       'learning_rate': [max(base.learning_rate, 0.3)],
       'n_estimators': [min(base.n_estimators, 60)],
       'gamma': [base.gamma],
       'min_child_weight': [base.min_child_weight],
       'max_delta_step': [base.max_delta_step],
       'subsample': [base.subsample],
       'colsample_bytree': colsample_choices,
       'colsample_bylevel': [base.colsample_bylevel],
       'reg_alpha': [base.reg_alpha],
       'reg_lambda': lambda_choices,
       'scale_pos_weight': [base.scale_pos_weight],
       'base_score': [base.base_score],
       'seed': [base.seed]
   }
   searcher = GridSearchCV(
       self.model_instance, param_grid, cv=2, refit=False, scoring=metric)
   searcher.fit(X, y)
   best_params = searcher.best_params_
   # Restore the original training schedule before returning.
   best_params['learning_rate'] = base.learning_rate
   best_params['n_estimators'] = base.n_estimators
   return best_params
开发者ID:AhlamMD,项目名称:deepchem,代码行数:35,代码来源:__init__.py

示例5: tweak_hyperparameters

# 需要导入模块: from sklearn.model_selection import GridSearchCV [as 别名]
# 或者: from sklearn.model_selection.GridSearchCV import fit [as 别名]
	def tweak_hyperparameters(self, type_model):
		"""Grid-search either an SVM (gender target) or an MLP (sentiment
		target) over self.X and print the best parameter set found."""
		self.load_corpus()

		# Pick estimator, target, and search space per model type; the
		# actual search is identical for both branches.
		if type_model == "SVM":
			target = self.gender
			estimator = svm.SVC()
			search_space = [
				{'C': [1, 10, 100, 1000], 'kernel': ['linear']},
				{'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001], 'kernel': ['rbf']},
			]
		else:
			target = self.sentiment
			estimator = MLPClassifier(max_iter=100)
			search_space = {
				'hidden_layer_sizes': [(50, 50, 50), (50, 100, 50), (100,)],
				'activation': ['tanh', 'relu'],
				'solver': ['sgd', 'adam'],
				'alpha': [0.0001, 0.05],
				'learning_rate': ['constant', 'adaptive'],
			}

		clf = GridSearchCV(estimator, search_space, n_jobs=-1, cv=3)
		clf.fit(self.X, target)

		# Best parameter set
		print('Best parameters found:\n', clf.best_params_)
开发者ID:masterdcups,项目名称:iot-tweet-search-engine,代码行数:34,代码来源:model_prediction.py

示例6: inner_cv_loop

# 需要导入模块: from sklearn.model_selection import GridSearchCV [as 别名]
# 或者: from sklearn.model_selection.GridSearchCV import fit [as 别名]
def inner_cv_loop(Xtrain, Ytrain, clf, parameters,
                    oversample=None, fa_dims=20,
                    verbose=False):
    """
    use GridSearchCV to find best classifier for training set

    Fits the search twice: once on the raw features ('nofa') and once on a
    FactorAnalysis reduction to fa_dims components ('fa'), then returns the
    winner by ROC AUC as (best_estimator, best_score, factor_analyzer),
    where factor_analyzer is None for the raw-feature variant.
    """
    rocscore = {}
    best_est = {}
    facanal = {}
    # Raw pass first, FA pass second — note the FA pass reassigns Xtrain
    # to the reduced representation.
    for fa_d in [0, fa_dims]:
        if fa_d > 0:
            clfname = 'fa'
            facanal[clfname] = FactorAnalysis(fa_d)
            Xtrain = facanal[clfname].fit_transform(Xtrain)
        else:
            clfname = 'nofa'
            facanal[clfname] = None

        if verbose:
            print(clfname)
        gs = GridSearchCV(clf, parameters, scoring='roc_auc')
        gs.fit(Xtrain, Ytrain)
        rocscore[clfname] = gs.best_score_
        best_est[clfname] = gs.best_estimator_

    bestscore = numpy.max(list(rocscore.values()))
    bestclf = [name for name, score in rocscore.items()
               if score == bestscore][0]
    if verbose:
        print('best:', bestclf, bestscore, best_est[bestclf], facanal[bestclf])
    return best_est[bestclf], bestscore, facanal[bestclf]
开发者ID:IanEisenberg,项目名称:Self_Regulation_Ontology,代码行数:32,代码来源:crossvalidation.py

示例7: plot_cross_val_selection

# 需要导入模块: from sklearn.model_selection import GridSearchCV [as 别名]
# 或者: from sklearn.model_selection.GridSearchCV import fit [as 别名]
def plot_cross_val_selection():
    """Plot per-fold and mean validation accuracy for each SVC parameter
    setting of a grid search on the iris training split, highlighting the
    best setting."""
    iris = load_iris()
    X_trainval, X_test, y_trainval, y_test = train_test_split(iris.data,
                                                              iris.target,
                                                              random_state=0)

    param_grid = {'C': [0.001, 0.01, 0.1, 1, 10, 100],
                  'gamma': [0.001, 0.01, 0.1, 1, 10, 100]}
    grid_search = GridSearchCV(SVC(), param_grid, cv=5)
    grid_search.fit(X_trainval, y_trainval)

    # BUG FIX: grid_scores_ never existed on model_selection.GridSearchCV
    # (it was a pre-0.18 grid_search-module attribute); the equivalent
    # data lives in cv_results_. Keep the original's [15:] slice.
    results = grid_search.cv_results_
    keep = slice(15, None)
    mean_scores = results['mean_test_score'][keep]
    fold_scores = np.array([results['split%d_test_score' % split][keep]
                            for split in range(5)])
    param_dicts = results['params'][keep]

    best = np.argmax(mean_scores)
    plt.figure(figsize=(10, 3))
    plt.xlim(-1, len(mean_scores))
    plt.ylim(0, 1.1)
    for i in range(len(mean_scores)):
        marker_cv, = plt.plot([i] * 5, fold_scores[:, i], '^', c='gray', markersize=5, alpha=.5)
        marker_mean, = plt.plot(i, mean_scores[i], 'v', c='none', alpha=1, markersize=10)
        if i == best:
            marker_best, = plt.plot(i, mean_scores[i], 'o', c='red', fillstyle="none", alpha=1, markersize=20, markeredgewidth=3)

    plt.xticks(range(len(mean_scores)),
               [str(p).strip("{}").replace("'", "") for p in param_dicts],
               rotation=90)
    plt.ylabel("validation accuracy")
    plt.xlabel("parameter settings")
    plt.legend([marker_cv, marker_mean, marker_best],
               ["cv accuracy", "mean accuracy", "best parameter setting"],
               loc=(1.05, .4))
开发者ID:Alan215,项目名称:advanced_training,代码行数:28,代码来源:plot_grid_search.py

示例8: train_sgd

# 需要导入模块: from sklearn.model_selection import GridSearchCV [as 别名]
# 或者: from sklearn.model_selection.GridSearchCV import fit [as 别名]
def train_sgd(data, result, scoring=None):
    """Grid-search an SGDClassifier over loss and penalty choices.

    :param data: training features
    :param result: training labels
    :param scoring: scoring argument forwarded to GridSearchCV
    :return: (scaler, classifier) — the scaler used (None when disabled)
             and the best estimator refit on the full data.
    """
    print("train SGDClassifier {}".format(len(data)))
    # BUG FIX: the original commented out every assignment to `scaler`
    # but still referenced it, raising NameError. No scaling by default;
    # swap in e.g. preprocessing.MinMaxScaler() here to enable it.
    scaler = None
    print("Scale: {}".format(type(scaler)))
    if scaler is not None:
        data = scaler.fit_transform(data)

    # NOTE(review): 'log', 'squared_loss' and penalty 'none' are renamed
    # in recent scikit-learn ('log_loss', 'squared_error', None) — confirm
    # against the pinned sklearn version.
    parameters = {
        'loss': ('hinge', 'log', 'modified_huber', 'squared_hinge', 'perceptron',
                 'squared_loss', 'huber', 'epsilon_insensitive',
                 'squared_epsilon_insensitive'),
        'penalty': ('none', 'l2', 'l1', 'elasticnet')
    }
    print(parameters)
    search = GridSearchCV(SGDClassifier(), parameters, scoring=scoring, n_jobs=1)
    search.fit(data, result)
    print("best params: {}".format(search.best_params_))
    print("best score: {}".format(search.best_score_))
    # BUG FIX: a bare `print` is a no-op expression in Python 3; call it.
    print()
    return scaler, search.best_estimator_.fit(data, result)
开发者ID:osm-fr,项目名称:export-cadastre,代码行数:27,代码来源:segmented.py

示例9: svr_linear

# 需要导入模块: from sklearn.model_selection import GridSearchCV [as 别名]
# 或者: from sklearn.model_selection.GridSearchCV import fit [as 别名]
def svr_linear(X, Y, x, y):
    """Fit a linear-kernel SVR via 10-fold grid search over C and degree.

    :param X: training features
    :param Y: training targets
    :param x: test features
    :param y: test targets
    :return: (rmse, y_predict) for the test set
    """
    reg = GridSearchCV(SVR(kernel='linear'), cv=10,
                       param_grid={"C": [1e0, 1e1, 1e2, 1e3], "degree": [1, 2, 3, 4]})
    reg.fit(X, Y)
    y_predict = reg.predict(x)
    rmse = RMSE(y=y, y_predict=y_predict)
    # BUG FIX: the original used a Python 2 print statement, a SyntaxError
    # under Python 3 (which the rest of this file targets).
    print("rmse: ", str(rmse))
    return rmse, y_predict
开发者ID:gautamskumar,项目名称:ws,代码行数:9,代码来源:funcs.py

示例10: main

# 需要导入模块: from sklearn.model_selection import GridSearchCV [as 别名]
# 或者: from sklearn.model_selection.GridSearchCV import fit [as 别名]
def main():
    """End-to-end Titanic example: load data, impute missing values,
    vectorize, compare RandomForest vs XGBoost, grid-search XGBoost, and
    write a submission CSV."""
    # 1. Inspect the training and test data.
    train_data = pandas.read_csv('data/train.csv')
    test_data = pandas.read_csv('data/test.csv')
    print(train_data.info())
    print(test_data.info())

    # 2. Hand-pick features expected to be predictive.
    selected_features = ['Pclass', 'Sex', 'Age', 'Embarked', 'SibSp', 'Parch', 'Fare']
    x_train = train_data[selected_features]
    x_test = test_data[selected_features]

    y_train = train_data['Survived']

    # 3. Fill in missing values.
    # The Embarked feature has missing entries that must be completed.
    print(x_train['Embarked'].value_counts())
    print(x_test['Embarked'].value_counts())

    # For categorical features, imputing with the most frequent value is
    # one way to limit the error introduced by imputation.
    x_train['Embarked'].fillna('S', inplace=True)
    x_test['Embarked'].fillna('S', inplace=True)

    x_train['Age'].fillna(x_train['Age'].mean(), inplace=True)
    x_test['Age'].fillna(x_test['Age'].mean(), inplace=True)

    x_test['Fare'].fillna(x_test['Fare'].mean(), inplace=True)
    print(x_train.info())
    print(x_test.info())

    # 4. One-hot encode the features with DictVectorizer.
    dict_vectorizer = DictVectorizer(sparse=False)
    # BUG FIX: to_dict expects orient='records' (plural); 'record' raises
    # ValueError on modern pandas.
    x_train = dict_vectorizer.fit_transform(x_train.to_dict(orient='records'))
    print(dict_vectorizer.feature_names_)
    x_test = dict_vectorizer.transform(x_test.to_dict(orient='records'))

    # 5. Train baseline models, scored with 5-fold cross-validation.
    forest_classifier = RandomForestClassifier()
    xgb_classifier = XGBClassifier()

    forest_mean_score = cross_val_score(forest_classifier, x_train, y_train, cv=5).mean()
    print(forest_mean_score)
    xgb_mean_score = cross_val_score(xgb_classifier, x_train, y_train, cv=5).mean()
    print(xgb_mean_score)

    # 6. Pick a better hyper-parameter combination with a parallel grid search.
    params = {
        'max_depth': range(2, 8), 'n_estimators': range(100, 1200, 200),
        'learning_rate': [0.05, 0.1, 0.25, 0.5, 1.0]
    }
    xgbc_best = XGBClassifier()
    grid_search_cv = GridSearchCV(xgbc_best, params, n_jobs=-1, cv=5)
    grid_search_cv.fit(x_train, y_train)
    print(grid_search_cv.best_score_)
    print(grid_search_cv.best_params_)

    # 7. Predict on the test set and write the submission file.
    predict_result = grid_search_cv.predict(x_test)
    submission_data = pandas.DataFrame({'PassengerId': test_data['PassengerId'], 'Survived': predict_result})
    submission_data.to_csv('data/submission/titanic_submission.csv', index=False)
开发者ID:ACEGuiPeng,项目名称:kaggle_demo_tests,代码行数:62,代码来源:example_titanic_pratice.py

示例11: PipeFeauture

# 需要导入模块: from sklearn.model_selection import GridSearchCV [as 别名]
# 或者: from sklearn.model_selection.GridSearchCV import fit [as 别名]
def PipeFeauture(Xtrain, Ytrain):
    """Grid-search a CountVectorizer -> TF-IDF -> SGDClassifier text
    pipeline and print the timing, best score, and best parameters."""
    text_clf = Pipeline([
        ('vect', CountVectorizer()),
        ('tfidf', TfidfTransformer()),
        ('clf', SGDClassifier()),
    ])

    # A deliberately small grid: widening it (max_features, use_idf, norm,
    # n_iter, ...) explores more but grows combinatorially.
    search_space = {
        'vect__max_df': (0.5, 0.75, 1.0),
        'vect__ngram_range': ((1, 1), (1, 2)),  # unigrams or bigrams
        'clf__alpha': (0.00001, 0.000001),
        'clf__penalty': ('l2', 'elasticnet'),
    }

    searcher = GridSearchCV(text_clf, search_space, n_jobs=-1, verbose=1)
    print("Performing grid search...")
    print("pipeline:", [name for name, _ in text_clf.steps])
    print("parameters:")
    pprint(search_space)
    started = time()
    searcher.fit(Xtrain, Ytrain)
    print("done in %0.3fs" % (time() - started))
    print()

    print("Best score: %0.3f" % searcher.best_score_)
    print("Best parameters set:")
    tuned = searcher.best_estimator_.get_params()
    for key in sorted(search_space.keys()):
        print("\t%s: %r" % (key, tuned[key]))
开发者ID:MojiFar,项目名称:test,代码行数:37,代码来源:ML2.py

示例12: nearest_neighbors

# 需要导入模块: from sklearn.model_selection import GridSearchCV [as 别名]
# 或者: from sklearn.model_selection.GridSearchCV import fit [as 别名]
 def nearest_neighbors(self):
     """Tune a KNN classifier over a fixed neighbor grid using f1-scored
     5-fold CV and append the fitted search to self.models."""
     grid = {"n_neighbors": [11, 31, 201, 401, 601]}
     searcher = GridSearchCV(KNeighborsClassifier(), grid,
                             cv=5, n_jobs=5, scoring="f1")
     searcher.fit(self.train_data_x, self.train_labels_y)
     self.models.append(searcher)
开发者ID:mahermkassem,项目名称:almost_scripts_repo,代码行数:9,代码来源:ml_contacts.py

示例13: logistic_regression

# 需要导入模块: from sklearn.model_selection import GridSearchCV [as 别名]
# 或者: from sklearn.model_selection.GridSearchCV import fit [as 别名]
 def logistic_regression(self):
     """Tune a logistic regression over C = 2**-10 .. 2**9 using
     recall-scored 5-fold CV and append the fitted search to self.models."""
     c_grid = {'C': [2 ** exponent for exponent in range(-10, 10)]}
     searcher = GridSearchCV(LogisticRegression(), c_grid,
                             cv=5, scoring="recall")  # make_scorer(my_scorer))
     searcher.fit(self.train_data_x, self.train_labels_y)
     self.models.append(searcher)
开发者ID:mahermkassem,项目名称:almost_scripts_repo,代码行数:9,代码来源:ml_contacts.py

示例14: tuner

# 需要导入模块: from sklearn.model_selection import GridSearchCV [as 别名]
# 或者: from sklearn.model_selection.GridSearchCV import fit [as 别名]
def tuner(clf, parameters, data):
    """Tune ``clf`` inside a scale -> select -> classify pipeline with a
    stratified-shuffle grid search and print the best score and params.

    :param clf: the classifier placed at the pipeline's "classifier" step
    :param parameters: param_grid for GridSearchCV (pipeline-step keys)
    :param data: dataset consumed by targetFeatureSplit
    """
    from sklearn.model_selection import GridSearchCV
    labels, features = targetFeatureSplit(data)
    scaler = MinMaxScaler()
    select = SelectKBest()

    steps = [("scale", scaler),
             ("select", select),
             ("classifier", clf)]

    pipeline = Pipeline(steps)

    shuffle = StratifiedShuffleSplit(n_splits=1000, test_size=0.3,
                                     random_state=42)

    my_scorer = make_scorer(my_score_func)
    scoring_metric = my_scorer

    grid_searcher = GridSearchCV(pipeline, param_grid=parameters,
                                 cv=shuffle, scoring=scoring_metric)

    # BUG FIX: the original called select.fit_transform(features, labels)
    # here, fitting the selector on the FULL dataset outside of CV (data
    # leakage) and then selecting again inside the pipeline. The pipeline's
    # own "select" step performs selection within each CV split.
    grid_searcher.fit(features, labels)

    print("Cross-validated {0} score: {1}".format(scoring_metric,
                                                  grid_searcher.best_score_))

    print("Params: ", grid_searcher.best_params_)
开发者ID:jcmuddle,项目名称:Udacity,代码行数:31,代码来源:enron.py

示例15: score_nestedCV

# 需要导入模块: from sklearn.model_selection import GridSearchCV [as 别名]
# 或者: from sklearn.model_selection.GridSearchCV import fit [as 别名]
    def score_nestedCV(self, G1, model, param_grid, effect, nested):
        """Score ``model`` across self.n_folds outer folds, optionally with
        a nested inner grid search.

        :param G1: data forwarded to self._packData along with fold indices
        :param model: estimator to evaluate
        :param param_grid: grid for the inner GridSearchCV (nested mode only)
        :param effect: forwarded to self._packData — TODO confirm semantics
        :param nested: if True, run an inner grid search per outer fold and
            score the refit best model; otherwise fit ``model`` directly and
            score with SCORERS[self.scoring]
        :return: (scores, params) — per-fold score array and, in nested
            mode, the list of per-fold best parameter dicts (empty otherwise)
        """
        # Outer CV over the rows of self.Y.
        k_fold = model_selection.KFold(n_splits=self.n_folds).split(range(self.Y.shape[0]))
        i_fold=0
        # One score per outer fold; ``sp`` is presumably scipy/numpy — TODO confirm alias.
        scores = sp.zeros(self.n_folds)
        params = list()

        for train, test in k_fold:
            (trainData, trainY) = self._packData(G1, train, effect)
            (testData, testY) = self._packData(G1, test, effect)

            if nested:
                # Inner search: n_folds_params-fold CV over param_grid.
                clf = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs = self.n_jobs_grid,
                                   cv=self.n_folds_params, scoring=self.scoring, verbose=self.verbose)

                clf.fit(trainData, trainY.flatten())

                params.append(clf.best_params_)

                # NOTE(review): ``method_scorer`` is not a standard
                # GridSearchCV.score argument — looks like a project-local
                # subclass hook; confirm before changing.
                scores[i_fold] = clf.score(testData, testY.flatten(), method_scorer=False)
            else:

                # Non-nested: fit once and score with the named scorer.
                model.fit(trainData, trainY.flatten())
                scores[i_fold] = SCORERS[self.scoring](model, testData, testY.flatten())
            i_fold+=1

        return scores,params
开发者ID:MicrosoftGenomics,项目名称:FaST-LMM,代码行数:28,代码来源:testCV.py


注:本文中的sklearn.model_selection.GridSearchCV.fit方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。