当前位置: 首页>>代码示例>>Python>>正文


Python VotingClassifier.fit方法代码示例

本文整理汇总了Python中sklearn.ensemble.VotingClassifier.fit方法的典型用法代码示例。如果您正苦于以下问题:Python VotingClassifier.fit方法的具体用法?Python VotingClassifier.fit怎么用?Python VotingClassifier.fit使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.ensemble.VotingClassifier的用法示例。


在下文中一共展示了VotingClassifier.fit方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_sample_weight

# Required import: from sklearn.ensemble import VotingClassifier [as alias]
# Note: fit is a method of VotingClassifier and cannot be imported by itself; call it on an instance, e.g. VotingClassifier(...).fit(X, y)
def test_sample_weight():
    """Exercise the ``sample_weight`` argument of ``VotingClassifier.fit``.

    Three scenarios are asserted: all-ones weights give the same
    predictions as an unweighted fit, a one-member ensemble matches that
    member fitted directly with the same weights, and an ensemble that
    contains a KNeighborsClassifier is expected to raise ValueError when
    weights are supplied.
    """
    log_reg = LogisticRegression(random_state=123)
    forest = RandomForestClassifier(random_state=123)
    svc = SVC(probability=True, random_state=123)

    # Scenario 1: uniform weights versus no weights at all.
    unit_weights = np.ones((len(y),))
    with_ones = VotingClassifier(
        estimators=[('lr', log_reg), ('rf', forest), ('svc', svc)],
        voting='soft')
    with_ones = with_ones.fit(X, y, sample_weight=unit_weights)
    without = VotingClassifier(
        estimators=[('lr', log_reg), ('rf', forest), ('svc', svc)],
        voting='soft')
    without = without.fit(X, y)
    assert_array_equal(with_ones.predict(X), without.predict(X))
    assert_array_equal(with_ones.predict_proba(X), without.predict_proba(X))

    # Scenario 2: a single-member ensemble versus the bare member, both
    # fitted with the same reproducible random weights.
    random_weights = np.random.RandomState(123).uniform(size=(len(y),))
    solo = VotingClassifier(estimators=[('lr', log_reg)], voting='soft')
    solo.fit(X, y, random_weights)
    log_reg.fit(X, y, random_weights)
    assert_array_equal(solo.predict(X), log_reg.predict(X))
    assert_array_equal(solo.predict_proba(X), log_reg.predict_proba(X))

    # Scenario 3: adding a KNN member must make a weighted fit fail with
    # the message below.
    knn = KNeighborsClassifier()
    mixed = VotingClassifier(
        estimators=[('lr', log_reg), ('svc', svc), ('knn', knn)],
        voting='soft')
    expected_msg = "Underlying estimator 'knn' does not support sample weights."
    assert_raise_message(ValueError, expected_msg,
                         mixed.fit, X, y, random_weights)
开发者ID:ClimbsRocks,项目名称:scikit-learn,代码行数:29,代码来源:test_voting_classifier.py

示例2: voting_fit

# Required import: from sklearn.ensemble import VotingClassifier [as alias]
# Note: fit is a method of VotingClassifier and cannot be imported by itself; call it on an instance, e.g. VotingClassifier(...).fit(X, y)
def voting_fit(X, y, RESULT_TEST_PATH,RESULT_PATH):
    """Fit a soft-voting ensemble on (X, y) and write test predictions to CSV.

    Parameters
    ----------
    X, y : training features and labels, passed unchanged to the
        ``fit_*`` helpers and to ``VotingClassifier.fit``.
    RESULT_TEST_PATH : path of the CSV file holding the test rows; every
        column of that file is fed to the ensemble as a feature.
    RESULT_PATH : path of the CSV file to write; it receives the columns
        ``PassengerId`` and ``Survived`` without an index column.
    """
    ada_best = fit_adaboost(X, y)
    extratree_best = fit_extratree(X, y)
    rf_best = fit_rf(X, y)
    gbdt_best = fit_xgboost(X, y)
    # NOTE(review): the SVC helper is still called so that whatever it does
    # besides returning a model keeps happening, but its result was never
    # part of the ensemble -- confirm whether this call can be dropped.
    fit_svc(X, y)
    lr_best = fit_lr(X, y)

    votingC = VotingClassifier(estimators=[('rfc', rf_best), ('extc', extratree_best),('lr',lr_best),
                                            ('adac', ada_best), ('gbc', gbdt_best)], voting='soft',
                               n_jobs=4)
    votingC.fit(X, y)

    test_df = pd.read_csv(RESULT_TEST_PATH)
    predictions = votingC.predict(np.array(test_df))

    # Build the submission as a fresh frame instead of assigning into a
    # column-sliced frame, which is the chained-assignment pattern pandas
    # warns about (SettingWithCopyWarning).
    submission = pd.DataFrame(
        {
            'PassengerId': test_df['PassengerId'].astype(np.int64),
            'Survived': predictions,
        },
        columns=['PassengerId', 'Survived'],
    )
    submission.to_csv(RESULT_PATH, index=False)
    print("finish!")
开发者ID:jawiezhu,项目名称:kaggleLearning,代码行数:27,代码来源:fit.py

示例3: test_set_params

# Required import: from sklearn.ensemble import VotingClassifier [as alias]
# Note: fit is a method of VotingClassifier and cannot be imported by itself; call it on an instance, e.g. VotingClassifier(...).fit(X, y)
def test_set_params():
    """Check that ``set_params`` can replace and reconfigure sub-estimators."""
    log_reg = LogisticRegression(random_state=123, C=1.0)
    forest = RandomForestClassifier(random_state=123, max_depth=None)
    bayes = GaussianNB()

    # Unfitted ensemble: named_estimators resolves to the objects passed in.
    ens_a = VotingClassifier([('lr', log_reg), ('rf', forest)],
                             voting='soft', weights=[1, 2])
    assert_true('lr' in ens_a.named_estimators)
    assert_true(ens_a.named_estimators.lr is ens_a.estimators[0][1])
    assert_true(ens_a.named_estimators.lr is ens_a.named_estimators['lr'])

    # Fitted ensemble: named_estimators_ resolves to the fitted members.
    ens_a.fit(X, y)
    assert_true('lr' in ens_a.named_estimators_)
    assert_true(ens_a.named_estimators_.lr is ens_a.estimators_[0])
    assert_true(ens_a.named_estimators_.lr is ens_a.named_estimators_['lr'])

    # Start with GaussianNB in the 'nb' slot, then swap the forest in.
    ens_b = VotingClassifier([('lr', log_reg), ('nb', bayes)],
                             voting='soft', weights=[1, 2])
    ens_b.set_params(nb=forest).fit(X, y)
    assert_false(hasattr(ens_b, 'nb'))

    # After the swap both ensembles hold the same pair of model types.
    assert_array_equal(ens_a.predict(X), ens_b.predict(X))
    assert_array_almost_equal(ens_a.predict_proba(X), ens_b.predict_proba(X))
    for position, reference in enumerate((log_reg, forest)):
        assert_equal(ens_b.estimators[position][1].get_params(),
                     reference.get_params())

    # Nested "<name>__<param>" keys reach into the sub-estimators.
    ens_a.set_params(lr__C=10.0)
    ens_b.set_params(nb__max_depth=5)

    lr_params = ens_a.estimators[0][1].get_params()
    nb_slot_params = ens_b.estimators[1][1].get_params()
    assert_true(lr_params['C'] == 10.0)
    assert_true(nb_slot_params['max_depth'] == 5)
    assert_equal(ens_a.get_params()["lr__C"],
                 ens_a.get_params()["lr"].get_params()['C'])
开发者ID:abecadel,项目名称:scikit-learn,代码行数:34,代码来源:test_voting_classifier.py

示例4: process_cell

# Required import: from sklearn.ensemble import VotingClassifier [as alias]
# Note: fit is a method of VotingClassifier and cannot be imported by itself; call it on an instance, e.g. VotingClassifier(...).fit(X, y)
    def process_cell(self, df_cell_train, df_cell_test, window):
        """Predict the three most probable place ids for each test row of a cell.

        Training rows whose ``place_id`` occurs fewer than ``th`` times
        (``th`` is read from module scope) are discarded before fitting a
        soft-voting ensemble of a KNN and a random forest.

        Parameters
        ----------
        df_cell_train : DataFrame with a ``place_id`` column; all other
            columns are used as integer-cast features.
        df_cell_test : DataFrame whose columns are all used as
            integer-cast features.
        window : not used by this method.

        Returns
        -------
        (pred_labels, row_ids) : ``pred_labels`` holds, per test row, up to
            three place ids ordered from most to least probable;
            ``row_ids`` is the index of ``df_cell_test``.
        """
        place_counts = df_cell_train.place_id.value_counts()
        mask = (place_counts[df_cell_train.place_id.values] >= th).values
        df_cell_train = df_cell_train.loc[mask]

        # Working on df_test
        row_ids = df_cell_test.index

        # Preparing data
        le = LabelEncoder()
        y = le.fit_transform(df_cell_train.place_id.values)
        X = df_cell_train.drop(['place_id', ], axis=1).values.astype(int)
        X_test = df_cell_test.values.astype(int)

        # Applying the classifier
        clf1 = KNeighborsClassifier(n_neighbors=50, weights='distance',
                                    metric='manhattan')
        clf2 = RandomForestClassifier(n_estimators=50, n_jobs=-1)
        eclf = VotingClassifier(estimators=[('knn', clf1), ('rf', clf2)], voting='soft')

        eclf.fit(X, y)
        y_pred = eclf.predict_proba(X_test)
        # Column indices of the three largest probabilities, best first.
        top3 = np.argsort(y_pred, axis=1)[:, ::-1][:, :3]
        # LabelEncoder.inverse_transform is documented for 1-D input only,
        # so decode the flattened indices and restore the 2-D layout.
        pred_labels = le.inverse_transform(top3.ravel()).reshape(top3.shape)
        return pred_labels, row_ids
开发者ID:rtindru,项目名称:springboard-ds2,代码行数:27,代码来源:ensemble_better_split.py

示例5: main

# Required import: from sklearn.ensemble import VotingClassifier [as alias]
# Note: fit is a method of VotingClassifier and cannot be imported by itself; call it on an instance, e.g. VotingClassifier(...).fit(X, y)
def main(directory, tools_directory, non_tools_dir):
    """Classify the publications in ``directory`` and copy them into two folders.

    A VotingClassifier built from the first three entries of the
    module-level ``classifier_list`` is fitted on the data returned by
    ``fetch_from_file`` and applied to the test set returned by
    ``get_test_set(directory)``.  Files with a truthy prediction are
    copied to ``tools_directory``, all others to ``non_tools_dir``.

    Parameters
    ----------
    directory : directory containing the publications to classify.  If it
        is None or not an existing directory, a message is printed and the
        process exits with status 1.
    tools_directory, non_tools_dir : output directories.  Both are deleted
        (when present) and recreated before copying.

    Side effects: sets the module-level ``path`` to ``sys.path[0]``.
    """
    global path
    path = sys.path[0]
    start = time.time()
    if directory is None or not os.path.isdir(directory):
        print("Please input directory containing pdf publications to classify")
        sys.exit(1)
    x_train, y_train = fetch_from_file()
    x_test, test_files = get_test_set(directory)
    # Just for testing, update machine learning part later

    x_train, x_test = normalize_scale(x_train, x_test)
    # Estimator names must be unique: the third entry used to reuse
    # "second", which VotingClassifier rejects.
    classifier = VotingClassifier(
        [("first", classifier_list[0]), ("second", classifier_list[1]), ("third", classifier_list[2])]
    )
    classifier.fit(x_train, y_train)
    y_pred = classifier.predict(x_test)

    # Start from empty output directories.
    for out_dir in (tools_directory, non_tools_dir):
        if os.path.isdir(out_dir):
            shutil.rmtree(out_dir)
        os.makedirs(out_dir)

    # NOTE(review): paths are built by plain concatenation, so all three
    # directory arguments presumably have to end with a path separator --
    # confirm against the callers before switching to os.path.join.
    for num, pub in zip(y_pred, test_files):
        destination = tools_directory if num else non_tools_dir
        shutil.copy2(directory + pub, destination + pub)

    print("Classification:    Seconds taken: " + str(time.time() - start))
开发者ID:UCLA-BD2K,项目名称:AztecRetrieval,代码行数:34,代码来源:classifier.py

示例6: test_predict_on_toy_problem

# Required import: from sklearn.ensemble import VotingClassifier [as alias]
# Note: fit is a method of VotingClassifier and cannot be imported by itself; call it on an instance, e.g. VotingClassifier(...).fit(X, y)
def test_predict_on_toy_problem():
    """Manually check predicted class labels for toy dataset.

    Each base classifier and both a hard- and a soft-voting ensemble are
    fitted on six points and must reproduce the training labels exactly.
    """
    clf1 = LogisticRegression(random_state=123)
    clf2 = RandomForestClassifier(random_state=123)
    clf3 = GaussianNB()

    X = np.array([[-1.1, -1.5],
                  [-1.2, -1.4],
                  [-3.4, -2.2],
                  [1.1, 1.2],
                  [2.1, 1.4],
                  [3.1, 2.3]])

    y = np.array([1, 1, 1, 2, 2, 2])
    expected = [1, 1, 1, 2, 2, 2]

    # Compare the full label sequences.  Wrapping both sides in all()
    # reduces them to True == True for any non-zero labels, so the
    # assertion could never fail on a wrong prediction.
    for clf in (clf1, clf2, clf3):
        assert_equal(clf.fit(X, y).predict(X).tolist(), expected)

    for voting in ('hard', 'soft'):
        eclf = VotingClassifier(estimators=[
                                ('lr', clf1), ('rf', clf2), ('gnb', clf3)],
                                voting=voting,
                                weights=[1, 1, 1])
        assert_equal(eclf.fit(X, y).predict(X).tolist(), expected)
开发者ID:ClimbsRocks,项目名称:scikit-learn,代码行数:32,代码来源:test_voting_classifier.py

示例7: process_one_cell

# Required import: from sklearn.ensemble import VotingClassifier [as alias]
# Note: fit is a method of VotingClassifier and cannot be imported by itself; call it on an instance, e.g. VotingClassifier(...).fit(X, y)
def process_one_cell(df_train, df_test, x_min, x_max, y_min, y_max):
    """Predict the three most probable place ids for the test rows of one cell.

    Training rows are taken from the cell enlarged by a fixed margin on
    every side; test rows come from the exact cell.  Places occurring
    fewer than ``th`` times in the training selection are dropped, and the
    ``x``/``y`` columns are scaled by ``fw[0]``/``fw[1]`` (``th`` and
    ``fw`` are read from module scope).

    Parameters
    ----------
    df_train : DataFrame with ``x``, ``y`` and ``place_id`` columns.
    df_test : DataFrame with ``x`` and ``y`` columns; a ``place_id``
        column, if present, is ignored.
    x_min, x_max, y_min, y_max : half-open cell bounds
        (``min <= value < max``).

    Returns
    -------
    (pred_labels, row_ids), or (None, None) when the cell has no training
    or no test rows.  ``pred_labels`` holds up to three place ids per test
    row, most probable first; ``row_ids`` is the index of the test rows.
    """
    x_border_augment = 0.025
    y_border_augment = 0.0125

    #Working on df_train
    df_cell_train = df_train[(df_train['x'] >= x_min-x_border_augment) & (df_train['x'] < x_max+x_border_augment) &
                               (df_train['y'] >= y_min-y_border_augment) & (df_train['y'] < y_max+y_border_augment)]
    place_counts = df_cell_train.place_id.value_counts()
    mask = (place_counts[df_cell_train.place_id.values] >= th).values
    # Copy so the in-place scaling below acts on an independent frame and
    # not on a selection of the caller's data.
    df_cell_train = df_cell_train.loc[mask].copy()

    #Working on df_test
    df_cell_test = df_test[(df_test['x'] >= x_min) & (df_test['x'] < x_max) &
                               (df_test['y'] >= y_min) & (df_test['y'] < y_max)].copy()
    row_ids = df_cell_test.index

    if len(df_cell_train) == 0 or len(df_cell_test) == 0:
        return None, None

    #Feature engineering on x and y
    df_cell_train.loc[:,'x'] *= fw[0]
    df_cell_train.loc[:,'y'] *= fw[1]
    df_cell_test.loc[:,'x'] *= fw[0]
    df_cell_test.loc[:,'y'] *= fw[1]

    #Preparing data
    le = LabelEncoder()
    y = le.fit_transform(df_cell_train.place_id.values)
    X = df_cell_train.drop(['place_id'], axis=1).values.astype(float)
    # The test frame may or may not carry the target column.
    X_test = df_cell_test.drop(['place_id'], axis=1, errors='ignore').values.astype(float)

    #Applying the classifier
    clf1 = BaggingClassifier(KNeighborsClassifier(n_neighbors=26, weights='distance',
                                metric='manhattan'), n_jobs=-1, n_estimators=50)
    clf2 = RandomForestClassifier(n_estimators=100, n_jobs=-1)

    # predict_proba is only available with soft voting; with
    # voting='hard' the call below fails.
    eclf = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2)], voting='soft')

    eclf.fit(X, y)
    y_pred = eclf.predict_proba(X_test)
    # Column indices of the three largest probabilities, best first.
    top3 = np.argsort(y_pred, axis=1)[:,::-1][:,:3]
    # LabelEncoder.inverse_transform is documented for 1-D input only,
    # so decode the flattened indices and restore the 2-D layout.
    pred_labels = le.inverse_transform(top3.ravel()).reshape(top3.shape)

    return pred_labels, row_ids
开发者ID:itenyh,项目名称:kaggle,代码行数:59,代码来源:knn_plus.py

示例8: test_estimator_weights_format

# Required import: from sklearn.ensemble import VotingClassifier [as alias]
# Note: fit is a method of VotingClassifier and cannot be imported by itself; call it on an instance, e.g. VotingClassifier(...).fit(X, y)
def test_estimator_weights_format():
    # Ensemble weights given as a list and as an ndarray must lead to the
    # same predicted probabilities.
    log_reg = LogisticRegression(random_state=123)
    forest = RandomForestClassifier(random_state=123)

    from_list = VotingClassifier(
        estimators=[("lr", log_reg), ("rf", forest)],
        weights=[1, 2],
        voting="soft",
    )
    from_array = VotingClassifier(
        estimators=[("lr", log_reg), ("rf", forest)],
        weights=np.array((1, 2)),
        voting="soft",
    )
    for ensemble in (from_list, from_array):
        ensemble.fit(X, y)

    assert_array_equal(from_list.predict_proba(X), from_array.predict_proba(X))
开发者ID:nelson-liu,项目名称:scikit-learn,代码行数:11,代码来源:test_voting_classifier.py

示例9: classify

# Required import: from sklearn.ensemble import VotingClassifier [as alias]
# Note: fit is a method of VotingClassifier and cannot be imported by itself; call it on an instance, e.g. VotingClassifier(...).fit(X, y)
def classify():
    train_X,Y = load_svmlight_file('data/train_last')
    test_X,test_Y = load_svmlight_file('data/test_last')
    train_X = train_X.toarray()
    test_X = test_X.toarray()
    Y = [int(y) for y in Y]
    # print 'Y:',len(Y)
    rows = pd.read_csv('data/log_test2.csv',index_col=0).sort_index().index.unique()
    train_n = train_X.shape[0]
    m = train_X.shape[1]
    test_n = test_X.shape[0]
    print train_n,m,#test_n
     # 先用训练集训练出所有的分类器
    print 'train classify...'
    clf1 = LinearDiscriminantAnalysis()
    clf2 = GaussianNB()
    clf3 = LogisticRegression()
    clf4 = RandomForestClassifier()
    clf5 = KNeighborsClassifier(n_neighbors=12)
    clf6 = AdaBoostClassifier()
    # x_train,x_test,y_train,y_test = train_test_split(train_X,Y,test_size=0.2) # 对训练集进行划分

    # print x_train.shape
    # print x_test.shape
    # clf.fit(train_X,Y)
    clf = VotingClassifier(estimators=[('la',clf1),('nb',clf2),('lr',clf3),('rf',clf4),('nn',clf5),('ac',clf6)], voting='soft', weights=[1.5,1,1,1,1,1])
    # clf1.fit(x_train,y_train)
    # clf2.fit(x_train,y_train)
    # clf3.fit(x_train,y_train)
    # clf4.fit(x_train,y_train)
    clf.fit(train_X,Y)
    print 'end train classify'

    print 'start classify....'
    # print metrics.classification_report(Y,predict_Y)
    # clf2.fit(train_X,Y)
    # print 'clf2 fited...'
    # clf3.fit(train_X,Y)
    # print 'clf3 fited...'
    # clf4.fit(train_X,Y)
    # print 'clf4 fited...'
    # clf1.fit(train_X,Y)
    # print 'clf1 fited...'
    # 第一个分类结果
    predict_Y = clf.predict(train_X)
    # predict_Y = clf.predict(train_X)
    print 'classify result:'
    print metrics.classification_report(Y,predict_Y)

    predict_Y = clf.predict(test_X)
    # print predict_Y,len(predict_Y)
    print 'end classify...'
    # predict_Y = clf.predict(X[cnt_train:]) # 训练注释这一行,输出测试集打开这一行,注释之后的print metric
    # predict_Y = clf.predict(test_X) # 训练注释这一行,输出测试集打开这一行,注释之后的print metric
    DataFrame(predict_Y,index=rows).to_csv('data/info_test2.csv', header=False)
开发者ID:ganzhiruyi,项目名称:Machine-Learning,代码行数:57,代码来源:test.py

示例10: test_predict_for_hard_voting

# Required import: from sklearn.ensemble import VotingClassifier [as alias]
# Note: fit is a method of VotingClassifier and cannot be imported by itself; call it on an instance, e.g. VotingClassifier(...).fit(X, y)
def test_predict_for_hard_voting():
    # Hard voting with a member that yields non-integer (float)
    # predictions: fit and predict only have to complete without raising.
    members = [
        ('fsvc', FaultySVC(random_state=123)),
        ('gnb', GaussianNB()),
        ('svc', SVC(probability=True, random_state=123)),
    ]
    ensemble = VotingClassifier(estimators=members, weights=[1, 2, 3],
                                voting='hard')

    ensemble.fit(X, y)
    ensemble.predict(X)
开发者ID:ldirer,项目名称:scikit-learn,代码行数:13,代码来源:test_voting_classifier.py

示例11: test_multilabel

# Required import: from sklearn.ensemble import VotingClassifier [as alias]
# Note: fit is a method of VotingClassifier and cannot be imported by itself; call it on an instance, e.g. VotingClassifier(...).fit(X, y)
def test_multilabel():
    """Check if error is raised for multilabel classification.

    Fitting a VotingClassifier on a multilabel target must raise
    NotImplementedError; the test fails if ``fit`` returns normally.
    """
    X, y = make_multilabel_classification(n_classes=2, n_labels=1, allow_unlabeled=False, random_state=123)
    clf = OneVsRestClassifier(SVC(kernel="linear"))

    eclf = VotingClassifier(estimators=[("ovr", clf)], voting="hard")

    try:
        eclf.fit(X, y)
    except NotImplementedError:
        return
    # Reaching this point means no error was raised, which is exactly the
    # situation the test exists to catch.
    raise AssertionError(
        "VotingClassifier.fit did not raise NotImplementedError for a multilabel target"
    )
开发者ID:nelson-liu,项目名称:scikit-learn,代码行数:13,代码来源:test_voting_classifier.py

示例12: test_sample_weight_kwargs

# Required import: from sklearn.ensemble import VotingClassifier [as alias]
# Note: fit is a method of VotingClassifier and cannot be imported by itself; call it on an instance, e.g. VotingClassifier(...).fit(X, y)
def test_sample_weight_kwargs():
    """Ensure VotingClassifier hands sample_weight to members by keyword."""
    class MockClassifier(BaseEstimator, ClassifierMixin):
        """Stub whose fit only verifies that sample_weight arrived as a kwarg."""
        # NOTE(review): the catch-all is deliberately named ``sample_weight``;
        # presumably signature-based checks rely on that name -- do not
        # rename without confirming.
        def fit(self, X, y, *args, **sample_weight):
            assert_true('sample_weight' in sample_weight)

    unit_weights = np.ones((len(y),))
    ensemble = VotingClassifier(estimators=[('mock', MockClassifier())],
                                voting='soft')

    # The call itself is the check: it must complete without raising.
    ensemble.fit(X, y, sample_weight=unit_weights)
开发者ID:abecadel,项目名称:scikit-learn,代码行数:14,代码来源:test_voting_classifier.py

示例13: train

# Required import: from sklearn.ensemble import VotingClassifier [as alias]
# Note: fit is a method of VotingClassifier and cannot be imported by itself; call it on an instance, e.g. VotingClassifier(...).fit(X, y)
    def train(self):
        """Fit one VotingClassifier per xy bin and dump each model with joblib.

        Bins are visited in sorted order.  For every bin the rows of
        ``self.df`` whose ``xy_bin`` equals the bin id are selected, the
        ``self.features`` columns are used as inputs and ``place_id`` as
        target, and the fitted model is written to the path obtained by
        formatting the module-level ``xybins_file_name_str`` with the bin id.
        """
        for current_bin in sorted(self.xy_bins):
            target_path = xybins_file_name_str.format(current_bin)
            progress = 'Training model: {} of {}'.format(current_bin, max(self.xy_bins))
            print(progress)

            frame = self.df
            subset = frame[frame.xy_bin == current_bin]
            inputs = subset[self.features]
            targets = subset.place_id

            ensemble = VotingClassifier(self.models)
            ensemble.fit(inputs, targets)
            joblib.dump(ensemble, target_path, compress=3)
开发者ID:rtindru,项目名称:springboard-ds2,代码行数:14,代码来源:fb_new_model_4.py

示例14: main

# Required import: from sklearn.ensemble import VotingClassifier [as alias]
# Note: fit is a method of VotingClassifier and cannot be imported by itself; call it on an instance, e.g. VotingClassifier(...).fit(X, y)
def main(path,filename):
	"""Train a voting ensemble on the stored feature batches and pickle it.

	The labels are loaded from the 'clases' batch and flattened by one
	level; the features are loaded from the batches listed in ``batchs``.
	``load_batch`` is expected to fill the list it receives in place (its
	return value is not used).  The ensemble consists of the estimators
	returned by RandomForest(), Boosting(), Gradient(0..14) and SVM(0..3),
	and the fitted model is written to "clf_grande.p" in the current
	working directory.

	Parameters
	----------
	path, filename : forwarded unchanged to every ``load_batch`` call.

	Returns None.  The evaluation experiments that used to follow the
	first ``return`` could never run and have been removed.
	"""
	batchs = ['histogramaByN','histogramaColor','patronesCirculaesByN_2_5','patrones2x2ByN','patrones3x3ByN','patronesCirculaesByN_2_9','patronesCirculaesByN_3_9','patronesCirculaesByN_5_9','patronesCirculaesByN_3_5','patronesCirculaesByN_6_12','patronesCirculaesByN_8_12']

	X = []
	y = []
	load_batch(y,path,'clases',filename)
	# Flatten the loaded label chunks into one flat list.
	y = [j for i in y for j in i]
	for batch in batchs:
		load_batch(X,path,batch,filename)

	est = [RandomForest(),Boosting()]
	est.extend(Gradient(i) for i in range(15))
	est.extend(SVM(i) for i in range(4))

	clf = VotingClassifier(estimators=est)
	clf.fit(X,y)

	# Close the file deterministically instead of leaking the handle.
	with open("clf_grande.p", "wb") as model_file:
		pickle.dump(clf, model_file)
开发者ID:fcanay,项目名称:MachineLearning,代码行数:54,代码来源:src.py

示例15: train_classifier

# Required import: from sklearn.ensemble import VotingClassifier [as alias]
# Note: fit is a method of VotingClassifier and cannot be imported by itself; call it on an instance, e.g. VotingClassifier(...).fit(X, y)
def train_classifier(algorithm, features, train):
    """Fit a soft-voting ensemble selected by the keys found in ``algorithm``.

    Parameters
    ----------
    algorithm : container or string tested with ``in`` for the keys
        'rf' (random forest, 100 trees), 'lr' (logistic regression) and
        'mb' (multinomial naive Bayes).  Every key present adds the
        corresponding estimator, in that order.
    features : training inputs passed to ``VotingClassifier.fit``.
    train : mapping or DataFrame whose 'sentiment' entry is the target.

    Returns
    -------
    The fitted VotingClassifier.

    Raises
    ------
    ValueError
        If ``algorithm`` contains none of the known keys, instead of
        letting an empty ensemble fail later inside scikit-learn.
    """
    print('Train classifier ({})...'.format(algorithm))
    # Factories are wrapped in lambdas so that only the selected
    # estimators are ever constructed.
    factories = (
        ('rf', lambda: RandomForestClassifier(n_estimators=100)),
        ('lr', lambda: LogisticRegression()),
        ('mb', lambda: MultinomialNB()),
    )
    estimators = [(key, build()) for key, build in factories if key in algorithm]
    if not estimators:
        raise ValueError(
            "algorithm {!r} selects no estimator; expected at least one of "
            "'rf', 'lr', 'mb'".format(algorithm)
        )
    # Training
    classifier = VotingClassifier(estimators=estimators, voting='soft')
    classifier.fit(features, train['sentiment'])
    return classifier
开发者ID:pvigier,项目名称:sa,代码行数:15,代码来源:util.py


注:本文中的sklearn.ensemble.VotingClassifier.fit方法示例由纯净天空整理自GitHub/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。