This article collects typical usage examples of the sklearn.ensemble.VotingClassifier class in Python. If you have been wondering what the VotingClassifier class is for, or how to use it in practice, the curated examples below should help.
The following presents 15 code examples of the VotingClassifier class, sorted by popularity by default.
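Before the individual examples, here is a minimal, self-contained sketch of the typical workflow. The toy dataset and estimator settings are illustrative only and do not come from any of the examples below:

from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB

# Illustrative toy data
X, y = make_classification(n_samples=100, n_features=4, random_state=0)

# Combine three heterogeneous estimators; voting='soft' averages predict_proba
eclf = VotingClassifier(estimators=[('lr', LogisticRegression()),
                                    ('rf', RandomForestClassifier(n_estimators=10)),
                                    ('gnb', GaussianNB())],
                        voting='soft')
eclf.fit(X, y)
print(eclf.predict(X[:5]))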
Example 1: test_predict_on_toy_problem
def test_predict_on_toy_problem():
"""Manually check predicted class labels for toy dataset."""
clf1 = LogisticRegression(random_state=123)
clf2 = RandomForestClassifier(random_state=123)
clf3 = GaussianNB()
X = np.array([[-1.1, -1.5],
[-1.2, -1.4],
[-3.4, -2.2],
[1.1, 1.2],
[2.1, 1.4],
[3.1, 2.3]])
y = np.array([1, 1, 1, 2, 2, 2])
    assert_array_equal(clf1.fit(X, y).predict(X), [1, 1, 1, 2, 2, 2])
    assert_array_equal(clf2.fit(X, y).predict(X), [1, 1, 1, 2, 2, 2])
    assert_array_equal(clf3.fit(X, y).predict(X), [1, 1, 1, 2, 2, 2])
eclf = VotingClassifier(estimators=[
('lr', clf1), ('rf', clf2), ('gnb', clf3)],
voting='hard',
weights=[1, 1, 1])
    assert_array_equal(eclf.fit(X, y).predict(X), [1, 1, 1, 2, 2, 2])
eclf = VotingClassifier(estimators=[
('lr', clf1), ('rf', clf2), ('gnb', clf3)],
voting='soft',
weights=[1, 1, 1])
    assert_array_equal(eclf.fit(X, y).predict(X), [1, 1, 1, 2, 2, 2])
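For reference, the two voting rules exercised above can be written out by hand as follows. This is a sketch of the semantics, not the library internals; it assumes already-fitted classifiers and nonnegative integer labels, which VotingClassifier guarantees internally via a LabelEncoder:

import numpy as np

def hard_vote(clfs, weights, X):
    # Weighted majority over predicted labels, per sample
    preds = np.asarray([clf.predict(X) for clf in clfs])  # (n_clfs, n_samples)
    return np.apply_along_axis(
        lambda col: np.bincount(col, weights=weights).argmax(), 0, preds)

def soft_vote(clfs, weights, X):
    # Argmax of the weighted average of predicted probabilities;
    # returns the index of the winning class in the shared classes_ ordering
    probas = np.asarray([clf.predict_proba(X) for clf in clfs])
    return np.average(probas, axis=0, weights=weights).argmax(axis=1)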
Example 2: predict
def predict(self, X_test):
    '''
    Predict the class for each sample.
    '''
    if self.use_append:
        self.__X_test = X_test
    else:
        temp = []
    # first stage: append each stage-one classifier's predictions as a feature column
    for clf in self.stage_one_clfs:
        y_pred = clf[1].predict(X_test)
        y_pred = np.reshape(y_pred, (len(y_pred), 1))
        if self.use_append:
            self.__X_test = np.hstack((self.__X_test, y_pred))
        else:
            temp.append(y_pred)
    if not self.use_append:
        self.__X_test = np.array(temp).T[0]
    # second stage: weighted hard vote over the stage-two classifiers
    # NOTE: as written, this VotingClassifier is never fitted; current
    # scikit-learn raises NotFittedError unless fit is called before predict.
    majority_voting = VotingClassifier(estimators=self.stage_two_clfs, voting="hard", weights=self.weights)
    y_out = majority_voting.predict(self.__X_test)
    return y_out
Example 3: voting_fit
def voting_fit(X, y, RESULT_TEST_PATH, RESULT_PATH):
ada_best = fit_adaboost(X, y)
extratree_best = fit_extratree(X, y)
rf_best = fit_rf(X, y)
gbdt_best = fit_xgboost(X, y)
svc_best = fit_svc(X, y)
lr_best = fit_lr(X, y)
votingC = VotingClassifier(estimators=[('rfc', rf_best), ('extc', extratree_best),('lr',lr_best),
('adac', ada_best), ('gbc', gbdt_best)], voting='soft',
n_jobs=4)
votingC.fit(X, y)
test_df = pd.read_csv(RESULT_TEST_PATH)
test = np.array(test_df)
#test_Survived = pd.Series(votingC.predict(test), name="Survived")
result = votingC.predict(test)
test_df.insert(test_df.columns.size, 'Survived', result)
test_df = test_df[['PassengerId', 'Survived']]
test_df['PassengerId'] = test_df['PassengerId'].apply(np.int64)
test_df.to_csv(RESULT_PATH, index=False)
print("finish!")
Example 4: process_cell
def process_cell(self, df_cell_train, df_cell_test, window):
place_counts = df_cell_train.place_id.value_counts()
mask = (place_counts[df_cell_train.place_id.values] >= th).values
df_cell_train = df_cell_train.loc[mask]
# Working on df_test
row_ids = df_cell_test.index
# Preparing data
le = LabelEncoder()
y = le.fit_transform(df_cell_train.place_id.values)
X = df_cell_train.drop(['place_id', ], axis=1).values.astype(int)
X_test = df_cell_test.values.astype(int)
# Applying the classifier
clf1 = KNeighborsClassifier(n_neighbors=50, weights='distance',
metric='manhattan')
clf2 = RandomForestClassifier(n_estimators=50, n_jobs=-1)
eclf = VotingClassifier(estimators=[('knn', clf1), ('rf', clf2)], voting='soft')
eclf.fit(X, y)
y_pred = eclf.predict_proba(X_test)
pred_labels = le.inverse_transform(np.argsort(y_pred, axis=1)[:, ::-1][:, :3])
return pred_labels, row_ids
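The last two lines above (and the same idiom in Example 10 below) turn the probability matrix into the top three place_id candidates per row: argsort each row of predict_proba in descending order, keep the first three columns, and map the encoded class indices back through the LabelEncoder. A standalone sketch, where top_k_labels is a hypothetical helper name:

import numpy as np

def top_k_labels(le, proba, k=3):
    # Column indices of the k highest-probability classes per row, best first
    idx = np.argsort(proba, axis=1)[:, ::-1][:, :k]
    # Recent scikit-learn expects 1-D input to LabelEncoder.inverse_transform
    return le.inverse_transform(idx.ravel()).reshape(idx.shape)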
Example 5: _voting
def _voting(estimators, **kwargs):
"""Build the classifier
"""
clfObj = VotingClassifier([(k.shStr, k) for k in estimators], n_jobs=1, **kwargs)
clfObj.lgStr = ' + '.join([k.lgStr for k in estimators])
clfObj.shStr = ' + '.join([k.shStr for k in estimators])
return clfObj
Example 6: test_transform
def test_transform():
"""Check transform method of VotingClassifier on toy dataset."""
clf1 = LogisticRegression(random_state=123)
clf2 = RandomForestClassifier(random_state=123)
clf3 = GaussianNB()
X = np.array([[-1.1, -1.5], [-1.2, -1.4], [-3.4, -2.2], [1.1, 1.2]])
y = np.array([1, 1, 2, 2])
eclf1 = VotingClassifier(estimators=[
('lr', clf1), ('rf', clf2), ('gnb', clf3)],
voting='soft').fit(X, y)
eclf2 = VotingClassifier(estimators=[
('lr', clf1), ('rf', clf2), ('gnb', clf3)],
voting='soft',
flatten_transform=True).fit(X, y)
eclf3 = VotingClassifier(estimators=[
('lr', clf1), ('rf', clf2), ('gnb', clf3)],
voting='soft',
flatten_transform=False).fit(X, y)
assert_array_equal(eclf1.transform(X).shape, (4, 6))
assert_array_equal(eclf2.transform(X).shape, (4, 6))
assert_array_equal(eclf3.transform(X).shape, (3, 4, 2))
assert_array_almost_equal(eclf1.transform(X),
eclf2.transform(X))
assert_array_almost_equal(
eclf3.transform(X).swapaxes(0, 1).reshape((4, 6)),
eclf2.transform(X)
)
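The shapes asserted above follow from the transform contract for soft voting: with flatten_transform=True (the default in recent scikit-learn releases) the per-estimator probability blocks are concatenated horizontally into (n_samples, n_classifiers * n_classes), while flatten_transform=False returns them stacked as (n_classifiers, n_samples, n_classes). A minimal sketch of the relationship between the two layouts:

import numpy as np

# probas has shape (n_classifiers, n_samples, n_classes), e.g. (3, 4, 2) above
def flatten(probas):
    # hstack the per-classifier blocks -> (n_samples, n_classifiers * n_classes)
    return np.hstack(list(probas))  # e.g. (4, 6) above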
Example 7: main
def main(directory, tools_directory, non_tools_dir):
global path
path = sys.path[0]
start = time.time()
if directory is None or not os.path.isdir(directory):
print "Please input directory containing pdf publications to classify"
sys.exit(1)
x_train, y_train = fetch_from_file()
x_test, test_files = get_test_set(directory)
# Just for testing, update machine learning part later
x_train, x_test = normalize_scale(x_train, x_test)
    classifier = VotingClassifier(
        # estimator names must be unique
        [("first", classifier_list[0]), ("second", classifier_list[1]), ("third", classifier_list[2])]
    )
classifier.fit(x_train, y_train)
y_pred = classifier.predict(x_test)
if os.path.isdir(tools_directory):
shutil.rmtree(tools_directory)
os.makedirs(tools_directory)
if os.path.isdir(non_tools_dir):
shutil.rmtree(non_tools_dir)
os.makedirs(non_tools_dir)
for num, pub in zip(y_pred, test_files):
if num:
shutil.copy2(directory + pub, tools_directory + pub)
else:
shutil.copy2(directory + pub, non_tools_dir + pub)
print "Classification: Seconds taken: " + str(time.time() - start)
Example 8: test_tie_situation
def test_tie_situation():
"""Check voting classifier selects smaller class label in tie situation."""
clf1 = LogisticRegression(random_state=123)
clf2 = RandomForestClassifier(random_state=123)
eclf = VotingClassifier(estimators=[("lr", clf1), ("rf", clf2)], voting="hard")
assert_equal(clf1.fit(X, y).predict(X)[73], 2)
assert_equal(clf2.fit(X, y).predict(X)[73], 1)
assert_equal(eclf.fit(X, y).predict(X)[73], 1)
Example 9: test_tie_situation
def test_tie_situation():
"""Check voting classifier selects smaller class label in tie situation."""
clf1 = LogisticRegression(random_state=123, solver='liblinear')
clf2 = RandomForestClassifier(random_state=123)
eclf = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2)],
voting='hard')
assert_equal(clf1.fit(X, y).predict(X)[73], 2)
assert_equal(clf2.fit(X, y).predict(X)[73], 1)
assert_equal(eclf.fit(X, y).predict(X)[73], 1)
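What both versions of this test pin down: under hard voting, a tie is resolved in favor of the smaller class label, because votes are accumulated per encoded class and np.argmax returns the first maximal index. In miniature:

import numpy as np

# One vote each for encoded classes 0 and 1: argmax takes the first maximum,
# so the smaller label wins the tie.
print(np.bincount([0, 1]).argmax())  # -> 0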
Example 10: process_one_cell
def process_one_cell(df_train, df_test, x_min, x_max, y_min, y_max):
x_border_augment = 0.025
y_border_augment = 0.0125
#Working on df_train
df_cell_train = df_train[(df_train['x'] >= x_min-x_border_augment) & (df_train['x'] < x_max+x_border_augment) &
(df_train['y'] >= y_min-y_border_augment) & (df_train['y'] < y_max+y_border_augment)]
place_counts = df_cell_train.place_id.value_counts()
mask = (place_counts[df_cell_train.place_id.values] >= th).values
df_cell_train = df_cell_train.loc[mask]
#Working on df_test
    # to be deleted: df_cell_test = df_test.loc[df_test.grid_cell == grid_id]
df_cell_test = df_test[(df_test['x'] >= x_min) & (df_test['x'] < x_max) &
(df_test['y'] >= y_min) & (df_test['y'] < y_max)]
row_ids = df_cell_test.index
if(len(df_cell_train) == 0 or len(df_cell_test) == 0):
return None, None
#Feature engineering on x and y
df_cell_train.loc[:,'x'] *= fw[0]
df_cell_train.loc[:,'y'] *= fw[1]
df_cell_test.loc[:,'x'] *= fw[0]
df_cell_test.loc[:,'y'] *= fw[1]
#Preparing data
le = LabelEncoder()
y = le.fit_transform(df_cell_train.place_id.values)
X = df_cell_train.drop(['place_id'], axis=1).values.astype(float)
if 'place_id' in df_cell_test.columns:
cols = df_cell_test.columns
cols = cols.drop('place_id')
X_test = df_cell_test[cols].values.astype(float)
else:
X_test = df_cell_test.values.astype(float)
#Applying the classifier
# clf = KNeighborsClassifier(n_neighbors=26, weights='distance',
# metric='manhattan')
clf1 = BaggingClassifier(KNeighborsClassifier(n_neighbors=26, weights='distance',
metric='manhattan'), n_jobs=-1, n_estimators=50)
clf2 = RandomForestClassifier(n_estimators=100, n_jobs=-1)
    # voting='soft' is required because predict_proba is used below;
    # with voting='hard', predict_proba raises AttributeError
    eclf = VotingClassifier(estimators=[('knn', clf1), ('rf', clf2)], voting='soft')
    eclf.fit(X, y)
    y_pred = eclf.predict_proba(X_test)
pred_labels = le.inverse_transform(np.argsort(y_pred, axis=1)[:,::-1][:,:3])
return pred_labels, row_ids
Example 11: classify
def classify():
    train_X, Y = load_svmlight_file('data/train_last')
    test_X, test_Y = load_svmlight_file('data/test_last')
    train_X = train_X.toarray()
    test_X = test_X.toarray()
    Y = [int(y) for y in Y]
    rows = pd.read_csv('data/log_test2.csv', index_col=0).sort_index().index.unique()
    train_n = train_X.shape[0]
    m = train_X.shape[1]
    test_n = test_X.shape[0]
    print(train_n, m)  # test_n
    # First train all of the classifiers on the training set
    print('train classify...')
    clf1 = LinearDiscriminantAnalysis()
    clf2 = GaussianNB()
    clf3 = LogisticRegression()
    clf4 = RandomForestClassifier()
    clf5 = KNeighborsClassifier(n_neighbors=12)
    clf6 = AdaBoostClassifier()
    # x_train, x_test, y_train, y_test = train_test_split(train_X, Y, test_size=0.2)  # hold out part of the training set for validation
    clf = VotingClassifier(estimators=[('la', clf1), ('nb', clf2), ('lr', clf3),
                                       ('rf', clf4), ('nn', clf5), ('ac', clf6)],
                           voting='soft', weights=[1.5, 1, 1, 1, 1, 1])
    clf.fit(train_X, Y)
    print('end train classify')
    print('start classify....')
    # First classification result: evaluate on the training set
    predict_Y = clf.predict(train_X)
    print('classify result:')
    print(metrics.classification_report(Y, predict_Y))
    # Then predict the test set and write the result file
    predict_Y = clf.predict(test_X)
    print('end classify...')
    DataFrame(predict_Y, index=rows).to_csv('data/info_test2.csv', header=False)
Example 12: test_multilabel
def test_multilabel():
"""Check if error is raised for multilabel classification."""
X, y = make_multilabel_classification(n_classes=2, n_labels=1, allow_unlabeled=False, random_state=123)
clf = OneVsRestClassifier(SVC(kernel="linear"))
eclf = VotingClassifier(estimators=[("ovr", clf)], voting="hard")
try:
eclf.fit(X, y)
except NotImplementedError:
return
Example 13: test_predict_for_hard_voting
def test_predict_for_hard_voting():
# Test voting classifier with non-integer (float) prediction
clf1 = FaultySVC(random_state=123)
clf2 = GaussianNB()
clf3 = SVC(probability=True, random_state=123)
eclf1 = VotingClassifier(estimators=[
('fsvc', clf1), ('gnb', clf2), ('svc', clf3)], weights=[1, 2, 3],
voting='hard')
eclf1.fit(X, y)
eclf1.predict(X)
Example 14: test_sample_weight_kwargs
def test_sample_weight_kwargs():
"""Check that VotingClassifier passes sample_weight as kwargs"""
class MockClassifier(BaseEstimator, ClassifierMixin):
"""Mock Classifier to check that sample_weight is received as kwargs"""
def fit(self, X, y, *args, **sample_weight):
assert_true('sample_weight' in sample_weight)
clf = MockClassifier()
eclf = VotingClassifier(estimators=[('mock', clf)], voting='soft')
# Should not raise an error.
eclf.fit(X, y, sample_weight=np.ones((len(y),)))
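In normal use, the point of this passthrough is that VotingClassifier.fit forwards sample_weight to every underlying estimator's fit (and raises if one of them does not accept it). A typical call, assuming eclf, X and y as above and illustrative weight values:

import numpy as np

# Down-weight the first half of the training samples (illustrative only)
weights = np.ones(len(y))
weights[:len(y) // 2] = 0.5
eclf.fit(X, y, sample_weight=weights)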
Example 15: train
def train(self):
for bin_id in sorted(self.xy_bins):
file_name = xybins_file_name_str.format(bin_id)
        print('Training model: {} of {}'.format(bin_id, max(self.xy_bins)))
df = self.df
wdf = df[df.xy_bin == bin_id]
X = wdf[self.features]
y = wdf.place_id
model = VotingClassifier(self.models)
model.fit(X, y)
        joblib.dump(model, file_name, compress=3)
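At prediction time the counterpart is to load each per-bin model back from disk. A sketch, where xybins_file_name_str comes from the surrounding script and bin_id / X_new stand in for a bin id and new rows with the same feature columns:

import joblib

model = joblib.load(xybins_file_name_str.format(bin_id))  # persisted VotingClassifier
y_pred = model.predict(X_new)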