本文整理汇总了Python中sklearn.dummy.DummyClassifier.fit方法的典型用法代码示例。如果您正苦于以下问题:Python DummyClassifier.fit方法的具体用法?Python DummyClassifier.fit怎么用?Python DummyClassifier.fit使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.dummy.DummyClassifier的用法示例。
在下文中一共展示了DummyClassifier.fit方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: get_scores
# 需要导入模块: from sklearn.dummy import DummyClassifier [as 别名]
# 或者: from sklearn.dummy.DummyClassifier import fit [as 别名]
def get_scores(X, y):
    """Compare a grid-searched logistic regression with a majority baseline.

    ``(X, y)`` is split 200 times with ``StratifiedShuffleSplit`` (20% of
    the samples held out each time).  On every split the ``C`` parameter of
    a ``LogisticRegression`` is selected with ``GridSearchCV`` on the
    training part, and the accuracy of the selected model is recorded on
    both parts next to the test accuracy of a ``most_frequent``
    ``DummyClassifier`` fitted on the same training part.

    Parameters
    ----------
    X : indexed as ``X[rows, :]`` -- presumably a 2-D NumPy array.
    y : indexed with the index arrays produced by the splitter.

    Returns
    -------
    tuple
        ``(test_scores, train_scores, dummy_scores, preds, true_labels)``.
        The first three are lists with one accuracy per split; the last
        two are flat lists holding every test prediction and the matching
        true label, concatenated over all splits.
    """
    nfolds = 200
    cv = StratifiedShuffleSplit(y, n_iter=nfolds, test_size=0.2)
    dumb = DummyClassifier(strategy="most_frequent")
    param_dist = {"C": [1e6, 1e5, 1e4, 1e3, 1e2, 10, 1, 0.1, 0.01, 0.001]}
    # NOTE(review): the model selection scorer is a regression metric while
    # the reported scores are accuracies -- confirm this is intended.
    search = GridSearchCV(
        linear_model.LogisticRegression(),
        param_grid=param_dist,
        scoring="mean_absolute_error",
    )
    test_scores, train_scores, dummy_scores = [], [], []
    preds, true_labels = [], []
    for train, test in cv:
        y_train, y_test = y[train], y[test]
        X_train, X_test = X[train, :], X[test, :]

        # With the default refit=True the search has already refit the best
        # parameter setting on the whole training split, so best_estimator_
        # is ready to predict without another fit.
        search.fit(X_train, y_train)
        clf = search.best_estimator_
        print(search.best_params_)

        # Predict the test split once and reuse it for score and output.
        test_pred = clf.predict(X_test)
        train_scores.append(accuracy_score(y_train, clf.predict(X_train)))
        test_scores.append(accuracy_score(y_test, test_pred))

        dumb.fit(X_train, y_train)
        dummy_scores.append(accuracy_score(y_test, dumb.predict(X_test)))

        preds += list(test_pred)
        true_labels += list(y_test)
    return test_scores, train_scores, dummy_scores, preds, true_labels
示例2: run_ML_leave_one_subject_out
# 需要导入模块: from sklearn.dummy import DummyClassifier [as 别名]
# 或者: from sklearn.dummy.DummyClassifier import fit [as 别名]
def run_ML_leave_one_subject_out(config, filename, question, clf, cols, return_arr=None, return_index=-1):
    """Average the score of ``clf`` and of two dummy baselines over all splits.

    The data is loaded with ``load_data`` from ``config['DATA_DIRECTORY']``
    and split by ``leave_one_subject_out`` on the ``'User'`` key.  For each
    ``(train, test)`` pair, ``clf`` and two ``DummyClassifier`` baselines
    (``most_frequent`` and ``stratified``) are fitted on the training part
    and scored on the test part.  The per-split scores are averaged over
    ``len(data)`` and rounded to two decimals.

    Parameters
    ----------
    config : mapping providing the ``'DATA_DIRECTORY'`` entry.
    filename, cols, question : forwarded unchanged to ``load_data``.
    clf : estimator exposing ``fit`` and ``score``; it is refitted in place
        on every split.
    return_arr : optional indexable container receiving the result when
        ``return_index`` is not ``-1``.
    return_index : position in ``return_arr`` to write to, or ``-1`` to
        return the result instead.

    Returns
    -------
    tuple or None
        ``(score, score_dummy_mf, score_dummy_sf)`` when ``return_index``
        is ``-1``; otherwise the tuple is stored in
        ``return_arr[return_index]`` and nothing is returned.

    Raises
    ------
    ZeroDivisionError
        If ``leave_one_subject_out`` yields no splits.
    """
    working_directory = config['DATA_DIRECTORY']
    data_X, data_y = load_data(working_directory, filename, cols, question)
    data = leave_one_subject_out(data_X, data_y, 'User')

    # Pass the strategy by keyword, which every scikit-learn release accepts.
    dummy_clf_mf = DummyClassifier(strategy='most_frequent')
    dummy_clf_sf = DummyClassifier(strategy='stratified')

    score = 0
    score_dummy_mf = 0
    score_dummy_sf = 0
    for (training_X, training_y), (testing_X, testing_y) in data:
        clf.fit(training_X, training_y)
        dummy_clf_mf.fit(training_X, training_y)
        dummy_clf_sf.fit(training_X, training_y)

        score += clf.score(testing_X, testing_y).mean()
        score_dummy_mf += dummy_clf_mf.score(testing_X, testing_y).mean()
        score_dummy_sf += dummy_clf_sf.score(testing_X, testing_y).mean()

    n_splits = len(data)
    score = round(float(score / n_splits), 2)
    score_dummy_mf = round(float(score_dummy_mf / n_splits), 2)
    score_dummy_sf = round(float(score_dummy_sf / n_splits), 2)

    if return_index == -1:
        return score, score_dummy_mf, score_dummy_sf
    else:
        # Out-parameter mode: the caller collects results by slot.
        return_arr[return_index] = (score, score_dummy_mf, score_dummy_sf)
示例3: _run_dummy_detection
# 需要导入模块: from sklearn.dummy import DummyClassifier [as 别名]
# 或者: from sklearn.dummy.DummyClassifier import fit [as 别名]
def _run_dummy_detection(x_train, x_test, y_train, y_test):
    """Fit a majority-class baseline and print its score on the test set.

    A ``DummyClassifier`` with the ``most_frequent`` strategy is fitted on
    ``(x_train, y_train)`` and the value of ``clf.score(x_test, y_test)``
    is printed.  Nothing is returned.
    """
    clf = DummyClassifier(strategy='most_frequent')
    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print("Training Dummy...")
    clf.fit(x_train, y_train)
    print("Predicting Test Set...")
    print("Score for test set: {}".format(clf.score(x_test, y_test)))
示例4: do_cross_validation
# 需要导入模块: from sklearn.dummy import DummyClassifier [as 别名]
# 或者: from sklearn.dummy.DummyClassifier import fit [as 别名]
def do_cross_validation(labels):
    """Evaluate a majority-class baseline with stratified k-fold splits.

    For every fold produced by ``StratifiedKFold(labels, NO_OF_FOLDS)`` a
    ``most_frequent`` ``DummyClassifier`` is fitted on the training labels
    only (no features are used) and asked to predict the test instances.

    Keyword arguments:
    labels -- all labels; indexed with the fold index arrays

    Returns a pair ``(single_predictions, classification_result)``:
    ``single_predictions`` stacks one row per test instance with the
    columns fold number, instance index, true label and predicted label;
    ``classification_result`` has ``NO_OF_FOLDS + 1`` rows of 5 values,
    where row ``k`` holds what ``get_classification_result`` returned for
    fold ``k``.

    NOTE(review): the extra final row, apparently reserved for the
    combination of all folds, is never written in this function and stays
    zero -- confirm whether the caller fills it in.
    """
    folds = StratifiedKFold(labels, NO_OF_FOLDS)
    per_fold_rows = []
    # One row per fold plus one spare row for the whole task.
    classification_result = np.zeros((NO_OF_FOLDS + 1, 5))
    for fold_no, (train_idx, test_idx) in enumerate(folds):
        true_labels = labels[test_idx]

        baseline = DummyClassifier(strategy='most_frequent')
        baseline.fit(None, labels[train_idx])
        # The baseline ignores feature values; only the row count matters.
        pred_labels = baseline.predict(np.zeros(true_labels.shape[0]))

        # Float column holding the fold number for every test instance.
        fold_column = np.empty(test_idx.shape[0])
        fold_column[:] = fold_no

        stacked = np.vstack((fold_column, test_idx, true_labels, pred_labels))
        per_fold_rows.append(stacked.T)

        classification_result[fold_no, :] = get_classification_result(
            fold_no, true_labels, pred_labels)
    single_predictions = np.vstack(per_fold_rows)
    return single_predictions, classification_result
示例5: get_scores
# 需要导入模块: from sklearn.dummy import DummyClassifier [as 别名]
# 或者: from sklearn.dummy.DummyClassifier import fit [as 别名]
def get_scores(X, y):
    """Score an SVC on every feature subset, grouped by subset size.

    For each subset size from 1 to ``X.shape[1]`` every combination of
    that many columns is evaluated over the splits of a
    ``StratifiedShuffleSplit`` (40 iterations, 5% held out).  On each split
    an ``SVC`` is selected with ``GridSearchCV``, refitted on the chosen
    columns only, and its train and test accuracy are recorded together
    with the test accuracy of a ``most_frequent`` ``DummyClassifier``.

    Parameters
    ----------
    X : object with ``shape[1]`` columns, indexed as ``X[rows, :]`` and
        ``X[:, cols]`` -- presumably a 2-D NumPy array.
    y : indexed with the index arrays produced by the splitter.

    Returns
    -------
    tuple of lists
        ``(stest, strain, sdummy)``; entry ``k`` of each list is the mean
        accuracy over all splits and all combinations of ``k + 1`` columns.
    """
    nfolds = 40
    cv = StratifiedShuffleSplit(y, n_iter=nfolds, test_size=.05)
    dumb = DummyClassifier(strategy='most_frequent')
    param_dist = {"C": [.1, 1, 10],
                  "kernel": ['rbf', 'linear', 'poly']}
    search = GridSearchCV(svm.SVC(class_weight='auto'), param_grid=param_dist,
                          scoring='mean_absolute_error')
    stest, strain, sdummy = [], [], []
    n_features = X.shape[1]
    for subset_size in range(1, n_features + 1):
        test_scores, train_scores, dummy_scores = [], [], []
        # Every combination of `subset_size` column indices.
        for my_feats in itertools.combinations(range(n_features), subset_size):
            # The column selector depends only on the combination, so it is
            # built once here and not once per split.
            idx = np.array(my_feats)
            for train, test in cv:
                y_train, y_test = y[train], y[test]
                X_train, X_test = X[train, :], X[test, :]
                # NOTE(review): the parameter search sees all columns; only
                # the refit below is restricted to the current subset.
                # Confirm this is intended.
                search.fit(X_train, y_train)
                clf = search.best_estimator_

                X_train_sub, X_test_sub = X_train[:, idx], X_test[:, idx]
                clf.fit(X_train_sub, y_train)
                train_scores.append(accuracy_score(clf.predict(X_train_sub), y_train))
                test_scores.append(accuracy_score(clf.predict(X_test_sub), y_test))
                dumb.fit(X_train_sub, y_train)
                dummy_scores.append(accuracy_score(dumb.predict(X_test_sub), y_test))
        sdummy.append(np.mean(dummy_scores))
        strain.append(np.mean(train_scores))
        stest.append(np.mean(test_scores))
    return stest, strain, sdummy
示例6: test_dummy_classifier_on_nan_value
# 需要导入模块: from sklearn.dummy import DummyClassifier [as 别名]
# 或者: from sklearn.dummy.DummyClassifier import fit [as 别名]
def test_dummy_classifier_on_nan_value():
    """Fitting and predicting must work when X contains a NaN."""
    # np.nan is the canonical spelling; the np.NaN alias was removed in
    # NumPy 2.0.
    X = [[np.nan]]
    y = [1]
    y_expected = [1]
    clf = DummyClassifier()
    clf.fit(X, y)
    y_pred = clf.predict(X)
    assert_array_equal(y_pred, y_expected)
示例7: test_most_frequent_strategy
# 需要导入模块: from sklearn.dummy import DummyClassifier [as 别名]
# 或者: from sklearn.dummy.DummyClassifier import fit [as 别名]
def test_most_frequent_strategy():
    """The most_frequent strategy predicts label 1 for targets [1, 2, 1, 1]."""
    features = [[0]] * 4  # placeholder values; the test expects them to be ignored
    targets = [1, 2, 1, 1]

    model = DummyClassifier(strategy="most_frequent", random_state=0)
    model.fit(features, targets)

    expected = np.ones(len(features))
    assert_array_equal(model.predict(features), expected)
    _check_predict_proba(model, features, targets)
示例8: test_constant_strategy_multioutput
# 需要导入模块: from sklearn.dummy import DummyClassifier [as 别名]
# 或者: from sklearn.dummy.DummyClassifier import fit [as 别名]
def test_constant_strategy_multioutput():
    """The constant strategy repeats ``constant=[1, 0]`` for two outputs."""
    features = [[0]] * 4  # placeholder values; the test expects them to be ignored
    targets = np.array([[2, 3], [1, 3], [2, 3], [2, 0]])

    model = DummyClassifier(strategy="constant", random_state=0, constant=[1, 0])
    model.fit(features, targets)

    # Expected prediction: a column of ones next to a column of zeros.
    n_rows = len(features)
    expected = np.hstack([np.ones((n_rows, 1)), np.zeros((n_rows, 1))])
    assert_array_equal(model.predict(features), expected)
    _check_predict_proba(model, features, targets)
示例9: test_dummy_classifier_on_3D_array
# 需要导入模块: from sklearn.dummy import DummyClassifier [as 别名]
# 或者: from sklearn.dummy.DummyClassifier import fit [as 别名]
def test_dummy_classifier_on_3D_array():
    """Fit and predict with a 3-D array of strings passed as X."""
    samples = np.array([[['foo']], [['bar']], [['baz']]])
    targets = [2, 2, 2]

    model = DummyClassifier()
    model.fit(samples, targets)
    predicted_labels = model.predict(samples)
    predicted_proba = model.predict_proba(samples)

    # Only one class was seen, so every sample gets label 2 with probability 1.
    assert_array_equal(predicted_labels, [2, 2, 2])
    assert_array_equal(predicted_proba, [[1], [1], [1]])
示例10: test_constant_strategy_sparse_target
# 需要导入模块: from sklearn.dummy import DummyClassifier [as 别名]
# 或者: from sklearn.dummy.DummyClassifier import fit [as 别名]
def test_constant_strategy_sparse_target():
    """The constant strategy keeps a sparse target sparse in its prediction."""
    X = [[0]] * 5  # ignored
    y = sp.csc_matrix(np.array([[0, 1], [4, 0], [1, 1], [1, 4], [1, 1]]))
    n_samples = len(X)
    y_expected = np.hstack([np.ones((n_samples, 1)), np.zeros((n_samples, 1))])

    clf = DummyClassifier(strategy="constant", random_state=0, constant=[1, 0])
    clf.fit(X, y)
    y_pred = clf.predict(X)

    # A plain assert replaces the nose-style assert_true helper, which is
    # not shipped by current scikit-learn testing utilities.
    assert sp.issparse(y_pred)
    assert_array_equal(y_pred.toarray(), y_expected)
示例11: test_most_frequent_and_prior_strategy_multioutput
# 需要导入模块: from sklearn.dummy import DummyClassifier [as 别名]
# 或者: from sklearn.dummy.DummyClassifier import fit [as 别名]
def test_most_frequent_and_prior_strategy_multioutput():
    """Both prior and most_frequent predict [1, 0] for the two outputs."""
    features = [[0]] * 4  # placeholder values; the test expects them to be ignored
    targets = np.array([[1, 0], [2, 0], [1, 0], [1, 3]])
    n_rows = len(features)

    for strategy_name in ("prior", "most_frequent"):
        model = DummyClassifier(strategy=strategy_name, random_state=0)
        model.fit(features, targets)

        predicted = model.predict(features)
        expected = np.hstack([np.ones((n_rows, 1)), np.zeros((n_rows, 1))])
        assert_array_equal(predicted, expected)

        _check_predict_proba(model, features, targets)
        _check_behavior_2d(model)
示例12: test_stratified_strategy
# 需要导入模块: from sklearn.dummy import DummyClassifier [as 别名]
# 或者: from sklearn.dummy.DummyClassifier import fit [as 别名]
def test_stratified_strategy():
    """Stratified predictions follow the 3/5 vs 2/5 training label ratio."""
    train_features = [[0]] * 5  # placeholder values; the test expects them to be ignored
    targets = [1, 2, 1, 1, 2]
    model = DummyClassifier(strategy="stratified", random_state=0)
    model.fit(train_features, targets)

    # Draw many predictions so the empirical frequencies are stable to
    # one decimal place.
    n_draws = 1000
    many_features = [[0]] * n_draws
    drawn_labels = model.predict(many_features)
    label_freq = np.bincount(drawn_labels) / float(n_draws)

    assert_almost_equal(label_freq[1], 3. / 5, decimal=1)
    assert_almost_equal(label_freq[2], 2. / 5, decimal=1)
    _check_predict_proba(model, many_features, targets)
示例13: test_uniform_strategy
# 需要导入模块: from sklearn.dummy import DummyClassifier [as 别名]
# 或者: from sklearn.dummy.DummyClassifier import fit [as 别名]
def test_uniform_strategy():
    """Uniform predictions pick each of the two labels about half the time."""
    train_features = [[0]] * 4  # placeholder values; the test expects them to be ignored
    targets = [1, 2, 1, 1]
    model = DummyClassifier(strategy="uniform", random_state=0)
    model.fit(train_features, targets)

    # Draw many predictions so the empirical frequencies are stable to
    # one decimal place.
    n_draws = 500
    many_features = [[0]] * n_draws
    drawn_labels = model.predict(many_features)
    label_freq = np.bincount(drawn_labels) / float(n_draws)

    assert_almost_equal(label_freq[1], 0.5, decimal=1)
    assert_almost_equal(label_freq[2], 0.5, decimal=1)
    _check_predict_proba(model, many_features, targets)
示例14: test_classifier_prediction_independent_of_X
# 需要导入模块: from sklearn.dummy import DummyClassifier [as 别名]
# 或者: from sklearn.dummy.DummyClassifier import fit [as 别名]
def test_classifier_prediction_independent_of_X(strategy):
    """Two identically configured classifiers agree on different X values.

    ``strategy`` is forwarded to ``DummyClassifier``; one model is fitted
    and queried on rows of zeros, the other on rows of ones, with the same
    targets, and their predictions must be equal.
    """
    targets = [0, 2, 1, 1]
    predictions = []
    for fill_value in (0, 1):
        features = [[fill_value]] * 4
        model = DummyClassifier(strategy=strategy, random_state=0, constant=0)
        model.fit(features, targets)
        predictions.append(model.predict(features))

    from_zeros, from_ones = predictions
    assert_array_equal(from_zeros, from_ones)
示例15: test_most_frequent_and_prior_strategy_sparse_target
# 需要导入模块: from sklearn.dummy import DummyClassifier [as 别名]
# 或者: from sklearn.dummy.DummyClassifier import fit [as 别名]
def test_most_frequent_and_prior_strategy_sparse_target():
    """most_frequent and prior keep a sparse target sparse and predict [1, 0]."""
    X = [[0]] * 5  # ignored
    y = sp.csc_matrix(np.array([[1, 0], [1, 3], [4, 0], [0, 1], [1, 0]]))
    n_samples = len(X)
    y_expected = np.hstack([np.ones((n_samples, 1)), np.zeros((n_samples, 1))])

    for strategy in ("most_frequent", "prior"):
        clf = DummyClassifier(strategy=strategy, random_state=0)
        clf.fit(X, y)
        y_pred = clf.predict(X)

        # A plain assert replaces the nose-style assert_true helper, which
        # is not shipped by current scikit-learn testing utilities.
        assert sp.issparse(y_pred)
        assert_array_equal(y_pred.toarray(), y_expected)