本文整理汇总了 Python 中 sklearn.dummy.DummyClassifier.predict_proba 方法的典型用法代码示例。如果您正苦于以下问题：Python DummyClassifier.predict_proba 方法的具体用法？Python DummyClassifier.predict_proba 怎么用？Python DummyClassifier.predict_proba 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 sklearn.dummy.DummyClassifier 的用法示例。
在下文中一共展示了DummyClassifier.predict_proba方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_most_frequent_and_prior_strategy
# 需要导入模块: from sklearn.dummy import DummyClassifier [as 别名]
# 或者: from sklearn.dummy.DummyClassifier import predict_proba [as 别名]
def test_most_frequent_and_prior_strategy():
    """Check the ``most_frequent`` and ``prior`` strategies of DummyClassifier.

    For both strategies the test asserts that every prediction equals the
    majority class of ``y`` (class 1).  For ``predict_proba`` it asserts that
    ``prior`` returns ``class_prior_`` itself, while ``most_frequent`` returns
    the indicator row ``class_prior_ > 0.5``.
    """
    X = [[0], [0], [0], [0]]  # ignored
    y = [1, 2, 1, 1]

    for strategy in ("most_frequent", "prior"):
        clf = DummyClassifier(strategy=strategy, random_state=0)
        clf.fit(X, y)
        # Class 1 is the majority class, so every prediction must be 1.
        assert_array_equal(clf.predict(X), np.ones(len(X)))
        _check_predict_proba(clf, X, y)

        # Query with an explicit one-row batch.  The bare sample ``X[0]`` is
        # a 1-D list that only looks like "one sample" because it happens to
        # hold exactly one feature.
        single_row = [X[0]]
        expected = clf.class_prior_.reshape((1, -1))
        if strategy == "prior":
            assert_array_equal(clf.predict_proba(single_row), expected)
        else:
            assert_array_equal(clf.predict_proba(single_row), expected > 0.5)
示例2: test_dummy_classifier_on_3D_array
# 需要导入模块: from sklearn.dummy import DummyClassifier [as 别名]
# 或者: from sklearn.dummy.DummyClassifier import predict_proba [as 别名]
def test_dummy_classifier_on_3D_array():
    """Fit and query a default DummyClassifier on a 3-D array of strings.

    All three samples carry the single label 2, so the test expects every
    prediction to be 2 and every probability row to be ``[1]``.
    """
    samples = np.array([[['foo']], [['bar']], [['baz']]])
    labels = [2] * 3

    clf = DummyClassifier()
    clf.fit(samples, labels)

    # Keep the original call order: predict first, then predict_proba.
    predicted = clf.predict(samples)
    predicted_proba = clf.predict_proba(samples)

    assert_array_equal(predicted, [2, 2, 2])
    assert_array_equal(predicted_proba, [[1], [1], [1]])
示例3: Model
# 需要导入模块: from sklearn.dummy import DummyClassifier [as 别名]
# 或者: from sklearn.dummy.DummyClassifier import predict_proba [as 别名]
#.........这里部分代码省略.........
log_info('Training...')
if self.use_weights:
self.classifier.fit(train_filt, train_classes,
sample_weight=train.inst_weights)
else:
self.classifier.fit(train_filt, train_classes)
self.classifier_trained = True
log_info('Training done.')
def train(self, train_file, encoding='UTF-8'):
    """Load the training set from the given file and train the model on it.

    @param train_file: training data file, handed to load_training_set
    @param encoding: encoding handed to load_training_set (default UTF-8)
    """
    training_set = self.load_training_set(train_file, encoding)
    self.train_on_data(training_set)
def classify(self, instances, pdist=False):
    """Classify a set of instances (possibly a single one).

    @param instances: the instance(s) to classify; normalized through
        check_classification_input, which also reports whether the result
        should be unwrapped from its list
    @param pdist: when exactly True, return probability distributions
        (dictionaries mapping class values to probabilities) instead of
        single class values
    @return: the (optionally post-processed) results, or only the first one
        if check_classification_input signalled a non-list input; an empty
        input is returned unchanged
    """
    # prepare for classification; nothing to do for empty input
    instances, nolist = self.check_classification_input(instances)
    if not instances:
        return instances

    # vectorize, then apply the feature filter if one is configured
    features = self.__vectorize(instances)
    if self.feature_filter is not None:
        features = self.__filter_features(features)

    # run the classifier (probability distributions only on pdist=True)
    want_dist = pdist is True
    if want_dist:
        raw = self.classifier.predict_proba(features)
    else:
        raw = self.classifier.predict(features)

    # map the classifier's numeric outputs back to class attribute values
    class_attr = self.data_headers.get_attrib(self.class_attr)
    to_label = class_attr.value
    if want_dist:
        values = [{to_label(idx): prob for idx, prob in enumerate(row)}
                  for row in raw]
    else:
        values = [to_label(idx) for idx in raw]

    # (optional) post-processing of each result with its source instance
    if self.postprocess:
        values = [self.postprocess(inst, val)
                  for inst, val in zip(instances, values)]

    return values[0] if nolist else values
def __vectorize(self, data):
"""\
Train vectorization and subsequently vectorize. Accepts a DataSet
or a list of dictionaries to be vectorized.
"""
# no vectorization performed, only converted to matrix
if self.vectorizer is None:
if not isinstance(data, DataSet):
data_set = self.data_headers.get_headers()
data_set.append_from_dict(data, add_values=True, default_val=self.unknown_value)
data = data_set
data.match_headers(self.data_headers, add_values=True)
# TODO pre-filtering here?
return data.as_bunch(target=self.class_attr,
select_attrib=self.attr_mask).data
# vectorization needed: converted to dictionary
示例4: _generalized_cross_validation_clas
# 需要导入模块: from sklearn.dummy import DummyClassifier [as 别名]
# 或者: from sklearn.dummy.DummyClassifier import predict_proba [as 别名]
def _fit_model_or_dummy(learner, X, y):
    """Fit and return a fresh classifier for the data set (X, y).

    When y holds fewer than 2 unique class values, an ordinary model (e.g.
    logistic regression) cannot be fitted.  A DummyClassifier is used instead
    and subsequently augmented (via change_dummy_classes) to handle all the
    other class values.  Otherwise a clone of ``learner`` is fitted, so that
    each data set gets its own classifier, and passed to _check_classes.
    """
    if len(np.unique(y)) < 2:
        model = DummyClassifier()
        model.fit(X, y)
        change_dummy_classes(model, np.array([0, 1]))
    else:
        model = clone(learner)
        model.fit(X, y)
        _check_classes(model)
    return model


def _generalized_cross_validation_clas(learner, data1, data2, cv_folds1):
    """Perform one part of the generalized version of the cross-validation
    testing method on the given data sets.

    Perform cross-validation over data set data1. For each fold of data1,
    build models on the remaining folds of data1, the whole data set data2 and
    the merged data set and test them on the selected fold of data1.
    Return a tuple (pred_errs1, pred_errs2, pred_errsm), where:
        pred_errs1 -- numpy.array of prediction errors of the model built on
            the remaining folds of data1 for instances in data1
        pred_errs2 -- numpy.array of prediction errors of the model built on
            the whole data set data2 for instances in data1
        pred_errsm -- numpy.array of prediction errors of the model built on
            the merged data set for instances in data1

    Arguments:
    learner -- scikit-learn classification estimator
    data1 -- tuple (X, y) representing the first data set, where:
        X -- numpy.array which holds the attribute values
        y -- numpy.array which holds the class value
    data2 -- tuple (X, y) representing the second data set, where:
        X -- numpy.array which holds the attribute values
        y -- numpy.array which holds the class value
    cv_folds1 -- list of tuples (learn, test) to perform cross-validation over
        data1, where:
        learn -- numpy.array with a Boolean mask for selecting learning
            instances
        test -- numpy.array with a Boolean mask for selecting testing
            instances

    NOTE: The class values are used directly as column indices into the
    predict_proba output, so this assumes they are the integers 0 and 1
    (matching the classes given to change_dummy_classes) -- TODO confirm.

    """
    # unpack the data1 and data2 tuples
    X1, y1 = data1
    X2, y2 = data2

    # build a model on data2
    # NOTE: The model does not change throughout cross-validation on data1
    model2 = _fit_model_or_dummy(learner, X2, y2)

    # prediction errors of models computed as:
    #   1 - P_model(predicted_class == true_class)
    # (pred. errors of the model built on data2 can be computed right away)
    pred_proba2 = model2.predict_proba(X1)
    pred_errs2 = 1 - pred_proba2[np.arange(y1.shape[0]), y1]

    # initialized to -1; only entries selected by some fold's test mask are
    # overwritten below
    pred_errs1 = -np.ones(y1.shape)
    pred_errsm = -np.ones(y1.shape)

    # perform generalized cross-validation on data1
    for learn_ind, test_ind in cv_folds1:
        # create testing data arrays for the current fold
        test_X, test_y = X1[test_ind], y1[test_ind]
        # create learning data arrays for the current fold
        learn1 = X1[learn_ind], y1[learn_ind]
        learnm = (np.concatenate((X1[learn_ind], X2), axis=0),
                  np.concatenate((y1[learn_ind], y2), axis=0))

        # build models; the merged model is always fitted on the merged data,
        # including when it has to fall back to a dummy classifier
        model1 = _fit_model_or_dummy(learner, *learn1)
        modelm = _fit_model_or_dummy(learner, *learnm)

        # compute the prediction errors of both models on the current testing
        # data
        rows = np.arange(test_y.shape[0])
        pred_proba1 = model1.predict_proba(test_X)
        pred_errs1[test_ind] = 1 - pred_proba1[rows, test_y]
        pred_probam = modelm.predict_proba(test_X)
        pred_errsm[test_ind] = 1 - pred_probam[rows, test_y]

    return pred_errs1, pred_errs2, pred_errsm