当前位置: 首页>>代码示例>>Python>>正文


Python DummyClassifier.predict_proba方法代码示例

本文整理汇总了Python中sklearn.dummy.DummyClassifier.predict_proba方法的典型用法代码示例。如果您正苦于以下问题：Python DummyClassifier.predict_proba方法的具体用法？Python DummyClassifier.predict_proba怎么用？Python DummyClassifier.predict_proba使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.dummy.DummyClassifier的用法示例。


在下文中一共展示了DummyClassifier.predict_proba方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_most_frequent_and_prior_strategy

# 需要导入模块: from sklearn.dummy import DummyClassifier [as 别名]
# 或者: from sklearn.dummy.DummyClassifier import predict_proba [as 别名]
def test_most_frequent_and_prior_strategy():
    """Check the "most_frequent" and "prior" strategies of DummyClassifier.

    For both strategies the classifier is fitted on labels whose majority
    class is 1 and must predict 1 for every sample. For a single sample,
    "prior" must return the fitted ``class_prior_`` as a one-row matrix,
    while "most_frequent" must return the row obtained by thresholding
    ``class_prior_`` at 0.5.
    """
    X = [[0], [0], [0], [0]]  # ignored
    y = [1, 2, 1, 1]

    for strategy in ("most_frequent", "prior"):
        clf = DummyClassifier(strategy=strategy, random_state=0)
        clf.fit(X, y)
        assert_array_equal(clf.predict(X), np.ones(len(X)))
        _check_predict_proba(clf, X, y)

        # Wrap the sample in a list so the estimator receives a 2D
        # (one sample, one feature) input rather than a bare 1D sample.
        single_sample = [X[0]]
        expected = clf.class_prior_.reshape((1, -1))
        if strategy != "prior":
            # "most_frequent" puts all probability mass on the majority
            # class, i.e. the class whose prior exceeds 0.5 here.
            expected = expected > 0.5
        assert_array_equal(clf.predict_proba(single_sample), expected)
开发者ID:jonathanwoodard,项目名称:scikit-learn,代码行数:16,代码来源:test_dummy.py

示例2: test_dummy_classifier_on_3D_array

# 需要导入模块: from sklearn.dummy import DummyClassifier [as 别名]
# 或者: from sklearn.dummy.DummyClassifier import predict_proba [as 别名]
def test_dummy_classifier_on_3D_array():
    """Check that DummyClassifier fits and predicts on a 3D feature array.

    The features are a (3, 1, 1) array of strings and every label is 2, so
    the classifier must predict 2 with probability 1 for each sample.
    """
    features = np.array([[['foo']], [['bar']], [['baz']]])
    targets = [2, 2, 2]

    dummy = DummyClassifier()
    dummy.fit(features, targets)

    # Keep predict before predict_proba, matching the original call order.
    predicted = dummy.predict(features)
    predicted_proba = dummy.predict_proba(features)

    assert_array_equal(predicted, [2, 2, 2])
    assert_array_equal(predicted_proba, [[1], [1], [1]])
开发者ID:aniryou,项目名称:scikit-learn,代码行数:13,代码来源:test_dummy.py

示例3: Model

# 需要导入模块: from sklearn.dummy import DummyClassifier [as 别名]
# 或者: from sklearn.dummy.DummyClassifier import predict_proba [as 别名]

#.........这里部分代码省略.........
        log_info('Training...')
        if self.use_weights:
            self.classifier.fit(train_filt, train_classes,
                                sample_weight=train.inst_weights)
        else:
            self.classifier.fit(train_filt, train_classes)
        self.classifier_trained = True
        log_info('Training done.')

    def train(self, train_file, encoding='UTF-8'):
        """\
        Load the training set from the given file and train the model on it.

        @param train_file: training data file, handed to load_training_set
        @param encoding: encoding handed to load_training_set
        """
        training_data = self.load_training_set(train_file, encoding)
        self.train_on_data(training_data)

    def classify(self, instances, pdist=False):
        """\
        Classify one or more instances with the trained classifier.

        @param instances: the instance(s) to classify, normalized by \
            check_classification_input
        @param pdist: if exactly True, return probability distributions \
            (dictionaries mapping class values to probabilities) instead \
            of predicted class values
        @return: the input itself if it is empty after normalization; \
            otherwise a list of results, or only its first element when \
            check_classification_input sets its second return value
        """
        instances, nolist = self.check_classification_input(instances)
        if not instances:
            # nothing to classify: hand the (empty) input straight back
            return instances

        # vectorize, then apply the feature filter only if one is configured
        vectors = self.__vectorize(instances)
        features = (vectors if self.feature_filter is None
                    else self.__filter_features(vectors))

        if pdist is True:
            # one dictionary per instance: class value -> probability
            distributions = self.classifier.predict_proba(features)
            class_attr = self.data_headers.get_attrib(self.class_attr)
            values = [{class_attr.value(idx): prob
                       for idx, prob in enumerate(dist)}
                      for dist in distributions]
        else:
            predictions = self.classifier.predict(features)
            # translate predicted indices into class attribute values
            class_attr = self.data_headers.get_attrib(self.class_attr)
            values = [class_attr.value(pred) for pred in predictions]
            # optional post-processing (applied to plain predictions only)
            if self.postprocess:
                values = [self.postprocess(inst, val)
                          for inst, val in zip(instances, values)]

        return values[0] if nolist else values

    def __vectorize(self, data):
        """\
        Train vectorization and subsequently vectorize. Accepts a DataSet
        or a list of dictionaries to be vectorized.
        """
        # no vectorization performed, only converted to matrix
        if self.vectorizer is None:
            if not isinstance(data, DataSet):
                data_set = self.data_headers.get_headers()
                data_set.append_from_dict(data, add_values=True, default_val=self.unknown_value)
                data = data_set
            data.match_headers(self.data_headers, add_values=True)
            # TODO pre-filtering here?
            return data.as_bunch(target=self.class_attr,
                                 select_attrib=self.attr_mask).data
        # vectorization needed: converted to dictionary
开发者ID:imclab,项目名称:flect,代码行数:70,代码来源:model.py

示例4: _generalized_cross_validation_clas

# 需要导入模块: from sklearn.dummy import DummyClassifier [as 别名]
# 或者: from sklearn.dummy.DummyClassifier import predict_proba [as 别名]
def _fit_model_or_dummy(learner, X, y):
    """Fit and return a classifier for (X, y), using a dummy if needed.

    When y holds fewer than 2 unique class values, an ordinary model
    (e.g. logistic regression) cannot be fitted. In that case a
    DummyClassifier is fitted instead and passed to change_dummy_classes
    with the class values [0, 1] so that it handles the other class value
    as well. Otherwise a clone of the learner is fitted, so that each data
    set gets its own classifier.

    Arguments:
    learner -- scikit-learn classification estimator (it is cloned, never
        fitted directly)
    X -- numpy.array which holds the attribute values
    y -- numpy.array which holds the class values

    """
    if len(np.unique(y)) < 2:
        model = DummyClassifier()
        model.fit(X, y)
        change_dummy_classes(model, np.array([0, 1]))
    else:
        model = clone(learner)
        model.fit(X, y)
    _check_classes(model)
    return model


def _prediction_errors(model, X, y):
    """Return 1 - P_model(predicted_class == true_class) for each instance.

    NOTE: y is used directly as a column index into the predict_proba
    output, so class values must be valid integer column indices.

    """
    pred_proba = model.predict_proba(X)
    return 1 - pred_proba[np.arange(y.shape[0]), y]


def _generalized_cross_validation_clas(learner, data1, data2, cv_folds1):
    """Perform one part of the generalized version of the cross-validation
    testing method on the given data sets.
    Perform cross-validation over data set data1. For each fold of data1,
    build models on the remaining folds of data1, the whole data set data2 and
    the merged data set and test them on the selected fold of data1.
    Return a tuple (pred_errs1, pred_errs2, pred_errsm), where:
        pred_errs1 -- numpy.array of prediction errors of the model built on the
            remaining folds of data1 for instances in data1
        pred_errs2 -- numpy.array of prediction errors of the model built on the
            whole data set data2 for instances in data1
        pred_errsm -- numpy.array of prediction errors of the model built on the
            merged data set for instances in data1
    Entries of pred_errs1 and pred_errsm for instances that are not selected
    by any testing mask in cv_folds1 keep their initial value of -1.

    Arguments:
    learner -- scikit-learn classification estimator
    data1 -- tuple (X, y) representing the first data set, where:
        X -- numpy.array which holds the attribute values
        y -- numpy.array which holds the class value
    data2 -- tuple (X, y) representing the second data set, where:
        X -- numpy.array which holds the attribute values
        y -- numpy.array which holds the class value
    cv_folds1 -- list of tuples (learn, test) to perform cross-validation over
        data1, where:
        learn -- numpy.array with a Boolean mask for selecting learning
            instances
        test -- numpy.array with a Boolean mask for selecting testing instances

    """
    # unpack the data1 and data2 tuples
    X1, y1 = data1
    X2, y2 = data2
    # build a model on data2
    # NOTE: The model does not change throughout cross-validation on data1,
    # so its prediction errors can be computed right away.
    model2 = _fit_model_or_dummy(learner, X2, y2)
    pred_errs2 = _prediction_errors(model2, X1, y1)
    # -1 marks instances of data1 that have not been tested (yet)
    pred_errs1 = -np.ones(y1.shape)
    pred_errsm = -np.ones(y1.shape)
    # perform generalized cross-validation on data1
    for learn_ind, test_ind in cv_folds1:
        # create testing data arrays for the current fold
        test_X, test_y = X1[test_ind], y1[test_ind]
        # create learning data arrays for the current fold
        learn1 = X1[learn_ind], y1[learn_ind]
        learnm = (np.concatenate((X1[learn_ind], X2), axis=0),
                  np.concatenate((y1[learn_ind], y2), axis=0))
        # build models on the remaining folds of data1 and on the merged
        # data; the merged model (dummy or not) is always fitted on learnm
        model1 = _fit_model_or_dummy(learner, *learn1)
        modelm = _fit_model_or_dummy(learner, *learnm)
        # compute the prediction errors of both models on the current testing
        # data
        pred_errs1[test_ind] = _prediction_errors(model1, test_X, test_y)
        pred_errsm[test_ind] = _prediction_errors(modelm, test_X, test_y)
    return pred_errs1, pred_errs2, pred_errsm
开发者ID:marinkaz,项目名称:PyMTL,代码行数:97,代码来源:testing.py


注:本文中的sklearn.dummy.DummyClassifier.predict_proba方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。