当前位置: 首页>>代码示例>>Python>>正文


Python validation.check_consistent_length函数代码示例

本文整理汇总了Python中sklearn.utils.validation.check_consistent_length函数的典型用法代码示例。如果您正苦于以下问题:Python check_consistent_length函数的具体用法?Python check_consistent_length怎么用?Python check_consistent_length使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了check_consistent_length函数的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: log_loss

def log_loss(y_true, y_pred, eps=1e-15, normalize=True, sample_weight=None):
    lb = LabelBinarizer()
    T = lb.fit_transform(y_true)
    if T.shape[1] == 1:
        T = np.append(1 - T, T, axis=1)

    # Clipping
    Y = np.clip(y_pred, eps, 1 - eps)

    # This happens in cases when elements in y_pred have type "str".
    if not isinstance(Y, np.ndarray):
        raise ValueError("y_pred should be an array of floats.")

    # If y_pred is of single dimension, assume y_true to be binary
    # and then check.
    if Y.ndim == 1:
        Y = Y[:, np.newaxis]
    if Y.shape[1] == 1:
        Y = np.append(1 - Y, Y, axis=1)

    # Check if dimensions are consistent.
    val.check_consistent_length(T, Y)
    T = val.check_array(T)
    Y = val.check_array(Y)
    print(T)
    print(Y)
    if T.shape[1] != Y.shape[1]:
        raise ValueError("y_true and y_pred have different number of classes "
                         "%d, %d" % (T.shape[1], Y.shape[1]))

    # Renormalize
    Y /= Y.sum(axis=1)[:, np.newaxis]
    loss = -(T * np.log(Y)).sum(axis=1)

    return _weighted_sum(loss, sample_weight, normalize)
开发者ID:dvn123,项目名称:MachineLearning,代码行数:35,代码来源:aux_functions.py

示例2: pinball_loss

    def pinball_loss(y_true, y_pred, probs):
        """Compute the pinball loss.

        Parameters
        ----------
        pred : {array-like}, shape = [n_quantiles, n_samples] or [n_samples]
            Predictions.
        y : {array-like}, shape = [n_samples]
            Targets.

        Returns
        -------
        l : {array}, shape = [n_quantiles]
            Average loss for each quantile level.
        """
        probs = asarray(probs).reshape(-1)
        check_consistent_length(y_true, y_pred.T)
        y_true = check_array(y_true.reshape((-1, 1)),
                             ensure_2d=True)
        y_pred = check_array(y_pred.T.reshape((y_true.shape[0], -1)),
                             ensure_2d=True)
        residual = y_true - y_pred
        loss = npsum([fmax(prob * res, (prob - 1) * res) for (res, prob) in
                      zip(residual.T, probs)], axis=1)
        return loss / y_true.size
开发者ID:operalib,项目名称:operalib,代码行数:25,代码来源:quantile.py

示例3: _check_rows_and_columns

def _check_rows_and_columns(a, b):
    """Unpacks the row and column arrays and checks their shape."""
    check_consistent_length(*a)
    check_consistent_length(*b)
    checks = lambda x: check_array(x, ensure_2d=False)
    a_rows, a_cols = map(checks, a)
    b_rows, b_cols = map(checks, b)
    return a_rows, a_cols, b_rows, b_cols
开发者ID:0x0all,项目名称:scikit-learn,代码行数:8,代码来源:bicluster.py

示例4: test_check_dataframe_fit_attribute

def test_check_dataframe_fit_attribute():
    # check pandas dataframe with 'fit' column does not raise error
    # https://github.com/scikit-learn/scikit-learn/issues/8415
    try:
        import pandas as pd
        X = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
        X_df = pd.DataFrame(X, columns=['a', 'b', 'fit'])
        check_consistent_length(X_df)
    except ImportError:
        raise SkipTest("Pandas not found")
开发者ID:daniel-perry,项目名称:scikit-learn,代码行数:10,代码来源:test_validation.py

示例5: fit

    def fit(self, X, y, sample_weight=None):
        """
        Build a classifier from the training set (X, y).

        Parameters
        ----------
        X : array-like or sparse matrix of shape = [n_samples, n_features]
            The training input samples.

        y : array-like, shape = [n_samples]
            The target values (class labels in classification).

        sample_weight : array-like, shape = [n_samples] or None
            Individual weights for each sample.

        Returns
        -------
        self : object
            Returns self.
        """
        self._validate_params(**self.get_params())

        X, y = check_X_y(X, y, accept_sparse=True)
        if sp.isspmatrix(X):
            self._is_sparse_train_X = True
        else:
            self._is_sparse_train_X = False
        self._n_samples, self._n_features = X.shape
        sample_weight = self._get_sample_weight(sample_weight)
        check_consistent_length(X, y, sample_weight)
        check_classification_targets(y)
        self._classes = sorted(np.unique(y))
        self._n_classes = len(self._classes)
        self._classes_map = {}

        self._set_params_with_dependencies()
        params = self._get_params()

        if self._n_classes == 2:
            self._classes_map[0] = self._classes[0]
            self._classes_map[1] = self._classes[1]
            self._estimators = [None]
            y = (y == self._classes[0]).astype(int)
            self._fit_binary_task(X, y, sample_weight, params)
        elif self._n_classes > 2:
            if sp.isspmatrix_dok(X):
                X = X.tocsr().tocoo()  # Fix to avoid scipy 7699 issue
            self._estimators = [None] * self._n_classes
            self._fit_multiclass_task(X, y, sample_weight, params)
        else:
            raise ValueError("Classifier can't predict when only one class is present.")

        self._fitted = True

        return self
开发者ID:fukatani,项目名称:rgf_python,代码行数:55,代码来源:utils.py

示例6: _indexable

def _indexable(X, y):
    """Make arrays indexable for cross-validation. Checks consistent 
    length, passes through None, and ensures that everything can be indexed.

    Parameters
    ----------

    X : array-like or pandas DataFrame, shape = [n_samples, n_features]
        Input data, where n_samples is the number of samples and
        n_features is the number of features.

    y : array-like, shape = [n_samples] or [n_samples, n_output], optional
        Target relative to X for classification or regression;
        None for unsupervised learning.
    """
    result = [_validate_X(X), _validate_y(y)]
    check_consistent_length(*result)
    return result
开发者ID:tgsmith61591,项目名称:skutil,代码行数:18,代码来源:fixes.py

示例7: _my_lrap

def _my_lrap(y_true, y_score):
    """Simple implementation of label ranking average precision"""
    check_consistent_length(y_true, y_score)
    y_true = check_array(y_true)
    y_score = check_array(y_score)
    n_samples, n_labels = y_true.shape
    score = np.empty((n_samples, ))
    for i in range(n_samples):
        # The best rank correspond to 1. Rank higher than 1 are worse.
        # The best inverse ranking correspond to n_labels.
        unique_rank, inv_rank = np.unique(y_score[i], return_inverse=True)
        n_ranks = unique_rank.size
        rank = n_ranks - inv_rank

        # Rank need to be corrected to take into account ties
        # ex: rank 1 ex aequo means that both label are rank 2.
        corr_rank = np.bincount(rank, minlength=n_ranks + 1).cumsum()
        rank = corr_rank[rank]

        relevant = y_true[i].nonzero()[0]
        if relevant.size == 0 or relevant.size == n_labels:
            score[i] = 1
            continue

        score[i] = 0.
        for label in relevant:
            # Let's count the number of relevant label with better rank
            # (smaller rank).
            n_ranked_above = sum(rank[r] <= rank[label] for r in relevant)

            # Weight by the rank of the actual label
            score[i] += n_ranked_above / rank[label]

        score[i] /= relevant.size

    return score.mean()
开发者ID:BTY2684,项目名称:scikit-learn,代码行数:36,代码来源:test_ranking.py

示例8: test_check_consistent_length

def test_check_consistent_length():
    check_consistent_length([1], [2], [3], [4], [5])
    check_consistent_length([[1, 2], [[1, 2]]], [1, 2], ['a', 'b'])
    check_consistent_length([1], (2,), np.array([3]), sp.csr_matrix((1, 2)))
    assert_raises_regexp(ValueError, 'inconsistent numbers of samples',
                         check_consistent_length, [1, 2], [1])
    assert_raises_regexp(TypeError, 'got <\w+ \'int\'>',
                         check_consistent_length, [1, 2], 1)
    assert_raises_regexp(TypeError, 'got <\w+ \'object\'>',
                         check_consistent_length, [1, 2], object())

    assert_raises(TypeError, check_consistent_length, [1, 2], np.array(1))
    # Despite ensembles having __len__ they must raise TypeError
    assert_raises_regexp(TypeError, 'estimator', check_consistent_length,
                         [1, 2], RandomForestRegressor())
开发者ID:Afey,项目名称:scikit-learn,代码行数:15,代码来源:test_validation.py

示例9: cv_split

def cv_split(cv, X, y, groups, is_pairwise, cache):
    check_consistent_length(X, y, groups)
    return CVCache(list(cv.split(X, y, groups)), is_pairwise, cache)
开发者ID:dask,项目名称:dask-learn,代码行数:3,代码来源:methods.py

示例10: _check_reg_targets

def _check_reg_targets(y_true, y_pred, multioutput):
    """Check that y_true and y_pred belong to the same regression task

    Parameters
    ----------
    y_true : array-like,

    y_pred : array-like,

    multioutput : array-like or string in ['raw_values', uniform_average',
        'variance_weighted'] or None
        None is accepted due to backward compatibility of r2_score().

    Returns
    -------
    type_true : one of {'continuous', continuous-multioutput'}
        The type of the true target data, as output by
        'utils.multiclass.type_of_target'

    y_true : array-like of shape = (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape = (n_samples, n_outputs)
        Estimated target values.

    multioutput : array-like of shape = (n_outputs) or string in ['raw_values',
        uniform_average', 'variance_weighted'] or None
        Custom output weights if ``multioutput`` is array-like or
        just the corresponding argument if ``multioutput`` is a
        correct keyword.

    """
    check_consistent_length(y_true, y_pred)
    y_true = check_array(y_true, ensure_2d=False)
    y_pred = check_array(y_pred, ensure_2d=False)

    if y_true.ndim == 1:
        y_true = y_true.reshape((-1, 1))

    if y_pred.ndim == 1:
        y_pred = y_pred.reshape((-1, 1))

    if y_true.shape[1] != y_pred.shape[1]:
        raise ValueError("y_true and y_pred have different number of output "
                         "({0}!={1})".format(y_true.shape[1], y_pred.shape[1]))

    n_outputs = y_true.shape[1]
    multioutput_options = (None, 'raw_values', 'uniform_average',
                           'variance_weighted')
    if multioutput not in multioutput_options:
        multioutput = check_array(multioutput, ensure_2d=False)
        if n_outputs == 1:
            raise ValueError("Custom weights are useful only in "
                             "multi-output cases.")
        elif n_outputs != len(multioutput):
            raise ValueError(("There must be equally many custom weights "
                              "(%d) as outputs (%d).") %
                             (len(multioutput), n_outputs))
    y_type = 'continuous' if n_outputs == 1 else 'continuous-multioutput'

    return y_type, y_true, y_pred, multioutput
开发者ID:stylianos-kampakis,项目名称:ADAN,代码行数:61,代码来源:regression.py

示例11: wpearsonr

def wpearsonr(x, y, w=None):
    """Utility function to calculate the weighted Pearson correlation of two
    samples.

    See https://stats.stackexchange.com/questions/221246/such-thing-as-a-weighted-correlation
    for more information

    Parameters
    ----------
    x : array, shape (n,)
        Input x.

    y : array, shape (n,)
        Input y.

    w : array, shape (n,)
        Weights w.

    Returns
    -------
    scores : float in range of [-1,1]
        Weighted Pearson Correlation between x and y.

    """

    # unweighted version
    # note the return is different
    # TODO: fix output differences
    if w is None:
        return pearsonr(x, y)

    x = np.asarray(x)
    y = np.asarray(y)
    w = np.asarray(w)

    check_consistent_length([x, y, w])
    # n = len(x)

    w_sum = w.sum()
    mx = np.sum(x * w) / w_sum
    my = np.sum(y * w) / w_sum

    xm, ym = (x - mx), (y - my)

    r_num = np.sum(xm * ym * w) / w_sum

    xm2 = np.sum(xm * xm * w) / w_sum
    ym2 = np.sum(ym * ym * w) / w_sum

    r_den = np.sqrt(xm2 * ym2)
    r = r_num / r_den

    r = max(min(r, 1.0), -1.0)

    # TODO: disable p value calculation due to python 2.7 break
    #    df = n_train_ - 2
    #
    #    if abs(r) == 1.0:
    #        prob = 0.0
    #    else:
    #        t_squared = r ** 2 * (df / ((1.0 - r) * (1.0 + r)))
    #        prob = _betai(0.5 * df, 0.5, df / (df + t_squared))
    return r  # , prob
开发者ID:flaviassantos,项目名称:pyod,代码行数:63,代码来源:stat_models.py

示例12: fit

    def fit(self, X, y, sample_weight=None):
        """
        Build a RGF Classifier from the training set (X, y).

        Parameters
        ----------
        X : array-like or sparse matrix of shape = [n_samples, n_features]
            The training input samples.

        y : array-like, shape = [n_samples]
            The target values (class labels in classification).

        sample_weight : array-like, shape = [n_samples] or None
            Individual weights for each sample.

        Returns
        -------
        self : object
            Returns self.
        """
        _validate_params(**self.get_params())

        X, y = check_X_y(X, y, accept_sparse=True)
        n_samples, self._n_features = X.shape

        if self.sl2 is None:
            self._sl2 = self.l2
        else:
            self._sl2 = self.sl2

        if isinstance(self.min_samples_leaf, _FLOATS):
            self._min_samples_leaf = ceil(self.min_samples_leaf * n_samples)
        else:
            self._min_samples_leaf = self.min_samples_leaf

        if self.n_iter is None:
            if self.loss == "LS":
                self._n_iter = 10
            else:
                self._n_iter = 5
        else:
            self._n_iter = self.n_iter

        if sample_weight is None:
            sample_weight = np.ones(n_samples, dtype=np.float32)
        else:
            sample_weight = column_or_1d(sample_weight, warn=True)
            if (sample_weight <= 0).any():
                raise ValueError("Sample weights must be positive.")
        check_consistent_length(X, y, sample_weight)
        check_classification_targets(y)

        self._classes = sorted(np.unique(y))
        self._n_classes = len(self._classes)
        self._classes_map = {}

        params = dict(max_leaf=self.max_leaf,
                      test_interval=self.test_interval,
                      algorithm=self.algorithm,
                      loss=self.loss,
                      reg_depth=self.reg_depth,
                      l2=self.l2,
                      sl2=self._sl2,
                      normalize=self.normalize,
                      min_samples_leaf=self._min_samples_leaf,
                      n_iter=self._n_iter,
                      n_tree_search=self.n_tree_search,
                      opt_interval=self.opt_interval,
                      learning_rate=self.learning_rate,
                      memory_policy=self.memory_policy,
                      verbose=self.verbose)
        if self._n_classes == 2:
            self._classes_map[0] = self._classes[0]
            self._classes_map[1] = self._classes[1]
            self._estimators = [None]
            y = (y == self._classes[0]).astype(int)
            self._estimators[0] = _RGFBinaryClassifier(**params)
            self._estimators[0].fit(X, y, sample_weight)
        elif self._n_classes > 2:
            if sp.isspmatrix_dok(X):
                X = X.tocsr().tocoo()  # Fix to avoid scipy 7699 issue
            self._estimators = [None] * self._n_classes
            ovr_list = [None] * self._n_classes
            for i, cls_num in enumerate(self._classes):
                self._classes_map[i] = cls_num
                ovr_list[i] = (y == cls_num).astype(int)
                self._estimators[i] = _RGFBinaryClassifier(**params)
            self._estimators = Parallel(n_jobs=self.n_jobs)(delayed(_fit_ovr_binary)(self._estimators[i],
                                                                                     X,
                                                                                     ovr_list[i],
                                                                                     sample_weight)
                                                            for i in range(self._n_classes))
        else:
            raise ValueError("Classifier can't predict when only one class is present.")

        self._fitted = True
        return self
开发者ID:Kjeanclaude,项目名称:rgf_python,代码行数:97,代码来源:sklearn.py


注:本文中的sklearn.utils.validation.check_consistent_length函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。