当前位置: 首页>>代码示例>>Python>>正文


Python validation.check_array函数代码示例

本文整理汇总了Python中sklearn.utils.validation.check_array函数的典型用法代码示例。如果您正苦于以下问题：Python check_array函数的具体用法是什么？Python check_array怎么用？有哪些Python check_array的使用例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了check_array函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: predict

    def predict(self, X, categorical=None):
        """Predict the closest cluster each sample in X belongs to.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            New data to predict.
        categorical : int, list or tuple, optional
            Index (or indices) of the columns that contain categorical data.

        Returns
        -------
        labels : array, shape [n_samples,]
            Index of the cluster each sample belongs to.

        Raises
        ------
        AssertionError
            If the model has no ``_enc_cluster_centroids`` attribute (i.e. it
            has not been fitted), or if ``categorical`` is given but is not
            an int, list or tuple.
        """
        assert hasattr(self, '_enc_cluster_centroids'), "Model not yet fitted."

        if categorical is not None:
            # Adjacent literals are used instead of backslash continuations
            # inside a single literal: the latter embed the source
            # indentation (long runs of spaces) in the message.
            assert isinstance(categorical, (int, list, tuple)), (
                "The 'categorical' argument needs to be an integer with the "
                "index of the categorical column in your data, or a list or "
                "tuple of several of them, but it is a {}."
                .format(type(categorical)))

        X = pandas_to_numpy(X)
        # Numerical and categorical columns are validated separately;
        # dtype=None keeps the categorical part's original dtype.
        Xnum, Xcat = _split_num_cat(X, categorical)
        Xnum, Xcat = check_array(Xnum), check_array(Xcat, dtype=None)
        # Encode with the mapping stored on the model (self._enc_map).
        Xcat, _ = encode_features(Xcat, enc_map=self._enc_map)
        # Only the first element of the helper's result is returned.
        return _labels_cost(Xnum, Xcat, self._enc_cluster_centroids,
                            self.num_dissim, self.cat_dissim, self.gamma)[0]
开发者ID:nicodv,项目名称:kmodes,代码行数:28,代码来源:kprototypes.py

示例2: pairwise_distances_no_broadcast

def pairwise_distances_no_broadcast(X, Y):
    """Utility function to calculate row-wise euclidean distance of two matrix.
    Different from pair-wise calculation, this function would not broadcast.

    For instance, X and Y are both (4,3) matrices, the function would return
    a distance vector with shape (4,), instead of (4,4).

    Parameters
    ----------
    X : array of shape (n_samples, n_features)
        First input samples

    Y : array of shape (n_samples, n_features)
        Second input samples

    Returns
    -------
    distance : array of shape (n_samples,)
        Row-wise euclidean distance of X and Y

    Raises
    ------
    ValueError
        If the first two dimensions of X and Y differ.
    """
    X = check_array(X)
    Y = check_array(Y)

    if X.shape[0] != Y.shape[0] or X.shape[1] != Y.shape[1]:
        # Trailing space after "receive": the adjacent literals are joined
        # verbatim, so without it the message reads "receivematrix".
        raise ValueError("pairwise_distances_no_broadcast function receive "
                         "matrix with different shapes {0} and {1}".format(
                             X.shape, Y.shape))
    return _pairwise_distances_no_broadcast_helper(X, Y)
开发者ID:flaviassantos,项目名称:pyod,代码行数:28,代码来源:stat_models.py

示例3: _process_inputs

  def _process_inputs(self, X, constraints):
    """Validate X, drop degenerate constraint pairs and initialise the metric.

    ``constraints`` is unpacked into four index arrays ``a, b, c, d``.
    Pairs ``(a, b)`` and ``(c, d)`` whose vectors are closer than 1e-9
    (according to ``vector_norm``) are discarded.  The validated data is
    stored in ``self.X_`` and the starting metric in ``self.A_``.

    Returns the filtered ``a, b, c, d``; raises ValueError when either
    filtered pair set is empty.
    """
    X = check_array(X)
    self.X_ = X

    a, b, c, d = constraints

    # Keep only pairs whose two vectors are not (numerically) identical.
    similar_ok = vector_norm(X[a] - X[b]) > 1e-9
    a = a[similar_ok]
    b = b[similar_ok]
    dissimilar_ok = vector_norm(X[c] - X[d]) > 1e-9
    c = c[dissimilar_ok]
    d = d[dissimilar_ok]

    if len(a) == 0:
      raise ValueError('No non-trivial similarity constraints given for MMC.')
    if len(c) == 0:
      raise ValueError('No non-trivial dissimilarity constraints given for MMC.')

    # Starting metric: the user-supplied A0 when given, otherwise identity.
    if self.A0 is not None:
      self.A_ = check_array(self.A0)
    else:
      self.A_ = np.identity(X.shape[1])
      if not self.diagonal:
        # The scaling by 1/10 is inherited from the original code; it
        # seems to affect the overall scale of the learned metric.
        self.A_ /= 10.0

    return a, b, c, d
开发者ID:svecon,项目名称:metric-learn,代码行数:26,代码来源:mmc.py

示例4: check_array_with_weights

def check_array_with_weights(X, weights, **kwargs):
    """Validate a data array together with its (optional) weights.

    Both inputs go through ``check_array`` with ``kwargs``.  When weights
    are given, they are validated with ``copy=False`` and X with
    ``force_all_finite=False``; entries of X whose weight is zero are then
    set to zero, and the remaining entries must all be finite.

    Returns the tuple ``(X, weights)``.  Raises ValueError if the shapes
    differ or if X holds a non-finite value with a non-zero weight.
    """
    if weights is None:
        return check_array(X, **kwargs), None

    # The weights are never copied, whatever the caller asked for.
    weights = check_array(weights, **dict(kwargs, copy=False))

    # Finiteness of X is checked below, after masking, not by check_array.
    X = check_array(X, **dict(kwargs, force_all_finite=False))

    if X.shape != weights.shape:
        raise ValueError("Shape of `X` and `weights` should match")

    # Zero-weight entries are overwritten so they cannot trip the check.
    X[weights == 0] = 0

    if np.isfinite(X).all():
        return X, weights
    raise ValueError("Input contains NaN or infinity without "
                     "a corresponding zero in `weights`.")
开发者ID:jakevdp,项目名称:wpca,代码行数:29,代码来源:utils.py

示例5: log_loss

def log_loss(y_true, y_pred, eps=1e-15, normalize=True, sample_weight=None):
    """Compute the per-sample log loss and aggregate it.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels; binarized with ``LabelBinarizer``.
    y_pred : array-like of floats
        Predicted probabilities. A 1-d input, or a single column, is
        expanded to two columns ``[1 - p, p]``.
    eps : float, default 1e-15
        Predictions are clipped to ``[eps, 1 - eps]``.
    normalize : bool, default True
        Forwarded to ``_weighted_sum``.
    sample_weight : array-like or None
        Forwarded to ``_weighted_sum``.

    Returns
    -------
    The result of ``_weighted_sum(loss, sample_weight, normalize)``, where
    ``loss`` holds one value per sample.

    Raises
    ------
    ValueError
        If the clipped predictions are not an ndarray, or if ``y_true`` and
        ``y_pred`` end up with a different number of columns.
    """
    lb = LabelBinarizer()
    T = lb.fit_transform(y_true)
    if T.shape[1] == 1:
        T = np.append(1 - T, T, axis=1)

    # Clipping
    Y = np.clip(y_pred, eps, 1 - eps)

    # This happens in cases when elements in y_pred have type "str".
    if not isinstance(Y, np.ndarray):
        raise ValueError("y_pred should be an array of floats.")

    # If y_pred is of single dimension, assume y_true to be binary
    # and then check.
    if Y.ndim == 1:
        Y = Y[:, np.newaxis]
    if Y.shape[1] == 1:
        Y = np.append(1 - Y, Y, axis=1)

    # Check if dimensions are consistent.
    # (The debug prints of T and Y that used to follow were removed: they
    # dumped both full matrices to stdout on every call.)
    val.check_consistent_length(T, Y)
    T = val.check_array(T)
    Y = val.check_array(Y)
    if T.shape[1] != Y.shape[1]:
        raise ValueError("y_true and y_pred have different number of classes "
                         "%d, %d" % (T.shape[1], Y.shape[1]))

    # Renormalize so that every row of Y sums to one.
    Y /= Y.sum(axis=1)[:, np.newaxis]
    loss = -(T * np.log(Y)).sum(axis=1)

    return _weighted_sum(loss, sample_weight, normalize)
开发者ID:dvn123,项目名称:MachineLearning,代码行数:35,代码来源:aux_functions.py

示例6: _impose_f_order

def _impose_f_order(X):
    """Return ``X`` (or its transpose) validated with ``order='F'``.

    A C-contiguous ``X`` is transposed before validation. The second
    element of the returned tuple tells whether that transpose happened.
    """
    # Read the flag directly rather than calling np.isfortran: this
    # catches corner cases.
    was_transposed = bool(X.flags.c_contiguous)
    candidate = X.T if was_transposed else X
    return check_array(candidate, copy=False, order='F'), was_transposed
开发者ID:f00barin,项目名称:bilppattach,代码行数:8,代码来源:extmath.py

示例7: _prepare_inputs

 def _prepare_inputs(self, X, W):
   """Validate X and W, set the prior ``M_`` and return ``X^T L X``.

   ``L`` is the un-normalised result of ``laplacian(W)``.  The validated
   data is stored in ``self.X_``.  The prior ``self.M_`` is the
   pseudo-inverse (``pinvh``) of the covariance of X when ``self.use_cov``
   is set, and the identity otherwise.
   """
   X = check_array(X)
   self.X_ = X
   W = check_array(W, accept_sparse=True)

   # Prior metric M.
   if self.use_cov:
     prior = pinvh(np.cov(X, rowvar = False))
   else:
     prior = np.identity(X.shape[1])
   self.M_ = prior

   graph_laplacian = laplacian(W, normed=False)
   return X.T.dot(graph_laplacian.dot(X))
开发者ID:svecon,项目名称:metric-learn,代码行数:10,代码来源:sdml.py

示例8: fit

    def fit(self, X, y=None):
        """Compute the per-feature mean and std used for later scaling.

        Parameters
        ----------
        X : array-like or sparse matrix with shape [n_samples, n_features]
            The data used to compute the mean and standard deviation
            along the features axis. Sparse input is validated as CSC.
        y : ignored

        Returns
        -------
        self, with ``mean_`` and ``std_`` set.
        """
        X = check_array(X, copy=self.copy, accept_sparse="csc",
                        ensure_2d=False)
        if warn_if_not_float(X, estimator=self):
            # Costly conversion, but otherwise the pipeline will break:
            # https://github.com/scikit-learn/scikit-learn/issues/1709
            X = X.astype(np.float32)

        # Dense input: delegate entirely to the shared helper.
        if not sparse.issparse(X):
            self.mean_, self.std_ = _mean_and_std(
                X, axis=0, with_mean=self.with_mean, with_std=self.with_std)
            return self

        if self.center_sparse:
            column_means = []
            column_vars = []

            # Walking indptr like this only works for CSC matrices.
            for col in range(X.shape[1]):
                start, stop = X.indptr[col], X.indptr[col + 1]
                if start == stop:
                    # No stored entries in this column.
                    column_means.append(0)
                    column_vars.append(1)
                    continue

                column_vars.append(X.data[start:stop].var())
                column_means.append(X.data[start:stop].mean())
                # (Near-)zero variance: shift the mean by one so that the
                # stored entries of this feature become 1 after centering.
                if 0.0000001 >= column_vars[-1] >= -0.0000001:
                    column_means[-1] -= 1

            self.std_ = np.sqrt(np.array(column_vars))
            self.std_[np.array(column_vars) == 0.0] = 1.0
            self.mean_ = np.array(column_means)
            return self

        if self.with_mean:
            raise ValueError(
                "Cannot center sparse matrices: pass `with_mean=False` "
                "instead. See docstring for motivation and alternatives.")
        self.mean_ = None

        if not self.with_std:
            self.std_ = None
            return self

        variances = mean_variance_axis(X, axis=0)[1]
        self.std_ = np.sqrt(variances)
        # Avoid dividing by zero for constant features.
        self.std_[variances == 0.0] = 1.0
        return self
开发者ID:Ayaro,项目名称:auto-sklearn,代码行数:60,代码来源:StandardScaler.py

示例9: transform

    def transform(self, X, y=None, copy=None):
        """Perform standardization by centering and scaling

        Parameters
        ----------
        X : array-like with shape [n_samples, n_features]
            The data used to scale along the features axis.
        y : ignored
        copy : bool or None
            Passed to ``check_array``; ``None`` falls back to ``self.copy``.

        Returns
        -------
        X : the validated (and possibly converted) input, scaled in place.

        Raises
        ------
        ValueError
            For sparse input when ``with_mean`` is set and
            ``center_sparse`` is not.
        """
        check_is_fitted(self, 'std_')

        copy = copy if copy is not None else self.copy
        X = check_array(X, copy=copy, accept_sparse="csc", ensure_2d=False)
        if warn_if_not_float(X, estimator=self):
            # ``np.float`` was only an alias of the builtin ``float`` and was
            # removed in NumPy 1.24; ``np.float64`` is the same dtype.
            X = X.astype(np.float64)
        if sparse.issparse(X):
            if self.center_sparse:
                # Subtract the column mean from the stored entries only
                # (relies on the CSC layout requested above).
                for i in range(X.shape[1]):
                    X.data[X.indptr[i]:X.indptr[i + 1]] -= self.mean_[i]
            elif self.with_mean:
                raise ValueError(
                    "Cannot center sparse matrices: pass `with_mean=False` "
                    "instead. See docstring for motivation and alternatives.")

            if self.std_ is not None:
                inplace_column_scale(X, 1 / self.std_)
        else:
            if self.with_mean:
                X -= self.mean_
            if self.with_std:
                X /= self.std_
        return X
开发者ID:Ayaro,项目名称:auto-sklearn,代码行数:35,代码来源:StandardScaler.py

示例10: dump_svmlight_file

def dump_svmlight_file(X, y, f, zero_based=True, comment=None, query_id=None):
    """Validate the inputs and hand them to ``_dump_svmlight``.

    ``y`` must be 1-d with one entry per row of ``X``; ``query_id``, when
    given, must have the same length as ``y``.  ``f`` is either an object
    with a ``write`` attribute, used as is, or something ``open`` accepts,
    which is then opened in ``"wb"`` mode.

    Raises ValueError when any of the shape checks fails.
    """
    y = np.asarray(y)
    if y.ndim != 1:
        raise ValueError("expected y of shape (n_samples,), got %r"
                         % (y.shape,))

    Xval = check_array(X, accept_sparse='csr')
    n_rows, n_targets = Xval.shape[0], y.shape[0]
    if n_rows != n_targets:
        raise ValueError("X.shape[0] and y.shape[0] should be the same, got"
                         " %r and %r instead." % (n_rows, n_targets))

    # We had some issues with CSR matrices with unsorted indices (e.g. #1501),
    # so sort them here, but first make sure we don't modify the user's X.
    # TODO We can do this cheaper; sorted_indices copies the whole matrix.
    same_object = Xval is X
    if same_object and hasattr(Xval, "sorted_indices"):
        # Validation returned the caller's own object: sort a copy.
        X = Xval.sorted_indices()
    else:
        X = Xval
        if hasattr(X, "sort_indices"):
            X.sort_indices()

    if query_id is not None:
        query_id = np.asarray(query_id)
        if query_id.shape[0] != n_targets:
            raise ValueError("expected query_id of shape (n_samples,), got %r"
                             % (query_id.shape,))

    one_based = not zero_based

    if hasattr(f, "write"):
        _dump_svmlight(X, y, f, one_based, comment, query_id)
        return

    with open(f, "wb") as handle:
        _dump_svmlight(X, y, handle, one_based, comment, query_id)
开发者ID:junjiek,项目名称:cmu-exp,代码行数:35,代码来源:svmIO.py

示例11: fit

    def fit(self, X, y=None):
        """Fit detector. y is optional for unsupervised methods.

        A ``LocalOutlierFactor`` is built from this object's parameters,
        fitted on ``X`` and kept as ``self.detector_``.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples.

        y : numpy array of shape (n_samples,), optional (default=None)
            The ground truth of the input samples (labels).

        Returns
        -------
        self
        """
        # validate inputs X and y (optional)
        X = check_array(X)
        self._set_n_classes(y)

        lof_params = dict(
            n_neighbors=self.n_neighbors,
            algorithm=self.algorithm,
            leaf_size=self.leaf_size,
            metric=self.metric,
            p=self.p,
            metric_params=self.metric_params,
            contamination=self.contamination,
            n_jobs=self.n_jobs,
        )
        self.detector_ = LocalOutlierFactor(**lof_params)
        self.detector_.fit(X=X, y=y)

        # Invert the fitted factors so that outliers get the higher scores.
        negative_factors = self.detector_.negative_outlier_factor_
        self.decision_scores_ = invert_order(negative_factors)
        self._process_decision_scores()
        return self
开发者ID:flaviassantos,项目名称:pyod,代码行数:30,代码来源:lof.py

示例12: ttest

def ttest(X, y):
    """Sum per-label two-sample t-tests for every feature of X.

    For each feature column and each label column of the binarized ``y``,
    the feature values multiplied by the label indicator are compared
    against the values multiplied by its complement, using
    ``scipy.stats.ttest_ind``.  Statistics and p-values are summed over
    the labels.

    Parameters
    ----------
    X : array-like or sparse matrix
        Non-negative feature matrix (sparse input is validated as CSR).
    y : iterable of label collections
        Passed to ``MultiLabelBinarizer().fit_transform``.

    Returns
    -------
    t : ndarray with one summed statistic per feature
    prob : ndarray with one summed p-value per feature

    Raises
    ------
    ValueError
        If X contains a negative value.
    """
    X = check_array(X, accept_sparse='csr')
    sparse_input = issparse(X)
    if np.any((X.data if sparse_input else X) < 0):
        raise ValueError("Input X must be non-negative.")

    Y = MultiLabelBinarizer().fit_transform(y)
    if Y.shape[1] == 1:
        Y = np.append(1 - Y, Y, axis=1)
    negY = 1 - Y
    labelNum = Y.shape[1]
    featureNum = X.shape[1]
    t = []
    prob = []
    for i in range(featureNum):
        # Dense input has no ``toarray``; the previous unconditional call
        # raised AttributeError for ndarrays even though the sign check
        # above explicitly supports them.
        if sparse_input:
            values = X[:, i].T.toarray()[0]
        else:
            values = X[:, i]
        ti = 0
        probi = 0
        for j in range(labelNum):
            observed = values * Y[:, j]
            notObserved = values * negY[:, j]
            (res0, res1) = scipy.stats.ttest_ind(observed, notObserved)
            ti = ti + res0
            probi = probi + res1
        t.append(ti)
        prob.append(probi)
    t = np.asarray(t)
    prob = np.asarray(prob)
    return t, prob
开发者ID:junjiek,项目名称:cmu-exp,代码行数:29,代码来源:ttest_multilabel.py

示例13: fit

    def fit(self, X, y=None):
        """Fit the model with ``X``.

        Parameters
        ----------
        X: array-like, shape (n_samples, n_features)
            Training data, where n_samples is the number of samples
            and n_features is the number of features.
        y : ignored

        Returns
        -------
        self : object
            Returns the instance itself, with ``low_rank_``, ``n_iter_``,
            ``n_components_`` and ``components_`` set.
        """
        # ``np.float`` was only an alias of the builtin ``float`` and was
        # removed in NumPy 1.24; ``np.float64`` requests the same dtype.
        X = check_array(X, dtype=np.float64)
        L, S, (U, s, Vt), self.n_iter_ = rpca(X, self.lam, self.mu,
                                              self.max_iter, self.eps_primal,
                                              self.eps_dual, self.rho,
                                              self.initial_sv, self.max_mu,
                                              self.verbose)
        self.low_rank_ = L
        # The number of components is the count of non-zero entries in s;
        # the first r rows of Vt are kept as the components.
        r = np.count_nonzero(s)
        self.n_components_ = r
        self.components_ = Vt[:r]

        return self
开发者ID:Alan215,项目名称:advanced_training,代码行数:25,代码来源:robust_pca.py

示例14: predict_proba

    def predict_proba(self, X):
        """Predict probability for each possible outcome.

        Computes, for every sample in X, a distribution over the outcomes
        held in ``self.label_distributions_``, normalised so that each row
        sums to one.

        Parameters
        ----------
        X : array_like, shape = [n_samples, n_features]

        Returns
        -------
        probabilities : array, shape = [n_samples, n_classes]
            Normalized probability distributions across
            class labels
        """
        check_is_fitted(self, 'X_')

        sparse_formats = ['csc', 'csr', 'coo', 'dok', 'bsr', 'lil', 'dia']
        X_2d = check_array(X, accept_sparse=sparse_formats)
        weight_matrices = self._get_kernel(self.X_, X_2d)

        if self.kernel == 'knn':
            # Each kernel entry indexes rows of label_distributions_;
            # the selected rows are summed per sample.
            probabilities = np.array([
                np.sum(self.label_distributions_[neighbors], axis=0)
                for neighbors in weight_matrices
            ])
        else:
            probabilities = np.dot(weight_matrices.T,
                                   self.label_distributions_)

        # Row sums as a column vector, so the division works row by row.
        row_totals = np.atleast_2d(np.sum(probabilities, axis=1)).T
        probabilities /= row_totals
        return probabilities
开发者ID:musically-ut,项目名称:semi_supervised,代码行数:34,代码来源:label_propagation.py

示例15: predict

    def predict(self, X):
        """Predict class for X.

        Every entry of ``self.trees_`` is evaluated on X, one per class in
        ``self.classes_``; the class with the largest value is returned
        for each sample.

        Parameters
        ----------
        X : Array-like of shape [n_samples, n_features]
            The input to classify.

        Returns
        -------
        y : array of shape = [n_samples]
            The predicted classes.

        Raises
        ------
        Exception
            If ``self.trees_`` is None.
        """
        X = check_array(X)

        if self.trees_ is None:
            raise Exception("Pattern trees not initialized. Perform a fit first.")

        n_samples = X.shape[0]
        scores = np.zeros((n_samples, len(self.classes_)))
        for class_idx, _ in enumerate(self.classes_):
            # One column of scores per class.
            scores[:, class_idx] = self.trees_[class_idx](X)

        # Pick, per sample, the class with the maximum score.
        best = np.argmax(scores, -1)
        return self.classes_.take(best)
开发者ID:sorend,项目名称:fylearn,代码行数:25,代码来源:fpt.py


注：本文中的sklearn.utils.validation.check_array函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。