This article collects typical usage examples of the check_array function from Python's sklearn.utils.validation module. Wondering what exactly check_array does, or how to use it? The hand-picked code examples below should help.
The following shows 15 code examples of the check_array function, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code samples.
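Before diving into the examples, here is a minimal, self-contained sketch (array values made up) of what check_array itself does: validate an array-like input and return a 2-D numeric ndarray, raising ValueError on malformed data.

import numpy as np
from sklearn.utils.validation import check_array

X = [[1.0, 2.0], [3.0, 4.0]]   # any array-like: nested lists, ndarray, sparse matrix
X = check_array(X)             # validated, converted to a 2-D float64 ndarray
print(X.dtype, X.shape)        # float64 (2, 2)

try:
    check_array([[1.0, np.nan]])   # by default, non-finite values are rejected
except ValueError as exc:
    print(exc)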
Example 1: predict
def predict(self, X, categorical=None):
    """Predict the closest cluster each sample in X belongs to.

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_features]
        New data to predict.
    categorical : int, list or tuple, optional
        Indices of the columns that contain categorical data.

    Returns
    -------
    labels : array, shape [n_samples,]
        Index of the cluster each sample belongs to.
    """
    assert hasattr(self, '_enc_cluster_centroids'), "Model not yet fitted."

    if categorical is not None:
        assert isinstance(categorical, (int, list, tuple)), \
            "The 'categorical' argument needs to be an integer with the " \
            "index of the categorical column in your data, or a list or " \
            "tuple of several of them, but it is a {}.".format(type(categorical))

    X = pandas_to_numpy(X)
    Xnum, Xcat = _split_num_cat(X, categorical)
    Xnum, Xcat = check_array(Xnum), check_array(Xcat, dtype=None)
    Xcat, _ = encode_features(Xcat, enc_map=self._enc_map)
    return _labels_cost(Xnum, Xcat, self._enc_cluster_centroids,
                        self.num_dissim, self.cat_dissim, self.gamma)[0]
Example 2: pairwise_distances_no_broadcast
def pairwise_distances_no_broadcast(X, Y):
    """Utility function to calculate the row-wise Euclidean distance of two
    matrices. Unlike pairwise calculation, this function does not broadcast:
    if X and Y are both (4, 3) matrices, it returns a distance vector of
    shape (4,) instead of a (4, 4) matrix.

    Parameters
    ----------
    X : array of shape (n_samples, n_features)
        First input samples.
    Y : array of shape (n_samples, n_features)
        Second input samples.

    Returns
    -------
    distance : array of shape (n_samples,)
        Row-wise Euclidean distance of X and Y.
    """
    X = check_array(X)
    Y = check_array(Y)

    if X.shape[0] != Y.shape[0] or X.shape[1] != Y.shape[1]:
        raise ValueError("pairwise_distances_no_broadcast function received "
                         "matrices with different shapes {0} and {1}".format(
                             X.shape, Y.shape))
    return _pairwise_distances_no_broadcast_helper(X, Y)
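The private helper _pairwise_distances_no_broadcast_helper is not shown above; here is a self-contained equivalent, assuming it simply takes the per-row Euclidean norm of X - Y (the function name and data below are illustrative):

import numpy as np
from sklearn.utils.validation import check_array

def row_wise_euclidean(X, Y):
    # validate both inputs, then measure each row of X against the same row of Y
    X, Y = check_array(X), check_array(Y)
    if X.shape != Y.shape:
        raise ValueError("shapes differ: {} vs {}".format(X.shape, Y.shape))
    return np.sqrt(np.sum(np.square(X - Y), axis=1))

print(row_wise_euclidean([[0, 0], [1, 1]], [[3, 4], [1, 1]]))  # [5. 0.]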
Example 3: _process_inputs
def _process_inputs(self, X, constraints):
    self.X_ = X = check_array(X)

    # check to make sure that no two constrained vectors are identical
    a, b, c, d = constraints
    no_ident = vector_norm(X[a] - X[b]) > 1e-9
    a, b = a[no_ident], b[no_ident]
    no_ident = vector_norm(X[c] - X[d]) > 1e-9
    c, d = c[no_ident], d[no_ident]
    if len(a) == 0:
        raise ValueError('No non-trivial similarity constraints given for MMC.')
    if len(c) == 0:
        raise ValueError('No non-trivial dissimilarity constraints given for MMC.')

    # init metric
    if self.A0 is None:
        self.A_ = np.identity(X.shape[1])
        if not self.diagonal:
            # Don't know why division by 10... it's in the original code
            # and seems to affect the overall scale of the learned metric.
            self.A_ /= 10.0
    else:
        self.A_ = check_array(self.A0)
    return a, b, c, d
Example 4: check_array_with_weights
def check_array_with_weights(X, weights, **kwargs):
    """Utility to validate data and weights.

    This calls check_array on X and weights, making sure the results match.
    """
    if weights is None:
        return check_array(X, **kwargs), weights

    # Always use copy=False for weights
    kwargs_weights = dict(kwargs)
    kwargs_weights.update(copy=False)
    weights = check_array(weights, **kwargs_weights)

    # Always use force_all_finite=False for X
    kwargs_X = dict(kwargs)
    kwargs_X.update(force_all_finite=False)
    X = check_array(X, **kwargs_X)

    # Make sure shapes match and missing data has weights=0
    if X.shape != weights.shape:
        raise ValueError("Shape of `X` and `weights` should match")

    Wzero = (weights == 0)
    X[Wzero] = 0

    if not np.all(np.isfinite(X)):
        raise ValueError("Input contains NaN or infinity without "
                         "a corresponding zero in `weights`.")
    return X, weights
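A quick usage sketch with made-up values (assumes check_array_with_weights from above is in scope): NaNs in X are tolerated exactly where the matching weight is zero, and those entries are zeroed out in the returned array.

import numpy as np

X = np.array([[1.0, np.nan], [3.0, 4.0]])
W = np.array([[1.0, 0.0], [1.0, 1.0]])   # zero weight where X is missing
Xc, Wc = check_array_with_weights(X, W)
print(Xc)  # [[1. 0.] [3. 4.]] -- the NaN entry became 0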
Example 5: log_loss
def log_loss(y_true, y_pred, eps=1e-15, normalize=True, sample_weight=None):
    lb = LabelBinarizer()
    T = lb.fit_transform(y_true)
    if T.shape[1] == 1:
        T = np.append(1 - T, T, axis=1)

    # Clipping
    Y = np.clip(y_pred, eps, 1 - eps)

    # This happens in cases when elements in y_pred have type "str".
    if not isinstance(Y, np.ndarray):
        raise ValueError("y_pred should be an array of floats.")

    # If y_pred is of single dimension, assume y_true to be binary
    # and then check.
    if Y.ndim == 1:
        Y = Y[:, np.newaxis]
    if Y.shape[1] == 1:
        Y = np.append(1 - Y, Y, axis=1)

    # Check if dimensions are consistent.
    val.check_consistent_length(T, Y)
    T = val.check_array(T)
    Y = val.check_array(Y)
    if T.shape[1] != Y.shape[1]:
        raise ValueError("y_true and y_pred have different number of classes "
                         "%d, %d" % (T.shape[1], Y.shape[1]))

    # Renormalize
    Y /= Y.sum(axis=1)[:, np.newaxis]
    loss = -(T * np.log(Y)).sum(axis=1)
    return _weighted_sum(loss, sample_weight, normalize)
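Once T and Y have matching shapes, the last three lines are plain categorical cross-entropy; a self-contained numeric check with made-up probabilities:

import numpy as np

T = np.array([[0, 1], [1, 0]])           # one-hot true labels
Y = np.array([[0.2, 0.8], [0.7, 0.3]])   # predicted class probabilities
Y = Y / Y.sum(axis=1)[:, np.newaxis]      # renormalize rows, as above
loss = -(T * np.log(Y)).sum(axis=1)       # per-sample loss
print(loss.mean())                        # ~0.2899, the mean log loss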
Example 6: _impose_f_order
def _impose_f_order(X):
    """Helper function."""
    # important to access flags instead of calling np.isfortran,
    # this catches corner cases.
    if X.flags.c_contiguous:
        return check_array(X.T, copy=False, order='F'), True
    else:
        return check_array(X, copy=False, order='F'), False
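Why the transpose trick works: the transpose of a C-contiguous array is already F-contiguous, so the first branch gets Fortran order without copying, and the returned flag tells the caller the transpose was taken. A small demonstration (assumes _impose_f_order from above is in scope):

import numpy as np

X = np.arange(6.0).reshape(2, 3)           # C-contiguous by default
Xf, transposed = _impose_f_order(X)
print(Xf.flags.f_contiguous, transposed)   # True True
print(Xf.shape)                            # (3, 2) -- the caller receives X.T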
Example 7: _prepare_inputs
def _prepare_inputs(self, X, W):
    self.X_ = X = check_array(X)
    W = check_array(W, accept_sparse=True)

    # set up prior M
    if self.use_cov:
        self.M_ = pinvh(np.cov(X, rowvar=False))
    else:
        self.M_ = np.identity(X.shape[1])
    L = laplacian(W, normed=False)
    return X.T.dot(L.dot(X))
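The returned matrix is the standard graph-Laplacian quadratic form. A toy check (adjacency and coordinates made up) that X.T @ L @ X equals 0.5 * sum_ij W_ij (x_i - x_j)(x_i - x_j)^T:

import numpy as np
from scipy.sparse.csgraph import laplacian

W = np.array([[0, 1], [1, 0]])           # two connected samples
X = np.array([[0.0, 0.0], [1.0, 2.0]])
L = laplacian(W, normed=False)            # L = D - W
print(X.T.dot(L.dot(X)))                  # [[1. 2.] [2. 4.]], the outer product of x_0 - x_1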
Example 8: fit
def fit(self, X, y=None):
    """Don't trust the documentation of this module!

    Compute the mean and std to be used for later scaling.

    Parameters
    ----------
    X : array-like or sparse matrix with shape [n_samples, n_features]
        The data used to compute the mean and standard deviation
        used for later scaling along the features axis.
    """
    X = check_array(X, copy=self.copy, accept_sparse="csc",
                    ensure_2d=False)
    if warn_if_not_float(X, estimator=self):
        # Costly conversion, but otherwise the pipeline will break:
        # https://github.com/scikit-learn/scikit-learn/issues/1709
        X = X.astype(np.float32)
    if sparse.issparse(X):
        if self.center_sparse:
            means = []
            vars = []

            # This only works for csc matrices...
            for i in range(X.shape[1]):
                if X.indptr[i] == X.indptr[i + 1]:
                    means.append(0)
                    vars.append(1)
                else:
                    vars.append(
                        X.data[X.indptr[i]:X.indptr[i + 1]].var())
                    # If the variance is 0, set all occurrences of this
                    # feature to 1
                    means.append(
                        X.data[X.indptr[i]:X.indptr[i + 1]].mean())
                    if 0.0000001 >= vars[-1] >= -0.0000001:
                        means[-1] -= 1

            self.std_ = np.sqrt(np.array(vars))
            self.std_[np.array(vars) == 0.0] = 1.0
            self.mean_ = np.array(means)
            return self
        elif self.with_mean:
            raise ValueError(
                "Cannot center sparse matrices: pass `with_mean=False` "
                "instead. See docstring for motivation and alternatives.")
        else:
            self.mean_ = None
            if self.with_std:
                var = mean_variance_axis(X, axis=0)[1]
                self.std_ = np.sqrt(var)
                self.std_[var == 0.0] = 1.0
            else:
                self.std_ = None
            return self
    else:
        self.mean_, self.std_ = _mean_and_std(
            X, axis=0, with_mean=self.with_mean, with_std=self.with_std)
        return self
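The per-column slicing above touches only a column's explicitly stored values, which is how it computes statistics for sparse data without densifying it. A minimal sketch of that arithmetic with a made-up CSC matrix:

import numpy as np
from scipy import sparse

X = sparse.csc_matrix(np.array([[1.0, 0.0], [3.0, 0.0]]))
i = 0                                       # first column
col = X.data[X.indptr[i]:X.indptr[i + 1]]   # stored values of column i
print(col.mean(), col.var())                # 2.0 1.0 -- implicit zeros are ignored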
Example 9: transform
def transform(self, X, y=None, copy=None):
    """Perform standardization by centering and scaling.

    Parameters
    ----------
    X : array-like with shape [n_samples, n_features]
        The data used to scale along the features axis.
    """
    check_is_fitted(self, 'std_')
    copy = copy if copy is not None else self.copy
    X = check_array(X, copy=copy, accept_sparse="csc", ensure_2d=False)
    if warn_if_not_float(X, estimator=self):
        X = X.astype(np.float64)
    if sparse.issparse(X):
        if self.center_sparse:
            for i in range(X.shape[1]):
                X.data[X.indptr[i]:X.indptr[i + 1]] -= self.mean_[i]
        elif self.with_mean:
            raise ValueError(
                "Cannot center sparse matrices: pass `with_mean=False` "
                "instead. See docstring for motivation and alternatives.")
        else:
            pass
        if self.std_ is not None:
            inplace_column_scale(X, 1 / self.std_)
    else:
        if self.with_mean:
            X -= self.mean_
        if self.with_std:
            X /= self.std_
    return X
Example 10: dump_svmlight_file
def dump_svmlight_file(X, y, f, zero_based=True, comment=None, query_id=None):
    y = np.asarray(y)
    if y.ndim != 1:
        raise ValueError("expected y of shape (n_samples,), got %r"
                         % (y.shape,))

    Xval = check_array(X, accept_sparse='csr')
    if Xval.shape[0] != y.shape[0]:
        raise ValueError("X.shape[0] and y.shape[0] should be the same, got"
                         " %r and %r instead." % (Xval.shape[0], y.shape[0]))

    # We had some issues with CSR matrices with unsorted indices (e.g. #1501),
    # so sort them here, but first make sure we don't modify the user's X.
    # TODO We can do this cheaper; sorted_indices copies the whole matrix.
    if Xval is X and hasattr(Xval, "sorted_indices"):
        X = Xval.sorted_indices()
    else:
        X = Xval
        if hasattr(X, "sort_indices"):
            X.sort_indices()

    if query_id is not None:
        query_id = np.asarray(query_id)
        if query_id.shape[0] != y.shape[0]:
            raise ValueError("expected query_id of shape (n_samples,), got %r"
                             % (query_id.shape,))

    one_based = not zero_based

    if hasattr(f, "write"):
        _dump_svmlight(X, y, f, one_based, comment, query_id)
    else:
        with open(f, "wb") as f:
            _dump_svmlight(X, y, f, one_based, comment, query_id)
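This snippet mirrors scikit-learn's public API, which can be exercised directly (the file path is made up):

import numpy as np
from sklearn.datasets import dump_svmlight_file, load_svmlight_file

X = np.array([[1.0, 0.0], [0.0, 2.0]])
y = np.array([0, 1])
dump_svmlight_file(X, y, "/tmp/tiny.svmlight", zero_based=True)
X2, y2 = load_svmlight_file("/tmp/tiny.svmlight", zero_based=True)  # X2 comes back as CSR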
Example 11: fit
def fit(self, X, y=None):
    """Fit detector. y is optional for unsupervised methods.

    Parameters
    ----------
    X : numpy array of shape (n_samples, n_features)
        The input samples.
    y : numpy array of shape (n_samples,), optional (default=None)
        The ground truth of the input samples (labels).
    """
    # validate inputs X and y (optional)
    X = check_array(X)
    self._set_n_classes(y)

    self.detector_ = LocalOutlierFactor(n_neighbors=self.n_neighbors,
                                        algorithm=self.algorithm,
                                        leaf_size=self.leaf_size,
                                        metric=self.metric,
                                        p=self.p,
                                        metric_params=self.metric_params,
                                        contamination=self.contamination,
                                        n_jobs=self.n_jobs)
    self.detector_.fit(X=X, y=y)

    # Invert decision_scores_: outliers come with higher outlier scores.
    self.decision_scores_ = invert_order(
        self.detector_.negative_outlier_factor_)
    self._process_decision_scores()
    return self
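For intuition about the inversion step: scikit-learn's LocalOutlierFactor exposes negative_outlier_factor_, where more negative means more anomalous, so negating it gives the "higher is more outlying" convention. A toy illustration (data made up):

import numpy as np
from sklearn.neighbors import LocalOutlierFactor

X = np.array([[0.0], [0.1], [0.2], [10.0]])   # one obvious outlier
lof = LocalOutlierFactor(n_neighbors=2)
lof.fit(X)
scores = -lof.negative_outlier_factor_         # invert: higher = more outlying
print(scores.argmax())                         # 3, the index of the outlier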
Example 12: ttest
def ttest(X, y):
    X = check_array(X, accept_sparse='csr')
    if np.any((X.data if issparse(X) else X) < 0):
        raise ValueError("Input X must be non-negative.")
    Y = MultiLabelBinarizer().fit_transform(y)
    if Y.shape[1] == 1:
        Y = np.append(1 - Y, Y, axis=1)
    negY = 1 - Y
    labelNum = Y.shape[1]
    # sampleNum = Y.shape[0]
    featureNum = X.shape[1]
    t = []
    prob = []
    for i in range(featureNum):
        values = X[:, i].T.toarray()[0]
        ti = 0
        probi = 0
        for j in range(labelNum):
            observed = values * Y[:, j]
            notObserved = values * negY[:, j]
            (res0, res1) = scipy.stats.ttest_ind(observed, notObserved)
            ti = ti + res0
            probi = probi + res1
        t.append(ti)
        prob.append(probi)
    t = np.asarray(t)
    prob = np.asarray(prob)
    return t, prob
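The per-label call at the heart of the double loop is plain scipy.stats.ttest_ind; a standalone check with synthetic draws:

import numpy as np
import scipy.stats

rng = np.random.RandomState(0)
a = rng.normal(0.0, 1.0, size=100)
b = rng.normal(0.5, 1.0, size=100)
stat, pval = scipy.stats.ttest_ind(a, b)
print(stat, pval)   # large |stat|, small p-value: the means differ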
Example 13: fit
def fit(self, X, y=None):
    """Fit the model with ``X``.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Training data, where n_samples is the number of samples
        and n_features is the number of features.

    Returns
    -------
    self : object
        Returns the instance itself.
    """
    X = check_array(X, dtype=np.float64)
    L, S, (U, s, Vt), self.n_iter_ = rpca(X, self.lam, self.mu,
                                          self.max_iter, self.eps_primal,
                                          self.eps_dual, self.rho,
                                          self.initial_sv, self.max_mu,
                                          self.verbose)
    self.low_rank_ = L
    r = np.count_nonzero(s)
    self.n_components_ = r
    self.components_ = Vt[:r]
    return self
Example 14: predict_proba
def predict_proba(self, X):
    """Predict probability for each possible outcome.

    Compute the probability estimates for each single sample in X
    and each possible outcome seen during training (categorical
    distribution).

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_features]

    Returns
    -------
    probabilities : array, shape = [n_samples, n_classes]
        Normalized probability distributions across
        class labels.
    """
    check_is_fitted(self, 'X_')

    X_2d = check_array(X, accept_sparse=['csc', 'csr', 'coo', 'dok',
                                         'bsr', 'lil', 'dia'])
    weight_matrices = self._get_kernel(self.X_, X_2d)
    if self.kernel == 'knn':
        probabilities = []
        for weight_matrix in weight_matrices:
            ine = np.sum(self.label_distributions_[weight_matrix], axis=0)
            probabilities.append(ine)
        probabilities = np.array(probabilities)
    else:
        weight_matrices = weight_matrices.T
        probabilities = np.dot(weight_matrices, self.label_distributions_)
    normalizer = np.atleast_2d(np.sum(probabilities, axis=1)).T
    probabilities /= normalizer
    return probabilities
Example 15: predict
def predict(self, X):
    """Predict class for X.

    Parameters
    ----------
    X : array-like of shape [n_samples, n_features]
        The input to classify.

    Returns
    -------
    y : array of shape = [n_samples]
        The predicted classes.
    """
    X = check_array(X)

    if self.trees_ is None:
        raise Exception("Pattern trees not initialized. Perform a fit first.")

    y_classes = np.zeros((X.shape[0], len(self.classes_)))
    for i, c in enumerate(self.classes_):
        y_classes[:, i] = self.trees_[i](X)

    # predict the maximum value
    return self.classes_.take(np.argmax(y_classes, -1))
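The argmax-then-take idiom on the last line maps per-class scores back to class labels; a minimal illustration with made-up scores:

import numpy as np

classes_ = np.array(['cat', 'dog'])
y_scores = np.array([[0.2, 0.9],
                     [0.7, 0.1]])
print(classes_.take(np.argmax(y_scores, -1)))  # ['dog' 'cat']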