本文整理汇总了 Python 中 sklearn.utils.validation.check_consistent_length 函数的典型用法代码示例。如果您正苦于以下问题：Python check_consistent_length 函数的具体用法？Python check_consistent_length 怎么用？Python check_consistent_length 使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了 check_consistent_length 函数的 12 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: log_loss
def log_loss(y_true, y_pred, eps=1e-15, normalize=True, sample_weight=None):
    """Compute the log loss (cross-entropy) of predicted probabilities.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels. They are one-hot encoded with
        ``LabelBinarizer``; a single-column encoding is expanded to two
        columns ``[1 - T, T]``.
    y_pred : array-like of float
        Predicted probabilities. A 1-d or single-column input is expanded
        to two columns ``[1 - Y, Y]``.
    eps : float, default 1e-15
        Predictions are clipped to ``[eps, 1 - eps]`` before the logarithm
        is taken.
    normalize : bool, default True
        Forwarded unchanged to ``_weighted_sum``.
    sample_weight : array-like or None, default None
        Forwarded unchanged to ``_weighted_sum``.

    Returns
    -------
    The value produced by ``_weighted_sum`` for the per-sample losses.

    Raises
    ------
    ValueError
        If clipping ``y_pred`` does not yield an ndarray, or if the encoded
        labels and the predictions have a different number of columns.
    """
    lb = LabelBinarizer()
    T = lb.fit_transform(y_true)
    if T.shape[1] == 1:
        T = np.append(1 - T, T, axis=1)

    # Clipping
    Y = np.clip(y_pred, eps, 1 - eps)

    # This happens in cases when elements in y_pred have type "str".
    if not isinstance(Y, np.ndarray):
        raise ValueError("y_pred should be an array of floats.")

    # If y_pred is of single dimension, assume y_true to be binary
    # and then check.
    if Y.ndim == 1:
        Y = Y[:, np.newaxis]
    if Y.shape[1] == 1:
        Y = np.append(1 - Y, Y, axis=1)

    # Check if dimensions are consistent.
    val.check_consistent_length(T, Y)
    T = val.check_array(T)
    Y = val.check_array(Y)
    if T.shape[1] != Y.shape[1]:
        raise ValueError("y_true and y_pred have different number of classes "
                         "%d, %d" % (T.shape[1], Y.shape[1]))

    # Renormalize so that every row of probabilities sums to one.
    Y /= Y.sum(axis=1)[:, np.newaxis]
    loss = -(T * np.log(Y)).sum(axis=1)
    return _weighted_sum(loss, sample_weight, normalize)
示例2: pinball_loss
def pinball_loss(y_true, y_pred, probs):
    """Compute the average pinball loss for each quantile level.

    Parameters
    ----------
    y_true : array, shape = [n_samples]
        Targets.
    y_pred : array, shape = [n_quantiles, n_samples] or [n_samples]
        Predictions.
    probs : array-like
        Quantile levels, flattened to 1-d and paired in order with the
        columns of the reshaped predictions.

    Returns
    -------
    loss : array, shape = [n_quantiles]
        Average loss for each quantile level.
    """
    probs = asarray(probs).reshape(-1)
    check_consistent_length(y_true, y_pred.T)

    # Targets become a column vector; predictions one column per quantile.
    targets = check_array(y_true.reshape((-1, 1)), ensure_2d=True)
    n_rows = targets.shape[0]
    predictions = check_array(y_pred.T.reshape((n_rows, -1)), ensure_2d=True)

    residual_per_quantile = (targets - predictions).T
    per_sample_losses = []
    for res, prob in zip(residual_per_quantile, probs):
        per_sample_losses.append(fmax(prob * res, (prob - 1) * res))

    totals = npsum(per_sample_losses, axis=1)
    return totals / targets.size
示例3: _check_rows_and_columns
def _check_rows_and_columns(a, b):
    """Unpack two (rows, columns) pairs and validate each array.

    Both ``a`` and ``b`` are unpacked into ``check_consistent_length`` and
    then each of their two members is passed through
    ``check_array(..., ensure_2d=False)``.

    Returns
    -------
    tuple
        ``(a_rows, a_cols, b_rows, b_cols)`` as returned by ``check_array``.
    """
    check_consistent_length(*a)
    check_consistent_length(*b)
    # Generator expressions are consumed lazily by the tuple unpacking.
    a_rows, a_cols = (check_array(part, ensure_2d=False) for part in a)
    b_rows, b_cols = (check_array(part, ensure_2d=False) for part in b)
    return a_rows, a_cols, b_rows, b_cols
示例4: test_check_dataframe_fit_attribute
def test_check_dataframe_fit_attribute():
    """Check that a pandas DataFrame with a 'fit' column does not raise.

    See https://github.com/scikit-learn/scikit-learn/issues/8415

    Raises
    ------
    SkipTest
        If pandas cannot be imported.
    """
    # Guard only the import, so that an ImportError raised by the code
    # under test is reported as a failure instead of a skip.
    try:
        import pandas as pd
    except ImportError:
        raise SkipTest("Pandas not found")

    X = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    X_df = pd.DataFrame(X, columns=['a', 'b', 'fit'])
    check_consistent_length(X_df)
示例5: fit
def fit(self, X, y, sample_weight=None):
    """
    Build a classifier from the training set (X, y).

    Parameters
    ----------
    X : array-like or sparse matrix of shape = [n_samples, n_features]
        The training input samples.
    y : array-like, shape = [n_samples]
        The target values (class labels in classification).
    sample_weight : array-like, shape = [n_samples] or None
        Individual weights for each sample.

    Returns
    -------
    self : object
        Returns self.

    Raises
    ------
    ValueError
        If ``y`` contains fewer than two distinct classes.
    """
    self._validate_params(**self.get_params())
    X, y = check_X_y(X, y, accept_sparse=True)
    self._is_sparse_train_X = bool(sp.isspmatrix(X))
    self._n_samples, self._n_features = X.shape

    sample_weight = self._get_sample_weight(sample_weight)
    check_consistent_length(X, y, sample_weight)
    check_classification_targets(y)

    self._classes = sorted(np.unique(y))
    self._n_classes = len(self._classes)
    self._classes_map = {}

    self._set_params_with_dependencies()
    params = self._get_params()

    if self._n_classes < 2:
        raise ValueError("Classifier can't predict when only one class is present.")

    if self._n_classes == 2:
        # Index 0 / 1 map back to the two sorted class labels.
        self._classes_map.update(enumerate(self._classes))
        self._estimators = [None]
        y = (y == self._classes[0]).astype(int)
        self._fit_binary_task(X, y, sample_weight, params)
    else:
        if sp.isspmatrix_dok(X):
            X = X.tocsr().tocoo()  # Fix to avoid scipy 7699 issue
        self._estimators = [None] * self._n_classes
        self._fit_multiclass_task(X, y, sample_weight, params)

    self._fitted = True
    return self
示例6: _indexable
def _indexable(X, y):
    """Validate X and y and check that they have consistent lengths.

    Parameters
    ----------
    X : array-like or pandas DataFrame, shape = [n_samples, n_features]
        Input data, where n_samples is the number of samples and
        n_features is the number of features.
    y : array-like, shape = [n_samples] or [n_samples, n_output], optional
        Target relative to X for classification or regression;
        None for unsupervised learning.

    Returns
    -------
    list
        ``[_validate_X(X), _validate_y(y)]``.
    """
    validated_X = _validate_X(X)
    validated_y = _validate_y(y)
    validated = [validated_X, validated_y]
    check_consistent_length(*validated)
    return validated
示例7: _my_lrap
def _my_lrap(y_true, y_score):
    """Simple implementation of label ranking average precision.

    Both inputs are validated with ``check_array``; the mean of the
    per-sample scores is returned.
    """
    check_consistent_length(y_true, y_score)
    y_true = check_array(y_true)
    y_score = check_array(y_score)
    n_samples, n_labels = y_true.shape
    score = np.empty((n_samples, ))

    for i, (truth_row, score_row) in enumerate(zip(y_true, y_score)):
        # Rank 1 is the best (highest score); larger ranks are worse.
        distinct_scores, inverse = np.unique(score_row, return_inverse=True)
        n_ranks = distinct_scores.size
        rank = n_ranks - inverse

        # Correct for ties: labels sharing a rank all get the rank of the
        # last label in the tied group (rank 1 ex aequo -> both rank 2).
        tie_corrected = np.bincount(rank, minlength=n_ranks + 1).cumsum()
        rank = tie_corrected[rank]

        relevant = truth_row.nonzero()[0]
        if relevant.size in (0, n_labels):
            score[i] = 1
            continue

        total = 0.
        for label in relevant:
            # Number of relevant labels ranked at least as well as this one,
            # weighted by the rank of the label itself.
            n_ranked_above = (rank[relevant] <= rank[label]).sum()
            total += n_ranked_above / rank[label]
        score[i] = total / relevant.size

    return score.mean()
示例8: test_check_consistent_length
def test_check_consistent_length():
    """Exercise check_consistent_length on valid and invalid inputs."""
    # Inputs with matching lengths must pass silently.
    check_consistent_length([1], [2], [3], [4], [5])
    check_consistent_length([[1, 2], [[1, 2]]], [1, 2], ['a', 'b'])
    check_consistent_length([1], (2,), np.array([3]), sp.csr_matrix((1, 2)))

    # Mismatched lengths.
    assert_raises_regexp(ValueError, 'inconsistent numbers of samples',
                         check_consistent_length, [1, 2], [1])

    # Arguments without a usable length. The patterns are raw strings so
    # that ``\w`` is not treated as an (invalid) string escape sequence.
    assert_raises_regexp(TypeError, r"got <\w+ 'int'>",
                         check_consistent_length, [1, 2], 1)
    assert_raises_regexp(TypeError, r"got <\w+ 'object'>",
                         check_consistent_length, [1, 2], object())
    assert_raises(TypeError, check_consistent_length, [1, 2], np.array(1))

    # Despite ensembles having __len__ they must raise TypeError
    assert_raises_regexp(TypeError, 'estimator', check_consistent_length,
                         [1, 2], RandomForestRegressor())
示例9: cv_split
def cv_split(cv, X, y, groups, is_pairwise, cache):
    """Materialize the splits of ``cv`` and wrap them in a ``CVCache``.

    ``X``, ``y`` and ``groups`` are first checked for consistent length;
    ``is_pairwise`` and ``cache`` are forwarded to ``CVCache`` unchanged.
    """
    check_consistent_length(X, y, groups)
    splits = list(cv.split(X, y, groups))
    return CVCache(splits, is_pairwise, cache)
示例10: _check_reg_targets
def _check_reg_targets(y_true, y_pred, multioutput):
    """Check that y_true and y_pred belong to the same regression task

    Parameters
    ----------
    y_true : array-like,
    y_pred : array-like,
    multioutput : array-like or string in ['raw_values', 'uniform_average',
        'variance_weighted'] or None
        None is accepted due to backward compatibility of r2_score().

    Returns
    -------
    type_true : one of {'continuous', 'continuous-multioutput'}
        The type of the true target data, as output by
        'utils.multiclass.type_of_target'
    y_true : array-like of shape = (n_samples, n_outputs)
        Ground truth (correct) target values.
    y_pred : array-like of shape = (n_samples, n_outputs)
        Estimated target values.
    multioutput : array-like of shape = (n_outputs) or string in ['raw_values',
        'uniform_average', 'variance_weighted'] or None
        Custom output weights if ``multioutput`` is array-like or
        just the corresponding argument if ``multioutput`` is a
        correct keyword.

    Raises
    ------
    ValueError
        If ``y_true`` and ``y_pred`` have a different number of outputs, if
        custom weights are given for a single-output problem, or if the
        number of custom weights differs from the number of outputs.
    """
    check_consistent_length(y_true, y_pred)
    y_true = check_array(y_true, ensure_2d=False)
    y_pred = check_array(y_pred, ensure_2d=False)

    # Promote 1-d targets to a single-output column.
    if y_true.ndim == 1:
        y_true = y_true.reshape((-1, 1))
    if y_pred.ndim == 1:
        y_pred = y_pred.reshape((-1, 1))

    if y_true.shape[1] != y_pred.shape[1]:
        raise ValueError("y_true and y_pred have different number of output "
                         "({0}!={1})".format(y_true.shape[1], y_pred.shape[1]))

    n_outputs = y_true.shape[1]
    multioutput_keywords = ('raw_values', 'uniform_average',
                            'variance_weighted')
    # Test for None / keyword explicitly. A plain ``multioutput in (...)``
    # membership test compares an ndarray elementwise against each option
    # and fails with an ambiguous-truth-value error for array weights.
    is_keyword = multioutput is None or (
        isinstance(multioutput, str) and multioutput in multioutput_keywords)
    if not is_keyword:
        multioutput = check_array(multioutput, ensure_2d=False)
        if n_outputs == 1:
            raise ValueError("Custom weights are useful only in "
                             "multi-output cases.")
        elif n_outputs != len(multioutput):
            raise ValueError(("There must be equally many custom weights "
                              "(%d) as outputs (%d).") %
                             (len(multioutput), n_outputs))

    y_type = 'continuous' if n_outputs == 1 else 'continuous-multioutput'
    return y_type, y_true, y_pred, multioutput
示例11: wpearsonr
def wpearsonr(x, y, w=None):
    """Utility function to calculate the weighted Pearson correlation of two
    samples.

    See https://stats.stackexchange.com/questions/221246/such-thing-as-a-weighted-correlation
    for more information

    Parameters
    ----------
    x : array, shape (n,)
        Input x.
    y : array, shape (n,)
        Input y.
    w : array, shape (n,), optional
        Weights w. When None, the call is delegated to ``pearsonr(x, y)``.

    Returns
    -------
    scores : float in range of [-1,1]
        Weighted Pearson Correlation between x and y. When ``w`` is None the
        return value is whatever ``pearsonr`` returns instead.
    """
    # unweighted version
    # note the return is different
    # TODO: fix output differences
    if w is None:
        return pearsonr(x, y)

    x = np.asarray(x)
    y = np.asarray(y)
    w = np.asarray(w)

    # Pass the arrays as separate arguments: wrapping them in one list
    # would only check that single list against itself and always pass.
    check_consistent_length(x, y, w)

    w_sum = w.sum()
    mx = np.sum(x * w) / w_sum
    my = np.sum(y * w) / w_sum

    xm, ym = (x - mx), (y - my)

    r_num = np.sum(xm * ym * w) / w_sum

    xm2 = np.sum(xm * xm * w) / w_sum
    ym2 = np.sum(ym * ym * w) / w_sum

    r_den = np.sqrt(xm2 * ym2)
    r = r_num / r_den

    # Clamp to [-1, 1] to absorb floating-point overshoot.
    r = max(min(r, 1.0), -1.0)

    # TODO: p-value calculation is disabled due to a python 2.7 break.
    return r
示例12: fit
def fit(self, X, y, sample_weight=None):
    """
    Build a RGF Classifier from the training set (X, y).

    Parameters
    ----------
    X : array-like or sparse matrix of shape = [n_samples, n_features]
        The training input samples.
    y : array-like, shape = [n_samples]
        The target values (class labels in classification).
    sample_weight : array-like, shape = [n_samples] or None
        Individual weights for each sample.

    Returns
    -------
    self : object
        Returns self.

    Raises
    ------
    ValueError
        If any sample weight is not strictly positive, or if ``y`` contains
        fewer than two distinct classes.
    """
    _validate_params(**self.get_params())

    X, y = check_X_y(X, y, accept_sparse=True)
    n_samples, self._n_features = X.shape

    # Resolve parameters whose effective value depends on other settings.
    self._sl2 = self.l2 if self.sl2 is None else self.sl2

    if isinstance(self.min_samples_leaf, _FLOATS):
        # A float is interpreted as a fraction of the training samples.
        self._min_samples_leaf = ceil(self.min_samples_leaf * n_samples)
    else:
        self._min_samples_leaf = self.min_samples_leaf

    if self.n_iter is not None:
        self._n_iter = self.n_iter
    else:
        self._n_iter = 10 if self.loss == "LS" else 5

    if sample_weight is None:
        sample_weight = np.ones(n_samples, dtype=np.float32)
    else:
        sample_weight = column_or_1d(sample_weight, warn=True)
        if (sample_weight <= 0).any():
            raise ValueError("Sample weights must be positive.")
    check_consistent_length(X, y, sample_weight)
    check_classification_targets(y)

    self._classes = sorted(np.unique(y))
    self._n_classes = len(self._classes)
    self._classes_map = {}

    params = dict(
        max_leaf=self.max_leaf,
        test_interval=self.test_interval,
        algorithm=self.algorithm,
        loss=self.loss,
        reg_depth=self.reg_depth,
        l2=self.l2,
        sl2=self._sl2,
        normalize=self.normalize,
        min_samples_leaf=self._min_samples_leaf,
        n_iter=self._n_iter,
        n_tree_search=self.n_tree_search,
        opt_interval=self.opt_interval,
        learning_rate=self.learning_rate,
        memory_policy=self.memory_policy,
        verbose=self.verbose,
    )

    if self._n_classes < 2:
        raise ValueError("Classifier can't predict when only one class is present.")

    if self._n_classes == 2:
        self._classes_map[0] = self._classes[0]
        self._classes_map[1] = self._classes[1]
        self._estimators = [None]
        y = (y == self._classes[0]).astype(int)
        binary_estimator = _RGFBinaryClassifier(**params)
        self._estimators[0] = binary_estimator
        binary_estimator.fit(X, y, sample_weight)
    else:
        if sp.isspmatrix_dok(X):
            X = X.tocsr().tocoo()  # Fix to avoid scipy 7699 issue

        # One binary (one-vs-rest) target vector and estimator per class.
        self._estimators = []
        ovr_targets = []
        for i, cls_num in enumerate(self._classes):
            self._classes_map[i] = cls_num
            ovr_targets.append((y == cls_num).astype(int))
            self._estimators.append(_RGFBinaryClassifier(**params))

        self._estimators = Parallel(n_jobs=self.n_jobs)(
            delayed(_fit_ovr_binary)(estimator, X, target, sample_weight)
            for estimator, target in zip(self._estimators, ovr_targets))

    self._fitted = True
    return self