本文整理汇总了Python中sklearn.utils.validation._num_samples方法的典型用法代码示例。如果您正苦于以下问题：Python validation._num_samples方法的具体用法？Python validation._num_samples怎么用？Python validation._num_samples使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块sklearn.utils.validation的用法示例。
在下文中一共展示了validation._num_samples方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: check_cv_coverage
# 需要导入模块: from sklearn.utils import validation [as 别名]
# 或者: from sklearn.utils.validation import _num_samples [as 别名]
def check_cv_coverage(cv, X, y, groups, expected_n_splits=None):
    """Assert that the test folds yielded by ``cv`` cover every sample once or more.

    Parameters
    ----------
    cv : object
        Must expose ``get_n_splits(X, y, groups)`` and ``split(X, y, groups)``.
    X, y, groups : objects
        Forwarded unchanged to ``cv``; ``X`` is also measured with
        ``_num_samples``.
    expected_n_splits : int or None, default=None
        When given, the split count reported by ``cv`` must equal it.
        When ``None``, the count reported by ``cv`` becomes the reference
        against which the number of iterations is checked.
    """
    n_samples = _num_samples(X)

    reported_n_splits = cv.get_n_splits(X, y, groups)
    if expected_n_splits is None:
        expected_n_splits = reported_n_splits
    else:
        assert_equal(reported_n_splits, expected_n_splits)

    # Walk every split, validating it and remembering which samples were
    # ever placed in a test fold.
    seen_in_test = set()
    n_iterations = 0
    for train, test in cv.split(X, y, groups):
        check_valid_split(train, test, n_samples=n_samples)
        n_iterations += 1
        seen_in_test.update(test)

    # The splitter must iterate exactly as often as announced, and the
    # union of all test folds must be the whole index range.
    assert_equal(n_iterations, expected_n_splits)
    if n_samples is not None:
        assert_equal(seen_in_test, set(range(n_samples)))
示例2: transform
# 需要导入模块: from sklearn.utils import validation [as 别名]
# 或者: from sklearn.utils.validation import _num_samples [as 别名]
def transform(self, y):
    """Encode labels against the fitted ``self.classes_``.

    Parameters
    ----------
    y : array-like of shape [n_samples]
        Target values.

    Returns
    -------
    y : array-like of shape [n_samples]
        Second element returned by ``_encode`` for ``y`` with
        ``uniques=self.classes_``; an empty array when ``y`` has no samples.
    """
    check_is_fitted(self, 'classes_')
    labels = column_or_1d(y, warn=True)

    if _num_samples(labels) == 0:
        # Nothing to encode: empty input maps to empty output.
        return np.array([])

    _, encoded = _encode(labels, uniques=self.classes_, encode=True)
    return encoded
示例3: inverse_transform
# 需要导入模块: from sklearn.utils import validation [as 别名]
# 或者: from sklearn.utils.validation import _num_samples [as 别名]
def inverse_transform(self, y):
    """Map encoded labels back to the entries of ``self.classes_``.

    Parameters
    ----------
    y : numpy array of shape [n_samples]
        Target values, used as positions into ``self.classes_``.

    Returns
    -------
    y : numpy array of shape [n_samples]
        ``self.classes_`` indexed by ``y``; an empty array when ``y`` has
        no samples.

    Raises
    ------
    ValueError
        If ``y`` contains a value outside ``range(len(self.classes_))``.
    """
    check_is_fitted(self, 'classes_')
    codes = column_or_1d(y, warn=True)

    if _num_samples(codes) == 0:
        # Empty in, empty out.
        return np.array([])

    valid_codes = np.arange(len(self.classes_))
    unseen = np.setdiff1d(codes, valid_codes)
    if len(unseen) > 0:
        message = "y contains previously unseen labels: %s" % str(unseen)
        raise ValueError(message)

    return self.classes_[np.asarray(codes)]
示例4: fit
# 需要导入模块: from sklearn.utils import validation [as 别名]
# 或者: from sklearn.utils.validation import _num_samples [as 别名]
def fit(self, y):
    """Fit label binarizer.

    Stores the target type in ``y_type_``, whether ``y`` is sparse in
    ``sparse_input_`` and the result of ``unique_labels(y)`` in ``classes_``.

    Parameters
    ----------
    y : array of shape [n_samples,] or [n_samples, n_classes]
        Target values. The 2-d matrix should only contain 0 and 1,
        represents multilabel classification.

    Returns
    -------
    self : returns an instance of self.

    Raises
    ------
    ValueError
        If the detected target type contains ``'multioutput'`` or ``y``
        has no samples.
    """
    target_type = type_of_target(y)
    # Recorded before validation, so it is set even when fitting fails below.
    self.y_type_ = target_type

    if 'multioutput' in target_type:
        message = ("Multioutput target data is not supported with "
                   "label binarization")
        raise ValueError(message)

    if _num_samples(y) == 0:
        raise ValueError('y has 0 samples: %r' % y)

    self.sparse_input_ = sp.issparse(y)
    self.classes_ = unique_labels(y)
    return self
示例5: predict
# 需要导入模块: from sklearn.utils import validation [as 别名]
# 或者: from sklearn.utils.validation import _num_samples [as 别名]
def predict(self, x):
    """Predict by picking, per sample, the estimator with the highest score.

    Every estimator in ``self.estimators`` scores ``x`` through its
    ``decision_function``; the position of the best-scoring estimator is
    then used to look up the label in ``self.classes``.

    Args:
        x (numpy.ndarray): NxD array

    Returns:
        numpy.ndarray: one entry of ``self.classes`` per sample.
    """
    n_samples = _num_samples(x)

    # Running maximum of the scores, starting below any real score.
    best_score = np.empty(n_samples, dtype=float)
    best_score.fill(-np.inf)
    best_position = np.zeros(n_samples, dtype=int)

    for position, estimator in enumerate(self.estimators):
        scores = np.ravel(estimator.decision_function(x))
        np.maximum(best_score, scores, out=best_score)
        # After the update, equality marks the samples where this estimator
        # reaches the running maximum; on ties the later estimator wins.
        best_position[best_score == scores] = position

    return self.classes[np.array(best_position.T)]
示例6: get_n_splits
# 需要导入模块: from sklearn.utils import validation [as 别名]
# 或者: from sklearn.utils.validation import _num_samples [as 别名]
def get_n_splits(self, X, y=None, groups=None):
    """Returns the number of splitting iterations in the cross-validator

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Training data, where n_samples is the number of samples
        and n_features is the number of features.
    y : object
        Always ignored, exists for compatibility.
    groups : object
        Always ignored, exists for compatibility.

    Returns
    -------
    n_splits : int
        Number of splits computed from the sample count, ``self.p``,
        ``self.gap_before`` and ``self.gap_after``.
    """
    self.__check_validity(X, y, groups)
    n_samples = _num_samples(X)
    p = self.p
    gap_before, gap_after = self.gap_before, self.gap_after

    n_leading = n_samples - gap_after - p
    if n_leading >= gap_before + 1:
        return n_samples - p + 1

    # Otherwise the count is the sum of two terms, each clamped at zero.
    n_trailing = n_samples - p - gap_before
    return max(n_leading, 0) + max(n_trailing, 0)
示例7: _do_n_samples
# 需要导入模块: from sklearn.utils import validation [as 别名]
# 或者: from sklearn.utils.validation import _num_samples [as 别名]
def _do_n_samples(dsk, token, Xs, n_splits):
    """Register ``_num_samples`` tasks in ``dsk`` and return their key prefixes.

    For each distinct ``x`` in ``Xs`` (first-seen order gives it an index
    ``m``), one entry ``dsk[name, m, n] = (_num_samples, x + (n,))`` is
    written for every ``n`` in ``range(n_splits)``, where ``name`` is
    ``"n_samples-" + token``. Repeated ``x`` values reuse the index that was
    assigned the first time.

    Parameters
    ----------
    dsk : mutable mapping
        Updated in place (presumably a task graph -- confirm with caller).
    token : str
        Suffix appended to ``"n_samples-"`` to build the key name.
    Xs : iterable
        Hashable items supporting ``x + (n,)``; presumably tuple keys --
        verify against caller.
    n_splits : int
        Number of per-split entries written for each distinct ``x``.

    Returns
    -------
    list of tuple
        One ``(name, m)`` pair per element of ``Xs``, in input order.
    """
    name = "n_samples-" + token
    assigned = {}
    keys = []
    for x in Xs:
        key = assigned.get(x)
        if key is None:
            # First occurrence: the next free index is the number of
            # distinct inputs registered so far.
            index = len(assigned)
            key = (name, index)
            for split in range(n_splits):
                dsk[name, index, split] = (_num_samples, x + (split,))
            assigned[x] = key
        keys.append(key)
    return keys
示例8: test_check_sample_weight
# 需要导入模块: from sklearn.utils import validation [as 别名]
# 或者: from sklearn.utils.validation import _num_samples [as 别名]
def test_check_sample_weight():
    """Check ``_check_sample_weight`` when ``sample_weight`` is ``None``.

    The returned weights must have as many samples as ``X``, sum to the
    number of samples and share the dtype of ``X``. ``X`` is a name defined
    outside this function (presumably a module-level fixture).
    """
    from sklearn.cluster.k_means_ import _check_sample_weight

    weights = _check_sample_weight(X, None)
    n_rows = _num_samples(X)

    assert_equal(n_rows, _num_samples(weights))
    assert_almost_equal(weights.sum(), n_rows)
    assert_equal(X.dtype, weights.dtype)
示例9: transform
# 需要导入模块: from sklearn.utils import validation [as 别名]
# 或者: from sklearn.utils.validation import _num_samples [as 别名]
def transform(self, y):
    """Transform labels to normalized encoding.

    If ``self.fill_unseen_labels`` is ``True``, use
    ``self.fill_encoded_label_value`` for unseen values. Seen labels are
    encoded with value between 0 and n_classes-1. Unseen labels are encoded
    with ``self.fill_encoded_label_value``; when that attribute is ``None``
    the default value n_classes is used.

    Parameters
    ----------
    y : array-like of shape [n_samples]
        Label values.

    Returns
    -------
    y_encoded : array-like of shape [n_samples]
        Encoded label values.
    """
    check_is_fitted(self, "classes_")
    y = column_or_1d(y, warn=True)

    # transform of empty array is empty array
    if _num_samples(y) == 0:
        return np.array([])

    if self.fill_unseen_labels:
        _, mask = _encode_check_unknown(y, self.classes_, return_mask=True)
        y_encoded = np.searchsorted(self.classes_, y)
        # Compare against None explicitly: a plain ``or`` would also discard
        # a configured fill value of 0 and silently use n_classes instead.
        fill_encoded_label_value = self.fill_encoded_label_value
        if fill_encoded_label_value is None:
            fill_encoded_label_value = len(self.classes_)
        # ``mask`` flags the known labels; overwrite every other position.
        y_encoded[~mask] = fill_encoded_label_value
    else:
        _, y_encoded = _encode(y, uniques=self.classes_, encode=True)
    return y_encoded
示例10: inverse_transform
# 需要导入模块: from sklearn.utils import validation [as 别名]
# 或者: from sklearn.utils.validation import _num_samples [as 别名]
def inverse_transform(self, y):
    """Transform labels back to original encoding.

    If ``self.fill_unseen_labels`` is ``True``, use ``self.fill_label_value``
    for unseen values.

    Parameters
    ----------
    y : numpy array of shape [n_samples]
        Encoded label values.

    Returns
    -------
    y_decoded : list of length n_samples
        Label values. An empty numpy array is returned when ``y`` has no
        samples.

    Raises
    ------
    ValueError
        If ``y`` cannot be converted to integers, or if it contains codes
        outside ``range(len(self.classes_))`` while
        ``self.fill_unseen_labels`` is false.
    """
    check_is_fitted(self, "classes_")
    y = column_or_1d(y, warn=True)

    if y.dtype.kind not in ("i", "u"):
        try:
            # Builtin float/int are what the removed ``np.float``/``np.int``
            # aliases pointed to; the aliases no longer exist in NumPy >= 1.24.
            y = y.astype(float).astype(int)
        except ValueError:
            raise ValueError("`y` contains values not convertible to integer.")

    # inverse transform of empty array is empty array
    if _num_samples(y) == 0:
        return np.array([])

    n_classes = len(self.classes_)
    diff = np.setdiff1d(y, np.arange(n_classes))
    # ``diff`` is an array: test its size rather than its truth value, which
    # raises when more than one unseen code is present.
    if diff.size and not self.fill_unseen_labels:
        raise ValueError("y contains previously unseen labels: %s" % str(diff))

    # A code is known exactly when it lies in [0, n_classes); the range
    # comparison avoids scanning an index array for every element.
    y_decoded = [
        self.classes_[idx] if 0 <= idx < n_classes else self.fill_label_value
        for idx in y
    ]
    return y_decoded
示例11: _iter_train_indices
# 需要导入模块: from sklearn.utils import validation [as 别名]
# 或者: from sklearn.utils.validation import _num_samples [as 别名]
def _iter_train_indices(self, X=None, y=None, groups=None):
    """Generates integer indices corresponding to training sets.

    By default, delegates to _iter_test_indices(X, y, groups) and passes
    the result, together with the number of samples in ``X``, to
    ``__complement_indices``.
    """
    test_indices = self._iter_test_indices(X, y, groups)
    n_samples = _num_samples(X)
    return self.__complement_indices(test_indices, n_samples)
示例12: _iter_train_masks
# 需要导入模块: from sklearn.utils import validation [as 别名]
# 或者: from sklearn.utils.validation import _num_samples [as 别名]
def _iter_train_masks(self, X=None, y=None, groups=None):
    """Generates boolean masks corresponding to training sets.

    By default, delegates to _iter_train_indices(X, y, groups) and converts
    the result with ``GapCrossValidator.__indices_to_masks``, using the
    number of samples in ``X``.
    """
    train_indices = self._iter_train_indices(X, y, groups)
    n_samples = _num_samples(X)
    return GapCrossValidator.__indices_to_masks(train_indices, n_samples)
示例13: _iter_test_indices
# 需要导入模块: from sklearn.utils import validation [as 别名]
# 或者: from sklearn.utils.validation import _num_samples [as 别名]
def _iter_test_indices(self, X, y=None, groups=None):
    """Yield test index arrays, each a run of ``self.p`` consecutive indices.

    Every yielded array is ``np.arange(start, start + self.p)``. Which start
    positions are used depends on the sample count, ``self.p``,
    ``self.gap_before`` and ``self.gap_after``: either every start from 0 to
    ``n_samples - self.p``, or two separate ranges of starts.
    """
    self.__check_validity(X, y, groups)
    n_samples = _num_samples(X)
    p = self.p
    gap_before, gap_after = self.gap_before, self.gap_after

    stop = n_samples - p + 1  # exclusive bound on the start position
    n_leading = n_samples - gap_after - p

    if n_leading >= gap_before + 1:
        start_ranges = (range(stop),)
    else:
        start_ranges = (range(n_leading), range(gap_before + 1, stop))

    for starts in start_ranges:
        for start in starts:
            yield np.arange(start, start + p)
示例14: __check_validity
# 需要导入模块: from sklearn.utils import validation [as 别名]
# 或者: from sklearn.utils.validation import _num_samples [as 别名]
def __check_validity(self, X, y=None, groups=None):
    """Validate ``X`` against ``self.p`` and the configured gaps.

    Raises
    ------
    ValueError
        If ``X`` is ``None``, or if both
        ``n_samples - gap_after - p <= 0`` and
        ``gap_before >= n_samples - p`` hold.

    Warns
    -----
    Warning
        ``SINGLETON_WARNING`` when
        ``n_samples - gap_after - p <= gap_before + 1``.
    """
    if X is None:
        raise ValueError("The 'X' parameter should not be None.")

    n_samples = _num_samples(X)
    gap_before, gap_after = self.gap_before, self.gap_after
    n_leading = n_samples - gap_after - self.p
    n_without_test = n_samples - self.p

    if n_leading <= 0 and gap_before >= n_without_test:
        raise ValueError("Not enough training samples available.")

    if n_leading <= gap_before + 1:
        warnings.warn(SINGLETON_WARNING, Warning)
示例15: _iter_indices
# 需要导入模块: from sklearn.utils import validation [as 别名]
# 或者: from sklearn.utils.validation import _num_samples [as 别名]
def _iter_indices(self, X, y, groups=None):
    """Yield ``(train, test)`` index arrays for ``self.n_splits`` shuffles.

    ``y`` is validated, cast to bool and must be of target type
    ``'multilabel-indicator'``. For each split the rows are shuffled, handed
    to ``IterativeStratification`` with the train/test proportions, and the
    resulting assignment is mapped back to the original row order; rows
    assigned the value 1 form the test set.

    Raises
    ------
    ValueError
        If the target type of ``y`` is not ``'multilabel-indicator'``.
    """
    n_samples = _num_samples(X)

    labels = check_array(y, ensure_2d=False, dtype=None)
    labels = np.asarray(labels, dtype=bool)

    target_type = type_of_target(labels)
    if target_type != 'multilabel-indicator':
        message = 'Supported target type is: multilabel-indicator. Got {!r} instead.'.format(
            target_type)
        raise ValueError(message)

    n_train, n_test = _validate_shuffle_split(
        n_samples, self.test_size, self.train_size)

    # From here on the row count of y, not of X, drives the shuffling.
    n_rows = labels.shape[0]
    rng = check_random_state(self.random_state)
    # Private snapshot, so later outside changes to y cannot affect the
    # remaining splits of this generator.
    labels_snapshot = labels.copy()
    proportions = np.array([n_train, n_test]) / (n_train + n_test)

    for _ in range(self.n_splits):
        permutation = np.arange(n_rows)
        rng.shuffle(permutation)
        shuffled_labels = labels_snapshot[permutation]
        shuffled_folds = IterativeStratification(
            labels=shuffled_labels, r=proportions, random_state=rng)
        # argsort of the permutation is its inverse: it restores the
        # original row order of the fold assignment.
        in_test = shuffled_folds[np.argsort(permutation)] == 1
        test = np.where(in_test)[0]
        train = np.where(~in_test)[0]
        yield train, test