This article collects typical usage examples of the Python method sklearn.utils.indexable. If you have been struggling with questions like: what exactly does utils.indexable do? how is it used? where can I find working examples? then the curated code samples below may help. You can also explore further usage examples from its containing module, sklearn.utils.
Five code examples of the utils.indexable method are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
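Before the examples, it helps to know what indexable itself does: it length-checks all non-None arguments and returns them in a form that supports fancy indexing (None passes through unchanged), which is why it appears at the top of every split- and cross-validation-style function below. A minimal demonstration:

import numpy as np
from sklearn.utils import indexable

X = np.arange(12).reshape(6, 2)
y = [0, 1, 0, 1, 0, 1]

# all non-None arguments are checked for a consistent number of
# samples and returned in indexable form; None passes through
X, y, groups = indexable(X, y, None)
print(len(X), len(y), groups)  # -> 6 6 None

# mismatched lengths raise a ValueError:
# indexable(X, [0, 1])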
Example 1: split
# Required import: from sklearn import utils [as alias]
# Or: from sklearn.utils import indexable [as alias]
# Also assumed in scope: import numpy as np
def split(self, X, y, groups=None):
    if groups is None:
        groups = self._groups
    X, y, groups = indexable(X, y, groups)
    # boolean mask: True marks the held-out portion that gets K-folded
    msk = np.array(groups, dtype=bool)
    train_idx = np.arange(len(X))[~msk]
    test_idx = np.arange(len(X))[msk]
    try:
        # pandas DataFrames: convert to an ndarray before slicing
        test_x = X.to_numpy()[test_idx, :]
    except AttributeError:
        test_x = X[test_idx, :]
    test_y = np.array(y)[test_idx]
    split = super(PartiallyHeldOutKFold, self).split(test_x, test_y)
    offset = test_idx[0]
    for test_train, test_test in split:
        # prepend all non-held-out samples to the fold's training part,
        # and shift both fold index sets back into the held-out block
        test_train = np.concatenate((train_idx, test_train + offset))
        yield test_train, test_test + offset
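The class definition is not shown on this page; assuming PartiallyHeldOutKFold subclasses sklearn.model_selection.KFold and stores a boolean group mask as self._groups (both assumptions), a call might look like this:

import numpy as np

# hypothetical setup: groups == 1 marks the held-out block that gets
# K-folded; groups == 0 samples are always kept in the training set
X = np.random.rand(10, 3)
y = np.random.randint(0, 2, size=10)
groups = [0] * 6 + [1] * 4

cv = PartiallyHeldOutKFold(n_splits=2)  # constructor signature is an assumption
for train_idx, test_idx in cv.split(X, y, groups=groups):
    # each training set contains all six non-held-out samples plus the
    # fold's training part of the held-out block; test indices come
    # only from the held-out block
    assert set(range(6)) <= set(train_idx)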
Example 2: cross_val_score
# Required import: from sklearn import utils [as alias]
# Or: from sklearn.utils import indexable [as alias]
# Also assumed in scope: numpy as np, joblib's Parallel/delayed, and scikit-learn's
# clone, is_classifier, check_cv, check_scoring, plus a _fit_and_score helper
def cross_val_score(
    estimator,
    X,
    y=None,
    groups=None,
    scoring=None,
    cv=None,
    n_jobs=1,
    verbose=0,
    fit_params=None,
    pre_dispatch="2*n_jobs",
):
    """
    Evaluate a score by cross-validation.
    """
    if not isinstance(scoring, (list, tuple)):
        scoring = [scoring]
    X, y, groups = indexable(X, y, groups)
    cv = check_cv(cv, y, classifier=is_classifier(estimator))
    splits = list(cv.split(X, y, groups))
    scorer = [check_scoring(estimator, scoring=s) for s in scoring]
    # We clone the estimator to make sure that all the folds are
    # independent, and that it is pickle-able.
    parallel = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch)
    scores = parallel(
        delayed(_fit_and_score)(
            clone(estimator), X, y, scorer, train, test, verbose, None, fit_params
        )
        for train, test in splits
    )
    group_order = []
    if hasattr(cv, "groups"):
        # record the group label of each test fold, in split order
        group_order = [np.array(cv.groups)[test].tolist()[0] for _, test in splits]
    return np.squeeze(np.array(scores)), group_order
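A sketch of calling this variant, assuming the accompanying _fit_and_score helper accepts a list of scorers as this wrapper implies (the stock private scikit-learn helper takes a single scorer). Unlike sklearn.model_selection.cross_val_score, it returns a tuple of the score array and the group order:

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

X, y = make_classification(n_samples=100, random_state=0)

# scoring may be a single string or a list of scorer names
scores, group_order = cross_val_score(
    LogisticRegression(max_iter=1000), X, y,
    scoring=["accuracy", "roc_auc"], cv=3,
)
print(scores.shape)  # one entry per fold and per scorer
print(group_order)   # empty unless the cv object exposes a `groups` attribute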
Example 3: split
# Required import: from sklearn import utils [as alias]
# Or: from sklearn.utils import indexable [as alias]
# Also assumed in scope: import numpy as np; from sklearn.utils.validation import _num_samples
def split(self, X, y=None, groups=None):
    """Generate indices to split data into training and test set.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Training data, where n_samples is the number of samples
        and n_features is the number of features.
    y : array-like, shape (n_samples,)
        Always ignored, exists for compatibility.
    groups : array-like, with shape (n_samples,)
        Always ignored, exists for compatibility.

    Yields
    ------
    train : ndarray
        The training set indices for that split.
    test : ndarray
        The testing set indices for that split.
    """
    X, y, groups = indexable(X, y, groups)
    n_samples = _num_samples(X)
    n_splits = self.n_splits
    n_folds = n_splits + 1
    gap_size = self.gap_size
    test_size = self.test_size if self.test_size else n_samples // n_folds

    # Make sure we have enough samples for the given split parameters
    if n_folds > n_samples:
        raise ValueError(
            ("Cannot have number of folds={0} greater"
             " than the number of samples: {1}.").format(n_folds, n_samples))
    if n_samples - gap_size - (test_size * n_splits) <= 0:
        raise ValueError(
            ("Too many splits={0} for number of samples"
             "={1} with test_size={2} and gap_size={3}."
             "").format(n_splits, n_samples, test_size, gap_size))

    indices = np.arange(n_samples)
    test_starts = range(n_samples - n_splits * test_size,
                        n_samples, test_size)
    for test_start in test_starts:
        # the gap_size samples before each test window are dropped
        train_end = test_start - gap_size
        if self.max_train_size and self.max_train_size < train_end:
            # cap the training window at the most recent max_train_size samples
            yield (indices[train_end - self.max_train_size:train_end],
                   indices[test_start:test_start + test_size])
        else:
            yield (indices[:train_end],
                   indices[test_start:test_start + test_size])
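Assuming this split method belongs to a TimeSeriesSplit-style class with n_splits, gap_size, test_size, and max_train_size attributes (the class name and constructor below are assumptions), each test window is preceded by gap_size samples that belong to neither set:

import numpy as np

X = np.arange(10).reshape(-1, 1)

cv = GapTimeSeriesSplit(n_splits=2, gap_size=1, test_size=2,
                        max_train_size=None)  # hypothetical constructor
for train, test in cv.split(X):
    print(train, test)
# with n_samples=10, test_size=2, gap_size=1 this prints:
# [0 1 2 3 4] [6 7]        <- sample 5 falls in the gap
# [0 1 2 3 4 5 6] [8 9]    <- sample 7 falls in the gap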
Example 4: permutation_test_score
# Required import: from sklearn import utils [as alias]
# Or: from sklearn.utils import indexable [as alias]
# Also assumed in scope: numpy as np, joblib's Parallel/delayed, scikit-learn's clone,
# is_classifier, check_cv, check_scoring, check_random_state, and the private
# _permutation_test_score and _shuffle helpers from sklearn.model_selection._validation
def permutation_test_score(
    estimator,
    X,
    y,
    groups=None,
    cv=None,
    n_permutations=100,
    n_jobs=1,
    random_state=0,
    verbose=0,
    scoring=None,
):
    """
    Evaluate the significance of a cross-validated score with permutations,
    as in test 1 of [Ojala2010]_.

    A modification of scikit-learn's original permutation test score
    function that leaves the p-value to be evaluated outside this function,
    so that the score can be reused from outside.

    .. [Ojala2010] Ojala and Garriga. Permutation Tests for Studying
       Classifier Performance. The Journal of Machine Learning Research
       (2010) vol. 11
    """
    X, y, groups = indexable(X, y, groups)
    cv = check_cv(cv, y, classifier=is_classifier(estimator))
    scorer = check_scoring(estimator, scoring=scoring)
    random_state = check_random_state(random_state)
    # We clone the estimator to make sure that all the folds are
    # independent, and that it is pickle-able.
    permutation_scores = Parallel(n_jobs=n_jobs, verbose=verbose)(
        delayed(_permutation_test_score)(
            clone(estimator), X, _shuffle(y, groups, random_state),
            groups, cv, scorer
        )
        for _ in range(n_permutations)
    )
    permutation_scores = np.array(permutation_scores)
    return permutation_scores
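Since the permutation scores are returned raw, the p-value is computed outside this function by comparing them against a cross-validated score obtained on the unpermuted labels with the same cv and scorer. A minimal sketch using the conservative formula from sklearn.model_selection.permutation_test_score:

import numpy as np

def permutation_p_value(score, permutation_scores):
    # score: cross-validated score on the true labels, computed separately
    # the +1 terms keep the estimate conservative (never exactly zero)
    n_permutations = len(permutation_scores)
    return (np.sum(permutation_scores >= score) + 1.0) / (n_permutations + 1)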
Example 5: _validate_X_y_ratio_classes
# Required import: from sklearn import utils [as alias]
# Or: from sklearn.utils import indexable [as alias]
# Also assumed in scope: numpy as np, scikit-learn's column_or_1d and type_of_target,
# and the module-local validate_float, MAX_N_CLASSES and MIN_N_SAMPLES
def _validate_X_y_ratio_classes(X, y, ratio):
    # validate the cheap stuff before copying arrays around...
    validate_float(ratio, 'balance_ratio')

    # validate arrays
    X, y = indexable(X, y)  # want to allow pd.DataFrame
    y = column_or_1d(y, warn=False)  # type: np.ndarray

    # get n classes in y, ensure they are <= MAX_N_CLASSES, but first
    # ensure these are actually class labels and not floats or anything...
    y_type = type_of_target(y)
    supported_types = {'multiclass', 'binary'}
    if y_type not in supported_types:
        raise ValueError('balancers only support %r, but got %r'
                         % ("(" + ', '.join(supported_types) + ")", y_type))

    present_classes, counts = np.unique(y, return_counts=True)
    n_classes = len(present_classes)

    # ensure <= MAX_N_CLASSES
    if n_classes > MAX_N_CLASSES:
        raise ValueError('balancers currently only support a maximum of %i '
                         'unique class labels, but %i were identified.'
                         % (MAX_N_CLASSES, n_classes))

    # get the majority class label, and its count:
    majority_count_idx = np.argmax(counts, axis=0)
    majority_label, majority_count = (present_classes[majority_count_idx],
                                      counts[majority_count_idx])
    target_count = max(int(ratio * majority_count), 1)

    # define a min_n_samples based on the sample ratio to max_class
    # required = {target_count - counts[i]
    #             for i, v in enumerate(present_classes)
    #             if v != majority_label}

    # THIS WAS OUR ORIGINAL LOGIC:
    # * If there were any instances where the number of synthetic examples
    #   required for a class outweighed the number that existed in the class
    #   to begin with, we would end up having to potentially sample from the
    #   synthetic examples. We didn't want to have to do that.
    #
    # But it seems like a totally valid use-case. If we're detecting breast
    # cancer, it might be a rare event that needs lots of bolstering. We
    # should allow that, even though we may discourage it.

    # if any counts < MIN_N_SAMPLES, raise:
    if any(i < MIN_N_SAMPLES for i in counts):
        raise ValueError('All label counts must be >= %i' % MIN_N_SAMPLES)

    return (X, y, n_classes, present_classes, counts,
            majority_label, target_count)
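A sketch of calling the validator, assuming MIN_N_SAMPLES and MAX_N_CLASSES are module-level constants that the class counts here satisfy:

import numpy as np

X = np.random.rand(20, 4)
y = np.array([0] * 15 + [1] * 5)  # imbalanced binary labels

(X, y, n_classes, present_classes, counts,
 majority_label, target_count) = _validate_X_y_ratio_classes(X, y, ratio=0.5)

print(n_classes)       # 2
print(majority_label)  # 0 (the class with 15 of the 20 samples)
print(target_count)    # max(int(0.5 * 15), 1) == 7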