This page collects typical usage examples of the Python function sklearn.utils.validation.check_arrays. If you have been wondering what exactly check_arrays does and how to use it, the hand-picked examples below should help.
Fifteen code examples of check_arrays are shown, ordered by popularity.
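For orientation: check_arrays validates that all passed arrays share the same first dimension and converts list-likes to numpy arrays (or a requested sparse format). A minimal sketch, assuming an old scikit-learn release (this helper was deprecated and later removed in favor of check_array and check_X_y):

import numpy as np
from sklearn.utils.validation import check_arrays  # only present in old scikit-learn

X = [[1, 2], [3, 4], [5, 6]]
y = [0, 1, 0]
# both inputs are converted to numpy arrays and checked for equal length
X, y = check_arrays(X, y)
print(X.shape, y.shape)  # (3, 2) (3,)
# check_arrays([1, 2], [1, 2, 3])  -> ValueError (inconsistent lengths)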
Example 1: reduce_data
def reduce_data(self, X, y):
    if self.classifier is None:
        self.classifier = KNeighborsClassifier(n_neighbors=self.n_neighbors)
    if self.classifier.n_neighbors != self.n_neighbors:
        self.classifier.n_neighbors = self.n_neighbors

    X, y = check_arrays(X, y, sparse_format="csr")

    classes = np.unique(y)
    self.classes_ = classes

    if self.n_neighbors >= len(X):
        self.X_ = np.array(X)
        self.y_ = np.array(y)
        self.reduction_ = 0.0
        return self.X_, self.y_  # nothing can be edited when k >= n_samples

    mask = np.zeros(y.size, dtype=bool)
    tmp_m = np.ones(y.size, dtype=bool)
    for i in range(y.size):
        tmp_m[i] = not tmp_m[i]  # leave sample i out
        self.classifier.fit(X[tmp_m], y[tmp_m])
        sample, label = X[i], y[i]
        # keep the sample only if its neighbours classify it correctly
        if self.classifier.predict(sample) == [label]:
            mask[i] = not mask[i]
        tmp_m[i] = not tmp_m[i]  # put sample i back

    self.X_ = np.asarray(X[mask])
    self.y_ = np.asarray(y[mask])
    self.reduction_ = 1.0 - float(len(self.y_)) / len(y)
    return self.X_, self.y_
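This pattern (drop every sample that its own neighbours misclassify) is Wilson's Edited Nearest Neighbours. A self-contained sketch of the same idea on a plain numpy matrix, independent of the class above:

import numpy as np
from sklearn.neighbors import KNeighborsClassifier

def edited_nearest_neighbours(X, y, n_neighbors=3):
    keep = np.zeros(len(y), dtype=bool)
    for i in range(len(y)):
        loo = np.ones(len(y), dtype=bool)
        loo[i] = False  # leave sample i out
        clf = KNeighborsClassifier(n_neighbors=n_neighbors).fit(X[loo], y[loo])
        keep[i] = clf.predict(X[i:i + 1])[0] == y[i]
    return X[keep], y[keep]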
Example 2: _my_lrap
def _my_lrap(y_true, y_score):
    """Simple implementation of label ranking average precision"""
    y_true, y_score = check_arrays(y_true, y_score)
    n_samples, n_labels = y_true.shape
    score = np.empty((n_samples, ))
    for i in range(n_samples):
        # The best rank is 1; ranks greater than 1 are worse.
        # The best inverse ranking corresponds to n_labels.
        unique_rank, inv_rank = np.unique(y_score[i], return_inverse=True)
        n_ranks = unique_rank.size
        rank = n_ranks - inv_rank

        # Ranks need to be corrected to take ties into account,
        # e.g. two labels tied at rank 1 both get rank 2.
        corr_rank = np.bincount(rank, minlength=n_ranks + 1).cumsum()
        rank = corr_rank[rank]

        relevant = y_true[i].nonzero()[0]
        if relevant.size == 0 or relevant.size == n_labels:
            score[i] = 1
            continue

        score[i] = 0.
        for label in relevant:
            # Count the relevant labels with a better (smaller) rank.
            n_ranked_above = sum(rank[r] <= rank[label] for r in relevant)
            # Weight by the rank of the current label.
            score[i] += n_ranked_above / rank[label]
        score[i] /= relevant.size

    return score.mean()
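As a sanity check, the result should match scikit-learn's own metric on toy data (values small enough to verify by hand):

import numpy as np
from sklearn.metrics import label_ranking_average_precision_score

y_true = np.array([[1, 0, 0], [0, 0, 1]])
y_score = np.array([[0.75, 0.5, 1.0], [1.0, 0.2, 0.1]])
print(_my_lrap(y_true, y_score))                               # ~0.4167
print(label_ranking_average_precision_score(y_true, y_score))  # same value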
Example 3: reduce_data
def reduce_data(self, X, y):
    X, y = check_arrays(X, y, sparse_format="csr")

    if self.classifier is None:
        self.classifier = KNeighborsClassifier(n_neighbors=self.n_neighbors)

    prots_s = []
    labels_s = []

    classes = np.unique(y)
    self.classes_ = classes

    # seed the condensed set with one random prototype per class
    for cur_class in classes:
        mask = y == cur_class
        insts = X[mask]
        prots_s = prots_s + [insts[np.random.randint(0, insts.shape[0])]]
        labels_s = labels_s + [cur_class]

    self.classifier.fit(prots_s, labels_s)
    for sample, label in zip(X, y):
        # absorb every sample the current prototypes misclassify
        if self.classifier.predict(sample) != [label]:
            prots_s = prots_s + [sample]
            labels_s = labels_s + [label]
            self.classifier.fit(prots_s, labels_s)

    self.X_ = np.asarray(prots_s)
    self.y_ = np.asarray(labels_s)
    self.reduction_ = 1.0 - float(len(self.y_)) / len(y)
    return self.X_, self.y_
Example 4: fit
def fit(self, X, y, sample_weight=None):
    X, y = check_arrays(X, y, dtype=DTYPE, sparse_format="dense", check_ccontiguous=True)
    sample_weight = check_sample_weight(y, sample_weight=sample_weight)
    sample_weight = normalize_weight(y, sample_weight, sig_weight=self.sig_weight)
    self.random_state = check_random_state(self.random_state)
    self.estimators = []
    score = numpy.zeros(len(X), dtype=float)
    y_signed = 2 * y - 1
    self.w_sig = []
    self.w_bck = []
    for _ in range(self.n_estimators):
        residual = y_signed
        # numpy.exp(- y_signed * score)
        # residual[y > 0.5] /= numpy.mean(residual[y > 0.5])
        # residual[y < 0.5] /= -numpy.mean(residual[y < 0.5])

        trainX, testX, trainY, testY, trainW, testW, trainR, testR, trainS, testS = \
            train_test_split(X, y, sample_weight, residual, score,
                             train_size=self.train_part, test_size=self.test_size,
                             random_state=self.random_state)

        tree = DecisionTreeRegressor(criterion=self.criterion, splitter=self.splitter,
                                     max_depth=self.max_depth,
                                     min_samples_leaf=self.min_samples_leaf,
                                     max_features=self.max_features,
                                     random_state=self.random_state)
        # fitting
        tree.fit(trainX, trainR, sample_weight=trainW, check_input=False)
        # post-pruning
        self.update_terminal_regions(tree.tree_, testX, testY, testW, testS)
        # updating score
        # score += self.learning_rate * tree.predict(X)
        self.estimators.append(tree)
Example 5: calc_hist_with_errors
def calc_hist_with_errors(x, weight=None, bins=60, normed=True, x_range=None, ignored_sideband=0.0):
    """
    Calculate the data needed for an error-bar plot (pdf with errors).
    :param x: data
    :type x: list or numpy.array
    :param weight: weights
    :type weight: None or list or numpy.array
    :return: tuple (x points, y points, y point errors, x point errors), each a list
    """
    weight = numpy.ones(len(x)) if weight is None else weight
    x, weight = check_arrays(x, weight)
    if x_range is None:
        x_range = numpy.percentile(x, [100 * ignored_sideband, 100 * (1 - ignored_sideband)])

    ans, bins = numpy.histogram(x, bins=bins, normed=normed, weights=weight, range=x_range)
    yerr = []
    normalization = 1.0
    if normed:
        normalization = float(len(bins) - 1) / float(sum(weight)) / (x_range[1] - x_range[0])
    for i in range(len(bins) - 1):
        weight_bin = weight[(x > bins[i]) * (x <= bins[i + 1])]
        yerr.append(numpy.sqrt(sum(weight_bin * weight_bin)) * normalization)

    bins_mean = [0.5 * (bins[i] + bins[i + 1]) for i in range(len(ans))]
    xerr = [0.5 * (bins[i + 1] - bins[i]) for i in range(len(ans))]
    return bins_mean, ans, yerr, xerr
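A plausible way to use the returned tuple with matplotlib (hypothetical data; pylab as imported by the surrounding module, and an older numpy that still accepts the normed= argument used inside):

import numpy
import pylab

data = numpy.random.normal(size=10000)
x_points, y_points, y_err, x_err = calc_hist_with_errors(data, bins=40)
pylab.errorbar(x_points, y_points, yerr=y_err, xerr=x_err, fmt='.')
pylab.show()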
Example 6: reduce_data
def reduce_data(self, X, y):
    X, y = check_arrays(X, y, sparse_format="csr")

    if self.classifier is None:
        self.classifier = KNeighborsClassifier(n_neighbors=self.n_neighbors)
    if self.classifier.n_neighbors != self.n_neighbors:
        self.classifier.n_neighbors = self.n_neighbors

    classes = np.unique(y)
    self.classes_ = classes

    # load the initial groups: one group per class
    self.groups = []
    for label in classes:
        mask = y == label
        self.groups = self.groups + [_Group(X[mask], label)]

    self._main_loop()
    self._generalization_step()
    self._merge()
    self._pruning()
    self.X_ = np.asarray([g.rep_x for g in self.groups])
    self.y_ = np.asarray([g.label for g in self.groups])
    self.reduction_ = 1.0 - float(len(self.y_)) / len(y)
    return self.X_, self.y_
Example 7: cvm_flatness
def cvm_flatness(y, proba, X, uniform_variables, sample_weight=None, label=1, knn=30):
    """The simplest way to compute Cramer-von Mises flatness; it is, however, very slow
    if you need to compute it many times.
    :param y: real classes of events, shape = [n_samples]
    :param proba: predicted probabilities, shape = [n_samples, n_classes]
    :param X: pandas.DataFrame with uniform features (i.e. test dataset)
    :param uniform_variables: features along which uniformity is desired, list of strings
    :param sample_weight: weights of events, shape = [n_samples]
    :param label: class for which uniformity is measured (usually 0 is background, 1 is signal)
    :param knn: number of nearest neighbours used in knn

    Example of usage:
    proba = classifier.predict_proba(testX)
    cvm_flatness(testY, proba=proba, X=testX, uniform_variables=['mass'])
    """
    y, proba = check_arrays(y, proba)
    assert len(y) == len(proba) == len(X), 'Different lengths'
    y = column_or_1d(y)
    sample_weight = check_sample_weight(y, sample_weight=sample_weight)

    X = pandas.DataFrame(X)
    signal_mask = y == label

    groups_indices = computeSignalKnnIndices(uniform_variables=uniform_variables, dataframe=X,
                                             is_signal=signal_mask, n_neighbors=knn)
    groups_indices = groups_indices[signal_mask, :]

    return ut.group_based_cvm(proba[:, label], mask=signal_mask, groups_indices=groups_indices,
                              sample_weight=sample_weight)
Example 8: plot_score_variable_correlation
def plot_score_variable_correlation(y_true, y_pred, correlation_values, cuts, sample_weight=None,
                                    classifier_name="", var_name="", score_function=efficiency_score,
                                    bins_number=20):
    """
    Different score functions are available: efficiency, precision, recall, F1 score,
    and other things from sklearn.metrics.
    :param y_true: numpy.array of shape [n_samples], true labels
    :param y_pred: numpy.array of shape [n_samples] with float predictions
    :param correlation_values: numpy.array of shape [n_samples], usually the masses of events
    :param cuts: array-like of cuts; a separate curve is plotted for each cut
    :param sample_weight: numpy.array or None, shape = [n_samples]
    :param classifier_name: str, used only in the label
    :param var_name: str, e.g. 'mass'
    :param score_function: any function with signature (y_true, y_pred, sample_weight=None)
    :param bins_number: int, the number of bins
    """
    y_true, y_pred, correlation_values = check_arrays(y_true, y_pred, correlation_values)
    sample_weight = check_sample_weight(y_true, sample_weight=sample_weight)

    binner = Binner(correlation_values, n_bins=bins_number)
    bins_data = binner.split_into_bins(correlation_values, y_true, y_pred, sample_weight)
    for cut in cuts:
        x_values = []
        y_values = []
        for bin_data in bins_data:
            bin_masses, bin_y_true, bin_proba, bin_weight = bin_data
            y_values.append(score_function(bin_y_true, bin_proba[:, 1] > cut, sample_weight=bin_weight))
            x_values.append(numpy.mean(bin_masses))
        pylab.plot(x_values, y_values, '.-', label="cut = %0.3f" % cut)

    pylab.title("Correlation with results of " + classifier_name)
    pylab.xlabel(var_name)
    pylab.ylabel(score_function.__name__)
    pylab.legend(loc="lower right")
Example 9: transform
def transform(self, X, y=None):
    """Project the data by matrix product with the random matrix.

    Parameters
    ----------
    X : numpy array or scipy.sparse of shape [n_samples, n_features]
        The input data to project into a smaller dimensional space.

    y : not used; placeholder to allow usage in a Pipeline.

    Returns
    -------
    X_new : numpy array or scipy sparse of shape [n_samples, n_components]
        Projected array.
    """
    X, y = check_arrays(X, y)

    if self.components_ is None:
        raise ValueError('No random projection matrix has been fit.')

    if X.shape[1] != self.components_.shape[1]:
        raise ValueError(
            'Impossible to perform projection: '
            'X at fit stage had a different number of features '
            '(%s != %s)' % (X.shape[1], self.components_.shape[1]))

    if not sp.issparse(X):
        X = np.atleast_2d(X)

    X_new = safe_sparse_dot(X, self.components_.T,
                            dense_output=self.dense_output)
    return X_new
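For comparison, the same kind of projection through scikit-learn's public random_projection API (a sketch; the exact class the transform method above belongs to is not shown here):

import numpy as np
from sklearn.random_projection import GaussianRandomProjection

X = np.random.RandomState(0).rand(100, 10000)
transformer = GaussianRandomProjection(n_components=500, random_state=0)
X_new = transformer.fit_transform(X)
print(X_new.shape)  # (100, 500)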
Example 10: reorder_by_first
def reorder_by_first(*arrays):
    """
    Applies the same permutation to all passed arrays;
    the permutation is the one that sorts the first passed array.
    """
    arrays = check_arrays(*arrays)
    order = numpy.argsort(arrays[0])
    return [arr[order] for arr in arrays]
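Usage is straightforward, for example:

x = numpy.array([3, 1, 2])
y = numpy.array(['c', 'a', 'b'])
x_sorted, y_sorted = reorder_by_first(x, y)
# x_sorted -> array([1, 2, 3]); y_sorted -> array(['a', 'b', 'c'])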
Example 11: fit
def fit(self, X, y, sample_weight=None):
    sample_weight = check_sample_weight(y, sample_weight=sample_weight)
    assert len(X) == len(y), 'Different lengths of X and y'
    X = pandas.DataFrame(X)
    y = numpy.array(column_or_1d(y), dtype=int)
    assert numpy.all(numpy.in1d(y, [0, 1])), 'Only two-class classification supported'
    self.check_params()

    self.estimators = []
    self.scores = []

    n_samples = len(X)
    n_inbag = int(self.subsample * len(X))
    self.loss = copy.copy(self.loss)
    self.loss.fit(X, y, sample_weight=sample_weight)

    # preparing for fitting in trees
    X = self.get_train_vars(X)
    self.n_features = X.shape[1]
    X, y = check_arrays(X, y, dtype=DTYPE, sparse_format="dense", check_ccontiguous=True)
    y_pred = numpy.zeros(len(X), dtype=float)

    if self.init_estimator is not None:
        y_signed = 2 * y - 1
        self.init_estimator.fit(X, y_signed, sample_weight=sample_weight)
        y_pred += numpy.ravel(self.init_estimator.predict(X))

    for stage in range(self.n_estimators):
        # tree creation
        tree = DecisionTreeRegressor(
            criterion=self.criterion,
            splitter=self.splitter,
            max_depth=self.max_depth,
            min_samples_split=self.min_samples_split,
            min_samples_leaf=self.min_samples_leaf,
            max_features=self.max_features,
            random_state=self.random_state,
            max_leaf_nodes=self.max_leaf_nodes)

        # tree learning
        residual = self.loss.negative_gradient(y_pred)
        train_indices = self.random_state.choice(n_samples, size=n_inbag, replace=False)

        tree.fit(X[train_indices], residual[train_indices],
                 sample_weight=sample_weight[train_indices], check_input=False)

        # update tree leaves
        if self.update_tree:
            self.loss.update_tree(tree.tree_, X=X, y=y, y_pred=y_pred, sample_weight=sample_weight,
                                  update_mask=numpy.ones(len(X), dtype=bool), residual=residual)

        y_pred += self.learning_rate * tree.predict(X)
        self.estimators.append(tree)
        self.scores.append(self.loss(y_pred))
    return self
Example 12: _log_loss
def _log_loss(y_true, y_pred, eps=1e-10, sample_weight=None):
    """A shorter and simpler version of log_loss which supports sample_weight."""
    sample_weight = check_sample_weight(y_true, sample_weight=sample_weight)
    y_true, y_pred, sample_weight = check_arrays(y_true, y_pred, sample_weight)
    y_true = column_or_1d(y_true)

    lb = LabelBinarizer()
    T = lb.fit_transform(y_true)
    if T.shape[1] == 1:
        T = numpy.append(1 - T, T, axis=1)

    # Clipping
    Y = numpy.clip(y_pred, eps, 1 - eps)

    # Check if dimensions are consistent.
    T, Y = check_arrays(T, Y)

    # Renormalize
    Y /= Y.sum(axis=1)[:, numpy.newaxis]

    loss = -(T * numpy.log(Y) * sample_weight[:, numpy.newaxis]).sum() / numpy.sum(sample_weight)
    return loss
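With uniform weights the result should agree with sklearn.metrics.log_loss (hypothetical toy data):

import numpy
from sklearn.metrics import log_loss

y_true = numpy.array([0, 1, 1, 0])
y_pred = numpy.array([[0.9, 0.1], [0.2, 0.8], [0.3, 0.7], [0.6, 0.4]])
print(_log_loss(y_true, y_pred))  # ~0.299
print(log_loss(y_true, y_pred))   # same value when weights are uniform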
Example 13: fit
def fit(self, X):
    """Creates a biclustering for X.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
    """
    X, = check_arrays(X, sparse_format="csr", dtype=np.float64)
    check_array_ndim(X)
    self._check_parameters()
    self._fit(X)
Example 14: plot_roc
def plot_roc(y_true, y_pred, sample_weight=None, classifier_name="", is_cut=False, mask=None):
    """Plots the ROC curve in the way physicists like it.
    :param y_true: numpy.array, shape=[n_samples]
    :param y_pred: numpy.array, shape=[n_samples]
    :param sample_weight: numpy.array | None, shape=[n_samples]
    :param classifier_name: str, the name of the classifier for the label
    :param is_cut: bool, True if the predictions are binary
    :param mask: plot the ROC curve only for events with mask=True
    """
    if is_cut:
        assert len(numpy.unique(y_pred)) == 2, 'Cut assumes that predictions are 0 and 1 (or True/False)'

    MAX_STEPS = 500
    y_true, y_pred = check_arrays(y_true, y_pred)
    if mask is not None:
        mask = numpy.array(mask, dtype=bool)  # converting to bool, just in case
        y_true = y_true[mask]
        y_pred = y_pred[mask]
        if sample_weight is not None:
            sample_weight = sample_weight[mask]

    fpr, tpr, thresholds = check_arrays(*roc_curve(y_true, y_pred, sample_weight=sample_weight))
    roc_auc = auc(fpr, tpr)
    # tpr = recall = isSasS / isS = signal efficiency
    # fpr = isBasS / isB = 1 - specificity = 1 - backgroundRejection
    bg_rejection = 1. - fpr

    if len(fpr) > MAX_STEPS:
        # decrease the number of points in the plot
        targets = numpy.linspace(0, 1, MAX_STEPS)
        x_ids = numpy.searchsorted(tpr, targets)
        y_ids = numpy.searchsorted(fpr, targets)
        indices = numpy.concatenate([x_ids, y_ids, [0, len(tpr) - 1]])
        indices = numpy.unique(indices)
        tpr = tpr[indices]
        bg_rejection = bg_rejection[indices]

    if not is_cut:
        pylab.plot(tpr, bg_rejection, label='%s (area = %0.3f)' % (classifier_name, roc_auc))
    else:
        pylab.plot(tpr[1:2], bg_rejection[1:2], 'o', label='%s' % classifier_name)
Example 15: plot_roc
def plot_roc(y_true, y_pred, sample_weight=None, classifier_name=""):
    """Plots the ROC curve in the way physicists like it.
    :param y_true: numpy.array, shape=[n_samples]
    :param y_pred: numpy.array, shape=[n_samples]
    :param sample_weight: numpy.array | None, shape=[n_samples]
    :param classifier_name: str, the name of the classifier for the label
    """
    y_true, y_pred = check_arrays(y_true, y_pred)
    fpr, tpr, thresholds = roc_curve(y_true, y_pred, sample_weight=sample_weight)
    # tpr = recall = isSasS / isS = signal efficiency
    # fpr = isBasS / isB = 1 - specificity = 1 - backgroundRejection
    bg_rejection = 1. - numpy.array(fpr)
    roc_auc = auc(fpr, tpr)
    pylab.plot(tpr, bg_rejection, label='%s (area = %0.3f)' % (classifier_name, roc_auc))