本文整理汇总了Python中sklearn.utils.column_or_1d函数的典型用法代码示例。如果您正苦于以下问题：Python column_or_1d函数的具体用法？Python column_or_1d怎么用？Python column_or_1d使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了column_or_1d函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: check_consistent_shape
def check_consistent_shape(X_train, y_train, X_test, y_test, y_train_pred,
                           y_test_pred):
    """Validate that data, ground truth and predictions have matching shapes.

    Each (X, y) pair is passed through ``check_X_y``, each prediction vector
    through ``column_or_1d``, and the lengths of ground truth and predictions
    are compared with ``check_consistent_length``. Finally the feature
    dimension of the training and test matrices is compared.

    Parameters
    ----------
    X_train : numpy array of shape (n_samples, n_features)
        The training samples.

    y_train : list or array of shape (n_samples,)
        The ground truth of training samples.

    X_test : numpy array of shape (n_samples, n_features)
        The test samples.

    y_test : list or array of shape (n_samples,)
        The ground truth of test samples.

    y_train_pred : array-like, one-dimensional or a single column
        The predicted labels of the training samples.

    y_test_pred : array-like, one-dimensional or a single column
        The predicted labels of the test samples.

    Returns
    -------
    X_train, y_train, X_test, y_test, y_train_pred, y_test_pred
        The six inputs in the same order, each replaced by the value
        returned from its validation helper.

    Raises
    ------
    ValueError
        If ``X_train`` and ``X_test`` have a different number of features.
        The validation helpers may raise their own errors as well.
    """
    # Validate each feature matrix together with its ground truth.
    X_train, y_train = check_X_y(X_train, y_train)
    X_test, y_test = check_X_y(X_test, y_test)

    # Normalize the prediction vectors (test first, then train).
    y_test_pred = column_or_1d(y_test_pred)
    y_train_pred = column_or_1d(y_train_pred)

    # Ground truth and predictions must describe the same samples.
    check_consistent_length(y_train, y_train_pred)
    check_consistent_length(y_test, y_test_pred)

    n_train_features = X_train.shape[1]
    n_test_features = X_test.shape[1]
    if n_train_features != n_test_features:
        message = ("X_train {0} and X_test {1} have different number "
                   "of features.")
        raise ValueError(message.format(X_train.shape, X_test.shape))

    return X_train, y_train, X_test, y_test, y_train_pred, y_test_pred
示例2: evaluate_print
def evaluate_print(clf_name, y, y_pred):
    """Print the ROC AUC and precision @ rank n of a detector.

    Both metrics are rounded to four decimals before being printed on a
    single line prefixed with the detector name.

    Parameters
    ----------
    clf_name : str
        The name of the detector.

    y : list or numpy array of shape (n_samples,)
        The ground truth. Binary (0: inliers, 1: outliers).

    y_pred : list or numpy array of shape (n_samples,)
        The raw outlier scores as returned by a fitted model.
    """
    y = column_or_1d(y)
    y_pred = column_or_1d(y_pred)
    check_consistent_length(y, y_pred)

    # Compute the metrics in the same order they appear in the output.
    roc = np.round(roc_auc_score(y, y_pred), decimals=4)
    prn = np.round(precision_n_scores(y, y_pred), decimals=4)

    template = '{clf_name} ROC:{roc}, precision @ rank n:{prn}'
    print(template.format(clf_name=clf_name, roc=roc, prn=prn))
示例3: savings_score
def savings_score(y_true, y_pred, cost_mat):
    # TODO: update description
    """Savings score.

    Computes the savings obtained by using ``y_pred`` on ``y_true`` under the
    cost matrix ``cost_mat``: one minus the ratio between the cost of
    ``y_pred`` and the cost of the cheaper of the two naive models (predict
    all zeros or predict all ones).

    Parameters
    ----------
    y_true : array-like or label indicator matrix
        Ground truth (correct) labels.

    y_pred : array-like or label indicator matrix
        Predicted labels, as returned by a classifier.

    cost_mat : array-like of shape = [n_samples, 4]
        Cost matrix of the classification problem, where the columns
        represent the costs of: false positives, false negatives,
        true positives and true negatives, for each example.

    Returns
    -------
    score : float
        Savings of using y_pred on y_true with cost matrix cost_mat.
        The best performance is 1.

    References
    ----------
    .. [1] A. Correa Bahnsen, A. Stojanovic, D.Aouada, B, Ottersten,
           `"Improving Credit Card Fraud Detection with Calibrated Probabilities" <http://albahnsen.com/files/%20Improving%20Credit%20Card%20Fraud%20Detection%20by%20using%20Calibrated%20Probabilities%20-%20Publish.pdf>`__, in Proceedings of the fourteenth SIAM International Conference on Data Mining,
           677-685, 2014.

    See also
    --------
    cost_loss

    Examples
    --------
    >>> import numpy as np
    >>> from costcla.metrics import savings_score, cost_loss
    >>> y_pred = [0, 1, 0, 0]
    >>> y_true = [0, 1, 1, 0]
    >>> cost_mat = np.array([[4, 1, 0, 0], [1, 3, 0, 0], [2, 3, 0, 0], [2, 1, 0, 0]])
    >>> savings_score(y_true, y_pred, cost_mat)
    0.5
    """
    # TODO: Check consistency of cost_mat
    y_true = column_or_1d(y_true)
    y_pred = column_or_1d(y_pred)
    n_samples = len(y_true)

    # Baseline: the cheaper of "predict all negative" and
    # "predict all positive".
    all_negative_cost = cost_loss(y_true, np.zeros(n_samples), cost_mat)
    all_positive_cost = cost_loss(y_true, np.ones(n_samples), cost_mat)
    cost_base = min(all_negative_cost, all_positive_cost)

    model_cost = cost_loss(y_true, y_pred, cost_mat)
    return 1.0 - model_cost / cost_base
示例4: precision_n_scores
def precision_n_scores(y, y_pred, n=None):
    """Compute precision @ rank n from raw outlier scores.

    Parameters
    ----------
    y : list or numpy array of shape (n_samples,)
        The ground truth. Binary (0: inliers, 1: outliers).

    y_pred : list or numpy array of shape (n_samples,)
        The raw outlier scores as returned by a fitted model.

    n : int, optional (default=None)
        The number of outliers. It is forwarded unchanged to
        ``get_label_n``; if not defined, it is inferred from the
        ground truth.

    Returns
    -------
    precision_at_rank_n : float
        Precision at rank n score.
    """
    # Binarize the raw scores first; get_label_n receives the ground truth
    # exactly as supplied by the caller.
    binary_pred = get_label_n(y, y_pred, n)

    # Normalize both label vectors before scoring them.
    y_true = column_or_1d(y)
    binary_pred = column_or_1d(binary_pred)
    return precision_score(y_true, binary_pred)
示例5: _sigmoid_calibration
def _sigmoid_calibration(self, df, y, sample_weight=None):
    """Probability calibration with the sigmoid method (Platt 2000).

    Fits the two parameters ``(a, b)`` of the model
    ``P = 1 / (1 + exp(a * df + b))`` by minimizing the (optionally
    weighted) cross-entropy against smoothed targets with BFGS.

    Parameters
    ----------
    df : ndarray, shape (n_samples,)
        The decision function or predict proba for the samples.

    y : ndarray, shape (n_samples,)
        The targets. Values ``> 0`` are treated as the positive class,
        values ``<= 0`` as the negative class.

    sample_weight : array-like, shape = [n_samples] or None
        Sample weights. If None, then samples are equally weighted.

    Returns
    -------
    a : float
        The slope.

    b : float
        The intercept.

    References
    ----------
    Platt, "Probabilistic Outputs for Support Vector Machines"
    """
    df = column_or_1d(df)
    y = column_or_1d(y)

    F = df  # F follows Platt's notations in the Reference Paper

    # Smallest positive normal float, added inside the logs to avoid log(0).
    # The builtin ``float`` is used because the ``np.float`` alias has been
    # removed from NumPy; both denote the same float64 type.
    tiny = np.finfo(float).tiny

    # Bayesian priors (see Platt end of section 2.2 in the Reference Paper)
    prior0 = float(np.sum(y <= 0))
    prior1 = y.shape[0] - prior0

    # Smoothed targets: positives move slightly below 1, negatives
    # slightly above 0.
    T = np.zeros(y.shape)
    T[y > 0] = (prior1 + 1.) / (prior1 + 2.)
    T[y <= 0] = 1. / (prior0 + 2.)
    T1 = 1. - T

    def objective(AB):
        # From Platt (beginning of Section 2.2 in the Reference Paper)
        E = np.exp(AB[0] * F + AB[1])
        P = 1. / (1. + E)
        loss = -(T * np.log(P + tiny) + T1 * np.log(1. - P + tiny))
        if sample_weight is not None:
            return (sample_weight * loss).sum()
        return loss.sum()

    def grad(AB):
        # gradient of the objective function
        E = np.exp(AB[0] * F + AB[1])
        P = 1. / (1. + E)
        TEP_minus_T1P = P * (T * E - T1)
        if sample_weight is not None:
            TEP_minus_T1P *= sample_weight
        dA = np.dot(TEP_minus_T1P, F)
        dB = np.sum(TEP_minus_T1P)
        return np.array([dA, dB])

    # Start from a zero slope and an intercept derived from the class priors.
    AB0 = np.array([0., math.log((prior0 + 1.) / (prior1 + 1.))])
    AB_ = fmin_bfgs(objective, AB0, fprime=grad, disp=False)
    return (AB_[0], AB_[1])
示例6: _check_targets_hmc
def _check_targets_hmc(y_true, y_pred):
    """Validate that both target arrays describe a multiclass problem.

    A mix of "binary" and "multiclass" targets is treated as multiclass.

    Parameters
    ----------
    y_true : array-like
        Ground truth targets.

    y_pred : array-like
        Predicted targets.

    Returns
    -------
    y_true, y_pred
        Both inputs after being passed through ``column_or_1d``.

    Raises
    ------
    ValueError
        If the combined target type is anything other than multiclass.
    """
    check_consistent_length(y_true, y_pred)

    target_types = {type_of_target(y_true), type_of_target(y_pred)}
    # Binary is a special case of multiclass, so a mix of both is accepted.
    if target_types == {"binary", "multiclass"}:
        target_types = {"multiclass"}

    if target_types != {"multiclass"}:
        raise ValueError("{0} is not supported".format(target_types))

    return column_or_1d(y_true), column_or_1d(y_pred)
示例7: brier_score_loss
def brier_score_loss(y_true, y_prob):
    """Compute the Brier score.

    The score is the mean squared difference between the predicted
    probability and the actual outcome, so smaller values are better
    (hence "loss"). For probabilities in [0, 1] and outcomes in {0, 1} the
    score lies between zero and one.

    The Brier score is appropriate for binary and categorical outcomes that
    can be structured as true or false, but is inappropriate for ordinal
    variables which can take on three or more values, because it assumes
    all possible outcomes are equally "distant" from one another.

    Parameters
    ----------
    y_true : array, shape (n_samples,)
        True targets.

    y_prob : array, shape (n_samples,)
        Probabilities of the positive class.

    Returns
    -------
    score : float
        Brier score

    Examples
    --------
    >>> import numpy as np
    >>> from costcla.metrics import brier_score_loss
    >>> y_true = [0, 1, 1, 0]
    >>> y_prob = [0.1, 0.9, 0.8, 0.3]
    >>> brier_score_loss(y_true, y_prob)  # doctest: +ELLIPSIS
    0.037...
    >>> brier_score_loss(y_true, np.array(y_prob) > 0.5)
    0.0

    References
    ----------
    http://en.wikipedia.org/wiki/Brier_score
    """
    outcomes = column_or_1d(y_true)
    probabilities = column_or_1d(y_prob)
    squared_errors = (outcomes - probabilities) ** 2
    return np.mean(squared_errors)
示例8: _check_clf_targets
def _check_clf_targets(y_true, y_pred):
    """Check that y_true and y_pred belong to the same classification task.

    Binary and multiclass targets are brought to a common type, and a
    ValueError is raised when the two target types cannot be combined or
    when the resulting type is not one of the supported ones.

    For binary and multiclass targets, both arrays are passed through
    ``column_or_1d`` before being returned.

    Parameters
    ----------
    y_true : array-like

    y_pred : array-like

    Returns
    -------
    y_type : one of {'multilabel-indicator', 'multilabel-sequences',
             'multiclass', 'binary'}
        The common type of the targets, derived from the output of
        ``type_of_target`` for both inputs.

    y_true : array or indicator matrix or sequence of sequences

    y_pred : array or indicator matrix or sequence of sequences

    Raises
    ------
    ValueError
        If the two target types differ (other than binary/multiclass) or
        the common type is not supported.
    """
    y_true, y_pred = check_arrays(y_true, y_pred, allow_lists=True)
    type_true = type_of_target(y_true)
    type_pred = type_of_target(y_pred)

    found_types = {type_true, type_pred}
    # Binary is a special case of multiclass, so a mix of both is accepted.
    if found_types == {"binary", "multiclass"}:
        found_types = {"multiclass"}

    if len(found_types) > 1:
        raise ValueError(
            "Can't handle mix of {0} and {1}".format(type_true, type_pred))

    # Exactly one type is left, so the set is no longer needed.
    y_type = found_types.pop()

    # No metrics support "multiclass-multioutput" format
    supported_types = ("binary", "multiclass", "multilabel-indicator",
                       "multilabel-sequences")
    if y_type not in supported_types:
        raise ValueError("{0} is not supported".format(y_type))

    if y_type in ("binary", "multiclass"):
        y_true = column_or_1d(y_true)
        y_pred = column_or_1d(y_pred)

    return y_type, y_true, y_pred
示例9: average
def average(scores, estimator_weight=None):
    """Combine outlier scores from multiple estimators by averaging.

    Parameters
    ----------
    scores : numpy array of shape (n_samples, n_estimators)
        Score matrix from multiple estimators on the same samples.

    estimator_weight : list of shape (1, n_estimators)
        If specified, a weighted average is computed with one weight per
        estimator; otherwise the plain mean is used.

    Returns
    -------
    combined_scores : numpy array of shape (n_samples, )
        The combined outlier scores.
    """
    scores = check_array(scores)

    # Unweighted case: plain mean across estimators.
    if estimator_weight is None:
        return np.mean(scores, axis=1).ravel()

    # Reshape the weights to a single row so they line up with the
    # estimator columns of the score matrix.
    estimator_weight = column_or_1d(estimator_weight).reshape(1, -1)
    assert_equal(scores.shape[1], estimator_weight.shape[1])

    # (d1*w1 + d2*w2 + ...+ dn*wn)/(w1+w2+...+wn)
    weighted_sum = np.sum(np.multiply(scores, estimator_weight), axis=1)
    weight_total = np.sum(estimator_weight)
    return (weighted_sum / weight_total).ravel()
示例10: fit
def fit(self, T, y, sample_weight=None):
    """Fit a sigmoid calibrator using `T`, `y` as training data.

    Parameters
    ----------
    * `T` [array-like, shape=(n_samples,)]:
        Training data.

    * `y` [array-like, shape=(n_samples,)]:
        Training target.

    * `sample_weight` [array-like, shape=(n_samples,), optional]:
        Weights, forwarded to the underlying calibrator.
        If set to `None`, all weights will be set to 1.

    Returns
    -------
    * `self` [object]:
        `self`.
    """
    # Check input
    training_data = column_or_1d(T)

    # The calibrator is stored on the instance before it is fitted.
    calibrator = _SigmoidCalibration()
    self.calibrator_ = calibrator
    calibrator.fit(training_data, y, sample_weight=sample_weight)

    return self
示例11: _validate_y
def _validate_y(self, y):
    """Validate classification targets and encode them as class indices.

    Stores the sorted unique labels in ``self.classes_`` and their count in
    ``self.n_classes_``.

    Parameters
    ----------
    y : array-like
        The target labels.

    Returns
    -------
    y : numpy array
        For each sample, the index of its label within ``self.classes_``.
    """
    labels = column_or_1d(y, warn=True)
    check_classification_targets(labels)

    classes, encoded = np.unique(labels, return_inverse=True)
    self.classes_ = classes
    self.n_classes_ = len(classes)
    return encoded
示例12: fit
def fit(self, X, y, sample_weight=None, check_input=True):
    """Fit the classifier after grid-searching the best tau and lamda.

    The targets are binarized to {-1, 1}, a ``GridSearchCV`` over
    ``self.taus`` and ``self.lamdas`` is run on an
    ``L1L2TwoStepClassifier``, and the attributes of the best estimator
    are copied onto this instance.

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_features]
        Training data

    y : array-like, shape = [n_samples] or [n_samples, n_targets]
        Target values

    sample_weight : float or array-like of shape [n_samples]
        Sample weight, forwarded to the estimator's ``fit`` via the grid
        search ``fit_params``.

    check_input : bool, optional (default=True)
        Forwarded to the estimator's ``fit`` via the grid search
        ``fit_params``.

    Returns
    -------
    self : Returns self.

    Raises
    ------
    ValueError
        If the targets are of a multilabel type.
    """
    binarizer = LabelBinarizer(pos_label=1, neg_label=-1)
    self._label_binarizer = binarizer
    y = binarizer.fit_transform(y)

    # Multi-label targets are rejected up front.
    if binarizer.y_type_.startswith('multilabel'):
        raise ValueError(
            "%s doesn't support multi-label classification" % (
                self.__class__.__name__))
    y = column_or_1d(y, warn=False)

    param_grid = {'tau': self.taus, 'lamda': self.lamdas}
    fit_params = {'sample_weight': sample_weight,
                  'check_input': check_input}

    base_estimator = L1L2TwoStepClassifier(
        mu=self.mu, fit_intercept=self.fit_intercept,
        use_gpu=self.use_gpu, threshold=self.threshold,
        normalize=self.normalize, precompute=self.precompute,
        max_iter=self.max_iter,
        copy_X=self.copy_X, tol=self.tol, warm_start=self.warm_start,
        positive=self.positive,
        random_state=self.random_state, selection=self.selection)

    search = GridSearchCV(
        estimator=base_estimator,
        param_grid=param_grid, fit_params=fit_params, cv=self.cv,
        scoring=self.scoring, n_jobs=self.n_jobs, iid=self.iid,
        refit=self.refit, verbose=self.verbose,
        pre_dispatch=self.pre_dispatch, error_score=self.error_score,
        return_train_score=self.return_train_score)
    search.fit(X, y)

    # Copy the winning configuration and its fitted parameters.
    best = search.best_estimator_
    self.tau_ = best.tau
    self.lamda_ = best.lamda
    self.coef_ = best.coef_
    self.intercept_ = best.intercept_
    self.best_estimator_ = best  # XXX DEBUG

    # One coefficient row per class when there are more than two classes,
    # otherwise a single row.
    n_classes = self.classes_.shape[0]
    ndim = n_classes if n_classes > 2 else 1
    self.coef_ = self.coef_.reshape(ndim, -1)

    return self
示例13: fit
def fit(self, X, y):
    """Fit the model to the data X and target y.

    The targets are binarized with a ``LabelBinarizer`` (kept in
    ``self._lbin``) before being handed to the parent class's ``fit``.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        Training data, where n_samples is the number of samples
        and n_features is the number of features.

    y : numpy array of shape (n_samples)
        Target labels.

    Returns
    -------
    self
    """
    y = column_or_1d(y, warn=True)

    # needs a better way to check multi-label instances
    first_entry = np.reshape(y, (-1, 1))[0][0]
    self.multi_label = isinstance(first_entry, list)

    self.classes_ = np.unique(y)

    binarizer = LabelBinarizer()
    self._lbin = binarizer
    y = binarizer.fit_transform(y)

    super(MultilayerPerceptronClassifier, self).fit(X, y)

    return self
示例14: get_color_codes
def get_color_codes(y):
    """Generate plotting color codes for inliers and outliers.

    Entries equal to 1 (outliers) are colored red ('r'); every other entry
    is colored blue ('b').

    Parameters
    ----------
    y : list or numpy array of shape (n_samples,)
        The ground truth. Binary (0: inliers, 1: outliers).

    Returns
    -------
    c : numpy array of shape (n_samples,)
        Color codes.
    """
    labels = column_or_1d(y)

    # Start with everything blue, then repaint the outliers red.
    colors = np.full([len(labels)], 'b', dtype=str)
    outlier_mask = labels == 1
    colors[outlier_mask] = 'r'

    return colors
示例15: score_to_label
def score_to_label(pred_scores, outliers_fraction=0.1):
    """Turn raw outlier scores into binary labels (0 or 1).

    The threshold is the score at the ``100 * (1 - outliers_fraction)``
    percentile; scores strictly above it are labelled 1.

    Parameters
    ----------
    pred_scores : list or numpy array of shape (n_samples,)
        Raw outlier scores. Outliers are assumed to have larger values.

    outliers_fraction : float in (0,1)
        Fraction of outliers.

    Returns
    -------
    outlier_labels : numpy array of shape (n_samples,)
        For each observation, 1 if its score exceeds the threshold and
        0 otherwise.
    """
    # check input values
    pred_scores = column_or_1d(pred_scores)
    check_parameter(outliers_fraction, 0, 1)

    cutoff_percentile = 100 * (1 - outliers_fraction)
    threshold = scoreatpercentile(pred_scores, cutoff_percentile)

    return (pred_scores > threshold).astype('int')