本文整理汇总了 Python 中 sklearn.utils.check_X_y 函数的典型用法代码示例。如果您正苦于以下问题：Python check_X_y 函数的具体用法？Python check_X_y 怎么用？Python check_X_y 使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了 check_X_y 函数的 15 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: fit_transform
def fit_transform(self, X, y=None):
    """
    Fit the wrapped classifier ``self.clf`` and return this estimator.

    Despite the method name, no transformed data is returned: the method
    fits ``self.clf`` and returns ``self``.

    Parameters
    ----------
    X : pandas dataframe or array-like
        training samples. A dataframe is passed to ``self.convert_numpy``,
        whose result replaces ``X`` and ``y`` and sets ``self.vectorizer``.
    y : array like, required for array-like X and not used presently for pandas dataframe
        class labels

    Returns
    -------
    self: object
    """
    if not isinstance(X, pd.DataFrame):
        # Validation only: the arrays returned by check_X_y are discarded and
        # the caller's X and y are handed to the classifier unchanged.
        check_X_y(X, y)
    else:
        X, y, self.vectorizer = self.convert_numpy(X)

    self.clf.fit(X, y)
    return self
示例2: check_consistent_shape
def check_consistent_shape(X_train, y_train, X_test, y_test, y_train_pred,
                           y_test_pred):
    """Internal function to check that input data shapes are consistent.

    The train and test pairs are validated with ``check_X_y``, both
    prediction arrays are passed through ``column_or_1d``, each prediction
    array is length-checked against its ground truth, and finally the
    train and test feature counts are compared.

    Parameters
    ----------
    X_train : numpy array of shape (n_samples, n_features)
        The training samples.

    y_train : list or array of shape (n_samples,)
        The ground truth of training samples.

    X_test : numpy array of shape (n_samples, n_features)
        The test samples.

    y_test : list or array of shape (n_samples,)
        The ground truth of test samples.

    y_train_pred : list or array of shape (n_samples,)
        The predicted binary labels of the training samples.

    y_test_pred : list or array of shape (n_samples,)
        The predicted binary labels of the test samples.

    Returns
    -------
    X_train, y_train, X_test, y_test, y_train_pred, y_test_pred
        The six inputs as returned by ``check_X_y`` (samples and ground
        truth) and ``column_or_1d`` (predictions), in the same order as
        the parameters.

    Raises
    ------
    ValueError
        If ``X_train`` and ``X_test`` have a different number of columns.
        The validation helpers may raise as well on invalid input.
    """
    # Validate each (samples, ground truth) pair first.
    X_train, y_train = check_X_y(X_train, y_train)
    X_test, y_test = check_X_y(X_test, y_test)

    # Predictions go through column_or_1d; test predictions first, as before.
    y_test_pred = column_or_1d(y_test_pred)
    y_train_pred = column_or_1d(y_train_pred)

    for truth, predicted in ((y_train, y_train_pred), (y_test, y_test_pred)):
        check_consistent_length(truth, predicted)

    n_train_features = X_train.shape[1]
    n_test_features = X_test.shape[1]
    if n_train_features == n_test_features:
        return X_train, y_train, X_test, y_test, y_train_pred, y_test_pred

    raise ValueError("X_train {0} and X_test {1} have different number "
                     "of features.".format(X_train.shape, X_test.shape))
示例3: fit
def fit(self, X, y=None):
    """Fit an ``xgb.XGBClassifier`` built from ``self.params``.

    Parameters
    ----------
    X : pandas dataframe or array-like
        training samples. A pandas dataframe is converted either with
        ``self._load_from_dict`` (when ``self.dict_feature`` is set, i.e. a
        dict of features held in one column) or with ``self.convert_numpy``
        (which converts a set of columns and also sets ``self.vectorizer``).
    y : array like, required for array-like X and not used presently for pandas dataframe
        class labels

    Returns
    -------
    self: object
    """
    if isinstance(X, pd.DataFrame):
        df = X
        if self.dict_feature is not None:
            if self.target_readable is not None:
                self.create_class_id_map(df, self.target, self.target_readable)
            (X, y) = self._load_from_dict(df)
        else:
            (X, y, self.vectorizer) = self.convert_numpy(df)
    else:
        # Validation only; the caller's X and y are passed on unchanged.
        check_X_y(X, y)

    self.clf = xgb.XGBClassifier(**self.params)
    # Single-argument print(...) produces the same output under the Python 2
    # print statement and the Python 3 print function.
    print(self.clf.get_params(deep=True))
    self.clf.fit(X, y, verbose=True)
    return self
示例4: test_check_array_warn_on_dtype_deprecation
def test_check_array_warn_on_dtype_deprecation():
    """``check_array`` and ``check_X_y`` must warn when given ``warn_on_dtype``."""
    features = np.asarray([[0.0], [1.0]])
    targets = np.asarray([[2.0], [3.0]])
    expected_message = "'warn_on_dtype' is deprecated"

    with pytest.warns(DeprecationWarning, match=expected_message):
        check_array(features, warn_on_dtype=True)

    with pytest.warns(DeprecationWarning, match=expected_message):
        check_X_y(features, targets, warn_on_dtype=True)
示例5: fit
def fit(self, X, y=None):
    """Derived from https://github.com/fchollet/keras/blob/master/keras/wrappers/scikit_learn.py

    Adds:
      Handling pandas inputs
      Saving of model into the class to allow for easy pickling

    The model is created with ``self.model_create(input_width, num_classes)``,
    deep-copied and compiled into ``self.compiled_model_`` the first time it
    is needed, trained, and then its JSON config (``self.config_``) and the
    raw bytes of its saved weights (``self.model_saved``) are stored on the
    instance.

    Parameters
    ----------
    X : pandas dataframe or array-like
        training samples
    y : array like, required for array-like X and not used presently for pandas dataframe
        class labels

    Returns
    -------
    self: object
    """
    if isinstance(X, pd.DataFrame):
        df = X
        (X, y, self.vectorizer) = self.convert_numpy(df)
    else:
        # Keep the validated arrays: the code below needs ``.shape`` on both
        # X and y, which plain lists do not provide.
        X, y = check_X_y(X, y)

    input_width = X.shape[1]
    # pd.unique accepts both a pandas Series and a numpy array, whereas the
    # ``.unique()`` method exists only on the former.
    num_classes = len(pd.unique(y))
    logger.info("input_width %d", input_width)
    logger.info("num_classes %d", num_classes)
    self.model = self.model_create(input_width, num_classes)

    if len(y.shape) == 1:
        self.classes_ = list(np.unique(y))
        if self.loss == 'categorical_crossentropy':
            y = to_categorical(y)
    else:
        self.classes_ = np.arange(0, y.shape[1])

    if self.compiled_model_ is None:
        self.compiled_model_ = copy.deepcopy(self.model)
        self.compiled_model_.compile(optimizer=self.optimizer, loss=self.loss)

    history = self.compiled_model_.fit(
        X, y, batch_size=self.train_batch_size, nb_epoch=self.nb_epoch, verbose=self.verbose,
        shuffle=self.shuffle, show_accuracy=self.show_accuracy,
        validation_split=self.validation_split, validation_data=self.validation_data,
        callbacks=self.callbacks)

    self.config_ = self.model.to_json()
    self.compiled_model_.save_weights(self.tmp_model)
    with open(self.tmp_model, mode='rb') as weights_file:  # b is important -> binary
        self.model_saved = weights_file.read()
    return self
示例6: fit
def fit(self, X, y=None):
    """Convert data to vw lines and then train for required iterations

    Parameters
    ----------
    X : pandas dataframe or array-like
        training samples
    y : array like, required for array-like X and not used presently for pandas dataframe
        class labels. For array-like input the labels are appended to the
        features as a column named ``'y'`` and ``self.target`` is set to
        ``'y'``.

    Returns
    -------
    self: object

    Caveats :
    1. A seldon specific fork of wabbit_wappa is needed to allow vw to run in server mode without save_resume. Save_resume seems to cause issues with the scores returned. Maybe connected to https://github.com/JohnLangford/vowpal_wabbit/issues/262
    """
    if isinstance(X, pd.DataFrame):
        df = X
        df_base = self._exclude_include_features(df)
        df_base = df_base.fillna(0)
    else:
        check_X_y(X, y)
        df = pd.DataFrame(X)
        df_y = pd.DataFrame(y, columns=['y'])
        self.target = 'y'
        df_base = pd.concat([df, df_y], axis=1)

    # Each message is formatted into one string so that the output is the same
    # under the Python 2 print statement and the Python 3 print function.
    print(df_base.head())
    min_target = df_base[self.target].astype(float).min()
    print("min target  %s" % (min_target,))
    self.zero_based = bool(min_target == 0)

    if self.target_readable is not None:
        self.create_class_id_map(df, self.target, self.target_readable, zero_based=self.zero_based)

    self.num_classes = len(df_base[self.target].unique())
    print("num classes  %s" % (self.num_classes,))

    self._start_vw_if_needed("train")
    df_vw = df_base.apply(self._convert_row, axis=1)
    for _ in range(self.num_iterations):
        # ``items`` replaces ``iteritems``, which was removed in pandas 2.0.
        for _, val in df_vw.items():
            self.vw.send_line(val, parse_result=False)
    self._save_model(self.model_file)
    return self
示例7: fit
def fit(self, X, y):
    '''
    Fit Relevance Vector Regression Model

    The features are kernelised with ``get_kernel`` and the parent class'
    ``fit`` is run on the resulting kernel matrix; the rows of ``X`` selected
    by ``self.active_`` are then stored as the relevance vectors.

    Parameters
    -----------
    X: {array-like,sparse matrix} of size [n_samples, n_features]
       Training data, matrix of explanatory variables. Sparse input is
       accepted in csr, coo and bsr format.

    y: array-like of size [n_samples]
       Target values

    Returns
    -------
    self: object
       self
    '''
    X, y = check_X_y(X, y, accept_sparse=['csr', 'coo', 'bsr'], dtype=np.float64)

    # kernelise features
    K = get_kernel(X, X, self.gamma, self.degree, self.coef0,
                   self.kernel, self.kernel_params)

    # use fit method of RegressionARD
    super(RVR, self).fit(K, y)

    # convert to csr (need to use __getitem__); the classes are taken from the
    # public scipy.sparse namespace because the per-format submodules
    # (scipy.sparse.coo, .dia, .bsr) are deprecated.
    non_indexable = (scipy.sparse.coo_matrix, scipy.sparse.dia_matrix,
                     scipy.sparse.bsr_matrix)
    if isinstance(X, non_indexable):
        X = X.tocsr()

    self.relevant_ = np.where(self.active_ == True)[0]
    if X.ndim == 1:
        self.relevant_vectors_ = X[self.relevant_]
    else:
        self.relevant_vectors_ = X[self.relevant_, :]
    return self
示例8: fit
def fit(self, X, y):
    """Fit joint quantile regression model.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        Training data.

    y : {array-like}, shape = [n_samples]
        Target values.

    Returns
    -------
    self : returns an instance of self.

    Raises
    ------
    UserWarning
        Raised as an exception (not issued as a warning) when ``self.eps``
        is positive while ``self.nc_const`` is set.
    """
    eps_with_non_crossing = self.eps > 0 and self.nc_const
    if eps_with_non_crossing:
        raise UserWarning("eps is considered null because you chose to "
                          "enfoce non-crossing constraints.")

    X, y = check_X_y(X, y, ['csr', 'csc', 'coo'], y_numeric=True)
    y = asarray(y).flatten()
    self._validate_params()

    self.linop_ = self._get_kernel_map(X)
    gram = self.linop_.Gram_dense(X)
    self.reg_c_ = 1. / self.lbda

    # Solve the optimization problem with the solver matching the
    # non-crossing setting.
    probs = asarray(self.probs).flatten()
    solve = self._qp_nc if self.nc_const else self._coneqp
    solve(gram, y, probs)
    return self
示例9: _check_params
def _check_params(self, X, y):
    """Validate the input data and the estimator's parameters.

    ``X`` and ``y`` are checked with ``check_X_y``; when ``self.categorical``
    is falsy both are standardised with ``StandardScaler``. The parameters
    ``method``, ``k`` and ``categorical`` are then sanity-checked and
    advisory messages are printed when the data looks inconsistent with the
    ``categorical`` setting.

    Parameters
    ----------
    X : array-like
        Input samples.
    y : array-like
        Target values.

    Returns
    -------
    X, y : the validated (and, for continuous ``y``, standardised) arrays.

    Raises
    ------
    ValueError
        If ``self.method`` is not one of 'JMI', 'JMIM', 'MRMR'; if ``self.k``
        is not an integer or is smaller than 1; if ``y`` is categorical and
        ``self.k`` exceeds any entry of ``np.bincount(y)``; or if
        ``self.categorical`` is not a bool.
    """
    # checking input data and scaling it if y is continuous
    X, y = check_X_y(X, y)

    if not self.categorical:
        ss = StandardScaler()
        X = ss.fit_transform(X)
        # StandardScaler expects 2-D input, so y is scaled as a single
        # column and flattened back to its original 1-D shape.
        y = ss.fit_transform(y.reshape(-1, 1)).ravel()

    # sanity checks
    methods = ['JMI', 'JMIM', 'MRMR']
    if self.method not in methods:
        raise ValueError('Please choose one of the following methods:\n' +
                         '\n'.join(methods))

    if not isinstance(self.k, int):
        raise ValueError("k must be an integer.")
    if self.k < 1:
        raise ValueError('k must be larger than 0.')
    if self.categorical and np.any(self.k > np.bincount(y)):
        raise ValueError('k must be smaller than your smallest class.')

    if not isinstance(self.categorical, bool):
        raise ValueError('Categorical must be Boolean.')

    # Single-argument print(...) behaves the same under Python 2 and 3.
    if self.categorical and np.unique(y).shape[0] > 5:
        print('Are you sure y is categorical? It has more than 5 levels.')
    if not self.categorical and self._isinteger(y):
        print('Are you sure y is continuous? It seems to be discrete.')
    if self._isinteger(X):
        print('The values of X seem to be discrete. MI_FS will treat them '
              'as continuous.')
    return X, y
示例10: fit
def fit(self, X, y):
    """Find the classes statistics before to perform sampling.

    The input is validated once and the validated arrays are used to fit
    first the parent class and then the ``self.sm`` sampler.

    Parameters
    ----------
    X : ndarray, shape (n_samples, n_features)
        Matrix containing the data which have to be sampled.

    y : ndarray, shape (n_samples, )
        Corresponding label for each sample in X.

    Returns
    -------
    self : object,
        Return self.
    """
    # Check the consistency of X and y
    checked_X, checked_y = check_X_y(X, y)

    super(SMOTEENN, self).fit(checked_X, checked_y)

    # Fit using SMOTE
    self.sm.fit(checked_X, checked_y)

    return self
示例11: f_classifNumba
def f_classifNumba(X, y):
    """Compute the ANOVA F-value for the provided sample.

    The rows of ``X`` are grouped by the distinct values of ``y`` and the
    groups are handed to ``f_onewayNumba``.

    Read more in the :ref:`User Guide <univariate_feature_selection>`.

    Parameters
    ----------
    X : {array-like, sparse matrix} shape = [n_samples, n_features]
        The set of regressors that will tested sequentially.

    y : array of shape(n_samples)
        The target labels used to split the samples into groups.

    Returns
    -------
    F : array, shape = [n_features,]
        The set of F values.

    pval : array, shape = [n_features,]
        The set of p-values.

    See also
    --------
    chi2: Chi-squared stats of non-negative features for classification tasks.
    f_regression: F-value between label/feature for regression tasks.
    """
    X, y = check_X_y(X, y, ['csr', 'csc', 'coo'])

    # One group of rows per distinct label, in np.unique order.
    samples_per_class = []
    for label in np.unique(y):
        row_selector = safe_mask(X, y == label)
        samples_per_class.append(X[row_selector])

    return f_onewayNumba(*samples_per_class)
示例12: my_smote
def my_smote(X, y, minority_target=None, per=0.5):
    """
    Over-sample minority classes with synthetic samples produced by
    ``_smote._borderlineSMOTE`` (SMOTE - Synthetic Minority Over-sampling
    Technique, borderline variant).

    :param X: nd-array, sparse matrix, shape=[n_samples, n_features]
    :param y: nd-array, list, shape=[n_samples]
    :param minority_target: list of labels to over-sample; when None, the
        label with the smallest sample count is used
    :param per: float used to size the over-sampling amount as
        ``(int((majority / (1 - per) - majority) / count) - 1) * 100``, where
        ``majority`` is the largest class count and ``count`` the count of the
        class being over-sampled (presumably the desired minority share --
        TODO confirm)
    :return: tuple ``(X, Y)`` with the synthetic samples and their labels
        appended to the validated input
    """
    X, Y = check_X_y(X, y, 'csr')

    labels = list(set(Y))
    counts = [np.sum(Y == label) for label in labels]

    if minority_target is not None:
        minority_positions = [labels.index(target) for target in minority_target]
    else:
        minority_positions = [np.argmin(counts)]

    largest = np.max(counts)
    for pos in minority_positions:
        label = labels[pos]
        N = (int((largest * 1.0 / (1 - per) - largest) / counts[pos]) - 1) * 100
        # Only the synthetic samples are used; the other two results are ignored.
        _, synthetic, _ = _smote._borderlineSMOTE(X, Y, label, N, k=5)
        synthetic_labels = np.array([label] * synthetic.shape[0])
        X = sp.vstack([X, synthetic])
        Y = np.concatenate([Y, synthetic_labels])

    return X, Y
示例13: fit
def fit(self, X, y):
    """Fit ORFF ridge regression model.

    The feature map ``self.phix_`` is built from the kernel operator
    ``self.linop_`` and the coefficients are obtained by minimising an
    ``ORFFRidgeRisk`` with the solver named by ``self.solver``.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        Training data.

    y : {array-like}, shape = [n_samples] or [n_samples, n_targets]
        Target values.

    Returns
    -------
    self : returns an instance of self.
    """
    X, y = check_X_y(X, y, ['csr', 'csc', 'coo'],
                     y_numeric=True, multi_output=True)
    self._validate_params()

    # Number of target columns; a 1-D y counts as a single target.
    if y.ndim > 1:
        self.p = y.shape[1]
    else:
        self.p = 1

    solver_params = self.solver_params or {}

    self.linop_ = self._get_kernel(X, y)
    self.phix_ = self.linop_.get_orff_map(X, self.D)

    risk = ORFFRidgeRisk(self.lbda, 'LS')
    initial_coefs = zeros(self.phix_.shape[1], dtype=X.dtype)
    risk_args = (y.ravel(), self.phix_, self.linop_)
    self.solver_res_ = minimize(risk.functional_grad_val,
                                initial_coefs,
                                args=risk_args,
                                method=self.solver,
                                jac=True, options=solver_params)
    self.coefs_ = self.solver_res_.x
    return self
示例14: fit
def fit(self, x, y):
    """
    Constructs GAM model(s) to predict y from X

    x: 1 or 2 dimensional array of predictor values with each row being one observation
    y: 1 or 2 dimensional array of predicted values (a GAM model is constructed for each output if y is 2 dimensional)

    Returns self; the fitted models are stored in ``self.gammodels``.
    """
    # Input validation for standard estimators using sklearn utils
    x, y = check_X_y(x, y, accept_sparse=["csr", "csc", "coo"], multi_output=True)

    def to_r_matrix(values):
        # A one-dimensional array has no shape[1], so it is treated as a
        # single column.
        n_cols = 1 if values.ndim == 1 else values.shape[1]
        return r.matrix(values, nrow=values.shape[0], ncol=n_cols)

    # Convert to R matrices
    rX = to_r_matrix(x)
    rY = to_r_matrix(y)

    # Compute models (one for each column in y)
    self.gammodels = self.computeGAM(rX, rY)
    return self
示例15: fit
def fit(self, X, y=None):
    """Fit the model using X as training data.

    X is converted to a CSR matrix and its non-zero coordinates and stored
    values are handed to ``_nearestNeighbors.fit``, whose result is stored in
    ``self._pointer_address_of_nearestNeighbors_object``.

    Parameters
    ----------
    X : {array-like, sparse matrix}, optional
        Training data. If array or matrix, shape = [n_samples, n_features]
        If X is None, a "lazy fitting" is performed. If kneighbors is called, the fitting
        with with the data there is done. Also the caching of computed hash values is deactivated in
        this case.
        NOTE(review): the code visible here always calls ``csr_matrix(X)``;
        no special handling of ``X is None`` is present -- confirm.
    y : list, optional (default = None)
        List of classes for the given input of X. Size have to be n_samples."""
    if y is None:
        self._y_is_csr = False
    else:
        self._y_is_csr = True
        _, self._y = check_X_y(X, y, "csr", multi_output=True)
        # A 1-D or single-column y is not flagged as CSR.
        single_column = self._y.ndim == 1 or self._y.shape[1] == 1
        if single_column:
            self._y_is_csr = False

    X_csr = csr_matrix(X)
    n_instances = X_csr.shape[0]
    self._index_elements_count = n_instances

    instances, features = X_csr.nonzero()
    maxFeatures = int(max(X_csr.getnnz(1)))
    data = X_csr.data

    # returns a pointer to the inverse index stored in c++
    previous_pointer = self._pointer_address_of_nearestNeighbors_object
    self._pointer_address_of_nearestNeighbors_object = _nearestNeighbors.fit(
        instances.tolist(), features.tolist(), data.tolist(),
        n_instances, maxFeatures,
        previous_pointer)