本文整理汇总了Python中sklearn.model_selection.KFold.get_n_splits方法的典型用法代码示例。如果您正苦于以下问题:Python KFold.get_n_splits方法的具体用法?Python KFold.get_n_splits怎么用?Python KFold.get_n_splits使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.model_selection.KFold
的用法示例。
在下文中一共展示了KFold.get_n_splits方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: TargetEncoderNSplits
# 需要导入模块: from sklearn.model_selection import KFold [as 别名]
# 或者: from sklearn.model_selection.KFold import get_n_splits [as 别名]
class TargetEncoderNSplits(BaseTransformer):
    """Out-of-fold target (mean) encoder for categorical features.

    For each categorical column the mean of the target is computed on the
    training folds of a KFold split and merged onto the held-out fold, so a
    row's own target never leaks into its encoding. The fold-wise encodings
    are then averaged into one per-category lookup table per column.
    """

    def __init__(self, n_splits, **kwargs):
        # KFold used to produce the out-of-fold statistics during fit().
        self.k_folds = KFold(n_splits=n_splits)
        # column name -> DataFrame mapping each category to its mean target
        self.target_means_map = {}

    def _target_means_names(self, columns):
        """Names of the generated target-mean columns, one per input column."""
        confidence_rate_names = ['target_mean_{}'.format(column) for column in columns]
        return confidence_rate_names

    def _is_null_names(self, columns):
        """Names of the 0/1 indicator columns flagging unseen categories."""
        is_null_names = ['target_mean_is_nan_{}'.format(column) for column in columns]
        return is_null_names

    def fit(self, categorical_features, target, **kwargs):
        """Learn per-category target means using out-of-fold statistics.

        :param categorical_features: DataFrame of categorical columns.
        :param target: single-column DataFrame holding the target.
        :return: self
        """
        feature_columns, target_column = categorical_features.columns, target.columns[0]
        X_target_means = []
        for train_index, test_index in self.k_folds.split(target):
            X_train, y_train = categorical_features.iloc[train_index], target.iloc[train_index]
            # Only the held-out features are needed; the original also sliced
            # the held-out target into an unused local.
            X_test = categorical_features.iloc[test_index]
            train = pd.concat([X_train, y_train], axis=1)
            for column, target_mean_name in zip(feature_columns, self._target_means_names(feature_columns)):
                group_object = train.groupby(column)
                train_target_means = group_object[target_column].mean(). \
                    reset_index().rename(index=str, columns={target_column: target_mean_name})
                # Left merge keeps every held-out row; categories unseen in the
                # training folds get NaN here.
                X_test = X_test.merge(train_target_means, on=column, how='left')
            X_target_means.append(X_test)
        X_target_means = pd.concat(X_target_means, axis=0).astype(np.float32)
        # Average the fold-wise encodings into one lookup table per column.
        for column, target_mean_name in zip(feature_columns, self._target_means_names(feature_columns)):
            group_object = X_target_means.groupby(column)
            self.target_means_map[column] = group_object[target_mean_name].mean().reset_index()
        return self

    def transform(self, categorical_features, **kwargs):
        """Encode columns with the learned means; flag unseen categories.

        :return: dict with 'numerical_features' (the mean encodings, NaN
            filled with 0) and 'categorical_features' (0/1 unseen-category
            indicators).
        """
        columns = categorical_features.columns
        for column, target_mean_name, is_null_name in zip(columns,
                                                          self._target_means_names(columns),
                                                          self._is_null_names(columns)):
            categorical_features = categorical_features.merge(self.target_means_map[column],
                                                              on=column,
                                                              how='left').astype(np.float32)
            categorical_features[is_null_name] = pd.isnull(categorical_features[target_mean_name]).astype(int)
            # Explicit assignment instead of inplace fillna on a column:
            # avoids the chained-assignment pitfall (and the pandas>=2
            # deprecation of inplace fillna on a selection).
            categorical_features[target_mean_name] = categorical_features[target_mean_name].fillna(0)
        return {'numerical_features': categorical_features[self._target_means_names(columns)],
                'categorical_features': categorical_features[self._is_null_names(columns)]}

    def load(self, filepath):
        """Restore the fitted target-means lookup from *filepath*."""
        self.target_means_map = joblib.load(filepath)
        return self

    def save(self, filepath):
        """Persist the fitted target-means lookup to *filepath*."""
        joblib.dump(self.target_means_map, filepath)
示例2: kFolds
# 需要导入模块: from sklearn.model_selection import KFold [as 别名]
# 或者: from sklearn.model_selection.KFold import get_n_splits [as 别名]
def kFolds(dataSet, k = 10):
    """Evaluate an SVM classifier with k-fold cross-validation.

    :param dataSet: of type DataFrame; column 0 is the label, columns 1..15
        are the features.
    :param k: number of subsets (folds) to split into.
    """
    # as_matrix() was removed in pandas 1.0; to_numpy() is the replacement.
    df_mx = dataSet.to_numpy()
    X = df_mx[:, 1:16]
    Y = df_mx[:, 0:1]
    lm = svm.SVC(gamma=0.001, C=100.)  # Support Vector Machine
    # Bug fix: honour the k parameter (the split was hard-coded to 10 folds).
    kf = KFold(n_splits=k)
    accuracies = numpy.zeros(kf.get_n_splits(X))
    for i, (train_index, test_index) in enumerate(kf.split(X)):
        print("{}. TRAIN: {} TEST: {}".format(i + 1, train_index, test_index))
        X_train, X_test = X[train_index], X[test_index]
        Y_train, Y_test = Y[train_index], Y[test_index]
        # train using X_train; ravel() because SVC expects a 1-D label array
        # (Y is an (n, 1) slice).
        model = lm.fit(X_train, Y_train.ravel())
        # save accuracy measured on the held-out fold
        accuracies[i] = model.score(X_test, Y_test)
    # find mean accuracy over all rounds
    # Bug fix: the format arguments were swapped (k was printed as the
    # accuracy and the accuracy as k).
    print("Average accuracy of K-Folds (k={}): {}%".format(k, numpy.mean(accuracies) * 100))
示例3: VTiter
# 需要导入模块: from sklearn.model_selection import KFold [as 别名]
# 或者: from sklearn.model_selection.KFold import get_n_splits [as 别名]
def VTiter(self, *parsedArgs, **envars):
    """Yield (row_id, fold_id) pairs assigning each query row to a KFold fold.

    Requires a 'splits' argument (number of folds) and a 'query' argument
    whose first result column identifies each row. First yields the schema
    header, then one (row, fold index) tuple per row.
    """
    largs, dictargs = self.full_parse(parsedArgs)
    # get arguments
    if 'splits' not in dictargs:
        raise functions.OperatorError(__name__.rsplit('.')[-1], "No splits argument.")
    else:
        self.n_splits = int(dictargs['splits'])
    self.data = []
    if 'query' not in dictargs:
        raise functions.OperatorError(__name__.rsplit('.')[-1], "No query argument ")
    query = dictargs['query']
    cur = envars['db'].cursor()
    c = cur.execute(query)
    for r in c:
        # Keep digit strings as-is; coerce everything else to str.
        if r[0].isdigit():
            self.data.append(r[0])
        else:
            self.data.append(str(r[0]))
    # Schema header: row id and the id of the fold it belongs to.
    yield [('rid',), ('idofset',)]
    X = np.array(self.data)
    kf = KFold(self.n_splits)
    try:
        # Bug fix: the original wrapped this loop in a second, outer
        # kf.split(X) loop, emitting every (row, fold) pair n_splits times.
        for j, (train_index, test_index) in enumerate(kf.split(X)):
            for k in test_index:
                yield (self.data[k], j)
    except ValueError:
        # KFold.split raises ValueError when n_splits > number of samples;
        # the original bare except: also masked unrelated errors.
        yield (-1, "Cannot have number of splits greater than the number of samples")
示例4: model
# 需要导入模块: from sklearn.model_selection import KFold [as 别名]
# 或者: from sklearn.model_selection.KFold import get_n_splits [as 别名]
# --- training error ---
clf = model()
# fit the estimator on the full training split
clf.fit(X_train, y_train)
# score the model's predictions on its own training data against the true
# labels to obtain the training error
y_hat_train = clf.predict(X_train)
training_error = r2_score(y_train, y_hat_train)

# --- cross-validation error ---
from sklearn.model_selection import KFold
from numpy import zeros, mean

n = 3  # number of folds
kf = KFold(n_splits=n)
kf.get_n_splits(X_train)

scores = zeros(n)
i = 0
for train_index, test_index in kf.split(X_train):
    fold_X_train, fold_X_valid = X_train[train_index], X_train[test_index]
    fold_y_train, fold_y_valid = y_train[train_index], y_train[test_index]
    # a fresh model per fold, trained on the fold's training portion
    fold_model = model()
    fold_model.fit(fold_X_train, fold_y_train)
    # evaluate on the held-out portion
    fold_predictions = fold_model.predict(fold_X_valid)
    scores[i] = r2_score(fold_y_valid, fold_predictions)
    print ('Fold', i+1, 'example metric = ', scores[i])
    i = i + 1

# average metric across the folds
cross_validation_error = mean(scores)

# Print results
print("\nThe scores are: ")
示例5: print
# 需要导入模块: from sklearn.model_selection import KFold [as 别名]
# 或者: from sklearn.model_selection.KFold import get_n_splits [as 别名]
# Build a synthetic sparse regression problem: only the causal coefficients
# of beta are non-zero.
# NOTE(review): assumes N, P, causal_ind and glmnet are defined earlier in
# the script — confirm.
beta = np.zeros((P))
beta[causal_ind] = 1.0
X = np.random.randn(*(N,P))
noise = np.random.randn(N)  # NOTE(review): drawn but never added to y — looks unused, confirm intent
y = X.dot(beta)
print("Initialize the model")
print("Option 1: use native glmnet `nfolds`")
model = glmnet(l1_ratio=0.5, n_folds=10)
print("Option 2: use `sklearn` `cv` syntax")
from sklearn.model_selection import KFold
n_folds =10
kf = KFold(n_folds)
# get_n_splits() simply returns the fold count, so cv receives n_folds here.
model = glmnet(l1_ratio=0.5, cv=kf.get_n_splits(y), keep=True)
print("Fit in sklearn style")
model.fit(X, y)
print("Predict in sklearn style")
y_hat = model.predict(X)
# model.alpha_ is read after fitting; presumably the selected penalty —
# verify against the glmnet wrapper's documentation.
print("penalty", model.alpha_)
print("Use `.cross_val_score()` method in order to apply cross-validation metrics other than MSE")
from sklearn import metrics
print(model.cross_val_score(metrics.r2_score))
print("plot native R graphs")
model.rplot()
示例6: KFold
# 需要导入模块: from sklearn.model_selection import KFold [as 别名]
# 或者: from sklearn.model_selection.KFold import get_n_splits [as 别名]
# Append the engineered index features to the test set.
# NOTE(review): assumes indices, path, train, y, params and xgb_r2_score are
# defined earlier in the script — confirm.
indices = np.column_stack(indices)
indices = pd.DataFrame(indices)
test = pd.read_csv(path+'test_start.csv')
test = pd.concat([test, indices], axis=1)
dtest = xgb.DMatrix(test)
# y_pred = model.predict(dtest)
#
# test = pd.read_csv(path+'test_start.csv')
# output = pd.DataFrame({'id': test['ID'].astype(np.int32), 'y': y_pred})
# output.to_csv(path+'xgboost-depth{}-pca-ica.csv'.format(xgb_params['max_depth']), index=False)

# Out-of-fold training: one xgboost model per fold.
n_splits = 5
kf = KFold(n_splits=n_splits)
kf.get_n_splits(train)  # returns the fold count; no side effect
# dtest = xgb.DMatrix(test)
predictions = np.zeros((test.shape[0], n_splits))  # per-fold test predictions
score = 0
oof_predictions = np.zeros(train.shape[0])  # out-of-fold predictions on train
for fold, (train_index, test_index) in enumerate(kf.split(train)):
    X_train, X_valid = train.iloc[train_index, :], train.iloc[test_index, :]
    y_train, y_valid = y[train_index], y[test_index]
    d_train = xgb.DMatrix(X_train, label=y_train)
    d_valid = xgb.DMatrix(X_valid, label=y_valid)
    watchlist = [(d_train, 'train'), (d_valid, 'valid')]
    # NOTE(review): the loop body continues beyond this excerpt.
    model = xgb.train(params, d_train, 1000, watchlist, early_stopping_rounds=50, feval=xgb_r2_score, maximize=True, verbose_eval=False)