

Python KFold.get_n_splits Method Code Examples

This article collects typical usage examples of the Python method sklearn.model_selection.KFold.get_n_splits. If you have been wondering what exactly KFold.get_n_splits does, how to call it, or what real uses of it look like, the hand-picked code examples below may help. You can also explore further usage examples of the class it belongs to, sklearn.model_selection.KFold.


Six code examples of the KFold.get_n_splits method are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
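Before the project excerpts, here is a minimal, self-contained sketch of the method itself. The array X and the fold count of 5 are illustrative choices, not taken from any of the examples below: get_n_splits simply reports the number of folds the KFold instance is configured to produce, and any arguments passed to it are accepted only for API compatibility and ignored.

import numpy as np
from sklearn.model_selection import KFold

X = np.arange(20).reshape(10, 2)   # 10 samples, 2 features (illustrative data)
kf = KFold(n_splits=5)
print(kf.get_n_splits(X))          # prints 5; X itself does not influence the result
for fold, (train_index, test_index) in enumerate(kf.split(X)):
    print("fold", fold, "train:", train_index, "test:", test_index)

The examples below repeat this same pattern inside larger training loops.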

Example 1: TargetEncoderNSplits

# Required import: from sklearn.model_selection import KFold [as alias]
# Or: from sklearn.model_selection.KFold import get_n_splits [as alias]
class TargetEncoderNSplits(BaseTransformer):
    def __init__(self, n_splits, **kwargs):
        self.k_folds = KFold(n_splits=n_splits)
        self.target_means_map = {}

    def _target_means_names(self, columns):
        confidence_rate_names = ['target_mean_{}'.format(column) for column in columns]
        return confidence_rate_names

    def _is_null_names(self, columns):
        is_null_names = ['target_mean_is_nan_{}'.format(column) for column in columns]
        return is_null_names

    def fit(self, categorical_features, target, **kwargs):
        feature_columns, target_column = categorical_features.columns, target.columns[0]

        X_target_means = []
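        # get_n_splits only reports the configured fold count; its return value is not used here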
        self.k_folds.get_n_splits(target)
        for train_index, test_index in self.k_folds.split(target):
            X_train, y_train = categorical_features.iloc[train_index], target.iloc[train_index]
            X_test, y_test = categorical_features.iloc[test_index], target.iloc[test_index]

            train = pd.concat([X_train, y_train], axis=1)
            for column, target_mean_name in zip(feature_columns, self._target_means_names(feature_columns)):
                group_object = train.groupby(column)
                train_target_means = group_object[target_column].mean(). \
                    reset_index().rename(index=str, columns={target_column: target_mean_name})

                X_test = X_test.merge(train_target_means, on=column, how='left')
            X_target_means.append(X_test)
        X_target_means = pd.concat(X_target_means, axis=0).astype(np.float32)

        for column, target_mean_name in zip(feature_columns, self._target_means_names(feature_columns)):
            group_object = X_target_means.groupby(column)
            self.target_means_map[column] = group_object[target_mean_name].mean().reset_index()

        return self

    def transform(self, categorical_features, **kwargs):
        columns = categorical_features.columns

        for column, target_mean_name, is_null_name in zip(columns,
                                                          self._target_means_names(columns),
                                                          self._is_null_names(columns)):
            categorical_features = categorical_features.merge(self.target_means_map[column],
                                                              on=column,
                                                              how='left').astype(np.float32)
            categorical_features[is_null_name] = pd.isnull(categorical_features[target_mean_name]).astype(int)
            categorical_features[target_mean_name].fillna(0, inplace=True)

        return {'numerical_features': categorical_features[self._target_means_names(columns)],
                'categorical_features': categorical_features[self._is_null_names(columns)]}

    def load(self, filepath):
        self.target_means_map = joblib.load(filepath)
        return self

    def save(self, filepath):
        joblib.dump(self.target_means_map, filepath)
Developer ID: rafajak, Project: open-solution-talking-data, Lines of code: 61, Source file: feature_extraction.py

Example 2: kFolds

# Required import: from sklearn.model_selection import KFold [as alias]
# Or: from sklearn.model_selection.KFold import get_n_splits [as alias]
import numpy
from sklearn import svm
from sklearn.model_selection import KFold


def kFolds(dataSet, k=10):
    """
    This is the k-fold method.
    :param dataSet: of type DataFrame
    :param k: number of folds to use
    """
    df_mx = dataSet.to_numpy()  # as_matrix() was removed from pandas; to_numpy() is the replacement
    X = df_mx[:, 1:16]
    Y = df_mx[:, 0:1]

    lm = svm.SVC(gamma=0.001, C=100.)  # Support Vector Machine
    kf = KFold(n_splits=k)  # define the split into k folds
    i = 0
    accuracies = numpy.zeros(kf.get_n_splits(X))
    for train_index, test_index in kf.split(X):
        print("{}. TRAIN: {} TEST: {}".format(i+1, train_index, test_index))
        X_train, X_test = X[train_index], X[test_index]
        Y_train, Y_test = Y[train_index], Y[test_index]
        # train using X_Train
        model = lm.fit(X_train, Y_train.ravel())  # ravel to the 1-D label array that fit() expects
        # evaluate against X_Test
        predictions = lm.predict(X_test)
        # save accuracy
        accuracies[i] = model.score(X_test, Y_test.ravel())
        i = i + 1

    # find mean accuracy over all rounds
    print("Average accuracy of K-Folds (k={}): {}%".format(numpy.mean(accuracies) * 100, k))
Developer ID: HerrAugust, Project: EserciziUni, Lines of code: 30, Source file: kFolds.py

Example 3: VTiter

# Required import: from sklearn.model_selection import KFold [as alias]
# Or: from sklearn.model_selection.KFold import get_n_splits [as alias]
    def VTiter(self, *parsedArgs, **envars):
        largs, dictargs = self.full_parse(parsedArgs)

        # get arguments
        if 'splits' not in dictargs:
            raise functions.OperatorError(__name__.rsplit('.')[-1], "No splits argument.")
        else:
            self.n_splits = int(dictargs['splits'])

        # print largs
        # print dictargs

        self.data = []

        if 'query' not in dictargs:
            raise functions.OperatorError(__name__.rsplit('.')[-1], "No query argument ")
        query = dictargs['query']

        cur = envars['db'].cursor()
        c = cur.execute(query)

        for r in c:
            if r[0].isdigit():
                self.data.append(r[0])
            else:
                self.data.append(str(r[0]))
        yield [('rid',), ('idofset',)]

        # print "data", self.data
        X = np.array(self.data)
        # print X

        kf = KFold(self.n_splits)
        kf.get_n_splits(X)
        # print"KF", kf

        try:
            j = 0
            for train_index, test_index in kf.split(X):
                # print("TRAIN:", train_index, "TEST:", test_index)
                # emit every row id of the current test fold together with its fold index j
                for k in test_index:
                    yield (self.data[k], j)
                j += 1
        except ValueError:
            # KFold raises ValueError when n_splits exceeds the number of samples
            yield (-1, "Cannot have number of splits greater than the number of samples")
Developer ID: HBPSP8Repo, Project: exareme, Lines of code: 48, Source file: sklearnkfold.py

Example 4: model

# Required import: from sklearn.model_selection import KFold [as alias]
# Or: from sklearn.model_selection.KFold import get_n_splits [as alias]
    clf = model()
    # train the model
    clf.fit(X_train, y_train)
    # to compute training error, first make predictions on training set
    y_hat_train = clf.predict(X_train)
    # then compare our prediction with true labels using the metric
    training_error = r2_score(y_train, y_hat_train)


    # CROSS-VALIDATION ERROR
    from sklearn.model_selection import KFold
    from numpy import zeros, mean
    # 3-fold cross-validation
    n = 3
    kf = KFold(n_splits=n)
    kf.get_n_splits(X_train)
    i = 0
    scores = zeros(n)
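    # hold out each fold in turn, refit the model on the remaining folds, and score on the held-out part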
    for train_index, test_index in kf.split(X_train):
        Xtr, Xva = X_train[train_index], X_train[test_index]
        Ytr, Yva = y_train[train_index], y_train[test_index]
        M = model()
        M.fit(Xtr, Ytr)
        Yhat = M.predict(Xva)
        scores[i] = r2_score(Yva, Yhat)
        print('Fold', i+1, 'example metric = ', scores[i])
        i += 1
    cross_validation_error = mean(scores)

    # Print results
    print("\nThe scores are: ")
Developer ID: clementhtn, Project: nature, Lines of code: 33, Source file: model.py

Example 5: print

# Required import: from sklearn.model_selection import KFold [as alias]
# Or: from sklearn.model_selection.KFold import get_n_splits [as alias]
beta = np.zeros(P)
beta[causal_ind] = 1.0
X = np.random.randn(N, P)
noise = np.random.randn(N)
y = X.dot(beta) + noise  # linear signal from the causal coefficients plus Gaussian noise

print("Initialize the model")
print("Option 1: use native glmnet `nfolds`")
model = glmnet(l1_ratio=0.5, n_folds=10)

print("Option 2: use `sklearn` `cv` syntax")
from sklearn.model_selection import KFold
n_folds = 10
kf = KFold(n_folds)

model = glmnet(l1_ratio=0.5, cv=kf.get_n_splits(y), keep=True)

print("Fit in sklearn style")
model.fit(X, y)

print("Predict in sklearn style")
y_hat = model.predict(X)
print("penalty", model.alpha_)

print("Use `.cross_val_score()` method in order to apply cross-validation metrics other than MSE")
from sklearn import metrics
print(model.cross_val_score(metrics.r2_score))

print("plot native R graphs")
model.rplot()
Developer ID: sashatarg, Project: rpyglmnet, Lines of code: 32, Source file: example.py

Example 6: KFold

# Required import: from sklearn.model_selection import KFold [as alias]
# Or: from sklearn.model_selection.KFold import get_n_splits [as alias]
indices = np.column_stack(indices)
indices = pd.DataFrame(indices)
test = pd.read_csv(path+'test_start.csv')

test = pd.concat([test, indices], axis=1)
dtest = xgb.DMatrix(test)

# y_pred = model.predict(dtest)
#
# test = pd.read_csv(path+'test_start.csv')
# output = pd.DataFrame({'id': test['ID'].astype(np.int32), 'y': y_pred})
# output.to_csv(path+'xgboost-depth{}-pca-ica.csv'.format(xgb_params['max_depth']), index=False)

n_splits = 5
kf = KFold(n_splits=n_splits)
kf.get_n_splits(train)
# dtest = xgb.DMatrix(test)
predictions = np.zeros((test.shape[0], n_splits))
score = 0

oof_predictions = np.zeros(train.shape[0])
for fold, (train_index, test_index) in enumerate(kf.split(train)):
    X_train, X_valid = train.iloc[train_index, :], train.iloc[test_index, :]
    y_train, y_valid = y[train_index], y[test_index]

    d_train = xgb.DMatrix(X_train, label=y_train)
    d_valid = xgb.DMatrix(X_valid, label=y_valid)

    watchlist = [(d_train, 'train'), (d_valid, 'valid')]

    model = xgb.train(params, d_train, 1000, watchlist, early_stopping_rounds=50, feval=xgb_r2_score, maximize=True, verbose_eval=False)
Developer ID: xiaofeifei1800, Project: Kaggle_Bimbo, Lines of code: 33, Source file: gbdt+feature.py


Note: The sklearn.model_selection.KFold.get_n_splits examples in this article were compiled by 纯净天空 from open source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open source projects contributed by many different developers, and copyright in the source code remains with the original authors; please consult the corresponding project's license before distributing or using the code. Do not reproduce this article without permission.