
Python model_selection.TimeSeriesSplit Code Examples

This page collects typical usage examples of Python's sklearn.model_selection.TimeSeriesSplit, a cross-validation splitter for time-ordered data. If you are unsure what TimeSeriesSplit does, how to call it, or what real-world code that uses it looks like, the curated examples below should help. You can also explore further usage examples from the sklearn.model_selection module it belongs to.


The 15 code examples of model_selection.TimeSeriesSplit shown below were collected from open-source projects and are ordered roughly by popularity.
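Before the project examples, here is a minimal sketch (not taken from any of the projects below; the toy array X is a placeholder) showing what TimeSeriesSplit actually does: each fold trains on all samples up to a point in time and tests on the block that follows, so the training window grows while the time order is preserved.

import numpy as np
from sklearn.model_selection import TimeSeriesSplit

X = np.arange(12).reshape(6, 2)  # six time-ordered samples (toy data)

tscv = TimeSeriesSplit(n_splits=3)
for fold, (train_idx, test_idx) in enumerate(tscv.split(X)):
    # Training indices always precede test indices, and the training window grows each fold.
    print(f"fold {fold}: train={train_idx}, test={test_idx}")

# Expected output:
# fold 0: train=[0 1 2], test=[3]
# fold 1: train=[0 1 2 3], test=[4]
# fold 2: train=[0 1 2 3 4], test=[5]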

Example 1: backtest

# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import TimeSeriesSplit [as alias]
def backtest(data_set_path,n_test_split):

    X,y = prepare_data(data_set_path,as_retention=False)

    tscv = TimeSeriesSplit(n_splits=n_test_split)

    lift_scorer = make_scorer(calc_lift, needs_proba=True)
    score_models = {'lift': lift_scorer, 'AUC': 'roc_auc'}

    retain_reg = LogisticRegression(penalty='l1', solver='liblinear', fit_intercept=True)

    gsearch = GridSearchCV(estimator=retain_reg,scoring=score_models, cv=tscv, verbose=1,
                           return_train_score=False,  param_grid={'C' : [1]}, refit='AUC')

    gsearch.fit(X,y)
    result_df = pd.DataFrame(gsearch.cv_results_)

    save_path = data_set_path.replace('.csv', '_backtest.csv')
    result_df.to_csv(save_path, index=False)
    print('Saved test scores to ' + save_path) 
Author: carl24k, project: fight-churn, lines: 22, source file: listing_9_3_backtest.py

Example 2: test_diff_detector_cross_validate

# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import TimeSeriesSplit [as alias]
def test_diff_detector_cross_validate(return_estimator: bool):
    """
    DiffBasedAnomalyDetector.cross_validate implementation should be the
    same as sklearn.model_selection.cross_validate if called the same.

    And it always will update `return_estimator` to True, as it requires
    the intermediate models to calculate the thresholds
    """
    X = np.random.random((100, 10))
    y = np.random.random((100, 1))

    model = DiffBasedAnomalyDetector(base_estimator=LinearRegression())

    cv = TimeSeriesSplit(n_splits=3)
    cv_results_da = model.cross_validate(
        X=X, y=y, cv=cv, return_estimator=return_estimator
    )
    cv_results_sk = cross_validate(model, X=X, y=y, cv=cv, return_estimator=True)

    assert cv_results_da.keys() == cv_results_sk.keys() 
Author: equinor, project: gordo, lines: 22, source file: test_anomaly_detectors.py

Example 3: test_2d_y

# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import TimeSeriesSplit [as alias]
def test_2d_y():
    # smoke test for 2d y and multi-label
    n_samples = 30
    rng = np.random.RandomState(1)
    X = rng.randint(0, 3, size=(n_samples, 2))
    y = rng.randint(0, 3, size=(n_samples,))
    y_2d = y.reshape(-1, 1)
    y_multilabel = rng.randint(0, 2, size=(n_samples, 3))
    groups = rng.randint(0, 3, size=(n_samples,))
    splitters = [LeaveOneOut(), LeavePOut(p=2), KFold(), StratifiedKFold(),
                 RepeatedKFold(), RepeatedStratifiedKFold(),
                 ShuffleSplit(), StratifiedShuffleSplit(test_size=.5),
                 GroupShuffleSplit(), LeaveOneGroupOut(),
                 LeavePGroupsOut(n_groups=2), GroupKFold(), TimeSeriesSplit(),
                 PredefinedSplit(test_fold=groups)]
    for splitter in splitters:
        list(splitter.split(X, y, groups))
        list(splitter.split(X, y_2d, groups))
        try:
            list(splitter.split(X, y_multilabel, groups))
        except ValueError as e:
            allowed_target_types = ('binary', 'multiclass')
            msg = "Supported target types are: {}. Got 'multilabel".format(
                allowed_target_types)
            assert msg in str(e) 
Author: PacktPublishing, project: Mastering-Elasticsearch-7.0, lines: 27, source file: test_split.py

Example 4: prepare_xy

# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import TimeSeriesSplit [as alias]
def prepare_xy(self,groups=True):

        if groups:
            self.apply_behavior_grouping()
            dat= pd.DataFrame(self.churn_data_reduced)
            cols=self.grouped_columns
        else:
            self.normalize_skewscale()
            dat = pd.DataFrame(self.data_scores)
            cols = self.metric_columns

        # The result has to be sorted by date for the TimeSeriesSplit to work properly
        dat['temp_obs_date'] = self.observe_dates.values
        dat.sort_values('temp_obs_date',inplace=True)

        X = dat[cols]
        y = dat['is_churn']

        return X,y 
Author: carl24k, project: fight-churn, lines: 21, source file: churn_calc.py

Example 5: crossvalidate_churn_model

# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import TimeSeriesSplit [as alias]
def crossvalidate_churn_model(self,model_code,groups=True):
        X,y = self.prepare_xy(groups)
        params = self.cv_params(model_code)
        model = self.model_instance(model_code)
        tscv = TimeSeriesSplit(n_splits=3)
        lift_scorer = make_scorer(top_decile_lift,needs_proba=True)
        score_models = {'lift_scorer' : lift_scorer, 'AUC' : 'roc_auc'}
        gsearch = GridSearchCV(estimator=model, param_grid=params, scoring=score_models, cv=tscv, n_jobs=8,verbose=5,
                               return_train_score=True,refit='AUC')


        gsearch.fit(X, y)
        result_df = pd.DataFrame(gsearch.cv_results_)
        if len(params)>1:
            result_df.sort_values('mean_test_AUC',ascending=False,inplace=True)


        save_file_name = model_code + '_CV'
        save_path = self.save_path(save_file_name, subdir=self.grouping_correlation_subdir(groups))

        result_df.to_csv(save_path)
        print('Saved result to ' + save_path)
        return result_df 
Author: carl24k, project: fight-churn, lines: 25, source file: churn_calc.py

Example 6: test_keras_autoencoder_crossval

# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import TimeSeriesSplit [as alias]
def test_keras_autoencoder_crossval(model, kind):
    """
    Test ability for cross validation
    """
    Model = pydoc.locate(f"gordo.machine.model.models.{model}")
    model = Pipeline([("model", Model(kind=kind))])

    X = np.random.random(size=(15, 2))
    y = X.copy()

    scores = cross_val_score(
        model, X, y, cv=TimeSeriesSplit(n_splits=2, max_train_size=2)
    )
    assert isinstance(scores, np.ndarray)
    logger.info(f"Mean score: {scores.mean():.4f} - Std score: {scores.std():.4f}") 
Author: equinor, project: gordo, lines: 17, source file: test_model.py

Example 7: test_time_series_cv

# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import TimeSeriesSplit [as alias]
def test_time_series_cv():
    X = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14]]

    # Should fail if there are more folds than samples
    assert_raises_regexp(ValueError, "Cannot have number of folds.*greater",
                         next,
                         TimeSeriesSplit(n_splits=7).split(X))

    tscv = TimeSeriesSplit(2)

    # Manually check that Time Series CV preserves the data
    # ordering on toy datasets
    splits = tscv.split(X[:-1])
    train, test = next(splits)
    assert_array_equal(train, [0, 1])
    assert_array_equal(test, [2, 3])

    train, test = next(splits)
    assert_array_equal(train, [0, 1, 2, 3])
    assert_array_equal(test, [4, 5])

    splits = TimeSeriesSplit(2).split(X)

    train, test = next(splits)
    assert_array_equal(train, [0, 1, 2])
    assert_array_equal(test, [3, 4])

    train, test = next(splits)
    assert_array_equal(train, [0, 1, 2, 3, 4])
    assert_array_equal(test, [5, 6])

    # Check get_n_splits returns the correct number of splits
    splits = TimeSeriesSplit(2).split(X)
    n_splits_actual = len(list(splits))
    assert_equal(n_splits_actual, tscv.get_n_splits())
    assert_equal(n_splits_actual, 2) 
Author: PacktPublishing, project: Mastering-Elasticsearch-7.0, lines: 38, source file: test_split.py

Example 8: test_time_series_max_train_size

# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import TimeSeriesSplit [as alias]
def test_time_series_max_train_size():
    X = np.zeros((6, 1))
    splits = TimeSeriesSplit(n_splits=3).split(X)
    check_splits = TimeSeriesSplit(n_splits=3, max_train_size=3).split(X)
    _check_time_series_max_train_size(splits, check_splits, max_train_size=3)

    # Test for the case where the size of a fold is greater than max_train_size
    check_splits = TimeSeriesSplit(n_splits=3, max_train_size=2).split(X)
    _check_time_series_max_train_size(splits, check_splits, max_train_size=2)

    # Test for the case where the size of each fold is less than max_train_size
    check_splits = TimeSeriesSplit(n_splits=3, max_train_size=5).split(X)
    _check_time_series_max_train_size(splits, check_splits, max_train_size=2) 
Author: PacktPublishing, project: Mastering-Elasticsearch-7.0, lines: 15, source file: test_split.py

Example 9: test_nsplit_default_warn

# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import TimeSeriesSplit [as alias]
def test_nsplit_default_warn():
    # Test that warnings are raised. Will be removed in 0.22
    assert_warns_message(FutureWarning, NSPLIT_WARNING, KFold)
    assert_warns_message(FutureWarning, NSPLIT_WARNING, GroupKFold)
    assert_warns_message(FutureWarning, NSPLIT_WARNING, StratifiedKFold)
    assert_warns_message(FutureWarning, NSPLIT_WARNING, TimeSeriesSplit)

    assert_no_warnings(KFold, n_splits=5)
    assert_no_warnings(GroupKFold, n_splits=5)
    assert_no_warnings(StratifiedKFold, n_splits=5)
    assert_no_warnings(TimeSeriesSplit, n_splits=5) 
Author: PacktPublishing, project: Mastering-Elasticsearch-7.0, lines: 13, source file: test_split.py

Example 10: split

# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import TimeSeriesSplit [as alias]
def split(self, df, y=None, groups=None):
        self._validate_df(df)
        groups = df.groupby(self.groupby).indices
        splits = {}
        while True:
            X_idxs, y_idxs = [], []
            for key, sub_idx in groups.items():
                sub_df = df.iloc[sub_idx]
                sub_y = y[sub_idx] if y is not None else None

                if key not in splits:
                    splitter = TimeSeriesSplit(
                        self.n_splits, self.max_train_size
                    )
                    splits[key] = splitter.split(sub_df, sub_y)

                try:
                    X_idx, y_idx = next(splits[key])
                    X_idx = np.array(
                        [df.index.get_loc(i) for i in sub_df.iloc[X_idx].index]
                    )
                    y_idx = np.array(
                        [df.index.get_loc(i) for i in sub_df.iloc[y_idx].index]
                    )
                    X_idxs.append(X_idx)
                    y_idxs.append(y_idx)
                except StopIteration:
                    pass

            if len(X_idxs) == 0:
                break

            yield np.concatenate(X_idxs), np.concatenate(y_idxs) 
Author: octoenergy, project: timeserio, lines: 35, source file: time_series_split.py
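As a follow-up, here is a minimal standalone sketch of the same idea (not the timeserio implementation; the grouped_time_series_splits helper and the DataFrame are illustrative assumptions): run an independent TimeSeriesSplit inside each group and concatenate the resulting positional indices. It assumes each group is already sorted by time and has enough rows for the requested number of splits.

import numpy as np
import pandas as pd
from sklearn.model_selection import TimeSeriesSplit

def grouped_time_series_splits(df, groupby, n_splits=3):
    """Yield (train_idx, test_idx) positional indices, splitting each group in time order."""
    group_indices = df.groupby(groupby).indices  # group key -> positions in df
    splitters = {
        key: TimeSeriesSplit(n_splits=n_splits).split(df.iloc[idx])
        for key, idx in group_indices.items()
    }
    for _ in range(n_splits):
        train_parts, test_parts = [], []
        for key, sub_idx in group_indices.items():
            train, test = next(splitters[key])
            # Map within-group indices back to positions in the full frame.
            train_parts.append(sub_idx[train])
            test_parts.append(sub_idx[test])
        yield np.concatenate(train_parts), np.concatenate(test_parts)

# Hypothetical usage: two meters, each with eight time-ordered readings.
df = pd.DataFrame({"meter": ["a"] * 8 + ["b"] * 8, "value": np.arange(16)})
for train_idx, test_idx in grouped_time_series_splits(df, "meter", n_splits=3):
    print(len(train_idx), len(test_idx))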

Example 11: crossvalidate_xgb

# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import TimeSeriesSplit [as alias]
def crossvalidate_xgb(data_set_path,n_test_split):

    X,y = prepare_data(data_set_path,ext='',as_retention=False)

    tscv = TimeSeriesSplit(n_splits=n_test_split)

    score_models = {'lift': make_scorer(calc_lift, needs_proba=True), 'AUC': 'roc_auc'}

    xgb_model = xgb.XGBClassifier(objective='binary:logistic')
    test_params = { 'max_depth': [1,2,4,6],
                    'learning_rate': [0.1,0.2,0.3,0.4],
                    'n_estimators': [20,40,80,120],
                    'min_child_weight' : [3,6,9,12]}
    gsearch = GridSearchCV(estimator=xgb_model,n_jobs=-1, scoring=score_models, cv=tscv, verbose=1,
                           return_train_score=False,  param_grid=test_params,refit='AUC')
    gsearch.fit(X.values,y)

    result_df = pd.DataFrame(gsearch.cv_results_)
    result_df.sort_values('mean_test_AUC',ascending=False,inplace=True)
    save_path = data_set_path.replace('.csv', '_crossval_xgb.csv')
    result_df.to_csv(save_path, index=False)
    print('Saved test scores to ' + save_path)

    pickle_path = data_set_path.replace('.csv', '_xgb_model.pkl')
    with open(pickle_path, 'wb') as fid:
        pickle.dump(gsearch.best_estimator_, fid)
    print('Saved model pickle to ' + pickle_path)

    predictions = gsearch.best_estimator_.predict_proba(X.values)
    predict_df = pd.DataFrame(predictions, index=X.index, columns=['retain_prob','churn_prob'])
    forecast_save_path = data_set_path.replace('.csv', '_xgb_predictions.csv')
    print('Saving results to %s' % forecast_save_path)
    predict_df.to_csv(forecast_save_path, header=True)

    forecast_histogram(data_set_path,predict_df,ext='xgb') 
Author: carl24k, project: fight-churn, lines: 37, source file: listing_9_6_crossvalidate_xgb.py

Example 12: crossvalidate

# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import TimeSeriesSplit [as alias]
def crossvalidate(data_set_path,n_test_split):

    X,y = prepare_data(data_set_path,as_retention=False)
    tscv = TimeSeriesSplit(n_splits=n_test_split)
    score_models = {'lift': make_scorer(calc_lift, needs_proba=True), 'AUC': 'roc_auc'}
    retain_reg = LogisticRegression(penalty='l1', solver='liblinear', fit_intercept=True)
    test_params = {'C' : [0.64, 0.32, 0.16, 0.08, 0.04, 0.02, 0.01, 0.005, 0.0025]}
    gsearch = GridSearchCV(estimator=retain_reg,scoring=score_models, cv=tscv, verbose=1,
                           return_train_score=False,  param_grid=test_params, refit=False)
    gsearch.fit(X,y)

    result_df = pd.DataFrame(gsearch.cv_results_)
    result_df['n_weights']= test_n_weights(X,y,test_params)
    result_df.to_csv(data_set_path.replace('.csv', '_crossval.csv'), index=False)
    plot_regression_test(data_set_path,result_df) 
Author: carl24k, project: fight-churn, lines: 17, source file: listing_9_5_crossvalidate.py

Example 13: test_objectmapper

# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import TimeSeriesSplit [as alias]
def test_objectmapper(self):
        df = pdml.ModelFrame([])

        # Splitter Classes
        self.assertIs(df.model_selection.KFold, ms.KFold)
        self.assertIs(df.model_selection.GroupKFold, ms.GroupKFold)
        self.assertIs(df.model_selection.StratifiedKFold, ms.StratifiedKFold)

        self.assertIs(df.model_selection.LeaveOneGroupOut, ms.LeaveOneGroupOut)
        self.assertIs(df.model_selection.LeavePGroupsOut, ms.LeavePGroupsOut)
        self.assertIs(df.model_selection.LeaveOneOut, ms.LeaveOneOut)
        self.assertIs(df.model_selection.LeavePOut, ms.LeavePOut)

        self.assertIs(df.model_selection.ShuffleSplit, ms.ShuffleSplit)
        self.assertIs(df.model_selection.GroupShuffleSplit,
                      ms.GroupShuffleSplit)
        # self.assertIs(df.model_selection.StratifiedShuffleSplit,
        #               ms.StratifiedShuffleSplit)
        self.assertIs(df.model_selection.PredefinedSplit, ms.PredefinedSplit)
        self.assertIs(df.model_selection.TimeSeriesSplit, ms.TimeSeriesSplit)

        # Splitter Functions

        # Hyper-parameter optimizers
        self.assertIs(df.model_selection.GridSearchCV, ms.GridSearchCV)
        self.assertIs(df.model_selection.RandomizedSearchCV, ms.RandomizedSearchCV)
        self.assertIs(df.model_selection.ParameterGrid, ms.ParameterGrid)
        self.assertIs(df.model_selection.ParameterSampler, ms.ParameterSampler)

        # Model validation 
Author: pandas-ml, project: pandas-ml, lines: 32, source file: test_model_selection.py

Example 14: test_objectmapper_abbr

# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import TimeSeriesSplit [as alias]
def test_objectmapper_abbr(self):
        df = pdml.ModelFrame([])

        # Splitter Classes
        self.assertIs(df.ms.KFold, ms.KFold)
        self.assertIs(df.ms.GroupKFold, ms.GroupKFold)
        self.assertIs(df.ms.StratifiedKFold, ms.StratifiedKFold)

        self.assertIs(df.ms.LeaveOneGroupOut, ms.LeaveOneGroupOut)
        self.assertIs(df.ms.LeavePGroupsOut, ms.LeavePGroupsOut)
        self.assertIs(df.ms.LeaveOneOut, ms.LeaveOneOut)
        self.assertIs(df.ms.LeavePOut, ms.LeavePOut)

        self.assertIs(df.ms.ShuffleSplit, ms.ShuffleSplit)
        self.assertIs(df.ms.GroupShuffleSplit,
                      ms.GroupShuffleSplit)
        # self.assertIs(df.ms.StratifiedShuffleSplit,
        #               ms.StratifiedShuffleSplit)
        self.assertIs(df.ms.PredefinedSplit, ms.PredefinedSplit)
        self.assertIs(df.ms.TimeSeriesSplit, ms.TimeSeriesSplit)

        # Splitter Functions

        # Hyper-parameter optimizers
        self.assertIs(df.ms.GridSearchCV, ms.GridSearchCV)
        self.assertIs(df.ms.RandomizedSearchCV, ms.RandomizedSearchCV)
        self.assertIs(df.ms.ParameterGrid, ms.ParameterGrid)
        self.assertIs(df.ms.ParameterSampler, ms.ParameterSampler)

        # Model validation 
Author: pandas-ml, project: pandas-ml, lines: 32, source file: test_model_selection.py

Example 15: function

# Required import: from sklearn import model_selection [as alias]
# Or: from sklearn.model_selection import TimeSeriesSplit [as alias]
def function(self):
        self.out_1.val = TimeSeriesSplit() 
Author: AlvarBer, project: Persimmon, lines: 4, source file: tssplitblock.py


Note: the sklearn.model_selection.TimeSeriesSplit examples on this page were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by their respective developers; copyright in the source code remains with the original authors, and any distribution or use should follow the corresponding project's license. Do not reproduce without permission.