当前位置: 首页>>代码示例>>Python>>正文


Python _split._validate_shuffle_split方法代码示例

本文整理汇总了Python中sklearn.model_selection._split._validate_shuffle_split函数的典型用法代码示例。如果您正苦于以下问题：Python _split._validate_shuffle_split函数的具体用法？Python _split._validate_shuffle_split怎么用？Python _split._validate_shuffle_split使用的例子？那么恭喜您，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该函数所在模块sklearn.model_selection._split的用法示例。


在下文中一共展示了_split._validate_shuffle_split函数的5个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_stratified_shuffle_split_iter

# 需要导入模块: from sklearn.model_selection import _split [as 别名]
# 或者: from sklearn.model_selection._split import _validate_shuffle_split [as 别名]
def test_stratified_shuffle_split_iter():
    """Check the splits produced by StratifiedShuffleSplit on varied labels.

    For each label collection, six splits are drawn with ``test_size=0.33``
    and every split must:

    * contain the same set of classes in train and test,
    * have matching class proportions in train and test (to 1 decimal),
    * have the expected train/test sizes, which together cover all samples,
    * share no index between train and test.
    """
    ys = [np.array([1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3]),
          np.array([0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3]),
          np.array([0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2] * 2),
          np.array([1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4]),
          np.array([-1] * 800 + [1] * 50),
          np.concatenate([[i] * (100 + i) for i in range(11)]),
          [1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3],
          ['1', '1', '1', '1', '2', '2', '2', '3', '3', '3', '3', '3'],
          ]

    for y in ys:
        sss = StratifiedShuffleSplit(6, test_size=0.33,
                                     random_state=0).split(np.ones(len(y)), y)
        y = np.asanyarray(y)  # To make it indexable for y[train]
        # this is how test-size is computed internally
        # in _validate_shuffle_split
        test_size = np.ceil(0.33 * len(y))
        train_size = len(y) - test_size
        for train, test in sss:
            assert_array_equal(np.unique(y[train]), np.unique(y[test]))
            # Checks if folds keep classes proportions: per-class counts
            # (in sorted class order) divided by the fold size.
            p_train = (np.unique(y[train], return_counts=True)[1] /
                       float(len(train)))
            p_test = (np.unique(y[test], return_counts=True)[1] /
                      float(len(test)))
            assert_array_almost_equal(p_train, p_test, 1)
            assert_equal(len(train) + len(test), y.size)
            assert_equal(len(train), train_size)
            assert_equal(len(test), test_size)
            # Use the public np.intersect1d rather than reaching into the
            # private np.lib.arraysetops submodule.
            assert_array_equal(np.intersect1d(train, test), [])
开发者ID:PacktPublishing,项目名称:Mastering-Elasticsearch-7.0,代码行数:35,代码来源:test_split.py

示例2: getDataLoaders

# 需要导入模块: from sklearn.model_selection import _split [as 别名]
# 或者: from sklearn.model_selection._split import _validate_shuffle_split [as 别名]
def getDataLoaders(self, batch_size, shuffle, device, *args):
        """Create the train and test ``DataLoader`` objects for a synthetic dataset.

        A ``SyntheticDataset`` is built from ``self.data_size`` followed by
        ``args`` converted to ``float``. The train/test sizes are obtained
        from ``_validate_shuffle_split`` with ``train_size=0.7`` and the
        dataset is split at random accordingly.

        Parameters
        ----------
        batch_size : batch size used by both loaders.
        shuffle : whether the *training* loader shuffles; the test loader
            never does.
        device : when equal to the string ``"cuda"``, the loaders use one
            worker and pinned memory; otherwise no extra options are passed.
        *args : extra dataset arguments, each converted with ``float``.

        Returns
        -------
        (train_loader, test_loader) — both drop the last incomplete batch.
        """
        if device == "cuda":
            loader_opts = {'num_workers': 1, 'pin_memory': True}
        else:
            loader_opts = {}

        print('Load training data...')
        dataset = SyntheticDataset(
            *self.data_size, *(float(value) for value in args)
        )

        n_train, n_test = _validate_shuffle_split(
            len(dataset), test_size=None, train_size=0.7
        )
        train_part, test_part = torch.utils.data.random_split(
            dataset, [n_train, n_test]
        )

        train_loader = DataLoader(
            train_part,
            batch_size=batch_size,
            drop_last=True,
            shuffle=shuffle,
            **loader_opts
        )
        test_loader = DataLoader(
            test_part,
            batch_size=batch_size,
            drop_last=True,
            shuffle=False,
            **loader_opts
        )
        return train_loader, test_loader
开发者ID:emilemathieu,项目名称:pvae,代码行数:11,代码来源:tabular.py

示例3: _split_blockwise

# 需要导入模块: from sklearn.model_selection import _split [as 别名]
# 或者: from sklearn.model_selection._split import _validate_shuffle_split [as 别名]
def _split_blockwise(self, X, seeds):
        """Build train and test index arrays by splitting each block of ``X``.

        The block lengths are read from ``X.chunks[0]``. For every block a
        ``(n_train, n_test)`` pair is computed with
        ``_validate_shuffle_split`` and a delayed ``_generate_idx`` call is
        created with that block's length, seed and sizes. Each delayed result
        is shifted by the cumulative length of the preceding blocks
        (presumably turning block-local indices into global ones — confirm
        against ``_generate_idx``) and the pieces are concatenated.

        Parameters
        ----------
        X : object exposing ``.chunks``; only ``X.chunks[0]`` is used.
        seeds : iterable of per-block seeds, paired with the blocks in order.

        Returns
        -------
        (train_idx, test_idx) — two concatenated dask arrays of integer dtype.
        """
        block_lengths = X.chunks[0]

        train_frac, test_frac = _maybe_normalize_split_sizes(
            self.train_size, self.test_size
        )
        # One (n_train, n_test) pair per block.
        split_counts = [
            _validate_shuffle_split(length, test_frac, train_frac)
            for length in block_lengths
        ]

        delayed_pairs = []
        for length, seed, (n_train, n_test) in zip(
            block_lengths, seeds, split_counts
        ):
            make_idx = dask.delayed(_generate_idx, nout=2)
            delayed_pairs.append(make_idx(length, seed, n_train, n_test))

        train_parts, test_parts = zip(*delayed_pairs)

        # block_starts[i] is the total length of all blocks before block i.
        block_starts = np.hstack([0, np.cumsum(block_lengths)])

        train_arrays = []
        for part, (n_train, _), start in zip(
            train_parts, split_counts, block_starts
        ):
            train_arrays.append(
                da.from_delayed(part + start, (n_train,), np.dtype("int"))
            )
        train_idx = da.concatenate(train_arrays)

        test_arrays = []
        for part, (_, n_test), start in zip(
            test_parts, split_counts, block_starts
        ):
            test_arrays.append(
                da.from_delayed(part + start, (n_test,), np.dtype("int"))
            )
        test_idx = da.concatenate(test_arrays)

        return train_idx, test_idx
开发者ID:dask,项目名称:dask-ml,代码行数:35,代码来源:_split.py

示例4: test_stratified_shuffle_split_even

# 需要导入模块: from sklearn.model_selection import _split [as 别名]
# 或者: from sklearn.model_selection._split import _validate_shuffle_split [as 别名]
def test_stratified_shuffle_split_even():
    """Check that StratifiedShuffleSplit draws every index with equal chance.

    For 6 and 22 samples with two alternating groups, 1000 splits are drawn
    with a test fraction of ``1 / n_folds``. The number of times each index
    lands in train (resp. test) is then compared against a binomial
    distribution whose success probability is the expected train (resp.
    test) proportion.
    """
    n_folds = 5
    n_splits = 1000
    test_fraction = 1. / n_folds

    def assert_counts_are_ok(idx_counts, p):
        # Each per-index count must be plausible under Binomial(n_splits, p).
        min_prob = 0.05 / n_splits
        binomial = stats.binom(n_splits, p)
        for hits in idx_counts:
            assert binomial.pmf(hits) > min_prob, \
                "An index is not drawn with chance corresponding to even draws"

    for n_samples in (6, 22):
        groups = np.array([0, 1] * (n_samples // 2))
        splits = StratifiedShuffleSplit(n_splits=n_splits,
                                        test_size=test_fraction,
                                        random_state=0)

        train_counts = [0] * n_samples
        test_counts = [0] * n_samples
        n_splits_actual = 0
        for train, test in splits.split(X=np.ones(n_samples), y=groups):
            n_splits_actual += 1
            # Tally how often each sample index shows up on either side.
            for idx in train:
                train_counts[idx] += 1
            for idx in test:
                test_counts[idx] += 1
        assert_equal(n_splits_actual, n_splits)

        n_train, n_test = _validate_shuffle_split(
            n_samples,
            test_size=test_fraction,
            train_size=1. - test_fraction,
        )

        # ``train`` / ``test`` still hold the indices of the last split.
        assert_equal(len(train), n_train)
        assert_equal(len(test), n_test)
        assert_equal(len(set(train).intersection(test)), 0)

        group_counts = np.unique(groups)
        assert_equal(splits.test_size, test_fraction)
        assert_equal(n_train + n_test, len(groups))
        assert_equal(len(group_counts), 2)

        ex_test_p = float(n_test) / n_samples
        ex_train_p = float(n_train) / n_samples

        assert_counts_are_ok(train_counts, ex_train_p)
        assert_counts_are_ok(test_counts, ex_test_p)
开发者ID:PacktPublishing,项目名称:Mastering-Elasticsearch-7.0,代码行数:50,代码来源:test_split.py

示例5: test_stratified_shuffle_split_even

# 需要导入模块: from sklearn.model_selection import _split [as 别名]
# 或者: from sklearn.model_selection._split import _validate_shuffle_split [as 别名]
def test_stratified_shuffle_split_even():
    """Check that StratifiedShuffleSplit draws every index with equal chance.

    For 6 and 22 samples with two alternating groups, 1000 splits are drawn
    with a test fraction of ``1 / n_folds``. The number of times each index
    lands in train (resp. test) is compared against a binomial distribution
    whose success probability is the expected train (resp. test) proportion.
    The sizes of the last split are also checked against the values returned
    by ``_validate_shuffle_split``.
    """
    n_folds = 5
    n_splits = 1000

    def assert_counts_are_ok(idx_counts, p):
        # Here we test that the distribution of the counts
        # per index is close enough to a binomial
        threshold = 0.05 / n_splits
        bf = stats.binom(n_splits, p)
        for count in idx_counts:
            prob = bf.pmf(count)
            # Plain assert instead of the nose-style assert_true helper.
            assert prob > threshold, (
                "An index is not drawn with chance corresponding "
                "to even draws")

    for n_samples in (6, 22):
        groups = np.array((n_samples // 2) * [0, 1])
        splits = StratifiedShuffleSplit(n_splits=n_splits,
                                        test_size=1. / n_folds,
                                        random_state=0)

        train_counts = [0] * n_samples
        test_counts = [0] * n_samples
        n_splits_actual = 0
        for train, test in splits.split(X=np.ones(n_samples), y=groups):
            n_splits_actual += 1
            for counter, ids in [(train_counts, train), (test_counts, test)]:
                # ``idx`` rather than ``id`` so the builtin is not shadowed.
                for idx in ids:
                    counter[idx] += 1
        assert_equal(n_splits_actual, n_splits)

        n_train, n_test = _validate_shuffle_split(
            n_samples, test_size=1. / n_folds, train_size=1. - (1. / n_folds))

        # ``train`` / ``test`` still hold the indices of the last split.
        assert_equal(len(train), n_train)
        assert_equal(len(test), n_test)
        assert_equal(len(set(train).intersection(test)), 0)

        group_counts = np.unique(groups)
        assert_equal(splits.test_size, 1.0 / n_folds)
        assert_equal(n_train + n_test, len(groups))
        assert_equal(len(group_counts), 2)
        ex_test_p = float(n_test) / n_samples
        ex_train_p = float(n_train) / n_samples

        assert_counts_are_ok(train_counts, ex_train_p)
        assert_counts_are_ok(test_counts, ex_test_p)
开发者ID:alvarobartt,项目名称:twitter-stock-recommendation,代码行数:51,代码来源:test_split.py


注:本文中的sklearn.model_selection._split._validate_shuffle_split方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。