

Python data.RandomSampler Method Code Examples

This article collects typical usage examples of the torch.utils.data.RandomSampler method in Python. If you are wondering what data.RandomSampler does, how to call it, or what it looks like in real code, the curated examples below may help. You can also explore further usage examples from its containing module, torch.utils.data.


Below are 15 code examples of the data.RandomSampler method, sorted by popularity by default.
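Before diving into the examples, here is a minimal, self-contained sketch of RandomSampler's two modes; the toy dataset and sizes are made up purely for illustration:

import torch
from torch.utils.data import DataLoader, RandomSampler, TensorDataset

# A toy dataset of 10 one-dimensional feature vectors (illustrative only).
dataset = TensorDataset(torch.arange(10).float().unsqueeze(1))

# Default mode (without replacement): a random permutation of all indices.
sampler = RandomSampler(dataset)
print(list(sampler))  # e.g. [3, 7, 0, 9, ...] -- each index exactly once

# With replacement: draws exactly `num_samples` indices, repeats allowed.
sampler = RandomSampler(dataset, replacement=True, num_samples=5)
print(list(sampler))  # e.g. [2, 2, 8, 0, 5]

# A sampler is passed to DataLoader in place of shuffle=True.
loader = DataLoader(dataset, batch_size=4, sampler=RandomSampler(dataset))
for (batch,) in loader:
    print(batch.shape)  # torch.Size([4, 1]); the last batch has 2 rows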

Example 1: prepare_data_loader

# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import RandomSampler [as alias]
def prepare_data_loader(self, dataset, batch_size, rand_flag=True):
        # prepare data loader
        if rand_flag:
            data_sampler = RandomSampler(dataset)
        else:
            data_sampler = SequentialSampler(dataset)

        if self.custom_collate_fn is None:
            dataloader = DataLoader(dataset,
                                    batch_size=batch_size,
                                    sampler=data_sampler)
        else:
            dataloader = DataLoader(dataset,
                                    batch_size=batch_size,
                                    sampler=data_sampler,
                                    collate_fn=self.custom_collate_fn)

        return dataloader 
Developer: dolphin-zs, Project: Doc2EDAG, Lines: 20, Source: base_task.py

Example 2: generate_batch

# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import RandomSampler [as alias]
def generate_batch(self, episodes, episode_labels):
        total_steps = sum([len(e) for e in episodes])
        assert total_steps > self.batch_size
        print('Total Steps: {}'.format(total_steps))
        # Episode sampler
        # Sample `num_samples` episodes then batchify them with `self.batch_size` episodes per batch
        sampler = BatchSampler(RandomSampler(range(len(episodes)),
                                             replacement=True, num_samples=total_steps),
                               self.batch_size, drop_last=True)

        for indices in sampler:
            episodes_batch = [episodes[x] for x in indices]
            episode_labels_batch = [episode_labels[x] for x in indices]
            xs, labels = [], appendabledict()
            for ep_ind, episode in enumerate(episodes_batch):
                # Get one sample from this episode
                t = np.random.randint(len(episode))
                xs.append(episode[t])
                labels.append_update(episode_labels_batch[ep_ind][t])
            yield torch.stack(xs).float().to(self.device) / 255., labels 
Developer: mila-iqia, Project: atari-representation-learning, Lines: 22, Source: probe.py
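The episode-sampling pattern above, which also appears in Examples 3-5, composes two samplers: RandomSampler draws episode indices with replacement, and BatchSampler groups them into fixed-size batches. A stripped-down sketch of the same idea, using made-up sizes:

from torch.utils.data import BatchSampler, RandomSampler

num_episodes, batch_size, total_steps = 6, 4, 20  # illustrative sizes

# RandomSampler draws `num_samples` episode indices with replacement;
# BatchSampler groups them into fixed-size batches, dropping the remainder.
sampler = BatchSampler(
    RandomSampler(range(num_episodes), replacement=True, num_samples=total_steps),
    batch_size=batch_size, drop_last=True)

for indices in sampler:
    print(indices)  # e.g. [5, 0, 5, 2] -- episode indices, repeats possible
# 20 draws / 4 per batch = 5 batches per pass over the sampler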

Example 3: generate_batch

# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import RandomSampler [as alias]
def generate_batch(self, episodes):
        total_steps = sum([len(e) for e in episodes])
        print('Total Steps: {}'.format(total_steps))
        # Episode sampler
        # Sample `num_samples` episodes then batchify them with `self.batch_size` episodes per batch
        sampler = BatchSampler(RandomSampler(range(len(episodes)),
                                             replacement=True, num_samples=total_steps),
                               self.batch_size, drop_last=True)
        for indices in sampler:
            episodes_batch = [episodes[x] for x in indices]
            x_t, x_tprev = [], []
            for episode in episodes_batch:
                # Sample one frame index per episode, starting at 1 so that
                # episode[t - 1] is a real previous frame (t == 0 would
                # silently wrap to episode[-1] via negative indexing)
                t = np.random.randint(1, len(episode))
                x_t.append(episode[t])
                x_tprev.append(episode[t - 1])
            yield torch.stack(x_t).float().to(self.device) / 255., torch.stack(x_tprev).float().to(self.device) / 255. 
Developer: mila-iqia, Project: atari-representation-learning, Lines: 22, Source: stdim.py

Example 4: generate_batch

# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import RandomSampler [as alias]
def generate_batch(self, episodes):
        total_steps = sum([len(e) for e in episodes])
        print('Total Steps: {}'.format(total_steps))
        # Episode sampler
        # Sample `num_samples` episodes then batchify them with `self.batch_size` episodes per batch
        sampler = BatchSampler(RandomSampler(range(len(episodes)),
                                             replacement=True, num_samples=total_steps),
                               self.batch_size, drop_last=True)
        for indices in sampler:
            episodes_batch = [episodes[x] for x in indices]
            x_t, x_tn = [], []
            for episode in episodes_batch:
                # Get one sample from this episode
                t = np.random.randint(0, len(episode) - self.pred_offset)
                t_n = t + self.pred_offset

                x_t.append(episode[t])
                x_tn.append(episode[t_n])
            yield torch.stack(x_t).float().to(self.device) / 255., \
                  torch.stack(x_tn).float().to(self.device) / 255. 
Developer: mila-iqia, Project: atari-representation-learning, Lines: 22, Source: no_action_feedforward_predictor.py

Example 5: generate_batch

# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import RandomSampler [as alias]
def generate_batch(self, episodes):
        total_steps = sum([len(e) for e in episodes])
        print('Total Steps: {}'.format(total_steps))
        # Episode sampler
        # Sample `num_samples` episodes then batchify them with `self.batch_size` episodes per batch
        sampler = BatchSampler(RandomSampler(range(len(episodes)),
                                             replacement=True, num_samples=total_steps),
                               self.batch_size, drop_last=True)
        for indices in sampler:
            episodes_batch = [episodes[x] for x in indices]
            x_t = []
            for episode in episodes_batch:
                # Sample one random frame from this episode
                t = np.random.randint(0, len(episode))
                x_t.append(episode[t])
            yield torch.stack(x_t).float().to(self.device) / 255. 
Developer: mila-iqia, Project: atari-representation-learning, Lines: 19, Source: vae.py

Example 6: get_train_dataloader

# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import RandomSampler [as alias]
def get_train_dataloader(self, train_examples, verbose=True):
        train_features = convert_examples_to_features(
            train_examples, self.label_map, self.rparams.max_seq_length, self.tokenizer,
            verbose=verbose,
        )
        train_data, train_tokens = convert_to_dataset(
            train_features, label_mode=get_label_mode(self.label_map),
        )
        if self.rparams.local_rank == -1:
            train_sampler = RandomSampler(train_data)
        else:
            train_sampler = DistributedSampler(train_data)
        train_dataloader = DataLoader(
            train_data, sampler=train_sampler, batch_size=self.rparams.train_batch_size,
        )
        return HybridLoader(train_dataloader, train_tokens) 
Developer: zphang, Project: bert_on_stilts, Lines: 18, Source: runners.py

Example 7: get_train_dataloader

# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import RandomSampler [as alias]
def get_train_dataloader(self, train_examples, verbose=True):
        train_features = convert_examples_to_features(
            examples=train_examples,
            max_seq_length=self.rparams.max_seq_length,
            tokenizer=self.tokenizer,
            select_prob=self.rparams.select_prob,
            verbose=verbose,
        )
        train_data, train_tokens = convert_to_dataset(train_features)
        if self.rparams.local_rank == -1:
            train_sampler = RandomSampler(train_data)
        else:
            train_sampler = DistributedSampler(train_data)
        train_dataloader = DataLoader(
            train_data, sampler=train_sampler, batch_size=self.rparams.train_batch_size,
        )
        return HybridLoader(train_dataloader, train_tokens) 
Developer: zphang, Project: bert_on_stilts, Lines: 19, Source: runners.py

Example 8: data_creator

# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import RandomSampler [as alias]
def data_creator(config):
    args = config["args"]
    start = time.time()
    tokenizer = AutoTokenizer.from_pretrained(
        args.tokenizer_name
        if args.tokenizer_name else args.model_name_or_path,
        cache_dir=args.cache_dir if args.cache_dir else None,
    )
    logger.info("tokenizer instantiation time: {}".format(time.time() - start))

    train_dataset = load_and_cache_examples(
        args, args.task_name, tokenizer, evaluate=False)
    train_sampler = RandomSampler(
        train_dataset) if not dist.is_initialized() else None
    return DataLoader(
        train_dataset,
        sampler=train_sampler,
        batch_size=args.per_gpu_train_batch_size) 
Developer: ray-project, Project: ray, Lines: 20, Source: transformers_example.py

Example 9: auto_add_sampler

# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import RandomSampler [as alias]
def auto_add_sampler(self, dataloader: DataLoader, train: bool) -> DataLoader:

        # don't do anything if it's not a dataloader
        is_dataloader = isinstance(dataloader, DataLoader)
        # don't manipulate iterable datasets
        is_iterable_ds = _has_iterable_dataset(dataloader)

        if not is_dataloader or is_iterable_ds:
            return dataloader
        need_dist_sampler = (self.use_ddp or self.use_ddp2 or self.use_horovod or self.use_tpu)

        if self.replace_sampler_ddp and need_dist_sampler:
            if not isinstance(dataloader.sampler, (SequentialSampler, RandomSampler)):
                raise MisconfigurationException(
                    'You seem to have configured a sampler in your DataLoader. This will be replaced '
                    ' by `DistributedSampler` since `replace_sampler_ddp` is True and you are using'
                    ' distributed training. Either remove the sampler from your DataLoader or set'
                    ' `replace_sampler_ddp`=False if you want to use your custom sampler.')

            # replace with distributed sampler
            sampler = self._get_distributed_sampler(dataloader)
            dataloader = self.replace_sampler(dataloader, sampler)

        return dataloader 
Developer: PyTorchLightning, Project: pytorch-lightning, Lines: 26, Source: data_loading.py

Example 10: setup_loader

# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import RandomSampler [as alias]
def setup_loader(dataset: Dataset,
                 batch_size: int,
                 local_rank: int,
                 n_gpu: int,
                 gradient_accumulation_steps: int,
                 num_workers: int) -> DataLoader:
    sampler = DistributedSampler(dataset) if local_rank != -1 else RandomSampler(dataset)
    batch_size = get_effective_batch_size(
        batch_size, local_rank, n_gpu, gradient_accumulation_steps) * n_gpu
    # WARNING: this will fail if the primary sequence is not the first thing the dataset returns
    batch_sampler = BucketBatchSampler(
        sampler, batch_size, False, lambda x: len(x[0]), dataset)

    loader = DataLoader(
        dataset,
        num_workers=num_workers,
        collate_fn=dataset.collate_fn,  # type: ignore
        batch_sampler=batch_sampler)

    return loader 
Developer: songlab-cal, Project: tape, Lines: 22, Source: setup_utils.py

Example 11: data_iterator

# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import RandomSampler [as alias]
def data_iterator(self):
        while True:
            if self.mode == "train":
                random.shuffle(self.files)
            for f_id in range(self.num_files):
                data_file = self.files[f_id]
                train_data = BertPretrainingPreprocessedDataset(
                    input_file=data_file, max_pred_length=self.max_pred_length
                )
                train_sampler = pt_data.RandomSampler(train_data)
                train_dataloader = pt_data.DataLoader(
                    dataset=train_data,
                    batch_size=self._batch_size,
                    collate_fn=self._collate_fn,
                    shuffle=False,
                    sampler=train_sampler,
                )
                for x in train_dataloader:
                    yield x
            if self.mode != "train":
                break 
Developer: NVIDIA, Project: NeMo, Lines: 23, Source: lm_bert_datalayer.py
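Note: `shuffle=False` is passed explicitly here because `DataLoader` raises an error when `shuffle=True` is combined with a custom `sampler`; the two options are mutually exclusive.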

Example 12: test_engine_with_dataloader_no_auto_batching

# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import RandomSampler [as alias]
def test_engine_with_dataloader_no_auto_batching():
    # tests https://github.com/pytorch/ignite/issues/941
    from torch.utils.data import DataLoader, BatchSampler, RandomSampler

    data = torch.rand(64, 4, 10)
    data_loader = DataLoader(
        data, batch_size=None, sampler=BatchSampler(RandomSampler(data), batch_size=8, drop_last=True)
    )

    counter = [0]

    def foo(e, b):
        print("{}-{}: {}".format(e.state.epoch, e.state.iteration, b))
        counter[0] += 1

    engine = DeterministicEngine(foo)
    engine.run(data_loader, epoch_length=10, max_epochs=5)

    assert counter[0] == 50 
Developer: pytorch, Project: ignite, Lines: 21, Source: test_deterministic.py
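Note: passing `batch_size=None` disables the `DataLoader`'s automatic batching, so each list of 8 indices yielded by the wrapped `BatchSampler` fetches one ready-made batch of shape (8, 4, 10) directly from the tensor.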

Example 13: test_engine_with_dataloader_no_auto_batching

# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import RandomSampler [as alias]
def test_engine_with_dataloader_no_auto_batching():
    # tests https://github.com/pytorch/ignite/issues/941
    from torch.utils.data import DataLoader, BatchSampler, RandomSampler

    data = torch.rand(64, 4, 10)
    data_loader = DataLoader(
        data, batch_size=None, sampler=BatchSampler(RandomSampler(data), batch_size=8, drop_last=True)
    )

    counter = [0]

    def foo(e, b):
        print("{}-{}: {}".format(e.state.epoch, e.state.iteration, b))
        counter[0] += 1

    engine = Engine(foo)
    engine.run(data_loader, epoch_length=10, max_epochs=5)

    assert counter[0] == 50 
Developer: pytorch, Project: ignite, Lines: 21, Source: test_engine.py

Example 14: get_data

# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import RandomSampler [as alias]
def get_data(transform, mode='train'):
    print('Loading data for "%s" ...' % mode)
    if args.dataset == 'k400':
        use_big_K400 = args.img_dim > 140
        dataset = Kinetics400_full_3d(mode=mode,
                              transform=transform,
                              seq_len=args.seq_len,
                              num_seq=args.num_seq,
                              downsample=5,
                              big=use_big_K400)
    elif args.dataset == 'ucf101':
        dataset = UCF101_3d(mode=mode,
                         transform=transform,
                         seq_len=args.seq_len,
                         num_seq=args.num_seq,
                         downsample=args.ds)
    else:
        raise ValueError('dataset not supported')

    sampler = data.RandomSampler(dataset)

    # train and val use an identical DataLoader configuration
    data_loader = data.DataLoader(dataset,
                                  batch_size=args.batch_size,
                                  sampler=sampler,
                                  shuffle=False,
                                  num_workers=32,
                                  pin_memory=True,
                                  drop_last=True)
    print('"%s" dataset size: %d' % (mode, len(dataset)))
    return data_loader 
Developer: TengdaHan, Project: DPC, Lines: 41, Source: main.py

Example 15: get_data_loader

# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import RandomSampler [as alias]
def get_data_loader(dataset, batch_size, evaluation=False,
                    custom_dataset=False, num_worker=6, local_rank=-1):
    if evaluation:
        sampler = SequentialSampler(dataset)
    else:
        if not custom_dataset:
            # use DistributedSampler to partition the dataset across processes
            sampler = RandomSampler(dataset) if local_rank == -1 else DistributedSampler(dataset)
        else:
            sampler = None
    print(f'get_data_loader: training:{not evaluation}; sampler:{sampler}')
    data_loader = DataLoader(dataset, sampler=sampler, batch_size=batch_size, num_workers=num_worker)
    return data_loader 
Developer: NLPInBLCU, Project: BiaffineDependencyParsing, Lines: 15, Source: bertology_loader.py


Note: The torch.utils.data.RandomSampler method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by many developers, and copyright in the source code remains with the original authors. Please consult each project's license before distributing or using the code; do not reproduce without permission.