This article collects typical usage examples of torch.utils.data.RandomSampler in Python. If you are wondering what data.RandomSampler does, how to call it, or what it looks like in real code, the curated examples below may help. You can also explore further usage examples for the module it belongs to, torch.utils.data.
The following shows 15 code examples of data.RandomSampler, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
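Before diving into the project code, here is a minimal, self-contained sketch (not drawn from any of the examples below) of what RandomSampler does: it yields dataset indices in random order, and a DataLoader consumes those indices to assemble batches.

import torch
from torch.utils.data import DataLoader, RandomSampler, TensorDataset

# toy dataset: 10 samples with 4 features each
dataset = TensorDataset(torch.arange(40, dtype=torch.float32).view(10, 4))

# default behaviour (replacement=False): each index appears exactly once per epoch
sampler = RandomSampler(dataset)
loader = DataLoader(dataset, batch_size=4, sampler=sampler)

for (batch,) in loader:
    print(batch.shape)  # torch.Size([4, 4]) for full batches, torch.Size([2, 4]) for the last one

Note that passing sampler= together with shuffle=True raises an error; the sampler alone controls the ordering, which is why several examples below pass shuffle=False explicitly.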
Example 1: prepare_data_loader
# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import RandomSampler [as alias]
def prepare_data_loader(self, dataset, batch_size, rand_flag=True):
    # prepare data loader: random or sequential order depending on rand_flag
    if rand_flag:
        data_sampler = RandomSampler(dataset)
    else:
        data_sampler = SequentialSampler(dataset)

    if self.custom_collate_fn is None:
        dataloader = DataLoader(dataset,
                                batch_size=batch_size,
                                sampler=data_sampler)
    else:
        dataloader = DataLoader(dataset,
                                batch_size=batch_size,
                                sampler=data_sampler,
                                collate_fn=self.custom_collate_fn)
    return dataloader
Example 2: generate_batch
# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import RandomSampler [as alias]
def generate_batch(self, episodes, episode_labels):
    total_steps = sum([len(e) for e in episodes])
    assert total_steps > self.batch_size
    print('Total Steps: {}'.format(total_steps))
    # Episode sampler
    # Sample `num_samples` episodes, then batchify them with `self.batch_size` episodes per batch
    sampler = BatchSampler(RandomSampler(range(len(episodes)),
                                         replacement=True, num_samples=total_steps),
                           self.batch_size, drop_last=True)
    for indices in sampler:
        episodes_batch = [episodes[x] for x in indices]
        episode_labels_batch = [episode_labels[x] for x in indices]
        xs, labels = [], appendabledict()
        for ep_ind, episode in enumerate(episodes_batch):
            # Get one sample from this episode
            t = np.random.randint(len(episode))
            xs.append(episode[t])
            labels.append_update(episode_labels_batch[ep_ind][t])
        yield torch.stack(xs).float().to(self.device) / 255., labels
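A note on the pattern shared by Examples 2 to 5: RandomSampler(range(len(episodes)), replacement=True, num_samples=total_steps) draws episode indices with replacement, and wrapping it in a BatchSampler groups those indices into lists of self.batch_size, so each outer iteration receives a batch of randomly chosen (possibly repeated) episodes. A standalone sketch of just that sampling step, with made-up sizes:

from torch.utils.data import BatchSampler, RandomSampler

num_episodes, batch_size, total_steps = 5, 3, 12  # illustrative values only
sampler = BatchSampler(
    RandomSampler(range(num_episodes), replacement=True, num_samples=total_steps),
    batch_size, drop_last=True)

for indices in sampler:
    print(indices)  # e.g. [4, 0, 4] -- a list of batch_size indices, duplicates allowed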
Example 3: generate_batch
# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import RandomSampler [as alias]
def generate_batch(self, episodes):
    total_steps = sum([len(e) for e in episodes])
    print('Total Steps: {}'.format(total_steps))
    # Episode sampler
    # Sample `num_samples` episodes, then batchify them with `self.batch_size` episodes per batch
    sampler = BatchSampler(RandomSampler(range(len(episodes)),
                                         replacement=True, num_samples=total_steps),
                           self.batch_size, drop_last=True)
    for indices in sampler:
        episodes_batch = [episodes[x] for x in indices]
        x_t, x_tprev, x_that, ts, thats = [], [], [], [], []
        for episode in episodes_batch:
            # Get one sample from this episode
            t, t_hat = np.random.randint(0, len(episode)), np.random.randint(0, len(episode))
            x_t.append(episode[t])
            x_tprev.append(episode[t - 1])
            ts.append([t])
        yield torch.stack(x_t).float().to(self.device) / 255., \
              torch.stack(x_tprev).float().to(self.device) / 255.
Example 4: generate_batch
# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import RandomSampler [as alias]
def generate_batch(self, episodes):
    total_steps = sum([len(e) for e in episodes])
    print('Total Steps: {}'.format(total_steps))
    # Episode sampler
    # Sample `num_samples` episodes, then batchify them with `self.batch_size` episodes per batch
    sampler = BatchSampler(RandomSampler(range(len(episodes)),
                                         replacement=True, num_samples=total_steps),
                           self.batch_size, drop_last=True)
    for indices in sampler:
        episodes_batch = [episodes[x] for x in indices]
        x_t, x_tn = [], []
        for episode in episodes_batch:
            # Get one sample from this episode, plus the frame `pred_offset` steps later
            t = np.random.randint(0, len(episode) - self.pred_offset)
            t_n = t + self.pred_offset
            x_t.append(episode[t])
            x_tn.append(episode[t_n])
        yield torch.stack(x_t).float().to(self.device) / 255., \
              torch.stack(x_tn).float().to(self.device) / 255.
Example 5: generate_batch
# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import RandomSampler [as alias]
def generate_batch(self, episodes):
    total_steps = sum([len(e) for e in episodes])
    print('Total Steps: {}'.format(total_steps))
    # Episode sampler
    # Sample `num_samples` episodes, then batchify them with `self.batch_size` episodes per batch
    sampler = BatchSampler(RandomSampler(range(len(episodes)),
                                         replacement=True, num_samples=total_steps),
                           self.batch_size, drop_last=True)
    for indices in sampler:
        episodes_batch = [episodes[x] for x in indices]
        x_t = []
        for episode in episodes_batch:
            # Get one sample from this episode
            t = np.random.randint(0, len(episode))
            x_t.append(episode[t])
        yield torch.stack(x_t).float().to(self.device) / 255.
Example 6: get_train_dataloader
# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import RandomSampler [as alias]
def get_train_dataloader(self, train_examples, verbose=True):
    train_features = convert_examples_to_features(
        train_examples, self.label_map, self.rparams.max_seq_length, self.tokenizer,
        verbose=verbose,
    )
    train_data, train_tokens = convert_to_dataset(
        train_features, label_mode=get_label_mode(self.label_map),
    )
    if self.rparams.local_rank == -1:
        train_sampler = RandomSampler(train_data)
    else:
        train_sampler = DistributedSampler(train_data)
    train_dataloader = DataLoader(
        train_data, sampler=train_sampler, batch_size=self.rparams.train_batch_size,
    )
    return HybridLoader(train_dataloader, train_tokens)
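Examples 6, 7, 10 and 15 all follow the same convention, common in BERT-style fine-tuning code: local_rank == -1 means single-process training, so a plain RandomSampler shuffles the whole dataset, while under distributed training each process gets a DistributedSampler that restricts it to its own shard. A minimal sketch of that branch in isolation (build_train_loader is a hypothetical helper, not part of any example above):

from torch.utils.data import DataLoader, RandomSampler
from torch.utils.data.distributed import DistributedSampler

def build_train_loader(dataset, batch_size, local_rank=-1):
    # local_rank == -1: not launched with torch.distributed, shuffle locally
    # local_rank >= 0: DistributedSampler gives this process its own shard
    sampler = RandomSampler(dataset) if local_rank == -1 else DistributedSampler(dataset)
    return DataLoader(dataset, sampler=sampler, batch_size=batch_size)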
Example 7: get_train_dataloader
# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import RandomSampler [as alias]
def get_train_dataloader(self, train_examples, verbose=True):
    train_features = convert_examples_to_features(
        examples=train_examples,
        max_seq_length=self.rparams.max_seq_length,
        tokenizer=self.tokenizer,
        select_prob=self.rparams.select_prob,
        verbose=verbose,
    )
    train_data, train_tokens = convert_to_dataset(train_features)
    if self.rparams.local_rank == -1:
        train_sampler = RandomSampler(train_data)
    else:
        train_sampler = DistributedSampler(train_data)
    train_dataloader = DataLoader(
        train_data, sampler=train_sampler, batch_size=self.rparams.train_batch_size,
    )
    return HybridLoader(train_dataloader, train_tokens)
Example 8: data_creator
# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import RandomSampler [as alias]
def data_creator(config):
    args = config["args"]
    start = time.time()
    tokenizer = AutoTokenizer.from_pretrained(
        args.tokenizer_name if args.tokenizer_name else args.model_name_or_path,
        cache_dir=args.cache_dir if args.cache_dir else None,
    )
    logger.info("tokenizer instantiation time: {}".format(time.time() - start))
    train_dataset = load_and_cache_examples(
        args, args.task_name, tokenizer, evaluate=False)
    # shuffle locally unless torch.distributed is already initialized
    train_sampler = RandomSampler(
        train_dataset) if not dist.is_initialized() else None
    return DataLoader(
        train_dataset,
        sampler=train_sampler,
        batch_size=args.per_gpu_train_batch_size)
Example 9: auto_add_sampler
# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import RandomSampler [as alias]
def auto_add_sampler(self, dataloader: DataLoader, train: bool) -> DataLoader:
    # don't do anything if it's not a dataloader
    is_dataloader = isinstance(dataloader, DataLoader)
    # don't manipulate iterable datasets
    is_iterable_ds = _has_iterable_dataset(dataloader)
    if not is_dataloader or is_iterable_ds:
        return dataloader

    need_dist_sampler = (self.use_ddp or self.use_ddp2 or self.use_horovod or self.use_tpu)
    if self.replace_sampler_ddp and need_dist_sampler:
        if not isinstance(dataloader.sampler, (SequentialSampler, RandomSampler)):
            raise MisconfigurationException(
                'You seem to have configured a sampler in your DataLoader. This will be replaced'
                ' by `DistributedSampler` since `replace_sampler_ddp` is True and you are using'
                ' distributed training. Either remove the sampler from your DataLoader or set'
                ' `replace_sampler_ddp`=False if you want to use your custom sampler.')

        # replace with distributed sampler
        sampler = self._get_distributed_sampler(dataloader)
        dataloader = self.replace_sampler(dataloader, sampler)

    return dataloader
Example 10: setup_loader
# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import RandomSampler [as alias]
def setup_loader(dataset: Dataset,
                 batch_size: int,
                 local_rank: int,
                 n_gpu: int,
                 gradient_accumulation_steps: int,
                 num_workers: int) -> DataLoader:
    sampler = DistributedSampler(dataset) if local_rank != -1 else RandomSampler(dataset)
    batch_size = get_effective_batch_size(
        batch_size, local_rank, n_gpu, gradient_accumulation_steps) * n_gpu
    # WARNING: this will fail if the primary sequence is not the first thing the dataset returns
    batch_sampler = BucketBatchSampler(
        sampler, batch_size, False, lambda x: len(x[0]), dataset)

    loader = DataLoader(
        dataset,
        num_workers=num_workers,
        collate_fn=dataset.collate_fn,  # type: ignore
        batch_sampler=batch_sampler)

    return loader
Example 11: data_iterator
# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import RandomSampler [as alias]
def data_iterator(self):
    while True:
        if self.mode == "train":
            random.shuffle(self.files)

        for f_id in range(self.num_files):
            data_file = self.files[f_id]
            train_data = BertPretrainingPreprocessedDataset(
                input_file=data_file, max_pred_length=self.max_pred_length
            )
            train_sampler = pt_data.RandomSampler(train_data)
            train_dataloader = pt_data.DataLoader(
                dataset=train_data,
                batch_size=self._batch_size,
                collate_fn=self._collate_fn,
                shuffle=False,
                sampler=train_sampler,
            )
            for x in train_dataloader:
                yield x

        if self.mode != "train":
            break
Example 12: test_engine_with_dataloader_no_auto_batching
# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import RandomSampler [as alias]
def test_engine_with_dataloader_no_auto_batching():
    # tests https://github.com/pytorch/ignite/issues/941
    from torch.utils.data import DataLoader, BatchSampler, RandomSampler

    data = torch.rand(64, 4, 10)
    data_loader = DataLoader(
        data, batch_size=None, sampler=BatchSampler(RandomSampler(data), batch_size=8, drop_last=True)
    )

    counter = [0]

    def foo(e, b):
        print("{}-{}: {}".format(e.state.epoch, e.state.iteration, b))
        counter[0] += 1

    engine = DeterministicEngine(foo)
    engine.run(data_loader, epoch_length=10, max_epochs=5)
    assert counter[0] == 50
Example 13: test_engine_with_dataloader_no_auto_batching
# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import RandomSampler [as alias]
def test_engine_with_dataloader_no_auto_batching():
    # tests https://github.com/pytorch/ignite/issues/941
    from torch.utils.data import DataLoader, BatchSampler, RandomSampler

    data = torch.rand(64, 4, 10)
    data_loader = DataLoader(
        data, batch_size=None, sampler=BatchSampler(RandomSampler(data), batch_size=8, drop_last=True)
    )

    counter = [0]

    def foo(e, b):
        print("{}-{}: {}".format(e.state.epoch, e.state.iteration, b))
        counter[0] += 1

    engine = Engine(foo)
    engine.run(data_loader, epoch_length=10, max_epochs=5)
    assert counter[0] == 50
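Examples 12 and 13 rely on the DataLoader's no-auto-batching mode: with batch_size=None, the loader does not collate individual samples itself, so the BatchSampler passed as sampler yields whole lists of indices and every fetched item is already a batch of 8. A stripped-down sketch of just the data pipeline, with the ignite engine left out:

import torch
from torch.utils.data import DataLoader, BatchSampler, RandomSampler

data = torch.rand(64, 4, 10)
loader = DataLoader(
    data,
    batch_size=None,  # disable automatic batching
    sampler=BatchSampler(RandomSampler(data), batch_size=8, drop_last=True),
)

for batch in loader:
    print(batch.shape)  # torch.Size([8, 4, 10]); 64 // 8 = 8 batches per pass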
Example 14: get_data
# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import RandomSampler [as alias]
def get_data(transform, mode='train'):
    print('Loading data for "%s" ...' % mode)
    if args.dataset == 'k400':
        use_big_K400 = args.img_dim > 140
        dataset = Kinetics400_full_3d(mode=mode,
                                      transform=transform,
                                      seq_len=args.seq_len,
                                      num_seq=args.num_seq,
                                      downsample=5,
                                      big=use_big_K400)
    elif args.dataset == 'ucf101':
        dataset = UCF101_3d(mode=mode,
                            transform=transform,
                            seq_len=args.seq_len,
                            num_seq=args.num_seq,
                            downsample=args.ds)
    else:
        raise ValueError('dataset not supported')

    sampler = data.RandomSampler(dataset)
    if mode == 'train':
        data_loader = data.DataLoader(dataset,
                                      batch_size=args.batch_size,
                                      sampler=sampler,
                                      shuffle=False,
                                      num_workers=32,
                                      pin_memory=True,
                                      drop_last=True)
    elif mode == 'val':
        data_loader = data.DataLoader(dataset,
                                      batch_size=args.batch_size,
                                      sampler=sampler,
                                      shuffle=False,
                                      num_workers=32,
                                      pin_memory=True,
                                      drop_last=True)
    print('"%s" dataset size: %d' % (mode, len(dataset)))
    return data_loader
Example 15: get_data_loader
# Required import: from torch.utils import data [as alias]
# Or: from torch.utils.data import RandomSampler [as alias]
def get_data_loader(dataset, batch_size, evaluation=False,
                    custom_dataset=False, num_worker=6, local_rank=-1):
    if evaluation:
        sampler = SequentialSampler(dataset)
    else:
        if not custom_dataset:
            # use DistributedSampler to partition the dataset across processes
            sampler = RandomSampler(dataset) if local_rank == -1 else DistributedSampler(dataset)
        else:
            sampler = None
    print(f'get_data_loader: training:{not evaluation}; sampler:{sampler}')
    data_loader = DataLoader(dataset, sampler=sampler, batch_size=batch_size, num_workers=num_worker)
    return data_loader