This article collects typical usage examples of the Python method torch.utils.data.dataset.Subset. If you are wondering what dataset.Subset does, how to call it, or what it looks like in real code, the curated examples below may help. You can also browse the containing module, torch.utils.data.dataset, for more usage examples.
Ten code examples of dataset.Subset are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
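Before diving into the examples, here is a minimal, self-contained sketch of what Subset does: it wraps an existing dataset and restricts it to a list of indices without copying any data. The toy tensors below are made up purely for illustration.

import torch
from torch.utils.data import TensorDataset
from torch.utils.data.dataset import Subset

# A toy dataset of 10 (x, y) pairs.
full_ds = TensorDataset(torch.arange(10).float().unsqueeze(1), torch.arange(10))

# Keep only the even positions; Subset stores the parent dataset and the indices.
sub_ds = Subset(full_ds, [0, 2, 4, 6, 8])

print(len(sub_ds))                # 5
print(sub_ds[1])                  # the item at index 2 of the full dataset
print(sub_ds.dataset is full_ds)  # True
print(sub_ds.indices)             # [0, 2, 4, 6, 8]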
Example 1: get_inference_dataloader
# Required import: from torch.utils.data import dataset [as alias]
# Or: from torch.utils.data.dataset import Subset [as alias]
def get_inference_dataloader(
    root_path: str,
    mode: str,
    transforms: Callable,
    batch_size: int = 16,
    num_workers: int = 8,
    pin_memory: bool = True,
    limit_num_samples: Optional[int] = None,
) -> DataLoader:
    assert mode in ("train", "test"), "Mode should be 'train' or 'test'"

    get_dataset_fn = get_train_dataset if mode == "train" else get_val_dataset
    dataset = get_dataset_fn(root_path, return_meta=True)

    if limit_num_samples is not None:
        indices = np.random.permutation(len(dataset))[:limit_num_samples]
        dataset = Subset(dataset, indices)

    dataset = TransformedDataset(dataset, transform_fn=transforms)

    loader = DataLoader(
        dataset, shuffle=False, batch_size=batch_size, num_workers=num_workers, pin_memory=pin_memory, drop_last=False
    )
    return loader
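The limit_num_samples branch above is a common smoke-test pattern: permute all indices and keep only the first few. A minimal, self-contained version of just that step (the TensorDataset below is a stand-in for whatever get_dataset_fn returns):

import numpy as np
import torch
from torch.utils.data import TensorDataset
from torch.utils.data.dataset import Subset

dataset = TensorDataset(torch.randn(100, 3))  # stand-in for get_dataset_fn(root_path, ...)
limit_num_samples = 16

# Shuffle all indices, then keep only the first limit_num_samples of them.
indices = np.random.permutation(len(dataset))[:limit_num_samples]
dataset = Subset(dataset, indices)

assert len(dataset) == limit_num_samples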
Example 2: data_from_dataset
# Required import: from torch.utils.data import dataset [as alias]
# Or: from torch.utils.data.dataset import Subset [as alias]
def data_from_dataset(dataset, X_indexing=None, y_indexing=None):
    """Try to access X and y attribute from dataset.

    Also works when dataset is a subset.

    Parameters
    ----------
    dataset : skorch.dataset.Dataset or torch.utils.data.Subset
      The incoming dataset should be a ``skorch.dataset.Dataset`` or a
      ``torch.utils.data.Subset`` of a ``skorch.dataset.Dataset``.

    X_indexing : function/callable or None (default=None)
      If not None, use this function for indexing into the X data. If
      None, try to automatically determine how to index data.

    y_indexing : function/callable or None (default=None)
      If not None, use this function for indexing into the y data. If
      None, try to automatically determine how to index data.

    """
    X, y = _none, _none

    if isinstance(dataset, Subset):
        X, y = data_from_dataset(
            dataset.dataset, X_indexing=X_indexing, y_indexing=y_indexing)
        X = multi_indexing(X, dataset.indices, indexing=X_indexing)
        y = multi_indexing(y, dataset.indices, indexing=y_indexing)
    elif hasattr(dataset, 'X') and hasattr(dataset, 'y'):
        X, y = dataset.X, dataset.y

    if (X is _none) or (y is _none):
        raise AttributeError("Could not access X and y from dataset.")

    return X, y
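data_from_dataset works because Subset keeps a reference to the wrapped dataset (.dataset) and to the selected indices (.indices). A rough illustration with a plain class standing in for skorch.dataset.Dataset (ToyDataset is hypothetical, not part of skorch):

import numpy as np
from torch.utils.data.dataset import Subset

class ToyDataset:
    """Stand-in for skorch.dataset.Dataset: exposes X and y attributes."""
    def __init__(self, X, y):
        self.X, self.y = X, y
    def __len__(self):
        return len(self.X)
    def __getitem__(self, i):
        return self.X[i], self.y[i]

ds = ToyDataset(np.arange(6).reshape(6, 1), np.arange(6))
sub = Subset(ds, [0, 3, 5])

# These are the attributes data_from_dataset relies on:
print(sub.dataset is ds)           # True
print(sub.indices)                 # [0, 3, 5]
print(sub.dataset.X[sub.indices])  # rows 0, 3 and 5 of X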
Example 3: is_skorch_dataset
# Required import: from torch.utils.data import dataset [as alias]
# Or: from torch.utils.data.dataset import Subset [as alias]
def is_skorch_dataset(ds):
    """Checks if the supplied dataset is an instance of
    ``skorch.dataset.Dataset`` even when it is nested inside
    ``torch.utils.data.Subset``."""
    from skorch.dataset import Dataset
    if isinstance(ds, Subset):
        return is_skorch_dataset(ds.dataset)
    return isinstance(ds, Dataset)
# pylint: disable=unused-argument
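The recursion matters because Subsets can be nested, a Subset of a Subset, as the two fixtures in Examples 4 and 5 below show. A generic sketch of the same unwrapping idea, not tied to skorch:

import torch
from torch.utils.data import TensorDataset
from torch.utils.data.dataset import Subset

def unwrap_subset(ds):
    """Follow .dataset links until the innermost (non-Subset) dataset is reached."""
    while isinstance(ds, Subset):
        ds = ds.dataset
    return ds

base = TensorDataset(torch.arange(5))
nested = Subset(Subset(base, [1, 2, 3]), [0, 2])
assert unwrap_subset(nested) is base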
Example 4: subset
# Required import: from torch.utils.data import dataset [as alias]
# Or: from torch.utils.data.dataset import Subset [as alias]
def subset(self, skorch_ds):
    from torch.utils.data.dataset import Subset
    return Subset(skorch_ds, [1, 3])
Example 5: subset_subset
# Required import: from torch.utils.data import dataset [as alias]
# Or: from torch.utils.data.dataset import Subset [as alias]
def subset_subset(self, subset):
    from torch.utils.data.dataset import Subset
    return Subset(subset, [0])
# pylint: disable=missing-docstring
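Indexing into the nested fixture composes lazily: subset_subset[0] resolves to subset[0], which in turn resolves to index 1 of the original dataset. A small demonstration with a TensorDataset standing in for the skorch_ds fixture:

import torch
from torch.utils.data import TensorDataset
from torch.utils.data.dataset import Subset

skorch_ds = TensorDataset(torch.tensor([10, 11, 12, 13]))  # stand-in for the skorch_ds fixture
subset = Subset(skorch_ds, [1, 3])
subset_subset = Subset(subset, [0])

print(subset_subset[0])    # (tensor(11),), the same item as skorch_ds[1]
print(len(subset_subset))  # 1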
Example 6: fit
# Required import: from torch.utils.data import dataset [as alias]
# Or: from torch.utils.data.dataset import Subset [as alias]
def fit(self, pipeline_config, hyperparameter_config, X, Y, train_indices, valid_indices):
    torch.manual_seed(pipeline_config["random_seed"])
    hyperparameter_config = ConfigWrapper(self.get_name(), hyperparameter_config)

    # prepare data
    drop_last = hyperparameter_config['batch_size'] < train_indices.shape[0]
    X, Y = to_dense(X), to_dense(Y)
    X, Y = torch.from_numpy(X).float(), torch.from_numpy(Y)
    train_dataset = TensorDataset(X, Y)

    train_loader = DataLoader(
        dataset=train_dataset,
        batch_size=hyperparameter_config['batch_size'],
        sampler=SubsetRandomSampler(train_indices),
        shuffle=False,
        drop_last=drop_last)

    valid_loader = None
    if valid_indices is not None:
        valid_loader = DataLoader(
            dataset=Subset(train_dataset, valid_indices),
            batch_size=hyperparameter_config['batch_size'],
            shuffle=False,
            drop_last=False)

    return {'train_loader': train_loader, 'valid_loader': valid_loader, 'batch_size': hyperparameter_config['batch_size']}
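The split pattern above, a SubsetRandomSampler over the training indices plus a Subset view of the same TensorDataset for validation, can be reproduced in isolation. The sizes and random indices below are made up:

import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data.dataset import Subset
from torch.utils.data.sampler import SubsetRandomSampler

X, Y = torch.randn(100, 4), torch.randint(0, 2, (100,))
train_dataset = TensorDataset(X, Y)

indices = np.random.permutation(100).tolist()
train_indices, valid_indices = indices[:80], indices[80:]

# Training batches are drawn randomly, but only from the training indices.
train_loader = DataLoader(train_dataset, batch_size=16,
                          sampler=SubsetRandomSampler(train_indices), drop_last=True)

# Validation is a fixed, ordered view over the held-out indices.
valid_loader = DataLoader(Subset(train_dataset, valid_indices),
                          batch_size=16, shuffle=False, drop_last=False)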
Example 7: test_net_input_is_scoring_input
# Required import: from torch.utils.data import dataset [as alias]
# Or: from torch.utils.data.dataset import Subset [as alias]
def test_net_input_is_scoring_input(
    self, net_cls, module_cls, scoring_cls, data,
):
    # Make sure that whatever data type is put in the network is
    # received at the scoring side as well. For the caching case
    # we only receive datasets.
    import skorch
    from skorch.dataset import CVSplit
    import torch.utils.data.dataset
    from torch.utils.data.dataset import Subset

    class MyTorchDataset(torch.utils.data.dataset.TensorDataset):
        def __init__(self, X, y):
            super().__init__(
                skorch.utils.to_tensor(X.reshape(-1, 1), device='cpu'),
                skorch.utils.to_tensor(y, device='cpu'))

    class MySkorchDataset(skorch.dataset.Dataset):
        pass

    rawsplit = lambda ds: (ds, ds)
    cvsplit = CVSplit(2, random_state=0)

    def split_ignore_y(ds, y):
        return rawsplit(ds)

    table = [
        # Test a split where type(input) == type(output) is guaranteed
        (data, split_ignore_y, np.ndarray, False),
        (data, split_ignore_y, skorch.dataset.Dataset, True),
        ((MyTorchDataset(*data), None), rawsplit, MyTorchDataset, False),
        ((MyTorchDataset(*data), None), rawsplit, MyTorchDataset, True),
        ((MySkorchDataset(*data), None), rawsplit, np.ndarray, False),
        ((MySkorchDataset(*data), None), rawsplit, MySkorchDataset, True),

        # Test a split that splits datasets using torch Subset
        (data, cvsplit, np.ndarray, False),
        (data, cvsplit, Subset, True),
        ((MyTorchDataset(*data), None), cvsplit, Subset, False),
        ((MyTorchDataset(*data), None), cvsplit, Subset, True),
        ((MySkorchDataset(*data), None), cvsplit, np.ndarray, False),
        ((MySkorchDataset(*data), None), cvsplit, Subset, True),
    ]

    for input_data, train_split, expected_type, caching in table:
        self.net_input_is_scoring_input(
            net_cls,
            module_cls,
            scoring_cls,
            input_data,
            train_split,
            expected_type,
            caching)
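The cvsplit rows of the table expect Subset because splitting utilities typically return Subset views over the original dataset rather than copies; torch's own random_split behaves the same way, as this short sketch shows:

import torch
from torch.utils.data import TensorDataset, random_split
from torch.utils.data.dataset import Subset

ds = TensorDataset(torch.arange(10))
train_part, valid_part = random_split(ds, [8, 2])

# random_split hands back Subset views over the same underlying dataset.
assert isinstance(train_part, Subset) and isinstance(valid_part, Subset)
assert train_part.dataset is ds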
Example 8: test_batch_size_smaller_than_num_gpus
# Required import: from torch.utils.data import dataset [as alias]
# Or: from torch.utils.data.dataset import Subset [as alias]
def test_batch_size_smaller_than_num_gpus(tmpdir):
    # we need at least 3 gpus for this test
    num_gpus = 3
    batch_size = 3

    class CurrentTestModel(EvalModelTemplate):

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            # batch norm doesn't work with batch size 1, we replace it
            self.c_d1_bn = torch.nn.ReLU()

        def training_step(self, *args, **kwargs):
            output = super().training_step(*args, **kwargs)
            loss = output['loss']
            # we make sure to add some metrics to the output dict,
            # this is essential for this test
            output['progress_bar'] = {'train_loss': loss}
            return output

        def train_dataloader(self):
            dataloader = super().train_dataloader()
            # construct a dataset with a size that is not divisible by num_gpus
            # therefore the last batch will have a size < num_gpus
            size = num_gpus * batch_size + (num_gpus - 1)
            dataset = Subset(dataloader.dataset, range(size))
            dataloader = DataLoader(
                dataset,
                batch_size=self.batch_size,
                drop_last=False,
            )
            return dataloader

    hparams = EvalModelTemplate.get_default_hparams()
    hparams['batch_size'] = batch_size
    model = CurrentTestModel(**hparams)

    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=1,
        limit_train_batches=0.1,
        limit_val_batches=0,
        gpus=num_gpus,
    )

    # we expect the reduction for the metrics also to happen on the last batch
    # where we will get fewer metrics than gpus
    result = trainer.fit(model)
    assert 1 == result
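The Subset call inside train_dataloader simply forces an exact dataset length so that the last batch ends up smaller than the number of GPUs. The truncation trick on its own, with a toy dataset in place of the template's:

import torch
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data.dataset import Subset

full = TensorDataset(torch.randn(1000, 8))  # stand-in for the template's training dataset
num_gpus, batch_size = 3, 3

# Keep exactly num_gpus * batch_size + (num_gpus - 1) = 11 samples, so the
# final batch has 2 samples, i.e. fewer than num_gpus.
size = num_gpus * batch_size + (num_gpus - 1)
loader = DataLoader(Subset(full, range(size)), batch_size=batch_size, drop_last=False)

print([batch[0].shape[0] for batch in loader])  # [3, 3, 3, 2]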
Example 9: get_train_val_loaders
# Required import: from torch.utils.data import dataset [as alias]
# Or: from torch.utils.data.dataset import Subset [as alias]
def get_train_val_loaders(
    root_path: str,
    train_transforms: Callable,
    val_transforms: Callable,
    batch_size: int = 16,
    num_workers: int = 8,
    val_batch_size: Optional[int] = None,
    with_sbd: Optional[str] = None,
    limit_train_num_samples: Optional[int] = None,
    limit_val_num_samples: Optional[int] = None,
) -> Tuple[DataLoader, DataLoader, DataLoader]:
    train_ds = get_train_dataset(root_path)
    val_ds = get_val_dataset(root_path)

    if with_sbd is not None:
        sbd_train_ds = get_train_noval_sbdataset(with_sbd)
        train_ds = ConcatDataset([train_ds, sbd_train_ds])

    if limit_train_num_samples is not None:
        np.random.seed(limit_train_num_samples)
        train_indices = np.random.permutation(len(train_ds))[:limit_train_num_samples]
        train_ds = Subset(train_ds, train_indices)

    if limit_val_num_samples is not None:
        np.random.seed(limit_val_num_samples)
        val_indices = np.random.permutation(len(val_ds))[:limit_val_num_samples]
        val_ds = Subset(val_ds, val_indices)

    # random samples for evaluation on training dataset
    if len(val_ds) < len(train_ds):
        np.random.seed(len(val_ds))
        train_eval_indices = np.random.permutation(len(train_ds))[: len(val_ds)]
        train_eval_ds = Subset(train_ds, train_eval_indices)
    else:
        train_eval_ds = train_ds

    train_ds = TransformedDataset(train_ds, transform_fn=train_transforms)
    val_ds = TransformedDataset(val_ds, transform_fn=val_transforms)
    train_eval_ds = TransformedDataset(train_eval_ds, transform_fn=val_transforms)

    train_loader = idist.auto_dataloader(
        train_ds, shuffle=True, batch_size=batch_size, num_workers=num_workers, drop_last=True,
    )

    val_batch_size = batch_size * 4 if val_batch_size is None else val_batch_size
    val_loader = idist.auto_dataloader(
        val_ds, shuffle=False, batch_size=val_batch_size, num_workers=num_workers, drop_last=False,
    )

    train_eval_loader = idist.auto_dataloader(
        train_eval_ds, shuffle=False, batch_size=val_batch_size, num_workers=num_workers, drop_last=False,
    )

    return train_loader, val_loader, train_eval_loader
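The train_eval_ds block builds an evaluation view of the training set that is no larger than the validation set, by sampling len(val_ds) random training indices. Reduced to its essentials, with toy tensors standing in for the datasets returned by get_train_dataset and get_val_dataset:

import numpy as np
import torch
from torch.utils.data import TensorDataset
from torch.utils.data.dataset import Subset

train_ds = TensorDataset(torch.randn(1000, 3))  # stand-in for get_train_dataset(...)
val_ds = TensorDataset(torch.randn(200, 3))     # stand-in for get_val_dataset(...)

# Evaluate on a random training subset no larger than the validation set.
if len(val_ds) < len(train_ds):
    np.random.seed(len(val_ds))  # same seeding convention as the example above
    train_eval_indices = np.random.permutation(len(train_ds))[: len(val_ds)]
    train_eval_ds = Subset(train_ds, train_eval_indices)
else:
    train_eval_ds = train_ds

assert len(train_eval_ds) == len(val_ds)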
Example 10: get_train_val_loaders
# Required import: from torch.utils.data import dataset [as alias]
# Or: from torch.utils.data.dataset import Subset [as alias]
def get_train_val_loaders(
    root_path: str,
    train_transforms: Callable,
    val_transforms: Callable,
    batch_size: int = 16,
    num_workers: int = 8,
    val_batch_size: Optional[int] = None,
    limit_train_num_samples: Optional[int] = None,
    limit_val_num_samples: Optional[int] = None,
) -> Tuple[DataLoader, DataLoader, DataLoader]:
    train_ds = ImageNet(
        root_path, split="train", transform=lambda sample: train_transforms(image=sample)["image"], loader=opencv_loader
    )
    val_ds = ImageNet(
        root_path, split="val", transform=lambda sample: val_transforms(image=sample)["image"], loader=opencv_loader
    )

    if limit_train_num_samples is not None:
        np.random.seed(limit_train_num_samples)
        train_indices = np.random.permutation(len(train_ds))[:limit_train_num_samples]
        train_ds = Subset(train_ds, train_indices)

    if limit_val_num_samples is not None:
        np.random.seed(limit_val_num_samples)
        val_indices = np.random.permutation(len(val_ds))[:limit_val_num_samples]
        val_ds = Subset(val_ds, val_indices)

    # random samples for evaluation on training dataset
    if len(val_ds) < len(train_ds):
        np.random.seed(len(val_ds))
        train_eval_indices = np.random.permutation(len(train_ds))[: len(val_ds)]
        train_eval_ds = Subset(train_ds, train_eval_indices)
    else:
        train_eval_ds = train_ds

    train_loader = idist.auto_dataloader(
        train_ds, shuffle=True, batch_size=batch_size, num_workers=num_workers, drop_last=True,
    )

    val_batch_size = batch_size * 4 if val_batch_size is None else val_batch_size
    val_loader = idist.auto_dataloader(
        val_ds, shuffle=False, batch_size=val_batch_size, num_workers=num_workers, drop_last=False,
    )

    train_eval_loader = idist.auto_dataloader(
        train_eval_ds, shuffle=False, batch_size=val_batch_size, num_workers=num_workers, drop_last=False,
    )

    return train_loader, val_loader, train_eval_loader
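Examples 9 and 10 seed NumPy with the sample limit itself so that the chosen subset is reproducible across runs. The same limiting step could also be written purely with torch; this is an alternative sketch, not what either repository does:

import torch
from torch.utils.data import TensorDataset
from torch.utils.data.dataset import Subset

ds = TensorDataset(torch.randn(500, 3))  # stand-in for the ImageNet dataset above
limit = 100

g = torch.Generator()
g.manual_seed(limit)  # seed with the limit, mirroring the NumPy version

indices = torch.randperm(len(ds), generator=g)[:limit].tolist()
ds = Subset(ds, indices)
assert len(ds) == limit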