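This is what I use (taken from here): a small wrapper dataset that applies its own transform to each sample of a subset, so that every split can be given a different transform.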
import torch
from torch.utils.data import Dataset, TensorDataset, random_split
from torchvision import transforms
class DatasetFromSubset(Dataset):
    """Dataset wrapper that applies an optional transform to a subset's inputs.

    Each item of ``subset`` must unpack into an ``(x, y)`` pair. The
    transform, when given, is applied to ``x`` only; ``y`` is returned
    unchanged.

    Args:
        subset: Indexable, sized collection of ``(x, y)`` pairs.
        transform: Optional callable applied to ``x`` on every access.
            ``None`` (the default) disables transformation.
    """

    def __init__(self, subset, transform=None):
        self.subset = subset
        self.transform = transform

    def __getitem__(self, index):
        """Return the ``(x, y)`` pair at ``index`` with ``x`` transformed."""
        x, y = self.subset[index]
        # Compare against None explicitly: a callable transform may still
        # evaluate as falsy (e.g. it defines __len__ and is empty), and a
        # plain truthiness test would silently skip it.
        if self.transform is not None:
            x = self.transform(x)
        return x, y

    def __len__(self):
        """Return the number of samples in the wrapped subset."""
        return len(self.subset)
Here's an example:
# Toy dataset: 100 random 3x24x24 inputs with integer labels in [0, 10).
init_dataset = TensorDataset(
    torch.randn(100, 3, 24, 24),
    torch.randint(0, 10, (100,)),
)

# 80/20 split. The second length is the remainder so that the two lengths
# always sum to len(init_dataset), which random_split requires; truncating
# both fractions independently can lose a sample for other dataset sizes.
train_len = int(len(init_dataset) * 0.8)
lengths = [train_len, len(init_dataset) - train_len]
train_subset, test_subset = random_split(init_dataset, lengths)

# Wrap the subsets returned by random_split (train_subset / test_subset),
# giving each split its own transform.
train_dataset = DatasetFromSubset(
    train_subset, transform=transforms.Normalize((0., 0., 0.), (0.5, 0.5, 0.5))
)
test_dataset = DatasetFromSubset(
    test_subset, transform=transforms.Normalize((0., 0., 0.), (0.5, 0.5, 0.5))
)