Multi Layer Perceptron (MLP)

A simple feedforward multilayer perceptron (MLP) model.

source

MNISTDataModule

 MNISTDataModule (data_dir:str='~/Data/',
                  train_val_test_split:List[float]=[0.8, 0.1, 0.1],
                  batch_size:int=64, num_workers:int=0,
                  pin_memory:bool=False, persistent_workers:bool=False)

A DataModule standardizes the training, val, test splits, data preparation and transforms. The main advantage is consistent data splits, data preparation and transforms across models.

Example::

import torch
import lightning.pytorch as L
import torch.utils.data as data
from lightning.pytorch.demos.boring_classes import RandomDataset

class MyDataModule(L.LightningDataModule):
    def prepare_data(self):
        # download, IO, etc. Useful with shared filesystems
        # only called on 1 GPU/TPU in distributed
        ...

    def setup(self, stage):
        # make assignments here (val/train/test split)
        # called on every process in DDP
        dataset = RandomDataset(1, 100)
        self.train, self.val, self.test = data.random_split(
            dataset, [80, 10, 10], generator=torch.Generator().manual_seed(42)
        )

    def train_dataloader(self):
        return data.DataLoader(self.train)

    def val_dataloader(self):
        return data.DataLoader(self.val)

    def test_dataloader(self):
        return data.DataLoader(self.test)

    def teardown(self, stage):
        # clean up state after the trainer stops, delete files...
        # called on every process in DDP
        ...
|  | Type | Default | Details |
|----|------|---------|---------|
| data_dir | str | ~/Data/ | path to source data dir |
| train_val_test_split | typing.List[float] | [0.8, 0.1, 0.1] | train/val/test split fractions |
| batch_size | int | 64 | size of compute batch |
| num_workers | int | 0 | 0 means data loading runs in the main process; n > 0 spawns n worker subprocesses (1 still gives a single worker, which can be slow) |
| pin_memory | bool | False | load samples into page-locked (pinned) host memory, which speeds up host-to-device transfer when pushing batches to the GPU |
| persistent_workers | bool | False | keep worker processes alive between epochs instead of respawning them |
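
The loader-related arguments map directly onto the underlying PyTorch DataLoader settings. As a rough sketch (the argument values below are arbitrary examples, not recommendations):

from nimrod.image.datasets import MNISTDataModule  # target path taken from the config example below

dm = MNISTDataModule(
    data_dir="~/Data/",                   # where MNIST will be downloaded/stored
    train_val_test_split=[0.8, 0.1, 0.1],
    batch_size=128,                       # samples per batch
    num_workers=4,                        # 4 background worker processes for loading
    pin_memory=True,                      # page-locked host memory for faster GPU transfer
    persistent_workers=True,              # keep workers alive between epochs
)
dm.prepare_data()                         # download the data (once per node)
dm.setup()                                # create the train/val/test splits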

Basic model


source

MLP

 MLP (n_in:int, n_h:int, n_out:int, dropout:float=0.2)

Base class for all neural network modules.

Your models should also subclass this class.

Modules can also contain other Modules, allowing them to be nested in a tree structure. You can assign the submodules as regular attributes::

import torch.nn as nn
import torch.nn.functional as F

class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 20, 5)
        self.conv2 = nn.Conv2d(20, 20, 5)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        return F.relu(self.conv2(x))

Submodules assigned in this way will be registered, and will have their parameters converted too when you call .to(), etc.

Note: as per the example above, an __init__() call to the parent class must be made before assignment on the child.

Attributes: training (bool) – whether this module is in training or evaluation mode.
|  | Type | Default | Details |
|----|------|---------|---------|
| n_in | int |  | input dimension, e.g. H×W for a flattened image |
| n_h | int |  | hidden dimension |
| n_out | int |  | output dimension (= number of classes for classification) |
| dropout | float | 0.2 | dropout probability |
| Returns | None |  |  |

Usage

image = torch.rand((5, 28*28))
mlp = MLP(n_in=28*28, n_h=64, n_out=10)
out = mlp(image)
print(out.shape)
torch.Size([5, 10])
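
The MLP expects flattened vectors, so image batches of shape (B, C, H, W) need to be reshaped before the forward pass. A minimal sketch reusing the mlp instance above:

# fake MNIST-style batch: 5 images of shape (1, 28, 28)
batch = torch.rand((5, 1, 28, 28))

# flatten each image to a 784-dim vector before feeding the MLP
flat = batch.view(batch.size(0), -1)   # (5, 784)
logits = mlp(flat)                     # (5, 10), one logit per class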

Basic training

Data Module

Data module, cf. recipes/image/mnist

cat ../config/data/image/mnist.yaml
# load from config file
from omegaconf import OmegaConf
from hydra.utils import instantiate

cfg = OmegaConf.load('../config/data/image/mnist.yaml')
print(cfg.datamodule)
datamodule = instantiate(cfg.datamodule)
datamodule.prepare_data()
datamodule.setup()
x = datamodule.data_test[0][0] # (C, H, W)
print(len(datamodule.data_test))
label = datamodule.data_test[0][1] #(int)
print("original shape (C,H,W): ", x.shape)
print("reshape (C,HxW): ", x.view(x.size(0), -1).shape)
print(x[0][1])
{'_target_': 'nimrod.image.datasets.MNISTDataModule', 'data_dir': '../data/image', 'train_val_test_split': [0.8, 0.1, 0.1], 'batch_size': 64, 'num_workers': 0, 'pin_memory': False, 'persistent_workers': False}
7000
original shape (C,H,W):  torch.Size([1, 28, 28])
reshape (C,HxW):  torch.Size([1, 784])
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0.])
# using default PyTorch datasets
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor

train_dataset = MNIST("../data/image", train=True, download=True, transform=ToTensor())
test_dataset = MNIST("../data/image", train=False, download=True, transform=ToTensor())

# train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
# test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# using nimrod datamodule
train_loader = datamodule.train_dataloader()
val_loader = datamodule.val_dataloader()
test_loader = datamodule.test_dataloader()
type(datamodule.data_test)
torch.utils.data.dataset.Subset

Hardware acceleration

# device = "mps" if torch.backends.mps.is_available() else "cpu"
device = "cpu" # for CI on cpu instance
device = torch.device(device)
model = mlp.to(device)
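
For local runs with a GPU, a more general device-selection pattern falls back from CUDA to Apple MPS to CPU (a sketch; adjust to the hardware actually available):

# pick the best available accelerator, falling back to CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
# the model is then moved with mlp.to(device), exactly as above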

Loss & optimizer setup

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

Training loop

n_epochs = 1
for epoch in range(n_epochs):
    model.train()
    for images, labels in train_loader:
        # model expects flattened input (B, H*W)
        images = images.view(-1, 28*28).to(device)
        labels = labels.to(device)
        outputs = model(images)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()  # clear gradients from the previous step
        loss.backward()
        optimizer.step()

    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in test_loader:
            # model expects input (B,H*W)
            images = images.view(-1, 28*28).to(device)
            labels = labels.to(device)
            # Pass the input through the model
            outputs = model(images)
            # Get the predicted labels
            _, predicted = torch.max(outputs.data, 1)

            # Update the total and correct counts
            total += labels.size(0)
            correct += (predicted == labels).sum()

        # Print the accuracy
        print(f"Epoch {epoch + 1}: Accuracy = {100 * correct / total:.2f}%")
Epoch 1: Accuracy = 79.99%
CPU times: user 3.39 s, sys: 466 ms, total: 3.86 s
Wall time: 3.58 s

Integrated model + training settings


source

MLP_PL

 MLP_PL (n_in:int, n_h:int, n_out:int, dropout:float=0.2, lr:float=0.001)

Hooks to be used in LightningModule.

|  | Type | Default | Details |
|----|------|---------|---------|
| n_in | int |  | input dimension, e.g. H×W for a flattened image |
| n_h | int |  | hidden dimension |
| n_out | int |  | output dimension (= number of classes for classification) |
| dropout | float | 0.2 | dropout factor |
| lr | float | 0.001 | learning rate |

Usage

# wrap the plain MLP in a LightningModule
mlp_pl = MLP_PL(28*28, 64, n_out=10, dropout=0.2, lr=1e-3)

# fake input: batch of 5 flattened images with a channel dimension
b = torch.rand((5, 1, 28*28))

# move model and data to hardware
model = mlp_pl.to(device)

b = b.to(device)
y_hat = mlp_pl(b)
print(y_hat.shape)

# real data
batch = next(iter(test_loader))
print(batch[0].shape, batch[1].shape)
print(model.predict_step(batch, 0))
torch.Size([5, 1, 10])
torch.Size([64, 1, 28, 28]) torch.Size([64])
tensor([1, 4, 5, 4, 5, 5, 1, 4, 5, 5, 5, 2, 4, 4, 5, 5, 5, 5, 4, 5, 1, 1, 5, 4,
        1, 1, 1, 2, 1, 5, 1, 5, 1, 0, 9, 7, 6, 5, 4, 5, 5, 5, 4, 4, 5, 5, 5, 0,
        5, 5, 6, 4, 5, 1, 4, 4, 1, 5, 4, 1, 4, 0, 5, 1])
print(model.lr)
0.001

Integrated trainer

trainer = Trainer(accelerator='mps', devices=1, max_epochs=1)
trainer.fit(mlp_pl, datamodule=datamodule)
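
After fitting, the same datamodule can drive the validation and test loops, assuming MLP_PL implements the corresponding validation_step/test_step hooks (a sketch using standard Lightning Trainer methods):

trainer.validate(mlp_pl, datamodule=datamodule)
trainer.test(mlp_pl, datamodule=datamodule)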

Training scripts with config file

For an example script that drives model training from configurable YAML files, see the recipes folder:

cd recipes/image/mnist
python train.py trainer.max_epochs=20 trainer.accelerator='mps' datamodule.num_workers=0
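
For reference, a minimal sketch of what such a train.py could look like, assuming a Hydra-style config with datamodule, model and trainer nodes (the config_path, config_name and cfg.model key are illustrative assumptions; the actual recipe may differ):

import hydra
from hydra.utils import instantiate
from omegaconf import DictConfig
from lightning.pytorch import Trainer


@hydra.main(version_base=None, config_path="config", config_name="train")  # hypothetical config location
def main(cfg: DictConfig) -> None:
    # build the datamodule and model from their _target_ entries
    datamodule = instantiate(cfg.datamodule)
    model = instantiate(cfg.model)  # assumes a model node in the config

    # trainer options (max_epochs, accelerator, ...) come from the config
    trainer = Trainer(**cfg.trainer)
    trainer.fit(model, datamodule=datamodule)


if __name__ == "__main__":
    main()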