# Multi Layer Perceptron

## Overview

A lightweight, configurable Multi-Layer Perceptron (MLP) implementation using PyTorch, designed for image classification tasks.

```python
import torch
from omegaconf import OmegaConf
from hydra.utils import instantiate
# MLP itself comes from the Nimrod package (its import path is not shown on this page)

image = torch.rand((5, 28*28))
mlp = MLP(n_in=28*28, n_h=64, n_out=10, dropout=0.1)
out = mlp(image)
print(out.shape)
```

```python
# instantiate the same model from a config file
cfg = OmegaConf.load('../config/image/model/mlp.yaml')
model = instantiate(cfg.nnet)
out = model(image)
print(out.shape)
```
## Core Components

### MLP Class

A simple feedforward neural network with a configurable architecture:

- Flexible input, hidden, and output dimensions
- Integrated dropout for regularization
- ReLU activation function
- Sequential layer design
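As a concrete illustration of these four points, a minimal module with the same interface might look like the sketch below; the layer layout is inferred from the bullets above, and the actual Nimrod implementation may differ (extra layers, logging, etc.).

```python
import torch.nn as nn

# Minimal sketch of the architecture described above; not Nimrod's actual code.
class SimpleMLP(nn.Module):
    def __init__(self, n_in: int = 784, n_h: int = 64,
                 n_out: int = 10, dropout: float = 0.2):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(n_in, n_h),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(n_h, n_out),
        )

    def forward(self, x):
        # x: (B, n_in) -> logits: (B, n_out)
        return self.net(x)
```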
### Key Parameters

- `n_in`: input dimension (default: 784, typical for MNIST)
- `n_h`: hidden layer dimension (default: 64)
- `n_out`: output dimension (default: 10, for classification)
- `dropout`: dropout rate for regularization (default: 0.2)
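For example, the same interface can be sized for other inputs; the values below are illustrative, not taken from the Nimrod configs:

```python
# Illustrative only: a wider MLP for flattened 3x32x32 RGB images.
mlp_cifar = MLP(n_in=3*32*32, n_h=256, n_out=10, dropout=0.3)
```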
## Features

- Dynamic network configuration
- Logging integration
- Compatible with PyTorch Lightning
- Supports configuration via Hydra/OmegaConf (see the config sketch after this list)
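The examples on this page load `../config/image/model/mlp.yaml` but never show its contents. A hypothetical config with that shape might look like the following; the `_target_` import path is an assumption, not taken from the repo:

```python
from hydra.utils import instantiate
from omegaconf import OmegaConf

# Hypothetical stand-in for ../config/image/model/mlp.yaml; the real file's
# contents (and the _target_ path) are assumptions, not shown in this doc.
cfg = OmegaConf.create("""
nnet:
  _target_: nimrod.models.mlp.MLP
  n_in: 784
  n_h: 64
  n_out: 10
  dropout: 0.2
""")
model = instantiate(cfg.nnet)
```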
## Dependencies
- PyTorch
- Hydra
- OmegaConf
- Lightning
- Nimrod custom modules
## MLP

```python
MLP (n_in:int=784, n_h:int=64, n_out:int=10, dropout:float=0.2)
```
*Base class for all neural network modules.*

Your models should also subclass this class.

Modules can also contain other Modules, allowing you to nest them in a tree structure. You can assign the submodules as regular attributes:

```python
import torch.nn as nn
import torch.nn.functional as F

class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 20, 5)
        self.conv2 = nn.Conv2d(20, 20, 5)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        return F.relu(self.conv2(x))
```

Submodules assigned in this way will be registered, and will have their parameters converted too when you call `.to()`, etc.

> **Note:** As per the example above, an `__init__()` call to the parent class must be made before assignment on the child.
|  | **Type** | **Default** | **Details** |
|---|---|---|---|
| n_in | int | 784 | input dimension, e.g. H*W for an image |
| n_h | int | 64 | hidden dimension |
| n_out | int | 10 | output dimension (= number of classes for classification) |
| dropout | float | 0.2 | dropout rate |
| **Returns** | **None** |  |  |
## Usage

### Training
```python
# load from config file
cfg = OmegaConf.load('../config/image/data/mnist.yaml')
datamodule = instantiate(cfg.datamodule)
datamodule.prepare_data()
datamodule.setup()

x = datamodule.test_ds[0][0]      # (C, H, W)
label = datamodule.test_ds[0][1]  # (int)
print("original shape (C,H,W): ", x.shape)
print("reshape (C,HxW): ", x.view(x.size(0), -1).shape)
print(x[0][1])

# using nimrod datamodule
train_loader = datamodule.train_dataloader()
val_loader = datamodule.val_dataloader()
test_loader = datamodule.test_dataloader()
```
```
[17:09:13] INFO - Init ImageDataModule for mnist
[17:09:13] INFO - mnist Dataset: init
[17:09:20] INFO - mnist Dataset: init
[17:09:23] INFO - split train into train/val [0.8, 0.2]
[17:09:23] INFO - train: 48000 val: 12000, test: 10000
original shape (C,H,W):  torch.Size([1, 28, 28])
reshape (C,HxW):  torch.Size([1, 784])
tensor([-0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242,
        -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242,
        -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242,
        -0.4242, -0.4242, -0.4242, -0.4242])
```
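The repeated -0.4242 entries are background pixels (raw value 0) after normalization, assuming the datamodule applies the widely used MNIST mean/std constants:

```python
# Assuming MNIST normalization with mean=0.1307, std=0.3081 (an assumption;
# the datamodule's transform is not shown here), a zero pixel becomes:
print((0.0 - 0.1307) / 0.3081)  # ≈ -0.4242
```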
```python
device = get_device()
```

```
[17:10:26] INFO - Using device: mps
```
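`get_device()` comes from Nimrod's utilities and its body is not shown on this page; a plausible re-implementation (an assumption, not the library's actual code) is:

```python
import torch

def get_device() -> torch.device:
    # Hypothetical stand-in for Nimrod's get_device helper:
    # prefer CUDA, then Apple MPS, then fall back to CPU.
    if torch.cuda.is_available():
        return torch.device("cuda")
    if torch.backends.mps.is_available():
        return torch.device("mps")
    return torch.device("cpu")
```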
### Training loop
```python
import torch
import torch.nn as nn

# data
cfg = OmegaConf.load('../config/image/data/mnist.yaml')
cfg.datamodule.batch_size = 2048
datamodule = instantiate(cfg.datamodule)
datamodule.prepare_data()
datamodule.setup()

# model
model = mlp.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

n_epochs = 2
losses = []
lrs = []
current_step = 0
steps_per_epoch = len(datamodule.train_ds) // cfg.datamodule.batch_size
total_steps = steps_per_epoch * n_epochs
print(f"steps_per_epoch: {steps_per_epoch}, total_steps: {total_steps}")

for epoch in range(n_epochs):
    model.train()
    for images, labels in datamodule.train_dataloader():
        optimizer.zero_grad()
        # model expects flattened input (B, H*W)
        images = images.view(-1, 28*28).to(device)
        labels = labels.to(device)
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        losses.append(loss.item())
        current_lr = optimizer.param_groups[0]['lr']
        lrs.append(current_lr)
        if not (current_step % 100):
            print(f"Loss {loss.item():.4f}, Current LR: {current_lr:.10f}, Step: {current_step}/{total_steps}")
        current_step += 1
```
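The loop records the learning rate at every step, but with plain Adam it never changes. To make the recorded `lrs` curve informative, a scheduler can be attached; the sketch below uses `total_steps` as computed above (OneCycleLR is a choice made here for illustration, not something this doc prescribes):

```python
# Sketch: vary the learning rate with a one-cycle schedule.
# Assumes `optimizer` and `total_steps` from the loop above.
scheduler = torch.optim.lr_scheduler.OneCycleLR(
    optimizer, max_lr=1e-3, total_steps=total_steps
)
# Then call scheduler.step() once per batch, right after optimizer.step().
```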
```python
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in datamodule.test_dataloader():
        # model expects input (B, H*W)
        images = images.view(-1, 28*28).to(device)
        labels = labels.to(device)
        # pass the input through the model
        outputs = model(images)
        # get the predicted labels
        _, predicted = torch.max(outputs.data, 1)
        # update the total and correct counts
        total += labels.size(0)
        correct += (predicted == labels).sum()

# print the accuracy
print(f"Epoch {epoch + 1}: Accuracy = {100 * correct / total:.2f}%")
```
```python
import matplotlib.pyplot as plt

# plt.figure(1)
# plt.subplot(211)
plt.ylabel('loss')
plt.xlabel('step')
plt.plot(losses)
# plt.subplot(212)
# plt.ylabel('lr')
# plt.xlabel('step')
# plt.plot(lrs)
```
## MLP_X

```python
MLP_X (nnet:__main__.MLP, num_classes:int,
       optimizer:torch.optim.optimizer.Optimizer,
       scheduler:torch.optim.lr_scheduler)
```

Helper class that provides a standard way to create an ABC using inheritance.
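This page doesn't show MLP_X's body; given its signature and the Lightning dependency, it is presumably a LightningModule that wraps the network with a loss, metrics, and optimizer. A hypothetical minimal analogue (names and details are assumptions, not Nimrod's actual code):

```python
import torch
import torch.nn as nn
from lightning import LightningModule

class MLPClassifier(LightningModule):
    # Hypothetical minimal analogue of MLP_X; the real Nimrod class also
    # takes optimizer/scheduler factories and presumably uses num_classes
    # for metrics.
    def __init__(self, nnet: nn.Module, num_classes: int):
        super().__init__()
        self.nnet = nnet
        self.num_classes = num_classes
        self.criterion = nn.CrossEntropyLoss()

    def forward(self, x):
        return self.nnet(x)

    def training_step(self, batch, batch_idx):
        x, y = batch
        logits = self.nnet(x.view(x.size(0), -1))
        loss = self.criterion(logits, y)
        self.log("train/loss_step", loss)
        return loss

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=1e-3)
```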
### Usage
```python
cfg = OmegaConf.load('../config/image/model/mlp.yaml')
model = instantiate(cfg)
b = torch.rand((16, 1, 28*28))
y = model(b)
print(y.shape)
```
### Nimrod training
```python
import pandas as pd
from lightning import Trainer                    # assuming Lightning 2.x imports
from lightning.pytorch.loggers import CSVLogger  # assuming Lightning 2.x imports

# model
cfg = OmegaConf.load('../config/image/model/conv.yaml')
model = instantiate(cfg)

# data module config
cfg = OmegaConf.load('../config/image/data/mnist.yaml')
cfg.datamodule.batch_size = 2048
cfg.datamodule.num_workers = 0
datamodule = instantiate(cfg.datamodule)
# datamodule.prepare_data()
datamodule.setup()

trainer = Trainer(
    accelerator="auto",
    max_epochs=3,
    logger=CSVLogger("logs", name="mnist_mlp"),
)
trainer.fit(model, datamodule.train_dataloader(), datamodule.val_dataloader())
```

```python
csv_path = f"{trainer.logger.log_dir}/metrics.csv"
metrics = pd.read_csv(csv_path)
metrics.head(5)

plt.figure()
plt.plot(metrics['step'], metrics['train/loss_step'], 'b.-')
plt.plot(metrics['step'], metrics['val/loss'], 'r.-')
plt.show()

trainer.test(model, datamodule.test_dataloader())
```