Convolutional neural networks (CNNs) have enhanced the field of computer vision, enabling machines to understand and interpret visual data with remarkable accuracy. PyTorch is a famous deep learning framework and a powerful platform for building and training models. In this Answer, we will build a simple CNN using PyTorch and train it using the MNIST dataset for handwritten digit recognition.
PyTorch
We’ll first install PyTorch in our Python environment. We can do so using the following command:
pip3 install torch torchvision
Once PyTorch is installed, we can use the following steps to build a CNN model:
Import the necessary modules:
import torch # Import the PyTorch libraryimport torch.nn as nn # Import the neural network moduleimport torch.optim as optim # Import the optimization moduleimport torchvision # Import the torchvision library containing datasets, model architectures, etcimport torchvision.transforms as transforms # Import the module for image transformations
Define a simple CNN architecture using PyTorch’s nn.Module:
# Define a custom class
class SimpleCNN(nn.Module):
    """Small CNN mapping 1-channel 28x28 images to 10 class scores.

    Both convolutions use a 3x3 kernel with padding 1 and stride 1, so they
    keep the spatial size; each 2x2 max pool with stride 2 halves it
    (28 -> 14 -> 7). That is why the first fully connected layer expects
    32 * 7 * 7 input features.
    """

    def __init__(self):
        # Call the constructor
        super().__init__()
        # Define the first convolutional layer with its parameters
        self.conv_layer1 = nn.Conv2d(in_channels=1, out_channels=16, padding=1, kernel_size=3, stride=1)
        # Define the second convolutional layer with its parameters
        self.conv_layer2 = nn.Conv2d(in_channels=16, out_channels=32, padding=1, kernel_size=3, stride=1)
        # Define the first fully connected layer
        self.fc_layer1 = nn.Linear(32 * 7 * 7, 128)
        # Define the second fully connected layer
        self.fc_layer2 = nn.Linear(128, 10)

    def forward(self, x):
        """Return the raw class scores (logits) for a batch of images ``x``."""
        # Apply ReLU to the output of the first conv layer
        x = torch.relu(self.conv_layer1(x))
        # Perform max pooling operation
        x = torch.max_pool2d(x, kernel_size=2, stride=2)
        # Apply ReLU to the output of the second conv layer
        x = torch.relu(self.conv_layer2(x))
        x = torch.max_pool2d(x, kernel_size=2, stride=2)
        # Flatten everything except the batch dimension. Unlike
        # view(-1, 32 * 7 * 7), this cannot silently change the batch size
        # when the input is not 28x28; the linear layer raises instead.
        x = torch.flatten(x, start_dim=1)
        # Apply ReLU to the output of the first fully connected layer
        x = torch.relu(self.fc_layer1(x))
        # Apply the second fully connected layer
        x = self.fc_layer2(x)
        return x
The model architecture diagram for the code above is shown below:
Load and preprocess the MNIST dataset using PyTorch’s built-in methods:
# Transformations to be applied to the data: convert each image to a tensor,
# then normalize it with mean 0.5 and standard deviation 0.5
transform_data = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# Load the MNIST training data (downloaded to ./data if needed) and apply
# the transformations defined above
trainset_data = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform_data)

# DataLoader for the training dataset: shuffled mini-batches of 64 samples
train_loader = torch.utils.data.DataLoader(trainset_data, batch_size=64, shuffle=True)
Initialize the CNN, the cross-entropy loss function, and the Adam optimizer:
# Create an instance of the model
net = SimpleCNN()

# Define the loss function
criterion = nn.CrossEntropyLoss()

# Define the optimizer used for updating the parameters
# (the Adam optimizer is used here)
optimizer = optim.Adam(net.parameters(), lr=0.001)
Train the CNN Network:
num_epochs = 5

# Iterate over each epoch
for epoch in range(num_epochs):
    running_loss = 0.0
    # Iterate over batches
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data
        optimizer.zero_grad()  # Clear the gradients left over from the previous step
        outputs = net(inputs)  # Forward pass
        loss = criterion(outputs, labels)  # Calculate the loss
        loss.backward()  # Backward pass
        optimizer.step()  # Update the parameters (weights)
        running_loss += loss.item()
        if i % 100 == 99:  # Print every 100 mini-batches
            # Report the mean loss over the last 100 batches, then reset
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0

print('Training completed successfully.')
Evaluate the CNN Network to observe the model performance.
# Evaluate the model
correct = 0  # Initialize variables
total = 0

# Switch the model to evaluation mode (a no-op for this architecture, but the
# conventional step before inference)
net.eval()

# Disable gradient computation for efficiency
with torch.no_grad():
    # Iterate over the batches
    for images, labels in train_loader:
        # Forward pass
        outputs = net(images)
        # The predicted class is the index of the highest score per sample
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        # Count the number of correct predictions
        correct += (predicted == labels).sum().item()

# Calculate the accuracy
accuracy = 100 * correct / total
print('Accuracy of the model on the train dataset: {:.2f}%'.format(accuracy))
During training, we iterate over mini-batches of data, compute the loss, perform backpropagation, and update the model parameters using the optimizer. We can practice the code by running the widget below:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
# Define a CNN model
class SimpleCNN(nn.Module):
    """Small CNN mapping 1-channel 28x28 images to 10 class scores.

    Both convolutions use a 3x3 kernel with padding 1 and stride 1, so they
    keep the spatial size; each 2x2 max pool with stride 2 halves it
    (28 -> 14 -> 7). That is why the first fully connected layer expects
    32 * 7 * 7 input features.
    """

    def __init__(self):
        super().__init__()
        # Feature extractor: 1 -> 16 -> 32 channels
        self.conv_layer1 = nn.Conv2d(in_channels=1, out_channels=16, padding=1, kernel_size=3, stride=1)
        self.conv_layer2 = nn.Conv2d(in_channels=16, out_channels=32, padding=1, kernel_size=3, stride=1)
        # Classifier head: flattened features -> 128 hidden units -> 10 classes
        self.fc_layer1 = nn.Linear(32 * 7 * 7, 128)
        self.fc_layer2 = nn.Linear(128, 10)

    def forward(self, x):
        """Return the raw class scores (logits) for a batch of images ``x``."""
        x = torch.relu(self.conv_layer1(x))
        x = torch.max_pool2d(x, kernel_size=2, stride=2)
        x = torch.relu(self.conv_layer2(x))
        x = torch.max_pool2d(x, kernel_size=2, stride=2)
        # Flatten everything except the batch dimension. Unlike
        # view(-1, 32 * 7 * 7), this cannot silently change the batch size
        # when the input is not 28x28; the linear layer raises instead.
        x = torch.flatten(x, start_dim=1)
        x = torch.relu(self.fc_layer1(x))
        # No activation on the last layer: the scores are returned as-is
        x = self.fc_layer2(x)
        return x
# Build the MNIST training pipeline: tensor conversion followed by
# normalization with mean 0.5 and standard deviation 0.5
transform_data = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ]
)
# Training split, stored under ./data (downloaded if needed)
trainset_data = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform_data,
)
# Serve the training set in shuffled mini-batches of 64 samples
train_loader = torch.utils.data.DataLoader(
    trainset_data,
    batch_size=64,
    shuffle=True,
)
# Model to be trained
net = SimpleCNN()
# Training objective (cross-entropy) and parameter update rule (Adam)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(
    net.parameters(),
    lr=0.001,
)
# Train the network
num_epochs = 5
for epoch in range(num_epochs):
    # Loss accumulated since the last progress report
    running_loss = 0.0
    for batch_idx, (inputs, labels) in enumerate(train_loader):
        optimizer.zero_grad()  # clear the gradients left over from the previous step
        outputs = net(inputs)  # forward pass
        loss = criterion(outputs, labels)
        loss.backward()  # backward pass
        optimizer.step()  # update the parameters (weights)
        running_loss += loss.item()
        if batch_idx % 100 == 99:
            # Every 100 mini-batches, report their mean loss and reset
            print('[%d, %5d] loss: %.3f' % (epoch + 1, batch_idx + 1, running_loss / 100))
            running_loss = 0.0
print('Training completed successfully.')
# Evaluate the model on the training data
correct = 0
total = 0
# Switch the model to evaluation mode (a no-op for this architecture, but the
# conventional step before inference)
net.eval()
# Gradients are not needed for inference
with torch.no_grad():
    for images, labels in train_loader:
        outputs = net(images)
        # The predicted class is the index of the highest score per sample
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
# Percentage of samples whose predicted class matches the label
accuracy = 100 * correct / total
print('Accuracy of the model on the train dataset: {:.2f}%'.format(accuracy))
We covered the fundamentals of building a CNN with PyTorch and training it on the MNIST dataset. By understanding the architecture of CNNs and leveraging PyTorch’s powerful tools, we can create sophisticated deep learning models for various computer vision tasks.
Free Resources