In [906]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt
In [983]:
# 1 a)
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()
In [984]:
# b) Display the first 12 images in the training set into 4 columns and 3 rows.
fig, axes = plt.subplots(3, 4, figsize=(15, 15))  # 3 rows x 4 columns
axes = axes.ravel()
for i in range(12):
    axes[i].imshow(x_train[i])
    axes[i].axis('off')
    axes[i].set_title(i + 1)
In [987]:
# c)
def rgb_to_yuv(image):
    # RGB -> YUV transformation matrix
    yuv_matrix = np.array([[0.299, 0.587, 0.114],
                           [-0.14713, -0.28886, 0.436],
                           [0.615, -0.51499, -0.10001]])
    new_image = np.dot(image, yuv_matrix.T)
    # keep only the Y (luma) channel as the grayscale image
    return new_image[..., 0]
# temporary arrays so that each image has only 1 channel
x_train_gray = np.zeros((x_train.shape[0], 32, 32, 1))
x_test_gray = np.zeros((x_test.shape[0], 32, 32, 1))
# converting training data
for i in range(x_train.shape[0]):
    grayscale_image = rgb_to_yuv(x_train[i])
    x_train_gray[i] = grayscale_image[..., np.newaxis]
x_train = x_train_gray
# converting test data
for i in range(x_test.shape[0]):
    grayscale_image = rgb_to_yuv(x_test[i])
    x_test_gray[i] = grayscale_image[..., np.newaxis]
x_test = x_test_gray
# x arrays are now correct shape (32,32,1)
print(x_test[0].shape)
print(x_train[0].shape)
(32, 32, 1)
(32, 32, 1)
In [910]:
# d) I tried multiple colour models; this one was the most successful.
'''
I chose the YUV colour model because the Y channel captures brightness (luma) while the other two channels carry colour information.
Separating out the brightness component matters here because it carries most of the structure the classifier needs.
I tested this by passing the U and V channels through the rest of the pipeline once it was finished, and the results were substantially less accurate.
'''
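For reference, a minimal sketch (not part of the graded pipeline) of how the U or V channel could be extracted with the same matrix for the comparison described above; the `channel` parameter is illustrative.
In [ ]:
# Illustrative sketch: extract an arbitrary YUV channel (0 = Y, 1 = U, 2 = V) so the
# U/V variants can be fed through the same pipeline for comparison.
def rgb_to_yuv_channel(image, channel=0):
    yuv_matrix = np.array([[0.299, 0.587, 0.114],
                           [-0.14713, -0.28886, 0.436],
                           [0.615, -0.51499, -0.10001]])
    yuv = np.dot(image, yuv_matrix.T)
    return yuv[..., channel]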
In [989]:
# e) Display the first 12 grayscale images in the training set into 4 columns and 3 rows.
fig, axes = plt.subplots(3, 4, figsize=(15, 15))  # 3 rows x 4 columns
axes = axes.ravel()
for i in range(12):
    axes[i].imshow(x_train[i], cmap='gray')
    axes[i].axis('off')
    axes[i].set_title(i + 1)
In [912]:
"""
QUESTION 2
"""
In [991]:
#(a) Implement gradient descent and run for 250 epochs. (7 marks)
# preprocessing
K = len(np.unique(y_train))
# (32*32*1) = (height x width x channel)
Din = 1024
# normalise pixel values (make them between 0 and 1)
x_train = x_train / 255.0
x_test = x_test / 255.0
# mean normalization.
mean_image = np.mean(x_train, axis=0)
x_train = x_train - mean_image
x_test = x_test - mean_image
# one hot encoding (splits the unique categories into 10 columns, with a 1 in the column for the true class and 0 elsewhere)
y_train = tf.keras.utils.to_categorical(y_train, num_classes = K)
print('y_train : ', y_train.shape)
y_test = tf.keras.utils.to_categorical(y_test, num_classes = K)
print('y_test : ', y_test.shape)
# number of training and test samples
Ntr = x_train.shape[0]
Nte = x_test.shape[0]
x_train = np.reshape(x_train, (Ntr, Din)).astype('float32')
x_test = np.reshape(x_test, (Nte, Din)).astype('float32')
print(x_train.shape)
print(x_test.shape)
y_train :  (50000, 10)
y_test :  (10000, 10)
(50000, 1024)
(10000, 1024)
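As a quick illustration of what the one-hot step above produces (a small example for clarity, not part of the assignment output):
In [ ]:
# Illustrative example: to_categorical turns integer class labels into one-hot rows.
example = tf.keras.utils.to_categorical([3, 0], num_classes=10)
print(example)
# [[0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
#  [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]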
In [993]:
# initialising the weight matrix
std = 0.00001
w1 = std * np.random.randn(Din, K)
b1 = np.zeros(K)
# prepend a bias column of ones to the training and test data
x_train_ra = np.concatenate((np.ones((x_train.shape[0], 1)), x_train), axis=1)
x_test_ra = np.concatenate((np.ones((x_test.shape[0], 1)), x_test), axis=1)
# stack the bias row on top of the weight matrix so the bias is learned together with the weights
w1 = np.concatenate((b1.reshape(1, K), w1), axis=0)
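A small sanity check (illustrative only) that the bias-absorbed layout above is equivalent to computing x·W + b with a separate bias:
In [ ]:
# Sanity check (illustrative): x_ra.dot(w1) should equal x.dot(W) + b, since row 0 of w1
# holds the bias and column 0 of x_ra is all ones.
check = np.allclose(x_train_ra[:5].dot(w1),
                    x_train[:5].dot(w1[1:]) + w1[0])
print(check)  # expected: True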
In [995]:
def getAccuracy(predictions, labels):
    pred_class = np.argmax(predictions, axis=1)
    real_class = np.argmax(labels, axis=1)
    valid_pred = pred_class == real_class
    return np.sum(valid_pred) / len(real_class)
In [997]:
# initialisation
epochs = 250
lr = 0.014
loss_history = []
test_loss = []
train_acc_history = []
test_acc_history = []
# size of training data
m = x_train_ra.shape[0]
# size of test data
m2 = x_test_ra.shape[0]
In [999]:
# training loop
for t in range(1, epochs + 1):
    # Forward Propagation
    y_train_pred = x_train_ra.dot(w1)
    loss = (1/m) * np.sum((y_train_pred - y_train)**2)
    loss_history.append(loss)
    # Backward Propagation (the constant factor of 2 from the squared error is absorbed into the learning rate)
    dw1 = (1/m) * (x_train_ra.T.dot(y_train_pred - y_train))
    w1 = w1 - lr * dw1
    # Training Accuracy
    train_acc = getAccuracy(y_train_pred, y_train)
    train_acc_history.append(train_acc)
    # Testing Accuracy
    y_test_pred = x_test_ra.dot(w1)
    test_acc = getAccuracy(y_test_pred, y_test)
    test_acc_history.append(test_acc)
    # Test Loss
    t_loss = (1/m2) * np.sum((y_test_pred - y_test)**2)
    test_loss.append(t_loss)
    # b)
    # Print details for selected epochs
    if (t % 20 == 0) or (t == 1) or (t == epochs):
        print(f"| Epoch {t} | Train Loss {loss} | Train Accuracy: {train_acc} | Test Loss: {t_loss} | Test Accuracy: {test_acc} |")
| Epoch 1 | Train Loss 0.9999973103664838 | Train Accuracy: 0.0994 | Test Loss: 0.989256451990334 | Test Accuracy: 0.2271 |
| Epoch 20 | Train Loss 0.9192764350879172 | Train Accuracy: 0.26092 | Test Loss: 0.9171859174545491 | Test Accuracy: 0.2641 |
| Epoch 40 | Train Loss 0.8874314103079382 | Train Accuracy: 0.2744 | Test Loss: 0.8867253564362741 | Test Accuracy: 0.2783 |
| Epoch 60 | Train Loss 0.869844425059942 | Train Accuracy: 0.28268 | Test Loss: 0.8699239513510441 | Test Accuracy: 0.2829 |
| Epoch 80 | Train Loss 0.8597425449371057 | Train Accuracy: 0.2877 | Test Loss: 0.8603273428057605 | Test Accuracy: 0.2888 |
| Epoch 100 | Train Loss 0.853808238042526 | Train Accuracy: 0.29164 | Test Loss: 0.8547428324010026 | Test Accuracy: 0.2928 |
| Epoch 120 | Train Loss 0.8502343225105686 | Train Accuracy: 0.29418 | Test Loss: 0.8514284998621877 | Test Accuracy: 0.2954 |
| Epoch 140 | Train Loss 0.8480141635721214 | Train Accuracy: 0.29512 | Test Loss: 0.8494138079924338 | Test Accuracy: 0.2974 |
| Epoch 160 | Train Loss 0.8465806030642751 | Train Accuracy: 0.29618 | Test Loss: 0.8481521656484154 | Test Accuracy: 0.3004 |
| Epoch 180 | Train Loss 0.8456111267139131 | Train Accuracy: 0.29724 | Test Loss: 0.8473330416755713 | Test Accuracy: 0.3008 |
| Epoch 200 | Train Loss 0.844920671992456 | Train Accuracy: 0.29904 | Test Loss: 0.8467784973145419 | Test Accuracy: 0.3016 |
| Epoch 220 | Train Loss 0.8444019973005541 | Train Accuracy: 0.30028 | Test Loss: 0.8463856131940369 | Test Accuracy: 0.3024 |
| Epoch 240 | Train Loss 0.8439922366390149 | Train Accuracy: 0.30088 | Test Loss: 0.8460942046539486 | Test Accuracy: 0.3034 |
| Epoch 250 | Train Loss 0.8438158541390375 | Train Accuracy: 0.3012 | Test Loss: 0.8459747680472287 | Test Accuracy: 0.3035 |
In [1001]:
# c)
epochs = range(1, len(train_acc_history) + 1) # Epoch indices
fig, axs = plt.subplots(1, 2, figsize=(12, 6))
# accuracy vs epoch
axs[0].plot(epochs, train_acc_history, label='Training Accuracy', color='blue')
axs[0].plot(epochs, test_acc_history, label='Testing Accuracy', color='green', linestyle='--')
axs[0].set_title('Accuracy vs Epoch')
axs[0].set_xlabel('Epoch')
axs[0].set_ylabel('Accuracy')
axs[0].set_ylim(0, 1)
axs[0].legend()
# loss vs epoch
axs[1].plot(epochs, loss_history, label='Training Loss', color='red')
axs[1].plot(epochs, test_loss, label='Testing Loss', color='orange', linestyle='--')
axs[1].set_title('Loss vs Epoch')
axs[1].set_xlabel('Epoch')
axs[1].set_ylabel('Loss')
axs[1].set_ylim(0, 1)
axs[1].legend()
# Show plots
plt.tight_layout()
plt.show()
In [919]:
'''
QUESTION 3
'''
In [1003]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
# Build the model
model = Sequential()
# Input layer
# First hidden layer
model.add(Dense(128, input_dim=x_train.shape[1], activation='relu')) # 128 neurons
model.add(Dense(64, activation='relu')) # 64 neurons
# Output layer - softmax for multi-class classification
model.add(Dense(K, activation='softmax'))
# Compile variables
lr = 0.005
optimizer = Adam(learning_rate = lr)
# Compile the model
model.compile(optimizer,
loss='categorical_crossentropy',
metrics=['accuracy'])
# Train the model
history = model.fit(x_train, y_train, epochs=20, batch_size=32, validation_data=(x_test, y_test))
Epoch 1/20  1563/1563 - 2s 1ms/step - accuracy: 0.2796 - loss: 2.0051 - val_accuracy: 0.3237 - val_loss: 1.8566
Epoch 2/20  1563/1563 - 2s 1ms/step - accuracy: 0.3568 - loss: 1.8084 - val_accuracy: 0.3652 - val_loss: 1.7843
Epoch 3/20  1563/1563 - 2s 1ms/step - accuracy: 0.3902 - loss: 1.7195 - val_accuracy: 0.3940 - val_loss: 1.7209
Epoch 4/20  1563/1563 - 2s 1ms/step - accuracy: 0.4088 - loss: 1.6652 - val_accuracy: 0.3984 - val_loss: 1.7054
Epoch 5/20  1563/1563 - 2s 1ms/step - accuracy: 0.4241 - loss: 1.6196 - val_accuracy: 0.3947 - val_loss: 1.6964
Epoch 6/20  1563/1563 - 2s 1ms/step - accuracy: 0.4355 - loss: 1.5892 - val_accuracy: 0.3939 - val_loss: 1.7624
Epoch 7/20  1563/1563 - 2s 1ms/step - accuracy: 0.4439 - loss: 1.5565 - val_accuracy: 0.4096 - val_loss: 1.6792
Epoch 8/20  1563/1563 - 2s 1ms/step - accuracy: 0.4598 - loss: 1.5237 - val_accuracy: 0.4026 - val_loss: 1.6849
Epoch 9/20  1563/1563 - 2s 1ms/step - accuracy: 0.4711 - loss: 1.4952 - val_accuracy: 0.4228 - val_loss: 1.6725
Epoch 10/20 1563/1563 - 2s 1ms/step - accuracy: 0.4761 - loss: 1.4683 - val_accuracy: 0.4188 - val_loss: 1.6817
Epoch 11/20 1563/1563 - 2s 1ms/step - accuracy: 0.4871 - loss: 1.4456 - val_accuracy: 0.4138 - val_loss: 1.6954
Epoch 12/20 1563/1563 - 2s 1ms/step - accuracy: 0.4953 - loss: 1.4290 - val_accuracy: 0.4106 - val_loss: 1.7267
Epoch 13/20 1563/1563 - 2s 1ms/step - accuracy: 0.5007 - loss: 1.4079 - val_accuracy: 0.4026 - val_loss: 1.7156
Epoch 14/20 1563/1563 - 2s 1ms/step - accuracy: 0.5029 - loss: 1.3898 - val_accuracy: 0.4076 - val_loss: 1.7235
Epoch 15/20 1563/1563 - 2s 1ms/step - accuracy: 0.5170 - loss: 1.3643 - val_accuracy: 0.4120 - val_loss: 1.7207
Epoch 16/20 1563/1563 - 2s 1ms/step - accuracy: 0.5172 - loss: 1.3576 - val_accuracy: 0.4225 - val_loss: 1.7239
Epoch 17/20 1563/1563 - 2s 1ms/step - accuracy: 0.5221 - loss: 1.3397 - val_accuracy: 0.4086 - val_loss: 1.7565
Epoch 18/20 1563/1563 - 2s 1ms/step - accuracy: 0.5277 - loss: 1.3294 - val_accuracy: 0.4244 - val_loss: 1.7614
Epoch 19/20 1563/1563 - 2s 1ms/step - accuracy: 0.5343 - loss: 1.3146 - val_accuracy: 0.4154 - val_loss: 1.7988
Epoch 20/20 1563/1563 - 2s 1ms/step - accuracy: 0.5364 - loss: 1.3070 - val_accuracy: 0.4160 - val_loss: 1.7894
In [921]:
"""
b)
I chose two hidden layers because they are enough to learn patterns in the data without making the model too complicated.
The first layer with 128 neurons learns basic patterns, and the second layer with 64 neurons improves on them.
This setup helps the model work well without overfitting. The output layer uses softmax to handle multi-class classification.
"""
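As a rough sense of the model size this architecture gives, a back-of-the-envelope check using the same (inputs + 1) x outputs rule applied in Question 4 below (computed here for illustration, not notebook output):
In [ ]:
# Rough parameter count for the MLP above, using (inputs + 1) * outputs per Dense layer.
mlp_params = (1024 + 1) * 128 + (128 + 1) * 64 + (64 + 1) * 10
print(mlp_params)  # 140106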
In [1005]:
# c)
# get the last item in list for final result
train_loss = history.history['loss']
train_acc = history.history['accuracy']
test_loss = history.history['val_loss']
test_acc = history.history['val_accuracy']
print("Learning Rate: " + str(lr))
print("Training Loss: " + str(train_loss[-1]))
print("Training Accuracy: " + str(train_acc[-1]))
print("Test Loss: " + str(test_loss[-1]))
print("Test Accuracy: " + str(test_acc[-1]))
Learning Rate: 0.005
Training Loss: 1.3112945556640625
Training Accuracy: 0.5331400036811829
Test Loss: 1.78938627243042
Test Accuracy: 0.41600000858306885
In [1007]:
# d)
# get range of epochs
epochs = range(1, len(train_acc) + 1)
fig, axs = plt.subplots(1, 2, figsize=(12, 6))
# Accuracy vs Epoch
axs[0].plot(epochs, train_acc, label='Training Accuracy', color='blue')
axs[0].plot(epochs, test_acc, label='Testing Accuracy', color='green', linestyle='--')
axs[0].set_title('Accuracy vs Epoch')
axs[0].set_xlabel('Epoch')
axs[0].set_ylabel('Accuracy')
axs[0].legend()
axs[0].set_ylim(0, 1)
axs[0].grid(True)
# Loss vs Epoch
axs[1].plot(epochs, train_loss, label='Training Loss', color='red')
axs[1].plot(epochs, test_loss, label='Testing Loss', color='orange', linestyle='--')
axs[1].set_title('Loss vs Epoch')
axs[1].set_xlabel('Epoch')
axs[1].set_ylabel('Loss')
axs[1].legend()
axs[1].grid(True)
plt.tight_layout()
plt.show()
In [924]:
'''
QUESTION 4
'''
In [925]:
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten
from tensorflow.keras.optimizers import SGD
# the model
model = Sequential()
# Layers
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 1))) # C32
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(32, (3, 3), activation='relu')) # C32
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu')) # C64
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(64, activation='relu')) # F64
model.add(Dense(K, activation='softmax')) # F10
model.summary()
Model: "sequential_100"
Layer (type)                      Output Shape            Param #
conv2d_280 (Conv2D)               (None, 30, 30, 32)      320
max_pooling2d_276 (MaxPooling2D)  (None, 15, 15, 32)      0
conv2d_281 (Conv2D)               (None, 13, 13, 32)      9,248
max_pooling2d_277 (MaxPooling2D)  (None, 6, 6, 32)        0
conv2d_282 (Conv2D)               (None, 4, 4, 64)        18,496
max_pooling2d_278 (MaxPooling2D)  (None, 2, 2, 64)        0
flatten_64 (Flatten)              (None, 256)             0
dense_214 (Dense)                 (None, 64)              16,448
dense_215 (Dense)                 (None, 10)              650
Total params: 45,162 (176.41 KB)
Trainable params: 45,162 (176.41 KB)
Non-trainable params: 0 (0.00 B)
In [926]:
optimizer = SGD(learning_rate=0.01)
model.compile(optimizer=optimizer,
              # the output layer already applies softmax, so the loss receives probabilities, not logits
              loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
              metrics=['accuracy'])
# reshape the flattened vectors back to (32, 32, 1) images for the CNN
x_test = x_test.reshape(-1, 32, 32, 1)
x_train = x_train.reshape(-1, 32, 32, 1)
history = model.fit(x_train, y_train, epochs=10, validation_data=(x_test, y_test))
Epoch 1/10  1563/1563 - 10s 6ms/step - accuracy: 0.1416 - loss: 2.2735 - val_accuracy: 0.2601 - val_loss: 2.0618
Epoch 2/10  1563/1563 - 10s 6ms/step - accuracy: 0.2805 - loss: 2.0053 - val_accuracy: 0.3377 - val_loss: 1.8282
Epoch 3/10  1563/1563 - 10s 7ms/step - accuracy: 0.3534 - loss: 1.8024 - val_accuracy: 0.3892 - val_loss: 1.7367
Epoch 4/10  1563/1563 - 10s 6ms/step - accuracy: 0.3999 - loss: 1.6837 - val_accuracy: 0.3980 - val_loss: 1.6956
Epoch 5/10  1563/1563 - 10s 6ms/step - accuracy: 0.4373 - loss: 1.5989 - val_accuracy: 0.4348 - val_loss: 1.5846
Epoch 6/10  1563/1563 - 10s 6ms/step - accuracy: 0.4601 - loss: 1.5340 - val_accuracy: 0.4558 - val_loss: 1.5308
Epoch 7/10  1563/1563 - 10s 6ms/step - accuracy: 0.4882 - loss: 1.4638 - val_accuracy: 0.4776 - val_loss: 1.4680
Epoch 8/10  1563/1563 - 10s 6ms/step - accuracy: 0.5077 - loss: 1.4077 - val_accuracy: 0.4997 - val_loss: 1.4001
Epoch 9/10  1563/1563 - 10s 6ms/step - accuracy: 0.5279 - loss: 1.3500 - val_accuracy: 0.5116 - val_loss: 1.3759
Epoch 10/10 1563/1563 - 10s 6ms/step - accuracy: 0.5470 - loss: 1.2991 - val_accuracy: 0.5229 - val_loss: 1.3579
In [927]:
'''
a)
As seen in the summary above, there are 45,162 learnable parameters.
Breakdown:
Learnable Parameters = (kernel_height×kernel_width×input_channels+1)×number_of_filters
source - https://www.geeksforgeeks.org/how-to-calculate-the-number-of-parameters-in-cnn/
First layer is
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 1)))
The kernel size is 3x3.
The input is a greyscale image which is 1 channel.
And the first parameter of Conv2D is the number of filters which is 32.
conv2d (Conv2D): (3 × 3 × 1 + 1) × 32 = 320.
conv2d_1 (Conv2D): (3 × 3 × 32 + 1) × 32 = 9,248.
conv2d_2 (Conv2D): (3 × 3 × 32 + 1) × 64 = 18,496.
For dense layers:
The previous flatten layer outputs 256
so for model.add(Dense(64, activation='relu'))
we multiply (256 + 1) by 64; the 64 outputs are then passed to the next layer
dense_3: (256 + 1) × 64 = 16,448.
dense_4: (64 + 1) × 10 = 650.
Total: 320 + 9,248 + 18,496 + 16,448 + 650 = 45,162.
'''
In [928]:
# b)
train_loss = history.history['loss']
train_acc = history.history['accuracy']
test_loss = history.history['val_loss']
test_acc = history.history['val_accuracy']
print("Training Loss: " + str(train_loss[-1]))
print("Training Accuracy: " + str(train_acc[-1]))
print("Test Loss: " + str(test_loss[-1]))
print("Test Accuracy: " + str(test_acc[-1]))
Training Loss: 1.2954219579696655
Training Accuracy: 0.5487599968910217
Test Loss: 1.3579355478286743
Test Accuracy: 0.5228999853134155
In [929]:
# c)
# get range of epochs
epochs = range(1, len(train_acc) + 1)
fig, axs = plt.subplots(1, 2, figsize=(12, 6))
# Accuracy vs Epoch
axs[0].plot(epochs, train_acc, label='Training Accuracy', color='blue')
axs[0].plot(epochs, test_acc, label='Testing Accuracy', color='green', linestyle='--')
axs[0].set_title('Accuracy vs Epoch')
axs[0].set_xlabel('Epoch')
axs[0].set_ylabel('Accuracy')
axs[0].legend()
axs[0].set_ylim(0, 1)
axs[0].grid(True)
# Loss vs Epoch
axs[1].plot(epochs, train_loss, label='Training Loss', color='red')
axs[1].plot(epochs, test_loss, label='Testing Loss', color='orange', linestyle='--')
axs[1].set_title('Loss vs Epoch')
axs[1].set_xlabel('Epoch')
axs[1].set_ylabel('Loss')
axs[1].legend()
axs[1].grid(True)
plt.tight_layout()
plt.show()
In [930]:
'''
QUESTION 5
'''
Out[930]:
'\n\nQUESTION 5\n\n'
In [931]:
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dropout, GlobalAveragePooling2D, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping
# the model
model = Sequential()
dropout_rate = 0.2
# Layers
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 1)))  # C32
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))  # C64
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(dropout_rate))
model.add(Conv2D(128, (3, 3), activation='relu'))  # C128
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(dropout_rate))
# model.add(Flatten())  # replaced by GlobalAveragePooling2D (see discussion below)
model.add(GlobalAveragePooling2D())
model.add(Dense(64, activation='relu'))  # F64
model.add(Dense(K, activation='softmax'))  # F10 output layer
model.summary()
Model: "sequential_101"
Layer (type)                                          Output Shape          Param #
conv2d_283 (Conv2D)                                   (None, 30, 30, 32)    320
batch_normalization_158 (BatchNormalization)          (None, 30, 30, 32)    128
max_pooling2d_279 (MaxPooling2D)                      (None, 15, 15, 32)    0
conv2d_284 (Conv2D)                                   (None, 13, 13, 64)    18,496
batch_normalization_159 (BatchNormalization)          (None, 13, 13, 64)    256
max_pooling2d_280 (MaxPooling2D)                      (None, 6, 6, 64)      0
dropout_83 (Dropout)                                  (None, 6, 6, 64)      0
conv2d_285 (Conv2D)                                   (None, 4, 4, 128)     73,856
batch_normalization_160 (BatchNormalization)          (None, 4, 4, 128)     512
max_pooling2d_281 (MaxPooling2D)                      (None, 2, 2, 128)     0
dropout_84 (Dropout)                                  (None, 2, 2, 128)     0
global_average_pooling2d_24 (GlobalAveragePooling2D)  (None, 128)           0
dense_216 (Dense)                                     (None, 64)            8,256
dense_217 (Dense)                                     (None, 10)            650
Total params: 102,474 (400.29 KB)
Trainable params: 102,026 (398.54 KB)
Non-trainable params: 448 (1.75 KB)
In [933]:
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
optimizer = Adam(learning_rate=0.001)
epochs = 50
model.compile(optimizer=optimizer,
              # the output layer already applies softmax, so the loss receives probabilities, not logits
              loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
              metrics=['accuracy'])
# make sure the data is in image format (32, 32, 1) for the CNN
x_test = x_test.reshape(-1, 32, 32, 1)
x_train = x_train.reshape(-1, 32, 32, 1)
history = model.fit(x_train, y_train, epochs=epochs, validation_data=(x_test, y_test), callbacks=[early_stopping])
Epoch 1/50  1563/1563 - 24s 15ms/step - accuracy: 0.4057 - loss: 1.6691 - val_accuracy: 0.5676 - val_loss: 1.2979
Epoch 2/50  1563/1563 - 24s 15ms/step - accuracy: 0.5891 - loss: 1.1651 - val_accuracy: 0.6145 - val_loss: 1.1020
Epoch 3/50  1563/1563 - 23s 15ms/step - accuracy: 0.6475 - loss: 1.0026 - val_accuracy: 0.6490 - val_loss: 1.0104
Epoch 4/50  1563/1563 - 23s 15ms/step - accuracy: 0.6788 - loss: 0.9301 - val_accuracy: 0.6786 - val_loss: 0.9201
Epoch 5/50  1563/1563 - 23s 15ms/step - accuracy: 0.6989 - loss: 0.8714 - val_accuracy: 0.6480 - val_loss: 1.0198
Epoch 6/50  1563/1563 - 23s 15ms/step - accuracy: 0.7115 - loss: 0.8345 - val_accuracy: 0.7046 - val_loss: 0.8527
Epoch 7/50  1563/1563 - 23s 15ms/step - accuracy: 0.7255 - loss: 0.7895 - val_accuracy: 0.6933 - val_loss: 0.8832
Epoch 8/50  1563/1563 - 24s 15ms/step - accuracy: 0.7318 - loss: 0.7635 - val_accuracy: 0.7150 - val_loss: 0.8249
Epoch 9/50  1563/1563 - 23s 15ms/step - accuracy: 0.7464 - loss: 0.7294 - val_accuracy: 0.6987 - val_loss: 0.8724
Epoch 10/50 1563/1563 - 23s 15ms/step - accuracy: 0.7506 - loss: 0.7177 - val_accuracy: 0.7254 - val_loss: 0.8173
Epoch 11/50 1563/1563 - 23s 15ms/step - accuracy: 0.7582 - loss: 0.6942 - val_accuracy: 0.7287 - val_loss: 0.7973
Epoch 12/50 1563/1563 - 23s 15ms/step - accuracy: 0.7671 - loss: 0.6763 - val_accuracy: 0.7337 - val_loss: 0.7790
Epoch 13/50 1563/1563 - 23s 15ms/step - accuracy: 0.7658 - loss: 0.6657 - val_accuracy: 0.7338 - val_loss: 0.7703
Epoch 14/50 1563/1563 - 23s 15ms/step - accuracy: 0.7777 - loss: 0.6428 - val_accuracy: 0.7229 - val_loss: 0.8124
Epoch 15/50 1563/1563 - 24s 15ms/step - accuracy: 0.7799 - loss: 0.6307 - val_accuracy: 0.7070 - val_loss: 0.8935
Epoch 16/50 1563/1563 - 24s 15ms/step - accuracy: 0.7817 - loss: 0.6181 - val_accuracy: 0.7340 - val_loss: 0.7804
Epoch 17/50 1563/1563 - 24s 15ms/step - accuracy: 0.7846 - loss: 0.6128 - val_accuracy: 0.7432 - val_loss: 0.7620
Epoch 18/50 1563/1563 - 24s 15ms/step - accuracy: 0.7938 - loss: 0.5967 - val_accuracy: 0.7410 - val_loss: 0.7760
Epoch 19/50 1563/1563 - 23s 15ms/step - accuracy: 0.7905 - loss: 0.5976 - val_accuracy: 0.7425 - val_loss: 0.7571
Epoch 20/50 1563/1563 - 24s 15ms/step - accuracy: 0.7968 - loss: 0.5842 - val_accuracy: 0.7346 - val_loss: 0.8009
Epoch 21/50 1563/1563 - 23s 15ms/step - accuracy: 0.8005 - loss: 0.5750 - val_accuracy: 0.7473 - val_loss: 0.7569
Epoch 22/50 1563/1563 - 25s 16ms/step - accuracy: 0.7962 - loss: 0.5777 - val_accuracy: 0.7217 - val_loss: 0.8272
Epoch 23/50 1563/1563 - 24s 15ms/step - accuracy: 0.8025 - loss: 0.5643 - val_accuracy: 0.7503 - val_loss: 0.7415
Epoch 24/50 1563/1563 - 24s 16ms/step - accuracy: 0.8038 - loss: 0.5618 - val_accuracy: 0.7452 - val_loss: 0.7678
Epoch 25/50 1563/1563 - 24s 16ms/step - accuracy: 0.8086 - loss: 0.5457 - val_accuracy: 0.7467 - val_loss: 0.7637
Epoch 26/50 1563/1563 - 24s 16ms/step - accuracy: 0.8113 - loss: 0.5404 - val_accuracy: 0.7264 - val_loss: 0.8173
Epoch 27/50 1563/1563 - 24s 15ms/step - accuracy: 0.8098 - loss: 0.5378 - val_accuracy: 0.7226 - val_loss: 0.8458
Epoch 28/50 1563/1563 - 24s 16ms/step - accuracy: 0.8073 - loss: 0.5412 - val_accuracy: 0.7569 - val_loss: 0.7235
Epoch 29/50 1563/1563 - 24s 15ms/step - accuracy: 0.8171 - loss: 0.5272 - val_accuracy: 0.7606 - val_loss: 0.7133
Epoch 30/50 1563/1563 - 24s 16ms/step - accuracy: 0.8160 - loss: 0.5240 - val_accuracy: 0.7544 - val_loss: 0.7523
Epoch 31/50 1563/1563 - 24s 16ms/step - accuracy: 0.8159 - loss: 0.5210 - val_accuracy: 0.7603 - val_loss: 0.7214
Epoch 32/50 1563/1563 - 26s 17ms/step - accuracy: 0.8198 - loss: 0.5132 - val_accuracy: 0.7071 - val_loss: 0.8829
Epoch 33/50 1563/1563 - 24s 15ms/step - accuracy: 0.8197 - loss: 0.5150 - val_accuracy: 0.7644 - val_loss: 0.7216
Epoch 34/50 1563/1563 - 24s 15ms/step - accuracy: 0.8212 - loss: 0.5035 - val_accuracy: 0.7596 - val_loss: 0.7141
In [934]:
# c)
train_loss = history.history['loss']
train_acc = history.history['accuracy']
test_loss = history.history['val_loss']
test_acc = history.history['val_accuracy']
print("Training Loss: " + str(train_loss[-1]))
print("Training Accuracy: " + str(train_acc[-1]))
print("Test Loss: " + str(test_loss[-1]))
print("Test Accuracy: " + str(test_acc[-1]))
Training Loss: 0.5167111158370972
Training Accuracy: 0.8166400194168091
Test Loss: 0.7140844464302063
Test Accuracy: 0.7595999836921692
In [935]:
"""
b)
I used the SDG and through trial and error foud that the most successful learning rate was 0.1 where training accuracy was 71%
i then switched to Adam optimiser where i again used trial and error and found that a LR of 0.001 was most succesful with 74%.
i decided to try batch normalisation after each convolutional layer. this normalises the output of each layer. i implemenetd this by adding
model.add(BatchNormalization()) after each convolutional layer and this improved the training accuray to 79% but the
testing acuracy is still quite low at 69% which shows overfitting.
i noticed an issue where the model would overtrain and it would peak and then begin to drop in accuracy. i found out about early
stopping and dropoup.
drop out - adding a dropout rate of 0.25 after each layer made the accuracy substantally worse so i removed it. ealy stopping
resulted in a slightly better test accuracy rate so i kept it.
at the moment i have a problem with overheating of my computer but i still tried to raise my epoch level to 50 and with the early
stopping it stopped a 15 epochs at 83% training rate and 72% testing accuracy. this shows the trend of overfitting. which is where
the model is training to recognise the specific training data and cant handle general data it has not been trained on.
i used GlobalAveragePooling2D in place of flatten() and it decreased the seperation between training and testing accurcies but
didint improve the testing accuracy.
After trying drop out again on a couple of the layers i managed to get the test acuracy up to 75% so i will keep that in
"""
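A hedged sketch of the learning-rate trial-and-error described above (not the exact runs used; the candidate values and the short 5-epoch budget are illustrative assumptions):
In [ ]:
# Illustrative sketch of the learning-rate search described above; the candidate values
# and the 5-epoch budget are assumptions, not the original runs.
for candidate in [0.01, 0.005, 0.001]:
    trial = tf.keras.models.clone_model(model)   # same architecture, freshly initialised weights
    trial.compile(optimizer=Adam(learning_rate=candidate),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    h = trial.fit(x_train, y_train, epochs=5,
                  validation_data=(x_test, y_test), verbose=0)
    print(candidate, h.history['val_accuracy'][-1])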
In [936]:
# get range of epochs
epochs = range(1, len(train_acc) + 1)
fig, axs = plt.subplots(1, 2, figsize=(12, 6))
# Accuracy vs Epoch
axs[0].plot(epochs, train_acc, label='Training Accuracy', color='blue')
axs[0].plot(epochs, test_acc, label='Testing Accuracy', color='green', linestyle='--')
axs[0].set_title('Accuracy vs Epoch')
axs[0].set_xlabel('Epoch')
axs[0].set_ylabel('Accuracy')
axs[0].legend()
axs[0].set_ylim(0, 1)
axs[0].grid(True)
# Loss vs Epoch
axs[1].plot(epochs, train_loss, label='Training Loss', color='red')
axs[1].plot(epochs, test_loss, label='Testing Loss', color='orange', linestyle='--')
axs[1].set_title('Loss vs Epoch')
axs[1].set_xlabel('Epoch')
axs[1].set_ylabel('Loss')
axs[1].legend()
axs[1].set_ylim(0, 1)
axs[1].grid(True)
plt.tight_layout()
plt.show()