In [906]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt
In [983]:
# 1 a)
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()
In [984]:
# b) Display the first 12 images in the training set into 4 columns and 3 rows.
fig, axes = plt.subplots(3, 4, figsize=(15, 15))  # 3 rows x 4 columns
axes = axes.ravel()
for i in range(12):
    axes[i].imshow(x_train[i])
    axes[i].axis('off')
    axes[i].set_title(i + 1)
In [987]:
# c)
def rgb_to_yuv(image):
    # RGB -> YUV transformation matrix
    yuv_matrix = np.array([[0.299, 0.587, 0.114],
                           [-0.14713, -0.28886, 0.436],
                           [0.615, -0.51499, -0.10001]])
    new_image = np.dot(image, yuv_matrix.T)
    # keep only the Y (luma) channel as the grayscale image
    return new_image[..., 0]
# temporary arrays so that each image has only 1 channel
x_train_gray = np.zeros((x_train.shape[0], 32, 32, 1))
x_test_gray = np.zeros((x_test.shape[0], 32, 32, 1))
# converting training data
for i in range(x_train.shape[0]):
    grayscale_image = rgb_to_yuv(x_train[i])
    x_train_gray[i] = grayscale_image[..., np.newaxis]
x_train = x_train_gray
# converting test data
for i in range(x_test.shape[0]):
    grayscale_image = rgb_to_yuv(x_test[i])
    x_test_gray[i] = grayscale_image[..., np.newaxis]
x_test = x_test_gray
# x arrays are now correct shape (32,32,1)
print(x_test[0].shape)
print(x_train[0].shape)
(32, 32, 1)
(32, 32, 1)
In [910]:
# d) I tried multiple colour models; this one was the most successful.
'''
I chose the YUV colour model because the Y channel captures brightness (luma) while the other two channels carry colour information.
Separating out the brightness component matters here because it carries most of the structure the classifier needs.
I tested this by passing the U and V channels through the rest of the pipeline once it was finished, and the results were substantially less accurate.
'''
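For reference, a minimal sketch (not part of the graded pipeline) of how the U or V channel could be extracted with the same matrix for the comparison described above; the `channel` parameter is illustrative.
In [ ]:
# Illustrative sketch: extract an arbitrary YUV channel (0 = Y, 1 = U, 2 = V) so the
# U/V variants can be fed through the same pipeline for comparison.
def rgb_to_yuv_channel(image, channel=0):
    yuv_matrix = np.array([[0.299, 0.587, 0.114],
                           [-0.14713, -0.28886, 0.436],
                           [0.615, -0.51499, -0.10001]])
    yuv = np.dot(image, yuv_matrix.T)
    return yuv[..., channel]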
In [989]:
# e) Display the first 12 grayscale images in the training set into 4 columns and 3 rows.
fig, axes = plt.subplots(3, 4, figsize=(15, 15))  # 3 rows x 4 columns
axes = axes.ravel()
for i in range(12):
    axes[i].imshow(x_train[i], cmap='gray')
    axes[i].axis('off')
    axes[i].set_title(i + 1)
In [912]:
"""
QUESTION 2
"""
In [991]:
#(a) Implement gradient descent and run for 250 epochs. (7 marks)
# preprocessing
K = len(np.unique(y_train))
# (32*32*1) = (height x width x channel)
Din = 1024
# normalise pixel values (make them between 0 and 1)
x_train = x_train / 255.0
x_test = x_test / 255.0
# mean normalization.
mean_image = np.mean(x_train, axis=0)
x_train = x_train - mean_image
x_test = x_test - mean_image
# one hot encoding (splits the unique categories into 10 columns, with a 1 in the column for the true class and 0 elsewhere)
y_train = tf.keras.utils.to_categorical(y_train, num_classes = K)
print('y_train : ', y_train.shape)
y_test = tf.keras.utils.to_categorical(y_test, num_classes = K)
print('y_test : ', y_test.shape)
# number of training and test samples
Ntr = x_train.shape[0]
Nte = x_test.shape[0]
x_train = np.reshape(x_train, (Ntr, Din)).astype('float32')
x_test = np.reshape(x_test, (Nte, Din)).astype('float32')
print(x_train.shape)
print(x_test.shape)
y_train :  (50000, 10)
y_test :  (10000, 10)
(50000, 1024)
(10000, 1024)
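As a quick illustration of what the one-hot step above produces (a small example for clarity, not part of the assignment output):
In [ ]:
# Illustrative example: to_categorical turns integer class labels into one-hot rows.
example = tf.keras.utils.to_categorical([3, 0], num_classes=10)
print(example)
# [[0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
#  [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]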
In [993]:
# initialising the weight matrix
std = 0.00001
w1 = std * np.random.randn(Din, K)
b1 = np.zeros(K)
# prepend a bias column of ones to the training and test data
x_train_ra = np.concatenate((np.ones((x_train.shape[0], 1)), x_train), axis=1)
x_test_ra = np.concatenate((np.ones((x_test.shape[0], 1)), x_test), axis=1)
# stack the bias row on top of the weight matrix so the bias is learned together with the weights
w1 = np.concatenate((b1.reshape(1, K), w1), axis=0)
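A small sanity check (illustrative only) that the bias-absorbed layout above is equivalent to computing x·W + b with a separate bias:
In [ ]:
# Sanity check (illustrative): x_ra.dot(w1) should equal x.dot(W) + b, since row 0 of w1
# holds the bias and column 0 of x_ra is all ones.
check = np.allclose(x_train_ra[:5].dot(w1),
                    x_train[:5].dot(w1[1:]) + w1[0])
print(check)  # expected: True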
In [995]:
def getAccuracy(predictions, labels):
    pred_class = np.argmax(predictions, axis=1)
    real_class = np.argmax(labels, axis=1)
    valid_pred = pred_class == real_class
    return np.sum(valid_pred) / len(real_class)
In [997]:
# initialisation
epochs = 250
lr = 0.014
loss_history = []
test_loss = []
train_acc_history = []
test_acc_history = []
# size of training data
m = x_train_ra.shape[0]
# size of test data
m2 = x_test_ra.shape[0]
In [999]:
# training loop
for t in range(1, epochs + 1):
    # Forward Propagation
    y_train_pred = x_train_ra.dot(w1)
    loss = (1/m) * np.sum((y_train_pred - y_train)**2)
    loss_history.append(loss)
    # Backward Propagation (the constant factor of 2 from the squared error is absorbed into the learning rate)
    dw1 = (1/m) * (x_train_ra.T.dot(y_train_pred - y_train))
    w1 = w1 - lr * dw1
    # Training Accuracy
    train_acc = getAccuracy(y_train_pred, y_train)
    train_acc_history.append(train_acc)
    # Testing Accuracy
    y_test_pred = x_test_ra.dot(w1)
    test_acc = getAccuracy(y_test_pred, y_test)
    test_acc_history.append(test_acc)
    # Test Loss
    t_loss = (1/m2) * np.sum((y_test_pred - y_test)**2)
    test_loss.append(t_loss)
    # b)
    # Print details for selected epochs
    if (t % 20 == 0) or (t == 1) or (t == epochs):
        print(f"| Epoch {t} | Train Loss {loss} | Train Accuracy: {train_acc} | Test Loss: {t_loss} | Test Accuracy: {test_acc} |")
| Epoch 1 | Train Loss 0.9999973103664838 | Train Accuracy: 0.0994 | Test Loss: 0.989256451990334 | Test Accuracy: 0.2271 |
| Epoch 20 | Train Loss 0.9192764350879172 | Train Accuracy: 0.26092 | Test Loss: 0.9171859174545491 | Test Accuracy: 0.2641 |
| Epoch 40 | Train Loss 0.8874314103079382 | Train Accuracy: 0.2744 | Test Loss: 0.8867253564362741 | Test Accuracy: 0.2783 |
| Epoch 60 | Train Loss 0.869844425059942 | Train Accuracy: 0.28268 | Test Loss: 0.8699239513510441 | Test Accuracy: 0.2829 |
| Epoch 80 | Train Loss 0.8597425449371057 | Train Accuracy: 0.2877 | Test Loss: 0.8603273428057605 | Test Accuracy: 0.2888 |
| Epoch 100 | Train Loss 0.853808238042526 | Train Accuracy: 0.29164 | Test Loss: 0.8547428324010026 | Test Accuracy: 0.2928 |
| Epoch 120 | Train Loss 0.8502343225105686 | Train Accuracy: 0.29418 | Test Loss: 0.8514284998621877 | Test Accuracy: 0.2954 |
| Epoch 140 | Train Loss 0.8480141635721214 | Train Accuracy: 0.29512 | Test Loss: 0.8494138079924338 | Test Accuracy: 0.2974 |
| Epoch 160 | Train Loss 0.8465806030642751 | Train Accuracy: 0.29618 | Test Loss: 0.8481521656484154 | Test Accuracy: 0.3004 |
| Epoch 180 | Train Loss 0.8456111267139131 | Train Accuracy: 0.29724 | Test Loss: 0.8473330416755713 | Test Accuracy: 0.3008 |
| Epoch 200 | Train Loss 0.844920671992456 | Train Accuracy: 0.29904 | Test Loss: 0.8467784973145419 | Test Accuracy: 0.3016 |
| Epoch 220 | Train Loss 0.8444019973005541 | Train Accuracy: 0.30028 | Test Loss: 0.8463856131940369 | Test Accuracy: 0.3024 |
| Epoch 240 | Train Loss 0.8439922366390149 | Train Accuracy: 0.30088 | Test Loss: 0.8460942046539486 | Test Accuracy: 0.3034 |
| Epoch 250 | Train Loss 0.8438158541390375 | Train Accuracy: 0.3012 | Test Loss: 0.8459747680472287 | Test Accuracy: 0.3035 |
In [1001]:
# c)
epochs = range(1, len(train_acc_history) + 1) # Epoch indices
fig, axs = plt.subplots(1, 2, figsize=(12, 6))
# accuracy vs epoch
axs[0].plot(epochs, train_acc_history, label='Training Accuracy', color='blue')
axs[0].plot(epochs, test_acc_history, label='Testing Accuracy', color='green', linestyle='--')
axs[0].set_title('Accuracy vs Epoch')
axs[0].set_xlabel('Epoch')
axs[0].set_ylabel('Accuracy')
axs[0].set_ylim(0, 1)
axs[0].legend()
# loss vs epoch
axs[1].plot(epochs, loss_history, label='Training Loss', color='red')
axs[1].plot(epochs, test_loss, label='Testing Loss', color='orange', linestyle='--')
axs[1].set_title('Loss vs Epoch')
axs[1].set_xlabel('Epoch')
axs[1].set_ylabel('Loss')
axs[1].set_ylim(0, 1)
axs[1].legend()
# Show plots
plt.tight_layout()
plt.show()
In [919]:
'''
QUESTION 3
'''
In [1003]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
# Build the model
model = Sequential()
# Input layer
# First hidden layer
model.add(Dense(128, input_dim=x_train.shape[1], activation='relu')) # 128 neurons
model.add(Dense(64, activation='relu')) # 64 neurons
# Output layer - softmax for multi-class classification
model.add(Dense(K, activation='softmax'))
# Compile variables
lr = 0.005
optimizer = Adam(learning_rate = lr)
# Compile the model
model.compile(optimizer,
loss='categorical_crossentropy',
metrics=['accuracy'])
# Train the model
history = model.fit(x_train, y_train, epochs=20, batch_size=32, validation_data=(x_test, y_test))
Epoch 1/20  1563/1563 - 2s 1ms/step - accuracy: 0.2796 - loss: 2.0051 - val_accuracy: 0.3237 - val_loss: 1.8566
Epoch 2/20  1563/1563 - 2s 1ms/step - accuracy: 0.3568 - loss: 1.8084 - val_accuracy: 0.3652 - val_loss: 1.7843
Epoch 3/20  1563/1563 - 2s 1ms/step - accuracy: 0.3902 - loss: 1.7195 - val_accuracy: 0.3940 - val_loss: 1.7209
Epoch 4/20  1563/1563 - 2s 1ms/step - accuracy: 0.4088 - loss: 1.6652 - val_accuracy: 0.3984 - val_loss: 1.7054
Epoch 5/20  1563/1563 - 2s 1ms/step - accuracy: 0.4241 - loss: 1.6196 - val_accuracy: 0.3947 - val_loss: 1.6964
Epoch 6/20  1563/1563 - 2s 1ms/step - accuracy: 0.4355 - loss: 1.5892 - val_accuracy: 0.3939 - val_loss: 1.7624
Epoch 7/20  1563/1563 - 2s 1ms/step - accuracy: 0.4439 - loss: 1.5565 - val_accuracy: 0.4096 - val_loss: 1.6792
Epoch 8/20  1563/1563 - 2s 1ms/step - accuracy: 0.4598 - loss: 1.5237 - val_accuracy: 0.4026 - val_loss: 1.6849
Epoch 9/20  1563/1563 - 2s 1ms/step - accuracy: 0.4711 - loss: 1.4952 - val_accuracy: 0.4228 - val_loss: 1.6725
Epoch 10/20 1563/1563 - 2s 1ms/step - accuracy: 0.4761 - loss: 1.4683 - val_accuracy: 0.4188 - val_loss: 1.6817
Epoch 11/20 1563/1563 - 2s 1ms/step - accuracy: 0.4871 - loss: 1.4456 - val_accuracy: 0.4138 - val_loss: 1.6954
Epoch 12/20 1563/1563 - 2s 1ms/step - accuracy: 0.4953 - loss: 1.4290 - val_accuracy: 0.4106 - val_loss: 1.7267
Epoch 13/20 1563/1563 - 2s 1ms/step - accuracy: 0.5007 - loss: 1.4079 - val_accuracy: 0.4026 - val_loss: 1.7156
Epoch 14/20 1563/1563 - 2s 1ms/step - accuracy: 0.5029 - loss: 1.3898 - val_accuracy: 0.4076 - val_loss: 1.7235
Epoch 15/20 1563/1563 - 2s 1ms/step - accuracy: 0.5170 - loss: 1.3643 - val_accuracy: 0.4120 - val_loss: 1.7207
Epoch 16/20 1563/1563 - 2s 1ms/step - accuracy: 0.5172 - loss: 1.3576 - val_accuracy: 0.4225 - val_loss: 1.7239
Epoch 17/20 1563/1563 - 2s 1ms/step - accuracy: 0.5221 - loss: 1.3397 - val_accuracy: 0.4086 - val_loss: 1.7565
Epoch 18/20 1563/1563 - 2s 1ms/step - accuracy: 0.5277 - loss: 1.3294 - val_accuracy: 0.4244 - val_loss: 1.7614
Epoch 19/20 1563/1563 - 2s 1ms/step - accuracy: 0.5343 - loss: 1.3146 - val_accuracy: 0.4154 - val_loss: 1.7988
Epoch 20/20 1563/1563 - 2s 1ms/step - accuracy: 0.5364 - loss: 1.3070 - val_accuracy: 0.4160 - val_loss: 1.7894
In [921]:
"""
b)
I chose two hidden layers because they are enough to learn patterns in the data without making the model too complicated.
The first layer with 128 neurons learns basic patterns, and the second layer with 64 neurons improves on them.
This setup helps the model work well without overfitting. The output layer uses softmax to handle multi-class classification.
"""
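As a rough sense of the model size this architecture gives, a back-of-the-envelope check using the same (inputs + 1) x outputs rule applied in Question 4 below (computed here for illustration, not notebook output):
In [ ]:
# Rough parameter count for the MLP above, using (inputs + 1) * outputs per Dense layer.
mlp_params = (1024 + 1) * 128 + (128 + 1) * 64 + (64 + 1) * 10
print(mlp_params)  # 140106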
In [1005]:
# c)
# get the last item in list for final result
train_loss = history.history['loss']
train_acc = history.history['accuracy']
test_loss = history.history['val_loss']
test_acc = history.history['val_accuracy']
print("Learning Rate: " + str(lr))
print("Training Loss: " + str(train_loss[-1]))
print("Training Accuracy: " + str(train_acc[-1]))
print("Test Loss: " + str(test_loss[-1]))
print("Test Accuracy: " + str(test_acc[-1]))
Learning Rate: 0.005
Training Loss: 1.3112945556640625
Training Accuracy: 0.5331400036811829
Test Loss: 1.78938627243042
Test Accuracy: 0.41600000858306885
In [1007]:
# d)
# get range of epochs
epochs = range(1, len(train_acc) + 1)
fig, axs = plt.subplots(1, 2, figsize=(12, 6))
# Accuracy vs Epoch
axs[0].plot(epochs, train_acc, label='Training Accuracy', color='blue')
axs[0].plot(epochs, test_acc, label='Testing Accuracy', color='green', linestyle='--')
axs[0].set_title('Accuracy vs Epoch')
axs[0].set_xlabel('Epoch')
axs[0].set_ylabel('Accuracy')
axs[0].legend()
axs[0].set_ylim(0, 1)
axs[0].grid(True)
# Loss vs Epoch
axs[1].plot(epochs, train_loss, label='Training Loss', color='red')
axs[1].plot(epochs, test_loss, label='Testing Loss', color='orange', linestyle='--')
axs[1].set_title('Loss vs Epoch')
axs[1].set_xlabel('Epoch')
axs[1].set_ylabel('Loss')
axs[1].legend()
axs[1].grid(True)
plt.tight_layout()
plt.show()
In [924]:
'''
QUESTION 4
'''
In [925]:
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten
from tensorflow.keras.optimizers import SGD
# the model
model = Sequential()
# Layers
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 1))) # C32
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(32, (3, 3), activation='relu')) # C32
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu')) # C64
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(64, activation='relu')) # F64
model.add(Dense(K, activation='softmax')) # F10
model.summary()
Model: "sequential_100"
Layer (type)                      Output Shape            Param #
conv2d_280 (Conv2D)               (None, 30, 30, 32)      320
max_pooling2d_276 (MaxPooling2D)  (None, 15, 15, 32)      0
conv2d_281 (Conv2D)               (None, 13, 13, 32)      9,248
max_pooling2d_277 (MaxPooling2D)  (None, 6, 6, 32)        0
conv2d_282 (Conv2D)               (None, 4, 4, 64)        18,496
max_pooling2d_278 (MaxPooling2D)  (None, 2, 2, 64)        0
flatten_64 (Flatten)              (None, 256)             0
dense_214 (Dense)                 (None, 64)              16,448
dense_215 (Dense)                 (None, 10)              650
Total params: 45,162 (176.41 KB)
Trainable params: 45,162 (176.41 KB)
Non-trainable params: 0 (0.00 B)
In [926]:
optimizer = SGD(learning_rate=0.01)
model.compile(optimizer=optimizer,
              # the output layer already applies softmax, so the loss receives probabilities, not logits
              loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
              metrics=['accuracy'])
# reshape the flattened vectors back to (32, 32, 1) images for the CNN
x_test = x_test.reshape(-1, 32, 32, 1)
x_train = x_train.reshape(-1, 32, 32, 1)
history = model.fit(x_train, y_train, epochs=10, validation_data=(x_test, y_test))
Epoch 1/10  1563/1563 - 10s 6ms/step - accuracy: 0.1416 - loss: 2.2735 - val_accuracy: 0.2601 - val_loss: 2.0618
Epoch 2/10  1563/1563 - 10s 6ms/step - accuracy: 0.2805 - loss: 2.0053 - val_accuracy: 0.3377 - val_loss: 1.8282
Epoch 3/10  1563/1563 - 10s 7ms/step - accuracy: 0.3534 - loss: 1.8024 - val_accuracy: 0.3892 - val_loss: 1.7367
Epoch 4/10  1563/1563 - 10s 6ms/step - accuracy: 0.3999 - loss: 1.6837 - val_accuracy: 0.3980 - val_loss: 1.6956
Epoch 5/10  1563/1563 - 10s 6ms/step - accuracy: 0.4373 - loss: 1.5989 - val_accuracy: 0.4348 - val_loss: 1.5846
Epoch 6/10  1563/1563 - 10s 6ms/step - accuracy: 0.4601 - loss: 1.5340 - val_accuracy: 0.4558 - val_loss: 1.5308
Epoch 7/10  1563/1563 - 10s 6ms/step - accuracy: 0.4882 - loss: 1.4638 - val_accuracy: 0.4776 - val_loss: 1.4680
Epoch 8/10  1563/1563 - 10s 6ms/step - accuracy: 0.5077 - loss: 1.4077 - val_accuracy: 0.4997 - val_loss: 1.4001
Epoch 9/10  1563/1563 - 10s 6ms/step - accuracy: 0.5279 - loss: 1.3500 - val_accuracy: 0.5116 - val_loss: 1.3759
Epoch 10/10 1563/1563 - 10s 6ms/step - accuracy: 0.5470 - loss: 1.2991 - val_accuracy: 0.5229 - val_loss: 1.3579
In [927]:
'''
a)
As seen in the summary above, there are 45,162 learnable parameters.
Breakdown:
Learnable Parameters = (kernel_height×kernel_width×input_channels+1)×number_of_filters
source - https://www.geeksforgeeks.org/how-to-calculate-the-number-of-parameters-in-cnn/
First layer is
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 1)))
The kernel size is 3x3.
The input is a greyscale image which is 1 channel.
And the first parameter of Conv2D is the number of filters which is 32.
conv2d (Conv2D): (3 × 3 × 1 + 1) × 32 = 320.
conv2d_1 (Conv2D): (3 × 3 × 32 + 1) × 32 = 9,248.
conv2d_2 (Conv2D): (3 × 3 × 32 + 1) × 64 = 18,496.
For dense layers:
The previous flatten layer outputs 256
so for model.add(Dense(64, activation='relu'))
we multiply (256 + 1) by 64; the 64 outputs are then passed to the next layer
dense_3: (256 + 1) × 64 = 16,448.
dense_4: (64 + 1) × 10 = 650.
Total: 320 + 9,248 + 18,496 + 16,448 + 650 = 45,162.
'''
In [928]:
# b)
train_loss = history.history['loss']
train_acc = history.history['accuracy']
test_loss = history.history['val_loss']
test_acc = history.history['val_accuracy']
print("Training Loss: " + str(train_loss[-1]))
print("Training Accuracy: " + str(train_acc[-1]))
print("Test Loss: " + str(test_loss[-1]))
print("Test Accuracy: " + str(test_acc[-1]))
Training Loss: 1.2954219579696655
Training Accuracy: 0.5487599968910217
Test Loss: 1.3579355478286743
Test Accuracy: 0.5228999853134155
In [929]:
# c)
# get range of epochs
epochs = range(1, len(train_acc) + 1)
fig, axs = plt.subplots(1, 2, figsize=(12, 6))
# Accuracy vs Epoch
axs[0].plot(epochs, train_acc, label='Training Accuracy', color='blue')
axs[0].plot(epochs, test_acc, label='Testing Accuracy', color='green', linestyle='--')
axs[0].set_title('Accuracy vs Epoch')
axs[0].set_xlabel('Epoch')
axs[0].set_ylabel('Accuracy')
axs[0].legend()
axs[0].set_ylim(0, 1)
axs[0].grid(True)
# Loss vs Epoch
axs[1].plot(epochs, train_loss, label='Training Loss', color='red')
axs[1].plot(epochs, test_loss, label='Testing Loss', color='orange', linestyle='--')
axs[1].set_title('Loss vs Epoch')
axs[1].set_xlabel('Epoch')
axs[1].set_ylabel('Loss')
axs[1].legend()
axs[1].grid(True)
plt.tight_layout()
plt.show()
In [930]:
'''
QUESTION 5
'''
Out[930]:
'\n\nQUESTION 5\n\n'
In [931]:
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dropout, GlobalAveragePooling2D, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping
# the model
model = Sequential()
dropout_rate = 0.2
# Layers
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 1)))  # C32
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))  # C64
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(dropout_rate))
model.add(Conv2D(128, (3, 3), activation='relu'))  # C128
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(dropout_rate))
# model.add(Flatten())  # replaced by GlobalAveragePooling2D (see discussion below)
model.add(GlobalAveragePooling2D())
model.add(Dense(64, activation='relu'))  # F64
model.add(Dense(K, activation='softmax'))  # F10 output layer
model.summary()
Model: "sequential_101"
Layer (type)                                          Output Shape          Param #
conv2d_283 (Conv2D)                                   (None, 30, 30, 32)    320
batch_normalization_158 (BatchNormalization)          (None, 30, 30, 32)    128
max_pooling2d_279 (MaxPooling2D)                      (None, 15, 15, 32)    0
conv2d_284 (Conv2D)                                   (None, 13, 13, 64)    18,496
batch_normalization_159 (BatchNormalization)          (None, 13, 13, 64)    256
max_pooling2d_280 (MaxPooling2D)                      (None, 6, 6, 64)      0
dropout_83 (Dropout)                                  (None, 6, 6, 64)      0
conv2d_285 (Conv2D)                                   (None, 4, 4, 128)     73,856
batch_normalization_160 (BatchNormalization)          (None, 4, 4, 128)     512
max_pooling2d_281 (MaxPooling2D)                      (None, 2, 2, 128)     0
dropout_84 (Dropout)                                  (None, 2, 2, 128)     0
global_average_pooling2d_24 (GlobalAveragePooling2D)  (None, 128)           0
dense_216 (Dense)                                     (None, 64)            8,256
dense_217 (Dense)                                     (None, 10)            650
Total params: 102,474 (400.29 KB)
Trainable params: 102,026 (398.54 KB)
Non-trainable params: 448 (1.75 KB)
In [933]:
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
optimizer = Adam(learning_rate=0.001)
epochs = 50
model.compile(optimizer=optimizer,
              # the output layer already applies softmax, so the loss receives probabilities, not logits
              loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
              metrics=['accuracy'])
# make sure the data is in image format (32, 32, 1) for the CNN
x_test = x_test.reshape(-1, 32, 32, 1)
x_train = x_train.reshape(-1, 32, 32, 1)
history = model.fit(x_train, y_train, epochs=epochs, validation_data=(x_test, y_test), callbacks=[early_stopping])
Epoch 1/50  1563/1563 - 24s 15ms/step - accuracy: 0.4057 - loss: 1.6691 - val_accuracy: 0.5676 - val_loss: 1.2979
Epoch 2/50  1563/1563 - 24s 15ms/step - accuracy: 0.5891 - loss: 1.1651 - val_accuracy: 0.6145 - val_loss: 1.1020
Epoch 3/50  1563/1563 - 23s 15ms/step - accuracy: 0.6475 - loss: 1.0026 - val_accuracy: 0.6490 - val_loss: 1.0104
Epoch 4/50  1563/1563 - 23s 15ms/step - accuracy: 0.6788 - loss: 0.9301 - val_accuracy: 0.6786 - val_loss: 0.9201
Epoch 5/50  1563/1563 - 23s 15ms/step - accuracy: 0.6989 - loss: 0.8714 - val_accuracy: 0.6480 - val_loss: 1.0198
Epoch 6/50  1563/1563 - 23s 15ms/step - accuracy: 0.7115 - loss: 0.8345 - val_accuracy: 0.7046 - val_loss: 0.8527
Epoch 7/50  1563/1563 - 23s 15ms/step - accuracy: 0.7255 - loss: 0.7895 - val_accuracy: 0.6933 - val_loss: 0.8832
Epoch 8/50  1563/1563 - 24s 15ms/step - accuracy: 0.7318 - loss: 0.7635 - val_accuracy: 0.7150 - val_loss: 0.8249
Epoch 9/50  1563/1563 - 23s 15ms/step - accuracy: 0.7464 - loss: 0.7294 - val_accuracy: 0.6987 - val_loss: 0.8724
Epoch 10/50 1563/1563 - 23s 15ms/step - accuracy: 0.7506 - loss: 0.7177 - val_accuracy: 0.7254 - val_loss: 0.8173
Epoch 11/50 1563/1563 - 23s 15ms/step - accuracy: 0.7582 - loss: 0.6942 - val_accuracy: 0.7287 - val_loss: 0.7973
Epoch 12/50 1563/1563 - 23s 15ms/step - accuracy: 0.7671 - loss: 0.6763 - val_accuracy: 0.7337 - val_loss: 0.7790
Epoch 13/50 1563/1563 - 23s 15ms/step - accuracy: 0.7658 - loss: 0.6657 - val_accuracy: 0.7338 - val_loss: 0.7703
Epoch 14/50 1563/1563 - 23s 15ms/step - accuracy: 0.7777 - loss: 0.6428 - val_accuracy: 0.7229 - val_loss: 0.8124
Epoch 15/50 1563/1563 - 24s 15ms/step - accuracy: 0.7799 - loss: 0.6307 - val_accuracy: 0.7070 - val_loss: 0.8935
Epoch 16/50 1563/1563 - 24s 15ms/step - accuracy: 0.7817 - loss: 0.6181 - val_accuracy: 0.7340 - val_loss: 0.7804
Epoch 17/50 1563/1563 - 24s 15ms/step - accuracy: 0.7846 - loss: 0.6128 - val_accuracy: 0.7432 - val_loss: 0.7620
Epoch 18/50 1563/1563 - 24s 15ms/step - accuracy: 0.7938 - loss: 0.5967 - val_accuracy: 0.7410 - val_loss: 0.7760
Epoch 19/50 1563/1563 - 23s 15ms/step - accuracy: 0.7905 - loss: 0.5976 - val_accuracy: 0.7425 - val_loss: 0.7571
Epoch 20/50 1563/1563 - 24s 15ms/step - accuracy: 0.7968 - loss: 0.5842 - val_accuracy: 0.7346 - val_loss: 0.8009
Epoch 21/50 1563/1563 - 23s 15ms/step - accuracy: 0.8005 - loss: 0.5750 - val_accuracy: 0.7473 - val_loss: 0.7569
Epoch 22/50 1563/1563 - 25s 16ms/step - accuracy: 0.7962 - loss: 0.5777 - val_accuracy: 0.7217 - val_loss: 0.8272
Epoch 23/50 1563/1563 - 24s 15ms/step - accuracy: 0.8025 - loss: 0.5643 - val_accuracy: 0.7503 - val_loss: 0.7415
Epoch 24/50 1563/1563 - 24s 16ms/step - accuracy: 0.8038 - loss: 0.5618 - val_accuracy: 0.7452 - val_loss: 0.7678
Epoch 25/50 1563/1563 - 24s 16ms/step - accuracy: 0.8086 - loss: 0.5457 - val_accuracy: 0.7467 - val_loss: 0.7637
Epoch 26/50 1563/1563 - 24s 16ms/step - accuracy: 0.8113 - loss: 0.5404 - val_accuracy: 0.7264 - val_loss: 0.8173
Epoch 27/50 1563/1563 - 24s 15ms/step - accuracy: 0.8098 - loss: 0.5378 - val_accuracy: 0.7226 - val_loss: 0.8458
Epoch 28/50 1563/1563 - 24s 16ms/step - accuracy: 0.8073 - loss: 0.5412 - val_accuracy: 0.7569 - val_loss: 0.7235
Epoch 29/50 1563/1563 - 24s 15ms/step - accuracy: 0.8171 - loss: 0.5272 - val_accuracy: 0.7606 - val_loss: 0.7133
Epoch 30/50 1563/1563 - 24s 16ms/step - accuracy: 0.8160 - loss: 0.5240 - val_accuracy: 0.7544 - val_loss: 0.7523
Epoch 31/50 1563/1563 - 24s 16ms/step - accuracy: 0.8159 - loss: 0.5210 - val_accuracy: 0.7603 - val_loss: 0.7214
Epoch 32/50 1563/1563 - 26s 17ms/step - accuracy: 0.8198 - loss: 0.5132 - val_accuracy: 0.7071 - val_loss: 0.8829
Epoch 33/50 1563/1563 - 24s 15ms/step - accuracy: 0.8197 - loss: 0.5150 - val_accuracy: 0.7644 - val_loss: 0.7216
Epoch 34/50 1563/1563 - 24s 15ms/step - accuracy: 0.8212 - loss: 0.5035 - val_accuracy: 0.7596 - val_loss: 0.7141
In [934]:
# c)
train_loss = history.history['loss']
train_acc = history.history['accuracy']
test_loss = history.history['val_loss']
test_acc = history.history['val_accuracy']
print("Training Loss: " + str(train_loss[-1]))
print("Training Accuracy: " + str(train_acc[-1]))
print("Test Loss: " + str(test_loss[-1]))
print("Test Accuracy: " + str(test_acc[-1]))
Training Loss: 0.5167111158370972
Training Accuracy: 0.8166400194168091
Test Loss: 0.7140844464302063
Test Accuracy: 0.7595999836921692
In [935]:
"""
b)
I used the SDG and through trial and error foud that the most successful learning rate was 0.1 where training accuracy was 71%
i then switched to Adam optimiser where i again used trial and error and found that a LR of 0.001 was most succesful with 74%.
i decided to try batch normalisation after each convolutional layer. this normalises the output of each layer. i implemenetd this by adding
model.add(BatchNormalization()) after each convolutional layer and this improved the training accuray to 79% but the
testing acuracy is still quite low at 69% which shows overfitting.
i noticed an issue where the model would overtrain and it would peak and then begin to drop in accuracy. i found out about early
stopping and dropoup.
drop out - adding a dropout rate of 0.25 after each layer made the accuracy substantally worse so i removed it. ealy stopping
resulted in a slightly better test accuracy rate so i kept it.
at the moment i have a problem with overheating of my computer but i still tried to raise my epoch level to 50 and with the early
stopping it stopped a 15 epochs at 83% training rate and 72% testing accuracy. this shows the trend of overfitting. which is where
the model is training to recognise the specific training data and cant handle general data it has not been trained on.
i used GlobalAveragePooling2D in place of flatten() and it decreased the seperation between training and testing accurcies but
didint improve the testing accuracy.
After trying drop out again on a couple of the layers i managed to get the test acuracy up to 75% so i will keep that in
"""
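A hedged sketch of the learning-rate trial-and-error described above (not the exact runs used; the candidate values and the short 5-epoch budget are illustrative assumptions):
In [ ]:
# Illustrative sketch of the learning-rate search described above; the candidate values
# and the 5-epoch budget are assumptions, not the original runs.
for candidate in [0.01, 0.005, 0.001]:
    trial = tf.keras.models.clone_model(model)   # same architecture, freshly initialised weights
    trial.compile(optimizer=Adam(learning_rate=candidate),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    h = trial.fit(x_train, y_train, epochs=5,
                  validation_data=(x_test, y_test), verbose=0)
    print(candidate, h.history['val_accuracy'][-1])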
In [936]:
# get range of epochs
epochs = range(1, len(train_acc) + 1)
fig, axs = plt.subplots(1, 2, figsize=(12, 6))
# Accuracy vs Epoch
axs[0].plot(epochs, train_acc, label='Training Accuracy', color='blue')
axs[0].plot(epochs, test_acc, label='Testing Accuracy', color='green', linestyle='--')
axs[0].set_title('Accuracy vs Epoch')
axs[0].set_xlabel('Epoch')
axs[0].set_ylabel('Accuracy')
axs[0].legend()
axs[0].set_ylim(0, 1)
axs[0].grid(True)
# Loss vs Epoch
axs[1].plot(epochs, train_loss, label='Training Loss', color='red')
axs[1].plot(epochs, test_loss, label='Testing Loss', color='orange', linestyle='--')
axs[1].set_title('Loss vs Epoch')
axs[1].set_xlabel('Epoch')
axs[1].set_ylabel('Loss')
axs[1].legend()
axs[1].set_ylim(0, 1)
axs[1].grid(True)
plt.tight_layout()
plt.show()