# create data
# Build a toy binary-classification set: every placement of a "cross" (label 1)
# or a "nought" (label 0) template inside an otherwise empty image.
import numpy as np

image_height = 5
image_width = 5
filter_height = 3
filter_width = 3

np.set_printoptions(precision=6)

# float32 constants so later manual arithmetic stays in float32.
np0 = np.float32(0)
np1 = np.float32(1)

zeros = np.zeros((image_height, image_width), dtype="float32")
cross = np.array([
    [1, 0, 1],
    [0, 1, 0],
    [1, 0, 1],
], dtype="float32")
nought = np.array([
    [1, 1, 1],
    [1, 0, 1],
    [1, 1, 1],
], dtype="float32")

X = []
Y = []
for label, template in [(1, cross), (0, nought)]:
    template_height, template_width = template.shape
    # Slide the template over every offset at which it fits entirely inside
    # the image; the count comes from the image and template sizes rather
    # than from the template size alone.
    for i in range(image_height - template_height + 1):
        for j in range(image_width - template_width + 1):
            temp = zeros.copy()
            temp[i:(i + template_height), j:(j + template_width)] = template
            X.append(temp)
            Y.append(label)

# Add a trailing channel axis: (examples, height, width, 1).
X = np.array(X).reshape((-1, image_height, image_width, 1)).astype("float32")
Y = np.array(Y).astype("float32")
print("X[0]:\n", X[0,:,:,0])
print("Y[0]:", Y[0])
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Conv2D, Dense, Flatten, MaxPooling2D
from tensorflow.keras.optimizers import SGD
# create model
# One single-filter convolution (ReLU), flattened into one sigmoid unit.
model = Sequential([
    Conv2D(
        filters=1,
        kernel_size=(filter_height, filter_width),
        padding="same",
        activation="relu",
        input_shape=X.shape[1:4],
    ),
    Flatten(),
    Dense(units=1, activation="sigmoid"),
])
model.summary()
# Snapshot of the freshly initialised weights, taken before any training,
# so the manual computations below can be checked against Keras.
old_weights = model.get_weights()
# forward propagation
# Sub-model that exposes the convolution layer's output (ReLU already applied,
# since the activation is part of the layer).
conv_activations = Model(model.input, model.layers[0].output)
conv_out = conv_activations.predict(X[:1])[0,:,:,0]
pred_out = model.predict(X[:1])[0,0]

# manual convolution
# Reproduce padding="same" at stride 1: a total of (filter size - 1) zeros per
# axis, with the smaller half placed before the image (top / left).
pad_top = (filter_height - 1) // 2
pad_left = (filter_width - 1) // 2
padded = np.zeros(
    (X.shape[1] + filter_height - 1, X.shape[2] + filter_width - 1),
    dtype="float32",
)
padded[pad_top:(pad_top + X.shape[1]), pad_left:(pad_left + X.shape[2])] = X[0,:,:,0]

# Loop-invariant pieces of the convolution layer: its single 2-D kernel
# (first input channel, first filter) and its bias.
conv_kernel = old_weights[0][:,:,0,0]
conv_bias = old_weights[1][0]
conv_out_manual = np.zeros((X.shape[1], X.shape[2]), dtype="float32")
for i in range(X.shape[1]):
    for j in range(X.shape[2]):
        # Rows span filter_height and columns span filter_width, so the window
        # has the same shape as the kernel.
        window = padded[i:(i + filter_height), j:(j + filter_width)]
        conv_out_manual[i,j] = np.sum(window * conv_kernel) + conv_bias

# manual activation
conv_out_manual = np.where(conv_out_manual > 0, conv_out_manual, np.float32(0))
# Derivative of ReLU: 1 where the unit is active, 0 elsewhere.
pd_relu_conv = np.where(conv_out_manual > 0, np1, np0)
# Dense layer on the row-major flattened activations, then the sigmoid.
log_odds = np.dot(conv_out_manual.reshape(X.shape[1] * X.shape[2]), old_weights[2][:,0]) + old_weights[3][0]
pred_out_manual = np1/(np1 + np.exp(- log_odds))
print("conv output:\n", conv_out)
print("conv manual:\n", conv_out_manual)
print("partial derivative of relu activation with respect to convolution:\n", pd_relu_conv)
print("prediction output:", pred_out)
print("prediction manual:", pred_out_manual)
# train on first example
# Single source of truth for the step size: used by the optimizer and by the
# manual update printed at the end.
learning_rate = 0.01
model.compile(
    loss="binary_crossentropy",
    optimizer=SGD(learning_rate=learning_rate),
    metrics=["accuracy"],
)
# Fit on one example with the default single epoch.
model.fit(X[:1], Y[:1])
new_weights = model.get_weights()

# manual convolution weight update
# Chain rule, one factor per stage (the first two multiply out to
# pred_out - Y[0]):
#   d loss / d prediction    (binary cross-entropy)
#   d prediction / d logodds (sigmoid)
pd_loss_pred = (pred_out - Y[0]) / (pred_out * (np1 - pred_out))
pd_pred_logodds = pred_out * (np1 - pred_out)
pd_loss_logodds = pd_loss_pred * pd_pred_logodds

# Dense weights laid out on the image grid; row-major, matching the
# flattening used for the manual log-odds.
dense_weights = old_weights[2][:, 0].reshape(X.shape[1], X.shape[2])
# Gradient reaching each pre-activation convolution output.
pd_loss_conv = pd_loss_logodds * dense_weights * pd_relu_conv

pd_loss_wgt = np.zeros((filter_height, filter_width), dtype="float32")
for w_i in range(filter_height):
    for w_j in range(filter_width):
        # Filter tap (w_i, w_j) multiplies padded[i + w_i, j + w_j] when
        # producing output (i, j), so its gradient is the sum over all
        # outputs of upstream gradient times that shifted input view.
        shifted_input = padded[w_i:(w_i + X.shape[1]), w_j:(w_j + X.shape[2])]
        pd_loss_wgt[w_i, w_j] = np.sum(pd_loss_conv * shifted_input)

print("weight update:\n", new_weights[0][:,:,0,0] - old_weights[0][:,:,0,0])
print("weight manual:\n", - learning_rate * pd_loss_wgt)