# a SimpleRNN model
import numpy as np
import keras.backend as K
from keras.layers import Dense, Embedding, SimpleRNN
from keras.models import Sequential
# build the whole network in one go: embedding -> recurrent layer -> classifier
model = Sequential([
    Embedding(8, 3),                  # Embedding(input_dim=8, output_dim=3)
    SimpleRNN(4),                     # 4 recurrent units
    Dense(1, activation="sigmoid"),   # single sigmoid output unit
])
model.compile(optimizer="sgd", loss="binary_crossentropy", metrics=["accuracy"])
model.summary()
# one example: a sequence of two token ids (batch of 1) and its binary target
X = np.array([[4, 6]], dtype="int32")
Y = np.array([[1]], dtype="float32")
# run the model once on the example
model.predict(X)
# gathering the weights of the model, one get_weights() call per layer
(P,) = model.layers[0].get_weights()     # embedding parameters
Q, S, R = model.layers[1].get_weights()  # RNN cell: previous-layer weights, previous-output weights, bias
T, U = model.layers[2].get_weights()     # dense layer: previous-layer weights, bias
# shapes, one per line, in the order P, Q, R, S, T, U
print(P.shape, Q.shape, R.shape, S.shape, T.shape, U.shape, sep="\n")
# equivalent methods for making the prediction: first the reference value
# computed by Keras itself, which the hand-written NumPy expressions that
# follow are meant to reproduce
print(model.predict(X))
from numpy import dot
from numpy import tanh
from numpy import exp
def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)) of x.

    Built only from ``exp`` and arithmetic, so it accepts plain scalars and
    is applied elementwise when ``x`` is a NumPy array.
    """
    return 1.0 / (1.0 + exp(-x))
# vectorised form: one hidden-state vector per time step, then the dense layer
hidden_t1 = tanh(dot(P[4, :], Q) + R)                      # after the first token (id 4)
hidden_t2 = tanh(dot(P[6, :], Q) + R + dot(hidden_t1, S))  # after the second token (id 6)
print(sigmoid(dot(hidden_t2, T) + U))
# the same prediction written out scalar by scalar. Each five-line group below
# is one hidden unit j of the second time step (token id 6), scaled by its
# dense-layer weight:
#   tanh(P[6,:] . Q[:,j] + R[j] + sum_i h_i * S[i,j]) * T[j,0]
# where h_i = tanh(P[4,:] . Q[:,i] + R[i]) is hidden unit i of the first time
# step (token id 4). The four groups are summed, U[0] is added, and the
# result goes through sigmoid().
print(
# hidden unit 0 of the second time step
sigmoid( tanh(P[6,0] * Q[0,0] + P[6,1] * Q[1,0] + P[6,2] * Q[2,0] + R[0]
+ tanh(P[4,0] * Q[0,0] + P[4,1] * Q[1,0] + P[4,2] * Q[2,0] + R[0]) * S[0,0]
+ tanh(P[4,0] * Q[0,1] + P[4,1] * Q[1,1] + P[4,2] * Q[2,1] + R[1]) * S[1,0]
+ tanh(P[4,0] * Q[0,2] + P[4,1] * Q[1,2] + P[4,2] * Q[2,2] + R[2]) * S[2,0]
+ tanh(P[4,0] * Q[0,3] + P[4,1] * Q[1,3] + P[4,2] * Q[2,3] + R[3]) * S[3,0]) * T[0,0]
# hidden unit 1 of the second time step
+ tanh(P[6,0] * Q[0,1] + P[6,1] * Q[1,1] + P[6,2] * Q[2,1] + R[1]
+ tanh(P[4,0] * Q[0,0] + P[4,1] * Q[1,0] + P[4,2] * Q[2,0] + R[0]) * S[0,1]
+ tanh(P[4,0] * Q[0,1] + P[4,1] * Q[1,1] + P[4,2] * Q[2,1] + R[1]) * S[1,1]
+ tanh(P[4,0] * Q[0,2] + P[4,1] * Q[1,2] + P[4,2] * Q[2,2] + R[2]) * S[2,1]
+ tanh(P[4,0] * Q[0,3] + P[4,1] * Q[1,3] + P[4,2] * Q[2,3] + R[3]) * S[3,1]) * T[1,0]
# hidden unit 2 of the second time step
+ tanh(P[6,0] * Q[0,2] + P[6,1] * Q[1,2] + P[6,2] * Q[2,2] + R[2]
+ tanh(P[4,0] * Q[0,0] + P[4,1] * Q[1,0] + P[4,2] * Q[2,0] + R[0]) * S[0,2]
+ tanh(P[4,0] * Q[0,1] + P[4,1] * Q[1,1] + P[4,2] * Q[2,1] + R[1]) * S[1,2]
+ tanh(P[4,0] * Q[0,2] + P[4,1] * Q[1,2] + P[4,2] * Q[2,2] + R[2]) * S[2,2]
+ tanh(P[4,0] * Q[0,3] + P[4,1] * Q[1,3] + P[4,2] * Q[2,3] + R[3]) * S[3,2]) * T[2,0]
# hidden unit 3 of the second time step
+ tanh(P[6,0] * Q[0,3] + P[6,1] * Q[1,3] + P[6,2] * Q[2,3] + R[3]
+ tanh(P[4,0] * Q[0,0] + P[4,1] * Q[1,0] + P[4,2] * Q[2,0] + R[0]) * S[0,3]
+ tanh(P[4,0] * Q[0,1] + P[4,1] * Q[1,1] + P[4,2] * Q[2,1] + R[1]) * S[1,3]
+ tanh(P[4,0] * Q[0,2] + P[4,1] * Q[1,2] + P[4,2] * Q[2,2] + R[2]) * S[2,3]
+ tanh(P[4,0] * Q[0,3] + P[4,1] * Q[1,3] + P[4,2] * Q[2,3] + R[3]) * S[3,3]) * T[3,0]
# dense-layer bias
+ U[0])
)
# tanh() and sigmoid() functions involving Q[0,0]
#
# p0..p3 are the four hidden units after the first token (id 4). They are
# computed once here and reused, in the same arithmetic order as the fully
# expanded prediction, instead of being retyped inside every expression.
p0 = tanh(P[4,0] * Q[0,0] + P[4,1] * Q[1,0] + P[4,2] * Q[2,0] + R[0])
p1 = tanh(P[4,0] * Q[0,1] + P[4,1] * Q[1,1] + P[4,2] * Q[2,1] + R[1])
p2 = tanh(P[4,0] * Q[0,2] + P[4,1] * Q[1,2] + P[4,2] * Q[2,2] + R[2])
p3 = tanh(P[4,0] * Q[0,3] + P[4,1] * Q[1,3] + P[4,2] * Q[2,3] + R[3])
# f0: the only first-step unit whose pre-activation contains Q[0,0]
f0 = p0
# f1..f4: hidden units 0..3 after the second token (id 6). f1 contains Q[0,0]
# directly; all four contain it indirectly through p0 (weighted by S[0,j]).
f1 = tanh(P[6,0] * Q[0,0] + P[6,1] * Q[1,0] + P[6,2] * Q[2,0] + R[0]
          + p0 * S[0,0] + p1 * S[1,0] + p2 * S[2,0] + p3 * S[3,0])
# NOTE: no trailing backslash after any of these closing parentheses; a
# backslash would join the next assignment onto the same logical line and
# make the script a SyntaxError.
f2 = tanh(P[6,0] * Q[0,1] + P[6,1] * Q[1,1] + P[6,2] * Q[2,1] + R[1]
          + p0 * S[0,1] + p1 * S[1,1] + p2 * S[2,1] + p3 * S[3,1])
f3 = tanh(P[6,0] * Q[0,2] + P[6,1] * Q[1,2] + P[6,2] * Q[2,2] + R[2]
          + p0 * S[0,2] + p1 * S[1,2] + p2 * S[2,2] + p3 * S[3,2])
f4 = tanh(P[6,0] * Q[0,3] + P[6,1] * Q[1,3] + P[6,2] * Q[2,3] + R[3]
          + p0 * S[0,3] + p1 * S[1,3] + p2 * S[2,3] + p3 * S[3,3])
# f5: the network output, sigmoid of the dense layer applied to f1..f4
f5 = sigmoid(f1 * T[0,0] + f2 * T[1,0] + f3 * T[2,0] + f4 * T[3,0] + U[0])
# gradients for tanh() and sigmoid() functions involving Q[0,0]
# tanh'(z) = 1 - tanh(z)^2, evaluated from the stored outputs f0..f4
g0, g1, g2, g3, g4 = (1 - f * f for f in (f0, f1, f2, f3, f4))
# sigmoid'(z) = s * (1 - s), evaluated from the stored output f5
g5 = f5 * (1 - f5)
# derivative of the binary cross-entropy loss with respect to the output f5
g6 = (f5 - Y) / g5
# the gradient for Q[0,0]: one chain-rule product per path from the loss to Q[0,0]
loss_to_logit = g6 * g5  # factor shared by every path
gradient = (
    loss_to_logit * T[0,0] * g1 * P[6,0]                    # Q[0,0] used directly in f1
    + loss_to_logit * T[0,0] * g1 * S[0,0] * g0 * P[4,0]    # through f0 into f1
    + loss_to_logit * T[1,0] * g2 * S[0,1] * g0 * P[4,0]    # through f0 into f2
    + loss_to_logit * T[2,0] * g3 * S[0,2] * g0 * P[4,0]    # through f0 into f3
    + loss_to_logit * T[3,0] * g4 * S[0,3] * g0 * P[4,0]    # through f0 into f4
)
# comparing results: take one SGD step on the single example, then compare the
# change Keras applied to Q[0,0] with -learning_rate * hand-computed gradient
model.fit(X, Y, batch_size=1, epochs=1)
q_change = model.layers[1].get_weights()[0] - Q
print(q_change)
print("{:.7f}".format(q_change[0, 0]))
print("{:.7f}".format(-K.eval(model.optimizer.lr) * gradient[0, 0]))
model.predict(X) # closer to target!