# see https://keras.io/api/layers/recurrent_layers/lstm/ for
# "requirements to use the cuDNN implementation"
import numpy as np
import tensorflow as tf
from tensorflow.keras import activations, callbacks, layers, models, regularizers

feature_count = 1024
batch_size = 256

# Pre-tokenized integer sequences: the X arrays hold token ids, the Y arrays
# the next-token targets.
trnX = np.load("trnX.npy")
trnY = np.load("trnY.npy")
valX = np.load("valX.npy")
valY = np.load("valY.npy")
tstX = np.load("tstX.npy")
tstY = np.load("tstY.npy")

# Assumes the training set covers every token id that appears in the
# validation and test sets.
vocabulary_size = int(trnX.max()) + 1


class TiedEmbedding(layers.Layer):
    """Output projection that reuses (ties) the weights of an Embedding layer."""

    def __init__(self, embeddings_layer: layers.Layer, **kwargs):
        super().__init__(**kwargs)
        self.embeddings_layer = embeddings_layer
        self.supports_masking = False

    def call(self, inputs):
        # Project hidden states onto the transposed embedding matrix, then
        # softmax to get a distribution over the vocabulary.
        return activations.softmax(
            tf.matmul(inputs, self.embeddings_layer.embeddings, transpose_b=True)
        )


embedding = layers.Embedding(
    vocabulary_size,
    feature_count,
    mask_zero=True,
    embeddings_regularizer=regularizers.l1(1e-6),
)
tied = TiedEmbedding(embedding)

inputs = layers.Input(shape=(trnX.shape[1],), dtype="int32")  # renamed to avoid shadowing the builtin `input`
x = embedding(inputs)
# dropout (unlike recurrent_dropout) keeps the layer eligible for the cuDNN
# kernel; see the link at the top of this file.
x = layers.LSTM(feature_count, dropout=0.5)(x)
x = tied(x)
model = models.Model(inputs=inputs, outputs=x)
# Track the plain cross-entropy as a metric because the loss also includes the
# L1 embedding penalty.
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer="adam",
    metrics=["sparse_categorical_crossentropy"],
)
model.summary()

# Note: with epochs=4 and patience=4 this callback can never trigger; raise
# epochs for early stopping to have an effect. The list is named callback_list
# so it does not shadow the imported tensorflow.keras.callbacks module.
callback_list = [
    callbacks.EarlyStopping(
        monitor="val_sparse_categorical_crossentropy",
        patience=4,
        restore_best_weights=True,
    )
]
model.fit(
    trnX,
    trnY,
    epochs=4,
    batch_size=batch_size,
    validation_data=(valX, valY),
    callbacks=callback_list,
)

# Write the predicted probability of the true next token for each test example.
probabilities = model.predict(tstX)
with open("predictions.csv", "w") as output:
    output.write("id,prediction\n")
    for i in range(tstX.shape[0]):
        output.write(str(i).zfill(6) + "," + str(probabilities[i, tstY[i]]) + "\n")
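
# Optional sanity checks (an addition, not part of the original script):
# a minimal sketch verifying that weight tying worked. The model should hold
# exactly one (vocabulary_size, feature_count) trainable matrix, the embedding,
# since TiedEmbedding reuses it rather than allocating its own projection; and
# each softmax output row should sum to ~1.
tied_shapes = [tuple(w.shape) for w in model.trainable_weights]
assert tied_shapes.count((vocabulary_size, feature_count)) == 1
np.testing.assert_allclose(probabilities.sum(axis=1), 1.0, atol=1e-3)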