import numpy
# Load the labelled features and targets from comma-separated text files.
# `numpy.int` was only an alias for the builtin `int`; it was deprecated in
# NumPy 1.20 and removed in 1.24, so the builtin is used directly (same dtype).
trn_val_X = numpy.loadtxt('mnist_trn_X.csv', dtype=int, delimiter=',')
trn_val_y = numpy.loadtxt('mnist_trn_y.txt', dtype=int, delimiter=',')

# Fixed seed so the row shuffle (and therefore the split) is reproducible.
numpy.random.seed(131071)
shuffle = numpy.random.permutation(trn_val_X.shape[0])

# Split the shuffled rows into a training part and a validation part.
# Slice ends are exclusive, so 0:6000 / 6000:12000 yield 6000 rows each; the
# previous 0:5999 / 6000:11999 bounds gave 5999 rows per split and silently
# dropped the rows at shuffled positions 5999 and 11999.
trn_X = trn_val_X[shuffle[0:6000], :]
trn_y = trn_val_y[shuffle[0:6000]]
val_X = trn_val_X[shuffle[6000:12000], :]
val_y = trn_val_y[shuffle[6000:12000]]

from sklearn.neighbors import KNeighborsClassifier
import time
import datetime
# Model selection: fit one k-NN classifier per candidate k on the training
# split, score it on the validation split, and remember the best k seen.
bestAccuracy = 0.0
bestK = 0
for k in (1, 2, 4, 8, 16):
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(trn_X, trn_y)
    accuracy = knn.score(val_X, val_y)
    # Strict comparison: on a tie the earlier (smaller) k is kept.
    if accuracy > bestAccuracy:
        bestAccuracy, bestK = accuracy, k
    # Log a local-time timestamp with each result to show how long each k took.
    timestamp = time.strftime('%Y-%m-%d %H:%M:%S')
    print(timestamp, " [k = ", k, "]: ", accuracy, sep="")

# Refit on all labelled rows (training + validation) using the selected k,
# then predict labels for the test features.
print("bestK = " + str(bestK))
knn = KNeighborsClassifier(n_neighbors=bestK)
knn.fit(trn_val_X, trn_val_y)
# Builtin `int` replaces the `numpy.int` alias, which was removed in NumPy 1.24.
tst_X = numpy.loadtxt('mnist_tst_X.csv', dtype=int, delimiter=',')
predictions = knn.predict(tst_X)

# Write one "Id,Prediction" row per test sample, with Id being the zero-based
# row position. The `with` block closes the file even if a write fails.
with open("predictions.csv", "w") as output:
    output.write("Id,Prediction\n")
    for index, prediction in enumerate(predictions):
        output.write(str(index) + "," + str(prediction) + "\n")