"""Select k for a k-nearest-neighbours classifier and write test predictions.

The script loads the labelled data from ``mnist_trn_X.csv`` /
``mnist_trn_y.txt``, shuffles it with a fixed seed, scores several values of
``k`` on a held-out validation split, refits on all labelled data with the
best ``k``, and writes predictions for ``mnist_tst_X.csv`` to
``predictions.csv``.
"""
import datetime
import time

import numpy

TRAIN_X_PATH = 'mnist_trn_X.csv'
TRAIN_Y_PATH = 'mnist_trn_y.txt'
TEST_X_PATH = 'mnist_tst_X.csv'
PREDICTIONS_PATH = "predictions.csv"

SEED = 131071
TRAIN_SIZE = 6000
VALIDATION_SIZE = 6000
CANDIDATE_KS = (1, 2, 4, 8, 16)


def load_int_csv(path):
    """Load a comma-delimited text file as an integer array.

    The builtin ``int`` is used as the dtype: the ``numpy.int`` alias was
    deprecated in NumPy 1.20 and removed in 1.24.
    """
    return numpy.loadtxt(path, dtype=int, delimiter=',')


def split_indices(n_samples, train_size=TRAIN_SIZE,
                  validation_size=VALIDATION_SIZE, seed=SEED):
    """Return shuffled, disjoint ``(train, validation)`` row-index arrays.

    Args:
        n_samples: Number of rows to permute.
        train_size: Number of shuffled rows assigned to the training split.
        validation_size: Number of following rows assigned to validation.
        seed: Seed for the permutation.

    Returns:
        A pair of 1-D index arrays. If ``n_samples`` is smaller than
        ``train_size + validation_size`` the slices are simply shorter.
    """
    # A private RandomState yields the same permutation as calling
    # numpy.random.seed(seed) followed by numpy.random.permutation, without
    # touching the global generator.
    shuffle = numpy.random.RandomState(seed).permutation(n_samples)
    # Slice ends are exclusive, so each split really has `*_size` rows
    # (slicing 0:5999 / 6000:11999 would drop one row from each split).
    train_idx = shuffle[:train_size]
    val_idx = shuffle[train_size:train_size + validation_size]
    return train_idx, val_idx


def select_best_k(trn_X, trn_y, val_X, val_y, candidate_ks=CANDIDATE_KS):
    """Fit one classifier per candidate ``k`` and return the best ``k``.

    Each candidate is fitted on the training split and scored on the
    validation split; a timestamped progress line is printed per candidate.
    Ties keep the earliest candidate because the comparison is strict.
    """
    # Imported here so the numpy-only helpers in this module remain
    # importable when scikit-learn is not installed.
    from sklearn.neighbors import KNeighborsClassifier

    best_k = 0
    # Start below any possible score so the first candidate always registers,
    # even when its accuracy is 0.0.
    best_accuracy = float("-inf")
    for k in candidate_ks:
        knn = KNeighborsClassifier(n_neighbors=k)
        knn.fit(trn_X, trn_y)
        accuracy = knn.score(val_X, val_y)
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_k = k
        timestamp = datetime.datetime.fromtimestamp(
            time.time()).strftime('%Y-%m-%d %H:%M:%S')
        print(f"{timestamp} [k = {k}]: {accuracy}")
    return best_k


def format_prediction_rows(predictions):
    """Return the lines of the predictions CSV, header first.

    Rows are ``"<index>,<prediction>\\n"`` with indices counting from 0.
    """
    rows = ["Id,Prediction\n"]
    rows.extend(
        f"{index},{prediction}\n"
        for index, prediction in enumerate(predictions)
    )
    return rows


def main():
    """Run model selection, refit on all labelled data, write predictions."""
    from sklearn.neighbors import KNeighborsClassifier

    trn_val_X = load_int_csv(TRAIN_X_PATH)
    trn_val_y = load_int_csv(TRAIN_Y_PATH)

    train_idx, val_idx = split_indices(trn_val_X.shape[0])
    best_k = select_best_k(
        trn_val_X[train_idx, :], trn_val_y[train_idx],
        trn_val_X[val_idx, :], trn_val_y[val_idx],
    )
    print("bestK = " + str(best_k))

    # Refit on every labelled row before predicting the test set.
    knn = KNeighborsClassifier(n_neighbors=best_k)
    knn.fit(trn_val_X, trn_val_y)

    tst_X = load_int_csv(TEST_X_PATH)
    predictions = knn.predict(tst_X)

    # The context manager closes the file even if a write fails.
    with open(PREDICTIONS_PATH, "w") as output:
        output.writelines(format_prediction_rows(predictions))


if __name__ == "__main__":
    main()