curves = function(actuals, predictions) { thresholds = c(unique(sort(predictions, decreasing = T)), min(predictions) - .Machine$double.eps) k = length(thresholds) tp = array(0, k) fn = array(0, k) fp = array(0, k) tn = array(0, k) for (i in 1:k) { tp[i] = sum((actuals == 1) & (predictions > thresholds[i])) fn[i] = sum((actuals == 1) & (predictions <= thresholds[i])) fp[i] = sum((actuals == 0) & (predictions > thresholds[i])) tn[i] = sum((actuals == 0) & (predictions <= thresholds[i])) } return(data.frame(thresholds, tp, fn, fp, tn, fp.rate = fp / (fp + tn), tp.rate = tp / (tp + fn), precision = tp / (tp + fp))) } auc = function(x, y) { area = 0.0 for (i in 2:length(x)) { # adding the area of the lower rectangle # plus half the area of the upper rectangle area = area + (x[i] - x[i - 1]) * (min(y[i], y[i - 1]) + 0.5 * abs(y[i] - y[i - 1])) } return(area) } set.seed(2^17-1) library(ISLR) index = sample(1:nrow(Default)) trn = Default[index[1:8000],] tst = Default[index[8001:10000],] model = glm(default ~ student + balance, data = trn, family = binomial) predictions = predict(model, newdata = tst, type = "response") evaluation = curves(as.integer(tst$default == "Yes"), predictions) valid.precision = 2:nrow(evaluation) par(mfrow = c(1, 2)) plot(evaluation$fp.rate, evaluation$tp.rate, type = "l") title(paste("auc =", round(auc(evaluation$fp.rate, evaluation$tp.rate), 3))) abline(a = 0, b = 1, lty = "dotted", col = "red") legend("bottomright", legend = c("glm", "random"), lty = c("solid", "dashed"), col = c("black", "red")) plot(evaluation$tp.rate[valid.precision], evaluation$precision[valid.precision], type = "l") title(paste("auc =", round(auc(evaluation$tp.rate[valid.precision], evaluation$precision[valid.precision]), 3))) abline(a = mean(tst$default == "Yes"), b = 0, lty = "dotted", col = "red") legend("topright", legend = c("glm", "random"), lty = c("solid", "dashed"), col = c("black", "red"))