# Load the Credit data, build a numeric design matrix from every column except
# the response, and make a reproducible 80/20 train/test split.
set.seed(2^17 - 1)
Credit = read.csv("http://www-bcf.usc.edu/~gareth/ISL/Credit.csv", row.names = 1)
response.index = which(colnames(Credit) == "Balance")
# Fail loudly when the response column is absent: a negative index built from
# an empty which() result would silently select zero columns instead.
stopifnot(length(response.index) == 1)
# "~ 0 + ." asks for no intercept column and one term per remaining column.
X = model.matrix(~ 0 + ., data = Credit[, -response.index, drop = FALSE])
y = Credit[, response.index]
# Rows of X and entries of y are paired by position below, so they must agree.
stopifnot(nrow(X) == length(y))
n = nrow(X)
m = round(0.8 * n)
shuffle = sample.int(n)
# head()/tail() stay correct at the boundaries (m == 0 or m == n), where
# 1:m and (m+1):n would count backwards.
trn_idx = head(shuffle, m)
tst_idx = tail(shuffle, n - m)
# Drop the "Gender Male" indicator with a logical mask; unlike -which(...),
# the mask keeps every column when the name is not present.
# NOTE(review): presumably removed because it is redundant with the other
# Gender indicator produced by the intercept-free formula -- confirm.
keep_cols = colnames(X) != "Gender Male"
trn_X = X[trn_idx, keep_cols, drop = FALSE]
trn_y = y[trn_idx]
tst_X = X[tst_idx, keep_cols, drop = FALSE]
tst_y = y[tst_idx]
p = ncol(trn_X)

# Best-subset search over the training predictors, allowing subsets of up to
# p variables; the elapsed wall-clock time is reported afterwards.
start.time = Sys.time()
library(leaps)
subsets = regsubsets(x = trn_X, y = trn_y, nvmax = p)
difftime(Sys.time(), start.time)

# For every subset size found by regsubsets, fit a model on the selected
# training columns with repeated 5-fold cross-validation, record the best
# cross-validated RMSE (and its SD), and remember the overall winner.
results = matrix(0, nrow = p, ncol = 3,
                 dimnames = list(NULL, c("Variable Count", "RMSE", "RMSE SD")))
best.loss = Inf
best.subset = NA
best.model = NA
start.time = Sys.time()
library(caret)
trControl = trainControl("repeatedcv", number = 5, repeats = 30)
tuneGrid = expand.grid(alpha = c(0, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 1),
                       lambda = c(0, 0.1, 1, 10, 100))
# The selection matrix does not change inside the loop, so compute it once.
outmat = summary(subsets)$outmat
for (i in seq_len(p)) {
    selected = which(outmat[i,] == "*")
    # drop = FALSE keeps a one-column selection as a matrix with its column
    # name intact; train() needs named columns.
    subset_X = trn_X[, selected, drop = FALSE]
    if (length(selected) == 1) {
        # A single predictor is fitted with plain glm rather than glmnet
        # (glmnet expects at least two predictor columns).
        model = train(subset_X, trn_y, method = "glm", trControl = trControl)
    } else {
        model = train(subset_X, trn_y, method = "glmnet", trControl = trControl, tuneGrid = tuneGrid)
    }
    # Row of the tuning grid with the lowest cross-validated RMSE.
    index = which.min(model$results[,"RMSE"])
    results[i,1] = i
    results[i,2] = model$results[index,"RMSE"]
    results[i,3] = model$results[index,"RMSESD"]
    if (results[i,2] < best.loss) {
        best.loss = results[i,2]
        best.subset = selected
        best.model = model
    }
}
Sys.time() - start.time
# Subset sizes ordered from best to worst RMSE; drop = FALSE keeps the table
# shape even when there is only one row.
results[order(results[,2]), , drop = FALSE]

# Plot the winning model's tuning results, then again with the y-axis
# restricted to the range 99.2 to 99.3.
plot(best.model)
plot(best.model, ylim = c(99.2, 99.3))

# Take the lambda from the tuning row with the lowest cross-validated RMSE
# and show the final model's coefficients at that penalty.
best_row = which.min(best.model$results[, "RMSE"])
lambda = best.model$results[best_row, "lambda"]
coef(best.model$finalModel, s = lambda)