# Simulate two Gaussian classes, fit two linear SVMs with different cost
# parameters, plot their decision boundaries and margins, and compare
# test-set error rates.
set.seed(2^17 - 1)

# Create some data ----
# Positive class centered at (1.5, 1.5), negative at (-1.5, -1.5),
# both with identity covariance.
n_pos <- 50
n_neg <- 50
library(MASS) # mvrnorm()
X_trn <- rbind(
  mvrnorm(n_pos, c(1.5, 1.5), diag(2)),
  mvrnorm(n_neg, c(-1.5, -1.5), diag(2))
)
y_trn <- rep(c(1, -1), c(n_pos, n_neg))
# Test set is 10x the size of the training set.
X_tst <- rbind(
  mvrnorm(10 * n_pos, c(1.5, 1.5), diag(2)),
  mvrnorm(10 * n_neg, c(-1.5, -1.5), diag(2))
)
y_tst <- rep(c(1, -1), c(10 * n_pos, 10 * n_neg))

# Construct a couple of models ----
library(e1071) # svm()
model1 <- svm(X_trn, y_trn, type = "C-classification", kernel = "linear",
              scale = FALSE, cost = 0.01)
model1$nSV
model2 <- svm(X_trn, y_trn, type = "C-classification", kernel = "linear",
              scale = FALSE, cost = 0.1)
model2$nSV

# Recover the linear weight vector beta = t(alpha) %*% SV for each model;
# the fitted decision function is beta %*% x - rho.
beta_model1 <- t(model1$coefs) %*% model1$SV
beta_model2 <- t(model2$coefs) %*% model2$SV

# Plot the decision boundaries and the margins ----
plot(X_trn, type = "p",
     col = rep(c("green3", "red"), c(n_pos, n_neg)),
     xlab = expression("X"["i,1"]),
     ylab = expression("X"["i,2"]))
legend("topright", legend = c("cost = 0.01", "cost = 0.10"),
       lty = c("solid", "dotted"))

# Solving beta1*x1 + beta2*x2 - rho = c for x2 gives intercept
# (rho + c) / beta2 and slope -beta1 / beta2
# (c = 0 for the boundary, c = +/-1 for the margins).
abline(a = model1$rho / beta_model1[2],
       b = -beta_model1[1] / beta_model1[2], lty = "solid", col = "black")
abline(a = (model1$rho + 1) / beta_model1[2],
       b = -beta_model1[1] / beta_model1[2], lty = "solid", col = "green3")
abline(a = (model1$rho - 1) / beta_model1[2],
       b = -beta_model1[1] / beta_model1[2], lty = "solid", col = "red")
abline(a = model2$rho / beta_model2[2],
       b = -beta_model2[1] / beta_model2[2], lty = "dotted", col = "black")
abline(a = (model2$rho + 1) / beta_model2[2],
       b = -beta_model2[1] / beta_model2[2], lty = "dotted", col = "green3")
abline(a = (model2$rho - 1) / beta_model2[2],
       b = -beta_model2[1] / beta_model2[2], lty = "dotted", col = "red")

# Check the error rates ----
table(y_tst, predict(model1, X_tst), dnn = c("actual", "predicted"))
table(y_tst, predict(model2, X_tst), dnn = c("actual", "predicted"))