# Exploring a peculiar thing about the svm function
# Developed using 2.9.0.
#
# This first part of the script builds the training data: it simulates
# three classes of two-dimensional points, plots them, stacks them into one
# data frame with a class label, and standardises the two predictors by hand.

library(MASS)
library(e1071)    # for svm f'n (for support vector machines)
library(mvtnorm)  # for rmvnorm and dmvnorm (for generation of multivariate
                  # normal observations and computation of multivariate
                  # normal pdf values)

set.seed(7)

# I'll generate data from three classes.
n <- 50  # n is number of training sample cases per class
Sig <- diag(1, 2)
Sig  # Sig is a variance-covariance matrix (here the 2 x 2 identity).

Gxvals <- rmvnorm(n, mean = c(0, 0), sigma = Sig)  # G is used for the Green class.
Oxvals <- rmvnorm(n, mean = c(0, 0), sigma = Sig)  # O is used for the Orange class.
Bxvals <- rmvnorm(n, mean = c(0, 0), sigma = Sig)  # B is used for the Blue class.

# The Orange and Blue coordinates are replaced by uniform draws. The two
# rmvnorm() calls above are kept on purpose: they consume random numbers, so
# dropping them would change every value drawn after set.seed(7).
Oxvals[, 1] <- runif(n, -4, 1)
Oxvals[, 2] <- runif(n, -1, 4)
Bxvals[, 1] <- runif(n, -1, 4)
Bxvals[, 2] <- runif(n, -4, 1)

# I'll plot the data.
plot(Oxvals[, 1], Oxvals[, 2],
     col = "darkorange", main = "Training Data",
     xlab = "x1", ylab = "x2", xlim = c(-4, 4), ylim = c(-4, 4))
points(Bxvals[, 1], Bxvals[, 2], col = "royalblue")
points(Gxvals[, 1], Gxvals[, 2], col = "limegreen")

#####
# I'll give response value of 0 for Orange class observations,
# 1 for Green class observations, 2 for the Blue class observations,
# and then combine them together to form the training data.
# Row order in trndat is therefore: Orange (1..n), Green (n+1..2n),
# Blue (2n+1..3n).
Oclass <- cbind(0, Oxvals)
Gclass <- cbind(1, Gxvals)
Bclass <- cbind(2, Bxvals)
trndat <- data.frame(rbind(Oclass, Gclass, Bclass))
names(trndat) <- c("g", "x1", "x2")

#####
# Now I'll scale data: centre each predictor at 0 and divide by its sample
# standard deviation, then display the resulting means and sds as a check.
trndat$x1 <- (trndat$x1 - mean(trndat$x1)) / sd(trndat$x1)
trndat$x2 <- (trndat$x2 - mean(trndat$x2)) / sd(trndat$x2)
mean(trndat$x1)
sd(trndat$x1)
mean(trndat$x2)
sd(trndat$x2)
# Now I'll use svm function w/ scale = FALSE and get
# posterior probabilities for two cases.
svm.notscaled <- svm(as.factor(g) ~ ., data = trndat,
                     scale = FALSE, probability = TRUE)

# The two cases are rows n and n + 1 of the training data, i.e. the last
# Orange observation and the first Green observation.
gendat <- trndat[n:(n + 1), ]
gendat
pred.notscaled <- predict(svm.notscaled, gendat,
                          decision.values = TRUE, probability = TRUE)

# Now I'll use svm function w/ scale = TRUE and get
# posterior probabilities for same two cases.
# (This fit must use scale = TRUE; with scale = FALSE it would just repeat
# the call above and the comparison below would be meaningless.)
svm.scaled <- svm(as.factor(g) ~ ., data = trndat,
                  scale = TRUE, probability = TRUE)
pred.scaled <- predict(svm.scaled, gendat,
                       decision.values = TRUE, probability = TRUE)

# It's bothersome that the results aren't the same.
# Let's look at some of the values used to get the
# posterior probabilities.
# NOTE(review): the predictors were already standardised by hand, so the two
# fits see essentially the same inputs; presumably the remaining differences
# come from how the probability model (probA / probB) is estimated -- confirm
# against the e1071 documentation.
svm.notscaled$probA
svm.scaled$probA
svm.notscaled$probB
svm.scaled$probB
pred.notscaled
pred.scaled