# Exploring a peculiar thing about the svm function

# Developed using R 2.9.0.


library(MASS)     
library(e1071)    # for svm f'n (for support vector machines)
library(mvtnorm)  # for rmvnorm and dmvnorm (for generation 
                  # of multivariate normal observations and
                  # computation of multivariate normal pdf 
                  # values)
    
# Fix the random seed so the simulated data are reproducible.
set.seed(7)


# Simulate training data for three classes.

# n is the number of training cases drawn for each class.
n <- 50

# Sig is the variance-covariance matrix (2 x 2 identity) used for the
# multivariate normal draws; it is printed for inspection.
Sig <- diag(1, nrow = 2)
Sig

# One two-column matrix of bivariate normal draws per class:
# G = Green, O = Orange, B = Blue.
Gxvals <- rmvnorm(n, mean = c(0, 0), sigma = Sig)
Oxvals <- rmvnorm(n, mean = c(0, 0), sigma = Sig)
Bxvals <- rmvnorm(n, mean = c(0, 0), sigma = Sig)

# Both columns of the Orange and Blue matrices are replaced by uniform
# draws, so only Green keeps its normal values.  The rmvnorm() calls for
# Orange and Blue still matter: they provide the matrices being filled
# and consume random numbers ahead of the runif() calls.
Oxvals[, 1] <- runif(n, min = -4, max = 1)
Oxvals[, 2] <- runif(n, min = -1, max = 4)
Bxvals[, 1] <- runif(n, min = -1, max = 4)
Bxvals[, 2] <- runif(n, min = -4, max = 1)

# I'll plot the data.

# Scatterplot of the training data on a fixed [-4, 4] x [-4, 4] window.
# The Orange class is drawn first and sets up the axes; the Blue and
# Green classes are then added on top.
plot(
  x = Oxvals[, 1], y = Oxvals[, 2],
  col = "darkorange", main = "Training Data",
  xlab = "x1", ylab = "x2",
  xlim = c(-4, 4), ylim = c(-4, 4)
)
points(x = Bxvals[, 1], y = Bxvals[, 2], col = "royalblue")
points(x = Gxvals[, 1], y = Gxvals[, 2], col = "limegreen")

#####

# I'll give response value of 0 for Orange class observations, 
# 1 for Green class observations, 2 for the Blue class observations, 
# and then combine them together to form the training data.

# Prepend the numeric class code to each class's predictor matrix:
# Orange -> 0, Green -> 1, Blue -> 2.
Oclass <- cbind(0, Oxvals)
Gclass <- cbind(1, Gxvals)
Bclass <- cbind(2, Bxvals)

# Stack the classes (Orange rows first, then Green, then Blue) into one
# data frame: g is the class code, x1 and x2 are the predictors.
trndat <- data.frame(rbind(Oclass, Gclass, Bclass))
colnames(trndat) <- c("g", "x1", "x2")

#####

# Now I'll scale data.

# Standardise the two predictor columns (columns 2 and 3 of trndat) by
# subtracting the column mean and dividing by the column standard
# deviation.  Column 1 (the class code) is left alone.
trndat[2:3] <- lapply(
  trndat[2:3],
  function(v) (v - mean(v)) / sd(v)
)

# Print the mean and standard deviation of each standardised column as
# a check (expected: approximately 0 and 1).
mean(trndat[[2]])
sd(trndat[[2]])
mean(trndat[[3]])
sd(trndat[[3]])

# Now I'll use svm function w/ scale = FALSE and get 
# posterior probabilities for two cases.

# Fit the classifier on the already-standardised data with svm()'s own
# scaling switched off, and with the probability model enabled so that
# posterior probabilities can be predicted.
svm.notscaled <- svm(
  as.factor(g) ~ .,
  data = trndat,
  scale = FALSE,
  probability = TRUE
)

# The two cases to predict are rows n and n + 1 of the training data,
# i.e. the last Orange case and the first Green case.  They are printed
# for reference.
gendat <- trndat[c(n, n + 1), ]
gendat

# Predicted classes for the two cases, with decision values and
# posterior probabilities requested as well.
pred.notscaled <- predict(
  svm.notscaled,
  newdata = gendat,
  decision.values = TRUE,
  probability = TRUE
)

# Now I'll use svm function w/ scale = TRUE and get 
# posterior probabilities for same two cases.

# Refit the same model on the same data, this time with svm()'s internal
# scaling switched on.  scale must be TRUE here: with scale = FALSE this
# fit would merely repeat the svm.notscaled fit, and the scaled versus
# not-scaled comparison below would be meaningless.
svm.scaled <- svm(as.factor(g) ~ ., trndat, scale = TRUE, probability = TRUE)

# Predictions for the same two cases (gendat), again requesting decision
# values and posterior probabilities.
pred.scaled <- predict(svm.scaled, gendat, decision.values = TRUE,
                       probability = TRUE)

# It's bothersome that the results aren't the same.
# Let's look at some of the values used to get the 
# posterior probabilities.

# probA component of each fitted model, not-scaled fit first.
svm.notscaled[["probA"]]
svm.scaled[["probA"]]

# probB component of each fitted model, in the same order.
svm.notscaled[["probB"]]
svm.scaled[["probB"]]

# Predictions for the two selected cases from each fit; the decision
# values and posterior probabilities requested in predict() are printed
# along with the predicted classes.
pred.notscaled
pred.scaled