# R scripts for the lecture course
# Machine Learning, pattern recognition and statistical data modelling
# Coryn A.L. Bailer-Jones, 2007


# Support Vector Machines
# -----------------------

library(e1071)
# Fit an SVM classifier with hand-picked hyperparameters (radial kernel,
# cost = 1, gamma = 1) using only the rows of dat.baspeg indexed by train.cl.
# Start with a quick look at the data dimensions and the training-index length.
dim(dat.baspeg)
length(train.cl)
svm.cl <- svm(
  astroclass ~ .,
  data   = dat.baspeg[train.cl, ],
  kernel = "radial",
  cost   = 1,
  gamma  = 1
)
svm.cl
attributes(svm.cl)

# Predict the class of every row NOT in train.cl (the test set), passing
# columns 2:5 as inputs (presumably the four predictor columns -- confirm).
pred.svm.cl <- predict(svm.cl, newdata = dat.baspeg[-train.cl, 2:5])

# Confusion matrix of raw counts: rows = true class, columns = predicted class.
table(dat.baspeg$astroclass[-train.cl], pred.svm.cl)

# Same confusion matrix expressed as percentages. Nastroclass2 is defined
# elsewhere (presumably the per-class object counts in the test set -- confirm).
noquote(
  format(
    100 * table(dat.baspeg$astroclass[-train.cl], pred.svm.cl) / Nastroclass2,
    digits = 2
  )
)

# Visualise the fitted class regions in the (ri, gr) plane for the training
# data, with support vectors marked. The model has more than two inputs, so
# the remaining ones (iz, zy) are held constant at 0.2 via `slice`.
plot(
  svm.cl,
  data    = dat.baspeg[train.cl, ],
  formula = gr ~ ri,
  slice   = list(iz = 0.2, zy = 0.2)
)

# Now play around and see how number of SVs and train and test errors vary with
# cost, gamma and the kernel function used

# Grid search over gamma (0.1, 1, 10) and cost (1, 5, 10): each of the nine
# combinations is trained on the train.cl rows and scored on the held-out rows,
# which are supplied explicitly as a fixed validation set (sampling = "fix").
svm.cl.tune <- tune(
  svm,
  train.x      = dat.baspeg[train.cl, 2:5],
  train.y      = dat.baspeg[train.cl, 1],
  validation.x = dat.baspeg[-train.cl, 2:5],
  validation.y = dat.baspeg[-train.cl, 1],
  ranges       = list(gamma = c(0.1, 1, 10), cost = c(1, 5, 10)),
  tunecontrol  = tune.control(sampling = "fix")
)

# Inspect the error obtained for every (gamma, cost) combination.
svm.cl.tune$performances

# Classify the held-out rows with the best model found by the search.
# Note: these are the same rows that served as the validation set above, so
# the resulting figures are not an independent estimate of the test error.
astroclass2.predict.svm.cl <- predict(
  svm.cl.tune$best.model,
  newdata = dat.baspeg[-train.cl, 2:5]
)

# Confusion matrix in percent (rows = true class, columns = predicted class),
# normalised by Nastroclass2, which is defined elsewhere.
noquote(
  format(
    100 * table(dat.baspeg$astroclass[-train.cl], astroclass2.predict.svm.cl) /
      Nastroclass2,
    digits = 2
  )
)

