# R scripts for the lecture course
# Machine Learning, pattern recognition and statistical data modelling
# Coryn A.L. Bailer-Jones, 2007


# Model selection and combination
# -------------------------------

library(mclust)
# Note that this package reports the negative of the conventional BIC, i.e.
# 2*loglik - npar*log(n). The optimal model therefore has the LARGEST reported
# value. (Annoyingly, they still call this the BIC.)

# Basic run: look at the raw Old Faithful data first
plot(x = faithful$eruptions, y = faithful$waiting)

# Let Mclust search over its full default range of models
fc <- Mclust(faithful)

# Classification plot of the selected model
# NOTE(review): `identify` is an argument of older mclust releases -- confirm
# that the installed version still accepts it.
mclust2Dplot(
  data = faithful,
  parameters = fc$parameters,
  z = fc$z,
  what = "classification",
  identify = TRUE
)

# Plots of the fit; note the BIC plot actually shows -BIC, not BIC
plot(fc, data = faithful)

# List the attributes of the fitted object
attributes(fc)

# Run an explicit BIC search over 1 to 10 components to obtain further
# information, in particular the best 3 models
fc.bic <- mclustBIC(faithful, G = 1:10)
summary(fc.bic, data = faithful)
# We see that a 3-cluster EEE model is slightly better than a 2-cluster VVV
# model. fc.bic does not seem to give full details on all solutions, so fit
# each of these two models specifically.
fc_3eee <- Mclust(faithful, G = 3, modelNames = "EEE")
fc_2vvv <- Mclust(faithful, G = 2, modelNames = "VVV")

# Show both fits side by side. par() returns the previous settings, which are
# restored afterwards so that later plots are not stuck in a 1x2 layout.
old_par <- par(mfrow = c(1, 2))
mclust2Dplot(data = faithful, parameters = fc_3eee$parameters, z = fc_3eee$z)
mclust2Dplot(data = faithful, parameters = fc_2vvv$parameters, z = fc_2vvv$z)
par(old_par)
# Inspect the fitted parameters and count the free parameters of each model
# (the data are d = 2 dimensional).
fc_3eee$parameters
# EEE, G = 3: 3 parameters in the single shared covariance matrix;
# 2 means (x,y) and 1 weight per cluster, minus 1 as the weights sum to 1 (= 8).
# Total = 11
fc_2vvv$parameters
# VVV, G = 2: 6 parameters in the two covariance matrices;
# 2 means (x,y) and 1 weight per cluster, minus 1 as the weights sum to 1 (= 5).
# Total = 11
#
# A symmetric d x d covariance matrix has d*(d+1)/2 free parameters, so
# generally we have
#   G*d*(d+1)/2 + d*G + G - 1  =  G*d*(d+3)/2 + G - 1   for the VVV case
#   and d*(d+1)/2 + d*G + G - 1                          for the EEE case
# (both evaluate to 11 for the two models above).

# From the equation for BIC (and recalling that Mclust gives -BIC, i.e.
# bic = 2*loglik - npar*log(n)) we can extract the number of parameters:
(2 * fc_2vvv$loglik - fc_2vvv$bic) / log(fc_2vvv$n)            # = 11
(2 * fc_3eee$loglik - fc_3eee$bic) / log(fc_3eee$n)            # = 11