# C.A.L. Bailer-Jones
# Astrostats 2013
# This file: model_comparison_1.R
# R code to calculate evidence and K-fold CV likelihood for linear and quadratic models

library(gplots) # for plotCI()
source("linearmodel.R")
source("quadraticmodel.R")
source("monte_carlo.R")
source("kfoldCV.R")

########## Define true model and simulate experimental data from it

set.seed(50)
Ndat <- 10
xrange <- c(0,10)
x <- sort(runif(Ndat, xrange[1], xrange[2]))
sigTrue <- 1
modMat <- c(0,1,0.3) # 1 x P vector: coefficients, a_p, of polynomial sum_{p=0} a_p*x^p
y <- cbind(1,x,x^2) %*% as.matrix(modMat) + rnorm(Ndat, 0, sigTrue)
# Dimensions in matrix multiplication: [Ndat x 1] = [Ndat x P] %*% [P x 1] + [Ndat]
# cbind does the logical thing combining a scalar and vector; then vector addition
y <- drop(y) # converts into a vector
par(mfrow=c(1,1))
plotCI(x, y, xlim=xrange, uiw=sigTrue, gap=0)
# plot true model
xsamp <- seq(from=xrange[1], to=xrange[2], length.out=500)
ysamp <- cbind(1,xsamp,xsamp^2) %*% as.matrix(modMat)
lines(xsamp, drop(ysamp), col="red", lw=2)
# True parameters, transformed to be conformable with model to be used below
thetaTrue <- c(modMat[1], atan(modMat[2]), modMat[3], log10(sigTrue))
obsdata <- data.frame(cbind(x,y)) # only this is used in the analysis 
rm(x,y,xsamp,ysamp)

########## Calculate evidences

set.seed(100)
# linear model
Nsamp <- 1e5
priorSamples <- sampleprior.linearmodel(Nsamp)
logLike <- vector(length=Nsamp)
for(i in 1:Nsamp) {
  logLike[i] <- loglike.linearmodel(priorSamples[i,], obsdata)
}
evLM <- mean(10^logLike)
# quadratic model
Nsamp <- 1e5
priorSamples <- sampleprior.quadraticmodel(Nsamp)
logLike <- vector(length=Nsamp)
for(i in 1:Nsamp) {
  logLike[i] <- loglike.quadraticmodel(priorSamples[i,], obsdata)
}
evQM <- mean(10^logLike)
#
cat("Bayes factor [Quadratic/Linear] = ", evQM/evLM, "\n")
cat("log10 Bayes factor [Quadratic - Linear] = ", log10(evQM/evLM), "\n")

########## Calculate K-fold CV likelihoods

set.seed(100)
# linear model: c(a_0, alpha, ysig)
sampleCov <- make.covariance.matrix(sampleSD=c(0.1, 0.02, 0.1), sampleCor=0) 
thetaInit <- c(2, pi/8, log10(3))
kcvLM <- kfoldcv(Npart=5, obsdata=obsdata, logpost=logpost.linearmodel,
                 loglike=loglike.linearmodel, sampleCov=sampleCov, thetaInit=thetaInit,
                 Nburnin=1e3, Nsamp=1e4)
# quadratic model: c(a_0, alpha, a_2, log10(ysig))
sampleCov <- make.covariance.matrix(sampleSD=c(0.1, 0.02, 0.01, 0.1), sampleCor=0)
thetaInit <- c(2, pi/8, 0, log10(3))
kcvQM <- kfoldcv(Npart=5, obsdata=obsdata, logpost=logpost.quadraticmodel,
                 loglike=loglike.quadraticmodel, sampleCov=sampleCov, thetaInit=thetaInit,
                 Nburnin=1e3, Nsamp=1e4)
#
cat("Difference log10 K-fold CV likelihood [Quadratic - Linear]", kcvQM - kcvLM, "\n")
