# C.A.L. Bailer-Jones
# Astrostats 2013
# This file: model_comparison_2.R
# R code to calculate evidence and K-fold CV likelihood for sinusoidal
# and OU process models on a real data set.

library(gplots) # for plotCI()
source("sinusoidal.R")
source("OUprocess.R")
source("monte_carlo.R")
source("kfoldCV.R")

########## Read in data

# Available light-curve files; exactly one read.table() line should be active:
#obsdata <- read.table("../data/g-band-LC_1001265.dat", header=FALSE)
#obsdata <- read.table("../data/g-band-LC_1002162.dat", header=FALSE)
obsdata <- read.table(file="../data/g-band-LC_208035.dat", header=FALSE)

# The models assume zero-mean data, so remove the mean of the signal (column 3)
obsdata[[3]] <- obsdata[[3]] - mean(obsdata[[3]])

# Plot signal (column 3) against time (column 1), using column 4 as the
# half-width of the error bars in y
plotCI(x=obsdata[[1]], y=obsdata[[3]], uiw=obsdata[[4]], err="y",
       xlab="time", ylab="signal",
       pch=18, type="n", gap=0, sfrac=0.01)

########## Set priors and fixed parameters

# Peak-to-peak range of the signal (column 3 of obsdata)
zrange <- diff(range(obsdata[,3]))
# Sinusoidal model hyperparameters: c(sd_a1, sd_a2, scale_freq)
alphaSinusoidal <- 0.5*c(zrange/2, zrange/2, 1/1000)
#
# OU process fixed parameters: c(zstartmean, zstartsd).
# This has to be a two-element vector. Previously the whole signal column was
# concatenated here (length nrow(obsdata)+1), so any consumer reading the
# second element would have picked up the second observation as zstartsd.
# NOTE(review): assumes the intended start mean is the first observed signal
# value with zero start standard deviation -- confirm against OUprocess.R.
ouprocFixed <- c(obsdata[1,3], 0)
# Time scale: one tenth of the total time span of the data (column 1)
tscale <- diff(range(obsdata[,1]))/10
# OU process hyperparameters: c(scale_diffcon, scale_relax)
alphaOUprocess <- 0.5*c(2*var(obsdata[,3])/tscale, tscale)

########## Calculate evidences

# The evidence of each model is estimated as the mean of the likelihood over
# samples drawn from the prior. The loglike.* values are treated as log10
# likelihoods, so the average is done in the log domain: with very negative
# log10 likelihoods, 10^logLike underflows to zero for every sample, which
# would give zero evidences and a NaN Bayes factor.

# Return log10(mean(10^logL)), computed by factoring out the largest value
log10MeanPow10 <- function(logL) {
  logLmax <- max(logL)
  # All -Inf (zero likelihood everywhere), +Inf, or NA/NaN: nothing to rescale,
  # and the result equals what log10(mean(10^logL)) would give directly
  if(!is.finite(logLmax)) {
    return(logLmax)
  }
  logLmax + log10(mean(10^(logL - logLmax)))
}

set.seed(100)
# sinusoidal model
Nsamp <- 1e4
priorSamples <- sampleprior.sinusoidal(Nsamp, alphaSinusoidal)
logLike <- numeric(Nsamp)
for(i in seq_len(Nsamp)) {
  logLike[i] <- loglike.sinusoidal(priorSamples[i,], obsdata)
}
logEvSM <- log10MeanPow10(logLike)
evSM <- 10^logEvSM # linear evidence (may underflow; use logEvSM for reporting)
# OUprocess
Nsamp <- 1e4
priorSamples <- sampleprior.OUprocess(Nsamp, alphaOUprocess)
logLike <- numeric(Nsamp)
for(i in seq_len(Nsamp)) {
  logLike[i] <- loglike.OUprocess(priorSamples[i,], obsdata, ouprocFixed)
}
logEvOU <- log10MeanPow10(logLike)
evOU <- 10^logEvOU # linear evidence (may underflow; use logEvOU for reporting)
#
# Report from the log-domain values so the ratio stays finite even when the
# individual linear evidences underflow
cat("Bayes factor [OUprocess/sinusoidal] = ", 10^(logEvOU - logEvSM), "\n")
cat("log10 Bayes factor [OUprocess - sinusoidal] = ", logEvOU - logEvSM, "\n")
cat("log10 Evidences [OUprocess, sinusoidal] = ", logEvOU, logEvSM, "\n")

########## Calculate K-fold CV likelihoods

# Both models use 10 partitions, 1e3 burn-in iterations and 1e4 samples.
set.seed(100)

# --- sinusoidal model, parameters c(a1, a2, freq)
# Start the chain at the hyperparameter values; proposal SDs are those values / 50
thetaInit <- alphaSinusoidal
sampleCov <- make.covariance.matrix(sampleSD=alphaSinusoidal/50, sampleCor=0)
kcvSM <- kfoldcv(
  Npart=10,
  obsdata=obsdata,
  logpost=logpost.sinusoidal,
  loglike=loglike.sinusoidal,
  sampleCov=sampleCov,
  thetaInit=thetaInit,
  Nburnin=1e3,
  Nsamp=1e4,
  alpha=alphaSinusoidal
)

# --- OUprocess model, parameters c(diffcon, relax)
# Start the chain at the hyperparameter values; proposal SDs equal those values
thetaInit <- alphaOUprocess
sampleCov <- make.covariance.matrix(sampleSD=alphaOUprocess, sampleCor=0)
kcvOU <- kfoldcv(
  Npart=10,
  obsdata=obsdata,
  logpost=logpost.OUprocess,
  loglike=loglike.OUprocess,
  sampleCov=sampleCov,
  thetaInit=thetaInit,
  Nburnin=1e3,
  Nsamp=1e4,
  ouprocFixed=ouprocFixed,
  alpha=alphaOUprocess
)

# --- report both values and their difference
cat("log10 K-fold CV likelihood [OUprocess, sinusoidal]", kcvOU, kcvSM, "\n")
cat("Difference log10 K-fold CV likelihood [OUprocess - sinusoidal]", kcvOU - kcvSM, "\n")

######### Calculate posterior PDFs

# Ideally the MCMC used in the K-fold CV likelihood would be checked by plotting
# the samples and posterior PDFs for each data partition used there. Sampling the
# whole data set, as done here, still gives a good idea of how the chain behaves.

# sinusoidal model: c(a1, a2, freq)
sampleCov <- make.covariance.matrix(sampleSD=alphaSinusoidal/25, sampleCor=0)
thetaInit <- alphaSinusoidal
postSamp <- metrop(func=logpost.sinusoidal, thetaInit=thetaInit, Nburnin=1e3,
                   Nsamp=1e4, verbose=1e3, sampleCov=sampleCov,
                   obsdata=obsdata, alpha=alphaSinusoidal)
parnames <- c("a1", "a2", "freq")
# The parameter samples occupy the columns of postSamp after the first two,
# in the same order as parnames
parOffset <- 2
# Iteration index for the trace plots; seq_len() is safe for an empty chain
iteration <- seq_len(nrow(postSamp))
# One row per parameter: trace plot on the left, density estimate on the right
par(mfrow=c(length(parnames),2), mar=c(3.0,3.0,0.5,0.5), oma=c(1,1,1,1),
    mgp=c(1.8,0.6,0), cex=1.0)
for(j in seq_along(parnames)) {
  p <- j + parOffset # column of postSamp holding parameter j
  plot(iteration, postSamp[,p], type="l", xlab="iteration", ylab=parnames[j])
  postDen <- density(postSamp[,p], n=2^10)
  plot(postDen$x, postDen$y, type="l", xlab=parnames[j], ylab="density")
}

