# C.A.L. Bailer-Jones
# Astrostats 2013
# This file: exp_quadraticmodel.R
# R code to do Bayesian inference of a 4-parameter quadratic model to 2D data

library(gplots) # for plotCI()
source("monte_carlo.R") # provides metrop() and make.covariance.matrix()
source("quadraticmodel.R") # provides logpost.quadraticmodel()

########## Define true model and simulate experimental data from it

set.seed(70)
Ndat <- 10
xrange <- c(0,10)
x <- sort(runif(Ndat, xrange[1], xrange[2]))
sigTrue <- 1
modMat <- c(0,1,0.15) # 1 x P vector: coefficients, a_p, of polynomial sum_{p=0} a_p*x^p
y <- cbind(1,x,x^2) %*% as.matrix(modMat) + rnorm(Ndat, 0, sigTrue)
# Dimensions in matrix multiplication: [Ndat x 1] = [Ndat x P] %*% [P x 1] + [Ndat]
# cbind does the logical thing combining a scalar and vector; then vector addition
y <- drop(y) # converts into a vector
pdf("quadraticmodel_data.pdf", width=5, height=4)
par(mfrow=c(1,1), mar=c(3.0,3.0,0.5,0.5), oma=c(0.5,0.5,0.5,0.5), mgp=c(2.2,0.8,0), cex=1.0)
plotCI(x, y, xlim=xrange, uiw=sigTrue, gap=0)
# plot true model
xsamp <- seq(from=xrange[1], to=xrange[2], length.out=500)
ysamp <- cbind(1,xsamp,xsamp^2) %*% as.matrix(modMat)
lines(xsamp, drop(ysamp), col="red", lw=2)
dev.off()
# True parameters, transformed to be conformable with model to be used below
thetaTrue <- c(modMat[1], atan(modMat[2]), modMat[3], log10(sigTrue))
obsdata <- data.frame(cbind(x,y)) # only this is used in the analysis 
rm(x,y)

########## Define model and infer the posterior PDF over its parameters

# Model to infer: quadratic regression with Gaussian noise
# Parameters: intercept a_0, gradient a_1, quadratic term a_2; Gaussian noise sigma, ysig.
# Prior PDFs over model parameters:
# a_0:  intercept. P(a_0) ~ N(mean=m, sd=s)
# a_1:  gradient, a_1 = tan(alpha), alpha is angle between horizontal and model line.
#       P(alpha) ~ 1 [uniform] => P(alpha) ~ 1/(1+tan(alpha)^2) but it is easier if we
#       use alpha (in radians) as the model parameter, alpha = atan(a_1)
# a_2:  quadratic term. P(a_0) ~ N(mean=m, sd=s)
# ysig: Jeffreys prior P(ysig) ~ 1/ysig, or equivalently P(log10(ysig)) ~ 1.
#       This is an "improper prior", which means its integral does not converge.
# theta is 1 x J vector of all model parameters (J=4): theta = c(a_0, alpha, a_2, log10(ysig)).
# The sampling is performed on theta.

# define covariance matrix of MCMC sampling PDF: c(a_0, alpha, a_2, log10(ysig))
sampleCov <- make.covariance.matrix(sampleSD=c(0.1, 0.02, 0.01, 0.1), sampleCor=0) 
# set starting point
thetaInit <- c(2, pi/8, 0, log10(3))
# run the MCMC to find postSamp, samples of the posterior PDF
set.seed(150)
postSamp <- metrop(func=logpost.quadraticmodel, thetaInit=thetaInit, Nburnin=0, Nsamp=5e4,
                   verbose=1e3, sampleCov=sampleCov, obsdata=obsdata)
# 10^(postSamp[,1]+postSamp[,2]) is the unnormalized posterior at each sample

# Plot MCMC chains and use density estimation to plot 1D posterior PDFs from these.
# Note that we don't need to do any explicit marginalization to get the 1D PDFs.
parnames <- c("a_0", "alpha = arctan(a_1)/radians", "a_2", "log10(ysig)")
pdf("quadraticmodel_mcmc.pdf", width=7, height=8)
par(mfrow=c(4,2), mar=c(3.0,3.0,0.5,0.5), oma=c(1,1,1,1), mgp=c(1.8,0.6,0), cex=1.0)
for(p in 3:6) { # columns of postSamp
  plot(1:nrow(postSamp), postSamp[,p], type="l", xlab="iteration", ylab=parnames[p-2])
  postDen <- density(postSamp[,p], n=2^10)
  plot(postDen$x, postDen$y, type="l", xlab=parnames[p-2], ylab="density")
  abline(v=thetaTrue[p-2], col="red")
}
dev.off()

# Plot a_0, a_1, and a_2 samples in 2D
par(mfrow=c(2,2))
plot(postSamp[,3], postSamp[,4], xlab="intercept a_0", ylab="alpha = atan(a_1) / radians", pch=".")
plot(postSamp[,3], postSamp[,5], xlab="intercept a_0", ylab="a_2", pch=".")
plot(postSamp[,4], tan(postSamp[,5]), xlab="alpha = atan(a_1) / radians",
     ylab="gradient a_2", pch=".")

# Find MAP solution and mean solution.
# MAP = Maximum A Posteriori, i.e. peak of posterior.
# MAP is not the peak in each 1D PDF, but the peak of the 4D PDF.
# mean is easy, because samples have been drawn from the (unnormalized) posterior.
posMAP     <- which.max(postSamp[,1]+postSamp[,2]) 
(thetaMAP  <- postSamp[posMAP, 3:6])
(thetaMean <- apply(postSamp[,3:6], 2, mean)) # Monte Carlo integration
# Overplot these solutions with original data and true model
pdf("quadraticmodel_fits.pdf", width=5, height=4)
par(mfrow=c(1,1), mar=c(3.0,3.0,0.5,0.5), oma=c(0.5,0.5,0.5,0.5), mgp=c(2.2,0.8,0), cex=1.0)
plotCI(obsdata$x, obsdata$y, xlab="x", ylab="y", xlim=xrange, uiw=sigTrue, gap=0)
ysamp <- cbind(1,xsamp,xsamp^2) %*% as.matrix(modMat)
lines(xsamp, drop(ysamp), col="red", lw=2) # true model
ysamp <- cbind(1,xsamp,xsamp^2) %*% as.matrix(thetaMAP[1:3]) 
lines(xsamp, drop(ysamp), col="blue", lw=2) # MAP model
ysamp <- cbind(1,xsamp,xsamp^2) %*% as.matrix(thetaMean[1:3]) 
lines(xsamp, drop(ysamp), col="green", lw=2) # mean model
dev.off()
