#
# Bayesian Gaussian Process Regression
# For STAT8810
# Fall, 2017
# M.T. Pratola

# Bayesian Implementation of GP Regression Model
#
# Data: y(xi) = eta(xi), i=1,...,n ; xi = 1xK vector
#
# Likelihood: L(Y|.) propto. det(E)^(-1/2)*exp(-(1/2)(Y')(Einv)(Y)) ; Y=[y(x1),...,y(xn)]
# where,
# E = 1/lambdaz * [R(.,.;rhoz)]
#
# Priors:
# pi(lambdaz) propto. lambdaz^(az-1)*exp(-bz*lambdaz)       [Gamma prior]
# pi(rhoz_k)  propto. rhoz_k^(rhoa-1)*(1-rhoz_k)^(rhob-1)   [Beta prior]
#
# Posterior:
# pi(.|Y) propto. L(Y|.)*pi(lambdaz)*pi(rhoz_1)*...*pi(rhoz_K)
#
# We fit the model using MCMC, with adaptive stepwidths for the uniform
# proposal distributions of the correlation parameters.
#
# Note: requires dace.sim.r for the correlation function (rhogeodacecormat).


# Log posterior density (up to an additive constant)
# y       - observed response
# lambdaz - marginal precision
# rhoz    - vector of K correlation parameters
# l.dez   - distances in my format
# pi      - list of prior parameters (note: masks base R's constant pi here)
logp<-function(y,lambdaz,rhoz,l.dez,pi)
{
  az=pi$az; bz=pi$bz

  if(any(rhoz<=0) || any(rhoz>=1)) logposterior=-Inf
  else
  {
    n=length(y)
    k=length(rhoz)
    In=diag(n)
    Rz=rhogeodacecormat(l.dez,rhoz)$R
    Ez=1/lambdaz*(Rz+In*1e-14)   # small jitter for numerical stability
    cholEz=chol(Ez)
    Ezinv=chol2inv(cholEz)
    logdetEz=2*sum(log(diag(cholEz)))

    # log likelihood + log priors
    logpz=-1/2*logdetEz-1/2*t(y)%*%Ezinv%*%(y)
    logplambdaz=(az-1)*log(lambdaz)-bz*lambdaz
    logprhoz=0
    for(i in 1:k)
      logprhoz=logprhoz+(pi$rhoa[i]-1)*log(rhoz[i])+(pi$rhob[i]-1)*log(1-rhoz[i])
    logposterior=logpz+logplambdaz+logprhoz
  }
  logposterior
}


# Gibbs/MCMC Sampler
# y     - observed response
# l.dez - distances in my format
# N     - number of Gibbs/MCMC iterations for the sampler
# pi    - prior parameters, e.g. pi=list(az=5,bz=5,rhoa=rep(1,k),rhob=rep(5,k))
# mh    - M-H proposal stepwidth, e.g. mh=list(rr=.07)
# last  - number of posterior samples to report
# adapt - adapt the proposal stepwidths during burn-in?
regress<-function(y,l.dez,N,pi,mh,last=1000,adapt=TRUE)
{
  if(N<2000) stop("N>=2000 please\n")
  last=last-1
  n=length(y)
  k=length(l.dez)
  draw.lambdaz=rep(NA,N)
  draw.rhoz=matrix(NA,nrow=N,ncol=k)
  rr=rep(mh$rr,k)

  # initial guesses
  draw.lambdaz[1]=1/var(y)
  draw.rhoz[1,]=rep(.5,k)

  # accept/reject ratio trackers
  accept.rhoz=rep(0,k)
  lastadapt=0

  In=diag(n)
  one=rep(1,n)
  rhoz.new=rep(NA,k)

  cat("\n Bayesian Gaussian Process Interpolation model")
  cat("\n The last ",last+1," samples from the posterior will be reported")
  cat("\n The stepwidth for uniform corr. param proposal distn is rr=",rr)
  cat("\n Prior params: az=",pi$az," bz=",pi$bz)
  cat("\n ----------------------------------------------------------------\n\n\n")

  for(i in 2:N)
  {
    # Draw the correlation parameters (Metropolis-Hastings steps)
    draw.rhoz[i,]=draw.rhoz[i-1,]
    for(j in 1:k)
    {
      rhoz.new=draw.rhoz[i,]
      rhoz.new[j]=runif(1,draw.rhoz[i-1,j]-rr[j],draw.rhoz[i-1,j]+rr[j])
      a=min(0,logp(y,draw.lambdaz[i-1],rhoz.new,l.dez,pi)
              -logp(y,draw.lambdaz[i-1],draw.rhoz[i,],l.dez,pi))
      if(log(runif(1))<a)
      { # accept the proposal
        draw.rhoz[i,j]=rhoz.new[j]
        accept.rhoz[j]=accept.rhoz[j]+1
      }
    }

    # Draw the marginal precision lambdaz (Gibbs step; the Gamma prior is
    # conjugate, giving a Gamma(az+n/2, bz+(1/2)y'Rzinv y) full conditional)
    Rz=rhogeodacecormat(l.dez,draw.rhoz[i,])$R
    Rzinv=chol2inv(chol(Rz+In*1e-14))
    draw.lambdaz[i]=rgamma(1,shape=pi$az+n/2,rate=pi$bz+1/2*t(y)%*%Rzinv%*%y)

    # Periodically adapt the proposal stepwidths toward a ~.44 acceptance
    # rate, but only during burn-in so the reported samples are untouched.
    if(adapt && i%%500==0 && i<(N-last))
    {
      rate.rhoz=accept.rhoz/(i-lastadapt)
      for(j in 1:k)
        if(rate.rhoz[j]>.49 || rate.rhoz[j]<.39) rr[j]=rr[j]*rate.rhoz[j]/.44
      lastadapt=i
      accept.rhoz=rep(0,k)
    }
  }

  cat("\n Complete.")
  rate.rhoz=accept.rhoz/(i-lastadapt)
  cat("[rate.rhoz=",rate.rhoz,"]\n")

  return(list(y=y,lambdaz=draw.lambdaz[(N-last):N],rhoz=as.matrix(draw.rhoz[(N-last):N,])))
}
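
# A minimal usage sketch for regress(): it assumes dace.sim.r also provides
# makedistlist() for constructing the distance-list format that
# rhogeodacecormat() expects. Wrapped in if(FALSE) so sourcing this file
# does not launch an MCMC run.
if(FALSE)
{
  source("dace.sim.r")
  set.seed(88)
  n=10; k=1
  design=as.matrix(runif(n))          # n design points in [0,1]
  y=sin(2*pi*design[,1])              # toy response
  l.dez=makedistlist(design)          # assumed helper from dace.sim.r
  prior=list(az=5,bz=5,rhoa=rep(1,k),rhob=rep(5,k))
  mh=list(rr=.07)
  fit=regress(y,l.dez,N=5000,pi=prior,mh=mh,last=1000,adapt=TRUE)
}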
# Draw realizations from the posterior predictive distribution
# l.v    - distances in my format, where the first n entries are for the
#          observed data and the remaining are the prediction locations
# fit    - output from running regress()
# eps.yy - fudge factor for inverting the observation correlation matrix
# eps    - fudge factor for inverting the conditional covariance matrix
predict<-function(l.v,fit,eps.yy=0,eps=1e-6)
{
  n=length(fit$y)            # num observations
  m=nrow(l.v[[1]][[1]])-n    # num prediction points
  N=length(fit$lambdaz)
  draw.preds=matrix(0,nrow=N,ncol=m)

  for(i in 1:N)
  {
    Rall=rhogeodacecormat(l.v,fit$rhoz[i,])$R

    # Extract the sub-matrices we need
    Ryy=Rall[1:n,1:n]
    Rgg=Rall[(n+1):(n+m),(n+1):(n+m)]
    Rgy=Rall[(n+1):(n+m),1:n]
    Ryy.inv=chol2inv(chol(Ryy+diag(n)*eps.yy))

    # Mean of conditional distribution:
    m.cond=Rgy%*%Ryy.inv%*%fit$y
    # Covariance of conditional distribution:
    E.cond=fit$lambdaz[i]^(-1)*(Rgg-Rgy%*%Ryy.inv%*%t(Rgy))

    # Let's generate a realization!
    L=t(chol(E.cond+diag(m)*eps))
    u=rnorm(m)
    draw.preds[i,]=m.cond+L%*%u
  }

  return(list(preds=draw.preds))
}
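
# Continuing the usage sketch above for predict(): this assumes makedistlist()
# can build the joint distance list over the stacked observed and prediction
# locations, with the n observed points listed first as predict() requires.
if(FALSE)
{
  design.pred=as.matrix(seq(0,1,length=20))
  l.v=makedistlist(rbind(design,design.pred))   # assumed helper from dace.sim.r
  pred=predict(l.v,fit,eps.yy=1e-14,eps=1e-6)

  # Posterior predictive mean and pointwise 95% intervals:
  pred.mean=colMeans(pred$preds)
  pred.lb=apply(pred$preds,2,quantile,probs=.025)
  pred.ub=apply(pred$preds,2,quantile,probs=.975)
}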