This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.

Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).

The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.

History matching waves demonstration

History matching process (for each separate output):

generate initial design
generate a target point
generate a uniform sample (for visulasation and calculating space sizes)

for each output:
- run the model at the design points
- run the model at the target point
- fit a dicekriging km() to each output
- calculate the implausibility of each point in the uniform sample
- calculate the implausibility of each point in the augmented lhs
- return the model fit and some fit statistics, both sets of implausibility calculations, and the new design

library(lhs)
library(DiceKriging)
source('https://raw.githubusercontent.com/dougmcneall/packages-git/5f79ffe749f25c6fc39f4f7925e1538d36b7caf1/emtools.R')
source('https://raw.githubusercontent.com/dougmcneall/packages-git/5f79ffe749f25c6fc39f4f7925e1538d36b7caf1/imptools.R')
source('https://raw.githubusercontent.com/dougmcneall/packages-git/5f79ffe749f25c6fc39f4f7925e1538d36b7caf1/vistools.R')

source('hmwave_demo_0.fun.R')

Test functions from the Virtual Library of Simulation Experiments

These are combined together to make multivariate output. The key is that all the functions use all four inputs, and each one produces a single deterministic output.

Run the model a large number of times and visualise the output here.

initial design

Set up an initial design, sampling sparsely from the input domain (as if the model was expensive to run).
Experiment with n (initial design points), n.app (number of points to augment the design with in each wave), and uncertainty budget.

n = 10 # number of initial design points
k = 4  # number of model inputs
d = 3  # number of model outputs

# simplest method is to choose a subset of the discovered NROY
# points to append to the design

n.app = 10 # number of points to append

X = maximinLHS(n, k = k,dup = 2)
colnames(X) <- c('x1', 'x2', 'x3', 'x4')

X.target = matrix(runif(4), nrow = 1)

# run the model outside of the function
#Y = run.model(X)
#Y.target = run.model(X.target)


Y.raw = run.model(X)
# Mean and standard deviation of the raw output

print(c('Output mean = ', c(round(apply(Y.raw ,2, mean),2))))
##                                y1               y2               y3 
## "Output mean = "          "11.38"           "7.43"           "2.26"
print(c('Output SD = ', round(apply(Y.raw,2,sd),2)))
##                            y1             y2             y3 
## "Output SD = "          "4.8"         "4.31"         "1.15"
Y.target.raw = run.model(X.target)

print(c('Output target = ', round(Y.target.raw),2))
## [1] "Output target = " "6"                "4"               
## [4] "1"                "2"
Y.all.norm = normalize(rbind(Y.raw, Y.target.raw))
Y = Y.all.norm[1:n, ]
Y.target = matrix(Y.all.norm[n+1, ], nrow = 1)

print(c('Normalised output mean = ', round(apply(Y.all.norm ,2, mean),2)))
##                                                      y1 
## "Normalised output mean = "                      "0.44" 
##                          y2                          y3 
##                       "0.4"                      "0.31"
print(c('Normalised output SD = ', round(apply(Y.all.norm,2,sd),2)))
##                                                  y1 
## "Normalised output SD = "                    "0.29" 
##                        y2                        y3 
##                    "0.37"                    "0.27"
print(c('Output target = ', round(Y.target,2)))
## [1] "Output target = " "0.16"             "0.11"            
## [4] "0.02"
# The standard deviation of the model output is approximately [5,5,1] and the
# mean is around [12,8,2]

# With a small number of design points, the target is often at one of the margins of the normalised space

Model output

The model input is 4 dimensional and output is 3 dimensional, which is hard to visualise. This is what happens when you can run the model as many times as you like, and vary the inputs one-at-a-time.

n.sens = 21
X.oaat = oaat.design(X, n.sens, hold = X.target)
Y.oaat = run.model(X.oaat)
linecols = c('black', 'red', 'blue')


ylim = c(0,1)
xlim = c(0,1)

par(mfrow = c(2,2), mar = c(4,4,2,1), oma = c(0.5,0.5, 3, 0.5))
ndat = ncol(Y.oaat)
d = ncol(X.oaat)
for(i in 1:d){
  ix = seq(from = ((i*n.sens) - (n.sens-1)), to =  (i*n.sens), by = 1)
  plot(X.oaat[ix,i], Y.oaat[ix,2],
       type = 'n',
       ylab= 'Y', axes = FALSE,
       main = '',
       xlab = paste0('X',i),
       xlim = c(0,1), ylim = c(0,20))
  
  for(j in 1:ndat){

    y.oaat = Y.oaat[ix,j]
    lines(X.oaat[ix,i],y.oaat, col = linecols[j], lwd = 2) 
  }
  axis(1, col = 'grey', col.axis = 'grey', las = 1, title = 'X')
  axis(2, col = 'grey', col.axis = 'grey', las = 1)
 # mtext(3, text = colnames(lhs)[i], line = 0.2, cex = 0.7)  
}
## Warning in axis(1, col = "grey", col.axis = "grey", las = 1, title = "X"):
## "title" is not a graphical parameter

## Warning in axis(1, col = "grey", col.axis = "grey", las = 1, title = "X"):
## "title" is not a graphical parameter

## Warning in axis(1, col = "grey", col.axis = "grey", las = 1, title = "X"):
## "title" is not a graphical parameter

## Warning in axis(1, col = "grey", col.axis = "grey", las = 1, title = "X"):
## "title" is not a graphical parameter
reset()
legend('top',
       legend = colnames(Y), 
       col = linecols,
       cex = 0.8,
       lwd = 2,
       horiz = TRUE)

#dev.off()
# Assume the entire uncertainty budget is in the observational uncertainty initially
obs.sd.list = list(0.013,0.013,0.013)
disc.list = list(0,0,0) 
disc.sd.list = list(0, 0, 0) 

thres = 3   # implausibility threshold

mins.aug = rep(0,4)
maxes.aug = rep(1,4)

# Improvements to the above function from Andrianakis et al. (2015)
# 1) Reduce the range the emulator is fit over, as the waves continue.
# 2) Sample candidate design points from NEAR the existing NROY design points.

Do the History matching in 6 waves

wave1 = add.nroy.design.points(X = X, 
                               Y = Y, 
                               Y.target = Y.target,
                               n.aug = 100000, 
                               mins.aug = mins.aug,
                               maxes.aug = maxes.aug,
                               thres = 3,
                               disc.list=disc.list,
                               disc.sd.list = disc.sd.list,
                               obs.sd.list = obs.sd.list)

# Exclude any design points outside of the ranges returned as NROY


keep1.ix = which(apply(X, FUN = WithinRange,1,
                      maxes = wave1$X.nroy.max,
                      mins = wave1$X.nroy.min))

X2 = rbind(X[keep1.ix, ] , ChooseMaximinNroy(n.app = n.app, waveobj=wave1, nreps = 10000))
Y2.raw = run.model(X2)
Y2 = normalize(Y2.raw, wrt = Y.raw)

wave2 = add.nroy.design.points(X = X2,
                               Y = Y2, 
                               Y.target = Y.target,
                               n.aug = 50000,
                               mins.aug = wave1$X.nroy.min,
                               maxes.aug = wave1$X.nroy.max,
                               thres = 3,
                               disc.list = disc.list,
                               disc.sd.list = disc.sd.list,
                               obs.sd.list = obs.sd.list)

keep2.ix = which(apply(X2, FUN = WithinRange,1,
                       maxes = wave2$X.nroy.max,
                       mins = wave2$X.nroy.min))

X3 = rbind(X2[keep2.ix, ], ChooseMaximinNroy(n.app = n.app, waveobj=wave2, nreps = 10000))

Y3.raw = run.model(X3)
Y3 = normalize(Y3.raw, wrt = Y.raw)

wave3 = add.nroy.design.points(X = X3, 
                               Y = Y3,
                               Y.target = Y.target,
                               n.aug = 50000,
                               mins.aug = wave2$X.nroy.min,
                               maxes.aug = wave2$X.nroy.max,
                               thres = 3,
                               disc.list = disc.list,
                               disc.sd.list = disc.sd.list,
                               obs.sd.list = obs.sd.list)

keep3.ix = which(apply(X3, FUN = WithinRange,1,
                       maxes = wave3$X.nroy.max,
                       mins = wave3$X.nroy.min))

X4 = rbind(X3[keep3.ix, ],
           ChooseMaximinNroy(n.app = n.app, waveobj=wave3, nreps = 10000))

Y4.raw = run.model(X4)
Y4 = normalize(Y4.raw, wrt = Y.raw)

wave4 = add.nroy.design.points(X = X4, 
                               Y = Y4,
                               Y.target = Y.target,
                               n.aug = 30000,
                               mins.aug = wave3$X.nroy.min,
                               maxes.aug = wave3$X.nroy.max,
                               thres = 3,
                               disc.list = disc.list,
                               disc.sd.list = disc.sd.list,
                               obs.sd.list = obs.sd.list)

keep4.ix = which(apply(X4, FUN = WithinRange,1,
                       maxes = wave4$X.nroy.max,
                       mins = wave4$X.nroy.min))

X5 = rbind(X4[keep4.ix, ],
           ChooseMaximinNroy(n.app = n.app, waveobj=wave4, nreps = 10000))

Y5.raw = run.model(X5)
Y5 = normalize(Y5.raw, wrt = Y.raw)


wave5 = add.nroy.design.points(X = X5,
                               Y = Y5,
                               Y.target = Y.target,
                               n.aug = 30000, 
                               mins.aug = wave4$X.nroy.min,
                               maxes.aug = wave4$X.nroy.max,
                               thres = 3,
                               disc.list = disc.list,
                               disc.sd.list = disc.sd.list,
                               obs.sd.list = obs.sd.list)

keep5.ix = which(apply(X5, FUN = WithinRange,1,
                       maxes = wave5$X.nroy.max,
                       mins = wave5$X.nroy.min))

X6 = rbind(X5[keep5.ix, ], ChooseMaximinNroy(n.app = n.app, waveobj=wave5, nreps = 10000))

Y6.raw = run.model(X6)
Y6 = normalize(Y6.raw, wrt = Y.raw)


wave6 = add.nroy.design.points(X = X6,
                               Y = Y6,
                               Y.target = Y.target, 
                               n.aug = 30000, 
                               mins.aug = wave5$X.nroy.min,
                               maxes.aug = wave5$X.nroy.max,
                               thres = 3,
                               disc.list = disc.list,
                               disc.sd.list = disc.sd.list,
                               obs.sd.list = obs.sd.list)

Visualize the results

all.nroy = rbind(wave1$X.nroy, wave2$X.nroy, wave3$X.nroy, wave4$X.nroy, wave5$X.nroy, wave6$X.nroy, X.target)

transp = 100

cols = c(rep(makeTransparent('black',transp),nrow(wave1$X.nroy)), 
         rep(makeTransparent('red',transp),nrow(wave2$X.nroy)),
         rep(makeTransparent('purple',transp),nrow(wave3$X.nroy)),
         rep(makeTransparent('green', transp),nrow(wave4$X.nroy)),
         rep(makeTransparent('grey', transp),nrow(wave5$X.nroy)),
         rep(makeTransparent('blue',transp),nrow(wave6$X.nroy)),
          rep('gold', 1)
         
)
cex = c(rep(1, nrow(all.nroy)-1), 2)
  
pairs(all.nroy, xlim = c(0,1), ylim = c(0,1), col = cols, cex = cex, pch = 21)

How much does the volume of input space reduce with each wave?
Very often we find no real gain (or even a degradation) after the first 3 waves.

This last bit isn’t right - I think the function draws from a smaller space each time so It’s not a fair comparison. Need a function that estimates the NROY proportion from the initial space - probably using importance sampling. Check other HM papers to see how this is done.

waves_list = list(wave1, wave2, wave3, wave4 ,wave5, wave6)

GetNroyProp = function(waveobj){
  nrow(waveobj$X.nroy) / nrow(waveobj$X.aug)
}

wave.prop = lapply(waves_list, FUN = GetNroyProp)

par(las = 1)
plot(1:length(waves_list), c(wave.prop, recursive = TRUE) * 100, type = 'b',
     xlab = 'Wave', ylab = 'NROY proportion (%)')