History matching wave 0 and generation of new ensemble members for wave 1

To use History Matching, we need to specify targets for various model outputs. We treat these targets as “observations” with an uncertainty, chosen so that the implausibility boundary (a model run more than 3 standard deviations from the target is marked “implausible”) matches the hard boundaries previously identified by A. Wiltshire as desirable/not implausible.

Choose the centre of the (implied) uniform distribution as a target for the history matching.
cs_gb.target = (3000 + 750) / 2 = 1875
cv.target = (800 + 300) / 2 = 550
npp_n_gb.target = (80 + 35) / 2 = 57.5
nbp.target = (10 + (-10)) / 2 = 0

# Load packages, data, do basic constraints on the ensemble
# NOTE(review): Y_level1a, ysec and the helper functions used below are
# presumably defined by these two sourced scripts -- the second path is
# machine-specific (home directory) and will need changing on other systems.
source('JULES-ES-1p0-common.R')
source('~/myRpackages/julesR/vignettes/default_jules_parameter_perturbations.R')
# The four constrained outputs, and their units after scaling.
ynames_const <- c('nbp_lnd_sum', 'npp_nlim_lnd_sum', 'cSoil_lnd_sum', 'cVeg_lnd_sum')
yunits_const <- c('GtC/year', 'GtC/year', 'GtC', 'GtC')
Y_const_level1a <- Y_level1a[, ynames_const]

# Per-column divisors converting raw model output to the units above
# (1e12 kg -> GtC; /ysec presumably converts per-second fluxes to per-year --
# confirm against the sourced script).
scalevec <- c(1e12/ysec, 1e12/ysec, 1e12, 1e12)
Y_const_level1a_scaled <- sweep(Y_const_level1a, 2, STATS = scalevec, FUN = '/' )

# This is a "normalisation vector", for making the output numbers more manageable.
# Columns: cs_gb, cv, gpp_gb, nbp, npp_n_gb, runoff
# NOTE(review): norm_vec is not used in the code below -- kept for reference.
norm_vec <- c(1e12, 1e12, 1e12/ysec, 1e12, 1e12, 1e9)

# Hard constraint boundaries (from A. Wiltshire), in the column order of
# Y_const_level1a_scaled: nbp, npp, csoil, cveg
Y_lower <- c(-10, 35, 750, 300)
Y_upper <- c(10, 80, 3000, 800)

# Target = centre of the implied uniform distribution (midpoint of the
# [lower, upper] interval). abs() guards the half-width against negative limits.
Y_target <- Y_upper - (abs(Y_upper - Y_lower) / 2)

# Standard deviation is derived from the limits and the central target:
# the distance from target to the upper limit is assumed to be 4 standard
# deviations, so the original limits sit at ~ +/- 4 sd. This leaves room for
# uncertainty from the emulator.
Y_sd <- (Y_upper - Y_target) / 4
names(Y_sd) <- colnames(Y_const_level1a_scaled)


# Number of constrained outputs
p <- ncol(Y_const_level1a_scaled)

# Per-output observation uncertainty (sd), discrepancy mean, and discrepancy sd,
# in the list form expected by addNroyDesignPoints.
obs_sd_list <- as.list(rep(0.01, p))
disc_list <- as.list(rep(0, p))
disc_sd_list <- as.list(Y_sd)
# Implausibility threshold: candidates further than 3 sd from target are ruled out.
thres <- 3

# Column-wise ranges of the input design, bounding the augmentation candidates.
mins_aug <- apply(X_level1a, 2, FUN = min)
maxes_aug <- apply(X_level1a, 2, FUN = max)

# convert Y_target to a 1-row matrix for ingestion into the function
Y_target <- matrix(Y_target, nrow = 1)

Augment the design.

The function addNroyDesignPoints builds an emulator for each model output in Y. It compares the output of each emulator at a number of candidate design points, and chooses a space-filling set of those that are Not Ruled Out Yet (statistically close to the observation at Y_target).

# use invisible() to hide the output from km (emulator fitting)
invisible({capture.output({

# Final output needs to be expressed in terms of original LHS, then put back out to conf files.

# addNroyDesignPoints fits an emulator per column of Y, proposes n_aug
# candidate design points within [mins_aug, maxes_aug], and keeps a
# space-filling subset that is Not Ruled Out Yet with respect to Y_target.
wave1 <- addNroyDesignPoints(X = X_level1a,
                             Y = Y_const_level1a_scaled,
                             Y_target = Y_target,
                             n_aug = 50000,
                             mins_aug = mins_aug,
                             maxes_aug = maxes_aug,
                             # fix: use the threshold variable defined above
                             # instead of a hard-coded 3
                             thres = thres,
                             disc_list = disc_list,
                             disc_sd_list = disc_sd_list,
                             obs_sd_list = obs_sd_list,
                             n_app = 500,
                             nreps = 500)

})})

Write the augmented design to JULES configuration files

The function write_jules_design here simply takes the points calculated by addNroyDesignPoints and writes them to configuration files.

# Easiest way to generate a design of the right size is to have a "fac" which takes
# the names from the parameter list, and then multiplies everything by 0.5 or 2

# l_vg_soil is a TRUE/FALSE switch, so it must be excluded from the
# multiplicative factor list.
tf <- 'l_vg_soil'
# we don't want anything that is TRUE/FALSE to be in fac
fac_init <- names(paramlist)  # NOTE(review): fac_init appears unused below
not_tf_ix <- which(names(paramlist) != tf)
paramlist_trunc <- paramlist[not_tf_ix]

fac <- names(paramlist_trunc)

# Multiplicative max/min factors relative to each parameter's standard value.
# NOTE(review): minfac also indexes min and standard by which.max(x$max) --
# presumably to pick the same element as maxfac for vector-valued parameters;
# confirm this is intended rather than which.min(x$min).
maxfac <- lapply(paramlist_trunc, function(x) x$max[which.max(x$max)] / x$standard[which.max(x$max)])
minfac <- lapply(paramlist_trunc, function(x) x$min[which.max(x$max)] / x$standard[which.max(x$max)])

# create a directory for the configuration files
confdir <- 'conf_files_augment_JULES-ES-1p0'

# Fix: only create the directory when absent, avoiding the warning that
# dir.create() raises when the script is re-run.
if (!dir.exists(confdir)) {
  dir.create(confdir)
}

X_mm <- wave1$X_mm

# This is the function that writes the NROY points chosen by
# addNroyDesignPoints out to JULES configuration files.
write_jules_design(X_mm = X_mm, paramlist = paramlist, n = nrow(X_mm),
                   fac = fac, minfac = minfac, maxfac = maxfac,
                   tf = tf,
                   fnprefix = paste0(confdir, '/param-perturb-P'),
                   lhsfn = paste0(confdir, '/lhs_example.txt'),
                   stanfn = paste0(confdir, '/stanparms_example.txt'),
                   allstanfn = paste0(confdir, '/allstanparms_example.txt'),
                   rn = 12,
                   startnum = 500)

Check the design

Check that the augmented design produces what we expect. New ensemble members should be somewhat constrained within the boundaries of the original design, if the comparison to data offers any constraint.

X_mm <- wave1$X_mm

# Pairs plot: original design (grey open circles) under the proposed new
# points (red filled dots), all on the normalised [0, 1] scale.
# NOTE(review): `X` is not defined in this file -- presumably the original
# design loaded by JULES-ES-1p0-common.R; confirm it is not meant to be
# X_level1a here.
pairs(rbind(X, X_mm), xlim = c(0,1), ylim = c(0,1), gap = 0, lower.panel = NULL, 
      col = c(rep(makeTransparent('grey', 150), nrow(X)), rep(makeTransparent('red', 80), nrow(X_mm))),
      pch = c(rep(21, nrow(X)), rep(20, nrow(X_mm)))
      )

# Allow the legend to be drawn outside the plot region.
par(xpd = NA)

legend('bottom',
       legend = c('Original design', 'New points'),
       col = c('grey', 'red'),
       inset = 0.15,
       cex = 1.5,
       pch = c(21,20)
)

Check the emulators that produce the new design

Do a leave-one-out cross validation of points inside the hard boundaries of the constraints, using the wave1 emulator fits.

# Indices of ensemble members lying inside all of A. Wiltshire's hard
# boundaries. Note nbp has only a lower bound applied here.
aw_boundary_ix <- which(Y_const_level1a_scaled[, 'nbp_lnd_sum'] > -10 &
                          Y_const_level1a_scaled[, 'npp_nlim_lnd_sum'] > 35 &
                          Y_const_level1a_scaled[, 'npp_nlim_lnd_sum'] < 80 &
                          Y_const_level1a_scaled[, 'cSoil_lnd_sum'] > 750 &
                          Y_const_level1a_scaled[, 'cSoil_lnd_sum'] < 3000 &
                          Y_const_level1a_scaled[, 'cVeg_lnd_sum'] > 300 &
                          Y_const_level1a_scaled[, 'cVeg_lnd_sum'] < 800)

X_aw_boundary <- X_level1a[aw_boundary_ix, ]
Y_aw_boundary <- Y_const_level1a_scaled[aw_boundary_ix, ]

# Leave-one-out mean and sd for every ensemble member, one column per output,
# using the wave1 km emulator fits.
loo_mean_Y_level1a <- matrix(nrow = nrow(X_level1a), ncol = ncol(Y_const_level1a_scaled))
loo_sd_Y_level1a <- matrix(nrow = nrow(X_level1a), ncol = ncol(Y_const_level1a_scaled))

for (i in seq_len(ncol(Y_const_level1a_scaled))) {
  loo <- leaveOneOut.km(wave1$fit_list[[i]], type = 'UK', trend.reestim = TRUE)
  loo_mean_Y_level1a[, i] <- loo$mean
  loo_sd_Y_level1a[, i] <- loo$sd
}

We see in the leave-one-out analysis that the emulator is consistently under-predicting the vegetation carbon (though the uncertainty estimate often covers the actual value). This suggests (1) that there isn’t really a huge problem with a model discrepancy (or at least that isn’t the only problem), and (2) the history matching is working as it should, taking into account a not-great emulator.

par(mfrow = c(2, 2), las = 1)

# Predicted vs actual for members inside the hard boundaries, one panel per
# output, with +/- 2 sd leave-one-out error bars and a 1:1 reference line.
for (i in seq_len(ncol(loo_mean_Y_level1a))) {

  actual <- Y_const_level1a_scaled[aw_boundary_ix, i]
  predicted <- loo_mean_Y_level1a[aw_boundary_ix, i]
  lwr <- predicted - (2 * loo_sd_Y_level1a[aw_boundary_ix, i])
  upr <- predicted + (2 * loo_sd_Y_level1a[aw_boundary_ix, i])

  plot(actual, predicted, ylim = range(c(lwr, upr)),
       main = colnames(Y_const_level1a_scaled)[i],
       xlab = 'actual', ylab = 'predicted', bty = 'l')

  segments(x0 = actual, y0 = lwr, x1 = actual, y1 = upr,
           col = makeTransparent('black', 70))
  abline(0, 1)
}

Compare the straight km with a two-step emulator

Is a two-step emulator any better at emulating those crucial points which fall within aw’s hard boundaries? First, create a list of emulator fits.

# Fit a two-step (glmnet variable selection, then km) emulator to each output.
# invisible/capture.output hides the km optimiser chatter.
invisible({capture.output({

fitlist_Y_const_level1a_scaled <- vector(mode = 'list', length = ncol(Y_const_level1a_scaled))

for (i in seq_len(ncol(Y_const_level1a_scaled))) {
  y <- Y_const_level1a_scaled[, i]
  fitlist_Y_const_level1a_scaled[[i]] <- twoStep_glmnet(X = X_level1a, y)
}

})})

# Leave-one-out mean and sd from the two-step emulators, for comparison with
# the straight km fits above.
loo_mean_glmnet_Y_level1a <- matrix(nrow = nrow(X_level1a), ncol = ncol(Y_const_level1a_scaled))
loo_sd_glmnet_Y_level1a <- matrix(nrow = nrow(X_level1a), ncol = ncol(Y_const_level1a_scaled))

for (i in seq_len(ncol(Y_const_level1a_scaled))) {
  loo <- leaveOneOut.km(fitlist_Y_const_level1a_scaled[[i]]$emulator, type = 'UK', trend.reestim = TRUE)
  loo_mean_glmnet_Y_level1a[, i] <- loo$mean
  loo_sd_glmnet_Y_level1a[, i] <- loo$sd
}

It doesn’t appear that the two-step emulator (here plotted in red) is doing any better than the regular emulator.

par(mfrow = c(2, 2))

# Compare leave-one-out predictions: standard km emulator (black, with
# +/- 2 sd bars) vs two-step glmnet emulator (red points).
for (i in seq_len(ncol(loo_mean_Y_level1a))) {

  # Fix: include the two-step (red) means in the y-range, so comparison
  # points are not silently clipped by ylim.
  rn <- range(c(loo_mean_Y_level1a[aw_boundary_ix, i] - (2 * loo_sd_Y_level1a[aw_boundary_ix, i]),
                loo_mean_Y_level1a[aw_boundary_ix, i] + (2 * loo_sd_Y_level1a[aw_boundary_ix, i]),
                loo_mean_glmnet_Y_level1a[aw_boundary_ix, i]))

  plot(Y_const_level1a_scaled[aw_boundary_ix, i], loo_mean_Y_level1a[aw_boundary_ix, i],
       ylim = rn, main = colnames(Y_const_level1a_scaled)[i],
       xlab = 'actual', ylab = 'predicted')

  points(Y_const_level1a_scaled[aw_boundary_ix, i], loo_mean_glmnet_Y_level1a[aw_boundary_ix, i], col = 'red')

  # Error bars are for the standard km emulator only.
  segments(x0 = Y_const_level1a_scaled[aw_boundary_ix, i],
           y0 = loo_mean_Y_level1a[aw_boundary_ix, i] - (2 * loo_sd_Y_level1a[aw_boundary_ix, i]),
           x1 = Y_const_level1a_scaled[aw_boundary_ix, i],
           y1 = loo_mean_Y_level1a[aw_boundary_ix, i] + (2 * loo_sd_Y_level1a[aw_boundary_ix, i]),
           col = makeTransparent('black', 70))
  abline(0, 1)
}

Two other things I can think of to check: 1) how about using “multistart” to choose different starting conditions for optimising the emulators and 2) Using a flat prior for the mean function rather than a linear prior.

# Fit flat-prior (constant trend ~1) km emulators to each output.
# Fix: wrapped in invisible/capture.output for consistency with the other
# emulator fits, so the BFGS optimiser chatter is not printed.
invisible({capture.output({

fitlist_flatprior_Y_const_level1a_scaled <- vector(mode = 'list', length = ncol(Y_const_level1a_scaled))

for (i in seq_len(ncol(Y_const_level1a_scaled))) {
  y <- Y_const_level1a_scaled[, i]
  fitlist_flatprior_Y_const_level1a_scaled[[i]] <- km(formula = ~1, design = X_level1a, response = y)
}

})})
## 
## optimisation start
## ------------------
## * estimation method   : MLE 
## * optimisation method : BFGS 
## * analytical gradient : used
## * trend model : ~1
## * covariance model : 
##   - type :  matern5_2 
##   - nugget : NO
##   - parameters lower bounds :  1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 
##   - parameters upper bounds :  2 2 1.999244 1.698141 1.994798 1.990622 1.998154 1.778477 2 2 2 1.998084 2 2 2 1.99598 2 1.989889 1.997502 1.989483 1.98997 1.987916 1.983347 1.988424 2 1.990947 1.995643 1.994232 1.992247 2 2 2 
##   - best initial criterion value(s) :  -317.9823 
## 
## N = 32, M = 5 machine precision = 2.22045e-16
## At X0, 0 variables are exactly at the bounds
## At iterate     0  f=       317.98  |proj g|=       1.8645
## At iterate     1  f =       209.89  |proj g|=        1.6324
## At iterate     2  f =       181.43  |proj g|=        1.6511
## At iterate     3  f =       148.63  |proj g|=        1.6202
## At iterate     4  f =       139.03  |proj g|=        1.5461
## At iterate     5  f =       135.51  |proj g|=        1.3619
## At iterate     6  f =       134.86  |proj g|=        1.2467
## At iterate     7  f =       134.58  |proj g|=       0.80536
## At iterate     8  f =       134.54  |proj g|=       0.74081
## At iterate     9  f =       134.45  |proj g|=       0.46106
## At iterate    10  f =       134.42  |proj g|=       0.17524
## At iterate    11  f =       134.42  |proj g|=       0.21984
## At iterate    12  f =       134.42  |proj g|=      0.029537
## At iterate    13  f =       134.42  |proj g|=      0.017501
## At iterate    14  f =       134.42  |proj g|=      0.016084
## At iterate    15  f =       134.42  |proj g|=     0.0023756
## At iterate    16  f =       134.42  |proj g|=     0.0020765
## At iterate    17  f =       134.42  |proj g|=     0.0013181
## At iterate    18  f =       134.42  |proj g|=    0.00065574
## 
## iterations 18
## function evaluations 22
## segments explored during Cauchy searches 53
## BFGS updates skipped 0
## active bounds at final generalized Cauchy point 19
## norm of the final projected gradient 0.00065574
## final function value 134.421
## 
## F = 134.421
## final  value 134.420708 
## converged
## 
## optimisation start
## ------------------
## * estimation method   : MLE 
## * optimisation method : BFGS 
## * analytical gradient : used
## * trend model : ~1
## * covariance model : 
##   - type :  matern5_2 
##   - nugget : NO
##   - parameters lower bounds :  1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 
##   - parameters upper bounds :  2 2 1.999244 1.698141 1.994798 1.990622 1.998154 1.778477 2 2 2 1.998084 2 2 2 1.99598 2 1.989889 1.997502 1.989483 1.98997 1.987916 1.983347 1.988424 2 1.990947 1.995643 1.994232 1.992247 2 2 2 
##   - best initial criterion value(s) :  -1610.825 
## 
## N = 32, M = 5 machine precision = 2.22045e-16
## At X0, 0 variables are exactly at the bounds
## At iterate     0  f=       1610.8  |proj g|=        1.793
## At iterate     1  f =         1492  |proj g|=        1.4076
## At iterate     2  f =       1452.5  |proj g|=        1.4328
## At iterate     3  f =       1430.2  |proj g|=        1.2479
## At iterate     4  f =       1419.2  |proj g|=       0.94457
## At iterate     5  f =       1416.3  |proj g|=        1.4023
## At iterate     6  f =       1415.3  |proj g|=       0.63661
## At iterate     7  f =       1415.1  |proj g|=       0.33873
## At iterate     8  f =       1414.6  |proj g|=        0.4778
## At iterate     9  f =       1414.5  |proj g|=       0.32178
## At iterate    10  f =       1414.5  |proj g|=       0.37181
## At iterate    11  f =       1414.5  |proj g|=      0.061596
## At iterate    12  f =       1414.5  |proj g|=      0.030088
## At iterate    13  f =       1414.5  |proj g|=      0.036096
## At iterate    14  f =       1414.5  |proj g|=      0.073783
## At iterate    15  f =       1414.5  |proj g|=       0.01632
## At iterate    16  f =       1414.5  |proj g|=     0.0052506
## At iterate    17  f =       1414.5  |proj g|=     0.0044646
## 
## iterations 17
## function evaluations 19
## segments explored during Cauchy searches 65
## BFGS updates skipped 0
## active bounds at final generalized Cauchy point 25
## norm of the final projected gradient 0.00446464
## final function value 1414.5
## 
## F = 1414.5
## final  value 1414.500020 
## converged
## 
## optimisation start
## ------------------
## * estimation method   : MLE 
## * optimisation method : BFGS 
## * analytical gradient : used
## * trend model : ~1
## * covariance model : 
##   - type :  matern5_2 
##   - nugget : NO
##   - parameters lower bounds :  1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 
##   - parameters upper bounds :  2 2 1.999244 1.698141 1.994798 1.990622 1.998154 1.778477 2 2 2 1.998084 2 2 2 1.99598 2 1.989889 1.997502 1.989483 1.98997 1.987916 1.983347 1.988424 2 1.990947 1.995643 1.994232 1.992247 2 2 2 
##   - best initial criterion value(s) :  -2798.765 
## 
## N = 32, M = 5 machine precision = 2.22045e-16
## At X0, 0 variables are exactly at the bounds
## At iterate     0  f=       2798.8  |proj g|=       1.7327
## At iterate     1  f =       2611.5  |proj g|=        1.7401
## At iterate     2  f =         2586  |proj g|=        1.7334
## At iterate     3  f =       2583.1  |proj g|=        1.8304
## At iterate     4  f =       2576.3  |proj g|=        1.9363
## At iterate     5  f =       2573.9  |proj g|=         1.912
## At iterate     6  f =       2571.7  |proj g|=        1.8296
## At iterate     7  f =       2571.3  |proj g|=        1.6789
## At iterate     8  f =       2571.1  |proj g|=        1.4155
## At iterate     9  f =         2571  |proj g|=       0.83555
## At iterate    10  f =       2570.9  |proj g|=       0.75226
## At iterate    11  f =       2570.9  |proj g|=       0.81884
## At iterate    12  f =       2570.9  |proj g|=       0.13804
## At iterate    13  f =       2570.9  |proj g|=      0.064844
## At iterate    14  f =       2570.9  |proj g|=      0.020635
## At iterate    15  f =       2570.9  |proj g|=      0.020669
## At iterate    16  f =       2570.9  |proj g|=     0.0020916
## 
## iterations 16
## function evaluations 17
## segments explored during Cauchy searches 42
## BFGS updates skipped 0
## active bounds at final generalized Cauchy point 27
## norm of the final projected gradient 0.00209164
## final function value 2570.93
## 
## F = 2570.93
## final  value 2570.925355 
## converged
## 
## optimisation start
## ------------------
## * estimation method   : MLE 
## * optimisation method : BFGS 
## * analytical gradient : used
## * trend model : ~1
## * covariance model : 
##   - type :  matern5_2 
##   - nugget : NO
##   - parameters lower bounds :  1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 
##   - parameters upper bounds :  2 2 1.999244 1.698141 1.994798 1.990622 1.998154 1.778477 2 2 2 1.998084 2 2 2 1.99598 2 1.989889 1.997502 1.989483 1.98997 1.987916 1.983347 1.988424 2 1.990947 1.995643 1.994232 1.992247 2 2 2 
##   - best initial criterion value(s) :  -2445.585 
## 
## N = 32, M = 5 machine precision = 2.22045e-16
## At X0, 0 variables are exactly at the bounds
## At iterate     0  f=       2445.6  |proj g|=       1.9267
## At iterate     1  f =       2377.4  |proj g|=        1.7604
## At iterate     2  f =       2352.7  |proj g|=        1.7505
## At iterate     3  f =       2338.8  |proj g|=        1.6328
## At iterate     4  f =       2327.5  |proj g|=         1.657
## At iterate     5  f =       2324.3  |proj g|=         1.633
## At iterate     6  f =       2321.4  |proj g|=        1.5865
## At iterate     7  f =       2318.4  |proj g|=        1.5374
## At iterate     8  f =       2317.3  |proj g|=        1.3338
## At iterate     9  f =       2316.9  |proj g|=        1.3016
## At iterate    10  f =       2316.3  |proj g|=        1.0996
## At iterate    11  f =       2315.7  |proj g|=         1.187
## At iterate    12  f =       2315.6  |proj g|=       0.57114
## At iterate    13  f =       2315.5  |proj g|=        0.5331
## At iterate    14  f =       2315.5  |proj g|=        1.1872
## At iterate    15  f =       2315.4  |proj g|=       0.26652
## At iterate    16  f =       2315.4  |proj g|=       0.52958
## At iterate    17  f =       2315.4  |proj g|=       0.53029
## At iterate    18  f =       2315.4  |proj g|=      0.078599
## At iterate    19  f =       2315.4  |proj g|=      0.059164
## At iterate    20  f =       2315.4  |proj g|=      0.052012
## At iterate    21  f =       2315.4  |proj g|=      0.078643
## At iterate    22  f =       2315.4  |proj g|=      0.056094
## At iterate    23  f =       2315.4  |proj g|=      0.019279
## At iterate    24  f =       2315.4  |proj g|=     0.0078961
## At iterate    25  f =       2315.4  |proj g|=     0.0085813
## At iterate    26  f =       2315.4  |proj g|=      0.012819
## 
## iterations 26
## function evaluations 30
## segments explored during Cauchy searches 76
## BFGS updates skipped 0
## active bounds at final generalized Cauchy point 21
## norm of the final projected gradient 0.0128194
## final function value 2315.38
## 
## F = 2315.38
## final  value 2315.375073 
## converged
# Leave-one-out mean and sd from the flat-prior emulators.
loo_mean_flatprior_Y_level1a <- matrix(nrow = nrow(X_level1a), ncol = ncol(Y_const_level1a_scaled))
loo_sd_flatprior_Y_level1a <- matrix(nrow = nrow(X_level1a), ncol = ncol(Y_const_level1a_scaled))

for (i in seq_len(ncol(Y_const_level1a_scaled))) {
  loo <- leaveOneOut.km(fitlist_flatprior_Y_const_level1a_scaled[[i]], type = 'UK', trend.reestim = TRUE)
  loo_mean_flatprior_Y_level1a[, i] <- loo$mean
  loo_sd_flatprior_Y_level1a[, i] <- loo$sd
}
par(mfrow = c(2, 2))

# Compare leave-one-out predictions: linear-trend km (black, with +/- 2 sd
# bars) vs flat-prior km (red points).
for (i in seq_len(ncol(loo_mean_Y_level1a))) {

  rn <- range(c(loo_mean_Y_level1a[aw_boundary_ix, i] - (2 * loo_sd_Y_level1a[aw_boundary_ix, i]),
                loo_mean_Y_level1a[aw_boundary_ix, i] + (2 * loo_sd_Y_level1a[aw_boundary_ix, i])))

  plot(Y_const_level1a_scaled[aw_boundary_ix, i], loo_mean_Y_level1a[aw_boundary_ix, i],
       ylim = rn, main = colnames(Y_const_level1a_scaled)[i],
       xlab = 'actual', ylab = 'predicted')

  points(Y_const_level1a_scaled[aw_boundary_ix, i], loo_mean_flatprior_Y_level1a[aw_boundary_ix, i], col = 'red')

  # Fix: the upper end (y1) previously mixed in the flat-prior mean/sd
  # (copy-paste slip); both ends of the error bar now use the linear-trend
  # emulator, matching the earlier comparison plots.
  segments(x0 = Y_const_level1a_scaled[aw_boundary_ix, i],
           y0 = loo_mean_Y_level1a[aw_boundary_ix, i] - (2 * loo_sd_Y_level1a[aw_boundary_ix, i]),
           x1 = Y_const_level1a_scaled[aw_boundary_ix, i],
           y1 = loo_mean_Y_level1a[aw_boundary_ix, i] + (2 * loo_sd_Y_level1a[aw_boundary_ix, i]),
           col = makeTransparent('black', 70))
  abline(0, 1)
}

Does the emulator have a bias? Leave-one-out analysis

# Leave-one-out predictions for each wave1 emulator, kept as a list for
# later bias inspection.
Y_loo_list <- vector(mode = 'list', length = length(wave1$fit_list))

for (i in seq_along(wave1$fit_list)) {
  Y_loo_list[[i]] <- leaveOneOut.km(wave1$fit_list[[i]], type = 'UK', trend.reestim = TRUE)
}

# NOTE(review): a second loop over wave1$fit_list with an empty body followed
# here; it was a no-op placeholder and has been removed.

Visualise the predicted outputs at the NROY points of the old design, and the new suggested design.

# Emulator predictions (mean and sd) at the proposed NROY design points.
Y_mm_list <- vector(mode = 'list', length = length(wave1$fit_list))

for (i in seq_along(wave1$fit_list)) {
  Y_mm_list[[i]] <- predict(object = wave1$fit_list[[i]], newdata = wave1$X_mm, type = 'UK')
}

Visualising the emulated outputs at the proposed new design points.

Interestingly, these aren’t perfectly within the original hard boundaries set by Andy, even though I’ve set those boundaries to be the +/- 4 standard deviation thresholds in the History Match. I suggest this is because there is model discrepancy, and that there is considerable wriggle room induced by emulator uncertainty.

In particular, it appears that vegetation carbon is difficult to keep high, and that many NROY proposed members have a fairly low vegetation carbon. This might need a discrepancy term, or adjusting in some other way. It certainly needs exploring, and a OAAT plot might give clues as to the parameters to choose.

# Histogram of level 1 constraints: ensemble output (grey), emulated output at
# the proposed NROY points (black), the hard-constraint window shaded, and the
# target (black rug) and target +/- 3 sd of the discrepancy (red rug) marked.
hcol <- 'darkgrey'
lcol <- 'black'  # NOTE(review): lcol appears unused in this chunk
par(mfrow = c(2, 2), fg = 'darkgrey', las = 1)

# Flatten the per-output discrepancy sd list to a plain named vector.
discsd <- c(disc_sd_list, recursive = TRUE)

# Helper: draw one constraint panel. i indexes both the emulator in
# Y_mm_list and the column of Y_target/discsd; rug values outside the
# histogram's x-range may be clipped (with a warning).
plotConstraintHist <- function(i, yname, main, xlab, lwr, upr) {
  hist(Y_const_level1a_scaled[, yname], col = hcol, main = main, xlab = xlab)
  hist(Y_mm_list[[i]]$mean, add = TRUE, col = 'black')
  polygon(x = c(lwr, upr, upr, lwr), y = c(0, 0, 1000, 1000),
          col = makeTransparent('tomato2'),
          border = makeTransparent('tomato2'))
  rug(Y_target[i], col = 'black')
  rug(c(Y_target[i] + 3 * discsd[i], Y_target[i] - 3 * discsd[i]), col = 'red')
}

# NOTE(review): the NBP shading runs to 100 rather than the hard limit of 10,
# as in the original -- presumably to cover the visible axis; confirm intended.
plotConstraintHist(1, 'nbp_lnd_sum', 'NBP', 'GtC/year', -10, 100)
plotConstraintHist(2, 'npp_nlim_lnd_sum', 'NPP', 'GtC/year', 35, 80)
plotConstraintHist(3, 'cSoil_lnd_sum', 'Soil Carbon', 'GtC', 750, 3000)
plotConstraintHist(4, 'cVeg_lnd_sum', 'Vegetation Carbon', 'GtC', 300, 800)

How good are the four emulators that we’ve built? Are there biases? (there’s no real evidence of this)

par(mfrow = c(2, 2))

# One histogram per constrained output of the wave1 emulator predictions,
# as a quick visual check for prediction bias.
for (k in seq_len(4)) {
  hist(wave1$pred_list[[k]]$mean, main = colnames(Y_const_level1a_scaled)[k])
}