Load libraries, functions and data.
# MAE as a percentage of the output range
prop_mae <- function(Y, Ypred){
  # mean absolute error, expressed as a percentage of the range of the output
  absdiff <- abs(diff(range(Y)))
  mae <- MAE(Y, Ypred)
  propmae <- (mae / absdiff) * 100
  propmae
}
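A quick worked example with made-up numbers, showing what prop_mae measures (assumes the MAE function from the libraries loaded above):
prop_mae(Y = c(1, 2, 10), Ypred = c(1.5, 2.5, 9.5)) # MAE of 0.5 over a range of 9, so ~5.6%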
withinConstraints <- function(X, Xrange){
  # Return the row indices of a matrix that conform to constraints.
  # X ....... matrix to be tested
  # Xrange .. constraint matrix: each column holds the min (row 1) and
  #           max (row 2) allowed value for the matching column of X
  kept_list <- vector(mode = 'list', length = ncol(X))
  for(i in 1:ncol(X)){
    # strict inequalities: values exactly on a boundary are excluded
    kept_ix <- which(X[ ,i] > Xrange[1, i] & X[ ,i] < Xrange[2, i])
    kept_list[[i]] <- kept_ix
  }
  # keep only the rows that satisfy every column's constraint
  out <- kept_list[[1]]
  for(i in 2:length(kept_list)){
    out <- intersect(out, kept_list[[i]])
  }
  out
}
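A toy check of withinConstraints with made-up numbers:
Xtoy <- matrix(c(1, 5, 9,
                 2, 6, 10), ncol = 2)
Rtoy <- matrix(c(0, 6,
                 0, 7), nrow = 2)
withinConstraints(Xtoy, Rtoy) # rows 1 and 2 pass; row 3 fails both columns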
# Constraint bounds: one column per column of Y_const_level1a_scaled, with the
# lower bound in row 1 and the upper bound in row 2 (e.g. cVeg in (300, 800))
AW_const <- matrix(c(0, 1e12, 35, 80, 750, 3000, 300, 800), byrow = FALSE, nrow = 2)
oneStep <- function(X, y, nvec, nreps){
  # one-step-ahead prediction error of the ensemble emulator
  # nvec .... vector of ensemble sizes at which to test prediction
  # nreps ... number of repeat samples at each ensemble size
  p <- nrow(X)
  # error matrix: repeat samples in the rows, one column per entry of nvec
  errmat <- matrix(nrow = nreps, ncol = length(nvec))
  for(j in 1:length(nvec)){
    n <- nvec[j]
    print(n)
    for(i in 1:nreps){
      # sample n training points plus one extra to use as the prediction target
      ix <- sample(1:p, size = n + 1, replace = FALSE)
      ix_train <- head(ix, length(ix) - 1)
      ix_target <- tail(ix, 1)
      X_train <- X[ix_train, ]
      X_target <- matrix(X[ix_target, ], nrow = 1)
      colnames(X_target) <- colnames(X)
      colnames(X_train) <- colnames(X)
      y_train <- y[ix_train]
      y_target <- y[ix_target]
      # build an emulator with n ensemble members and predict the held-out target
      em <- km(~., design = X_train, response = y_train)
      pred_target <- predict(em, newdata = X_target, type = 'UK')
      err <- pred_target$mean - y_target
      errmat[i, j] <- err
    }
  }
  errmat
}
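The objects test4, test4b, nvec and nvecb used below are built in chunks not shown here; a hypothetical call might look like this:
# one-step-ahead errors at a few ensemble sizes, 100 repeats at each size
# nvec  <- c(50, 100, 200)
# test4 <- oneStep(X_level1a, y = Y_level1a[, y_names_sum[1]], nvec = nvec, nreps = 100)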
The leave-one-out for loop can be replaced with lapply (or mclapply for speed).
[move to core code]
First, build a list of emulators, then apply the leave-one-out test to each list member.
loolist_km_Y_level1a <- mclapply(X = emlist_km_Y_level1a, FUN = leaveOneOut.km, type = 'UK', trend.reestim = TRUE)
loolist_km_YAnom_level1a <- mclapply(X = emlist_km_YAnom_level1a, FUN = leaveOneOut.km, type = 'UK', trend.reestim = TRUE)
loostats_km_Y_level1a <- lapply(emlist_km_Y_level1a, FUN = kmLooStats)
loostats_km_YAnom_level1a <- lapply(emlist_km_YAnom_level1a, FUN = kmLooStats)
test4all <- cbind(test4, test4b)
nvecall <- c(nvec, nvecb)
# mean absolute one-step error at each ensemble size
test4all_mae <- apply(abs(test4all), 2, mean)
plot(nvecall, test4all_mae, ylim = c(0, 1.2e6),
     xlab = 'ensemble size', ylab = 'one-step error')
for(i in 1:ncol(test4all)){
  # jitter the x values so individual errors are visible
  points(rnorm(nrow(test4all), mean = nvecall[i], sd = 2), test4all[ ,i], pch = 20, cex = 0.8)
}
points(nvecall, test4all_mae, col = 'red', pch = 19)
# Here, Y_sum_level1a_list is a list version of the matrix Y_sum_level1a (one element per column)
emlist_km_Y_level1a <- mclapply(X = Y_sum_level1a_list, FUN = km, formula = ~., design = X_level1a, mc.cores = 4)
# Identify all of the members which fall under a "level 2" constraint.
level2_ix <- withinConstraints(Y_const_level1a_scaled, AW_const)
# Build an emulator for the scaled output
Y_const_level1a_scaled_list <- mat2list(Y_const_level1a_scaled)
emlist_km_Y_const_level1a_scaled <- mclapply(X = Y_const_level1a_scaled_list, FUN = km, formula = ~., design = X_level1a, mc.cores = 4)
#pdf(file = 'figs/kmloostats_Y_level1a.pdf', width = 12, height = 12)
par(mfrow = c(4,4), mar = c(3,4,2,2), oma = c(4,4,4,0.1))
for(i in 1:length(loolist_km_Y_level1a)){
y <- Y_level1a[, y_names_sum[i]]
loo <- loolist_km_Y_level1a[[i]]
ylim <- range(c(loo$mean - (2*loo$sd), loo$mean + (2*loo$sd)) )
plot(y, loo$mean, xlab = '', ylab = '', main = '' , ylim = ylim, col = makeTransparent('black', 70),
pch = 19)
segments(x0 = y, y0 = loo$mean - (2*loo$sd) , x1 = y , y1 = loo$mean + (2*loo$sd), col = makeTransparent('black', 70))
abline(0,1)
legend('topleft', legend = y_names_sum[i], bty = 'n', text.font = 2 )
legend('bottomright',legend = paste('pmae =',round(loostats_km_Y_level1a[[i]]$pmae,2),'%') , bty = 'n', text.font = 2)
}
mtext('Actual', side = 1, line = 1, outer = TRUE, cex = 2 )
mtext('Predicted', side = 2, line = 0, outer = TRUE, cex = 2)
mtext('Level 1a ensemble outputs', side = 3, line = 0, outer = TRUE, cex = 2)
#dev.off()
# Would the leave-one-out prediction of each member pass the constraints?
pred_km_Y_const_level1a_scaled <- matrix(ncol = ncol(Y_const_level1a_scaled), nrow = nrow(Y_const_level1a_scaled))
colnames(pred_km_Y_const_level1a_scaled) <- colnames(Y_const_level1a_scaled)
for(i in 1:ncol(Y_const_level1a_scaled)){
  pred <- loolist_km_Y_const_level1a_scaled[[i]]$mean
  pred_km_Y_const_level1a_scaled[, i] <- pred
}
pred_level2_ix <- withinConstraints(pred_km_Y_const_level1a_scaled, AW_const)
Plot the predictions, with a special focus on the constrained members. The message is that, to do better at predicting the constrained members, we need to do better with the cVeg emulator. Many of its leave-one-out predictions are far too low, and this is dragging down the predictions; some are false positives, though. Could we plot leave-one-out error against the parameters? A minimal sketch follows.
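Here the error is the leave-one-out cVeg prediction minus the actual value, using the prediction matrix assembled above; the four parameters shown are illustrative choices.
loo_err_cVeg <- pred_km_Y_const_level1a_scaled[, 'cVeg_lnd_sum'] -
  Y_const_level1a_scaled[, 'cVeg_lnd_sum']
par(mfrow = c(2,2))
for(p in c('b_wl_io', 'kaps_roth', 'lai_max_io', 'retran_l_io')){
  plot(X_level1a[, p], loo_err_cVeg, xlab = p, ylab = 'cVeg LOO error')
  abline(h = 0, col = 'darkgrey')
}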
library(verification)
# Logical vectors: does each member pass the constraints in the model output
# (the truth) and in the emulator predictions?
model <- vector(mode = "logical", length = nrow(Y_const_level1a_scaled))
emulator <- vector(mode = "logical", length = nrow(Y_const_level1a_scaled))
model[level2_ix] <- TRUE
emulator[pred_level2_ix] <- TRUE
ver <- verify(obs = model, pred = emulator, frcst.type = 'binary')
# ETS runs from -1/3 to 1, with 0 showing no skill, so we have some skill. Could be better.
ver$ETS
[1] 0.3477098
ver$HSS
[1] 0.5160008
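As a check on what these scores mean, ETS and HSS can be computed by hand from the 2x2 contingency counts (a sketch; the counts come from ver$tab shown further below, and both scores are symmetric in misses and false alarms, so the table orientation doesn't matter):
skillScores <- function(H, M, FA, CN){
  # H = hits, M = misses, FA = false alarms, CN = correct negatives
  N <- H + M + FA + CN
  Hrand <- (H + M) * (H + FA) / N # hits expected by chance
  ets <- (H - Hrand) / (H + M + FA - Hrand)
  hss <- 2 * (H * CN - FA * M) / ((H + M) * (M + CN) + (H + FA) * (FA + CN))
  list(ets = ets, hss = hss)
}
skillScores(H = 20, M = 14, FA = 17, CN = 311) # reproduces ver$ETS and ver$HSS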
It’s clear from other experiments that the major barrier to a good prediction of the constrained set is the cVeg emulator.
Some experiments to create a better emulator follow.
# Experiments: more multistarts, and transforming the output
y <- Y_const_level1a_scaled[, 'cVeg_lnd_sum']
# A full quadratic trend surface breaks the fit:
#m2 <- km(~.^2, design=X_level1a, response=y, multistart = 4)
# True positives: members in both the observed and predicted constrained sets
tp_ix <- intersect(level2_ix, pred_level2_ix)
# False positives: members predicted to pass the constraint, but that don't
fp_ix <- setdiff(pred_level2_ix, level2_ix)
# False negatives: members that pass the constraint, but aren't predicted to
fn_ix <- setdiff(level2_ix, pred_level2_ix)
# True negatives: members in neither set
tn_ix <- setdiff(1:nrow(Y_const_level1a_scaled), union(level2_ix, pred_level2_ix))
# sanity check: the four groups should partition all 362 members
#length(c(tp_ix, fp_ix, fn_ix, tn_ix))
clines_lower <- c(0, 35, 750, 300)
clines_upper <- c(NA, 80, 3000, 800)
# colour by confusion-matrix group: TP = blue, FP = red, FN = gold, TN = grey
colvec <- rep('black', nrow(Y_const_level1a_scaled))
pchvec <- rep(21, nrow(Y_const_level1a_scaled))
colvec[tp_ix] <- 'blue'
pchvec[tp_ix] <- 19
colvec[fp_ix] <- 'red'
pchvec[fp_ix] <- 19
colvec[fn_ix] <- 'gold'
pchvec[fn_ix] <- 19
colvec[tn_ix] <- 'darkgrey'
pchvec[tn_ix] <- 21
pdf(width = 10, height = 10, file = 'figs/Y_const_loo.pdf')
par(mfrow = c(2,2), oma = c(0.1,0.1,4,0.1))
for(i in 1:4){
  plot(Y_const_level1a_scaled[ ,i], pred_km_Y_const_level1a_scaled[ ,i], type = 'n', las = 1,
       main = colnames(Y_const_level1a_scaled)[i], xlab = 'model', ylab = 'emulator')
  abline(0,1)
  abline(v = clines_lower[i], col = 'darkgrey')
  abline(v = clines_upper[i], col = 'darkgrey', lty = 'dashed')
  abline(h = clines_lower[i], col = 'darkgrey')
  abline(h = clines_upper[i], col = 'darkgrey', lty = 'dashed')
  points(Y_const_level1a_scaled[ ,i], pred_km_Y_const_level1a_scaled[ ,i], col = colvec, pch = pchvec)
}
reset()
legend('top', legend = c('True positive', 'False positive', 'False negative', 'True negative', 'lower bound', 'upper bound'), pch = c(19, 19, 19, 21, NA, NA), col = c('blue', 'red', 'gold', 'darkgrey', 'darkgrey', 'darkgrey'), lty = c(NA, NA, NA, NA, 'solid', 'dashed'), horiz = TRUE)
dev.off()
How good are the leave-one-out predictions of the transformed data?
It appears the square root transformation outperforms the standard emulator, but mostly at low values (which we don’t think are realistic).
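The emulators m_stan, m_logy and m_sqrty are fitted in chunks not shown here; a minimal sketch of how they might be built (assuming all cVeg values are strictly positive, so the log transform is valid):
m_stan  <- km(~., design = X_level1a, response = y)
m_logy  <- km(~., design = X_level1a, response = log(y))
m_sqrty <- km(~., design = X_level1a, response = sqrt(y))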
loo_m_stan <- leaveOneOut.km(m_stan, type = 'UK', trend.reestim = TRUE)
loo_m_logy <- leaveOneOut.km(m_logy, type = 'UK', trend.reestim = TRUE)
loo_m_sqrty <- leaveOneOut.km(m_sqrty, type = 'UK', trend.reestim = TRUE)
# Make a new matrix of predictions, and find which ensemble members would pass the constraint.
pred_km_Y_const_level1a_scaled_cVeg_logy <- pred_km_Y_const_level1a_scaled
pred_km_Y_const_level1a_scaled_cVeg_logy[, 'cVeg_lnd_sum'] <- exp(loo_m_logy$mean)
pred_level2_logy_ix <- withinConstraints(pred_km_Y_const_level1a_scaled_cVeg_logy, AW_const)
emulator_logy <- vector(mode = "logical", length = nrow(Y_const_level1a_scaled))
emulator_logy[pred_level2_logy_ix] <- TRUE
ver_logy <- verify(obs = model, pred = emulator_logy, frcst.type = 'binary')
# LOO predictions: standard (black), log-transformed (red), sqrt-transformed (blue)
plot(y, loo_m_stan$mean, col = makeTransparent('black', 150), pch = 19)
points(y, exp(loo_m_logy$mean), col = makeTransparent('red', 150), pch = 19)
points(y, (loo_m_sqrty$mean)^2, col = makeTransparent('blue', 150), pch = 19)
abline(0,1)
# cVeg constraint bounds
abline(h = c(300, 800))
abline(v = c(300, 800))
errSummary <- function(obs, pred){
  # summary statistics for prediction error
  err <- pred - obs
  mae <- mean(abs(err))
  maxerr <- max(err)
  absdiff <- abs(diff(range(obs)))
  pmae <- (mae / absdiff) * 100
  return(list(mae = mae, pmae = pmae, maxerr = maxerr))
}
errSummary(y, loo_m_stan$mean)
$mae
[1] 86.09347
$pmae
[1] 4.241667
$maxerr
[1] 472.4355
errSummary(y, exp(loo_m_logy$mean))
$mae
[1] 68.824
$pmae
[1] 3.390832
$maxerr
[1] 1135.581
errSummary(y, (loo_m_sqrty$mean)^2)
$mae
[1] 61.27904
$pmae
[1] 3.019106
$maxerr
[1] 461.909
# Make a new matrix of predictions, and find which ensemble members would pass the constraint.
pred_km_Y_const_level1a_scaled_cVeg_sqrty <- pred_km_Y_const_level1a_scaled
pred_km_Y_const_level1a_scaled_cVeg_sqrty[, 'cVeg_lnd_sum'] <- (loo_m_sqrty$mean)^2
pred_level2_sqrty_ix <- withinConstraints(pred_km_Y_const_level1a_scaled_cVeg_sqrty, AW_const)
emulator_sqrty <- vector(mode = "logical", length = nrow(Y_const_level1a_scaled))
emulator_sqrty[pred_level2_sqrty_ix] <- TRUE
ver_sqrty<- verify(obs = model, pred = emulator_sqrty, frcst.type = 'binary')
ver$tab
       0   1
  0  311  14
  1   17  20
ver_logy$tab
       0   1
  0  313  12
  1   24  13
ver_sqrty$tab
       0   1
  0  315  10
  1   21  16
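The *_gauss_gen emulators used below are also fitted in chunks not shown; a sketch of how they might be built, assuming a Gaussian covariance fitted with the rgenoud optimiser (covtype and optim.method are documented arguments of DiceKriging::km):
m_stan_gauss_gen <- km(~., design = X_level1a, response = y,
                       covtype = 'gauss', optim.method = 'gen')
m_logy_gauss_gen <- km(~., design = X_level1a, response = log(y),
                       covtype = 'gauss', optim.method = 'gen')
m_sqrt_gauss_gen <- km(~., design = X_level1a, response = sqrt(y),
                       covtype = 'gauss', optim.method = 'gen')
loo_m_stan_gauss_gen <- leaveOneOut.km(m_stan_gauss_gen, type = 'UK', trend.reestim = TRUE)
loo_m_logy_gauss_gen <- leaveOneOut.km(m_logy_gauss_gen, type = 'UK', trend.reestim = TRUE)
loo_m_sqrt_gauss_gen <- leaveOneOut.km(m_sqrt_gauss_gen, type = 'UK', trend.reestim = TRUE)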
plot(y, (loo_m_sqrt_gauss_gen$mean)^2)
abline(0,1)
pred_km_Y_const_level1a_scaled_cVeg_logy_gauss_gen <- pred_km_Y_const_level1a_scaled
pred_km_Y_const_level1a_scaled_cVeg_logy_gauss_gen[, 'cVeg_lnd_sum'] <- exp(loo_m_logy_gauss_gen$mean)
pred_level2_logy_gauss_gen_ix <- withinConstraints(pred_km_Y_const_level1a_scaled_cVeg_logy_gauss_gen, AW_const)
emulator_logy_gauss_gen <- vector(mode = "logical", length = nrow(Y_const_level1a_scaled))
emulator_logy_gauss_gen[pred_level2_logy_gauss_gen_ix] <- TRUE
ver_logy_gauss_gen <- verify(obs = model, pred = emulator_logy_gauss_gen, frcst.type = 'binary')
plot(y, loo_m_stan_gauss_gen$mean)
abline(0,1)
(I don’t think I have this right yet.)
pred_km_Y_const_level1a_scaled_cVeg_stan_gauss_gen <- pred_km_Y_const_level1a_scaled
pred_km_Y_const_level1a_scaled_cVeg_stan_gauss_gen[, 'cVeg_lnd_sum'] <- (loo_m_stan_gauss_gen$mean)
pred_level2_stan_gauss_gen_ix <- withinConstraints(pred_km_Y_const_level1a_scaled_cVeg_stan_gauss_gen, AW_const)
emulator_stan_gauss_gen <- vector(mode = "logical", length = nrow(Y_const_level1a_scaled))
emulator_stan_gauss_gen[pred_level2_stan_gauss_gen_ix] <- TRUE
ver_stan_gauss_gen <- verify(obs = model, pred = emulator_stan_gauss_gen, frcst.type = 'binary')
First, emulate and plot the value of cVeg over a uniform sample of the input space.
# predict cVeg over a uniform sample of the input space
mins <- apply(X_level1a, 2, FUN = min)
maxes <- apply(X_level1a, 2, FUN = max)
nsamp_unif <- 1000
X_unif <- samp_unif(nsamp_unif, mins = mins, maxes = maxes)
pred_cVeg_stan_unif <- predict(m_cVeg_stan, newdata = X_unif, type = 'UK')
There is a really important threshold for cVeg in b_wl_io. This is picked up nicely in the History Matching section, so I’m not worried that it’s being missed. A quick visual check follows.
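A sketch of that check, assuming X_unif keeps the input column names:
plot(X_unif[, 'b_wl_io'], pred_cVeg_stan_unif$mean,
     pch = 20, col = makeTransparent('black', 70),
     xlab = 'b_wl_io', ylab = 'emulated cVeg')
abline(h = c(300, 800), col = 'darkgrey') # cVeg constraint bounds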
# wider constraint bounds, used to define a "level 1b" subset of the ensemble
DM_const <- matrix(c(0, 1e12, 10, 120, 500, 5000, 100, 1000), byrow = FALSE, nrow = 2)
level1b_ix <- withinConstraints(Y_const_level1a_scaled, DM_const)
X_level1b <- X_level1a[level1b_ix , ]
y_level1b <- Y_const_level1a_scaled[level1b_ix, 'cVeg_lnd_sum']
m_stan_level1b <- km(~., design = X_level1b, response = y_level1b )
optimisation start
------------------
* estimation method : MLE, optimisation method : BFGS, analytical gradient used
* trend model : ~alpha_io + a_wl_io + bio_hum_cn + b_wl_io + dcatch_dlai_io +
dqcrit_io + dz0v_dh_io + f0_io + fd_io + g_area_io + g_root_io +
g_wood_io + gs_nvg_io + hw_sw_io + kaps_roth + knl_io + lai_max_io +
lai_min_io + lma_io + n_inorg_turnover + nmass_io + nr_io +
retran_l_io + retran_r_io + r_grow_io + rootd_ft_io + sigl_io +
sorp + tleaf_of_io + tlow_io + tupp_io + l_vg_soil
* covariance model : matern5_2, no nugget
* best initial criterion value(s) : -698.8532
[BFGS iteration trace omitted]
final value 693.017000
stopped after 101 iterations
loo_m_stan_level1b <- leaveOneOut.km(m_stan_level1b, type = 'UK', trend.reestim = TRUE)
pred_km_Y_const_level1a_scaled_cVeg_stan_level1b <- pred_km_Y_const_level1a_scaled[level1b_ix, ]
pred_km_Y_const_level1a_scaled_cVeg_stan_level1b[, 'cVeg_lnd_sum'] <- loo_m_stan_level1b$mean
pred_level2_stan_level1b_ix <- withinConstraints(pred_km_Y_const_level1a_scaled_cVeg_stan_level1b, AW_const)
emulator_stan_level1b <- vector(mode = "logical", length = nrow(Y_const_level1a_scaled))
# withinConstraints returns indices into the level-1b subset, so map them back
# to rows of the full ensemble before comparing with the model vector
emulator_stan_level1b[level1b_ix[pred_level2_stan_level1b_ix]] <- TRUE
ver_stan_level1b <- verify(obs = model, pred = emulator_stan_level1b, frcst.type = 'binary')
cVeg_level1a <- Y_const_level1a_scaled[, 'cVeg_lnd_sum']
require(foreach)
require(doParallel)
# parallel setup for the multistart optimisation; adjust nCores to your machine
nCores <- 4
cl <- makeCluster(nCores)
registerDoParallel(cl)
m_cVeg_stan <- km(~., design = X_level1a, response = cVeg_level1a, multistart = 4)
optimisation start
------------------
* estimation method : MLE, optimisation method : BFGS, analytical gradient used
* trend model : ~. over all 32 inputs, as above
* covariance model : matern5_2, no nugget
* best initial criterion value(s) : -2315.161 -2315.2 -2315.245 -2315.484
[BFGS multistart trace omitted]
* The 4 best values (multistart) obtained are:
2271.948 2271.948 2271.948 2296.345
* The model corresponding to the best one (2271.948) is stored.
stopCluster(cl)
Then use the leave-one-out differences to get an idea of where in parameter space the emulator is going wrong; a sketch follows. (The leave-one-out differences for the single-start and multistart fits should be roughly the same.)
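A sketch, flagging the worst leave-one-out predictions of the multistart cVeg emulator against a few illustrative parameters:
loo_cVeg <- leaveOneOut.km(m_cVeg_stan, type = 'UK', trend.reestim = TRUE)
abs_err <- abs(loo_cVeg$mean - cVeg_level1a)
worst_ix <- which(abs_err > quantile(abs_err, 0.9)) # worst 10% of predictions
par(mfrow = c(2,2))
for(p in c('b_wl_io', 'kaps_roth', 'lai_max_io', 'retran_l_io')){
  plot(X_level1a[, p], abs_err, xlab = p, ylab = '|cVeg LOO error|')
  points(X_level1a[worst_ix, p], abs_err[worst_ix], col = 'red', pch = 19)
}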
# Pairs plot of the (truncated) cVeg output across the input space
cVeg_level1a_trunc <- cVeg_level1a
cVeg_level1a_trunc[cVeg_level1a > 800] <- 800
d <- ncol(X_level1a) # number of input parameters
par(oma = c(0,0,0,3), bg = 'white')
pairs(X_level1a,
labels = 1:d,
gap = 0, lower.panel = NULL, xlim = c(0,1), ylim = c(0,1),
panel = cpoints,
z = cVeg_level1a_trunc,
col = byr,
cex.labels = 1,
col.axis = 'white',
pch = 20
)
image.plot(legend.only = TRUE,
zlim = range(cVeg_level1a_trunc),
col = byr,
legend.args = list(text = 'cVeg', side = 3, line = 1),
horizontal = TRUE
)
legend('left', legend = paste(1:d, colnames(lhs)), cex = 0.9, bty = 'n')
plot(em_npp_level1)
par(mfrow = c(2,2))
plot(X_level1a[, 'b_wl_io'], cVeg_level1a)
plot(X_level1a[, 'kaps_roth'], cVeg_level1a)
plot(X_level1a[, 'lai_max_io'], cVeg_level1a)
plot(X_level1a[, 'retran_l_io'], cVeg_level1a)
# Multistart doesn't appear to make a big difference for NPP
errstats_npp_level0 <- kmLooStats(km = em_npp_level0)
errstats_npp_level0_ms <- kmLooStats(km = em_npp_level0_ms)
errstats_npp_level1 <- kmLooStats(km = em_npp_level1)
errstats_npp_level1_ms <- kmLooStats(km = em_npp_level1_ms)
# proportional mean absolute error
errstats_npp_level0$pmae
errstats_npp_level0_ms$pmae
errstats_npp_level1$pmae
errstats_npp_level1_ms$pmae
The emulators appear to capture at least the broad response for all of the output variables.
First, plot the straight kriging emulators.
# Timing experiments (kept for reference): serial vs parallel emulator fitting
#tic()
#fit_list_Y_sum_level1a <- createKmFitList(X = X_level1a, Y = Y_sum_level1a)
#toc(log = TRUE)
#tic()
#fit_list_Y_sum_level1a_par <- createKmFitListParallel(X = X_level1a, Y = Y_sum_level1a, multistart = 4)
#toc(log = TRUE)
#Ytest <- vector(mode = 'list', length = ncol(Y_sum_level1a))
#for(i in 1:2){
# Ytest[[i]] <- Y_sum_level1a[, i]
#}
#tic()
#test <- lapply(X = Y_sum_level1a_list, FUN = km, formula = ~., design = X_level1a)
#toc(log = TRUE)
#tic()
#partest <- mclapply(X = Y_sum_level1a_list, FUN = km, formula = ~., design = X_level1a, mc.cores = 4)
#toc(log = TRUE)
par(mfrow = c(4,4), mar = c(3,4,2,2), oma = c(4,4,4,0.1))
for(i in 1:length(loolist_km_Y)){
  y <- Y_level1[, y_names_sum[i]]
  loo <- loolist_km_Y[[i]]
  ylim <- range(c(loo$mean - (2*loo$sd), loo$mean + (2*loo$sd)))
  plot(y, loo$mean, xlab = '', ylab = '', main = y_names_sum[i], ylim = ylim,
       col = makeTransparent('black', 70), pch = 19)
  segments(x0 = y, y0 = loo$mean - (2*loo$sd), x1 = y, y1 = loo$mean + (2*loo$sd),
           col = makeTransparent('black', 70))
  abline(0,1)
  legend('bottomright', legend = paste('pmae =', round(loostats_km_Y[[i]]$pmae, 2), '%'), bty = 'n')
}
mtext('Actual', side = 1, line = 1, outer = TRUE, cex = 2 )
mtext('Predicted', side = 2, line = 0, outer = TRUE, cex = 2)
mtext('Modern Value (Y)', side = 3, line = 0, outer = TRUE, cex = 2)
loostats_km_Ylevel1a <- vector(mode = 'list', length = length(y_names_sum))
for(i in 1:length(emlist_km_Ylevel1a)){
  loostats <- kmLooStats(emlist_km_Ylevel1a[[i]])
  loostats_km_Ylevel1a[[i]] <- loostats
  print(loostats$pmae)
}
# level1 vs level 1a
km_pmae_level1 <- sapply(loostats_km_Y, function(x) x$pmae)
km_pmae_level1a <- sapply(loostats_km_Ylevel1a, function(x) x$pmae)
plot(km_pmae_level1 , km_pmae_level1a )
abline(0,1)
par(mfrow = c(4,4), mar = c(3,4,2,2), oma = c(4,4,4,0.1))
for(i in 1:length(loolist_km_YAnom)){
  y <- YAnom_level1[, y_names_sum[i]]
  loo <- loolist_km_YAnom[[i]]
  ylim <- range(c(loo$mean - (2*loo$sd), loo$mean + (2*loo$sd)))
  plot(y, loo$mean, xlab = '', ylab = '', main = y_names_sum[i], ylim = ylim,
       col = makeTransparent('black', 70), pch = 19)
  segments(x0 = y, y0 = loo$mean - (2*loo$sd), x1 = y, y1 = loo$mean + (2*loo$sd),
           col = makeTransparent('black', 70))
  abline(0,1)
}
mtext('Actual', side = 1, line = 1, outer = TRUE, cex = 2 )
mtext('Predicted', side = 2, line = 0, outer = TRUE, cex = 2)
mtext('Change over time (YAnom)', side = 3, line = 1, outer = TRUE, cex = 2)
Next, plot the twostep glmnet/km emulators
# Twostep glmnet emulators, cached to file
if (file.exists("ts_emulators_Y.rdata")) {
  load("ts_emulators_Y.rdata")
} else {
  emlist_twoStep_glmnet_Y <- vector(mode = 'list', length = length(y_names_sum))
  for(i in 1:length(y_names_sum)){
    yname <- y_names_sum[i]
    y <- Y_level1[, yname]
    em <- twoStep_glmnet(X = X_level1, y = y)
    emlist_twoStep_glmnet_Y[[i]] <- em
  }
  loolist_twoStep_glmnet_Y <- vector(mode = 'list', length = length(y_names_sum))
  for(i in 1:length(y_names_sum)){
    loo <- leaveOneOut.km(model = emlist_twoStep_glmnet_Y[[i]]$emulator, type = 'UK', trend.reestim = TRUE)
    loolist_twoStep_glmnet_Y[[i]] <- loo
  }
  save(emlist_twoStep_glmnet_Y, loolist_twoStep_glmnet_Y, file = "ts_emulators_Y.rdata")
}
# Get numerical leave-one-out performance statistics using kmLooStats
loostats_twoStep_glmnet_Y <- vector(mode = 'list', length = length(y_names_sum))
for(i in 1:length(emlist_twoStep_glmnet_Y)){
  loostats <- kmLooStats(emlist_twoStep_glmnet_Y[[i]]$emulator)
  loostats_twoStep_glmnet_Y[[i]] <- loostats
  print(loostats$pmae)
}
par(mfrow = c(4,4), mar = c(3,4,2,2), oma = c(4,4,4,0.1))
for(i in 1:length(loolist_twoStep_glmnet_Y)){
  y <- Y_level1[, y_names_sum[i]]
  loo <- loolist_twoStep_glmnet_Y[[i]]
  ylim <- range(c(loo$mean - (2*loo$sd), loo$mean + (2*loo$sd)))
  plot(y, loo$mean, xlab = '', ylab = '', main = y_names_sum[i], ylim = ylim,
       col = makeTransparent('black', 70), pch = 19)
  segments(x0 = y, y0 = loo$mean - (2*loo$sd), x1 = y, y1 = loo$mean + (2*loo$sd),
           col = makeTransparent('black', 70))
  abline(0,1)
  legend('bottomright', legend = paste('pmae =', round(loostats_twoStep_glmnet_Y[[i]]$pmae, 2), '%'), bty = 'n')
}
mtext('Actual', side = 1, line = 1, outer = TRUE, cex = 2 )
mtext('Predicted', side = 2, line = 0, outer = TRUE, cex = 2)
mtext('Modern Value (Y)', side = 3, line = 0, outer = TRUE, cex = 2)
We use the leave-one-out mean absolute error, expressed as a percentage of the range of the output across the ensemble. We find that the twoStep emulator isn’t significantly more accurate, and is in fact less accurate for tree fraction.
km_pmae <- sapply(loostats_km_Y, '[[', 'pmae')
ts_pmae <- sapply(loostats_twoStep_glmnet_Y, '[[', 'pmae')
par(mar = c(12,4,2,1), las =1 )
plot(1:length(y_names_sum), km_pmae,
ylim = c(0,15), pch = 19,
axes = FALSE, xlab = '',
ylab = 'LOO MAE (% of range)',
cex = 1.2)
points(1:length(y_names_sum),ts_pmae , col= 'red', pch = 19, cex = 1.2)
legend('topleft', c('km emulator', 'twoStep emulator'), pch = 19, col = c('black', 'red'), pt.cex = 1.2)
axis(2)
par(las = 2)
axis(1, at = 1:length(y_names_sum), labels = y_names_sum)