Sensitivity analysis of MetaWards with an emulator

This code takes a small ensemble of runs of MetaWards and fits an emulator to the maximum number of infections in each run. The code then does a sensitivty analysis using the FASTT99 algorithm, and emulated output. Finally, it looks at the one-at-a-time sensitivity using emulated output.

This code is an example only, and not a serious analysis. Results of the sensitivity analysis will change - perhaps dramatically - when sensible ranges for the parameters are used.

Load Packages

library(sensitivity)
library(DiceKriging)
library(dplyr)
source("https://raw.githubusercontent.com/dougmcneall/packages-git/master/emtools.R")

Read the design matrix from a designed ensemble of MetaWards, created in file “create_design_MetaWards.Rmd”

# Need to fix the parameter names
X <- read.csv('experiments/2020-05-07-sensitivity-analysis/design.csv', sep = "")
parnames = colnames(X)

Read and summarise the results from the runs

# A container for all the data
# Each row has a "fingerprint" that contains the values of all the changed parameters,
# and the values of the parameters are also given.
# This alters the order of the parameters.
dat <- read.csv('experiments/2020-05-07-sensitivity-analysis/output/results.csv.bz2')


# find an
unique_fingerprint = unique(dat$fingerprint)

# Use the dplyr package to find the maximum number of infections for each ensemble member.
max_infections <- dat %>% 
                      group_by(fingerprint) %>%
                      summarize(max(I))

reorder_ix <- match(unique_fingerprint, max_infections$fingerprint)
max_infections <- max_infections[reorder_ix, ]

head(max_infections)

## # A tibble: 6 x 2
##   fingerprint                                                      `max(I)`
##   <fct>                                                               <int>
## 1 0_0396911522:0_5487738012:0_4820402197:0_4917127313:0_774297676…    14011
## 2 0_7294788517:0_4236460749:0_5140176034:0_7666981902:0_403178914… 10625619
## 3 0_2940997886:0_7397552424:0_5851974157:0_5933214784:0_609800471…  5952759
## 4 0_4117703374:0_5074890216:0_3288689441:0_4084163311:0_826460217…  1853024
## 5 0_5366404451:0_7915200822:0_7097633424:0_7255256459:0_475659231… 11130717
## 6 0_6555091723:0_348384008:0_5585651195:0_4524178774:0_3879190864… 20328204

Plot each parameter against the output to get an idea of sensitivity

d <- ncol(X)

X.norm <- normalize(X)
y <- pull(max_infections,'max(I)')

par(mfrow = c(3,3), las = 1)

for(i in 1:d){
  plot(X[ ,i], y, xlab = parnames[i], ylab = 'max(I)')
}

Fit the emulator

# Fit an emulator using DiceKriging
fit = km(~., design=X.norm, response=y)

## 
## optimisation start
## ------------------
## * estimation method   : MLE 
## * optimisation method : BFGS 
## * analytical gradient : used
## * trend model : ~beta.2. + beta.3. + beta.4. + progress.1. + progress.2. + progress.3. + 
##     progress.4. + too_ill_to_move.3. + too_ill_to_move.4.
## * covariance model : 
##   - type :  matern5_2 
##   - nugget : NO
##   - parameters lower bounds :  1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 1e-10 
##   - parameters upper bounds :  2 2 2 2 2 2 2 2 2 
##   - best initial criterion value(s) :  -1547.657 
## 
## N = 9, M = 5 machine precision = 2.22045e-16
## At X0, 0 variables are exactly at the bounds
## At iterate     0  f=       1547.7  |proj g|=        1.706
## At iterate     1  f =       1533.3  |proj g|=        1.3233
## At iterate     2  f =       1529.4  |proj g|=        1.1674
## At iterate     3  f =       1520.8  |proj g|=        1.5863
## At iterate     4  f =       1520.5  |proj g|=        1.5345
## At iterate     5  f =       1520.2  |proj g|=        1.5135
## At iterate     6  f =       1519.9  |proj g|=        1.4639
## At iterate     7  f =       1519.6  |proj g|=        1.4743
## At iterate     8  f =       1519.5  |proj g|=        1.4892
## At iterate     9  f =       1519.5  |proj g|=       0.34272
## At iterate    10  f =       1519.5  |proj g|=        0.3497
## At iterate    11  f =       1519.5  |proj g|=       0.54602
## At iterate    12  f =       1519.5  |proj g|=       0.54951
## At iterate    13  f =       1519.5  |proj g|=       0.54331
## At iterate    14  f =       1519.5  |proj g|=       0.08441
## At iterate    15  f =       1519.5  |proj g|=      0.055983
## At iterate    16  f =       1519.4  |proj g|=      0.051919
## At iterate    17  f =       1519.4  |proj g|=      0.020515
## At iterate    18  f =       1519.4  |proj g|=      0.030553
## At iterate    19  f =       1519.4  |proj g|=        0.1016
## At iterate    20  f =       1519.4  |proj g|=      0.021806
## At iterate    21  f =       1519.4  |proj g|=     0.0049405
## At iterate    22  f =       1519.4  |proj g|=    0.00089605
## 
## iterations 22
## function evaluations 27
## segments explored during Cauchy searches 41
## BFGS updates skipped 0
## active bounds at final generalized Cauchy point 4
## norm of the final projected gradient 0.000896048
## final function value 1519.45
## 
## F = 1519.45
## final  value 1519.449375 
## converged

# a quick check of the emulator using cross validation
loo = leaveOneOut.km(fit, type = 'UK', trend.reestim = TRUE)

ylim = range(loo$mean - (2*loo$sd),loo$mean + (2*loo$sd) )
plot(y, loo$mean, xlab = 'max(I)', ylab = 'emulator prediction', ylim = ylim)
segments(x0 = y, y0 = loo$mean - (2*loo$sd), x1 = y, y1 = loo$mean + (2*loo$sd))
abline(0,1)

FAST99 sensitivity analysis of Saltelli et al (1999)

# Generate a design for the FAST99 analysis
X.fast <- fast99(model = NULL, factors = colnames(X), n = 3000,
                 q = "qunif", q.arg = list(min = 0, max = 1))


# Predict the response at the FAST99 design points using the emulator
pred.fast = predict(fit, newdata = X.fast$X, type = 'UK')

# Calculate the sensitivity indices
fast.tell <- tell(X.fast, pred.fast$mean)

bp.convert <- function(fastmodel){
  # get the FAST summary into an easier format for barplot
  fast.summ <- print(fastmodel)
  fast.diff <- fast.summ[ ,2] - fast.summ[ ,1]
  fast.bp <- t(cbind(fast.summ[ ,1], fast.diff))
  fast.bp
}

par(las = 2, mar = c(9,5,3,2))
barplot(bp.convert(fast.tell), col = c('skyblue', 'grey'), ylab = 'relative sensitivity', main = 'FAST99 Sensitivity')

## 
## Call:
## fast99(model = NULL, factors = colnames(X), n = 3000, q = "qunif",     q.arg = list(min = 0, max = 1))
## 
## Model runs: 27000 
## 
## Estimations of the indices:
##                    first order total order
## beta.2.            0.083486858 0.119578287
## beta.3.            0.045428476 0.060061080
## beta.4.            0.001766284 0.009332087
## progress.1.        0.064348967 0.098699718
## progress.2.        0.358255545 0.409314956
## progress.3.        0.320018453 0.390561654
## progress.4.        0.013346639 0.030384006
## too_ill_to_move.3. 0.002378648 0.008861577
## too_ill_to_move.4. 0.001826915 0.008732466

legend('topleft',legend = c('Main effect', 'Interactions'), fill = c('skyblue', 'grey') )

One-at-a-time sensitivity analysis

Parameters are swept across their range one at a time, with the remaining parameters held at central values.

n.oat <- 21
X.oat <- oaat.design(X.norm, n = n.oat, hold = rep(0.5,9))

colnames(X.oat) <- colnames(X)
pred.oat <- predict(fit, newdata = X.oat, type = 'UK')

col.transp <- adjustcolor('grey', alpha = 0.5)
par(mfrow = c(3,3), oma = c(0.1,0.1,3,0.1))

  for(i in 1:9){
    
  ix <- seq(from = ((i*n.oat) - (n.oat-1)), to =  (i*n.oat), by = 1)
  
  plot(X.oat[ix,i], pred.oat$mean[ix], 
       xlim = c(0,1), ylim = range(pred.oat$mean),
       xlab = parnames[i], ylab = 'maximum infections',
       type= 'n')
  
     polygon(x = c(X.oat[ix, i], rev(X.oat[ix, i])),
            y = c(pred.oat$mean[ix] - (2*pred.oat$sd[ix]), rev(pred.oat$mean[ix] + (2*pred.oat$sd[ix]))),
            col = col.transp, border = col.transp)
     
  lines(X.oat[ix,i], pred.oat$mean[ix], xlim = c(0,1), lty = 'solid')
  
  }

mtext('One-at-a-time sensitivity', side = 3, outer = TRUE, cex = 1.5)