library(purrr)
library(dplyr)
Outbreaks simulations
Implementation of the method described in Noufaily et al. (2013).
Required packages:
A tuning of the dplyr::bind_col()
function:
<- function(...) dplyr::bind_cols(..., .name_repair = "unique_quiet") bind_cols2
A tuning of the rpois()
function:
<- function(lambda) rpois(length(lambda), lambda) rpois2
A tuning of the plot()
function:
<- function(...) plot(..., type = "s", xlab = "week", ylab = "count") plot2
A function that generates the mean of the incidence as a function of time (in weeks) with trend and seasonality:
<- function(t, theta, beta, gamma1, gamma2, m) {
mu <- 2 * pi * t((1:m) %*% t(t)) / 52
t2 exp(theta + beta * t + rowSums(gamma1 * cos(t2) + gamma2 * sin(t2)))
}
A function that stochastically generates incidence as a function of time using the mean as generated by the above mu()
function and a negative binomial distribution. This is how it should be done if the dispersion parameter \(\phi\) is interpreted correctly:
<- function(n, t, theta, beta, gamma1, gamma2, phi, m) {
baseline1 <- mu(t, theta, beta, gamma1, gamma2, m)
counts 1:n |>
map(~ rnbinom(t, phi, mu = counts)) |>
bind_cols2()
}
This is how it’s done in the article:
<- function(n, t, theta, beta, gamma1, gamma2, phi, m) {
baseline2 <- mu(t, theta, beta, gamma1, gamma2, m)
counts <- 1 / phi
prob <- counts * prob / (1 - prob)
size 1:n |>
map(~ rnbinom(t, size, prob)) |>
bind_cols2()
}
A function that generates the standard deviation of the incidence as a function of time, as it should be done if the dispersion parameter \(\phi\) is interpreted correctly:
<- function(t, theta, beta, gamma1, gamma2, phi, m) {
stddev1 <- mu(t, theta, beta, gamma1, gamma2, m)
counts sqrt(counts * (1 + counts / phi))
}
As it’s done in the article:
<- function(t, theta, beta, gamma1, gamma2, phi, m) {
stddev2 sqrt(mu(t, theta, beta, gamma1, gamma2, m) * phi)
}
A function that simulates outbreak cases to add to non-outbreak cases:
<- function(stimes, stddev, indexes, k, weights) {
outbreaks_cases * stddev[stimes] |>
k rpois2() |>
map2(stimes - 1, ~ c(rep(0, .y), round(.x * weights))[indexes]) |>
bind_cols2() |>
rowSums()
}
where stimes
is a vector of outbreak starting times, stddev
is a vector of the standard deviation of incidence as a function of time, indexes
is a vector of indexes of the input time series (basically seq_along(stddev)
here), k
is the constant used for the rate of the Poisson distribution that determines the size of the outbreak, and weights
is a vector of temporal weights used to spread the total number of cases during the outbreak along time. A function that adds outbreak cases to non-outbreak cases, using the above outbreaks_cases()
function:
<- function(weeks, stddev, nb, k, weights) {
add_outbreak_cases <- seq_along(stddev)
indexes 1:ncol(weeks) |>
map(~ sample(indexes, nb)) |>
bind_cols2() |>
map(outbreaks_cases, stddev, indexes, k, weights) |>
bind_cols2() |>
::add(weeks)
magrittr }
where weeks
is the time series of non-outbreak cases and nb
is the number of outbreaks we wish to add to the time series of incidence. The simulator that puts together all the above functions:
<- function(n = 12 * 52, N = 100,
simulator theta = 1.5, beta = .003, gamma1 = .2, gamma2 = -.4,
phi = 1, m = 1,
baseline_function = baseline1,
stddev_function = stddev1,
baseline_from = 313, baseline_to = 575,
current_from = 576, current_to = NULL,
meanlog = 0, sdlog = .5,
nb_outbreaks_baseline = 4, nb_outbreaks_current = 1, k = 2) {
if (is.null(current_to)) current_to <- n
<- baseline_from:baseline_to
baseline_indexes <- current_from:current_to
current_indexes
<- 1:n
t_vals <- diff(c(0, plnorm(seq_along(t_vals), meanlog, sdlog)))
t_weights
<- baseline_function(N, t_vals, theta, beta, gamma1, gamma2, phi, m)
baseline_vals <- stddev_function(t_vals, theta, beta, gamma1, gamma2, phi, m)
stddev_vals
<- baseline_vals[baseline_indexes, ]
baseline_weeks <- stddev_vals[baseline_indexes]
baseline_stddev
<- baseline_vals[current_indexes, ]
current_weeks <- stddev_vals[current_indexes]
current_stddev
list(bsln_outbrk = add_outbreak_cases(baseline_weeks, baseline_stddev,
nb_outbreaks_baseline, k, t_weights),curr_outbrk = add_outbreak_cases(current_weeks, current_stddev,
nb_outbreaks_current, k, t_weights)) }
The scenarios of Figure 2:
<- list(
scenarios s8 = list(theta = - 2, beta = 0, gamma1 = .1, gamma2 = .3, phi = 2, m = 1),
s10 = list(theta = - 2, beta = .005, gamma1 = 0, gamma2 = 0, phi = 2, m = 0),
s12 = list(theta = - 2, beta = .005, gamma1 = .1, gamma2 = .3, phi = 2, m = 2),
s17 = list(theta = 1.5, beta = .003, gamma1 = .2, gamma2 = - .4, phi = 1, m = 1))
Generating the data for Figure 2:
set.seed(30101976)
<- 1:(12 * 52)
t_vals
<- scenarios |>
data_fig2_1 map(~ with(.x, baseline1(n = 1, t = t_vals, theta = theta, beta = beta,
gamma1 = gamma1, gamma2 = gamma2, phi = phi, m = m))) |>
map(unlist)
<- scenarios |>
data_fig2_2 map(~ with(.x, baseline2(n = 1, t = t_vals, theta = theta, beta = beta,
gamma1 = gamma1, gamma2 = gamma2, phi = phi, m = m))) |>
map(unlist)
Figure 2 as it should be done:
<- par(mfrow = c(2, 2), plt = c(.12, 1, .18, .98))
opar walk(data_fig2_1, ~ plot2(t_vals, .x))
par(opar)
Figure 2 as it’s done in the article:
<- par(mfrow = c(2, 2), plt = c(.12, 1, .18, .98))
opar walk(data_fig2_2, ~ plot2(t_vals, .x))
par(opar)
There seems to be something wrong with scenario 17… Let’s now run the simulator with the outbreaks for the 4 scenarios and the 2 options regarding the dispersion parameter:
<- map(scenarios,
simulations1 ~ with(.x, simulator(theta = theta, beta = beta, gamma1 = gamma1,
gamma2 = gamma2, phi = phi, m = m,
baseline_function = baseline1,
stddev_function = stddev1)))
<- map(scenarios,
simulations2 ~ with(.x, simulator(theta = theta, beta = beta, gamma1 = gamma1,
gamma2 = gamma2, phi = phi, m = m,
baseline_function = baseline2,
stddev_function = stddev2)))
Values considered for \(k\) is the article: 2, 3, 5 and 10.