Model calibration

Data

Code
plot_info <- read.csv("data/raw_data/bioforest-plot-information.csv") |>
  select(site, plot, Year_of_harvest, Treatment)

data <- read.csv("data/derived_data/aggregated_data_v9.csv") |>
  mutate(plot_new = Plot) |>
  separate(Plot, "Plot", "_") |>
  left_join(plot_info, by = join_by(Site == site, Plot == plot)) |>
  filter(grepl("^agb", variable)) |>
  pivot_wider(names_from = "variable", values_from = "value") |>
  mutate(prod = agb_growth + agb_recr, mort = agb_mort, stocks = agb) |>
  select(!contains("agb")) |>
  pivot_longer(cols = c("stocks", "prod", "mort"), names_to = "variable") |>
  mutate(Year_of_harvest = ifelse(Site == "Mbaiki" & Treatment == "Logging",
    1985, Year_of_harvest
  )) |>
  mutate(trec = Year - Year_of_harvest)

For now, we only calibrate the model with sites that meet all the criteria listed here, and that have at least 10 censuses.

Code
sites <- read.csv("data/derived_data/sites_to_keep.csv") |>
  filter(ncensus >= 10)
data <- data |>
  filter(Site %in% sites$Site)

Model

We used the following parameter bounds when calibrating the model.

Code
bounds <- data.frame(
  "theta_min" = 50,
  "theta_max" = 1000,
  "lambda_min" = 1e-2,
  "lambda_max" = 5,
  "alpha_min" = 0,
  "alpha_max" = 200,
  "delta_min" = 0,
  "delta_max" = 20,
  "tau_min" = 2,
  "tau_max" = 30,
  "omega_min" = 1e-3,
  "omega_max" = 0.5,
  "y0_min" = 10,
  "y0_max" = 1000
)
bounds |>
  pivot_longer(everything(), names_to = "parameter") |>
  separate(parameter, c("parameter", "bound")) |>
  pivot_wider(names_from = bound) |>
  flextable()

parameter

min

max

theta

50.000

1,000.0

lambda

0.010

5.0

alpha

0.000

200.0

delta

0.000

20.0

tau

2.000

30.0

omega

0.001

0.5

y0

10.000

1,000.0

Code
data_rec <- data |>
  mutate(sitenum = as.numeric(as.factor(Site))) |>
  subset(Treatment == "Logging" & trec > 3) |>
  pivot_wider(names_from = "variable", values_from = "value") |>
  subset(!is.na(prod) & !is.na(mort)) |>
  mutate(plotnum = as.numeric(as.factor(paste(Site, plot_new))))
data_old <- data |>
  mutate(sitenum = as.numeric(as.factor(Site))) |>
  subset(Treatment == "Control" | trec < 0) |>
  pivot_wider(names_from = "variable", values_from = "value") |>
  subset(!is.na(prod) & !is.na(mort))
Code
ind_rec <- data_rec |>
  select(Site, Plot, sitenum, plotnum) |>
  unique() |>
  arrange(plotnum)

mdata_all <- list(
  n_equ = nrow(data_old),
  n_log = nrow(data_rec),
  s = max(data_old$sitenum),
  p_log = max(data_rec$plotnum),
  y_equ = data_old$stocks,
  in_equ = data_old$prod,
  out_equ = data_old$mort,
  y_log = data_rec$stocks,
  in_log = data_rec$prod,
  out_log = data_rec$mort,
  time = data_rec$trec,
  site_equ = data_old$sitenum,
  site_log = data_rec$sitenum,
  plot_log = data_rec$plotnum,
  site_plot = ind_rec$sitenum,
  theta_bounds = c(bounds$theta_min, bounds$theta_max),
  lambda_bounds = c(bounds$lambda_min, bounds$lambda_max),
  alpha_bounds = c(bounds$alpha_min, bounds$alpha_max),
  delta_bounds = c(bounds$delta_min, bounds$delta_max),
  tau_bounds = c(bounds$tau_min, bounds$tau_max),
  omega_bounds = c(bounds$omega_min, bounds$omega_max),
  y0_bounds = c(bounds$y0_min, bounds$y0_max)
)
sample_model("flux_all", mdata_all)

The resulting Stan fit summary is presented below.

Code
fit_flux_all <- as_cmdstan_fit(
  list.files("chains/flux_all/", full.names = TRUE)
)
fit_flux_all
 variable    mean  median     sd   mad      q5     q95 rhat ess_bulk ess_tail
 lp__     6932.06 6911.00 116.91 90.30 6774.74 7184.77 1.25       12       41
 mu_y0      88.92   88.92   3.81  3.91   82.50   95.22 1.03       91      195
 sigma_y0   19.64   19.63   0.54  0.54   18.74   20.50 1.02      158      492
 y0_p[1]   111.83  111.77  13.38 13.05   90.87  134.98 1.02      300      480
 y0_p[2]    88.15   87.75  13.59 14.01   66.13  110.43 1.01      449      326
 y0_p[3]    76.80   77.14  13.40 13.10   54.90   99.77 1.03      132       81
 y0_p[4]    96.75   96.75  13.26 13.64   75.06  118.78 1.03      133      148
 y0_p[5]    90.59   90.09  12.83 12.28   69.11  111.06 1.02      488     1563
 y0_p[6]    92.56   93.41  12.97 13.10   70.62  112.69 1.01      477     1801
 y0_p[7]    58.61   59.14   6.96  6.41   46.87   69.78 1.01      586     1584

 # showing 10 of 7128 rows (change via 'max_rows' argument or 'cmdstanr_max_rows' option)
Code
mcmc_trace(fit_flux_all$draws(variables = c("lp__")))

The main parameter values are consistent with our expectations.

Code
c("lambda_s", "tau_s", "theta_s", "omega_s") |>
  fit_flux_all$summary() |>
  separate(variable, c("variable", NA, "sitenum")) |>
  mutate(sitenum = as.numeric(sitenum)) |>
  left_join(unique(data_old[, c("sitenum", "Site")])) |>
  select(variable, Site, mean, median, q5, q95, rhat)
# A tibble: 32 × 7
   variable Site           mean median    q5   q95  rhat
   <chr>    <chr>         <dbl>  <dbl> <dbl> <dbl> <dbl>
 1 lambda   Corinto       1.23   1.23  1.07  1.39   1.02
 2 lambda   Lesong        4.39   4.47  3.60  4.94   1.01
 3 lambda   Mbaiki        1.29   1.29  1.17  1.39   1.15
 4 lambda   Paracou       1.29   1.30  1.17  1.40   1.13
 5 lambda   SUAS          0.757  0.761 0.682 0.820  1.13
 6 lambda   Sungai Lalang 4.64   4.70  4.08  4.99   1.02
 7 lambda   Tapajos Km67  1.68   1.69  1.53  1.84   1.11
 8 lambda   Ulu Muda      4.31   4.34  3.53  4.93   1.04
 9 tau      Corinto       4.80   4.79  3.80  5.85   1.02
10 tau      Lesong        7.53   7.52  6.91  8.16   1.01
# ℹ 22 more rows

The goodness of fit is satisfactory.

Code
pred_data <- data_rec |>
  select(-plotnum) |>
  mutate(
    stocks = fit_flux_all$summary(c("mu_y_log"), median)$median,
    prod = fit_flux_all$summary(c("mu_in_log"), median)$median,
    mort = fit_flux_all$summary(c("mu_out_log"), median)$median,
    val = "pred",
    type = "rec"
  ) |>
  rbind(mutate(
    data_old,
    stocks =
      fit_flux_all$summary(c("theta_s"), median)$median[data_old$sitenum],
    prod = fit_flux_all$summary(c("theta_s"), median)$median[data_old$sitenum] *
      fit_flux_all$summary(c("omega_s"), median)$median[data_old$sitenum], # nolint
    mort = fit_flux_all$summary(c("theta_s"), median)$median[data_old$sitenum] *
      fit_flux_all$summary(c("omega_s"), median)$median[data_old$sitenum], # nolint
    val = "pred",
    type = "equ"
  ))

obs_data <- data_rec |>
  mutate(
    val = "obs",
    type = "rec"
  ) |>
  select(-plotnum) |>
  rbind(mutate(
    data_old,
    val = "obs",
    type = "equ"
  ))

pov_data <- rbind(pred_data, obs_data) |>
  pivot_longer(cols = c(stocks, prod, mort), names_to = "variable") |>
  pivot_wider(names_from = val, values_from = value)

df_rmse <- pov_data |>
  group_by(variable) |>
  summarise(rmse = sqrt(mean((pred - obs)^2)))

labs <- c(stocks = "BA stocks", prod = "BA productivity", mort = "BA mortality")

pov_data |>
  ggplot(aes(pred, obs)) +
  geom_point(alpha = 0.25, aes(col = type)) +
  geom_abline(col = "red", linetype = "dashed") +
  geom_text(
    data = df_rmse,
    aes(
      x = -Inf, y = Inf, hjust = -0.1, vjust = 1,
      label = paste("RMSE =", round(rmse, 2))
    )
  ) +
  theme_bw() +
  xlab("Predicted") +
  ylab("Observed") +
  facet_wrap(~variable, scales = "free", labeller = labeller(variable = labs))

Predicted vs observed values.

The equilibrium AGB estimated in the logged plots is consistent overall with the control plots, except in the Malaysian sites (Lesong, Sungai Lalang and Ulu Muda), where it is significantly lower.

Code
ind_rec <- data_rec |>
  select(Site, Plot, sitenum, plotnum) |>
  unique() |>
  arrange(plotnum)
data.frame(
  site = levels(as.factor(data_old$Site))[ind_rec$sitenum],
  theta_rec = fit_flux_all$summary(c("theta_log"), median)$median,
  theta_equ =
    fit_flux_all$summary(c("theta_s"), median)$median[ind_rec$sitenum]
) |>
  ggplot(aes(theta_equ, theta_rec, col = site)) +
  geom_point() +
  geom_abline(intercept = 0, slope = 1, lty = 2) +
  labs(x = "Equilibrium AGB - controls", y = "Equilibrium AGB - logged") +
  theme_minimal()

Visualising the trajectories

The predicted flux and stocks trajectories look consistent with the data.

Code
pov_data |>
  subset(variable == "stocks") |>
  mutate(Plot = plot_new) |>
  graph_traj(expression("AGB [" ~ Mg ~ ha^-1 ~ "]"))

Predicted trajectories.
Code
pov_data |>
  subset(variable == "prod") |>
  mutate(Plot = plot_new) |>
  graph_traj(expression("AGB productivity [" ~ Mg ~ ha^-1 ~ yr^-1 ~ "]"))

Predicted trajectories.
Code
pov_data |>
  subset(variable == "mort") |>
  mutate(Plot = plot_new) |>
  graph_traj(expression("AGB mortality [" ~ Mg ~ ha^-1 ~ yr^-1 ~ "]"))

Predicted trajectories.