Data
Code
plot_info <- read.csv ("data/raw_data/bioforest-plot-information.csv" ) |>
select (site, plot, Year_of_harvest, Treatment)
data <- read.csv ("data/derived_data/aggregated_data_v9.csv" ) |>
mutate (plot_new = Plot) |>
separate (Plot, "Plot" , "_" ) |>
left_join (plot_info, by = join_by (Site == site, Plot == plot)) |>
filter (grepl ("^agb" , variable)) |>
pivot_wider (names_from = "variable" , values_from = "value" ) |>
mutate (prod = agb_growth + agb_recr, mort = agb_mort, stocks = agb) |>
select (! contains ("agb" )) |>
pivot_longer (cols = c ("stocks" , "prod" , "mort" ), names_to = "variable" ) |>
mutate (Year_of_harvest = ifelse (Site == "Mbaiki" & Treatment == "Logging" ,
1985 , Year_of_harvest
)) |>
mutate (trec = Year - Year_of_harvest)
For now, we only calibrate the model with sites that meet all the criteria listed here , and that have at least 10 censuses.
Code
sites <- read.csv ("data/derived_data/sites_to_keep.csv" ) |>
filter (ncensus >= 10 )
data <- data |>
filter (Site %in% sites$ Site)
Model
We used the following parameter bounds when calibrating the model.
Code
bounds <- data.frame (
"theta_min" = 50 ,
"theta_max" = 1000 ,
"lambda_min" = 1e-2 ,
"lambda_max" = 5 ,
"alpha_min" = 0 ,
"alpha_max" = 200 ,
"delta_min" = 0 ,
"delta_max" = 20 ,
"tau_min" = 2 ,
"tau_max" = 30 ,
"omega_min" = 1e-3 ,
"omega_max" = 0.5 ,
"y0_min" = 10 ,
"y0_max" = 1000
)
bounds |>
pivot_longer (everything (), names_to = "parameter" ) |>
separate (parameter, c ("parameter" , "bound" )) |>
pivot_wider (names_from = bound) |>
flextable ()
parameter
min
max
theta
50.000
1,000.0
lambda
0.010
5.0
alpha
0.000
200.0
delta
0.000
20.0
tau
2.000
30.0
omega
0.001
0.5
y0
10.000
1,000.0
Code
data_rec <- data |>
mutate (sitenum = as.numeric (as.factor (Site))) |>
subset (Treatment == "Logging" & trec > 3 ) |>
pivot_wider (names_from = "variable" , values_from = "value" ) |>
subset (! is.na (prod) & ! is.na (mort)) |>
mutate (plotnum = as.numeric (as.factor (paste (Site, plot_new))))
data_old <- data |>
mutate (sitenum = as.numeric (as.factor (Site))) |>
subset (Treatment == "Control" | trec < 0 ) |>
pivot_wider (names_from = "variable" , values_from = "value" ) |>
subset (! is.na (prod) & ! is.na (mort))
Code
ind_rec <- data_rec |>
select (Site, Plot, sitenum, plotnum) |>
unique () |>
arrange (plotnum)
mdata_all <- list (
n_equ = nrow (data_old),
n_log = nrow (data_rec),
s = max (data_old$ sitenum),
p_log = max (data_rec$ plotnum),
y_equ = data_old$ stocks,
in_equ = data_old$ prod,
out_equ = data_old$ mort,
y_log = data_rec$ stocks,
in_log = data_rec$ prod,
out_log = data_rec$ mort,
time = data_rec$ trec,
site_equ = data_old$ sitenum,
site_log = data_rec$ sitenum,
plot_log = data_rec$ plotnum,
site_plot = ind_rec$ sitenum,
theta_bounds = c (bounds$ theta_min, bounds$ theta_max),
lambda_bounds = c (bounds$ lambda_min, bounds$ lambda_max),
alpha_bounds = c (bounds$ alpha_min, bounds$ alpha_max),
delta_bounds = c (bounds$ delta_min, bounds$ delta_max),
tau_bounds = c (bounds$ tau_min, bounds$ tau_max),
omega_bounds = c (bounds$ omega_min, bounds$ omega_max),
y0_bounds = c (bounds$ y0_min, bounds$ y0_max)
)
sample_model ("flux_all" , mdata_all)
The resulting Stan fit summary is presented below.
Code
fit_flux_all <- as_cmdstan_fit (
list.files ("chains/flux_all/" , full.names = TRUE )
)
fit_flux_all
variable mean median sd mad q5 q95 rhat ess_bulk ess_tail
lp__ 7039.36 7031.27 78.60 82.12 6928.00 7181.21 1.35 9 26
mu_y0 355.39 354.87 6.18 6.66 346.58 366.01 1.07 44 441
sigma_y0 19.72 19.65 0.52 0.46 18.92 20.62 1.02 1149 1637
y0_p[1] 373.63 372.63 14.30 15.00 352.71 398.15 1.03 89 1564
y0_p[2] 349.68 348.19 13.89 12.35 326.38 373.04 1.02 218 367
y0_p[3] 338.83 339.91 13.63 13.03 314.71 360.31 1.02 214 621
y0_p[4] 358.32 356.63 13.36 12.34 336.40 381.53 1.03 650 1493
y0_p[5] 350.10 349.98 15.20 17.22 329.15 376.08 1.05 53 424
y0_p[6] 350.93 351.35 15.64 17.19 327.90 377.04 1.06 40 271
y0_p[7] 323.11 324.72 14.00 13.01 295.73 343.77 1.13 21 12
# showing 10 of 7128 rows (change via 'max_rows' argument or 'cmdstanr_max_rows' option)
Code
mcmc_trace (fit_flux_all$ draws (variables = c ("lp__" )))
The main parameter values are consistent with our expectations.
Code
c ("lambda_s" , "tau_s" , "theta_s" , "omega_s" ) |>
fit_flux_all$ summary () |>
separate (variable, c ("variable" , NA , "sitenum" )) |>
mutate (sitenum = as.numeric (sitenum)) |>
left_join (unique (data_old[, c ("sitenum" , "Site" )])) |>
select (variable, Site, mean, median, q5, q95, rhat)
# A tibble: 32 × 7
variable Site mean median q5 q95 rhat
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 lambda Corinto 1.82 1.81 1.53 2.16 1.04
2 lambda Lesong 0.868 0.862 0.797 0.950 1.13
3 lambda Mbaiki 1.70 1.70 1.53 1.86 1.04
4 lambda Paracou 1.59 1.58 1.42 1.72 1.09
5 lambda SUAS 4.72 4.79 4.15 4.99 1.09
6 lambda Sungai Lalang 0.788 0.786 0.734 0.846 1.09
7 lambda Tapajos Km67 1.30 1.30 1.19 1.39 1.06
8 lambda Ulu Muda 0.835 0.829 0.776 0.900 1.03
9 tau Corinto 4.82 4.85 3.74 5.90 1.04
10 tau Lesong 7.62 7.61 6.95 8.20 1.04
# ℹ 22 more rows
The goodness of fit is satisfactory.
Code
pred_data <- data_rec |>
select (- plotnum) |>
mutate (
stocks = fit_flux_all$ summary (c ("mu_y_log" ), median)$ median,
prod = fit_flux_all$ summary (c ("mu_in_log" ), median)$ median,
mort = fit_flux_all$ summary (c ("mu_out_log" ), median)$ median,
val = "pred" ,
type = "rec"
) |>
rbind (mutate (
data_old,
stocks =
fit_flux_all$ summary (c ("theta_s" ), median)$ median[data_old$ sitenum],
prod = fit_flux_all$ summary (c ("theta_s" ), median)$ median[data_old$ sitenum] *
fit_flux_all$ summary (c ("omega_s" ), median)$ median[data_old$ sitenum], # nolint
mort = fit_flux_all$ summary (c ("theta_s" ), median)$ median[data_old$ sitenum] *
fit_flux_all$ summary (c ("omega_s" ), median)$ median[data_old$ sitenum], # nolint
val = "pred" ,
type = "equ"
))
obs_data <- data_rec |>
mutate (
val = "obs" ,
type = "rec"
) |>
select (- plotnum) |>
rbind (mutate (
data_old,
val = "obs" ,
type = "equ"
))
pov_data <- rbind (pred_data, obs_data) |>
pivot_longer (cols = c (stocks, prod, mort), names_to = "variable" ) |>
pivot_wider (names_from = val, values_from = value)
df_rmse <- pov_data |>
group_by (variable) |>
summarise (rmse = sqrt (mean ((pred - obs)^ 2 )))
labs <- c (stocks = "BA stocks" , prod = "BA productivity" , mort = "BA mortality" )
pov_data |>
ggplot (aes (pred, obs)) +
geom_point (alpha = 0.25 , aes (col = type)) +
geom_abline (col = "red" , linetype = "dashed" ) +
geom_text (
data = df_rmse,
aes (
x = - Inf , y = Inf , hjust = - 0.1 , vjust = 1 ,
label = paste ("RMSE =" , round (rmse, 2 ))
)
) +
theme_bw () +
xlab ("Predicted" ) +
ylab ("Observed" ) +
facet_wrap (~ variable, scales = "free" , labeller = labeller (variable = labs))
The equilibrium AGB estimated in the logged plots is consistent overall with the control plots, except in the Malaysian sites (Lesong, Sungai Lalang and Ulu Muda), where it is significantly lower.
Code
ind_rec <- data_rec |>
select (Site, Plot, sitenum, plotnum) |>
unique () |>
arrange (plotnum)
data.frame (
site = levels (as.factor (data_old$ Site))[ind_rec$ sitenum],
theta_rec = fit_flux_all$ summary (c ("theta_log" ), median)$ median,
theta_equ =
fit_flux_all$ summary (c ("theta_s" ), median)$ median[ind_rec$ sitenum]
) |>
ggplot (aes (theta_equ, theta_rec, col = site)) +
geom_point () +
geom_abline (intercept = 0 , slope = 1 , lty = 2 ) +
labs (x = "Equilibrium AGB - controls" , y = "Equilibrium AGB - logged" ) +
theme_minimal ()
Visualising the trajectories
The predicted flux and stocks trajectories look consistent with the data.
Code
pov_data |>
subset (variable == "stocks" ) |>
mutate (Plot = plot_new) |>
graph_traj (expression ("AGB [" ~ Mg ~ ha^- 1 ~ "]" ))
Code
pov_data |>
subset (variable == "prod" ) |>
mutate (Plot = plot_new) |>
graph_traj (expression ("AGB productivity [" ~ Mg ~ ha^- 1 ~ yr^- 1 ~ "]" ))
Code
pov_data |>
subset (variable == "mort" ) |>
mutate (Plot = plot_new) |>
graph_traj (expression ("AGB mortality [" ~ Mg ~ ha^- 1 ~ yr^- 1 ~ "]" ))