Data
Code
plot_info <- read.csv ("data/raw_data/bioforest-plot-information.csv" ) |>
select (site, plot, Year_of_harvest, Treatment)
data <- read.csv ("data/derived_data/aggregated_data_v9.csv" ) |>
mutate (plot_new = Plot) |>
separate (Plot, "Plot" , "_" ) |>
left_join (plot_info, by = join_by (Site == site, Plot == plot)) |>
filter (grepl ("^agb" , variable)) |>
pivot_wider (names_from = "variable" , values_from = "value" ) |>
mutate (prod = agb_growth + agb_recr, mort = agb_mort, stocks = agb) |>
select (! contains ("agb" )) |>
pivot_longer (cols = c ("stocks" , "prod" , "mort" ), names_to = "variable" ) |>
mutate (Year_of_harvest = ifelse (Site == "Mbaiki" & Treatment == "Logging" ,
1985 , Year_of_harvest
)) |>
mutate (trec = Year - Year_of_harvest)
For now, we only calibrate the model with sites that meet all the criteria listed here , and that have at least 10 censuses.
Code
sites <- read.csv ("data/derived_data/sites_to_keep.csv" ) |>
filter (ncensus >= 10 )
data <- data |>
filter (Site %in% sites$ Site)
Model
We used the following parameter bounds when calibrating the model.
Code
bounds <- data.frame (
"theta_min" = 50 ,
"theta_max" = 1000 ,
"lambda_min" = 1e-2 ,
"lambda_max" = 5 ,
"alpha_min" = 0 ,
"alpha_max" = 200 ,
"delta_min" = 0 ,
"delta_max" = 20 ,
"tau_min" = 2 ,
"tau_max" = 30 ,
"omega_min" = 1e-3 ,
"omega_max" = 0.5 ,
"y0_min" = 10 ,
"y0_max" = 1000
)
bounds |>
pivot_longer (everything (), names_to = "parameter" ) |>
separate (parameter, c ("parameter" , "bound" )) |>
pivot_wider (names_from = bound) |>
flextable ()
parameter
min
max
theta
50.000
1,000.0
lambda
0.010
5.0
alpha
0.000
200.0
delta
0.000
20.0
tau
2.000
30.0
omega
0.001
0.5
y0
10.000
1,000.0
Code
data_rec <- data |>
mutate (sitenum = as.numeric (as.factor (Site))) |>
subset (Treatment == "Logging" & trec > 3 ) |>
pivot_wider (names_from = "variable" , values_from = "value" ) |>
subset (! is.na (prod) & ! is.na (mort)) |>
mutate (plotnum = as.numeric (as.factor (paste (Site, plot_new))))
data_old <- data |>
mutate (sitenum = as.numeric (as.factor (Site))) |>
subset (Treatment == "Control" | trec < 0 ) |>
pivot_wider (names_from = "variable" , values_from = "value" ) |>
subset (! is.na (prod) & ! is.na (mort))
Code
ind_rec <- data_rec |>
select (Site, Plot, sitenum, plotnum) |>
unique () |>
arrange (plotnum)
mdata_all <- list (
n_equ = nrow (data_old),
n_log = nrow (data_rec),
s = max (data_old$ sitenum),
p_log = max (data_rec$ plotnum),
y_equ = data_old$ stocks,
in_equ = data_old$ prod,
out_equ = data_old$ mort,
y_log = data_rec$ stocks,
in_log = data_rec$ prod,
out_log = data_rec$ mort,
time = data_rec$ trec,
site_equ = data_old$ sitenum,
site_log = data_rec$ sitenum,
plot_log = data_rec$ plotnum,
site_plot = ind_rec$ sitenum,
theta_bounds = c (bounds$ theta_min, bounds$ theta_max),
lambda_bounds = c (bounds$ lambda_min, bounds$ lambda_max),
alpha_bounds = c (bounds$ alpha_min, bounds$ alpha_max),
delta_bounds = c (bounds$ delta_min, bounds$ delta_max),
tau_bounds = c (bounds$ tau_min, bounds$ tau_max),
omega_bounds = c (bounds$ omega_min, bounds$ omega_max),
y0_bounds = c (bounds$ y0_min, bounds$ y0_max)
)
sample_model ("flux_all" , mdata_all)
The resulting Stan fit summary is presented below.
Code
fit_flux_all <- as_cmdstan_fit (
list.files ("chains/flux_all/" , full.names = TRUE )
)
fit_flux_all
variable mean median sd mad q5 q95 rhat ess_bulk ess_tail
lp__ 6932.06 6911.00 116.91 90.30 6774.74 7184.77 1.25 12 41
mu_y0 88.92 88.92 3.81 3.91 82.50 95.22 1.03 91 195
sigma_y0 19.64 19.63 0.54 0.54 18.74 20.50 1.02 158 492
y0_p[1] 111.83 111.77 13.38 13.05 90.87 134.98 1.02 300 480
y0_p[2] 88.15 87.75 13.59 14.01 66.13 110.43 1.01 449 326
y0_p[3] 76.80 77.14 13.40 13.10 54.90 99.77 1.03 132 81
y0_p[4] 96.75 96.75 13.26 13.64 75.06 118.78 1.03 133 148
y0_p[5] 90.59 90.09 12.83 12.28 69.11 111.06 1.02 488 1563
y0_p[6] 92.56 93.41 12.97 13.10 70.62 112.69 1.01 477 1801
y0_p[7] 58.61 59.14 6.96 6.41 46.87 69.78 1.01 586 1584
# showing 10 of 7128 rows (change via 'max_rows' argument or 'cmdstanr_max_rows' option)
Code
mcmc_trace (fit_flux_all$ draws (variables = c ("lp__" )))
The main parameter values are consistent with our expectations.
Code
c ("lambda_s" , "tau_s" , "theta_s" , "omega_s" ) |>
fit_flux_all$ summary () |>
separate (variable, c ("variable" , NA , "sitenum" )) |>
mutate (sitenum = as.numeric (sitenum)) |>
left_join (unique (data_old[, c ("sitenum" , "Site" )])) |>
select (variable, Site, mean, median, q5, q95, rhat)
# A tibble: 32 × 7
variable Site mean median q5 q95 rhat
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 lambda Corinto 1.23 1.23 1.07 1.39 1.02
2 lambda Lesong 4.39 4.47 3.60 4.94 1.01
3 lambda Mbaiki 1.29 1.29 1.17 1.39 1.15
4 lambda Paracou 1.29 1.30 1.17 1.40 1.13
5 lambda SUAS 0.757 0.761 0.682 0.820 1.13
6 lambda Sungai Lalang 4.64 4.70 4.08 4.99 1.02
7 lambda Tapajos Km67 1.68 1.69 1.53 1.84 1.11
8 lambda Ulu Muda 4.31 4.34 3.53 4.93 1.04
9 tau Corinto 4.80 4.79 3.80 5.85 1.02
10 tau Lesong 7.53 7.52 6.91 8.16 1.01
# ℹ 22 more rows
The goodness of fit is satisfactory.
Code
pred_data <- data_rec |>
select (- plotnum) |>
mutate (
stocks = fit_flux_all$ summary (c ("mu_y_log" ), median)$ median,
prod = fit_flux_all$ summary (c ("mu_in_log" ), median)$ median,
mort = fit_flux_all$ summary (c ("mu_out_log" ), median)$ median,
val = "pred" ,
type = "rec"
) |>
rbind (mutate (
data_old,
stocks =
fit_flux_all$ summary (c ("theta_s" ), median)$ median[data_old$ sitenum],
prod = fit_flux_all$ summary (c ("theta_s" ), median)$ median[data_old$ sitenum] *
fit_flux_all$ summary (c ("omega_s" ), median)$ median[data_old$ sitenum], # nolint
mort = fit_flux_all$ summary (c ("theta_s" ), median)$ median[data_old$ sitenum] *
fit_flux_all$ summary (c ("omega_s" ), median)$ median[data_old$ sitenum], # nolint
val = "pred" ,
type = "equ"
))
obs_data <- data_rec |>
mutate (
val = "obs" ,
type = "rec"
) |>
select (- plotnum) |>
rbind (mutate (
data_old,
val = "obs" ,
type = "equ"
))
pov_data <- rbind (pred_data, obs_data) |>
pivot_longer (cols = c (stocks, prod, mort), names_to = "variable" ) |>
pivot_wider (names_from = val, values_from = value)
df_rmse <- pov_data |>
group_by (variable) |>
summarise (rmse = sqrt (mean ((pred - obs)^ 2 )))
labs <- c (stocks = "BA stocks" , prod = "BA productivity" , mort = "BA mortality" )
pov_data |>
ggplot (aes (pred, obs)) +
geom_point (alpha = 0.25 , aes (col = type)) +
geom_abline (col = "red" , linetype = "dashed" ) +
geom_text (
data = df_rmse,
aes (
x = - Inf , y = Inf , hjust = - 0.1 , vjust = 1 ,
label = paste ("RMSE =" , round (rmse, 2 ))
)
) +
theme_bw () +
xlab ("Predicted" ) +
ylab ("Observed" ) +
facet_wrap (~ variable, scales = "free" , labeller = labeller (variable = labs))
The equilibrium AGB estimated in the logged plots is consistent overall with the control plots, except in the Malaysian sites (Lesong, Sungai Lalang and Ulu Muda), where it is significantly lower.
Code
ind_rec <- data_rec |>
select (Site, Plot, sitenum, plotnum) |>
unique () |>
arrange (plotnum)
data.frame (
site = levels (as.factor (data_old$ Site))[ind_rec$ sitenum],
theta_rec = fit_flux_all$ summary (c ("theta_log" ), median)$ median,
theta_equ =
fit_flux_all$ summary (c ("theta_s" ), median)$ median[ind_rec$ sitenum]
) |>
ggplot (aes (theta_equ, theta_rec, col = site)) +
geom_point () +
geom_abline (intercept = 0 , slope = 1 , lty = 2 ) +
labs (x = "Equilibrium AGB - controls" , y = "Equilibrium AGB - logged" ) +
theme_minimal ()
Visualising the trajectories
The predicted flux and stocks trajectories look consistent with the data.
Code
pov_data |>
subset (variable == "stocks" ) |>
mutate (Plot = plot_new) |>
graph_traj (expression ("AGB [" ~ Mg ~ ha^- 1 ~ "]" ))
Code
pov_data |>
subset (variable == "prod" ) |>
mutate (Plot = plot_new) |>
graph_traj (expression ("AGB productivity [" ~ Mg ~ ha^- 1 ~ yr^- 1 ~ "]" ))
Code
pov_data |>
subset (variable == "mort" ) |>
mutate (Plot = plot_new) |>
graph_traj (expression ("AGB mortality [" ~ Mg ~ ha^- 1 ~ yr^- 1 ~ "]" ))