Soil

We retrieved all data from SoilGrids detailed below (Poggio et al. 2021), but the spatial extrapolation and reference points have some issues as for instance in French Guiana. This could be compared to site data, but with issues regarding site data standardization, representativeness, and validation.

Code
list.files("data/soil/", full.names = TRUE) %>%
  read_tsv() %>%
  select(-time) %>%
  gather(variable, value, -site, -plot, -lon, -lat) %>%
  separate(variable, c("variable", "depth"), "_") %>%
  separate(depth, c("mindepth", "depth"), "-") %>%
  mutate(depth = as.numeric(gsub("cm", "", depth))) %>%
  select(-mindepth) %>%
  pivot_wider(names_from = variable, values_from = value) %>%
  mutate(
    bdod = bdod / 100,
    cec = cec / 10,
    cfvo = cfvo / 10,
    clay = clay / 10,
    sand = sand / 10,
    silt = silt / 10,
    nitrogen = nitrogen / 100,
    phh2o = phh2o / 10,
    soc = soc / 10,
    ocd = ocd / 10,
    ocs = ocs / 10
  ) %>%
  write_tsv("outputs/soil.tsv")

BAFOG and Paracou are lacking because too close to the sea.

The plots showed low variation within site but wider variation among sites with for instance sandy soils from Iwokrama opposed to silt and clay soils from Montagne Tortue.

Code
read_tsv("outputs/soil.tsv") %>%
  select(-lon, -lat, -plot) %>%
  group_by(site, depth) %>%
  summarise_all(mean, na.omit = TRUE) %>%
  gather(variable, value, -site, -depth) %>%
  mutate(var_long = recode(variable,
    "bdod" = '"BD ["~kg~dm^{-3}~"]"',
    "cec" = '"CEC ["~cmol~kg^{-1}~"]"',
    "cfvo" = '"CF ["~cm^3~"100"~cm^{-3}~"]"',
    "clay" = '"Clay ["~"%"~"]"',
    "nitrogen" = '"N ["~g~kg^{-1}~"]"',
    "ocd" = '"OCD ["~kg~dm^{3}~"]"',
    "ocs" = '"OCS ["~kg~m^{-2}~"]"',
    "phh2o" = "pH",
    "sand" = '"Sand ["~"%"~"]"',
    "silt" = '"Silt ["~"%"~"]"',
    "soc" = '"SOC ["~g~kg^{-1}~"]"'
  )) %>%
  ggplot(aes(site, value, fill = depth)) +
  geom_col(aes(group = as.factor(depth)), position = "dodge") +
  facet_wrap(~var_long, scales = "free_x", labeller = label_parsed) +
  theme_bw() +
  theme(
    axis.title = element_blank(),
    axis.text.y = element_text(size = 5)
  ) +
  scale_fill_viridis_c() +
  coord_flip()

Mean soil data per site and depth.
Code
read_tsv("outputs/soil.tsv") %>%
  select(-lon, -lat, -plot) %>%
  group_by(site, depth) %>%
  summarise_all(mean, na.omit = TRUE) %>%
  filter(depth == 5) %>%
  gather(variable, value, -site, -depth) %>%
  na.omit() %>%
  mutate(var_long = recode(variable,
    "bdod" = '"BD ["~kg~dm^{-3}~"]"',
    "cec" = '"CEC ["~cmol~kg^{-1}~"]"',
    "cfvo" = '"CF ["~cm^3~"100"~cm^{-3}~"]"',
    "clay" = '"Clay ["~"%"~"]"',
    "nitrogen" = '"N ["~g~kg^{-1}~"]"',
    "ocd" = '"OCD ["~kg~dm^{3}~"]"',
    "ocs" = '"OCS ["~kg~m^{-2}~"]"',
    "phh2o" = "pH",
    "sand" = '"Sand ["~"%"~"]"',
    "silt" = '"Silt ["~"%"~"]"',
    "soc" = '"SOC ["~g~kg^{-1}~"]"'
  )) %>%
  ggplot(aes(site, value, fill = site)) +
  geom_col(aes(group = as.factor(depth)), position = "dodge") +
  facet_wrap(~var_long, scales = "free_y", labeller = label_parsed) +
  theme_bw() +
  theme(
    legend.position = "bottom", axis.title = element_blank(),
    axis.text.x = element_blank()
  ) +
  scale_fill_discrete("")

Mean soil data per site for top horizon (up to 5cm).
Code
read_tsv("outputs/soil.tsv") %>%
  ggplot(aes(x = sand, y = clay, z = silt, col = site)) +
  ggtern::coord_tern(L = "x", T = "y", R = "z") +
  geom_point() +
  theme_bw() +
  labs(
    yarrow = "Clay [ % ]",
    zarrow = "Silt [ % ]",
    xarrow = "Sand [ % ]"
  ) +
  ggtern::theme_showarrows() +
  ggtern::theme_hidetitles() +
  ggtern::theme_clockwise() +
  scale_color_discrete("") +
  theme(legend.position = "bottom")

Soil texture for all sites, plots and depths.

The first ordination axis retained almost half of the variations of the 10 soil variables and opposed sandy poor soils to more fertile silty soils.

This is a very quick interpretation of the figure and should be explored more in depth.

Code
library(ggfortify)
data <- read_tsv("outputs/soil.tsv") %>%
  filter(depth == 5) %>%
  select(-ocs) %>%
  na.omit()
pca <- prcomp(data %>% select(-site, -plot, -lat, -lon, -depth), scale. = TRUE)
autoplot(pca,
  loadings = TRUE, loadings.label = TRUE,
  loadings.label.repel = TRUE,
  data = data, colour = "site"
) +
  theme_bw() +
  coord_equal() +
  scale_color_discrete("")

Soil variables PCA per site and plot.