Estimating the nursery effect in Inverewe (FW)

Author

Juliette Archambeau

Published

June 17, 2024

For details on the data, see the report ExploringFormattingCGdata.qmd.

Code
data <- readRDS(file=here("data/ScotsPine/formatted_CG_data.rds")) %>% 
  filter(FieldSite == "FW")

1 Mathematical model

We model each trait \(y\) as follows:

\[\begin{equation} \begin{aligned} y_{bpfnr} & \sim \mathcal{N}(\mu_{bpfn},\sigma^{2}_{r})\\[3pt] \mu_{bpfn} & = \beta_{0} + B_{b} + P_{p} + F_{f} + N_{n} + (N \times F)_{nf} \\[3pt] \beta_{0} & \sim \mathcal{N}(\mu_{y},20)\\[3pt] \begin{bmatrix} B_{b} \\ P_{p} \\ F_{f} \\ N_{n} \\ (N \times F)_{nf} \end{bmatrix} & \sim \mathcal{N}\left(0, \begin{bmatrix} \sigma^{2}_{B}\\[3pt] \sigma^{2}_{P}\\[3pt] \sigma^{2}_{F}\\[3pt] \sigma^{2}_{N}\\[3pt] \sigma^{2}_{N \times F}\\[3pt] \end{bmatrix} \right)\\ \end{aligned} \end{equation}\]

\(\beta_{0}\) is the global intercept. \(B_{b}\) are the block intercepts with variance \(\sigma^{2}_{B}\). \(P_{p}\) are the population intercepts with variance \(\sigma^{2}_{P}\). \(F_{f}\) are the family intercepts with variance \(\sigma^{2}_{F}\). \(N_{n}\) are the nursery intercepts with variance \(\sigma^{2}_{N}\). \((N \times F)_{nf}\) are the nursery-by-family interaction intercepts with variance \(\sigma^{2}_{N \times F}\). \(\sigma^{2}_{r}\) is the residual variance.

Partitioning of the total variance \(\sigma_{tot}^{2}\):

\[\begin{equation} \begin{aligned} \sigma_{tot}^{2} & = \sigma_{r}^{2} + \sigma_{B}^{2} + \sigma_{P}^{2} + \sigma_{F}^{2} + \sigma_{N}^{2} + \sigma_{N \times F}^{2} \\[3pt] \sigma_{r} & = \sigma_{tot} \times \sqrt{\pi_{r}}\\[3pt] \sigma_{B} & = \sigma_{tot} \times \sqrt{\pi_{B}}\\[3pt] \sigma_{P} & = \sigma_{tot} \times \sqrt{\pi_{P}}\\[3pt] \sigma_{F} & = \sigma_{tot} \times \sqrt{\pi_{F}}\\[3pt] \sigma_{N} & = \sigma_{tot} \times \sqrt{\pi_{N}}\\[3pt] \sigma_{N \times F} & = \sigma_{tot} \times \sqrt{\pi_{N \times F}}\\[3pt] \sigma_{tot} & \sim \mathcal{S}^{*}(0,1,3) \end{aligned} \end{equation}\]

with \(\sum_{l=1}^{6}\pi_{l}=1\), and where \(\mathcal{S}^{*}(0,1,3)\) denotes a half Student-\(t\) distribution with location 0, scale 1 and 3 degrees of freedom.
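
As a check, the same partition can be reproduced in R from the saved posterior draws of \(\pi\) and \(\sigma^2_{tot}\). A minimal sketch, assuming a fitted stanfit object such as the ones created in sections 2 and 3:

Code
# Recover the component standard deviations from the draws of pi and sigma2_tot
# (both are kept in the `pars` argument of sampling() below)
draws <- rstan::extract(stanfit, pars = c("pi", "sigma2_tot"))
sigma_r_draws <- sqrt(draws$pi[, 1] * draws$sigma2_tot) # sigma_r = sigma_tot * sqrt(pi_r)
sigma_N_draws <- sqrt(draws$pi[, 5] * draws$sigma2_tot) # sigma_N = sigma_tot * sqrt(pi_N)
quantile(sigma_N_draws, probs = c(0.025, 0.5, 0.975))   # posterior median and 95% CI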

We also calculate \(h^2\) (narrow-sense heritability) and \(I\) (evolvability) within the Stan code. We assume that the seedlings from each maternal tree are half-siblings (i.e. same mother but different fathers), so that \(\sigma^2_A = 4\sigma^2_{family}\), which gives \(h^2 = \frac{4\sigma^2_{family}}{\sigma^2_{family} + \sigma^2_r}\) and \(I = \frac{4\sigma^2_{family}}{\text{mean}(y)^2}\).
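
Equivalently, \(h^2\) and \(I\) can be recomputed in R from the saved variance draws, using the same formulas as in the generated quantities block of the Stan model. A minimal sketch, assuming a fitted stanfit object and the corresponding subdata data frame created in sections 2 and 3:

Code
# Recompute h2 and I from the posterior draws of the family and residual variances
draws <- rstan::extract(stanfit, pars = c("sigma2_fam", "sigma2_r"))
h2_draws <- (4 * draws$sigma2_fam) / (draws$sigma2_fam + draws$sigma2_r)
I_draws  <- (4 * draws$sigma2_fam) / mean(subdata$y)^2
median(h2_draws); median(I_draws)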

Here is the Stan code of the model:

Code
model <- stan_model(here("scripts/stan_models/HMM_FW_NurseryEffect.stan"))
model
S4 class stanmodel 'anon_model' coded as follows:
data {
  int<lower=1> n;                                                               // Number of observations
  vector[n] y;                                                                  // Response variable
  int<lower=0> n_bloc;                                                          // Number of blocks
  int<lower=0> n_pop;                                                           // Number of populations
  int<lower=0> n_fam;                                                           // Number of families
  int<lower=0> n_nurs;                                                          // Number of nurseries
  int<lower=0> n_nurs_fam;                                                      // Number of nurseries x families
  int<lower=0, upper=n_bloc> bloc[n];                                           // Blocks
  int<lower=0, upper=n_pop> pop[n];                                             // Populations
  int<lower=0, upper=n_fam> fam[n];                                             // Families
  int<lower=0, upper=n_nurs> nurs[n];                                           // Nurseries
  int<lower=0, upper=n_nurs_fam> nurs_fam[n];                                   // Nurseries * families
}
parameters {
  real beta0; // global intercept
  simplex[6] pi;
  real<lower = 0> sigma_tot;

  vector[n_bloc] z_block;
  vector[n_pop] z_pop;
  vector[n_fam] z_fam;
  vector[n_nurs] z_nurs;
  vector[n_nurs_fam] z_nurs_fam;

}
transformed parameters {
  real R_squared;

  real<lower = 0>  sigma_r;
  real<lower = 0>  sigma_block;
  real<lower = 0>  sigma_pop;
  real<lower = 0>  sigma_fam;
  real<lower = 0>  sigma_nurs;
  real<lower = 0>  sigma_nurs_fam;

  vector[n_bloc] alpha_block;
  vector[n_pop] alpha_pop;
  vector[n_fam] alpha_fam;
  vector[n_nurs] alpha_nurs;
  vector[n_nurs_fam] alpha_nurs_fam;


  vector[n] mu; // linear predictor

  // variance partitioning with the simplex pi
  sigma_r = sqrt(pi[1]) * sigma_tot;
  sigma_block = sqrt(pi[2]) * sigma_tot;
  sigma_pop = sqrt(pi[3]) * sigma_tot;
  sigma_fam= sqrt(pi[4]) * sigma_tot;
  sigma_nurs= sqrt(pi[5]) * sigma_tot;
  sigma_nurs_fam= sqrt(pi[6]) * sigma_tot;

  alpha_block = z_block*sigma_block;
  alpha_pop = z_pop*sigma_pop;
  alpha_fam = z_fam*sigma_fam;
  alpha_nurs = z_nurs*sigma_nurs;
  alpha_nurs_fam = z_nurs_fam*sigma_nurs_fam;


  mu = rep_vector(beta0, n) +
    alpha_block[bloc] +
    alpha_pop[pop] +
    alpha_fam[fam] +
    alpha_nurs[nurs] +
    alpha_nurs_fam[nurs_fam];

  R_squared = 1 - variance(y - mu) / variance(y);
}
model{
  //Priors
  beta0 ~ normal(mean(y),20);
  sigma_tot ~ student_t(3, 0.0, 1.0);

  z_block ~ std_normal();
  z_pop ~ std_normal();
  z_fam ~ std_normal();
  z_nurs ~ std_normal();
  z_nurs_fam ~ std_normal();

  // Likelihood
  y ~ normal(mu, sigma_r);
}
generated quantities {
  // Variances, narrow-sense heritability and evolvability
  real<lower=0> sigma2_tot;
  real<lower=0> sigma2_r;
  real<lower=0> sigma2_block;
  real<lower=0> sigma2_pop;
  real<lower=0> sigma2_fam;
  real<lower=0> sigma2_nurs;
  real<lower=0> sigma2_nurs_fam;
  real<lower=0> h2;
  real<lower=0> I;

  sigma2_tot = square(sigma_tot);
  sigma2_r = square(sigma_r);
  sigma2_block = square(sigma_block);
  sigma2_pop = square(sigma_pop);
  sigma2_fam = square(sigma_fam);
  sigma2_nurs = square(sigma_nurs);
  sigma2_nurs_fam = square(sigma_nurs_fam);
  h2 = (4*sigma2_fam)/(sigma2_r+sigma2_fam);
  I = (4*sigma2_fam)/(mean(y)^2);

} 
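
Note that the group intercepts use a non-centered parameterisation in the transformed parameters block (each \(\alpha = z \times \sigma\) with \(z \sim \mathcal{N}(0,1)\)), which generally improves sampling efficiency in hierarchical models.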

Options used for all Stan models in this document:

Code
# Sampling in Bayesian models
n_chains <- 4 # number of chains (MCMC)
n_iter <- 2500 # number of iterations
n_warm <- 1250 # number of iterations in the warm-up phase
n_thin <- 1 # thinning interval
save_warmup <- FALSE 

# Credible intervals
conf_level <- 0.95 # probability level for CI
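
With these settings, each fit keeps \(4 \times (2500 - 1250) / 1 = 5000\) post-warmup draws.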

2 Applying the model to height in 2016

We run the model for height measurements in 2016.

Code
# Trait measurements in 2016
trait <- "HA16"

subdata <- data %>% 
  dplyr::select(Block,PopulationCode,Family,Nursery,all_of(trait)) %>% 
  mutate(nurs_fam = paste0(Nursery,"_",Family)) %>% 
  drop_na()

# Using compose_data to build the stanlist
# ========================================

subdata <- subdata %>% 
  dplyr::rename(bloc=Block,
                pop=PopulationCode,
                fam=Family,
                nurs=Nursery,
                y=any_of(trait)) 
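
compose_data() (from the tidybayes package) turns this data frame into the list expected by the Stan data block: each grouping variable is converted to an integer index with a matching n_* count, and y and the total number of observations n are added. A quick way to inspect that list (a minimal sketch; the element names assume the grouping columns are, or are converted to, factors):

Code
# Inspect the list passed to Stan: it should contain y and n, plus an integer
# index and a count for each grouping variable, e.g. bloc/n_bloc, pop/n_pop,
# fam/n_fam, nurs/n_nurs and nurs_fam/n_nurs_fam
compose_data(subdata) %>% str()
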
Code
stanfit <- sampling(model,
                    data = compose_data(subdata),
                    pars=c("beta0", "pi", "R_squared", "h2","I",
                           "alpha_block","alpha_pop","alpha_fam","alpha_nurs","alpha_nurs_fam",
                           "sigma2_r","sigma2_block","sigma2_pop","sigma2_fam","sigma2_nurs","sigma2_nurs_fam","sigma2_tot"),
                    iter = n_iter, 
                    chains = n_chains, 
                    cores = n_chains,
                    save_warmup = save_warmup,
                    thin=n_thin)

stanfit %>% saveRDS(file=here(paste0("outputs/ScotsPine/StanModels/",trait,"_HMM_FW_NurseryEffect.rds")))
Code
stanfit <- readRDS(file=here(paste0("outputs/ScotsPine/StanModels/",trait,"_HMM_FW_NurseryEffect.rds")))

We look at the variance partitioning among the different components.

Code
mod_factors <- c("Residuals","Blocks","Populations","Families","Nurseries","Nurseries*Families")

df <- stanfit %>% 
    broom.mixed::tidyMCMC(pars=("pi"),
                robust = T, # median and mean absolute deviation are used to compute point estimates and uncertainty
                ess = F, 
                rhat = F, 
                conf.int = T,
                conf.level = conf_level) %>% 
    mutate(term=mod_factors) %>% 
    mutate(term=factor(term,levels=mod_factors)) 

df %>% kable_mydf(font_size = 12)
term estimate std.error conf.low conf.high
Residuals 0.54 0.10 0.33 0.71
Blocks 0.02 0.03 0.00 0.21
Populations 0.03 0.02 0.00 0.08
Families 0.04 0.03 0.00 0.11
Nurseries 0.31 0.10 0.15 0.56
Nurseries*Families 0.02 0.02 0.00 0.09
Code
df %>% ggplot(aes(y = term, x = estimate, xmin = conf.low, xmax = conf.high)) +
  geom_pointinterval(position = position_dodge(width = .8), point_size=3, alpha=0.6, size=5) +
  ylab("") +
  xlab("Proportion of variance explained") +
  labs(color = "Potential drivers") +
  theme(axis.text = element_text(size=23),
        panel.grid.minor.y=element_blank(),
        panel.grid.major.y=element_blank())  +
    guides(color=guide_legend(ncol=1))

Narrow-sense heritability and evolvability. The blue areas correspond to the 95% credible intervals and the blue lines show the medians.

Code
stanfit %>% 
  broom.mixed::tidyMCMC(pars=c("h2","I"),
                robust = T, # median and mean absolute deviation are used to compute point estimates and uncertainty
                ess = F, 
                rhat = F, 
                conf.int = T,
                conf.level = conf_level) %>% 
  kable_mydf(round_number = 4, font_size = 12)
term estimate std.error conf.low conf.high
h2 0.2749 0.2004 0.0183 0.7082
I 0.0109 0.0081 0.0007 0.0301
Code
posterior <- as.matrix(stanfit)

mcmc_areas(posterior,
           pars = c("h2"),
           prob = conf_level,
           point_est = "median",
           prob_outer = 1) 

Code
mcmc_areas(posterior,
           pars = c("I"),
           prob = conf_level,
           point_est = "median",
           prob_outer = 1)

Intercepts of the nurseries:

Code
stanfit %<>% recover_types(subdata)

stanfit %>% 
  spread_draws(alpha_nurs[nurs]) %>%
  ggplot(aes(y = nurs, x = alpha_nurs)) +
  stat_halfeye() +
  ylab("") +
  xlab("") +
  labs(color = "Potential drivers") +
  theme(axis.text = element_text(size=25),
        panel.grid.minor.y=element_blank(),
        panel.grid.major.y=element_blank())  +
    guides(color=guide_legend(ncol=1))

3 Applying the model to height in 2020

We run the model for height measurements in 2020.

Code
# Trait measurements in 2020
trait <- "HA20"

subdata <- data %>% 
  dplyr::select(Block,PopulationCode,Family,Nursery,all_of(trait)) %>% 
  mutate(nurs_fam = paste0(Nursery,"_",Family)) %>% 
  drop_na()

# Using compose_data to build the stanlist
# ========================================

subdata <- subdata %>% 
  dplyr::rename(bloc=Block,
                pop=PopulationCode,
                fam=Family,
                nurs=Nursery,
                y=any_of(trait)) 
Code
stanfit <- sampling(model,
                    data = compose_data(subdata),
                    pars=c("beta0", "pi", "R_squared", "h2","I",
                           "alpha_block","alpha_pop","alpha_fam","alpha_nurs","alpha_nurs_fam",
                           "sigma2_r","sigma2_block","sigma2_pop","sigma2_fam","sigma2_nurs","sigma2_nurs_fam","sigma2_tot"),
                    iter = n_iter, 
                    chains = n_chains, 
                    cores = n_chains,
                    save_warmup = save_warmup,
                    thin=n_thin)

stanfit %>% saveRDS(file=here(paste0("outputs/ScotsPine/StanModels/",trait,"_HMM_FW_NurseryEffect.rds")))
Code
stanfit <- readRDS(file=here(paste0("outputs/ScotsPine/StanModels/",trait,"_HMM_FW_NurseryEffect.rds")))

We look at the variance partitioning among the different components.

Code
mod_factors <- c("Residuals","Blocks","Populations","Families","Nurseries","Nurseries*Families")

df <- stanfit %>% 
    broom.mixed::tidyMCMC(pars=("pi"),
                robust = T, # median and mean absolute deviation are used to compute point estimates and uncertainty
                ess = F, 
                rhat = F, 
                conf.int = T,
                conf.level = conf_level) %>% 
    mutate(term=mod_factors) %>% 
    mutate(term=factor(term,levels=mod_factors)) 

df %>% kable_mydf(font_size = 12)
term estimate std.error conf.low conf.high
Residuals 0.60 0.09 0.40 0.76
Blocks 0.02 0.02 0.00 0.18
Populations 0.05 0.03 0.01 0.13
Families 0.06 0.04 0.01 0.16
Nurseries 0.19 0.09 0.07 0.44
Nurseries*Families 0.04 0.03 0.00 0.13
Code
df %>% ggplot(aes(y = term, x = estimate, xmin = conf.low, xmax = conf.high)) +
  geom_pointinterval(position = position_dodge(width = .8), point_size=3, alpha=0.6, size=5) +
  ylab("") +
  xlab("Proportion of variance explained") +
  labs(color = "Potential drivers") +
  theme(axis.text = element_text(size=23),
        panel.grid.minor.y=element_blank(),
        panel.grid.major.y=element_blank())  +
    guides(color=guide_legend(ncol=1))

Narrow-sense heritability and evolvability. The blue areas correspond to the 95% credible intervals and the blue lines show the medians.

Code
stanfit %>% 
  broom.mixed::tidyMCMC(pars=c("h2","I"),
                robust = T, # median and mean absolute deviation are used to compute point estimates and uncertainty
                ess = F, 
                rhat = F, 
                conf.int = T,
                conf.level = conf_level) %>% 
  kable_mydf(round_number = 4, font_size = 12)
term estimate std.error conf.low conf.high
h2 0.3846 0.2324 0.0387 0.8754
I 0.0178 0.0115 0.0017 0.0433
Code
posterior <- as.matrix(stanfit)

mcmc_areas(posterior,
           pars = c("h2"),
           prob = conf_level,
           point_est = "median",
           prob_outer = 1) 

Code
mcmc_areas(posterior,
           pars = c("I"),
           prob = conf_level,
           point_est = "median",
           prob_outer = 1)

Intercepts of the nurseries:

Code
stanfit %<>% recover_types(subdata)

stanfit %>% 
  spread_draws(alpha_nurs[nurs]) %>%
  ggplot(aes(y = nurs, x = alpha_nurs)) +
  stat_halfeye() +
  ylab("") +
  xlab("") +
  labs(color = "Potential drivers") +
  theme(axis.text = element_text(size=25),
        panel.grid.minor.y=element_blank(),
        panel.grid.major.y=element_blank())  +
    guides(color=guide_legend(ncol=1))