data <- readRDS(file = here("data/ScotsPine/formatted_CG_data.rds")) %>%
  filter(FieldSite == "FW")
For details on the data, see the report ExploringFormattingCGdata.qmd.
We model each trait \(y\) as follows:
\[\begin{equation} \begin{aligned} y_{bpfnr} & \sim \mathcal{N}(\mu_{bpfn},\sigma^{2}_{r})\\[3pt] \mu_{bpfn} & = \beta_{0} + B_{b} + P_{p} + F_{f} + N_{n} + N_{n} \times F_{f} \\[3pt] \beta_{0} & \sim \mathcal{N}(\mu_{y},20^{2})\\[3pt] \begin{bmatrix} B_{b} \\ P_{p} \\ F_{f} \\ N_{n} \\ N_{n} \times F_{f} \end{bmatrix} & \sim \mathcal{N}\left(0, \begin{bmatrix} \sigma^{2}_{B}\\[3pt] \sigma^{2}_{P}\\[3pt] \sigma^{2}_{F}\\[3pt] \sigma^{2}_{N}\\[3pt] \sigma^{2}_{N \times F} \end{bmatrix} \right) \end{aligned} \end{equation}\]
\(\beta_{0}\) is the global intercept. \(B_{b}\) are the block intercepts with a variance \(\sigma^{2}_{B}\). \(P_{p}\) are the population intercepts with a variance \(\sigma^{2}_{P}\). \(F_{f}\) are the family intercepts with a variance \(\sigma^{2}_{F}\). \(N_{n}\) are the nursery intercepts with a variance \(\sigma^{2}_{N}\). \(\sigma^{2}_{N \times F}\) is the variance of the interaction among the nurseries and families. \(\sigma^{2}_{r}\) is the residual variance.
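For intuition, the random-intercept structure above corresponds roughly to the following lme4-style formula (a sketch for comparison only; lme4 is not used elsewhere in this report, and the column names are those of the data loaded above):

library(lme4)
# Frequentist analogue of the crossed random-intercept structure (intuition only)
lmer(HA16 ~ (1 | Block) + (1 | PopulationCode) + (1 | Family) +
       (1 | Nursery) + (1 | Nursery:Family), data = data)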
Partitioning of the total variance \(\sigma_{tot}^{2}\):
\[\begin{equation} \begin{aligned} \sigma_{tot}^{2} & = \sigma_{r}^{2} + \sigma_{B}^{2} + \sigma_{P}^{2} + \sigma_{F}^{2} + \sigma_{N}^{2} + \sigma_{N \times F}^{2} \\[3pt] \sigma_{r} & = \sigma_{tot} \times \sqrt{\pi_{r}}\\[3pt] \sigma_{B} & = \sigma_{tot} \times \sqrt{\pi_{B}}\\[3pt] \sigma_{P} & = \sigma_{tot} \times \sqrt{\pi_{P}}\\[3pt] \sigma_{F} & = \sigma_{tot} \times \sqrt{\pi_{F}}\\[3pt] \sigma_{N} & = \sigma_{tot} \times \sqrt{\pi_{N}}\\[3pt] \sigma_{N \times F} & = \sigma_{tot} \times \sqrt{\pi_{N \times F}}\\[3pt] \sigma_{tot} & \sim \mathcal{S}^{*}(0,1,3) \end{aligned} \end{equation}\]
with \(\sum_{l=1}^{6}\pi_{l}=1\), and where \(\mathcal{S}^{*}(0,1,3)\) denotes a Student-t distribution with location 0, scale 1 and 3 degrees of freedom, truncated at zero (half Student-t), matching the Stan prior below.
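Since the \(\pi_{l}\) form a simplex, the component variances add back up to the total variance:

\[\begin{equation} \sum_{l=1}^{6}\sigma_{l}^{2} = \sigma_{tot}^{2}\sum_{l=1}^{6}\pi_{l} = \sigma_{tot}^{2} \end{equation}\]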
We also calculate \(h^2\) (narrow-sense heritability) and \(I\) (evolvability) within the Stan code. For that, we assume that the seedlings from each maternal tree are half-siblings (i.e. same mother but each with a different father). Under this assumption the family variance estimates a quarter of the additive genetic variance, i.e. \(\sigma^2_A = 4\sigma^2_{family}\), so that \(h^2 = \frac{4\sigma^2_{family}}{\sigma^2_{family} + \sigma^2_r}\) (the denominator used in the Stan code below) and \(I = \frac{4\sigma^2_{family}}{\text{mean}(y)^2}\).
Here is the Stan code of the model:
model <- stan_model(here("scripts/stan_models/HMM_FW_NurseryEffect.stan"))
model

S4 class stanmodel 'anon_model' coded as follows:
data {
int<lower=1> n; // Number of observations
vector[n] y; // Response variable
int<lower=0> n_bloc; // Number of blocks
int<lower=0> n_pop; // Number of populations
int<lower=0> n_fam; // Number of families
int<lower=0> n_nurs; // Number of nurseries
int<lower=0> n_nurs_fam; // Number of nurseries x families
int<lower=0, upper=n_bloc> bloc[n]; // Blocks
int<lower=0, upper=n_pop> pop[n]; // Populations
int<lower=0, upper=n_fam> fam[n]; // Families
int<lower=0, upper=n_nurs> nurs[n]; // Nurseries
int<lower=0, upper=n_nurs_fam> nurs_fam[n]; // Nurseries * families
}
parameters {
real beta0; // global intercept
simplex[6] pi;
real<lower = 0> sigma_tot;
vector[n_bloc] z_block;
vector[n_pop] z_pop;
vector[n_fam] z_fam;
vector[n_nurs] z_nurs;
vector[n_nurs_fam] z_nurs_fam;
}
transformed parameters {
real R_squared;
real<lower = 0> sigma_r;
real<lower = 0> sigma_block;
real<lower = 0> sigma_pop;
real<lower = 0> sigma_fam;
real<lower = 0> sigma_nurs;
real<lower = 0> sigma_nurs_fam;
vector[n_bloc] alpha_block;
vector[n_pop] alpha_pop;
vector[n_fam] alpha_fam;
vector[n_nurs] alpha_nurs;
vector[n_nurs_fam] alpha_nurs_fam;
vector[n] mu; // linear predictor
// variance partitioning with the simplex pi
sigma_r = sqrt(pi[1]) * sigma_tot;
sigma_block = sqrt(pi[2]) * sigma_tot;
sigma_pop = sqrt(pi[3]) * sigma_tot;
sigma_fam = sqrt(pi[4]) * sigma_tot;
sigma_nurs = sqrt(pi[5]) * sigma_tot;
sigma_nurs_fam = sqrt(pi[6]) * sigma_tot;
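// non-centered parameterization: group-level intercepts are the standard-normal
// deviates z_* scaled by the corresponding group-level standard deviations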
alpha_block = z_block*sigma_block;
alpha_pop = z_pop*sigma_pop;
alpha_fam = z_fam*sigma_fam;
alpha_nurs = z_nurs*sigma_nurs;
alpha_nurs_fam = z_nurs_fam*sigma_nurs_fam;
mu = rep_vector(beta0, n) +
alpha_block[bloc] +
alpha_pop[pop] +
alpha_fam[fam] +
alpha_nurs[nurs] +
alpha_nurs_fam[nurs_fam];
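// in-sample R-squared, computed from the linear predictor for each posterior draw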
R_squared = 1 - variance(y - mu) / variance(y);
}
model{
//Priors
beta0 ~ normal(mean(y),20);
sigma_tot ~ student_t(3, 0.0, 1.0);
z_block ~ std_normal();
z_pop ~ std_normal();
z_fam ~ std_normal();
z_nurs ~ std_normal();
z_nurs_fam ~ std_normal();
// Likelihood
y ~ normal(mu, sigma_r);
}
generated quantities {
// Variances, narrow-sense heritability and evolvability
real<lower=0> sigma2_tot;
real<lower=0> sigma2_r;
real<lower=0> sigma2_block;
real<lower=0> sigma2_pop;
real<lower=0> sigma2_fam;
real<lower=0> sigma2_nurs;
real<lower=0> sigma2_nurs_fam;
real<lower=0> h2;
real<lower=0> I;
sigma2_tot = square(sigma_tot);
sigma2_r = square(sigma_r);
sigma2_block = square(sigma_block);
sigma2_pop = square(sigma_pop);
sigma2_fam = square(sigma_fam);
sigma2_nurs = square(sigma_nurs);
sigma2_nurs_fam = square(sigma_nurs_fam);
h2 = (4*sigma2_fam)/(sigma2_r+sigma2_fam);
I = (4*sigma2_fam)/(mean(y)^2);
}
Options for the Stan models across the document:
# Sampling in Bayesian models
n_chains <- 4        # number of chains (MCMC)
n_iter <- 2500       # number of iterations
n_warm <- 1250       # number of iterations in the warm-up phase
n_thin <- 1          # thinning interval
save_warmup <- FALSE

# Credible intervals
conf_level <- 0.95   # probability level for the credible intervals
We run the model for height measurements in 2016.
# Trait measurements in 2016
trait <- "HA16"

subdata <- data %>%
  dplyr::select(Block, PopulationCode, Family, Nursery, all_of(trait)) %>%
  mutate(nurs_fam = paste0(Nursery, "_", Family)) %>%
  drop_na()
# Using compose_data to build the stanlist
# ========================================
subdata <- subdata %>%
  dplyr::rename(bloc = Block,
                pop = PopulationCode,
                fam = Family,
                nurs = Nursery,
                y = any_of(trait))
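As a quick sanity check (not part of the original workflow), the list built by compose_data() can be inspected: it should contain n, the response y, one integer index vector per grouping column and the matching n_bloc, n_pop, n_fam, n_nurs and n_nurs_fam counts expected by the Stan data block.

# Optional check of the Stan data list produced by compose_data()
str(compose_data(subdata))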
stanfit <- sampling(model,
                    data = compose_data(subdata),
                    pars = c("beta0", "pi", "R_squared", "h2", "I",
                             "alpha_block", "alpha_pop", "alpha_fam", "alpha_nurs", "alpha_nurs_fam",
                             "sigma2_r", "sigma2_block", "sigma2_pop", "sigma2_fam", "sigma2_nurs", "sigma2_nurs_fam", "sigma2_tot"),
                    iter = n_iter,
                    chains = n_chains,
                    cores = n_chains,
                    save_warmup = save_warmup,
                    thin = n_thin)

stanfit %>% saveRDS(file = here(paste0("outputs/ScotsPine/StanModels/", trait, "_HMM_FW_NurseryEffect.rds")))

stanfit <- readRDS(file = here(paste0("outputs/ScotsPine/StanModels/", trait, "_HMM_FW_NurseryEffect.rds")))
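Before summarising the posterior, it is worth checking basic convergence diagnostics; this quick check is a sketch that is not part of the original workflow (Rhat should be close to 1 and n_eff reasonably large):

# Quick convergence check on a few key parameters (sketch)
round(summary(stanfit, pars = c("beta0", "pi", "sigma2_tot"))$summary[, c("n_eff", "Rhat")], 2)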
We look at the variance partitioning among the different components.
<- c("Residuals","Blocks","Populations","Families","Nurseries","Nurseries*Families")
mod_factors
<- stanfit %>%
df ::tidyMCMC(pars=("pi"),
broom.mixedrobust = T, # median and mean absolute deviation are used to compute point estimates and uncertainty
ess = F,
rhat = F,
conf.int = T,
conf.level = conf_level) %>%
mutate(term=mod_factors) %>%
mutate(term=factor(term,levels=mod_factors))
%>% kable_mydf(font_size = 12) df
term | estimate | std.error | conf.low | conf.high |
---|---|---|---|---|
Residuals | 0.54 | 0.10 | 0.33 | 0.71 |
Blocks | 0.02 | 0.03 | 0.00 | 0.21 |
Populations | 0.03 | 0.02 | 0.00 | 0.08 |
Families | 0.04 | 0.03 | 0.00 | 0.11 |
Nurseries | 0.31 | 0.10 | 0.15 | 0.56 |
Nurseries*Families | 0.02 | 0.02 | 0.00 | 0.09 |
df %>% ggplot(aes(y = term, x = estimate, xmin = conf.low, xmax = conf.high)) +
  geom_pointinterval(position = position_dodge(width = .8), point_size = 3, alpha = 0.6, size = 5) +
  ylab("") +
  xlab("Proportion of variance explained") +
  labs(color = "Potential drivers") +
  theme(axis.text = element_text(size = 23),
        panel.grid.minor.y = element_blank(),
        panel.grid.major.y = element_blank()) +
  guides(color = guide_legend(ncol = 1))
Narrow-sense heritability and evolvability. The blue areas correspond to the 95% credible intervals and the blue lines are the medians.
stanfit %>%
  broom.mixed::tidyMCMC(pars = c("h2", "I"),
                        robust = TRUE, # median and median absolute deviation are used for point estimates and uncertainty
                        ess = FALSE,
                        rhat = FALSE,
                        conf.int = TRUE,
                        conf.level = conf_level) %>%
  kable_mydf(round_number = 4, font_size = 12)
term | estimate | std.error | conf.low | conf.high |
---|---|---|---|---|
h2 | 0.2749 | 0.2004 | 0.0183 | 0.7082 |
I | 0.0109 | 0.0081 | 0.0007 | 0.0301 |
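As a cross-check on these summaries, \(h^2\) and \(I\) can be recomputed in R from the posterior draws of the variance components (a minimal sketch, assuming the stanfit and subdata objects created above):

# Recompute h2 and I from the posterior draws (sketch)
draws <- as.data.frame(stanfit, pars = c("sigma2_fam", "sigma2_r"))
h2_draws <- 4 * draws$sigma2_fam / (draws$sigma2_fam + draws$sigma2_r)
I_draws <- 4 * draws$sigma2_fam / mean(subdata$y)^2
quantile(h2_draws, probs = c(0.025, 0.5, 0.975))
quantile(I_draws, probs = c(0.025, 0.5, 0.975))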
posterior <- as.matrix(stanfit)
mcmc_areas(posterior,
pars = c("h2"),
prob = conf_level,
point_est = "median",
prob_outer = 1)
mcmc_areas(posterior,
pars = c("I"),
prob = conf_level,
point_est = "median",
prob_outer = 1)
Intercepts of the nurseries:
stanfit %<>% recover_types(subdata)

stanfit %>%
  spread_draws(alpha_nurs[nurs]) %>%
  ggplot(aes(y = nurs, x = alpha_nurs)) +
  stat_halfeye() +
  ylab("") +
  xlab("") +
  labs(color = "Potential drivers") +
  theme(axis.text = element_text(size = 25),
        panel.grid.minor.y = element_blank(),
        panel.grid.major.y = element_blank()) +
  guides(color = guide_legend(ncol = 1))
We run the model for height measurements in 2020.
# Trait measurements in 2020
trait <- "HA20"

subdata <- data %>%
  dplyr::select(Block, PopulationCode, Family, Nursery, all_of(trait)) %>%
  mutate(nurs_fam = paste0(Nursery, "_", Family)) %>%
  drop_na()
# Using compose_data to build the stanlist
# ========================================
subdata <- subdata %>%
  dplyr::rename(bloc = Block,
                pop = PopulationCode,
                fam = Family,
                nurs = Nursery,
                y = any_of(trait))
stanfit <- sampling(model,
                    data = compose_data(subdata),
                    pars = c("beta0", "pi", "R_squared", "h2", "I",
                             "alpha_block", "alpha_pop", "alpha_fam", "alpha_nurs", "alpha_nurs_fam",
                             "sigma2_r", "sigma2_block", "sigma2_pop", "sigma2_fam", "sigma2_nurs", "sigma2_nurs_fam", "sigma2_tot"),
                    iter = n_iter,
                    chains = n_chains,
                    cores = n_chains,
                    save_warmup = save_warmup,
                    thin = n_thin)

stanfit %>% saveRDS(file = here(paste0("outputs/ScotsPine/StanModels/", trait, "_HMM_FW_NurseryEffect.rds")))

stanfit <- readRDS(file = here(paste0("outputs/ScotsPine/StanModels/", trait, "_HMM_FW_NurseryEffect.rds")))
We look at the variance partitioning among the different components.
<- c("Residuals","Blocks","Populations","Families","Nurseries","Nurseries*Families")
mod_factors
<- stanfit %>%
df ::tidyMCMC(pars=("pi"),
broom.mixedrobust = T, # median and mean absolute deviation are used to compute point estimates and uncertainty
ess = F,
rhat = F,
conf.int = T,
conf.level = conf_level) %>%
mutate(term=mod_factors) %>%
mutate(term=factor(term,levels=mod_factors))
%>% kable_mydf(font_size = 12) df
term | estimate | std.error | conf.low | conf.high |
---|---|---|---|---|
Residuals | 0.60 | 0.09 | 0.40 | 0.76 |
Blocks | 0.02 | 0.02 | 0.00 | 0.18 |
Populations | 0.05 | 0.03 | 0.01 | 0.13 |
Families | 0.06 | 0.04 | 0.01 | 0.16 |
Nurseries | 0.19 | 0.09 | 0.07 | 0.44 |
Nurseries*Families | 0.04 | 0.03 | 0.00 | 0.13 |
df %>% ggplot(aes(y = term, x = estimate, xmin = conf.low, xmax = conf.high)) +
  geom_pointinterval(position = position_dodge(width = .8), point_size = 3, alpha = 0.6, size = 5) +
  ylab("") +
  xlab("Proportion of variance explained") +
  labs(color = "Potential drivers") +
  theme(axis.text = element_text(size = 23),
        panel.grid.minor.y = element_blank(),
        panel.grid.major.y = element_blank()) +
  guides(color = guide_legend(ncol = 1))
Narrow-sense heritability and evolvability. The blue areas correspond to the 95% credible intervals and the blue lines are the medians.
stanfit %>%
  broom.mixed::tidyMCMC(pars = c("h2", "I"),
                        robust = TRUE, # median and median absolute deviation are used for point estimates and uncertainty
                        ess = FALSE,
                        rhat = FALSE,
                        conf.int = TRUE,
                        conf.level = conf_level) %>%
  kable_mydf(round_number = 4, font_size = 12)
term | estimate | std.error | conf.low | conf.high |
---|---|---|---|---|
h2 | 0.3846 | 0.2324 | 0.0387 | 0.8754 |
I | 0.0178 | 0.0115 | 0.0017 | 0.0433 |
posterior <- as.matrix(stanfit)
mcmc_areas(posterior,
pars = c("h2"),
prob = conf_level,
point_est = "median",
prob_outer = 1)
mcmc_areas(posterior,
pars = c("I"),
prob = conf_level,
point_est = "median",
prob_outer = 1)
Intercepts of the nurseries:
stanfit %<>% recover_types(subdata)

stanfit %>%
  spread_draws(alpha_nurs[nurs]) %>%
  ggplot(aes(y = nurs, x = alpha_nurs)) +
  stat_halfeye() +
  ylab("") +
  xlab("") +
  labs(color = "Potential drivers") +
  theme(axis.text = element_text(size = 25),
        panel.grid.minor.y = element_blank(),
        panel.grid.major.y = element_blank()) +
  guides(color = guide_legend(ncol = 1))