Code
<- readRDS(file=here("data/formatted_CG_data.rds")) data
For details on the data, see report ExploringFormattingCGdata.qmd
.
<- readRDS(file=here("data/formatted_CG_data.rds")) data
In the present report, we estimate phenotypic plasticity at the population-level for different traits (height and phenology-related traits) using two methods:
Relative distance plasticity index (RDPI) developed in Valladares, Sanchez-Gomez, and Zavala (2006).
Bayesian hierarchical mixed model (hereafter HMM).
We first estimate the variance explained by the different components of the experimental design: populations
, families
, sites
, blocks
, nurseries
, the interaction site*families
which should capture plasticity due to the environmental variation among field sites, and the interaction site*nurseries
.
We model each trait \(y\) such as:
\[\begin{equation} \begin{aligned} y_{sbpfnr} & \sim \mathcal{N}(\mu_{sbpfn},\sigma^{2}_{r})\\[3pt] \mu_{bpc} & = \beta_{0} + S_{s} + B_{b} + P_{p} + F_{f} + N_{n} + S_{s} \times N_{n} + S_{s} \times F_{f} \\[3pt] \beta_{0} & \sim \mathcal{N}(\mu_{y},2)\\[3pt] \begin{bmatrix} S_{s} \\ B_{b} \\ P_{p} \\ F_{f} \\ N_{n} \\ S_{s} \times F_{f} \\ S_{s} \times N_{n} \end{bmatrix} & \sim \mathcal{N}\left(0, \begin{bmatrix} \sigma^{2}_{S}\\[3pt] \sigma^{2}_{B}\\[3pt] \sigma^{2}_{P}\\[3pt] \sigma^{2}_{F}\\[3pt] \sigma^{2}_{N}\\[3pt] \sigma^{2}_{S \times F}\\[3pt] \sigma^{2}_{S \times N}\\[3pt] \end{bmatrix} \right)\\ \end{aligned} \end{equation}\]
\(\beta_{0}\) is the global intercept. \(S_{s}\) are the site intercepts with a variance \(\sigma^{2}_{S}\). \(B_{b}\) are the block intercepts with a variance \(\sigma^{2}_{B}\). \(P_{p}\) are the population intercepts with a variance \(\sigma^{2}_{P}\). \(F_{f}\) are the family intercepts with a variance \(\sigma^{2}_{F}\). \(N_{n}\) are the nursery intercepts with a variance \(\sigma^{2}_{N}\). \(\sigma^{2}_{S \times F}\) is the variance of the interaction among the sites and families, which is used as an approximation of the G\(\times\)E variance. \(\sigma^{2}_{S \times N}\) is the variance of the interaction among the sites and nurseries. \(\sigma^{2}_{r}\) is the residual variance.
Partitioning of the total variance \(\sigma_{tot}^{2}\):
\[\begin{equation} \begin{aligned} \sigma_{tot}^{2} & = \sigma_{r}^{2} + \sigma_{S}^{2} + \sigma_{B}^{2} + \sigma_{P}^{2} + \sigma_{F}^{2} + \sigma_{N}^{2} + \sigma_{S \times F}^{2} + \sigma_{S \times N}^{2}\\[3pt] \sigma_{r} & = \sigma_{tot} \times \sqrt(\pi_{r})\\[3pt] \sigma_{S} & = \sigma_{tot} \times \sqrt(\pi_{S})\\[3pt] \sigma_{B} & = \sigma_{tot} \times \sqrt(\pi_{B})\\[3pt] \sigma_{P} & = \sigma_{tot} \times \sqrt(\pi_{P})\\[3pt] \sigma_{F} & = \sigma_{tot} \times \sqrt(\pi_{F})\\[3pt] \sigma_{N} & = \sigma_{tot} \times \sqrt(\pi_{N})\\[3pt] \sigma_{S \times F} & = \sigma_{tot} \times \sqrt(\pi_{S \times F})\\[3pt] \sigma_{S \times N} & = \sigma_{tot} \times \sqrt(\pi_{S \times N})\\[3pt] \sigma_{tot} & \sim \mathcal{S}^{*}(0,1,3) \end{aligned} \end{equation}\]
with \(\sum_{l}^{8}\pi_{l}=1\).
We also calculate \(h^2\) (narrow-sense heritability) and \(I\) (evolvability) within the Stan code. For that, we assume that the seedlings from each maternal tree are half-siblings (i.e. same mother but each with a different father) then \(\sigma^2_A = 4\sigma^2_{family}\), and so \(h^2 = \frac{4\sigma^2_{family}}{\sigma^2_{family} + \sigma^2_{population} + \sigma^2_r}\) and \(I = \frac{4\sigma^2_{family}}{\text{mean(y)}^2}\).
Here is the Stan code of the model:
<- stan_model(here("scripts/stan_models/BaselineHMM.stan"))
model model
S4 class stanmodel 'anon_model' coded as follows:
data {
int<lower=1> n; // Number of observations
vector[n] y; // Response variable
int<lower=0> n_site; // Number of sites
int<lower=0> n_bloc; // Number of blocks
int<lower=0> n_pop; // Number of populations
int<lower=0> n_fam; // Number of families
int<lower=0> n_nurs; // Number of nurseries
int<lower=0> n_site_fam; // Number of sites x families
int<lower=0> n_site_nurs; // Number of sites x nurseries
int<lower=0, upper=n_site> site[n]; // Sites
int<lower=0, upper=n_bloc> bloc[n]; // Blocks
int<lower=0, upper=n_pop> pop[n]; // Populations
int<lower=0, upper=n_fam> fam[n]; // Families
int<lower=0, upper=n_nurs> nurs[n]; // Nurseries
int<lower=0, upper=n_site_fam> site_fam[n]; // Sites * families
int<lower=0, upper=n_site_nurs> site_nurs[n]; // Sites * nurseries
}
parameters {
real beta0; // global intercept
simplex[8] pi;
real<lower = 0> sigma_tot;
vector[n_site] z_site;
vector[n_bloc] z_block;
vector[n_pop] z_pop;
vector[n_fam] z_fam;
vector[n_nurs] z_nurs;
vector[n_site_fam] z_site_fam;
vector[n_site_nurs] z_site_nurs;
}
transformed parameters {
real R_squared;
real<lower = 0> sigma_r;
real<lower = 0> sigma_site;
real<lower = 0> sigma_block;
real<lower = 0> sigma_pop;
real<lower = 0> sigma_fam;
real<lower = 0> sigma_nurs;
real<lower = 0> sigma_site_fam;
real<lower = 0> sigma_site_nurs;
vector[n_site] alpha_site;
vector[n_bloc] alpha_block;
vector[n_pop] alpha_pop;
vector[n_fam] alpha_fam;
vector[n_nurs] alpha_nurs;
vector[n_site_fam] alpha_site_fam;
vector[n_site_nurs] alpha_site_nurs;
vector[n] mu; // linear predictor
// variance partitioning with the simplex pi
sigma_r = sqrt(pi[1]) * sigma_tot;
sigma_site = sqrt(pi[2]) * sigma_tot;
sigma_block = sqrt(pi[3]) * sigma_tot;
sigma_pop = sqrt(pi[4]) * sigma_tot;
sigma_fam= sqrt(pi[5]) * sigma_tot;
sigma_nurs= sqrt(pi[6]) * sigma_tot;
sigma_site_fam= sqrt(pi[7]) * sigma_tot;
sigma_site_nurs= sqrt(pi[8]) * sigma_tot;
alpha_site = z_site*sigma_site;
alpha_block = z_block*sigma_block;
alpha_pop = z_pop*sigma_pop;
alpha_fam = z_fam*sigma_fam;
alpha_nurs = z_nurs*sigma_nurs;
alpha_site_fam = z_site_fam*sigma_site_fam;
alpha_site_nurs = z_site_nurs*sigma_site_nurs;
mu = rep_vector(beta0, n) +
alpha_site[site] +
alpha_block[bloc] +
alpha_pop[pop] +
alpha_fam[fam] +
alpha_nurs[nurs] +
alpha_site_fam[site_fam] +
alpha_site_nurs[site_nurs];
R_squared = 1 - variance(y - mu) / variance(y);
}
model{
//Priors
beta0 ~ normal(mean(y),20);
sigma_tot ~ student_t(3, 0.0, 1.0);
z_site ~ std_normal();
z_block ~ std_normal();
z_pop ~ std_normal();
z_fam ~ std_normal();
z_nurs ~ std_normal();
z_site_fam ~ std_normal();
z_site_nurs ~ std_normal();
// Likelihood
y ~ normal(mu, sigma_r);
}
generated quantities {
// Variances, narrow-sense heritability and evolvability
real<lower=0> sigma2_tot;
real<lower=0> sigma2_r;
real<lower=0> sigma2_site;
real<lower=0> sigma2_block;
real<lower=0> sigma2_pop;
real<lower=0> sigma2_fam;
real<lower=0> sigma2_nurs;
real<lower=0> sigma2_site_fam;
real<lower=0> sigma2_site_nurs;
real<lower=0> h2;
real<lower=0> I;
sigma2_tot = square(sigma_tot);
sigma2_r = square(sigma_r);
sigma2_site = square(sigma_site);
sigma2_block = square(sigma_block);
sigma2_pop = square(sigma_pop);
sigma2_fam = square(sigma_fam);
sigma2_nurs = square(sigma_nurs);
sigma2_site_fam = square(sigma_site_fam);
sigma2_site_nurs = square(sigma_site_nurs);
h2 = (4*sigma2_fam)/(sigma2_r+sigma2_fam);
I = (4*sigma2_fam)/(mean(y)^2);
}
Options for the Stan models across the document:
# Sampling in Bayesian models
<- 4 # number of chains (MCMC)
n_chains <- 2500 # number of iterations
n_iter <- 1250 # number of iterations in the warm-up phase
n_warm <- 1 # thinning interval
n_thin <- FALSE
save_warmup
# Credible intervals
<- 0.95 # probability level for CI conf_level
We run the model for height measurements in 2016.
# Trait measurements in 2016
<- "HA16"
trait
<- data %>%
subdata ::select(FieldSite,Block,PopulationCode,Family,Nursery,all_of(trait)) %>%
dplyrmutate(site_fam = paste0(FieldSite,"_",Family),
site_nurs = paste0(FieldSite,"_",Nursery)) %>%
drop_na()
# Using compose_data to build the stanlist
# ========================================
<- subdata %>%
subdata ::rename(site=FieldSite,
dplyrbloc=Block,
pop=PopulationCode,
fam=Family,
nurs=Nursery,
y=any_of(trait))
<- sampling(model,
stanfit data = compose_data(subdata),
pars=c("beta0", "pi", "R_squared", "h2","I",
"alpha_site","alpha_block","alpha_pop","alpha_fam","alpha_nurs","alpha_site_fam","alpha_site_nurs",
"sigma2_r","sigma2_site","sigma2_block","sigma2_pop","sigma2_fam","sigma2_nurs","sigma2_site_fam","sigma2_site_nurs","sigma2_tot"),
iter = n_iter,
chains = n_chains,
cores = n_chains,
save_warmup = save_warmup,
thin=n_thin)
# 2 divergent transitions
%>% saveRDS(file=here(paste0("outputs/StanModels/",trait,"_BaselineHMM.rds"))) stanfit
<- readRDS(file=here(paste0("outputs/StanModels/",trait,"_BaselineHMM.rds"))) stanfit
We look at the variance partitioning among the different components.
<- c("Residuals","Sites","Blocks","Populations","Families","Nurseries","Sites*Families","Sites*Nurseries")
mod_factors
%>%
stanfit ::tidyMCMC(pars=("pi"),
broom.mixedrobust = T, # median and mean absolute deviation are used to compute point estimates and uncertainty
ess = F,
rhat = F,
conf.int = T,
conf.level = conf_level) %>%
mutate(prop_var=mod_factors) %>%
mutate(prop_var=factor(mod_factors,
levels=mod_factors)) %>%
ggplot(aes(y = prop_var, x = estimate, xmin = conf.low, xmax = conf.high)) +
geom_pointinterval(position = position_dodge(width = .8), point_size=3, alpha=0.6, size=5) +
ylab("") +
xlab("Proportion of variance explained") +
labs(color = "Potential drivers") +
theme(axis.text = element_text(size=25),
panel.grid.minor.y=element_blank(),
panel.grid.major.y=element_blank()) +
guides(color=guide_legend(ncol=1))
Narrow-sense heritability and evolvability. The blue areas corresponds to the 95% credible intervals and the blue lines are the medians.
readRDS(file=here(paste0("outputs/StanModels/",trait,"_BaselineHMM.rds"))) %>%
::tidyMCMC(pars=c("h2","I"),
broom.mixedrobust = T, # median and mean absolute deviation are used to compute point estimates and uncertainty
ess = F,
rhat = F,
conf.int = T,
conf.level = conf_level) %>%
kable_mydf(round_number = 4)
term | estimate | std.error | conf.low | conf.high |
---|---|---|---|---|
h2 | 0.2613 | 0.0892 | 0.0978 | 0.4465 |
I | 0.0071 | 0.0025 | 0.0025 | 0.0128 |
<- as.matrix(stanfit)
posterior
mcmc_areas(posterior,
pars = c("h2"),
prob = conf_level,
point_est = "median",
prob_outer = 1)
mcmc_areas(posterior,
pars = c("I"),
prob = conf_level,
point_est = "median",
prob_outer = 1)
Intercepts Sites * Nurseries:
%<>% recover_types(subdata)
stanfit
%>%
stanfit spread_draws(alpha_site_nurs[site_nurs]) %>%
ggplot(aes(y = site_nurs, x = alpha_site_nurs)) +
stat_halfeye() +
ylab("") +
xlab("") +
labs(color = "Potential drivers") +
theme(axis.text = element_text(size=25),
panel.grid.minor.y=element_blank(),
panel.grid.major.y=element_blank()) +
guides(color=guide_legend(ncol=1))
Intercepts of the field sites:
%>%
stanfit spread_draws(alpha_site[site]) %>%
ggplot(aes(y = site, x = alpha_site)) +
stat_halfeye() +
ylab("") +
xlab("") +
labs(color = "Potential drivers") +
theme(axis.text = element_text(size=25),
panel.grid.minor.y=element_blank(),
panel.grid.major.y=element_blank()) +
guides(color=guide_legend(ncol=1))
Intercepts of the nurseries:
%>%
stanfit spread_draws(alpha_nurs[nurs]) %>%
ggplot(aes(y = nurs, x = alpha_nurs)) +
stat_halfeye() +
ylab("") +
xlab("") +
labs(color = "Potential drivers") +
theme(axis.text = element_text(size=25),
panel.grid.minor.y=element_blank(),
panel.grid.major.y=element_blank()) +
guides(color=guide_legend(ncol=1))
We run the model for height measurements in 2020.
<- "HA20"
trait
<- data %>%
subdata ::select(FieldSite,Block,PopulationCode,Family,Nursery,all_of(trait)) %>%
dplyrmutate(site_fam = paste0(FieldSite,"_",Family),
site_nurs = paste0(FieldSite,"_",Nursery)) %>%
drop_na()
# Building the stanlist ourselves
# ===============================
# stanlist <- list(N=nrow(subdata),
# y=subdata[[trait]],
# nsite = length(unique(subdata$FieldSite)),
# nblock = length(unique(subdata$Block)),
# npop = length(unique(subdata$PopulationCode)),
# nfam = length(unique(subdata$Family)),
# nnurs = length(unique(subdata$Nursery)),
# nsite_fam = length(unique(subdata$site_fam)),
# nsite_nurs = length(unique(subdata$site_nurs)),
# site = as.numeric(as.factor(subdata$FieldSite)),
# bloc = as.numeric(as.factor(subdata$Block)),
# pop = as.numeric(as.factor(subdata$PopulationCode)),
# fam = as.numeric(as.factor(subdata$Family)),
# nurs = as.numeric(as.factor(subdata$Nursery)),
# site_fam = as.numeric(as.factor(subdata$site_fam)),
# site_nurs = as.numeric(as.factor(subdata$site_nurs)))
# Using compose_data to build the stanlist
# ========================================
<- subdata %>%
subdata ::rename(site=FieldSite,
dplyrbloc=Block,
pop=PopulationCode,
fam=Family,
nurs=Nursery,
y=any_of(trait))
# takes about 2 min to run
<- sampling(model,
stanfit data = compose_data(subdata),
pars=c("beta0", "pi", "R_squared", "h2","I",
"alpha_site","alpha_block","alpha_pop","alpha_fam","alpha_nurs","alpha_site_fam","alpha_site_nurs",
"sigma2_r","sigma2_site","sigma2_block","sigma2_pop","sigma2_fam","sigma2_nurs","sigma2_site_fam","sigma2_site_nurs","sigma2_tot"),
iter = n_iter,
chains = n_chains,
cores = n_chains,
save_warmup = save_warmup,
thin=n_thin)
# 1 divergent transition
%>% saveRDS(file=here(paste0("outputs/StanModels/",trait,"_BaselineHMM.rds"))) stanfit
<- readRDS(file=here(paste0("outputs/StanModels/",trait,"_BaselineHMM.rds"))) stanfit
We look at the variance partitioning among the different components.
<- c("Residuals","Sites","Blocks","Populations","Families","Nurseries","Sites*Families","Sites*Nurseries")
mod_factors
%>%
stanfit ::tidyMCMC(pars=("pi"),
broom.mixedrobust = T, # median and mean absolute deviation are used to compute point estimates and uncertainty
ess = F,
rhat = F,
conf.int = T,
conf.level = conf_level) %>%
mutate(prop_var=mod_factors) %>%
mutate(prop_var=factor(mod_factors,
levels=mod_factors)) %>%
ggplot(aes(y = prop_var, x = estimate, xmin = conf.low, xmax = conf.high)) +
geom_pointinterval(position = position_dodge(width = .8), point_size=3, alpha=0.6, size=5) +
ylab("") +
xlab("Proportion of variance explained") +
labs(color = "Potential drivers") +
theme(axis.text = element_text(size=25),
panel.grid.minor.y=element_blank(),
panel.grid.major.y=element_blank()) +
guides(color=guide_legend(ncol=1))
Narrow-sense heritability and evolvability. The blue area corresponds to the 95% credible intervals and the blue line is the median.
<- as.matrix(stanfit)
posterior
mcmc_areas(posterior,
pars = c("h2"),
prob = conf_level,
point_est = "median",
prob_outer = 1)
mcmc_areas(posterior,
pars = c("I"),
prob = conf_level,
point_est = "median",
prob_outer = 1)
Comments on \(h^2\) values:
\(h^2\) for height in 2020 is lower than for height in 2016. I was expecting this result as some studies (at least in maritime pine) reported decreasing \(h^2\) with age.
\(h^2\) values are in line with other estimates, e.g. Kroon et al. (2011). See Figure below from their article.
Intercepts Sites * Nurseries:
%<>% recover_types(subdata)
stanfit
%>%
stanfit spread_draws(alpha_site_nurs[site_nurs]) %>%
ggplot(aes(y = site_nurs, x = alpha_site_nurs)) +
stat_halfeye() +
ylab("") +
xlab("") +
labs(color = "Potential drivers") +
theme(axis.text = element_text(size=25),
panel.grid.minor.y=element_blank(),
panel.grid.major.y=element_blank()) +
guides(color=guide_legend(ncol=1))
# Plotting point summaries and intervals
# ======================================
# readRDS(file=here(paste0("outputs/VariancePartitioning/",trait,"_VarPartModel.rds"))) %>%
# broom.mixed::tidyMCMC(pars=("alpha_site_nurs"),
# droppars = NULL, estimate.method = point_est, ess = F, rhat = F, conf.int = T,conf.level = conf_level) %>%
# mutate(param = levels(as.factor(subdata$site_nurs))) %>%
# ggplot(aes(y = param, x = estimate, xmin = conf.low, xmax = conf.high)) +
# geom_pointinterval(position = position_dodge(width = .8),point_size=3,alpha=0.6,size=5) +
# ylab("") +
# xlab("") +
# labs(color = "Potential drivers") +
# theme(axis.text = element_text(size=25),
# panel.grid.minor.y=element_blank(),
# panel.grid.major.y=element_blank()) +
# guides(color=guide_legend(ncol=1))
Intercepts of the field sites:
%>%
stanfit spread_draws(alpha_site[site]) %>%
ggplot(aes(y = site, x = alpha_site)) +
stat_halfeye() +
ylab("") +
xlab("") +
labs(color = "Potential drivers") +
theme(axis.text = element_text(size=25),
panel.grid.minor.y=element_blank(),
panel.grid.major.y=element_blank()) +
guides(color=guide_legend(ncol=1))
Intercepts of the nurseries:
%>%
stanfit spread_draws(alpha_nurs[nurs]) %>%
ggplot(aes(y = nurs, x = alpha_nurs)) +
stat_halfeye() +
ylab("") +
xlab("") +
labs(color = "Potential drivers") +
theme(axis.text = element_text(size=25),
panel.grid.minor.y=element_blank(),
panel.grid.major.y=element_blank()) +
guides(color=guide_legend(ncol=1))
For height (both in 2016 and 2020), the nursery has an effect on height variation, and this effect is different across sites. So we cannot directly use the site*family
intercepts to compare the plastic responses of populations to the environments of the common gardens, we have to account for the nursery effects. However, I do not think that the nursery effect can be separated from the site effect. I thought about two options:
One option would be to consider the combination site*nursery
as one environment. Then the RDPI will be calculated considering each combination site*nursery
as one unique environment. However, I do not think this option can work because populations that come from three different nurseries (i.e. populations planted in FW and from the NW nursery) are likely to show higher phenotypic variation than those from only two nurseries, and their RDPI indexes will be calculated based on more environments (i.e. site*nursery
combinations).
The second option would consist in estimating plasticity based only on two common gardens: FS (in which all trees come from NG) and FE (in which all trees come from NE, except four trees that come from NG, that we will remove to estimate plasticity).
As the nursery and site effects cannot be separated, we merge the site and nursery effects and estimate site*nursery
intercepts instead of the separate site
and nursery
intercepts of the baseline model. These site*nursery
intercepts are noted \(D_d\) with \(1 < d < 6\) as there are 6 possible combinations site/nursery.
Each trait \(y\) is modeled such as:
\[\begin{equation} \begin{aligned} y_{dbpfr} & \sim \mathcal{N}(\mu_{dbpf},\sigma^{2}_{r})\\[3pt] \mu_{bpc} & = \beta_{0} + D_{d} + B_{b} + P_{p} + F_{f} + D_{d} \times F_{f} \\[3pt] \beta_{0} & \sim \mathcal{N}(\mu_{y},2)\\[3pt] \begin{bmatrix} S_{s} \\ B_{b} \\ P_{p} \\ F_{f} \\ D_{d} \times F_{f} \end{bmatrix} & \sim \mathcal{N}\left(0, \begin{bmatrix} \sigma^{2}_{D}\\[3pt] \sigma^{2}_{B}\\[3pt] \sigma^{2}_{P}\\[3pt] \sigma^{2}_{F}\\[3pt] \sigma^{2}_{D \times F}\\[3pt] \end{bmatrix} \right)\\ \end{aligned} \end{equation}\]
\(\beta_{0}\) is the global intercept. \(D_{d}\) are the sitenursery intercepts with a variance \(\sigma^{2}_{D}\). \(B_{b}\) are the block intercepts with a variance \(\sigma^{2}_{B}\). \(P_{p}\) are the population intercepts with a variance \(\sigma^{2}_{P}\). \(F_{f}\) are the family intercepts with a variance \(\sigma^{2}_{F}\). \(\sigma^{2}_{D \times F}\) is the variance of the interaction among the sitenursery combinations and families, i.e. the proxy of the G\(\times\)E variance. \(\sigma^{2}_{r}\) is the residual variance.
Partitioning of the total variance \(\sigma_{tot}^{2}\):
\[\begin{equation} \begin{aligned} \sigma_{tot}^{2} & = \sigma_{r}^{2} + \sigma_{D}^{2} + \sigma_{B}^{2} + \sigma_{P}^{2} + \sigma_{F}^{2} + \sigma_{D \times F}^{2}\\[3pt] \sigma_{r} & = \sigma_{tot} \times \sqrt(\pi_{r})\\[3pt] \sigma_{D} & = \sigma_{tot} \times \sqrt(\pi_{D})\\[3pt] \sigma_{B} & = \sigma_{tot} \times \sqrt(\pi_{B})\\[3pt] \sigma_{P} & = \sigma_{tot} \times \sqrt(\pi_{P})\\[3pt] \sigma_{F} & = \sigma_{tot} \times \sqrt(\pi_{F})\\[3pt] \sigma_{D \times F} & = \sigma_{tot} \times \sqrt(\pi_{D \times F})\\[3pt] \sigma_{tot} & \sim \mathcal{S}^{*}(0,1,3) \end{aligned} \end{equation}\]
with \(\sum_{l}^{6}\pi_{l}=1\).
<- stan_model(here("scripts/stan_models/HMM1.stan"))
model model
S4 class stanmodel 'anon_model' coded as follows:
data {
int<lower=1> n; // Number of observations
vector[n] y; // Response variable
int<lower=0> n_bloc; // Number of blocks
int<lower=0> n_pop; // Number of populations
int<lower=0> n_fam; // Number of families
int<lower=0> n_sitenurs; // Number of combinations site-nursery
int<lower=0> n_sitenurs_fam; // Number of combinations site-nursery x families
int<lower=0, upper=n_bloc> bloc[n]; // Blocks
int<lower=0, upper=n_pop> pop[n]; // Populations
int<lower=0, upper=n_fam> fam[n]; // Families
int<lower=0, upper=n_sitenurs> sitenurs[n]; // Combinations site-nursery
int<lower=0, upper=n_sitenurs_fam> sitenurs_fam[n]; // Combinations site-nursery * families
}
parameters {
real beta0; // global intercept
simplex[6] pi;
real<lower = 0> sigma_tot;
vector[n_bloc] z_block;
vector[n_pop] z_pop;
vector[n_fam] z_fam;
vector[n_sitenurs] z_sitenurs;
vector[n_sitenurs_fam] z_sitenurs_fam;
}
transformed parameters {
real R_squared;
real<lower = 0> sigma_r;
real<lower = 0> sigma_block;
real<lower = 0> sigma_pop;
real<lower = 0> sigma_fam;
real<lower = 0> sigma_sitenurs_fam;
real<lower = 0> sigma_sitenurs;
vector[n_bloc] alpha_block;
vector[n_pop] alpha_pop;
vector[n_fam] alpha_fam;
vector[n_sitenurs] alpha_sitenurs;
vector[n_sitenurs_fam] alpha_sitenurs_fam;
vector[n] mu; // linear predictor
// variance partitioning with the simplex pi
sigma_r = sqrt(pi[1]) * sigma_tot;
sigma_block = sqrt(pi[2]) * sigma_tot;
sigma_pop = sqrt(pi[3]) * sigma_tot;
sigma_fam= sqrt(pi[4]) * sigma_tot;
sigma_sitenurs= sqrt(pi[5]) * sigma_tot;
sigma_sitenurs_fam= sqrt(pi[6]) * sigma_tot;
alpha_block = z_block*sigma_block;
alpha_pop = z_pop*sigma_pop;
alpha_fam = z_fam*sigma_fam;
alpha_sitenurs = z_sitenurs*sigma_sitenurs;
alpha_sitenurs_fam = z_sitenurs_fam*sigma_sitenurs_fam;
mu = rep_vector(beta0, n) +
alpha_block[bloc] +
alpha_pop[pop] +
alpha_fam[fam] +
alpha_sitenurs_fam[sitenurs_fam] +
alpha_sitenurs[sitenurs];
R_squared = 1 - variance(y - mu) / variance(y);
}
model{
//Priors
beta0 ~ normal(mean(y),20);
sigma_tot ~ student_t(3, 0.0, 1.0);
z_block ~ std_normal();
z_pop ~ std_normal();
z_fam ~ std_normal();
z_sitenurs_fam ~ std_normal();
z_sitenurs ~ std_normal();
// Likelihood
y ~ normal(mu, sigma_r);
}
generated quantities {
//Variances, narrow-sense heritability and evolvability
real<lower=0> sigma2_tot;
real<lower=0> sigma2_r;
real<lower=0> sigma2_block;
real<lower=0> sigma2_pop;
real<lower=0> sigma2_fam;
real<lower=0> sigma2_sitenurs;
real<lower=0> sigma2_sitenurs_fam;
real<lower=0> h2; // narrow-sense heritability
real<lower=0> I; // evolvability
sigma2_tot = square(sigma_tot);
sigma2_r = square(sigma_r);
sigma2_block = square(sigma_block);
sigma2_pop = square(sigma_pop);
sigma2_fam = square(sigma_fam);
sigma2_sitenurs_fam = square(sigma_sitenurs_fam);
sigma2_sitenurs = square(sigma_sitenurs);
h2 = (4*sigma2_fam)/(sigma2_r+sigma2_fam);
I = (4*sigma2_fam)/(mean(y)^2);
}
We run the model for height measurements in 2020.
<- "HA20"
trait
<- data %>%
subdata ::select(FieldSite,Block,PopulationCode,Family,Nursery,all_of(trait)) %>%
dplyrmutate(sitenurs = paste0(FieldSite,"_",Nursery),
sitenurs_fam = paste0(sitenurs,"_",Family)) %>%
drop_na()
# Using compose_data to build the stanlist
# ========================================
<- subdata %>%
subdata ::select(-FieldSite,-Nursery) %>%
dplyr::rename(bloc=Block,
dplyrpop=PopulationCode,
fam=Family,
y=any_of(trait))
<- sampling(model,
stanfit data = compose_data(subdata),
pars=c("beta0", "pi", "R_squared", "h2","I",
"alpha_block","alpha_pop","alpha_fam","alpha_sitenurs_fam","alpha_sitenurs",
"sigma2_r","sigma2_block","sigma2_pop","sigma2_fam","sigma2_sitenurs_fam","sigma2_sitenurs","sigma2_tot"),
iter = n_iter,
chains = n_chains,
cores = n_chains,
save_warmup = save_warmup,
thin=n_thin)
# no divergent transitions
%>% saveRDS(file=here(paste0("outputs/StanModels/",trait,"_HMM1.rds"))) stanfit
Intercepts site*nursery
:
<- readRDS(file=here(paste0("outputs/StanModels/",trait,"_HMM1.rds")))
stanfit
%<>% recover_types(subdata)
stanfit
%>%
stanfit spread_draws(alpha_sitenurs[sitenurs]) %>%
ggplot(aes(y = sitenurs, x = alpha_sitenurs)) +
stat_halfeye() +
ylab("") +
xlab("") +
labs(color = "Potential drivers") +
theme(axis.text = element_text(size=25),
panel.grid.minor.y=element_blank(),
panel.grid.major.y=element_blank()) +
guides(color=guide_legend(ncol=1))
Narrow-sense heritability and evolvability. The blue area corresponds to the 95% credible intervals and the blue line is the median.
<- as.matrix(stanfit)
posterior
mcmc_areas(posterior,
pars = c("h2"),
prob = conf_level,
point_est = "median",
prob_outer = 1)
mcmc_areas(posterior,
pars = c("I"),
prob = conf_level,
point_est = "median",
prob_outer = 1)
Variance partitioning:
<- c("Residuals","Blocks","Populations","Families","Site-Nurseries","Site-Nurseries * Families")
mod_factors
%>%
stanfit ::tidyMCMC(pars=("pi"),
broom.mixeddroppars = NULL, estimate.method = point_est, ess = F, rhat = F, conf.int = T,conf.level = conf_level) %>%
mutate(prop_var=mod_factors) %>%
mutate(prop_var=factor(mod_factors,
levels=mod_factors)) %>%
ggplot(aes(y = prop_var, x = estimate, xmin = conf.low, xmax = conf.high)) +
geom_pointinterval(position = position_dodge(width = .8),point_size=3,alpha=0.6,size=5) +
ylab("") +
xlab("Proportion of variance explained") +
labs(color = "Potential drivers") +
theme(axis.text = element_text(size=25),
panel.grid.minor.y=element_blank(),
panel.grid.major.y=element_blank()) +
guides(color=guide_legend(ncol=1))
However, as said above, I do not think that we can use this model to compare the plastic response of populations as some populations show more combinations site*nursery
than others.
To remove the nursery effect (or more precisely to confound its effect with the site effect), we use the two common gardens in which the trees come from a unique nursery, i.e. FS (in which all trees come from NG) and FE (in which all trees come from NE, except four trees that come from NG and that we remove to run the model).
Each trait \(y\) is modeled such as:
\[\begin{equation} \begin{aligned} y_{sbpfr} & \sim \mathcal{N}(\mu_{sbpf},\sigma^{2}_{r})\\[3pt] \mu_{bpc} & = \beta_{0} + S_{s} + B_{b} + P_{p} + F_{f} + S_{s} \times F_{f} \\[3pt] \beta_{0} & \sim \mathcal{N}(\mu_{y},2)\\[3pt] \begin{bmatrix} S_{s} \\ B_{b} \\ P_{p} \\ F_{f} \\ S_{s} \times F_{f} \end{bmatrix} & \sim \mathcal{N}\left(0, \begin{bmatrix} \sigma^{2}_{S}\\[3pt] \sigma^{2}_{B}\\[3pt] \sigma^{2}_{P}\\[3pt] \sigma^{2}_{F}\\[3pt] \sigma^{2}_{S \times F}\\[3pt] \end{bmatrix} \right)\\ \end{aligned} \end{equation}\]
\(\beta_{0}\) is the global intercept. \(S_{s}\) are the site intercepts with a variance \(\sigma^{2}_{S}\). \(B_{b}\) are the block intercepts with a variance \(\sigma^{2}_{B}\). \(P_{p}\) are the population intercepts with a variance \(\sigma^{2}_{P}\). \(F_{f}\) are the family intercepts with a variance \(\sigma^{2}_{F}\). \(\sigma^{2}_{S \times F}\) is the variance of the interaction among the sites and families, i.e. the proxy of the G\(\times\)E variance. \(\sigma^{2}_{r}\) is the residual variance.
Partitioning of the total variance \(\sigma_{tot}^{2}\):
\[\begin{equation} \begin{aligned} \sigma_{tot}^{2} & = \sigma_{r}^{2} + \sigma_{S}^{2} + \sigma_{B}^{2} + \sigma_{P}^{2} + \sigma_{F}^{2} + \sigma_{S \times F}^{2}\\[3pt] \sigma_{r} & = \sigma_{tot} \times \sqrt(\pi_{r})\\[3pt] \sigma_{S} & = \sigma_{tot} \times \sqrt(\pi_{S})\\[3pt] \sigma_{B} & = \sigma_{tot} \times \sqrt(\pi_{B})\\[3pt] \sigma_{P} & = \sigma_{tot} \times \sqrt(\pi_{P})\\[3pt] \sigma_{F} & = \sigma_{tot} \times \sqrt(\pi_{F})\\[3pt] \sigma_{S \times F} & = \sigma_{tot} \times \sqrt(\pi_{S \times F})\\[3pt] \sigma_{tot} & \sim \mathcal{S}^{*}(0,1,3) \end{aligned} \end{equation}\]
with \(\sum_{l}^{6}\pi_{l}=1\).
Here is the Stan code:
<- stan_model(here("scripts/stan_models/HMM2.stan"))
model model
S4 class stanmodel 'anon_model' coded as follows:
data {
int<lower=1> n; // Number of observations
vector[n] y; // Response variable
int<lower=0> n_site; // Number of sites
int<lower=0> n_bloc; // Number of blocks
int<lower=0> n_pop; // Number of populations
int<lower=0> n_fam; // Number of families
int<lower=0> n_site_fam; // Number of sites x families
int<lower=0, upper=n_site> site[n]; // Sites
int<lower=0, upper=n_bloc> bloc[n]; // Blocks
int<lower=0, upper=n_pop> pop[n]; // Populations
int<lower=0, upper=n_fam> fam[n]; // Families
int<lower=0, upper=n_site_fam> site_fam[n]; // Sites * families
//int<lower=0, upper=n_pop> which_pop[n_site_fam]; // Populations of the site*families
}
parameters {
real beta0; // global intercept
simplex[6] pi;
real<lower = 0> sigma_tot;
vector[n_site] z_site;
vector[n_bloc] z_block;
vector[n_pop] z_pop;
vector[n_fam] z_fam;
vector[n_site_fam] z_site_fam;
}
transformed parameters {
real R_squared;
real<lower = 0> sigma_r;
real<lower = 0> sigma_site;
real<lower = 0> sigma_block;
real<lower = 0> sigma_pop;
real<lower = 0> sigma_fam;
real<lower = 0> sigma_site_fam;
vector[n_site] alpha_site;
vector[n_bloc] alpha_block;
vector[n_pop] alpha_pop;
vector[n_fam] alpha_fam;
vector[n_site_fam] alpha_site_fam;
vector[n] mu; // linear predictor
//vector[n_pop] pop_var_alpha_site_fam; // Variance of population-specific `site*nursery` intercepts
// variance partitioning with the simplex pi
sigma_r = sqrt(pi[1]) * sigma_tot;
sigma_site = sqrt(pi[2]) * sigma_tot;
sigma_block = sqrt(pi[3]) * sigma_tot;
sigma_pop = sqrt(pi[4]) * sigma_tot;
sigma_fam= sqrt(pi[5]) * sigma_tot;
sigma_site_fam= sqrt(pi[6]) * sigma_tot;
alpha_site = z_site*sigma_site;
alpha_block = z_block*sigma_block;
alpha_pop = z_pop*sigma_pop;
alpha_fam = z_fam*sigma_fam;
alpha_site_fam = z_site_fam*sigma_site_fam;
mu = rep_vector(beta0, n) + alpha_site[site] + alpha_block[bloc] + alpha_pop[pop] + alpha_fam[fam] + alpha_site_fam[site_fam];
R_squared = 1 - variance(y - mu) / variance(y);
//for(p in 1:n_pop){
// pop_var_alpha_site_fam[p] = variance(alpha_site_fam[which_pop[p]]);
// }
}
model{
//Priors
beta0 ~ normal(mean(y),20);
sigma_tot ~ student_t(3, 0.0, 1.0);
z_site ~ std_normal();
z_block ~ std_normal();
z_pop ~ std_normal();
z_fam ~ std_normal();
z_site_fam ~ std_normal();
// Likelihood
y ~ normal(mu, sigma_r);
}
generated quantities {
//Variances, narrow-sense heritability and evolvability
real<lower=0> sigma2_tot;
real<lower=0> sigma2_r;
real<lower=0> sigma2_site;
real<lower=0> sigma2_block;
real<lower=0> sigma2_pop;
real<lower=0> sigma2_fam;
real<lower=0> sigma2_site_fam;
real<lower=0> h2;
real<lower=0> I;
sigma2_tot = square(sigma_tot);
sigma2_r = square(sigma_r);
sigma2_site = square(sigma_site);
sigma2_block = square(sigma_block);
sigma2_pop = square(sigma_pop);
sigma2_fam = square(sigma_fam);
sigma2_site_fam = square(sigma_site_fam);
h2 = (4*sigma2_fam)/(sigma2_r+sigma2_fam);
I = (4*sigma2_fam)/(mean(y)^2);
}
We run the model for height measurements in 2020.
<- "HA20"
trait
<- data %>%
subdata filter(FieldSite %in% c("FE","FS")) %>% # we keep only trees in FS and FE
filter(!(FieldSite=="FE" & Nursery =="NG")) %>% # we remove the four trees in FE that come from NG
::select(FieldSite,Block,PopulationCode,Family,all_of(trait)) %>%
dplyrmutate(site_fam = paste0(FieldSite,"_",Family)) %>%
drop_na() %>%
::rename(bloc=Block,
dplyrsite=FieldSite,
pop=PopulationCode,
fam=Family,
y=any_of(trait))
<- sampling(model,
stanfit data = compose_data(subdata),
pars=c("beta0", "pi", "R_squared", "h2","I",
"alpha_site","alpha_block","alpha_pop","alpha_fam","alpha_site_fam",
"sigma2_r","sigma2_site","sigma2_block","sigma2_pop","sigma2_fam","sigma2_site_fam","sigma2_tot"),
iter = n_iter,
chains = n_chains,
cores = n_chains,
save_warmup = save_warmup,
thin=n_thin)
# no divergent transition
%>% saveRDS(file=here(paste0("outputs/StanModels/",trait,"_HMM2.rds"))) stanfit
We look at the variance partitioning among the different components.
<- readRDS(file=here(paste0("outputs/StanModels/",trait,"_HMM2.rds")))
stanfit
<- c("Residuals","Sites","Blocks","Populations","Families","Sites*Families")
mod_factors
%>%
stanfit ::tidyMCMC(pars=("pi"),
broom.mixedrobust = T, # median and mean absolute deviation are used to compute point estimates and uncertainty
ess = F,
rhat = F,
conf.int = T,
conf.level = conf_level) %>%
mutate(prop_var=mod_factors) %>%
mutate(prop_var=factor(mod_factors,
levels=mod_factors)) %>%
ggplot(aes(y = prop_var, x = estimate, xmin = conf.low, xmax = conf.high)) +
geom_pointinterval(position = position_dodge(width = .8), point_size=3, alpha=0.6, size=5) +
ylab("") +
xlab("Proportion of variance explained") +
labs(color = "Potential drivers") +
theme(axis.text = element_text(size=25),
panel.grid.minor.y=element_blank(),
panel.grid.major.y=element_blank()) +
guides(color=guide_legend(ncol=1))
%>% broom.mixed::tidyMCMC(pars=c("pi"),
stanfit robust = T, # median and mean absolute deviation are used to compute point estimates and uncertainty
ess = F,
rhat = F,
conf.int = T,
conf.level = conf_level) %>%
mutate(term= mod_factors) %>%
kable_mydf(round_number = 4)
term | estimate | std.error | conf.low | conf.high |
---|---|---|---|---|
Residuals | 0.5268 | 0.0887 | 0.3413 | 0.6724 |
Sites | 0.3164 | 0.1089 | 0.1444 | 0.5500 |
Blocks | 0.0159 | 0.0086 | 0.0012 | 0.0636 |
Populations | 0.0634 | 0.0238 | 0.0245 | 0.1285 |
Families | 0.0285 | 0.0172 | 0.0028 | 0.0666 |
Sites*Families | 0.0488 | 0.0210 | 0.0127 | 0.0937 |
Narrow-sense heritability and evolvability. The blue area corresponds to the 95% credible intervals and the blue line is the median.
<- as.matrix(stanfit)
posterior
mcmc_areas(posterior,
pars = c("h2"),
prob = conf_level,
point_est = "median",
prob_outer = 1)
mcmc_areas(posterior,
pars = c("I"),
prob = conf_level,
point_est = "median",
prob_outer = 1)
We compare the heritability and evolvability values obtained with the three models (for height in 2020), they are very similar:
list("BaselineHMM" = readRDS(file=here(paste0("outputs/StanModels/",trait,"_BaselineHMM.rds"))),
"HMM1" = readRDS(file=here(paste0("outputs/StanModels/",trait,"_HMM1.rds"))),
"HMM2" = readRDS(file=here(paste0("outputs/StanModels/",trait,"_HMM2.rds")))) %>%
lapply(function(x){
%>% broom.mixed::tidyMCMC(pars=c("h2","I"),
x robust = T, # median and mean absolute deviation are used to compute point estimates and uncertainty
ess = F,
rhat = F,
conf.int = T,
conf.level = conf_level)
%>%
}) bind_rows(.id="model") %>%
kable_mydf(round_number = 4)
model | term | estimate | std.error | conf.low | conf.high |
---|---|---|---|---|---|
BaselineHMM | h2 | 0.2015 | 0.0852 | 0.0464 | 0.3753 |
BaselineHMM | I | 0.0068 | 0.0029 | 0.0015 | 0.0130 |
HMM1 | h2 | 0.2193 | 0.0801 | 0.0682 | 0.3932 |
HMM1 | I | 0.0074 | 0.0028 | 0.0022 | 0.0137 |
HMM2 | h2 | 0.2027 | 0.1149 | 0.0221 | 0.4303 |
HMM2 | I | 0.0062 | 0.0036 | 0.0006 | 0.0137 |
Santi’s proposition - 01/08/2023
Same as above but we keep the trees in FW which were grown locally in NW (290 trees). Like that, the nursery effect is still confounded with the site effect as the trees in FS come from NG, the trees in FE come from NE and the trees from FW come from NW. The drawback is that we now have an unbalanced design with some families and populations in 2 common gardens and others in 3 common gardens.
Here is the number of trees in each site, each population/site, and each family/site:
table(subdata$site)
FE FS
654 653
table(subdata$site, subdata$pop)
AB AC AM BB BE BW CC CG CR GA GC GD GE GL GT LC MG RD RM SD SO
FE 32 30 32 30 30 30 32 32 32 31 32 28 32 31 31 32 31 32 32 32 30
FS 30 32 32 30 31 31 32 29 31 32 30 31 31 32 30 32 32 31 32 32 30
table(subdata$site, subdata$fam)
AB1 AB10 AB2 AB3 AB4 AB5 AB6 AB7 AB9 AC1 AC2 AC3 AC5 AC6 AC7 AC8 AC9 AM1 AM10 AM3 AM4 AM5 AM6 AM8 AM9 BB1 BB2 BB3 BB4 BB5 BB6 BB7 BB8 BB9 BE1 BE10 BE2 BE3 BE4 BE5 BE7 BE8 BE9 BW10 BW2 BW3 BW4 BW6 BW7 BW8 BW9 CC1 CC10 CC3 CC4 CC5 CC6 CC7 CC9 CG1 CG2 CG3 CG4 CG5 CG6 CG7 CG8 CR10 CR2 CR3 CR4 CR5
FE 0 4 4 4 4 4 4 4 4 4 4 4 4 4 2 4 4 4 4 4 4 4 4 4 4 3 4 4 0 4 4 3 4 4 1 2 4 4 3 4 4 4 4 4 4 4 4 3 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
FS 4 3 4 4 4 4 4 0 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 2 4 4 0 4 4 4 4 0 4 4 4 4 4 3 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 3 4 3 4 4 4 4 0
CR6 CR7 CR8 CR9 GA1 GA10 GA3 GA5 GA6 GA7 GA8 GA9 GC1 GC10 GC2 GC3 GC5 GC6 GC7 GC8 GC9 GD1 GD10 GD2 GD4 GD5 GD6 GD7 GD9 GE1 GE2 GE3 GE4 GE6 GE7 GE8 GE9 GL1 GL10 GL2 GL3 GL4 GL5 GL6 GL7 GL8 GT1 GT2 GT3 GT4 GT5 GT6 GT7 GT8 GT9 LC1 LC10 LC2 LC3 LC4 LC5 LC6 LC7 LC8 MG1 MG10 MG2 MG3 MG5 MG6 MG7 MG8
FE 4 4 0 4 4 4 4 4 4 4 3 4 4 4 4 0 4 4 4 4 4 2 4 4 4 3 4 4 3 4 4 4 4 4 4 4 4 4 0 4 4 3 4 4 4 4 4 4 3 0 4 4 4 4 4 4 0 4 4 4 4 4 4 4 4 4 4 4 4 3 0 4
FS 3 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 3 4 0 4 4 4 4 4 4 4 4 3 4 4 4 3 4 4 4 4 4 4 4 4 4 4 0 4 4 4 4 4 0 4 4 4 2 4 4 4 4 4 4 4 4 4 4 0 0 4 4 4 4 4 4 4
MG9 RD1 RD10 RD2 RD3 RD5 RD7 RD8 RD9 RM1 RM10 RM3 RM4 RM5 RM6 RM7 RM9 SD1 SD10 SD2 SD4 SD6 SD7 SD8 SD9 SO10 SO2 SO3 SO4 SO5 SO6 SO7 SO9
FE 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4 3 4
FS 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 3 4 4 4 4 4 4
<- "HA20"
trait
<- data %>%
subdata filter(FieldSite %in% c("FE","FS") | FieldSite == "FW" & Nursery == "NW") %>%
filter(!(FieldSite=="FE" & Nursery =="NG")) %>% # we remove the four trees in FE that come from NG
::select(FieldSite,Block,PopulationCode,Family,all_of(trait)) %>%
dplyrmutate(site_fam = paste0(FieldSite,"_",Family)) %>%
drop_na() %>%
::rename(bloc=Block,
dplyrsite=FieldSite,
pop=PopulationCode,
fam=Family,
y=any_of(trait))
<- sampling(model,
stanfit data = compose_data(subdata),
pars=c("beta0", "pi", "R_squared", "h2","I",
"alpha_site","alpha_block","alpha_pop","alpha_fam","alpha_site_fam",
"sigma2_r","sigma2_site","sigma2_block","sigma2_pop","sigma2_fam","sigma2_site_fam","sigma2_tot"),
iter = n_iter,
chains = n_chains,
cores = n_chains,
save_warmup = save_warmup,
thin=n_thin)
%>% saveRDS(file=here(paste0("outputs/StanModels/",trait,"_HMM2_3Sites.rds"))) stanfit
Variance partitioning among the different components:
<- readRDS(file=here(paste0("outputs/StanModels/",trait,"_HMM2_3Sites.rds")))
stanfit
<- c("Residuals","Sites","Blocks","Populations","Families","Sites*Families")
mod_factors
%>%
stanfit ::tidyMCMC(pars=("pi"),
broom.mixedrobust = T, # median and mean absolute deviation are used to compute point estimates and uncertainty
ess = F,
rhat = F,
conf.int = T,
conf.level = conf_level) %>%
mutate(prop_var=mod_factors) %>%
mutate(prop_var=factor(mod_factors,
levels=mod_factors)) %>%
ggplot(aes(y = prop_var, x = estimate, xmin = conf.low, xmax = conf.high)) +
geom_pointinterval(position = position_dodge(width = .8), point_size=3, alpha=0.6, size=5) +
ylab("") +
xlab("Proportion of variance explained") +
labs(color = "Potential drivers") +
theme(axis.text = element_text(size=25),
panel.grid.minor.y=element_blank(),
panel.grid.major.y=element_blank()) +
guides(color=guide_legend(ncol=1))
Comparing the values with HMM2 fitted based on two field sites:
list("HMM2 - 2 sites" = readRDS(file=here(paste0("outputs/StanModels/",trait,"_HMM2.rds"))),
"HMM2 - 3 sites" = readRDS(file=here(paste0("outputs/StanModels/",trait,"_HMM2_3Sites.rds")))) %>%
lapply(function(x){
%>% broom.mixed::tidyMCMC(pars=c("pi"),
x robust = T, # median and mean absolute deviation are used to compute point estimates and uncertainty
ess = F,
rhat = F,
conf.int = T,
conf.level = conf_level) %>%
mutate(term=mod_factors)
%>%
}) bind_rows(.id="model") %>%
arrange(term) %>%
kable_mydf(round_number = 4)
model | term | estimate | std.error | conf.low | conf.high |
---|---|---|---|---|---|
HMM2 - 2 sites | Blocks | 0.0159 | 0.0086 | 0.0012 | 0.0636 |
HMM2 - 3 sites | Blocks | 0.0129 | 0.0074 | 0.0012 | 0.0469 |
HMM2 - 2 sites | Families | 0.0285 | 0.0172 | 0.0028 | 0.0666 |
HMM2 - 3 sites | Families | 0.0282 | 0.0141 | 0.0043 | 0.0604 |
HMM2 - 2 sites | Populations | 0.0634 | 0.0238 | 0.0245 | 0.1285 |
HMM2 - 3 sites | Populations | 0.0614 | 0.0227 | 0.0255 | 0.1241 |
HMM2 - 2 sites | Residuals | 0.5268 | 0.0887 | 0.3413 | 0.6724 |
HMM2 - 3 sites | Residuals | 0.5446 | 0.0807 | 0.3645 | 0.6835 |
HMM2 - 2 sites | Sites | 0.3164 | 0.1089 | 0.1444 | 0.5500 |
HMM2 - 3 sites | Sites | 0.3068 | 0.0969 | 0.1471 | 0.5326 |
HMM2 - 2 sites | Sites*Families | 0.0488 | 0.0210 | 0.0127 | 0.0937 |
HMM2 - 3 sites | Sites*Families | 0.0461 | 0.0200 | 0.0124 | 0.0894 |
The median estimates are not very different between HMM2 fitted on two or three sites. However, good news, the credible intervals are smaller when using three sites.
Intercepts of the field sites:
%<>% recover_types(subdata)
stanfit
%>%
stanfit spread_draws(alpha_site[site]) %>%
ggplot(aes(y = site, x = alpha_site)) +
stat_halfeye() +
ylab("") +
xlab("") +
labs(color = "Potential drivers") +
theme(axis.text = element_text(size=25),
panel.grid.minor.y=element_blank(),
panel.grid.major.y=element_blank()) +
guides(color=guide_legend(ncol=1))
Narrow-sense heritability and evolvability. The blue area corresponds to the 95% credible intervals and the blue line is the median.
<- as.matrix(stanfit)
posterior
mcmc_areas(posterior,
pars = c("h2"),
prob = conf_level,
point_est = "median",
prob_outer = 1)
mcmc_areas(posterior,
pars = c("I"),
prob = conf_level,
point_est = "median",
prob_outer = 1)
Comparing heritability and evolvability values with those obtained in the previous models.
list("BaselineHMM" = readRDS(file=here(paste0("outputs/StanModels/",trait,"_BaselineHMM.rds"))),
"HMM1" = readRDS(file=here(paste0("outputs/StanModels/",trait,"_HMM1.rds"))),
"HMM2 - 2 sites" = readRDS(file=here(paste0("outputs/StanModels/",trait,"_HMM2.rds"))),
"HMM2 - 3 sites" = readRDS(file=here(paste0("outputs/StanModels/",trait,"_HMM2_3Sites.rds")))) %>%
lapply(function(x){
%>% broom.mixed::tidyMCMC(pars=c("h2","I"),
x robust = T, # median and mean absolute deviation are used to compute point estimates and uncertainty
ess = F,
rhat = F,
conf.int = T,
conf.level = conf_level)
%>%
}) bind_rows(.id="model") %>%
arrange(term) %>%
kable_mydf(round_number = 4)
model | term | estimate | std.error | conf.low | conf.high |
---|---|---|---|---|---|
BaselineHMM | h2 | 0.2015 | 0.0852 | 0.0464 | 0.3753 |
HMM1 | h2 | 0.2193 | 0.0801 | 0.0682 | 0.3932 |
HMM2 - 2 sites | h2 | 0.2027 | 0.1149 | 0.0221 | 0.4303 |
HMM2 - 3 sites | h2 | 0.1959 | 0.0942 | 0.0314 | 0.3893 |
BaselineHMM | I | 0.0068 | 0.0029 | 0.0015 | 0.0130 |
HMM1 | I | 0.0074 | 0.0028 | 0.0022 | 0.0137 |
HMM2 - 2 sites | I | 0.0062 | 0.0036 | 0.0006 | 0.0137 |
HMM2 - 3 sites | I | 0.0064 | 0.0032 | 0.0010 | 0.0132 |
Same as for the variance partitioning, similar \(h^2\) and \(I\) median estimates between HMM2 fitted on two or three sites, but much smaller credible intervals when HMM2 is fitted on three sites!
In the previous section, we investigated how height variation was explained by the different components of the experimental design. We showed that the effects of the site and the nurseries were confounded and, in my opinion, cannot be separated.
In this section, we want to estimate the plastic response to different environmental conditions at the population-level (i.e. one value per population) based on a Bayesian hierarchical mixed model. More precisely, we want to estimate the variance of the site*nursery
intercepts for each population. In my opinion, like for HMM2 in the previous section, we should use only the sites FS (in which all trees come from NG) and FE (using only trees coming from NE, so removing the four trees that come from NG), so that all populations experience the same number of environmental conditions. So the model in this section is built from HMM2.
In HMM2, the site*nursery
intercepts are drawn from a common distribution, i.e. \(S_{s} \times F_{f} \sim \mathcal{N}(0,\sigma^{2}_{S \times F})\). In this section, we would like to estimate \(\sigma^{2}_{S \times F}\) parameters that would be specific to each population.
Here is the model I propose:
\[\begin{equation} \begin{aligned} y_{sbpfr} & \sim \mathcal{N}(\mu_{sbpf},\sigma^{2}_{r})\\[3pt] \mu_{bpc} & = \beta_{0} + S_{s} + B_{b} + P_{p} + F_{f} + S_{s} \times F_{f(p)} \\[3pt] \beta_{0} & \sim \mathcal{N}(\mu_{y},2)\\[3pt] \begin{bmatrix} S_{s} \\ B_{b} \\ P_{p} \\ F_{f} \\ S_{s} \times F_{f(p)} \end{bmatrix} & \sim \mathcal{N}\left(0, \begin{bmatrix} \sigma^{2}_{S}\\[3pt] \sigma^{2}_{B}\\[3pt] \sigma^{2}_{P}\\[3pt] \sigma^{2}_{F}\\[3pt] \sigma^{2}_{S \times F_p}\\[3pt] \end{bmatrix} \right)\\ \end{aligned} \end{equation}\]
\(\beta_{0}\) is the global intercept. \(S_{s}\) are the site intercepts with a variance \(\sigma^{2}_{S}\). \(B_{b}\) are the block intercepts with a variance \(\sigma^{2}_{B}\). \(P_{p}\) are the population intercepts with a variance \(\sigma^{2}_{P}\). \(F_{f}\) are the family intercepts with a variance \(\sigma^{2}_{F}\). \(\sigma^{2}_{S \times F_p}\) are the population-specific variances of the interaction among the sites and families. \(\sigma^{2}_{r}\) is the residual variance.
Partitioning of the total variance \(\sigma_{tot}^{2}\):
\[\begin{equation} \begin{aligned} \sigma_{tot}^{2} & = \sigma_{r}^{2} + \sigma_{S}^{2} + \sigma_{B}^{2} + \sigma_{P}^{2} + \sigma_{F}^{2} + \overline{\sigma_{S \times F_p}}^{2}\\[3pt] \sigma_{r} & = \sigma_{tot} \times \sqrt(\pi_{r})\\[3pt] \sigma_{S} & = \sigma_{tot} \times \sqrt(\pi_{S})\\[3pt] \sigma_{B} & = \sigma_{tot} \times \sqrt(\pi_{B})\\[3pt] \sigma_{P} & = \sigma_{tot} \times \sqrt(\pi_{P})\\[3pt] \sigma_{F} & = \sigma_{tot} \times \sqrt(\pi_{F})\\[3pt] \sigma_{S \times F_p} & = \sigma_{tot} \times \sqrt(\pi_{S \times F})\\[3pt] \sigma_{tot} & \sim \mathcal{S}^{*}(0,1,3) \end{aligned} \end{equation}\]
with \(\overline{\sigma_{S \times F_p}}\) the mean of population-specific standard deviations of the interaction among the sites and families, and with \(\sum_{l}^{6}\pi_{l}=1\) (using the simplex
function in Stan).
The population-specific standard deviations of the interaction among the sites and families (\(\sigma_{S \times F_p}\)) follow a normal distribution of mean \(\overline{\sigma_{S \times F_p}}\) and variance \(\sigma_K\) such as:
\[\begin{equation} \begin{aligned} \sigma_{C_{p}} & \sim \mathcal{LN}\left(\ln(\overline{\sigma_{S \times F_p}})-\frac{\sigma^{2}_{K}}{2},\sigma^{2}_{K}\right)\\[3pt] \sigma_{K} & \sim \exp(1)\\[3pt] \end{aligned} \end{equation}\]
Here is the Stan code:
<- stan_model(here("scripts/stan_models/HMM3.stan"))
model model
S4 class stanmodel 'anon_model' coded as follows:
data {
int<lower=1> n; // Number of observations
vector[n] y; // Response variable
int<lower=0> n_site; // Number of sites
int<lower=0> n_bloc; // Number of blocks
int<lower=0> n_pop; // Number of populations
int<lower=0> n_fam; // Number of families
int<lower=0> n_site_fam; // Number of sites x families
int<lower=0, upper=n_site> site[n]; // Sites
int<lower=0, upper=n_bloc> bloc[n]; // Blocks
int<lower=0, upper=n_pop> pop[n]; // Populations
int<lower=0, upper=n_fam> fam[n]; // Families
int<lower=0, upper=n_site_fam> site_fam[n]; // Sites * families
int<lower=0, upper=n_pop> which_pop[n_site_fam]; // Populations of the site*families
}
parameters {
real beta0; // global intercept
simplex[6] pi;
real<lower = 0> sigma_tot;
real<lower = 0> sigma_K;
vector[n_pop] z_log_sigma_site_fam;
vector[n_site] z_site;
vector[n_bloc] z_block;
vector[n_pop] z_pop;
vector[n_fam] z_fam;
vector[n_site_fam] z_site_fam;
}
transformed parameters {
real R_squared;
real<lower = 0> sigma_r;
real<lower = 0> sigma_site;
real<lower = 0> sigma_block;
real<lower = 0> sigma_pop;
real<lower = 0> sigma_fam;
real mean_sigma_site_fam;
vector[n_pop] sigma_site_fam;
vector[n_site] alpha_site;
vector[n_bloc] alpha_block;
vector[n_pop] alpha_pop;
vector[n_fam] alpha_fam;
vector[n_site_fam] alpha_site_fam;
vector[n] mu; // linear predictor
//vector[n_pop] pop_var_alpha_site_fam; // Variance of population-specific `site*nursery` intercepts
// variance partitioning with the simplex pi
sigma_r = sqrt(pi[1]) * sigma_tot;
sigma_site = sqrt(pi[2]) * sigma_tot;
sigma_block = sqrt(pi[3]) * sigma_tot;
sigma_pop = sqrt(pi[4]) * sigma_tot;
sigma_fam= sqrt(pi[5]) * sigma_tot;
mean_sigma_site_fam= sqrt(pi[6]) * sigma_tot;
sigma_site_fam = exp(log(mean_sigma_site_fam) - (square(sigma_K)/2) + z_log_sigma_site_fam*sigma_K);
alpha_site = z_site*sigma_site;
alpha_block = z_block*sigma_block;
alpha_pop = z_pop*sigma_pop;
alpha_fam = z_fam*sigma_fam;
for(i in 1:n_site_fam){
alpha_site_fam[i] = z_site_fam[i]*sigma_site_fam[which_pop[i]];
}
mu = rep_vector(beta0, n) + alpha_site[site] + alpha_block[bloc] + alpha_pop[pop] + alpha_fam[fam] + alpha_site_fam[site_fam];
R_squared = 1 - variance(y - mu) / variance(y);
}
model{
//Priors
beta0 ~ normal(mean(y),20);
sigma_tot ~ student_t(3, 0.0, 1.0);
z_site ~ std_normal();
z_block ~ std_normal();
z_pop ~ std_normal();
z_fam ~ std_normal();
z_site_fam ~ std_normal();
z_log_sigma_site_fam ~ std_normal();
sigma_K ~ exponential(1);
// Likelihood
y ~ normal(mu, sigma_r);
}
generated quantities {
//Variances
real<lower=0> sigma2_tot;
real<lower=0> sigma2_r;
real<lower=0> sigma2_site;
real<lower=0> sigma2_block;
real<lower=0> sigma2_pop;
real<lower=0> sigma2_fam;
vector[n_pop] sigma2_site_fam;
sigma2_tot = square(sigma_tot);
sigma2_r = square(sigma_r);
sigma2_site = square(sigma_site);
sigma2_block = square(sigma_block);
sigma2_pop = square(sigma_pop);
sigma2_fam = square(sigma_fam);
sigma2_site_fam = square(sigma_site_fam);
}
We run the model for height measurements in 2020.
<- "HA20"
trait
<- data %>%
subdata filter(FieldSite %in% c("FE","FS")) %>% # we keep only trees in FS and FE
filter(!(FieldSite=="FE" & Nursery =="NG")) %>% # we remove the four trees in FE that come from NG
::select(FieldSite,Block,PopulationCode,Family,all_of(trait)) %>%
dplyrmutate(site_fam = paste0(FieldSite,"_",Family)) %>%
drop_na() %>%
::rename(bloc=Block,
dplyrsite=FieldSite,
pop=PopulationCode,
fam=Family,
y=any_of(trait))
<- compose_data(subdata)
stanlist
$which_pop <- as.numeric(as.factor(pull(unique(subdata[c("site_fam","pop")])[,"pop"]))) stanlist
<- sampling(model,
stanfit data = stanlist,
pars=c("beta0", "pi", "R_squared",
"alpha_site","alpha_block","alpha_pop","alpha_fam","alpha_site_fam",
"sigma2_r","sigma2_site","sigma2_block","sigma2_pop","sigma2_fam","sigma2_site_fam","sigma2_tot",
"mean_sigma_site_fam","sigma_K"),
iter = n_iter,
chains = n_chains,
cores = n_chains,
save_warmup = save_warmup,
thin=n_thin)
%>% saveRDS(file=here(paste0("outputs/StanModels/",trait,"_HMM3.rds"))) stanfit
<- readRDS(file=here(paste0("outputs/StanModels/",trait,"_HMM3.rds")))
stanfit
<- stanfit %>%
sigma2_site_fam ::tidyMCMC(
broom.mixedpars="sigma2_site_fam",
robust = T, # median and mean absolute deviation are used to compute point estimates and uncertainty
ess = F,
rhat = F,
conf.int = T,
conf.level = conf_level) %>%
mutate(pop=unique(subdata[["pop"]]))
%>% ggplot(aes(y = pop, x = estimate, xmin = conf.low, xmax = conf.high)) +
sigma2_site_fam geom_pointinterval(position = position_dodge(width = .6), point_size=2.5, show.legend = c(size = TRUE)) +
xlab(TeX("Population-specific variances of the site*family intercepts")) + ylab("") +
theme_bw() +
theme(axis.text.x = element_text(size=13),
axis.text.y = element_text(size=16),
axis.title = element_text(size=16),
legend.title=element_text(size=13),
legend.text=element_text(size=10),
strip.text.x = element_text(size = 14),
panel.grid.minor.x=element_blank(),
panel.grid.major.x=element_blank()) +
guides(color=guide_legend(ncol=1))
Valladares, Sanchez-Gomez, and Zavala (2006) proposes to quantify phenotypic plasticity based on phenotypic distances among individuals of a given species (in our study, population) exposed to different environments (in our study, planted in different common gardens), which is summarized in a relative distance plasticity index (RDPI) that allows for statistical comparisons of phenotypic plasticity between species (in our study, populations).
For a given trait, the relative distance plasticity index \(RDPI_p\) for a given population \(p\) can be calculated as follows:
\[RDPI_p = \frac{\sum_{n=1}^N{\frac{x_{ij}-x_{i'j'}}{x_{ij}+x_{i'j'}}}}{N}\]
where \(N\) is the number of pairwise comparisons among individuals of the same population but from different common gardens. \(x_{ij}\) and \(x_{i'j'}\) are the phenotypic values of individual \(j\) and \(j'\) in common gardens \(i\) and \(i’\). As such, the phenotypic plasticity of a population for a given trait corresponds to the sum of the relative phenotypic distances for all pairs of individuals of the population grown in different common gardens.
Comment: Note that RDPI may also be calculated at the family level (instead of the population level as above) and then be averaged for each population. However, as some families in FS are not the as in FE and FW, I thought that calculating the RDPI indexes directly at the population level would allow to consider all families (not having to remove some). But this is point I will be happy to discuss!
We calculate the RDPI for:
height measurements
the duration of bud burst
the number of days to reach of stage 4, 5 or 6 of budburst.
We check the number of NAs for the different traits:
%>% dplyr::select(contains("HA")) %>% sapply(function(x) sum(is.na(x))) data
HA13 HA14 HA15 HA16 HA17 HA18 HA19 HA20
748 57 68 79 70 76 74 83
%>% dplyr::select(contains("BD")) %>% sapply(function(x) sum(is.na(x))) data
BD15 BD16 BD17 BD18 BD19
88 176 86 64 94
%>% dplyr::select(contains("BT")) %>% sapply(function(x) sum(is.na(x))) data
BT15_4 BT15_5 BT15_6 BT16_4 BT16_5 BT16_6 BT17_4 BT17_5 BT17_6 BT18_4 BT18_5 BT18_6 BT19_4 BT19_5 BT19_6
71 54 71 133 62 99 63 58 81 60 60 63 70 67 91
Height in 2013 was not measured in FS so we do not calculate the RDPI index for this trait.
<- unique(data$PopulationCode) # population codes
pop_codes <- unique(data$FieldSite) # field site codes
site_codes <- combn(site_codes,2)
site_comb <- function(x){abs(x[1]-x[2])/(x[1]+x[2])}
calc_rel_dist
<- list(heights = data %>% dplyr::select(contains("HA"), -contains("13")) %>% colnames(), # height measurements (except in 2013)
list_traits duration_budburst = data %>% dplyr::select(contains("BD")) %>% colnames(), # duration of bud burst
timing_budburst = data %>% dplyr::select(contains("BT")) %>% colnames()) # time taken to reach budburst stages
lapply(list_traits, function(traits) {
lapply(traits, function(trait){
lapply(pop_codes, function(pop){
<- data %>%
subpop drop_na(any_of(trait)) %>%
::filter(PopulationCode %in% pop)
dplyr
<- nrow(subpop) # nb of individuals in the population
n_pop
<- lapply(1:dim(site_comb)[[2]], function(nb_comb){
rel_dist
<- subpop %>% dplyr::filter(FieldSite %in% site_comb[1,nb_comb])
subsite1 <- subpop %>% dplyr::filter(FieldSite %in% site_comb[2,nb_comb])
subsite2
lapply(1:nrow(subsite1), function(x) {
rbind(subsite1[x,trait],subsite2[,trait]) %>%
pull() %>%
combn(2, calc_rel_dist) %>%
1:nrow(subsite2)] %>%
.[as.numeric()
%>% unlist()
})
%>% unlist()
})
<- length(rel_dist)
N
sum(rel_dist)/N
%>%
}) setNames(pop_codes) %>%
as_tibble() %>%
pivot_longer(everything(), names_to = "PopulationCode", values_to = "RDPI") %>%
mutate(Trait=trait)
%>% bind_rows()
})
%>% saveRDS(file=here("outputs/PhenotypicPlasticity/RDPI.rds")) })
RDPI for height in 2014 (HA14
) and 2020 (HA20
) for each population:
lapply(c("HA14","HA20"), function(trait){
readRDS(file=here("outputs/PhenotypicPlasticity/RDPI.rds"))$heights %>%
filter(Trait == trait) %>%
make_spatialpoints_map(var="RDPI",
ggtitle=trait)})
RDPI for the duration of bud burst in 2015 (BD15
) and in 2019 (BD19
) for each population:
lapply(c("BD15","BD19"), function(trait){
readRDS(file=here("outputs/PhenotypicPlasticity/RDPI.rds"))$duration_budburst %>%
filter(Trait == trait) %>%
make_spatialpoints_map(var="RDPI",
ggtitle=trait)})
RDPI for the time taken to reach stage 4, 5 and 6 in 2019 for each population:
lapply(c("BT19_4","BT19_5","BT19_6"), function(trait){
readRDS(file=here("outputs/PhenotypicPlasticity/RDPI.rds"))$timing_budburst %>%
filter(Trait == trait) %>%
make_spatialpoints_map(var="RDPI",
ggtitle=trait)})
<- list(list("heights","height"),
list_titles list("duration_budburst","the duration of budburst"),
list("timing_budburst","the time taken to reach budburst stages"))
lapply(list_titles, function(x){
readRDS(file=here("outputs/PhenotypicPlasticity/RDPI.rds"))[[x[[1]]]] %>%
pivot_wider(values_from = RDPI, names_from = Trait) %>%
::select(-PopulationCode) %>%
dplyrcor() %>%
corrplot(method = 'number',
type = 'lower', diag = FALSE,
title=paste0("Correlation among RDPI for ",x[[2]]),
mar=c(0,0,2,0),
number.cex=0.9,tl.cex=0.9)
})
Interpretation
Strong correlation of the RDPI indexes among years for height, which is expected as height at year \(t\) is a function of height at year \(t-1\), which is not the case of phenology variables such as the duration of bud burst and the time taken to reach budburst stages.
We may have expected higher correlations of the RDPI indexes calculated for a given year, e.g. correlation among the RDPI indexes calculated for the time taken to reach budburst stage 4, 5 or 6 of the same year, but surprisingly, the correlations are not very high.
The correlations are even negative when the RDPI indexes are calculated for phenology variables across different years.
We can also look at the correlation among the RDPI indexes calculated for the different traits (see the correlation graph below) but correlations are also very low across traits:
readRDS(file=here("outputs/PhenotypicPlasticity/RDPI.rds")) %>%
bind_rows() %>%
pivot_wider(values_from = RDPI, names_from = Trait) %>%
::select(-PopulationCode) %>%
dplyrcor() %>%
corrplot(method = 'number',
type = 'lower', diag = FALSE,
title=paste0("Correlation among RDPI for all traits"),
mar=c(0,0,2,0),
number.cex=0.6,tl.cex=0.9)
In the section above, we saw that RDPI indexes capturing plasticity to the environmental variation across common gardens show very low correlations among phenology-related traits.
Here we calculated the RDPI indexes capturing plasticity to the climatic variation across years in each common garden successively.
<- unique(data$PopulationCode) # population codes
pop_codes <- unique(data$FieldSite) # field site codes
site_codes
<- list(duration_budburst = data %>% dplyr::select(contains("BD")) %>% colnames(), # duration of bud burst
list_traits timing_budburst_stage4 = data %>% dplyr::select(contains("_4")) %>% colnames(), # time taken to reach budburst stage 4
timing_budburst_stage5 = data %>% dplyr::select(contains("_5")) %>% colnames(), # time taken to reach budburst stage 5
timing_budburst_stage6 = data %>% dplyr::select(contains("_6")) %>% colnames()) # time taken to reach budburst stage 6
<- function(x){abs(x[1]-x[2])/(x[1]+x[2])}
calc_rel_dist
lapply(site_codes, function(site){
<- data %>%
subsite ::filter(FieldSite %in% site)
dplyr
lapply(list_traits, function(traits){
<- subsite %>%
subtrait ::select(PopulationCode, any_of(traits)) %>%
dplyrpivot_longer(cols=any_of(traits), names_to = "year", values_to="trait",values_drop_na = TRUE) %>%
mutate(year=paste0(20,str_sub(year,3,4)))
<- combn(unique(subtrait$year),2)
year_comb
lapply(pop_codes, function(pop){
<- subtrait %>%
subpop ::filter(PopulationCode %in% pop)
dplyr
<- nrow(subpop) # nb of individuals in the population
n_pop
<- lapply(1:dim(year_comb)[[2]], function(nb_comb){
rel_dist
<- subpop %>% dplyr::filter(year %in% year_comb[1,nb_comb])
subyear1 <- subpop %>% dplyr::filter(year %in% year_comb[2,nb_comb])
subyear2
lapply(1:nrow(subyear1), function(x) {
rbind(subyear1[x,"trait"],subyear2[,"trait"]) %>%
pull() %>%
combn(2, calc_rel_dist) %>%
1:nrow(subyear2)] %>%
.[as.numeric()
%>% unlist()
})
%>% unlist()
})
<- length(rel_dist)
N
sum(rel_dist)/N
%>%
}) setNames(pop_codes) %>%
as_tibble() %>%
pivot_longer(everything(), names_to = "PopulationCode", values_to = "RDPI")
%>% bind_rows(.id="Trait")
})
%>%
}) setNames(site_codes) %>%
bind_rows(.id="FieldSite") %>%
saveRDS(file=here("outputs/PhenotypicPlasticity/RDPI_AcrossYears.rds"))
We then look at the correlation across these RDPI indexes among the different traits and common gardens.
readRDS(file=here("outputs/PhenotypicPlasticity/RDPI_AcrossYears.rds")) %>%
pivot_wider(values_from = RDPI, names_from = c(Trait,FieldSite)) %>%
::select(-PopulationCode) %>%
dplyrcor() %>%
corrplot(method = 'number',
type = 'lower', diag = FALSE,
title=paste0("Correlation among RDPI for all traits"),
mar=c(0,0,2,0),
number.cex=0.6,tl.cex=0.9)
The correlations among the RDPI capturing plasticity to climatic variation across years in a given environment (i.e. common garden) are mostly very low, with some even negative. Very few show high correlations.
To account for the nursery effects, we can estimate plasticity to environment variation originating from the combined effect of the site and the nursery.
For that, the first option is to use the two common gardens in which the trees come from a unique nursery, i.e. FS (in which all trees come from NG) and FE (in which all trees come from NE, except four trees that come from NG and that we remove to calculate the RDPI index).
So we calculate the RDPI indexes capturing plasticity to environment variation between FS and FE.
<- data %>% filter(!(FieldSite=="FE" & Nursery =="NG")) # we remove the four trees in FE that come from NG
data <- unique(data$PopulationCode) # population codes
pop_codes <- c("FE","FS") # field site codes
site_codes <- combn(site_codes,2)
site_comb <- function(x){abs(x[1]-x[2])/(x[1]+x[2])}
calc_rel_dist
<- list(heights = data %>% dplyr::select(contains("HA"), -contains("13")) %>% colnames(), # height measurements (except 2013)
list_traits duration_budburst = data %>% dplyr::select(contains("BD")) %>% colnames(), # duration of bud burst
timing_budburst = data %>% dplyr::select(contains("BT")) %>% colnames()) # time taken to reach budburst stages
lapply(list_traits, function(traits) {
lapply(traits, function(trait){
lapply(pop_codes, function(pop){
<- data %>%
subpop drop_na(any_of(trait)) %>%
::filter(PopulationCode %in% pop)
dplyr
<- nrow(subpop) # nb of individuals in the population
n_pop
<- lapply(1:dim(site_comb)[[2]], function(nb_comb){
rel_dist
<- subpop %>% dplyr::filter(FieldSite %in% site_comb[1,nb_comb])
subsite1 <- subpop %>% dplyr::filter(FieldSite %in% site_comb[2,nb_comb])
subsite2
lapply(1:nrow(subsite1), function(x) {
rbind(subsite1[x,trait],subsite2[,trait]) %>%
pull() %>%
combn(2, calc_rel_dist) %>%
1:nrow(subsite2)] %>%
.[as.numeric()
%>% unlist()
})
%>% unlist()
})
<- length(rel_dist)
N
sum(rel_dist)/N
%>%
}) setNames(pop_codes) %>%
as_tibble() %>%
pivot_longer(everything(), names_to = "PopulationCode", values_to = "RDPI") %>%
mutate(Trait=trait)
%>% bind_rows()
})
%>% saveRDS(file=here("outputs/PhenotypicPlasticity/RDPI_based_on_FE_FS.rds")) })
We look at the correlation among the RDPI calculated with the three sites (i.e. not accounting for the nursery effects) and the RDPI calculated based only on FE and FS so that the site and nursery effects are confounded.
<- readRDS(file=here("outputs/PhenotypicPlasticity/RDPI.rds")) %>%
rdpi_3CG bind_rows() %>%
pivot_wider(values_from = RDPI, names_from = Trait) %>%
::select(-PopulationCode)
dplyr
<-readRDS(file=here("outputs/PhenotypicPlasticity/RDPI_based_on_FE_FS.rds")) %>%
rdpi_2CG bind_rows() %>%
pivot_wider(values_from = RDPI, names_from = Trait) %>%
::select(-PopulationCode)
dplyr
lapply(colnames(rdpi_2CG), function(x) cor(rdpi_3CG[[x]],rdpi_2CG[[x]])) %>%
setNames(colnames(rdpi_2CG)) %>%
bind_rows(.id="Trait") %>%
pivot_longer(everything(),names_to = "Traits", values_to = "Correlation") %>%
kable_mydf()
Traits | Correlation |
---|---|
HA14 | 0.92 |
HA15 | 0.89 |
HA16 | 0.90 |
HA17 | 0.85 |
HA18 | 0.90 |
HA19 | 0.93 |
HA20 | 0.92 |
BD15 | 0.97 |
BD16 | 0.79 |
BD17 | 0.92 |
BD18 | 0.64 |
BD19 | 0.86 |
BT15_4 | 0.67 |
BT15_5 | 0.06 |
BT15_6 | 0.38 |
BT16_4 | 0.79 |
BT16_5 | 0.47 |
BT16_6 | 0.48 |
BT17_4 | 0.90 |
BT17_5 | 0.82 |
BT17_6 | 0.97 |
BT18_4 | 0.97 |
BT18_5 | 0.97 |
BT18_6 | 0.98 |
BT19_4 | 0.47 |
BT19_5 | 0.51 |
BT19_6 | 0.88 |
Plasticity estimates obtained with the hierarchical mixed models show low correlations with plasticity estimates obtained with the RDPI approach, see section below. I thought that one explanation could be that the RDPI index includes within-family phenotypic variability (being based on the pairwise comparisons of the phenotypic values of all individuals from the same population but from different sites), while the HMM approach remove the within-family variability (considering only the variance of the `sitefamily intercepts). That’s why in this section, I calculated the RDPI index in two steps:
first, in each common garden, calculating the mean phenotype of each family.
calculate the population-specific RDPI based on pairwise comparisons of the mean phenotypic values of the families.
However, as we can see in the section below, the RDPI calculated based on family means is very similar to the RDPI calculated based on individual phenotypic values (the first way I tried).
<- data %>%
data filter(FieldSite %in% c("FE","FS")) %>% # we keep only trees in FS and FE
filter(!(FieldSite=="FE" & Nursery =="NG"))
<- unique(data$PopulationCode) # population codes
pop_codes <- c("FE","FS") # field site codes
site_codes <- combn(site_codes,2)
site_comb <- function(x){abs(x[1]-x[2])/(x[1]+x[2])}
calc_rel_dist
<- list(heights = data %>% dplyr::select(contains("HA"), -contains("13")) %>% colnames(), # height measurements (except in 2013)
list_traits duration_budburst = data %>% dplyr::select(contains("BD")) %>% colnames(), # duration of bud burst
timing_budburst = data %>% dplyr::select(contains("BT")) %>% colnames()) # time taken to reach budburst stages
lapply(list_traits, function(traits) {
lapply(traits, function(trait){
lapply(pop_codes, function(pop){
<- data %>%
subpop drop_na(any_of(trait)) %>%
::filter(PopulationCode %in% pop)
dplyr
#n_pop <- nrow(subpop) # nb of individuals in the population
<- lapply(1:dim(site_comb)[[2]], function(nb_comb){
rel_dist
<- subpop %>% dplyr::filter(FieldSite %in% site_comb[1,nb_comb]) %>%
subsite1 ::select(Family,any_of(trait)) %>%
dplyrgroup_by(Family) %>%
summarise_if(is.numeric, mean)
<- subpop %>% dplyr::filter(FieldSite %in% site_comb[2,nb_comb]) %>%
subsite2 ::select(Family,any_of(trait)) %>%
dplyrgroup_by(Family) %>%
summarise_if(is.numeric, mean)
lapply(1:nrow(subsite1), function(x) {
rbind(subsite1[x,trait],subsite2[,trait]) %>%
pull() %>%
combn(2, calc_rel_dist) %>%
1:nrow(subsite2)] %>%
.[as.numeric()
%>% unlist()
})
%>% unlist()
})
<- length(rel_dist)
N
sum(rel_dist)/N
%>%
}) setNames(pop_codes) %>%
as_tibble() %>%
pivot_longer(everything(), names_to = "PopulationCode", values_to = "RDPI") %>%
mutate(Trait=trait)
%>% bind_rows()
})
%>% saveRDS(file=here("outputs/PhenotypicPlasticity/RDPI_FamilyMeans.rds")) })
<- readRDS(file=here("outputs/PhenotypicPlasticity/RDPI.rds")) %>%
df bind_rows() %>%
filter(Trait == "HA20") %>%
::rename(RDPI_3CG=RDPI)
dplyr
<- readRDS(file=here("outputs/PhenotypicPlasticity/RDPI_based_on_FE_FS.rds")) %>%
df bind_rows() %>%
filter(Trait == "HA20") %>%
::rename(RDPI_2CG=RDPI) %>%
dplyrleft_join(df, by=c("Trait","PopulationCode")) %>%
::select(-Trait)
dplyr
<- readRDS(file=here("outputs/PhenotypicPlasticity/RDPI_FamilyMeans.rds")) %>%
df bind_rows() %>%
filter(Trait == "HA20") %>%
::rename(RDPI_FamilyMeans=RDPI) %>%
dplyr::select(-Trait) %>%
dplyrleft_join(df, by="PopulationCode")
<- sigma2_site_fam[,c("pop","estimate")] %>%
df ::rename(PopulationCode = pop,
dplyrHMM3 = estimate) %>%
left_join(df, by = "PopulationCode")
cor(df[,-1]) %>%
corrplot(method = 'number',
type = 'lower', diag = FALSE,
title=paste0("Correlation among the RDPI approaches and HMM3"),
mar=c(0,0,2,0),
number.cex=0.6,tl.cex=0.9)
How to know which plasticity responses are adaptive or not?
And how to determine which populations are the most plastic based on all traits? Indeed, depending on the traits, the ranking of the populations for the plasticity indexes is very different.. Should we combine the different traits? But if yes, how? It seems that for height we can use only one RDPI index (as they are very similar across years) but for the phenology variables, the choice of the year and even the stage considered (4, 5 or 6) for the timing of budburst will be very important.