1 Introduction

In this document, we estimate the association between the within-population quantitative additive genetic variation and the ten potential drivers for three phenotypic traits: height measured the 17/12/2007 (when the trees were 3-year old), height measured in 2010 (when the trees were 6-year old) and SLA measured in 2009 (when the trees were 5-year old).

2 Load the progeny test data

# Load the progeny test data
# The spreadsheet 2 corresponds to measurement in Cavada (Asturias common garden)
data <- read_excel("data/ProgenyTests/F26SNP_d130809_20120515.xls", sheet = 2) %>% 
  drop_na(Prov_code) # we remove the four rows at the end of the file that have NAs for id, prov id, site...

# Provenance names in the progeny tests:
# prov.progeny <- unique(data$Prov_code) %>% 
#   str_to_upper() %>% 
#   str_sub(1,3) # 30 provenances

# Load CLONAPIN data
clonapin <- readRDS(file="data/ClonapinData/PhenoDataNovember2019_AnnualTraits.rds")

# Provenance names in CLONAPIN
prov.clonapin <- unique(clonapin$prov) %>% setdiff("MAD") # 33 provenances
# prov.progeny[prov.progeny %in% prov.clonapin] # 20 provenances in common
# prov.progeny[!(prov.progeny %in% prov.clonapin)] # 10 provenances that have no match

# But this method to do the name matching is not ok for some provenances such as SanL,
# which is SAL with the CLONAPIN code

# Load the file with the matching of code between CLONAPIN (column "CODE") and the progeny tests (column "CODE_FIELD")
name.clonapin.progeny <- read_csv(file="data/ClonapinData/coordinates_provenances.csv") %>% 
  filter(CODE %in% prov.clonapin) # keep only the 33 provenances of CLONAPIN used in the study

unique(data$Prov_code)[unique(data$Prov_code) %in% name.clonapin.progeny$CODE_FIELD] # 23 provenances in common

##  [1] "Oria" "Sier" "Mimi" "Tamr" "Cuel" "Pleu" "Segu" "Vald" "Cast" "Bayu" "Cada" "Aren" "Leir" "SanL" "Carb" "Pini" "Alto" "Ceni" "Pine" "Arma" "Lamu" "Coca" "Puer"

# We keep only these 23 provenances
data <- name.clonapin.progeny %>% 
  dplyr::select(CODE,CODE_FIELD) %>% 
  dplyr::rename(Prov_code=CODE_FIELD, Prov_code_clonapin=CODE) %>%  # Prov_code_clonapin is the code names used in CLONAPIN, Prov_code that used in the progeny tests
  right_join(data,by = "Prov_code") %>% 
  drop_na(Prov_code_clonapin)

Dataset of 3196 observations.

3 Options and parameters

# Options for model sampling
n_chains <- 4
n_iter <- 2500
n_warm <- 1250
n_thin <- 1
save_warmup = FALSE 

# Plot theme
theme_set(theme_bw(base_size = 20))

# option for the MCMC intervals
point.est="median"
conf.level <- 0.95

selected.drivers <- c("A","D",
                      "mean_MCMT","mean_MSP","var_MCMT","var_MSP",
                      "SH.20km.PC1","SH.20km.PC2","SH.1km.PC1","SH.1km.PC2")

selected.drivers.labels <- c("A","D",
                             "mean(MCMT)","mean(SP)",
                             "var(MCMT)","var(SP)",
                             "SH1[20km]","SH2[20km]","SH1[1.6km]","SH2[1.6km]")

selected.drivers.colors <- c("#4D9221" ,"#7FBC41", "#C51B7D" ,"#DE77AE",
                              "#E08214" ,"#FDB863",
                              "#2D004B","#542788" ,"#8073AC" ,"#B2ABD2")




# "HT07" => Height measurement in 2007 (3-year old)
# "HT10" => Height measurement in 2010 (6-year old)
# "SLA_09" => Height measurement in 2009 (5-year old)
traits <- c("HT07","HT10", "SLA_09")

trait.options <- list("HT07"=list(x.label.histogram="Height at 3-year old",
                                  provs = unique(data$Prov_code_clonapin) %>% str_sort()),
                      "HT10"=list(x.label.histogram= "Height at 6-year old",
                                  provs = unique(data$Prov_code_clonapin) %>% str_sort()),
                      "SLA_09"=list(x.label.histogram= "Specific Leaf Area at 5-year old"))

4 Trait distribution - exploratory analyses

4.1 Filtering

# remove NAs for the traits of interest
subdata.traits <- mclapply(traits,function(trait){
  data %>% 
  drop_na(all_of(trait)) %>% 
  mutate(Family.id=paste0(Prov_code_clonapin,"_",Fam)) %>% 
  dplyr::select("Prov_code_clonapin","Prov_code","ID","Fam","Family.id","Block",all_of(trait)) %>% 
  rename(Trait=trait)
}) %>% setNames(traits)

hists <- mclapply(traits,function(trait){
  subdata.traits[[trait]] %>%
    ggplot(aes(x=Trait)) +
  geom_histogram(bins = 30, fill="seagreen3",alpha=0.7) +
  theme_bw() +
  labs(x=trait.options[[trait]]$x.label.histogram,y="Number of trees") +
  theme(axis.text = element_text(size=15),
        axis.title = element_text(size=15))
    
})
hists[[2]] <- hists[[2]] + ylab("")
hists[[3]] <- hists[[3]] + ylab("")
plot_grid(hists[[1]],hists[[2]],hists[[3]],nrow=1)

For height, we remove the zeros because they may represent dead individuals. And for SLA, we remove the outlier with SLA = 82.34 because it is likely to be a measurement error.

# removing zeros for height
subdata.traits <- mclapply(traits,function(trait){
subdata.traits[[trait]] <- subdata.traits[[trait]] %>% filter(!(Trait==0)) # removing zeros
  }) %>% setNames(traits)

# removing one outlier for SLA
subdata.traits[["SLA_09"]] <- subdata.traits[["SLA_09"]] %>% 
  filter(Trait < 70)

FOr SLA, we also remove the populations SEG and SAL as they have only 1 family in which SLA was measured.

subdata.traits[["SLA_09"]] %>% 
  group_by(Prov_code_clonapin) %>% 
  dplyr::summarise(n_families = n_distinct(Family.id),
            n_individuals = n())

## # A tibble: 14 × 3
##    Prov_code_clonapin n_families n_individuals
##    <chr>                   <int>         <int>
##  1 ARN                        12            88
##  2 BAY                        12            94
##  3 CAD                        10            74
##  4 COC                         7            55
##  5 CUE                        14           109
##  6 LAM                         2             7
##  7 LEI                        11            78
##  8 MIM                         9            72
##  9 ORI                        15           116
## 10 PLE                         9            71
## 11 PUE                         5            36
## 12 SAL                         1             2
## 13 SEG                         1             4
## 14 TAM                        16           125

subdata.traits[["SLA_09"]] <- subdata.traits[["SLA_09"]] %>% 
  filter(!Prov_code_clonapin %in% c("SEG", "SAL"))

# prov ID for SLA
trait.options$SLA_09$provs <- subdata.traits[["SLA_09"]] %>% distinct(Prov_code_clonapin) %>% as_vector() %>%  str_sort()

Let’s plot it again:

hists <- mclapply(traits,function(trait){
subdata.traits[[trait]] %>% 
    ggplot(aes(x=Trait)) + 
  geom_histogram(bins = 30, fill="seagreen3",alpha=0.7) +  
  theme_bw() + 
  labs(x=trait.options[[trait]]$x.label.histogram,y="Number of trees") +
  theme(axis.title = element_text(size=15),
        axis.text = element_text(size=15)) 
})

hists[[2]] <- hists[[2]] + ylab("")
hists[[3]] <- hists[[3]] + ylab("")
hists <- plot_grid(hists[[1]],hists[[2]],hists[[3]],nrow=1)

# Figure provided in the Supplementary Information
ggsave(hists,
       file = paste0("figs/ExploratoryAnalyses/ValidationStep_Histograms.png"),height=6,width=14)

hists

As the traits have close to normal distribution, we do not tranform them prior to analyses.

hists <- mclapply(traits,function(trait){
subdata.traits[[trait]] %>% 
    ggplot(aes(x=Trait)) + 
  geom_histogram(fill="seagreen3",alpha=0.7) + 
  theme_bw()  +
  facet_wrap(~as.factor(Prov_code_clonapin), nrow=3) + 
  labs(x="",y="",title=trait.options[[trait]]$x.label.histogram) +
  theme(axis.text.x = element_text(size=6),
        axis.text.y = element_text(size=12)) })

hists <- plot_grid(hists[[1]],hists[[2]],hists[[3]],ncol=1)

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Figure provided in the Supplementary Information
ggsave(hists,
       file = paste0("figs/ExploratoryAnalyses/ValidationStep_Histograms_PopSpecific.png"),height=15,width=12)
hists

mclapply(traits,function(trait){

# Number of individuals per provenance
counts.ind <- subdata.traits[[trait]] %>% 
  dplyr::select(Prov_code_clonapin,Fam,ID) %>% 
  distinct() %>% 
  group_by(Prov_code_clonapin) %>% 
  dplyr::summarise(count.individuals=n())

# Number of family per provenance
counts.fam <- subdata.traits[[trait]] %>% 
  dplyr::select(Prov_code_clonapin,Fam) %>% 
  distinct() %>% 
  group_by(Prov_code_clonapin) %>% 
  dplyr::summarise(count.families=n())

# Mean and variance for height in each provenance (merged with the nb of family per prov)
meta.subdata <- subdata.traits[[trait]] %>% 
  dplyr::select(Prov_code_clonapin,Fam,Trait) %>% 
  group_by(Prov_code_clonapin) %>% 
  summarise(mean=mean(Trait),var=var(Trait)) %>% 
  inner_join(counts.fam,by = "Prov_code_clonapin") %>% 
  inner_join(counts.ind,by = "Prov_code_clonapin") %>% 
  dplyr::rename(Populations=Prov_code_clonapin,
                Mean=mean,
                Variance=var,
                "Number of families"=count.families)

meta.subdata %>% 
  xtable(type = "latex",digits=3) %>% 
  print(file = paste0("tables/ExpDesign_ProgenyTestAsturias_",trait,".tex"), 
        include.rownames=FALSE)

corr.list <- list("Correlation between the number of family per provenance and the within-population variance of the trait" = cor(meta.subdata$Variance,meta.subdata$`Number of families`) %>%  round(3),
                  "Correlation between the number of family per provenance and the within-population mean of the trait"= cor(meta.subdata$Mean,meta.subdata$`Number of families`) %>%  round(3),
                  "Correlation between the within-population mean and variance of the trait" = cor(meta.subdata$Variance,meta.subdata$Mean) %>%  round(3))
  
corr.list
  }) %>% setNames(traits)

## $HT07
## $HT07$`Correlation between the number of family per provenance and the within-population variance of the trait`
## [1] 0.073
## 
## $HT07$`Correlation between the number of family per provenance and the within-population mean of the trait`
## [1] -0.301
## 
## $HT07$`Correlation between the within-population mean and variance of the trait`
## [1] 0.421
## 
## 
## $HT10
## $HT10$`Correlation between the number of family per provenance and the within-population variance of the trait`
## [1] -0.015
## 
## $HT10$`Correlation between the number of family per provenance and the within-population mean of the trait`
## [1] -0.205
## 
## $HT10$`Correlation between the within-population mean and variance of the trait`
## [1] 0.433
## 
## 
## $SLA_09
## $SLA_09$`Correlation between the number of family per provenance and the within-population variance of the trait`
## [1] 0.101
## 
## $SLA_09$`Correlation between the number of family per provenance and the within-population mean of the trait`
## [1] 0.026
## 
## $SLA_09$`Correlation between the within-population mean and variance of the trait`
## [1] -0.125

5 Model equation and code

We used the same mathematical model as the one used on CLONAPIN data but replacing clones by families.

We modeled each trait \(y_{bpfr}\) such as:

\[\begin{equation} \begin{aligned} y_{bpfr} & \sim \mathcal{N}(\mu_{bpcf},\sigma^{2}_{r})\\[3pt] \mu_{bpf} & = \beta_{0} + B_{b} + P_{p} + F_{f(p)}\\[3pt] \end{aligned} \end{equation}\]

where \(\beta_{0}\) is the global intercept, \(B_{b}\) the block intercepts, \(P_{p}\) the population intercepts, \(F_{f(p)}\) the family intercepts and \(\sigma^{2}_{r}\) the residual variance.

The prior of \(\beta_{0}\) was weakly informative and centered around the mean of the observed values for the trait under considered, as follows:

\[\beta_{0} \sim \mathcal{N}(\mu_{y},2)\]

The population and block intercepts, \(P_{p}\) and \(B_{b}\) were considered normally-distributed with variances \(\sigma^{2}_{P}\) and \(\sigma^{2}_{B}\), such as:

\[\begin{bmatrix} B_{b} \\ P_{p} \end{bmatrix} \sim \mathcal{N}\left(0, \begin{bmatrix}\sigma^{2}_{B}\\[3pt] \sigma^{2}_{P}\\ \end{bmatrix} \right)\\[3pt]\]

The family intercepts \(F_{f(p)}\) were considered to follow some population-specific normal distributions, such as: \[F_{f(p)} \sim \mathcal{N}(0,\sigma^{2}_{F_{p}})\]

where \(\sigma^{2}_{F_{p}}\) are the population-specific variances among families and are equal to:

\[\sigma^{2}_{F_{p}} = \frac{1}{4} \sigma^{2}_{A_{p}} \]

where \(\sigma^{2}_{A_{p}}\) are the population-specific additive genetic variances.

To partition the total variance, we parameterize our model so that only the total variance, \(\sigma_{tot}^{2}\) has a prior, such that:

\[\begin{equation} \begin{aligned} \sigma_{tot}^{2} & = \sigma_{r}^{2} + \sigma_{B}^{2} + \overline{\sigma_{F_{p}}^{2}} + \sigma_{P}^{2}\\[3pt] \sigma_{r} & = \sigma_{tot} \times \sqrt(\pi_{r})\\[3pt] \sigma_{B} & = \sigma_{tot} \times \sqrt(\pi_{B})\\[3pt] \sigma_{P} & = \sigma_{tot} \times \sqrt(\pi_{P})\\[3pt] \overline{\sigma_{F_{p}}} & = \sigma_{tot} \times \sqrt(\pi_{F})\\[3pt] \sigma_{tot} & \sim \mathcal{S}^{*}(0,1,3) \end{aligned} \end{equation}\]

where \(\overline{\sigma_{F_{p}}}\) and \(\overline{\sigma_{F_{p}}^{2}}\) are the mean of the population-specific among-families standard deviations (\(\sigma_{F_{p}}\)) and variances (\(\sigma^{2}_{F_{p}}\)), respectively, and \(\sum_{l}^{4}\pi_{l}=1\) (using the simplex function in Stan).

The population-specific additive genetic standard deviations \(\sigma_{A_{p}}\) follow a log-normal distribution with mean \(2\overline{\sigma_{F_{p}}}\) and standard deviation \(\sigma_{K}\), such as:

\[\begin{equation} \begin{aligned} \sigma_{A_{p}} & \sim \mathcal{LN}\left(\ln(2\overline{\sigma_{F_{p}}})-\frac{\sigma^{2}_{K}}{2} + \beta_{X}X_{p},\sigma^{2}_{K}\right)\\[3pt] \sigma_{K} & \sim \exp(1)\\[3pt] \end{aligned} \end{equation}\]

with \(X_{p}\) the potential driver considered and \(\beta_{x}\) its associated coefficient.

model = stan_model("scripts/StanModels/HierarchicalModel_VaryingInterClonesSD_ProgenyTestAsturias.stan")
model

## S4 class stanmodel 'anon_model' coded as follows:
## //Model to estimate the additive genetic variance in the common garden of Asturias
## data {
##   int<lower=1> N;                                                              // Number of observations
##   vector[N] y;                                                                 // Response variable
##   int<lower=0> nblock;                                                         // Number of blocks
##   int<lower=0> nprov;                                                          // Number of provenances
##   int<lower=0> nfam;                                                          // Number of families
##   int<lower=0, upper=nblock> bloc[N];                                          // Blocks
##   int<lower=0, upper=nprov> prov[N];                                           // Provenances
##   int<lower=0, upper=nprov> which_prov[nfam];                                 // Provenances
##   int<lower=0, upper=nfam> fam[N];                                           // Families
##   vector[nprov] X;                                                             // Potential predictor of the additive genetic variance
## }
## parameters {
##   real beta0; // global intercept
##   real betaX; // coefficient of Xp
##   simplex[4] pi;
##   real<lower = 0> sigma_tot;
##   real<lower = 0> sigma_K;
##   vector[nprov] z_log_sigma_A;
## 
##   vector[nblock] z_block;
##   vector[nprov] z_prov;
##   vector[nfam] z_fam;
## 
## }
## transformed parameters {
##   real R_squared;
## 
##   real<lower = 0>  sigma_r;
##   real<lower = 0>  sigma_block;
##   real<lower = 0>  sigma_prov;
## 
##   real mean_sigma_fam;
##   vector[nprov] sigma_fam;
##   vector[nprov] sigma_A;
## 
##   vector[nprov] alpha_prov;
##   vector[nfam] alpha_fam;
##   vector[nblock] alpha_block;
## 
##   vector[N] mu; // linear predictor
## 
##   // variance partitioning with the simplex pi
##   sigma_r = sqrt(pi[1]) * sigma_tot;
##   sigma_block = sqrt(pi[2]) * sigma_tot;
##   sigma_prov = sqrt(pi[3]) * sigma_tot;
## 
##   mean_sigma_fam= sqrt(pi[4]) * sigma_tot;
##   sigma_A = exp(log(2*mean_sigma_fam) - (square(sigma_K)/2)  + betaX*X + z_log_sigma_A*sigma_K);
## 
##   alpha_prov = z_prov*sigma_prov;
## 
##   for(f in 1:nfam){
##     alpha_fam[f] =  z_fam[f]*0.5*sigma_A[which_prov[f]];
##     sigma_fam[which_prov[f]] = 0.5*sigma_A[which_prov[f]]; // sigma2_fam = 1/4 sigma2_A ==> sigma_fam = 1/2 sigma_A
##   }
## 
##   alpha_block = z_block*sigma_block;
## 
##   mu = rep_vector(beta0, N) + alpha_fam[fam] + alpha_block[bloc] + alpha_prov[prov];
##   R_squared = 1 - variance(y - mu) / variance(y);
## }
## model{
##   //Priors
##   beta0 ~ normal(mean(y),2);
##   betaX ~ std_normal();
##   sigma_tot ~ student_t(3, 0.0, 1.0);
## 
##   z_prov ~ std_normal();
##   z_block ~ std_normal();
##   z_fam ~ std_normal();
## 
##   z_log_sigma_A ~ std_normal();
##   sigma_K ~ exponential(1);
## 
##   // Likelihood
##   y ~ normal(mu, sigma_r);
## }
## generated quantities {
##   //Variances
##   real<lower=0> sigma2_r;
##   real<lower=0> sigma2_prov;
##   real<lower=0> sigma2_block;
##   vector[nprov] sigma2_fam;
##   vector[nprov] sigma2_A;
##   vector[nprov] h2_prov;
## 
##   // Posterior predictive check
##   vector[N] y_rep;
## 
##   // Log-Likelihood (for WAIC/loo computations)
##   vector[N] log_lik;
## 
##   sigma2_r = square(sigma_r);
##   sigma2_prov = square(sigma_prov);
##   sigma2_fam = square(sigma_fam);
##   sigma2_A = square(sigma_A);
##   for(p in 1:nprov)  h2_prov[p] = sigma2_A[p]/(sigma2_r+sigma2_A[p]);
##   sigma2_block = square(sigma_block);
##   for(i in 1:N)  {
##     y_rep[i] = normal_rng(mu[i], sigma_r);
##     log_lik[i] = normal_lpdf(y[i]| mu[i], sigma_r); // log probability density function
##   }
## }

6 Running the models

list.stan.traits <- mclapply(traits,function(trait){

df <- readRDS(file="data/DF_Drivers.rds") %>% # downloading the dataframe with the drivers
  dplyr::select(all_of(c("prov",selected.drivers))) %>% 
  dplyr::mutate(across(-prov,scale)) %>% # scaling (mean-centering) the potential drivers
  dplyr::rename(Prov_code_clonapin=prov) %>% 
  right_join(subdata.traits[[trait]],by="Prov_code_clonapin")

#### Create the stan lists (one for each driver):
list.stan <- lapply(selected.drivers, function(x)    {
    list(N=dim(df)[1],
         y=df$Trait,
         X=df[,c(x,"Prov_code_clonapin")] %>% distinct() %>% pull(x) %>% as.numeric(),
         nprov = length(unique(df$Prov_code_clonapin)),
         nfam = length(unique(df$Family.id)),
         nblock = length(unique(df$Block)),
         prov = as.numeric(as.factor(df$Prov_code_clonapin)),
         which_prov = as.numeric(as.factor(unique(df[c("Prov_code_clonapin","Family.id")])[,"Prov_code_clonapin"])),
         fam = as.numeric(as.factor(df$Family.id)),
         bloc = as.numeric(as.factor(df$Block)))}) %>% 
  setNames(selected.drivers)

}) %>% setNames(traits)

mclapply(traits,function(trait){

set.seed(87)
  
list.models <- list()

for(i in selected.drivers){
  
  list.models[[i]] <- sampling(model, 
                               data = list.stan.traits[[trait]][[i]], 
                               pars=c("beta0","betaX",
                                      "pi","sigma2_r","sigma2_block","sigma2_prov","sigma2_fam",
                                      "h2_prov",
                                      "sigma_tot","sigma_block","sigma_prov",
                                      "mean_sigma_fam","sigma_fam","sigma_K",
                                      "sigma_A","sigma2_A"),
                               iter = n_iter, 
                               chains = n_chains, 
                               cores = n_chains,
                               save_warmup = save_warmup,
                               thin=n_thin)
}

saveRDS(list.models,file=paste0("outputs/models/",trait,"_ProgenyTestsAsturias.rds"))

})

7 Figures

7.1 \(\beta_X\) coefficients

param <- "betaX"

ExtractParam <- function(x)  {
  readRDS(file=paste0("outputs/models/",x,"_ProgenyTestsAsturias.rds")) %>% 
  mclapply(function(x) broom.mixed::tidyMCMC(x,pars=param,
                droppars = NULL, 
                estimate.method = point.est, 
                ess = F, 
                rhat = F, 
                conf.int = T,
                conf.level = conf.level)) %>% 
  bind_rows(.id="Drivers") %>% 
  mutate(Traits=x)
}

p <- mclapply(traits,ExtractParam) %>% 
  bind_rows() %>%
  mutate(Drivers=case_when(Drivers=="SH.20km.PC1"~"SH1[20km]",
                      Drivers=="SH.20km.PC2"~"SH2[20km]",
                      Drivers=="SH.1km.PC1"~"SH1[1.6km]",
                      Drivers=="SH.1km.PC2"~"SH2[1.6km]",
                      Drivers=="mean_bio6"~"mean(bio6)",
                      Drivers=="mean_MCMT"~"mean(MCMT)",
                      Drivers=="mean_EMT"~"mean(EMT)",
                      Drivers=="mean_bio14"~"mean(bio14)",
                      Drivers=="mean_MSP"~"mean(SP)",
                      Drivers=="mean_SHM"~"mean(SHM)",
                      Drivers=="var_bio6"~"var(bio6)",
                      Drivers=="var_MCMT"~"var(MCMT)",
                      Drivers=="var_EMT"~"var(EMT)",
                      Drivers=="var_bio14"~"var(bio14)",
                      Drivers=="var_MSP"~"var(SP)",
                      Drivers=="var_SHM"~"var(SHM)",
                      TRUE ~ as.character(Drivers)),
         Hyp=case_when(Drivers=="A"~"Admixture",
                       Drivers=="D"~"Admixture",
                       Drivers=="SH1[20km]"~"Spatial heterogeneity",
                       Drivers=="SH2[20km]"~"Spatial heterogeneity",
                       Drivers=="SH1[1.6km]"~"Spatial heterogeneity",
                       Drivers=="SH2[1.6km]"~"Spatial heterogeneity",
                       Drivers=="mean(bio6)"~"Climate harshness",
                       Drivers=="mean(MCMT)"~"Climate harshness",
                       Drivers=="mean(EMT)"~"Climate harshness",
                       Drivers=="mean(bio14)"~"Climate harshness",
                       Drivers=="mean(SHM)"~"Climate harshness",
                       Drivers=="mean(SP)"~"Climate harshness",
                       Drivers=="var(bio6)"~"Temporal heterogeneity",
                       Drivers=="var(MCMT)"~"Temporal heterogeneity",
                       Drivers=="var(EMT)"~"Temporal heterogeneity",
                       Drivers=="var(bio14)"~"Temporal heterogeneity",
                       Drivers=="var(SHM)"~"Temporal heterogeneity",
                       Drivers=="var(SP)"~"Temporal heterogeneity")) %>% 
  dplyr::mutate(Drivers=factor(Drivers,levels=selected.drivers.labels)) %>% 
  ggplot(aes(x = Drivers, y = estimate,ymin = conf.low, ymax = conf.high,colour=Traits),shape=16) +
  geom_pointinterval(position = position_dodge(width = .6),point_size=3,show.legend = c(size = TRUE),size=4) +
  facet_grid(.~Hyp,scales="free", space = "free") + 
  ylab(TeX("$\\beta_{X}$ estimates")) + xlab("") +
  scale_colour_manual(labels = c("Height (3-year old)","Height (6-year old)","Specific Leaf Area (5-year old)"), 
                      values=c("#FDAE61","#B2182B","royalblue3")) +
  theme_bw() +
  theme(axis.text.x = element_text(size=10),
        axis.text.y = element_text(size=16),
        axis.title = element_text(size=16),
      legend.title=element_text(size=13), 
      legend.text=element_text(size=10),
      legend.position = c(.12, .12), #.414
      legend.background = element_rect(colour = "grey"),
      strip.text.x = element_text(size = 14),
      panel.grid.minor.x=element_blank(),
      panel.grid.major.x=element_blank()) +
    guides(color=guide_legend(ncol=1))

p

# save as png
ggsave(p, file="figs/PosteriorDistributions/ValidationStep_BetaX_PaperFig.png",height=6.5,width=13)

# save as pdf
ggsave(p, file="figs/PosteriorDistributions/ValidationStep_BetaX_PaperFig.pdf",height=6.5,width=13)

7.2 Additive genetic variances \(\sigma^{2}_{A_{p}}\)

mclapply(traits,function(trait){

p <- readRDS(file=paste0("outputs/models/",trait,"_ProgenyTestsAsturias.rds")) %>% 
  mclapply(function(driver) {
    broom.mixed::tidyMCMC(driver,pars=("sigma2_A"),
                droppars = NULL, 
                estimate.method = point.est, 
                ess = F, 
                rhat = F, 
                conf.int = T,
                conf.level = conf.level)}) %>%  
  bind_rows(.id="Drivers") %>% 
  mutate(Traits=trait,
         prov=rep(trait.options[[trait]]$provs,length(selected.drivers.labels)),
         Drivers=case_when(Drivers=="SH.20km.PC1"~"SH1[20km]",
                      Drivers=="SH.20km.PC2"~"SH2[20km]",
                      Drivers=="SH.1km.PC1"~"SH1[1.6km]",
                      Drivers=="SH.1km.PC2"~"SH2[1.6km]",
                      Drivers=="mean_bio6"~"mean(bio6)",
                      Drivers=="mean_MCMT"~"mean(MCMT)",
                      Drivers=="mean_EMT"~"mean(EMT)",
                      Drivers=="mean_bio14"~"mean(bio14)",
                      Drivers=="mean_MSP"~"mean(SP)",
                      Drivers=="mean_SHM"~"mean(SHM)",
                      Drivers=="var_bio6"~"var(bio6)",
                      Drivers=="var_MCMT"~"var(MCMT)",
                      Drivers=="var_EMT"~"var(EMT)",
                      Drivers=="var_bio14"~"var(bio14)",
                      Drivers=="var_MSP"~"var(SP)",
                      Drivers=="var_SHM"~"var(SHM)",
                      TRUE ~ as.character(Drivers))) %>% 
  dplyr::mutate(Drivers=factor(Drivers,levels=selected.drivers.labels)) %>% 
  ggplot(aes(x = prov, y = estimate,ymin = conf.low, ymax = conf.high,color=Drivers)) +
  geom_pointinterval(position = position_dodge(width = .8),point_size=2,alpha=0.6,size=4) +
  scale_color_manual(values=selected.drivers.colors) +
  xlab("Populations") +
  ylab(TeX("$\\sigma^{2}_{A_{p}}$ estimates")) +
  labs(color = "Potential drivers") +
  theme(legend.position = "bottom",
        panel.grid.minor.x=element_blank(),
        panel.grid.major.x=element_blank())  +
    guides(color=guide_legend(ncol=5))

ggsave(p,file=paste0("figs/PosteriorDistributions/ValidationStep_",trait,"_SigmaA.png"),height=6,width=22)
  
})

7.3 Narrow-sense heritabilities \(h^2_{p}\)

We calculated population-specific narrow-sense heritabilities, such as:

\[ h^2_{p} = \frac{\sigma^{2}_{A_{p}}}{\sigma^{2}_{A_{p}} + \sigma^{2}_r} \]

mclapply(traits,function(trait){

p <- readRDS(file=paste0("outputs/models/",trait,"_ProgenyTestsAsturias.rds")) %>% 
  mclapply(function(driver) {
    broom.mixed::tidyMCMC(driver,pars=("h2_prov"),
                droppars = NULL, 
                estimate.method = point.est, 
                ess = F, 
                rhat = F, 
                conf.int = T,
                conf.level = conf.level)}) %>%  
  bind_rows(.id="Drivers") %>% 
  mutate(Traits=trait,
         prov=rep(trait.options[[trait]]$provs,length(selected.drivers.labels)),
         Drivers=case_when(Drivers=="SH.20km.PC1"~"SH1[20km]",
                      Drivers=="SH.20km.PC2"~"SH2[20km]",
                      Drivers=="SH.1km.PC1"~"SH1[1.6km]",
                      Drivers=="SH.1km.PC2"~"SH2[1.6km]",
                      Drivers=="mean_bio6"~"mean(bio6)",
                      Drivers=="mean_MCMT"~"mean(MCMT)",
                      Drivers=="mean_EMT"~"mean(EMT)",
                      Drivers=="mean_bio14"~"mean(bio14)",
                      Drivers=="mean_MSP"~"mean(SP)",
                      Drivers=="mean_SHM"~"mean(SHM)",
                      Drivers=="var_bio6"~"var(bio6)",
                      Drivers=="var_MCMT"~"var(MCMT)",
                      Drivers=="var_EMT"~"var(EMT)",
                      Drivers=="var_bio14"~"var(bio14)",
                      Drivers=="var_MSP"~"var(SP)",
                      Drivers=="var_SHM"~"var(SHM)",
                      TRUE ~ as.character(Drivers))) %>% 
  dplyr::mutate(Drivers=factor(Drivers,levels=selected.drivers.labels)) %>% 
  ggplot(aes(x = prov, y = estimate,ymin = conf.low, ymax = conf.high,color=Drivers)) +
  geom_pointinterval(position = position_dodge(width = .8),point_size=2,alpha=0.6,size=4) +
  scale_color_manual(values=selected.drivers.colors) +
  xlab("Populations") +
  ylab(TeX("$\\h^{2}_{p}$ estimates")) +
  labs(color = "Potential drivers") +
  theme(legend.position = "bottom",
        panel.grid.minor.x=element_blank(),
        panel.grid.major.x=element_blank())  +
    guides(color=guide_legend(ncol=5))

ggsave(p,file=paste0("figs/PosteriorDistributions/ValidationStep_",trait,"_h2prov.png"),height=6,width=22)
  
})

7.4 Variance partitioning

mclapply(traits,function(trait){
p <- readRDS(file=paste0("outputs/models/",trait,"_ProgenyTestsAsturias.rds")) %>% 
  mclapply(function(driver) {
    broom.mixed::tidyMCMC(driver,pars=("pi"),
                droppars = NULL, estimate.method = point.est, ess = F, rhat = F, conf.int = T,conf.level = 0.95)}) %>%  
  bind_rows(.id="Drivers") %>% 
  mutate(Traits=trait,
         prop.var=rep(c("Residuals","Blocks","Populations","Clones"),length(selected.drivers.labels)),
         Drivers=case_when(Drivers=="SH.20km.PC1"~"SH1[20km]",
                      Drivers=="SH.20km.PC2"~"SH2[20km]",
                      Drivers=="SH.1km.PC1"~"SH1[1.6km]",
                      Drivers=="SH.1km.PC2"~"SH2[1.6km]",
                      Drivers=="mean_bio6"~"mean(bio6)",
                      Drivers=="mean_MCMT"~"mean(MCMT)",
                      Drivers=="mean_EMT"~"mean(EMT)",
                      Drivers=="mean_bio14"~"mean(bio14)",
                      Drivers=="mean_MSP"~"mean(SP)",
                      Drivers=="mean_SHM"~"mean(SHM)",
                      Drivers=="var_bio6"~"var(bio6)",
                      Drivers=="var_MCMT"~"var(MCMT)",
                      Drivers=="var_EMT"~"var(EMT)",
                      Drivers=="var_bio14"~"var(bio14)",
                      Drivers=="var_MSP"~"var(SP)",
                      Drivers=="var_SHM"~"var(SHM)",
                      TRUE ~ as.character(Drivers))) %>% 
  mutate(prop.var=factor(prop.var,levels=c("Blocks","Populations","Clones","Residuals"))) %>%
  dplyr::mutate(Drivers=factor(Drivers,levels=selected.drivers.labels)) %>%  
  ggplot(aes(x = prop.var, y = estimate,ymin = conf.low, ymax = conf.high,color=Drivers)) +
  geom_pointinterval(position = position_dodge(width = .8),point_size=3,alpha=0.6,size=5) +
  scale_color_manual(values=selected.drivers.colors) +
  xlab("") +
  ylab("Proportion of variance explained") +
  labs(color = "Potential drivers") +
  theme(axis.text = element_text(size=25),
        panel.grid.minor.x=element_blank(),
        panel.grid.major.x=element_blank())  +
    guides(color=guide_legend(ncol=1))

ggsave(p,file=paste0("figs/PosteriorDistributions/ValidationStep_",trait,"_VarPart.png"),height=6,width=15)
  
})

Validation analysis in a family-based progeny test

Associating within-population additive genetic variance for height and SLA with the potential drivers

Juliette Archambeau

28 April, 2023