In this document, we estimate the association between the within-population quantitative additive genetic variation and the ten potential drivers for three phenotypic traits: height measured the 17/12/2007 (when the trees were 3-year old), height measured in 2010 (when the trees were 6-year old) and SLA measured in 2009 (when the trees were 5-year old).
# Load the progeny test data
# The spreadsheet 2 corresponds to measurement in Cavada (Asturias common garden)
data <- read_excel("data/ProgenyTests/F26SNP_d130809_20120515.xls", sheet = 2) %>%
drop_na(Prov_code) # we remove the four rows at the end of the file that have NAs for id, prov id, site...
# Provenance names in the progeny tests:
# prov.progeny <- unique(data$Prov_code) %>%
# str_to_upper() %>%
# str_sub(1,3) # 30 provenances
# Load CLONAPIN data
clonapin <- readRDS(file="data/ClonapinData/PhenoDataNovember2019_AnnualTraits.rds")
# Provenance names in CLONAPIN
prov.clonapin <- unique(clonapin$prov) %>% setdiff("MAD") # 33 provenances
# prov.progeny[prov.progeny %in% prov.clonapin] # 20 provenances in common
# prov.progeny[!(prov.progeny %in% prov.clonapin)] # 10 provenances that have no match
# But this method to do the name matching is not ok for some provenances such as SanL,
# which is SAL with the CLONAPIN code
# Load the file with the matching of code between CLONAPIN (column "CODE") and the progeny tests (column "CODE_FIELD")
name.clonapin.progeny <- read_csv(file="data/ClonapinData/coordinates_provenances.csv") %>%
filter(CODE %in% prov.clonapin) # keep only the 33 provenances of CLONAPIN used in the study
unique(data$Prov_code)[unique(data$Prov_code) %in% name.clonapin.progeny$CODE_FIELD] # 23 provenances in common
## [1] "Oria" "Sier" "Mimi" "Tamr" "Cuel" "Pleu" "Segu" "Vald" "Cast" "Bayu" "Cada" "Aren" "Leir" "SanL" "Carb" "Pini" "Alto" "Ceni" "Pine" "Arma" "Lamu" "Coca" "Puer"
# We keep only these 23 provenances
data <- name.clonapin.progeny %>%
dplyr::select(CODE,CODE_FIELD) %>%
dplyr::rename(Prov_code=CODE_FIELD, Prov_code_clonapin=CODE) %>% # Prov_code_clonapin is the code names used in CLONAPIN, Prov_code that used in the progeny tests
right_join(data,by = "Prov_code") %>%
drop_na(Prov_code_clonapin)
Dataset of 3196 observations.
# Options for model sampling
n_chains <- 4
n_iter <- 2500
n_warm <- 1250
n_thin <- 1
save_warmup = FALSE
# Plot theme
theme_set(theme_bw(base_size = 20))
# option for the MCMC intervals
point.est="median"
conf.level <- 0.95
selected.drivers <- c("A","D",
"mean_MCMT","mean_MSP","var_MCMT","var_MSP",
"SH.20km.PC1","SH.20km.PC2","SH.1km.PC1","SH.1km.PC2")
selected.drivers.labels <- c("A","D",
"mean(MCMT)","mean(SP)",
"var(MCMT)","var(SP)",
"SH1[20km]","SH2[20km]","SH1[1.6km]","SH2[1.6km]")
selected.drivers.colors <- c("#4D9221" ,"#7FBC41", "#C51B7D" ,"#DE77AE",
"#E08214" ,"#FDB863",
"#2D004B","#542788" ,"#8073AC" ,"#B2ABD2")
# "HT07" => Height measurement in 2007 (3-year old)
# "HT10" => Height measurement in 2010 (6-year old)
# "SLA_09" => Height measurement in 2009 (5-year old)
traits <- c("HT07","HT10", "SLA_09")
trait.options <- list("HT07"=list(x.label.histogram="Height at 3-year old",
provs = unique(data$Prov_code_clonapin) %>% str_sort()),
"HT10"=list(x.label.histogram= "Height at 6-year old",
provs = unique(data$Prov_code_clonapin) %>% str_sort()),
"SLA_09"=list(x.label.histogram= "Specific Leaf Area at 5-year old"))
# remove NAs for the traits of interest
subdata.traits <- mclapply(traits,function(trait){
data %>%
drop_na(all_of(trait)) %>%
mutate(Family.id=paste0(Prov_code_clonapin,"_",Fam)) %>%
dplyr::select("Prov_code_clonapin","Prov_code","ID","Fam","Family.id","Block",all_of(trait)) %>%
rename(Trait=trait)
}) %>% setNames(traits)
hists <- mclapply(traits,function(trait){
subdata.traits[[trait]] %>%
ggplot(aes(x=Trait)) +
geom_histogram(bins = 30, fill="seagreen3",alpha=0.7) +
theme_bw() +
labs(x=trait.options[[trait]]$x.label.histogram,y="Number of trees") +
theme(axis.text = element_text(size=15),
axis.title = element_text(size=15))
})
hists[[2]] <- hists[[2]] + ylab("")
hists[[3]] <- hists[[3]] + ylab("")
plot_grid(hists[[1]],hists[[2]],hists[[3]],nrow=1)
For height, we remove the zeros because they may represent dead individuals. And for SLA, we remove the outlier with SLA = 82.34 because it is likely to be a measurement error.
# removing zeros for height
subdata.traits <- mclapply(traits,function(trait){
subdata.traits[[trait]] <- subdata.traits[[trait]] %>% filter(!(Trait==0)) # removing zeros
}) %>% setNames(traits)
# removing one outlier for SLA
subdata.traits[["SLA_09"]] <- subdata.traits[["SLA_09"]] %>%
filter(Trait < 70)
FOr SLA, we also remove the populations SEG and SAL as they have only 1 family in which SLA was measured.
subdata.traits[["SLA_09"]] %>%
group_by(Prov_code_clonapin) %>%
dplyr::summarise(n_families = n_distinct(Family.id),
n_individuals = n())
## # A tibble: 14 × 3
## Prov_code_clonapin n_families n_individuals
## <chr> <int> <int>
## 1 ARN 12 88
## 2 BAY 12 94
## 3 CAD 10 74
## 4 COC 7 55
## 5 CUE 14 109
## 6 LAM 2 7
## 7 LEI 11 78
## 8 MIM 9 72
## 9 ORI 15 116
## 10 PLE 9 71
## 11 PUE 5 36
## 12 SAL 1 2
## 13 SEG 1 4
## 14 TAM 16 125
subdata.traits[["SLA_09"]] <- subdata.traits[["SLA_09"]] %>%
filter(!Prov_code_clonapin %in% c("SEG", "SAL"))
# prov ID for SLA
trait.options$SLA_09$provs <- subdata.traits[["SLA_09"]] %>% distinct(Prov_code_clonapin) %>% as_vector() %>% str_sort()
Let’s plot it again:
hists <- mclapply(traits,function(trait){
subdata.traits[[trait]] %>%
ggplot(aes(x=Trait)) +
geom_histogram(bins = 30, fill="seagreen3",alpha=0.7) +
theme_bw() +
labs(x=trait.options[[trait]]$x.label.histogram,y="Number of trees") +
theme(axis.title = element_text(size=15),
axis.text = element_text(size=15))
})
hists[[2]] <- hists[[2]] + ylab("")
hists[[3]] <- hists[[3]] + ylab("")
hists <- plot_grid(hists[[1]],hists[[2]],hists[[3]],nrow=1)
# Figure provided in the Supplementary Information
ggsave(hists,
file = paste0("figs/ExploratoryAnalyses/ValidationStep_Histograms.png"),height=6,width=14)
hists
As the traits have close to normal distribution, we do not tranform them prior to analyses.
hists <- mclapply(traits,function(trait){
subdata.traits[[trait]] %>%
ggplot(aes(x=Trait)) +
geom_histogram(fill="seagreen3",alpha=0.7) +
theme_bw() +
facet_wrap(~as.factor(Prov_code_clonapin), nrow=3) +
labs(x="",y="",title=trait.options[[trait]]$x.label.histogram) +
theme(axis.text.x = element_text(size=6),
axis.text.y = element_text(size=12)) })
hists <- plot_grid(hists[[1]],hists[[2]],hists[[3]],ncol=1)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Figure provided in the Supplementary Information
ggsave(hists,
file = paste0("figs/ExploratoryAnalyses/ValidationStep_Histograms_PopSpecific.png"),height=15,width=12)
hists
mclapply(traits,function(trait){
# Number of individuals per provenance
counts.ind <- subdata.traits[[trait]] %>%
dplyr::select(Prov_code_clonapin,Fam,ID) %>%
distinct() %>%
group_by(Prov_code_clonapin) %>%
dplyr::summarise(count.individuals=n())
# Number of family per provenance
counts.fam <- subdata.traits[[trait]] %>%
dplyr::select(Prov_code_clonapin,Fam) %>%
distinct() %>%
group_by(Prov_code_clonapin) %>%
dplyr::summarise(count.families=n())
# Mean and variance for height in each provenance (merged with the nb of family per prov)
meta.subdata <- subdata.traits[[trait]] %>%
dplyr::select(Prov_code_clonapin,Fam,Trait) %>%
group_by(Prov_code_clonapin) %>%
summarise(mean=mean(Trait),var=var(Trait)) %>%
inner_join(counts.fam,by = "Prov_code_clonapin") %>%
inner_join(counts.ind,by = "Prov_code_clonapin") %>%
dplyr::rename(Populations=Prov_code_clonapin,
Mean=mean,
Variance=var,
"Number of families"=count.families)
meta.subdata %>%
xtable(type = "latex",digits=3) %>%
print(file = paste0("tables/ExpDesign_ProgenyTestAsturias_",trait,".tex"),
include.rownames=FALSE)
corr.list <- list("Correlation between the number of family per provenance and the within-population variance of the trait" = cor(meta.subdata$Variance,meta.subdata$`Number of families`) %>% round(3),
"Correlation between the number of family per provenance and the within-population mean of the trait"= cor(meta.subdata$Mean,meta.subdata$`Number of families`) %>% round(3),
"Correlation between the within-population mean and variance of the trait" = cor(meta.subdata$Variance,meta.subdata$Mean) %>% round(3))
corr.list
}) %>% setNames(traits)
## $HT07
## $HT07$`Correlation between the number of family per provenance and the within-population variance of the trait`
## [1] 0.073
##
## $HT07$`Correlation between the number of family per provenance and the within-population mean of the trait`
## [1] -0.301
##
## $HT07$`Correlation between the within-population mean and variance of the trait`
## [1] 0.421
##
##
## $HT10
## $HT10$`Correlation between the number of family per provenance and the within-population variance of the trait`
## [1] -0.015
##
## $HT10$`Correlation between the number of family per provenance and the within-population mean of the trait`
## [1] -0.205
##
## $HT10$`Correlation between the within-population mean and variance of the trait`
## [1] 0.433
##
##
## $SLA_09
## $SLA_09$`Correlation between the number of family per provenance and the within-population variance of the trait`
## [1] 0.101
##
## $SLA_09$`Correlation between the number of family per provenance and the within-population mean of the trait`
## [1] 0.026
##
## $SLA_09$`Correlation between the within-population mean and variance of the trait`
## [1] -0.125
We used the same mathematical model as the one used on CLONAPIN data but replacing clones by families.
We modeled each trait \(y_{bpfr}\) such as:
\[\begin{equation} \begin{aligned} y_{bpfr} & \sim \mathcal{N}(\mu_{bpcf},\sigma^{2}_{r})\\[3pt] \mu_{bpf} & = \beta_{0} + B_{b} + P_{p} + F_{f(p)}\\[3pt] \end{aligned} \end{equation}\]
where \(\beta_{0}\) is the global intercept, \(B_{b}\) the block intercepts, \(P_{p}\) the population intercepts, \(F_{f(p)}\) the family intercepts and \(\sigma^{2}_{r}\) the residual variance.
The prior of \(\beta_{0}\) was weakly informative and centered around the mean of the observed values for the trait under considered, as follows:
\[\beta_{0} \sim \mathcal{N}(\mu_{y},2)\]
The population and block intercepts, \(P_{p}\) and \(B_{b}\) were considered normally-distributed with variances \(\sigma^{2}_{P}\) and \(\sigma^{2}_{B}\), such as:
\[\begin{bmatrix} B_{b} \\ P_{p} \end{bmatrix} \sim \mathcal{N}\left(0, \begin{bmatrix}\sigma^{2}_{B}\\[3pt] \sigma^{2}_{P}\\ \end{bmatrix} \right)\\[3pt]\]
The family intercepts \(F_{f(p)}\) were considered to follow some population-specific normal distributions, such as: \[F_{f(p)} \sim \mathcal{N}(0,\sigma^{2}_{F_{p}})\]
where \(\sigma^{2}_{F_{p}}\) are the population-specific variances among families and are equal to:
\[\sigma^{2}_{F_{p}} = \frac{1}{4} \sigma^{2}_{A_{p}} \]
where \(\sigma^{2}_{A_{p}}\) are the population-specific additive genetic variances.
To partition the total variance, we parameterize our model so that only the total variance, \(\sigma_{tot}^{2}\) has a prior, such that:
\[\begin{equation} \begin{aligned} \sigma_{tot}^{2} & = \sigma_{r}^{2} + \sigma_{B}^{2} + \overline{\sigma_{F_{p}}^{2}} + \sigma_{P}^{2}\\[3pt] \sigma_{r} & = \sigma_{tot} \times \sqrt(\pi_{r})\\[3pt] \sigma_{B} & = \sigma_{tot} \times \sqrt(\pi_{B})\\[3pt] \sigma_{P} & = \sigma_{tot} \times \sqrt(\pi_{P})\\[3pt] \overline{\sigma_{F_{p}}} & = \sigma_{tot} \times \sqrt(\pi_{F})\\[3pt] \sigma_{tot} & \sim \mathcal{S}^{*}(0,1,3) \end{aligned} \end{equation}\]
where \(\overline{\sigma_{F_{p}}}\)
and \(\overline{\sigma_{F_{p}}^{2}}\)
are the mean of the population-specific among-families standard
deviations (\(\sigma_{F_{p}}\)) and
variances (\(\sigma^{2}_{F_{p}}\)),
respectively, and \(\sum_{l}^{4}\pi_{l}=1\) (using the
simplex
function in Stan
).
The population-specific additive genetic standard deviations \(\sigma_{A_{p}}\) follow a log-normal distribution with mean \(2\overline{\sigma_{F_{p}}}\) and standard deviation \(\sigma_{K}\), such as:
\[\begin{equation} \begin{aligned} \sigma_{A_{p}} & \sim \mathcal{LN}\left(\ln(2\overline{\sigma_{F_{p}}})-\frac{\sigma^{2}_{K}}{2} + \beta_{X}X_{p},\sigma^{2}_{K}\right)\\[3pt] \sigma_{K} & \sim \exp(1)\\[3pt] \end{aligned} \end{equation}\]
with \(X_{p}\) the potential driver considered and \(\beta_{x}\) its associated coefficient.
model = stan_model("scripts/StanModels/HierarchicalModel_VaryingInterClonesSD_ProgenyTestAsturias.stan")
model
## S4 class stanmodel 'anon_model' coded as follows:
## //Model to estimate the additive genetic variance in the common garden of Asturias
## data {
## int<lower=1> N; // Number of observations
## vector[N] y; // Response variable
## int<lower=0> nblock; // Number of blocks
## int<lower=0> nprov; // Number of provenances
## int<lower=0> nfam; // Number of families
## int<lower=0, upper=nblock> bloc[N]; // Blocks
## int<lower=0, upper=nprov> prov[N]; // Provenances
## int<lower=0, upper=nprov> which_prov[nfam]; // Provenances
## int<lower=0, upper=nfam> fam[N]; // Families
## vector[nprov] X; // Potential predictor of the additive genetic variance
## }
## parameters {
## real beta0; // global intercept
## real betaX; // coefficient of Xp
## simplex[4] pi;
## real<lower = 0> sigma_tot;
## real<lower = 0> sigma_K;
## vector[nprov] z_log_sigma_A;
##
## vector[nblock] z_block;
## vector[nprov] z_prov;
## vector[nfam] z_fam;
##
## }
## transformed parameters {
## real R_squared;
##
## real<lower = 0> sigma_r;
## real<lower = 0> sigma_block;
## real<lower = 0> sigma_prov;
##
## real mean_sigma_fam;
## vector[nprov] sigma_fam;
## vector[nprov] sigma_A;
##
## vector[nprov] alpha_prov;
## vector[nfam] alpha_fam;
## vector[nblock] alpha_block;
##
## vector[N] mu; // linear predictor
##
## // variance partitioning with the simplex pi
## sigma_r = sqrt(pi[1]) * sigma_tot;
## sigma_block = sqrt(pi[2]) * sigma_tot;
## sigma_prov = sqrt(pi[3]) * sigma_tot;
##
## mean_sigma_fam= sqrt(pi[4]) * sigma_tot;
## sigma_A = exp(log(2*mean_sigma_fam) - (square(sigma_K)/2) + betaX*X + z_log_sigma_A*sigma_K);
##
## alpha_prov = z_prov*sigma_prov;
##
## for(f in 1:nfam){
## alpha_fam[f] = z_fam[f]*0.5*sigma_A[which_prov[f]];
## sigma_fam[which_prov[f]] = 0.5*sigma_A[which_prov[f]]; // sigma2_fam = 1/4 sigma2_A ==> sigma_fam = 1/2 sigma_A
## }
##
## alpha_block = z_block*sigma_block;
##
## mu = rep_vector(beta0, N) + alpha_fam[fam] + alpha_block[bloc] + alpha_prov[prov];
## R_squared = 1 - variance(y - mu) / variance(y);
## }
## model{
## //Priors
## beta0 ~ normal(mean(y),2);
## betaX ~ std_normal();
## sigma_tot ~ student_t(3, 0.0, 1.0);
##
## z_prov ~ std_normal();
## z_block ~ std_normal();
## z_fam ~ std_normal();
##
## z_log_sigma_A ~ std_normal();
## sigma_K ~ exponential(1);
##
## // Likelihood
## y ~ normal(mu, sigma_r);
## }
## generated quantities {
## //Variances
## real<lower=0> sigma2_r;
## real<lower=0> sigma2_prov;
## real<lower=0> sigma2_block;
## vector[nprov] sigma2_fam;
## vector[nprov] sigma2_A;
## vector[nprov] h2_prov;
##
## // Posterior predictive check
## vector[N] y_rep;
##
## // Log-Likelihood (for WAIC/loo computations)
## vector[N] log_lik;
##
## sigma2_r = square(sigma_r);
## sigma2_prov = square(sigma_prov);
## sigma2_fam = square(sigma_fam);
## sigma2_A = square(sigma_A);
## for(p in 1:nprov) h2_prov[p] = sigma2_A[p]/(sigma2_r+sigma2_A[p]);
## sigma2_block = square(sigma_block);
## for(i in 1:N) {
## y_rep[i] = normal_rng(mu[i], sigma_r);
## log_lik[i] = normal_lpdf(y[i]| mu[i], sigma_r); // log probability density function
## }
## }
list.stan.traits <- mclapply(traits,function(trait){
df <- readRDS(file="data/DF_Drivers.rds") %>% # downloading the dataframe with the drivers
dplyr::select(all_of(c("prov",selected.drivers))) %>%
dplyr::mutate(across(-prov,scale)) %>% # scaling (mean-centering) the potential drivers
dplyr::rename(Prov_code_clonapin=prov) %>%
right_join(subdata.traits[[trait]],by="Prov_code_clonapin")
#### Create the stan lists (one for each driver):
list.stan <- lapply(selected.drivers, function(x) {
list(N=dim(df)[1],
y=df$Trait,
X=df[,c(x,"Prov_code_clonapin")] %>% distinct() %>% pull(x) %>% as.numeric(),
nprov = length(unique(df$Prov_code_clonapin)),
nfam = length(unique(df$Family.id)),
nblock = length(unique(df$Block)),
prov = as.numeric(as.factor(df$Prov_code_clonapin)),
which_prov = as.numeric(as.factor(unique(df[c("Prov_code_clonapin","Family.id")])[,"Prov_code_clonapin"])),
fam = as.numeric(as.factor(df$Family.id)),
bloc = as.numeric(as.factor(df$Block)))}) %>%
setNames(selected.drivers)
}) %>% setNames(traits)
mclapply(traits,function(trait){
set.seed(87)
list.models <- list()
for(i in selected.drivers){
list.models[[i]] <- sampling(model,
data = list.stan.traits[[trait]][[i]],
pars=c("beta0","betaX",
"pi","sigma2_r","sigma2_block","sigma2_prov","sigma2_fam",
"h2_prov",
"sigma_tot","sigma_block","sigma_prov",
"mean_sigma_fam","sigma_fam","sigma_K",
"sigma_A","sigma2_A"),
iter = n_iter,
chains = n_chains,
cores = n_chains,
save_warmup = save_warmup,
thin=n_thin)
}
saveRDS(list.models,file=paste0("outputs/models/",trait,"_ProgenyTestsAsturias.rds"))
})
param <- "betaX"
ExtractParam <- function(x) {
readRDS(file=paste0("outputs/models/",x,"_ProgenyTestsAsturias.rds")) %>%
mclapply(function(x) broom.mixed::tidyMCMC(x,pars=param,
droppars = NULL,
estimate.method = point.est,
ess = F,
rhat = F,
conf.int = T,
conf.level = conf.level)) %>%
bind_rows(.id="Drivers") %>%
mutate(Traits=x)
}
p <- mclapply(traits,ExtractParam) %>%
bind_rows() %>%
mutate(Drivers=case_when(Drivers=="SH.20km.PC1"~"SH1[20km]",
Drivers=="SH.20km.PC2"~"SH2[20km]",
Drivers=="SH.1km.PC1"~"SH1[1.6km]",
Drivers=="SH.1km.PC2"~"SH2[1.6km]",
Drivers=="mean_bio6"~"mean(bio6)",
Drivers=="mean_MCMT"~"mean(MCMT)",
Drivers=="mean_EMT"~"mean(EMT)",
Drivers=="mean_bio14"~"mean(bio14)",
Drivers=="mean_MSP"~"mean(SP)",
Drivers=="mean_SHM"~"mean(SHM)",
Drivers=="var_bio6"~"var(bio6)",
Drivers=="var_MCMT"~"var(MCMT)",
Drivers=="var_EMT"~"var(EMT)",
Drivers=="var_bio14"~"var(bio14)",
Drivers=="var_MSP"~"var(SP)",
Drivers=="var_SHM"~"var(SHM)",
TRUE ~ as.character(Drivers)),
Hyp=case_when(Drivers=="A"~"Admixture",
Drivers=="D"~"Admixture",
Drivers=="SH1[20km]"~"Spatial heterogeneity",
Drivers=="SH2[20km]"~"Spatial heterogeneity",
Drivers=="SH1[1.6km]"~"Spatial heterogeneity",
Drivers=="SH2[1.6km]"~"Spatial heterogeneity",
Drivers=="mean(bio6)"~"Climate harshness",
Drivers=="mean(MCMT)"~"Climate harshness",
Drivers=="mean(EMT)"~"Climate harshness",
Drivers=="mean(bio14)"~"Climate harshness",
Drivers=="mean(SHM)"~"Climate harshness",
Drivers=="mean(SP)"~"Climate harshness",
Drivers=="var(bio6)"~"Temporal heterogeneity",
Drivers=="var(MCMT)"~"Temporal heterogeneity",
Drivers=="var(EMT)"~"Temporal heterogeneity",
Drivers=="var(bio14)"~"Temporal heterogeneity",
Drivers=="var(SHM)"~"Temporal heterogeneity",
Drivers=="var(SP)"~"Temporal heterogeneity")) %>%
dplyr::mutate(Drivers=factor(Drivers,levels=selected.drivers.labels)) %>%
ggplot(aes(x = Drivers, y = estimate,ymin = conf.low, ymax = conf.high,colour=Traits),shape=16) +
geom_pointinterval(position = position_dodge(width = .6),point_size=3,show.legend = c(size = TRUE),size=4) +
facet_grid(.~Hyp,scales="free", space = "free") +
ylab(TeX("$\\beta_{X}$ estimates")) + xlab("") +
scale_colour_manual(labels = c("Height (3-year old)","Height (6-year old)","Specific Leaf Area (5-year old)"),
values=c("#FDAE61","#B2182B","royalblue3")) +
theme_bw() +
theme(axis.text.x = element_text(size=10),
axis.text.y = element_text(size=16),
axis.title = element_text(size=16),
legend.title=element_text(size=13),
legend.text=element_text(size=10),
legend.position = c(.12, .12), #.414
legend.background = element_rect(colour = "grey"),
strip.text.x = element_text(size = 14),
panel.grid.minor.x=element_blank(),
panel.grid.major.x=element_blank()) +
guides(color=guide_legend(ncol=1))
p
# save as png
ggsave(p, file="figs/PosteriorDistributions/ValidationStep_BetaX_PaperFig.png",height=6.5,width=13)
# save as pdf
ggsave(p, file="figs/PosteriorDistributions/ValidationStep_BetaX_PaperFig.pdf",height=6.5,width=13)
mclapply(traits,function(trait){
p <- readRDS(file=paste0("outputs/models/",trait,"_ProgenyTestsAsturias.rds")) %>%
mclapply(function(driver) {
broom.mixed::tidyMCMC(driver,pars=("sigma2_A"),
droppars = NULL,
estimate.method = point.est,
ess = F,
rhat = F,
conf.int = T,
conf.level = conf.level)}) %>%
bind_rows(.id="Drivers") %>%
mutate(Traits=trait,
prov=rep(trait.options[[trait]]$provs,length(selected.drivers.labels)),
Drivers=case_when(Drivers=="SH.20km.PC1"~"SH1[20km]",
Drivers=="SH.20km.PC2"~"SH2[20km]",
Drivers=="SH.1km.PC1"~"SH1[1.6km]",
Drivers=="SH.1km.PC2"~"SH2[1.6km]",
Drivers=="mean_bio6"~"mean(bio6)",
Drivers=="mean_MCMT"~"mean(MCMT)",
Drivers=="mean_EMT"~"mean(EMT)",
Drivers=="mean_bio14"~"mean(bio14)",
Drivers=="mean_MSP"~"mean(SP)",
Drivers=="mean_SHM"~"mean(SHM)",
Drivers=="var_bio6"~"var(bio6)",
Drivers=="var_MCMT"~"var(MCMT)",
Drivers=="var_EMT"~"var(EMT)",
Drivers=="var_bio14"~"var(bio14)",
Drivers=="var_MSP"~"var(SP)",
Drivers=="var_SHM"~"var(SHM)",
TRUE ~ as.character(Drivers))) %>%
dplyr::mutate(Drivers=factor(Drivers,levels=selected.drivers.labels)) %>%
ggplot(aes(x = prov, y = estimate,ymin = conf.low, ymax = conf.high,color=Drivers)) +
geom_pointinterval(position = position_dodge(width = .8),point_size=2,alpha=0.6,size=4) +
scale_color_manual(values=selected.drivers.colors) +
xlab("Populations") +
ylab(TeX("$\\sigma^{2}_{A_{p}}$ estimates")) +
labs(color = "Potential drivers") +
theme(legend.position = "bottom",
panel.grid.minor.x=element_blank(),
panel.grid.major.x=element_blank()) +
guides(color=guide_legend(ncol=5))
ggsave(p,file=paste0("figs/PosteriorDistributions/ValidationStep_",trait,"_SigmaA.png"),height=6,width=22)
})
We calculated population-specific narrow-sense heritabilities, such as:
\[ h^2_{p} = \frac{\sigma^{2}_{A_{p}}}{\sigma^{2}_{A_{p}} + \sigma^{2}_r} \]
mclapply(traits,function(trait){
p <- readRDS(file=paste0("outputs/models/",trait,"_ProgenyTestsAsturias.rds")) %>%
mclapply(function(driver) {
broom.mixed::tidyMCMC(driver,pars=("h2_prov"),
droppars = NULL,
estimate.method = point.est,
ess = F,
rhat = F,
conf.int = T,
conf.level = conf.level)}) %>%
bind_rows(.id="Drivers") %>%
mutate(Traits=trait,
prov=rep(trait.options[[trait]]$provs,length(selected.drivers.labels)),
Drivers=case_when(Drivers=="SH.20km.PC1"~"SH1[20km]",
Drivers=="SH.20km.PC2"~"SH2[20km]",
Drivers=="SH.1km.PC1"~"SH1[1.6km]",
Drivers=="SH.1km.PC2"~"SH2[1.6km]",
Drivers=="mean_bio6"~"mean(bio6)",
Drivers=="mean_MCMT"~"mean(MCMT)",
Drivers=="mean_EMT"~"mean(EMT)",
Drivers=="mean_bio14"~"mean(bio14)",
Drivers=="mean_MSP"~"mean(SP)",
Drivers=="mean_SHM"~"mean(SHM)",
Drivers=="var_bio6"~"var(bio6)",
Drivers=="var_MCMT"~"var(MCMT)",
Drivers=="var_EMT"~"var(EMT)",
Drivers=="var_bio14"~"var(bio14)",
Drivers=="var_MSP"~"var(SP)",
Drivers=="var_SHM"~"var(SHM)",
TRUE ~ as.character(Drivers))) %>%
dplyr::mutate(Drivers=factor(Drivers,levels=selected.drivers.labels)) %>%
ggplot(aes(x = prov, y = estimate,ymin = conf.low, ymax = conf.high,color=Drivers)) +
geom_pointinterval(position = position_dodge(width = .8),point_size=2,alpha=0.6,size=4) +
scale_color_manual(values=selected.drivers.colors) +
xlab("Populations") +
ylab(TeX("$\\h^{2}_{p}$ estimates")) +
labs(color = "Potential drivers") +
theme(legend.position = "bottom",
panel.grid.minor.x=element_blank(),
panel.grid.major.x=element_blank()) +
guides(color=guide_legend(ncol=5))
ggsave(p,file=paste0("figs/PosteriorDistributions/ValidationStep_",trait,"_h2prov.png"),height=6,width=22)
})
mclapply(traits,function(trait){
p <- readRDS(file=paste0("outputs/models/",trait,"_ProgenyTestsAsturias.rds")) %>%
mclapply(function(driver) {
broom.mixed::tidyMCMC(driver,pars=("pi"),
droppars = NULL, estimate.method = point.est, ess = F, rhat = F, conf.int = T,conf.level = 0.95)}) %>%
bind_rows(.id="Drivers") %>%
mutate(Traits=trait,
prop.var=rep(c("Residuals","Blocks","Populations","Clones"),length(selected.drivers.labels)),
Drivers=case_when(Drivers=="SH.20km.PC1"~"SH1[20km]",
Drivers=="SH.20km.PC2"~"SH2[20km]",
Drivers=="SH.1km.PC1"~"SH1[1.6km]",
Drivers=="SH.1km.PC2"~"SH2[1.6km]",
Drivers=="mean_bio6"~"mean(bio6)",
Drivers=="mean_MCMT"~"mean(MCMT)",
Drivers=="mean_EMT"~"mean(EMT)",
Drivers=="mean_bio14"~"mean(bio14)",
Drivers=="mean_MSP"~"mean(SP)",
Drivers=="mean_SHM"~"mean(SHM)",
Drivers=="var_bio6"~"var(bio6)",
Drivers=="var_MCMT"~"var(MCMT)",
Drivers=="var_EMT"~"var(EMT)",
Drivers=="var_bio14"~"var(bio14)",
Drivers=="var_MSP"~"var(SP)",
Drivers=="var_SHM"~"var(SHM)",
TRUE ~ as.character(Drivers))) %>%
mutate(prop.var=factor(prop.var,levels=c("Blocks","Populations","Clones","Residuals"))) %>%
dplyr::mutate(Drivers=factor(Drivers,levels=selected.drivers.labels)) %>%
ggplot(aes(x = prop.var, y = estimate,ymin = conf.low, ymax = conf.high,color=Drivers)) +
geom_pointinterval(position = position_dodge(width = .8),point_size=3,alpha=0.6,size=5) +
scale_color_manual(values=selected.drivers.colors) +
xlab("") +
ylab("Proportion of variance explained") +
labs(color = "Potential drivers") +
theme(axis.text = element_text(size=25),
panel.grid.minor.x=element_blank(),
panel.grid.major.x=element_blank()) +
guides(color=guide_legend(ncol=1))
ggsave(p,file=paste0("figs/PosteriorDistributions/ValidationStep_",trait,"_VarPart.png"),height=6,width=15)
})