# Function to format climate data (mean-center the climatic variables and get datasets with climatic variables in columns)scale_clim <-function(clim_data, em, rcp, ref_timeslice, pred_timeslice, clim_var_avg_selected, scale_only_ref = F){# Dataset with the climatic variables of the reference period (not scaled) ref_climdf <- clim_data %>%filter(ensemble_member == em & rcp_scenario == rcp & timeslice == ref_timeslice & clim_var_avg %in% clim_var_avg_selected) %>% dplyr::select(-clim_var, -time_averages) %>%pivot_wider(names_from = clim_var_avg, values_from = value)# Mean-center the climatic variables across the reference period ref_climdf_scaled <- ref_climdf %>% dplyr::mutate(across(any_of(clim_var_avg_selected), ~ (. -mean(.)) /sd(.)))if(scale_only_ref == T){ ref_climdf_scaled } else {# Extract scaling parameters, i.e. mean and variance scale_params <-lapply(clim_var_avg_selected, function(x){ vec_var <- ref_climdf[,x] %>%pull()list(mean =mean(vec_var),sd =sd(vec_var)) }) %>%setNames(clim_var_avg_selected)# Dataset with the climatic variables across the future period (not scaled) pred_climdf <- clim_data %>%filter(ensemble_member == em & rcp_scenario == rcp & timeslice == pred_timeslice & clim_var_avg %in% clim_var_avg_selected) %>% dplyr::select(-clim_var, -time_averages) %>%pivot_wider(names_from = clim_var_avg, values_from = value)# Mean-center the climatic variables across the future period pred_climdf_scaled <- pred_climdffor(i in clim_var_avg_selected){ pred_climdf_scaled[,i] <- (pred_climdf_scaled[,i] - scale_params[[i]]$mean) / scale_params[[i]]$sd }list(ref_climdf = ref_climdf, pred_climdf = pred_climdf, ref_climdf_scaled = ref_climdf_scaled, pred_climdf_scaled = pred_climdf_scaled, scale_params = scale_params)}}
4 Gradient Forest (GF)
Code
# Predictor cumulative plotsmake_predictor_cumulative_plot <-function(x){plot(x, plot.type="C",imp.vars=names(importance(x)),show.species=F,common.scale=T,cex.axis=1, cex.lab=1.5,line.ylab=1,par.args=list(mgp=c(1.5, 0.5,0),mar=c(2.5,1,0.1,0.5),omi=c(0, +0.3,0,0)))}# R2 measure of the fit of the random forest model for each speciesmake_performance_plot <-function(x, horizontal=FALSE){ old.mar<-par()$marpar(mfrow=c(1,1),mar=old.mar+c(0,0,0,0)) Ylab <-expression(R^2) perf <-importance(x, type="Species") n <-length(perf)if (horizontal)plot(perf, 1:n, las =2, pch=19, axes=F, xlab="", ylab="")elseplot(1:n, perf, las =2, pch=19, axes=F, xlab="", ylab="")axis(labels=names(perf),side=1+horizontal,at=1:n, cex.axis=0.7, padj=0,las=2)axis(side=2-horizontal, cex.axis=1)mtext(Ylab,side=2-horizontal,line=2)title("Overall performance of random forests over loci")abline(h =0, lty =2)box()par(mar=old.mar)}# Species (alleles) cumulative plotmake_species_cumulative_plot <-function(x){plot(x,plot.type="C",imp.vars=names(importance(x)), show.overall=F,legend=T,leg.posn="topleft", leg.nspecies=10,cex.lab=1,cex.legend=1, cex.axis=1,line.ylab=1,par.args=list(mgp=c(1.5, 0.5,0),mar=c(2.5,1,0.1,0.5),omi=c(0, +0.3,0,0))) }
Code
# Function to estimate the genomic offset with Gradient ForestRCPs <-c("RCP2.6","RCP8.5")EMs <-c("01","04","06","15")ref_timeslice <-"1980-2000"pred_timeslices <-c("2030-2050","2060-2080")snp_sets <- snp_setsclim_data <-readRDS(here("data/ScotsPine/subdf.rds")) geno_data <-readRDS(here("data/ScotsPine/GenomicData/data_update_Fev2025/formatted_data/filtered_allele_freq_withoutMAF.rds"))clim_var_avg_selected <-unique(clim_data$clim_var_avg)# # for the examplesnp_set_i <- snp_sets[[1]]rcp_i <-"RCP8.5"em_i <-"15"pred_timeslice_i <-"2030-2050"df_GF_gen_off <-lapply(snp_sets, function(snp_set_i){# Subset genomic data for the set of selected SNPs sub_geno_data <- geno_data %>% dplyr::select(snp_set_i$set_snps) %>%set_colnames(paste0("snp_",1:length(snp_set_i$set_snps))) %>%as.data.frame() lapply(EMs, function(em_i){# Mean - center the reference climatic data ref_clim_data <-scale_clim(clim_data = clim_data,em = em_i,rcp = RCPs[[1]],ref_timeslice = ref_timeslice,pred_timeslice = pred_timeslices[[1]],clim_var_avg_selected = clim_var_avg_selected,scale_only_ref = T)# dataframe formatted for GF# gf_data <- ref_clim_data %>% # dplyr::select(any_of(clim_var_avg_selected)) %>% # data.frame(sub_geno_data,.,check.names=F)# # set.seed(3) # for reproducibility, as the results of GF models vary from one run to another# # gf_mod <- gradientForest(gf_data,# predictor.vars=clim_var_avg_selected,# response.vars=colnames(sub_geno_data),# corr.threshold=0.5,# ntree=500,# trace=T)# # saveRDS(gf_mod, file = here(paste0("outputs/ScotsPine/GF/models/",# snp_set_i$set_code,"_em",# em_i,"_refperiod_",# gsub("\\-", "_", ref_timeslice),# ".rds"))) gf_mod <-readRDS(file=here(paste0("outputs/ScotsPine/GF/models/", snp_set_i$set_code,"_em", em_i,"_refperiod_",gsub("\\-", "_", ref_timeslice),".rds")))######## Plots######## Overall importance plot# pdf(here(paste0("figs/ScotsPine/GF/overall_importance_plots/",# snp_set_i$set_code,"_em",# em_i,"_refperiod_",# gsub("\\-", "_", ref_timeslice),# ".pdf")), width=8,height=5)# plot(gf_mod, plot.type="Overall.Importance")# dev.off()# Species (allele) cumulative plot - do not plot if too many alleles# pdf(here(paste0("figs/ScotsPine/GF/allele_cumulative_plots/",# snp_set_i$set_code,"_em",# em_i,"_",# gsub("\\-", "_", ref_timeslice),# ".pdf")),width=7,height=7)# make_species_cumulative_plot(x=gf_mod)# dev.off()# Predictor cumulative plot# pdf(here(paste0("figs/ScotsPine/GF/predictor_cumulative_plots/",# snp_set_i$set_code,"_em",# em_i,"_",# gsub("\\-", "_", ref_timeslice),# ".pdf")), width=7,height=7)# make_predictor_cumulative_plot(x=gf_mod)# dev.off()# R2 measure of the fit of the random forest model for each species - do not run if many alleles# p <- tibble(snp=names(sort(gf_mod$result)),importance=sort(gf_mod$result)) %>% # ggplot() +# geom_point(aes(y=reorder(snp, importance),x=importance)) +# ylab("") +# xlab(expression(R^2)) +# theme_bw() # # ggsave(p, filename = here(paste0("figs/ScotsPine/GF/allele_importance_plots/",# snp_set_i$set_code,"_em",# em_i,"_",# gsub("\\-", "_", ref_timeslice),# ".pdf")), width=8, height=10)####lapply(RCPs, function(rcp_i){lapply(pred_timeslices, function(pred_timeslice_i){# Mean - center the current and future climatic data clim_data_scaled <-scale_clim(clim_data = clim_data,em = em_i,rcp = rcp_i, ref_timeslice = ref_timeslice,pred_timeslice = pred_timeslice_i,clim_var_avg_selected = clim_var_avg_selected)# Genomic offset predictions ref_pred <-predict(gf_mod) # predictions under current climates fut_pred <-predict(gf_mod, clim_data_scaled$pred_climdf_scaled %>% dplyr::select(any_of(clim_var_avg_selected)) %>%as.data.frame()) # predictions under future climates go <-lapply(1:nrow(ref_pred), function(x, ref_pred, fut_pred){as.numeric(pdist(ref_pred[x,], fut_pred[x,])@dist)},fut_pred=fut_pred, ref_pred=ref_pred) %>%unlist() clim_data_scaled$pred_climdf_scaled %>% dplyr::select(-any_of(clim_var_avg_selected)) %>% dplyr::rename(pred_timeslice = timeslice) %>% dplyr::mutate(var_code = snp_set_i$set_code,var_name = snp_set_i$set_name,method ="GF",val = go) }) %>%bind_rows() }) %>%bind_rows() }) %>%bind_rows() }) %>%bind_rows()saveRDS(df_GF_gen_off, here(paste0("outputs/ScotsPine/GF/df_gen_off_pred.rds")))
# This script can only be run on modest SNP sets. For running on all SNPs, see next chunk.# Loading allele counts without MAFgeno_data <-readRDS(here("data/ScotsPine/GenomicData/data_update_Fev2025/formatted_data/filtered_allele_counts_withoutMAF.rds"))geno_data[geno_data ==1] <-12geno_data[geno_data ==2] <-22geno_data[geno_data ==0] <-11fst_matrices <-sapply(snp_sets, function(x){ geno_data %>% dplyr::select(PopulationCode,all_of(x$set_snps)) %>%as.data.frame() %>%pairwise.WCfst(diploid=TRUE)}, USE.NAMES =TRUE,simplify=FALSE)# save the matricessaveRDS(fst_matrices,file=here("outputs/ScotsPine/GDM/fst_matrices/fst_matrices.rds"))
Code
# Script to prepare the dataset to send to Santi, so that he computes the Fst matrix with all SNPs (for GDM)geno_data <-readRDS(here("data/ScotsPine/GenomicData/data_update_Fev2025/formatted_data/filtered_allele_counts_withoutMAF.rds"))geno_data[geno_data ==1] <-12geno_data[geno_data ==2] <-22geno_data[geno_data ==0] <-11geno_data <- geno_data %>% dplyr::select(-FieldCode,-Family) %>%as.data.frame() %>%arrange(PopulationCode)#saveRDS(geno_data,file= here("data/ScotsPine/GenomicData/data_update_Fev2025/formatted_data/GenMatrix_FormattedForHierfstatPackage.rds"))# geno_data <- readRDS(file= "data/ScotsPine/GenomicData/data_update_Fev2025/formatted_data/GenMatrix_FormattedForHierfstatPackage.rds")# The matrix was run on the genotool cluster by Santi as my laptop crashed when computing itpairwise.WCfst(geno_data, diploid=TRUE)# Here is the matrix computed by Santi:pairwise_fst <-read.delim(here("data/ScotsPine/GenomicData/data_update_Fev2025/formatted_data/pairwise_fst.txt"), row.names=1)# We put the matrix in a list so that it can be used in the following scripts.fst_matrices <-list(all_snps = pairwise_fst)
Code
#fst_matrices <- readRDS(file=here("outputs/ScotsPine/GDM/fst_matrices/fst_matrices.rds"))# Is there some negative values?neg_vals <-sapply(fst_matrices, function(x){ length(x[which(x <0)])}, USE.NAMES =TRUE,simplify=FALSE)# Counting the number of negative valueslapply(names(snp_sets), function(x){tibble('SNP set'= snp_sets[[x]]$set_name,'Nb of negative values'= neg_vals[[x]])}) %>%bind_rows() %>%kable_mydf()# No negative values in the Fst matrix with all SNPs#### Scaling the matrices #### Following Fitzpatrick & Keller (2015): subtracting the minimum value and then dividing by the maximum value# fst_matrices <- sapply(fst_matrices, function(x){# # (x-min(x,na.rm=T))/max(x,na.rm=T)# # }, USE.NAMES = TRUE,simplify=FALSE)fst_matrices <-sapply(fst_matrices, function(x){ (x-min(x,na.rm=T))/(max(x,na.rm=T)-min(x,na.rm=T))}, USE.NAMES =TRUE,simplify=FALSE)# to check that it worked# sapply(fst_matrices, function(x){range(x,na.rm=T)}, USE.NAMES = TRUE,simplify=FALSE)source(here("scripts/functions/corpmat.R"))col <-colorRampPalette(c("#BB4444", "#EE9988", "#FFFFFF", "#77AADD", "#4477AA"))p.mat <-corpmat(fst_matrices$all_snps %>%as.matrix())p <- corrplot::corrplot(as.matrix(fst_matrices$all_snps %>%as.matrix()), method="color", col=col(200),type="upper", order="hclust", addCoef.col ="black", # Add coefficient of correlationtl.col="black", tl.srt=23, #Text label color and rotation# Combine with significancep.mat = p.mat, sig.level =0.05, insig ="blank", number.cex =0.8,tl.cex =0.8,# hide correlation coefficient on the principal diagonaldiag=FALSE,title ="Scaled Fst matrix with all SNPs",mar=c(0,0,2,0)) # add an upper margin to see the titlep # For GDM, the distance matrix must have as the first column the names of the populationsfst_matrices <-sapply(fst_matrices, function(x){x <- x %>%as.data.frame() %>% tibble::rownames_to_column("PopulationCode") }, USE.NAMES =TRUE,simplify=FALSE)#saveRDS(fst_matrices,file=here("outputs/ScotsPine/GDM/fst_matrices/scaled_fst_matrices.rds"))
Code
#fst_matrices <- readRDS(file=here("outputs/ScotsPine/GDM/fst_matrices/scaled_fst_matrices.rds"))# Function to estimate the genomic offset with GDMRCPs <-c("RCP2.6","RCP8.5")EMs <-c("01","04","06","15")ref_timeslice <-"1980-2000"pred_timeslices <-c("2030-2050","2060-2080")snp_sets <- snp_setsclim_data <-readRDS(here("data/ScotsPine/subdf.rds")) clim_var_avg_selected <-unique(clim_data$clim_var_avg)# for the example# snp_set_i <- snp_sets[[1]]# rcp_i <- "RCP8.5"# em_i <- "01"# pred_timeslice_i <- "2030-2050"rm(rcp_i);rm(em_i);rm(pred_timeslice_i);rm(snp_set_i)df_GDM_gen_off <-lapply(snp_sets, function(snp_set_i){lapply(EMs, function(em_i){# Mean - center the reference climatic data ref_clim_data <- clim_data %>%filter(ensemble_member == em_i & rcp_scenario == RCPs[[1]] & timeslice == ref_timeslice & clim_var_avg %in% clim_var_avg_selected) %>% dplyr::select(-clim_var, -time_averages) %>%pivot_wider(names_from = clim_var_avg, values_from = value)# Steps:# extract the geographic coordinates of the populations# transform the coordinates in spatial points and specify the CRS (WGS84) with sp package# transform to a SpatVector object for the terra package# reproject with terra package in CRS EPSG:3035# extract population coordinates from the SpatVector with terra package pop_coord_proj <- ref_clim_data %>% dplyr::select(Longitude,Latitude) %>% sp::SpatialPoints(proj4string =CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0")) %>% terra::vect() %>% terra::project("EPSG:3035") %>% terra::crds() %>%as_tibble() %>% dplyr::rename(long_EPSG_3035=x, lat_EPSG_3035=y)# merge the population coordinates with the climatic variables clim_ref <-bind_cols(ref_clim_data[,c("PopulationCode",clim_var_avg_selected)],pop_coord_proj)# GDM models gdm_tab <-formatsitepair(bioData=fst_matrices[[snp_set_i$set_code]], bioFormat=3,XColumn="long_EPSG_3035", YColumn="lat_EPSG_3035", predData=clim_ref, siteColumn="PopulationCode") gdm_mod <-gdm(data=gdm_tab, geo=TRUE)saveRDS(gdm_mod, file =here(paste0("outputs/ScotsPine/GDM/models/", snp_set_i$set_code,"_em", em_i,"_refperiod_",gsub("\\-", "_", ref_timeslice),".rds")))# Plot predicted climatic distance vs observed genetic distance overlay_x <-seq( from=min(gdm_mod$ecological), to=max(gdm_mod$ecological), length=length(gdm_mod$ecological)) overlay_y <-1-exp( - overlay_x ) dfline <-tibble(x=overlay_x,y=overlay_y) p <-tibble(clim=gdm_mod$ecological,obs=gdm_mod$observed) %>%ggplot(aes(x=clim,y=obs)) +geom_point(color=rgb(0,0,1,0.5)) +geom_line(data=dfline,aes(x=x,y=y), color="gray60") +xlim(c(0,1.7)) +ylim(c(0,1)) +xlab("Predicted climatic distance") +ylab("Observed genetic distance") +ggtitle(paste0(snp_set_i$set_name,"- Ensemble member ",em_i)) +theme_bw() +theme(title =element_text(size=8)) ggsave(filename =here(paste0("figs/ScotsPine/GDM/PredictedClimaticDistance_vs_ObservedGeneticDistance_Plots/", snp_set_i$set_code,"_em",em_i,".pdf")), p, width=5, height=5)# Predicted versus observed genetic distance p <-tibble(obs=gdm_mod$observed, pred=gdm_mod$predicted) %>%ggplot(aes(x=obs,y=pred)) +geom_abline(intercept =0, slope =1, color="gray60") +geom_point(color=rgb(0,0,1,0.5)) +xlim(c(0,1)) +ylim(c(0,1)) +xlab("Observed genetic distance") +ylab("Predicted genetic distance") +ggtitle(paste0(snp_set_i$set_name,"- Ensemble member ",em_i)) +theme_bw()+theme(title =element_text(size=8))ggsave(filename =here(paste0("figs/ScotsPine/GDM/Predicted_vs_ObservedGeneticDistance_Plots/", snp_set_i$set_code,"_em",em_i,".pdf")), p, width=5, height=5)# Ispine plots spline_data <-isplineExtract(gdm_mod)# extract variables with importance different from 0 predictors <-which(apply(spline_data$y,2, sum) !=0) %>%names() plots <-lapply(predictors, function(predictor){if(predictor=="Geographic"){ predictor_name <-"Geography (km)" spline_data$x[,predictor] <- spline_data$x[,predictor] /1000 } else { predictor_name <- clim_var_names[clim_var_names$code == predictor,] %>%pull(.)}tibble(x=spline_data$x[,predictor],y=spline_data$y[,predictor]) %>%ggplot() +geom_line(aes(x=x,y=y),color="blue",linewidth=2) +ylim(c(0,1.3)) +ylab("Partial genetic distance") +xlab(predictor_name) +theme(axis.title.x =element_text(size=9)) +theme_bw() })# make title title <-ggdraw() +draw_label(paste0(snp_set_i$set_name, " - Ensemble member ",em_i),fontface ='bold',x =0,hjust =0) +theme(plot.margin =margin(0, 0, 0, 7))# merge plots p <-plot_grid(plotlist=plots)# If we want to save the figures without title# ggsave(here(paste0("figs/ScotsPine/GDM/Ispline_Plots/",snp_set_i$set_code,"_em",em_i,"_noTitle.pdf")), p, width=14,height=9, device="pdf")# merge plots + title p <-plot_grid(title, p, ncol =1,rel_heights =c(0.1, 1))# save the plotsggsave(here(paste0("figs/ScotsPine/GDM/Ispline_Plots/",snp_set_i$set_code,"_em",em_i,".pdf")), p, width=14,height=9, device="pdf") gdm_tab_pred <- clim_ref %>% dplyr::select(-PopulationCode) %>% dplyr::rename(xCoord=long_EPSG_3035,yCoord=lat_EPSG_3035) %>%set_colnames(paste0("s1.",colnames(.)))lapply(RCPs, function(rcp_i){lapply(pred_timeslices, function(pred_timeslice_i){# Mean - center the current and future climatic data gdm_tab_pred <- clim_data %>%filter(ensemble_member == em_i & rcp_scenario == rcp_i & timeslice == pred_timeslice_i & clim_var_avg %in% clim_var_avg_selected) %>% dplyr::select(-clim_var, -time_averages,-contains("ude")) %>%pivot_wider(names_from = clim_var_avg, values_from = value) %>%left_join(clim_ref[,c("PopulationCode","long_EPSG_3035","lat_EPSG_3035")], by="PopulationCode") %>% dplyr::select(contains("EPSG_3035"),any_of(clim_var_avg_selected)) %>% dplyr::rename(xCoord=long_EPSG_3035,yCoord=lat_EPSG_3035) %>%set_colnames(paste0("s2.",colnames(.))) %>%bind_cols(gdm_tab_pred, .) %>%mutate(distance=0,weights=0) %>% dplyr::select(distance,weights,contains("Coord"), everything()) gen_off <-predict(gdm_mod,gdm_tab_pred) ref_clim_data %>% dplyr::select(PopulationCode, Latitude, Longitude) %>%mutate(pred_timeslice = pred_timeslice_i,ensemble_member = em_i,rcp_scenario = rcp_i) %>% dplyr::mutate(var_code = snp_set_i$set_code,var_name = snp_set_i$set_name,method ="GDM",val = gen_off) }) %>%bind_rows() }) %>%bind_rows() }) %>%bind_rows() }) %>%bind_rows()saveRDS(df_GDM_gen_off, here(paste0("outputs/ScotsPine/GDM/df_gen_off_pred.rds")))
Genomic offset predictions using GDM are negatively correlated with the predictions from other methods. To ensure there are no errors in my analysis, I have verified the following:
Comparing predictions with geo=TRUE versus geo=FALSE (i.e., correction or not for geography).
Using population coordinates with either a geographic (spheroid) CRS (units in degrees longitude and latitude) or a projected (Cartesian, 2D) CRS (EPSG:3035).
Comparing predictions with and without mean-centered and scaled climatic data.
Exploring different standardizations of the Fst matrix.
However, none of these adjustments altered the results, which remain opposite to the predictions from other methods.
7 Latent Factor Mixed Model (LFMM)
Code
RCPs <-c("RCP2.6","RCP8.5")EMs <-c("01","04","06","15")ref_timeslice <-"1980-2000"pred_timeslices <-c("2030-2050","2060-2080")clim_data <-readRDS(here("data/ScotsPine/subdf.rds")) geno_data <-readRDS(here("data/ScotsPine/GenomicData/data_update_Fev2025/formatted_data/imputed_allele_counts_withoutMAF.rds")) # arrange by field codeclim_var_avg_selected <-unique(clim_data$clim_var_avg)K <-c(1,4)df_LFMM_gen_off <-lapply(snp_sets, function(snp_set_i){# # Subset genomic data for the set of selected SNPs# sub_geno_data <- geno_data %>% # dplyr::select(any_of(snp_set_i$set_snps))lapply(EMs, function(em_i){lapply(RCPs, function(rcp_i){lapply(pred_timeslices, function(pred_timeslice_i){# Mean - center the current and future climatic data clim_data_scaled <-scale_clim(clim_data = clim_data,em = em_i,rcp = rcp_i, ref_timeslice = ref_timeslice,pred_timeslice = pred_timeslice_i,clim_var_avg_selected = clim_var_avg_selected)# Dataset of reference climates env_ref <- geno_data %>% dplyr::select(PopulationCode, Family, FieldCode) %>%left_join(clim_data_scaled$ref_climdf_scaled %>% dplyr::select(PopulationCode,any_of(clim_var_avg_selected)),by ="PopulationCode") %>%arrange(PopulationCode) %>% dplyr::select(-PopulationCode, -Family, -FieldCode)# Dataset of prediction climates env_pred <- geno_data %>% dplyr::select(PopulationCode, Family, FieldCode) %>%left_join(clim_data_scaled$pred_climdf_scaled %>% dplyr::select(PopulationCode,any_of(clim_var_avg_selected)),by ="PopulationCode") %>%arrange(PopulationCode) %>% dplyr::select(-PopulationCode, -Family, -FieldCode)# Genomic data - subset with selected SNPs geno_sub <- geno_data %>%arrange(PopulationCode) %>% dplyr::select(-PopulationCode, -Family, -FieldCode, any_of(snp_set_i$set_snps))lapply(K, function(k_i){# Estimating the genomic offset# ggap <- genetic.gap(input = geno_sub,# env = env_ref,# pred.env = env_pred,# K = k_i)# # saveRDS(ggap, file = here(paste0("outputs/ScotsPine/LFMM/models/",# snp_set_i$set_code,"_em",# em_i,# "_refperiod_",# gsub("\\-", "_", ref_timeslice),# "_predperiod_",# gsub("\\-", "_", pred_timeslice_i),# "_K",# k_i,# ".rds"))) ggap <-readRDS(file =here(paste0("outputs/ScotsPine/LFMM/models/", snp_set_i$set_code,"_em", em_i,"_refperiod_",gsub("\\-", "_", ref_timeslice),"_predperiod_",gsub("\\-", "_", pred_timeslice_i),"_K", k_i,".rds"))) clim_data_scaled$pred_climdf_scaled %>% dplyr::select(-any_of(clim_var_avg_selected)) %>% dplyr::rename(pred_timeslice = timeslice) %>% dplyr::mutate(var_code = snp_set_i$set_code,var_name = snp_set_i$set_name,method =paste0("LFMM_K",k_i),val =unique(ggap$offset)) }) %>%bind_rows() }) %>%bind_rows() }) %>%bind_rows() }) %>%bind_rows() }) %>%bind_rows()saveRDS(df_LFMM_gen_off, here(paste0("outputs/ScotsPine/LFMM/df_gen_off_pred.rds")))
8 Climatic distances
Code
RCPs <-c("RCP2.6","RCP8.5")EMs <-c("01","04","06","15")ref_timeslice <-"1980-2000"pred_timeslices <-c("2030-2050","2060-2080")clim_data <-readRDS(here("data/ScotsPine/subdf.rds")) clim_var_avg_selected <-unique(clim_data$clim_var_avg)df_clim_dist <-lapply(EMs, function(em_i){lapply(RCPs, function(rcp_i){lapply(pred_timeslices, function(pred_timeslice_i){# Mean - center the current and future climatic data clim_data_scaled <-scale_clim(clim_data = clim_data,em = em_i,rcp = rcp_i, ref_timeslice = ref_timeslice,pred_timeslice = pred_timeslice_i,clim_var_avg_selected = clim_var_avg_selected)# Calculate absolute differences df <-abs(clim_data_scaled$ref_climdf_scaled[,clim_var_avg_selected] - clim_data_scaled$pred_climdf_scaled[,clim_var_avg_selected]) %>%as_tibble() %>%bind_cols(clim_data_scaled$pred_climdf_scaled %>% dplyr::select(PopulationCode, Latitude, Longitude, ensemble_member, rcp_scenario, timeslice),.) %>% dplyr::rename(pred_timeslice = timeslice) %>%mutate(eucli =unique(sqrt(rowSums((clim_data_scaled$ref_climdf_scaled[,clim_var_avg_selected] - clim_data_scaled$pred_climdf_scaled[,clim_var_avg_selected])^2)))) }) %>%bind_rows() }) %>%bind_rows() }) %>%bind_rows() %>%mutate(ensemble_member =paste0("EM ",ensemble_member))# We calculate the climatic distances for the average across ensemble membersdf_clim_dist <- df_clim_dist %>%#group_by(PopulationCode, rcp_scenario,pred_timeslice) %>%summarize(across(where(is.numeric), \(x) mean(x, na.rm =TRUE)), .by=c("PopulationCode", "rcp_scenario", "pred_timeslice")) %>%mutate(ensemble_member="Average across EMs") %>%bind_rows(df_clim_dist) %>%pivot_longer(cols =all_of(c(clim_var_avg_selected,"eucli")), names_to ="var_code", values_to ="val") %>%left_join(clim_var_names %>%bind_rows(tibble(code ="eucli", name="All climatic variables (Euclidean distance)")) %>% dplyr::rename(var_code = code, var_name = name)) %>%mutate(method ="CD")
Joining with `by = join_by(var_code)`
9 Merging climatic distances and genomic offset predictions