I am trying to run the following code in R on a Linux cluster, and I would like to use the cluster's full processing power (by specifying cores, nodes, and memory). The code essentially runs predictions from a fitted GAM regression and saves the results as one CSV file per data set, for multiple data sets (only two are shown here).
Is it possible to run this code using HPC packages such as Rmpi/snow/doParallel? Thank you!

#####################
library(data.table)
library(mgcv)
library(reshape2)
library(dplyr)
library(tidyr)
library(lubridate)
library(DataCombine)

# Model ----
# NOTE(review): `cont` is not created anywhere in this script; it has to exist
# in the session before the model is fitted.
gam_max_count_wk <- gam(
  count_pop ~ factor(citycode) + factor(year) + factor(week) +
    s(lnincome) + s(tmax) + s(hmax),
  data = cont, na.action = "na.omit", method = "ML"
)

# Settings ----
merge_key <- c("FIPS", "year", "month", "week")

# Covariate values held fixed for every prediction.
ref_citycode <- 101
ref_year <- 2010
ref_lnincome <- 10.262

future_dir <- "/work/sd00815/giss_future"
out_dir <- "/work/sd00815/projections_data/year_wk_fe/giss/max"

# One entry per future data set; append more names here to process more sets.
scenarios <- c("4.5_2021_2050", "4.5_2081_2100")

# NOTE(review): the humidity table is read from the *_temp.csv file, exactly as
# in the original script. That looks like a copy-paste slip (the historic run
# reads a separate giss_hum_hist.csv), and it makes hmax a copy of tmax.
# Confirm the real humidity file name and change this suffix.
humid_suffix <- "_temp.csv"

# Number of scenarios processed at the same time. With 1 the scenarios run
# sequentially; raise it to the number of cores allocated to the job.
n_cores <- 1L

# Helpers ----

# Read one climate CSV as a data.table.
#   path   : CSV file to read.
#   rename : optional named character vector, c(old_name = "new_name").
# A column named `X` is dropped when present (the original script removed it
# from every table).
read_climate <- function(path, rename = NULL) {
  dt <- as.data.table(read.csv(path))
  if (!is.null(rename)) {
    setnames(dt, names(rename), unname(rename))
  }
  if ("X" %in% names(dt)) {
    dt[, X := NULL]
  }
  dt
}

# Predict with gam_max_count_wk and average the predictions within each FIPS.
#   climate    : merged temperature/humidity data.table; must contain the
#                columns FIPS, week, tmax and hmax.
#   out_name   : name of the column that holds the per-FIPS mean.
#   fixed_week : if not NULL, every row is predicted with this week value
#                instead of its own.
# Returns a data.table keyed by FIPS (stored as a factor) with one row per
# FIPS. The aggregation is done in data.table rather than dplyr so that the
# result stays a data.table and can be keyed and merged afterwards.
predict_mean_by_fips <- function(climate, out_name, fixed_week = NULL) {
  newdata <- climate[, .(week, tmax, hmax)]
  if (!is.null(fixed_week)) {
    newdata[, week := fixed_week]
  }
  newdata[, `:=`(
    citycode = ref_citycode,
    year = ref_year,
    lnincome = ref_lnincome
  )]

  pred <- as.numeric(predict(gam_max_count_wk, newdata))

  by_fips <- data.table(FIPS = climate$FIPS, pred = pred)[
    , .(value = mean(pred)),
    by = FIPS
  ]
  setnames(by_fips, "value", out_name)
  by_fips[, FIPS := as.factor(FIPS)]
  setkeyv(by_fips, "FIPS")
  by_fips
}

# Process one future scenario: read and merge its two tables, predict,
# compare with the historic means and write the result to a CSV file.
#   scenario : scenario name as used in the file names, e.g. "4.5_2021_2050".
#   max_hist : per-FIPS historic means from predict_mean_by_fips().
# Returns the path of the written file, invisibly.
run_scenario <- function(scenario, max_hist) {
  temp_sim <- read_climate(
    file.path(future_dir, paste0("giss_", scenario, "_temp.csv")),
    rename = c(max = "tmax", min = "tmin", avg = "tmean")
  )
  humid_sim <- read_climate(
    file.path(future_dir, paste0("giss_", scenario, humid_suffix)),
    rename = c(max = "hmax", min = "hmin", avg = "hmean")
  )
  sim <- merge(temp_sim, humid_sim, by = merge_key)

  max_sim <- predict_mean_by_fips(sim, "pred_sim", fixed_week = 1)

  # Percentage change of the future mean relative to the historic mean.
  max_change <- merge(max_hist, max_sim, by = "FIPS")
  max_change[, change := (pred_sim - pred_hist) / pred_hist * 100]

  out_file <- file.path(out_dir, paste0("giss_", scenario, ".csv"))
  write.csv(max_change, file = out_file)
  invisible(out_file)
}

# Historic ----
# Named hist_data rather than hist so that graphics::hist() is not masked.
hist_data <- merge(
  read_climate("/work/sd00815/giss_historic/giss_temp_hist.csv"),
  read_climate("/work/sd00815/giss_historic/giss_hum_hist.csv"),
  by = merge_key
)

# NOTE(review): as in the original script, the historic predictions use each
# row's own week, whereas the future predictions below fix week to 1. Confirm
# that this difference is intended before comparing the two.
max_hist <- predict_mean_by_fips(hist_data, "pred_hist")

# Future ----
# Every scenario reads its own input files and writes its own output file, so
# the scenarios do not depend on one another and can run on separate cores.
# mclapply() forks the current R process (Linux/macOS only); with
# mc.cores = 1 it simply runs the scenarios one after another.
results <- parallel::mclapply(
  scenarios, run_scenario,
  max_hist = max_hist, mc.cores = n_cores
)

# Forked workers hand errors back as "try-error" objects instead of stopping
# the script, so check for them explicitly.
failed <- vapply(results, inherits, logical(1), what = "try-error")
if (any(failed)) {
  stop(
    "Scenario(s) failed: ", paste(scenarios[failed], collapse = ", "),
    call. = FALSE
  )
}
####################

Sincerely,
Milu

[[alternative HTML version deleted]]

______________________________________________
R-help@r-project.org mailing list -- To UNSUBSCRIBE and more, see https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.