Dear friends, I hope you are all doing well. If I am not mistaken, cross-validation is about splitting the data into two parts: a training dataset and a test dataset. I have a dataset containing the number of vehicles sold each month, from January 2008 up to June 2019.
# I decided to go for an 80-20 scheme (where I would use 80% of the data for
# training, and the remaining 20% of the data for testing).
# I am currently using R version 3.6.1 / 64-bit
# I am using packages forecast and MLmetrics
# So I did the following:

library(MLmetrics)
library(forecast)

# Build the data frame first: every row count below is derived from it.
mydataframe <- data.frame(dataset)

# my data consists of 138 rows in total
TotalRows <- nrow(mydataframe)

# Hold out the last 20% of the rows (rounded down) for testing. With 138 rows
# this gives 27 test rows and 111 training rows, i.e. a tiny little bit more
# than 80% for training (roughly 80.4%).
testrows <- floor(0.20 * TotalRows)
trainingrows <- TotalRows - testrows
stopifnot(trainingrows >= 1, testrows >= 1)

# The test set starts right after the last training row, so the two parts
# never overlap.
TestRowStart <- trainingrows + 1

mytrainingdata <- mydataframe[seq_len(trainingrows), ]
TestData <- mydataframe[TestRowStart:TotalRows, ]

# Monthly series starting in January 2008. No `end` is given, so the series
# keeps every training row (111 rows run through March 2017; forcing
# end = c(2017, 2) would silently drop the last one).
# `totalmov` is the sales column in the dput() output further down.
tsmytrainingdata <- ts(
  mytrainingdata$totalmov,
  start = c(2008, 1),
  frequency = 12
)

# lambda = 0 fits the model on the log scale; biasadj = TRUE back-transforms
# the forecasts to means rather than medians.
myarimamodel <- auto.arima(tsmytrainingdata, lambda = 0, biasadj = TRUE)

# Forecast exactly as many months as there are hold-out rows, so predictions
# and actuals line up one to one.
myarimaforec <- forecast(myarimamodel, h = testrows)
myarimaforecframe <- data.frame(myarimaforec$mean)

# MAPE() expects two numeric vectors of equal length: predictions first,
# observed values second.
myarimamodelMAPE <- MAPE(
  y_pred = as.numeric(myarimaforec$mean),
  y_true = TestData$totalmov
)

# can this be considered cross validation?
here is the dput() if my dataset, in the case of the code I put above, mydataframe = datframe (which shows in the dput) structure(list(DATE = structure(c(47L, 35L, 82L, 1L, 94L, 70L, 59L, 13L, 128L, 117L, 106L, 24L, 48L, 36L, 83L, 2L, 95L, 71L, 60L, 14L, 129L, 118L, 107L, 25L, 49L, 37L, 84L, 3L, 96L, 72L, 61L, 15L, 130L, 119L, 108L, 26L, 50L, 38L, 85L, 4L, 97L, 73L, 62L, 16L, 131L, 120L, 109L, 27L, 51L, 39L, 86L, 5L, 98L, 74L, 63L, 17L, 132L, 121L, 110L, 28L, 52L, 40L, 87L, 6L, 99L, 75L, 64L, 18L, 133L, 122L, 111L, 29L, 53L, 41L, 88L, 7L, 100L, 76L, 65L, 19L, 134L, 123L, 112L, 30L, 54L, 42L, 89L, 8L, 101L, 77L, 66L, 20L, 135L, 124L, 113L, 31L, 55L, 43L, 90L, 9L, 102L, 78L, 67L, 21L, 136L, 125L, 114L, 32L, 56L, 44L, 91L, 10L, 103L, 79L, 68L, 22L, 137L, 126L, 115L, 33L, 57L, 45L, 92L, 11L, 104L, 80L, 69L, 23L, 138L, 127L, 116L, 34L, 58L, 46L, 93L, 12L, 105L, 81L ), .Label = c("Apr-08", "Apr-09", "Apr-10", "Apr-11", "Apr-12", "Apr-13", "Apr-14", "Apr-15", "Apr-16", "Apr-17", "Apr-18", "Apr-19", "Aug-08", "Aug-09", "Aug-10", "Aug-11", "Aug-12", "Aug-13", "Aug-14", "Aug-15", "Aug-16", "Aug-17", "Aug-18", "Dec-08", "Dec-09", "Dec-10", "Dec-11", "Dec-12", "Dec-13", "Dec-14", "Dec-15", "Dec-16", "Dec-17", "Dec-18", "Feb-08", "Feb-09", "Feb-10", "Feb-11", "Feb-12", "Feb-13", "Feb-14", "Feb-15", "Feb-16", "Feb-17", "Feb-18", "Feb-19", "Jan-08", "Jan-09", "Jan-10", "Jan-11", "Jan-12", "Jan-13", "Jan-14", "Jan-15", "Jan-16", "Jan-17", "Jan-18", "Jan-19", "Jul-08", "Jul-09", "Jul-10", "Jul-11", "Jul-12", "Jul-13", "Jul-14", "Jul-15", "Jul-16", "Jul-17", "Jul-18", "Jun-08", "Jun-09", "Jun-10", "Jun-11", "Jun-12", "Jun-13", "Jun-14", "Jun-15", "Jun-16", "Jun-17", "Jun-18", "Jun-19", "Mar-08", "Mar-09", "Mar-10", "Mar-11", "Mar-12", "Mar-13", "Mar-14", "Mar-15", "Mar-16", "Mar-17", "Mar-18", "Mar-19", "May-08", "May-09", "May-10", "May-11", "May-12", "May-13", "May-14", "May-15", "May-16", "May-17", "May-18", "May-19", "Nov-08", "Nov-09", "Nov-10", "Nov-11", "Nov-12", "Nov-13", 
"Nov-14", "Nov-15", "Nov-16", "Nov-17", "Nov-18", "Oct-08", "Oct-09", "Oct-10", "Oct-11", "Oct-12", "Oct-13", "Oct-14", "Oct-15", "Oct-16", "Oct-17", "Oct-18", "Sep-08", "Sep-09", "Sep-10", "Sep-11", "Sep-12", "Sep-13", "Sep-14", "Sep-15", "Sep-16", "Sep-17", "Sep-18" ), class = "factor"), totalmov = c(18368L, 14629L, 19310L, 20273L, 16097L, 16003L, 16146L, 14312L, 15319L, 19480L, 14267L, 18309L, 12533L, 7262L, 4914L, 5854L, 7626L, 5708L, 7678L, 6927L, 5923L, 9020L, 8975L, 11214L, 8461L, 9512L, 13410L, 12526L, 11374L, 17829L, 13174L, 22175L, 14551L, 17311L, 16491L, 11970L, 14527L, 16905L, 16488L, 14356L, 13855L, 11468L, 16514L, 13025L, 14153L, 19022L, 18262L, 9609L, 18603L, 9389L, 15899L, 13395L, 10689L, 11137L, 13818L, 12983L, 10083L, 14301L, 11912L, 12106L, 12686L, 7947L, 11442L, 13656L, 12093L, 11433L, 14732L, 11175L, 10449L, 14286L, 10935L, 10627L, 12076L, 10170L, 9264L, 13859L, 9821L, 10384L, 12372L, 14902L, 11804L, 9911L, 11841L, 10127L, 12615L, 6851L, 9181L, 13667L, 12759L, 9531L, 12636L, 14683L, 10383L, 16141L, 12132L, 8123L, 12858L, 7811L, 10865L, 11931L, 10397L, 6020L, 9384L, 13473L, 12702L, 14671L, 12485L, 16787L, 11698L, 12988L, 13120L, 11411L, 12317L, 9905L, 13387L, 10928L, 10697L, 16790L, 10381L, 10121L, 11728L, 9625L, 9345L, 18263L, 17753L, 12488L, 14469L, 13134L, 17799L, 14770L, 17104L, 11912L, 16229L, 14273L, 13223L, 15277L, 15185L, 15568L)), class = "data.frame", row.names = c(NA, -138L)) Best regards, Paul [[alternative HTML version deleted]] ______________________________________________ R-help@r-project.org mailing list -- To UNSUBSCRIBE and more, see https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.