Dear Jen, Vectorisation is the key here. A sample of 250k takes only 2.5 seconds on my machine; 2.5 million takes 29 seconds.
# Vectorised generator of random Moroccan mobile phone numbers (MPNs).
# Format: +212-6xx-xxxxxx, i.e. country code, 3-digit operator prefix and
# 6 random digits. The result is the character vector `phonenumbers`.

# number of phone numbers to generate
n <- 250e3

# country code
cc <- "+212"

# prefixes, one vector per mobile network operator
IAM <- c(
  610, 611, 613, 615, 616, 618, 641, 642, 648, 650, 651, 652, 653, 654, 655,
  658, 659, 661, 662, 666, 667, 668, 670, 671, 672, 673, 676, 677, 678
)
Medi <- c(
  612, 614, 617, 619, 644, 645, 649, 656, 657, 660, 663, 664, 665, 669, 674,
  675, 679
)
# Written without the leading zero: R parses 0636 as the number 636 anyway.
MOROC <- c(636, 637)

# one row per prefix, labelled with the operator it belongs to
prefix <- rbind(
  data.frame(region = "IAM", prefix = IAM),
  data.frame(region = "Medi", prefix = Medi),
  data.frame(region = "MOROC", prefix = MOROC)
)
# add Freq: the number of prefixes owned by the same operator
prefix <- merge(
  prefix,
  as.data.frame(table(region = prefix$region))
)

system.time({
  # The loop being replaced picks an operator uniformly and then one of that
  # operator's prefixes uniformly, so each prefix has probability
  # proportional to 1 / Freq (not Freq, which would favour the operators
  # with the longest prefix lists).
  prefix.sample <- sample(
    prefix$prefix, n,
    prob = 1 / prefix$Freq, replace = TRUE
  )
  # Six independent uniform digits are the same as one uniform draw from
  # 000000..999999, zero-padded. This avoids a row-wise apply(), which is
  # itself a loop. (The timings quoted in the message were measured with the
  # row-wise apply(); this form is faster.)
  nums <- sprintf("%06d", sample.int(1e6, n, replace = TRUE) - 1L)
  phonenumbers <- paste0(cc, prefix.sample, nums)
})

# ir. Thierry Onkelinx
# Instituut voor natuur- en bosonderzoek / Research Institute for Nature and
# Forest
# team Biometrie & Kwaliteitszorg / team Biometrics & Quality Assurance
# Kliniekstraat 25
# 1070 Anderlecht
# Belgium
#
# To call in the statistician after the experiment is done may be no more
# than asking him to perform a post-mortem examination: he may be able to say
# what the experiment died of. ~ Sir Ronald Aylmer Fisher
# The plural of anecdote is not data. ~ Roger Brinner
# The combination of some data and an aching desire for an answer does not
# ensure that a reasonable answer can be extracted from a given body of data.
# ~ John Tukey
#
# 2017-02-28 17:22 GMT+01:00 Jen <plessthanpointohf...@gmail.com>:
#
# > Hi, I'm trying to generate 2.5 million phone numbers. The code below
# > generates a random sample of 250K MPNS for Morocco. It takes about 10
# > minutes.
# >
# > I need to generate 2.5 million. I've run it through once and it took about
# > 45 hours.
# >
# > Is there a way to speed this up?
> > Thanks, > > Jen > > # generate random sample of mobile phone numbers (MPNs) - Morocco > > # Mobile phone number format: +212-6xx-xxxxxx > > library(data.table) > > # country code > > cc <- "+212" > > # prefixes > > IAM <- data.table(matrix(c(610, 611, 613, 615, 616, > 618, 641, 642, 648, 650, 651, 652, 653, > 654, 655, 658, 659, 661, 662, 666, 667, > 668, 670, 671, 672, 673, > 676, 677, 678), dimnames=list(NULL, "IAM"))) > > > > Medi <- data.table(matrix(c(612, 614, 617, 619, 644, > 645, 649, 656, 657, 660, 663, 664, 665, > 669, 674, 675, 679), dimnames=list(NULL, "Medi"))) > > MOROC <- data.table(matrix(c(0636, 0637), dimnames=list(NULL, "MOROC"))) > > # combine > > mno <- c(IAM, Medi, MOROC) > > # generate MPNs > MPN <- NULL > > system.time(for (i in 1:250000){ > # randomly select number from list > > prefix <- sapply(mno[floor(runif(1, 1, length(mno)+1))], function(x) > sample(x, 1)) > > MNO <- names(prefix) > > # randomly generate 6 numbers between 0 and 9, inclusive > > nums <- floor(runif(6, 0, 9)) > > # concatenate > > tmp <- c(paste(c(cc,prefix,t(nums)), sep="", collapse=""), MNO) > > MPN[[i]] <- tmp > > i <- i+1 > > > }) > > # unlist > > df <- data.table(matrix(unlist(MPN), nrow=length(MPN), ncol=2, byrow=T, > dimnames = list(seq(1, length(MPN),1), c("MPN", "MNO")) )) > > [[alternative HTML version deleted]] > > ______________________________________________ > R-help@r-project.org mailing list -- To UNSUBSCRIBE and more, see > https://stat.ethz.ch/mailman/listinfo/r-help > PLEASE do read the posting guide http://www.R-project.org/ > posting-guide.html > and provide commented, minimal, self-contained, reproducible code. > [[alternative HTML version deleted]] ______________________________________________ R-help@r-project.org mailing list -- To UNSUBSCRIBE and more, see https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.