You can try this:

data <- cbind("a"=sample(1:100000), "b"=sample(1:100000))
fact <- sample(rep(1:10000, each=10))
system.time(std <- by(data, fact, colSums))
by.matrix <- function (data, INDICES, FUN, ...) {
     if (!is.list(INDICES)) {
         IND <- vector("list", 1)
         IND[[1]] <- INDICES
         names(IND) <- deparse(substitute(INDICES))[1]
     }
     else IND <- INDICES
     FUNx <- function(x) FUN(data[x, , drop = FALSE], ...)
     nd <- nrow(data)
     ans <- eval(substitute(tapply(1:nd, IND, FUNx)), as.data.frame 
(data))
     attr(ans, "call") <- match.call()
     class(ans) <- "by"
     ans
}
system.time(mod <- by(data, fact, colSums))
all.equal(std, mod)

I get a 30x speed up
(I'm not sure why the attributes differ, but I'm sure this can be  
fixed...)

______________________________________________
R-devel@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-devel

Reply via email to