On 16-02-2012, at 09:01, Petr Savicky wrote: > > Hi. > > There were several solutions in this thread. Their speed differs > quite significantly. Here is a comparison. > > patrn <- 1:4 > exmpl <- sample(1:4, 10000, replace=TRUE) > > occur1 <- function(patrn, exmpl) > { > m <- length(patrn) > n <- length(exmpl) > candidate <- seq.int(length=n-m+1) > for (i in seq.int(length=m)) { > candidate <- candidate[patrn[i] == exmpl[candidate + i - 1]] > } > candidate > } > > occur2 <- function(patrn, exmpl) > { > patrn.rev <- rev(patrn) > w <- embed(exmpl,length(patrn)) > which(apply(w,1,function(r) all(r == patrn.rev))) > } > > occur3 <- function(patrn, exmpl) > { > patrn.rev <- rev(patrn) > w <- embed(exmpl,length(patrn)) > which(rowSums(w == rep(rev(patrn), each=nrow(w))) == ncol(w)) > } > > occur4 <- function(patrn, exmpl) > { > # requires patrn without duplicates > n = length(patrn) > r = rle(diff(match(exmpl, patrn)) == 1L) > cumsum(r$length)[r$values & r$length == (n - 1L)] - (n - 2L) > } > > occur5 <- function(patrn, exmpl) > { > which( sapply( 1:(length(exmpl)-length(patrn)+1), function(i) isTRUE( > all.equal( patrn, exmpl[i + 0:(length(patrn)-1) ] ) ) ) ) > } > > occur6 <- function(patrn, exmpl) > { > indx <- embed(rev(seq_along(exmpl)), length(patrn)) > matches <- apply(indx, 1, function(.indx){ > all(exmpl[.indx] == patrn) > }) > rev(indx[matches, 1L]) > } > > occur7 <- function(patrn, exmpl) > { > which(rollapply(exmpl, length(patrn), identical, patrn, fill = FALSE, > align = "left")) > } > > library(zoo) > > t1 <- system.time( out1 <- occur1(patrn, exmpl) ) > t2 <- system.time( out2 <- occur2(patrn, exmpl) ) > t3 <- system.time( out3 <- occur3(patrn, exmpl) ) > t4 <- system.time( out4 <- occur4(patrn, exmpl) ) > t5 <- system.time( out5 <- occur5(patrn, exmpl) ) > t6 <- system.time( out6 <- occur6(patrn, exmpl) ) > t7 <- system.time( out7 <- occur7(patrn, exmpl) ) > > print(identical(out1, out2)) > print(identical(out1, out3)) > print(identical(out1, out4)) > print(identical(out1, out5)) > print(identical(out1, out6)) > print(identical(out1, out7)) > print(rbind(t1, t2, t3, t4, t5, t6, t7)) > > The output was > > [1] TRUE > [1] TRUE > [1] TRUE > [1] TRUE > [1] TRUE > [1] TRUE > user.self sys.self elapsed user.child sys.child > t1 0.001 0 0.001 0 0 > t2 0.062 0 0.061 0 0 > t3 0.002 0 0.002 0 0 > t4 0.001 0 0.001 0 0 > t5 1.749 0 1.749 0 0 > t6 0.068 0 0.068 0 0 > t7 0.172 0 0.172 0 0
And by modifying occur5 to this occur5 <- function(patrn, exmpl) { which( sapply( 1:(length(exmpl)-length(patrn)+1), function(i) identical( patrn, exmpl[i + 0:(length(patrn)-1) ] ) ) ) } occur5 can be made a lot faster. On my computer instead of user.self sys.self elapsed user.child sys.child t1 0.001 0.000 0.001 0 0 t2 0.061 0.007 0.068 0 0 t3 0.002 0.001 0.002 0 0 t4 0.001 0.000 0.002 0 0 t5 1.640 0.037 1.677 0 0 t6 0.079 0.004 0.084 0 0 t7 0.256 0.004 0.260 0 0 I got user.self sys.self elapsed user.child sys.child t1 0.000 0.000 0.001 0 0 t2 0.060 0.004 0.065 0 0 t3 0.002 0.001 0.003 0 0 t4 0.001 0.000 0.002 0 0 t5 0.070 0.002 0.071 0 0 t6 0.076 0.000 0.077 0 0 t7 0.246 0.006 0.252 0 0 Berend ______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.