Hi,

Unfortunately, these solutions can't handle NA. Any suggestions? Some rows
don't have an IP address in the ip column. This causes an error or a wrong result.

Eddie


> library(gsubfn)
> library(gtools)
> library(rbenchmark)
>
> n <- 10000
> df <- data.frame(
>   a = rnorm(n),
>   b = rnorm(n),
>   c = rnorm(n),
>   ip = replicate(n, paste(sample(255, 4), collapse='.'), simplify=TRUE)
> )
>
> res <- benchmark(columns=c('test', 'elapsed'), replications=10, order=NULL,
>   peda = {
>     connection <- textConnection(as.character(df$ip))
>     o <- do.call(order, read.table(connection, sep='.'))
>     close(connection)
>     df[o, ]
>   },
>
>   peda2 = {
>     connection <- textConnection(as.character(df$ip))
>     dfT <- read.table(connection, sep='.', colClasses=rep("integer",
> 4), quote="", na.strings=NULL, blank.lines.skip=FALSE)
>     close(connection)
>     o <- do.call(order, dfT)
>     df[o, ]
>   },
>
>   hb = {
>     ip <- strsplit(as.character(df$ip), split=".", fixed=TRUE)
>     ip <- unlist(ip, use.names=FALSE)
>     ip <- as.integer(ip)
>     dim(ip) <- c(4, nrow(df))
>     ip <- 256^3*ip[1,] + 256^2*ip[2,] + 256*ip[3,] + ip[4,]
>     o <- order(ip)
>     df[o, ]
>   },
>
>   hb2 = {
>     ip <- strsplit(as.character(df$ip), split=".", fixed=TRUE)
>     ip <- unlist(ip, use.names=FALSE)
>     ip <- as.integer(ip);
>     dim(ip) <- c(4, nrow(df))
>     o <- sort.list(ip[4,], method="radix", na.last=TRUE)
>     for (kk in 3:1) {
>       o <- o[sort.list(ip[kk,o], method="radix", na.last=TRUE)]
>     }
>     df[o, ]
>   }
> )
>
> print(res)
>
>    test elapsed
> 1  peda    4.12
> 2 peda2    4.08
> 3    hb    0.28
> 4   hb2    0.25
>
>
> On Sun, May 31, 2009 at 12:42 AM, Wacek Kusnierczyk
>
> <waclaw.marcin.kusnierc...@idi.ntnu.no> wrote:
> > edwin Sendjaja wrote:
> >> Hi VQ,
> >>
> >> Thank you. It works like charm. But I think Peter's code is faster. What
> >> is the difference?
> >
> > i think peter's code is more r-elegant, though less generic.  here's a
> > quick test, with not so surprising results.  gsubfn is implemented in r,
> > not c, and it is painfully slow in this test. i also added gabor's
> > suggestion.
> >
> >    library(gsubfn)
> >    library(gtools)
> >    library(rbenchmark)
> >
> >    n = 1000
> >    df = data.frame(
> >       a=rnorm(n),
> >       b = rnorm(n),
> >       c = rnorm(n),
> >       ip = replicate(n, paste(sample(255, 4), collapse='.'),
> > simplify=TRUE))
> >    benchmark(columns=c('test', 'elapsed'), replications=10, order=NULL,
> >       peda={
> >          connection = textConnection(as.character(df$ip))
> >          o = do.call(order, read.table(connection, sep='.'))
> >          close(connection)
> >          df[o, ] },
> >       waku=df[order(gsubfn(perl=TRUE,
> >          '[0-9]+',
> >          ~ sprintf('%03d', as.integer(x)),
> >          as.character(df$ip))), ],
> >       gagr=df[mixedorder(df$ip), ] )
> >
> >    # peda 0.070
> >    # waku 7.070
> >    # gagr 4.710
> >
> >
> > vQ
> >
> > ______________________________________________
> > R-help@r-project.org mailing list
> > https://stat.ethz.ch/mailman/listinfo/r-help
> > PLEASE do read the posting guide
> > http://www.R-project.org/posting-guide.html and provide commented,
> > minimal, self-contained, reproducible code.
>
> ______________________________________________
> R-help@r-project.org mailing list
> https://stat.ethz.ch/mailman/listinfo/r-help
> PLEASE do read the posting guide
> http://www.R-project.org/posting-guide.html and provide commented, minimal,
> self-contained, reproducible code.



        [[alternative HTML version deleted]]

______________________________________________
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.

Reply via email to